Skip to content

Commit ccdadac

Browse files
committed
fix(api): tag crawl status polling origin
1 parent 42a3783 commit ccdadac

1 file changed

Lines changed: 102 additions & 7 deletions

File tree

src/index.ts

Lines changed: 102 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1016,6 +1016,82 @@ async function keylessPost(
10161016
return json;
10171017
}
10181018

1019+
/**
 * Fetches the status of a crawl job, passing `ORIGIN_HEADERS` on every request,
 * and merges all paginated result pages into a single document list.
 *
 * The first request is `GET /v2/crawl/{jobId}`. When that response carries a
 * `next` URL, each following page is fetched in turn and its documents are
 * appended to the ones from the first response.
 *
 * Pagination is best-effort: it stops quietly (keeping the documents gathered
 * so far) when a page reports a falsy `success`, when there is no further
 * `next` URL, or when a `next` URL repeats one that was already fetched.
 *
 * @param client - Firecrawl client; its internal `http` helper is used directly.
 * @param jobId - Identifier of the crawl job (URL-encoded before use).
 * @returns An object with `id`, `status`, `completed`, `total`, `creditsUsed`,
 *   `expiresAt`, `next` and `data`. `next` is `null` once pages were followed.
 * @throws Whatever the underlying `http.get` call rejects with; request errors
 *   are not caught here.
 */
async function getCrawlStatusWithOrigin(
  client: FirecrawlApp,
  jobId: string
): Promise<Record<string, unknown>> {
  // The client's `http` helper is reached through a cast so that
  // ORIGIN_HEADERS can be supplied as the second argument of each request.
  const http = (client as any).http;

  const res = await http.get(
    `/v2/crawl/${encodeURIComponent(jobId)}`,
    ORIGIN_HEADERS
  );
  const body = (res?.data ?? {}) as any;
  const initialDocs: unknown[] = Array.isArray(body.data) ? body.data : [];

  // Fields shared by the single-page and the multi-page result.
  const summary = {
    id: jobId,
    status: body.status,
    completed: body.completed ?? 0,
    total: body.total ?? 0,
    creditsUsed: body.creditsUsed,
    expiresAt: body.expiresAt,
  };

  if (!body.next) {
    return { ...summary, next: body.next ?? null, data: initialDocs };
  }

  const docs = initialDocs.slice();
  // Remember every URL already requested so that a server which keeps handing
  // back the same `next` link cannot trap this function in an endless loop.
  const visited = new Set<string>();
  let current = body.next as string | null;

  while (current && !visited.has(current)) {
    visited.add(current);

    const pageRes = await http.get(current, ORIGIN_HEADERS);
    const payload = (pageRes?.data ?? {}) as any;
    // A page without a truthy `success` ends pagination; earlier docs are kept.
    if (!payload.success) break;

    // Documents are either the `data` array itself or nested under `data.pages`.
    const pageData = Array.isArray(payload.data)
      ? payload.data
      : payload.data?.pages;
    if (Array.isArray(pageData)) {
      // Append one by one: spreading a very large page into `push(...)` can
      // exceed the engine's argument limit and throw a RangeError.
      for (const doc of pageData) {
        docs.push(doc);
      }
    }

    current =
      payload.next ??
      (Array.isArray(payload.data) ? null : payload.data?.next) ??
      null;
  }

  return { ...summary, next: null, data: docs };
}
1071+
1072+
/**
 * Polls a crawl job through `getCrawlStatusWithOrigin` until it reaches a
 * terminal status (`completed`, `failed` or `cancelled`) and returns that
 * final status object.
 *
 * @param client - Firecrawl client forwarded to `getCrawlStatusWithOrigin`.
 * @param jobId - Identifier of the crawl job to wait for.
 * @param pollInterval - Seconds to wait between polls. Values below one second
 *   are raised to one second; non-finite values fall back to the default of 2.
 * @param timeout - Optional limit in seconds. It is checked after each poll,
 *   so the total wait can exceed it by up to one poll interval plus one request.
 * @returns The status object from the poll that observed a terminal status.
 * @throws Error when `timeout` is set and the job is still not in a terminal
 *   status once that many seconds have elapsed.
 */
async function waitForCrawlCompletionWithOrigin(
  client: FirecrawlApp,
  jobId: string,
  pollInterval = 2,
  timeout?: number
): Promise<Record<string, unknown>> {
  const terminalStatuses = ['completed', 'failed', 'cancelled'];

  // Node's setTimeout silently turns NaN, Infinity and delays above
  // 2^31 - 1 ms into a 1 ms delay, which would hammer the API in a tight loop.
  // Sanitize the interval once, up front, so the delay is always in range.
  const maxTimerDelayMs = 2_147_483_647;
  const intervalSeconds = Number.isFinite(pollInterval) ? pollInterval : 2;
  const delayMs = Math.min(
    maxTimerDelayMs,
    Math.max(1000, intervalSeconds * 1000)
  );

  const startedAt = Date.now();
  while (true) {
    const status = await getCrawlStatusWithOrigin(client, jobId);
    if (terminalStatuses.includes(String(status.status ?? ''))) {
      return status;
    }
    if (timeout != null && Date.now() - startedAt > timeout * 1000) {
      throw new Error(`Crawl job ${jobId} did not complete within ${timeout}s`);
    }
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
}
1094+
10191095
const feedbackIssueSchema = z
10201096
.string()
10211097
.trim()
@@ -1459,11 +1535,33 @@ server.addTool({
14591535
delete opts.webhookHeaders;
14601536

14611537
const cleaned = removeEmptyTopLevel(opts);
1538+
const pollInterval =
1539+
typeof cleaned.pollInterval === 'number'
1540+
? (cleaned.pollInterval as number)
1541+
: 2;
1542+
const timeout =
1543+
typeof cleaned.timeout === 'number'
1544+
? (cleaned.timeout as number)
1545+
: undefined;
1546+
delete (cleaned as Record<string, unknown>).pollInterval;
1547+
delete (cleaned as Record<string, unknown>).timeout;
1548+
14621549
log.info('Starting crawl', { url: String(url) });
1463-
const res = await client.crawl(String(url), {
1464-
...(cleaned as any),
1550+
const started = await (client as any).http.post('/v2/crawl', {
1551+
url: String(url),
1552+
...(cleaned as Record<string, unknown>),
14651553
origin: ORIGIN,
14661554
});
1555+
const crawlId = started?.data?.id;
1556+
if (!crawlId) {
1557+
return asText(started?.data ?? {});
1558+
}
1559+
const res = await waitForCrawlCompletionWithOrigin(
1560+
client,
1561+
crawlId,
1562+
pollInterval,
1563+
timeout
1564+
);
14671565
return asText(res);
14681566
},
14691567
});
@@ -1497,11 +1595,8 @@ Check the status of a crawl job.
14971595
): Promise<string> => {
14981596
const client = getClient(session);
14991597
const id = (args as any).id as string;
1500-
const res = await (client as any).http.get(
1501-
`/v2/crawl/${encodeURIComponent(id)}`,
1502-
ORIGIN_HEADERS
1503-
);
1504-
return asText(res?.data ?? {});
1598+
const res = await getCrawlStatusWithOrigin(client, id);
1599+
return asText(res);
15051600
},
15061601
});
15071602

0 commit comments

Comments
 (0)