@@ -1016,6 +1016,82 @@ async function keylessPost(
10161016 return json ;
10171017}
10181018
/**
 * Fetches the status of a crawl job and, when the first response carries a
 * `next` cursor, follows the cursors to collect the documents of every page.
 *
 * Every request passes `ORIGIN_HEADERS` as the second argument to `http.get`.
 *
 * @param client - Firecrawl client; its `http` helper is reached through an
 *   `any` cast (presumably because it is not part of the public typings).
 * @param jobId - Crawl job identifier; URL-encoded before being put in the path.
 * @returns A summary with `id`, `status`, `completed`, `total`, `creditsUsed`,
 *   `expiresAt`, `next` and `data`. Counters default to 0 when absent. `next`
 *   is `null` once every page was collected; if a follow-up page reports
 *   `success: false`, `next` is the cursor of that page so callers can tell
 *   that `data` is incomplete.
 */
async function getCrawlStatusWithOrigin(
  client: FirecrawlApp,
  jobId: string
): Promise<Record<string, unknown>> {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- `http` is reached via an untyped cast
  const http = (client as any).http;
  const res = await http.get(
    `/v2/crawl/${encodeURIComponent(jobId)}`,
    ORIGIN_HEADERS
  );
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- response shape is not declared here
  const body = (res?.data ?? {}) as any;
  const initialDocs: unknown[] = Array.isArray(body.data) ? body.data : [];

  // Single place that shapes the result, shared by both exit paths.
  const summarize = (
    next: unknown,
    data: unknown[]
  ): Record<string, unknown> => ({
    id: jobId,
    status: body.status,
    completed: body.completed ?? 0,
    total: body.total ?? 0,
    creditsUsed: body.creditsUsed,
    expiresAt: body.expiresAt,
    next,
    data,
  });

  if (!body.next) {
    return summarize(body.next ?? null, initialDocs);
  }

  const docs = initialDocs.slice();
  // Cursors already requested; guards against a response that points back to
  // an earlier page, which would otherwise loop forever.
  const visited = new Set<unknown>();
  let cursor = body.next as string | null;

  while (cursor) {
    visited.add(cursor);
    const pageRes = await http.get(cursor, ORIGIN_HEADERS);
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- response shape is not declared here
    const payload = (pageRes?.data ?? {}) as any;
    // Leave `cursor` on the failed page so the caller sees what is missing.
    if (!payload.success) break;

    const nested = !Array.isArray(payload.data);
    const pageData = nested ? payload.data?.pages || [] : payload.data;
    // Push one by one: spreading a very large page into push() arguments can
    // exceed the engine's argument limit.
    for (const doc of pageData) {
      docs.push(doc);
    }

    const following = payload.next ?? (nested ? payload.data?.next : null) ?? null;
    cursor = following && !visited.has(following) ? following : null;
  }

  return summarize(cursor, docs);
}
1071+
/**
 * Polls `getCrawlStatusWithOrigin` until the job reports one of the statuses
 * `completed`, `failed` or `cancelled`, then returns that status object.
 *
 * @param client - Firecrawl client, forwarded unchanged to the status helper.
 * @param jobId - Crawl job identifier.
 * @param pollInterval - Seconds between polls. Values below one second are
 *   raised to one second; `NaN` falls back to the 2-second default, and the
 *   delay is capped at the largest value `setTimeout` honours.
 * @param timeout - Optional overall budget in seconds. A missing or non-finite
 *   value means "wait indefinitely".
 * @returns The first status object whose `status` is terminal.
 * @throws Error when `timeout` elapses before a terminal status is seen.
 */
async function waitForCrawlCompletionWithOrigin(
  client: FirecrawlApp,
  jobId: string,
  pollInterval = 2,
  timeout?: number
): Promise<Record<string, unknown>> {
  const TERMINAL_STATUSES = ['completed', 'failed', 'cancelled'];
  const MIN_DELAY_MS = 1000;
  const DEFAULT_DELAY_MS = 2000;
  // Node coerces delays that are NaN or above this value to 1 ms, which would
  // turn the loop into a near-busy poll against the API.
  const MAX_DELAY_MS = 2147483647;

  const requestedMs = pollInterval * 1000;
  const intervalMs = Number.isNaN(requestedMs)
    ? DEFAULT_DELAY_MS
    : Math.min(MAX_DELAY_MS, Math.max(MIN_DELAY_MS, requestedMs));

  const timeoutMs =
    typeof timeout === 'number' && Number.isFinite(timeout)
      ? timeout * 1000
      : undefined;
  const startedAt = Date.now();

  while (true) {
    const status = await getCrawlStatusWithOrigin(client, jobId);
    if (TERMINAL_STATUSES.includes(String(status.status ?? ''))) {
      return status;
    }

    let delayMs = intervalMs;
    if (timeoutMs !== undefined) {
      const elapsedMs = Date.now() - startedAt;
      if (elapsedMs > timeoutMs) {
        throw new Error(`Crawl job ${jobId} did not complete within ${timeout}s`);
      }
      // Never sleep past the deadline: wake just after it for one last poll
      // instead of overshooting by up to a full interval.
      delayMs = Math.min(intervalMs, Math.max(1, timeoutMs - elapsedMs + 1));
    }

    await new Promise<void>((resolve) => setTimeout(resolve, delayMs));
  }
}
1094+
10191095const feedbackIssueSchema = z
10201096 . string ( )
10211097 . trim ( )
@@ -1459,11 +1535,33 @@ server.addTool({
14591535 delete opts . webhookHeaders ;
14601536
14611537 const cleaned = removeEmptyTopLevel ( opts ) ;
1538+ const pollInterval =
1539+ typeof cleaned . pollInterval === 'number'
1540+ ? ( cleaned . pollInterval as number )
1541+ : 2 ;
1542+ const timeout =
1543+ typeof cleaned . timeout === 'number'
1544+ ? ( cleaned . timeout as number )
1545+ : undefined ;
1546+ delete ( cleaned as Record < string , unknown > ) . pollInterval ;
1547+ delete ( cleaned as Record < string , unknown > ) . timeout ;
1548+
14621549 log . info ( 'Starting crawl' , { url : String ( url ) } ) ;
1463- const res = await client . crawl ( String ( url ) , {
1464- ...( cleaned as any ) ,
1550+ const started = await ( client as any ) . http . post ( '/v2/crawl' , {
1551+ url : String ( url ) ,
1552+ ...( cleaned as Record < string , unknown > ) ,
14651553 origin : ORIGIN ,
14661554 } ) ;
1555+ const crawlId = started ?. data ?. id ;
1556+ if ( ! crawlId ) {
1557+ return asText ( started ?. data ?? { } ) ;
1558+ }
1559+ const res = await waitForCrawlCompletionWithOrigin (
1560+ client ,
1561+ crawlId ,
1562+ pollInterval ,
1563+ timeout
1564+ ) ;
14671565 return asText ( res ) ;
14681566 } ,
14691567} ) ;
@@ -1497,11 +1595,8 @@ Check the status of a crawl job.
14971595 ) : Promise < string > => {
14981596 const client = getClient ( session ) ;
14991597 const id = ( args as any ) . id as string ;
1500- const res = await ( client as any ) . http . get (
1501- `/v2/crawl/${ encodeURIComponent ( id ) } ` ,
1502- ORIGIN_HEADERS
1503- ) ;
1504- return asText ( res ?. data ?? { } ) ;
1598+ const res = await getCrawlStatusWithOrigin ( client , id ) ;
1599+ return asText ( res ) ;
15051600 } ,
15061601} ) ;
15071602
0 commit comments