Skip to content

Commit 0925b76

Browse files
authored
(feat/search) Search feedback (#236)
* Nick: search feedback * Update index.ts * Update package.json
1 parent 71aa555 commit 0925b76

3 files changed

Lines changed: 258 additions & 5 deletions

File tree

README.md

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,12 +565,51 @@ Search the web and optionally extract content from search results.
565565

566566
**Returns:**
567567

568-
- Array of search results (with optional scraped content)
568+
- A JSON envelope of the form `{ success, data: { web?, images?, news? }, id, creditsUsed }`. Each result array contains the search results (with optional scraped content). Pass the top-level `id` to `firecrawl_search_feedback` after you've used the results to refund 1 credit (search costs 2) and improve search quality.
569569

570570
**Prompt Example:**
571571

572572
> "Find the latest research papers on AI published in 2023."
573573
574+
### 5b. Search Feedback Tool (`firecrawl_search_feedback`)
575+
576+
Sends structured feedback on a previous `firecrawl_search` result. The first feedback per search id refunds 1 credit and improves Firecrawl's search quality. Idempotent per search id.
577+
578+
**Call this after every search**, whether you used the results or they didn't help. `bad`/`partial` feedback with `missingContent` is just as valuable as `good` feedback.
579+
580+
**Opt out:** set `FIRECRAWL_NO_SEARCH_FEEDBACK=1` (or `FIRECRAWL_DISABLE_SEARCH_FEEDBACK=1`) in the environment when starting the MCP server. The `firecrawl_search_feedback` tool will not be registered, so agents can't call it. Team admins can also disable feedback server-side; in that case the tool is registered but always returns `feedbackErrorCode: "TEAM_OPTED_OUT"`.
581+
582+
**Most important field:** `missingContent`. It's an array of specific pieces of content the agent expected to find but did not. One entry per missing topic — these aggregate across teams and tell us what to index next.
583+
584+
**Daily refund cap (per team, per UTC day, default 100 credits).** Once a team's `creditsRefundedToday` reaches `dailyRefundCap`, further submissions still record feedback but no longer refund credits. The response sets `dailyCapReached: true`. Agents should stop calling this tool for the rest of the UTC day when they see that flag.
585+
586+
**Usage Example:**
587+
588+
```json
589+
{
590+
"name": "firecrawl_search_feedback",
591+
"arguments": {
592+
"searchId": "0193f6c5-1234-7890-abcd-1234567890ab",
593+
"rating": "good",
594+
"valuableSources": [
595+
{
596+
"url": "https://docs.firecrawl.dev/features/search",
597+
"reason": "Most up-to-date description of /search."
598+
}
599+
],
600+
"missingContent": [
601+
{ "topic": "Pricing for the search endpoint", "description": "No pricing tier table for /search specifically." },
602+
{ "topic": "Per-team rate limits" }
603+
],
604+
"querySuggestions": "Boost docs.firecrawl.dev for queries that mention 'firecrawl'"
605+
}
606+
}
607+
```
608+
609+
**Returns:**
610+
611+
- `{ success, feedbackId, creditsRefunded, creditsRefundedToday, dailyRefundCap, dailyCapReached?, alreadySubmitted?, warning? }` JSON.
612+
574613
### 6. Crawl Tool (`firecrawl_crawl`)
575614

576615
Starts an asynchronous crawl job on a website and extracts content from all pages.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "firecrawl-mcp",
3-
"version": "3.15.0",
3+
"version": "3.16.0",
44
"description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.",
55
"type": "module",
66
"mcpName": "io.github.firecrawl/firecrawl-mcp-server",

src/index.ts

Lines changed: 217 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,7 @@ The query also supports search operators, that you can use if needed to refine t
610610
**Domain filters:** Use includeDomains to restrict results to specific domains, or excludeDomains to remove domains. Do not use both in the same request. Domains must be hostnames only, without protocol or path.
611611
**Scrape Options:** Only use scrapeOptions when you think it is absolutely necessary. When you do so default to a lower limit to avoid timeouts, 5 or lower.
612612
**Optimal Workflow:** Search first using firecrawl_search without formats, then after fetching the results, use the scrape tool to get the content of the relevantpage(s) that you want to scrape
613+
**After the search:** Once you have processed the results (or decided they were not useful), call \`firecrawl_search_feedback\` with the \`id\` from this response. The first feedback per search refunds 1 credit and helps Firecrawl improve search quality.
613614
614615
**Usage Example without formats (Preferred):**
615616
\`\`\`json
@@ -646,7 +647,7 @@ The query also supports search operators, that you can use if needed to refine t
646647
}
647648
}
648649
\`\`\`
649-
**Returns:** Array of search results (with optional scraped content).
650+
**Returns:** A JSON envelope of the form \`{ success, data: { web?, images?, news? }, id, creditsUsed }\`. Each result array contains the search results (with optional scraped content). Pass the top-level \`id\` to \`firecrawl_search_feedback\` after you've used the results.
650651
`,
651652
parameters: z
652653
.object({
@@ -696,14 +697,227 @@ The query also supports search operators, that you can use if needed to refine t
696697
excludeDomains
697698
);
698699
log.info('Searching', { query: searchQuery });
699-
const res = await client.search(searchQuery, {
700+
// Call /v2/search through the SDK's HTTP layer (auth + retries) instead
701+
// of `client.search()` so we preserve the full response envelope. The
702+
// high-level `search()` helper strips `id` and `creditsUsed`, which
703+
// breaks the `firecrawl_search_feedback` workflow that this server
704+
// explicitly tells the LLM to use after every search.
705+
const httpRes = await (client as any).http.post('/v2/search', {
706+
query: searchQuery,
700707
...(cleaned as any),
701708
origin: ORIGIN,
702709
});
703-
return asText(res);
710+
return asText(httpRes?.data ?? {});
704711
},
705712
});
706713

714+
/** Base URL used when `FIRECRAWL_API_URL` is unset or blank. */
const DEFAULT_CLOUD_API_URL = 'https://api.firecrawl.dev';

/**
 * Resolves the API base URL that request paths are appended to.
 *
 * Reads `FIRECRAWL_API_URL` on every call. Surrounding whitespace and ALL
 * trailing slashes are removed so that appending `/v2/...` never produces
 * `//` in the final URL. If nothing usable remains (unset, empty,
 * whitespace-only, or only slashes), {@link DEFAULT_CLOUD_API_URL} is returned.
 *
 * @returns The normalized base URL, without a trailing slash.
 */
function resolveApiBaseUrl(): string {
  const configured = (process.env.FIRECRAWL_API_URL ?? '')
    .trim()
    .replace(/\/+$/, '');
  // `||` (not `??`) on purpose: an empty string must fall back to the default.
  return configured || DEFAULT_CLOUD_API_URL;
}
722+
723+
/** Flag values (matched after trimming and lower-casing) that turn an opt-out on. */
const TRUTHY_ENV_VALUES: readonly string[] = ['1', 'true', 'yes', 'on'];

/**
 * Reports whether the search-feedback tool has been opted out of.
 *
 * `FIRECRAWL_NO_SEARCH_FEEDBACK` and `FIRECRAWL_DISABLE_SEARCH_FEEDBACK` are
 * evaluated independently: the tool is disabled as soon as EITHER holds a
 * truthy value. (Chaining the two with `||` would let a non-empty but falsy
 * first value such as `"0"` mask a truthy second one.)
 *
 * @param env Environment map to inspect; defaults to `process.env`.
 * @returns `true` when at least one opt-out variable is set to a truthy value.
 */
function isSearchFeedbackDisabled(
  env: Record<string, string | undefined> = process.env
): boolean {
  const flags = [
    env.FIRECRAWL_NO_SEARCH_FEEDBACK,
    env.FIRECRAWL_DISABLE_SEARCH_FEEDBACK,
  ];
  return flags.some((value) =>
    TRUTHY_ENV_VALUES.includes((value ?? '').trim().toLowerCase())
  );
}

/** Evaluated once at module load; gates registration of `firecrawl_search_feedback`. */
const SEARCH_FEEDBACK_DISABLED = isSearchFeedbackDisabled();
732+
733+
// Startup notice, written with console.error, that the feedback tool was
// opted out of. Either environment variable can trigger the opt-out, so the
// message names both instead of implying only one was consulted.
if (SEARCH_FEEDBACK_DISABLED) {
  console.error(
    '[firecrawl-mcp] Search feedback tool disabled by FIRECRAWL_NO_SEARCH_FEEDBACK or FIRECRAWL_DISABLE_SEARCH_FEEDBACK; firecrawl_search_feedback will not be registered.'
  );
}
738+
739+
if (!SEARCH_FEEDBACK_DISABLED) {
740+
server.addTool({
741+
name: 'firecrawl_search_feedback',
742+
annotations: {
743+
title: 'Send feedback on a search result',
744+
readOnlyHint: false,
745+
openWorldHint: true,
746+
},
747+
description: `
748+
Send structured feedback on a previous \`firecrawl_search\` result. **Call this immediately after a search where you used the results** so we can improve search quality and refund 1 credit (search costs 2).
749+
750+
Pass the \`searchId\` returned by \`firecrawl_search\` (the \`id\` field on the response) and tell us:
751+
752+
- **rating** — overall result quality: \`good\`, \`partial\`, or \`bad\`.
753+
- **valuableSources** — which result URLs were actually useful, and a short reason why.
754+
- **missingContent** — **the most important field.** An ARRAY of specific pieces of content you expected to find but didn't. One entry per missing piece, each with a short \`topic\` and an optional longer \`description\`. Examples: \`{"topic":"enterprise pricing","description":"no pricing tier table for the Enterprise plan was returned"}\`, \`{"topic":"API rate limits"}\`, \`{"topic":"comparison vs competitors"}\`. **Be specific** — these aggregate across teams and tell us what to index next. Do not pack multiple topics into one entry.
755+
- **querySuggestions** — how the query or response shape could be improved (e.g. "would have liked official docs first", "should boost github.com").
756+
757+
**Substantive-feedback requirement** (zero-effort feedback is rejected with HTTP 400):
758+
- \`good\` — must include at least one \`valuableSources\` entry
759+
- \`partial\` — must include \`valuableSources\` or at least one \`missingContent\` entry
760+
- \`bad\` — must include at least one \`missingContent\` entry or \`querySuggestions\`
761+
762+
**Time window:** Feedback must be submitted within ~2 minutes of the search. Beyond that, the call returns HTTP 409 with \`feedbackErrorCode: "FEEDBACK_WINDOW_EXPIRED"\` — do not retry, just move on. Same goes for any 4xx response: do not retry-loop.
763+
764+
**Behaviors:**
765+
- Idempotent per \`searchId\`. Re-submitting for the same id returns \`alreadySubmitted: true\` with \`creditsRefunded: 0\`.
766+
- Refund only applies to billable searches; preview teams are blocked.
767+
- Failed searches cannot receive feedback (the search itself already returned an error you can act on).
768+
- **Daily refund cap (per team, per UTC day, default 100 credits).** Once a team's \`creditsRefundedToday\` reaches \`dailyRefundCap\`, the response returns \`dailyCapReached: true\` with \`creditsRefunded: 0\`. The feedback is still recorded for search-quality improvement — only the credit refund is gated. **Stop calling this tool for the rest of the UTC day** when you see \`dailyCapReached: true\`.
769+
770+
**When to call:** Right after processing a search result. If the result didn't help, send rating \`bad\` with a clear \`missingContent\` — that is just as valuable as a \`good\` rating.
771+
772+
**Usage Example (good rating with valuable sources + missing content):**
773+
\`\`\`json
774+
{
775+
"name": "firecrawl_search_feedback",
776+
"arguments": {
777+
"searchId": "0193f6c5-1234-7890-abcd-1234567890ab",
778+
"rating": "good",
779+
"valuableSources": [
780+
{ "url": "https://docs.firecrawl.dev/features/search", "reason": "Most up-to-date description of /search." }
781+
],
782+
"missingContent": [
783+
{ "topic": "Pricing for the search endpoint", "description": "No pricing tier table for /search specifically." },
784+
{ "topic": "Rate limits", "description": "Per-team RPS for /search not documented." }
785+
],
786+
"querySuggestions": "Boost docs.firecrawl.dev for queries that mention 'firecrawl'"
787+
}
788+
}
789+
\`\`\`
790+
791+
**Usage Example (bad rating, what was missing):**
792+
\`\`\`json
793+
{
794+
"name": "firecrawl_search_feedback",
795+
"arguments": {
796+
"searchId": "0193f6c5-1234-7890-abcd-1234567890ab",
797+
"rating": "bad",
798+
"missingContent": [
799+
{ "topic": "Recent benchmarks", "description": "All results were >12 months old." },
800+
{ "topic": "Comparison vs Algolia" }
801+
]
802+
}
803+
}
804+
\`\`\`
805+
806+
**Returns:** \`{ success, feedbackId, creditsRefunded, creditsRefundedToday, dailyRefundCap, dailyCapReached?, alreadySubmitted?, warning? }\` JSON.
807+
`,
808+
// Input schema for `firecrawl_search_feedback`. Every field carries a
// `.describe()` so the generated tool schema documents each argument, not
// just `missingContent`. Validation rules (UUID, enum, length/size caps) are
// unchanged.
parameters: z.object({
  searchId: z
    .string()
    .uuid('searchId must be the UUID returned by firecrawl_search')
    .describe(
      'The `id` field from the `firecrawl_search` response this feedback refers to.'
    ),
  rating: z
    .enum(['good', 'bad', 'partial'])
    .describe('Overall result quality: `good`, `partial`, or `bad`.'),
  valuableSources: z
    .array(
      z.object({
        url: z.string().url(),
        reason: z.string().max(1000).optional(),
      })
    )
    .max(50)
    .optional()
    .describe(
      'Result URLs that were actually useful, each with an optional short `reason`.'
    ),
  missingContent: z
    .array(
      z.object({
        topic: z
          .string()
          .min(1, 'topic must not be empty')
          .max(200, 'topic must be 200 characters or fewer'),
        description: z.string().max(2000).optional(),
      })
    )
    .max(20)
    .optional()
    .describe(
      'Array of specific pieces of content the agent expected to find but did not. ' +
        'One entry per distinct topic. Each entry has a short `topic` and optional ' +
        'longer `description`.'
    ),
  querySuggestions: z
    .string()
    .max(2000)
    .optional()
    .describe('How the query or response shape could be improved.'),
}),
841+
/**
 * Submits feedback for a previous search by POSTing to
 * `{apiBase}/v2/search/{searchId}/feedback`.
 *
 * @param args Tool arguments: `searchId`, `rating`, and the optional
 *   `valuableSources`, `missingContent`, `querySuggestions`.
 * @param context `session` supplies the API key for the `Authorization`
 *   header (if any); `log` receives progress and failure messages.
 * @returns The server's JSON response rendered via `asText` on success.
 *   On a non-2xx status, or when the request itself fails (network error or
 *   timeout), a structured `{ success: false, ..., retryable }` payload is
 *   returned instead, so agents can tell terminal failures from transient ones.
 * @throws Error when `CLOUD_SERVICE` is `'true'` and the session has no API key.
 */
execute: async (
  args: unknown,
  { session, log }: { session?: SessionData; log: Logger }
): Promise<string> => {
  // Upper bound on how long a single feedback request may take before it is
  // aborted. NOTE(review): 30s is a judgment call — adjust if the API needs more.
  const FEEDBACK_TIMEOUT_MS = 30000;

  const {
    searchId,
    rating,
    valuableSources,
    missingContent,
    querySuggestions,
  } = args as {
    searchId: string;
    rating: 'good' | 'bad' | 'partial';
    valuableSources?: { url: string; reason?: string }[];
    missingContent?: { topic: string; description?: string }[];
    querySuggestions?: string;
  };

  const endpoint = `${resolveApiBaseUrl()}/v2/search/${encodeURIComponent(searchId)}/feedback`;

  // Optional fields are forwarded only when they carry content; empty arrays
  // and empty strings are left out of the request body.
  const body: Record<string, unknown> = {
    rating,
    origin: ORIGIN,
  };
  if (valuableSources && valuableSources.length > 0) {
    body.valuableSources = valuableSources;
  }
  if (missingContent && missingContent.length > 0) {
    body.missingContent = missingContent;
  }
  if (querySuggestions) body.querySuggestions = querySuggestions;

  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
  };
  const apiKey = session?.firecrawlApiKey;
  if (apiKey) {
    headers['Authorization'] = `Bearer ${apiKey}`;
  } else if (process.env.CLOUD_SERVICE === 'true') {
    throw new Error('Unauthorized: missing API key for search feedback.');
  }

  log.info('Submitting search feedback', { searchId, rating });

  // Transport failures (DNS, connection reset, timeout) are reported in the
  // same structured shape as HTTP errors instead of escaping as a raw
  // exception. They are flagged retryable, like 5xx responses.
  let response: Awaited<ReturnType<typeof fetch>>;
  let responseText: string;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), FEEDBACK_TIMEOUT_MS);
  try {
    response = await fetch(endpoint, {
      method: 'POST',
      headers,
      body: JSON.stringify(body),
      signal: controller.signal,
    });
    responseText = await response.text();
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    log.warn('Search feedback request failed', { searchId, error: message });
    return asText({
      success: false,
      error: `Search feedback request failed: ${message}`,
      retryable: true,
    });
  } finally {
    clearTimeout(timer);
  }

  // The response shape is defined by the server, so it stays untyped here.
  // A non-JSON body is preserved under `raw` rather than discarded.
  let parsed: any;
  try {
    parsed = JSON.parse(responseText);
  } catch {
    parsed = { raw: responseText };
  }

  // 4xx is terminal; surface a structured payload (with retryable=false)
  // so agents do not retry-loop on substantive-feedback rejections,
  // expired windows, etc.
  if (!response.ok) {
    log.warn('Search feedback rejected', {
      status: response.status,
      feedbackErrorCode: parsed?.feedbackErrorCode,
    });
    return asText({
      success: false,
      status: response.status,
      feedbackErrorCode: parsed?.feedbackErrorCode,
      error: parsed?.error ?? `HTTP ${response.status}`,
      retryable: response.status >= 500,
    });
  }

  return asText(parsed);
},
918+
});
919+
}
920+
707921
server.addTool({
708922
name: 'firecrawl_crawl',
709923
annotations: {

0 commit comments

Comments
 (0)