mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-19 09:33:53 +00:00
feat: refactor web-fetch tool to remove google genai dependency
This commit is contained in:
@@ -19,13 +19,17 @@ describe('WebFetchTool', () => {
|
||||
describe('shouldConfirmExecute', () => {
|
||||
it('should return confirmation details with the correct prompt and urls', async () => {
|
||||
const tool = new WebFetchTool(mockConfig);
|
||||
const params = { prompt: 'fetch https://example.com' };
|
||||
const params = {
|
||||
url: 'https://example.com',
|
||||
prompt: 'summarize this page',
|
||||
};
|
||||
const confirmationDetails = await tool.shouldConfirmExecute(params);
|
||||
|
||||
expect(confirmationDetails).toEqual({
|
||||
type: 'info',
|
||||
title: 'Confirm Web Fetch',
|
||||
prompt: 'fetch https://example.com',
|
||||
prompt:
|
||||
'Fetch content from https://example.com and process with: summarize this page',
|
||||
urls: ['https://example.com'],
|
||||
onConfirm: expect.any(Function),
|
||||
});
|
||||
@@ -34,8 +38,8 @@ describe('WebFetchTool', () => {
|
||||
it('should convert github urls to raw format', async () => {
|
||||
const tool = new WebFetchTool(mockConfig);
|
||||
const params = {
|
||||
prompt:
|
||||
'fetch https://github.com/google/gemini-react/blob/main/README.md',
|
||||
url: 'https://github.com/google/gemini-react/blob/main/README.md',
|
||||
prompt: 'summarize the README',
|
||||
};
|
||||
const confirmationDetails = await tool.shouldConfirmExecute(params);
|
||||
|
||||
@@ -43,7 +47,7 @@ describe('WebFetchTool', () => {
|
||||
type: 'info',
|
||||
title: 'Confirm Web Fetch',
|
||||
prompt:
|
||||
'fetch https://github.com/google/gemini-react/blob/main/README.md',
|
||||
'Fetch content from https://github.com/google/gemini-react/blob/main/README.md and process with: summarize the README',
|
||||
urls: [
|
||||
'https://raw.githubusercontent.com/google/gemini-react/main/README.md',
|
||||
],
|
||||
@@ -56,7 +60,10 @@ describe('WebFetchTool', () => {
|
||||
...mockConfig,
|
||||
getApprovalMode: () => ApprovalMode.AUTO_EDIT,
|
||||
} as unknown as Config);
|
||||
const params = { prompt: 'fetch https://example.com' };
|
||||
const params = {
|
||||
url: 'https://example.com',
|
||||
prompt: 'summarize this page',
|
||||
};
|
||||
const confirmationDetails = await tool.shouldConfirmExecute(params);
|
||||
|
||||
expect(confirmationDetails).toBe(false);
|
||||
@@ -68,7 +75,10 @@ describe('WebFetchTool', () => {
|
||||
...mockConfig,
|
||||
setApprovalMode,
|
||||
} as unknown as Config);
|
||||
const params = { prompt: 'fetch https://example.com' };
|
||||
const params = {
|
||||
url: 'https://example.com',
|
||||
prompt: 'summarize this page',
|
||||
};
|
||||
const confirmationDetails = await tool.shouldConfirmExecute(params);
|
||||
|
||||
if (
|
||||
|
||||
@@ -13,49 +13,25 @@ import {
|
||||
Icon,
|
||||
} from './tools.js';
|
||||
import { Type } from '@google/genai';
|
||||
import { getErrorMessage } from '../utils/errors.js';
|
||||
import { Config, ApprovalMode } from '../config/config.js';
|
||||
import { getResponseText } from '../utils/generateContentResponseUtilities.js';
|
||||
import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js';
|
||||
import { fetchWithTimeout } from '../utils/fetch.js';
|
||||
import { convert } from 'html-to-text';
|
||||
import { ProxyAgent, setGlobalDispatcher } from 'undici';
|
||||
|
||||
/** Timeout, in milliseconds, handed to `fetchWithTimeout` for a URL request. */
const URL_FETCH_TIMEOUT_MS = 10_000;

/**
 * Upper bound on content length.
 * NOTE(review): the code that consumes this limit is not visible in this
 * excerpt — presumably the fetched text is truncated to this many characters;
 * confirm at the use site.
 */
const MAX_CONTENT_LENGTH = 100_000;
|
||||
|
||||
/**
 * Removes characters from the end of a URL candidate that belong to the
 * surrounding sentence rather than to the URL itself.
 *
 * Sentence punctuation (`. , ; : ! ? ' "`) is always dropped. A closing bracket
 * is dropped only when the candidate holds more closers than matching openers,
 * so `https://en.wikipedia.org/wiki/Foo_(bar)` is left intact while
 * `https://example.com)` loses its stray `)`.
 */
function stripTrailingPunctuation(candidate: string): string {
  const closerToOpener = new Map<string, string>([
    [')', '('],
    [']', '['],
    ['}', '{'],
    ['>', '<'],
  ]);
  const occurrences = (haystack: string, ch: string): number =>
    haystack.split(ch).length - 1;

  let url = candidate;
  while (url.length > 0) {
    const last = url.charAt(url.length - 1);
    const opener = closerToOpener.get(last);
    const isSentencePunctuation = '.,;:!?\'"'.includes(last);
    const isUnbalancedCloser =
      opener !== undefined && occurrences(url, last) > occurrences(url, opener);
    if (!isSentencePunctuation && !isUnbalancedCloser) {
      break;
    }
    url = url.slice(0, -1);
  }
  return url;
}

/**
 * Extracts every `http://` / `https://` URL embedded in free-form text, in
 * order of appearance.
 *
 * URLs are usually written inside sentences, so each whitespace-delimited
 * match is cleaned of trailing punctuation before being returned; otherwise
 * "see https://example.com." would yield a URL ending in "." that fails to
 * resolve.
 *
 * @param text - Arbitrary text, e.g. a user prompt.
 * @returns The URLs found; an empty array when there are none.
 */
function extractUrls(text: string): string[] {
  const urlRegex = /https?:\/\/[^\s]+/g;
  const candidates = text.match(urlRegex) ?? [];
  return (
    candidates
      .map((candidate) => stripTrailingPunctuation(candidate))
      // A candidate made only of punctuation (e.g. "https://...") collapses to
      // the bare scheme, which is not a usable URL.
      .filter((url) => /^https?:\/\/./.test(url))
  );
}
|
||||
|
||||
// Interfaces for grounding metadata (similar to web-search.ts)

/**
 * Web source attached to a grounding chunk.
 *
 * Both fields are optional; the source-list formatter in this file substitutes
 * 'Untitled' for a missing title and 'Unknown URI' for a missing uri.
 */
interface GroundingChunkWeb {
  /** Human-readable title of the source page, when provided. */
  title?: string;
  /** Address of the source page, when provided. */
  uri?: string;
}
|
||||
|
||||
/**
 * One element of the `groundingChunks` array read from a response candidate's
 * grounding metadata.
 */
interface GroundingChunkItem {
  /** Web source details for this chunk; may be absent. */
  web?: GroundingChunkWeb;
}
|
||||
|
||||
/**
 * Span of the response text that a grounding support refers to.
 *
 * Only `endIndex` is consumed by the code visible in this file: it is the
 * offset at which citation markers are spliced into the response text.
 */
interface GroundingSupportSegment {
  /** Offset where the span begins. NOTE(review): unit (chars vs. bytes) not shown here — confirm. */
  startIndex: number;
  /** Offset where the span ends; citation markers are inserted at this position. */
  endIndex: number;
  /** The covered text itself, when supplied. */
  text?: string;
}
|
||||
|
||||
/**
 * One element of the `groundingSupports` array: ties a segment of the response
 * text to the grounding chunks that back it.
 *
 * An item is only turned into a citation when both fields are present.
 */
interface GroundingSupportItem {
  /**
   * Zero-based positions into the grounding chunk list; rendered as one-based
   * `[n]` citation markers.
   */
  groundingChunkIndices?: number[];
  /** The portion of the response text being supported. */
  segment?: GroundingSupportSegment;
}
|
||||
|
||||
/**
 * Parameters for the WebFetch tool.
 *
 * The target address and the processing instructions are separate fields; the
 * URL is not embedded in the prompt.
 */
export interface WebFetchToolParams {
  /**
   * The URL to fetch content from.
   *
   * Parameter validation rejects an empty value and any value that does not
   * start with `http://` or `https://`.
   */
  url: string;
  /**
   * The prompt to run on the fetched content (e.g. "summarize this page").
   *
   * Parameter validation rejects an empty or whitespace-only value.
   */
  prompt: string;
}
|
||||
@@ -70,17 +46,20 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
|
||||
super(
|
||||
WebFetchTool.Name,
|
||||
'WebFetch',
|
||||
"Processes content from URL(s), including local and private network addresses (e.g., localhost), embedded in a prompt. Include up to 20 URLs and instructions (e.g., summarize, extract specific data) directly in the 'prompt' parameter.",
|
||||
'Fetches content from a specified URL and processes it using an AI model\n- Takes a URL and a prompt as input\n- Fetches the URL content, converts HTML to markdown\n- Processes the content with the prompt using a small, fast model\n- Returns the model\'s response about the content\n- Use this tool when you need to retrieve and analyze web content\n\nUsage notes:\n - IMPORTANT: If an MCP-provided web fetch tool is available, prefer using that tool instead of this one, as it may have fewer restrictions. All MCP-provided tools start with "mcp__".\n - The URL must be a fully-formed valid URL\n - HTTP URLs will be automatically upgraded to HTTPS\n - The prompt should describe what information you want to extract from the page\n - This tool is read-only and does not modify any files\n - Results may be summarized if the content is very large',
|
||||
Icon.Globe,
|
||||
{
|
||||
properties: {
|
||||
url: {
|
||||
description: 'The URL to fetch content from',
|
||||
type: Type.STRING,
|
||||
},
|
||||
prompt: {
|
||||
description:
|
||||
'A comprehensive prompt that includes the URL(s) (up to 20) to fetch and specific instructions on how to process their content (e.g., "Summarize https://example.com/article and extract key points from https://another.com/data"). Must contain as least one URL starting with http:// or https://.',
|
||||
description: 'The prompt to run on the fetched content',
|
||||
type: Type.STRING,
|
||||
},
|
||||
},
|
||||
required: ['prompt'],
|
||||
required: ['url', 'prompt'],
|
||||
type: Type.OBJECT,
|
||||
},
|
||||
);
|
||||
@@ -90,19 +69,11 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
|
||||
}
|
||||
}
|
||||
|
||||
private async executeFallback(
|
||||
private async executeFetch(
|
||||
params: WebFetchToolParams,
|
||||
signal: AbortSignal,
|
||||
): Promise<ToolResult> {
|
||||
const urls = extractUrls(params.prompt);
|
||||
if (urls.length === 0) {
|
||||
return {
|
||||
llmContent: 'Error: No URL found in the prompt for fallback.',
|
||||
returnDisplay: 'Error: No URL found in the prompt for fallback.',
|
||||
};
|
||||
}
|
||||
// For now, we only support one URL for fallback
|
||||
let url = urls[0];
|
||||
let url = params.url;
|
||||
|
||||
// Convert GitHub blob URL to raw URL
|
||||
if (url.includes('github.com') && url.includes('/blob/')) {
|
||||
@@ -111,6 +82,11 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
|
||||
.replace('/blob/', '/');
|
||||
}
|
||||
|
||||
// Upgrade HTTP to HTTPS
|
||||
if (url.startsWith('http://')) {
|
||||
url = url.replace('http://', 'https://');
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS);
|
||||
if (!response.ok) {
|
||||
@@ -130,7 +106,7 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
|
||||
const geminiClient = this.config.getGeminiClient();
|
||||
const fallbackPrompt = `The user requested the following: "${params.prompt}".
|
||||
|
||||
I was unable to access the URL directly. Instead, I have fetched the raw content of the page. Please use the following content to answer the user's request. Do not attempt to access the URL again.
|
||||
I have fetched the content from ${params.url}. Please use the following content to answer the user's request.
|
||||
|
||||
---
|
||||
${textContent}
|
||||
@@ -143,11 +119,11 @@ ${textContent}
|
||||
const resultText = getResponseText(result) || '';
|
||||
return {
|
||||
llmContent: resultText,
|
||||
returnDisplay: `Content for ${url} processed using fallback fetch.`,
|
||||
returnDisplay: `Content from ${params.url} processed successfully.`,
|
||||
};
|
||||
} catch (e) {
|
||||
const error = e as Error;
|
||||
const errorMessage = `Error during fallback fetch for ${url}: ${error.message}`;
|
||||
const errorMessage = `Error during fetch for ${url}: ${error.message}`;
|
||||
return {
|
||||
llmContent: `Error: ${errorMessage}`,
|
||||
returnDisplay: `Error: ${errorMessage}`,
|
||||
@@ -160,14 +136,17 @@ ${textContent}
|
||||
if (errors) {
|
||||
return errors;
|
||||
}
|
||||
if (!params.prompt || params.prompt.trim() === '') {
|
||||
return "The 'prompt' parameter cannot be empty and must contain URL(s) and instructions.";
|
||||
if (!params.url || params.url.trim() === '') {
|
||||
return "The 'url' parameter cannot be empty.";
|
||||
}
|
||||
if (
|
||||
!params.prompt.includes('http://') &&
|
||||
!params.prompt.includes('https://')
|
||||
!params.url.startsWith('http://') &&
|
||||
!params.url.startsWith('https://')
|
||||
) {
|
||||
return "The 'prompt' must contain at least one valid URL (starting with http:// or https://).";
|
||||
return "The 'url' must be a valid URL starting with http:// or https://.";
|
||||
}
|
||||
if (!params.prompt || params.prompt.trim() === '') {
|
||||
return "The 'prompt' parameter cannot be empty.";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
@@ -177,7 +156,7 @@ ${textContent}
|
||||
params.prompt.length > 100
|
||||
? params.prompt.substring(0, 97) + '...'
|
||||
: params.prompt;
|
||||
return `Processing URLs and instructions from prompt: "${displayPrompt}"`;
|
||||
return `Fetching content from ${params.url} and processing with prompt: "${displayPrompt}"`;
|
||||
}
|
||||
|
||||
async shouldConfirmExecute(
|
||||
@@ -194,20 +173,18 @@ ${textContent}
|
||||
|
||||
// Perform GitHub URL conversion here to differentiate between user-provided
|
||||
// URL and the actual URL to be fetched.
|
||||
const urls = extractUrls(params.prompt).map((url) => {
|
||||
if (url.includes('github.com') && url.includes('/blob/')) {
|
||||
return url
|
||||
.replace('github.com', 'raw.githubusercontent.com')
|
||||
.replace('/blob/', '/');
|
||||
}
|
||||
return url;
|
||||
});
|
||||
let url = params.url;
|
||||
if (url.includes('github.com') && url.includes('/blob/')) {
|
||||
url = url
|
||||
.replace('github.com', 'raw.githubusercontent.com')
|
||||
.replace('/blob/', '/');
|
||||
}
|
||||
|
||||
const confirmationDetails: ToolCallConfirmationDetails = {
|
||||
type: 'info',
|
||||
title: `Confirm Web Fetch`,
|
||||
prompt: params.prompt,
|
||||
urls,
|
||||
prompt: `Fetch content from ${params.url} and process with: ${params.prompt}`,
|
||||
urls: [url],
|
||||
onConfirm: async (outcome: ToolConfirmationOutcome) => {
|
||||
if (outcome === ToolConfirmationOutcome.ProceedAlways) {
|
||||
this.config.setApprovalMode(ApprovalMode.AUTO_EDIT);
|
||||
@@ -229,132 +206,6 @@ ${textContent}
|
||||
};
|
||||
}
|
||||
|
||||
const userPrompt = params.prompt;
|
||||
const urls = extractUrls(userPrompt);
|
||||
const url = urls[0];
|
||||
const isPrivate = isPrivateIp(url);
|
||||
|
||||
if (isPrivate) {
|
||||
return this.executeFallback(params, signal);
|
||||
}
|
||||
|
||||
const geminiClient = this.config.getGeminiClient();
|
||||
|
||||
try {
|
||||
const response = await geminiClient.generateContent(
|
||||
[{ role: 'user', parts: [{ text: userPrompt }] }],
|
||||
{ tools: [{ urlContext: {} }] },
|
||||
signal, // Pass signal
|
||||
);
|
||||
|
||||
console.debug(
|
||||
`[WebFetchTool] Full response for prompt "${userPrompt.substring(
|
||||
0,
|
||||
50,
|
||||
)}...":`,
|
||||
JSON.stringify(response, null, 2),
|
||||
);
|
||||
|
||||
let responseText = getResponseText(response) || '';
|
||||
const urlContextMeta = response.candidates?.[0]?.urlContextMetadata;
|
||||
const groundingMetadata = response.candidates?.[0]?.groundingMetadata;
|
||||
const sources = groundingMetadata?.groundingChunks as
|
||||
| GroundingChunkItem[]
|
||||
| undefined;
|
||||
const groundingSupports = groundingMetadata?.groundingSupports as
|
||||
| GroundingSupportItem[]
|
||||
| undefined;
|
||||
|
||||
// Error Handling
|
||||
let processingError = false;
|
||||
|
||||
if (
|
||||
urlContextMeta?.urlMetadata &&
|
||||
urlContextMeta.urlMetadata.length > 0
|
||||
) {
|
||||
const allStatuses = urlContextMeta.urlMetadata.map(
|
||||
(m) => m.urlRetrievalStatus,
|
||||
);
|
||||
if (allStatuses.every((s) => s !== 'URL_RETRIEVAL_STATUS_SUCCESS')) {
|
||||
processingError = true;
|
||||
}
|
||||
} else if (!responseText.trim() && !sources?.length) {
|
||||
// No URL metadata and no content/sources
|
||||
processingError = true;
|
||||
}
|
||||
|
||||
if (
|
||||
!processingError &&
|
||||
!responseText.trim() &&
|
||||
(!sources || sources.length === 0)
|
||||
) {
|
||||
// Successfully retrieved some URL (or no specific error from urlContextMeta), but no usable text or grounding data.
|
||||
processingError = true;
|
||||
}
|
||||
|
||||
if (processingError) {
|
||||
return this.executeFallback(params, signal);
|
||||
}
|
||||
|
||||
const sourceListFormatted: string[] = [];
|
||||
if (sources && sources.length > 0) {
|
||||
sources.forEach((source: GroundingChunkItem, index: number) => {
|
||||
const title = source.web?.title || 'Untitled';
|
||||
const uri = source.web?.uri || 'Unknown URI'; // Fallback if URI is missing
|
||||
sourceListFormatted.push(`[${index + 1}] ${title} (${uri})`);
|
||||
});
|
||||
|
||||
if (groundingSupports && groundingSupports.length > 0) {
|
||||
const insertions: Array<{ index: number; marker: string }> = [];
|
||||
groundingSupports.forEach((support: GroundingSupportItem) => {
|
||||
if (support.segment && support.groundingChunkIndices) {
|
||||
const citationMarker = support.groundingChunkIndices
|
||||
.map((chunkIndex: number) => `[${chunkIndex + 1}]`)
|
||||
.join('');
|
||||
insertions.push({
|
||||
index: support.segment.endIndex,
|
||||
marker: citationMarker,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
insertions.sort((a, b) => b.index - a.index);
|
||||
const responseChars = responseText.split('');
|
||||
insertions.forEach((insertion) => {
|
||||
responseChars.splice(insertion.index, 0, insertion.marker);
|
||||
});
|
||||
responseText = responseChars.join('');
|
||||
}
|
||||
|
||||
if (sourceListFormatted.length > 0) {
|
||||
responseText += `
|
||||
|
||||
Sources:
|
||||
${sourceListFormatted.join('\n')}`;
|
||||
}
|
||||
}
|
||||
|
||||
const llmContent = responseText;
|
||||
|
||||
console.debug(
|
||||
`[WebFetchTool] Formatted tool response for prompt "${userPrompt}:\n\n":`,
|
||||
llmContent,
|
||||
);
|
||||
|
||||
return {
|
||||
llmContent,
|
||||
returnDisplay: `Content processed from prompt.`,
|
||||
};
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = `Error processing web content for prompt "${userPrompt.substring(
|
||||
0,
|
||||
50,
|
||||
)}...": ${getErrorMessage(error)}`;
|
||||
console.error(errorMessage, error);
|
||||
return {
|
||||
llmContent: `Error: ${errorMessage}`,
|
||||
returnDisplay: `Error: ${errorMessage}`,
|
||||
};
|
||||
}
|
||||
return this.executeFetch(params, signal);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user