feat: refactor web-fetch tool to remove google genai dependency

2025-12-19 09:33:53 +00:00 · 2025-08-15 17:06:00 +08:00
parent 3e082ae89a
commit 5d4a9452d8
3 changed files with 78 additions and 207 deletions
--- a/packages/core/src/tools/web-fetch.test.ts
+++ b/packages/core/src/tools/web-fetch.test.ts
@@ -19,13 +19,17 @@ describe('WebFetchTool', () => {
  describe('shouldConfirmExecute', () => {
    it('should return confirmation details with the correct prompt and urls', async () => {
      const tool = new WebFetchTool(mockConfig);
-      const params = { prompt: 'fetch https://example.com' };
+      const params = {
+        url: 'https://example.com',
+        prompt: 'summarize this page',
+      };
      const confirmationDetails = await tool.shouldConfirmExecute(params);

      expect(confirmationDetails).toEqual({
        type: 'info',
        title: 'Confirm Web Fetch',
-        prompt: 'fetch https://example.com',
+        prompt:
+          'Fetch content from https://example.com and process with: summarize this page',
        urls: ['https://example.com'],
        onConfirm: expect.any(Function),
      });
@@ -34,8 +38,8 @@ describe('WebFetchTool', () => {
    it('should convert github urls to raw format', async () => {
      const tool = new WebFetchTool(mockConfig);
      const params = {
-        prompt:
-          'fetch https://github.com/google/gemini-react/blob/main/README.md',
+        url: 'https://github.com/google/gemini-react/blob/main/README.md',
+        prompt: 'summarize the README',
      };
      const confirmationDetails = await tool.shouldConfirmExecute(params);

@@ -43,7 +47,7 @@ describe('WebFetchTool', () => {
        type: 'info',
        title: 'Confirm Web Fetch',
        prompt:
-          'fetch https://github.com/google/gemini-react/blob/main/README.md',
+          'Fetch content from https://github.com/google/gemini-react/blob/main/README.md and process with: summarize the README',
        urls: [
          'https://raw.githubusercontent.com/google/gemini-react/main/README.md',
        ],
@@ -56,7 +60,10 @@ describe('WebFetchTool', () => {
        ...mockConfig,
        getApprovalMode: () => ApprovalMode.AUTO_EDIT,
      } as unknown as Config);
-      const params = { prompt: 'fetch https://example.com' };
+      const params = {
+        url: 'https://example.com',
+        prompt: 'summarize this page',
+      };
      const confirmationDetails = await tool.shouldConfirmExecute(params);

      expect(confirmationDetails).toBe(false);
@@ -68,7 +75,10 @@ describe('WebFetchTool', () => {
        ...mockConfig,
        setApprovalMode,
      } as unknown as Config);
-      const params = { prompt: 'fetch https://example.com' };
+      const params = {
+        url: 'https://example.com',
+        prompt: 'summarize this page',
+      };
      const confirmationDetails = await tool.shouldConfirmExecute(params);

      if (
--- a/packages/core/src/tools/web-fetch.ts
+++ b/packages/core/src/tools/web-fetch.ts
@@ -13,49 +13,25 @@ import {
  Icon,
 } from './tools.js';
 import { Type } from '@google/genai';
-import { getErrorMessage } from '../utils/errors.js';
 import { Config, ApprovalMode } from '../config/config.js';
 import { getResponseText } from '../utils/generateContentResponseUtilities.js';
-import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js';
+import { fetchWithTimeout } from '../utils/fetch.js';
 import { convert } from 'html-to-text';
 import { ProxyAgent, setGlobalDispatcher } from 'undici';

 const URL_FETCH_TIMEOUT_MS = 10000;
 const MAX_CONTENT_LENGTH = 100000;

-// Helper function to extract URLs from a string
-function extractUrls(text: string): string[] {
-  const urlRegex = /(https?:\/\/[^\s]+)/g;
-  return text.match(urlRegex) || [];
-}
-
-// Interfaces for grounding metadata (similar to web-search.ts)
-interface GroundingChunkWeb {
-  uri?: string;
-  title?: string;
-}
-
-interface GroundingChunkItem {
-  web?: GroundingChunkWeb;
-}
-
-interface GroundingSupportSegment {
-  startIndex: number;
-  endIndex: number;
-  text?: string;
-}
-
-interface GroundingSupportItem {
-  segment?: GroundingSupportSegment;
-  groundingChunkIndices?: number[];
-}
-
 /**
 * Parameters for the WebFetch tool
 */
 export interface WebFetchToolParams {
  /**
-   * The prompt containing URL(s) (up to 20) and instructions for processing their content.
+   * The URL to fetch content from
+   */
+  url: string;
+  /**
+   * The prompt to run on the fetched content
   */
  prompt: string;
 }
@@ -70,17 +46,20 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
    super(
      WebFetchTool.Name,
      'WebFetch',
-      "Processes content from URL(s), including local and private network addresses (e.g., localhost), embedded in a prompt. Include up to 20 URLs and instructions (e.g., summarize, extract specific data) directly in the 'prompt' parameter.",
+      'Fetches content from a specified URL and processes it using an AI model\n- Takes a URL and a prompt as input\n- Fetches the URL content, converts HTML to markdown\n- Processes the content with the prompt using a small, fast model\n- Returns the model\'s response about the content\n- Use this tool when you need to retrieve and analyze web content\n\nUsage notes:\n  - IMPORTANT: If an MCP-provided web fetch tool is available, prefer using that tool instead of this one, as it may have fewer restrictions. All MCP-provided tools start with "mcp__".\n  - The URL must be a fully-formed valid URL\n  - HTTP URLs will be automatically upgraded to HTTPS\n  - The prompt should describe what information you want to extract from the page\n  - This tool is read-only and does not modify any files\n  - Results may be summarized if the content is very large',
      Icon.Globe,
      {
        properties: {
+          url: {
+            description: 'The URL to fetch content from',
+            type: Type.STRING,
+          },
          prompt: {
-            description:
-              'A comprehensive prompt that includes the URL(s) (up to 20) to fetch and specific instructions on how to process their content (e.g., "Summarize https://example.com/article and extract key points from https://another.com/data"). Must contain as least one URL starting with http:// or https://.',
+            description: 'The prompt to run on the fetched content',
            type: Type.STRING,
          },
        },
-        required: ['prompt'],
+        required: ['url', 'prompt'],
        type: Type.OBJECT,
      },
    );
@@ -90,19 +69,11 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
    }
  }

-  private async executeFallback(
+  private async executeFetch(
    params: WebFetchToolParams,
    signal: AbortSignal,
  ): Promise<ToolResult> {
-    const urls = extractUrls(params.prompt);
-    if (urls.length === 0) {
-      return {
-        llmContent: 'Error: No URL found in the prompt for fallback.',
-        returnDisplay: 'Error: No URL found in the prompt for fallback.',
-      };
-    }
-    // For now, we only support one URL for fallback
-    let url = urls[0];
+    let url = params.url;

    // Convert GitHub blob URL to raw URL
    if (url.includes('github.com') && url.includes('/blob/')) {
@@ -111,6 +82,11 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
        .replace('/blob/', '/');
    }

+    // Upgrade HTTP to HTTPS
+    if (url.startsWith('http://')) {
+      url = url.replace('http://', 'https://');
+    }
+
    try {
      const response = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS);
      if (!response.ok) {
@@ -130,7 +106,7 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
      const geminiClient = this.config.getGeminiClient();
      const fallbackPrompt = `The user requested the following: "${params.prompt}".

-I was unable to access the URL directly. Instead, I have fetched the raw content of the page. Please use the following content to answer the user's request. Do not attempt to access the URL again.
+I have fetched the content from ${params.url}. Please use the following content to answer the user's request.

 ---
 ${textContent}
@@ -143,11 +119,11 @@ ${textContent}
      const resultText = getResponseText(result) || '';
      return {
        llmContent: resultText,
-        returnDisplay: `Content for ${url} processed using fallback fetch.`,
+        returnDisplay: `Content from ${params.url} processed successfully.`,
      };
    } catch (e) {
      const error = e as Error;
-      const errorMessage = `Error during fallback fetch for ${url}: ${error.message}`;
+      const errorMessage = `Error during fetch for ${url}: ${error.message}`;
      return {
        llmContent: `Error: ${errorMessage}`,
        returnDisplay: `Error: ${errorMessage}`,
@@ -160,14 +136,17 @@ ${textContent}
    if (errors) {
      return errors;
    }
-    if (!params.prompt || params.prompt.trim() === '') {
-      return "The 'prompt' parameter cannot be empty and must contain URL(s) and instructions.";
+    if (!params.url || params.url.trim() === '') {
+      return "The 'url' parameter cannot be empty.";
    }
    if (
-      !params.prompt.includes('http://') &&
-      !params.prompt.includes('https://')
+      !params.url.startsWith('http://') &&
+      !params.url.startsWith('https://')
    ) {
-      return "The 'prompt' must contain at least one valid URL (starting with http:// or https://).";
+      return "The 'url' must be a valid URL starting with http:// or https://.";
+    }
+    if (!params.prompt || params.prompt.trim() === '') {
+      return "The 'prompt' parameter cannot be empty.";
    }
    return null;
  }
@@ -177,7 +156,7 @@ ${textContent}
      params.prompt.length > 100
        ? params.prompt.substring(0, 97) + '...'
        : params.prompt;
-    return `Processing URLs and instructions from prompt: "${displayPrompt}"`;
+    return `Fetching content from ${params.url} and processing with prompt: "${displayPrompt}"`;
  }

  async shouldConfirmExecute(
@@ -194,20 +173,18 @@ ${textContent}

    // Perform GitHub URL conversion here to differentiate between user-provided
    // URL and the actual URL to be fetched.
-    const urls = extractUrls(params.prompt).map((url) => {
-      if (url.includes('github.com') && url.includes('/blob/')) {
-        return url
-          .replace('github.com', 'raw.githubusercontent.com')
-          .replace('/blob/', '/');
-      }
-      return url;
-    });
+    let url = params.url;
+    if (url.includes('github.com') && url.includes('/blob/')) {
+      url = url
+        .replace('github.com', 'raw.githubusercontent.com')
+        .replace('/blob/', '/');
+    }

    const confirmationDetails: ToolCallConfirmationDetails = {
      type: 'info',
      title: `Confirm Web Fetch`,
-      prompt: params.prompt,
-      urls,
+      prompt: `Fetch content from ${params.url} and process with: ${params.prompt}`,
+      urls: [url],
      onConfirm: async (outcome: ToolConfirmationOutcome) => {
        if (outcome === ToolConfirmationOutcome.ProceedAlways) {
          this.config.setApprovalMode(ApprovalMode.AUTO_EDIT);
@@ -229,132 +206,6 @@ ${textContent}
      };
    }

-    const userPrompt = params.prompt;
-    const urls = extractUrls(userPrompt);
-    const url = urls[0];
-    const isPrivate = isPrivateIp(url);
-
-    if (isPrivate) {
-      return this.executeFallback(params, signal);
-    }
-
-    const geminiClient = this.config.getGeminiClient();
-
-    try {
-      const response = await geminiClient.generateContent(
-        [{ role: 'user', parts: [{ text: userPrompt }] }],
-        { tools: [{ urlContext: {} }] },
-        signal, // Pass signal
-      );
-
-      console.debug(
-        `[WebFetchTool] Full response for prompt "${userPrompt.substring(
-          0,
-          50,
-        )}...":`,
-        JSON.stringify(response, null, 2),
-      );
-
-      let responseText = getResponseText(response) || '';
-      const urlContextMeta = response.candidates?.[0]?.urlContextMetadata;
-      const groundingMetadata = response.candidates?.[0]?.groundingMetadata;
-      const sources = groundingMetadata?.groundingChunks as
-        | GroundingChunkItem[]
-        | undefined;
-      const groundingSupports = groundingMetadata?.groundingSupports as
-        | GroundingSupportItem[]
-        | undefined;
-
-      // Error Handling
-      let processingError = false;
-
-      if (
-        urlContextMeta?.urlMetadata &&
-        urlContextMeta.urlMetadata.length > 0
-      ) {
-        const allStatuses = urlContextMeta.urlMetadata.map(
-          (m) => m.urlRetrievalStatus,
-        );
-        if (allStatuses.every((s) => s !== 'URL_RETRIEVAL_STATUS_SUCCESS')) {
-          processingError = true;
-        }
-      } else if (!responseText.trim() && !sources?.length) {
-        // No URL metadata and no content/sources
-        processingError = true;
-      }
-
-      if (
-        !processingError &&
-        !responseText.trim() &&
-        (!sources || sources.length === 0)
-      ) {
-        // Successfully retrieved some URL (or no specific error from urlContextMeta), but no usable text or grounding data.
-        processingError = true;
-      }
-
-      if (processingError) {
-        return this.executeFallback(params, signal);
-      }
-
-      const sourceListFormatted: string[] = [];
-      if (sources && sources.length > 0) {
-        sources.forEach((source: GroundingChunkItem, index: number) => {
-          const title = source.web?.title || 'Untitled';
-          const uri = source.web?.uri || 'Unknown URI'; // Fallback if URI is missing
-          sourceListFormatted.push(`[${index + 1}] ${title} (${uri})`);
-        });
-
-        if (groundingSupports && groundingSupports.length > 0) {
-          const insertions: Array<{ index: number; marker: string }> = [];
-          groundingSupports.forEach((support: GroundingSupportItem) => {
-            if (support.segment && support.groundingChunkIndices) {
-              const citationMarker = support.groundingChunkIndices
-                .map((chunkIndex: number) => `[${chunkIndex + 1}]`)
-                .join('');
-              insertions.push({
-                index: support.segment.endIndex,
-                marker: citationMarker,
-              });
-            }
-          });
-
-          insertions.sort((a, b) => b.index - a.index);
-          const responseChars = responseText.split('');
-          insertions.forEach((insertion) => {
-            responseChars.splice(insertion.index, 0, insertion.marker);
-          });
-          responseText = responseChars.join('');
-        }
-
-        if (sourceListFormatted.length > 0) {
-          responseText += `
-
-Sources:
-${sourceListFormatted.join('\n')}`;
-        }
-      }
-
-      const llmContent = responseText;
-
-      console.debug(
-        `[WebFetchTool] Formatted tool response for prompt "${userPrompt}:\n\n":`,
-        llmContent,
-      );
-
-      return {
-        llmContent,
-        returnDisplay: `Content processed from prompt.`,
-      };
-    } catch (error: unknown) {
-      const errorMessage = `Error processing web content for prompt "${userPrompt.substring(
-        0,
-        50,
-      )}...": ${getErrorMessage(error)}`;
-      console.error(errorMessage, error);
-      return {
-        llmContent: `Error: ${errorMessage}`,
-        returnDisplay: `Error: ${errorMessage}`,
-      };
-    }
+    return this.executeFetch(params, signal);
  }
 }