Improvements to web-fetch tool (#1030)

This commit is contained in:
Allen Hutchison
2025-06-13 17:44:14 -07:00
committed by GitHub
parent 8eb505fbba
commit 31b28ade01
11 changed files with 5151 additions and 21 deletions

4583
packages/core/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -30,10 +30,12 @@
"@opentelemetry/instrumentation-http": "^0.52.0",
"@opentelemetry/sdk-node": "^0.52.0",
"@types/glob": "^8.1.0",
"@types/html-to-text": "^9.0.4",
"diff": "^7.0.0",
"dotenv": "^16.4.7",
"glob": "^10.4.5",
"google-auth-library": "^9.11.0",
"html-to-text": "^9.0.5",
"ignore": "^7.0.0",
"micromatch": "^4.0.8",
"open": "^10.1.2",

View File

@@ -222,10 +222,19 @@ export interface ToolMcpConfirmationDetails {
onConfirm: (outcome: ToolConfirmationOutcome) => Promise<void>;
}
export interface ToolInfoConfirmationDetails {
type: 'info';
title: string;
onConfirm: (outcome: ToolConfirmationOutcome) => Promise<void>;
prompt: string;
urls?: string[];
}
export type ToolCallConfirmationDetails =
| ToolEditConfirmationDetails
| ToolExecuteConfirmationDetails
| ToolMcpConfirmationDetails;
| ToolMcpConfirmationDetails
| ToolInfoConfirmationDetails;
export enum ToolConfirmationOutcome {
ProceedOnce = 'proceed_once',

View File

@@ -0,0 +1,86 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi } from 'vitest';
import { WebFetchTool } from './web-fetch.js';
import { Config, ApprovalMode } from '../config/config.js';
import { ToolConfirmationOutcome } from './tools.js';
describe('WebFetchTool', () => {
const mockConfig = {
getApprovalMode: vi.fn(),
setApprovalMode: vi.fn(),
} as unknown as Config;
describe('shouldConfirmExecute', () => {
it('should return confirmation details with the correct prompt and urls', async () => {
const tool = new WebFetchTool(mockConfig);
const params = { prompt: 'fetch https://example.com' };
const confirmationDetails = await tool.shouldConfirmExecute(params);
expect(confirmationDetails).toEqual({
type: 'info',
title: 'Confirm Web Fetch',
prompt: 'fetch https://example.com',
urls: ['https://example.com'],
onConfirm: expect.any(Function),
});
});
it('should convert github urls to raw format', async () => {
const tool = new WebFetchTool(mockConfig);
const params = {
prompt:
'fetch https://github.com/google/gemini-react/blob/main/README.md',
};
const confirmationDetails = await tool.shouldConfirmExecute(params);
expect(confirmationDetails).toEqual({
type: 'info',
title: 'Confirm Web Fetch',
prompt:
'fetch https://github.com/google/gemini-react/blob/main/README.md',
urls: [
'https://raw.githubusercontent.com/google/gemini-react/main/README.md',
],
onConfirm: expect.any(Function),
});
});
it('should return false if approval mode is AUTO_EDIT', async () => {
const tool = new WebFetchTool({
...mockConfig,
getApprovalMode: () => ApprovalMode.AUTO_EDIT,
} as unknown as Config);
const params = { prompt: 'fetch https://example.com' };
const confirmationDetails = await tool.shouldConfirmExecute(params);
expect(confirmationDetails).toBe(false);
});
it('should call setApprovalMode when onConfirm is called with ProceedAlways', async () => {
const setApprovalMode = vi.fn();
const tool = new WebFetchTool({
...mockConfig,
setApprovalMode,
} as unknown as Config);
const params = { prompt: 'fetch https://example.com' };
const confirmationDetails = await tool.shouldConfirmExecute(params);
if (
confirmationDetails &&
typeof confirmationDetails === 'object' &&
'onConfirm' in confirmationDetails
) {
await confirmationDetails.onConfirm(
ToolConfirmationOutcome.ProceedAlways,
);
}
expect(setApprovalMode).toHaveBeenCalledWith(ApprovalMode.AUTO_EDIT);
});
});
});

View File

@@ -6,10 +6,26 @@
import { GroundingMetadata } from '@google/genai';
import { SchemaValidator } from '../utils/schemaValidator.js';
import { BaseTool, ToolResult } from './tools.js';
import {
BaseTool,
ToolResult,
ToolCallConfirmationDetails,
ToolConfirmationOutcome,
} from './tools.js';
import { getErrorMessage } from '../utils/errors.js';
import { Config } from '../config/config.js';
import { Config, ApprovalMode } from '../config/config.js';
import { getResponseText } from '../utils/generateContentResponseUtilities.js';
import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js';
import { convert } from 'html-to-text';
const URL_FETCH_TIMEOUT_MS = 10000;
const MAX_CONTENT_LENGTH = 100000;
// Helper function to extract URLs from a string
function extractUrls(text: string): string[] {
const urlRegex = /(https?:\/\/[^\s]+)/g;
return text.match(urlRegex) || [];
}
// Interfaces for grounding metadata (similar to web-search.ts)
interface GroundingChunkWeb {
@@ -52,7 +68,7 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
super(
WebFetchTool.Name,
'WebFetch',
"Processes content from URL(s) embedded in a prompt. Include up to 20 URLs and instructions (e.g., summarize, extract specific data) directly in the 'prompt' parameter.",
"Processes content from URL(s), including local and private network addresses (e.g., localhost), embedded in a prompt. Include up to 20 URLs and instructions (e.g., summarize, extract specific data) directly in the 'prompt' parameter.",
{
properties: {
prompt: {
@@ -67,6 +83,71 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
);
}
private async executeFallback(
params: WebFetchToolParams,
signal: AbortSignal,
): Promise<ToolResult> {
const urls = extractUrls(params.prompt);
if (urls.length === 0) {
return {
llmContent: 'Error: No URL found in the prompt for fallback.',
returnDisplay: 'Error: No URL found in the prompt for fallback.',
};
}
// For now, we only support one URL for fallback
let url = urls[0];
// Convert GitHub blob URL to raw URL
if (url.includes('github.com') && url.includes('/blob/')) {
url = url
.replace('github.com', 'raw.githubusercontent.com')
.replace('/blob/', '/');
}
try {
const response = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS);
if (!response.ok) {
throw new Error(
`Request failed with status code ${response.status} ${response.statusText}`,
);
}
const html = await response.text();
const textContent = convert(html, {
wordwrap: false,
selectors: [
{ selector: 'a', options: { ignoreHref: true } },
{ selector: 'img', format: 'skip' },
],
}).substring(0, MAX_CONTENT_LENGTH);
const geminiClient = this.config.getGeminiClient();
const fallbackPrompt = `The user requested the following: "${params.prompt}".
I was unable to access the URL directly. Instead, I have fetched the raw content of the page. Please use the following content to answer the user's request. Do not attempt to access the URL again.
---
${textContent}
---`;
const result = await geminiClient.generateContent(
[{ role: 'user', parts: [{ text: fallbackPrompt }] }],
{},
signal,
);
const resultText = getResponseText(result) || '';
return {
llmContent: resultText,
returnDisplay: `Content for ${url} processed using fallback fetch.`,
};
} catch (e) {
const error = e as Error;
const errorMessage = `Error during fallback fetch for ${url}: ${error.message}`;
return {
llmContent: `Error: ${errorMessage}`,
returnDisplay: `Error: ${errorMessage}`,
};
}
}
validateParams(params: WebFetchToolParams): string | null {
if (
this.schema.parameters &&
@@ -97,6 +178,43 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
return `Processing URLs and instructions from prompt: "${displayPrompt}"`;
}
async shouldConfirmExecute(
params: WebFetchToolParams,
): Promise<ToolCallConfirmationDetails | false> {
if (this.config.getApprovalMode() === ApprovalMode.AUTO_EDIT) {
return false;
}
const validationError = this.validateParams(params);
if (validationError) {
return false;
}
// Perform GitHub URL conversion here to differentiate between user-provided
// URL and the actual URL to be fetched.
const urls = extractUrls(params.prompt).map((url) => {
if (url.includes('github.com') && url.includes('/blob/')) {
return url
.replace('github.com', 'raw.githubusercontent.com')
.replace('/blob/', '/');
}
return url;
});
const confirmationDetails: ToolCallConfirmationDetails = {
type: 'info',
title: `Confirm Web Fetch`,
prompt: params.prompt,
urls,
onConfirm: async (outcome: ToolConfirmationOutcome) => {
if (outcome === ToolConfirmationOutcome.ProceedAlways) {
this.config.setApprovalMode(ApprovalMode.AUTO_EDIT);
}
},
};
return confirmationDetails;
}
async execute(
params: WebFetchToolParams,
signal: AbortSignal,
@@ -110,6 +228,14 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
}
const userPrompt = params.prompt;
const urls = extractUrls(userPrompt);
const url = urls[0];
const isPrivate = isPrivateIp(url);
if (isPrivate) {
return this.executeFallback(params, signal);
}
const geminiClient = this.config.getGeminiClient();
try {
@@ -120,7 +246,10 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
);
console.debug(
`[WebFetchTool] Full response for prompt "${userPrompt.substring(0, 50)}...":`,
`[WebFetchTool] Full response for prompt "${userPrompt.substring(
0,
50,
)}...":`,
JSON.stringify(response, null, 2),
);
@@ -138,7 +267,6 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
// Error Handling
let processingError = false;
let errorDetail = 'An unknown error occurred during content processing.';
if (
urlContextMeta?.urlMetadata &&
@@ -149,13 +277,10 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
);
if (allStatuses.every((s) => s !== 'URL_RETRIEVAL_STATUS_SUCCESS')) {
processingError = true;
errorDetail = `All URL retrieval attempts failed. Statuses: ${allStatuses.join(', ')}. API reported: "${responseText || 'No additional detail.'}"`;
}
} else if (!responseText.trim() && !sources?.length) {
// No URL metadata and no content/sources
processingError = true;
errorDetail =
'No content was returned and no URL metadata was available to determine fetch status.';
}
if (
@@ -165,16 +290,10 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
) {
// Successfully retrieved some URL (or no specific error from urlContextMeta), but no usable text or grounding data.
processingError = true;
errorDetail =
'URL(s) processed, but no substantive content or grounding information was found.';
}
if (processingError) {
const errorText = `Failed to process prompt and fetch URL data. ${errorDetail}`;
return {
llmContent: `Error: ${errorText}`,
returnDisplay: `Error: ${errorText}`,
};
return this.executeFallback(params, signal);
}
const sourceListFormatted: string[] = [];
@@ -227,7 +346,10 @@ ${sourceListFormatted.join('\n')}`;
returnDisplay: `Content processed from prompt.`,
};
} catch (error: unknown) {
const errorMessage = `Error processing web content for prompt "${userPrompt.substring(0, 50)}...": ${getErrorMessage(error)}`;
const errorMessage = `Error processing web content for prompt "${userPrompt.substring(
0,
50,
)}...": ${getErrorMessage(error)}`;
console.error(errorMessage, error);
return {
llmContent: `Error: ${errorMessage}`,

View File

@@ -0,0 +1,57 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { getErrorMessage, isNodeError } from './errors.js';
import { URL } from 'url';
const PRIVATE_IP_RANGES = [
/^10\./,
/^127\./,
/^172\.(1[6-9]|2[0-9]|3[0-1])\./,
/^192\.168\./,
/^::1$/,
/^fc00:/,
/^fe80:/,
];
export class FetchError extends Error {
constructor(
message: string,
public code?: string,
) {
super(message);
this.name = 'FetchError';
}
}
export function isPrivateIp(url: string): boolean {
try {
const hostname = new URL(url).hostname;
return PRIVATE_IP_RANGES.some((range) => range.test(hostname));
} catch (_e) {
return false;
}
}
export async function fetchWithTimeout(
url: string,
timeout: number,
): Promise<Response> {
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), timeout);
try {
const response = await fetch(url, { signal: controller.signal });
return response;
} catch (error) {
if (isNodeError(error) && error.code === 'ABORT_ERR') {
throw new FetchError(`Request timed out after ${timeout}ms`, 'ETIMEDOUT');
}
throw new FetchError(getErrorMessage(error));
} finally {
clearTimeout(timeoutId);
}
}

View File

@@ -38,8 +38,8 @@ const createDirent = (name: string, type: 'file' | 'dir'): FSDirent => ({
isSymbolicLink: () => false,
isFIFO: () => false,
isSocket: () => false,
parentPath: '',
path: '',
parentPath: '',
});
describe('getFolderStructure', () => {