mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-19 09:33:53 +00:00
Improvements to web-fetch tool (#1030)
This commit is contained in:
4583
packages/core/package-lock.json
generated
Normal file
4583
packages/core/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -30,10 +30,12 @@
|
||||
"@opentelemetry/instrumentation-http": "^0.52.0",
|
||||
"@opentelemetry/sdk-node": "^0.52.0",
|
||||
"@types/glob": "^8.1.0",
|
||||
"@types/html-to-text": "^9.0.4",
|
||||
"diff": "^7.0.0",
|
||||
"dotenv": "^16.4.7",
|
||||
"glob": "^10.4.5",
|
||||
"google-auth-library": "^9.11.0",
|
||||
"html-to-text": "^9.0.5",
|
||||
"ignore": "^7.0.0",
|
||||
"micromatch": "^4.0.8",
|
||||
"open": "^10.1.2",
|
||||
|
||||
@@ -222,10 +222,19 @@ export interface ToolMcpConfirmationDetails {
|
||||
onConfirm: (outcome: ToolConfirmationOutcome) => Promise<void>;
|
||||
}
|
||||
|
||||
export interface ToolInfoConfirmationDetails {
|
||||
type: 'info';
|
||||
title: string;
|
||||
onConfirm: (outcome: ToolConfirmationOutcome) => Promise<void>;
|
||||
prompt: string;
|
||||
urls?: string[];
|
||||
}
|
||||
|
||||
export type ToolCallConfirmationDetails =
|
||||
| ToolEditConfirmationDetails
|
||||
| ToolExecuteConfirmationDetails
|
||||
| ToolMcpConfirmationDetails;
|
||||
| ToolMcpConfirmationDetails
|
||||
| ToolInfoConfirmationDetails;
|
||||
|
||||
export enum ToolConfirmationOutcome {
|
||||
ProceedOnce = 'proceed_once',
|
||||
|
||||
86
packages/core/src/tools/web-fetch.test.ts
Normal file
86
packages/core/src/tools/web-fetch.test.ts
Normal file
@@ -0,0 +1,86 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi } from 'vitest';
|
||||
import { WebFetchTool } from './web-fetch.js';
|
||||
import { Config, ApprovalMode } from '../config/config.js';
|
||||
import { ToolConfirmationOutcome } from './tools.js';
|
||||
|
||||
describe('WebFetchTool', () => {
|
||||
const mockConfig = {
|
||||
getApprovalMode: vi.fn(),
|
||||
setApprovalMode: vi.fn(),
|
||||
} as unknown as Config;
|
||||
|
||||
describe('shouldConfirmExecute', () => {
|
||||
it('should return confirmation details with the correct prompt and urls', async () => {
|
||||
const tool = new WebFetchTool(mockConfig);
|
||||
const params = { prompt: 'fetch https://example.com' };
|
||||
const confirmationDetails = await tool.shouldConfirmExecute(params);
|
||||
|
||||
expect(confirmationDetails).toEqual({
|
||||
type: 'info',
|
||||
title: 'Confirm Web Fetch',
|
||||
prompt: 'fetch https://example.com',
|
||||
urls: ['https://example.com'],
|
||||
onConfirm: expect.any(Function),
|
||||
});
|
||||
});
|
||||
|
||||
it('should convert github urls to raw format', async () => {
|
||||
const tool = new WebFetchTool(mockConfig);
|
||||
const params = {
|
||||
prompt:
|
||||
'fetch https://github.com/google/gemini-react/blob/main/README.md',
|
||||
};
|
||||
const confirmationDetails = await tool.shouldConfirmExecute(params);
|
||||
|
||||
expect(confirmationDetails).toEqual({
|
||||
type: 'info',
|
||||
title: 'Confirm Web Fetch',
|
||||
prompt:
|
||||
'fetch https://github.com/google/gemini-react/blob/main/README.md',
|
||||
urls: [
|
||||
'https://raw.githubusercontent.com/google/gemini-react/main/README.md',
|
||||
],
|
||||
onConfirm: expect.any(Function),
|
||||
});
|
||||
});
|
||||
|
||||
it('should return false if approval mode is AUTO_EDIT', async () => {
|
||||
const tool = new WebFetchTool({
|
||||
...mockConfig,
|
||||
getApprovalMode: () => ApprovalMode.AUTO_EDIT,
|
||||
} as unknown as Config);
|
||||
const params = { prompt: 'fetch https://example.com' };
|
||||
const confirmationDetails = await tool.shouldConfirmExecute(params);
|
||||
|
||||
expect(confirmationDetails).toBe(false);
|
||||
});
|
||||
|
||||
it('should call setApprovalMode when onConfirm is called with ProceedAlways', async () => {
|
||||
const setApprovalMode = vi.fn();
|
||||
const tool = new WebFetchTool({
|
||||
...mockConfig,
|
||||
setApprovalMode,
|
||||
} as unknown as Config);
|
||||
const params = { prompt: 'fetch https://example.com' };
|
||||
const confirmationDetails = await tool.shouldConfirmExecute(params);
|
||||
|
||||
if (
|
||||
confirmationDetails &&
|
||||
typeof confirmationDetails === 'object' &&
|
||||
'onConfirm' in confirmationDetails
|
||||
) {
|
||||
await confirmationDetails.onConfirm(
|
||||
ToolConfirmationOutcome.ProceedAlways,
|
||||
);
|
||||
}
|
||||
|
||||
expect(setApprovalMode).toHaveBeenCalledWith(ApprovalMode.AUTO_EDIT);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -6,10 +6,26 @@
|
||||
|
||||
import { GroundingMetadata } from '@google/genai';
|
||||
import { SchemaValidator } from '../utils/schemaValidator.js';
|
||||
import { BaseTool, ToolResult } from './tools.js';
|
||||
import {
|
||||
BaseTool,
|
||||
ToolResult,
|
||||
ToolCallConfirmationDetails,
|
||||
ToolConfirmationOutcome,
|
||||
} from './tools.js';
|
||||
import { getErrorMessage } from '../utils/errors.js';
|
||||
import { Config } from '../config/config.js';
|
||||
import { Config, ApprovalMode } from '../config/config.js';
|
||||
import { getResponseText } from '../utils/generateContentResponseUtilities.js';
|
||||
import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js';
|
||||
import { convert } from 'html-to-text';
|
||||
|
||||
const URL_FETCH_TIMEOUT_MS = 10000;
|
||||
const MAX_CONTENT_LENGTH = 100000;
|
||||
|
||||
// Helper function to extract URLs from a string
|
||||
function extractUrls(text: string): string[] {
|
||||
const urlRegex = /(https?:\/\/[^\s]+)/g;
|
||||
return text.match(urlRegex) || [];
|
||||
}
|
||||
|
||||
// Interfaces for grounding metadata (similar to web-search.ts)
|
||||
interface GroundingChunkWeb {
|
||||
@@ -52,7 +68,7 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
|
||||
super(
|
||||
WebFetchTool.Name,
|
||||
'WebFetch',
|
||||
"Processes content from URL(s) embedded in a prompt. Include up to 20 URLs and instructions (e.g., summarize, extract specific data) directly in the 'prompt' parameter.",
|
||||
"Processes content from URL(s), including local and private network addresses (e.g., localhost), embedded in a prompt. Include up to 20 URLs and instructions (e.g., summarize, extract specific data) directly in the 'prompt' parameter.",
|
||||
{
|
||||
properties: {
|
||||
prompt: {
|
||||
@@ -67,6 +83,71 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
|
||||
);
|
||||
}
|
||||
|
||||
private async executeFallback(
|
||||
params: WebFetchToolParams,
|
||||
signal: AbortSignal,
|
||||
): Promise<ToolResult> {
|
||||
const urls = extractUrls(params.prompt);
|
||||
if (urls.length === 0) {
|
||||
return {
|
||||
llmContent: 'Error: No URL found in the prompt for fallback.',
|
||||
returnDisplay: 'Error: No URL found in the prompt for fallback.',
|
||||
};
|
||||
}
|
||||
// For now, we only support one URL for fallback
|
||||
let url = urls[0];
|
||||
|
||||
// Convert GitHub blob URL to raw URL
|
||||
if (url.includes('github.com') && url.includes('/blob/')) {
|
||||
url = url
|
||||
.replace('github.com', 'raw.githubusercontent.com')
|
||||
.replace('/blob/', '/');
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS);
|
||||
if (!response.ok) {
|
||||
throw new Error(
|
||||
`Request failed with status code ${response.status} ${response.statusText}`,
|
||||
);
|
||||
}
|
||||
const html = await response.text();
|
||||
const textContent = convert(html, {
|
||||
wordwrap: false,
|
||||
selectors: [
|
||||
{ selector: 'a', options: { ignoreHref: true } },
|
||||
{ selector: 'img', format: 'skip' },
|
||||
],
|
||||
}).substring(0, MAX_CONTENT_LENGTH);
|
||||
|
||||
const geminiClient = this.config.getGeminiClient();
|
||||
const fallbackPrompt = `The user requested the following: "${params.prompt}".
|
||||
|
||||
I was unable to access the URL directly. Instead, I have fetched the raw content of the page. Please use the following content to answer the user's request. Do not attempt to access the URL again.
|
||||
|
||||
---
|
||||
${textContent}
|
||||
---`;
|
||||
const result = await geminiClient.generateContent(
|
||||
[{ role: 'user', parts: [{ text: fallbackPrompt }] }],
|
||||
{},
|
||||
signal,
|
||||
);
|
||||
const resultText = getResponseText(result) || '';
|
||||
return {
|
||||
llmContent: resultText,
|
||||
returnDisplay: `Content for ${url} processed using fallback fetch.`,
|
||||
};
|
||||
} catch (e) {
|
||||
const error = e as Error;
|
||||
const errorMessage = `Error during fallback fetch for ${url}: ${error.message}`;
|
||||
return {
|
||||
llmContent: `Error: ${errorMessage}`,
|
||||
returnDisplay: `Error: ${errorMessage}`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
validateParams(params: WebFetchToolParams): string | null {
|
||||
if (
|
||||
this.schema.parameters &&
|
||||
@@ -97,6 +178,43 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
|
||||
return `Processing URLs and instructions from prompt: "${displayPrompt}"`;
|
||||
}
|
||||
|
||||
async shouldConfirmExecute(
|
||||
params: WebFetchToolParams,
|
||||
): Promise<ToolCallConfirmationDetails | false> {
|
||||
if (this.config.getApprovalMode() === ApprovalMode.AUTO_EDIT) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const validationError = this.validateParams(params);
|
||||
if (validationError) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Perform GitHub URL conversion here to differentiate between user-provided
|
||||
// URL and the actual URL to be fetched.
|
||||
const urls = extractUrls(params.prompt).map((url) => {
|
||||
if (url.includes('github.com') && url.includes('/blob/')) {
|
||||
return url
|
||||
.replace('github.com', 'raw.githubusercontent.com')
|
||||
.replace('/blob/', '/');
|
||||
}
|
||||
return url;
|
||||
});
|
||||
|
||||
const confirmationDetails: ToolCallConfirmationDetails = {
|
||||
type: 'info',
|
||||
title: `Confirm Web Fetch`,
|
||||
prompt: params.prompt,
|
||||
urls,
|
||||
onConfirm: async (outcome: ToolConfirmationOutcome) => {
|
||||
if (outcome === ToolConfirmationOutcome.ProceedAlways) {
|
||||
this.config.setApprovalMode(ApprovalMode.AUTO_EDIT);
|
||||
}
|
||||
},
|
||||
};
|
||||
return confirmationDetails;
|
||||
}
|
||||
|
||||
async execute(
|
||||
params: WebFetchToolParams,
|
||||
signal: AbortSignal,
|
||||
@@ -110,6 +228,14 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
|
||||
}
|
||||
|
||||
const userPrompt = params.prompt;
|
||||
const urls = extractUrls(userPrompt);
|
||||
const url = urls[0];
|
||||
const isPrivate = isPrivateIp(url);
|
||||
|
||||
if (isPrivate) {
|
||||
return this.executeFallback(params, signal);
|
||||
}
|
||||
|
||||
const geminiClient = this.config.getGeminiClient();
|
||||
|
||||
try {
|
||||
@@ -120,7 +246,10 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
|
||||
);
|
||||
|
||||
console.debug(
|
||||
`[WebFetchTool] Full response for prompt "${userPrompt.substring(0, 50)}...":`,
|
||||
`[WebFetchTool] Full response for prompt "${userPrompt.substring(
|
||||
0,
|
||||
50,
|
||||
)}...":`,
|
||||
JSON.stringify(response, null, 2),
|
||||
);
|
||||
|
||||
@@ -138,7 +267,6 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
|
||||
|
||||
// Error Handling
|
||||
let processingError = false;
|
||||
let errorDetail = 'An unknown error occurred during content processing.';
|
||||
|
||||
if (
|
||||
urlContextMeta?.urlMetadata &&
|
||||
@@ -149,13 +277,10 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
|
||||
);
|
||||
if (allStatuses.every((s) => s !== 'URL_RETRIEVAL_STATUS_SUCCESS')) {
|
||||
processingError = true;
|
||||
errorDetail = `All URL retrieval attempts failed. Statuses: ${allStatuses.join(', ')}. API reported: "${responseText || 'No additional detail.'}"`;
|
||||
}
|
||||
} else if (!responseText.trim() && !sources?.length) {
|
||||
// No URL metadata and no content/sources
|
||||
processingError = true;
|
||||
errorDetail =
|
||||
'No content was returned and no URL metadata was available to determine fetch status.';
|
||||
}
|
||||
|
||||
if (
|
||||
@@ -165,16 +290,10 @@ export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
|
||||
) {
|
||||
// Successfully retrieved some URL (or no specific error from urlContextMeta), but no usable text or grounding data.
|
||||
processingError = true;
|
||||
errorDetail =
|
||||
'URL(s) processed, but no substantive content or grounding information was found.';
|
||||
}
|
||||
|
||||
if (processingError) {
|
||||
const errorText = `Failed to process prompt and fetch URL data. ${errorDetail}`;
|
||||
return {
|
||||
llmContent: `Error: ${errorText}`,
|
||||
returnDisplay: `Error: ${errorText}`,
|
||||
};
|
||||
return this.executeFallback(params, signal);
|
||||
}
|
||||
|
||||
const sourceListFormatted: string[] = [];
|
||||
@@ -227,7 +346,10 @@ ${sourceListFormatted.join('\n')}`;
|
||||
returnDisplay: `Content processed from prompt.`,
|
||||
};
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = `Error processing web content for prompt "${userPrompt.substring(0, 50)}...": ${getErrorMessage(error)}`;
|
||||
const errorMessage = `Error processing web content for prompt "${userPrompt.substring(
|
||||
0,
|
||||
50,
|
||||
)}...": ${getErrorMessage(error)}`;
|
||||
console.error(errorMessage, error);
|
||||
return {
|
||||
llmContent: `Error: ${errorMessage}`,
|
||||
|
||||
57
packages/core/src/utils/fetch.ts
Normal file
57
packages/core/src/utils/fetch.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { getErrorMessage, isNodeError } from './errors.js';
|
||||
import { URL } from 'url';
|
||||
|
||||
const PRIVATE_IP_RANGES = [
|
||||
/^10\./,
|
||||
/^127\./,
|
||||
/^172\.(1[6-9]|2[0-9]|3[0-1])\./,
|
||||
/^192\.168\./,
|
||||
/^::1$/,
|
||||
/^fc00:/,
|
||||
/^fe80:/,
|
||||
];
|
||||
|
||||
export class FetchError extends Error {
|
||||
constructor(
|
||||
message: string,
|
||||
public code?: string,
|
||||
) {
|
||||
super(message);
|
||||
this.name = 'FetchError';
|
||||
}
|
||||
}
|
||||
|
||||
export function isPrivateIp(url: string): boolean {
|
||||
try {
|
||||
const hostname = new URL(url).hostname;
|
||||
return PRIVATE_IP_RANGES.some((range) => range.test(hostname));
|
||||
} catch (_e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
export async function fetchWithTimeout(
|
||||
url: string,
|
||||
timeout: number,
|
||||
): Promise<Response> {
|
||||
const controller = new AbortController();
|
||||
const timeoutId = setTimeout(() => controller.abort(), timeout);
|
||||
|
||||
try {
|
||||
const response = await fetch(url, { signal: controller.signal });
|
||||
return response;
|
||||
} catch (error) {
|
||||
if (isNodeError(error) && error.code === 'ABORT_ERR') {
|
||||
throw new FetchError(`Request timed out after ${timeout}ms`, 'ETIMEDOUT');
|
||||
}
|
||||
throw new FetchError(getErrorMessage(error));
|
||||
} finally {
|
||||
clearTimeout(timeoutId);
|
||||
}
|
||||
}
|
||||
@@ -38,8 +38,8 @@ const createDirent = (name: string, type: 'file' | 'dir'): FSDirent => ({
|
||||
isSymbolicLink: () => false,
|
||||
isFIFO: () => false,
|
||||
isSocket: () => false,
|
||||
parentPath: '',
|
||||
path: '',
|
||||
parentPath: '',
|
||||
});
|
||||
|
||||
describe('getFolderStructure', () => {
|
||||
|
||||
Reference in New Issue
Block a user