mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-19 09:33:53 +00:00
feat: replace google web search with tavily web search (#329)
This commit is contained in:
@@ -268,6 +268,11 @@ In addition to a project settings file, a project's `.gemini` directory can cont
|
||||
"loadMemoryFromIncludeDirectories": true
|
||||
```
|
||||
|
||||
- **`tavilyApiKey`** (string):
|
||||
- **Description:** API key for the Tavily web search service. Required to enable the `web_search` tool. If not configured, the tool is not registered and web search is unavailable.
|
||||
- **Default:** `undefined` (web search disabled)
|
||||
- **Example:** `"tavilyApiKey": "tvly-your-api-key-here"`
|
||||
|
||||
### Example `settings.json`:
|
||||
|
||||
```json
|
||||
@@ -276,6 +281,7 @@ In addition to a project settings file, a project's `.gemini` directory can cont
|
||||
"sandbox": "docker",
|
||||
"toolDiscoveryCommand": "bin/get_tools",
|
||||
"toolCallCommand": "bin/call_tool",
|
||||
"tavilyApiKey": "$TAVILY_API_KEY",
|
||||
"mcpServers": {
|
||||
"mainServer": {
|
||||
"command": "bin/mcp_server.py"
|
||||
@@ -373,6 +379,11 @@ The CLI automatically loads environment variables from an `.env` file. The loadi
|
||||
- **`CODE_ASSIST_ENDPOINT`**:
|
||||
- Specifies the endpoint for the code assist server.
|
||||
- This is useful for development and testing.
|
||||
- **`TAVILY_API_KEY`**:
|
||||
- Your API key for the Tavily web search service.
|
||||
- Required to enable the `web_search` tool functionality.
|
||||
- If not configured, the `web_search` tool is not registered and web search is unavailable.
|
||||
- Example: `export TAVILY_API_KEY="tvly-your-api-key-here"`
|
||||
|
||||
## Command-Line Arguments
|
||||
|
||||
@@ -430,6 +441,9 @@ Arguments passed directly when running the CLI can override other configurations
|
||||
- Displays the version of the CLI.
|
||||
- **`--openai-logging`**:
|
||||
- Enables logging of OpenAI API calls for debugging and analysis. This flag overrides the `enableOpenAILogging` setting in `settings.json`.
|
||||
- **`--tavily-api-key <api_key>`**:
|
||||
- Sets the Tavily API key used by the `web_search` tool for this session. Takes precedence over the `tavilyApiKey` setting and the `TAVILY_API_KEY` environment variable.
|
||||
- Example: `gemini --tavily-api-key tvly-your-api-key-here`
|
||||
|
||||
## Context Files (Hierarchical Instructional Context)
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ This documentation is organized into the following sections:
|
||||
- **[Multi-File Read Tool](./tools/multi-file.md):** Documentation for the `read_many_files` tool.
|
||||
- **[Shell Tool](./tools/shell.md):** Documentation for the `run_shell_command` tool.
|
||||
- **[Web Fetch Tool](./tools/web-fetch.md):** Documentation for the `web_fetch` tool.
|
||||
- **[Web Search Tool](./tools/web-search.md):** Documentation for the `google_web_search` tool.
|
||||
- **[Web Search Tool](./tools/web-search.md):** Documentation for the `web_search` tool.
|
||||
- **[Memory Tool](./tools/memory.md):** Documentation for the `save_memory` tool.
|
||||
- **[Contributing & Development Guide](../CONTRIBUTING.md):** Information for contributors and developers, including setup, building, testing, and coding conventions.
|
||||
- **[NPM Workspaces and Publishing](./npm.md):** Details on how the project's packages are managed and published.
|
||||
|
||||
@@ -1,36 +1,43 @@
|
||||
# Web Search Tool (`google_web_search`)
|
||||
# Web Search Tool (`web_search`)
|
||||
|
||||
This document describes the `google_web_search` tool.
|
||||
This document describes the `web_search` tool.
|
||||
|
||||
## Description
|
||||
|
||||
Use `google_web_search` to perform a web search using Google Search via the Gemini API. The `google_web_search` tool returns a summary of web results with sources.
|
||||
Use `web_search` to perform a web search using the Tavily API. The tool returns a concise answer with sources when possible.
|
||||
|
||||
### Arguments
|
||||
|
||||
`google_web_search` takes one argument:
|
||||
`web_search` takes one argument:
|
||||
|
||||
- `query` (string, required): The search query.
|
||||
|
||||
## How to use `google_web_search` with the Gemini CLI
|
||||
## How to use `web_search`
|
||||
|
||||
The `google_web_search` tool sends a query to the Gemini API, which then performs a web search. `google_web_search` will return a generated response based on the search results, including citations and sources.
|
||||
`web_search` calls the Tavily API directly. You must configure a Tavily API key through one of the following methods:
|
||||
|
||||
1. **Settings file**: Add `"tavilyApiKey": "your-key-here"` to your `settings.json`
|
||||
2. **Environment variable**: Set `TAVILY_API_KEY` in your environment or `.env` file
|
||||
3. **Command line**: Use `--tavily-api-key your-key-here` when running the CLI
|
||||
|
||||
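If the key is not configured, the tool is not registered and web search is unavailable. If the key is set in more than one place, the command-line value takes precedence, followed by `settings.json`, then the environment variable.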
|
||||
|
||||
Usage:
|
||||
|
||||
```
|
||||
google_web_search(query="Your query goes here.")
|
||||
web_search(query="Your query goes here.")
|
||||
```
|
||||
|
||||
## `google_web_search` examples
|
||||
## `web_search` examples
|
||||
|
||||
Get information on a topic:
|
||||
|
||||
```
|
||||
google_web_search(query="latest advancements in AI-powered code generation")
|
||||
web_search(query="latest advancements in AI-powered code generation")
|
||||
```
|
||||
|
||||
## Important notes
|
||||
|
||||
- **Response returned:** The `google_web_search` tool returns a processed summary, not a raw list of search results.
|
||||
- **Citations:** The response includes citations to the sources used to generate the summary.
|
||||
- **Response returned:** The `web_search` tool returns Tavily's concise answer when one is available; otherwise it lists the titles and URLs of the top results. In both cases a list of source links follows.
|
||||
- **Citations:** Source links are appended as a numbered list.
|
||||
- **API key:** Configure the Tavily API key via `settings.json` (`tavilyApiKey`), the `TAVILY_API_KEY` environment variable (including `.env` files), or the `--tavily-api-key` command-line argument. If not configured, the tool is not registered.
|
||||
|
||||
@@ -9,6 +9,11 @@ import { strict as assert } from 'assert';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
|
||||
test('should be able to search the web', async () => {
|
||||
// Skip if Tavily key is not configured
|
||||
if (!process.env.TAVILY_API_KEY) {
|
||||
console.warn('Skipping web search test: TAVILY_API_KEY not set');
|
||||
return;
|
||||
}
|
||||
const rig = new TestRig();
|
||||
await rig.setup('should be able to search the web');
|
||||
|
||||
@@ -27,7 +32,7 @@ test('should be able to search the web', async () => {
|
||||
throw error; // Re-throw if not a network error
|
||||
}
|
||||
|
||||
const foundToolCall = await rig.waitForToolCall('google_web_search');
|
||||
const foundToolCall = await rig.waitForToolCall('web_search');
|
||||
|
||||
// Add debugging information
|
||||
if (!foundToolCall) {
|
||||
@@ -35,12 +40,11 @@ test('should be able to search the web', async () => {
|
||||
|
||||
// Check if the tool call failed due to network issues
|
||||
const failedSearchCalls = allTools.filter(
|
||||
(t) =>
|
||||
t.toolRequest.name === 'google_web_search' && !t.toolRequest.success,
|
||||
(t) => t.toolRequest.name === 'web_search' && !t.toolRequest.success,
|
||||
);
|
||||
if (failedSearchCalls.length > 0) {
|
||||
console.warn(
|
||||
'google_web_search tool was called but failed, possibly due to network issues',
|
||||
'web_search tool was called but failed, possibly due to network issues',
|
||||
);
|
||||
console.warn(
|
||||
'Failed calls:',
|
||||
@@ -50,20 +54,20 @@ test('should be able to search the web', async () => {
|
||||
}
|
||||
}
|
||||
|
||||
assert.ok(foundToolCall, 'Expected to find a call to google_web_search');
|
||||
assert.ok(foundToolCall, 'Expected to find a call to web_search');
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
const hasExpectedContent = validateModelOutput(
|
||||
result,
|
||||
['weather', 'london'],
|
||||
'Google web search test',
|
||||
'Web search test',
|
||||
);
|
||||
|
||||
// If content was missing, log the search queries used
|
||||
if (!hasExpectedContent) {
|
||||
const searchCalls = rig
|
||||
.readToolLogs()
|
||||
.filter((t) => t.toolRequest.name === 'google_web_search');
|
||||
.filter((t) => t.toolRequest.name === 'web_search');
|
||||
if (searchCalls.length > 0) {
|
||||
console.warn(
|
||||
'Search queries used:',
|
||||
@@ -69,6 +69,7 @@ export interface CliArgs {
|
||||
proxy: string | undefined;
|
||||
includeDirectories: string[] | undefined;
|
||||
loadMemoryFromIncludeDirectories: boolean | undefined;
|
||||
tavilyApiKey: string | undefined;
|
||||
}
|
||||
|
||||
export async function parseArguments(): Promise<CliArgs> {
|
||||
@@ -215,6 +216,10 @@ export async function parseArguments(): Promise<CliArgs> {
|
||||
type: 'string',
|
||||
description: 'OpenAI base URL (for custom endpoints)',
|
||||
})
|
||||
.option('tavily-api-key', {
|
||||
type: 'string',
|
||||
description: 'Tavily API key for web search functionality',
|
||||
})
|
||||
.option('proxy', {
|
||||
type: 'string',
|
||||
description:
|
||||
@@ -334,6 +339,11 @@ export async function loadCliConfig(
|
||||
process.env.OPENAI_BASE_URL = argv.openaiBaseUrl;
|
||||
}
|
||||
|
||||
// Handle Tavily API key from command line
|
||||
if (argv.tavilyApiKey) {
|
||||
process.env.TAVILY_API_KEY = argv.tavilyApiKey;
|
||||
}
|
||||
|
||||
// Set the context filename in the server's memoryTool module BEFORE loading memory
|
||||
// TODO(b/343434939): This is a bit of a hack. The contextFileName should ideally be passed
|
||||
// directly to the Config constructor in core, and have core handle setGeminiMdFilename.
|
||||
@@ -513,6 +523,8 @@ export async function loadCliConfig(
|
||||
],
|
||||
contentGenerator: settings.contentGenerator,
|
||||
cliVersion,
|
||||
tavilyApiKey:
|
||||
argv.tavilyApiKey || settings.tavilyApiKey || process.env.TAVILY_API_KEY,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -147,6 +147,9 @@ export interface Settings {
|
||||
includeDirectories?: string[];
|
||||
|
||||
loadMemoryFromIncludeDirectories?: boolean;
|
||||
|
||||
// Web search API keys
|
||||
tavilyApiKey?: string;
|
||||
}
|
||||
|
||||
export interface SettingsError {
|
||||
|
||||
@@ -562,6 +562,10 @@ export async function start_sandbox(
|
||||
if (process.env.OPENAI_API_KEY) {
|
||||
args.push('--env', `OPENAI_API_KEY=${process.env.OPENAI_API_KEY}`);
|
||||
}
|
||||
// copy TAVILY_API_KEY for web search tool
|
||||
if (process.env.TAVILY_API_KEY) {
|
||||
args.push('--env', `TAVILY_API_KEY=${process.env.TAVILY_API_KEY}`);
|
||||
}
|
||||
if (process.env.OPENAI_BASE_URL) {
|
||||
args.push('--env', `OPENAI_BASE_URL=${process.env.OPENAI_BASE_URL}`);
|
||||
}
|
||||
|
||||
@@ -211,6 +211,8 @@ export interface ConfigParameters {
|
||||
};
|
||||
cliVersion?: string;
|
||||
loadMemoryFromIncludeDirectories?: boolean;
|
||||
// Web search providers
|
||||
tavilyApiKey?: string;
|
||||
}
|
||||
|
||||
export class Config {
|
||||
@@ -286,6 +288,7 @@ export class Config {
|
||||
};
|
||||
private readonly cliVersion?: string;
|
||||
private readonly loadMemoryFromIncludeDirectories: boolean = false;
|
||||
private readonly tavilyApiKey?: string;
|
||||
|
||||
constructor(params: ConfigParameters) {
|
||||
this.sessionId = params.sessionId;
|
||||
@@ -363,6 +366,9 @@ export class Config {
|
||||
this.loadMemoryFromIncludeDirectories =
|
||||
params.loadMemoryFromIncludeDirectories ?? false;
|
||||
|
||||
// Web search
|
||||
this.tavilyApiKey = params.tavilyApiKey;
|
||||
|
||||
if (params.contextFileName) {
|
||||
setGeminiMdFilename(params.contextFileName);
|
||||
}
|
||||
@@ -695,6 +701,11 @@ export class Config {
|
||||
return this.summarizeToolOutput;
|
||||
}
|
||||
|
||||
// Web search provider configuration
|
||||
getTavilyApiKey(): string | undefined {
|
||||
return this.tavilyApiKey;
|
||||
}
|
||||
|
||||
getIdeModeFeature(): boolean {
|
||||
return this.ideModeFeature;
|
||||
}
|
||||
@@ -805,7 +816,10 @@ export class Config {
|
||||
registerCoreTool(ReadManyFilesTool, this);
|
||||
registerCoreTool(ShellTool, this);
|
||||
registerCoreTool(MemoryTool);
|
||||
registerCoreTool(WebSearchTool, this);
|
||||
// Conditionally register web search tool only if Tavily API key is set
|
||||
if (this.getTavilyApiKey()) {
|
||||
registerCoreTool(WebSearchTool, this);
|
||||
}
|
||||
|
||||
await registry.discoverAllTools();
|
||||
return registry;
|
||||
|
||||
@@ -4,35 +4,24 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { GroundingMetadata } from '@google/genai';
|
||||
import { BaseTool, Icon, ToolResult } from './tools.js';
|
||||
import { Type } from '@google/genai';
|
||||
import { SchemaValidator } from '../utils/schemaValidator.js';
|
||||
|
||||
import { getErrorMessage } from '../utils/errors.js';
|
||||
import { Config } from '../config/config.js';
|
||||
import { getResponseText } from '../utils/generateContentResponseUtilities.js';
|
||||
|
||||
interface GroundingChunkWeb {
|
||||
uri?: string;
|
||||
title?: string;
|
||||
interface TavilyResultItem {
|
||||
title: string;
|
||||
url: string;
|
||||
content?: string;
|
||||
score?: number;
|
||||
published_date?: string;
|
||||
}
|
||||
|
||||
interface GroundingChunkItem {
|
||||
web?: GroundingChunkWeb;
|
||||
// Other properties might exist if needed in the future
|
||||
}
|
||||
|
||||
interface GroundingSupportSegment {
|
||||
startIndex: number;
|
||||
endIndex: number;
|
||||
text?: string; // text is optional as per the example
|
||||
}
|
||||
|
||||
interface GroundingSupportItem {
|
||||
segment?: GroundingSupportSegment;
|
||||
groundingChunkIndices?: number[];
|
||||
confidenceScores?: number[]; // Optional as per example
|
||||
interface TavilySearchResponse {
|
||||
query: string;
|
||||
answer?: string;
|
||||
results: TavilyResultItem[];
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -42,7 +31,6 @@ export interface WebSearchToolParams {
|
||||
/**
|
||||
* The search query.
|
||||
*/
|
||||
|
||||
query: string;
|
||||
}
|
||||
|
||||
@@ -50,25 +38,23 @@ export interface WebSearchToolParams {
|
||||
* Extends ToolResult to include sources for web search.
|
||||
*/
|
||||
export interface WebSearchToolResult extends ToolResult {
|
||||
sources?: GroundingMetadata extends { groundingChunks: GroundingChunkItem[] }
|
||||
? GroundingMetadata['groundingChunks']
|
||||
: GroundingChunkItem[];
|
||||
sources?: Array<{ title: string; url: string }>;
|
||||
}
|
||||
|
||||
/**
|
||||
* A tool to perform web searches using Google Search via the Gemini API.
|
||||
* A tool to perform web searches using Tavily API.
|
||||
*/
|
||||
export class WebSearchTool extends BaseTool<
|
||||
WebSearchToolParams,
|
||||
WebSearchToolResult
|
||||
> {
|
||||
static readonly Name: string = 'google_web_search';
|
||||
static readonly Name: string = 'web_search';
|
||||
|
||||
constructor(private readonly config: Config) {
|
||||
super(
|
||||
WebSearchTool.Name,
|
||||
'GoogleSearch',
|
||||
'Performs a web search using Google Search (via the Gemini API) and returns the results. This tool is useful for finding information on the internet based on a query.',
|
||||
'TavilySearch',
|
||||
'Performs a web search using the Tavily API and returns a concise answer with sources. Requires the TAVILY_API_KEY environment variable.',
|
||||
Icon.Globe,
|
||||
{
|
||||
type: Type.OBJECT,
|
||||
@@ -106,7 +92,7 @@ export class WebSearchTool extends BaseTool<
|
||||
|
||||
async execute(
|
||||
params: WebSearchToolParams,
|
||||
signal: AbortSignal,
|
||||
_signal: AbortSignal,
|
||||
): Promise<WebSearchToolResult> {
|
||||
const validationError = this.validateToolParams(params);
|
||||
if (validationError) {
|
||||
@@ -115,79 +101,83 @@ export class WebSearchTool extends BaseTool<
|
||||
returnDisplay: validationError,
|
||||
};
|
||||
}
|
||||
const geminiClient = this.config.getGeminiClient();
|
||||
|
||||
const apiKey = this.config.getTavilyApiKey() || process.env.TAVILY_API_KEY;
|
||||
if (!apiKey) {
|
||||
return {
|
||||
llmContent:
|
||||
'Web search is disabled because TAVILY_API_KEY is not configured. Please set it in your settings.json, .env file, or via --tavily-api-key command line argument to enable web search.',
|
||||
returnDisplay:
|
||||
'Web search disabled. Configure TAVILY_API_KEY to enable Tavily search.',
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await geminiClient.generateContent(
|
||||
[{ role: 'user', parts: [{ text: params.query }] }],
|
||||
{ tools: [{ googleSearch: {} }] },
|
||||
signal,
|
||||
const controller = new AbortController();
|
||||
const timeoutId = setTimeout(() => controller.abort(), 15000);
|
||||
const response = await fetch('https://api.tavily.com/search', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
api_key: apiKey,
|
||||
query: params.query,
|
||||
search_depth: 'advanced',
|
||||
max_results: 5,
|
||||
include_answer: true,
|
||||
}),
|
||||
signal: controller.signal,
|
||||
});
|
||||
clearTimeout(timeoutId);
|
||||
|
||||
if (!response.ok) {
|
||||
const text = await response.text().catch(() => '');
|
||||
throw new Error(
|
||||
`Tavily API error: ${response.status} ${response.statusText}${text ? ` - ${text}` : ''}`,
|
||||
);
|
||||
}
|
||||
|
||||
const data = (await response.json()) as TavilySearchResponse;
|
||||
|
||||
const sources = (data.results || []).map((r) => ({
|
||||
title: r.title,
|
||||
url: r.url,
|
||||
}));
|
||||
|
||||
const sourceListFormatted = sources.map(
|
||||
(s, i) => `[${i + 1}] ${s.title || 'Untitled'} (${s.url})`,
|
||||
);
|
||||
|
||||
const responseText = getResponseText(response);
|
||||
const groundingMetadata = response.candidates?.[0]?.groundingMetadata;
|
||||
const sources = groundingMetadata?.groundingChunks as
|
||||
| GroundingChunkItem[]
|
||||
| undefined;
|
||||
const groundingSupports = groundingMetadata?.groundingSupports as
|
||||
| GroundingSupportItem[]
|
||||
| undefined;
|
||||
let content = data.answer?.trim() || '';
|
||||
if (!content) {
|
||||
// Fallback: build a concise summary from top results
|
||||
content = sources
|
||||
.slice(0, 3)
|
||||
.map((s, i) => `${i + 1}. ${s.title} - ${s.url}`)
|
||||
.join('\n');
|
||||
}
|
||||
|
||||
if (!responseText || !responseText.trim()) {
|
||||
if (sourceListFormatted.length > 0) {
|
||||
content += `\n\nSources:\n${sourceListFormatted.join('\n')}`;
|
||||
}
|
||||
|
||||
if (!content.trim()) {
|
||||
return {
|
||||
llmContent: `No search results or information found for query: "${params.query}"`,
|
||||
returnDisplay: 'No information found.',
|
||||
};
|
||||
}
|
||||
|
||||
let modifiedResponseText = responseText;
|
||||
const sourceListFormatted: string[] = [];
|
||||
|
||||
if (sources && sources.length > 0) {
|
||||
sources.forEach((source: GroundingChunkItem, index: number) => {
|
||||
const title = source.web?.title || 'Untitled';
|
||||
const uri = source.web?.uri || 'No URI';
|
||||
sourceListFormatted.push(`[${index + 1}] ${title} (${uri})`);
|
||||
});
|
||||
|
||||
if (groundingSupports && groundingSupports.length > 0) {
|
||||
const insertions: Array<{ index: number; marker: string }> = [];
|
||||
groundingSupports.forEach((support: GroundingSupportItem) => {
|
||||
if (support.segment && support.groundingChunkIndices) {
|
||||
const citationMarker = support.groundingChunkIndices
|
||||
.map((chunkIndex: number) => `[${chunkIndex + 1}]`)
|
||||
.join('');
|
||||
insertions.push({
|
||||
index: support.segment.endIndex,
|
||||
marker: citationMarker,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Sort insertions by index in descending order to avoid shifting subsequent indices
|
||||
insertions.sort((a, b) => b.index - a.index);
|
||||
|
||||
const responseChars = modifiedResponseText.split(''); // Use new variable
|
||||
insertions.forEach((insertion) => {
|
||||
// Fixed arrow function syntax
|
||||
responseChars.splice(insertion.index, 0, insertion.marker);
|
||||
});
|
||||
modifiedResponseText = responseChars.join(''); // Assign back to modifiedResponseText
|
||||
}
|
||||
|
||||
if (sourceListFormatted.length > 0) {
|
||||
modifiedResponseText +=
|
||||
'\n\nSources:\n' + sourceListFormatted.join('\n'); // Fixed string concatenation
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
llmContent: `Web search results for "${params.query}":\n\n${modifiedResponseText}`,
|
||||
llmContent: `Web search results for "${params.query}":\n\n${content}`,
|
||||
returnDisplay: `Search results for "${params.query}" returned.`,
|
||||
sources,
|
||||
};
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = `Error during web search for query "${params.query}": ${getErrorMessage(error)}`;
|
||||
const errorMessage = `Error during web search for query "${params.query}": ${getErrorMessage(
|
||||
error,
|
||||
)}`;
|
||||
console.error(errorMessage, error);
|
||||
return {
|
||||
llmContent: `Error: ${errorMessage}`,
|
||||
|
||||
Reference in New Issue
Block a user