feat: replace google web search with tavily web search (#329)

This commit is contained in:
tanzhenxin
2025-08-14 21:20:23 +08:00
committed by GitHub
parent 51207043d0
commit 3e082ae89a
9 changed files with 155 additions and 107 deletions

View File

@@ -268,6 +268,11 @@ In addition to a project settings file, a project's `.gemini` directory can cont
"loadMemoryFromIncludeDirectories": true
```
- **`tavilyApiKey`** (string):
- **Description:** API key for Tavily web search service. Required to enable the `web_search` tool functionality. If not configured, the web search tool will be disabled and skipped.
- **Default:** `undefined` (web search disabled)
- **Example:** `"tavilyApiKey": "tvly-your-api-key-here"`
### Example `settings.json`:
```json
@@ -276,6 +281,7 @@ In addition to a project settings file, a project's `.gemini` directory can cont
"sandbox": "docker",
"toolDiscoveryCommand": "bin/get_tools",
"toolCallCommand": "bin/call_tool",
"tavilyApiKey": "$TAVILY_API_KEY",
"mcpServers": {
"mainServer": {
"command": "bin/mcp_server.py"
@@ -373,6 +379,11 @@ The CLI automatically loads environment variables from an `.env` file. The loadi
- **`CODE_ASSIST_ENDPOINT`**:
- Specifies the endpoint for the code assist server.
- This is useful for development and testing.
- **`TAVILY_API_KEY`**:
- Your API key for the Tavily web search service.
- Required to enable the `web_search` tool functionality.
- If not configured, the web search tool will be disabled and skipped.
- Example: `export TAVILY_API_KEY="tvly-your-api-key-here"`
## Command-Line Arguments
@@ -430,6 +441,9 @@ Arguments passed directly when running the CLI can override other configurations
- Displays the version of the CLI.
- **`--openai-logging`**:
- Enables logging of OpenAI API calls for debugging and analysis. This flag overrides the `enableOpenAILogging` setting in `settings.json`.
- **`--tavily-api-key <api_key>`**:
- Sets the Tavily API key for web search functionality for this session.
- Example: `gemini --tavily-api-key tvly-your-api-key-here`
## Context Files (Hierarchical Instructional Context)

View File

@@ -28,7 +28,7 @@ This documentation is organized into the following sections:
- **[Multi-File Read Tool](./tools/multi-file.md):** Documentation for the `read_many_files` tool.
- **[Shell Tool](./tools/shell.md):** Documentation for the `run_shell_command` tool.
- **[Web Fetch Tool](./tools/web-fetch.md):** Documentation for the `web_fetch` tool.
- **[Web Search Tool](./tools/web-search.md):** Documentation for the `google_web_search` tool.
- **[Web Search Tool](./tools/web-search.md):** Documentation for the `web_search` tool.
- **[Memory Tool](./tools/memory.md):** Documentation for the `save_memory` tool.
- **[Contributing & Development Guide](../CONTRIBUTING.md):** Information for contributors and developers, including setup, building, testing, and coding conventions.
- **[NPM Workspaces and Publishing](./npm.md):** Details on how the project's packages are managed and published.

View File

@@ -1,36 +1,43 @@
# Web Search Tool (`google_web_search`)
# Web Search Tool (`web_search`)
This document describes the `google_web_search` tool.
This document describes the `web_search` tool.
## Description
Use `google_web_search` to perform a web search using Google Search via the Gemini API. The `google_web_search` tool returns a summary of web results with sources.
Use `web_search` to perform a web search using the Tavily API. The tool returns a concise answer when one is available, followed by a numbered list of sources.
### Arguments
`google_web_search` takes one argument:
`web_search` takes one argument:
- `query` (string, required): The search query.
## How to use `google_web_search` with the Gemini CLI
## How to use `web_search`
The `google_web_search` tool sends a query to the Gemini API, which then performs a web search. `google_web_search` will return a generated response based on the search results, including citations and sources.
`web_search` calls the Tavily API directly. You must configure a Tavily API key through one of the following methods:
1. **Settings file**: Add `"tavilyApiKey": "your-key-here"` to your `settings.json`
2. **Environment variable**: Set `TAVILY_API_KEY` in your environment or `.env` file
3. **Command line**: Use `--tavily-api-key your-key-here` when running the CLI
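If more than one method is used, the command-line argument takes precedence over `settings.json`, which takes precedence over the environment variable. If the key is not configured, the tool will be disabled and skipped.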
Usage:
```
google_web_search(query="Your query goes here.")
web_search(query="Your query goes here.")
```
## `google_web_search` examples
## `web_search` examples
Get information on a topic:
```
google_web_search(query="latest advancements in AI-powered code generation")
web_search(query="latest advancements in AI-powered code generation")
```
## Important notes
- **Response returned:** The `google_web_search` tool returns a processed summary, not a raw list of search results.
- **Citations:** The response includes citations to the sources used to generate the summary.
- **Response returned:** The `web_search` tool returns a concise answer when available, with a list of source links.
- **Citations:** Source links are appended as a numbered list.
- **API key:** Configure the Tavily API key via `settings.json` (`tavilyApiKey`), the `TAVILY_API_KEY` environment variable, a `.env` file, or the `--tavily-api-key` command-line argument. If not configured, the tool is not registered.

View File

@@ -9,6 +9,11 @@ import { strict as assert } from 'assert';
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
test('should be able to search the web', async () => {
// Skip if Tavily key is not configured
if (!process.env.TAVILY_API_KEY) {
console.warn('Skipping web search test: TAVILY_API_KEY not set');
return;
}
const rig = new TestRig();
await rig.setup('should be able to search the web');
@@ -27,7 +32,7 @@ test('should be able to search the web', async () => {
throw error; // Re-throw if not a network error
}
const foundToolCall = await rig.waitForToolCall('google_web_search');
const foundToolCall = await rig.waitForToolCall('web_search');
// Add debugging information
if (!foundToolCall) {
@@ -35,12 +40,11 @@ test('should be able to search the web', async () => {
// Check if the tool call failed due to network issues
const failedSearchCalls = allTools.filter(
(t) =>
t.toolRequest.name === 'google_web_search' && !t.toolRequest.success,
(t) => t.toolRequest.name === 'web_search' && !t.toolRequest.success,
);
if (failedSearchCalls.length > 0) {
console.warn(
'google_web_search tool was called but failed, possibly due to network issues',
'web_search tool was called but failed, possibly due to network issues',
);
console.warn(
'Failed calls:',
@@ -50,20 +54,20 @@ test('should be able to search the web', async () => {
}
}
assert.ok(foundToolCall, 'Expected to find a call to google_web_search');
assert.ok(foundToolCall, 'Expected to find a call to web_search');
// Validate model output - will throw if no output, warn if missing expected content
const hasExpectedContent = validateModelOutput(
result,
['weather', 'london'],
'Google web search test',
'Web search test',
);
// If content was missing, log the search queries used
if (!hasExpectedContent) {
const searchCalls = rig
.readToolLogs()
.filter((t) => t.toolRequest.name === 'google_web_search');
.filter((t) => t.toolRequest.name === 'web_search');
if (searchCalls.length > 0) {
console.warn(
'Search queries used:',

View File

@@ -69,6 +69,7 @@ export interface CliArgs {
proxy: string | undefined;
includeDirectories: string[] | undefined;
loadMemoryFromIncludeDirectories: boolean | undefined;
tavilyApiKey: string | undefined;
}
export async function parseArguments(): Promise<CliArgs> {
@@ -215,6 +216,10 @@ export async function parseArguments(): Promise<CliArgs> {
type: 'string',
description: 'OpenAI base URL (for custom endpoints)',
})
.option('tavily-api-key', {
type: 'string',
description: 'Tavily API key for web search functionality',
})
.option('proxy', {
type: 'string',
description:
@@ -334,6 +339,11 @@ export async function loadCliConfig(
process.env.OPENAI_BASE_URL = argv.openaiBaseUrl;
}
// Handle Tavily API key from command line
if (argv.tavilyApiKey) {
process.env.TAVILY_API_KEY = argv.tavilyApiKey;
}
// Set the context filename in the server's memoryTool module BEFORE loading memory
// TODO(b/343434939): This is a bit of a hack. The contextFileName should ideally be passed
// directly to the Config constructor in core, and have core handle setGeminiMdFilename.
@@ -513,6 +523,8 @@ export async function loadCliConfig(
],
contentGenerator: settings.contentGenerator,
cliVersion,
tavilyApiKey:
argv.tavilyApiKey || settings.tavilyApiKey || process.env.TAVILY_API_KEY,
});
}

View File

@@ -147,6 +147,9 @@ export interface Settings {
includeDirectories?: string[];
loadMemoryFromIncludeDirectories?: boolean;
// Web search API keys
tavilyApiKey?: string;
}
export interface SettingsError {

View File

@@ -562,6 +562,10 @@ export async function start_sandbox(
if (process.env.OPENAI_API_KEY) {
args.push('--env', `OPENAI_API_KEY=${process.env.OPENAI_API_KEY}`);
}
// copy TAVILY_API_KEY for web search tool
if (process.env.TAVILY_API_KEY) {
args.push('--env', `TAVILY_API_KEY=${process.env.TAVILY_API_KEY}`);
}
if (process.env.OPENAI_BASE_URL) {
args.push('--env', `OPENAI_BASE_URL=${process.env.OPENAI_BASE_URL}`);
}

View File

@@ -211,6 +211,8 @@ export interface ConfigParameters {
};
cliVersion?: string;
loadMemoryFromIncludeDirectories?: boolean;
// Web search providers
tavilyApiKey?: string;
}
export class Config {
@@ -286,6 +288,7 @@ export class Config {
};
private readonly cliVersion?: string;
private readonly loadMemoryFromIncludeDirectories: boolean = false;
private readonly tavilyApiKey?: string;
constructor(params: ConfigParameters) {
this.sessionId = params.sessionId;
@@ -363,6 +366,9 @@ export class Config {
this.loadMemoryFromIncludeDirectories =
params.loadMemoryFromIncludeDirectories ?? false;
// Web search
this.tavilyApiKey = params.tavilyApiKey;
if (params.contextFileName) {
setGeminiMdFilename(params.contextFileName);
}
@@ -695,6 +701,11 @@ export class Config {
return this.summarizeToolOutput;
}
// Web search provider configuration
/**
 * Returns the Tavily API key this Config was constructed with
 * (`ConfigParameters.tavilyApiKey`), or `undefined` when none was supplied.
 *
 * The web search tool is registered only when this value is truthy.
 */
getTavilyApiKey(): string | undefined {
return this.tavilyApiKey;
}
getIdeModeFeature(): boolean {
return this.ideModeFeature;
}
@@ -805,7 +816,10 @@ export class Config {
registerCoreTool(ReadManyFilesTool, this);
registerCoreTool(ShellTool, this);
registerCoreTool(MemoryTool);
// Conditionally register web search tool only if Tavily API key is set
if (this.getTavilyApiKey()) {
registerCoreTool(WebSearchTool, this);
}
await registry.discoverAllTools();
return registry;

View File

@@ -4,35 +4,24 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { GroundingMetadata } from '@google/genai';
import { BaseTool, Icon, ToolResult } from './tools.js';
import { Type } from '@google/genai';
import { SchemaValidator } from '../utils/schemaValidator.js';
import { getErrorMessage } from '../utils/errors.js';
import { Config } from '../config/config.js';
import { getResponseText } from '../utils/generateContentResponseUtilities.js';
interface GroundingChunkWeb {
uri?: string;
title?: string;
interface TavilyResultItem {
title: string;
url: string;
content?: string;
score?: number;
published_date?: string;
}
interface GroundingChunkItem {
web?: GroundingChunkWeb;
// Other properties might exist if needed in the future
}
interface GroundingSupportSegment {
startIndex: number;
endIndex: number;
text?: string; // text is optional as per the example
}
interface GroundingSupportItem {
segment?: GroundingSupportSegment;
groundingChunkIndices?: number[];
confidenceScores?: number[]; // Optional as per example
interface TavilySearchResponse {
query: string;
answer?: string;
results: TavilyResultItem[];
}
/**
@@ -42,7 +31,6 @@ export interface WebSearchToolParams {
/**
* The search query.
*/
query: string;
}
@@ -50,25 +38,23 @@ export interface WebSearchToolParams {
* Extends ToolResult to include sources for web search.
*/
export interface WebSearchToolResult extends ToolResult {
sources?: GroundingMetadata extends { groundingChunks: GroundingChunkItem[] }
? GroundingMetadata['groundingChunks']
: GroundingChunkItem[];
sources?: Array<{ title: string; url: string }>;
}
/**
* A tool to perform web searches using Google Search via the Gemini API.
* A tool to perform web searches using Tavily API.
*/
export class WebSearchTool extends BaseTool<
WebSearchToolParams,
WebSearchToolResult
> {
static readonly Name: string = 'google_web_search';
static readonly Name: string = 'web_search';
constructor(private readonly config: Config) {
super(
WebSearchTool.Name,
'GoogleSearch',
'Performs a web search using Google Search (via the Gemini API) and returns the results. This tool is useful for finding information on the internet based on a query.',
'TavilySearch',
'Performs a web search using the Tavily API and returns a concise answer with sources. Requires the TAVILY_API_KEY environment variable.',
Icon.Globe,
{
type: Type.OBJECT,
@@ -106,7 +92,7 @@ export class WebSearchTool extends BaseTool<
async execute(
params: WebSearchToolParams,
signal: AbortSignal,
_signal: AbortSignal,
): Promise<WebSearchToolResult> {
const validationError = this.validateToolParams(params);
if (validationError) {
@@ -115,79 +101,83 @@ export class WebSearchTool extends BaseTool<
returnDisplay: validationError,
};
}
const geminiClient = this.config.getGeminiClient();
const apiKey = this.config.getTavilyApiKey() || process.env.TAVILY_API_KEY;
if (!apiKey) {
return {
llmContent:
'Web search is disabled because TAVILY_API_KEY is not configured. Please set it in your settings.json, .env file, or via --tavily-api-key command line argument to enable web search.',
returnDisplay:
'Web search disabled. Configure TAVILY_API_KEY to enable Tavily search.',
};
}
try {
const response = await geminiClient.generateContent(
[{ role: 'user', parts: [{ text: params.query }] }],
{ tools: [{ googleSearch: {} }] },
signal,
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 15000);
const response = await fetch('https://api.tavily.com/search', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
api_key: apiKey,
query: params.query,
search_depth: 'advanced',
max_results: 5,
include_answer: true,
}),
signal: controller.signal,
});
clearTimeout(timeoutId);
if (!response.ok) {
const text = await response.text().catch(() => '');
throw new Error(
`Tavily API error: ${response.status} ${response.statusText}${text ? ` - ${text}` : ''}`,
);
}
const data = (await response.json()) as TavilySearchResponse;
const sources = (data.results || []).map((r) => ({
title: r.title,
url: r.url,
}));
const sourceListFormatted = sources.map(
(s, i) => `[${i + 1}] ${s.title || 'Untitled'} (${s.url})`,
);
const responseText = getResponseText(response);
const groundingMetadata = response.candidates?.[0]?.groundingMetadata;
const sources = groundingMetadata?.groundingChunks as
| GroundingChunkItem[]
| undefined;
const groundingSupports = groundingMetadata?.groundingSupports as
| GroundingSupportItem[]
| undefined;
let content = data.answer?.trim() || '';
if (!content) {
// Fallback: build a concise summary from top results
content = sources
.slice(0, 3)
.map((s, i) => `${i + 1}. ${s.title} - ${s.url}`)
.join('\n');
}
if (!responseText || !responseText.trim()) {
if (sourceListFormatted.length > 0) {
content += `\n\nSources:\n${sourceListFormatted.join('\n')}`;
}
if (!content.trim()) {
return {
llmContent: `No search results or information found for query: "${params.query}"`,
returnDisplay: 'No information found.',
};
}
let modifiedResponseText = responseText;
const sourceListFormatted: string[] = [];
if (sources && sources.length > 0) {
sources.forEach((source: GroundingChunkItem, index: number) => {
const title = source.web?.title || 'Untitled';
const uri = source.web?.uri || 'No URI';
sourceListFormatted.push(`[${index + 1}] ${title} (${uri})`);
});
if (groundingSupports && groundingSupports.length > 0) {
const insertions: Array<{ index: number; marker: string }> = [];
groundingSupports.forEach((support: GroundingSupportItem) => {
if (support.segment && support.groundingChunkIndices) {
const citationMarker = support.groundingChunkIndices
.map((chunkIndex: number) => `[${chunkIndex + 1}]`)
.join('');
insertions.push({
index: support.segment.endIndex,
marker: citationMarker,
});
}
});
// Sort insertions by index in descending order to avoid shifting subsequent indices
insertions.sort((a, b) => b.index - a.index);
const responseChars = modifiedResponseText.split(''); // Use new variable
insertions.forEach((insertion) => {
// Fixed arrow function syntax
responseChars.splice(insertion.index, 0, insertion.marker);
});
modifiedResponseText = responseChars.join(''); // Assign back to modifiedResponseText
}
if (sourceListFormatted.length > 0) {
modifiedResponseText +=
'\n\nSources:\n' + sourceListFormatted.join('\n'); // Fixed string concatenation
}
}
return {
llmContent: `Web search results for "${params.query}":\n\n${modifiedResponseText}`,
llmContent: `Web search results for "${params.query}":\n\n${content}`,
returnDisplay: `Search results for "${params.query}" returned.`,
sources,
};
} catch (error: unknown) {
const errorMessage = `Error during web search for query "${params.query}": ${getErrorMessage(error)}`;
const errorMessage = `Error during web search for query "${params.query}": ${getErrorMessage(
error,
)}`;
console.error(errorMessage, error);
return {
llmContent: `Error: ${errorMessage}`,