feat: replace google web search with tavily web search (#329)

2025-12-19 09:33:53 +00:00 · 2025-08-14 21:20:23 +08:00
parent 51207043d0
commit 3e082ae89a
9 changed files with 155 additions and 107 deletions
--- a/docs/cli/configuration.md
+++ b/docs/cli/configuration.md
@@ -268,6 +268,11 @@ In addition to a project settings file, a project's `.gemini` directory can cont
    "loadMemoryFromIncludeDirectories": true
    ```
 - **`tavilyApiKey`** (string):
  - **Description:** API key for the Tavily web search service. Required to enable the `web_search` tool; if no key is configured, the tool is not registered and web search is unavailable.
  - **Default:** `undefined` (web search disabled)
  - **Example:** `"tavilyApiKey": "tvly-your-api-key-here"`
 ### Example `settings.json`:
 ```json
@@ -276,6 +281,7 @@ In addition to a project settings file, a project's `.gemini` directory can cont
  "sandbox": "docker",
  "toolDiscoveryCommand": "bin/get_tools",
  "toolCallCommand": "bin/call_tool",
  "tavilyApiKey": "$TAVILY_API_KEY",
  "mcpServers": {
    "mainServer": {
      "command": "bin/mcp_server.py"
@@ -373,6 +379,11 @@ The CLI automatically loads environment variables from an `.env` file. The loadi
 - **`CODE_ASSIST_ENDPOINT`**:
  - Specifies the endpoint for the code assist server.
  - This is useful for development and testing.
 - **`TAVILY_API_KEY`**:
  - Your API key for the Tavily web search service.
  - Required to enable the `web_search` tool functionality.
  - If not configured, the web search tool will be disabled and skipped.
  - Example: `export TAVILY_API_KEY="tvly-your-api-key-here"`
 ## Command-Line Arguments
@@ -430,6 +441,9 @@ Arguments passed directly when running the CLI can override other configurations
  - Displays the version of the CLI.
 - **`--openai-logging`**:
  - Enables logging of OpenAI API calls for debugging and analysis. This flag overrides the `enableOpenAILogging` setting in `settings.json`.
 - **`--tavily-api-key <api_key>`**:
  - Sets the Tavily API key for web search functionality for this session.
  - Example: `gemini --tavily-api-key tvly-your-api-key-here`
 ## Context Files (Hierarchical Instructional Context)
--- a/docs/index.md
+++ b/docs/index.md
@@ -28,7 +28,7 @@ This documentation is organized into the following sections:
  - **[Multi-File Read Tool](./tools/multi-file.md):** Documentation for the `read_many_files` tool.
  - **[Shell Tool](./tools/shell.md):** Documentation for the `run_shell_command` tool.
  - **[Web Fetch Tool](./tools/web-fetch.md):** Documentation for the `web_fetch` tool.
-  - **[Web Search Tool](./tools/web-search.md):** Documentation for the `google_web_search` tool.
+  - **[Web Search Tool](./tools/web-search.md):** Documentation for the `web_search` tool.
  - **[Memory Tool](./tools/memory.md):** Documentation for the `save_memory` tool.
 - **[Contributing & Development Guide](../CONTRIBUTING.md):** Information for contributors and developers, including setup, building, testing, and coding conventions.
 - **[NPM Workspaces and Publishing](./npm.md):** Details on how the project's packages are managed and published.
--- a/docs/tools/web-search.md
+++ b/docs/tools/web-search.md
@@ -1,36 +1,43 @@
-# Web Search Tool (`google_web_search`)
+# Web Search Tool (`web_search`)
-This document describes the `google_web_search` tool.
+This document describes the `web_search` tool.
 ## Description
-Use `google_web_search` to perform a web search using Google Search via the Gemini API. The `google_web_search` tool returns a summary of web results with sources.
+Use `web_search` to perform a web search using the Tavily API. The tool returns a concise answer with sources when possible.
 ### Arguments
-`google_web_search` takes one argument:
+`web_search` takes one argument:
 - `query` (string, required): The search query.
-## How to use `google_web_search` with the Gemini CLI
+## How to use `web_search`
-The `google_web_search` tool sends a query to the Gemini API, which then performs a web search. `google_web_search` will return a generated response based on the search results, including citations and sources.
+`web_search` calls the Tavily API directly. You must configure a Tavily API key through one of the following methods:
 1. **Settings file**: Add `"tavilyApiKey": "your-key-here"` to your `settings.json`
 2. **Environment variable**: Set `TAVILY_API_KEY` in your environment or `.env` file
 3. **Command line**: Use `--tavily-api-key your-key-here` when running the CLI
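 If more than one is set, the command-line value takes precedence, followed by `settings.json`, then the environment variable. If no key is configured, the tool is not registered and web search is unavailable.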
 Usage:
 ```
-google_web_search(query="Your query goes here.")
+web_search(query="Your query goes here.")
 ```
-## `google_web_search` examples
+## `web_search` examples
 Get information on a topic:
 ```
-google_web_search(query="latest advancements in AI-powered code generation")
+web_search(query="latest advancements in AI-powered code generation")
 ```
 ## Important notes
- **Response returned:** The `google_web_search` tool returns a processed summary, not a raw list of search results.
+- **Response returned:** The `web_search` tool returns a concise answer when available, with a list of source links.
- **Citations:** The response includes citations to the sources used to generate the summary.
+- **Citations:** Source links are appended as a numbered list.
 - **API key:** Configure `TAVILY_API_KEY` via settings.json, environment variables, .env files, or command line arguments. If not configured, the tool is not registered.
--- a/integration-tests/google_web_search.test.js
+++ b/integration-tests/google_web_search.test.js
@@ -9,6 +9,11 @@ import { strict as assert } from 'assert';
 import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
 test('should be able to search the web', async () => {
  // Skip if Tavily key is not configured
  if (!process.env.TAVILY_API_KEY) {
    console.warn('Skipping web search test: TAVILY_API_KEY not set');
    return;
  }
  const rig = new TestRig();
  await rig.setup('should be able to search the web');
@@ -27,7 +32,7 @@ test('should be able to search the web', async () => {
    throw error; // Re-throw if not a network error
  }
-  const foundToolCall = await rig.waitForToolCall('google_web_search');
+  const foundToolCall = await rig.waitForToolCall('web_search');
  // Add debugging information
  if (!foundToolCall) {
@@ -35,12 +40,11 @@ test('should be able to search the web', async () => {
    // Check if the tool call failed due to network issues
    const failedSearchCalls = allTools.filter(
-      (t) =>
+      (t) => t.toolRequest.name === 'web_search' && !t.toolRequest.success,
        t.toolRequest.name === 'google_web_search' && !t.toolRequest.success,
    );
    if (failedSearchCalls.length > 0) {
      console.warn(
-        'google_web_search tool was called but failed, possibly due to network issues',
+        'web_search tool was called but failed, possibly due to network issues',
      );
      console.warn(
        'Failed calls:',
@@ -50,20 +54,20 @@ test('should be able to search the web', async () => {
    }
  }
-  assert.ok(foundToolCall, 'Expected to find a call to google_web_search');
+  assert.ok(foundToolCall, 'Expected to find a call to web_search');
  // Validate model output - will throw if no output, warn if missing expected content
  const hasExpectedContent = validateModelOutput(
    result,
    ['weather', 'london'],
-    'Google web search test',
+    'Web search test',
  );
  // If content was missing, log the search queries used
  if (!hasExpectedContent) {
    const searchCalls = rig
      .readToolLogs()
-      .filter((t) => t.toolRequest.name === 'google_web_search');
+      .filter((t) => t.toolRequest.name === 'web_search');
    if (searchCalls.length > 0) {
      console.warn(
        'Search queries used:',
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -69,6 +69,7 @@ export interface CliArgs {
  proxy: string | undefined;
  includeDirectories: string[] | undefined;
  loadMemoryFromIncludeDirectories: boolean | undefined;
  tavilyApiKey: string | undefined;
 }
 export async function parseArguments(): Promise<CliArgs> {
@@ -215,6 +216,10 @@ export async function parseArguments(): Promise<CliArgs> {
      type: 'string',
      description: 'OpenAI base URL (for custom endpoints)',
    })
    .option('tavily-api-key', {
      type: 'string',
      description: 'Tavily API key for web search functionality',
    })
    .option('proxy', {
      type: 'string',
      description:
@@ -334,6 +339,11 @@ export async function loadCliConfig(
    process.env.OPENAI_BASE_URL = argv.openaiBaseUrl;
  }
  // Handle Tavily API key from command line
  if (argv.tavilyApiKey) {
    process.env.TAVILY_API_KEY = argv.tavilyApiKey;
  }
  // Set the context filename in the server's memoryTool module BEFORE loading memory
  // TODO(b/343434939): This is a bit of a hack. The contextFileName should ideally be passed
  // directly to the Config constructor in core, and have core handle setGeminiMdFilename.
@@ -513,6 +523,8 @@ export async function loadCliConfig(
    ],
    contentGenerator: settings.contentGenerator,
    cliVersion,
    tavilyApiKey:
      argv.tavilyApiKey || settings.tavilyApiKey || process.env.TAVILY_API_KEY,
  });
 }
--- a/packages/cli/src/config/settings.ts
+++ b/packages/cli/src/config/settings.ts
@@ -147,6 +147,9 @@ export interface Settings {
  includeDirectories?: string[];
  loadMemoryFromIncludeDirectories?: boolean;
  // Web search API keys
  tavilyApiKey?: string;
 }
 export interface SettingsError {
--- a/packages/cli/src/utils/sandbox.ts
+++ b/packages/cli/src/utils/sandbox.ts
@@ -562,6 +562,10 @@ export async function start_sandbox(
  if (process.env.OPENAI_API_KEY) {
    args.push('--env', `OPENAI_API_KEY=${process.env.OPENAI_API_KEY}`);
  }
  // copy TAVILY_API_KEY for web search tool
  if (process.env.TAVILY_API_KEY) {
    args.push('--env', `TAVILY_API_KEY=${process.env.TAVILY_API_KEY}`);
  }
  if (process.env.OPENAI_BASE_URL) {
    args.push('--env', `OPENAI_BASE_URL=${process.env.OPENAI_BASE_URL}`);
  }
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -211,6 +211,8 @@ export interface ConfigParameters {
  };
  cliVersion?: string;
  loadMemoryFromIncludeDirectories?: boolean;
  // Web search providers
  tavilyApiKey?: string;
 }
 export class Config {
@@ -286,6 +288,7 @@ export class Config {
  };
  private readonly cliVersion?: string;
  private readonly loadMemoryFromIncludeDirectories: boolean = false;
  private readonly tavilyApiKey?: string;
  constructor(params: ConfigParameters) {
    this.sessionId = params.sessionId;
@@ -363,6 +366,9 @@ export class Config {
    this.loadMemoryFromIncludeDirectories =
      params.loadMemoryFromIncludeDirectories ?? false;
    // Web search
    this.tavilyApiKey = params.tavilyApiKey;
    if (params.contextFileName) {
      setGeminiMdFilename(params.contextFileName);
    }
@@ -695,6 +701,11 @@ export class Config {
    return this.summarizeToolOutput;
  }
  // Web search provider configuration
  /**
   * Returns the Tavily API key this Config was constructed with.
   *
   * @returns The stored key, or `undefined` when none was supplied.
   */
  getTavilyApiKey(): string | undefined {
    return this.tavilyApiKey;
  }
  /** Returns the `ideModeFeature` boolean held by this Config. */
  getIdeModeFeature(): boolean {
    return this.ideModeFeature;
  }
@@ -805,7 +816,10 @@ export class Config {
    registerCoreTool(ReadManyFilesTool, this);
    registerCoreTool(ShellTool, this);
    registerCoreTool(MemoryTool);
    // Conditionally register web search tool only if Tavily API key is set
    if (this.getTavilyApiKey()) {
      registerCoreTool(WebSearchTool, this);
    }
    await registry.discoverAllTools();
    return registry;
--- a/packages/core/src/tools/web-search.ts
+++ b/packages/core/src/tools/web-search.ts
@@ -4,35 +4,24 @@
 * SPDX-License-Identifier: Apache-2.0
 */
 import { GroundingMetadata } from '@google/genai';
 import { BaseTool, Icon, ToolResult } from './tools.js';
 import { Type } from '@google/genai';
 import { SchemaValidator } from '../utils/schemaValidator.js';
 import { getErrorMessage } from '../utils/errors.js';
 import { Config } from '../config/config.js';
 import { getResponseText } from '../utils/generateContentResponseUtilities.js';
-interface GroundingChunkWeb {
+interface TavilyResultItem {
-  uri?: string;
+  title: string;
-  title?: string;
+  url: string;
  content?: string;
  score?: number;
  published_date?: string;
 }
-interface GroundingChunkItem {
+interface TavilySearchResponse {
-  web?: GroundingChunkWeb;
+  query: string;
-  // Other properties might exist if needed in the future
+  answer?: string;
-}
+  results: TavilyResultItem[];
 interface GroundingSupportSegment {
  startIndex: number;
  endIndex: number;
  text?: string; // text is optional as per the example
 }
 interface GroundingSupportItem {
  segment?: GroundingSupportSegment;
  groundingChunkIndices?: number[];
  confidenceScores?: number[]; // Optional as per example
 }
 /**
@@ -42,7 +31,6 @@ export interface WebSearchToolParams {
  /**
   * The search query.
   */
  query: string;
 }
@@ -50,25 +38,23 @@ export interface WebSearchToolParams {
 * Extends ToolResult to include sources for web search.
 */
 export interface WebSearchToolResult extends ToolResult {
-  sources?: GroundingMetadata extends { groundingChunks: GroundingChunkItem[] }
+  sources?: Array<{ title: string; url: string }>;
    ? GroundingMetadata['groundingChunks']
    : GroundingChunkItem[];
 }
 /**
- * A tool to perform web searches using Google Search via the Gemini API.
+ * A tool to perform web searches using Tavily API.
 */
 export class WebSearchTool extends BaseTool<
  WebSearchToolParams,
  WebSearchToolResult
 > {
-  static readonly Name: string = 'google_web_search';
+  static readonly Name: string = 'web_search';
  constructor(private readonly config: Config) {
    super(
      WebSearchTool.Name,
-      'GoogleSearch',
+      'TavilySearch',
-      'Performs a web search using Google Search (via the Gemini API) and returns the results. This tool is useful for finding information on the internet based on a query.',
+      'Performs a web search using the Tavily API and returns a concise answer with sources. Requires the TAVILY_API_KEY environment variable.',
      Icon.Globe,
      {
        type: Type.OBJECT,
@@ -106,7 +92,7 @@ export class WebSearchTool extends BaseTool<
  async execute(
    params: WebSearchToolParams,
-    signal: AbortSignal,
+    _signal: AbortSignal,
  ): Promise<WebSearchToolResult> {
    const validationError = this.validateToolParams(params);
    if (validationError) {
@@ -115,79 +101,83 @@ export class WebSearchTool extends BaseTool<
        returnDisplay: validationError,
      };
    }
-    const geminiClient = this.config.getGeminiClient();
+
    const apiKey = this.config.getTavilyApiKey() || process.env.TAVILY_API_KEY;
    if (!apiKey) {
      return {
        llmContent:
          'Web search is disabled because TAVILY_API_KEY is not configured. Please set it in your settings.json, .env file, or via --tavily-api-key command line argument to enable web search.',
        returnDisplay:
          'Web search disabled. Configure TAVILY_API_KEY to enable Tavily search.',
      };
    }
    try {
-      const response = await geminiClient.generateContent(
+      const controller = new AbortController();
-        [{ role: 'user', parts: [{ text: params.query }] }],
+      const timeoutId = setTimeout(() => controller.abort(), 15000);
-        { tools: [{ googleSearch: {} }] },
+      const response = await fetch('https://api.tavily.com/search', {
-        signal,
+        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          api_key: apiKey,
          query: params.query,
          search_depth: 'advanced',
          max_results: 5,
          include_answer: true,
        }),
        signal: controller.signal,
      });
      clearTimeout(timeoutId);
      if (!response.ok) {
        const text = await response.text().catch(() => '');
        throw new Error(
          `Tavily API error: ${response.status} ${response.statusText}${text ? ` - ${text}` : ''}`,
        );
      }
      const data = (await response.json()) as TavilySearchResponse;
      const sources = (data.results || []).map((r) => ({
        title: r.title,
        url: r.url,
      }));
      const sourceListFormatted = sources.map(
        (s, i) => `[${i + 1}] ${s.title || 'Untitled'} (${s.url})`,
      );
-      const responseText = getResponseText(response);
+      let content = data.answer?.trim() || '';
-      const groundingMetadata = response.candidates?.[0]?.groundingMetadata;
+      if (!content) {
-      const sources = groundingMetadata?.groundingChunks as
+        // Fallback: build a concise summary from top results
-        | GroundingChunkItem[]
+        content = sources
-        | undefined;
+          .slice(0, 3)
-      const groundingSupports = groundingMetadata?.groundingSupports as
+          .map((s, i) => `${i + 1}. ${s.title} - ${s.url}`)
-        | GroundingSupportItem[]
+          .join('\n');
-        | undefined;
+      }
-      if (!responseText || !responseText.trim()) {
+      if (sourceListFormatted.length > 0) {
        content += `\n\nSources:\n${sourceListFormatted.join('\n')}`;
      }
      if (!content.trim()) {
        return {
          llmContent: `No search results or information found for query: "${params.query}"`,
          returnDisplay: 'No information found.',
        };
      }
      let modifiedResponseText = responseText;
      const sourceListFormatted: string[] = [];
      if (sources && sources.length > 0) {
        sources.forEach((source: GroundingChunkItem, index: number) => {
          const title = source.web?.title || 'Untitled';
          const uri = source.web?.uri || 'No URI';
          sourceListFormatted.push(`[${index + 1}] ${title} (${uri})`);
        });
        if (groundingSupports && groundingSupports.length > 0) {
          const insertions: Array<{ index: number; marker: string }> = [];
          groundingSupports.forEach((support: GroundingSupportItem) => {
            if (support.segment && support.groundingChunkIndices) {
              const citationMarker = support.groundingChunkIndices
                .map((chunkIndex: number) => `[${chunkIndex + 1}]`)
                .join('');
              insertions.push({
                index: support.segment.endIndex,
                marker: citationMarker,
              });
            }
          });
          // Sort insertions by index in descending order to avoid shifting subsequent indices
          insertions.sort((a, b) => b.index - a.index);
          const responseChars = modifiedResponseText.split(''); // Use new variable
          insertions.forEach((insertion) => {
            // Fixed arrow function syntax
            responseChars.splice(insertion.index, 0, insertion.marker);
          });
          modifiedResponseText = responseChars.join(''); // Assign back to modifiedResponseText
        }
        if (sourceListFormatted.length > 0) {
          modifiedResponseText +=
            '\n\nSources:\n' + sourceListFormatted.join('\n'); // Fixed string concatenation
        }
      }
      return {
-        llmContent: `Web search results for "${params.query}":\n\n${modifiedResponseText}`,
+        llmContent: `Web search results for "${params.query}":\n\n${content}`,
        returnDisplay: `Search results for "${params.query}" returned.`,
        sources,
      };
    } catch (error: unknown) {
-      const errorMessage = `Error during web search for query "${params.query}": ${getErrorMessage(error)}`;
+      const errorMessage = `Error during web search for query "${params.query}": ${getErrorMessage(
        error,
      )}`;
      console.error(errorMessage, error);
      return {
        llmContent: `Error: ${errorMessage}`,