🎯 Feature: Customizable Model Training and Tool Output Management (#981)

2025-12-20 16:57:46 +00:00 · 2025-11-07 17:28:16 +08:00
parent 21fba6eb89
commit c3d427730e
32 changed files with 795 additions and 607 deletions
--- a/packages/core/src/tools/glob.test.ts
+++ b/packages/core/src/tools/glob.test.ts
@@ -37,6 +37,7 @@ describe('GlobTool', () => {
    getFileExclusions: () => ({
      getGlobExcludes: () => [],
    }),
+    getTruncateToolOutputLines: () => 1000,
  } as unknown as Config;

  beforeEach(async () => {
--- a/packages/core/src/tools/glob.ts
+++ b/packages/core/src/tools/glob.ts
@@ -161,11 +161,15 @@ class GlobToolInvocation extends BaseToolInvocation<
      );

      const totalFileCount = sortedEntries.length;
-      const truncated = totalFileCount > MAX_FILE_COUNT;
+      const fileLimit = Math.min(
+        MAX_FILE_COUNT,
+        this.config.getTruncateToolOutputLines(),
+      );
+      const truncated = totalFileCount > fileLimit;

-      // Limit to MAX_FILE_COUNT if needed
+      // Limit to fileLimit if needed
      const entriesToShow = truncated
-        ? sortedEntries.slice(0, MAX_FILE_COUNT)
+        ? sortedEntries.slice(0, fileLimit)
        : sortedEntries;

      const sortedAbsolutePaths = entriesToShow.map((entry) =>
@@ -178,7 +182,7 @@ class GlobToolInvocation extends BaseToolInvocation<

      // Add truncation notice if needed
      if (truncated) {
-        const omittedFiles = totalFileCount - MAX_FILE_COUNT;
+        const omittedFiles = totalFileCount - fileLimit;
        const fileTerm = omittedFiles === 1 ? 'file' : 'files';
        resultMessage += `\n---\n[${omittedFiles} ${fileTerm} truncated] ...`;
      }
--- a/packages/core/src/tools/grep.test.ts
+++ b/packages/core/src/tools/grep.test.ts
@@ -43,6 +43,8 @@ describe('GrepTool', () => {
    getFileExclusions: () => ({
      getGlobExcludes: () => [],
    }),
+    getTruncateToolOutputThreshold: () => 25000,
+    getTruncateToolOutputLines: () => 1000,
  } as unknown as Config;

  beforeEach(async () => {
@@ -282,6 +284,8 @@ describe('GrepTool', () => {
        getFileExclusions: () => ({
          getGlobExcludes: () => [],
        }),
+        getTruncateToolOutputThreshold: () => 25000,
+        getTruncateToolOutputLines: () => 1000,
      } as unknown as Config;

      const multiDirGrepTool = new GrepTool(multiDirConfig);
--- a/packages/core/src/tools/grep.ts
+++ b/packages/core/src/tools/grep.ts
@@ -19,8 +19,6 @@ import type { Config } from '../config/config.js';
 import type { FileExclusions } from '../utils/ignorePatterns.js';
 import { ToolErrorType } from './tool-error.js';

-const MAX_LLM_CONTENT_LENGTH = 20_000;
-
 // --- Interfaces ---

 /**
@@ -103,14 +101,17 @@ class GrepToolInvocation extends BaseToolInvocation<
        return { llmContent: noMatchMsg, returnDisplay: `No matches found` };
      }

+      const charLimit = this.config.getTruncateToolOutputThreshold();
+      const lineLimit = Math.min(
+        this.config.getTruncateToolOutputLines(),
+        this.params.limit ?? Number.POSITIVE_INFINITY,
+      );
+
      // Apply line limit if specified
      let truncatedByLineLimit = false;
      let matchesToInclude = rawMatches;
-      if (
-        this.params.limit !== undefined &&
-        rawMatches.length > this.params.limit
-      ) {
-        matchesToInclude = rawMatches.slice(0, this.params.limit);
+      if (rawMatches.length > lineLimit) {
+        matchesToInclude = rawMatches.slice(0, lineLimit);
        truncatedByLineLimit = true;
      }

@@ -147,8 +148,8 @@ class GrepToolInvocation extends BaseToolInvocation<

      // Apply character limit as safety net
      let truncatedByCharLimit = false;
-      if (grepOutput.length > MAX_LLM_CONTENT_LENGTH) {
-        grepOutput = grepOutput.slice(0, MAX_LLM_CONTENT_LENGTH) + '...';
+      if (Number.isFinite(charLimit) && grepOutput.length > charLimit) {
+        grepOutput = grepOutput.slice(0, charLimit) + '...';
        truncatedByCharLimit = true;
      }

--- a/packages/core/src/tools/read-file.test.ts
+++ b/packages/core/src/tools/read-file.test.ts
@@ -41,6 +41,8 @@ describe('ReadFileTool', () => {
      storage: {
        getProjectTempDir: () => path.join(tempRootDir, '.temp'),
      },
+      getTruncateToolOutputThreshold: () => 2500,
+      getTruncateToolOutputLines: () => 500,
    } as unknown as Config;
    tool = new ReadFileTool(mockConfigInstance);
  });
@@ -281,11 +283,9 @@ describe('ReadFileTool', () => {
      >;

      const result = await invocation.execute(abortSignal);
-      expect(result.llmContent).toContain(
-        'IMPORTANT: The file content has been truncated',
+      expect(result.returnDisplay).toContain(
+        'Read lines 1-2 of 3 from longlines.txt (truncated)',
      );
-      expect(result.llmContent).toContain('--- FILE CONTENT (truncated) ---');
-      expect(result.returnDisplay).toContain('some lines were shortened');
    });

    it('should handle image file and return appropriate content', async () => {
@@ -417,10 +417,7 @@ describe('ReadFileTool', () => {

      const result = await invocation.execute(abortSignal);
      expect(result.llmContent).toContain(
-        'IMPORTANT: The file content has been truncated',
-      );
-      expect(result.llmContent).toContain(
-        'Status: Showing lines 6-8 of 20 total lines',
+        'Showing lines 6-8 of 20 total lines',
      );
      expect(result.llmContent).toContain('Line 6');
      expect(result.llmContent).toContain('Line 7');
--- a/packages/core/src/tools/read-file.ts
+++ b/packages/core/src/tools/read-file.ts
@@ -67,8 +67,7 @@ class ReadFileToolInvocation extends BaseToolInvocation<
  async execute(): Promise<ToolResult> {
    const result = await processSingleFileContent(
      this.params.absolute_path,
-      this.config.getTargetDir(),
-      this.config.getFileSystemService(),
+      this.config,
      this.params.offset,
      this.params.limit,
    );
@@ -88,16 +87,7 @@ class ReadFileToolInvocation extends BaseToolInvocation<
    if (result.isTruncated) {
      const [start, end] = result.linesShown!;
      const total = result.originalLineCount!;
-      const nextOffset = this.params.offset
-        ? this.params.offset + end - start + 1
-        : end;
-      llmContent = `
-IMPORTANT: The file content has been truncated.
-Status: Showing lines ${start}-${end} of ${total} total lines.
-Action: To read more of the file, you can use the 'offset' and 'limit' parameters in a subsequent 'read_file' call. For example, to read the next section of the file, use offset: ${nextOffset}.
-
--- FILE CONTENT (truncated) ---
-${result.llmContent}`;
+      llmContent = `Showing lines ${start}-${end} of ${total} total lines.\n\n---\n\n${result.llmContent}`;
    } else {
      llmContent = result.llmContent || '';
    }
--- a/packages/core/src/tools/read-many-files.test.ts
+++ b/packages/core/src/tools/read-many-files.test.ts
@@ -88,6 +88,8 @@ describe('ReadManyFilesTool', () => {
        buildExcludePatterns: () => DEFAULT_FILE_EXCLUDES,
        getReadManyFilesExcludes: () => DEFAULT_FILE_EXCLUDES,
      }),
+      getTruncateToolOutputThreshold: () => 2500,
+      getTruncateToolOutputLines: () => 500,
    } as Partial<Config> as Config;
    tool = new ReadManyFilesTool(mockConfig);

@@ -500,6 +502,8 @@ describe('ReadManyFilesTool', () => {
          buildExcludePatterns: () => [],
          getReadManyFilesExcludes: () => [],
        }),
+        getTruncateToolOutputThreshold: () => 2500,
+        getTruncateToolOutputLines: () => 500,
      } as Partial<Config> as Config;
      tool = new ReadManyFilesTool(mockConfig);

@@ -552,15 +556,10 @@ describe('ReadManyFilesTool', () => {
        c.includes('large-file.txt'),
      );

-      expect(normalFileContent).not.toContain(
-        '[WARNING: This file was truncated.',
-      );
+      expect(normalFileContent).not.toContain('Showing lines');
      expect(truncatedFileContent).toContain(
-        "[WARNING: This file was truncated. To view the full content, use the 'read_file' tool on this specific file.]",
+        'Showing lines 1-250 of 2500 total lines.',
      );
-      // Check that the actual content is still there but truncated
-      expect(truncatedFileContent).toContain('L200');
-      expect(truncatedFileContent).not.toContain('L2400');
    });

    it('should read files with special characters like [] and () in the path', async () => {
--- a/packages/core/src/tools/read-many-files.ts
+++ b/packages/core/src/tools/read-many-files.ts
@@ -17,7 +17,6 @@ import {
  processSingleFileContent,
  DEFAULT_ENCODING,
  getSpecificMimeType,
-  DEFAULT_MAX_LINES_TEXT_FILE,
 } from '../utils/fileUtils.js';
 import type { PartListUnion } from '@google/genai';
 import {
@@ -278,8 +277,10 @@ ${finalExclusionPatternsForDescription
    }

    const sortedFiles = Array.from(filesToConsider).sort();
-    const file_line_limit =
-      DEFAULT_MAX_LINES_TEXT_FILE / Math.max(1, sortedFiles.length);
+    const truncateToolOutputLines = this.config.getTruncateToolOutputLines();
+    const file_line_limit = Number.isFinite(truncateToolOutputLines)
+      ? Math.floor(truncateToolOutputLines / Math.max(1, sortedFiles.length))
+      : undefined;

    const fileProcessingPromises = sortedFiles.map(
      async (filePath): Promise<FileProcessingResult> => {
@@ -316,8 +317,7 @@ ${finalExclusionPatternsForDescription
          // Use processSingleFileContent for all file types now
          const fileReadResult = await processSingleFileContent(
            filePath,
-            this.config.getTargetDir(),
-            this.config.getFileSystemService(),
+            this.config,
            0,
            file_line_limit,
          );
@@ -376,9 +376,12 @@ ${finalExclusionPatternsForDescription
            );
            let fileContentForLlm = '';
            if (fileReadResult.isTruncated) {
-              fileContentForLlm += `[WARNING: This file was truncated. To view the full content, use the 'read_file' tool on this specific file.]\n\n`;
+              const [start, end] = fileReadResult.linesShown!;
+              const total = fileReadResult.originalLineCount!;
+              fileContentForLlm = `Showing lines ${start}-${end} of ${total} total lines.\n---\n${fileReadResult.llmContent}`;
+            } else {
+              fileContentForLlm = fileReadResult.llmContent;
            }
-            fileContentForLlm += fileReadResult.llmContent;
            contentParts.push(`${separator}\n\n${fileContentForLlm}\n\n`);
          } else {
            // This is a Part for image/pdf, which we don't add the separator to.
--- a/packages/core/src/tools/ripGrep.test.ts
+++ b/packages/core/src/tools/ripGrep.test.ts
@@ -103,6 +103,8 @@ describe('RipGrepTool', () => {
    getWorkingDir: () => tempRootDir,
    getDebugMode: () => false,
    getUseBuiltinRipgrep: () => true,
+    getTruncateToolOutputThreshold: () => 25000,
+    getTruncateToolOutputLines: () => 1000,
  } as unknown as Config;

  beforeEach(async () => {
@@ -417,7 +419,7 @@ describe('RipGrepTool', () => {
    });

    it('should truncate llm content when exceeding maximum length', async () => {
-      const longMatch = 'fileA.txt:1:' + 'a'.repeat(25_000);
+      const longMatch = 'fileA.txt:1:' + 'a'.repeat(30_000);

      mockSpawn.mockImplementationOnce(
        createMockSpawn({
@@ -430,7 +432,7 @@ describe('RipGrepTool', () => {
      const invocation = grepTool.build(params);
      const result = await invocation.execute(abortSignal);

-      expect(String(result.llmContent).length).toBeLessThanOrEqual(21_000);
+      expect(String(result.llmContent).length).toBeLessThanOrEqual(26_000);
      expect(result.llmContent).toMatch(/\[\d+ lines? truncated\] \.\.\./);
      expect(result.returnDisplay).toContain('truncated');
    });
--- a/packages/core/src/tools/ripGrep.ts
+++ b/packages/core/src/tools/ripGrep.ts
@@ -19,8 +19,6 @@ import { SchemaValidator } from '../utils/schemaValidator.js';
 import type { FileFilteringOptions } from '../config/constants.js';
 import { DEFAULT_FILE_FILTERING_OPTIONS } from '../config/constants.js';

-const MAX_LLM_CONTENT_LENGTH = 20_000;
-
 /**
 * Parameters for the GrepTool (Simplified)
 */
@@ -97,43 +95,49 @@ class GrepToolInvocation extends BaseToolInvocation<
      // Build header early to calculate available space
      const header = `Found ${totalMatches} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${filterDescription}:\n---\n`;

+      const charLimit = this.config.getTruncateToolOutputThreshold();
+      const lineLimit = Math.min(
+        this.config.getTruncateToolOutputLines(),
+        this.params.limit ?? Number.POSITIVE_INFINITY,
+      );
+
      // Apply line limit first (if specified)
      let truncatedByLineLimit = false;
      let linesToInclude = allLines;
-      if (
-        this.params.limit !== undefined &&
-        allLines.length > this.params.limit
-      ) {
-        linesToInclude = allLines.slice(0, this.params.limit);
+      if (allLines.length > lineLimit) {
+        linesToInclude = allLines.slice(0, lineLimit);
        truncatedByLineLimit = true;
      }

      // Build output and track how many lines we include, respecting character limit
-      const parts: string[] = [];
-      let includedLines = 0;
+      let grepOutput = '';
      let truncatedByCharLimit = false;
-      let currentLength = 0;
+      let includedLines = 0;
+      if (Number.isFinite(charLimit)) {
+        const parts: string[] = [];
+        let currentLength = 0;

-      for (const line of linesToInclude) {
-        const sep = includedLines > 0 ? 1 : 0;
+        for (const line of linesToInclude) {
+          const sep = includedLines > 0 ? 1 : 0;
+          includedLines++;

-        includedLines++;
-
-        if (currentLength + line.length <= MAX_LLM_CONTENT_LENGTH) {
-          parts.push(line);
-          currentLength = currentLength + line.length + sep;
-        } else {
-          const remaining = Math.max(
-            MAX_LLM_CONTENT_LENGTH - currentLength - sep,
-            10,
-          );
-          parts.push(line.slice(0, remaining) + '...');
-          truncatedByCharLimit = true;
-          break;
+          const projectedLength = currentLength + line.length + sep;
+          if (projectedLength <= charLimit) {
+            parts.push(line);
+            currentLength = projectedLength;
+          } else {
+            const remaining = Math.max(charLimit - currentLength - sep, 10);
+            parts.push(line.slice(0, remaining) + '...');
+            truncatedByCharLimit = true;
+            break;
+          }
        }
-      }

-      const grepOutput = parts.join('\n');
+        grepOutput = parts.join('\n');
+      } else {
+        grepOutput = linesToInclude.join('\n');
+        includedLines = linesToInclude.length;
+      }

      // Build result
      let llmContent = header + grepOutput;
--- a/packages/core/src/tools/tool-names.ts
+++ b/packages/core/src/tools/tool-names.ts
@@ -21,4 +21,6 @@ export const ToolNames = {
  MEMORY: 'save_memory',
  TASK: 'task',
  EXIT_PLAN_MODE: 'exit_plan_mode',
+  WEB_FETCH: 'web_fetch',
+  WEB_SEARCH: 'web_search',
 } as const;
--- a/packages/core/src/tools/web-fetch.ts
+++ b/packages/core/src/tools/web-fetch.ts
@@ -23,6 +23,7 @@ import {
  ToolConfirmationOutcome,
 } from './tools.js';
 import { DEFAULT_QWEN_MODEL } from '../config/models.js';
+import { ToolNames } from './tool-names.js';

 const URL_FETCH_TIMEOUT_MS = 10000;
 const MAX_CONTENT_LENGTH = 100000;
@@ -190,7 +191,7 @@ export class WebFetchTool extends BaseDeclarativeTool<
  WebFetchToolParams,
  ToolResult
 > {
-  static readonly Name: string = 'web_fetch';
+  static readonly Name: string = ToolNames.WEB_FETCH;

  constructor(private readonly config: Config) {
    super(
--- a/packages/core/src/tools/web-search/index.ts
+++ b/packages/core/src/tools/web-search/index.ts
@@ -30,6 +30,7 @@ import type {
  WebSearchProviderConfig,
  DashScopeProviderConfig,
 } from './types.js';
+import { ToolNames } from '../tool-names.js';

 class WebSearchToolInvocation extends BaseToolInvocation<
  WebSearchToolParams,
@@ -274,7 +275,7 @@ export class WebSearchTool extends BaseDeclarativeTool<
  WebSearchToolParams,
  WebSearchToolResult
 > {
-  static readonly Name: string = 'web_search';
+  static readonly Name: string = ToolNames.WEB_SEARCH;

  constructor(private readonly config: Config) {
    super(