🎯 Feature: Customizable Model Training and Tool Output Management (#981)

2025-12-20 16:57:46 +00:00 · 2025-11-07 17:28:16 +08:00
parent 21fba6eb89
commit c3d427730e
32 changed files with 795 additions and 607 deletions
--- a/packages/core/src/utils/environmentContext.test.ts
+++ b/packages/core/src/utils/environmentContext.test.ts
@@ -13,9 +13,11 @@ import {
  afterEach,
  type Mock,
 } from 'vitest';
+import type { Content } from '@google/genai';
 import {
  getEnvironmentContext,
  getDirectoryContextString,
+  getInitialChatHistory,
 } from './environmentContext.js';
 import type { Config } from '../config/config.js';
 import { getFolderStructure } from './getFolderStructure.js';
@@ -213,3 +215,102 @@ describe('getEnvironmentContext', () => {
    expect(parts[1].text).toBe('\n--- Error reading full file context ---');
  });
 });
+
+describe('getInitialChatHistory', () => {
+  let mockConfig: Partial<Config>;
+
+  beforeEach(() => {
+    vi.mocked(getFolderStructure).mockResolvedValue('Mock Folder Structure');
+    mockConfig = {
+      getSkipStartupContext: vi.fn().mockReturnValue(false),
+      getWorkspaceContext: vi.fn().mockReturnValue({
+        getDirectories: vi.fn().mockReturnValue(['/test/dir']),
+      }),
+      getFileService: vi.fn(),
+      getFullContext: vi.fn().mockReturnValue(false),
+      getToolRegistry: vi.fn().mockReturnValue({ getTool: vi.fn() }),
+    };
+  });
+
+  afterEach(() => {
+    vi.clearAllMocks();
+    vi.restoreAllMocks();
+  });
+
+  it('includes startup context when skipStartupContext is false', async () => {
+    const history = await getInitialChatHistory(mockConfig as Config);
+
+    expect(mockConfig.getSkipStartupContext).toHaveBeenCalled();
+    expect(history).toHaveLength(2);
+    expect(history).toEqual([
+      expect.objectContaining({
+        role: 'user',
+        parts: [
+          expect.objectContaining({
+            text: expect.stringContaining(
+              "I'm currently working in the directory",
+            ),
+          }),
+        ],
+      }),
+      {
+        role: 'model',
+        parts: [{ text: 'Got it. Thanks for the context!' }],
+      },
+    ]);
+  });
+
+  it('returns only extra history when skipStartupContext is true', async () => {
+    mockConfig.getSkipStartupContext = vi.fn().mockReturnValue(true);
+    mockConfig.getWorkspaceContext = vi.fn(() => {
+      throw new Error(
+        'getWorkspaceContext should not be called when skipping startup context',
+      );
+    });
+    mockConfig.getFullContext = vi.fn(() => {
+      throw new Error(
+        'getFullContext should not be called when skipping startup context',
+      );
+    });
+    mockConfig.getToolRegistry = vi.fn(() => {
+      throw new Error(
+        'getToolRegistry should not be called when skipping startup context',
+      );
+    });
+    const extraHistory: Content[] = [
+      { role: 'user', parts: [{ text: 'custom context' }] },
+    ];
+
+    const history = await getInitialChatHistory(
+      mockConfig as Config,
+      extraHistory,
+    );
+
+    expect(mockConfig.getSkipStartupContext).toHaveBeenCalled();
+    expect(history).toEqual(extraHistory);
+    expect(history).not.toBe(extraHistory);
+  });
+
+  it('returns empty history when skipping startup context without extras', async () => {
+    mockConfig.getSkipStartupContext = vi.fn().mockReturnValue(true);
+    mockConfig.getWorkspaceContext = vi.fn(() => {
+      throw new Error(
+        'getWorkspaceContext should not be called when skipping startup context',
+      );
+    });
+    mockConfig.getFullContext = vi.fn(() => {
+      throw new Error(
+        'getFullContext should not be called when skipping startup context',
+      );
+    });
+    mockConfig.getToolRegistry = vi.fn(() => {
+      throw new Error(
+        'getToolRegistry should not be called when skipping startup context',
+      );
+    });
+
+    const history = await getInitialChatHistory(mockConfig as Config);
+
+    expect(history).toEqual([]);
+  });
+});
--- a/packages/core/src/utils/environmentContext.ts
+++ b/packages/core/src/utils/environmentContext.ts
@@ -112,6 +112,10 @@ export async function getInitialChatHistory(
  config: Config,
  extraHistory?: Content[],
 ): Promise<Content[]> {
+  if (config.getSkipStartupContext()) {
+    return extraHistory ? [...extraHistory] : [];
+  }
+
  const envParts = await getEnvironmentContext(config);
  const envContextString = envParts.map((part) => part.text || '').join('\n\n');

--- a/packages/core/src/utils/fileUtils.test.ts
+++ b/packages/core/src/utils/fileUtils.test.ts
@@ -30,7 +30,7 @@ import {
  readFileWithEncoding,
  fileExists,
 } from './fileUtils.js';
-import { StandardFileSystemService } from '../services/fileSystemService.js';
+import type { Config } from '../config/config.js';

 vi.mock('mime/lite', () => ({
  default: { getType: vi.fn() },
@@ -50,6 +50,12 @@ describe('fileUtils', () => {
  let nonexistentFilePath: string;
  let directoryPath: string;

+  const mockConfig = {
+    getTruncateToolOutputThreshold: () => 2500,
+    getTruncateToolOutputLines: () => 500,
+    getTargetDir: () => tempRootDir,
+  } as unknown as Config;
+
  beforeEach(() => {
    vi.resetAllMocks(); // Reset all mocks, including mime.getType

@@ -664,8 +670,7 @@ describe('fileUtils', () => {
      actualNodeFs.writeFileSync(testTextFilePath, content);
      const result = await processSingleFileContent(
        testTextFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
      );
      expect(result.llmContent).toBe(content);
      expect(result.returnDisplay).toBe('');
@@ -675,8 +680,7 @@ describe('fileUtils', () => {
    it('should handle file not found', async () => {
      const result = await processSingleFileContent(
        nonexistentFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
      );
      expect(result.error).toContain('File not found');
      expect(result.returnDisplay).toContain('File not found');
@@ -689,8 +693,7 @@ describe('fileUtils', () => {

      const result = await processSingleFileContent(
        testTextFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
      );
      expect(result.error).toContain('Simulated read error');
      expect(result.returnDisplay).toContain('Simulated read error');
@@ -704,8 +707,7 @@ describe('fileUtils', () => {

      const result = await processSingleFileContent(
        testImageFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
      );
      expect(result.error).toContain('Simulated image read error');
      expect(result.returnDisplay).toContain('Simulated image read error');
@@ -717,8 +719,7 @@ describe('fileUtils', () => {
      mockMimeGetType.mockReturnValue('image/png');
      const result = await processSingleFileContent(
        testImageFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
      );
      expect(
        (result.llmContent as { inlineData: unknown }).inlineData,
@@ -739,8 +740,7 @@ describe('fileUtils', () => {
      mockMimeGetType.mockReturnValue('application/pdf');
      const result = await processSingleFileContent(
        testPdfFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
      );
      expect(
        (result.llmContent as { inlineData: unknown }).inlineData,
@@ -768,8 +768,7 @@ describe('fileUtils', () => {

      const result = await processSingleFileContent(
        testSvgFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
      );

      expect(result.llmContent).toBe(svgContent);
@@ -786,8 +785,7 @@ describe('fileUtils', () => {

      const result = await processSingleFileContent(
        testBinaryFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
      );
      expect(result.llmContent).toContain(
        'Cannot display content of binary file',
@@ -796,11 +794,7 @@ describe('fileUtils', () => {
    });

    it('should handle path being a directory', async () => {
-      const result = await processSingleFileContent(
-        directoryPath,
-        tempRootDir,
-        new StandardFileSystemService(),
-      );
+      const result = await processSingleFileContent(directoryPath, mockConfig);
      expect(result.error).toContain('Path is a directory');
      expect(result.returnDisplay).toContain('Path is a directory');
    });
@@ -811,8 +805,7 @@ describe('fileUtils', () => {

      const result = await processSingleFileContent(
        testTextFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
        5,
        5,
      ); // Read lines 6-10
@@ -832,8 +825,7 @@ describe('fileUtils', () => {
      // Read from line 11 to 20. The start is not 0, so it's truncated.
      const result = await processSingleFileContent(
        testTextFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
        10,
        10,
      );
@@ -852,8 +844,7 @@ describe('fileUtils', () => {

      const result = await processSingleFileContent(
        testTextFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
        0,
        10,
      );
@@ -875,17 +866,16 @@ describe('fileUtils', () => {

      const result = await processSingleFileContent(
        testTextFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
      );

      expect(result.llmContent).toContain('Short line');
      expect(result.llmContent).toContain(
        longLine.substring(0, 2000) + '... [truncated]',
      );
-      expect(result.llmContent).toContain('Another short line');
+      expect(result.llmContent).not.toContain('Another short line');
      expect(result.returnDisplay).toBe(
-        'Read all 3 lines from test.txt (some lines were shortened)',
+        'Read lines 1-2 of 3 from test.txt (truncated)',
      );
      expect(result.isTruncated).toBe(true);
    });
@@ -897,8 +887,7 @@ describe('fileUtils', () => {
      // Read 5 lines, but there are 11 total
      const result = await processSingleFileContent(
        testTextFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
        0,
        5,
      );
@@ -916,15 +905,14 @@ describe('fileUtils', () => {
      // Read all 11 lines, including the long one
      const result = await processSingleFileContent(
        testTextFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
        0,
        11,
      );

      expect(result.isTruncated).toBe(true);
      expect(result.returnDisplay).toBe(
-        'Read all 11 lines from test.txt (some lines were shortened)',
+        'Read lines 1-11 of 11 from test.txt (truncated)',
      );
    });

@@ -942,14 +930,13 @@ describe('fileUtils', () => {
      // Read 10 lines out of 20, including the long line
      const result = await processSingleFileContent(
        testTextFilePath,
-        tempRootDir,
-        new StandardFileSystemService(),
+        mockConfig,
        0,
        10,
      );
      expect(result.isTruncated).toBe(true);
      expect(result.returnDisplay).toBe(
-        'Read lines 1-10 of 20 from test.txt (some lines were shortened)',
+        'Read lines 1-5 of 20 from test.txt (truncated)',
      );
    });

@@ -966,8 +953,7 @@ describe('fileUtils', () => {
      try {
        const result = await processSingleFileContent(
          testTextFilePath,
-          tempRootDir,
-          new StandardFileSystemService(),
+          mockConfig,
        );

        expect(result.error).toContain('File size exceeds the 20MB limit');
--- a/packages/core/src/utils/fileUtils.ts
+++ b/packages/core/src/utils/fileUtils.ts
@@ -9,13 +9,9 @@ import fsPromises from 'node:fs/promises';
 import path from 'node:path';
 import type { PartUnion } from '@google/genai';
 import mime from 'mime/lite';
-import type { FileSystemService } from '../services/fileSystemService.js';
 import { ToolErrorType } from '../tools/tool-error.js';
 import { BINARY_EXTENSIONS } from './ignorePatterns.js';
-
-// Constants for text file processing
-export const DEFAULT_MAX_LINES_TEXT_FILE = 2000;
-const MAX_LINE_LENGTH_TEXT_FILE = 2000;
+import type { Config } from '../config/config.js';

 // Default values for encoding and separator format
 export const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
@@ -306,18 +302,18 @@ export interface ProcessedFileReadResult {
 /**
 * Reads and processes a single file, handling text, images, and PDFs.
 * @param filePath Absolute path to the file.
- * @param rootDirectory Absolute path to the project root for relative path display.
+ * @param config Config instance providing the target directory (for relative path display) and the truncation limits.
 * @param offset Optional offset for text files (0-based line number).
 * @param limit Optional limit for text files (number of lines to read).
 * @returns ProcessedFileReadResult object.
 */
 export async function processSingleFileContent(
  filePath: string,
-  rootDirectory: string,
-  fileSystemService: FileSystemService,
+  config: Config,
  offset?: number,
  limit?: number,
 ): Promise<ProcessedFileReadResult> {
+  const rootDirectory = config.getTargetDir();
  try {
    if (!fs.existsSync(filePath)) {
      // Sync check is acceptable before async read
@@ -379,45 +375,76 @@ export async function processSingleFileContent(
      case 'text': {
        // Use BOM-aware reader to avoid leaving a BOM character in content and to support UTF-16/32 transparently
        const content = await readFileWithEncoding(filePath);
-        const lines = content.split('\n');
+        const lines = content.split('\n').map((line) => line.trimEnd());
        const originalLineCount = lines.length;

        const startLine = offset || 0;
-        const effectiveLimit =
-          limit === undefined ? DEFAULT_MAX_LINES_TEXT_FILE : limit;
+        const configLineLimit = config.getTruncateToolOutputLines();
+        const configCharLimit = config.getTruncateToolOutputThreshold();
+        const effectiveLimit = limit === undefined ? configLineLimit : limit;
+
        // Ensure endLine does not exceed originalLineCount
        const endLine = Math.min(startLine + effectiveLimit, originalLineCount);
        // Ensure selectedLines doesn't try to slice beyond array bounds if startLine is too high
        const actualStartLine = Math.min(startLine, originalLineCount);
        const selectedLines = lines.slice(actualStartLine, endLine);

-        let linesWereTruncatedInLength = false;
-        const formattedLines = selectedLines.map((line) => {
-          if (line.length > MAX_LINE_LENGTH_TEXT_FILE) {
-            linesWereTruncatedInLength = true;
-            return (
-              line.substring(0, MAX_LINE_LENGTH_TEXT_FILE) + '... [truncated]'
-            );
+        // Apply character limit truncation
+        let llmContent = '';
+        let contentLengthTruncated = false;
+        let linesIncluded = 0;
+
+        if (Number.isFinite(configCharLimit)) {
+          const formattedLines: string[] = [];
+          let currentLength = 0;
+
+          for (const line of selectedLines) {
+            const sep = linesIncluded > 0 ? 1 : 0; // newline separator
+            linesIncluded++;
+
+            const projectedLength = currentLength + line.length + sep;
+            if (projectedLength <= configCharLimit) {
+              formattedLines.push(line);
+              currentLength = projectedLength;
+            } else {
+              // Keep the part of this line that fits (at least 10 chars), append the truncation marker, then stop
+              const remaining = Math.max(
+                configCharLimit - currentLength - sep,
+                10,
+              );
+              formattedLines.push(
+                line.substring(0, remaining) + '... [truncated]',
+              );
+              contentLengthTruncated = true;
+              break;
+            }
          }
-          return line;
-        });
+
+          llmContent = formattedLines.join('\n');
+        } else {
+          // No character limit, use all selected lines
+          llmContent = selectedLines.join('\n');
+          linesIncluded = selectedLines.length;
+        }
+
+        // Calculate actual end line shown
+        const actualEndLine = contentLengthTruncated
+          ? actualStartLine + linesIncluded
+          : endLine;

        const contentRangeTruncated =
-          startLine > 0 || endLine < originalLineCount;
-        const isTruncated = contentRangeTruncated || linesWereTruncatedInLength;
-        const llmContent = formattedLines.join('\n');
+          startLine > 0 || actualEndLine < originalLineCount;
+        const isTruncated = contentRangeTruncated || contentLengthTruncated;

        // By default, return nothing to streamline the common case of a successful read_file.
        let returnDisplay = '';
-        if (contentRangeTruncated) {
+        if (isTruncated) {
          returnDisplay = `Read lines ${
            actualStartLine + 1
-          }-${endLine} of ${originalLineCount} from ${relativePathForDisplay}`;
-          if (linesWereTruncatedInLength) {
-            returnDisplay += ' (some lines were shortened)';
+          }-${actualEndLine} of ${originalLineCount} from ${relativePathForDisplay}`;
+          if (contentLengthTruncated) {
+            returnDisplay += ' (truncated)';
          }
-        } else if (linesWereTruncatedInLength) {
-          returnDisplay = `Read all ${originalLineCount} lines from ${relativePathForDisplay} (some lines were shortened)`;
        }

        return {
@@ -425,7 +452,7 @@ export async function processSingleFileContent(
          returnDisplay,
          isTruncated,
          originalLineCount,
-          linesShown: [actualStartLine + 1, endLine],
+          linesShown: [actualStartLine + 1, actualEndLine],
        };
      }
      case 'image':
--- a/packages/core/src/utils/pathReader.test.ts
+++ b/packages/core/src/utils/pathReader.test.ts
@@ -29,6 +29,8 @@ const createMockConfig = (
    getTargetDir: () => cwd,
    getFileSystemService: () => fileSystemService,
    getFileService: () => mockFileService,
+    getTruncateToolOutputThreshold: () => 2500,
+    getTruncateToolOutputLines: () => 500,
  } as unknown as Config;
 };

--- a/packages/core/src/utils/pathReader.ts
+++ b/packages/core/src/utils/pathReader.ts
@@ -83,11 +83,7 @@ export async function readPathFromWorkspace(
    for (const filePath of finalFiles) {
      const relativePathForDisplay = path.relative(absolutePath, filePath);
      allParts.push({ text: `--- ${relativePathForDisplay} ---\n` });
-      const result = await processSingleFileContent(
-        filePath,
-        config.getTargetDir(),
-        config.getFileSystemService(),
-      );
+      const result = await processSingleFileContent(filePath, config);
      allParts.push(result.llmContent);
      allParts.push({ text: '\n' }); // Add a newline for separation
    }
@@ -108,11 +104,7 @@ export async function readPathFromWorkspace(
    }

    // It's a single file, process it directly.
-    const result = await processSingleFileContent(
-      absolutePath,
-      config.getTargetDir(),
-      config.getFileSystemService(),
-    );
+    const result = await processSingleFileContent(absolutePath, config);
    return [result.llmContent];
  }
 }