🎯 Feature: Customizable Model Training and Tool Output Management (#981)

This commit is contained in:
tanzhenxin
2025-11-07 17:28:16 +08:00
committed by GitHub
parent 21fba6eb89
commit c3d427730e
32 changed files with 795 additions and 607 deletions

View File

@@ -37,6 +37,7 @@ describe('GlobTool', () => {
getFileExclusions: () => ({
getGlobExcludes: () => [],
}),
getTruncateToolOutputLines: () => 1000,
} as unknown as Config;
beforeEach(async () => {

View File

@@ -161,11 +161,15 @@ class GlobToolInvocation extends BaseToolInvocation<
);
const totalFileCount = sortedEntries.length;
const truncated = totalFileCount > MAX_FILE_COUNT;
const fileLimit = Math.min(
MAX_FILE_COUNT,
this.config.getTruncateToolOutputLines(),
);
const truncated = totalFileCount > fileLimit;
// Limit to MAX_FILE_COUNT if needed
// Limit to fileLimit if needed
const entriesToShow = truncated
? sortedEntries.slice(0, MAX_FILE_COUNT)
? sortedEntries.slice(0, fileLimit)
: sortedEntries;
const sortedAbsolutePaths = entriesToShow.map((entry) =>
@@ -178,7 +182,7 @@ class GlobToolInvocation extends BaseToolInvocation<
// Add truncation notice if needed
if (truncated) {
const omittedFiles = totalFileCount - MAX_FILE_COUNT;
const omittedFiles = totalFileCount - fileLimit;
const fileTerm = omittedFiles === 1 ? 'file' : 'files';
resultMessage += `\n---\n[${omittedFiles} ${fileTerm} truncated] ...`;
}

View File

@@ -43,6 +43,8 @@ describe('GrepTool', () => {
getFileExclusions: () => ({
getGlobExcludes: () => [],
}),
getTruncateToolOutputThreshold: () => 25000,
getTruncateToolOutputLines: () => 1000,
} as unknown as Config;
beforeEach(async () => {
@@ -282,6 +284,8 @@ describe('GrepTool', () => {
getFileExclusions: () => ({
getGlobExcludes: () => [],
}),
getTruncateToolOutputThreshold: () => 25000,
getTruncateToolOutputLines: () => 1000,
} as unknown as Config;
const multiDirGrepTool = new GrepTool(multiDirConfig);

View File

@@ -19,8 +19,6 @@ import type { Config } from '../config/config.js';
import type { FileExclusions } from '../utils/ignorePatterns.js';
import { ToolErrorType } from './tool-error.js';
const MAX_LLM_CONTENT_LENGTH = 20_000;
// --- Interfaces ---
/**
@@ -103,14 +101,17 @@ class GrepToolInvocation extends BaseToolInvocation<
return { llmContent: noMatchMsg, returnDisplay: `No matches found` };
}
const charLimit = this.config.getTruncateToolOutputThreshold();
const lineLimit = Math.min(
this.config.getTruncateToolOutputLines(),
this.params.limit ?? Number.POSITIVE_INFINITY,
);
// Apply line limit if specified
let truncatedByLineLimit = false;
let matchesToInclude = rawMatches;
if (
this.params.limit !== undefined &&
rawMatches.length > this.params.limit
) {
matchesToInclude = rawMatches.slice(0, this.params.limit);
if (rawMatches.length > lineLimit) {
matchesToInclude = rawMatches.slice(0, lineLimit);
truncatedByLineLimit = true;
}
@@ -147,8 +148,8 @@ class GrepToolInvocation extends BaseToolInvocation<
// Apply character limit as safety net
let truncatedByCharLimit = false;
if (grepOutput.length > MAX_LLM_CONTENT_LENGTH) {
grepOutput = grepOutput.slice(0, MAX_LLM_CONTENT_LENGTH) + '...';
if (Number.isFinite(charLimit) && grepOutput.length > charLimit) {
grepOutput = grepOutput.slice(0, charLimit) + '...';
truncatedByCharLimit = true;
}

View File

@@ -41,6 +41,8 @@ describe('ReadFileTool', () => {
storage: {
getProjectTempDir: () => path.join(tempRootDir, '.temp'),
},
getTruncateToolOutputThreshold: () => 2500,
getTruncateToolOutputLines: () => 500,
} as unknown as Config;
tool = new ReadFileTool(mockConfigInstance);
});
@@ -281,11 +283,9 @@ describe('ReadFileTool', () => {
>;
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain(
'IMPORTANT: The file content has been truncated',
expect(result.returnDisplay).toContain(
'Read lines 1-2 of 3 from longlines.txt (truncated)',
);
expect(result.llmContent).toContain('--- FILE CONTENT (truncated) ---');
expect(result.returnDisplay).toContain('some lines were shortened');
});
it('should handle image file and return appropriate content', async () => {
@@ -417,10 +417,7 @@ describe('ReadFileTool', () => {
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain(
'IMPORTANT: The file content has been truncated',
);
expect(result.llmContent).toContain(
'Status: Showing lines 6-8 of 20 total lines',
'Showing lines 6-8 of 20 total lines',
);
expect(result.llmContent).toContain('Line 6');
expect(result.llmContent).toContain('Line 7');

View File

@@ -67,8 +67,7 @@ class ReadFileToolInvocation extends BaseToolInvocation<
async execute(): Promise<ToolResult> {
const result = await processSingleFileContent(
this.params.absolute_path,
this.config.getTargetDir(),
this.config.getFileSystemService(),
this.config,
this.params.offset,
this.params.limit,
);
@@ -88,16 +87,7 @@ class ReadFileToolInvocation extends BaseToolInvocation<
if (result.isTruncated) {
const [start, end] = result.linesShown!;
const total = result.originalLineCount!;
const nextOffset = this.params.offset
? this.params.offset + end - start + 1
: end;
llmContent = `
IMPORTANT: The file content has been truncated.
Status: Showing lines ${start}-${end} of ${total} total lines.
Action: To read more of the file, you can use the 'offset' and 'limit' parameters in a subsequent 'read_file' call. For example, to read the next section of the file, use offset: ${nextOffset}.
--- FILE CONTENT (truncated) ---
${result.llmContent}`;
llmContent = `Showing lines ${start}-${end} of ${total} total lines.\n\n---\n\n${result.llmContent}`;
} else {
llmContent = result.llmContent || '';
}

View File

@@ -88,6 +88,8 @@ describe('ReadManyFilesTool', () => {
buildExcludePatterns: () => DEFAULT_FILE_EXCLUDES,
getReadManyFilesExcludes: () => DEFAULT_FILE_EXCLUDES,
}),
getTruncateToolOutputThreshold: () => 2500,
getTruncateToolOutputLines: () => 500,
} as Partial<Config> as Config;
tool = new ReadManyFilesTool(mockConfig);
@@ -500,6 +502,8 @@ describe('ReadManyFilesTool', () => {
buildExcludePatterns: () => [],
getReadManyFilesExcludes: () => [],
}),
getTruncateToolOutputThreshold: () => 2500,
getTruncateToolOutputLines: () => 500,
} as Partial<Config> as Config;
tool = new ReadManyFilesTool(mockConfig);
@@ -552,15 +556,10 @@ describe('ReadManyFilesTool', () => {
c.includes('large-file.txt'),
);
expect(normalFileContent).not.toContain(
'[WARNING: This file was truncated.',
);
expect(normalFileContent).not.toContain('Showing lines');
expect(truncatedFileContent).toContain(
"[WARNING: This file was truncated. To view the full content, use the 'read_file' tool on this specific file.]",
'Showing lines 1-250 of 2500 total lines.',
);
// Check that the actual content is still there but truncated
expect(truncatedFileContent).toContain('L200');
expect(truncatedFileContent).not.toContain('L2400');
});
it('should read files with special characters like [] and () in the path', async () => {

View File

@@ -17,7 +17,6 @@ import {
processSingleFileContent,
DEFAULT_ENCODING,
getSpecificMimeType,
DEFAULT_MAX_LINES_TEXT_FILE,
} from '../utils/fileUtils.js';
import type { PartListUnion } from '@google/genai';
import {
@@ -278,8 +277,10 @@ ${finalExclusionPatternsForDescription
}
const sortedFiles = Array.from(filesToConsider).sort();
const file_line_limit =
DEFAULT_MAX_LINES_TEXT_FILE / Math.max(1, sortedFiles.length);
const truncateToolOutputLines = this.config.getTruncateToolOutputLines();
const file_line_limit = Number.isFinite(truncateToolOutputLines)
? Math.floor(truncateToolOutputLines / Math.max(1, sortedFiles.length))
: undefined;
const fileProcessingPromises = sortedFiles.map(
async (filePath): Promise<FileProcessingResult> => {
@@ -316,8 +317,7 @@ ${finalExclusionPatternsForDescription
// Use processSingleFileContent for all file types now
const fileReadResult = await processSingleFileContent(
filePath,
this.config.getTargetDir(),
this.config.getFileSystemService(),
this.config,
0,
file_line_limit,
);
@@ -376,9 +376,12 @@ ${finalExclusionPatternsForDescription
);
let fileContentForLlm = '';
if (fileReadResult.isTruncated) {
fileContentForLlm += `[WARNING: This file was truncated. To view the full content, use the 'read_file' tool on this specific file.]\n\n`;
const [start, end] = fileReadResult.linesShown!;
const total = fileReadResult.originalLineCount!;
fileContentForLlm = `Showing lines ${start}-${end} of ${total} total lines.\n---\n${fileReadResult.llmContent}`;
} else {
fileContentForLlm = fileReadResult.llmContent;
}
fileContentForLlm += fileReadResult.llmContent;
contentParts.push(`${separator}\n\n${fileContentForLlm}\n\n`);
} else {
// This is a Part for image/pdf, which we don't add the separator to.

View File

@@ -103,6 +103,8 @@ describe('RipGrepTool', () => {
getWorkingDir: () => tempRootDir,
getDebugMode: () => false,
getUseBuiltinRipgrep: () => true,
getTruncateToolOutputThreshold: () => 25000,
getTruncateToolOutputLines: () => 1000,
} as unknown as Config;
beforeEach(async () => {
@@ -417,7 +419,7 @@ describe('RipGrepTool', () => {
});
it('should truncate llm content when exceeding maximum length', async () => {
const longMatch = 'fileA.txt:1:' + 'a'.repeat(25_000);
const longMatch = 'fileA.txt:1:' + 'a'.repeat(30_000);
mockSpawn.mockImplementationOnce(
createMockSpawn({
@@ -430,7 +432,7 @@ describe('RipGrepTool', () => {
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
expect(String(result.llmContent).length).toBeLessThanOrEqual(21_000);
expect(String(result.llmContent).length).toBeLessThanOrEqual(26_000);
expect(result.llmContent).toMatch(/\[\d+ lines? truncated\] \.\.\./);
expect(result.returnDisplay).toContain('truncated');
});

View File

@@ -19,8 +19,6 @@ import { SchemaValidator } from '../utils/schemaValidator.js';
import type { FileFilteringOptions } from '../config/constants.js';
import { DEFAULT_FILE_FILTERING_OPTIONS } from '../config/constants.js';
const MAX_LLM_CONTENT_LENGTH = 20_000;
/**
* Parameters for the GrepTool (Simplified)
*/
@@ -97,43 +95,49 @@ class GrepToolInvocation extends BaseToolInvocation<
// Build header early to calculate available space
const header = `Found ${totalMatches} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${filterDescription}:\n---\n`;
const charLimit = this.config.getTruncateToolOutputThreshold();
const lineLimit = Math.min(
this.config.getTruncateToolOutputLines(),
this.params.limit ?? Number.POSITIVE_INFINITY,
);
// Apply line limit first (if specified)
let truncatedByLineLimit = false;
let linesToInclude = allLines;
if (
this.params.limit !== undefined &&
allLines.length > this.params.limit
) {
linesToInclude = allLines.slice(0, this.params.limit);
if (allLines.length > lineLimit) {
linesToInclude = allLines.slice(0, lineLimit);
truncatedByLineLimit = true;
}
// Build output and track how many lines we include, respecting character limit
const parts: string[] = [];
let includedLines = 0;
let grepOutput = '';
let truncatedByCharLimit = false;
let currentLength = 0;
let includedLines = 0;
if (Number.isFinite(charLimit)) {
const parts: string[] = [];
let currentLength = 0;
for (const line of linesToInclude) {
const sep = includedLines > 0 ? 1 : 0;
for (const line of linesToInclude) {
const sep = includedLines > 0 ? 1 : 0;
includedLines++;
includedLines++;
if (currentLength + line.length <= MAX_LLM_CONTENT_LENGTH) {
parts.push(line);
currentLength = currentLength + line.length + sep;
} else {
const remaining = Math.max(
MAX_LLM_CONTENT_LENGTH - currentLength - sep,
10,
);
parts.push(line.slice(0, remaining) + '...');
truncatedByCharLimit = true;
break;
const projectedLength = currentLength + line.length + sep;
if (projectedLength <= charLimit) {
parts.push(line);
currentLength = projectedLength;
} else {
const remaining = Math.max(charLimit - currentLength - sep, 10);
parts.push(line.slice(0, remaining) + '...');
truncatedByCharLimit = true;
break;
}
}
}
const grepOutput = parts.join('\n');
grepOutput = parts.join('\n');
} else {
grepOutput = linesToInclude.join('\n');
includedLines = linesToInclude.length;
}
// Build result
let llmContent = header + grepOutput;

View File

@@ -21,4 +21,6 @@ export const ToolNames = {
MEMORY: 'save_memory',
TASK: 'task',
EXIT_PLAN_MODE: 'exit_plan_mode',
WEB_FETCH: 'web_fetch',
WEB_SEARCH: 'web_search',
} as const;

View File

@@ -23,6 +23,7 @@ import {
ToolConfirmationOutcome,
} from './tools.js';
import { DEFAULT_QWEN_MODEL } from '../config/models.js';
import { ToolNames } from './tool-names.js';
const URL_FETCH_TIMEOUT_MS = 10000;
const MAX_CONTENT_LENGTH = 100000;
@@ -190,7 +191,7 @@ export class WebFetchTool extends BaseDeclarativeTool<
WebFetchToolParams,
ToolResult
> {
static readonly Name: string = 'web_fetch';
static readonly Name: string = ToolNames.WEB_FETCH;
constructor(private readonly config: Config) {
super(

View File

@@ -30,6 +30,7 @@ import type {
WebSearchProviderConfig,
DashScopeProviderConfig,
} from './types.js';
import { ToolNames } from '../tool-names.js';
class WebSearchToolInvocation extends BaseToolInvocation<
WebSearchToolParams,
@@ -274,7 +275,7 @@ export class WebSearchTool extends BaseDeclarativeTool<
WebSearchToolParams,
WebSearchToolResult
> {
static readonly Name: string = 'web_search';
static readonly Name: string = ToolNames.WEB_SEARCH;
constructor(private readonly config: Config) {
super(