🎯 Feature: Customizable Model Training and Tool Output Management (#981)

This commit is contained in:
tanzhenxin
2025-11-07 17:28:16 +08:00
committed by GitHub
parent 21fba6eb89
commit c3d427730e
32 changed files with 795 additions and 607 deletions

View File

@@ -13,9 +13,11 @@ import {
afterEach,
type Mock,
} from 'vitest';
import type { Content } from '@google/genai';
import {
getEnvironmentContext,
getDirectoryContextString,
getInitialChatHistory,
} from './environmentContext.js';
import type { Config } from '../config/config.js';
import { getFolderStructure } from './getFolderStructure.js';
@@ -213,3 +215,102 @@ describe('getEnvironmentContext', () => {
expect(parts[1].text).toBe('\n--- Error reading full file context ---');
});
});
describe('getInitialChatHistory', () => {
let mockConfig: Partial<Config>;
beforeEach(() => {
vi.mocked(getFolderStructure).mockResolvedValue('Mock Folder Structure');
mockConfig = {
getSkipStartupContext: vi.fn().mockReturnValue(false),
getWorkspaceContext: vi.fn().mockReturnValue({
getDirectories: vi.fn().mockReturnValue(['/test/dir']),
}),
getFileService: vi.fn(),
getFullContext: vi.fn().mockReturnValue(false),
getToolRegistry: vi.fn().mockReturnValue({ getTool: vi.fn() }),
};
});
afterEach(() => {
vi.clearAllMocks();
vi.restoreAllMocks();
});
it('includes startup context when skipStartupContext is false', async () => {
const history = await getInitialChatHistory(mockConfig as Config);
expect(mockConfig.getSkipStartupContext).toHaveBeenCalled();
expect(history).toHaveLength(2);
expect(history).toEqual([
expect.objectContaining({
role: 'user',
parts: [
expect.objectContaining({
text: expect.stringContaining(
"I'm currently working in the directory",
),
}),
],
}),
{
role: 'model',
parts: [{ text: 'Got it. Thanks for the context!' }],
},
]);
});
it('returns only extra history when skipStartupContext is true', async () => {
mockConfig.getSkipStartupContext = vi.fn().mockReturnValue(true);
mockConfig.getWorkspaceContext = vi.fn(() => {
throw new Error(
'getWorkspaceContext should not be called when skipping startup context',
);
});
mockConfig.getFullContext = vi.fn(() => {
throw new Error(
'getFullContext should not be called when skipping startup context',
);
});
mockConfig.getToolRegistry = vi.fn(() => {
throw new Error(
'getToolRegistry should not be called when skipping startup context',
);
});
const extraHistory: Content[] = [
{ role: 'user', parts: [{ text: 'custom context' }] },
];
const history = await getInitialChatHistory(
mockConfig as Config,
extraHistory,
);
expect(mockConfig.getSkipStartupContext).toHaveBeenCalled();
expect(history).toEqual(extraHistory);
expect(history).not.toBe(extraHistory);
});
it('returns empty history when skipping startup context without extras', async () => {
mockConfig.getSkipStartupContext = vi.fn().mockReturnValue(true);
mockConfig.getWorkspaceContext = vi.fn(() => {
throw new Error(
'getWorkspaceContext should not be called when skipping startup context',
);
});
mockConfig.getFullContext = vi.fn(() => {
throw new Error(
'getFullContext should not be called when skipping startup context',
);
});
mockConfig.getToolRegistry = vi.fn(() => {
throw new Error(
'getToolRegistry should not be called when skipping startup context',
);
});
const history = await getInitialChatHistory(mockConfig as Config);
expect(history).toEqual([]);
});
});

View File

@@ -112,6 +112,10 @@ export async function getInitialChatHistory(
config: Config,
extraHistory?: Content[],
): Promise<Content[]> {
if (config.getSkipStartupContext()) {
return extraHistory ? [...extraHistory] : [];
}
const envParts = await getEnvironmentContext(config);
const envContextString = envParts.map((part) => part.text || '').join('\n\n');

View File

@@ -30,7 +30,7 @@ import {
readFileWithEncoding,
fileExists,
} from './fileUtils.js';
import { StandardFileSystemService } from '../services/fileSystemService.js';
import type { Config } from '../config/config.js';
vi.mock('mime/lite', () => ({
default: { getType: vi.fn() },
@@ -50,6 +50,12 @@ describe('fileUtils', () => {
let nonexistentFilePath: string;
let directoryPath: string;
const mockConfig = {
getTruncateToolOutputThreshold: () => 2500,
getTruncateToolOutputLines: () => 500,
getTargetDir: () => tempRootDir,
} as unknown as Config;
beforeEach(() => {
vi.resetAllMocks(); // Reset all mocks, including mime.getType
@@ -664,8 +670,7 @@ describe('fileUtils', () => {
actualNodeFs.writeFileSync(testTextFilePath, content);
const result = await processSingleFileContent(
testTextFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
);
expect(result.llmContent).toBe(content);
expect(result.returnDisplay).toBe('');
@@ -675,8 +680,7 @@ describe('fileUtils', () => {
it('should handle file not found', async () => {
const result = await processSingleFileContent(
nonexistentFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
);
expect(result.error).toContain('File not found');
expect(result.returnDisplay).toContain('File not found');
@@ -689,8 +693,7 @@ describe('fileUtils', () => {
const result = await processSingleFileContent(
testTextFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
);
expect(result.error).toContain('Simulated read error');
expect(result.returnDisplay).toContain('Simulated read error');
@@ -704,8 +707,7 @@ describe('fileUtils', () => {
const result = await processSingleFileContent(
testImageFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
);
expect(result.error).toContain('Simulated image read error');
expect(result.returnDisplay).toContain('Simulated image read error');
@@ -717,8 +719,7 @@ describe('fileUtils', () => {
mockMimeGetType.mockReturnValue('image/png');
const result = await processSingleFileContent(
testImageFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
);
expect(
(result.llmContent as { inlineData: unknown }).inlineData,
@@ -739,8 +740,7 @@ describe('fileUtils', () => {
mockMimeGetType.mockReturnValue('application/pdf');
const result = await processSingleFileContent(
testPdfFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
);
expect(
(result.llmContent as { inlineData: unknown }).inlineData,
@@ -768,8 +768,7 @@ describe('fileUtils', () => {
const result = await processSingleFileContent(
testSvgFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
);
expect(result.llmContent).toBe(svgContent);
@@ -786,8 +785,7 @@ describe('fileUtils', () => {
const result = await processSingleFileContent(
testBinaryFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
);
expect(result.llmContent).toContain(
'Cannot display content of binary file',
@@ -796,11 +794,7 @@ describe('fileUtils', () => {
});
it('should handle path being a directory', async () => {
const result = await processSingleFileContent(
directoryPath,
tempRootDir,
new StandardFileSystemService(),
);
const result = await processSingleFileContent(directoryPath, mockConfig);
expect(result.error).toContain('Path is a directory');
expect(result.returnDisplay).toContain('Path is a directory');
});
@@ -811,8 +805,7 @@ describe('fileUtils', () => {
const result = await processSingleFileContent(
testTextFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
5,
5,
); // Read lines 6-10
@@ -832,8 +825,7 @@ describe('fileUtils', () => {
// Read from line 11 to 20. The start is not 0, so it's truncated.
const result = await processSingleFileContent(
testTextFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
10,
10,
);
@@ -852,8 +844,7 @@ describe('fileUtils', () => {
const result = await processSingleFileContent(
testTextFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
0,
10,
);
@@ -875,17 +866,16 @@ describe('fileUtils', () => {
const result = await processSingleFileContent(
testTextFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
);
expect(result.llmContent).toContain('Short line');
expect(result.llmContent).toContain(
longLine.substring(0, 2000) + '... [truncated]',
);
expect(result.llmContent).toContain('Another short line');
expect(result.llmContent).not.toContain('Another short line');
expect(result.returnDisplay).toBe(
'Read all 3 lines from test.txt (some lines were shortened)',
'Read lines 1-2 of 3 from test.txt (truncated)',
);
expect(result.isTruncated).toBe(true);
});
@@ -897,8 +887,7 @@ describe('fileUtils', () => {
// Read 5 lines, but there are 11 total
const result = await processSingleFileContent(
testTextFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
0,
5,
);
@@ -916,15 +905,14 @@ describe('fileUtils', () => {
// Read all 11 lines, including the long one
const result = await processSingleFileContent(
testTextFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
0,
11,
);
expect(result.isTruncated).toBe(true);
expect(result.returnDisplay).toBe(
'Read all 11 lines from test.txt (some lines were shortened)',
'Read lines 1-11 of 11 from test.txt (truncated)',
);
});
@@ -942,14 +930,13 @@ describe('fileUtils', () => {
// Read 10 lines out of 20, including the long line
const result = await processSingleFileContent(
testTextFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
0,
10,
);
expect(result.isTruncated).toBe(true);
expect(result.returnDisplay).toBe(
'Read lines 1-10 of 20 from test.txt (some lines were shortened)',
'Read lines 1-5 of 20 from test.txt (truncated)',
);
});
@@ -966,8 +953,7 @@ describe('fileUtils', () => {
try {
const result = await processSingleFileContent(
testTextFilePath,
tempRootDir,
new StandardFileSystemService(),
mockConfig,
);
expect(result.error).toContain('File size exceeds the 20MB limit');

View File

@@ -9,13 +9,9 @@ import fsPromises from 'node:fs/promises';
import path from 'node:path';
import type { PartUnion } from '@google/genai';
import mime from 'mime/lite';
import type { FileSystemService } from '../services/fileSystemService.js';
import { ToolErrorType } from '../tools/tool-error.js';
import { BINARY_EXTENSIONS } from './ignorePatterns.js';
// Constants for text file processing
export const DEFAULT_MAX_LINES_TEXT_FILE = 2000;
const MAX_LINE_LENGTH_TEXT_FILE = 2000;
import type { Config } from '../config/config.js';
// Default values for encoding and separator format
export const DEFAULT_ENCODING: BufferEncoding = 'utf-8';
@@ -306,18 +302,18 @@ export interface ProcessedFileReadResult {
/**
* Reads and processes a single file, handling text, images, and PDFs.
* @param filePath Absolute path to the file.
* @param rootDirectory Absolute path to the project root for relative path display.
* @param config Config instance for truncation settings.
* @param offset Optional offset for text files (0-based line number).
* @param limit Optional limit for text files (number of lines to read).
* @returns ProcessedFileReadResult object.
*/
export async function processSingleFileContent(
filePath: string,
rootDirectory: string,
fileSystemService: FileSystemService,
config: Config,
offset?: number,
limit?: number,
): Promise<ProcessedFileReadResult> {
const rootDirectory = config.getTargetDir();
try {
if (!fs.existsSync(filePath)) {
// Sync check is acceptable before async read
@@ -379,45 +375,76 @@ export async function processSingleFileContent(
case 'text': {
// Use BOM-aware reader to avoid leaving a BOM character in content and to support UTF-16/32 transparently
const content = await readFileWithEncoding(filePath);
const lines = content.split('\n');
const lines = content.split('\n').map((line) => line.trimEnd());
const originalLineCount = lines.length;
const startLine = offset || 0;
const effectiveLimit =
limit === undefined ? DEFAULT_MAX_LINES_TEXT_FILE : limit;
const configLineLimit = config.getTruncateToolOutputLines();
const configCharLimit = config.getTruncateToolOutputThreshold();
const effectiveLimit = limit === undefined ? configLineLimit : limit;
// Ensure endLine does not exceed originalLineCount
const endLine = Math.min(startLine + effectiveLimit, originalLineCount);
// Ensure selectedLines doesn't try to slice beyond array bounds if startLine is too high
const actualStartLine = Math.min(startLine, originalLineCount);
const selectedLines = lines.slice(actualStartLine, endLine);
let linesWereTruncatedInLength = false;
const formattedLines = selectedLines.map((line) => {
if (line.length > MAX_LINE_LENGTH_TEXT_FILE) {
linesWereTruncatedInLength = true;
return (
line.substring(0, MAX_LINE_LENGTH_TEXT_FILE) + '... [truncated]'
);
// Apply character limit truncation
let llmContent = '';
let contentLengthTruncated = false;
let linesIncluded = 0;
if (Number.isFinite(configCharLimit)) {
const formattedLines: string[] = [];
let currentLength = 0;
for (const line of selectedLines) {
const sep = linesIncluded > 0 ? 1 : 0; // newline separator
linesIncluded++;
const projectedLength = currentLength + line.length + sep;
if (projectedLength <= configCharLimit) {
formattedLines.push(line);
currentLength = projectedLength;
} else {
// Truncate the current line to fit
const remaining = Math.max(
configCharLimit - currentLength - sep,
10,
);
formattedLines.push(
line.substring(0, remaining) + '... [truncated]',
);
contentLengthTruncated = true;
break;
}
}
return line;
});
llmContent = formattedLines.join('\n');
} else {
// No character limit, use all selected lines
llmContent = selectedLines.join('\n');
linesIncluded = selectedLines.length;
}
// Calculate actual end line shown
const actualEndLine = contentLengthTruncated
? actualStartLine + linesIncluded
: endLine;
const contentRangeTruncated =
startLine > 0 || endLine < originalLineCount;
const isTruncated = contentRangeTruncated || linesWereTruncatedInLength;
const llmContent = formattedLines.join('\n');
startLine > 0 || actualEndLine < originalLineCount;
const isTruncated = contentRangeTruncated || contentLengthTruncated;
// By default, return nothing to streamline the common case of a successful read_file.
let returnDisplay = '';
if (contentRangeTruncated) {
if (isTruncated) {
returnDisplay = `Read lines ${
actualStartLine + 1
}-${endLine} of ${originalLineCount} from ${relativePathForDisplay}`;
if (linesWereTruncatedInLength) {
returnDisplay += ' (some lines were shortened)';
}-${actualEndLine} of ${originalLineCount} from ${relativePathForDisplay}`;
if (contentLengthTruncated) {
returnDisplay += ' (truncated)';
}
} else if (linesWereTruncatedInLength) {
returnDisplay = `Read all ${originalLineCount} lines from ${relativePathForDisplay} (some lines were shortened)`;
}
return {
@@ -425,7 +452,7 @@ export async function processSingleFileContent(
returnDisplay,
isTruncated,
originalLineCount,
linesShown: [actualStartLine + 1, endLine],
linesShown: [actualStartLine + 1, actualEndLine],
};
}
case 'image':

View File

@@ -29,6 +29,8 @@ const createMockConfig = (
getTargetDir: () => cwd,
getFileSystemService: () => fileSystemService,
getFileService: () => mockFileService,
getTruncateToolOutputThreshold: () => 2500,
getTruncateToolOutputLines: () => 500,
} as unknown as Config;
};

View File

@@ -83,11 +83,7 @@ export async function readPathFromWorkspace(
for (const filePath of finalFiles) {
const relativePathForDisplay = path.relative(absolutePath, filePath);
allParts.push({ text: `--- ${relativePathForDisplay} ---\n` });
const result = await processSingleFileContent(
filePath,
config.getTargetDir(),
config.getFileSystemService(),
);
const result = await processSingleFileContent(filePath, config);
allParts.push(result.llmContent);
allParts.push({ text: '\n' }); // Add a newline for separation
}
@@ -108,11 +104,7 @@ export async function readPathFromWorkspace(
}
// It's a single file, process it directly.
const result = await processSingleFileContent(
absolutePath,
config.getTargetDir(),
config.getFileSystemService(),
);
const result = await processSingleFileContent(absolutePath, config);
return [result.llmContent];
}
}