mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-21 01:07:46 +00:00
chore: sync gemini-cli v0.1.19
This commit is contained in:
@@ -190,80 +190,43 @@ describe('bfsFileSearch', () => {
|
||||
});
|
||||
});
|
||||
|
||||
it('should perform parallel directory scanning efficiently (performance test)', async () => {
|
||||
// Create a more complex directory structure for performance testing
|
||||
console.log('\n🚀 Testing Parallel BFS Performance...');
|
||||
it('should find all files in a complex directory structure', async () => {
|
||||
// Create a complex directory structure to test correctness at scale
|
||||
// without flaky performance checks.
|
||||
const numDirs = 50;
|
||||
const numFilesPerDir = 2;
|
||||
const numTargetDirs = 10;
|
||||
|
||||
// Create 50 directories with multiple levels for faster test execution
|
||||
for (let i = 0; i < 50; i++) {
|
||||
await createEmptyDir(`dir${i}`);
|
||||
await createEmptyDir(`dir${i}`, 'subdir1');
|
||||
await createEmptyDir(`dir${i}`, 'subdir2');
|
||||
await createEmptyDir(`dir${i}`, 'subdir1', 'deep');
|
||||
if (i < 10) {
|
||||
// Add target files in some directories
|
||||
await createTestFile('content', `dir${i}`, 'QWEN.md');
|
||||
await createTestFile('content', `dir${i}`, 'subdir1', 'QWEN.md');
|
||||
}
|
||||
const dirCreationPromises: Array<Promise<unknown>> = [];
|
||||
for (let i = 0; i < numDirs; i++) {
|
||||
dirCreationPromises.push(createEmptyDir(`dir${i}`));
|
||||
dirCreationPromises.push(createEmptyDir(`dir${i}`, 'subdir1'));
|
||||
dirCreationPromises.push(createEmptyDir(`dir${i}`, 'subdir2'));
|
||||
dirCreationPromises.push(createEmptyDir(`dir${i}`, 'subdir1', 'deep'));
|
||||
}
|
||||
await Promise.all(dirCreationPromises);
|
||||
|
||||
// Run multiple iterations to ensure consistency
|
||||
const iterations = 3;
|
||||
const durations: number[] = [];
|
||||
let foundFiles = 0;
|
||||
let firstResultSorted: string[] | undefined;
|
||||
|
||||
for (let i = 0; i < iterations; i++) {
|
||||
const searchStartTime = performance.now();
|
||||
const result = await bfsFileSearch(testRootDir, {
|
||||
fileName: 'QWEN.md',
|
||||
maxDirs: 200,
|
||||
debug: false,
|
||||
});
|
||||
const duration = performance.now() - searchStartTime;
|
||||
durations.push(duration);
|
||||
|
||||
// Verify consistency: all iterations should find the exact same files
|
||||
if (firstResultSorted === undefined) {
|
||||
foundFiles = result.length;
|
||||
firstResultSorted = result.sort();
|
||||
} else {
|
||||
expect(result.sort()).toEqual(firstResultSorted);
|
||||
}
|
||||
|
||||
console.log(`📊 Iteration ${i + 1}: ${duration.toFixed(2)}ms`);
|
||||
const fileCreationPromises: Array<Promise<string>> = [];
|
||||
for (let i = 0; i < numTargetDirs; i++) {
|
||||
// Add target files in some directories
|
||||
fileCreationPromises.push(
|
||||
createTestFile('content', `dir${i}`, 'GEMINI.md'),
|
||||
);
|
||||
fileCreationPromises.push(
|
||||
createTestFile('content', `dir${i}`, 'subdir1', 'GEMINI.md'),
|
||||
);
|
||||
}
|
||||
const expectedFiles = await Promise.all(fileCreationPromises);
|
||||
|
||||
const avgDuration = durations.reduce((a, b) => a + b, 0) / durations.length;
|
||||
const maxDuration = Math.max(...durations);
|
||||
const minDuration = Math.min(...durations);
|
||||
const result = await bfsFileSearch(testRootDir, {
|
||||
fileName: 'GEMINI.md',
|
||||
// Provide a generous maxDirs limit to ensure it doesn't prematurely stop
|
||||
// in this large test case. Total dirs created is 200.
|
||||
maxDirs: 250,
|
||||
});
|
||||
|
||||
console.log(`📊 Average Duration: ${avgDuration.toFixed(2)}ms`);
|
||||
console.log(
|
||||
`📊 Min/Max Duration: ${minDuration.toFixed(2)}ms / ${maxDuration.toFixed(2)}ms`,
|
||||
);
|
||||
console.log(`📁 Found ${foundFiles} QWEN.md files`);
|
||||
console.log(
|
||||
`🏎️ Processing ~${Math.round(200 / (avgDuration / 1000))} dirs/second`,
|
||||
);
|
||||
|
||||
// Verify we found the expected files
|
||||
expect(foundFiles).toBe(20); // 10 dirs * 2 files each
|
||||
|
||||
// Performance expectation: check consistency rather than absolute time
|
||||
const variance = maxDuration - minDuration;
|
||||
const consistencyRatio = variance / avgDuration;
|
||||
|
||||
// Ensure reasonable performance (generous limit for CI environments)
|
||||
expect(avgDuration).toBeLessThan(2000); // Very generous limit
|
||||
|
||||
// Ensure consistency across runs (variance should not be too high)
|
||||
// More tolerant in CI environments where performance can be variable
|
||||
const maxConsistencyRatio = process.env.CI ? 3.0 : 1.5;
|
||||
expect(consistencyRatio).toBeLessThan(maxConsistencyRatio); // Max variance should be reasonable
|
||||
|
||||
console.log(
|
||||
`✅ Performance test passed: avg=${avgDuration.toFixed(2)}ms, consistency=${(consistencyRatio * 100).toFixed(1)}% (threshold: ${(maxConsistencyRatio * 100).toFixed(0)}%)`,
|
||||
);
|
||||
// Verify we found the exact files we created
|
||||
expect(result.length).toBe(numTargetDirs * numFilesPerDir);
|
||||
expect(result.sort()).toEqual(expectedFiles.sort());
|
||||
});
|
||||
});
|
||||
|
||||
@@ -4,12 +4,7 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import {
|
||||
Content,
|
||||
GenerateContentConfig,
|
||||
SchemaUnion,
|
||||
Type,
|
||||
} from '@google/genai';
|
||||
import { Content, GenerateContentConfig } from '@google/genai';
|
||||
import { GeminiClient } from '../core/client.js';
|
||||
import { EditToolParams, EditTool } from '../tools/edit.js';
|
||||
import { WriteFileTool } from '../tools/write-file.js';
|
||||
@@ -364,11 +359,11 @@ export async function ensureCorrectFileContent(
|
||||
}
|
||||
|
||||
// Define the expected JSON schema for the LLM response for old_string correction
|
||||
const OLD_STRING_CORRECTION_SCHEMA: SchemaUnion = {
|
||||
type: Type.OBJECT,
|
||||
const OLD_STRING_CORRECTION_SCHEMA: Record<string, unknown> = {
|
||||
type: 'object',
|
||||
properties: {
|
||||
corrected_target_snippet: {
|
||||
type: Type.STRING,
|
||||
type: 'string',
|
||||
description:
|
||||
'The corrected version of the target snippet that exactly and uniquely matches a segment within the provided file content.',
|
||||
},
|
||||
@@ -438,11 +433,11 @@ Return ONLY the corrected target snippet in the specified JSON format with the k
|
||||
}
|
||||
|
||||
// Define the expected JSON schema for the new_string correction LLM response
|
||||
const NEW_STRING_CORRECTION_SCHEMA: SchemaUnion = {
|
||||
type: Type.OBJECT,
|
||||
const NEW_STRING_CORRECTION_SCHEMA: Record<string, unknown> = {
|
||||
type: 'object',
|
||||
properties: {
|
||||
corrected_new_string: {
|
||||
type: Type.STRING,
|
||||
type: 'string',
|
||||
description:
|
||||
'The original_new_string adjusted to be a suitable replacement for the corrected_old_string, while maintaining the original intent of the change.',
|
||||
},
|
||||
@@ -521,11 +516,11 @@ Return ONLY the corrected string in the specified JSON format with the key 'corr
|
||||
}
|
||||
}
|
||||
|
||||
const CORRECT_NEW_STRING_ESCAPING_SCHEMA: SchemaUnion = {
|
||||
type: Type.OBJECT,
|
||||
const CORRECT_NEW_STRING_ESCAPING_SCHEMA: Record<string, unknown> = {
|
||||
type: 'object',
|
||||
properties: {
|
||||
corrected_new_string_escaping: {
|
||||
type: Type.STRING,
|
||||
type: 'string',
|
||||
description:
|
||||
'The new_string with corrected escaping, ensuring it is a proper replacement for the old_string, especially considering potential over-escaping issues from previous LLM generations.',
|
||||
},
|
||||
@@ -593,11 +588,11 @@ Return ONLY the corrected string in the specified JSON format with the key 'corr
|
||||
}
|
||||
}
|
||||
|
||||
const CORRECT_STRING_ESCAPING_SCHEMA: SchemaUnion = {
|
||||
type: Type.OBJECT,
|
||||
const CORRECT_STRING_ESCAPING_SCHEMA: Record<string, unknown> = {
|
||||
type: 'object',
|
||||
properties: {
|
||||
corrected_string_escaping: {
|
||||
type: Type.STRING,
|
||||
type: 'string',
|
||||
description:
|
||||
'The string with corrected escaping, ensuring it is valid, specially considering potential over-escaping issues from previous LLM generations.',
|
||||
},
|
||||
|
||||
205
packages/core/src/utils/environmentContext.test.ts
Normal file
205
packages/core/src/utils/environmentContext.test.ts
Normal file
@@ -0,0 +1,205 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import {
|
||||
describe,
|
||||
it,
|
||||
expect,
|
||||
vi,
|
||||
beforeEach,
|
||||
afterEach,
|
||||
type Mock,
|
||||
} from 'vitest';
|
||||
import {
|
||||
getEnvironmentContext,
|
||||
getDirectoryContextString,
|
||||
} from './environmentContext.js';
|
||||
import { Config } from '../config/config.js';
|
||||
import { getFolderStructure } from './getFolderStructure.js';
|
||||
|
||||
vi.mock('../config/config.js');
|
||||
vi.mock('./getFolderStructure.js', () => ({
|
||||
getFolderStructure: vi.fn(),
|
||||
}));
|
||||
vi.mock('../tools/read-many-files.js');
|
||||
|
||||
describe('getDirectoryContextString', () => {
  // Header that precedes the rendered folder structure(s) in the output.
  const STRUCTURE_HEADER =
    'Here is the folder structure of the current working directories:\n\n';

  let getDirectories: Mock;
  let mockConfig: Partial<Config>;

  beforeEach(() => {
    // Default workspace: a single directory with a canned folder structure.
    getDirectories = vi.fn().mockReturnValue(['/test/dir']);
    mockConfig = {
      getWorkspaceContext: vi.fn().mockReturnValue({ getDirectories }),
      getFileService: vi.fn(),
    };
    vi.mocked(getFolderStructure).mockResolvedValue('Mock Folder Structure');
  });

  afterEach(() => {
    vi.resetAllMocks();
  });

  it('should return context string for a single directory', async () => {
    const output = await getDirectoryContextString(mockConfig as Config);

    expect(output).toContain(
      "I'm currently working in the directory: /test/dir",
    );
    expect(output).toContain(`${STRUCTURE_HEADER}Mock Folder Structure`);
  });

  it('should return context string for multiple directories', async () => {
    // Switch the workspace to two directories, each with its own structure.
    getDirectories.mockReturnValue(['/test/dir1', '/test/dir2']);
    const folderStructureMock = vi.mocked(getFolderStructure);
    folderStructureMock.mockResolvedValueOnce('Structure 1');
    folderStructureMock.mockResolvedValueOnce('Structure 2');

    const output = await getDirectoryContextString(mockConfig as Config);

    expect(output).toContain(
      "I'm currently working in the following directories:\n - /test/dir1\n - /test/dir2",
    );
    expect(output).toContain(`${STRUCTURE_HEADER}Structure 1\nStructure 2`);
  });
});
|
||||
|
||||
describe('getEnvironmentContext', () => {
  let getDirectories: Mock;
  let mockConfig: Partial<Config>;
  let mockToolRegistry: { getTool: Mock };

  /**
   * Turns on the full-context flag and registers a fake read_many_files tool
   * whose built invocation runs the supplied `execute` mock.
   */
  const enableFullContextWith = (execute: Mock) => {
    mockConfig.getFullContext = vi.fn().mockReturnValue(true);
    const tool = { build: vi.fn().mockReturnValue({ execute }) };
    mockToolRegistry.getTool.mockReturnValue(tool);
    return tool;
  };

  beforeEach(() => {
    // Pin the clock so the date line in the context is deterministic.
    vi.useFakeTimers();
    vi.setSystemTime(new Date('2025-08-05T12:00:00Z'));

    mockToolRegistry = { getTool: vi.fn() };
    getDirectories = vi.fn().mockReturnValue(['/test/dir']);
    mockConfig = {
      getWorkspaceContext: vi.fn().mockReturnValue({ getDirectories }),
      getFileService: vi.fn(),
      getFullContext: vi.fn().mockReturnValue(false),
      getToolRegistry: vi.fn().mockResolvedValue(mockToolRegistry),
    };

    vi.mocked(getFolderStructure).mockResolvedValue('Mock Folder Structure');
  });

  afterEach(() => {
    vi.useRealTimers();
    vi.resetAllMocks();
  });

  it('should return basic environment context for a single directory', async () => {
    const parts = await getEnvironmentContext(mockConfig as Config);

    expect(parts.length).toBe(1);
    const [{ text: context }] = parts;

    expect(context).toContain("Today's date is Tuesday, August 5, 2025");
    expect(context).toContain(`My operating system is: ${process.platform}`);
    expect(context).toContain(
      "I'm currently working in the directory: /test/dir",
    );
    expect(context).toContain(
      'Here is the folder structure of the current working directories:\n\nMock Folder Structure',
    );
    expect(getFolderStructure).toHaveBeenCalledWith('/test/dir', {
      fileService: undefined,
    });
  });

  it('should return basic environment context for multiple directories', async () => {
    getDirectories.mockReturnValue(['/test/dir1', '/test/dir2']);
    const folderStructureMock = vi.mocked(getFolderStructure);
    folderStructureMock.mockResolvedValueOnce('Structure 1');
    folderStructureMock.mockResolvedValueOnce('Structure 2');

    const parts = await getEnvironmentContext(mockConfig as Config);

    expect(parts.length).toBe(1);
    const [{ text: context }] = parts;

    expect(context).toContain(
      "I'm currently working in the following directories:\n - /test/dir1\n - /test/dir2",
    );
    expect(context).toContain(
      'Here is the folder structure of the current working directories:\n\nStructure 1\nStructure 2',
    );
    expect(getFolderStructure).toHaveBeenCalledTimes(2);
  });

  it('should include full file context when getFullContext is true', async () => {
    const readManyFilesTool = enableFullContextWith(
      vi.fn().mockResolvedValue({ llmContent: 'Full file content here' }),
    );

    const parts = await getEnvironmentContext(mockConfig as Config);

    expect(parts.length).toBe(2);
    expect(parts[1].text).toBe(
      '\n--- Full File Context ---\nFull file content here',
    );
    expect(mockToolRegistry.getTool).toHaveBeenCalledWith('read_many_files');
    expect(readManyFilesTool.build).toHaveBeenCalledWith({
      paths: ['**/*'],
      useDefaultExcludes: true,
    });
  });

  it('should handle read_many_files returning no content', async () => {
    enableFullContextWith(vi.fn().mockResolvedValue({ llmContent: '' }));

    const parts = await getEnvironmentContext(mockConfig as Config);

    // Empty content must not produce an extra part.
    expect(parts.length).toBe(1);
  });

  it('should handle read_many_files tool not being found', async () => {
    mockConfig.getFullContext = vi.fn().mockReturnValue(true);
    mockToolRegistry.getTool.mockReturnValue(null);

    const parts = await getEnvironmentContext(mockConfig as Config);

    // A missing tool must not produce an extra part.
    expect(parts.length).toBe(1);
  });

  it('should handle errors when reading full file context', async () => {
    enableFullContextWith(vi.fn().mockRejectedValue(new Error('Read error')));

    const parts = await getEnvironmentContext(mockConfig as Config);

    expect(parts.length).toBe(2);
    expect(parts[1].text).toBe('\n--- Error reading full file context ---');
  });
});
|
||||
109
packages/core/src/utils/environmentContext.ts
Normal file
109
packages/core/src/utils/environmentContext.ts
Normal file
@@ -0,0 +1,109 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { Part } from '@google/genai';
|
||||
import { Config } from '../config/config.js';
|
||||
import { getFolderStructure } from './getFolderStructure.js';
|
||||
|
||||
/**
 * Builds the text block that tells the model which workspace directories are
 * in use and what their folder structures look like.
 *
 * The folder structure of every workspace directory is fetched concurrently
 * and the results are joined with newlines, in workspace order.
 *
 * @param config - The runtime configuration and services.
 * @returns A promise that resolves to the directory context string.
 */
export async function getDirectoryContextString(
  config: Config,
): Promise<string> {
  const directories = config.getWorkspaceContext().getDirectories();

  const structures = await Promise.all(
    directories.map((dir) =>
      getFolderStructure(dir, { fileService: config.getFileService() }),
    ),
  );

  // A single directory gets a one-line sentence; any other count gets a list.
  const preamble =
    directories.length === 1
      ? `I'm currently working in the directory: ${directories[0]}`
      : `I'm currently working in the following directories:\n${directories
          .map((dir) => ` - ${dir}`)
          .join('\n')}`;

  return [
    preamble,
    'Here is the folder structure of the current working directories:',
    '',
    structures.join('\n'),
  ].join('\n');
}
|
||||
|
||||
/**
 * Collects the environment information that seeds the chat context.
 *
 * The first part always contains today's date, the operating system platform
 * and the workspace directory context. When `config.getFullContext()` is
 * truthy, the `read_many_files` tool is additionally asked to read the whole
 * workspace and its output (or an error marker) is appended as a second part.
 *
 * @param config - The runtime configuration and services.
 * @returns A promise that resolves to an array of `Part` objects containing environment information.
 */
export async function getEnvironmentContext(config: Config): Promise<Part[]> {
  const dateFormat: Intl.DateTimeFormatOptions = {
    weekday: 'long',
    year: 'numeric',
    month: 'long',
    day: 'numeric',
  };
  // `undefined` locale: format according to the runtime's default locale.
  const today = new Date().toLocaleDateString(undefined, dateFormat);
  const platform = process.platform;
  const directoryContext = await getDirectoryContextString(config);

  const context = `
This is the Qwen Code. We are setting up the context for our chat.
Today's date is ${today}.
My operating system is: ${platform}
${directoryContext}
`.trim();

  const parts: Part[] = [{ text: context }];
  const toolRegistry = await config.getToolRegistry();

  // Nothing more to add unless full file context was requested.
  if (!config.getFullContext()) {
    return parts;
  }

  try {
    const readManyFilesTool = toolRegistry.getTool('read_many_files');
    if (!readManyFilesTool) {
      console.warn(
        'Full context requested, but read_many_files tool not found.',
      );
      return parts;
    }

    // Read everything recursively, using the tool's default excludes.
    const invocation = readManyFilesTool.build({
      paths: ['**/*'],
      useDefaultExcludes: true,
    });

    // The abort signal fires after 30 seconds.
    const result = await invocation.execute(AbortSignal.timeout(30000));
    if (result.llmContent) {
      parts.push({
        text: `\n--- Full File Context ---\n${result.llmContent}`,
      });
    } else {
      console.warn(
        'Full context requested, but read_many_files returned no content.',
      );
    }
  } catch (error) {
    // Not using reportError here as it's a startup/config phase, not a chat/generation phase error.
    console.error('Error reading full file context:', error);
    parts.push({
      text: '\n--- Error reading full file context ---',
    });
  }

  return parts;
}
|
||||
@@ -196,9 +196,13 @@ describe('fileUtils', () => {
|
||||
vi.restoreAllMocks(); // Restore spies on actualNodeFs
|
||||
});
|
||||
|
||||
it('should detect typescript type by extension (ts)', async () => {
|
||||
it('should detect typescript type by extension (ts, mts, cts, tsx)', async () => {
|
||||
expect(await detectFileType('file.ts')).toBe('text');
|
||||
expect(await detectFileType('file.test.ts')).toBe('text');
|
||||
expect(await detectFileType('file.mts')).toBe('text');
|
||||
expect(await detectFileType('vite.config.mts')).toBe('text');
|
||||
expect(await detectFileType('file.cts')).toBe('text');
|
||||
expect(await detectFileType('component.tsx')).toBe('text');
|
||||
});
|
||||
|
||||
it('should detect image type by extension (png)', async () => {
|
||||
@@ -416,10 +420,7 @@ describe('fileUtils', () => {
|
||||
); // Read lines 6-10
|
||||
const expectedContent = lines.slice(5, 10).join('\n');
|
||||
|
||||
expect(result.llmContent).toContain(expectedContent);
|
||||
expect(result.llmContent).toContain(
|
||||
'[File content truncated: showing lines 6-10 of 20 total lines. Use offset/limit parameters to view more.]',
|
||||
);
|
||||
expect(result.llmContent).toBe(expectedContent);
|
||||
expect(result.returnDisplay).toBe('Read lines 6-10 of 20 from test.txt');
|
||||
expect(result.isTruncated).toBe(true);
|
||||
expect(result.originalLineCount).toBe(20);
|
||||
@@ -440,9 +441,6 @@ describe('fileUtils', () => {
|
||||
const expectedContent = lines.slice(10, 20).join('\n');
|
||||
|
||||
expect(result.llmContent).toContain(expectedContent);
|
||||
expect(result.llmContent).toContain(
|
||||
'[File content truncated: showing lines 11-20 of 20 total lines. Use offset/limit parameters to view more.]',
|
||||
);
|
||||
expect(result.returnDisplay).toBe('Read lines 11-20 of 20 from test.txt');
|
||||
expect(result.isTruncated).toBe(true); // This is the key check for the bug
|
||||
expect(result.originalLineCount).toBe(20);
|
||||
@@ -485,9 +483,6 @@ describe('fileUtils', () => {
|
||||
longLine.substring(0, 2000) + '... [truncated]',
|
||||
);
|
||||
expect(result.llmContent).toContain('Another short line');
|
||||
expect(result.llmContent).toContain(
|
||||
'[File content partially truncated: some lines exceeded maximum length of 2000 characters.]',
|
||||
);
|
||||
expect(result.returnDisplay).toBe(
|
||||
'Read all 3 lines from test.txt (some lines were shortened)',
|
||||
);
|
||||
|
||||
@@ -122,9 +122,10 @@ export async function detectFileType(
|
||||
): Promise<'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' | 'svg'> {
|
||||
const ext = path.extname(filePath).toLowerCase();
|
||||
|
||||
// The mimetype for "ts" is MPEG transport stream (a video format) but we want
|
||||
// to assume these are typescript files instead.
|
||||
if (ext === '.ts') {
|
||||
// The mimetype for various TypeScript extensions (ts, mts, cts, tsx) can be
|
||||
// MPEG transport stream (a video format), but we want to assume these are
|
||||
// TypeScript files instead.
|
||||
if (['.ts', '.mts', '.cts'].includes(ext)) {
|
||||
return 'text';
|
||||
}
|
||||
|
||||
@@ -194,10 +195,18 @@ export async function detectFileType(
|
||||
return 'text';
|
||||
}
|
||||
|
||||
/**
 * Structured categories for file-read failures, carried on
 * `ProcessedFileReadResult.errorType`.
 * Each member's value is its own name as a string.
 */
export enum FileErrorType {
  /** The requested path does not exist. */
  FILE_NOT_FOUND = 'FILE_NOT_FOUND',
  /** The requested path is a directory rather than a file. */
  IS_DIRECTORY = 'IS_DIRECTORY',
  /** NOTE(review): presumably a size-limit rejection; the producing code is not visible here. */
  FILE_TOO_LARGE = 'FILE_TOO_LARGE',
  /** NOTE(review): presumably a generic read failure; the producing code is not visible here. */
  READ_ERROR = 'READ_ERROR',
}
|
||||
|
||||
export interface ProcessedFileReadResult {
|
||||
llmContent: PartUnion; // string for text, Part for image/pdf/unreadable binary
|
||||
returnDisplay: string;
|
||||
error?: string; // Optional error message for the LLM if file processing failed
|
||||
errorType?: FileErrorType; // Structured error type using enum
|
||||
isTruncated?: boolean; // For text files, indicates if content was truncated
|
||||
originalLineCount?: number; // For text files
|
||||
linesShown?: [number, number]; // For text files [startLine, endLine] (1-based for display)
|
||||
@@ -224,6 +233,7 @@ export async function processSingleFileContent(
|
||||
llmContent: '',
|
||||
returnDisplay: 'File not found.',
|
||||
error: `File not found: ${filePath}`,
|
||||
errorType: FileErrorType.FILE_NOT_FOUND,
|
||||
};
|
||||
}
|
||||
const stats = await fs.promises.stat(filePath);
|
||||
@@ -232,6 +242,7 @@ export async function processSingleFileContent(
|
||||
llmContent: '',
|
||||
returnDisplay: 'Path is a directory.',
|
||||
error: `Path is a directory, not a file: ${filePath}`,
|
||||
errorType: FileErrorType.IS_DIRECTORY,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -302,14 +313,7 @@ export async function processSingleFileContent(
|
||||
const contentRangeTruncated =
|
||||
startLine > 0 || endLine < originalLineCount;
|
||||
const isTruncated = contentRangeTruncated || linesWereTruncatedInLength;
|
||||
|
||||
let llmTextContent = '';
|
||||
if (contentRangeTruncated) {
|
||||
llmTextContent += `[File content truncated: showing lines ${actualStartLine + 1}-${endLine} of ${originalLineCount} total lines. Use offset/limit parameters to view more.]\n`;
|
||||
} else if (linesWereTruncatedInLength) {
|
||||
llmTextContent += `[File content partially truncated: some lines exceeded maximum length of ${MAX_LINE_LENGTH_TEXT_FILE} characters.]\n`;
|
||||
}
|
||||
llmTextContent += formattedLines.join('\n');
|
||||
const llmContent = formattedLines.join('\n');
|
||||
|
||||
// By default, return nothing to streamline the common case of a successful read_file.
|
||||
let returnDisplay = '';
|
||||
@@ -325,7 +329,7 @@ export async function processSingleFileContent(
|
||||
}
|
||||
|
||||
return {
|
||||
llmContent: llmTextContent,
|
||||
llmContent,
|
||||
returnDisplay,
|
||||
isTruncated,
|
||||
originalLineCount,
|
||||
|
||||
@@ -26,6 +26,17 @@ describe('CrawlCache', () => {
|
||||
const key2 = getCacheKey('/foo', 'baz');
|
||||
expect(key1).not.toBe(key2);
|
||||
});
|
||||
|
||||
it('should generate a different hash for different maxDepth values', () => {
  const depthOne = getCacheKey('/foo', 'bar', 1);
  const depthTwo = getCacheKey('/foo', 'bar', 2);
  const depthExplicitlyUndefined = getCacheKey('/foo', 'bar', undefined);
  const depthOmitted = getCacheKey('/foo', 'bar');

  // Distinct depths (including "no depth") must never share a key.
  expect(depthOne).not.toBe(depthTwo);
  expect(depthOne).not.toBe(depthExplicitlyUndefined);
  expect(depthTwo).not.toBe(depthExplicitlyUndefined);

  // Passing undefined is the same as omitting the argument.
  expect(depthExplicitlyUndefined).toBe(depthOmitted);
});
|
||||
});
|
||||
|
||||
describe('in-memory cache operations', () => {
|
||||
|
||||
@@ -17,10 +17,14 @@ const cacheTimers = new Map<string, NodeJS.Timeout>();
|
||||
export const getCacheKey = (
  directory: string,
  ignoreContent: string,
  maxDepth?: number,
): string => {
  // Returns a hex SHA-256 digest identifying a crawl of `directory` with the
  // given ignore-file content and optional depth limit.
  //
  // The inputs are serialized as a JSON array before hashing so that field
  // boundaries are unambiguous. Feeding the raw strings to the hash back to
  // back would make ('/foo', 'bar', 1) collide with ('/foo', 'bar1'), and
  // ('/foo', 'bar') collide with ('/foob', 'ar').
  //
  // An omitted or undefined maxDepth is encoded as null; a numeric depth is
  // encoded as its string form so NaN/Infinity stay distinct from "no depth".
  const fields = [
    directory,
    ignoreContent,
    maxDepth === undefined ? null : String(maxDepth),
  ];
  return crypto
    .createHash('sha256')
    .update(JSON.stringify(fields))
    .digest('hex');
};
|
||||
|
||||
|
||||
@@ -290,6 +290,30 @@ describe('FileSearch', () => {
|
||||
expect(results).toEqual(['src/file1.js', 'src/file2.js']); // Assuming alphabetical sort
|
||||
});
|
||||
|
||||
it('should use fzf for fuzzy matching when pattern does not contain wildcards', async () => {
  tmpDir = await createTmpDir({
    src: {
      'main.js': '',
      'util.ts': '',
      'style.css': '',
    },
  });

  const engine = new FileSearch({
    projectRoot: tmpDir,
    useGitignore: false,
    useGeminiignore: false,
    ignoreDirs: [],
    cache: false,
    cacheTtl: 0,
  });
  await engine.initialize();

  // 'sst' has no wildcard characters and is not a substring of any path.
  const matches = await engine.search('sst');

  expect(matches).toEqual(['src/style.css']);
});
|
||||
|
||||
it('should return empty array when no matches are found', async () => {
|
||||
tmpDir = await createTmpDir({
|
||||
src: ['file1.js'],
|
||||
@@ -446,6 +470,46 @@ describe('FileSearch', () => {
|
||||
|
||||
expect(crawlSpy).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('should miss the cache when maxDepth changes', async () => {
  tmpDir = await createTmpDir({ 'file1.js': '' });

  /**
   * Creates a cache-enabled FileSearch for the given depth, initializes it,
   * and returns the spy that recorded its `performCrawl` calls.
   */
  const initializeWithDepth = async (maxDepth?: number) => {
    const search = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: false,
      useGeminiignore: false,
      ignoreDirs: [],
      cache: true,
      cacheTtl: 10000,
      maxDepth,
    });
    const crawlSpy = vi.spyOn(
      search as FileSearchWithPrivateMethods,
      'performCrawl',
    );
    await search.initialize();
    return crawlSpy;
  };

  // 1. First search with maxDepth: 1, should trigger a crawl.
  const firstCrawl = await initializeWithDepth(1);
  expect(firstCrawl).toHaveBeenCalledTimes(1);

  // 2. Second search with maxDepth: 2, should be a cache miss and trigger a crawl.
  const secondCrawl = await initializeWithDepth(2);
  expect(secondCrawl).toHaveBeenCalledTimes(1);

  // 3. Third search with maxDepth: 1 again, should be a cache hit.
  const thirdCrawl = await initializeWithDepth(1);
  expect(thirdCrawl).not.toHaveBeenCalled();
});
|
||||
});
|
||||
|
||||
it('should handle empty or commented-only ignore files', async () => {
|
||||
@@ -639,4 +703,109 @@ describe('FileSearch', () => {
|
||||
// 3. Assert that the maxResults limit was respected, even with a cache hit.
|
||||
expect(limitedResults).toEqual(['file1.js', 'file2.js']);
|
||||
});
|
||||
|
||||
describe('with maxDepth', () => {
  /**
   * Runs an empty-pattern search over the fixture tree with caching disabled
   * and the given depth limit, returning the result list.
   */
  const searchAllWithDepth = async (maxDepth: number | undefined) => {
    const engine = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: false,
      useGeminiignore: false,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
      maxDepth,
    });
    await engine.initialize();
    return engine.search('');
  };

  beforeEach(async () => {
    // One file at the root and one more per nested level, three levels deep.
    tmpDir = await createTmpDir({
      'file-root.txt': '',
      level1: {
        'file-level1.txt': '',
        level2: {
          'file-level2.txt': '',
          level3: {
            'file-level3.txt': '',
          },
        },
      },
    });
  });

  it('should only search top-level files when maxDepth is 0', async () => {
    const found = await searchAllWithDepth(0);

    expect(found).toEqual(['level1/', 'file-root.txt']);
  });

  it('should search one level deep when maxDepth is 1', async () => {
    const found = await searchAllWithDepth(1);

    expect(found).toEqual([
      'level1/',
      'level1/level2/',
      'file-root.txt',
      'level1/file-level1.txt',
    ]);
  });

  it('should search two levels deep when maxDepth is 2', async () => {
    const found = await searchAllWithDepth(2);

    expect(found).toEqual([
      'level1/',
      'level1/level2/',
      'level1/level2/level3/',
      'file-root.txt',
      'level1/file-level1.txt',
      'level1/level2/file-level2.txt',
    ]);
  });

  it('should perform a full recursive search when maxDepth is undefined', async () => {
    // maxDepth is passed explicitly as undefined.
    const found = await searchAllWithDepth(undefined);

    expect(found).toEqual([
      'level1/',
      'level1/level2/',
      'level1/level2/level3/',
      'file-root.txt',
      'level1/file-level1.txt',
      'level1/level2/file-level2.txt',
      'level1/level2/level3/file-level3.txt',
    ]);
  });
});
|
||||
});
|
||||
|
||||
@@ -11,6 +11,7 @@ import picomatch from 'picomatch';
|
||||
import { Ignore } from './ignore.js';
|
||||
import { ResultCache } from './result-cache.js';
|
||||
import * as cache from './crawlCache.js';
|
||||
import { AsyncFzf, FzfResultItem } from 'fzf';
|
||||
|
||||
export type FileSearchOptions = {
|
||||
projectRoot: string;
|
||||
@@ -19,6 +20,7 @@ export type FileSearchOptions = {
|
||||
useGeminiignore: boolean;
|
||||
cache: boolean;
|
||||
cacheTtl: number;
|
||||
maxDepth?: number;
|
||||
};
|
||||
|
||||
export class AbortError extends Error {
|
||||
@@ -91,6 +93,7 @@ export class FileSearch {
|
||||
private readonly ignore: Ignore = new Ignore();
|
||||
private resultCache: ResultCache | undefined;
|
||||
private allFiles: string[] = [];
|
||||
private fzf: AsyncFzf<string[]> | undefined;
|
||||
|
||||
/**
|
||||
* Constructs a new `FileSearch` instance.
|
||||
@@ -122,22 +125,38 @@ export class FileSearch {
|
||||
pattern: string,
|
||||
options: SearchOptions = {},
|
||||
): Promise<string[]> {
|
||||
if (!this.resultCache) {
|
||||
if (!this.resultCache || !this.fzf) {
|
||||
throw new Error('Engine not initialized. Call initialize() first.');
|
||||
}
|
||||
|
||||
pattern = pattern || '*';
|
||||
|
||||
let filteredCandidates;
|
||||
const { files: candidates, isExactMatch } =
|
||||
await this.resultCache!.get(pattern);
|
||||
|
||||
let filteredCandidates;
|
||||
if (isExactMatch) {
|
||||
// Use the cached result.
|
||||
filteredCandidates = candidates;
|
||||
} else {
|
||||
// Apply the user's picomatch pattern filter
|
||||
filteredCandidates = await filter(candidates, pattern, options.signal);
|
||||
this.resultCache!.set(pattern, filteredCandidates);
|
||||
let shouldCache = true;
|
||||
if (pattern.includes('*')) {
|
||||
filteredCandidates = await filter(candidates, pattern, options.signal);
|
||||
} else {
|
||||
filteredCandidates = await this.fzf
|
||||
.find(pattern)
|
||||
.then((results: Array<FzfResultItem<string>>) =>
|
||||
results.map((entry: FzfResultItem<string>) => entry.item),
|
||||
)
|
||||
.catch(() => {
|
||||
shouldCache = false;
|
||||
return [];
|
||||
});
|
||||
}
|
||||
|
||||
if (shouldCache) {
|
||||
this.resultCache!.set(pattern, filteredCandidates);
|
||||
}
|
||||
}
|
||||
|
||||
// Trade-off: We apply a two-stage filtering process.
|
||||
@@ -215,6 +234,7 @@ export class FileSearch {
|
||||
const cacheKey = cache.getCacheKey(
|
||||
this.absoluteDir,
|
||||
this.ignore.getFingerprint(),
|
||||
this.options.maxDepth,
|
||||
);
|
||||
const cachedResults = cache.read(cacheKey);
|
||||
|
||||
@@ -230,6 +250,7 @@ export class FileSearch {
|
||||
const cacheKey = cache.getCacheKey(
|
||||
this.absoluteDir,
|
||||
this.ignore.getFingerprint(),
|
||||
this.options.maxDepth,
|
||||
);
|
||||
cache.write(cacheKey, this.allFiles, this.options.cacheTtl * 1000);
|
||||
}
|
||||
@@ -257,6 +278,10 @@ export class FileSearch {
|
||||
return dirFilter(`${relativePath}/`);
|
||||
});
|
||||
|
||||
if (this.options.maxDepth !== undefined) {
|
||||
api.withMaxDepth(this.options.maxDepth);
|
||||
}
|
||||
|
||||
return api.crawl(this.absoluteDir).withPromise();
|
||||
}
|
||||
|
||||
@@ -265,5 +290,11 @@ export class FileSearch {
|
||||
*/
|
||||
private buildResultCache(): void {
|
||||
this.resultCache = new ResultCache(this.allFiles, this.absoluteDir);
|
||||
// The v1 algorithm is much faster since it only looks at the first
|
||||
// occurrence of the pattern. We use it for search spaces that have >20k
|
||||
// files, because the v2 algorithm is just too slow in those cases.
|
||||
this.fzf = new AsyncFzf(this.allFiles, {
|
||||
fuzzy: this.allFiles.length > 20000 ? 'v1' : 'v2',
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -335,5 +335,8 @@ export async function loadServerHierarchicalMemory(
|
||||
logger.debug(
|
||||
`Combined instructions (snippet): ${combinedInstructions.substring(0, 500)}...`,
|
||||
);
|
||||
return { memoryContent: combinedInstructions, fileCount: filePaths.length };
|
||||
return {
|
||||
memoryContent: combinedInstructions,
|
||||
fileCount: contentsWithPaths.length,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
import { describe, it, expect, vi, beforeEach, Mock, afterEach } from 'vitest';
|
||||
import { Content, GoogleGenAI, Models } from '@google/genai';
|
||||
import { DEFAULT_GEMINI_FLASH_LITE_MODEL } from '../config/models.js';
|
||||
import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
|
||||
import { GeminiClient } from '../core/client.js';
|
||||
import { Config } from '../config/config.js';
|
||||
import { checkNextSpeaker, NextSpeakerResponse } from './nextSpeakerChecker.js';
|
||||
@@ -248,6 +248,6 @@ describe('checkNextSpeaker', () => {
|
||||
expect(mockGeminiClient.generateJson).toHaveBeenCalled();
|
||||
const generateJsonCall = (mockGeminiClient.generateJson as Mock).mock
|
||||
.calls[0];
|
||||
expect(generateJsonCall[3]).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
|
||||
expect(generateJsonCall[3]).toBe(DEFAULT_GEMINI_FLASH_MODEL);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { Content, SchemaUnion, Type } from '@google/genai';
|
||||
import { DEFAULT_GEMINI_FLASH_LITE_MODEL } from '../config/models.js';
|
||||
import { Content } from '@google/genai';
|
||||
import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
|
||||
import { GeminiClient } from '../core/client.js';
|
||||
import { GeminiChat } from '../core/geminiChat.js';
|
||||
import { isFunctionResponse } from './messageInspectors.js';
|
||||
@@ -16,16 +16,16 @@ const CHECK_PROMPT = `Analyze *only* the content and structure of your immediate
|
||||
2. **Question to User:** If your last response ends with a direct question specifically addressed *to the user*, then the **'user'** should speak next.
|
||||
3. **Waiting for User:** If your last response completed a thought, statement, or task *and* does not meet the criteria for Rule 1 (Model Continues) or Rule 2 (Question to User), it implies a pause expecting user input or reaction. In this case, the **'user'** should speak next.`;
|
||||
|
||||
const RESPONSE_SCHEMA: SchemaUnion = {
|
||||
type: Type.OBJECT,
|
||||
const RESPONSE_SCHEMA: Record<string, unknown> = {
|
||||
type: 'object',
|
||||
properties: {
|
||||
reasoning: {
|
||||
type: Type.STRING,
|
||||
type: 'string',
|
||||
description:
|
||||
"Brief explanation justifying the 'next_speaker' choice based *strictly* on the applicable rule and the content/structure of the preceding turn.",
|
||||
},
|
||||
next_speaker: {
|
||||
type: Type.STRING,
|
||||
type: 'string',
|
||||
enum: ['user', 'model'],
|
||||
description:
|
||||
'Who should speak next based *only* on the preceding turn and the decision rules',
|
||||
@@ -112,7 +112,7 @@ export async function checkNextSpeaker(
|
||||
contents,
|
||||
RESPONSE_SCHEMA,
|
||||
abortSignal,
|
||||
DEFAULT_GEMINI_FLASH_LITE_MODEL,
|
||||
DEFAULT_GEMINI_FLASH_MODEL,
|
||||
)) as unknown as NextSpeakerResponse;
|
||||
|
||||
if (
|
||||
|
||||
@@ -1,362 +0,0 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import path from 'node:path';
|
||||
import fs from 'node:fs/promises';
|
||||
import { openaiLogger } from './openaiLogger.js';
|
||||
|
||||
/**
 * OpenAI API usage analytics
 *
 * This utility analyzes OpenAI API logs to provide insights into API usage
 * patterns, costs, and performance.
 */
export class OpenAIAnalytics {
  /**
   * Calculate statistics for OpenAI API usage
   *
   * Every log file returned by `openaiLogger.getLogFiles()` is read. Entries
   * without a `timestamp` key, with an unparseable timestamp, or older than
   * the cutoff are ignored. A file that fails while being read or processed
   * is reported with `console.error` and skipped.
   *
   * @param days Number of days to analyze (default: 7)
   * @returns Aggregated counters. `successRate` and `errorRates` are
   *   percentages of `totalRequests`; `timeDistribution` maps a two-digit UTC
   *   hour ("00".."23") to a request count; `estimatedCost` is derived from
   *   the per-1000-token price table inside this method.
   */
  static async calculateStats(days: number = 7): Promise<{
    totalRequests: number;
    successRate: number;
    avgResponseTime: number;
    requestsByModel: Record<string, number>;
    tokenUsage: {
      promptTokens: number;
      completionTokens: number;
      totalTokens: number;
    };
    estimatedCost: number;
    errorRates: Record<string, number>;
    timeDistribution: Record<string, number>;
  }> {
    const logs = await openaiLogger.getLogFiles();
    const now = new Date();
    const cutoffDate = new Date(now.getTime() - days * 24 * 60 * 60 * 1000);

    let totalRequests = 0;
    let successfulRequests = 0;
    // NOTE(review): nothing below accumulates a response time, so
    // `avgResponseTime` is always 0. Kept for interface compatibility.
    const totalResponseTime = 0;
    const requestsByModel: Record<string, number> = {};
    const tokenUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
    const errorTypes: Record<string, number> = {};
    const hourDistribution: Record<string, number> = {};

    // Initialize hour distribution (0-23)
    for (let i = 0; i < 24; i++) {
      const hour = i.toString().padStart(2, '0');
      hourDistribution[hour] = 0;
    }

    // Model pricing estimates (per 1000 tokens)
    const pricing: Record<string, { input: number; output: number }> = {
      'gpt-4': { input: 0.03, output: 0.06 },
      'gpt-4-32k': { input: 0.06, output: 0.12 },
      'gpt-4-1106-preview': { input: 0.01, output: 0.03 },
      'gpt-4-0125-preview': { input: 0.01, output: 0.03 },
      'gpt-4-0613': { input: 0.03, output: 0.06 },
      'gpt-4-32k-0613': { input: 0.06, output: 0.12 },
      'gpt-3.5-turbo': { input: 0.0015, output: 0.002 },
      'gpt-3.5-turbo-16k': { input: 0.003, output: 0.004 },
      'gpt-3.5-turbo-0613': { input: 0.0015, output: 0.002 },
      'gpt-3.5-turbo-16k-0613': { input: 0.003, output: 0.004 },
    };

    // Default pricing for unknown models
    const defaultPricing = { input: 0.01, output: 0.03 };

    let estimatedCost = 0;

    for (const logFile of logs) {
      try {
        const logData = await openaiLogger.readLogFile(logFile);

        // Type guard to check if logData has the expected structure
        if (!isObjectWith<{ timestamp: string }>(logData, ['timestamp'])) {
          continue; // Skip malformed logs
        }

        const logDate = new Date(logData.timestamp);

        // Skip logs whose timestamp cannot be parsed. Otherwise the hour key
        // below becomes "NaN", which adds a bogus bucket to the distribution
        // and makes the histogram maximum NaN.
        if (Number.isNaN(logDate.getTime())) {
          continue;
        }

        // Skip if log is older than the cutoff date
        if (logDate < cutoffDate) {
          continue;
        }

        totalRequests++;
        const hour = logDate.getUTCHours().toString().padStart(2, '0');
        hourDistribution[hour]++;

        // Check if request was successful
        if (
          isObjectWith<{ response?: unknown; error?: unknown }>(logData, [
            'response',
            'error',
          ]) &&
          logData.response &&
          !logData.error
        ) {
          successfulRequests++;

          // Extract model if available
          const model = getModelFromLog(logData);
          if (model) {
            requestsByModel[model] = (requestsByModel[model] || 0) + 1;
          }

          // Extract token usage if available
          const usage = getTokenUsageFromLog(logData);
          if (usage) {
            tokenUsage.promptTokens += usage.prompt_tokens || 0;
            tokenUsage.completionTokens += usage.completion_tokens || 0;
            tokenUsage.totalTokens += usage.total_tokens || 0;

            // Calculate cost; models missing from the table use the default
            const modelName = model || 'unknown';
            const modelPricing = pricing[modelName] || defaultPricing;

            const inputCost =
              ((usage.prompt_tokens || 0) / 1000) * modelPricing.input;
            const outputCost =
              ((usage.completion_tokens || 0) / 1000) * modelPricing.output;
            estimatedCost += inputCost + outputCost;
          }
        } else if (
          isObjectWith<{ error?: unknown }>(logData, ['error']) &&
          logData.error
        ) {
          // Categorize errors
          const errorType = getErrorTypeFromLog(logData);
          errorTypes[errorType] = (errorTypes[errorType] || 0) + 1;
        }
      } catch (error) {
        console.error(`Error processing log file ${logFile}:`, error);
      }
    }

    // Calculate success rate and average response time
    const successRate =
      totalRequests > 0 ? (successfulRequests / totalRequests) * 100 : 0;
    const avgResponseTime =
      totalRequests > 0 ? totalResponseTime / totalRequests : 0;

    // Calculate error rates as percentages
    const errorRates: Record<string, number> = {};
    for (const [errorType, count] of Object.entries(errorTypes)) {
      errorRates[errorType] =
        totalRequests > 0 ? (count / totalRequests) * 100 : 0;
    }

    return {
      totalRequests,
      successRate,
      avgResponseTime,
      requestsByModel,
      tokenUsage,
      estimatedCost,
      errorRates,
      timeDistribution: hourDistribution,
    };
  }

  /**
   * Generate a human-readable report of OpenAI API usage
   *
   * The report is Markdown: an overview, token totals, models sorted by
   * request count, error categories sorted by rate (only when any exist), and
   * a text histogram of requests per UTC hour.
   *
   * @param days Number of days to include in the report
   * @returns The report text
   */
  static async generateReport(days: number = 7): Promise<string> {
    const stats = await this.calculateStats(days);

    let report = `# OpenAI API Usage Report\n`;
    report += `## Last ${days} days (${new Date().toISOString().split('T')[0]})\n\n`;

    report += `### Overview\n`;
    report += `- Total Requests: ${stats.totalRequests}\n`;
    report += `- Success Rate: ${stats.successRate.toFixed(2)}%\n`;
    report += `- Estimated Cost: $${stats.estimatedCost.toFixed(2)}\n\n`;

    report += `### Token Usage\n`;
    report += `- Prompt Tokens: ${stats.tokenUsage.promptTokens.toLocaleString()}\n`;
    report += `- Completion Tokens: ${stats.tokenUsage.completionTokens.toLocaleString()}\n`;
    report += `- Total Tokens: ${stats.tokenUsage.totalTokens.toLocaleString()}\n\n`;

    report += `### Models Used\n`;
    const sortedModels = Object.entries(stats.requestsByModel) as Array<
      [string, number]
    >;
    sortedModels.sort((a, b) => b[1] - a[1]);

    for (const [model, count] of sortedModels) {
      // Share of ALL requests in the window, not only of successful ones.
      const percentage = ((count / stats.totalRequests) * 100).toFixed(1);
      report += `- ${model}: ${count} requests (${percentage}%)\n`;
    }

    if (Object.keys(stats.errorRates).length > 0) {
      report += `\n### Error Types\n`;
      const sortedErrors = Object.entries(stats.errorRates) as Array<
        [string, number]
      >;
      sortedErrors.sort((a, b) => b[1] - a[1]);

      for (const [errorType, rate] of sortedErrors) {
        report += `- ${errorType}: ${rate.toFixed(1)}%\n`;
      }
    }

    report += `\n### Usage by Hour (UTC)\n`;
    report += `\`\`\`\n`;
    const maxRequests = Math.max(...Object.values(stats.timeDistribution));
    const scale = 40; // max bar length

    for (let i = 0; i < 24; i++) {
      const hour = i.toString().padStart(2, '0');
      const requests = stats.timeDistribution[hour] || 0;
      // Bars are scaled relative to the busiest hour.
      const barLength =
        maxRequests > 0 ? Math.round((requests / maxRequests) * scale) : 0;
      const bar = '█'.repeat(barLength);
      report += `${hour}:00 ${bar.padEnd(scale)} ${requests}\n`;
    }
    report += `\`\`\`\n`;

    return report;
  }

  /**
   * Save an analytics report to a file
   *
   * The parent directory of the target path is created first when it does
   * not exist, so a fresh checkout without a `logs/openai` folder works.
   *
   * @param days Number of days to include
   * @param outputPath File path for the report (defaults to logs/openai/analytics.md)
   * @returns The path the report was written to
   */
  static async saveReport(
    days: number = 7,
    outputPath?: string,
  ): Promise<string> {
    const report = await this.generateReport(days);
    const reportPath =
      outputPath || path.join(process.cwd(), 'logs', 'openai', 'analytics.md');

    await fs.mkdir(path.dirname(reportPath), { recursive: true });
    await fs.writeFile(reportPath, report, 'utf-8');
    return reportPath;
  }
}
|
||||
|
||||
/**
 * Type guard: true when `obj` is a non-null object on which every entry of
 * `keys` is present according to the `in` operator (so inherited keys and
 * keys whose value is `undefined` count as present).
 */
function isObjectWith<T extends object>(
  obj: unknown,
  keys: Array<keyof T>,
): obj is T {
  if (obj === null || typeof obj !== 'object') {
    return false;
  }
  for (const requiredKey of keys) {
    if (!(requiredKey in obj)) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
/**
 * Extract the model name from a log entry
 *
 * Looks, in order, at `request.model`, `response.model` and
 * `response.modelVersion` and returns the first one that is a non-empty
 * string. `request` and `response` are inspected independently, so a log
 * that carries only one of them still yields its model.
 *
 * @param logData Parsed log entry of unknown shape
 * @returns The model name, or `undefined` when none is recorded
 */
function getModelFromLog(logData: unknown): string | undefined {
  if (typeof logData !== 'object' || logData === null) {
    return undefined;
  }

  // Read `holder[key]` only when holder is an object and the value is a
  // non-empty string; anything else counts as "not recorded".
  const readString = (holder: unknown, key: string): string | undefined => {
    if (typeof holder !== 'object' || holder === null) {
      return undefined;
    }
    const value = (holder as Record<string, unknown>)[key];
    return typeof value === 'string' && value !== '' ? value : undefined;
  };

  const { request, response } = logData as {
    request?: unknown;
    response?: unknown;
  };

  return (
    readString(request, 'model') ??
    readString(response, 'model') ??
    readString(response, 'modelVersion')
  );
}
|
||||
|
||||
/**
 * Extract token usage information from a log entry
 *
 * Two layouts are understood:
 *  - `response.usage` with `prompt_tokens` / `completion_tokens` /
 *    `total_tokens`;
 *  - `response.usageMetadata` with `promptTokenCount` /
 *    `candidatesTokenCount` / `totalTokenCount`, mapped onto the same
 *    snake_case field names.
 *
 * `usage` takes precedence when it is an object. Each count is kept only
 * when it is a finite number, so a malformed log cannot turn the caller's
 * running totals into strings or NaN.
 *
 * @param logData Parsed log entry of unknown shape
 * @returns The normalized counts, or `undefined` when the entry carries no
 *   usage object
 */
function getTokenUsageFromLog(logData: unknown):
  | {
      prompt_tokens?: number;
      completion_tokens?: number;
      total_tokens?: number;
    }
  | undefined {
  if (typeof logData !== 'object' || logData === null) {
    return undefined;
  }
  const response: unknown = (logData as { response?: unknown }).response;
  if (typeof response !== 'object' || response === null) {
    return undefined;
  }

  // Accept only finite numbers as token counts.
  const toCount = (value: unknown): number | undefined =>
    typeof value === 'number' && Number.isFinite(value) ? value : undefined;

  const { usage, usageMetadata } = response as {
    usage?: unknown;
    usageMetadata?: unknown;
  };

  if (typeof usage === 'object' && usage !== null) {
    const fields = usage as Record<string, unknown>;
    return {
      prompt_tokens: toCount(fields.prompt_tokens),
      completion_tokens: toCount(fields.completion_tokens),
      total_tokens: toCount(fields.total_tokens),
    };
  }

  if (typeof usageMetadata === 'object' && usageMetadata !== null) {
    const metadata = usageMetadata as Record<string, unknown>;
    return {
      prompt_tokens: toCount(metadata.promptTokenCount),
      completion_tokens: toCount(metadata.candidatesTokenCount),
      total_tokens: toCount(metadata.totalTokenCount),
    };
  }

  return undefined;
}
|
||||
|
||||
/**
 * Extract and categorize error types from a log entry
 *
 * The category is chosen by the first keyword found in `error.message`,
 * compared case-insensitively so that messages such as "Rate limit reached"
 * are recognized. A message that is missing or not a string is treated as
 * empty instead of throwing.
 *
 * @param logData Parsed log entry of unknown shape
 * @returns `'unknown'` when the entry has no truthy `error`, `'other'` when
 *   no keyword matches, otherwise one of the category names below
 */
function getErrorTypeFromLog(logData: unknown): string {
  if (typeof logData !== 'object' || logData === null) {
    return 'unknown';
  }
  const error: unknown = (logData as { error?: unknown }).error;
  if (!error) {
    return 'unknown';
  }

  const rawMessage: unknown =
    typeof error === 'object'
      ? (error as { message?: unknown }).message
      : undefined;
  const message = typeof rawMessage === 'string' ? rawMessage.toLowerCase() : '';

  // Ordered: the first matching keyword wins.
  const categories: ReadonlyArray<readonly [string, string]> = [
    ['rate limit', 'rate_limit'],
    ['timeout', 'timeout'],
    ['authentication', 'authentication'],
    ['quota', 'quota_exceeded'],
    ['invalid', 'invalid_request'],
    ['not available', 'model_unavailable'],
    ['content filter', 'content_filtered'],
  ];

  for (const [keyword, category] of categories) {
    if (message.includes(keyword)) {
      return category;
    }
  }
  return 'other';
}
|
||||
|
||||
// CLI interface when script is run directly
// NOTE(review): this string comparison assumes argv[1] is an absolute POSIX
// path with no characters that need URL escaping; confirm for other setups.
if (import.meta.url === `file://${process.argv[1]}`) {
  /**
   * Entry point: `<script> [days]`.
   * Saves the report for the requested window and echoes it to stdout.
   * An argument that is not a positive integer is rejected instead of being
   * forwarded as NaN, which would disable the date cutoff entirely.
   */
  async function main() {
    const args = process.argv.slice(2);
    const days = args[0] ? parseInt(args[0], 10) : 7;

    if (Number.isNaN(days) || days <= 0) {
      console.error(
        `Invalid number of days '${args[0]}'. Please provide a positive integer.`,
      );
      process.exitCode = 1;
      return;
    }

    try {
      const reportPath = await OpenAIAnalytics.saveReport(days);
      console.log(`Analytics report saved to: ${reportPath}`);

      // Also print to console
      const report = await OpenAIAnalytics.generateReport(days);
      console.log(report);
    } catch (error) {
      console.error('Error generating analytics report:', error);
    }
  }

  main().catch(console.error);
}

export default OpenAIAnalytics;
|
||||
@@ -1,199 +0,0 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import path from 'node:path';
|
||||
import { openaiLogger } from './openaiLogger.js';
|
||||
|
||||
/**
 * CLI utility for viewing and managing OpenAI logs
 */
export class OpenAILogViewer {
  /**
   * List all available OpenAI logs
   *
   * Prints one summary line per log: position, file name, request type,
   * OK/ERROR status and timestamp. Failures are reported with
   * `console.error` rather than thrown.
   *
   * @param limit Optional limit on the number of logs to display
   */
  static async listLogs(limit?: number): Promise<void> {
    try {
      const logs = await openaiLogger.getLogFiles(limit);

      if (logs.length === 0) {
        console.log('No OpenAI logs found');
        return;
      }

      console.log(`Found ${logs.length} OpenAI logs:`);
      for (let i = 0; i < logs.length; i++) {
        const filePath = logs[i];
        const filename = path.basename(filePath);
        const logData = await openaiLogger.readLogFile(filePath);

        // Type guard for logData
        if (typeof logData !== 'object' || logData === null) {
          console.log(`${i + 1}. ${filename} - Invalid log data`);
          continue;
        }
        const data = logData as Record<string, unknown>;

        // Format the log entry summary
        const requestType = getRequestType(data.request);
        const status = data.error ? 'ERROR' : 'OK';

        console.log(
          `${i + 1}. ${filename} - ${requestType} - ${status} - ${data.timestamp}`,
        );
      }
    } catch (error) {
      console.error('Error listing logs:', error);
    }
  }

  /**
   * View details of a specific log file
   *
   * Prints the log as pretty-printed JSON. A numeric identifier must be an
   * integer between 1 and the number of logs; NaN and fractional values are
   * rejected with the same message as out-of-range ones.
   *
   * @param identifier Either a log index (1-based) or a filename
   */
  static async viewLog(identifier: number | string): Promise<void> {
    try {
      let logFile: string | undefined;
      const logs = await openaiLogger.getLogFiles();

      if (logs.length === 0) {
        console.log('No OpenAI logs found');
        return;
      }

      if (typeof identifier === 'number') {
        // Adjust for 1-based indexing
        if (
          !Number.isInteger(identifier) ||
          identifier < 1 ||
          identifier > logs.length
        ) {
          console.error(
            `Invalid log index. Please provide a number between 1 and ${logs.length}`,
          );
          return;
        }
        logFile = logs[identifier - 1];
      } else {
        // Find by filename
        logFile = logs.find((log) => path.basename(log) === identifier);
        if (!logFile) {
          console.error(`Log file '${identifier}' not found`);
          return;
        }
      }

      const logData = await openaiLogger.readLogFile(logFile);
      console.log(JSON.stringify(logData, null, 2));
    } catch (error) {
      console.error('Error viewing log:', error);
    }
  }

  /**
   * Clean up old logs, keeping only the most recent ones
   *
   * Deletes every log after the first `keepCount` entries returned by
   * `openaiLogger.getLogFiles()`.
   * NOTE(review): "most recent" presumes that list is ordered newest first —
   * confirm against the logger.
   *
   * `keepCount` must be a non-negative integer. Anything else (NaN, negative,
   * fractional) is rejected, because `slice(NaN)` would otherwise select and
   * delete every log.
   *
   * @param keepCount Number of recent logs to keep
   */
  static async cleanupLogs(keepCount: number = 50): Promise<void> {
    try {
      if (!Number.isInteger(keepCount) || keepCount < 0) {
        console.error(
          `Invalid keepCount '${keepCount}'. Please provide a non-negative integer.`,
        );
        return;
      }

      const allLogs = await openaiLogger.getLogFiles();

      if (allLogs.length === 0) {
        console.log('No OpenAI logs found');
        return;
      }

      if (allLogs.length <= keepCount) {
        console.log(`Only ${allLogs.length} logs exist, no cleanup needed`);
        return;
      }

      const logsToDelete = allLogs.slice(keepCount);
      const fs = await import('node:fs/promises');

      for (const log of logsToDelete) {
        await fs.unlink(log);
      }

      console.log(
        `Deleted ${logsToDelete.length} old log files. Kept ${keepCount} most recent logs.`,
      );
    } catch (error) {
      console.error('Error cleaning up logs:', error);
    }
  }
}
|
||||
|
||||
/**
 * Helper function to determine the type of request in a log
 *
 * Returns 'unknown' for anything that is not a non-null object. Otherwise
 * the first matching rule wins: truthy `contents` -> 'generate_content';
 * a string `model` containing "embedding" or a truthy `input` ->
 * 'embedding'; a `countTokens` or `contents` key being present ->
 * 'count_tokens'; anything else -> 'api_call'.
 */
function getRequestType(request: unknown): string {
  const isRecord = typeof request === 'object' && request !== null;
  if (!isRecord) {
    return 'unknown';
  }

  const fields = request as Record<string, unknown>;

  if (fields.contents) {
    return 'generate_content';
  }

  const mentionsEmbedding =
    typeof fields.model === 'string' && fields.model.includes('embedding');
  if (mentionsEmbedding || fields.input) {
    return 'embedding';
  }

  const hasTokenCountKey = 'countTokens' in fields || 'contents' in fields;
  return hasTokenCountKey ? 'count_tokens' : 'api_call';
}
|
||||
|
||||
// CLI interface when script is run directly
// NOTE(review): this string comparison assumes argv[1] is an absolute POSIX
// path with no characters that need URL escaping; confirm for other setups.
if (import.meta.url === `file://${process.argv[1]}`) {
  /**
   * Parse a CLI argument as a non-negative integer.
   * Returns `undefined` when the text is not a usable count, so callers can
   * refuse it instead of forwarding NaN to the viewer.
   */
  const parseCount = (raw: string): number | undefined => {
    const value = parseInt(raw, 10);
    return Number.isNaN(value) || value < 0 ? undefined : value;
  };

  /**
   * Entry point: dispatches `list [limit]`, `view <index|file>` and
   * `cleanup [keepCount]`; any other command prints the usage text.
   */
  async function main() {
    const args = process.argv.slice(2);
    const command = args[0]?.toLowerCase();

    switch (command) {
      case 'list': {
        let limit: number | undefined;
        if (args[1]) {
          limit = parseCount(args[1]);
          if (limit === undefined) {
            console.error('Limit must be a non-negative integer');
            process.exit(1);
          }
        }
        await OpenAILogViewer.listLogs(limit);
        break;
      }

      case 'view': {
        const identifier = args[1];
        if (!identifier) {
          console.error('Please provide a log index or filename to view');
          process.exit(1);
        }
        // Numeric-looking identifiers are treated as 1-based indices.
        await OpenAILogViewer.viewLog(
          isNaN(Number(identifier)) ? identifier : Number(identifier),
        );
        break;
      }

      case 'cleanup': {
        let keepCount: number | undefined = 50;
        if (args[1]) {
          keepCount = parseCount(args[1]);
          if (keepCount === undefined) {
            // Refuse rather than risk deleting every log on a typo.
            console.error('keepCount must be a non-negative integer');
            process.exit(1);
          }
        }
        await OpenAILogViewer.cleanupLogs(keepCount);
        break;
      }

      default:
        console.log('OpenAI Log Viewer');
        console.log('----------------');
        console.log('Commands:');
        console.log(
          '  list [limit]        - List all logs, optionally limiting to the specified number',
        );
        console.log(
          '  view <index|file>   - View a specific log by index number or filename',
        );
        console.log(
          '  cleanup [keepCount] - Remove old logs, keeping only the specified number (default: 50)',
        );
        break;
    }
  }

  main().catch(console.error);
}

export default OpenAILogViewer;
|
||||
@@ -4,7 +4,6 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { Schema } from '@google/genai';
|
||||
import AjvPkg from 'ajv';
|
||||
// Ajv's ESM/CJS interop: use 'any' for compatibility as recommended by Ajv docs
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
@@ -19,50 +18,18 @@ export class SchemaValidator {
|
||||
* Returns null if the data conforms to the schema described by schema (or if schema
|
||||
* is null). Otherwise, returns a string describing the error.
|
||||
*/
|
||||
static validate(schema: Schema | undefined, data: unknown): string | null {
|
||||
static validate(schema: unknown | undefined, data: unknown): string | null {
|
||||
if (!schema) {
|
||||
return null;
|
||||
}
|
||||
if (typeof data !== 'object' || data === null) {
|
||||
return 'Value of params must be an object';
|
||||
}
|
||||
const validate = ajValidator.compile(this.toObjectSchema(schema));
|
||||
const validate = ajValidator.compile(schema);
|
||||
const valid = validate(data);
|
||||
if (!valid && validate.errors) {
|
||||
return ajValidator.errorsText(validate.errors, { dataVar: 'params' });
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts @google/genai's Schema to an object compatible with Ajv.
|
||||
* This is necessary because it represents Types as an Enum (with
|
||||
* UPPERCASE values) and minItems and minLength as strings, when they should be numbers.
|
||||
*/
|
||||
private static toObjectSchema(schema: Schema): object {
|
||||
const newSchema: Record<string, unknown> = { ...schema };
|
||||
if (newSchema.anyOf && Array.isArray(newSchema.anyOf)) {
|
||||
newSchema.anyOf = newSchema.anyOf.map((v) => this.toObjectSchema(v));
|
||||
}
|
||||
if (newSchema.items) {
|
||||
newSchema.items = this.toObjectSchema(newSchema.items);
|
||||
}
|
||||
if (newSchema.properties && typeof newSchema.properties === 'object') {
|
||||
const newProperties: Record<string, unknown> = {};
|
||||
for (const [key, value] of Object.entries(newSchema.properties)) {
|
||||
newProperties[key] = this.toObjectSchema(value as Schema);
|
||||
}
|
||||
newSchema.properties = newProperties;
|
||||
}
|
||||
if (newSchema.type) {
|
||||
newSchema.type = String(newSchema.type).toLowerCase();
|
||||
}
|
||||
if (newSchema.minItems) {
|
||||
newSchema.minItems = Number(newSchema.minItems);
|
||||
}
|
||||
if (newSchema.minLength) {
|
||||
newSchema.minLength = Number(newSchema.minLength);
|
||||
}
|
||||
return newSchema;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user