Compare commits

..

2 Commits

39 changed files with 666 additions and 962 deletions

View File

@@ -202,7 +202,7 @@ This is the most critical stage where files are moved and transformed into their
- Copies README.md and LICENSE to dist/
- Copies locales folder for internationalization
- Creates a clean package.json for distribution with only necessary dependencies
- Includes runtime dependencies like tiktoken
- Keeps distribution dependencies minimal (no bundled runtime deps)
- Maintains optional dependencies for node-pty
2. The JavaScript Bundle is Created:

View File

@@ -33,7 +33,6 @@ const external = [
'@lydell/node-pty-linux-x64',
'@lydell/node-pty-win32-arm64',
'@lydell/node-pty-win32-x64',
'tiktoken',
];
esbuild

20
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "@qwen-code/qwen-code",
"version": "0.7.1",
"version": "0.7.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@qwen-code/qwen-code",
"version": "0.7.1",
"version": "0.7.0",
"workspaces": [
"packages/*"
],
@@ -15682,12 +15682,6 @@
"tslib": "^2"
}
},
"node_modules/tiktoken": {
"version": "1.0.22",
"resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.22.tgz",
"integrity": "sha512-PKvy1rVF1RibfF3JlXBSP0Jrcw2uq3yXdgcEXtKTYn3QJ/cBRBHDnrJ5jHky+MENZ6DIPwNUGWpkVx+7joCpNA==",
"license": "MIT"
},
"node_modules/tinybench": {
"version": "2.9.0",
"resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz",
@@ -17310,7 +17304,7 @@
},
"packages/cli": {
"name": "@qwen-code/qwen-code",
"version": "0.7.1",
"version": "0.7.0",
"dependencies": {
"@google/genai": "1.30.0",
"@iarna/toml": "^2.2.5",
@@ -17947,7 +17941,7 @@
},
"packages/core": {
"name": "@qwen-code/qwen-code-core",
"version": "0.7.1",
"version": "0.7.0",
"hasInstallScript": true,
"dependencies": {
"@anthropic-ai/sdk": "^0.36.1",
@@ -17990,7 +17984,6 @@
"shell-quote": "^1.8.3",
"simple-git": "^3.28.0",
"strip-ansi": "^7.1.0",
"tiktoken": "^1.0.21",
"undici": "^6.22.0",
"uuid": "^9.0.1",
"ws": "^8.18.0"
@@ -18592,7 +18585,6 @@
"license": "Apache-2.0",
"dependencies": {
"@modelcontextprotocol/sdk": "^1.25.1",
"tiktoken": "^1.0.21",
"zod": "^3.25.0"
},
"devDependencies": {
@@ -21408,7 +21400,7 @@
},
"packages/test-utils": {
"name": "@qwen-code/qwen-code-test-utils",
"version": "0.7.1",
"version": "0.7.0",
"dev": true,
"license": "Apache-2.0",
"devDependencies": {
@@ -21420,7 +21412,7 @@
},
"packages/vscode-ide-companion": {
"name": "qwen-code-vscode-ide-companion",
"version": "0.7.1",
"version": "0.7.0",
"license": "LICENSE",
"dependencies": {
"@modelcontextprotocol/sdk": "^1.25.1",

View File

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code",
"version": "0.7.1",
"version": "0.7.0",
"engines": {
"node": ">=20.0.0"
},
@@ -13,7 +13,7 @@
"url": "git+https://github.com/QwenLM/qwen-code.git"
},
"config": {
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.7.1"
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.7.0"
},
"scripts": {
"start": "cross-env node scripts/start.js",

View File

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code",
"version": "0.7.1",
"version": "0.7.0",
"description": "Qwen Code",
"repository": {
"type": "git",
@@ -33,19 +33,20 @@
"dist"
],
"config": {
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.7.1"
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.7.0"
},
"dependencies": {
"@google/genai": "1.30.0",
"@iarna/toml": "^2.2.5",
"@qwen-code/qwen-code-core": "file:../core",
"@modelcontextprotocol/sdk": "^1.25.1",
"@qwen-code/qwen-code-core": "file:../core",
"@types/update-notifier": "^6.0.8",
"ansi-regex": "^6.2.2",
"command-exists": "^1.2.9",
"comment-json": "^4.2.5",
"diff": "^7.0.0",
"dotenv": "^17.1.0",
"extract-zip": "^2.0.1",
"fzf": "^0.5.2",
"glob": "^10.5.0",
"highlight.js": "^11.11.1",
@@ -65,7 +66,6 @@
"strip-json-comments": "^3.1.1",
"tar": "^7.5.2",
"undici": "^6.22.0",
"extract-zip": "^2.0.1",
"update-notifier": "^7.3.1",
"wrap-ansi": "9.0.2",
"yargs": "^17.7.2",
@@ -74,6 +74,7 @@
"devDependencies": {
"@babel/runtime": "^7.27.6",
"@google/gemini-cli-test-utils": "file:../test-utils",
"@qwen-code/qwen-code-test-utils": "file:../test-utils",
"@testing-library/react": "^16.3.0",
"@types/archiver": "^6.0.3",
"@types/command-exists": "^1.2.3",
@@ -92,8 +93,7 @@
"pretty-format": "^30.0.2",
"react-dom": "^19.1.0",
"typescript": "^5.3.3",
"vitest": "^3.1.1",
"@qwen-code/qwen-code-test-utils": "file:../test-utils"
"vitest": "^3.1.1"
},
"engines": {
"node": ">=20"

View File

@@ -874,10 +874,11 @@ export async function loadCliConfig(
}
};
// ACP mode check: must include both --acp (current) and --experimental-acp (deprecated).
// Without this check, edit, write_file, run_shell_command would be excluded in ACP mode.
const isAcpMode = argv.acp || argv.experimentalAcp;
if (!interactive && !isAcpMode && inputFormat !== InputFormat.STREAM_JSON) {
if (
!interactive &&
!argv.experimentalAcp &&
inputFormat !== InputFormat.STREAM_JSON
) {
switch (approvalMode) {
case ApprovalMode.PLAN:
case ApprovalMode.DEFAULT:

View File

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code-core",
"version": "0.7.1",
"version": "0.7.0",
"description": "Qwen Code Core",
"repository": {
"type": "git",
@@ -63,7 +63,6 @@
"shell-quote": "^1.8.3",
"simple-git": "^3.28.0",
"strip-ansi": "^7.1.0",
"tiktoken": "^1.0.21",
"undici": "^6.22.0",
"uuid": "^9.0.1",
"ws": "^8.18.0"

View File

@@ -404,7 +404,7 @@ export class Config {
private toolRegistry!: ToolRegistry;
private promptRegistry!: PromptRegistry;
private subagentManager!: SubagentManager;
private skillManager: SkillManager | null = null;
private skillManager!: SkillManager;
private fileSystemService: FileSystemService;
private contentGeneratorConfig!: ContentGeneratorConfig;
private contentGeneratorConfigSources: ContentGeneratorConfigSources = {};
@@ -672,10 +672,8 @@ export class Config {
}
this.promptRegistry = new PromptRegistry();
this.subagentManager = new SubagentManager(this);
if (this.getExperimentalSkills()) {
this.skillManager = new SkillManager(this);
await this.skillManager.startWatching();
}
this.skillManager = new SkillManager(this);
await this.skillManager.startWatching();
// Load session subagents if they were provided before initialization
if (this.sessionSubagents.length > 0) {
@@ -1441,7 +1439,7 @@ export class Config {
return this.subagentManager;
}
getSkillManager(): SkillManager | null {
getSkillManager(): SkillManager {
return this.skillManager;
}

View File

@@ -19,9 +19,7 @@ const mockTokenizer = {
};
vi.mock('../../utils/request-tokenizer/index.js', () => ({
getDefaultTokenizer: vi.fn(() => mockTokenizer),
DefaultRequestTokenizer: vi.fn(() => mockTokenizer),
disposeDefaultTokenizer: vi.fn(),
RequestTokenEstimator: vi.fn(() => mockTokenizer),
}));
type AnthropicCreateArgs = [unknown, { signal?: AbortSignal }?];
@@ -352,9 +350,7 @@ describe('AnthropicContentGenerator', () => {
};
const result = await generator.countTokens(request);
expect(mockTokenizer.calculateTokens).toHaveBeenCalledWith(request, {
textEncoding: 'cl100k_base',
});
expect(mockTokenizer.calculateTokens).toHaveBeenCalledWith(request);
expect(result.totalTokens).toBe(50);
});

View File

@@ -25,10 +25,9 @@ type MessageCreateParamsNonStreaming =
Anthropic.MessageCreateParamsNonStreaming;
type MessageCreateParamsStreaming = Anthropic.MessageCreateParamsStreaming;
type RawMessageStreamEvent = Anthropic.RawMessageStreamEvent;
import { getDefaultTokenizer } from '../../utils/request-tokenizer/index.js';
import { RequestTokenEstimator } from '../../utils/request-tokenizer/index.js';
import { safeJsonParse } from '../../utils/safeJsonParse.js';
import { AnthropicContentConverter } from './converter.js';
import { buildRuntimeFetchOptions } from '../../utils/runtimeFetchOptions.js';
type StreamingBlockState = {
type: string;
@@ -55,9 +54,6 @@ export class AnthropicContentGenerator implements ContentGenerator {
) {
const defaultHeaders = this.buildHeaders();
const baseURL = contentGeneratorConfig.baseUrl;
// Configure runtime options to ensure user-configured timeout works as expected
// bodyTimeout is always disabled (0) to let Anthropic SDK timeout control the request
const runtimeOptions = buildRuntimeFetchOptions('anthropic');
this.client = new Anthropic({
apiKey: contentGeneratorConfig.apiKey,
@@ -65,7 +61,6 @@ export class AnthropicContentGenerator implements ContentGenerator {
timeout: contentGeneratorConfig.timeout,
maxRetries: contentGeneratorConfig.maxRetries,
defaultHeaders,
...runtimeOptions,
});
this.converter = new AnthropicContentConverter(
@@ -110,10 +105,8 @@ export class AnthropicContentGenerator implements ContentGenerator {
request: CountTokensParameters,
): Promise<CountTokensResponse> {
try {
const tokenizer = getDefaultTokenizer();
const result = await tokenizer.calculateTokens(request, {
textEncoding: 'cl100k_base',
});
const estimator = new RequestTokenEstimator();
const result = await estimator.calculateTokens(request);
return {
totalTokens: result.totalTokens,

View File

@@ -153,6 +153,26 @@ vi.mock('../telemetry/loggers.js', () => ({
logNextSpeakerCheck: vi.fn(),
}));
// Stub out RequestTokenizer so token counts are derived purely from the
// serialized size of the request contents (4 characters per token, rounded
// down) instead of real tokenization.
// NOTE(review): other modules in this change import `RequestTokenEstimator`
// from request-tokenizer/index.js — confirm this module path and the
// `RequestTokenizer` export name are still the ones the client resolves.
vi.mock('../utils/request-tokenizer/requestTokenizer.js', () => {
  class RequestTokenizer {
    /**
     * Returns a character-based estimate for `request.contents`.
     * All estimated tokens are attributed to text; image, audio and other
     * counts are reported as zero, and processing time is reported as 0.
     */
    async calculateTokens(request: { contents: unknown }) {
      const serializedLength = JSON.stringify(request.contents).length;
      const estimatedTokens = Math.floor(serializedLength / 4);
      return {
        totalTokens: estimatedTokens,
        breakdown: {
          textTokens: estimatedTokens,
          imageTokens: 0,
          audioTokens: 0,
          otherTokens: 0,
        },
        processingTime: 0,
      };
    }
  }

  return { RequestTokenizer };
});
/**
* Array.fromAsync ponyfill, which will be available in es 2024.
*
@@ -417,6 +437,12 @@ describe('Gemini Client (client.ts)', () => {
] as Content[],
originalTokenCount = 1000,
summaryText = 'This is a summary.',
// Token counts returned in usageMetadata to simulate what the API would return
// Default values ensure successful compression:
// newTokenCount = originalTokenCount - (compressionInputTokenCount - 1000) + compressionOutputTokenCount
// = 1000 - (1600 - 1000) + 50 = 1000 - 600 + 50 = 450 (< 1000, success)
compressionInputTokenCount = 1600,
compressionOutputTokenCount = 50,
} = {}) {
const mockOriginalChat: Partial<GeminiChat> = {
getHistory: vi.fn((_curated?: boolean) => chatHistory),
@@ -438,6 +464,12 @@ describe('Gemini Client (client.ts)', () => {
},
},
],
usageMetadata: {
promptTokenCount: compressionInputTokenCount,
candidatesTokenCount: compressionOutputTokenCount,
totalTokenCount:
compressionInputTokenCount + compressionOutputTokenCount,
},
} as unknown as GenerateContentResponse);
// Calculate what the new history will be
@@ -477,11 +509,13 @@ describe('Gemini Client (client.ts)', () => {
.fn()
.mockResolvedValue(mockNewChat as GeminiChat);
const totalChars = newCompressedHistory.reduce(
(total, content) => total + JSON.stringify(content).length,
// New token count formula: originalTokenCount - (compressionInputTokenCount - 1000) + compressionOutputTokenCount
const estimatedNewTokenCount = Math.max(
0,
originalTokenCount -
(compressionInputTokenCount - 1000) +
compressionOutputTokenCount,
);
const estimatedNewTokenCount = Math.floor(totalChars / 4);
return {
client,
@@ -493,49 +527,58 @@ describe('Gemini Client (client.ts)', () => {
describe('when compression inflates the token count', () => {
it('allows compression to be forced/manual after a failure', async () => {
// Call 1 (Fails): Setup with a long summary to inflate tokens
// Call 1 (Fails): Setup with token counts that will inflate
// newTokenCount = originalTokenCount - (compressionInputTokenCount - 1000) + compressionOutputTokenCount
// = 100 - (1010 - 1000) + 200 = 100 - 10 + 200 = 290 > 100 (inflation)
const longSummary = 'long summary '.repeat(100);
const { client, estimatedNewTokenCount: inflatedTokenCount } = setup({
originalTokenCount: 100,
summaryText: longSummary,
compressionInputTokenCount: 1010,
compressionOutputTokenCount: 200,
});
expect(inflatedTokenCount).toBeGreaterThan(100); // Ensure setup is correct
await client.tryCompressChat('prompt-id-4', false); // Fails
// Call 2 (Forced): Re-setup with a short summary
// Call 2 (Forced): Re-setup with token counts that will compress
// newTokenCount = 100 - (1100 - 1000) + 50 = 100 - 100 + 50 = 50 <= 100 (compression)
const shortSummary = 'short';
const { estimatedNewTokenCount: compressedTokenCount } = setup({
originalTokenCount: 100,
summaryText: shortSummary,
compressionInputTokenCount: 1100,
compressionOutputTokenCount: 50,
});
expect(compressedTokenCount).toBeLessThanOrEqual(100); // Ensure setup is correct
const result = await client.tryCompressChat('prompt-id-4', true); // Forced
expect(result).toEqual({
compressionStatus: CompressionStatus.COMPRESSED,
newTokenCount: compressedTokenCount,
originalTokenCount: 100,
});
expect(result.compressionStatus).toBe(CompressionStatus.COMPRESSED);
expect(result.originalTokenCount).toBe(100);
// newTokenCount might be clamped to originalTokenCount due to tolerance logic
expect(result.newTokenCount).toBeLessThanOrEqual(100);
});
it('yields the result even if the compression inflated the tokens', async () => {
// newTokenCount = 100 - (1010 - 1000) + 200 = 100 - 10 + 200 = 290 > 100 (inflation)
const longSummary = 'long summary '.repeat(100);
const { client, estimatedNewTokenCount } = setup({
originalTokenCount: 100,
summaryText: longSummary,
compressionInputTokenCount: 1010,
compressionOutputTokenCount: 200,
});
expect(estimatedNewTokenCount).toBeGreaterThan(100); // Ensure setup is correct
const result = await client.tryCompressChat('prompt-id-4', false);
expect(result).toEqual({
compressionStatus:
CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT,
newTokenCount: estimatedNewTokenCount,
originalTokenCount: 100,
});
expect(result.compressionStatus).toBe(
CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT,
);
expect(result.originalTokenCount).toBe(100);
// The newTokenCount should be higher than original since compression failed due to inflation
expect(result.newTokenCount).toBeGreaterThan(100);
// IMPORTANT: The change in client.ts means setLastPromptTokenCount is NOT called on failure
expect(
uiTelemetryService.setLastPromptTokenCount,
@@ -543,10 +586,13 @@ describe('Gemini Client (client.ts)', () => {
});
it('does not manipulate the source chat', async () => {
// newTokenCount = 100 - (1010 - 1000) + 200 = 100 - 10 + 200 = 290 > 100 (inflation)
const longSummary = 'long summary '.repeat(100);
const { client, mockOriginalChat, estimatedNewTokenCount } = setup({
originalTokenCount: 100,
summaryText: longSummary,
compressionInputTokenCount: 1010,
compressionOutputTokenCount: 200,
});
expect(estimatedNewTokenCount).toBeGreaterThan(100); // Ensure setup is correct
@@ -557,10 +603,13 @@ describe('Gemini Client (client.ts)', () => {
});
it('will not attempt to compress context after a failure', async () => {
// newTokenCount = 100 - (1010 - 1000) + 200 = 100 - 10 + 200 = 290 > 100 (inflation)
const longSummary = 'long summary '.repeat(100);
const { client, estimatedNewTokenCount } = setup({
originalTokenCount: 100,
summaryText: longSummary,
compressionInputTokenCount: 1010,
compressionOutputTokenCount: 200,
});
expect(estimatedNewTokenCount).toBeGreaterThan(100); // Ensure setup is correct
@@ -631,6 +680,7 @@ describe('Gemini Client (client.ts)', () => {
);
// Mock the summary response from the chat
// newTokenCount = 501 - (1400 - 1000) + 50 = 501 - 400 + 50 = 151 <= 501 (success)
const summaryText = 'This is a summary.';
mockGenerateContentFn.mockResolvedValue({
candidates: [
@@ -641,6 +691,11 @@ describe('Gemini Client (client.ts)', () => {
},
},
],
usageMetadata: {
promptTokenCount: 1400,
candidatesTokenCount: 50,
totalTokenCount: 1450,
},
} as unknown as GenerateContentResponse);
// Mock startChat to complete the compression flow
@@ -719,13 +774,8 @@ describe('Gemini Client (client.ts)', () => {
.fn()
.mockResolvedValue(mockNewChat as GeminiChat);
const totalChars = newCompressedHistory.reduce(
(total, content) => total + JSON.stringify(content).length,
0,
);
const newTokenCount = Math.floor(totalChars / 4);
// Mock the summary response from the chat
// newTokenCount = 501 - (1400 - 1000) + 50 = 501 - 400 + 50 = 151 <= 501 (success)
mockGenerateContentFn.mockResolvedValue({
candidates: [
{
@@ -735,6 +785,11 @@ describe('Gemini Client (client.ts)', () => {
},
},
],
usageMetadata: {
promptTokenCount: 1400,
candidatesTokenCount: 50,
totalTokenCount: 1450,
},
} as unknown as GenerateContentResponse);
const initialChat = client.getChat();
@@ -744,12 +799,11 @@ describe('Gemini Client (client.ts)', () => {
expect(tokenLimit).toHaveBeenCalled();
expect(mockGenerateContentFn).toHaveBeenCalled();
// Assert that summarization happened and returned the correct stats
expect(result).toEqual({
compressionStatus: CompressionStatus.COMPRESSED,
originalTokenCount,
newTokenCount,
});
// Assert that summarization happened
expect(result.compressionStatus).toBe(CompressionStatus.COMPRESSED);
expect(result.originalTokenCount).toBe(originalTokenCount);
// newTokenCount might be clamped to originalTokenCount due to tolerance logic
expect(result.newTokenCount).toBeLessThanOrEqual(originalTokenCount);
// Assert that the chat was reset
expect(newChat).not.toBe(initialChat);
@@ -809,13 +863,8 @@ describe('Gemini Client (client.ts)', () => {
.fn()
.mockResolvedValue(mockNewChat as GeminiChat);
const totalChars = newCompressedHistory.reduce(
(total, content) => total + JSON.stringify(content).length,
0,
);
const newTokenCount = Math.floor(totalChars / 4);
// Mock the summary response from the chat
// newTokenCount = 700 - (1500 - 1000) + 50 = 700 - 500 + 50 = 250 <= 700 (success)
mockGenerateContentFn.mockResolvedValue({
candidates: [
{
@@ -825,6 +874,11 @@ describe('Gemini Client (client.ts)', () => {
},
},
],
usageMetadata: {
promptTokenCount: 1500,
candidatesTokenCount: 50,
totalTokenCount: 1550,
},
} as unknown as GenerateContentResponse);
const initialChat = client.getChat();
@@ -834,12 +888,11 @@ describe('Gemini Client (client.ts)', () => {
expect(tokenLimit).toHaveBeenCalled();
expect(mockGenerateContentFn).toHaveBeenCalled();
// Assert that summarization happened and returned the correct stats
expect(result).toEqual({
compressionStatus: CompressionStatus.COMPRESSED,
originalTokenCount,
newTokenCount,
});
// Assert that summarization happened
expect(result.compressionStatus).toBe(CompressionStatus.COMPRESSED);
expect(result.originalTokenCount).toBe(originalTokenCount);
// newTokenCount might be clamped to originalTokenCount due to tolerance logic
expect(result.newTokenCount).toBeLessThanOrEqual(originalTokenCount);
// Assert that the chat was reset
expect(newChat).not.toBe(initialChat);
@@ -887,13 +940,8 @@ describe('Gemini Client (client.ts)', () => {
.fn()
.mockResolvedValue(mockNewChat as GeminiChat);
const totalChars = newCompressedHistory.reduce(
(total, content) => total + JSON.stringify(content).length,
0,
);
const newTokenCount = Math.floor(totalChars / 4);
// Mock the summary response from the chat
// newTokenCount = 100 - (1060 - 1000) + 20 = 100 - 60 + 20 = 60 <= 100 (success)
mockGenerateContentFn.mockResolvedValue({
candidates: [
{
@@ -903,6 +951,11 @@ describe('Gemini Client (client.ts)', () => {
},
},
],
usageMetadata: {
promptTokenCount: 1060,
candidatesTokenCount: 20,
totalTokenCount: 1080,
},
} as unknown as GenerateContentResponse);
const initialChat = client.getChat();
@@ -911,11 +964,10 @@ describe('Gemini Client (client.ts)', () => {
expect(mockGenerateContentFn).toHaveBeenCalled();
expect(result).toEqual({
compressionStatus: CompressionStatus.COMPRESSED,
originalTokenCount,
newTokenCount,
});
expect(result.compressionStatus).toBe(CompressionStatus.COMPRESSED);
expect(result.originalTokenCount).toBe(originalTokenCount);
// newTokenCount might be clamped to originalTokenCount due to tolerance logic
expect(result.newTokenCount).toBeLessThanOrEqual(originalTokenCount);
// Assert that the chat was reset
expect(newChat).not.toBe(initialChat);

View File

@@ -441,47 +441,19 @@ export class GeminiClient {
yield { type: GeminiEventType.ChatCompressed, value: compressed };
}
// Check session token limit after compression using accurate token counting
// Check session token limit after compression.
// `lastPromptTokenCount` is treated as authoritative for the (possibly compressed) history
// and is compared directly against the session limit below.
const sessionTokenLimit = this.config.getSessionTokenLimit();
if (sessionTokenLimit > 0) {
// Get all the content that would be sent in an API call
const currentHistory = this.getChat().getHistory(true);
const userMemory = this.config.getUserMemory();
const systemPrompt = getCoreSystemPrompt(
userMemory,
this.config.getModel(),
);
const initialHistory = await getInitialChatHistory(this.config);
// Create a mock request content to count total tokens
const mockRequestContent = [
{
role: 'system' as const,
parts: [{ text: systemPrompt }],
},
...initialHistory,
...currentHistory,
];
// Use the improved countTokens method for accurate counting
const { totalTokens: totalRequestTokens } = await this.config
.getContentGenerator()
.countTokens({
model: this.config.getModel(),
contents: mockRequestContent,
});
if (
totalRequestTokens !== undefined &&
totalRequestTokens > sessionTokenLimit
) {
const lastPromptTokenCount = uiTelemetryService.getLastPromptTokenCount();
if (lastPromptTokenCount > sessionTokenLimit) {
yield {
type: GeminiEventType.SessionTokenLimitExceeded,
value: {
currentTokens: totalRequestTokens,
currentTokens: lastPromptTokenCount,
limit: sessionTokenLimit,
message:
`Session token limit exceeded: ${totalRequestTokens} tokens > ${sessionTokenLimit} limit. ` +
`Session token limit exceeded: ${lastPromptTokenCount} tokens > ${sessionTokenLimit} limit. ` +
'Please start a new session or increase the sessionTokenLimit in your settings.json.',
},
};

View File

@@ -270,28 +270,28 @@ export function createContentGeneratorConfig(
}
export async function createContentGenerator(
generatorConfig: ContentGeneratorConfig,
config: Config,
config: ContentGeneratorConfig,
gcConfig: Config,
isInitialAuth?: boolean,
): Promise<ContentGenerator> {
const validation = validateModelConfig(generatorConfig, false);
const validation = validateModelConfig(config, false);
if (!validation.valid) {
throw new Error(validation.errors.map((e) => e.message).join('\n'));
}
const authType = generatorConfig.authType;
if (!authType) {
throw new Error('ContentGeneratorConfig must have an authType');
}
let baseGenerator: ContentGenerator;
if (authType === AuthType.USE_OPENAI) {
if (config.authType === AuthType.USE_OPENAI) {
// Import OpenAIContentGenerator dynamically to avoid circular dependencies
const { createOpenAIContentGenerator } = await import(
'./openaiContentGenerator/index.js'
);
baseGenerator = createOpenAIContentGenerator(generatorConfig, config);
} else if (authType === AuthType.QWEN_OAUTH) {
// Always use OpenAIContentGenerator, logging is controlled by enableOpenAILogging flag
const generator = createOpenAIContentGenerator(config, gcConfig);
return new LoggingContentGenerator(generator, gcConfig);
}
if (config.authType === AuthType.QWEN_OAUTH) {
// Import required classes dynamically
const { getQwenOAuthClient: getQwenOauthClient } = await import(
'../qwen/qwenOAuth2.js'
);
@@ -300,38 +300,44 @@ export async function createContentGenerator(
);
try {
// Get the Qwen OAuth client (now includes integrated token management)
// If this is initial auth, require cached credentials to detect missing credentials
const qwenClient = await getQwenOauthClient(
config,
gcConfig,
isInitialAuth ? { requireCachedCredentials: true } : undefined,
);
baseGenerator = new QwenContentGenerator(
qwenClient,
generatorConfig,
config,
);
// Create the content generator with dynamic token management
const generator = new QwenContentGenerator(qwenClient, config, gcConfig);
return new LoggingContentGenerator(generator, gcConfig);
} catch (error) {
throw new Error(
`${error instanceof Error ? error.message : String(error)}`,
);
}
} else if (authType === AuthType.USE_ANTHROPIC) {
}
if (config.authType === AuthType.USE_ANTHROPIC) {
const { createAnthropicContentGenerator } = await import(
'./anthropicContentGenerator/index.js'
);
baseGenerator = createAnthropicContentGenerator(generatorConfig, config);
} else if (
authType === AuthType.USE_GEMINI ||
authType === AuthType.USE_VERTEX_AI
const generator = createAnthropicContentGenerator(config, gcConfig);
return new LoggingContentGenerator(generator, gcConfig);
}
if (
config.authType === AuthType.USE_GEMINI ||
config.authType === AuthType.USE_VERTEX_AI
) {
const { createGeminiContentGenerator } = await import(
'./geminiContentGenerator/index.js'
);
baseGenerator = createGeminiContentGenerator(generatorConfig, config);
} else {
throw new Error(
`Error creating contentGenerator: Unsupported authType: ${authType}`,
);
const generator = createGeminiContentGenerator(config, gcConfig);
return new LoggingContentGenerator(generator, gcConfig);
}
return new LoggingContentGenerator(baseGenerator, config, generatorConfig);
throw new Error(
`Error creating contentGenerator: Unsupported authType: ${config.authType}`,
);
}

View File

@@ -708,7 +708,7 @@ describe('GeminiChat', () => {
// Verify that token counting is called when usageMetadata is present
expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledWith(
42,
57,
);
expect(uiTelemetryService.setLastPromptTokenCount).toHaveBeenCalledTimes(
1,

View File

@@ -529,10 +529,10 @@ export class GeminiChat {
// Collect token usage for consolidated recording
if (chunk.usageMetadata) {
usageMetadata = chunk.usageMetadata;
if (chunk.usageMetadata.promptTokenCount !== undefined) {
uiTelemetryService.setLastPromptTokenCount(
chunk.usageMetadata.promptTokenCount,
);
const lastPromptTokenCount =
usageMetadata.totalTokenCount ?? usageMetadata.promptTokenCount;
if (lastPromptTokenCount) {
uiTelemetryService.setLastPromptTokenCount(lastPromptTokenCount);
}
}

View File

@@ -12,7 +12,6 @@ import type {
import { GenerateContentResponse } from '@google/genai';
import type { Config } from '../../config/config.js';
import type { ContentGenerator } from '../contentGenerator.js';
import { AuthType } from '../contentGenerator.js';
import { LoggingContentGenerator } from './index.js';
import { OpenAIContentConverter } from '../openaiContentGenerator/converter.js';
import {
@@ -51,17 +50,14 @@ const convertGeminiResponseToOpenAISpy = vi
choices: [],
} as OpenAI.Chat.ChatCompletion);
const createConfig = (overrides: Record<string, unknown> = {}): Config => {
const configContent = {
authType: 'openai',
enableOpenAILogging: false,
...overrides,
};
return {
getContentGeneratorConfig: () => configContent,
getAuthType: () => configContent.authType as AuthType | undefined,
} as Config;
};
/**
 * Builds a minimal `Config` stub for the LoggingContentGenerator tests.
 *
 * The stub exposes only `getContentGeneratorConfig`, which returns a freshly
 * built object on every call: `authType: 'openai'` and
 * `enableOpenAILogging: false` by default, with `overrides` applied on top.
 *
 * @param overrides - Fields to add to, or replace in, the default generator config.
 * @returns The stub, cast to `Config`.
 */
const createConfig = (overrides: Record<string, unknown> = {}): Config => {
  return {
    getContentGeneratorConfig() {
      return {
        authType: 'openai',
        enableOpenAILogging: false,
        ...overrides,
      };
    },
  } as Config;
};
const createWrappedGenerator = (
generateContent: ContentGenerator['generateContent'],
@@ -128,17 +124,13 @@ describe('LoggingContentGenerator', () => {
),
vi.fn(),
);
const generatorConfig = {
model: 'test-model',
authType: AuthType.USE_OPENAI,
enableOpenAILogging: true,
openAILoggingDir: 'logs',
schemaCompliance: 'openapi_30' as const,
};
const generator = new LoggingContentGenerator(
wrapped,
createConfig(),
generatorConfig,
createConfig({
enableOpenAILogging: true,
openAILoggingDir: 'logs',
schemaCompliance: 'openapi_30',
}),
);
const request = {
@@ -233,15 +225,9 @@ describe('LoggingContentGenerator', () => {
vi.fn().mockRejectedValue(error),
vi.fn(),
);
const generatorConfig = {
model: 'test-model',
authType: AuthType.USE_OPENAI,
enableOpenAILogging: true,
};
const generator = new LoggingContentGenerator(
wrapped,
createConfig(),
generatorConfig,
createConfig({ enableOpenAILogging: true }),
);
const request = {
@@ -307,15 +293,9 @@ describe('LoggingContentGenerator', () => {
})(),
),
);
const generatorConfig = {
model: 'test-model',
authType: AuthType.USE_OPENAI,
enableOpenAILogging: true,
};
const generator = new LoggingContentGenerator(
wrapped,
createConfig(),
generatorConfig,
createConfig({ enableOpenAILogging: true }),
);
const request = {
@@ -365,15 +345,9 @@ describe('LoggingContentGenerator', () => {
})(),
),
);
const generatorConfig = {
model: 'test-model',
authType: AuthType.USE_OPENAI,
enableOpenAILogging: true,
};
const generator = new LoggingContentGenerator(
wrapped,
createConfig(),
generatorConfig,
createConfig({ enableOpenAILogging: true }),
);
const request = {

View File

@@ -31,10 +31,7 @@ import {
logApiRequest,
logApiResponse,
} from '../../telemetry/loggers.js';
import type {
ContentGenerator,
ContentGeneratorConfig,
} from '../contentGenerator.js';
import type { ContentGenerator } from '../contentGenerator.js';
import { isStructuredError } from '../../utils/quotaErrorDetection.js';
import { OpenAIContentConverter } from '../openaiContentGenerator/converter.js';
import { OpenAILogger } from '../../utils/openaiLogger.js';
@@ -53,11 +50,9 @@ export class LoggingContentGenerator implements ContentGenerator {
constructor(
private readonly wrapped: ContentGenerator,
private readonly config: Config,
generatorConfig: ContentGeneratorConfig,
) {
// Extract fields needed for initialization from passed config
// (config.getContentGeneratorConfig() may not be available yet during refreshAuth)
if (generatorConfig.enableOpenAILogging) {
const generatorConfig = this.config.getContentGeneratorConfig();
if (generatorConfig?.enableOpenAILogging) {
this.openaiLogger = new OpenAILogger(generatorConfig.openAILoggingDir);
this.schemaCompliance = generatorConfig.schemaCompliance;
}
@@ -94,7 +89,7 @@ export class LoggingContentGenerator implements ContentGenerator {
model,
durationMs,
prompt_id,
this.config.getAuthType(),
this.config.getContentGeneratorConfig()?.authType,
usageMetadata,
responseText,
),
@@ -131,7 +126,7 @@ export class LoggingContentGenerator implements ContentGenerator {
errorMessage,
durationMs,
prompt_id,
this.config.getAuthType(),
this.config.getContentGeneratorConfig()?.authType,
errorType,
errorStatus,
),

View File

@@ -22,17 +22,7 @@ const mockTokenizer = {
};
vi.mock('../../../utils/request-tokenizer/index.js', () => ({
getDefaultTokenizer: vi.fn(() => mockTokenizer),
DefaultRequestTokenizer: vi.fn(() => mockTokenizer),
disposeDefaultTokenizer: vi.fn(),
}));
// Mock tiktoken as well for completeness
vi.mock('tiktoken', () => ({
get_encoding: vi.fn(() => ({
encode: vi.fn(() => new Array(50)), // Mock 50 tokens
free: vi.fn(),
})),
RequestTokenEstimator: vi.fn(() => mockTokenizer),
}));
// Now import the modules that depend on the mocked modules
@@ -134,7 +124,7 @@ describe('OpenAIContentGenerator (Refactored)', () => {
});
describe('countTokens', () => {
it('should count tokens using tiktoken', async () => {
it('should count tokens using character-based estimation', async () => {
const request: CountTokensParameters = {
contents: [{ role: 'user', parts: [{ text: 'Hello world' }] }],
model: 'gpt-4',
@@ -142,26 +132,27 @@ describe('OpenAIContentGenerator (Refactored)', () => {
const result = await generator.countTokens(request);
expect(result.totalTokens).toBe(50); // Mocked value
// 'Hello world' = 11 ASCII chars
// 11 / 4 = 2.75 -> ceil = 3 tokens
expect(result.totalTokens).toBe(3);
});
it('should fall back to character approximation if tiktoken fails', async () => {
// Mock tiktoken to throw error
vi.doMock('tiktoken', () => ({
get_encoding: vi.fn().mockImplementation(() => {
throw new Error('Tiktoken failed');
}),
}));
it('should handle multimodal content', async () => {
const request: CountTokensParameters = {
contents: [{ role: 'user', parts: [{ text: 'Hello world' }] }],
contents: [
{
role: 'user',
parts: [{ text: 'Hello' }, { text: ' world' }],
},
],
model: 'gpt-4',
};
const result = await generator.countTokens(request);
// Should use character approximation (content length / 4)
expect(result.totalTokens).toBeGreaterThan(0);
// Parts are combined for estimation:
// 'Hello world' = 11 ASCII chars -> 11/4 = 2.75 -> ceil = 3 tokens
expect(result.totalTokens).toBe(3);
});
});

View File

@@ -12,7 +12,7 @@ import type {
import type { PipelineConfig } from './pipeline.js';
import { ContentGenerationPipeline } from './pipeline.js';
import { EnhancedErrorHandler } from './errorHandler.js';
import { getDefaultTokenizer } from '../../utils/request-tokenizer/index.js';
import { RequestTokenEstimator } from '../../utils/request-tokenizer/index.js';
import type { ContentGeneratorConfig } from '../contentGenerator.js';
export class OpenAIContentGenerator implements ContentGenerator {
@@ -68,11 +68,9 @@ export class OpenAIContentGenerator implements ContentGenerator {
request: CountTokensParameters,
): Promise<CountTokensResponse> {
try {
// Use the new high-performance request tokenizer
const tokenizer = getDefaultTokenizer();
const result = await tokenizer.calculateTokens(request, {
textEncoding: 'cl100k_base', // Use GPT-4 encoding for consistency
});
// Use the request token estimator (character-based).
const estimator = new RequestTokenEstimator();
const result = await estimator.calculateTokens(request);
return {
totalTokens: result.totalTokens,

View File

@@ -16,7 +16,6 @@ import type {
ChatCompletionContentPartWithCache,
ChatCompletionToolWithCache,
} from './types.js';
import { buildRuntimeFetchOptions } from '../../../utils/runtimeFetchOptions.js';
export class DashScopeOpenAICompatibleProvider
implements OpenAICompatibleProvider
@@ -69,16 +68,12 @@ export class DashScopeOpenAICompatibleProvider
maxRetries = DEFAULT_MAX_RETRIES,
} = this.contentGeneratorConfig;
const defaultHeaders = this.buildHeaders();
// Configure fetch options to ensure user-configured timeout works as expected
// bodyTimeout is always disabled (0) to let OpenAI SDK timeout control the request
const fetchOptions = buildRuntimeFetchOptions('openai');
return new OpenAI({
apiKey,
baseURL: baseUrl,
timeout,
maxRetries,
defaultHeaders,
...(fetchOptions ? { fetchOptions } : {}),
});
}

View File

@@ -4,7 +4,6 @@ import type { Config } from '../../../config/config.js';
import type { ContentGeneratorConfig } from '../../contentGenerator.js';
import { DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES } from '../constants.js';
import type { OpenAICompatibleProvider } from './types.js';
import { buildRuntimeFetchOptions } from '../../../utils/runtimeFetchOptions.js';
/**
* Default provider for standard OpenAI-compatible APIs
@@ -44,16 +43,12 @@ export class DefaultOpenAICompatibleProvider
maxRetries = DEFAULT_MAX_RETRIES,
} = this.contentGeneratorConfig;
const defaultHeaders = this.buildHeaders();
// Configure fetch options to ensure user-configured timeout works as expected
// bodyTimeout is always disabled (0) to let OpenAI SDK timeout control the request
const fetchOptions = buildRuntimeFetchOptions('openai');
return new OpenAI({
apiKey,
baseURL: baseUrl,
timeout,
maxRetries,
defaultHeaders,
...(fetchOptions ? { fetchOptions } : {}),
});
}

View File

@@ -15,13 +15,11 @@ import { uiTelemetryService } from '../telemetry/uiTelemetry.js';
import { tokenLimit } from '../core/tokenLimits.js';
import type { GeminiChat } from '../core/geminiChat.js';
import type { Config } from '../config/config.js';
import { getInitialChatHistory } from '../utils/environmentContext.js';
import type { ContentGenerator } from '../core/contentGenerator.js';
vi.mock('../telemetry/uiTelemetry.js');
vi.mock('../core/tokenLimits.js');
vi.mock('../telemetry/loggers.js');
vi.mock('../utils/environmentContext.js');
describe('findCompressSplitPoint', () => {
it('should throw an error for non-positive numbers', () => {
@@ -122,9 +120,6 @@ describe('ChatCompressionService', () => {
vi.mocked(tokenLimit).mockReturnValue(1000);
vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(500);
vi.mocked(getInitialChatHistory).mockImplementation(
async (_config, extraHistory) => extraHistory || [],
);
});
afterEach(() => {
@@ -241,6 +236,7 @@ describe('ChatCompressionService', () => {
vi.mocked(mockChat.getHistory).mockReturnValue(history);
vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(800);
vi.mocked(tokenLimit).mockReturnValue(1000);
// newTokenCount = 800 - (1600 - 1000) + 50 = 800 - 600 + 50 = 250 <= 800 (success)
const mockGenerateContent = vi.fn().mockResolvedValue({
candidates: [
{
@@ -249,6 +245,11 @@ describe('ChatCompressionService', () => {
},
},
],
usageMetadata: {
promptTokenCount: 1600,
candidatesTokenCount: 50,
totalTokenCount: 1650,
},
} as unknown as GenerateContentResponse);
vi.mocked(mockConfig.getContentGenerator).mockReturnValue({
generateContent: mockGenerateContent,
@@ -264,6 +265,7 @@ describe('ChatCompressionService', () => {
);
expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED);
expect(result.info.newTokenCount).toBe(250); // 800 - (1600 - 1000) + 50
expect(result.newHistory).not.toBeNull();
expect(result.newHistory![0].parts![0].text).toBe('Summary');
expect(mockGenerateContent).toHaveBeenCalled();
@@ -280,6 +282,7 @@ describe('ChatCompressionService', () => {
vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(100);
vi.mocked(tokenLimit).mockReturnValue(1000);
// newTokenCount = 100 - (1100 - 1000) + 50 = 100 - 100 + 50 = 50 <= 100 (success)
const mockGenerateContent = vi.fn().mockResolvedValue({
candidates: [
{
@@ -288,6 +291,11 @@ describe('ChatCompressionService', () => {
},
},
],
usageMetadata: {
promptTokenCount: 1100,
candidatesTokenCount: 50,
totalTokenCount: 1150,
},
} as unknown as GenerateContentResponse);
vi.mocked(mockConfig.getContentGenerator).mockReturnValue({
generateContent: mockGenerateContent,
@@ -315,15 +323,19 @@ describe('ChatCompressionService', () => {
vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(10);
vi.mocked(tokenLimit).mockReturnValue(1000);
const longSummary = 'a'.repeat(1000); // Long summary to inflate token count
const mockGenerateContent = vi.fn().mockResolvedValue({
candidates: [
{
content: {
parts: [{ text: longSummary }],
parts: [{ text: 'Summary' }],
},
},
],
usageMetadata: {
promptTokenCount: 1,
candidatesTokenCount: 20,
totalTokenCount: 21,
},
} as unknown as GenerateContentResponse);
vi.mocked(mockConfig.getContentGenerator).mockReturnValue({
generateContent: mockGenerateContent,
@@ -344,6 +356,48 @@ describe('ChatCompressionService', () => {
expect(result.newHistory).toBeNull();
});
it('should return FAILED if usage metadata is missing', async () => {
  // Four alternating user/model turns serve as the chat history under test.
  const chatHistory: Content[] = [
    { role: 'user', parts: [{ text: 'msg1' }] },
    { role: 'model', parts: [{ text: 'msg2' }] },
    { role: 'user', parts: [{ text: 'msg3' }] },
    { role: 'model', parts: [{ text: 'msg4' }] },
  ];

  // The summary response has text but deliberately carries no usageMetadata,
  // so the service has no model-reported token counts to work with.
  const summaryWithoutUsage = {
    candidates: [{ content: { parts: [{ text: 'Summary' }] } }],
  } as unknown as GenerateContentResponse;
  const generateContentStub = vi.fn().mockResolvedValue(summaryWithoutUsage);

  vi.mocked(mockChat.getHistory).mockReturnValue(chatHistory);
  vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(800);
  vi.mocked(tokenLimit).mockReturnValue(1000);
  vi.mocked(mockConfig.getContentGenerator).mockReturnValue({
    generateContent: generateContentStub,
  } as unknown as ContentGenerator);

  const { info, newHistory } = await service.compress(
    mockChat,
    mockPromptId,
    false,
    mockModel,
    mockConfig,
    false,
  );

  // Compression is rejected and the original token count is reported unchanged.
  expect(info.compressionStatus).toBe(
    CompressionStatus.COMPRESSION_FAILED_TOKEN_COUNT_ERROR,
  );
  expect(info.originalTokenCount).toBe(800);
  expect(info.newTokenCount).toBe(800);
  expect(newHistory).toBeNull();
});
it('should return FAILED if summary is empty string', async () => {
const history: Content[] = [
{ role: 'user', parts: [{ text: 'msg1' }] },

View File

@@ -14,7 +14,6 @@ import { getCompressionPrompt } from '../core/prompts.js';
import { getResponseText } from '../utils/partUtils.js';
import { logChatCompression } from '../telemetry/loggers.js';
import { makeChatCompressionEvent } from '../telemetry/types.js';
import { getInitialChatHistory } from '../utils/environmentContext.js';
/**
* Threshold for compression token count as a fraction of the model's token limit.
@@ -163,9 +162,25 @@ export class ChatCompressionService {
);
const summary = getResponseText(summaryResponse) ?? '';
const isSummaryEmpty = !summary || summary.trim().length === 0;
const compressionUsageMetadata = summaryResponse.usageMetadata;
const compressionInputTokenCount =
compressionUsageMetadata?.promptTokenCount;
let compressionOutputTokenCount =
compressionUsageMetadata?.candidatesTokenCount;
if (
compressionOutputTokenCount === undefined &&
typeof compressionUsageMetadata?.totalTokenCount === 'number' &&
typeof compressionInputTokenCount === 'number'
) {
compressionOutputTokenCount = Math.max(
0,
compressionUsageMetadata.totalTokenCount - compressionInputTokenCount,
);
}
let newTokenCount = originalTokenCount;
let extraHistory: Content[] = [];
let canCalculateNewTokenCount = false;
if (!isSummaryEmpty) {
extraHistory = [
@@ -180,16 +195,26 @@ export class ChatCompressionService {
...historyToKeep,
];
// Use a shared utility to construct the initial history for an accurate token count.
const fullNewHistory = await getInitialChatHistory(config, extraHistory);
// Estimate token count: 1 token ≈ 4 characters.
newTokenCount = Math.floor(
fullNewHistory.reduce(
(total, content) => total + JSON.stringify(content).length,
// Best-effort token math using *only* model-reported token counts.
//
// Note: compressionInputTokenCount includes the compression prompt and
// the extra "reason in your scratchpad" instruction (approx. 1000 tokens), and
// compressionOutputTokenCount may include non-persisted tokens (thoughts).
// We accept these inaccuracies to avoid local token estimation.
if (
typeof compressionInputTokenCount === 'number' &&
compressionInputTokenCount > 0 &&
typeof compressionOutputTokenCount === 'number' &&
compressionOutputTokenCount > 0
) {
canCalculateNewTokenCount = true;
newTokenCount = Math.max(
0,
) / 4,
);
originalTokenCount -
(compressionInputTokenCount - 1000) +
compressionOutputTokenCount,
);
}
}
logChatCompression(
@@ -197,6 +222,8 @@ export class ChatCompressionService {
makeChatCompressionEvent({
tokens_before: originalTokenCount,
tokens_after: newTokenCount,
compression_input_token_count: compressionInputTokenCount,
compression_output_token_count: compressionOutputTokenCount,
}),
);
@@ -209,6 +236,16 @@ export class ChatCompressionService {
compressionStatus: CompressionStatus.COMPRESSION_FAILED_EMPTY_SUMMARY,
},
};
} else if (!canCalculateNewTokenCount) {
return {
newHistory: null,
info: {
originalTokenCount,
newTokenCount: originalTokenCount,
compressionStatus:
CompressionStatus.COMPRESSION_FAILED_TOKEN_COUNT_ERROR,
},
};
} else if (newTokenCount > originalTokenCount) {
return {
newHistory: null,

View File

@@ -235,7 +235,6 @@ export class SkillManager {
}
this.watchStarted = true;
await this.ensureUserSkillsDir();
await this.refreshCache();
this.updateWatchersFromCache();
}
@@ -487,14 +486,29 @@ export class SkillManager {
}
private updateWatchersFromCache(): void {
const watchTargets = new Set<string>(
(['project', 'user'] as const)
.map((level) => this.getSkillsBaseDir(level))
.filter((baseDir) => fsSync.existsSync(baseDir)),
);
const desiredPaths = new Set<string>();
for (const level of ['project', 'user'] as const) {
const baseDir = this.getSkillsBaseDir(level);
const parentDir = path.dirname(baseDir);
if (fsSync.existsSync(parentDir)) {
desiredPaths.add(parentDir);
}
if (fsSync.existsSync(baseDir)) {
desiredPaths.add(baseDir);
}
const levelSkills = this.skillsCache?.get(level) || [];
for (const skill of levelSkills) {
const skillDir = path.dirname(skill.filePath);
if (fsSync.existsSync(skillDir)) {
desiredPaths.add(skillDir);
}
}
}
for (const existingPath of this.watchers.keys()) {
if (!watchTargets.has(existingPath)) {
if (!desiredPaths.has(existingPath)) {
void this.watchers
.get(existingPath)
?.close()
@@ -508,7 +522,7 @@ export class SkillManager {
}
}
for (const watchPath of watchTargets) {
for (const watchPath of desiredPaths) {
if (this.watchers.has(watchPath)) {
continue;
}
@@ -543,16 +557,4 @@ export class SkillManager {
void this.refreshCache().then(() => this.updateWatchersFromCache());
}, 150);
}
/**
 * Best-effort creation of the user-level skills directory.
 *
 * Calls `fs.mkdir` with `recursive: true` on the path returned by
 * `getSkillsBaseDir('user')`. A failure is logged with `console.warn`
 * and swallowed, so this method does not reject because of `mkdir`.
 */
private async ensureUserSkillsDir(): Promise<void> {
  const userSkillsDir = this.getSkillsBaseDir('user');
  try {
    await fs.mkdir(userSkillsDir, { recursive: true });
  } catch (mkdirError) {
    console.warn(
      `Failed to create user skills directory at ${userSkillsDir}:`,
      mkdirError,
    );
  }
}
}

View File

@@ -439,17 +439,27 @@ export interface ChatCompressionEvent extends BaseTelemetryEvent {
'event.timestamp': string;
tokens_before: number;
tokens_after: number;
compression_input_token_count?: number;
compression_output_token_count?: number;
}
/**
 * Builds a `chat_compression` telemetry event stamped with the current time.
 *
 * @param tokens_before Token count before compression.
 * @param tokens_after Token count after compression.
 * @param compression_input_token_count Optional input token count of the
 *   compression request; the key is omitted from the event when undefined.
 * @param compression_output_token_count Optional output token count of the
 *   compression request; the key is omitted from the event when undefined.
 * @returns The assembled event object.
 */
export function makeChatCompressionEvent({
  tokens_before,
  tokens_after,
  compression_input_token_count,
  compression_output_token_count,
}: Omit<ChatCompressionEvent, CommonFields>): ChatCompressionEvent {
  const event: ChatCompressionEvent = {
    'event.name': 'chat_compression',
    'event.timestamp': new Date().toISOString(),
    tokens_before,
    tokens_after,
  };

  // Attach the optional counts only when present, so an absent value yields
  // a missing key rather than a key explicitly set to undefined.
  if (compression_input_token_count !== undefined) {
    event.compression_input_token_count = compression_input_token_count;
  }
  if (compression_output_token_count !== undefined) {
    event.compression_output_token_count = compression_output_token_count;
  }

  return event;
}

View File

@@ -53,7 +53,7 @@ export class SkillTool extends BaseDeclarativeTool<SkillParams, ToolResult> {
false, // canUpdateOutput
);
this.skillManager = config.getSkillManager()!;
this.skillManager = config.getSkillManager();
this.skillManager.addChangeListener(() => {
void this.refreshSkills();
});

View File

@@ -4,37 +4,8 @@
* SPDX-License-Identifier: Apache-2.0
*/
export { DefaultRequestTokenizer } from './requestTokenizer.js';
import { DefaultRequestTokenizer } from './requestTokenizer.js';
export { RequestTokenizer as RequestTokenEstimator } from './requestTokenizer.js';
export { TextTokenizer } from './textTokenizer.js';
export { ImageTokenizer } from './imageTokenizer.js';
export type {
RequestTokenizer,
TokenizerConfig,
TokenCalculationResult,
ImageMetadata,
} from './types.js';
// Singleton instance for convenient usage
let defaultTokenizer: DefaultRequestTokenizer | null = null;
/**
 * Returns the shared module-level request tokenizer, constructing it the
 * first time it is requested and reusing that instance afterwards.
 */
export function getDefaultTokenizer(): DefaultRequestTokenizer {
  defaultTokenizer ??= new DefaultRequestTokenizer();
  return defaultTokenizer;
}
/**
 * Disposes the shared module-level tokenizer, if one exists, and clears the
 * reference so the next `getDefaultTokenizer()` call builds a fresh instance.
 * Does nothing when no instance has been created.
 */
export async function disposeDefaultTokenizer(): Promise<void> {
  if (!defaultTokenizer) {
    return;
  }
  await defaultTokenizer.dispose();
  defaultTokenizer = null;
}
export type { TokenCalculationResult, ImageMetadata } from './types.js';

View File

@@ -4,19 +4,15 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { DefaultRequestTokenizer } from './requestTokenizer.js';
import { describe, it, expect, beforeEach } from 'vitest';
import { RequestTokenizer } from './requestTokenizer.js';
import type { CountTokensParameters } from '@google/genai';
describe('DefaultRequestTokenizer', () => {
let tokenizer: DefaultRequestTokenizer;
describe('RequestTokenEstimator', () => {
let tokenizer: RequestTokenizer;
beforeEach(() => {
tokenizer = new DefaultRequestTokenizer();
});
afterEach(async () => {
await tokenizer.dispose();
tokenizer = new RequestTokenizer();
});
describe('text token calculation', () => {
@@ -221,25 +217,7 @@ describe('DefaultRequestTokenizer', () => {
});
});
describe('configuration', () => {
it('should use custom text encoding', async () => {
const request: CountTokensParameters = {
model: 'test-model',
contents: [
{
role: 'user',
parts: [{ text: 'Test text for encoding' }],
},
],
};
const result = await tokenizer.calculateTokens(request, {
textEncoding: 'cl100k_base',
});
expect(result.totalTokens).toBeGreaterThan(0);
});
describe('images', () => {
it('should process multiple images serially', async () => {
const pngBase64 =
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77yQAAAABJRU5ErkJggg==';

View File

@@ -10,18 +10,14 @@ import type {
Part,
PartUnion,
} from '@google/genai';
import type {
RequestTokenizer,
TokenizerConfig,
TokenCalculationResult,
} from './types.js';
import type { TokenCalculationResult } from './types.js';
import { TextTokenizer } from './textTokenizer.js';
import { ImageTokenizer } from './imageTokenizer.js';
/**
* Simple request tokenizer that handles text and image content serially
* Simple request token estimator that handles text and image content serially
*/
export class DefaultRequestTokenizer implements RequestTokenizer {
export class RequestTokenizer {
private textTokenizer: TextTokenizer;
private imageTokenizer: ImageTokenizer;
@@ -35,15 +31,9 @@ export class DefaultRequestTokenizer implements RequestTokenizer {
*/
async calculateTokens(
request: CountTokensParameters,
config: TokenizerConfig = {},
): Promise<TokenCalculationResult> {
const startTime = performance.now();
// Apply configuration
if (config.textEncoding) {
this.textTokenizer = new TextTokenizer(config.textEncoding);
}
try {
// Process request content and group by type
const { textContents, imageContents, audioContents, otherContents } =
@@ -112,9 +102,8 @@ export class DefaultRequestTokenizer implements RequestTokenizer {
if (textContents.length === 0) return 0;
try {
const tokenCounts =
await this.textTokenizer.calculateTokensBatch(textContents);
return tokenCounts.reduce((sum, count) => sum + count, 0);
// Avoid per-part rounding inflation by estimating once on the combined text.
return await this.textTokenizer.calculateTokens(textContents.join(''));
} catch (error) {
console.warn('Error calculating text tokens:', error);
// Fallback: character-based estimation
@@ -177,10 +166,8 @@ export class DefaultRequestTokenizer implements RequestTokenizer {
if (otherContents.length === 0) return 0;
try {
// Treat other content as text for token calculation
const tokenCounts =
await this.textTokenizer.calculateTokensBatch(otherContents);
return tokenCounts.reduce((sum, count) => sum + count, 0);
// Treat other content as text, and avoid per-item rounding inflation.
return await this.textTokenizer.calculateTokens(otherContents.join(''));
} catch (error) {
console.warn('Error calculating other content tokens:', error);
// Fallback: character-based estimation
@@ -264,7 +251,18 @@ export class DefaultRequestTokenizer implements RequestTokenizer {
otherContents,
);
}
return;
}
// Some request shapes (e.g. CountTokensParameters) allow passing parts directly
// instead of wrapping them in a { parts: [...] } Content object.
this.processPart(
content as Part | string,
textContents,
imageContents,
audioContents,
otherContents,
);
}
/**
@@ -326,16 +324,4 @@ export class DefaultRequestTokenizer implements RequestTokenizer {
console.warn('Failed to serialize unknown part type:', error);
}
}
/**
 * Releases resources held by this request tokenizer by disposing the
 * underlying text tokenizer. An error thrown during disposal is logged as
 * a warning instead of being propagated to the caller.
 */
async dispose(): Promise<void> {
  try {
    this.textTokenizer.dispose();
  } catch (disposeError) {
    console.warn('Error disposing request tokenizer:', disposeError);
  }
}
}

View File

@@ -4,36 +4,14 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { describe, it, expect, beforeEach } from 'vitest';
import { TextTokenizer } from './textTokenizer.js';
// Mock tiktoken at the top level with hoisted functions
const mockEncode = vi.hoisted(() => vi.fn());
const mockFree = vi.hoisted(() => vi.fn());
const mockGetEncoding = vi.hoisted(() => vi.fn());
vi.mock('tiktoken', () => ({
get_encoding: mockGetEncoding,
}));
describe('TextTokenizer', () => {
let tokenizer: TextTokenizer;
let consoleWarnSpy: ReturnType<typeof vi.spyOn>;
beforeEach(() => {
vi.resetAllMocks();
consoleWarnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
// Default mock implementation
mockGetEncoding.mockReturnValue({
encode: mockEncode,
free: mockFree,
});
});
afterEach(() => {
vi.restoreAllMocks();
tokenizer?.dispose();
tokenizer = new TextTokenizer();
});
describe('constructor', () => {
@@ -42,17 +20,14 @@ describe('TextTokenizer', () => {
expect(tokenizer).toBeInstanceOf(TextTokenizer);
});
it('should create tokenizer with custom encoding', () => {
tokenizer = new TextTokenizer('gpt2');
it('should create tokenizer with custom encoding (for backward compatibility)', () => {
tokenizer = new TextTokenizer();
expect(tokenizer).toBeInstanceOf(TextTokenizer);
// Note: encoding name is accepted but not used
});
});
describe('calculateTokens', () => {
beforeEach(() => {
tokenizer = new TextTokenizer();
});
it('should return 0 for empty text', async () => {
const result = await tokenizer.calculateTokens('');
expect(result).toBe(0);
@@ -69,99 +44,77 @@ describe('TextTokenizer', () => {
expect(result2).toBe(0);
});
it('should calculate tokens using tiktoken when available', async () => {
const testText = 'Hello, world!';
const mockTokens = [1, 2, 3, 4, 5]; // 5 tokens
mockEncode.mockReturnValue(mockTokens);
it('should calculate tokens using character-based estimation for ASCII text', async () => {
const testText = 'Hello, world!'; // 13 ASCII chars
const result = await tokenizer.calculateTokens(testText);
// 13 / 4 = 3.25 -> ceil = 4
expect(result).toBe(4);
});
expect(mockGetEncoding).toHaveBeenCalledWith('cl100k_base');
expect(mockEncode).toHaveBeenCalledWith(testText);
it('should calculate tokens for code (ASCII)', async () => {
const code = 'function test() { return 42; }'; // 30 ASCII chars
const result = await tokenizer.calculateTokens(code);
// 30 / 4 = 7.5 -> ceil = 8
expect(result).toBe(8);
});
it('should calculate tokens for non-ASCII text (CJK)', async () => {
const unicodeText = '你好世界'; // 4 non-ASCII chars
const result = await tokenizer.calculateTokens(unicodeText);
// 4 * 1.1 = 4.4 -> ceil = 5
expect(result).toBe(5);
});
it('should use fallback calculation when tiktoken fails to load', async () => {
mockGetEncoding.mockImplementation(() => {
throw new Error('Failed to load tiktoken');
});
const testText = 'Hello, world!'; // 13 characters
const result = await tokenizer.calculateTokens(testText);
expect(consoleWarnSpy).toHaveBeenCalledWith(
'Failed to load tiktoken with encoding cl100k_base:',
expect.any(Error),
);
// Fallback: Math.ceil(13 / 4) = 4
it('should calculate tokens for mixed ASCII and non-ASCII text', async () => {
const mixedText = 'Hello 世界'; // 6 ASCII + 2 non-ASCII
const result = await tokenizer.calculateTokens(mixedText);
// (6 / 4) + (2 * 1.1) = 1.5 + 2.2 = 3.7 -> ceil = 4
expect(result).toBe(4);
});
it('should use fallback calculation when encoding fails', async () => {
mockEncode.mockImplementation(() => {
throw new Error('Encoding failed');
});
const testText = 'Hello, world!'; // 13 characters
const result = await tokenizer.calculateTokens(testText);
expect(consoleWarnSpy).toHaveBeenCalledWith(
'Error encoding text with tiktoken:',
expect.any(Error),
);
// Fallback: Math.ceil(13 / 4) = 4
expect(result).toBe(4);
it('should calculate tokens for emoji', async () => {
const emojiText = '🌍'; // 2 UTF-16 code units (non-ASCII)
const result = await tokenizer.calculateTokens(emojiText);
// 2 * 1.1 = 2.2 -> ceil = 3
expect(result).toBe(3);
});
it('should handle very long text', async () => {
const longText = 'a'.repeat(10000);
const mockTokens = new Array(2500); // 2500 tokens
mockEncode.mockReturnValue(mockTokens);
const longText = 'a'.repeat(10000); // 10000 ASCII chars
const result = await tokenizer.calculateTokens(longText);
// 10000 / 4 = 2500 -> ceil = 2500
expect(result).toBe(2500);
});
it('should handle unicode characters', async () => {
const unicodeText = '你好世界 🌍';
const mockTokens = [1, 2, 3, 4, 5, 6];
mockEncode.mockReturnValue(mockTokens);
const result = await tokenizer.calculateTokens(unicodeText);
expect(result).toBe(6);
it('should handle text with only whitespace', async () => {
const whitespaceText = ' \n\t '; // 7 ASCII chars
const result = await tokenizer.calculateTokens(whitespaceText);
// 7 / 4 = 1.75 -> ceil = 2
expect(result).toBe(2);
});
it('should use custom encoding when specified', async () => {
tokenizer = new TextTokenizer('gpt2');
const testText = 'Hello, world!';
const mockTokens = [1, 2, 3];
mockEncode.mockReturnValue(mockTokens);
it('should handle special characters and symbols', async () => {
const specialText = '!@#$%^&*()_+-=[]{}|;:,.<>?'; // 26 ASCII chars
const result = await tokenizer.calculateTokens(specialText);
// 26 / 4 = 6.5 -> ceil = 7
expect(result).toBe(7);
});
const result = await tokenizer.calculateTokens(testText);
expect(mockGetEncoding).toHaveBeenCalledWith('gpt2');
expect(result).toBe(3);
it('should handle very short text', async () => {
const result = await tokenizer.calculateTokens('a');
// 1 / 4 = 0.25 -> ceil = 1
expect(result).toBe(1);
});
});
describe('calculateTokensBatch', () => {
beforeEach(() => {
tokenizer = new TextTokenizer();
});
it('should process multiple texts and return token counts', async () => {
const texts = ['Hello', 'world', 'test'];
mockEncode
.mockReturnValueOnce([1, 2]) // 2 tokens for 'Hello'
.mockReturnValueOnce([3, 4, 5]) // 3 tokens for 'world'
.mockReturnValueOnce([6]); // 1 token for 'test'
const result = await tokenizer.calculateTokensBatch(texts);
expect(result).toEqual([2, 3, 1]);
expect(mockEncode).toHaveBeenCalledTimes(3);
// 'Hello' = 5 / 4 = 1.25 -> ceil = 2
// 'world' = 5 / 4 = 1.25 -> ceil = 2
// 'test' = 4 / 4 = 1 -> ceil = 1
expect(result).toEqual([2, 2, 1]);
});
it('should handle empty array', async () => {
@@ -171,177 +124,156 @@ describe('TextTokenizer', () => {
it('should handle array with empty strings', async () => {
const texts = ['', 'hello', ''];
mockEncode.mockReturnValue([1, 2, 3]); // Only called for 'hello'
const result = await tokenizer.calculateTokensBatch(texts);
expect(result).toEqual([0, 3, 0]);
expect(mockEncode).toHaveBeenCalledTimes(1);
expect(mockEncode).toHaveBeenCalledWith('hello');
// '' = 0
// 'hello' = 5 / 4 = 1.25 -> ceil = 2
// '' = 0
expect(result).toEqual([0, 2, 0]);
});
it('should use fallback calculation when tiktoken fails to load', async () => {
mockGetEncoding.mockImplementation(() => {
throw new Error('Failed to load tiktoken');
});
const texts = ['Hello', 'world']; // 5 and 5 characters
it('should handle mixed ASCII and non-ASCII texts', async () => {
const texts = ['Hello', '世界', 'Hello 世界'];
const result = await tokenizer.calculateTokensBatch(texts);
expect(consoleWarnSpy).toHaveBeenCalledWith(
'Failed to load tiktoken with encoding cl100k_base:',
expect.any(Error),
);
// Fallback: Math.ceil(5/4) = 2 for both
expect(result).toEqual([2, 2]);
});
it('should use fallback calculation when encoding fails during batch processing', async () => {
mockEncode.mockImplementation(() => {
throw new Error('Encoding failed');
});
const texts = ['Hello', 'world']; // 5 and 5 characters
const result = await tokenizer.calculateTokensBatch(texts);
expect(consoleWarnSpy).toHaveBeenCalledWith(
'Error encoding texts with tiktoken:',
expect.any(Error),
);
// Fallback: Math.ceil(5/4) = 2 for both
expect(result).toEqual([2, 2]);
// 'Hello' = 5 / 4 = 1.25 -> ceil = 2
// '世界' = 2 * 1.1 = 2.2 -> ceil = 3
// 'Hello 世界' = (6/4) + (2*1.1) = 1.5 + 2.2 = 3.7 -> ceil = 4
expect(result).toEqual([2, 3, 4]);
});
it('should handle null and undefined values in batch', async () => {
const texts = [null, 'hello', undefined, 'world'] as unknown as string[];
mockEncode
.mockReturnValueOnce([1, 2, 3]) // 3 tokens for 'hello'
.mockReturnValueOnce([4, 5]); // 2 tokens for 'world'
const result = await tokenizer.calculateTokensBatch(texts);
// null = 0
// 'hello' = 5 / 4 = 1.25 -> ceil = 2
// undefined = 0
// 'world' = 5 / 4 = 1.25 -> ceil = 2
expect(result).toEqual([0, 2, 0, 2]);
});
expect(result).toEqual([0, 3, 0, 2]);
it('should process large batches efficiently', async () => {
const texts = Array.from({ length: 1000 }, (_, i) => `text${i}`);
const result = await tokenizer.calculateTokensBatch(texts);
expect(result).toHaveLength(1000);
// Verify results are reasonable
result.forEach((count) => {
expect(count).toBeGreaterThan(0);
expect(count).toBeLessThan(10); // 'textNNN' should be less than 10 tokens
});
});
});
describe('dispose', () => {
beforeEach(() => {
tokenizer = new TextTokenizer();
describe('backward compatibility', () => {
it('should accept encoding parameter in constructor', () => {
const tokenizer1 = new TextTokenizer();
const tokenizer2 = new TextTokenizer();
const tokenizer3 = new TextTokenizer();
expect(tokenizer1).toBeInstanceOf(TextTokenizer);
expect(tokenizer2).toBeInstanceOf(TextTokenizer);
expect(tokenizer3).toBeInstanceOf(TextTokenizer);
});
it('should free tiktoken encoding when disposing', async () => {
// Initialize the encoding by calling calculateTokens
await tokenizer.calculateTokens('test');
it('should produce same results regardless of encoding parameter', async () => {
const text = 'Hello, world!';
const tokenizer1 = new TextTokenizer();
const tokenizer2 = new TextTokenizer();
const tokenizer3 = new TextTokenizer();
tokenizer.dispose();
const result1 = await tokenizer1.calculateTokens(text);
const result2 = await tokenizer2.calculateTokens(text);
const result3 = await tokenizer3.calculateTokens(text);
expect(mockFree).toHaveBeenCalled();
// All should use character-based estimation, ignoring encoding parameter
expect(result1).toBe(result2);
expect(result2).toBe(result3);
expect(result1).toBe(4); // 13 / 4 = 3.25 -> ceil = 4
});
it('should handle disposal when encoding is not initialized', () => {
expect(() => tokenizer.dispose()).not.toThrow();
expect(mockFree).not.toHaveBeenCalled();
it('should maintain async interface for calculateTokens', async () => {
const result = tokenizer.calculateTokens('test');
expect(result).toBeInstanceOf(Promise);
await expect(result).resolves.toBe(1);
});
it('should handle disposal when encoding is null', async () => {
// Force encoding to be null by making tiktoken fail
mockGetEncoding.mockImplementation(() => {
throw new Error('Failed to load');
});
await tokenizer.calculateTokens('test');
expect(() => tokenizer.dispose()).not.toThrow();
expect(mockFree).not.toHaveBeenCalled();
});
it('should handle errors during disposal gracefully', async () => {
await tokenizer.calculateTokens('test');
mockFree.mockImplementation(() => {
throw new Error('Free failed');
});
tokenizer.dispose();
expect(consoleWarnSpy).toHaveBeenCalledWith(
'Error freeing tiktoken encoding:',
expect.any(Error),
);
});
it('should allow multiple calls to dispose', async () => {
await tokenizer.calculateTokens('test');
tokenizer.dispose();
tokenizer.dispose(); // Second call should not throw
expect(mockFree).toHaveBeenCalledTimes(1);
});
});
describe('lazy initialization', () => {
beforeEach(() => {
tokenizer = new TextTokenizer();
});
it('should not initialize tiktoken until first use', () => {
expect(mockGetEncoding).not.toHaveBeenCalled();
});
it('should initialize tiktoken on first calculateTokens call', async () => {
await tokenizer.calculateTokens('test');
expect(mockGetEncoding).toHaveBeenCalledTimes(1);
});
it('should not reinitialize tiktoken on subsequent calls', async () => {
await tokenizer.calculateTokens('test1');
await tokenizer.calculateTokens('test2');
expect(mockGetEncoding).toHaveBeenCalledTimes(1);
});
it('should initialize tiktoken on first calculateTokensBatch call', async () => {
await tokenizer.calculateTokensBatch(['test']);
expect(mockGetEncoding).toHaveBeenCalledTimes(1);
it('should maintain async interface for calculateTokensBatch', async () => {
const result = tokenizer.calculateTokensBatch(['test']);
expect(result).toBeInstanceOf(Promise);
await expect(result).resolves.toEqual([1]);
});
});
describe('edge cases', () => {
beforeEach(() => {
tokenizer = new TextTokenizer();
});
it('should handle very short text', async () => {
const result = await tokenizer.calculateTokens('a');
if (mockGetEncoding.mock.calls.length > 0) {
// If tiktoken was called, use its result
expect(mockEncode).toHaveBeenCalledWith('a');
} else {
// If tiktoken failed, should use fallback: Math.ceil(1/4) = 1
expect(result).toBe(1);
}
});
it('should handle text with only whitespace', async () => {
const whitespaceText = ' \n\t ';
const mockTokens = [1];
mockEncode.mockReturnValue(mockTokens);
const result = await tokenizer.calculateTokens(whitespaceText);
it('should handle text with only newlines', async () => {
const text = '\n\n\n'; // 3 ASCII chars
const result = await tokenizer.calculateTokens(text);
// 3 / 4 = 0.75 -> ceil = 1
expect(result).toBe(1);
});
it('should handle special characters and symbols', async () => {
const specialText = '!@#$%^&*()_+-=[]{}|;:,.<>?';
const mockTokens = new Array(10);
mockEncode.mockReturnValue(mockTokens);
it('should handle text with tabs', async () => {
const text = '\t\t\t\t'; // 4 ASCII chars
const result = await tokenizer.calculateTokens(text);
// 4 / 4 = 1 -> ceil = 1
expect(result).toBe(1);
});
const result = await tokenizer.calculateTokens(specialText);
it('should handle surrogate pairs correctly', async () => {
// Character outside BMP (Basic Multilingual Plane)
const text = '𝕳𝖊𝖑𝖑𝖔'; // Mathematical bold letters (2 UTF-16 units each)
const result = await tokenizer.calculateTokens(text);
// Each character is 2 UTF-16 units, all non-ASCII
// Total: 10 non-ASCII units
// 10 * 1.1 = 11 -> ceil = 11
expect(result).toBe(11);
});
expect(result).toBe(10);
it('should handle combining characters', async () => {
// e + combining acute accent
const text = 'e\u0301'; // 2 chars: 'e' (ASCII) + combining acute (non-ASCII)
const result = await tokenizer.calculateTokens(text);
// ASCII: 1 / 4 = 0.25
// Non-ASCII: 1 * 1.1 = 1.1
// Total: 0.25 + 1.1 = 1.35 -> ceil = 2
expect(result).toBe(2);
});
it('should handle accented characters', async () => {
const text = 'café'; // 'caf' = 3 ASCII, 'é' = 1 non-ASCII
const result = await tokenizer.calculateTokens(text);
// ASCII: 3 / 4 = 0.75
// Non-ASCII: 1 * 1.1 = 1.1
// Total: 0.75 + 1.1 = 1.85 -> ceil = 2
expect(result).toBe(2);
});
it('should handle various unicode scripts', async () => {
const cyrillic = 'Привет'; // 6 non-ASCII chars
const arabic = 'مرحبا'; // 5 non-ASCII chars
const japanese = 'こんにちは'; // 5 non-ASCII chars
const result1 = await tokenizer.calculateTokens(cyrillic);
const result2 = await tokenizer.calculateTokens(arabic);
const result3 = await tokenizer.calculateTokens(japanese);
// All should use 1.1 tokens per char
expect(result1).toBe(7); // 6 * 1.1 = 6.6 -> ceil = 7
expect(result2).toBe(6); // 5 * 1.1 = 5.5 -> ceil = 6
expect(result3).toBe(6); // 5 * 1.1 = 5.5 -> ceil = 6
});
});
// Checks that the tokenizer copes with inputs far larger than the other cases:
// one very long string and one batch with thousands of entries.
// NOTE(review): `tokenizer` comes from an enclosing scope not shown here; this
// suite has no beforeEach of its own, so confirm an outer hook creates it.
describe('large inputs', () => {
it('should handle very long text', async () => {
// Every character is ASCII, so the expected count is simply length / 4.
const longText = 'a'.repeat(200000); // 200k characters
const result = await tokenizer.calculateTokens(longText);
expect(result).toBe(50000); // 200000 / 4
});
it('should handle large batches', async () => {
// 5000 copies of the same 13-character ASCII string.
const texts = Array.from({ length: 5000 }, () => 'Hello, world!');
const result = await tokenizer.calculateTokensBatch(texts);
// One result per input text.
expect(result).toHaveLength(5000);
// 13 ASCII chars -> 13 / 4 = 3.25 -> ceil = 4
expect(result[0]).toBe(4);
});
});
});

View File

@@ -4,94 +4,55 @@
* SPDX-License-Identifier: Apache-2.0
*/
import type { TiktokenEncoding, Tiktoken } from 'tiktoken';
import { get_encoding } from 'tiktoken';
/**
* Text tokenizer for calculating text tokens using tiktoken
* Text tokenizer for calculating text tokens using character-based estimation.
*
* Uses a lightweight character-based approach that is "good enough" for
* guardrail features like sessionTokenLimit.
*
* Algorithm:
* - ASCII characters: 0.25 tokens per char (4 chars = 1 token)
* - Non-ASCII characters: 1.1 tokens per char (conservative for CJK, emoji, etc.)
*/
export class TextTokenizer {
private encoding: Tiktoken | null = null;
private encodingName: string;
constructor(encodingName: string = 'cl100k_base') {
this.encodingName = encodingName;
}
/**
* Initialize the tokenizer (lazy loading)
*/
private async ensureEncoding(): Promise<void> {
if (this.encoding) return;
try {
// Use type assertion since we know the encoding name is valid
this.encoding = get_encoding(this.encodingName as TiktokenEncoding);
} catch (error) {
console.warn(
`Failed to load tiktoken with encoding ${this.encodingName}:`,
error,
);
this.encoding = null;
}
}
/**
* Calculate tokens for text content
*
* @param text - The text to estimate tokens for
* @returns The estimated token count
*/
async calculateTokens(text: string): Promise<number> {
if (!text) return 0;
await this.ensureEncoding();
if (this.encoding) {
try {
return this.encoding.encode(text).length;
} catch (error) {
console.warn('Error encoding text with tiktoken:', error);
}
}
// Fallback: rough approximation using character count
// This is a conservative estimate: 1 token ≈ 4 characters for most languages
return Math.ceil(text.length / 4);
return this.calculateTokensSync(text);
}
/**
* Calculate tokens for multiple text strings in parallel
* Calculate tokens for multiple text strings
*
* @param texts - Array of text strings to estimate tokens for
* @returns Array of token counts corresponding to each input text
*/
async calculateTokensBatch(texts: string[]): Promise<number[]> {
await this.ensureEncoding();
if (this.encoding) {
try {
return texts.map((text) => {
if (!text) return 0;
// this.encoding may be null, add a null check to satisfy lint
return this.encoding ? this.encoding.encode(text).length : 0;
});
} catch (error) {
console.warn('Error encoding texts with tiktoken:', error);
// In case of error, return fallback estimation for all texts
return texts.map((text) => Math.ceil((text || '').length / 4));
}
}
// Fallback for batch processing
return texts.map((text) => Math.ceil((text || '').length / 4));
return texts.map((text) => this.calculateTokensSync(text));
}
/**
* Dispose of resources
*/
dispose(): void {
if (this.encoding) {
try {
this.encoding.free();
} catch (error) {
console.warn('Error freeing tiktoken encoding:', error);
}
this.encoding = null;
/**
 * Estimate the token count of a string from its UTF-16 code units.
 *
 * Code units below 128 (ASCII) contribute 0.25 tokens each and every other
 * code unit contributes 1.1 tokens; the sum is rounded up to a whole number.
 * Because the scan works on code units, a character encoded as a surrogate
 * pair is counted as two non-ASCII units.
 *
 * @param text - The text to estimate tokens for
 * @returns The estimated token count, or 0 for empty/missing text
 */
private calculateTokensSync(text: string): number {
  if (!text) {
    return 0;
  }

  let asciiUnits = 0;
  let otherUnits = 0;
  let idx = 0;
  while (idx < text.length) {
    if (text.charCodeAt(idx) < 128) {
      asciiUnits += 1;
    } else {
      otherUnits += 1;
    }
    idx += 1;
  }

  return Math.ceil(asciiUnits / 4 + otherUnits * 1.1);
}
}

View File

@@ -4,8 +4,6 @@
* SPDX-License-Identifier: Apache-2.0
*/
import type { CountTokensParameters } from '@google/genai';
/**
* Token calculation result for different content types
*/
@@ -23,14 +21,6 @@ export interface TokenCalculationResult {
processingTime: number;
}
/**
* Configuration for token calculation
*/
export interface TokenizerConfig {
/** Custom text tokenizer encoding (defaults to cl100k_base) */
textEncoding?: string;
}
/**
* Image metadata extracted from base64 data
*/
@@ -44,21 +34,3 @@ export interface ImageMetadata {
/** Size of the base64 data in bytes */
dataSize: number;
}
/**
* Request tokenizer interface
*/
export interface RequestTokenizer {
/**
* Calculate tokens for a request
*/
calculateTokens(
request: CountTokensParameters,
config?: TokenizerConfig,
): Promise<TokenCalculationResult>;
/**
* Dispose of resources (worker threads, etc.)
*/
dispose(): Promise<void>;
}

View File

@@ -1,167 +0,0 @@
/**
* @license
* Copyright 2025 Qwen Team
* SPDX-License-Identifier: Apache-2.0
*/
import { EnvHttpProxyAgent } from 'undici';
/**
 * The JavaScript runtimes this module distinguishes between.
 */
export type Runtime = 'node' | 'bun' | 'unknown';

/**
 * Identify which JavaScript runtime is executing this code.
 *
 * The decision is based on `process.versions`: a `bun` entry wins over a
 * `node` entry, and anything else (including a missing `process` global)
 * is reported as `'unknown'`.
 *
 * @returns `'bun'`, `'node'`, or `'unknown'`
 */
export function detectRuntime(): Runtime {
  if (typeof process === 'undefined') {
    return 'unknown';
  }

  const versions = process.versions;
  if (versions?.['bun']) {
    return 'bun';
  }
  return versions?.node ? 'node' : 'unknown';
}
/**
* Runtime fetch options for OpenAI SDK
*
* As produced by buildRuntimeFetchOptions('openai'):
* - `dispatcher` carries the EnvHttpProxyAgent on non-Bun runtimes
* - `timeout: false` is the only value returned under Bun
* - `undefined` is returned when constructing the agent throws
*/
export type OpenAIRuntimeFetchOptions =
| {
dispatcher?: EnvHttpProxyAgent;
timeout?: false;
}
| undefined;
/**
* Runtime fetch options for Anthropic SDK
*
* As produced by buildRuntimeFetchOptions('anthropic'):
* - `httpAgent` carries the EnvHttpProxyAgent on non-Bun runtimes
* - `fetch` carries a wrapper around the global fetch under Bun
* - an empty object is returned when constructing the agent throws
*
* Both fields are typed `any`, so no particular agent or fetch shape is
* enforced at compile time.
*/
export type AnthropicRuntimeFetchOptions = {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
httpAgent?: any;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
fetch?: any;
};
/**
* SDK type identifier
*
* Selects which overload of buildRuntimeFetchOptions applies and therefore
* which of the two option shapes above is returned.
*/
export type SDKType = 'openai' | 'anthropic';
/**
 * Build runtime-specific fetch options for OpenAI SDK
 */
export function buildRuntimeFetchOptions(
  sdkType: 'openai',
): OpenAIRuntimeFetchOptions;
/**
 * Build runtime-specific fetch options for Anthropic SDK
 */
export function buildRuntimeFetchOptions(
  sdkType: 'anthropic',
): AnthropicRuntimeFetchOptions;
/**
 * Produce the transport options an SDK client needs so that the SDK's own
 * `timeout` setting governs the total request time on the current runtime.
 *
 * - Bun: the runtime's built-in fetch timeout is switched off, either with
 *   `timeout: false` (OpenAI) or with a wrapper around `fetch` that passes
 *   `timeout: false` on every call (Anthropic).
 * - Node.js and unknown runtimes: an `EnvHttpProxyAgent` with
 *   `bodyTimeout: 0` is supplied. bodyTimeout only watches the gap between
 *   data chunks rather than the whole request, so it is disabled. The agent
 *   reads proxy settings from the environment (HTTP_PROXY, HTTPS_PROXY,
 *   NO_PROXY), which preserves proxy behaviour.
 *
 * @param sdkType - The SDK type ('openai' or 'anthropic') to determine return type
 * @returns Runtime-specific options compatible with the specified SDK; when
 *   the agent cannot be constructed, `undefined` for OpenAI and `{}` for
 *   Anthropic
 */
export function buildRuntimeFetchOptions(
  sdkType: SDKType,
): OpenAIRuntimeFetchOptions | AnthropicRuntimeFetchOptions {
  const forOpenAI = sdkType === 'openai';

  if (detectRuntime() === 'bun') {
    if (forOpenAI) {
      // Bun: disable the built-in 300s timeout so the OpenAI SDK timeout
      // is the only one in effect.
      return { timeout: false };
    }

    // Bun: wrap fetch so every Anthropic request opts out of the built-in
    // 300s timeout. Bun's fetch picks up proxy settings from the environment
    // by itself, so nothing extra is needed for proxies here.
    const fetchWithoutTimeout: typeof fetch = async (
      input: RequestInfo | URL,
      init?: RequestInit,
    ) => {
      const requestInit: RequestInit = {
        ...init,
        // @ts-expect-error - Bun-specific timeout option
        timeout: false,
      };
      return fetch(input, requestInit);
    };
    return { fetch: fetchWithoutTimeout };
  }

  // Node.js and unknown runtimes take the same path: try to build a
  // proxy-aware agent and fall back to SDK defaults if that fails.
  try {
    const proxyAgent = new EnvHttpProxyAgent({
      bodyTimeout: 0, // Disable to let SDK timeout control total request time
    });
    if (forOpenAI) {
      return { dispatcher: proxyAgent };
    }
    return { httpAgent: proxyAgent };
  } catch {
    // If undici is not available, return appropriate default
    if (forOpenAI) {
      return undefined;
    }
    return {};
  }
}

View File

@@ -46,8 +46,7 @@
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.25.1",
"zod": "^3.25.0",
"tiktoken": "^1.0.21"
"zod": "^3.25.0"
},
"devDependencies": {
"@types/node": "^20.14.0",

View File

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code-test-utils",
"version": "0.7.1",
"version": "0.7.0",
"private": true,
"main": "src/index.ts",
"license": "Apache-2.0",

View File

@@ -1,11 +1,6 @@
# Qwen Code Companion
[![Version](https://img.shields.io/visual-studio-marketplace/v/qwenlm.qwen-code-vscode-ide-companion)](https://marketplace.visualstudio.com/items?itemName=qwenlm.qwen-code-vscode-ide-companion)
[![VS Code Installs](https://img.shields.io/visual-studio-marketplace/i/qwenlm.qwen-code-vscode-ide-companion)](https://marketplace.visualstudio.com/items?itemName=qwenlm.qwen-code-vscode-ide-companion)
[![Open VSX Downloads](https://img.shields.io/open-vsx/dt/qwenlm/qwen-code-vscode-ide-companion)](https://open-vsx.org/extension/qwenlm/qwen-code-vscode-ide-companion)
[![Rating](https://img.shields.io/visual-studio-marketplace/r/qwenlm.qwen-code-vscode-ide-companion)](https://marketplace.visualstudio.com/items?itemName=qwenlm.qwen-code-vscode-ide-companion)
Seamlessly integrate [Qwen Code](https://github.com/QwenLM/qwen-code) into Visual Studio Code with native IDE features and an intuitive chat interface. This extension bundles everything you need — no additional installation required.
Seamlessly integrate [Qwen Code](https://github.com/QwenLM/qwen-code) into Visual Studio Code with native IDE features and an intuitive interface. This extension bundles everything you need to get started immediately.
## Demo
@@ -16,7 +11,7 @@ Seamlessly integrate [Qwen Code](https://github.com/QwenLM/qwen-code) into Visua
## Features
- **Native IDE experience**: Dedicated Qwen Code Chat panel accessed via the Qwen icon in the editor title bar
- **Native IDE experience**: Dedicated Qwen Code sidebar panel accessed via the Qwen icon
- **Native diffing**: Review, edit, and accept changes in VS Code's diff view
- **Auto-accept edits mode**: Automatically apply Qwen's changes as they're made
- **File management**: @-mention files or attach files and images using the system file picker
@@ -25,46 +20,73 @@ Seamlessly integrate [Qwen Code](https://github.com/QwenLM/qwen-code) into Visua
## Requirements
- Visual Studio Code 1.85.0 or newer (also works with Cursor, Windsurf, and other VS Code-based editors)
- Visual Studio Code 1.85.0 or newer
## Quick Start
## Installation
1. **Install** from the [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=qwenlm.qwen-code-vscode-ide-companion) or [Open VSX Registry](https://open-vsx.org/extension/qwenlm/qwen-code-vscode-ide-companion)
1. Install from the VS Code Marketplace: https://marketplace.visualstudio.com/items?itemName=qwenlm.qwen-code-vscode-ide-companion
2. **Open the Chat panel** using one of these methods:
- Click the **Qwen icon** in the top-right corner of the editor
- Run `Qwen Code: Open` from the Command Palette (`Cmd+Shift+P` / `Ctrl+Shift+P`)
2. Choose one of two ways to use the extension:
- Chat panel: Click the Qwen icon in the Activity Bar, or run `Qwen Code: Open` from the Command Palette (`Cmd+Shift+P` / `Ctrl+Shift+P`).
- Terminal session (classic): Run `Qwen Code: Run` to launch a session in the integrated terminal (bundled CLI).
3. **Start chatting** — Ask Qwen to help with coding tasks, explain code, fix bugs, or write new features
## Development and Debugging
## Commands
To debug and develop this extension locally:
| Command | Description |
| -------------------------------- | ------------------------------------------------------ |
| `Qwen Code: Open` | Open the Qwen Code Chat panel |
| `Qwen Code: Run` | Launch a classic terminal session with the bundled CLI |
| `Qwen Code: Accept Current Diff` | Accept the currently displayed diff |
| `Qwen Code: Close Diff Editor` | Close/reject the current diff |
1. **Clone the repository**
## Feedback & Issues
```bash
git clone https://github.com/QwenLM/qwen-code.git
cd qwen-code
```
- 🐛 [Report bugs](https://github.com/QwenLM/qwen-code/issues/new?template=bug_report.yml&labels=bug,vscode-ide-companion)
- 💡 [Request features](https://github.com/QwenLM/qwen-code/issues/new?template=feature_request.yml&labels=enhancement,vscode-ide-companion)
- 📖 [Documentation](https://qwenlm.github.io/qwen-code-docs/)
- 📋 [Changelog](https://github.com/QwenLM/qwen-code/releases)
2. **Install dependencies**
## Contributing
```bash
npm install
# or if using pnpm
pnpm install
```
We welcome contributions! See our [Contributing Guide](https://github.com/QwenLM/qwen-code/blob/main/CONTRIBUTING.md) for details on:
3. **Start debugging**
- Setting up the development environment
- Building and debugging the extension locally
- Submitting pull requests
```bash
code . # Open the project root in VS Code
```
- Open the `packages/vscode-ide-companion/src/extension.ts` file
- Open Debug panel (`Ctrl+Shift+D` or `Cmd+Shift+D`)
- Select **"Launch Companion VS Code Extension"** from the debug dropdown
- Press `F5` to launch Extension Development Host
4. **Make changes and reload**
- Edit the source code in the original VS Code window
- To see your changes, reload the Extension Development Host window by:
- Pressing `Ctrl+R` (Windows/Linux) or `Cmd+R` (macOS)
- Or clicking the "Reload" button in the debug toolbar
5. **View logs and debug output**
- Open the Debug Console in the original VS Code window to see extension logs
- In the Extension Development Host window, open Developer Tools with `Help > Toggle Developer Tools` to see webview logs
## Build for Production
To build the extension for distribution:
```bash
npm run compile
# or
pnpm run compile
```
To package the extension as a VSIX file:
```bash
npx vsce package
# or
pnpm vsce package
```
## Terms of Service and Privacy Notice
By installing this extension, you agree to the [Terms of Service](https://github.com/QwenLM/qwen-code/blob/main/docs/tos-privacy.md).
## License
[Apache-2.0](https://github.com/QwenLM/qwen-code/blob/main/LICENSE)

View File

@@ -2,7 +2,7 @@
"name": "qwen-code-vscode-ide-companion",
"displayName": "Qwen Code Companion",
"description": "Enable Qwen Code with direct access to your VS Code workspace.",
"version": "0.7.1",
"version": "0.7.0",
"publisher": "qwenlm",
"icon": "assets/icon.png",
"repository": {

View File

@@ -314,32 +314,34 @@ export async function activate(context: vscode.ExtensionContext) {
'cli.js',
).fsPath;
const execPath = process.execPath;
const lowerExecPath = execPath.toLowerCase();
const needsElectronRunAsNode =
lowerExecPath.includes('code') ||
lowerExecPath.includes('electron');
let qwenCmd: string;
const terminalOptions: vscode.TerminalOptions = {
name: `Qwen Code (${selectedFolder.name})`,
cwd: selectedFolder.uri.fsPath,
location,
};
let qwenCmd: string;
if (isWindows) {
// On Windows, try multiple strategies to find a Node.js runtime:
// 1. Check if VSCode ships a standalone node.exe alongside Code.exe
// 2. Check VSCode's internal Node.js in resources directory
// 3. Fall back to using Code.exe with ELECTRON_RUN_AS_NODE=1
// Use system Node via cmd.exe; avoid PowerShell parsing issues
const quoteCmd = (s: string) => `"${s.replace(/"/g, '""')}"`;
const cliQuoted = quoteCmd(cliEntry);
// TODO: @yiliang114, temporarily run through node, and later hope to decouple from the local node
qwenCmd = `node ${cliQuoted}`;
terminalOptions.shellPath = process.env.ComSpec;
} else {
// macOS/Linux: All VSCode-like IDEs (VSCode, Cursor, Windsurf, etc.)
// are Electron-based, so we always need ELECTRON_RUN_AS_NODE=1
// to run Node.js scripts using the IDE's bundled runtime.
const quotePosix = (s: string) => `"${s.replace(/"/g, '\\"')}"`;
const baseCmd = `${quotePosix(execPath)} ${quotePosix(cliEntry)}`;
qwenCmd = `ELECTRON_RUN_AS_NODE=1 ${baseCmd}`;
if (needsElectronRunAsNode) {
// macOS Electron helper needs ELECTRON_RUN_AS_NODE=1;
qwenCmd = `ELECTRON_RUN_AS_NODE=1 ${baseCmd}`;
} else {
qwenCmd = baseCmd;
}
}
const terminal = vscode.window.createTerminal(terminalOptions);

View File

@@ -98,17 +98,6 @@ console.log('Creating package.json for distribution...');
const rootPackageJson = JSON.parse(
fs.readFileSync(path.join(rootDir, 'package.json'), 'utf-8'),
);
const corePackageJson = JSON.parse(
fs.readFileSync(
path.join(rootDir, 'packages', 'core', 'package.json'),
'utf-8',
),
);
const runtimeDependencies = {};
if (corePackageJson.dependencies?.tiktoken) {
runtimeDependencies.tiktoken = corePackageJson.dependencies.tiktoken;
}
// Create a clean package.json for the published package
const distPackageJson = {
@@ -124,7 +113,7 @@ const distPackageJson = {
},
files: ['cli.js', 'vendor', '*.sb', 'README.md', 'LICENSE', 'locales'],
config: rootPackageJson.config,
dependencies: runtimeDependencies,
dependencies: {},
optionalDependencies: {
'@lydell/node-pty': '1.1.0',
'@lydell/node-pty-darwin-arm64': '1.1.0',