Compare commits


8 Commits

Author SHA1 Message Date
github-actions[bot]
4ab1b90dd3 chore(release): v0.1.0-preview 2025-09-24 00:13:30 +00:00
tanzhenxin
5ecb4a2430 fix: make ripgrep lazy load, to fix vscode ide companion unable to start (#676) 2025-09-23 14:44:48 +08:00
Mingholy
9c1d7228cb fix: auth hang when select qwen-oauth (#684) 2025-09-23 14:30:22 +08:00
hokupod
deb99a3b21 feat: add OpenAI and Qwen OAuth auth support to Zed ACP integration (#678)
- Add USE_OPENAI and QWEN_OAUTH authentication methods to GeminiAgent's authMethods array
- Enables Zed editor integration to support all available authentication options
- Add test case for QWEN_OAUTH authentication configuration
- Maintains backward compatibility with existing Google authentication methods

This allows Zed users to authenticate using:
- OpenAI API key (requires OPENAI_API_KEY environment variable)
- Qwen OAuth (2000 daily requests with OAuth2 flow)
- Existing Google authentication methods (unchanged)
2025-09-23 14:29:29 +08:00
Mingholy
014059e8a6 fix: output token limit for qwen (#664) 2025-09-23 14:28:59 +08:00
Mingholy
3579d6555a chore: bump version to 0.0.12 (#662) 2025-09-19 20:13:31 +08:00
Mingholy
9a56560eb4 fix: arrow keys on windows (#661) 2025-09-19 19:44:57 +08:00
Mingholy
da0863b943 fix: missing tool call chunks for openai logging (#657) 2025-09-19 15:19:30 +08:00
20 changed files with 612 additions and 101 deletions

.vscode/launch.json

@@ -101,6 +101,13 @@
"env": {
"GEMINI_SANDBOX": "false"
}
},
{
"name": "Attach by Process ID",
"processId": "${command:PickProcess}",
"request": "attach",
"skipFiles": ["<node_internals>/**"],
"type": "node"
}
],
"inputs": [
@@ -115,6 +122,12 @@
"type": "promptString",
"description": "Enter your prompt for non-interactive mode",
"default": "Explain this code"
},
{
"id": "debugPort",
"type": "promptString",
"description": "Enter the debug port number (default: 9229)",
"default": "9229"
}
]
}


@@ -1,5 +1,25 @@
# Changelog
## 0.0.12
- Added vision model support for Qwen-OAuth authentication.
- Synced upstream `gemini-cli` to v0.3.4 with numerous improvements and bug fixes.
- Enhanced subagent functionality with system reminders and improved user experience.
- Added tool call type coercion for better compatibility.
- Fixed arrow key navigation issues on Windows.
- Fixed missing tool call chunks for OpenAI logging.
- Fixed system prompt issues to avoid malformed tool calls.
- Fixed terminal flicker when subagent is executing.
- Fixed duplicate subagents configuration when running in home directory.
- Fixed Esc key unable to cancel subagent dialog.
- Added confirmation prompt for `/init` command when context file exists.
- Added `skipLoopDetection` configuration option.
- Fixed `is_background` parameter reset issues.
- Enhanced Windows compatibility with multi-line paste handling.
- Improved subagent documentation and branding consistency.
- Fixed various linting errors and improved code quality.
- Miscellaneous improvements and bug fixes.
## 0.0.11
- Added subagents feature with file-based configuration system for specialized AI assistants.

package-lock.json

@@ -1,12 +1,12 @@
{
"name": "@qwen-code/qwen-code",
"version": "0.0.11",
"version": "0.1.0-preview",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@qwen-code/qwen-code",
"version": "0.0.11",
"version": "0.1.0-preview",
"workspaces": [
"packages/*"
],
@@ -13454,7 +13454,7 @@
},
"packages/cli": {
"name": "@qwen-code/qwen-code",
"version": "0.0.11",
"version": "0.1.0-preview",
"dependencies": {
"@google/genai": "1.9.0",
"@iarna/toml": "^2.2.5",
@@ -13662,7 +13662,7 @@
},
"packages/core": {
"name": "@qwen-code/qwen-code-core",
"version": "0.0.11",
"version": "0.1.0-preview",
"dependencies": {
"@google/genai": "1.13.0",
"@lvce-editor/ripgrep": "^1.6.0",
@@ -13788,7 +13788,7 @@
},
"packages/test-utils": {
"name": "@qwen-code/qwen-code-test-utils",
"version": "0.0.11",
"version": "0.1.0-preview",
"dev": true,
"license": "Apache-2.0",
"devDependencies": {
@@ -13800,7 +13800,7 @@
},
"packages/vscode-ide-companion": {
"name": "qwen-code-vscode-ide-companion",
"version": "0.0.11",
"version": "0.1.0-preview",
"license": "LICENSE",
"dependencies": {
"@modelcontextprotocol/sdk": "^1.15.1",


@@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code",
"version": "0.0.11",
"version": "0.1.0-preview",
"engines": {
"node": ">=20.0.0"
},
@@ -13,7 +13,7 @@
"url": "git+https://github.com/QwenLM/qwen-code.git"
},
"config": {
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.0.11"
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.1.0-preview"
},
"scripts": {
"start": "node scripts/start.js",


@@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code",
"version": "0.0.11",
"version": "0.1.0-preview",
"description": "Qwen Code",
"repository": {
"type": "git",
@@ -25,7 +25,7 @@
"dist"
],
"config": {
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.0.11"
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.1.0-preview"
},
"dependencies": {
"@google/genai": "1.9.0",


@@ -526,7 +526,7 @@ describe('KeypressContext - Kitty Protocol', () => {
});
await waitFor(() => {
expect(keyHandler).toHaveBeenCalledTimes(2); // 1 paste event + 1 paste event for 'after'
expect(keyHandler).toHaveBeenCalledTimes(6); // 1 paste event + 5 individual chars for 'after'
});
// Should emit paste event first
@@ -538,12 +538,40 @@ describe('KeypressContext - Kitty Protocol', () => {
}),
);
// Then process 'after' as a paste event (since it's > 2 chars)
// Then process 'after' as individual characters (since it doesn't contain return)
expect(keyHandler).toHaveBeenNthCalledWith(
2,
expect.objectContaining({
paste: true,
sequence: 'after',
name: 'a',
paste: false,
}),
);
expect(keyHandler).toHaveBeenNthCalledWith(
3,
expect.objectContaining({
name: 'f',
paste: false,
}),
);
expect(keyHandler).toHaveBeenNthCalledWith(
4,
expect.objectContaining({
name: 't',
paste: false,
}),
);
expect(keyHandler).toHaveBeenNthCalledWith(
5,
expect.objectContaining({
name: 'e',
paste: false,
}),
);
expect(keyHandler).toHaveBeenNthCalledWith(
6,
expect.objectContaining({
name: 'r',
paste: false,
}),
);
});
@@ -571,7 +599,7 @@ describe('KeypressContext - Kitty Protocol', () => {
});
await waitFor(() => {
expect(keyHandler).toHaveBeenCalledTimes(14); // Adjusted based on actual behavior
expect(keyHandler).toHaveBeenCalledTimes(16); // 5 + 1 + 6 + 1 + 3 = 16 calls
});
// Check the sequence: 'start' (5 chars) + paste1 + 'middle' (6 chars) + paste2 + 'end' (3 individual chars)
@@ -643,13 +671,18 @@ describe('KeypressContext - Kitty Protocol', () => {
}),
);
// 'end' as paste event (since it's > 2 chars)
// 'end' as individual characters (since it doesn't contain return)
expect(keyHandler).toHaveBeenNthCalledWith(
callIndex++,
expect.objectContaining({
paste: true,
sequence: 'end',
}),
expect.objectContaining({ name: 'e' }),
);
expect(keyHandler).toHaveBeenNthCalledWith(
callIndex++,
expect.objectContaining({ name: 'n' }),
);
expect(keyHandler).toHaveBeenNthCalledWith(
callIndex++,
expect.objectContaining({ name: 'd' }),
);
});
@@ -738,16 +771,18 @@ describe('KeypressContext - Kitty Protocol', () => {
});
await waitFor(() => {
// With the current implementation, fragmented data gets processed differently
// The first fragment '\x1b[20' gets processed as individual characters
// The second fragment '0~content\x1b[2' gets processed as paste + individual chars
// The third fragment '01~' gets processed as individual characters
expect(keyHandler).toHaveBeenCalled();
// With the current implementation, fragmented paste markers get reconstructed
// into a single paste event for 'content'
expect(keyHandler).toHaveBeenCalledTimes(1);
});
// The current implementation processes fragmented paste markers as separate events
// rather than reconstructing them into a single paste event
expect(keyHandler.mock.calls.length).toBeGreaterThan(1);
// Should reconstruct the fragmented paste markers into a single paste event
expect(keyHandler).toHaveBeenCalledWith(
expect.objectContaining({
paste: true,
sequence: 'content',
}),
);
});
});
@@ -851,19 +886,38 @@ describe('KeypressContext - Kitty Protocol', () => {
stdin.emit('data', Buffer.from('lo'));
});
// With the current implementation, data is processed as it arrives
// First chunk 'hel' is treated as paste (multi-character)
// With the current implementation, data is processed as individual characters
// since 'hel' doesn't contain return (0x0d)
expect(keyHandler).toHaveBeenNthCalledWith(
1,
expect.objectContaining({
paste: true,
sequence: 'hel',
name: 'h',
sequence: 'h',
paste: false,
}),
);
// Second chunk 'lo' is processed as individual characters
expect(keyHandler).toHaveBeenNthCalledWith(
2,
expect.objectContaining({
name: 'e',
sequence: 'e',
paste: false,
}),
);
expect(keyHandler).toHaveBeenNthCalledWith(
3,
expect.objectContaining({
name: 'l',
sequence: 'l',
paste: false,
}),
);
// Second chunk 'lo' is also processed as individual characters
expect(keyHandler).toHaveBeenNthCalledWith(
4,
expect.objectContaining({
name: 'l',
sequence: 'l',
@@ -872,7 +926,7 @@ describe('KeypressContext - Kitty Protocol', () => {
);
expect(keyHandler).toHaveBeenNthCalledWith(
3,
5,
expect.objectContaining({
name: 'o',
sequence: 'o',
@@ -880,7 +934,7 @@ describe('KeypressContext - Kitty Protocol', () => {
}),
);
expect(keyHandler).toHaveBeenCalledTimes(3);
expect(keyHandler).toHaveBeenCalledTimes(5);
} finally {
vi.useRealTimers();
}
@@ -907,14 +961,20 @@ describe('KeypressContext - Kitty Protocol', () => {
});
// Should flush immediately without waiting for timeout
// Large data gets treated as paste event
expect(keyHandler).toHaveBeenCalledTimes(1);
expect(keyHandler).toHaveBeenCalledWith(
expect.objectContaining({
paste: true,
sequence: largeData,
}),
);
// Large data without return gets treated as individual characters
expect(keyHandler).toHaveBeenCalledTimes(65);
// Each character should be processed individually
for (let i = 0; i < 65; i++) {
expect(keyHandler).toHaveBeenNthCalledWith(
i + 1,
expect.objectContaining({
name: 'x',
sequence: 'x',
paste: false,
}),
);
}
// Advancing timer should not cause additional calls
const callCountBefore = keyHandler.mock.calls.length;


@@ -407,7 +407,11 @@ export function KeypressProvider({
return;
}
if (rawDataBuffer.length <= 2 || isPaste) {
if (
(rawDataBuffer.length <= 2 && rawDataBuffer.includes(0x0d)) ||
!rawDataBuffer.includes(0x0d) ||
isPaste
) {
keypressStream.write(rawDataBuffer);
} else {
// Flush raw data buffer as a paste event

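For illustration, a minimal sketch of the routing rule introduced above, with a hypothetical helper name (classifyChunk is not part of the actual KeypressProvider API): short chunks containing a carriage return (0x0d), chunks without any carriage return, and buffers already flagged as paste are written through to the keypress stream; longer chunks containing a return are flushed as a single paste event.

// Sketch only: mirrors the condition in the diff above.
function classifyChunk(
  rawDataBuffer: Buffer,
  isPaste: boolean,
): 'keypress-stream' | 'paste-event' {
  const hasReturn = rawDataBuffer.includes(0x0d); // carriage return
  if ((rawDataBuffer.length <= 2 && hasReturn) || !hasReturn || isPaste) {
    return 'keypress-stream'; // processed as individual keypresses
  }
  return 'paste-event'; // flushed as one paste event
}

classifyChunk(Buffer.from('hel'), false); // 'keypress-stream' -> individual chars
classifyChunk(Buffer.from('line1\rline2'), false); // 'paste-event'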

@@ -126,6 +126,18 @@ describe('validateNonInterActiveAuth', () => {
expect(refreshAuthMock).toHaveBeenCalledWith(AuthType.USE_OPENAI);
});
it('uses configured QWEN_OAUTH if provided', async () => {
const nonInteractiveConfig: NonInteractiveConfig = {
refreshAuth: refreshAuthMock,
};
await validateNonInteractiveAuth(
AuthType.QWEN_OAUTH,
undefined,
nonInteractiveConfig,
);
expect(refreshAuthMock).toHaveBeenCalledWith(AuthType.QWEN_OAUTH);
});
it('uses USE_VERTEX_AI if GOOGLE_GENAI_USE_VERTEXAI is true (with GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION)', async () => {
process.env['GOOGLE_GENAI_USE_VERTEXAI'] = 'true';
process.env['GOOGLE_CLOUD_PROJECT'] = 'test-project';


@@ -97,6 +97,18 @@ class GeminiAgent {
name: 'Vertex AI',
description: null,
},
{
id: AuthType.USE_OPENAI,
name: 'Use OpenAI API key',
description:
'Requires setting the `OPENAI_API_KEY` environment variable',
},
{
id: AuthType.QWEN_OAUTH,
name: 'Qwen OAuth',
description:
'OAuth authentication for Qwen models with 2000 daily requests',
},
];
return {


@@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code-core",
"version": "0.0.11",
"version": "0.1.0-preview",
"description": "Qwen Code Core",
"repository": {
"type": "git",


@@ -560,4 +560,146 @@ describe('DashScopeOpenAICompatibleProvider', () => {
]);
});
});
describe('output token limits', () => {
it('should limit max_tokens when it exceeds model limit for qwen3-coder-plus', () => {
const request: OpenAI.Chat.ChatCompletionCreateParams = {
model: 'qwen3-coder-plus',
messages: [{ role: 'user', content: 'Hello' }],
max_tokens: 100000, // Exceeds the 65536 limit
};
const result = provider.buildRequest(request, 'test-prompt-id');
expect(result.max_tokens).toBe(65536); // Should be limited to model's output limit
});
it('should limit max_tokens when it exceeds model limit for qwen-vl-max-latest', () => {
const request: OpenAI.Chat.ChatCompletionCreateParams = {
model: 'qwen-vl-max-latest',
messages: [{ role: 'user', content: 'Hello' }],
max_tokens: 20000, // Exceeds the 8192 limit
};
const result = provider.buildRequest(request, 'test-prompt-id');
expect(result.max_tokens).toBe(8192); // Should be limited to model's output limit
});
it('should not modify max_tokens when it is within model limit', () => {
const request: OpenAI.Chat.ChatCompletionCreateParams = {
model: 'qwen3-coder-plus',
messages: [{ role: 'user', content: 'Hello' }],
max_tokens: 1000, // Within the 65536 limit
};
const result = provider.buildRequest(request, 'test-prompt-id');
expect(result.max_tokens).toBe(1000); // Should remain unchanged
});
it('should not add max_tokens when not present in request', () => {
const request: OpenAI.Chat.ChatCompletionCreateParams = {
model: 'qwen3-coder-plus',
messages: [{ role: 'user', content: 'Hello' }],
// No max_tokens parameter
};
const result = provider.buildRequest(request, 'test-prompt-id');
expect(result.max_tokens).toBeUndefined(); // Should remain undefined
});
it('should handle null max_tokens parameter', () => {
const request: OpenAI.Chat.ChatCompletionCreateParams = {
model: 'qwen3-coder-plus',
messages: [{ role: 'user', content: 'Hello' }],
max_tokens: null,
};
const result = provider.buildRequest(request, 'test-prompt-id');
expect(result.max_tokens).toBeNull(); // Should remain null
});
it('should use default output limit for unknown models', () => {
const request: OpenAI.Chat.ChatCompletionCreateParams = {
model: 'unknown-model',
messages: [{ role: 'user', content: 'Hello' }],
max_tokens: 10000, // Exceeds the default 4096 limit
};
const result = provider.buildRequest(request, 'test-prompt-id');
expect(result.max_tokens).toBe(4096); // Should be limited to default output limit
});
it('should preserve other request parameters when limiting max_tokens', () => {
const request: OpenAI.Chat.ChatCompletionCreateParams = {
model: 'qwen3-coder-plus',
messages: [{ role: 'user', content: 'Hello' }],
max_tokens: 100000, // Will be limited
temperature: 0.8,
top_p: 0.9,
frequency_penalty: 0.1,
presence_penalty: 0.2,
stop: ['END'],
user: 'test-user',
};
const result = provider.buildRequest(request, 'test-prompt-id');
// max_tokens should be limited
expect(result.max_tokens).toBe(65536);
// Other parameters should be preserved
expect(result.temperature).toBe(0.8);
expect(result.top_p).toBe(0.9);
expect(result.frequency_penalty).toBe(0.1);
expect(result.presence_penalty).toBe(0.2);
expect(result.stop).toEqual(['END']);
expect(result.user).toBe('test-user');
});
it('should work with vision models and output token limits', () => {
const request: OpenAI.Chat.ChatCompletionCreateParams = {
model: 'qwen-vl-max-latest',
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Look at this image:' },
{
type: 'image_url',
image_url: { url: 'https://example.com/image.jpg' },
},
],
},
],
max_tokens: 20000, // Exceeds the 8192 limit
};
const result = provider.buildRequest(request, 'test-prompt-id');
expect(result.max_tokens).toBe(8192); // Should be limited
expect(
(result as { vl_high_resolution_images?: boolean })
.vl_high_resolution_images,
).toBe(true); // Vision-specific parameter should be preserved
});
it('should handle streaming requests with output token limits', () => {
const request: OpenAI.Chat.ChatCompletionCreateParams = {
model: 'qwen3-coder-plus',
messages: [{ role: 'user', content: 'Hello' }],
max_tokens: 100000, // Exceeds the 65536 limit
stream: true,
};
const result = provider.buildRequest(request, 'test-prompt-id');
expect(result.max_tokens).toBe(65536); // Should be limited
expect(result.stream).toBe(true); // Streaming should be preserved
});
});
});


@@ -3,6 +3,7 @@ import type { Config } from '../../../config/config.js';
import type { ContentGeneratorConfig } from '../../contentGenerator.js';
import { AuthType } from '../../contentGenerator.js';
import { DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES } from '../constants.js';
import { tokenLimit } from '../../tokenLimits.js';
import type {
OpenAICompatibleProvider,
DashScopeRequestMetadata,
@@ -65,6 +66,19 @@ export class DashScopeOpenAICompatibleProvider
});
}
/**
* Build and configure the request for DashScope API.
*
* This method applies DashScope-specific configurations including:
* - Cache control for system and user messages
* - Output token limits based on model capabilities
* - Vision model specific parameters (vl_high_resolution_images)
* - Request metadata for session tracking
*
* @param request - The original chat completion request parameters
* @param userPromptId - Unique identifier for the user prompt for session tracking
* @returns Configured request with DashScope-specific parameters applied
*/
buildRequest(
request: OpenAI.Chat.ChatCompletionCreateParams,
userPromptId: string,
@@ -79,21 +93,28 @@ export class DashScopeOpenAICompatibleProvider
messages = this.addDashScopeCacheControl(messages, cacheTarget);
}
// Apply output token limits based on model capabilities
// This ensures max_tokens doesn't exceed the model's maximum output limit
const requestWithTokenLimits = this.applyOutputTokenLimit(
request,
request.model,
);
if (request.model.startsWith('qwen-vl')) {
return {
...request,
...requestWithTokenLimits,
messages,
...(this.buildMetadata(userPromptId) || {}),
/* @ts-expect-error dashscope exclusive */
vl_high_resolution_images: true,
};
} as OpenAI.Chat.ChatCompletionCreateParams;
}
return {
...request, // Preserve all original parameters including sampling params
...requestWithTokenLimits, // Preserve all original parameters including sampling params and adjusted max_tokens
messages,
...(this.buildMetadata(userPromptId) || {}),
};
} as OpenAI.Chat.ChatCompletionCreateParams;
}
buildMetadata(userPromptId: string): DashScopeRequestMetadata {
@@ -246,6 +267,41 @@ export class DashScopeOpenAICompatibleProvider
return contentArray;
}
/**
* Apply output token limit to a request's max_tokens parameter.
*
* Ensures that existing max_tokens parameters don't exceed the model's maximum output
* token limit. Only modifies max_tokens when already present in the request.
*
* @param request - The chat completion request parameters
* @param model - The model name to get the output token limit for
* @returns The request with max_tokens adjusted to respect the model's limits (if present)
*/
private applyOutputTokenLimit<T extends { max_tokens?: number | null }>(
request: T,
model: string,
): T {
const currentMaxTokens = request.max_tokens;
// Only process if max_tokens is already present in the request
if (currentMaxTokens === undefined || currentMaxTokens === null) {
return request; // No max_tokens parameter, return unchanged
}
const modelLimit = tokenLimit(model, 'output');
// If max_tokens exceeds the model limit, cap it to the model's limit
if (currentMaxTokens > modelLimit) {
return {
...request,
max_tokens: modelLimit,
};
}
// If max_tokens is within the limit, return the request unchanged
return request;
}
/**
* Check if cache control should be disabled based on configuration.
*


@@ -1,5 +1,10 @@
import { describe, it, expect } from 'vitest';
import { normalize, tokenLimit, DEFAULT_TOKEN_LIMIT } from './tokenLimits.js';
import {
normalize,
tokenLimit,
DEFAULT_TOKEN_LIMIT,
DEFAULT_OUTPUT_TOKEN_LIMIT,
} from './tokenLimits.js';
describe('normalize', () => {
it('should lowercase and trim the model string', () => {
@@ -225,3 +230,96 @@ describe('tokenLimit', () => {
expect(tokenLimit('CLAUDE-3.5-SONNET')).toBe(200000);
});
});
describe('tokenLimit with output type', () => {
describe('Qwen models with output limits', () => {
it('should return the correct output limit for qwen3-coder-plus', () => {
expect(tokenLimit('qwen3-coder-plus', 'output')).toBe(65536);
expect(tokenLimit('qwen3-coder-plus-20250601', 'output')).toBe(65536);
});
it('should return the correct output limit for qwen-vl-max-latest', () => {
expect(tokenLimit('qwen-vl-max-latest', 'output')).toBe(8192);
});
});
describe('Default output limits', () => {
it('should return the default output limit for unknown models', () => {
expect(tokenLimit('unknown-model', 'output')).toBe(
DEFAULT_OUTPUT_TOKEN_LIMIT,
);
expect(tokenLimit('gpt-4', 'output')).toBe(DEFAULT_OUTPUT_TOKEN_LIMIT);
expect(tokenLimit('claude-3.5-sonnet', 'output')).toBe(
DEFAULT_OUTPUT_TOKEN_LIMIT,
);
});
it('should return the default output limit for models without specific output patterns', () => {
expect(tokenLimit('qwen3-coder-7b', 'output')).toBe(
DEFAULT_OUTPUT_TOKEN_LIMIT,
);
expect(tokenLimit('qwen-plus', 'output')).toBe(
DEFAULT_OUTPUT_TOKEN_LIMIT,
);
expect(tokenLimit('qwen-vl-max', 'output')).toBe(
DEFAULT_OUTPUT_TOKEN_LIMIT,
);
});
});
describe('Input vs Output limits comparison', () => {
it('should return different limits for input vs output for qwen3-coder-plus', () => {
expect(tokenLimit('qwen3-coder-plus', 'input')).toBe(1048576); // 1M input
expect(tokenLimit('qwen3-coder-plus', 'output')).toBe(65536); // 64K output
});
it('should return different limits for input vs output for qwen-vl-max-latest', () => {
expect(tokenLimit('qwen-vl-max-latest', 'input')).toBe(131072); // 128K input
expect(tokenLimit('qwen-vl-max-latest', 'output')).toBe(8192); // 8K output
});
it('should return same default limits for unknown models', () => {
expect(tokenLimit('unknown-model', 'input')).toBe(DEFAULT_TOKEN_LIMIT); // 128K input
expect(tokenLimit('unknown-model', 'output')).toBe(
DEFAULT_OUTPUT_TOKEN_LIMIT,
); // 4K output
});
});
describe('Backward compatibility', () => {
it('should default to input type when no type is specified', () => {
expect(tokenLimit('qwen3-coder-plus')).toBe(1048576); // Should be input limit
expect(tokenLimit('qwen-vl-max-latest')).toBe(131072); // Should be input limit
expect(tokenLimit('unknown-model')).toBe(DEFAULT_TOKEN_LIMIT); // Should be input default
});
it('should work with explicit input type', () => {
expect(tokenLimit('qwen3-coder-plus', 'input')).toBe(1048576);
expect(tokenLimit('qwen-vl-max-latest', 'input')).toBe(131072);
expect(tokenLimit('unknown-model', 'input')).toBe(DEFAULT_TOKEN_LIMIT);
});
});
describe('Model normalization with output limits', () => {
it('should handle normalized model names for output limits', () => {
expect(tokenLimit('QWEN3-CODER-PLUS', 'output')).toBe(65536);
expect(tokenLimit('qwen3-coder-plus-20250601', 'output')).toBe(65536);
expect(tokenLimit('QWEN-VL-MAX-LATEST', 'output')).toBe(8192);
});
it('should handle complex model strings for output limits', () => {
expect(
tokenLimit(
' a/b/c|QWEN3-CODER-PLUS:qwen3-coder-plus-2024-05-13 ',
'output',
),
).toBe(65536);
expect(
tokenLimit(
'provider/qwen-vl-max-latest:qwen-vl-max-latest-v1',
'output',
),
).toBe(8192);
});
});
});


@@ -1,7 +1,15 @@
type Model = string;
type TokenCount = number;
/**
* Token limit types for different use cases.
* - 'input': Maximum input context window size
* - 'output': Maximum output tokens that can be generated in a single response
*/
export type TokenLimitType = 'input' | 'output';
export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two)
export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 4_096; // 4K tokens
/**
* Accurate numeric limits:
@@ -18,6 +26,10 @@ const LIMITS = {
'1m': 1_048_576,
'2m': 2_097_152,
'10m': 10_485_760, // 10 million tokens
// Output token limits (typically much smaller than input limits)
'4k': 4_096,
'8k': 8_192,
'16k': 16_384,
} as const;
/** Robust normalizer: strips provider prefixes, pipes/colons, date/version suffixes, etc. */
@@ -36,7 +48,7 @@ export function normalize(model: string): string {
// - dates (e.g., -20250219), -v1, version numbers, 'latest', 'preview' etc.
s = s.replace(/-preview/g, '');
// Special handling for Qwen model names that include "-latest" as part of the model name
if (!s.match(/^qwen-(?:plus|flash)-latest$/)) {
if (!s.match(/^qwen-(?:plus|flash|vl-max)-latest$/)) {
// \d{6,} - Match 6 or more digits (dates) like -20250219 (6+ digit dates)
// \d+x\d+b - Match patterns like 4x8b, -7b, -70b
// v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
@@ -142,16 +154,48 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
[/^mistral-large-2.*$/, LIMITS['128k']],
];
/** Return the token limit for a model string (uses normalize + ordered regex list). */
export function tokenLimit(model: Model): TokenCount {
/**
* Output token limit patterns for specific model families.
* These patterns define the maximum number of tokens that can be generated
* in a single response for specific models.
*/
const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
// -------------------
// Alibaba / Qwen - DashScope Models
// -------------------
// Qwen3-Coder-Plus: 65,536 max output tokens
[/^qwen3-coder-plus(-.*)?$/, LIMITS['64k']],
// Qwen-VL-Max-Latest: 8,192 max output tokens
[/^qwen-vl-max-latest$/, LIMITS['8k']],
];
/**
* Return the token limit for a model string based on the specified type.
*
* This function determines the maximum number of tokens for either input context
* or output generation based on the model and token type. It uses the same
* normalization logic for consistency across both input and output limits.
*
* @param model - The model name to get the token limit for
* @param type - The type of token limit ('input' for context window, 'output' for generation)
* @returns The maximum number of tokens allowed for this model and type
*/
export function tokenLimit(
model: Model,
type: TokenLimitType = 'input',
): TokenCount {
const norm = normalize(model);
for (const [regex, limit] of PATTERNS) {
// Choose the appropriate patterns based on token type
const patterns = type === 'output' ? OUTPUT_PATTERNS : PATTERNS;
for (const [regex, limit] of patterns) {
if (regex.test(norm)) {
return limit;
}
}
// final fallback: DEFAULT_TOKEN_LIMIT (power-of-two 128K)
return DEFAULT_TOKEN_LIMIT;
// Return appropriate default based on token type
return type === 'output' ? DEFAULT_OUTPUT_TOKEN_LIMIT : DEFAULT_TOKEN_LIMIT;
}

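A short usage sketch of the extended API, using the exports and limits shown above:

import { tokenLimit, DEFAULT_OUTPUT_TOKEN_LIMIT } from './tokenLimits.js';

tokenLimit('qwen3-coder-plus'); // 1_048_576 - input (context) limit, the default type
tokenLimit('qwen3-coder-plus', 'output'); // 65_536 - max generation budget
tokenLimit('qwen-vl-max-latest', 'output'); // 8_192
tokenLimit('gpt-4', 'output') === DEFAULT_OUTPUT_TOKEN_LIMIT; // true - 4_096 fallback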

@@ -712,8 +712,6 @@ async function authWithQwenDeviceFlow(
`Polling... (attempt ${attempt + 1}/${maxAttempts})`,
);
process.stdout.write('.');
// Wait with cancellation check every 100ms
await new Promise<void>((resolve) => {
const checkInterval = 100; // Check every 100ms


@@ -901,5 +901,37 @@ describe('SharedTokenManager', () => {
);
}
});
it('should properly clean up timeout when file operation completes before timeout', async () => {
const tokenManager = SharedTokenManager.getInstance();
tokenManager.clearCache();
const mockClient = {
getCredentials: vi.fn().mockReturnValue(null),
setCredentials: vi.fn(),
getAccessToken: vi.fn(),
requestDeviceAuthorization: vi.fn(),
pollDeviceToken: vi.fn(),
refreshAccessToken: vi.fn(),
};
// Mock clearTimeout to verify it's called
const clearTimeoutSpy = vi.spyOn(global, 'clearTimeout');
// Mock file stat to resolve quickly (before timeout)
mockFs.stat.mockResolvedValue({ mtimeMs: 12345 } as Stats);
// Call checkAndReloadIfNeeded which uses withTimeout internally
const checkMethod = getPrivateProperty(
tokenManager,
'checkAndReloadIfNeeded',
) as (client?: IQwenOAuth2Client) => Promise<void>;
await checkMethod.call(tokenManager, mockClient);
// Verify that clearTimeout was called to clean up the timer
expect(clearTimeoutSpy).toHaveBeenCalled();
clearTimeoutSpy.mockRestore();
});
});
});


@@ -290,6 +290,36 @@ export class SharedTokenManager {
}
}
/**
* Utility method to add timeout to any promise operation
* Properly cleans up the timeout when the promise completes
*/
private withTimeout<T>(
promise: Promise<T>,
timeoutMs: number,
operationType = 'Operation',
): Promise<T> {
let timeoutId: NodeJS.Timeout;
return Promise.race([
promise.finally(() => {
// Clear timeout when main promise completes (success or failure)
if (timeoutId) {
clearTimeout(timeoutId);
}
}),
new Promise<never>((_, reject) => {
timeoutId = setTimeout(
() =>
reject(
new Error(`${operationType} timed out after ${timeoutMs}ms`),
),
timeoutMs,
);
}),
]);
}
/**
* Perform the actual file check and reload operation
* This is separated to enable proper promise-based synchronization
@@ -303,25 +333,12 @@ export class SharedTokenManager {
try {
const filePath = this.getCredentialFilePath();
// Add timeout to file stat operation
const withTimeout = async <T>(
promise: Promise<T>,
timeoutMs: number,
): Promise<T> =>
Promise.race([
promise,
new Promise<never>((_, reject) =>
setTimeout(
() =>
reject(
new Error(`File operation timed out after ${timeoutMs}ms`),
),
timeoutMs,
),
),
]);
const stats = await withTimeout(fs.stat(filePath), 3000);
const stats = await this.withTimeout(
fs.stat(filePath),
3000,
'File operation',
);
const fileModTime = stats.mtimeMs;
// Reload credentials if file has been modified since last cache
@@ -451,7 +468,7 @@ export class SharedTokenManager {
// Check if we have a refresh token before attempting refresh
const currentCredentials = qwenClient.getCredentials();
if (!currentCredentials.refresh_token) {
console.debug('create a NO_REFRESH_TOKEN error');
// console.debug('create a NO_REFRESH_TOKEN error');
throw new TokenManagerError(
TokenError.NO_REFRESH_TOKEN,
'No refresh token available for token refresh',
@@ -589,26 +606,12 @@ export class SharedTokenManager {
const dirPath = path.dirname(filePath);
const tempPath = `${filePath}.tmp.${randomUUID()}`;
// Add timeout wrapper for file operations
const withTimeout = async <T>(
promise: Promise<T>,
timeoutMs: number,
): Promise<T> =>
Promise.race([
promise,
new Promise<never>((_, reject) =>
setTimeout(
() => reject(new Error(`Operation timed out after ${timeoutMs}ms`)),
timeoutMs,
),
),
]);
// Create directory with restricted permissions
try {
await withTimeout(
await this.withTimeout(
fs.mkdir(dirPath, { recursive: true, mode: 0o700 }),
5000,
'File operation',
);
} catch (error) {
throw new TokenManagerError(
@@ -622,21 +625,30 @@ export class SharedTokenManager {
try {
// Write to temporary file first with restricted permissions
await withTimeout(
await this.withTimeout(
fs.writeFile(tempPath, credString, { mode: 0o600 }),
5000,
'File operation',
);
// Atomic move to final location
await withTimeout(fs.rename(tempPath, filePath), 5000);
await this.withTimeout(
fs.rename(tempPath, filePath),
5000,
'File operation',
);
// Update cached file modification time atomically after successful write
const stats = await withTimeout(fs.stat(filePath), 5000);
const stats = await this.withTimeout(
fs.stat(filePath),
5000,
'File operation',
);
this.memoryCache.fileModTime = stats.mtimeMs;
} catch (error) {
// Clean up temp file if it exists
try {
await withTimeout(fs.unlink(tempPath), 1000);
await this.withTimeout(fs.unlink(tempPath), 1000, 'File operation');
} catch (_cleanupError) {
// Ignore cleanup errors - temp file might not exist
}

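The point of centralizing this helper is subtle: a bare Promise.race leaves the losing timer armed after the main promise settles, which can keep the Node event loop alive for the full timeout. A rough standalone sketch of the difference (not repo code):

// Leaky variant: the timer stays scheduled even if `work` wins the race.
function naiveTimeout<T>(work: Promise<T>, ms: number): Promise<T> {
  return Promise.race([
    work,
    new Promise<never>((_, reject) =>
      setTimeout(() => reject(new Error(`timed out after ${ms}ms`)), ms),
    ),
  ]);
}

// Cleaned-up variant, as in withTimeout above: finally() clears the timer
// as soon as the main promise settles, success or failure.
function timeoutWithCleanup<T>(work: Promise<T>, ms: number): Promise<T> {
  let timer: NodeJS.Timeout | undefined;
  return Promise.race([
    work.finally(() => clearTimeout(timer)),
    new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error(`timed out after ${ms}ms`)), ms);
    }),
  ]);
}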

@@ -8,7 +8,6 @@ import fs from 'node:fs';
import path from 'node:path';
import { EOL } from 'node:os';
import { spawn } from 'node:child_process';
import { rgPath } from '@lvce-editor/ripgrep';
import type { ToolInvocation, ToolResult } from './tools.js';
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
import { SchemaValidator } from '../utils/schemaValidator.js';
@@ -18,6 +17,14 @@ import type { Config } from '../config/config.js';
const DEFAULT_TOTAL_MAX_MATCHES = 20000;
/**
* Lazy loads the ripgrep binary path to avoid loading the library until needed
*/
async function getRipgrepPath(): Promise<string> {
const { rgPath } = await import('@lvce-editor/ripgrep');
return rgPath;
}
/**
* Parameters for the GrepTool
*/
@@ -292,8 +299,9 @@ class GrepToolInvocation extends BaseToolInvocation<
rgArgs.push(absolutePath);
try {
const ripgrepPath = await getRipgrepPath();
const output = await new Promise<string>((resolve, reject) => {
const child = spawn(rgPath, rgArgs, {
const child = spawn(ripgrepPath, rgArgs, {
windowsHide: true,
});

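The lazy-load pattern in isolation: dynamic import() defers resolving the package (and its bundled native binary) until the first grep call, which per the commit message is what fixed the VS Code IDE companion failing to start. Below is a memoized variant of the pattern as a sketch; the diff itself relies on Node's module cache rather than an explicit cache variable.

// Sketch of the pattern, not the exact repo code.
let cachedRgPath: string | undefined;

async function getRipgrepPathLazily(): Promise<string> {
  if (cachedRgPath === undefined) {
    // The module (and its native binary lookup) is only loaded on first use.
    const { rgPath } = await import('@lvce-editor/ripgrep');
    cachedRgPath = rgPath;
  }
  return cachedRgPath;
}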

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code-test-utils",
"version": "0.0.11",
"version": "0.1.0-preview",
"private": true,
"main": "src/index.ts",
"license": "Apache-2.0",


@@ -2,7 +2,7 @@
"name": "qwen-code-vscode-ide-companion",
"displayName": "Qwen Code Companion",
"description": "Enable Qwen Code with direct access to your VS Code workspace.",
"version": "0.0.11",
"version": "0.1.0-preview",
"publisher": "qwenlm",
"icon": "assets/icon.png",
"repository": {