Mirror of https://github.com/QwenLM/qwen-code.git, synced 2025-12-25 11:09:13 +00:00.

Compare commits: fix/openai...fix/max-to (4 commits)
Commits:

- af03eaa57f
- 3579d6555a
- 9a56560eb4
- da0863b943
**CHANGELOG.md** (20 lines changed)
```diff
@@ -1,5 +1,25 @@
 # Changelog
 
+## 0.0.12
+
+- Added vision model support for Qwen-OAuth authentication.
+- Synced upstream `gemini-cli` to v0.3.4 with numerous improvements and bug fixes.
+- Enhanced subagent functionality with system reminders and improved user experience.
+- Added tool call type coercion for better compatibility.
+- Fixed arrow key navigation issues on Windows.
+- Fixed missing tool call chunks for OpenAI logging.
+- Fixed system prompt issues to avoid malformed tool calls.
+- Fixed terminal flicker when subagent is executing.
+- Fixed duplicate subagents configuration when running in home directory.
+- Fixed Esc key unable to cancel subagent dialog.
+- Added confirmation prompt for `/init` command when context file exists.
+- Added `skipLoopDetection` configuration option.
+- Fixed `is_background` parameter reset issues.
+- Enhanced Windows compatibility with multi-line paste handling.
+- Improved subagent documentation and branding consistency.
+- Fixed various linting errors and improved code quality.
+- Miscellaneous improvements and bug fixes.
+
 ## 0.0.11
 
 - Added subagents feature with file-based configuration system for specialized AI assistants.
```
**package-lock.json** (generated, 12 lines changed)
```diff
@@ -1,12 +1,12 @@
 {
   "name": "@qwen-code/qwen-code",
-  "version": "0.0.11",
+  "version": "0.0.12",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@qwen-code/qwen-code",
-      "version": "0.0.11",
+      "version": "0.0.12",
       "workspaces": [
         "packages/*"
       ],
@@ -13454,7 +13454,7 @@
     },
     "packages/cli": {
       "name": "@qwen-code/qwen-code",
-      "version": "0.0.11",
+      "version": "0.0.12",
       "dependencies": {
         "@google/genai": "1.9.0",
         "@iarna/toml": "^2.2.5",
@@ -13662,7 +13662,7 @@
     },
     "packages/core": {
       "name": "@qwen-code/qwen-code-core",
-      "version": "0.0.11",
+      "version": "0.0.12",
       "dependencies": {
         "@google/genai": "1.13.0",
         "@lvce-editor/ripgrep": "^1.6.0",
@@ -13788,7 +13788,7 @@
     },
     "packages/test-utils": {
       "name": "@qwen-code/qwen-code-test-utils",
-      "version": "0.0.11",
+      "version": "0.0.12",
       "dev": true,
       "license": "Apache-2.0",
       "devDependencies": {
@@ -13800,7 +13800,7 @@
     },
     "packages/vscode-ide-companion": {
       "name": "qwen-code-vscode-ide-companion",
-      "version": "0.0.11",
+      "version": "0.0.12",
       "license": "LICENSE",
       "dependencies": {
         "@modelcontextprotocol/sdk": "^1.15.1",
```
**package.json** (root)

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@qwen-code/qwen-code",
-  "version": "0.0.11",
+  "version": "0.0.12",
   "engines": {
     "node": ">=20.0.0"
   },
@@ -13,7 +13,7 @@
     "url": "git+https://github.com/QwenLM/qwen-code.git"
   },
   "config": {
-    "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.0.11"
+    "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.0.12"
   },
   "scripts": {
     "start": "node scripts/start.js",
```
**packages/cli/package.json**

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@qwen-code/qwen-code",
-  "version": "0.0.11",
+  "version": "0.0.12",
   "description": "Qwen Code",
   "repository": {
     "type": "git",
@@ -25,7 +25,7 @@
     "dist"
   ],
   "config": {
-    "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.0.11"
+    "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.0.12"
   },
   "dependencies": {
     "@google/genai": "1.9.0",
```
**KeypressContext tests**

```diff
@@ -526,7 +526,7 @@ describe('KeypressContext - Kitty Protocol', () => {
       });
 
       await waitFor(() => {
-        expect(keyHandler).toHaveBeenCalledTimes(2); // 1 paste event + 1 paste event for 'after'
+        expect(keyHandler).toHaveBeenCalledTimes(6); // 1 paste event + 5 individual chars for 'after'
       });
 
       // Should emit paste event first
@@ -538,12 +538,40 @@ describe('KeypressContext - Kitty Protocol', () => {
         }),
       );
 
-      // Then process 'after' as a paste event (since it's > 2 chars)
+      // Then process 'after' as individual characters (since it doesn't contain return)
       expect(keyHandler).toHaveBeenNthCalledWith(
         2,
         expect.objectContaining({
-          paste: true,
-          sequence: 'after',
+          name: 'a',
+          paste: false,
         }),
       );
+      expect(keyHandler).toHaveBeenNthCalledWith(
+        3,
+        expect.objectContaining({
+          name: 'f',
+          paste: false,
+        }),
+      );
+      expect(keyHandler).toHaveBeenNthCalledWith(
+        4,
+        expect.objectContaining({
+          name: 't',
+          paste: false,
+        }),
+      );
+      expect(keyHandler).toHaveBeenNthCalledWith(
+        5,
+        expect.objectContaining({
+          name: 'e',
+          paste: false,
+        }),
+      );
+      expect(keyHandler).toHaveBeenNthCalledWith(
+        6,
+        expect.objectContaining({
+          name: 'r',
+          paste: false,
+        }),
+      );
     });
@@ -571,7 +599,7 @@ describe('KeypressContext - Kitty Protocol', () => {
       });
 
       await waitFor(() => {
-        expect(keyHandler).toHaveBeenCalledTimes(14); // Adjusted based on actual behavior
+        expect(keyHandler).toHaveBeenCalledTimes(16); // 5 + 1 + 6 + 1 + 3 = 16 calls
       });
 
       // Check the sequence: 'start' (5 chars) + paste1 + 'middle' (6 chars) + paste2 + 'end' (3 chars as paste)
@@ -643,13 +671,18 @@ describe('KeypressContext - Kitty Protocol', () => {
         }),
       );
 
-      // 'end' as paste event (since it's > 2 chars)
+      // 'end' as individual characters (since it doesn't contain return)
       expect(keyHandler).toHaveBeenNthCalledWith(
         callIndex++,
-        expect.objectContaining({
-          paste: true,
-          sequence: 'end',
-        }),
+        expect.objectContaining({ name: 'e' }),
+      );
+      expect(keyHandler).toHaveBeenNthCalledWith(
+        callIndex++,
+        expect.objectContaining({ name: 'n' }),
+      );
+      expect(keyHandler).toHaveBeenNthCalledWith(
+        callIndex++,
+        expect.objectContaining({ name: 'd' }),
       );
     });
@@ -738,16 +771,18 @@ describe('KeypressContext - Kitty Protocol', () => {
       });
 
       await waitFor(() => {
-        // With the current implementation, fragmented data gets processed differently
-        // The first fragment '\x1b[20' gets processed as individual characters
-        // The second fragment '0~content\x1b[2' gets processed as paste + individual chars
-        // The third fragment '01~' gets processed as individual characters
-        expect(keyHandler).toHaveBeenCalled();
+        // With the current implementation, fragmented paste markers get reconstructed
+        // into a single paste event for 'content'
+        expect(keyHandler).toHaveBeenCalledTimes(1);
       });
 
-      // The current implementation processes fragmented paste markers as separate events
-      // rather than reconstructing them into a single paste event
-      expect(keyHandler.mock.calls.length).toBeGreaterThan(1);
+      // Should reconstruct the fragmented paste markers into a single paste event
+      expect(keyHandler).toHaveBeenCalledWith(
+        expect.objectContaining({
+          paste: true,
+          sequence: 'content',
+        }),
+      );
     });
   });
@@ -851,19 +886,38 @@ describe('KeypressContext - Kitty Protocol', () => {
         stdin.emit('data', Buffer.from('lo'));
       });
 
-      // With the current implementation, data is processed as it arrives
-      // First chunk 'hel' is treated as paste (multi-character)
+      // With the current implementation, data is processed as individual characters
+      // since 'hel' doesn't contain return (0x0d)
       expect(keyHandler).toHaveBeenNthCalledWith(
         1,
         expect.objectContaining({
-          paste: true,
-          sequence: 'hel',
+          name: 'h',
+          sequence: 'h',
+          paste: false,
         }),
       );
 
-      // Second chunk 'lo' is processed as individual characters
       expect(keyHandler).toHaveBeenNthCalledWith(
         2,
         expect.objectContaining({
+          name: 'e',
+          sequence: 'e',
+          paste: false,
+        }),
+      );
+
+      expect(keyHandler).toHaveBeenNthCalledWith(
+        3,
+        expect.objectContaining({
+          name: 'l',
+          sequence: 'l',
+          paste: false,
+        }),
+      );
+
+      // Second chunk 'lo' is also processed as individual characters
+      expect(keyHandler).toHaveBeenNthCalledWith(
+        4,
+        expect.objectContaining({
           name: 'l',
           sequence: 'l',
@@ -872,7 +926,7 @@ describe('KeypressContext - Kitty Protocol', () => {
         }),
       );
 
       expect(keyHandler).toHaveBeenNthCalledWith(
-        3,
+        5,
         expect.objectContaining({
           name: 'o',
           sequence: 'o',
@@ -880,7 +934,7 @@ describe('KeypressContext - Kitty Protocol', () => {
         }),
       );
 
-      expect(keyHandler).toHaveBeenCalledTimes(3);
+      expect(keyHandler).toHaveBeenCalledTimes(5);
     } finally {
       vi.useRealTimers();
     }
@@ -907,14 +961,20 @@ describe('KeypressContext - Kitty Protocol', () => {
       });
 
       // Should flush immediately without waiting for timeout
-      // Large data gets treated as paste event
-      expect(keyHandler).toHaveBeenCalledTimes(1);
-      expect(keyHandler).toHaveBeenCalledWith(
-        expect.objectContaining({
-          paste: true,
-          sequence: largeData,
-        }),
-      );
+      // Large data without return gets treated as individual characters
+      expect(keyHandler).toHaveBeenCalledTimes(65);
+
+      // Each character should be processed individually
+      for (let i = 0; i < 65; i++) {
+        expect(keyHandler).toHaveBeenNthCalledWith(
+          i + 1,
+          expect.objectContaining({
+            name: 'x',
+            sequence: 'x',
+            paste: false,
+          }),
+        );
+      }
 
       // Advancing timer should not cause additional calls
       const callCountBefore = keyHandler.mock.calls.length;
```
**KeypressProvider source**

```diff
@@ -407,7 +407,11 @@ export function KeypressProvider({
         return;
       }
 
-      if (rawDataBuffer.length <= 2 || isPaste) {
+      if (
+        (rawDataBuffer.length <= 2 && rawDataBuffer.includes(0x0d)) ||
+        !rawDataBuffer.includes(0x0d) ||
+        isPaste
+      ) {
         keypressStream.write(rawDataBuffer);
       } else {
         // Flush raw data buffer as a paste event
```
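Pulled out of context, the new buffering rule is easier to read as a standalone predicate. The sketch below is a hypothetical helper, not code from the patch: it mirrors the condition above, where `0x0d` is the carriage return that multi-line pastes contain. Chunks that pass go through the regular keypress stream; chunks that fail are flushed as a single synthetic paste event.

```ts
// Hypothetical helper mirroring the patched condition (not from the patch).
// Returns true when the buffered chunk should be replayed through the normal
// keypress stream, false when it should be flushed as one paste event.
function shouldWriteToKeypressStream(
  rawDataBuffer: Buffer,
  isPaste: boolean,
): boolean {
  const hasReturn = rawDataBuffer.includes(0x0d); // 0x0d = carriage return
  return (rawDataBuffer.length <= 2 && hasReturn) || !hasReturn || isPaste;
}

// 'hello' has no carriage return, so it is replayed as individual keypresses:
shouldWriteToKeypressStream(Buffer.from('hello'), false); // true
// A long chunk containing \r (a multi-line paste) is flushed as a paste event:
shouldWriteToKeypressStream(Buffer.from('line1\rline2'), false); // false
```

This is what the updated tests above encode: multi-character input without a carriage return now arrives as individual key events rather than a paste.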
**packages/core/package.json**

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@qwen-code/qwen-code-core",
-  "version": "0.0.11",
+  "version": "0.0.12",
   "description": "Qwen Code Core",
   "repository": {
     "type": "git",
```
**DashScopeOpenAICompatibleProvider tests**

```diff
@@ -560,4 +560,146 @@ describe('DashScopeOpenAICompatibleProvider', () => {
       ]);
     });
   });
+
+  describe('output token limits', () => {
+    it('should limit max_tokens when it exceeds model limit for qwen3-coder-plus', () => {
+      const request: OpenAI.Chat.ChatCompletionCreateParams = {
+        model: 'qwen3-coder-plus',
+        messages: [{ role: 'user', content: 'Hello' }],
+        max_tokens: 100000, // Exceeds the 65536 limit
+      };
+
+      const result = provider.buildRequest(request, 'test-prompt-id');
+
+      expect(result.max_tokens).toBe(65536); // Should be limited to model's output limit
+    });
+
+    it('should limit max_tokens when it exceeds model limit for qwen-vl-max-latest', () => {
+      const request: OpenAI.Chat.ChatCompletionCreateParams = {
+        model: 'qwen-vl-max-latest',
+        messages: [{ role: 'user', content: 'Hello' }],
+        max_tokens: 20000, // Exceeds the 8192 limit
+      };
+
+      const result = provider.buildRequest(request, 'test-prompt-id');
+
+      expect(result.max_tokens).toBe(8192); // Should be limited to model's output limit
+    });
+
+    it('should not modify max_tokens when it is within model limit', () => {
+      const request: OpenAI.Chat.ChatCompletionCreateParams = {
+        model: 'qwen3-coder-plus',
+        messages: [{ role: 'user', content: 'Hello' }],
+        max_tokens: 1000, // Within the 65536 limit
+      };
+
+      const result = provider.buildRequest(request, 'test-prompt-id');
+
+      expect(result.max_tokens).toBe(1000); // Should remain unchanged
+    });
+
+    it('should not add max_tokens when not present in request', () => {
+      const request: OpenAI.Chat.ChatCompletionCreateParams = {
+        model: 'qwen3-coder-plus',
+        messages: [{ role: 'user', content: 'Hello' }],
+        // No max_tokens parameter
+      };
+
+      const result = provider.buildRequest(request, 'test-prompt-id');
+
+      expect(result.max_tokens).toBeUndefined(); // Should remain undefined
+    });
+
+    it('should handle null max_tokens parameter', () => {
+      const request: OpenAI.Chat.ChatCompletionCreateParams = {
+        model: 'qwen3-coder-plus',
+        messages: [{ role: 'user', content: 'Hello' }],
+        max_tokens: null,
+      };
+
+      const result = provider.buildRequest(request, 'test-prompt-id');
+
+      expect(result.max_tokens).toBeNull(); // Should remain null
+    });
+
+    it('should use default output limit for unknown models', () => {
+      const request: OpenAI.Chat.ChatCompletionCreateParams = {
+        model: 'unknown-model',
+        messages: [{ role: 'user', content: 'Hello' }],
+        max_tokens: 10000, // Exceeds the default 4096 limit
+      };
+
+      const result = provider.buildRequest(request, 'test-prompt-id');
+
+      expect(result.max_tokens).toBe(4096); // Should be limited to default output limit
+    });
+
+    it('should preserve other request parameters when limiting max_tokens', () => {
+      const request: OpenAI.Chat.ChatCompletionCreateParams = {
+        model: 'qwen3-coder-plus',
+        messages: [{ role: 'user', content: 'Hello' }],
+        max_tokens: 100000, // Will be limited
+        temperature: 0.8,
+        top_p: 0.9,
+        frequency_penalty: 0.1,
+        presence_penalty: 0.2,
+        stop: ['END'],
+        user: 'test-user',
+      };
+
+      const result = provider.buildRequest(request, 'test-prompt-id');
+
+      // max_tokens should be limited
+      expect(result.max_tokens).toBe(65536);
+
+      // Other parameters should be preserved
+      expect(result.temperature).toBe(0.8);
+      expect(result.top_p).toBe(0.9);
+      expect(result.frequency_penalty).toBe(0.1);
+      expect(result.presence_penalty).toBe(0.2);
+      expect(result.stop).toEqual(['END']);
+      expect(result.user).toBe('test-user');
+    });
+
+    it('should work with vision models and output token limits', () => {
+      const request: OpenAI.Chat.ChatCompletionCreateParams = {
+        model: 'qwen-vl-max-latest',
+        messages: [
+          {
+            role: 'user',
+            content: [
+              { type: 'text', text: 'Look at this image:' },
+              {
+                type: 'image_url',
+                image_url: { url: 'https://example.com/image.jpg' },
+              },
+            ],
+          },
+        ],
+        max_tokens: 20000, // Exceeds the 8192 limit
+      };
+
+      const result = provider.buildRequest(request, 'test-prompt-id');
+
+      expect(result.max_tokens).toBe(8192); // Should be limited
+      expect(
+        (result as { vl_high_resolution_images?: boolean })
+          .vl_high_resolution_images,
+      ).toBe(true); // Vision-specific parameter should be preserved
+    });
+
+    it('should handle streaming requests with output token limits', () => {
+      const request: OpenAI.Chat.ChatCompletionCreateParams = {
+        model: 'qwen3-coder-plus',
+        messages: [{ role: 'user', content: 'Hello' }],
+        max_tokens: 100000, // Exceeds the 65536 limit
+        stream: true,
+      };
+
+      const result = provider.buildRequest(request, 'test-prompt-id');
+
+      expect(result.max_tokens).toBe(65536); // Should be limited
+      expect(result.stream).toBe(true); // Streaming should be preserved
+    });
+  });
 });
```
**DashScopeOpenAICompatibleProvider source**

```diff
@@ -3,6 +3,7 @@ import type { Config } from '../../../config/config.js';
 import type { ContentGeneratorConfig } from '../../contentGenerator.js';
 import { AuthType } from '../../contentGenerator.js';
 import { DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES } from '../constants.js';
+import { tokenLimit } from '../../tokenLimits.js';
 import type {
   OpenAICompatibleProvider,
   DashScopeRequestMetadata,
@@ -65,6 +66,19 @@ export class DashScopeOpenAICompatibleProvider
     });
   }
 
+  /**
+   * Build and configure the request for DashScope API.
+   *
+   * This method applies DashScope-specific configurations including:
+   * - Cache control for system and user messages
+   * - Output token limits based on model capabilities
+   * - Vision model specific parameters (vl_high_resolution_images)
+   * - Request metadata for session tracking
+   *
+   * @param request - The original chat completion request parameters
+   * @param userPromptId - Unique identifier for the user prompt for session tracking
+   * @returns Configured request with DashScope-specific parameters applied
+   */
   buildRequest(
     request: OpenAI.Chat.ChatCompletionCreateParams,
     userPromptId: string,
@@ -79,21 +93,28 @@ export class DashScopeOpenAICompatibleProvider
       messages = this.addDashScopeCacheControl(messages, cacheTarget);
     }
 
+    // Apply output token limits based on model capabilities
+    // This ensures max_tokens doesn't exceed the model's maximum output limit
+    const requestWithTokenLimits = this.applyOutputTokenLimit(
+      request,
+      request.model,
+    );
+
     if (request.model.startsWith('qwen-vl')) {
       return {
-        ...request,
+        ...requestWithTokenLimits,
         messages,
         ...(this.buildMetadata(userPromptId) || {}),
         /* @ts-expect-error dashscope exclusive */
         vl_high_resolution_images: true,
-      };
+      } as OpenAI.Chat.ChatCompletionCreateParams;
     }
 
     return {
-      ...request, // Preserve all original parameters including sampling params
+      ...requestWithTokenLimits, // Preserve all original parameters including sampling params and adjusted max_tokens
       messages,
       ...(this.buildMetadata(userPromptId) || {}),
-    };
+    } as OpenAI.Chat.ChatCompletionCreateParams;
   }
 
   buildMetadata(userPromptId: string): DashScopeRequestMetadata {
@@ -246,6 +267,41 @@ export class DashScopeOpenAICompatibleProvider
     return contentArray;
   }
 
+  /**
+   * Apply output token limit to a request's max_tokens parameter.
+   *
+   * Ensures that existing max_tokens parameters don't exceed the model's maximum output
+   * token limit. Only modifies max_tokens when already present in the request.
+   *
+   * @param request - The chat completion request parameters
+   * @param model - The model name to get the output token limit for
+   * @returns The request with max_tokens adjusted to respect the model's limits (if present)
+   */
+  private applyOutputTokenLimit<T extends { max_tokens?: number | null }>(
+    request: T,
+    model: string,
+  ): T {
+    const currentMaxTokens = request.max_tokens;
+
+    // Only process if max_tokens is already present in the request
+    if (currentMaxTokens === undefined || currentMaxTokens === null) {
+      return request; // No max_tokens parameter, return unchanged
+    }
+
+    const modelLimit = tokenLimit(model, 'output');
+
+    // If max_tokens exceeds the model limit, cap it to the model's limit
+    if (currentMaxTokens > modelLimit) {
+      return {
+        ...request,
+        max_tokens: modelLimit,
+      };
+    }
+
+    // If max_tokens is within the limit, return the request unchanged
+    return request;
+  }
+
   /**
    * Check if cache control should be disabled based on configuration.
    *
```
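The clamping itself is simple enough to restate in a few lines. Here is a minimal sketch of the same behaviour as a free function, assuming the `tokenLimit(model, 'output')` lookup added in tokenLimits.ts below; the helper name is illustrative, not the shipped method:

```ts
// Illustrative restatement of applyOutputTokenLimit (assumes tokenLimit from
// tokenLimits.ts). max_tokens is only touched when it is present and above the
// model's output ceiling; undefined and null pass through untouched.
import { tokenLimit } from './tokenLimits.js';

function capMaxTokens<T extends { max_tokens?: number | null }>(
  request: T,
  model: string,
): T {
  const current = request.max_tokens;
  if (current === undefined || current === null) return request;
  const limit = tokenLimit(model, 'output');
  return current > limit ? { ...request, max_tokens: limit } : request;
}

// qwen3-coder-plus allows 65536 output tokens, so 100000 is capped:
capMaxTokens({ max_tokens: 100_000 }, 'qwen3-coder-plus').max_tokens; // 65536
```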
**tokenLimits tests**

```diff
@@ -1,5 +1,10 @@
 import { describe, it, expect } from 'vitest';
-import { normalize, tokenLimit, DEFAULT_TOKEN_LIMIT } from './tokenLimits.js';
+import {
+  normalize,
+  tokenLimit,
+  DEFAULT_TOKEN_LIMIT,
+  DEFAULT_OUTPUT_TOKEN_LIMIT,
+} from './tokenLimits.js';
 
 describe('normalize', () => {
   it('should lowercase and trim the model string', () => {
@@ -225,3 +230,96 @@ describe('tokenLimit', () => {
     expect(tokenLimit('CLAUDE-3.5-SONNET')).toBe(200000);
   });
 });
+
+describe('tokenLimit with output type', () => {
+  describe('Qwen models with output limits', () => {
+    it('should return the correct output limit for qwen3-coder-plus', () => {
+      expect(tokenLimit('qwen3-coder-plus', 'output')).toBe(65536);
+      expect(tokenLimit('qwen3-coder-plus-20250601', 'output')).toBe(65536);
+    });
+
+    it('should return the correct output limit for qwen-vl-max-latest', () => {
+      expect(tokenLimit('qwen-vl-max-latest', 'output')).toBe(8192);
+    });
+  });
+
+  describe('Default output limits', () => {
+    it('should return the default output limit for unknown models', () => {
+      expect(tokenLimit('unknown-model', 'output')).toBe(
+        DEFAULT_OUTPUT_TOKEN_LIMIT,
+      );
+      expect(tokenLimit('gpt-4', 'output')).toBe(DEFAULT_OUTPUT_TOKEN_LIMIT);
+      expect(tokenLimit('claude-3.5-sonnet', 'output')).toBe(
+        DEFAULT_OUTPUT_TOKEN_LIMIT,
+      );
+    });
+
+    it('should return the default output limit for models without specific output patterns', () => {
+      expect(tokenLimit('qwen3-coder-7b', 'output')).toBe(
+        DEFAULT_OUTPUT_TOKEN_LIMIT,
+      );
+      expect(tokenLimit('qwen-plus', 'output')).toBe(
+        DEFAULT_OUTPUT_TOKEN_LIMIT,
+      );
+      expect(tokenLimit('qwen-vl-max', 'output')).toBe(
+        DEFAULT_OUTPUT_TOKEN_LIMIT,
+      );
+    });
+  });
+
+  describe('Input vs Output limits comparison', () => {
+    it('should return different limits for input vs output for qwen3-coder-plus', () => {
+      expect(tokenLimit('qwen3-coder-plus', 'input')).toBe(1048576); // 1M input
+      expect(tokenLimit('qwen3-coder-plus', 'output')).toBe(65536); // 64K output
+    });
+
+    it('should return different limits for input vs output for qwen-vl-max-latest', () => {
+      expect(tokenLimit('qwen-vl-max-latest', 'input')).toBe(131072); // 128K input
+      expect(tokenLimit('qwen-vl-max-latest', 'output')).toBe(8192); // 8K output
+    });
+
+    it('should return same default limits for unknown models', () => {
+      expect(tokenLimit('unknown-model', 'input')).toBe(DEFAULT_TOKEN_LIMIT); // 128K input
+      expect(tokenLimit('unknown-model', 'output')).toBe(
+        DEFAULT_OUTPUT_TOKEN_LIMIT,
+      ); // 4K output
+    });
+  });
+
+  describe('Backward compatibility', () => {
+    it('should default to input type when no type is specified', () => {
+      expect(tokenLimit('qwen3-coder-plus')).toBe(1048576); // Should be input limit
+      expect(tokenLimit('qwen-vl-max-latest')).toBe(131072); // Should be input limit
+      expect(tokenLimit('unknown-model')).toBe(DEFAULT_TOKEN_LIMIT); // Should be input default
+    });
+
+    it('should work with explicit input type', () => {
+      expect(tokenLimit('qwen3-coder-plus', 'input')).toBe(1048576);
+      expect(tokenLimit('qwen-vl-max-latest', 'input')).toBe(131072);
+      expect(tokenLimit('unknown-model', 'input')).toBe(DEFAULT_TOKEN_LIMIT);
+    });
+  });
+
+  describe('Model normalization with output limits', () => {
+    it('should handle normalized model names for output limits', () => {
+      expect(tokenLimit('QWEN3-CODER-PLUS', 'output')).toBe(65536);
+      expect(tokenLimit('qwen3-coder-plus-20250601', 'output')).toBe(65536);
+      expect(tokenLimit('QWEN-VL-MAX-LATEST', 'output')).toBe(8192);
+    });
+
+    it('should handle complex model strings for output limits', () => {
+      expect(
+        tokenLimit(
+          ' a/b/c|QWEN3-CODER-PLUS:qwen3-coder-plus-2024-05-13 ',
+          'output',
+        ),
+      ).toBe(65536);
+      expect(
+        tokenLimit(
+          'provider/qwen-vl-max-latest:qwen-vl-max-latest-v1',
+          'output',
+        ),
+      ).toBe(8192);
+    });
+  });
+});
```
**tokenLimits.ts**

```diff
@@ -1,7 +1,15 @@
 type Model = string;
 type TokenCount = number;
 
+/**
+ * Token limit types for different use cases.
+ * - 'input': Maximum input context window size
+ * - 'output': Maximum output tokens that can be generated in a single response
+ */
+export type TokenLimitType = 'input' | 'output';
+
 export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two)
+export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 4_096; // 4K tokens
 
 /**
  * Accurate numeric limits:
@@ -18,6 +26,10 @@ const LIMITS = {
   '1m': 1_048_576,
   '2m': 2_097_152,
   '10m': 10_485_760, // 10 million tokens
+  // Output token limits (typically much smaller than input limits)
+  '4k': 4_096,
+  '8k': 8_192,
+  '16k': 16_384,
 } as const;
 
 /** Robust normalizer: strips provider prefixes, pipes/colons, date/version suffixes, etc. */
@@ -36,7 +48,7 @@ export function normalize(model: string): string {
   // - dates (e.g., -20250219), -v1, version numbers, 'latest', 'preview' etc.
   s = s.replace(/-preview/g, '');
   // Special handling for Qwen model names that include "-latest" as part of the model name
-  if (!s.match(/^qwen-(?:plus|flash)-latest$/)) {
+  if (!s.match(/^qwen-(?:plus|flash|vl-max)-latest$/)) {
     // \d{6,} - Match 6 or more digits (dates) like -20250219 (6+ digit dates)
     // \d+x\d+b - Match patterns like 4x8b, -7b, -70b
     // v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
@@ -142,16 +154,48 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
   [/^mistral-large-2.*$/, LIMITS['128k']],
 ];
 
-/** Return the token limit for a model string (uses normalize + ordered regex list). */
-export function tokenLimit(model: Model): TokenCount {
+/**
+ * Output token limit patterns for specific model families.
+ * These patterns define the maximum number of tokens that can be generated
+ * in a single response for specific models.
+ */
+const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
+  // -------------------
+  // Alibaba / Qwen - DashScope Models
+  // -------------------
+  // Qwen3-Coder-Plus: 65,536 max output tokens
+  [/^qwen3-coder-plus(-.*)?$/, LIMITS['64k']],
+
+  // Qwen-VL-Max-Latest: 8,192 max output tokens
+  [/^qwen-vl-max-latest$/, LIMITS['8k']],
+];
+
+/**
+ * Return the token limit for a model string based on the specified type.
+ *
+ * This function determines the maximum number of tokens for either input context
+ * or output generation based on the model and token type. It uses the same
+ * normalization logic for consistency across both input and output limits.
+ *
+ * @param model - The model name to get the token limit for
+ * @param type - The type of token limit ('input' for context window, 'output' for generation)
+ * @returns The maximum number of tokens allowed for this model and type
+ */
+export function tokenLimit(
+  model: Model,
+  type: TokenLimitType = 'input',
+): TokenCount {
   const norm = normalize(model);
 
-  for (const [regex, limit] of PATTERNS) {
+  // Choose the appropriate patterns based on token type
+  const patterns = type === 'output' ? OUTPUT_PATTERNS : PATTERNS;
+
+  for (const [regex, limit] of patterns) {
     if (regex.test(norm)) {
       return limit;
     }
   }
 
-  // final fallback: DEFAULT_TOKEN_LIMIT (power-of-two 128K)
-  return DEFAULT_TOKEN_LIMIT;
+  // Return appropriate default based on token type
+  return type === 'output' ? DEFAULT_OUTPUT_TOKEN_LIMIT : DEFAULT_TOKEN_LIMIT;
 }
```
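Taken together, the extended API reads like this; the values are the ones exercised by the tests above:

```ts
import {
  tokenLimit,
  DEFAULT_TOKEN_LIMIT,
  DEFAULT_OUTPUT_TOKEN_LIMIT,
} from './tokenLimits.js';

// Input (context window) limits; 'input' is the default type.
tokenLimit('qwen3-coder-plus'); // 1_048_576 (1M context)
tokenLimit('qwen-vl-max-latest', 'input'); // 131_072 (128K context)

// Output (generation) limits consult the separate OUTPUT_PATTERNS table.
tokenLimit('qwen3-coder-plus', 'output'); // 65_536
tokenLimit('qwen-vl-max-latest', 'output'); // 8_192

// Models with no output pattern fall back to the 4_096-token output default.
tokenLimit('unknown-model', 'input') === DEFAULT_TOKEN_LIMIT; // true
tokenLimit('unknown-model', 'output') === DEFAULT_OUTPUT_TOKEN_LIMIT; // true
```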
**packages/test-utils/package.json**

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@qwen-code/qwen-code-test-utils",
-  "version": "0.0.11",
+  "version": "0.0.12",
   "private": true,
   "main": "src/index.ts",
   "license": "Apache-2.0",
```
**packages/vscode-ide-companion/package.json**

```diff
@@ -2,7 +2,7 @@
   "name": "qwen-code-vscode-ide-companion",
   "displayName": "Qwen Code Companion",
   "description": "Enable Qwen Code with direct access to your VS Code workspace.",
-  "version": "0.0.11",
+  "version": "0.0.12",
   "publisher": "qwenlm",
   "icon": "assets/icon.png",
   "repository": {
```