mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-19 09:33:53 +00:00
Fix and update the token limits handling (#754)
* fix: make the token-limits regex normalize e.g. `some-model-1.1` -> `some-model` while preserving e.g. `gpt-4.1` as-is. * feat: update the token-limits regex for the latest models `GLM-4.6` and `deepseek-v3.2-exp`. * feat: add the exact token limit 202752 for `GLM-4.6`, per the model config file.
This commit is contained in:
@@ -58,9 +58,29 @@ describe('normalize', () => {
|
||||
expect(normalize('gemini-2.0-flash-preview')).toBe('gemini-2.0-flash');
|
||||
});
|
||||
|
||||
it('should remove version numbers with dots when they are at the end', () => {
|
||||
expect(normalize('gpt-4.1.1-latest')).toBe('gpt-4.1.1');
|
||||
it('should not remove "-latest" from specific Qwen model names', () => {
|
||||
expect(normalize('qwen-plus-latest')).toBe('qwen-plus-latest');
|
||||
expect(normalize('qwen-flash-latest')).toBe('qwen-flash-latest');
|
||||
expect(normalize('qwen-vl-max-latest')).toBe('qwen-vl-max-latest');
|
||||
});
|
||||
|
||||
it('should remove date like suffixes', () => {
|
||||
expect(normalize('deepseek-r1-0528')).toBe('deepseek-r1');
|
||||
});
|
||||
|
||||
it('should remove literal "-latest" "-exp" suffixes', () => {
|
||||
expect(normalize('gpt-4.1-latest')).toBe('gpt-4.1');
|
||||
expect(normalize('deepseek-v3.2-exp')).toBe('deepseek-v3.2');
|
||||
});
|
||||
|
||||
it('should remove suffix version numbers with "v" prefix', () => {
|
||||
expect(normalize('model-test-v1.1')).toBe('model-test');
|
||||
expect(normalize('model-v1.1')).toBe('model');
|
||||
});
|
||||
|
||||
it('should remove suffix version numbers w/o "v" prefix only if they are preceded by another dash', () => {
|
||||
expect(normalize('model-test-1.1')).toBe('model-test');
|
||||
expect(normalize('gpt-4.1')).toBe('gpt-4.1');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -188,6 +208,9 @@ describe('tokenLimit', () => {
|
||||
it('should return the correct limit for glm-4.5', () => {
|
||||
expect(tokenLimit('glm-4.5')).toBe(131072);
|
||||
});
|
||||
it('should return the correct limit for glm-4.6', () => {
|
||||
expect(tokenLimit('glm-4.6')).toBe(202752);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Other models', () => {
|
||||
@@ -200,6 +223,9 @@ describe('tokenLimit', () => {
|
||||
it('should return the correct limit for deepseek-v3.1', () => {
|
||||
expect(tokenLimit('deepseek-v3.1')).toBe(131072);
|
||||
});
|
||||
it('should return the correct limit for deepseek-v3.2', () => {
|
||||
expect(tokenLimit('deepseek-v3.2-exp')).toBe(131072);
|
||||
});
|
||||
it('should return the correct limit for kimi-k2-instruct', () => {
|
||||
expect(tokenLimit('kimi-k2-instruct')).toBe(131072);
|
||||
});
|
||||
|
||||
@@ -20,7 +20,7 @@ const LIMITS = {
|
||||
'32k': 32_768,
|
||||
'64k': 65_536,
|
||||
'128k': 131_072,
|
||||
'200k': 200_000, // vendor-declared decimal (OpenAI / Anthropic use 200k)
|
||||
'200k': 200_000, // vendor-declared decimal, used by OpenAI, Anthropic, GLM etc.
|
||||
'256k': 262_144,
|
||||
'512k': 524_288,
|
||||
'1m': 1_048_576,
|
||||
@@ -49,15 +49,18 @@ export function normalize(model: string): string {
|
||||
s = s.replace(/-preview/g, '');
|
||||
// Special handling for Qwen model names that include "-latest" as part of the model name
|
||||
if (!s.match(/^qwen-(?:plus|flash|vl-max)-latest$/)) {
|
||||
// \d{6,} - Match 6 or more digits (dates) like -20250219 (6+ digit dates)
|
||||
// Regex breakdown:
|
||||
// -(?:...)$ - Non-capturing group for suffixes at the end of the string
|
||||
// The following patterns are matched within the group:
|
||||
// \d{4,} - Match 4 or more digits (dates) like -20250219 -0528 (4+ digit dates)
|
||||
// \d+x\d+b - Match patterns like -4x8b, -8x22b (plain sizes such as -7b or -70b have no "x" and are NOT matched)
|
||||
// v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
|
||||
// -\d+(?:\.\d+)+ - Match version numbers with dots (that are preceded by a dash),
|
||||
// like -1.1, -2.0.1 but only when they're suffixes, Example: model-test-1.1 → model-test;
|
||||
// Note: this does NOT match 4.1 in gpt-4.1 because there's no dash before 4.1 in that context.
|
||||
// latest - Match the literal string "latest"
|
||||
// (?<=-[^-]+-)\d+(?:\.\d+)+ - Match version numbers with dots that are preceded by another dash,
|
||||
// like -1.1, -2.0.1 but only when they are preceded by another dash, Example: model-test-1.1 → model-test;
|
||||
// Note: this does NOT match 4.1 in gpt-4.1 because no earlier dash-delimited segment precedes -4.1 (only `gpt` comes before it, with no dash in front).
|
||||
// latest|exp - Match the literal string "latest" or "exp"
|
||||
s = s.replace(
|
||||
/-(?:\d{6,}|\d+x\d+b|v\d+(?:\.\d+)*|-\d+(?:\.\d+)+|latest)$/g,
|
||||
/-(?:\d{4,}|\d+x\d+b|v\d+(?:\.\d+)*|(?<=-[^-]+-)\d+(?:\.\d+)+|latest|exp)$/g,
|
||||
'',
|
||||
);
|
||||
}
|
||||
@@ -149,18 +152,24 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
|
||||
// -------------------
|
||||
// Zhipu GLM
|
||||
// -------------------
|
||||
[/^glm-4\.5v.*$/, LIMITS['64k']],
|
||||
[/^glm-4\.5-air.*$/, LIMITS['128k']],
|
||||
[/^glm-4\.5.*$/, LIMITS['128k']],
|
||||
[/^glm-4\.5v(?:-.*)?$/, LIMITS['64k']],
|
||||
[/^glm-4\.5-air(?:-.*)?$/, LIMITS['128k']],
|
||||
[/^glm-4\.5(?:-.*)?$/, LIMITS['128k']],
|
||||
[/^glm-4\.6(?:-.*)?$/, 202_752 as unknown as TokenCount], // exact limit from the model config file
|
||||
|
||||
// -------------------
|
||||
// DeepSeek / GPT-OSS / Kimi / Llama & Mistral examples
|
||||
// DeepSeek
|
||||
// -------------------
|
||||
[/^deepseek$/, LIMITS['128k']],
|
||||
[/^deepseek-r1(?:-.*)?$/, LIMITS['128k']],
|
||||
[/^deepseek-v3(?:\.\d+)?(?:-.*)?$/, LIMITS['128k']],
|
||||
|
||||
// -------------------
|
||||
// GPT-OSS / Kimi / Llama & Mistral examples
|
||||
// -------------------
|
||||
[/^deepseek-r1.*$/, LIMITS['128k']],
|
||||
[/^deepseek-v3(?:\.1)?.*$/, LIMITS['128k']],
|
||||
[/^kimi-k2-instruct.*$/, LIMITS['128k']],
|
||||
[/^gpt-oss.*$/, LIMITS['128k']],
|
||||
[/^llama-4-scout.*$/, LIMITS['10m'] as unknown as TokenCount], // ultra-long variants - handle carefully
|
||||
[/^llama-4-scout.*$/, LIMITS['10m']],
|
||||
[/^mistral-large-2.*$/, LIMITS['128k']],
|
||||
];
|
||||
|
||||
|
||||
Reference in New Issue
Block a user