mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-19 09:33:53 +00:00
Fix and update the token limits handling (#754)
* fix: make the token limits regex normalize e.g. `some-model-1.1` -> `some-model` while preserving e.g. `gpt-4.1` as-is. * feat: update the token limits regex for the latest models `GLM-4.6` and `deepseek-v3.2-exp`. * feat: add the exact token limit of 202752, per the model config file, for `GLM-4.6`.
This commit is contained in:
@@ -58,9 +58,29 @@ describe('normalize', () => {
|
|||||||
expect(normalize('gemini-2.0-flash-preview')).toBe('gemini-2.0-flash');
|
expect(normalize('gemini-2.0-flash-preview')).toBe('gemini-2.0-flash');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should remove version numbers with dots when they are at the end', () => {
|
it('should not remove "-latest" from specific Qwen model names', () => {
|
||||||
expect(normalize('gpt-4.1.1-latest')).toBe('gpt-4.1.1');
|
expect(normalize('qwen-plus-latest')).toBe('qwen-plus-latest');
|
||||||
|
expect(normalize('qwen-flash-latest')).toBe('qwen-flash-latest');
|
||||||
|
expect(normalize('qwen-vl-max-latest')).toBe('qwen-vl-max-latest');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should remove date like suffixes', () => {
|
||||||
|
expect(normalize('deepseek-r1-0528')).toBe('deepseek-r1');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should remove literal "-latest" "-exp" suffixes', () => {
|
||||||
expect(normalize('gpt-4.1-latest')).toBe('gpt-4.1');
|
expect(normalize('gpt-4.1-latest')).toBe('gpt-4.1');
|
||||||
|
expect(normalize('deepseek-v3.2-exp')).toBe('deepseek-v3.2');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should remove suffix version numbers with "v" prefix', () => {
|
||||||
|
expect(normalize('model-test-v1.1')).toBe('model-test');
|
||||||
|
expect(normalize('model-v1.1')).toBe('model');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should remove suffix version numbers w/o "v" prefix only if they are preceded by another dash', () => {
|
||||||
|
expect(normalize('model-test-1.1')).toBe('model-test');
|
||||||
|
expect(normalize('gpt-4.1')).toBe('gpt-4.1');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -188,6 +208,9 @@ describe('tokenLimit', () => {
|
|||||||
it('should return the correct limit for glm-4.5', () => {
|
it('should return the correct limit for glm-4.5', () => {
|
||||||
expect(tokenLimit('glm-4.5')).toBe(131072);
|
expect(tokenLimit('glm-4.5')).toBe(131072);
|
||||||
});
|
});
|
||||||
|
it('should return the correct limit for glm-4.6', () => {
|
||||||
|
expect(tokenLimit('glm-4.6')).toBe(202752);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('Other models', () => {
|
describe('Other models', () => {
|
||||||
@@ -200,6 +223,9 @@ describe('tokenLimit', () => {
|
|||||||
it('should return the correct limit for deepseek-v3.1', () => {
|
it('should return the correct limit for deepseek-v3.1', () => {
|
||||||
expect(tokenLimit('deepseek-v3.1')).toBe(131072);
|
expect(tokenLimit('deepseek-v3.1')).toBe(131072);
|
||||||
});
|
});
|
||||||
|
it('should return the correct limit for deepseek-v3.2', () => {
|
||||||
|
expect(tokenLimit('deepseek-v3.2-exp')).toBe(131072);
|
||||||
|
});
|
||||||
it('should return the correct limit for kimi-k2-instruct', () => {
|
it('should return the correct limit for kimi-k2-instruct', () => {
|
||||||
expect(tokenLimit('kimi-k2-instruct')).toBe(131072);
|
expect(tokenLimit('kimi-k2-instruct')).toBe(131072);
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ const LIMITS = {
|
|||||||
'32k': 32_768,
|
'32k': 32_768,
|
||||||
'64k': 65_536,
|
'64k': 65_536,
|
||||||
'128k': 131_072,
|
'128k': 131_072,
|
||||||
'200k': 200_000, // vendor-declared decimal (OpenAI / Anthropic use 200k)
|
'200k': 200_000, // vendor-declared decimal, used by OpenAI, Anthropic, GLM etc.
|
||||||
'256k': 262_144,
|
'256k': 262_144,
|
||||||
'512k': 524_288,
|
'512k': 524_288,
|
||||||
'1m': 1_048_576,
|
'1m': 1_048_576,
|
||||||
@@ -49,15 +49,18 @@ export function normalize(model: string): string {
|
|||||||
s = s.replace(/-preview/g, '');
|
s = s.replace(/-preview/g, '');
|
||||||
// Special handling for Qwen model names that include "-latest" as part of the model name
|
// Special handling for Qwen model names that include "-latest" as part of the model name
|
||||||
if (!s.match(/^qwen-(?:plus|flash|vl-max)-latest$/)) {
|
if (!s.match(/^qwen-(?:plus|flash|vl-max)-latest$/)) {
|
||||||
// \d{6,} - Match 6 or more digits (dates) like -20250219 (6+ digit dates)
|
// Regex breakdown:
|
||||||
// \d+x\d+b - Match patterns like 4x8b, -7b, -70b
|
// -(?:...)$ - Non-capturing group for suffixes at the end of the string
|
||||||
// v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
|
// The following patterns are matched within the group:
|
||||||
// -\d+(?:\.\d+)+ - Match version numbers with dots (that are preceded by a dash),
|
// \d{4,} - Match 4 or more digits (dates) like -20250219 -0528 (4+ digit dates)
|
||||||
// like -1.1, -2.0.1 but only when they're suffixes, Example: model-test-1.1 → model-test;
|
// \d+x\d+b - Match patterns like 4x8b, -7b, -70b
|
||||||
// Note: this does NOT match 4.1 in gpt-4.1 because there's no dash before 4.1 in that context.
|
// v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
|
||||||
// latest - Match the literal string "latest"
|
// (?<=-[^-]+-)\d+(?:\.\d+)+ - Match version numbers with dots that are preceded by another dash,
|
||||||
|
// like -1.1, -2.0.1 but only when they are preceded by another dash, Example: model-test-1.1 → model-test;
|
||||||
|
// Note: this does NOT match 4.1 in gpt-4.1 because there's no dash before -4.1 in that context.
|
||||||
|
// latest|exp - Match the literal string "latest" or "exp"
|
||||||
s = s.replace(
|
s = s.replace(
|
||||||
/-(?:\d{6,}|\d+x\d+b|v\d+(?:\.\d+)*|-\d+(?:\.\d+)+|latest)$/g,
|
/-(?:\d{4,}|\d+x\d+b|v\d+(?:\.\d+)*|(?<=-[^-]+-)\d+(?:\.\d+)+|latest|exp)$/g,
|
||||||
'',
|
'',
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -149,18 +152,24 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
|
|||||||
// -------------------
|
// -------------------
|
||||||
// Zhipu GLM
|
// Zhipu GLM
|
||||||
// -------------------
|
// -------------------
|
||||||
[/^glm-4\.5v.*$/, LIMITS['64k']],
|
[/^glm-4\.5v(?:-.*)?$/, LIMITS['64k']],
|
||||||
[/^glm-4\.5-air.*$/, LIMITS['128k']],
|
[/^glm-4\.5-air(?:-.*)?$/, LIMITS['128k']],
|
||||||
[/^glm-4\.5.*$/, LIMITS['128k']],
|
[/^glm-4\.5(?:-.*)?$/, LIMITS['128k']],
|
||||||
|
[/^glm-4\.6(?:-.*)?$/, 202_752 as unknown as TokenCount], // exact limit from the model config file
|
||||||
|
|
||||||
// -------------------
|
// -------------------
|
||||||
// DeepSeek / GPT-OSS / Kimi / Llama & Mistral examples
|
// DeepSeek
|
||||||
|
// -------------------
|
||||||
|
[/^deepseek$/, LIMITS['128k']],
|
||||||
|
[/^deepseek-r1(?:-.*)?$/, LIMITS['128k']],
|
||||||
|
[/^deepseek-v3(?:\.\d+)?(?:-.*)?$/, LIMITS['128k']],
|
||||||
|
|
||||||
|
// -------------------
|
||||||
|
// GPT-OSS / Kimi / Llama & Mistral examples
|
||||||
// -------------------
|
// -------------------
|
||||||
[/^deepseek-r1.*$/, LIMITS['128k']],
|
|
||||||
[/^deepseek-v3(?:\.1)?.*$/, LIMITS['128k']],
|
|
||||||
[/^kimi-k2-instruct.*$/, LIMITS['128k']],
|
[/^kimi-k2-instruct.*$/, LIMITS['128k']],
|
||||||
[/^gpt-oss.*$/, LIMITS['128k']],
|
[/^gpt-oss.*$/, LIMITS['128k']],
|
||||||
[/^llama-4-scout.*$/, LIMITS['10m'] as unknown as TokenCount], // ultra-long variants - handle carefully
|
[/^llama-4-scout.*$/, LIMITS['10m']],
|
||||||
[/^mistral-large-2.*$/, LIMITS['128k']],
|
[/^mistral-large-2.*$/, LIMITS['128k']],
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user