Fix and update the token limits handling (#754)

* fix: make token limits regex normalize e.g. `some-model-1.1` -> `some-model` while preserving e.g. `gpt-4.1` as-is.

* feat: update token limits regex for latest models `GLM-4.6`, `deepseek-v3.2-exp`.

* feat: add exact token limit number 202752 per the model config file for `GLM-4.6`.
This commit is contained in:
zhutao100
2025-10-14 01:11:55 -07:00
committed by GitHub
parent a779d44b38
commit 9d664623f5
2 changed files with 53 additions and 18 deletions

View File

@@ -58,9 +58,29 @@ describe('normalize', () => {
expect(normalize('gemini-2.0-flash-preview')).toBe('gemini-2.0-flash'); expect(normalize('gemini-2.0-flash-preview')).toBe('gemini-2.0-flash');
}); });
it('should remove version numbers with dots when they are at the end', () => { it('should not remove "-latest" from specific Qwen model names', () => {
expect(normalize('gpt-4.1.1-latest')).toBe('gpt-4.1.1'); expect(normalize('qwen-plus-latest')).toBe('qwen-plus-latest');
expect(normalize('qwen-flash-latest')).toBe('qwen-flash-latest');
expect(normalize('qwen-vl-max-latest')).toBe('qwen-vl-max-latest');
});
it('should remove date like suffixes', () => {
expect(normalize('deepseek-r1-0528')).toBe('deepseek-r1');
});
it('should remove literal "-latest" "-exp" suffixes', () => {
expect(normalize('gpt-4.1-latest')).toBe('gpt-4.1'); expect(normalize('gpt-4.1-latest')).toBe('gpt-4.1');
expect(normalize('deepseek-v3.2-exp')).toBe('deepseek-v3.2');
});
it('should remove suffix version numbers with "v" prefix', () => {
expect(normalize('model-test-v1.1')).toBe('model-test');
expect(normalize('model-v1.1')).toBe('model');
});
it('should remove suffix version numbers w/o "v" prefix only if they are preceded by another dash', () => {
expect(normalize('model-test-1.1')).toBe('model-test');
expect(normalize('gpt-4.1')).toBe('gpt-4.1');
}); });
}); });
@@ -188,6 +208,9 @@ describe('tokenLimit', () => {
it('should return the correct limit for glm-4.5', () => { it('should return the correct limit for glm-4.5', () => {
expect(tokenLimit('glm-4.5')).toBe(131072); expect(tokenLimit('glm-4.5')).toBe(131072);
}); });
it('should return the correct limit for glm-4.6', () => {
expect(tokenLimit('glm-4.6')).toBe(202752);
});
}); });
describe('Other models', () => { describe('Other models', () => {
@@ -200,6 +223,9 @@ describe('tokenLimit', () => {
it('should return the correct limit for deepseek-v3.1', () => { it('should return the correct limit for deepseek-v3.1', () => {
expect(tokenLimit('deepseek-v3.1')).toBe(131072); expect(tokenLimit('deepseek-v3.1')).toBe(131072);
}); });
it('should return the correct limit for deepseek-v3.2', () => {
expect(tokenLimit('deepseek-v3.2-exp')).toBe(131072);
});
it('should return the correct limit for kimi-k2-instruct', () => { it('should return the correct limit for kimi-k2-instruct', () => {
expect(tokenLimit('kimi-k2-instruct')).toBe(131072); expect(tokenLimit('kimi-k2-instruct')).toBe(131072);
}); });

View File

@@ -20,7 +20,7 @@ const LIMITS = {
'32k': 32_768, '32k': 32_768,
'64k': 65_536, '64k': 65_536,
'128k': 131_072, '128k': 131_072,
'200k': 200_000, // vendor-declared decimal (OpenAI / Anthropic use 200k) '200k': 200_000, // vendor-declared decimal, used by OpenAI, Anthropic, GLM etc.
'256k': 262_144, '256k': 262_144,
'512k': 524_288, '512k': 524_288,
'1m': 1_048_576, '1m': 1_048_576,
@@ -49,15 +49,18 @@ export function normalize(model: string): string {
s = s.replace(/-preview/g, ''); s = s.replace(/-preview/g, '');
// Special handling for Qwen model names that include "-latest" as part of the model name // Special handling for Qwen model names that include "-latest" as part of the model name
if (!s.match(/^qwen-(?:plus|flash|vl-max)-latest$/)) { if (!s.match(/^qwen-(?:plus|flash|vl-max)-latest$/)) {
// \d{6,} - Match 6 or more digits (dates) like -20250219 (6+ digit dates) // Regex breakdown:
// -(?:...)$ - Non-capturing group for suffixes at the end of the string
// The following patterns are matched within the group:
// \d{4,} - Match 4 or more digits (dates) like -20250219 -0528 (4+ digit dates)
// \d+x\d+b - Match patterns like 4x8b, -7b, -70b // \d+x\d+b - Match patterns like 4x8b, -7b, -70b
// v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3 // v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
// -\d+(?:\.\d+)+ - Match version numbers with dots (that are preceded by a dash), // (?<=-[^-]+-)\d+(?:\.\d+)+ - Match version numbers with dots that are preceded by another dash,
// like -1.1, -2.0.1 but only when they're suffixes, Example: model-test-1.1 → model-test; // like -1.1, -2.0.1 but only when they are preceded by another dash, Example: model-test-1.1 → model-test;
// Note: this does NOT match 4.1 in gpt-4.1 because there's no dash before 4.1 in that context. // Note: this does NOT match 4.1 in gpt-4.1 because there's no dash before -4.1 in that context.
// latest - Match the literal string "latest" // latest|exp - Match the literal string "latest" or "exp"
s = s.replace( s = s.replace(
/-(?:\d{6,}|\d+x\d+b|v\d+(?:\.\d+)*|-\d+(?:\.\d+)+|latest)$/g, /-(?:\d{4,}|\d+x\d+b|v\d+(?:\.\d+)*|(?<=-[^-]+-)\d+(?:\.\d+)+|latest|exp)$/g,
'', '',
); );
} }
@@ -149,18 +152,24 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
// ------------------- // -------------------
// Zhipu GLM // Zhipu GLM
// ------------------- // -------------------
[/^glm-4\.5v.*$/, LIMITS['64k']], [/^glm-4\.5v(?:-.*)?$/, LIMITS['64k']],
[/^glm-4\.5-air.*$/, LIMITS['128k']], [/^glm-4\.5-air(?:-.*)?$/, LIMITS['128k']],
[/^glm-4\.5.*$/, LIMITS['128k']], [/^glm-4\.5(?:-.*)?$/, LIMITS['128k']],
[/^glm-4\.6(?:-.*)?$/, 202_752 as unknown as TokenCount], // exact limit from the model config file
// ------------------- // -------------------
// DeepSeek / GPT-OSS / Kimi / Llama & Mistral examples // DeepSeek
// -------------------
[/^deepseek$/, LIMITS['128k']],
[/^deepseek-r1(?:-.*)?$/, LIMITS['128k']],
[/^deepseek-v3(?:\.\d+)?(?:-.*)?$/, LIMITS['128k']],
// -------------------
// GPT-OSS / Kimi / Llama & Mistral examples
// ------------------- // -------------------
[/^deepseek-r1.*$/, LIMITS['128k']],
[/^deepseek-v3(?:\.1)?.*$/, LIMITS['128k']],
[/^kimi-k2-instruct.*$/, LIMITS['128k']], [/^kimi-k2-instruct.*$/, LIMITS['128k']],
[/^gpt-oss.*$/, LIMITS['128k']], [/^gpt-oss.*$/, LIMITS['128k']],
[/^llama-4-scout.*$/, LIMITS['10m'] as unknown as TokenCount], // ultra-long variants - handle carefully [/^llama-4-scout.*$/, LIMITS['10m']],
[/^mistral-large-2.*$/, LIMITS['128k']], [/^mistral-large-2.*$/, LIMITS['128k']],
]; ];