diff --git a/packages/core/src/core/tokenLimits.test.ts b/packages/core/src/core/tokenLimits.test.ts index e5f79368..bb9f0fd2 100644 --- a/packages/core/src/core/tokenLimits.test.ts +++ b/packages/core/src/core/tokenLimits.test.ts @@ -58,9 +58,29 @@ describe('normalize', () => { expect(normalize('gemini-2.0-flash-preview')).toBe('gemini-2.0-flash'); }); - it('should remove version numbers with dots when they are at the end', () => { - expect(normalize('gpt-4.1.1-latest')).toBe('gpt-4.1.1'); + it('should not remove "-latest" from specific Qwen model names', () => { + expect(normalize('qwen-plus-latest')).toBe('qwen-plus-latest'); + expect(normalize('qwen-flash-latest')).toBe('qwen-flash-latest'); + expect(normalize('qwen-vl-max-latest')).toBe('qwen-vl-max-latest'); + }); + + it('should remove date like suffixes', () => { + expect(normalize('deepseek-r1-0528')).toBe('deepseek-r1'); + }); + + it('should remove literal "-latest" "-exp" suffixes', () => { expect(normalize('gpt-4.1-latest')).toBe('gpt-4.1'); + expect(normalize('deepseek-v3.2-exp')).toBe('deepseek-v3.2'); + }); + + it('should remove suffix version numbers with "v" prefix', () => { + expect(normalize('model-test-v1.1')).toBe('model-test'); + expect(normalize('model-v1.1')).toBe('model'); + }); + + it('should remove suffix version numbers w/o "v" prefix only if they are preceded by another dash', () => { + expect(normalize('model-test-1.1')).toBe('model-test'); + expect(normalize('gpt-4.1')).toBe('gpt-4.1'); }); }); @@ -188,6 +208,9 @@ describe('tokenLimit', () => { it('should return the correct limit for glm-4.5', () => { expect(tokenLimit('glm-4.5')).toBe(131072); }); + it('should return the correct limit for glm-4.6', () => { + expect(tokenLimit('glm-4.6')).toBe(202752); + }); }); describe('Other models', () => { @@ -200,6 +223,9 @@ describe('tokenLimit', () => { it('should return the correct limit for deepseek-v3.1', () => { expect(tokenLimit('deepseek-v3.1')).toBe(131072); }); + it('should return the 
correct limit for deepseek-v3.2', () => { + expect(tokenLimit('deepseek-v3.2-exp')).toBe(131072); + }); it('should return the correct limit for kimi-k2-instruct', () => { expect(tokenLimit('kimi-k2-instruct')).toBe(131072); }); diff --git a/packages/core/src/core/tokenLimits.ts b/packages/core/src/core/tokenLimits.ts index 5992e8f0..cd3a0a0f 100644 --- a/packages/core/src/core/tokenLimits.ts +++ b/packages/core/src/core/tokenLimits.ts @@ -20,7 +20,7 @@ const LIMITS = { '32k': 32_768, '64k': 65_536, '128k': 131_072, - '200k': 200_000, // vendor-declared decimal (OpenAI / Anthropic use 200k) + '200k': 200_000, // vendor-declared decimal, used by OpenAI, Anthropic, GLM etc. '256k': 262_144, '512k': 524_288, '1m': 1_048_576, @@ -49,15 +49,18 @@ export function normalize(model: string): string { s = s.replace(/-preview/g, ''); // Special handling for Qwen model names that include "-latest" as part of the model name if (!s.match(/^qwen-(?:plus|flash|vl-max)-latest$/)) { - // \d{6,} - Match 6 or more digits (dates) like -20250219 (6+ digit dates) - // \d+x\d+b - Match patterns like 4x8b, -7b, -70b - // v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3 - // -\d+(?:\.\d+)+ - Match version numbers with dots (that are preceded by a dash), - // like -1.1, -2.0.1 but only when they're suffixes, Example: model-test-1.1 → model-test; - // Note: this does NOT match 4.1 in gpt-4.1 because there's no dash before 4.1 in that context. 
- // latest - Match the literal string "latest" + // Regex breakdown: + // -(?:...)$ - Non-capturing group for suffixes at the end of the string + // The following patterns are matched within the group: + // \d{4,} - Match 4 or more digits (date-like suffixes) such as -20250219 or -0528 + // \d+x\d+b - Match NxMb size patterns like -4x8b, -8x22b (a plain -7b or -70b is not matched by this alternative) + // v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3 + // (?<=-[^-]+-)\d+(?:\.\d+)+ - Match dotted version numbers like -1.1, -2.0.1, + // but only when the segment before them is itself preceded by a dash. Example: model-test-1.1 → model-test; + // Note: this does NOT match 4.1 in gpt-4.1 because there is no second dash before -4.1 in that context. + // latest|exp - Match the literal string "latest" or "exp" s = s.replace( - /-(?:\d{6,}|\d+x\d+b|v\d+(?:\.\d+)*|-\d+(?:\.\d+)+|latest)$/g, + /-(?:\d{4,}|\d+x\d+b|v\d+(?:\.\d+)*|(?<=-[^-]+-)\d+(?:\.\d+)+|latest|exp)$/g, '', ); } @@ -149,18 +152,24 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [ // ------------------- // Zhipu GLM // ------------------- - [/^glm-4\.5v.*$/, LIMITS['64k']], - [/^glm-4\.5-air.*$/, LIMITS['128k']], - [/^glm-4\.5.*$/, LIMITS['128k']], + [/^glm-4\.5v(?:-.*)?$/, LIMITS['64k']], + [/^glm-4\.5-air(?:-.*)?$/, LIMITS['128k']], + [/^glm-4\.5(?:-.*)?$/, LIMITS['128k']], + [/^glm-4\.6(?:-.*)?$/, 202_752 as unknown as TokenCount], // exact limit from the model config file // ------------------- - // DeepSeek / GPT-OSS / Kimi / Llama & Mistral examples + // DeepSeek + // ------------------- + [/^deepseek$/, LIMITS['128k']], + [/^deepseek-r1(?:-.*)?$/, LIMITS['128k']], + [/^deepseek-v3(?:\.\d+)?(?:-.*)?$/, LIMITS['128k']], + + // ------------------- + // GPT-OSS / Kimi / Llama & Mistral examples // ------------------- - [/^deepseek-r1.*$/, LIMITS['128k']], - [/^deepseek-v3(?:\.1)?.*$/, LIMITS['128k']], [/^kimi-k2-instruct.*$/, LIMITS['128k']], [/^gpt-oss.*$/, LIMITS['128k']], - [/^llama-4-scout.*$/, LIMITS['10m'] as unknown 
as TokenCount], // ultra-long variants - handle carefully + [/^llama-4-scout.*$/, LIMITS['10m']], [/^mistral-large-2.*$/, LIMITS['128k']], ];