Fix and update the token limits handling (#754)

* fix: make the token limits regex normalize e.g. `some-model-1.1` -> `some-model` while preserving e.g. `gpt-4.1` as-is.
* feat: update the token limits regex for the latest models `GLM-4.6` and `deepseek-v3.2-exp`.
* feat: add the exact token limit number 202752, per the model config file, for `GLM-4.6`.
2025-12-19 09:33:53 +00:00 · 2025-10-14 01:11:55 -07:00
parent a779d44b38
commit 9d664623f5
2 changed files with 53 additions and 18 deletions
--- a/packages/core/src/core/tokenLimits.test.ts
+++ b/packages/core/src/core/tokenLimits.test.ts
@@ -58,9 +58,29 @@ describe('normalize', () => {
    expect(normalize('gemini-2.0-flash-preview')).toBe('gemini-2.0-flash');
  });
-  it('should remove version numbers with dots when they are at the end', () => {
+  it('should not remove "-latest" from specific Qwen model names', () => {
-    expect(normalize('gpt-4.1.1-latest')).toBe('gpt-4.1.1');
+    expect(normalize('qwen-plus-latest')).toBe('qwen-plus-latest');
    expect(normalize('qwen-flash-latest')).toBe('qwen-flash-latest');
    expect(normalize('qwen-vl-max-latest')).toBe('qwen-vl-max-latest');
  });
  it('should remove date like suffixes', () => {
    expect(normalize('deepseek-r1-0528')).toBe('deepseek-r1');
  });
  it('should remove literal "-latest" "-exp" suffixes', () => {
    expect(normalize('gpt-4.1-latest')).toBe('gpt-4.1');
    expect(normalize('deepseek-v3.2-exp')).toBe('deepseek-v3.2');
  });
  it('should remove suffix version numbers with "v" prefix', () => {
    expect(normalize('model-test-v1.1')).toBe('model-test');
    expect(normalize('model-v1.1')).toBe('model');
  });
  it('should remove suffix version numbers w/o "v" prefix only if they are preceded by another dash', () => {
    expect(normalize('model-test-1.1')).toBe('model-test');
    expect(normalize('gpt-4.1')).toBe('gpt-4.1');
  });
 });
@@ -188,6 +208,9 @@ describe('tokenLimit', () => {
    it('should return the correct limit for glm-4.5', () => {
      expect(tokenLimit('glm-4.5')).toBe(131072);
    });
    it('should return the correct limit for glm-4.6', () => {
      expect(tokenLimit('glm-4.6')).toBe(202752);
    });
  });
  describe('Other models', () => {
@@ -200,6 +223,9 @@ describe('tokenLimit', () => {
    it('should return the correct limit for deepseek-v3.1', () => {
      expect(tokenLimit('deepseek-v3.1')).toBe(131072);
    });
    it('should return the correct limit for deepseek-v3.2', () => {
      expect(tokenLimit('deepseek-v3.2-exp')).toBe(131072);
    });
    it('should return the correct limit for kimi-k2-instruct', () => {
      expect(tokenLimit('kimi-k2-instruct')).toBe(131072);
    });
--- a/packages/core/src/core/tokenLimits.ts
+++ b/packages/core/src/core/tokenLimits.ts
@@ -20,7 +20,7 @@ const LIMITS = {
  '32k': 32_768,
  '64k': 65_536,
  '128k': 131_072,
-  '200k': 200_000, // vendor-declared decimal (OpenAI / Anthropic use 200k)
+  '200k': 200_000, // vendor-declared decimal, used by OpenAI, Anthropic, GLM etc.
  '256k': 262_144,
  '512k': 524_288,
  '1m': 1_048_576,
@@ -49,15 +49,18 @@ export function normalize(model: string): string {
  s = s.replace(/-preview/g, '');
  // Special handling for Qwen model names that include "-latest" as part of the model name
  if (!s.match(/^qwen-(?:plus|flash|vl-max)-latest$/)) {
-    // \d{6,} - Match 6 or more digits (dates) like -20250219 (6+ digit dates)
+    // Regex breakdown:
    // -(?:...)$ - Non-capturing group for suffixes at the end of the string
    // The following patterns are matched within the group:
    //   \d{4,} - Match 4 or more digits (dates) like -20250219 -0528 (4+ digit dates)
    //   \d+x\d+b - Match patterns like -4x8b (the "x" is required, so a plain -7b or -70b is not matched by this alternative)
    //   v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
-    // -\d+(?:\.\d+)+ - Match version numbers with dots (that are preceded by a dash),
+    //   (?<=-[^-]+-)\d+(?:\.\d+)+ - Match version numbers with dots that are preceded by another dash,
-    //   like -1.1, -2.0.1 but only when they're suffixes, Example: model-test-1.1 → model-test;
+    //     like -1.1, -2.0.1 but only when they are preceded by another dash, Example: model-test-1.1 → model-test;
-    //   Note: this does NOT match 4.1 in gpt-4.1 because there's no dash before 4.1 in that context.
+    //     Note: this does NOT match 4.1 in gpt-4.1 because the only dash is the one directly before 4.1; there is no second, earlier dash in the name.
-    // latest - Match the literal string "latest"
+    //   latest|exp - Match the literal string "latest" or "exp"
    s = s.replace(
-      /-(?:\d{6,}|\d+x\d+b|v\d+(?:\.\d+)*|-\d+(?:\.\d+)+|latest)$/g,
+      /-(?:\d{4,}|\d+x\d+b|v\d+(?:\.\d+)*|(?<=-[^-]+-)\d+(?:\.\d+)+|latest|exp)$/g,
      '',
    );
  }
@@ -149,18 +152,24 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
  // -------------------
  // Zhipu GLM
  // -------------------
-  [/^glm-4\.5v.*$/, LIMITS['64k']],
+  [/^glm-4\.5v(?:-.*)?$/, LIMITS['64k']],
-  [/^glm-4\.5-air.*$/, LIMITS['128k']],
+  [/^glm-4\.5-air(?:-.*)?$/, LIMITS['128k']],
-  [/^glm-4\.5.*$/, LIMITS['128k']],
+  [/^glm-4\.5(?:-.*)?$/, LIMITS['128k']],
  [/^glm-4\.6(?:-.*)?$/, 202_752 as unknown as TokenCount], // exact limit from the model config file
  // -------------------
-  // DeepSeek / GPT-OSS / Kimi / Llama & Mistral examples
+  // DeepSeek
  // -------------------
  [/^deepseek$/, LIMITS['128k']],
  [/^deepseek-r1(?:-.*)?$/, LIMITS['128k']],
  [/^deepseek-v3(?:\.\d+)?(?:-.*)?$/, LIMITS['128k']],
  // -------------------
  // GPT-OSS / Kimi / Llama & Mistral examples
  // -------------------
  [/^deepseek-r1.*$/, LIMITS['128k']],
  [/^deepseek-v3(?:\.1)?.*$/, LIMITS['128k']],
  [/^kimi-k2-instruct.*$/, LIMITS['128k']],
  [/^gpt-oss.*$/, LIMITS['128k']],
-  [/^llama-4-scout.*$/, LIMITS['10m'] as unknown as TokenCount], // ultra-long variants - handle carefully
+  [/^llama-4-scout.*$/, LIMITS['10m']],
  [/^mistral-large-2.*$/, LIMITS['128k']],
 ];