mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-19 09:33:53 +00:00
Re-implement the tokenLimits module to make it work correctly for Qwen and many other model types. (#542)
The original tokenLimits was copied over from gemini-cli and only works with gemini.
This commit is contained in:
227
packages/core/src/core/tokenLimits.test.ts
Normal file
227
packages/core/src/core/tokenLimits.test.ts
Normal file
@@ -0,0 +1,227 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { normalize, tokenLimit, DEFAULT_TOKEN_LIMIT } from './tokenLimits.js';
|
||||
|
||||
describe('normalize', () => {
|
||||
it('should lowercase and trim the model string', () => {
|
||||
expect(normalize(' GEMINI-1.5-PRO ')).toBe('gemini-1.5-pro');
|
||||
});
|
||||
|
||||
it('should strip provider prefixes', () => {
|
||||
expect(normalize('google/gemini-1.5-pro')).toBe('gemini-1.5-pro');
|
||||
expect(normalize('anthropic/claude-3.5-sonnet')).toBe('claude-3.5-sonnet');
|
||||
});
|
||||
|
||||
it('should handle pipe and colon separators', () => {
|
||||
expect(normalize('qwen|qwen2.5:qwen2.5-1m')).toBe('qwen2.5-1m');
|
||||
});
|
||||
|
||||
it('should collapse whitespace to a single hyphen', () => {
|
||||
expect(normalize('claude 3.5 sonnet')).toBe('claude-3.5-sonnet');
|
||||
});
|
||||
|
||||
it('should remove date and version suffixes', () => {
|
||||
expect(normalize('gemini-1.5-pro-20250219')).toBe('gemini-1.5-pro');
|
||||
expect(normalize('gpt-4o-mini-v1')).toBe('gpt-4o-mini');
|
||||
expect(normalize('claude-3.7-sonnet-20240715')).toBe('claude-3.7-sonnet');
|
||||
expect(normalize('gpt-4.1-latest')).toBe('gpt-4.1');
|
||||
expect(normalize('gemini-2.0-flash-preview-20250520')).toBe(
|
||||
'gemini-2.0-flash',
|
||||
);
|
||||
});
|
||||
|
||||
it('should remove quantization and numeric suffixes', () => {
|
||||
expect(normalize('qwen3-coder-7b-4bit')).toBe('qwen3-coder-7b');
|
||||
expect(normalize('llama-4-scout-int8')).toBe('llama-4-scout');
|
||||
expect(normalize('mistral-large-2-bf16')).toBe('mistral-large-2');
|
||||
expect(normalize('deepseek-v3.1-q4')).toBe('deepseek-v3.1');
|
||||
expect(normalize('qwen2.5-quantized')).toBe('qwen2.5');
|
||||
});
|
||||
|
||||
it('should handle a combination of normalization rules', () => {
|
||||
expect(normalize(' Google/GEMINI-2.5-PRO:gemini-2.5-pro-20250605 ')).toBe(
|
||||
'gemini-2.5-pro',
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle empty or null input', () => {
|
||||
expect(normalize('')).toBe('');
|
||||
expect(normalize(undefined as unknown as string)).toBe('');
|
||||
expect(normalize(null as unknown as string)).toBe('');
|
||||
});
|
||||
|
||||
it('should remove preview suffixes', () => {
|
||||
expect(normalize('gemini-2.0-flash-preview')).toBe('gemini-2.0-flash');
|
||||
});
|
||||
|
||||
it('should remove version numbers with dots when they are at the end', () => {
|
||||
expect(normalize('gpt-4.1.1-latest')).toBe('gpt-4.1.1');
|
||||
expect(normalize('gpt-4.1-latest')).toBe('gpt-4.1');
|
||||
});
|
||||
});
|
||||
|
||||
describe('tokenLimit', () => {
|
||||
// Test cases for each model family
|
||||
describe('Google Gemini', () => {
|
||||
it('should return the correct limit for Gemini 1.5 Pro', () => {
|
||||
expect(tokenLimit('gemini-1.5-pro')).toBe(2097152);
|
||||
});
|
||||
it('should return the correct limit for Gemini 1.5 Flash', () => {
|
||||
expect(tokenLimit('gemini-1.5-flash')).toBe(1048576);
|
||||
});
|
||||
it('should return the correct limit for Gemini 2.5 Pro', () => {
|
||||
expect(tokenLimit('gemini-2.5-pro')).toBe(1048576);
|
||||
});
|
||||
it('should return the correct limit for Gemini 2.5 Flash', () => {
|
||||
expect(tokenLimit('gemini-2.5-flash')).toBe(1048576);
|
||||
});
|
||||
it('should return the correct limit for Gemini 2.0 Flash with image generation', () => {
|
||||
expect(tokenLimit('gemini-2.0-flash-image-generation')).toBe(32768);
|
||||
});
|
||||
it('should return the correct limit for Gemini 2.0 Flash', () => {
|
||||
expect(tokenLimit('gemini-2.0-flash')).toBe(1048576);
|
||||
});
|
||||
});
|
||||
|
||||
describe('OpenAI', () => {
|
||||
it('should return the correct limit for o3-mini', () => {
|
||||
expect(tokenLimit('o3-mini')).toBe(200000);
|
||||
});
|
||||
it('should return the correct limit for o3 models', () => {
|
||||
expect(tokenLimit('o3')).toBe(200000);
|
||||
});
|
||||
it('should return the correct limit for o4-mini', () => {
|
||||
expect(tokenLimit('o4-mini')).toBe(200000);
|
||||
});
|
||||
it('should return the correct limit for gpt-4o-mini', () => {
|
||||
expect(tokenLimit('gpt-4o-mini')).toBe(131072);
|
||||
});
|
||||
it('should return the correct limit for gpt-4o', () => {
|
||||
expect(tokenLimit('gpt-4o')).toBe(131072);
|
||||
});
|
||||
it('should return the correct limit for gpt-4.1-mini', () => {
|
||||
expect(tokenLimit('gpt-4.1-mini')).toBe(1048576);
|
||||
});
|
||||
it('should return the correct limit for gpt-4.1 models', () => {
|
||||
expect(tokenLimit('gpt-4.1')).toBe(1048576);
|
||||
});
|
||||
it('should return the correct limit for gpt-4', () => {
|
||||
expect(tokenLimit('gpt-4')).toBe(131072);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Anthropic Claude', () => {
|
||||
it('should return the correct limit for Claude 3.5 Sonnet', () => {
|
||||
expect(tokenLimit('claude-3.5-sonnet')).toBe(200000);
|
||||
});
|
||||
it('should return the correct limit for Claude 3.7 Sonnet', () => {
|
||||
expect(tokenLimit('claude-3.7-sonnet')).toBe(1048576);
|
||||
});
|
||||
it('should return the correct limit for Claude Sonnet 4', () => {
|
||||
expect(tokenLimit('claude-sonnet-4')).toBe(1048576);
|
||||
});
|
||||
it('should return the correct limit for Claude Opus 4', () => {
|
||||
expect(tokenLimit('claude-opus-4')).toBe(1048576);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Alibaba Qwen', () => {
|
||||
it('should return the correct limit for qwen3-coder commercial models', () => {
|
||||
expect(tokenLimit('qwen3-coder-plus')).toBe(1048576);
|
||||
expect(tokenLimit('qwen3-coder-plus-20250601')).toBe(1048576);
|
||||
expect(tokenLimit('qwen3-coder-flash')).toBe(1048576);
|
||||
expect(tokenLimit('qwen3-coder-flash-20250601')).toBe(1048576);
|
||||
});
|
||||
|
||||
it('should return the correct limit for qwen3-coder open source models', () => {
|
||||
expect(tokenLimit('qwen3-coder-7b')).toBe(262144);
|
||||
expect(tokenLimit('qwen3-coder-480b-a35b-instruct')).toBe(262144);
|
||||
expect(tokenLimit('qwen3-coder-30b-a3b-instruct')).toBe(262144);
|
||||
});
|
||||
|
||||
it('should return the correct limit for qwen3 2507 variants', () => {
|
||||
expect(tokenLimit('qwen3-some-model-2507-instruct')).toBe(262144);
|
||||
});
|
||||
|
||||
it('should return the correct limit for qwen2.5-1m', () => {
|
||||
expect(tokenLimit('qwen2.5-1m')).toBe(1048576);
|
||||
expect(tokenLimit('qwen2.5-1m-instruct')).toBe(1048576);
|
||||
});
|
||||
|
||||
it('should return the correct limit for qwen2.5', () => {
|
||||
expect(tokenLimit('qwen2.5')).toBe(131072);
|
||||
expect(tokenLimit('qwen2.5-instruct')).toBe(131072);
|
||||
});
|
||||
|
||||
it('should return the correct limit for qwen-plus', () => {
|
||||
expect(tokenLimit('qwen-plus-latest')).toBe(1048576);
|
||||
expect(tokenLimit('qwen-plus')).toBe(131072);
|
||||
});
|
||||
|
||||
it('should return the correct limit for qwen-flash', () => {
|
||||
expect(tokenLimit('qwen-flash-latest')).toBe(1048576);
|
||||
});
|
||||
|
||||
it('should return the correct limit for qwen-turbo', () => {
|
||||
expect(tokenLimit('qwen-turbo')).toBe(131072);
|
||||
expect(tokenLimit('qwen-turbo-latest')).toBe(131072);
|
||||
});
|
||||
});
|
||||
|
||||
describe('ByteDance Seed-OSS', () => {
|
||||
it('should return the correct limit for seed-oss', () => {
|
||||
expect(tokenLimit('seed-oss')).toBe(524288);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Zhipu GLM', () => {
|
||||
it('should return the correct limit for glm-4.5v', () => {
|
||||
expect(tokenLimit('glm-4.5v')).toBe(65536);
|
||||
});
|
||||
it('should return the correct limit for glm-4.5-air', () => {
|
||||
expect(tokenLimit('glm-4.5-air')).toBe(131072);
|
||||
});
|
||||
it('should return the correct limit for glm-4.5', () => {
|
||||
expect(tokenLimit('glm-4.5')).toBe(131072);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Other models', () => {
|
||||
it('should return the correct limit for deepseek-r1', () => {
|
||||
expect(tokenLimit('deepseek-r1')).toBe(131072);
|
||||
});
|
||||
it('should return the correct limit for deepseek-v3', () => {
|
||||
expect(tokenLimit('deepseek-v3')).toBe(131072);
|
||||
});
|
||||
it('should return the correct limit for deepseek-v3.1', () => {
|
||||
expect(tokenLimit('deepseek-v3.1')).toBe(131072);
|
||||
});
|
||||
it('should return the correct limit for kimi-k2-instruct', () => {
|
||||
expect(tokenLimit('kimi-k2-instruct')).toBe(131072);
|
||||
});
|
||||
it('should return the correct limit for gpt-oss', () => {
|
||||
expect(tokenLimit('gpt-oss')).toBe(131072);
|
||||
});
|
||||
it('should return the correct limit for llama-4-scout', () => {
|
||||
expect(tokenLimit('llama-4-scout')).toBe(10485760);
|
||||
});
|
||||
it('should return the correct limit for mistral-large-2', () => {
|
||||
expect(tokenLimit('mistral-large-2')).toBe(131072);
|
||||
});
|
||||
});
|
||||
|
||||
// Test for default limit
|
||||
it('should return the default token limit for an unknown model', () => {
|
||||
expect(tokenLimit('unknown-model-v1.0')).toBe(DEFAULT_TOKEN_LIMIT);
|
||||
});
|
||||
|
||||
// Test with complex model string
|
||||
it('should return the correct limit for a complex model string', () => {
|
||||
expect(tokenLimit(' a/b/c|GPT-4o:gpt-4o-2024-05-13-q4 ')).toBe(131072);
|
||||
});
|
||||
|
||||
// Test case-insensitive matching
|
||||
it('should handle case-insensitive model names', () => {
|
||||
expect(tokenLimit('GPT-4O')).toBe(131072);
|
||||
expect(tokenLimit('CLAUDE-3.5-SONNET')).toBe(200000);
|
||||
});
|
||||
});
|
||||
@@ -1,32 +1,154 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
type Model = string;
|
||||
type TokenCount = number;
|
||||
|
||||
export const DEFAULT_TOKEN_LIMIT = 1_048_576;
|
||||
export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two)
|
||||
|
||||
export function tokenLimit(model: Model): TokenCount {
|
||||
// Add other models as they become relevant or if specified by config
|
||||
// Pulled from https://ai.google.dev/gemini-api/docs/models
|
||||
switch (model) {
|
||||
case 'gemini-1.5-pro':
|
||||
return 2_097_152;
|
||||
case 'gemini-1.5-flash':
|
||||
case 'gemini-2.5-pro-preview-05-06':
|
||||
case 'gemini-2.5-pro-preview-06-05':
|
||||
case 'gemini-2.5-pro':
|
||||
case 'gemini-2.5-flash-preview-05-20':
|
||||
case 'gemini-2.5-flash':
|
||||
case 'gemini-2.5-flash-lite':
|
||||
case 'gemini-2.0-flash':
|
||||
return 1_048_576;
|
||||
case 'gemini-2.0-flash-preview-image-generation':
|
||||
return 32_000;
|
||||
default:
|
||||
return DEFAULT_TOKEN_LIMIT;
|
||||
/**
|
||||
* Accurate numeric limits:
|
||||
* - power-of-two approximations (128K -> 131072, 256K -> 262144, etc.)
|
||||
* - vendor-declared exact values (e.g., 200k -> 200000) are used as stated in docs.
|
||||
*/
|
||||
const LIMITS = {
|
||||
'32k': 32_768,
|
||||
'64k': 65_536,
|
||||
'128k': 131_072,
|
||||
'200k': 200_000, // vendor-declared decimal (OpenAI / Anthropic use 200k)
|
||||
'256k': 262_144,
|
||||
'512k': 524_288,
|
||||
'1m': 1_048_576,
|
||||
'2m': 2_097_152,
|
||||
'10m': 10_485_760, // 10 million tokens
|
||||
} as const;
|
||||
|
||||
/** Robust normalizer: strips provider prefixes, pipes/colons, date/version suffixes, etc. */
|
||||
export function normalize(model: string): string {
|
||||
let s = (model ?? '').toLowerCase().trim();
|
||||
|
||||
// keep final path segment (strip provider prefixes), handle pipe/colon
|
||||
s = s.replace(/^.*\//, '');
|
||||
s = s.split('|').pop() ?? s;
|
||||
s = s.split(':').pop() ?? s;
|
||||
|
||||
// collapse whitespace to single hyphen
|
||||
s = s.replace(/\s+/g, '-');
|
||||
|
||||
// remove trailing build / date / revision suffixes:
|
||||
// - dates (e.g., -20250219), -v1, version numbers, 'latest', 'preview' etc.
|
||||
s = s.replace(/-preview/g, '');
|
||||
// Special handling for Qwen model names that include "-latest" as part of the model name
|
||||
if (!s.match(/^qwen-(?:plus|flash)-latest$/)) {
|
||||
// \d{6,} - Match 6 or more digits (dates) like -20250219 (6+ digit dates)
|
||||
// \d+x\d+b - Match patterns like 4x8b, -7b, -70b
|
||||
// v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
|
||||
// -\d+(?:\.\d+)+ - Match version numbers with dots (that are preceded by a dash),
|
||||
// like -1.1, -2.0.1 but only when they're suffixes, Example: model-test-1.1 → model-test;
|
||||
// Note: this does NOT match 4.1 in gpt-4.1 because there's no dash before 4.1 in that context.
|
||||
// latest - Match the literal string "latest"
|
||||
s = s.replace(
|
||||
/-(?:\d{6,}|\d+x\d+b|v\d+(?:\.\d+)*|-\d+(?:\.\d+)+|latest)$/g,
|
||||
'',
|
||||
);
|
||||
}
|
||||
|
||||
// remove quantization / numeric / precision suffixes common in local/community models
|
||||
s = s.replace(/-(?:\d?bit|int[48]|bf16|fp16|q[45]|quantized)$/g, '');
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/** Ordered regex patterns: most specific -> most general (first match wins). */
|
||||
const PATTERNS: Array<[RegExp, TokenCount]> = [
|
||||
// -------------------
|
||||
// Google Gemini
|
||||
// -------------------
|
||||
[/^gemini-1\.5-pro$/, LIMITS['2m']],
|
||||
[/^gemini-1\.5-flash$/, LIMITS['1m']],
|
||||
[/^gemini-2\.5-pro.*$/, LIMITS['1m']],
|
||||
[/^gemini-2\.5-flash.*$/, LIMITS['1m']],
|
||||
[/^gemini-2\.0-flash-image-generation$/, LIMITS['32k']],
|
||||
[/^gemini-2\.0-flash.*$/, LIMITS['1m']],
|
||||
|
||||
// -------------------
|
||||
// OpenAI (o3 / o4-mini / gpt-4.1 / gpt-4o family)
|
||||
// o3 and o4-mini document a 200,000-token context window (decimal).
|
||||
// Note: GPT-4.1 models typically report 1_048_576 (1M) context in OpenAI announcements.
|
||||
[/^o3(?:-mini|$).*$/, LIMITS['200k']],
|
||||
[/^o3.*$/, LIMITS['200k']],
|
||||
[/^o4-mini.*$/, LIMITS['200k']],
|
||||
[/^gpt-4\.1-mini.*$/, LIMITS['1m']],
|
||||
[/^gpt-4\.1.*$/, LIMITS['1m']],
|
||||
[/^gpt-4o-mini.*$/, LIMITS['128k']],
|
||||
[/^gpt-4o.*$/, LIMITS['128k']],
|
||||
[/^gpt-4.*$/, LIMITS['128k']],
|
||||
|
||||
// -------------------
|
||||
// Anthropic Claude
|
||||
// - Claude Sonnet / Sonnet 3.5 and related Sonnet variants: 200,000 tokens documented.
|
||||
// - Some Sonnet/Opus models offer 1M in beta/enterprise tiers (handled separately if needed).
|
||||
[/^claude-3\.5-sonnet.*$/, LIMITS['200k']],
|
||||
[/^claude-3\.7-sonnet.*$/, LIMITS['1m']], // some Sonnet 3.7/Opus variants advertise 1M beta in docs
|
||||
[/^claude-sonnet-4.*$/, LIMITS['1m']],
|
||||
[/^claude-opus-4.*$/, LIMITS['1m']],
|
||||
|
||||
// -------------------
|
||||
// Alibaba / Qwen
|
||||
// -------------------
|
||||
// Commercial Qwen3-Coder-Plus: 1M token context
|
||||
[/^qwen3-coder-plus(-.*)?$/, LIMITS['1m']], // catches "qwen3-coder-plus" and date variants
|
||||
|
||||
// Commercial Qwen3-Coder-Flash: 1M token context
|
||||
[/^qwen3-coder-flash(-.*)?$/, LIMITS['1m']], // catches "qwen3-coder-flash" and date variants
|
||||
|
||||
// Open-source Qwen3-Coder variants: 256K native
|
||||
[/^qwen3-coder-.*$/, LIMITS['256k']],
|
||||
// Open-source Qwen3 2507 variants: 256K native
|
||||
[/^qwen3-.*-2507-.*$/, LIMITS['256k']],
|
||||
|
||||
// Open-source long-context Qwen2.5-1M
|
||||
[/^qwen2\.5-1m.*$/, LIMITS['1m']],
|
||||
|
||||
// Standard Qwen2.5: 128K
|
||||
[/^qwen2\.5.*$/, LIMITS['128k']],
|
||||
|
||||
// Studio commercial Qwen-Plus / Qwen-Flash / Qwen-Turbo
|
||||
[/^qwen-plus-latest$/, LIMITS['1m']], // Commercial latest: 1M
|
||||
[/^qwen-plus.*$/, LIMITS['128k']], // Standard: 128K
|
||||
[/^qwen-flash-latest$/, LIMITS['1m']],
|
||||
[/^qwen-turbo.*$/, LIMITS['128k']],
|
||||
|
||||
// -------------------
|
||||
// ByteDance Seed-OSS (512K)
|
||||
// -------------------
|
||||
[/^seed-oss.*$/, LIMITS['512k']],
|
||||
|
||||
// -------------------
|
||||
// Zhipu GLM
|
||||
// -------------------
|
||||
[/^glm-4\.5v.*$/, LIMITS['64k']],
|
||||
[/^glm-4\.5-air.*$/, LIMITS['128k']],
|
||||
[/^glm-4\.5.*$/, LIMITS['128k']],
|
||||
|
||||
// -------------------
|
||||
// DeepSeek / GPT-OSS / Kimi / Llama & Mistral examples
|
||||
// -------------------
|
||||
[/^deepseek-r1.*$/, LIMITS['128k']],
|
||||
[/^deepseek-v3(?:\.1)?.*$/, LIMITS['128k']],
|
||||
[/^kimi-k2-instruct.*$/, LIMITS['128k']],
|
||||
[/^gpt-oss.*$/, LIMITS['128k']],
|
||||
[/^llama-4-scout.*$/, LIMITS['10m'] as unknown as TokenCount], // ultra-long variants - handle carefully
|
||||
[/^mistral-large-2.*$/, LIMITS['128k']],
|
||||
];
|
||||
|
||||
/** Return the token limit for a model string (uses normalize + ordered regex list). */
|
||||
export function tokenLimit(model: Model): TokenCount {
|
||||
const norm = normalize(model);
|
||||
|
||||
for (const [regex, limit] of PATTERNS) {
|
||||
if (regex.test(norm)) {
|
||||
return limit;
|
||||
}
|
||||
}
|
||||
|
||||
// final fallback: DEFAULT_TOKEN_LIMIT (power-of-two 128K)
|
||||
return DEFAULT_TOKEN_LIMIT;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user