Mirror of https://github.com/QwenLM/qwen-code.git, synced 2025-12-19 09:33:53 +00:00
feat: add support for alternative cached_tokens format in OpenAI converter (#1035)
Co-authored-by: chenhuanjie <chenhuanjie@xiaohongshu.com>
@@ -23,6 +23,14 @@ import type OpenAI from 'openai';
 import { safeJsonParse } from '../../utils/safeJsonParse.js';
 import { StreamingToolCallParser } from './streamingToolCallParser.js';
 
+/**
+ * Extended usage type that supports both OpenAI standard format and alternative formats
+ * Some models return cached_tokens at the top level instead of in prompt_tokens_details
+ */
+interface ExtendedCompletionUsage extends OpenAI.CompletionUsage {
+  cached_tokens?: number;
+}
+
 /**
  * Tool call accumulator for streaming responses
  */
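For readers unfamiliar with the two shapes the widened type is meant to accept, here is a minimal TypeScript sketch (not part of the commit; the payload values are made up): the OpenAI-standard usage nests cached_tokens under prompt_tokens_details, while some OpenAI-compatible models report cached_tokens at the top level.

import type OpenAI from 'openai';

// Sketch only: the same widening as the commit's interface.
interface ExtendedCompletionUsage extends OpenAI.CompletionUsage {
  cached_tokens?: number;
}

// OpenAI standard format: cached tokens nested under prompt_tokens_details.
const standardUsage: ExtendedCompletionUsage = {
  prompt_tokens: 120,
  completion_tokens: 40,
  total_tokens: 160,
  prompt_tokens_details: { cached_tokens: 100 }, // illustrative values
};

// Alternative format: cached_tokens reported at the top level.
const alternativeUsage: ExtendedCompletionUsage = {
  prompt_tokens: 120,
  completion_tokens: 40,
  total_tokens: 160,
  cached_tokens: 100, // illustrative values
};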
@@ -582,7 +590,13 @@ export class OpenAIContentConverter {
     const promptTokens = usage.prompt_tokens || 0;
     const completionTokens = usage.completion_tokens || 0;
     const totalTokens = usage.total_tokens || 0;
-    const cachedTokens = usage.prompt_tokens_details?.cached_tokens || 0;
+    // Support both formats: prompt_tokens_details.cached_tokens (OpenAI standard)
+    // and cached_tokens (some models return it at top level)
+    const extendedUsage = usage as ExtendedCompletionUsage;
+    const cachedTokens =
+      usage.prompt_tokens_details?.cached_tokens ??
+      extendedUsage.cached_tokens ??
+      0;
 
     // If we only have total tokens but no breakdown, estimate the split
     // Typically input is ~70% and output is ~30% for most conversations
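As a standalone illustration of the fallback order (the helper resolveCachedTokens below is hypothetical, not code from the repository): nullish coalescing only falls through on null/undefined, so the nested OpenAI-standard field wins whenever it is present, even when its value is 0, and the top-level field is consulted only when the nested one is genuinely absent.

import type OpenAI from 'openai';

// Hypothetical helper mirroring the fallback chain in the diff above.
type UsageWithTopLevelCache = OpenAI.CompletionUsage & { cached_tokens?: number };

function resolveCachedTokens(usage: UsageWithTopLevelCache): number {
  return usage.prompt_tokens_details?.cached_tokens ?? usage.cached_tokens ?? 0;
}

// Nested OpenAI-standard field wins when present, even if it is 0.
resolveCachedTokens({
  prompt_tokens: 10, completion_tokens: 5, total_tokens: 15,
  prompt_tokens_details: { cached_tokens: 0 }, cached_tokens: 8,
}); // -> 0

// Top-level field is used when the nested one is missing.
resolveCachedTokens({
  prompt_tokens: 10, completion_tokens: 5, total_tokens: 15, cached_tokens: 8,
}); // -> 8

// Neither present: default to 0.
resolveCachedTokens({
  prompt_tokens: 10, completion_tokens: 5, total_tokens: 15,
}); // -> 0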
@@ -707,7 +721,13 @@ export class OpenAIContentConverter {
     const promptTokens = usage.prompt_tokens || 0;
     const completionTokens = usage.completion_tokens || 0;
     const totalTokens = usage.total_tokens || 0;
-    const cachedTokens = usage.prompt_tokens_details?.cached_tokens || 0;
+    // Support both formats: prompt_tokens_details.cached_tokens (OpenAI standard)
+    // and cached_tokens (some models return it at top level)
+    const extendedUsage = usage as ExtendedCompletionUsage;
+    const cachedTokens =
+      usage.prompt_tokens_details?.cached_tokens ??
+      extendedUsage.cached_tokens ??
+      0;
 
     // If we only have total tokens but no breakdown, estimate the split
     // Typically input is ~70% and output is ~30% for most conversations