feat: add support for alternative cached_tokens format in OpenAI converter (#1035)

Co-authored-by: chenhuanjie <chenhuanjie@xiaohongshu.com>
Author: hj C
Date: 2025-11-14 18:09:33 +08:00
Committed by: GitHub
Parent: 29261c75e1
Commit: f0e21374c1


@@ -23,6 +23,14 @@ import type OpenAI from 'openai';
 import { safeJsonParse } from '../../utils/safeJsonParse.js';
 import { StreamingToolCallParser } from './streamingToolCallParser.js';
 
+/**
+ * Extended usage type that supports both OpenAI standard format and alternative formats
+ * Some models return cached_tokens at the top level instead of in prompt_tokens_details
+ */
+interface ExtendedCompletionUsage extends OpenAI.CompletionUsage {
+  cached_tokens?: number;
+}
+
 /**
  * Tool call accumulator for streaming responses
  */
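
For context, the two usage payload shapes this interface accommodates look roughly like the sketch below; the token counts are illustrative placeholders, not values from any particular model's response:

// OpenAI standard format: cached token count nested under prompt_tokens_details.
const standardUsage = {
  prompt_tokens: 1200,
  completion_tokens: 300,
  total_tokens: 1500,
  prompt_tokens_details: { cached_tokens: 1024 },
};

// Alternative format returned by some models: cached_tokens at the top level.
const alternativeUsage = {
  prompt_tokens: 1200,
  completion_tokens: 300,
  total_tokens: 1500,
  cached_tokens: 1024,
};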
@@ -582,7 +590,13 @@ export class OpenAIContentConverter {
     const promptTokens = usage.prompt_tokens || 0;
     const completionTokens = usage.completion_tokens || 0;
     const totalTokens = usage.total_tokens || 0;
-    const cachedTokens = usage.prompt_tokens_details?.cached_tokens || 0;
+    // Support both formats: prompt_tokens_details.cached_tokens (OpenAI standard)
+    // and cached_tokens (some models return it at top level)
+    const extendedUsage = usage as ExtendedCompletionUsage;
+    const cachedTokens =
+      usage.prompt_tokens_details?.cached_tokens ??
+      extendedUsage.cached_tokens ??
+      0;
 
     // If we only have total tokens but no breakdown, estimate the split
     // Typically input is ~70% and output is ~30% for most conversations
@@ -707,7 +721,13 @@ export class OpenAIContentConverter {
     const promptTokens = usage.prompt_tokens || 0;
     const completionTokens = usage.completion_tokens || 0;
     const totalTokens = usage.total_tokens || 0;
-    const cachedTokens = usage.prompt_tokens_details?.cached_tokens || 0;
+    // Support both formats: prompt_tokens_details.cached_tokens (OpenAI standard)
+    // and cached_tokens (some models return it at top level)
+    const extendedUsage = usage as ExtendedCompletionUsage;
+    const cachedTokens =
+      usage.prompt_tokens_details?.cached_tokens ??
+      extendedUsage.cached_tokens ??
+      0;
 
     // If we only have total tokens but no breakdown, estimate the split
     // Typically input is ~70% and output is ~30% for most conversations
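
Taken out of the converter, the fallback applied in both hunks above reads roughly as the sketch below; the helper name getCachedTokens is hypothetical and only illustrates the precedence (nested OpenAI standard field first, then the top-level variant, then 0):

import type OpenAI from 'openai';

interface ExtendedCompletionUsage extends OpenAI.CompletionUsage {
  cached_tokens?: number;
}

// Hypothetical helper mirroring the converter's fallback chain.
function getCachedTokens(usage: OpenAI.CompletionUsage): number {
  const extendedUsage = usage as ExtendedCompletionUsage;
  return (
    usage.prompt_tokens_details?.cached_tokens ??
    extendedUsage.cached_tokens ??
    0
  );
}

Using ?? instead of the previous || also means a legitimate cached value of 0 in prompt_tokens_details no longer falls through to the top-level field.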