From a08bcb2f414e91fa0a9706776b32248e271668ee Mon Sep 17 00:00:00 2001
From: tanzhenxin
Date: Wed, 30 Jul 2025 16:28:10 +0800
Subject: [PATCH] Fix: Enhanced OpenAI Usage Logging and Response Metadata Handling (#141)

* feat: add support for cached tokens in OpenAI usage logging
* fix: enhance response metadata handling in OpenAI content generator
* fix: run format
---
 .../core/src/core/openaiContentGenerator.ts | 41 +++++++++++++------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/packages/core/src/core/openaiContentGenerator.ts b/packages/core/src/core/openaiContentGenerator.ts
index 9d1e1229..de46dbff 100644
--- a/packages/core/src/core/openaiContentGenerator.ts
+++ b/packages/core/src/core/openaiContentGenerator.ts
@@ -52,6 +52,9 @@ interface OpenAIUsage {
   prompt_tokens: number;
   completion_tokens: number;
   total_tokens: number;
+  prompt_tokens_details?: {
+    cached_tokens?: number;
+  };
 }
 
 interface OpenAIChoice {
@@ -515,6 +518,8 @@ export class OpenAIContentGenerator implements ContentGenerator {
       return new GenerateContentResponse();
     }
 
+    const lastResponse = responses[responses.length - 1];
+
     // Find the last response with usage metadata
     const finalUsageMetadata = responses
       .slice()
@@ -561,6 +566,8 @@ export class OpenAIContentGenerator implements ContentGenerator {
         safetyRatings: [],
       },
     ];
+    combinedResponse.responseId = lastResponse?.responseId;
+    combinedResponse.createTime = lastResponse?.createTime;
     combinedResponse.modelVersion = this.model;
     combinedResponse.promptFeedback = { safetyRatings: [] };
     combinedResponse.usageMetadata = finalUsageMetadata;
@@ -1128,6 +1135,9 @@ export class OpenAIContentGenerator implements ContentGenerator {
       }
     }
 
+    response.responseId = openaiResponse.id;
+    response.createTime = openaiResponse.created.toString();
+
     response.candidates = [
       {
         content: {
@@ -1145,15 +1155,12 @@ export class OpenAIContentGenerator implements ContentGenerator {
 
     // Add usage metadata if available
     if (openaiResponse.usage) {
-      const usage = openaiResponse.usage as {
-        prompt_tokens?: number;
-        completion_tokens?: number;
-        total_tokens?: number;
-      };
+      const usage = openaiResponse.usage as OpenAIUsage;
       const promptTokens = usage.prompt_tokens || 0;
       const completionTokens = usage.completion_tokens || 0;
       const totalTokens = usage.total_tokens || 0;
+      const cachedTokens = usage.prompt_tokens_details?.cached_tokens || 0;
 
       // If we only have total tokens but no breakdown, estimate the split
       // Typically input is ~70% and output is ~30% for most conversations
@@ -1170,6 +1177,7 @@ export class OpenAIContentGenerator implements ContentGenerator {
         promptTokenCount: finalPromptTokens,
         candidatesTokenCount: finalCompletionTokens,
         totalTokenCount: totalTokens,
+        cachedContentTokenCount: cachedTokens,
       };
     }
 
@@ -1263,20 +1271,20 @@ export class OpenAIContentGenerator implements ContentGenerator {
       response.candidates = [];
     }
 
+    response.responseId = chunk.id;
+    response.createTime = chunk.created.toString();
+
     response.modelVersion = this.model;
     response.promptFeedback = { safetyRatings: [] };
 
     // Add usage metadata if available in the chunk
     if (chunk.usage) {
-      const usage = chunk.usage as {
-        prompt_tokens?: number;
-        completion_tokens?: number;
-        total_tokens?: number;
-      };
+      const usage = chunk.usage as OpenAIUsage;
       const promptTokens = usage.prompt_tokens || 0;
       const completionTokens = usage.completion_tokens || 0;
       const totalTokens = usage.total_tokens || 0;
+      const cachedTokens = usage.prompt_tokens_details?.cached_tokens || 0;
 
       // If we only have total tokens but no breakdown, estimate the split
       // Typically input is ~70% and output is ~30% for most conversations
@@ -1293,6 +1301,7 @@ export class OpenAIContentGenerator implements ContentGenerator {
         promptTokenCount: finalPromptTokens,
         candidatesTokenCount: finalCompletionTokens,
         totalTokenCount: totalTokens,
+        cachedContentTokenCount: cachedTokens,
       };
     }
 
@@ -1727,9 +1736,11 @@ export class OpenAIContentGenerator implements ContentGenerator {
     }
 
     const openaiResponse: OpenAIResponseFormat = {
-      id: `chatcmpl-${Date.now()}`,
+      id: response.responseId || `chatcmpl-${Date.now()}`,
       object: 'chat.completion',
-      created: Math.floor(Date.now() / 1000),
+      created: response.createTime
+        ? Number(response.createTime)
+        : Math.floor(Date.now() / 1000),
       model: this.model,
       choices: [choice],
     };
@@ -1741,6 +1752,12 @@ export class OpenAIContentGenerator implements ContentGenerator {
         completion_tokens: response.usageMetadata.candidatesTokenCount || 0,
         total_tokens: response.usageMetadata.totalTokenCount || 0,
       };
+
+      if (response.usageMetadata.cachedContentTokenCount) {
+        openaiResponse.usage.prompt_tokens_details = {
+          cached_tokens: response.usageMetadata.cachedContentTokenCount,
+        };
+      }
     }
 
     return openaiResponse;
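
Illustrative note, not part of the patch: the standalone TypeScript sketch below shows how a chat.completion usage payload reporting prompt_tokens_details.cached_tokens would land in the Gemini-style usageMetadata fields this change populates. The payload values are invented for the example; the mapping mirrors the one the patch applies when converting OpenAI usage into usageMetadata.

    // Standalone sketch; the usage literal below is example data only.
    interface OpenAIUsage {
      prompt_tokens: number;
      completion_tokens: number;
      total_tokens: number;
      prompt_tokens_details?: { cached_tokens?: number };
    }

    const usage: OpenAIUsage = {
      prompt_tokens: 1200,
      completion_tokens: 300,
      total_tokens: 1500,
      prompt_tokens_details: { cached_tokens: 1024 },
    };

    // Map the OpenAI usage fields to Gemini-style usageMetadata,
    // defaulting missing values to 0 as the patched code does.
    const usageMetadata = {
      promptTokenCount: usage.prompt_tokens || 0,
      candidatesTokenCount: usage.completion_tokens || 0,
      totalTokenCount: usage.total_tokens || 0,
      cachedContentTokenCount: usage.prompt_tokens_details?.cached_tokens || 0,
    };

    console.log(usageMetadata.cachedContentTokenCount); // 1024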