Fix: Enhanced OpenAI Usage Logging and Response Metadata Handling (#141)

* feat: add support for cached tokens in OpenAI usage logging

* fix: enhance response metadata handling in OpenAI content generator

* fix: run format
Author: tanzhenxin
Date: 2025-07-30 16:28:10 +08:00 (committed by GitHub)
Parent: 8e3b413fdd
Commit: a08bcb2f41
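
In plain terms, the change reads OpenAI's optional prompt_tokens_details.cached_tokens field and surfaces it as cachedContentTokenCount in the Gemini-style usage metadata, next to the existing prompt/completion/total counts. A minimal sketch of that mapping, using a hypothetical helper name (the real change inlines this logic in both the non-streaming and streaming paths shown below):

interface OpenAIUsage {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
  prompt_tokens_details?: { cached_tokens?: number };
}

// Hypothetical helper illustrating the mapping the diff performs inline.
function toUsageMetadata(usage: OpenAIUsage) {
  return {
    promptTokenCount: usage.prompt_tokens || 0,
    candidatesTokenCount: usage.completion_tokens || 0,
    totalTokenCount: usage.total_tokens || 0,
    cachedContentTokenCount: usage.prompt_tokens_details?.cached_tokens || 0,
  };
}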

@@ -52,6 +52,9 @@ interface OpenAIUsage {
   prompt_tokens: number;
   completion_tokens: number;
   total_tokens: number;
+  prompt_tokens_details?: {
+    cached_tokens?: number;
+  };
 }
 
 interface OpenAIChoice {
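For reference, a usage payload with prompt caching active might look like the following (illustrative numbers; cached_tokens counts a cached subset of prompt_tokens, and it is the only detail field this change reads):

const exampleUsage: OpenAIUsage = {
  prompt_tokens: 2048,
  completion_tokens: 150,
  total_tokens: 2198,
  prompt_tokens_details: { cached_tokens: 1792 },
};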
@@ -515,6 +518,8 @@ export class OpenAIContentGenerator implements ContentGenerator {
       return new GenerateContentResponse();
     }
 
+    const lastResponse = responses[responses.length - 1];
+
     // Find the last response with usage metadata
     const finalUsageMetadata = responses
       .slice()
@@ -561,6 +566,8 @@ export class OpenAIContentGenerator implements ContentGenerator {
         safetyRatings: [],
       },
     ];
+    combinedResponse.responseId = lastResponse?.responseId;
+    combinedResponse.createTime = lastResponse?.createTime;
     combinedResponse.modelVersion = this.model;
     combinedResponse.promptFeedback = { safetyRatings: [] };
     combinedResponse.usageMetadata = finalUsageMetadata;
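The combined response for a streamed call now also carries the identity of the last chunk, while usage is still taken from whichever response actually reported it. The diff shows only the start of the finalUsageMetadata expression; a plausible sketch of the full pattern, under that assumption:

// Identity (responseId / createTime) comes from the last chunk received...
const lastResponse = responses[responses.length - 1];

// ...while usage metadata comes from the last response that has any, since
// providers typically attach usage only near the end of the stream.
const finalUsageMetadata = responses
  .slice() // copy so reverse() does not mutate the caller's array
  .reverse()
  .find((r) => r.usageMetadata)?.usageMetadata;

combinedResponse.responseId = lastResponse?.responseId;
combinedResponse.createTime = lastResponse?.createTime;
combinedResponse.usageMetadata = finalUsageMetadata;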
@@ -1128,6 +1135,9 @@ export class OpenAIContentGenerator implements ContentGenerator {
       }
     }
 
+    response.responseId = openaiResponse.id;
+    response.createTime = openaiResponse.created.toString();
+
     response.candidates = [
       {
         content: {
@@ -1145,15 +1155,12 @@ export class OpenAIContentGenerator implements ContentGenerator {
 
     // Add usage metadata if available
     if (openaiResponse.usage) {
-      const usage = openaiResponse.usage as {
-        prompt_tokens?: number;
-        completion_tokens?: number;
-        total_tokens?: number;
-      };
+      const usage = openaiResponse.usage as OpenAIUsage;
       const promptTokens = usage.prompt_tokens || 0;
       const completionTokens = usage.completion_tokens || 0;
       const totalTokens = usage.total_tokens || 0;
+      const cachedTokens = usage.prompt_tokens_details?.cached_tokens || 0;
 
       // If we only have total tokens but no breakdown, estimate the split
       // Typically input is ~70% and output is ~30% for most conversations
@@ -1170,6 +1177,7 @@ export class OpenAIContentGenerator implements ContentGenerator {
         promptTokenCount: finalPromptTokens,
         candidatesTokenCount: finalCompletionTokens,
         totalTokenCount: totalTokens,
+        cachedContentTokenCount: cachedTokens,
       };
     }
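The estimation branch referenced by the comment above sits between these two hunks and is unchanged by the commit; a sketch of what such a fallback typically looks like, assuming the 70/30 split the comment describes (finalPromptTokens and finalCompletionTokens are the names the diff actually uses):

// Fall back to an estimated split when only total_tokens is reported.
let finalPromptTokens = promptTokens;
let finalCompletionTokens = completionTokens;
if (totalTokens > 0 && promptTokens === 0 && completionTokens === 0) {
  finalPromptTokens = Math.floor(totalTokens * 0.7); // ~70% input
  finalCompletionTokens = totalTokens - finalPromptTokens; // ~30% output
}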
@@ -1263,20 +1271,20 @@ export class OpenAIContentGenerator implements ContentGenerator {
       response.candidates = [];
     }
 
+    response.responseId = chunk.id;
+    response.createTime = chunk.created.toString();
     response.modelVersion = this.model;
     response.promptFeedback = { safetyRatings: [] };
 
     // Add usage metadata if available in the chunk
     if (chunk.usage) {
-      const usage = chunk.usage as {
-        prompt_tokens?: number;
-        completion_tokens?: number;
-        total_tokens?: number;
-      };
+      const usage = chunk.usage as OpenAIUsage;
       const promptTokens = usage.prompt_tokens || 0;
       const completionTokens = usage.completion_tokens || 0;
       const totalTokens = usage.total_tokens || 0;
+      const cachedTokens = usage.prompt_tokens_details?.cached_tokens || 0;
 
       // If we only have total tokens but no breakdown, estimate the split
       // Typically input is ~70% and output is ~30% for most conversations
@@ -1293,6 +1301,7 @@ export class OpenAIContentGenerator implements ContentGenerator {
         promptTokenCount: finalPromptTokens,
         candidatesTokenCount: finalCompletionTokens,
         totalTokenCount: totalTokens,
+        cachedContentTokenCount: cachedTokens,
       };
     }
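A practical note on the streaming path: the OpenAI chat completions API only reports usage for a streamed request when the caller opts in, in which case it arrives on a trailing chunk whose choices array is empty. Whether this generator sets that flag is not visible in this diff; the opt-in looks roughly like this (client and messages assumed to be in scope):

const stream = await client.chat.completions.create({
  model: 'gpt-4o', // illustrative model name
  messages,
  stream: true,
  stream_options: { include_usage: true }, // emit a final chunk carrying usage
});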
@@ -1727,9 +1736,11 @@ export class OpenAIContentGenerator implements ContentGenerator {
     }
 
     const openaiResponse: OpenAIResponseFormat = {
-      id: `chatcmpl-${Date.now()}`,
+      id: response.responseId || `chatcmpl-${Date.now()}`,
       object: 'chat.completion',
-      created: Math.floor(Date.now() / 1000),
+      created: response.createTime
+        ? Number(response.createTime)
+        : Math.floor(Date.now() / 1000),
       model: this.model,
       choices: [choice],
     };
@@ -1741,6 +1752,12 @@ export class OpenAIContentGenerator implements ContentGenerator {
         completion_tokens: response.usageMetadata.candidatesTokenCount || 0,
         total_tokens: response.usageMetadata.totalTokenCount || 0,
       };
+
+      if (response.usageMetadata.cachedContentTokenCount) {
+        openaiResponse.usage.prompt_tokens_details = {
+          cached_tokens: response.usageMetadata.cachedContentTokenCount,
+        };
+      }
     }
 
     return openaiResponse;
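
The conversion back to OpenAI's wire format now round-trips what used to be regenerated: responseId becomes id again, createTime (stored as a string of Unix seconds via created.toString()) is turned back into a number, and cachedContentTokenCount is re-emitted as prompt_tokens_details.cached_tokens. A small illustration of the createTime round trip (illustrative timestamp):

const created = 1753864090; // Unix seconds as returned by OpenAI
const createTime = created.toString(); // stored on the response: '1753864090'
const restored = createTime ? Number(createTime) : Math.floor(Date.now() / 1000);
// restored === 1753864090, so the original timestamp survives the round trip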