Mirror of https://github.com/QwenLM/qwen-code.git, synced 2025-12-21 09:17:53 +00:00
Fix: Enhanced OpenAI Usage Logging and Response Metadata Handling (#141)
* feat: add support for cached tokens in OpenAI usage logging
* fix: enhance response metadata handling in OpenAI content generator
* fix: run format
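For context: OpenAI's chat completions API reports prompt-cache hits under usage.prompt_tokens_details.cached_tokens, while the Gemini-style GenerateContentResponse tracks the same figure as usageMetadata.cachedContentTokenCount. Below is a minimal standalone sketch of the field-by-field mapping this patch performs inline; toUsageMetadata is a hypothetical helper for illustration, not a function in this codebase.

// Sketch only: mirrors the inline mapping added in this patch.
interface OpenAIUsage {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
  prompt_tokens_details?: {
    cached_tokens?: number;
  };
}

// Hypothetical helper illustrating the translation to Gemini-style fields.
function toUsageMetadata(usage: OpenAIUsage) {
  return {
    promptTokenCount: usage.prompt_tokens || 0,
    candidatesTokenCount: usage.completion_tokens || 0,
    totalTokenCount: usage.total_tokens || 0,
    // Optional chaining: OpenAI-compatible backends may omit the
    // prompt_tokens_details object entirely, so default to 0.
    cachedContentTokenCount: usage.prompt_tokens_details?.cached_tokens || 0,
  };
}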
@@ -52,6 +52,9 @@ interface OpenAIUsage {
   prompt_tokens: number;
   completion_tokens: number;
   total_tokens: number;
+  prompt_tokens_details?: {
+    cached_tokens?: number;
+  };
 }

 interface OpenAIChoice {
@@ -515,6 +518,8 @@ export class OpenAIContentGenerator implements ContentGenerator {
       return new GenerateContentResponse();
     }

+    const lastResponse = responses[responses.length - 1];
+
     // Find the last response with usage metadata
     const finalUsageMetadata = responses
       .slice()
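The hunk's context window cuts off mid-chain after .slice(); the copy suggests a reverse scan that must not mutate the collected responses array. A hedged sketch of what the continuation likely looks like — the .reverse().find(...) tail is an assumption, not shown in this diff:

// Assumed continuation of the truncated chain above, shown standalone:
// walk the collected streaming responses from newest to oldest and keep
// the last usage metadata seen. .slice() copies the array so .reverse()
// cannot mutate the original `responses`.
type UsageMetadata = { totalTokenCount?: number };
declare const responses: Array<{ usageMetadata?: UsageMetadata }>;

const finalUsageMetadata = responses
  .slice()
  .reverse()
  .find((r) => r.usageMetadata)?.usageMetadata;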
@@ -561,6 +566,8 @@ export class OpenAIContentGenerator implements ContentGenerator {
         safetyRatings: [],
       },
     ];
+    combinedResponse.responseId = lastResponse?.responseId;
+    combinedResponse.createTime = lastResponse?.createTime;
     combinedResponse.modelVersion = this.model;
     combinedResponse.promptFeedback = { safetyRatings: [] };
     combinedResponse.usageMetadata = finalUsageMetadata;
@@ -1128,6 +1135,9 @@ export class OpenAIContentGenerator implements ContentGenerator {
       }
     }

+    response.responseId = openaiResponse.id;
+    response.createTime = openaiResponse.created.toString();
+
     response.candidates = [
       {
         content: {
@@ -1145,15 +1155,12 @@ export class OpenAIContentGenerator implements ContentGenerator {

     // Add usage metadata if available
     if (openaiResponse.usage) {
-      const usage = openaiResponse.usage as {
-        prompt_tokens?: number;
-        completion_tokens?: number;
-        total_tokens?: number;
-      };
+      const usage = openaiResponse.usage as OpenAIUsage;

       const promptTokens = usage.prompt_tokens || 0;
       const completionTokens = usage.completion_tokens || 0;
       const totalTokens = usage.total_tokens || 0;
+      const cachedTokens = usage.prompt_tokens_details?.cached_tokens || 0;

       // If we only have total tokens but no breakdown, estimate the split
       // Typically input is ~70% and output is ~30% for most conversations
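The estimation branch this comment refers to sits below the hunk's context, so it isn't visible here. A minimal sketch of such a fallback, assuming the finalPromptTokens / finalCompletionTokens names that appear in the next hunk and the 70/30 ratio the comment states; the exact condition and rounding in the real code may differ:

// Sketch of the fallback the comment describes: when a backend reports
// only total_tokens, approximate the prompt/completion split as 70/30.
declare const promptTokens: number;
declare const completionTokens: number;
declare const totalTokens: number;

let finalPromptTokens = promptTokens;
let finalCompletionTokens = completionTokens;
if (totalTokens > 0 && promptTokens === 0 && completionTokens === 0) {
  finalPromptTokens = Math.round(totalTokens * 0.7);
  finalCompletionTokens = totalTokens - finalPromptTokens;
}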
@@ -1170,6 +1177,7 @@ export class OpenAIContentGenerator implements ContentGenerator {
         promptTokenCount: finalPromptTokens,
         candidatesTokenCount: finalCompletionTokens,
         totalTokenCount: totalTokens,
+        cachedContentTokenCount: cachedTokens,
       };
     }

@@ -1263,20 +1271,20 @@ export class OpenAIContentGenerator implements ContentGenerator {
       response.candidates = [];
     }

+    response.responseId = chunk.id;
+    response.createTime = chunk.created.toString();
+
     response.modelVersion = this.model;
     response.promptFeedback = { safetyRatings: [] };

     // Add usage metadata if available in the chunk
     if (chunk.usage) {
-      const usage = chunk.usage as {
-        prompt_tokens?: number;
-        completion_tokens?: number;
-        total_tokens?: number;
-      };
+      const usage = chunk.usage as OpenAIUsage;

       const promptTokens = usage.prompt_tokens || 0;
       const completionTokens = usage.completion_tokens || 0;
       const totalTokens = usage.total_tokens || 0;
+      const cachedTokens = usage.prompt_tokens_details?.cached_tokens || 0;

       // If we only have total tokens but no breakdown, estimate the split
       // Typically input is ~70% and output is ~30% for most conversations
@@ -1293,6 +1301,7 @@ export class OpenAIContentGenerator implements ContentGenerator {
         promptTokenCount: finalPromptTokens,
         candidatesTokenCount: finalCompletionTokens,
         totalTokenCount: totalTokens,
+        cachedContentTokenCount: cachedTokens,
       };
     }

@@ -1727,9 +1736,11 @@ export class OpenAIContentGenerator implements ContentGenerator {
     }

     const openaiResponse: OpenAIResponseFormat = {
-      id: `chatcmpl-${Date.now()}`,
+      id: response.responseId || `chatcmpl-${Date.now()}`,
       object: 'chat.completion',
-      created: Math.floor(Date.now() / 1000),
+      created: response.createTime
+        ? Number(response.createTime)
+        : Math.floor(Date.now() / 1000),
       model: this.model,
       choices: [choice],
     };
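createTime is stored as a string (openaiResponse.created.toString() and chunk.created.toString() in the earlier hunks) and recovered here with Number(...), so a response converted to Gemini format and back keeps its original Unix-seconds timestamp instead of being restamped with the current time. A small illustration of the round-trip; the timestamp value is made up:

// Round-trip of the created timestamp (illustrative value):
const created = 1734770273;                 // Unix seconds from OpenAI
const createTime = created.toString();      // '1734770273' on the response
const restored = createTime
  ? Number(createTime)                      // 1734770273 again
  : Math.floor(Date.now() / 1000);          // fallback: restamp now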
@@ -1741,6 +1752,12 @@ export class OpenAIContentGenerator implements ContentGenerator {
         completion_tokens: response.usageMetadata.candidatesTokenCount || 0,
         total_tokens: response.usageMetadata.totalTokenCount || 0,
       };
+
+      if (response.usageMetadata.cachedContentTokenCount) {
+        openaiResponse.usage.prompt_tokens_details = {
+          cached_tokens: response.usageMetadata.cachedContentTokenCount,
+        };
+      }
     }

     return openaiResponse;
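Note that the truthiness check above means prompt_tokens_details is emitted only when the cached count is non-zero; a count of 0 (or undefined) leaves the field out entirely. A hedged sketch of that behavior — withCacheDetails is a hypothetical helper and the values are illustrative:

// Sketch of the guard above: a zero or missing cached count omits
// prompt_tokens_details entirely (0 and undefined are both falsy).
type Usage = {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
  prompt_tokens_details?: { cached_tokens?: number };
};

function withCacheDetails(usage: Usage, cached?: number): Usage {
  if (cached) {
    usage.prompt_tokens_details = { cached_tokens: cached };
  }
  return usage;
}

const base = { prompt_tokens: 1200, completion_tokens: 300, total_tokens: 1500 };
withCacheDetails({ ...base }, 128); // includes prompt_tokens_details
withCacheDetails({ ...base }, 0);   // omits it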