Mirror of https://github.com/QwenLM/qwen-code.git, synced 2025-12-21 01:07:46 +00:00
Fix: Enhanced OpenAI Usage Logging and Response Metadata Handling (#141)
* feat: add support for cached tokens in OpenAI usage logging
* fix: enhance response metadata handling in OpenAI content generator
* fix: run format
@@ -52,6 +52,9 @@ interface OpenAIUsage {
   prompt_tokens: number;
   completion_tokens: number;
   total_tokens: number;
+  prompt_tokens_details?: {
+    cached_tokens?: number;
+  };
 }

 interface OpenAIChoice {
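For context, OpenAI-compatible endpoints report prompt caching under `usage.prompt_tokens_details`. A minimal TypeScript sketch of reading the extended shape; the payload values are illustrative, not taken from the repo:

interface OpenAIUsage {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
  prompt_tokens_details?: {
    cached_tokens?: number;
  };
}

// Illustrative `usage` block from a chat completion response.
const usage: OpenAIUsage = {
  prompt_tokens: 1200,
  completion_tokens: 300,
  total_tokens: 1500,
  prompt_tokens_details: { cached_tokens: 1024 },
};

// Optional chaining keeps this safe for providers that omit the details block.
const cachedTokens = usage.prompt_tokens_details?.cached_tokens ?? 0;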
@@ -515,6 +518,8 @@ export class OpenAIContentGenerator implements ContentGenerator {
       return new GenerateContentResponse();
     }

+    const lastResponse = responses[responses.length - 1];
+
     // Find the last response with usage metadata
     const finalUsageMetadata = responses
       .slice()
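The hunk cuts off after `.slice()`; from context, the chain presumably reverses the copied array and picks the most recent chunk that carried usage metadata, while the last chunk supplies identity fields. A hedged sketch of that pattern (the local types are ours; the repo uses @google/genai's GenerateContentResponse):

// Minimal local shape for the sketch.
type StreamedResponse = {
  responseId?: string;
  createTime?: string;
  usageMetadata?: unknown;
};

declare const responses: StreamedResponse[];

// The final chunk is authoritative for identity metadata.
const lastResponse = responses[responses.length - 1];

// Usage may arrive on an earlier chunk, so search backwards for it.
// .slice() copies the array so .reverse() does not mutate `responses`.
const finalUsageMetadata = responses
  .slice()
  .reverse()
  .find((r) => r.usageMetadata !== undefined)?.usageMetadata;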
@@ -561,6 +566,8 @@ export class OpenAIContentGenerator implements ContentGenerator {
         safetyRatings: [],
       },
     ];
+    combinedResponse.responseId = lastResponse?.responseId;
+    combinedResponse.createTime = lastResponse?.createTime;
     combinedResponse.modelVersion = this.model;
     combinedResponse.promptFeedback = { safetyRatings: [] };
     combinedResponse.usageMetadata = finalUsageMetadata;
@@ -1128,6 +1135,9 @@ export class OpenAIContentGenerator implements ContentGenerator {
       }
     }

+    response.responseId = openaiResponse.id;
+    response.createTime = openaiResponse.created.toString();
+
     response.candidates = [
       {
         content: {
@@ -1145,15 +1155,12 @@ export class OpenAIContentGenerator implements ContentGenerator {

     // Add usage metadata if available
     if (openaiResponse.usage) {
-      const usage = openaiResponse.usage as {
-        prompt_tokens?: number;
-        completion_tokens?: number;
-        total_tokens?: number;
-      };
+      const usage = openaiResponse.usage as OpenAIUsage;

       const promptTokens = usage.prompt_tokens || 0;
       const completionTokens = usage.completion_tokens || 0;
       const totalTokens = usage.total_tokens || 0;
+      const cachedTokens = usage.prompt_tokens_details?.cached_tokens || 0;

       // If we only have total tokens but no breakdown, estimate the split
       // Typically input is ~70% and output is ~30% for most conversations
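The two comments refer to an estimation step that falls outside the hunk. A hedged reconstruction of what such a fallback plausibly looks like: the names `finalPromptTokens` and `finalCompletionTokens` appear in the next hunk, the 70/30 ratio follows the comment, and the exact guard condition is our assumption:

declare const promptTokens: number;
declare const completionTokens: number;
declare const totalTokens: number;

let finalPromptTokens = promptTokens;
let finalCompletionTokens = completionTokens;

// Some providers report only total_tokens; when the breakdown is missing,
// estimate it as ~70% input / ~30% output per the comment above.
if (totalTokens > 0 && promptTokens === 0 && completionTokens === 0) {
  finalPromptTokens = Math.floor(totalTokens * 0.7);
  finalCompletionTokens = totalTokens - finalPromptTokens;
}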
@@ -1170,6 +1177,7 @@ export class OpenAIContentGenerator implements ContentGenerator {
         promptTokenCount: finalPromptTokens,
         candidatesTokenCount: finalCompletionTokens,
         totalTokenCount: totalTokens,
+        cachedContentTokenCount: cachedTokens,
       };
     }

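With `cachedContentTokenCount` populated, usage logging can distinguish cache hits from fresh prompt tokens. A hypothetical reporting helper, ours rather than the repo's; since cached tokens are a subset of prompt tokens, the hit rate is cached/prompt:

function logUsage(meta: {
  promptTokenCount?: number;
  candidatesTokenCount?: number;
  totalTokenCount?: number;
  cachedContentTokenCount?: number;
}): void {
  const prompt = meta.promptTokenCount ?? 0;
  const cached = meta.cachedContentTokenCount ?? 0;
  const hitRate = prompt > 0 ? ((cached / prompt) * 100).toFixed(1) : '0.0';
  console.log(
    `tokens: prompt=${prompt} (cached=${cached}, ${hitRate}% hit) ` +
      `completion=${meta.candidatesTokenCount ?? 0} total=${meta.totalTokenCount ?? 0}`,
  );
}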
@@ -1263,20 +1271,20 @@ export class OpenAIContentGenerator implements ContentGenerator {
       response.candidates = [];
     }

+    response.responseId = chunk.id;
+    response.createTime = chunk.created.toString();
+
     response.modelVersion = this.model;
     response.promptFeedback = { safetyRatings: [] };

     // Add usage metadata if available in the chunk
     if (chunk.usage) {
-      const usage = chunk.usage as {
-        prompt_tokens?: number;
-        completion_tokens?: number;
-        total_tokens?: number;
-      };
+      const usage = chunk.usage as OpenAIUsage;

       const promptTokens = usage.prompt_tokens || 0;
       const completionTokens = usage.completion_tokens || 0;
       const totalTokens = usage.total_tokens || 0;
+      const cachedTokens = usage.prompt_tokens_details?.cached_tokens || 0;

       // If we only have total tokens but no breakdown, estimate the split
       // Typically input is ~70% and output is ~30% for most conversations
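For the streaming path, OpenAI-compatible APIs attach `usage` only to the final chunk, and only when the request opts in via `stream_options`. A sketch with the official openai npm package, assuming a recent SDK version; the model name and prompt are placeholders:

import OpenAI from 'openai';

const client = new OpenAI(); // reads OPENAI_API_KEY from the environment

async function main(): Promise<void> {
  const stream = await client.chat.completions.create({
    model: 'gpt-4o-mini', // placeholder model
    messages: [{ role: 'user', content: 'Hello' }],
    stream: true,
    stream_options: { include_usage: true }, // opt in to usage on the final chunk
  });

  for await (const chunk of stream) {
    // Delta chunks carry content; the final chunk carries `usage` with empty choices.
    const text = chunk.choices[0]?.delta?.content;
    if (text) process.stdout.write(text);
    if (chunk.usage) {
      console.log('\ncached:', chunk.usage.prompt_tokens_details?.cached_tokens ?? 0);
    }
  }
}

main();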
@@ -1293,6 +1301,7 @@ export class OpenAIContentGenerator implements ContentGenerator {
         promptTokenCount: finalPromptTokens,
         candidatesTokenCount: finalCompletionTokens,
         totalTokenCount: totalTokens,
+        cachedContentTokenCount: cachedTokens,
       };
     }

@@ -1727,9 +1736,11 @@ export class OpenAIContentGenerator implements ContentGenerator {
     }

     const openaiResponse: OpenAIResponseFormat = {
-      id: `chatcmpl-${Date.now()}`,
+      id: response.responseId || `chatcmpl-${Date.now()}`,
       object: 'chat.completion',
-      created: Math.floor(Date.now() / 1000),
+      created: response.createTime
+        ? Number(response.createTime)
+        : Math.floor(Date.now() / 1000),
       model: this.model,
       choices: [choice],
     };
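Note the round-trip: OpenAI's `created` field is Unix time in seconds, stored internally as a string (`createTime`) and converted back with `Number(...)` when rebuilding an OpenAI-format response, with the current time as a fallback. A standalone sketch with an illustrative value:

// OpenAI -> internal: keep the numeric seconds as a string.
const created = 1735689600; // illustrative `created` value from a response
const createTime: string = created.toString();

// Internal -> OpenAI: restore the number, or fall back to "now" in seconds.
const restored: number = createTime
  ? Number(createTime)
  : Math.floor(Date.now() / 1000);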
@@ -1741,6 +1752,12 @@ export class OpenAIContentGenerator implements ContentGenerator {
         completion_tokens: response.usageMetadata.candidatesTokenCount || 0,
         total_tokens: response.usageMetadata.totalTokenCount || 0,
       };
+
+      if (response.usageMetadata.cachedContentTokenCount) {
+        openaiResponse.usage.prompt_tokens_details = {
+          cached_tokens: response.usageMetadata.cachedContentTokenCount,
+        };
+      }
     }

     return openaiResponse;
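The truthiness guard means a zero cached-token count simply omits `prompt_tokens_details`, matching providers that leave the field out when nothing was cached. A standalone sketch of the reverse mapping; the function name and parameter type are ours, abbreviated from the repo's interfaces:

interface UsageOut {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
  prompt_tokens_details?: { cached_tokens: number };
}

function toOpenAIUsage(meta: {
  promptTokenCount?: number;
  candidatesTokenCount?: number;
  totalTokenCount?: number;
  cachedContentTokenCount?: number;
}): UsageOut {
  const usage: UsageOut = {
    prompt_tokens: meta.promptTokenCount || 0,
    completion_tokens: meta.candidatesTokenCount || 0,
    total_tokens: meta.totalTokenCount || 0,
  };
  // Only attach the details block when there is a non-zero cached count.
  if (meta.cachedContentTokenCount) {
    usage.prompt_tokens_details = {
      cached_tokens: meta.cachedContentTokenCount,
    };
  }
  return usage;
}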