fix: missing tool call chunks for openai logging (#657)

This commit is contained in:
Mingholy
2025-09-19 15:19:30 +08:00
committed by GitHub
parent 5f68a8b6b3
commit da0863b943
2 changed files with 162 additions and 9 deletions

View File

@@ -1105,5 +1105,164 @@ describe('ContentGenerationPipeline', () => {
expect.any(Array),
);
});
it('should collect all OpenAI chunks for logging even when Gemini responses are filtered', async () => {
// Create chunks that would produce empty Gemini responses (partial tool calls)
const partialToolCallChunk1: OpenAI.Chat.ChatCompletionChunk = {
id: 'chunk-1',
object: 'chat.completion.chunk',
created: Date.now(),
model: 'test-model',
choices: [
{
index: 0,
delta: {
tool_calls: [
{
index: 0,
id: 'call_123',
type: 'function',
function: { name: 'test_function', arguments: '{"par' },
},
],
},
finish_reason: null,
},
],
};
const partialToolCallChunk2: OpenAI.Chat.ChatCompletionChunk = {
id: 'chunk-2',
object: 'chat.completion.chunk',
created: Date.now(),
model: 'test-model',
choices: [
{
index: 0,
delta: {
tool_calls: [
{
index: 0,
function: { arguments: 'am": "value"}' },
},
],
},
finish_reason: null,
},
],
};
const finishChunk: OpenAI.Chat.ChatCompletionChunk = {
id: 'chunk-3',
object: 'chat.completion.chunk',
created: Date.now(),
model: 'test-model',
choices: [
{
index: 0,
delta: {},
finish_reason: 'tool_calls',
},
],
};
// Mock empty Gemini responses for partial chunks (they get filtered)
const emptyGeminiResponse1 = new GenerateContentResponse();
emptyGeminiResponse1.candidates = [
{
content: { parts: [], role: 'model' },
index: 0,
safetyRatings: [],
},
];
const emptyGeminiResponse2 = new GenerateContentResponse();
emptyGeminiResponse2.candidates = [
{
content: { parts: [], role: 'model' },
index: 0,
safetyRatings: [],
},
];
// Mock final Gemini response with tool call
const finalGeminiResponse = new GenerateContentResponse();
finalGeminiResponse.candidates = [
{
content: {
parts: [
{
functionCall: {
id: 'call_123',
name: 'test_function',
args: { param: 'value' },
},
},
],
role: 'model',
},
finishReason: FinishReason.STOP,
index: 0,
safetyRatings: [],
},
];
// Setup converter mocks
(mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue([
{ role: 'user', content: 'test' },
]);
(mockConverter.convertOpenAIChunkToGemini as Mock)
.mockReturnValueOnce(emptyGeminiResponse1) // First partial chunk -> empty response
.mockReturnValueOnce(emptyGeminiResponse2) // Second partial chunk -> empty response
.mockReturnValueOnce(finalGeminiResponse); // Finish chunk -> complete response
// Mock stream
const mockStream = {
async *[Symbol.asyncIterator]() {
yield partialToolCallChunk1;
yield partialToolCallChunk2;
yield finishChunk;
},
};
(mockClient.chat.completions.create as Mock).mockResolvedValue(
mockStream,
);
const request: GenerateContentParameters = {
model: 'test-model',
contents: [{ role: 'user', parts: [{ text: 'test' }] }],
};
// Collect responses
const responses: GenerateContentResponse[] = [];
const resultGenerator = await pipeline.executeStream(
request,
'test-prompt-id',
);
for await (const response of resultGenerator) {
responses.push(response);
}
// Should only yield the final response (empty ones are filtered)
expect(responses).toHaveLength(1);
expect(responses[0]).toBe(finalGeminiResponse);
// Verify telemetry was called with ALL OpenAI chunks, including the filtered ones
expect(mockTelemetryService.logStreamingSuccess).toHaveBeenCalledWith(
expect.objectContaining({
model: 'test-model',
duration: expect.any(Number),
userPromptId: 'test-prompt-id',
authType: 'openai',
}),
[finalGeminiResponse], // Only the non-empty Gemini response
expect.objectContaining({
model: 'test-model',
messages: [{ role: 'user', content: 'test' }],
}),
[partialToolCallChunk1, partialToolCallChunk2, finishChunk], // ALL OpenAI chunks
);
});
});
});

View File

@@ -118,6 +118,9 @@ export class ContentGenerationPipeline {
try {
// Stage 2a: Convert and yield each chunk while preserving original
for await (const chunk of stream) {
// Always collect OpenAI chunks for logging, regardless of Gemini conversion result
collectedOpenAIChunks.push(chunk);
const response = this.converter.convertOpenAIChunkToGemini(chunk);
// Stage 2b: Filter empty responses to avoid downstream issues
@@ -132,9 +135,7 @@ export class ContentGenerationPipeline {
// Stage 2c: Handle chunk merging for providers that send finishReason and usageMetadata separately
const shouldYield = this.handleChunkMerging(
response,
chunk,
collectedGeminiResponses,
collectedOpenAIChunks,
(mergedResponse) => {
pendingFinishResponse = mergedResponse;
},
@@ -182,17 +183,13 @@ export class ContentGenerationPipeline {
* finishReason and the most up-to-date usage information from any provider pattern.
*
* @param response Current Gemini response
* @param chunk Current OpenAI chunk
* @param collectedGeminiResponses Array to collect responses for logging
* @param collectedOpenAIChunks Array to collect chunks for logging
* @param setPendingFinish Callback to set pending finish response
* @returns true if the response should be yielded, false if it should be held for merging
*/
private handleChunkMerging(
response: GenerateContentResponse,
chunk: OpenAI.Chat.ChatCompletionChunk,
collectedGeminiResponses: GenerateContentResponse[],
collectedOpenAIChunks: OpenAI.Chat.ChatCompletionChunk[],
setPendingFinish: (response: GenerateContentResponse) => void,
): boolean {
const isFinishChunk = response.candidates?.[0]?.finishReason;
@@ -206,7 +203,6 @@ export class ContentGenerationPipeline {
if (isFinishChunk) {
// This is a finish reason chunk
collectedGeminiResponses.push(response);
collectedOpenAIChunks.push(chunk);
setPendingFinish(response);
return false; // Don't yield yet, wait for potential subsequent chunks to merge
} else if (hasPendingFinish) {
@@ -228,7 +224,6 @@ export class ContentGenerationPipeline {
// Update the collected responses with the merged response
collectedGeminiResponses[collectedGeminiResponses.length - 1] =
mergedResponse;
collectedOpenAIChunks.push(chunk);
setPendingFinish(mergedResponse);
return true; // Yield the merged response
@@ -236,7 +231,6 @@ export class ContentGenerationPipeline {
// Normal chunk - collect and yield
collectedGeminiResponses.push(response);
collectedOpenAIChunks.push(chunk);
return true;
}