Mirror of https://github.com/QwenLM/qwen-code.git, synced 2025-12-19 09:33:53 +00:00
fix: missing tool call chunks for openai logging (#657)
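The bug this fixes: the streaming pipeline only collected raw OpenAI chunks alongside Gemini responses that survived filtering, so chunks that converted to empty responses (such as partial tool-call deltas) never reached the telemetry logger. The fix collects every chunk at the top of the stream loop, before any filtering or merging. A minimal sketch of the collect-before-filter pattern, with generic stand-in types and helper parameters rather than the pipeline's real API:

async function* convertAndFilter<ChunkT, ResponseT>(
  stream: AsyncIterable<ChunkT>,
  convert: (chunk: ChunkT) => ResponseT,
  isEmpty: (response: ResponseT) => boolean,
  collectedChunks: ChunkT[],
): AsyncGenerator<ResponseT> {
  for await (const chunk of stream) {
    // Collect the raw chunk first, so logging sees it even when the
    // converted response is dropped below.
    collectedChunks.push(chunk);
    const response = convert(chunk);
    if (isEmpty(response)) continue; // filtered, but already collected
    yield response;
  }
}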
@@ -1105,5 +1105,164 @@ describe('ContentGenerationPipeline', () => {
        expect.any(Array),
      );
    });

    it('should collect all OpenAI chunks for logging even when Gemini responses are filtered', async () => {
      // Create chunks that would produce empty Gemini responses (partial tool calls)
      const partialToolCallChunk1: OpenAI.Chat.ChatCompletionChunk = {
        id: 'chunk-1',
        object: 'chat.completion.chunk',
        created: Date.now(),
        model: 'test-model',
        choices: [
          {
            index: 0,
            delta: {
              tool_calls: [
                {
                  index: 0,
                  id: 'call_123',
                  type: 'function',
                  function: { name: 'test_function', arguments: '{"par' },
                },
              ],
            },
            finish_reason: null,
          },
        ],
      };

      const partialToolCallChunk2: OpenAI.Chat.ChatCompletionChunk = {
        id: 'chunk-2',
        object: 'chat.completion.chunk',
        created: Date.now(),
        model: 'test-model',
        choices: [
          {
            index: 0,
            delta: {
              tool_calls: [
                {
                  index: 0,
                  function: { arguments: 'am": "value"}' },
                },
              ],
            },
            finish_reason: null,
          },
        ],
      };

      const finishChunk: OpenAI.Chat.ChatCompletionChunk = {
        id: 'chunk-3',
        object: 'chat.completion.chunk',
        created: Date.now(),
        model: 'test-model',
        choices: [
          {
            index: 0,
            delta: {},
            finish_reason: 'tool_calls',
          },
        ],
      };

      // Mock empty Gemini responses for partial chunks (they get filtered)
      const emptyGeminiResponse1 = new GenerateContentResponse();
      emptyGeminiResponse1.candidates = [
        {
          content: { parts: [], role: 'model' },
          index: 0,
          safetyRatings: [],
        },
      ];

      const emptyGeminiResponse2 = new GenerateContentResponse();
      emptyGeminiResponse2.candidates = [
        {
          content: { parts: [], role: 'model' },
          index: 0,
          safetyRatings: [],
        },
      ];

      // Mock final Gemini response with tool call
      const finalGeminiResponse = new GenerateContentResponse();
      finalGeminiResponse.candidates = [
        {
          content: {
            parts: [
              {
                functionCall: {
                  id: 'call_123',
                  name: 'test_function',
                  args: { param: 'value' },
                },
              },
            ],
            role: 'model',
          },
          finishReason: FinishReason.STOP,
          index: 0,
          safetyRatings: [],
        },
      ];

      // Setup converter mocks
      (mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue([
        { role: 'user', content: 'test' },
      ]);
      (mockConverter.convertOpenAIChunkToGemini as Mock)
        .mockReturnValueOnce(emptyGeminiResponse1) // First partial chunk -> empty response
        .mockReturnValueOnce(emptyGeminiResponse2) // Second partial chunk -> empty response
        .mockReturnValueOnce(finalGeminiResponse); // Finish chunk -> complete response

      // Mock stream
      const mockStream = {
        async *[Symbol.asyncIterator]() {
          yield partialToolCallChunk1;
          yield partialToolCallChunk2;
          yield finishChunk;
        },
      };

      (mockClient.chat.completions.create as Mock).mockResolvedValue(
        mockStream,
      );

      const request: GenerateContentParameters = {
        model: 'test-model',
        contents: [{ role: 'user', parts: [{ text: 'test' }] }],
      };

      // Collect responses
      const responses: GenerateContentResponse[] = [];
      const resultGenerator = await pipeline.executeStream(
        request,
        'test-prompt-id',
      );
      for await (const response of resultGenerator) {
        responses.push(response);
      }

      // Should only yield the final response (empty ones are filtered)
      expect(responses).toHaveLength(1);
      expect(responses[0]).toBe(finalGeminiResponse);

      // Verify telemetry was called with ALL OpenAI chunks, including the filtered ones
      expect(mockTelemetryService.logStreamingSuccess).toHaveBeenCalledWith(
        expect.objectContaining({
          model: 'test-model',
          duration: expect.any(Number),
          userPromptId: 'test-prompt-id',
          authType: 'openai',
        }),
        [finalGeminiResponse], // Only the non-empty Gemini response
        expect.objectContaining({
          model: 'test-model',
          messages: [{ role: 'user', content: 'test' }],
        }),
        [partialToolCallChunk1, partialToolCallChunk2, finishChunk], // ALL OpenAI chunks
      );
    });
  });
});
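Why the two partial chunks above convert to empty Gemini responses: OpenAI streams a function call's arguments as string fragments that only form valid JSON once all deltas have arrived, so the intermediate chunks carry nothing a functionCall part can represent. A small sketch of that accumulation (accumulateArgs is an illustrative helper, not from the repository):

function accumulateArgs(fragments: string[]): Record<string, unknown> {
  // Fragments only parse as JSON once concatenated.
  return JSON.parse(fragments.join('')) as Record<string, unknown>;
}

// Mirrors the test's deltas: '{"par' + 'am": "value"}'.
console.log(accumulateArgs(['{"par', 'am": "value"}'])); // { param: 'value' }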
@@ -118,6 +118,9 @@ export class ContentGenerationPipeline {
    try {
      // Stage 2a: Convert and yield each chunk while preserving original
      for await (const chunk of stream) {
        // Always collect OpenAI chunks for logging, regardless of Gemini conversion result
        collectedOpenAIChunks.push(chunk);

        const response = this.converter.convertOpenAIChunkToGemini(chunk);

        // Stage 2b: Filter empty responses to avoid downstream issues
@@ -132,9 +135,7 @@ export class ContentGenerationPipeline {
        // Stage 2c: Handle chunk merging for providers that send finishReason and usageMetadata separately
        const shouldYield = this.handleChunkMerging(
          response,
          chunk,
          collectedGeminiResponses,
          collectedOpenAIChunks,
          (mergedResponse) => {
            pendingFinishResponse = mergedResponse;
          },
@@ -182,17 +183,13 @@ export class ContentGenerationPipeline {
   * finishReason and the most up-to-date usage information from any provider pattern.
   *
   * @param response Current Gemini response
   * @param chunk Current OpenAI chunk
   * @param collectedGeminiResponses Array to collect responses for logging
   * @param collectedOpenAIChunks Array to collect chunks for logging
   * @param setPendingFinish Callback to set pending finish response
   * @returns true if the response should be yielded, false if it should be held for merging
   */
  private handleChunkMerging(
    response: GenerateContentResponse,
    chunk: OpenAI.Chat.ChatCompletionChunk,
    collectedGeminiResponses: GenerateContentResponse[],
    collectedOpenAIChunks: OpenAI.Chat.ChatCompletionChunk[],
    setPendingFinish: (response: GenerateContentResponse) => void,
  ): boolean {
    const isFinishChunk = response.candidates?.[0]?.finishReason;
@@ -206,7 +203,6 @@ export class ContentGenerationPipeline {
    if (isFinishChunk) {
      // This is a finish reason chunk
      collectedGeminiResponses.push(response);
      collectedOpenAIChunks.push(chunk);
      setPendingFinish(response);
      return false; // Don't yield yet, wait for potential subsequent chunks to merge
    } else if (hasPendingFinish) {
@@ -228,7 +224,6 @@ export class ContentGenerationPipeline {
      // Update the collected responses with the merged response
      collectedGeminiResponses[collectedGeminiResponses.length - 1] =
        mergedResponse;
      collectedOpenAIChunks.push(chunk);

      setPendingFinish(mergedResponse);
      return true; // Yield the merged response
@@ -236,7 +231,6 @@ export class ContentGenerationPipeline {

    // Normal chunk - collect and yield
    collectedGeminiResponses.push(response);
    collectedOpenAIChunks.push(chunk);
    return true;
  }
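The hold-and-merge flow of handleChunkMerging, reduced to a self-contained sketch: a finish-reason response is held back because, per the doc comment, some providers send usage metadata in a separate trailing chunk, and that trailing chunk is folded into the held response before it is yielded. The types and merge rule below are illustrative assumptions, not the repository's implementation:

interface Resp {
  finishReason?: string;
  usageMetadata?: { totalTokenCount?: number };
}

let pendingFinish: Resp | undefined;

// Returns the response to yield now, or undefined to hold for merging.
function onConvertedChunk(response: Resp): Resp | undefined {
  if (response.finishReason) {
    pendingFinish = response; // hold: a usage-only chunk may follow
    return undefined;
  }
  if (pendingFinish) {
    // Fold the trailing usage chunk into the held finish response.
    const merged: Resp = {
      ...pendingFinish,
      usageMetadata: response.usageMetadata ?? pendingFinish.usageMetadata,
    };
    pendingFinish = merged;
    return merged; // yield the merged response
  }
  return response; // normal chunk: yield as-is
}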