Mirror of https://github.com/QwenLM/qwen-code.git (synced 2025-12-19 09:33:53 +00:00)
fix: relax chunk validation to avoid unnecessary retry (#584)
* fix: relax chunk validation to avoid unnecessary retry
* fix: merge tail chunks for stable finishReason and usageMetadata
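The motivation, in chunk terms: DashScope can split the end of a stream into a finish-reason chunk and a separate usage-only chunk. Below is a minimal sketch of those two tail shapes and the single merged response the pipeline is now expected to yield; the chunk shapes are copied from the tests added in this commit, while the variable names and the merged object are illustrative and not part of the commit itself.

```ts
import { GenerateContentResponse, FinishReason } from '@google/genai';
import type OpenAI from 'openai';

// Tail chunk 1: a choice carrying only finish_reason, with empty content.
const finishChunk = {
  id: 'chunk-2',
  choices: [{ delta: { content: '' }, finish_reason: 'stop' }],
} as OpenAI.Chat.ChatCompletionChunk;

// Tail chunk 2: empty choices, carrying only the usage numbers.
const usageChunk = {
  id: 'chunk-3',
  choices: [],
  usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
} as OpenAI.Chat.ChatCompletionChunk;

// Assumed outcome after merging: the pipeline holds the finish chunk and folds the
// usage chunk into it, so consumers see one final response with both fields set.
const merged = new GenerateContentResponse();
merged.candidates = [
  { content: { parts: [], role: 'model' }, finishReason: FinishReason.STOP },
];
merged.usageMetadata = {
  promptTokenCount: 10,
  candidatesTokenCount: 20,
  totalTokenCount: 30,
};
```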
@@ -50,9 +50,13 @@ const INVALID_CONTENT_RETRY_OPTIONS: ContentRetryOptions = {
 };
 /**
  * Returns true if the response is valid, false otherwise.
+ *
+ * The DashScope provider may return the last 2 chunks as:
+ * 1. A choice(candidate) with finishReason and empty content
+ * 2. Empty choices with usage metadata
+ * We'll check separately for both of these cases.
  */
 function isValidResponse(response: GenerateContentResponse): boolean {
-  // The Dashscope provider returns empty content with usage metadata at the end of the stream
   if (response.usageMetadata) {
     return true;
   }
@@ -61,6 +65,10 @@ function isValidResponse(response: GenerateContentResponse): boolean {
     return false;
   }

+  if (response.candidates.some((candidate) => candidate.finishReason)) {
+    return true;
+  }
+
   const content = response.candidates[0]?.content;
   return content !== undefined && isValidContent(content);
 }
@@ -18,6 +18,7 @@ import {
|
|||||||
ContentListUnion,
|
ContentListUnion,
|
||||||
ContentUnion,
|
ContentUnion,
|
||||||
PartUnion,
|
PartUnion,
|
||||||
|
Candidate,
|
||||||
} from '@google/genai';
|
} from '@google/genai';
|
||||||
import OpenAI from 'openai';
|
import OpenAI from 'openai';
|
||||||
import { safeJsonParse } from '../../utils/safeJsonParse.js';
|
import { safeJsonParse } from '../../utils/safeJsonParse.js';
|
||||||
@@ -652,19 +653,21 @@ export class OpenAIContentConverter {
       this.streamingToolCallParser.reset();
     }

-    response.candidates = [
-      {
+    // Only include finishReason key if finish_reason is present
+    const candidate: Candidate = {
       content: {
         parts,
         role: 'model' as const,
       },
-      finishReason: choice.finish_reason
-        ? this.mapOpenAIFinishReasonToGemini(choice.finish_reason)
-        : FinishReason.FINISH_REASON_UNSPECIFIED,
       index: 0,
       safetyRatings: [],
-      },
-    ];
+    };
+    if (choice.finish_reason) {
+      candidate.finishReason = this.mapOpenAIFinishReasonToGemini(
+        choice.finish_reason,
+      );
+    }
+    response.candidates = [candidate];
   } else {
     response.candidates = [];
   }
@@ -4,20 +4,21 @@
  * SPDX-License-Identifier: Apache-2.0
  */

-import { describe, it, expect, beforeEach, vi, Mock } from 'vitest';
+import { describe, it, expect, beforeEach, vi, type Mock } from 'vitest';
 import OpenAI from 'openai';
 import {
-  GenerateContentParameters,
+  type GenerateContentParameters,
   GenerateContentResponse,
   Type,
+  FinishReason,
 } from '@google/genai';
-import { ContentGenerationPipeline, PipelineConfig } from './pipeline.js';
+import { ContentGenerationPipeline, type PipelineConfig } from './pipeline.js';
 import { OpenAIContentConverter } from './converter.js';
 import { Config } from '../../config/config.js';
-import { ContentGeneratorConfig, AuthType } from '../contentGenerator.js';
-import { OpenAICompatibleProvider } from './provider/index.js';
-import { TelemetryService } from './telemetryService.js';
-import { ErrorHandler } from './errorHandler.js';
+import { type ContentGeneratorConfig, AuthType } from '../contentGenerator.js';
+import { type OpenAICompatibleProvider } from './provider/index.js';
+import { type TelemetryService } from './telemetryService.js';
+import { type ErrorHandler } from './errorHandler.js';

 // Mock dependencies
 vi.mock('./converter.js');
@@ -470,6 +471,418 @@ describe('ContentGenerationPipeline', () => {
         request,
       );
     });
+
+    it('should merge finishReason and usageMetadata from separate chunks', async () => {
+      // Arrange
+      const request: GenerateContentParameters = {
+        model: 'test-model',
+        contents: [{ parts: [{ text: 'Hello' }], role: 'user' }],
+      };
+      const userPromptId = 'test-prompt-id';
+
+      // Content chunk
+      const mockChunk1 = {
+        id: 'chunk-1',
+        choices: [
+          { delta: { content: 'Hello response' }, finish_reason: null },
+        ],
+      } as OpenAI.Chat.ChatCompletionChunk;
+
+      // Finish reason chunk (empty content, has finish_reason)
+      const mockChunk2 = {
+        id: 'chunk-2',
+        choices: [{ delta: { content: '' }, finish_reason: 'stop' }],
+      } as OpenAI.Chat.ChatCompletionChunk;
+
+      // Usage metadata chunk (empty candidates, has usage)
+      const mockChunk3 = {
+        id: 'chunk-3',
+        object: 'chat.completion.chunk',
+        created: Date.now(),
+        model: 'test-model',
+        choices: [],
+        usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
+      } as OpenAI.Chat.ChatCompletionChunk;
+
+      const mockStream = {
+        async *[Symbol.asyncIterator]() {
+          yield mockChunk1;
+          yield mockChunk2;
+          yield mockChunk3;
+        },
+      };
+
+      // Mock converter responses
+      const mockContentResponse = new GenerateContentResponse();
+      mockContentResponse.candidates = [
+        { content: { parts: [{ text: 'Hello response' }], role: 'model' } },
+      ];
+
+      const mockFinishResponse = new GenerateContentResponse();
+      mockFinishResponse.candidates = [
+        {
+          content: { parts: [], role: 'model' },
+          finishReason: FinishReason.STOP,
+        },
+      ];
+
+      const mockUsageResponse = new GenerateContentResponse();
+      mockUsageResponse.candidates = [];
+      mockUsageResponse.usageMetadata = {
+        promptTokenCount: 10,
+        candidatesTokenCount: 20,
+        totalTokenCount: 30,
+      };
+
+      // Expected merged response (finishReason + usageMetadata combined)
+      const mockMergedResponse = new GenerateContentResponse();
+      mockMergedResponse.candidates = [
+        {
+          content: { parts: [], role: 'model' },
+          finishReason: FinishReason.STOP,
+        },
+      ];
+      mockMergedResponse.usageMetadata = {
+        promptTokenCount: 10,
+        candidatesTokenCount: 20,
+        totalTokenCount: 30,
+      };
+
+      (mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue([]);
+      (mockConverter.convertOpenAIChunkToGemini as Mock)
+        .mockReturnValueOnce(mockContentResponse)
+        .mockReturnValueOnce(mockFinishResponse)
+        .mockReturnValueOnce(mockUsageResponse);
+      (mockClient.chat.completions.create as Mock).mockResolvedValue(
+        mockStream,
+      );
+
+      // Act
+      const resultGenerator = await pipeline.executeStream(
+        request,
+        userPromptId,
+      );
+      const results = [];
+      for await (const result of resultGenerator) {
+        results.push(result);
+      }
+
+      // Assert
+      expect(results).toHaveLength(2); // Content chunk + merged finish/usage chunk
+      expect(results[0]).toBe(mockContentResponse);
+
+      // The last result should have both finishReason and usageMetadata
+      const lastResult = results[1];
+      expect(lastResult.candidates?.[0]?.finishReason).toBe(FinishReason.STOP);
+      expect(lastResult.usageMetadata).toEqual({
+        promptTokenCount: 10,
+        candidatesTokenCount: 20,
+        totalTokenCount: 30,
+      });
+
+      expect(mockTelemetryService.logStreamingSuccess).toHaveBeenCalledWith(
+        expect.objectContaining({
+          userPromptId,
+          model: 'test-model',
+          authType: 'openai',
+          isStreaming: true,
+        }),
+        results,
+        expect.any(Object),
+        [mockChunk1, mockChunk2, mockChunk3],
+      );
+    });
+
+    it('should handle ideal case where last chunk has both finishReason and usageMetadata', async () => {
+      // Arrange
+      const request: GenerateContentParameters = {
+        model: 'test-model',
+        contents: [{ parts: [{ text: 'Hello' }], role: 'user' }],
+      };
+      const userPromptId = 'test-prompt-id';
+
+      // Content chunk
+      const mockChunk1 = {
+        id: 'chunk-1',
+        choices: [
+          { delta: { content: 'Hello response' }, finish_reason: null },
+        ],
+      } as OpenAI.Chat.ChatCompletionChunk;
+
+      // Final chunk with both finish_reason and usage (ideal case)
+      const mockChunk2 = {
+        id: 'chunk-2',
+        choices: [{ delta: { content: '' }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
+      } as OpenAI.Chat.ChatCompletionChunk;
+
+      const mockStream = {
+        async *[Symbol.asyncIterator]() {
+          yield mockChunk1;
+          yield mockChunk2;
+        },
+      };
+
+      // Mock converter responses
+      const mockContentResponse = new GenerateContentResponse();
+      mockContentResponse.candidates = [
+        { content: { parts: [{ text: 'Hello response' }], role: 'model' } },
+      ];
+
+      const mockFinalResponse = new GenerateContentResponse();
+      mockFinalResponse.candidates = [
+        {
+          content: { parts: [], role: 'model' },
+          finishReason: FinishReason.STOP,
+        },
+      ];
+      mockFinalResponse.usageMetadata = {
+        promptTokenCount: 10,
+        candidatesTokenCount: 20,
+        totalTokenCount: 30,
+      };
+
+      (mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue([]);
+      (mockConverter.convertOpenAIChunkToGemini as Mock)
+        .mockReturnValueOnce(mockContentResponse)
+        .mockReturnValueOnce(mockFinalResponse);
+      (mockClient.chat.completions.create as Mock).mockResolvedValue(
+        mockStream,
+      );
+
+      // Act
+      const resultGenerator = await pipeline.executeStream(
+        request,
+        userPromptId,
+      );
+      const results = [];
+      for await (const result of resultGenerator) {
+        results.push(result);
+      }
+
+      // Assert
+      expect(results).toHaveLength(2);
+      expect(results[0]).toBe(mockContentResponse);
+      expect(results[1]).toBe(mockFinalResponse);
+
+      // The last result should have both finishReason and usageMetadata
+      const lastResult = results[1];
+      expect(lastResult.candidates?.[0]?.finishReason).toBe(FinishReason.STOP);
+      expect(lastResult.usageMetadata).toEqual({
+        promptTokenCount: 10,
+        candidatesTokenCount: 20,
+        totalTokenCount: 30,
+      });
+    });
+
+    it('should handle providers that send zero usage in finish chunk (like modelscope)', async () => {
+      // Arrange
+      const request: GenerateContentParameters = {
+        model: 'test-model',
+        contents: [{ parts: [{ text: 'Hello' }], role: 'user' }],
+      };
+      const userPromptId = 'test-prompt-id';
+
+      // Content chunk with zero usage (typical for modelscope)
+      const mockChunk1 = {
+        id: 'chunk-1',
+        choices: [
+          { delta: { content: 'Hello response' }, finish_reason: null },
+        ],
+        usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
+      } as OpenAI.Chat.ChatCompletionChunk;
+
+      // Finish chunk with zero usage (has finishReason but usage is all zeros)
+      const mockChunk2 = {
+        id: 'chunk-2',
+        choices: [{ delta: { content: '' }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
+      } as OpenAI.Chat.ChatCompletionChunk;
+
+      // Final usage chunk with actual usage data
+      const mockChunk3 = {
+        id: 'chunk-3',
+        object: 'chat.completion.chunk',
+        created: Date.now(),
+        model: 'test-model',
+        choices: [],
+        usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
+      } as OpenAI.Chat.ChatCompletionChunk;
+
+      const mockStream = {
+        async *[Symbol.asyncIterator]() {
+          yield mockChunk1;
+          yield mockChunk2;
+          yield mockChunk3;
+        },
+      };
+
+      // Mock converter responses
+      const mockContentResponse = new GenerateContentResponse();
+      mockContentResponse.candidates = [
+        { content: { parts: [{ text: 'Hello response' }], role: 'model' } },
+      ];
+      // Content chunk has zero usage metadata (should be filtered or ignored)
+      mockContentResponse.usageMetadata = {
+        promptTokenCount: 0,
+        candidatesTokenCount: 0,
+        totalTokenCount: 0,
+      };
+
+      const mockFinishResponseWithZeroUsage = new GenerateContentResponse();
+      mockFinishResponseWithZeroUsage.candidates = [
+        {
+          content: { parts: [], role: 'model' },
+          finishReason: FinishReason.STOP,
+        },
+      ];
+      // Finish chunk has zero usage metadata (should be treated as no usage)
+      mockFinishResponseWithZeroUsage.usageMetadata = {
+        promptTokenCount: 0,
+        candidatesTokenCount: 0,
+        totalTokenCount: 0,
+      };
+
+      const mockUsageResponse = new GenerateContentResponse();
+      mockUsageResponse.candidates = [];
+      mockUsageResponse.usageMetadata = {
+        promptTokenCount: 10,
+        candidatesTokenCount: 20,
+        totalTokenCount: 30,
+      };
+
+      (mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue([]);
+      (mockConverter.convertOpenAIChunkToGemini as Mock)
+        .mockReturnValueOnce(mockContentResponse)
+        .mockReturnValueOnce(mockFinishResponseWithZeroUsage)
+        .mockReturnValueOnce(mockUsageResponse);
+      (mockClient.chat.completions.create as Mock).mockResolvedValue(
+        mockStream,
+      );
+
+      // Act
+      const resultGenerator = await pipeline.executeStream(
+        request,
+        userPromptId,
+      );
+      const results = [];
+      for await (const result of resultGenerator) {
+        results.push(result);
+      }
+
+      // Assert
+      expect(results).toHaveLength(2); // Content chunk + merged finish/usage chunk
+      expect(results[0]).toBe(mockContentResponse);
+
+      // The last result should have both finishReason and valid usageMetadata
+      const lastResult = results[1];
+      expect(lastResult.candidates?.[0]?.finishReason).toBe(FinishReason.STOP);
+      expect(lastResult.usageMetadata).toEqual({
+        promptTokenCount: 10,
+        candidatesTokenCount: 20,
+        totalTokenCount: 30,
+      });
+
+      expect(mockTelemetryService.logStreamingSuccess).toHaveBeenCalledWith(
+        expect.objectContaining({
+          userPromptId,
+          model: 'test-model',
+          authType: 'openai',
+          isStreaming: true,
+        }),
+        results,
+        expect.any(Object),
+        [mockChunk1, mockChunk2, mockChunk3],
+      );
+    });
+
+    it('should handle providers that send finishReason and valid usage in same chunk', async () => {
+      // Arrange
+      const request: GenerateContentParameters = {
+        model: 'test-model',
+        contents: [{ parts: [{ text: 'Hello' }], role: 'user' }],
+      };
+      const userPromptId = 'test-prompt-id';
+
+      // Content chunk with zero usage
+      const mockChunk1 = {
+        id: 'chunk-1',
+        choices: [
+          { delta: { content: 'Hello response' }, finish_reason: null },
+        ],
+        usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
+      } as OpenAI.Chat.ChatCompletionChunk;
+
+      // Finish chunk with both finishReason and valid usage in same chunk
+      const mockChunk2 = {
+        id: 'chunk-2',
+        choices: [{ delta: { content: '' }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
+      } as OpenAI.Chat.ChatCompletionChunk;
+
+      const mockStream = {
+        async *[Symbol.asyncIterator]() {
+          yield mockChunk1;
+          yield mockChunk2;
+        },
+      };
+
+      // Mock converter responses
+      const mockContentResponse = new GenerateContentResponse();
+      mockContentResponse.candidates = [
+        { content: { parts: [{ text: 'Hello response' }], role: 'model' } },
+      ];
+      mockContentResponse.usageMetadata = {
+        promptTokenCount: 0,
+        candidatesTokenCount: 0,
+        totalTokenCount: 0,
+      };
+
+      const mockFinalResponse = new GenerateContentResponse();
+      mockFinalResponse.candidates = [
+        {
+          content: { parts: [], role: 'model' },
+          finishReason: FinishReason.STOP,
+        },
+      ];
+      mockFinalResponse.usageMetadata = {
+        promptTokenCount: 10,
+        candidatesTokenCount: 20,
+        totalTokenCount: 30,
+      };
+
+      (mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue([]);
+      (mockConverter.convertOpenAIChunkToGemini as Mock)
+        .mockReturnValueOnce(mockContentResponse)
+        .mockReturnValueOnce(mockFinalResponse);
+      (mockClient.chat.completions.create as Mock).mockResolvedValue(
+        mockStream,
+      );
+
+      // Act
+      const resultGenerator = await pipeline.executeStream(
+        request,
+        userPromptId,
+      );
+      const results = [];
+      for await (const result of resultGenerator) {
+        results.push(result);
+      }
+
+      // Assert
+      expect(results).toHaveLength(2);
+      expect(results[0]).toBe(mockContentResponse);
+      expect(results[1]).toBe(mockFinalResponse);
+
+      // The last result should have both finishReason and valid usageMetadata
+      const lastResult = results[1];
+      expect(lastResult.candidates?.[0]?.finishReason).toBe(FinishReason.STOP);
+      expect(lastResult.usageMetadata).toEqual({
+        promptTokenCount: 10,
+        candidatesTokenCount: 20,
+        totalTokenCount: 30,
+      });
+    });
   });

   describe('buildRequest', () => {
@@ -6,15 +6,18 @@

 import OpenAI from 'openai';
 import {
-  GenerateContentParameters,
+  type GenerateContentParameters,
   GenerateContentResponse,
 } from '@google/genai';
 import { Config } from '../../config/config.js';
-import { ContentGeneratorConfig } from '../contentGenerator.js';
+import { type ContentGeneratorConfig } from '../contentGenerator.js';
 import { type OpenAICompatibleProvider } from './provider/index.js';
 import { OpenAIContentConverter } from './converter.js';
-import { TelemetryService, RequestContext } from './telemetryService.js';
-import { ErrorHandler } from './errorHandler.js';
+import {
+  type TelemetryService,
+  type RequestContext,
+} from './telemetryService.js';
+import { type ErrorHandler } from './errorHandler.js';

 export interface PipelineConfig {
   cliConfig: Config;
@@ -96,8 +99,9 @@ export class ContentGenerationPipeline {
   * This method handles the complete stream processing pipeline:
   * 1. Convert OpenAI chunks to Gemini format while preserving original chunks
   * 2. Filter empty responses
-  * 3. Collect both formats for logging
-  * 4. Handle success/error logging with original OpenAI format
+  * 3. Handle chunk merging for providers that send finishReason and usageMetadata separately
+  * 4. Collect both formats for logging
+  * 5. Handle success/error logging with original OpenAI format
   */
  private async *processStreamWithLogging(
    stream: AsyncIterable<OpenAI.Chat.ChatCompletionChunk>,
@@ -111,6 +115,9 @@ export class ContentGenerationPipeline {
     // Reset streaming tool calls to prevent data pollution from previous streams
     this.converter.resetStreamingToolCalls();

+    // State for handling chunk merging
+    let pendingFinishResponse: GenerateContentResponse | null = null;
+
     try {
       // Stage 2a: Convert and yield each chunk while preserving original
       for await (const chunk of stream) {
@@ -119,18 +126,40 @@ export class ContentGenerationPipeline {
         // Stage 2b: Filter empty responses to avoid downstream issues
         if (
           response.candidates?.[0]?.content?.parts?.length === 0 &&
+          !response.candidates?.[0]?.finishReason &&
           !response.usageMetadata
         ) {
           continue;
         }

-        // Stage 2c: Collect both formats and yield Gemini format to consumer
-        collectedGeminiResponses.push(response);
-        collectedOpenAIChunks.push(chunk);
+        // Stage 2c: Handle chunk merging for providers that send finishReason and usageMetadata separately
+        const shouldYield = this.handleChunkMerging(
+          response,
+          chunk,
+          collectedGeminiResponses,
+          collectedOpenAIChunks,
+          (mergedResponse) => {
+            pendingFinishResponse = mergedResponse;
+          },
+        );
+
+        if (shouldYield) {
+          // If we have a pending finish response, yield it instead
+          if (pendingFinishResponse) {
+            yield pendingFinishResponse;
+            pendingFinishResponse = null;
+          } else {
             yield response;
           }
+        }
+      }

-      // Stage 2d: Stream completed successfully - perform logging with original OpenAI chunks
+      // Stage 2d: If there's still a pending finish response at the end, yield it
+      if (pendingFinishResponse) {
+        yield pendingFinishResponse;
+      }
+
+      // Stage 2e: Stream completed successfully - perform logging with original OpenAI chunks
       context.duration = Date.now() - context.startTime;

       await this.config.telemetryService.logStreamingSuccess(
@@ -156,6 +185,72 @@ export class ContentGenerationPipeline {
     }
   }

+  /**
+   * Handle chunk merging for providers that send finishReason and usageMetadata separately.
+   *
+   * Strategy: When we encounter a finishReason chunk, we hold it and merge all subsequent
+   * chunks into it until the stream ends. This ensures the final chunk contains both
+   * finishReason and the most up-to-date usage information from any provider pattern.
+   *
+   * @param response Current Gemini response
+   * @param chunk Current OpenAI chunk
+   * @param collectedGeminiResponses Array to collect responses for logging
+   * @param collectedOpenAIChunks Array to collect chunks for logging
+   * @param setPendingFinish Callback to set pending finish response
+   * @returns true if the response should be yielded, false if it should be held for merging
+   */
+  private handleChunkMerging(
+    response: GenerateContentResponse,
+    chunk: OpenAI.Chat.ChatCompletionChunk,
+    collectedGeminiResponses: GenerateContentResponse[],
+    collectedOpenAIChunks: OpenAI.Chat.ChatCompletionChunk[],
+    setPendingFinish: (response: GenerateContentResponse) => void,
+  ): boolean {
+    const isFinishChunk = response.candidates?.[0]?.finishReason;
+
+    // Check if we have a pending finish response from previous chunks
+    const hasPendingFinish =
+      collectedGeminiResponses.length > 0 &&
+      collectedGeminiResponses[collectedGeminiResponses.length - 1]
+        .candidates?.[0]?.finishReason;
+
+    if (isFinishChunk) {
+      // This is a finish reason chunk
+      collectedGeminiResponses.push(response);
+      collectedOpenAIChunks.push(chunk);
+      setPendingFinish(response);
+      return false; // Don't yield yet, wait for potential subsequent chunks to merge
+    } else if (hasPendingFinish) {
+      // We have a pending finish chunk, merge this chunk's data into it
+      const lastResponse =
+        collectedGeminiResponses[collectedGeminiResponses.length - 1];
+      const mergedResponse = new GenerateContentResponse();
+
+      // Keep the finish reason from the previous chunk
+      mergedResponse.candidates = lastResponse.candidates;
+
+      // Merge usage metadata if this chunk has it
+      if (response.usageMetadata) {
+        mergedResponse.usageMetadata = response.usageMetadata;
+      } else {
+        mergedResponse.usageMetadata = lastResponse.usageMetadata;
+      }
+
+      // Update the collected responses with the merged response
+      collectedGeminiResponses[collectedGeminiResponses.length - 1] =
+        mergedResponse;
+      collectedOpenAIChunks.push(chunk);
+
+      setPendingFinish(mergedResponse);
+      return true; // Yield the merged response
+    }
+
+    // Normal chunk - collect and yield
+    collectedGeminiResponses.push(response);
+    collectedOpenAIChunks.push(chunk);
+    return true;
+  }
+
   private async buildRequest(
     request: GenerateContentParameters,
     userPromptId: string,