🚀 feat: DashScope cache control enhancement (#735)

Author: tanzhenxin
Date: 2025-09-29 14:01:16 +08:00
Committed by: GitHub
Parent: 9fce177bd8
Commit: 9a0cb64a34
8 changed files with 444 additions and 85 deletions

View File

@@ -12,6 +12,7 @@ import type { Config } from '../../config/config.js';
 import { OpenAIContentGenerator } from './openaiContentGenerator.js';
 import {
   DashScopeOpenAICompatibleProvider,
+  DeepSeekOpenAICompatibleProvider,
   OpenRouterOpenAICompatibleProvider,
   type OpenAICompatibleProvider,
   DefaultOpenAICompatibleProvider,
@@ -23,6 +24,7 @@ export { ContentGenerationPipeline, type PipelineConfig } from './pipeline.js';
 export {
   type OpenAICompatibleProvider,
   DashScopeOpenAICompatibleProvider,
+  DeepSeekOpenAICompatibleProvider,
   OpenRouterOpenAICompatibleProvider,
 } from './provider/index.js';
@@ -61,6 +63,13 @@ export function determineProvider(
     );
   }
 
+  if (DeepSeekOpenAICompatibleProvider.isDeepSeekProvider(config)) {
+    return new DeepSeekOpenAICompatibleProvider(
+      contentGeneratorConfig,
+      cliConfig,
+    );
+  }
+
   // Check for OpenRouter provider
   if (OpenRouterOpenAICompatibleProvider.isOpenRouterProvider(config)) {
     return new OpenRouterOpenAICompatibleProvider(
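
For orientation, a minimal sketch of what the new branch reacts to; the config literal mirrors the fixture in the DeepSeek tests further down, and the import paths are abbreviated rather than exact:

import type { ContentGeneratorConfig } from '../../contentGenerator.js';
import { DeepSeekOpenAICompatibleProvider } from './provider/index.js';

// Illustrative config; only baseUrl matters for the routing check.
const contentGeneratorConfig = {
  apiKey: 'test-api-key',
  baseUrl: 'https://api.deepseek.com/v1',
  model: 'deepseek-chat',
} as ContentGeneratorConfig;

// isDeepSeekProvider() matches 'api.deepseek.com' in the base URL, so
// determineProvider() now returns the DeepSeek provider before the
// OpenRouter and default checks run.
DeepSeekOpenAICompatibleProvider.isDeepSeekProvider(contentGeneratorConfig); // true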

View File

@@ -248,26 +248,23 @@ export class ContentGenerationPipeline {
       ...this.buildSamplingParameters(request),
     };
 
-    // Let provider enhance the request (e.g., add metadata, cache control)
-    const enhancedRequest = this.config.provider.buildRequest(
-      baseRequest,
-      userPromptId,
-    );
+    // Add streaming options if present
+    if (streaming) {
+      (
+        baseRequest as unknown as OpenAI.Chat.ChatCompletionCreateParamsStreaming
+      ).stream = true;
+      baseRequest.stream_options = { include_usage: true };
+    }
 
     // Add tools if present
     if (request.config?.tools) {
-      enhancedRequest.tools = await this.converter.convertGeminiToolsToOpenAI(
+      baseRequest.tools = await this.converter.convertGeminiToolsToOpenAI(
         request.config.tools,
       );
     }
 
-    // Add streaming options if needed
-    if (streaming) {
-      enhancedRequest.stream = true;
-      enhancedRequest.stream_options = { include_usage: true };
-    }
-
-    return enhancedRequest;
+    // Let provider enhance the request (e.g., add metadata, cache control)
+    return this.config.provider.buildRequest(baseRequest, userPromptId);
   }
 
   private buildSamplingParameters(
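
The point of the reordering is that the provider hook now runs last. A condensed sketch of the resulting flow, with invented helper and parameter names (the real logic is the hunk above):

import type OpenAI from 'openai';
import type { OpenAICompatibleProvider } from './provider/index.js';

async function buildFinalRequest(
  baseRequest: OpenAI.Chat.ChatCompletionCreateParams,
  streaming: boolean,
  convertedTools: OpenAI.Chat.ChatCompletionTool[] | undefined,
  provider: OpenAICompatibleProvider,
  userPromptId: string,
): Promise<OpenAI.Chat.ChatCompletionCreateParams> {
  // 1. Streaming flags are set directly on the base request.
  if (streaming) {
    (baseRequest as OpenAI.Chat.ChatCompletionCreateParamsStreaming).stream = true;
    baseRequest.stream_options = { include_usage: true };
  }

  // 2. Tools are attached next (already converted to OpenAI format upstream).
  if (convertedTools) {
    baseRequest.tools = convertedTools;
  }

  // 3. Provider enhancement runs last, so DashScope sees the final `stream`
  //    and `tools` values when deciding where to attach cache_control.
  return provider.buildRequest(baseRequest, userPromptId);
}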

View File

@@ -17,6 +17,7 @@ import { DashScopeOpenAICompatibleProvider } from './dashscope.js';
 import type { Config } from '../../../config/config.js';
 import type { ContentGeneratorConfig } from '../../contentGenerator.js';
 import { AuthType } from '../../contentGenerator.js';
+import type { ChatCompletionToolWithCache } from './types.js';
 import { DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES } from '../constants.js';
 
 // Mock OpenAI
@@ -253,17 +254,110 @@ describe('DashScopeOpenAICompatibleProvider', () => {
         },
       ]);
 
-      // Last message should NOT have cache control for non-streaming
+      // Last message should NOT have cache control for non-streaming requests
       const lastMessage = result.messages[1];
       expect(lastMessage.role).toBe('user');
       expect(lastMessage.content).toBe('Hello!');
     });
 
-    it('should add cache control to both system and last messages for streaming requests', () => {
-      const request = { ...baseRequest, stream: true };
-      const result = provider.buildRequest(request, 'test-prompt-id');
-
-      expect(result.messages).toHaveLength(2);
+    it('should add cache control to system message only for non-streaming requests with tools', () => {
+      const requestWithTool: OpenAI.Chat.ChatCompletionCreateParams = {
+        ...baseRequest,
+        messages: [
+          { role: 'system', content: 'You are a helpful assistant.' },
+          {
+            role: 'tool',
+            content: 'First tool output',
+            tool_call_id: 'call_1',
+          },
+          {
+            role: 'tool',
+            content: 'Second tool output',
+            tool_call_id: 'call_2',
+          },
+          { role: 'user', content: 'Hello!' },
+        ],
+        tools: [
+          {
+            type: 'function',
+            function: {
+              name: 'mockTool',
+              parameters: { type: 'object', properties: {} },
+            },
+          },
+        ],
+        stream: false,
+      };
+
+      const result = provider.buildRequest(requestWithTool, 'test-prompt-id');
+
+      expect(result.messages).toHaveLength(4);
+
+      const systemMessage = result.messages[0];
+      expect(systemMessage.content).toEqual([
+        {
+          type: 'text',
+          text: 'You are a helpful assistant.',
+          cache_control: { type: 'ephemeral' },
+        },
+      ]);
+
+      // Tool messages should remain unchanged
+      const firstToolMessage = result.messages[1];
+      expect(firstToolMessage.role).toBe('tool');
+      expect(firstToolMessage.content).toBe('First tool output');
+
+      const secondToolMessage = result.messages[2];
+      expect(secondToolMessage.role).toBe('tool');
+      expect(secondToolMessage.content).toBe('Second tool output');
+
+      // Last message should NOT have cache control for non-streaming requests
+      const lastMessage = result.messages[3];
+      expect(lastMessage.role).toBe('user');
+      expect(lastMessage.content).toBe('Hello!');
+
+      // Tools should NOT have cache control for non-streaming requests
+      const tools = result.tools as ChatCompletionToolWithCache[];
+      expect(tools).toBeDefined();
+      expect(tools).toHaveLength(1);
+      expect(tools[0].cache_control).toBeUndefined();
+    });
+
+    it('should add cache control to system, last history message, and last tool definition for streaming requests', () => {
+      const request = { ...baseRequest, stream: true };
+      const requestWithToolMessage: OpenAI.Chat.ChatCompletionCreateParams = {
+        ...request,
+        messages: [
+          { role: 'system', content: 'You are a helpful assistant.' },
+          {
+            role: 'tool',
+            content: 'First tool output',
+            tool_call_id: 'call_1',
+          },
+          {
+            role: 'tool',
+            content: 'Second tool output',
+            tool_call_id: 'call_2',
+          },
+          { role: 'user', content: 'Hello!' },
+        ],
+        tools: [
+          {
+            type: 'function',
+            function: {
+              name: 'mockTool',
+              parameters: { type: 'object', properties: {} },
+            },
+          },
+        ],
+      };
+
+      const result = provider.buildRequest(
+        requestWithToolMessage,
+        'test-prompt-id',
+      );
+
+      expect(result.messages).toHaveLength(4);
 
       // System message should have cache control
       const systemMessage = result.messages[0];
@@ -275,8 +369,17 @@ describe('DashScopeOpenAICompatibleProvider', () => {
         },
       ]);
 
-      // Last message should also have cache control for streaming
-      const lastMessage = result.messages[1];
+      // Tool messages should remain unchanged
+      const firstToolMessage = result.messages[1];
+      expect(firstToolMessage.role).toBe('tool');
+      expect(firstToolMessage.content).toBe('First tool output');
+
+      const secondToolMessage = result.messages[2];
+      expect(secondToolMessage.role).toBe('tool');
+      expect(secondToolMessage.content).toBe('Second tool output');
+
+      // Last message should also have cache control
+      const lastMessage = result.messages[3];
       expect(lastMessage.content).toEqual([
         {
           type: 'text',
@@ -284,6 +387,40 @@ describe('DashScopeOpenAICompatibleProvider', () => {
           cache_control: { type: 'ephemeral' },
         },
       ]);
+
+      const tools = result.tools as ChatCompletionToolWithCache[];
+      expect(tools).toBeDefined();
+      expect(tools).toHaveLength(1);
+      expect(tools[0].cache_control).toEqual({ type: 'ephemeral' });
     });
 
+    it('should not add cache control to tool messages when request.tools is undefined', () => {
+      const requestWithoutConfiguredTools: OpenAI.Chat.ChatCompletionCreateParams =
+        {
+          ...baseRequest,
+          messages: [
+            { role: 'system', content: 'You are a helpful assistant.' },
+            {
+              role: 'tool',
+              content: 'Tool output',
+              tool_call_id: 'call_1',
+            },
+            { role: 'user', content: 'Hello!' },
+          ],
+        };
+
+      const result = provider.buildRequest(
+        requestWithoutConfiguredTools,
+        'test-prompt-id',
+      );
+
+      expect(result.messages).toHaveLength(3);
+
+      const toolMessage = result.messages[1];
+      expect(toolMessage.role).toBe('tool');
+      expect(toolMessage.content).toBe('Tool output');
+
+      expect(result.tools).toBeUndefined();
+    });
+
     it('should include metadata in the request', () => {
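
Put together, the streaming test expects buildRequest to emit a request shaped roughly like this (values copied from the test fixtures above; unrelated request fields omitted). For non-streaming requests only the system entry keeps the marker: the user message stays a plain string and the tool carries no cache_control.

const expectedStreamingShape = {
  messages: [
    {
      role: 'system',
      content: [
        {
          type: 'text',
          text: 'You are a helpful assistant.',
          cache_control: { type: 'ephemeral' },
        },
      ],
    },
    { role: 'tool', content: 'First tool output', tool_call_id: 'call_1' },
    { role: 'tool', content: 'Second tool output', tool_call_id: 'call_2' },
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Hello!', cache_control: { type: 'ephemeral' } },
      ],
    },
  ],
  tools: [
    {
      type: 'function',
      function: {
        name: 'mockTool',
        parameters: { type: 'object', properties: {} },
      },
      cache_control: { type: 'ephemeral' },
    },
  ],
};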

View File

@@ -9,6 +9,7 @@ import type {
   DashScopeRequestMetadata,
   ChatCompletionContentPartTextWithCache,
   ChatCompletionContentPartWithCache,
+  ChatCompletionToolWithCache,
 } from './types.js';
 
 export class DashScopeOpenAICompatibleProvider
@@ -70,7 +71,8 @@ export class DashScopeOpenAICompatibleProvider
    * Build and configure the request for DashScope API.
    *
    * This method applies DashScope-specific configurations including:
-   * - Cache control for system and user messages
+   * - Cache control for the system message, last tool message (when tools are configured),
+   *   and the latest history message
    * - Output token limits based on model capabilities
    * - Vision model specific parameters (vl_high_resolution_images)
    * - Request metadata for session tracking
@@ -84,13 +86,17 @@ export class DashScopeOpenAICompatibleProvider
     userPromptId: string,
   ): OpenAI.Chat.ChatCompletionCreateParams {
     let messages = request.messages;
+    let tools = request.tools;
 
     // Apply DashScope cache control only if not disabled
     if (!this.shouldDisableCacheControl()) {
-      // Add cache control to system and last messages for DashScope providers
-      // Only add cache control to system message for non-streaming requests
-      const cacheTarget = request.stream ? 'both' : 'system';
-      messages = this.addDashScopeCacheControl(messages, cacheTarget);
+      const { messages: updatedMessages, tools: updatedTools } =
+        this.addDashScopeCacheControl(
+          request,
+          request.stream ? 'all' : 'system_only',
+        );
+      messages = updatedMessages;
+      tools = updatedTools;
     }
 
     // Apply output token limits based on model capabilities
@@ -104,6 +110,7 @@ export class DashScopeOpenAICompatibleProvider
       return {
         ...requestWithTokenLimits,
         messages,
+        ...(tools ? { tools } : {}),
         ...(this.buildMetadata(userPromptId) || {}),
         /* @ts-expect-error dashscope exclusive */
         vl_high_resolution_images: true,
@@ -113,6 +120,7 @@ export class DashScopeOpenAICompatibleProvider
     return {
       ...requestWithTokenLimits, // Preserve all original parameters including sampling params and adjusted max_tokens
       messages,
+      ...(tools ? { tools } : {}),
       ...(this.buildMetadata(userPromptId) || {}),
     } as OpenAI.Chat.ChatCompletionCreateParams;
   }
@@ -130,75 +138,67 @@ export class DashScopeOpenAICompatibleProvider
    * Add cache control flag to specified message(s) for DashScope providers
    */
   private addDashScopeCacheControl(
-    messages: OpenAI.Chat.ChatCompletionMessageParam[],
-    target: 'system' | 'last' | 'both' = 'both',
-  ): OpenAI.Chat.ChatCompletionMessageParam[] {
-    if (messages.length === 0) {
-      return messages;
-    }
-
-    let updatedMessages = [...messages];
-
-    // Add cache control to system message if requested
-    if (target === 'system' || target === 'both') {
-      updatedMessages = this.addCacheControlToMessage(
-        updatedMessages,
-        'system',
-      );
-    }
-
-    // Add cache control to last message if requested
-    if (target === 'last' || target === 'both') {
-      updatedMessages = this.addCacheControlToMessage(updatedMessages, 'last');
-    }
-
-    return updatedMessages;
-  }
-
-  /**
-   * Helper method to add cache control to a specific message
-   */
-  private addCacheControlToMessage(
-    messages: OpenAI.Chat.ChatCompletionMessageParam[],
-    target: 'system' | 'last',
-  ): OpenAI.Chat.ChatCompletionMessageParam[] {
-    const updatedMessages = [...messages];
-    const messageIndex = this.findTargetMessageIndex(messages, target);
-
-    if (messageIndex === -1) {
-      return updatedMessages;
-    }
-
-    const message = updatedMessages[messageIndex];
-
-    // Only process messages that have content
-    if (
-      'content' in message &&
-      message.content !== null &&
-      message.content !== undefined
-    ) {
-      const updatedContent = this.addCacheControlToContent(message.content);
-      updatedMessages[messageIndex] = {
-        ...message,
-        content: updatedContent,
-      } as OpenAI.Chat.ChatCompletionMessageParam;
-    }
-
-    return updatedMessages;
-  }
-
-  /**
-   * Find the index of the target message (system or last)
-   */
-  private findTargetMessageIndex(
-    messages: OpenAI.Chat.ChatCompletionMessageParam[],
-    target: 'system' | 'last',
-  ): number {
-    if (target === 'system') {
-      return messages.findIndex((msg) => msg.role === 'system');
-    } else {
-      return messages.length - 1;
-    }
-  }
+    request: OpenAI.Chat.ChatCompletionCreateParams,
+    cacheControl: 'system_only' | 'all',
+  ): {
+    messages: OpenAI.Chat.ChatCompletionMessageParam[];
+    tools?: ChatCompletionToolWithCache[];
+  } {
+    const messages = request.messages;
+    const systemIndex = messages.findIndex((msg) => msg.role === 'system');
+    const lastIndex = messages.length - 1;
+
+    const updatedMessages =
+      messages.length === 0
+        ? messages
+        : messages.map((message, index) => {
+            const shouldAddCacheControl = Boolean(
+              (index === systemIndex && systemIndex !== -1) ||
+                (index === lastIndex && cacheControl === 'all'),
+            );
+
+            if (
+              !shouldAddCacheControl ||
+              !('content' in message) ||
+              message.content === null ||
+              message.content === undefined
+            ) {
+              return message;
+            }
+
+            return {
+              ...message,
+              content: this.addCacheControlToContent(message.content),
+            } as OpenAI.Chat.ChatCompletionMessageParam;
+          });
+
+    const updatedTools =
+      cacheControl === 'all' && request.tools?.length
+        ? this.addCacheControlToTools(request.tools)
+        : (request.tools as ChatCompletionToolWithCache[] | undefined);
+
+    return {
+      messages: updatedMessages,
+      tools: updatedTools,
+    };
+  }
+
+  private addCacheControlToTools(
+    tools: OpenAI.Chat.ChatCompletionTool[],
+  ): ChatCompletionToolWithCache[] {
+    if (tools.length === 0) {
+      return tools as ChatCompletionToolWithCache[];
+    }
+
+    const updatedTools = [...tools] as ChatCompletionToolWithCache[];
+    const lastToolIndex = tools.length - 1;
+
+    updatedTools[lastToolIndex] = {
+      ...updatedTools[lastToolIndex],
+      cache_control: { type: 'ephemeral' },
+    };
+
+    return updatedTools;
+  }
 
   /**
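
In short, the cache mode is derived from request.stream, and tool tagging only touches the final tool definition. A small sketch of the observable behavior; the two-tool array is invented and the helpers are private in the real class:

// Mode selection inside buildRequest:
//   request.stream === true  -> 'all'         (system + last message + last tool)
//   otherwise                -> 'system_only' (system message only)

const exampleTools: OpenAI.Chat.ChatCompletionTool[] = [
  { type: 'function', function: { name: 'searchFiles', parameters: {} } }, // hypothetical
  { type: 'function', function: { name: 'readFile', parameters: {} } },    // hypothetical
];

// addCacheControlToTools(exampleTools) would leave exampleTools[0] untouched
// and return a copy whose last entry carries cache_control: { type: 'ephemeral' }.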

View File

@@ -0,0 +1,132 @@
/**
 * @license
 * Copyright 2025 Qwen
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, it, expect, vi, beforeEach } from 'vitest';
import type OpenAI from 'openai';
import { DeepSeekOpenAICompatibleProvider } from './deepseek.js';
import type { ContentGeneratorConfig } from '../../contentGenerator.js';
import type { Config } from '../../../config/config.js';

// Mock OpenAI client to avoid real network calls
vi.mock('openai', () => ({
  default: vi.fn().mockImplementation((config) => ({
    config,
  })),
}));

describe('DeepSeekOpenAICompatibleProvider', () => {
  let provider: DeepSeekOpenAICompatibleProvider;
  let mockContentGeneratorConfig: ContentGeneratorConfig;
  let mockCliConfig: Config;

  beforeEach(() => {
    vi.clearAllMocks();

    mockContentGeneratorConfig = {
      apiKey: 'test-api-key',
      baseUrl: 'https://api.deepseek.com/v1',
      model: 'deepseek-chat',
    } as ContentGeneratorConfig;

    mockCliConfig = {
      getCliVersion: vi.fn().mockReturnValue('1.0.0'),
    } as unknown as Config;

    provider = new DeepSeekOpenAICompatibleProvider(
      mockContentGeneratorConfig,
      mockCliConfig,
    );
  });

  describe('isDeepSeekProvider', () => {
    it('returns true when baseUrl includes deepseek', () => {
      const result = DeepSeekOpenAICompatibleProvider.isDeepSeekProvider(
        mockContentGeneratorConfig,
      );

      expect(result).toBe(true);
    });

    it('returns false for non deepseek baseUrl', () => {
      const config = {
        ...mockContentGeneratorConfig,
        baseUrl: 'https://api.example.com/v1',
      } as ContentGeneratorConfig;

      const result =
        DeepSeekOpenAICompatibleProvider.isDeepSeekProvider(config);

      expect(result).toBe(false);
    });
  });

  describe('buildRequest', () => {
    const userPromptId = 'prompt-123';

    it('converts array content into a string', () => {
      const originalRequest: OpenAI.Chat.ChatCompletionCreateParams = {
        model: 'deepseek-chat',
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: 'Hello' },
              { type: 'text', text: ' world' },
            ],
          },
        ],
      };

      const result = provider.buildRequest(originalRequest, userPromptId);

      expect(result.messages).toHaveLength(1);
      expect(result.messages?.[0]).toEqual({
        role: 'user',
        content: 'Hello world',
      });
      expect(originalRequest.messages?.[0].content).toEqual([
        { type: 'text', text: 'Hello' },
        { type: 'text', text: ' world' },
      ]);
    });

    it('leaves string content unchanged', () => {
      const originalRequest: OpenAI.Chat.ChatCompletionCreateParams = {
        model: 'deepseek-chat',
        messages: [
          {
            role: 'user',
            content: 'Hello world',
          },
        ],
      };

      const result = provider.buildRequest(originalRequest, userPromptId);

      expect(result.messages?.[0].content).toBe('Hello world');
    });

    it('throws when encountering non-text multimodal parts', () => {
      const originalRequest: OpenAI.Chat.ChatCompletionCreateParams = {
        model: 'deepseek-chat',
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: 'Hello' },
              {
                type: 'image_url',
                image_url: { url: 'https://example.com/image.png' },
              },
            ],
          },
        ],
      };

      expect(() =>
        provider.buildRequest(originalRequest, userPromptId),
      ).toThrow(/only supports text content/i);
    });
  });
});

View File

@@ -0,0 +1,79 @@
/**
 * @license
 * Copyright 2025 Qwen
 * SPDX-License-Identifier: Apache-2.0
 */

import type OpenAI from 'openai';
import type { Config } from '../../../config/config.js';
import type { ContentGeneratorConfig } from '../../contentGenerator.js';
import { DefaultOpenAICompatibleProvider } from './default.js';

export class DeepSeekOpenAICompatibleProvider extends DefaultOpenAICompatibleProvider {
  constructor(
    contentGeneratorConfig: ContentGeneratorConfig,
    cliConfig: Config,
  ) {
    super(contentGeneratorConfig, cliConfig);
  }

  static isDeepSeekProvider(
    contentGeneratorConfig: ContentGeneratorConfig,
  ): boolean {
    const baseUrl = contentGeneratorConfig.baseUrl ?? '';
    return baseUrl.toLowerCase().includes('api.deepseek.com');
  }

  override buildRequest(
    request: OpenAI.Chat.ChatCompletionCreateParams,
    userPromptId: string,
  ): OpenAI.Chat.ChatCompletionCreateParams {
    const baseRequest = super.buildRequest(request, userPromptId);

    if (!baseRequest.messages?.length) {
      return baseRequest;
    }

    const messages = baseRequest.messages.map((message) => {
      if (!('content' in message)) {
        return message;
      }

      const { content } = message;

      if (
        typeof content === 'string' ||
        content === null ||
        content === undefined
      ) {
        return message;
      }

      if (!Array.isArray(content)) {
        return message;
      }

      const text = content
        .map((part) => {
          if (part.type !== 'text') {
            throw new Error(
              `DeepSeek provider only supports text content. Found non-text part of type '${part.type}' in message with role '${message.role}'.`,
            );
          }
          return part.text ?? '';
        })
        .join('');

      return {
        ...message,
        content: text,
      } as OpenAI.Chat.ChatCompletionMessageParam;
    });

    return {
      ...baseRequest,
      messages,
    };
  }
}
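
A usage sketch of the new provider's normalization, mirroring the tests above (prompt text invented):

const request: OpenAI.Chat.ChatCompletionCreateParams = {
  model: 'deepseek-chat',
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Summarize ' },
        { type: 'text', text: 'this diff.' },
      ],
    },
  ],
};

const built = provider.buildRequest(request, 'prompt-id');
// built.messages[0].content === 'Summarize this diff.'
// A non-text part (e.g. image_url) throws instead of being dropped silently,
// and the original request object is left unmodified.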

View File

@@ -1,4 +1,5 @@
 export { DashScopeOpenAICompatibleProvider } from './dashscope.js';
+export { DeepSeekOpenAICompatibleProvider } from './deepseek.js';
 export { OpenRouterOpenAICompatibleProvider } from './openrouter.js';
 export { DefaultOpenAICompatibleProvider } from './default.js';
 export type {

View File

@@ -11,6 +11,10 @@ export type ChatCompletionContentPartWithCache =
   | OpenAI.Chat.ChatCompletionContentPartImage
   | OpenAI.Chat.ChatCompletionContentPartRefusal;
 
+export type ChatCompletionToolWithCache = OpenAI.Chat.ChatCompletionTool & {
+  cache_control?: { type: 'ephemeral' };
+};
+
 export interface OpenAICompatibleProvider {
   buildHeaders(): Record<string, string | undefined>;
   buildClient(): OpenAI;
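
ChatCompletionToolWithCache only widens the standard tool type, so existing OpenAI.Chat.ChatCompletionTool values remain assignable; a marked tool looks like this (tool name invented):

const cachedTool: ChatCompletionToolWithCache = {
  type: 'function',
  function: {
    name: 'listDirectory', // hypothetical tool name
    parameters: { type: 'object', properties: {} },
  },
  cache_control: { type: 'ephemeral' },
};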