mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-20 16:57:46 +00:00
Vision model support for Qwen-OAuth (#525)
* refactor: openaiContentGenerator
* refactor: optimize stream handling
* refactor: re-organize refactored files
* fix: unit test cases
* feat: `/model` command for switching to vision model
* fix: lint error
* feat: add image tokenizer to fit vlm context window
* fix: lint and type errors
* feat: add `visionModelPreview` to control default visibility of vision models
* fix: remove deprecated files
* fix: align supported image formats with bailian doc
This commit is contained in:
@@ -106,6 +106,7 @@ describe('useSlashCommandProcessor', () => {
|
||||
const mockLoadHistory = vi.fn();
|
||||
const mockOpenThemeDialog = vi.fn();
|
||||
const mockOpenAuthDialog = vi.fn();
|
||||
const mockOpenModelSelectionDialog = vi.fn();
|
||||
const mockSetQuittingMessages = vi.fn();
|
||||
|
||||
const mockConfig = makeFakeConfig({});
|
||||
@@ -122,6 +123,7 @@ describe('useSlashCommandProcessor', () => {
|
||||
mockBuiltinLoadCommands.mockResolvedValue([]);
|
||||
mockFileLoadCommands.mockResolvedValue([]);
|
||||
mockMcpLoadCommands.mockResolvedValue([]);
|
||||
mockOpenModelSelectionDialog.mockClear();
|
||||
});
|
||||
|
||||
const setupProcessorHook = (
|
||||
@@ -150,11 +152,13 @@ describe('useSlashCommandProcessor', () => {
|
||||
mockSetQuittingMessages,
|
||||
vi.fn(), // openPrivacyNotice
|
||||
vi.fn(), // openSettingsDialog
|
||||
mockOpenModelSelectionDialog,
|
||||
vi.fn(), // openSubagentCreateDialog
|
||||
vi.fn(), // openAgentsManagerDialog
|
||||
vi.fn(), // toggleVimEnabled
|
||||
setIsProcessing,
|
||||
vi.fn(), // setGeminiMdFileCount
|
||||
vi.fn(), // _showQuitConfirmation
|
||||
),
|
||||
);
|
||||
|
||||
@@ -395,6 +399,21 @@ describe('useSlashCommandProcessor', () => {
|
||||
expect(mockOpenThemeDialog).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle "dialog: model" action', async () => {
|
||||
const command = createTestCommand({
|
||||
name: 'modelcmd',
|
||||
action: vi.fn().mockResolvedValue({ type: 'dialog', dialog: 'model' }),
|
||||
});
|
||||
const result = setupProcessorHook([command]);
|
||||
await waitFor(() => expect(result.current.slashCommands).toHaveLength(1));
|
||||
|
||||
await act(async () => {
|
||||
await result.current.handleSlashCommand('/modelcmd');
|
||||
});
|
||||
|
||||
expect(mockOpenModelSelectionDialog).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle "load_history" action', async () => {
|
||||
const command = createTestCommand({
|
||||
name: 'load',
|
||||
@@ -904,11 +923,13 @@ describe('useSlashCommandProcessor', () => {
|
||||
mockSetQuittingMessages,
|
||||
vi.fn(), // openPrivacyNotice
|
||||
vi.fn(), // openSettingsDialog
|
||||
vi.fn(), // openModelSelectionDialog
|
||||
vi.fn(), // openSubagentCreateDialog
|
||||
vi.fn(), // openAgentsManagerDialog
|
||||
vi.fn(), // toggleVimEnabled
|
||||
vi.fn(), // setIsProcessing
|
||||
vi.fn(), // setGeminiMdFileCount
|
||||
vi.fn(), // _showQuitConfirmation
|
||||
),
|
||||
);
|
||||
|
||||
|
||||
@@ -53,6 +53,7 @@ export const useSlashCommandProcessor = (
|
||||
setQuittingMessages: (message: HistoryItem[]) => void,
|
||||
openPrivacyNotice: () => void,
|
||||
openSettingsDialog: () => void,
|
||||
openModelSelectionDialog: () => void,
|
||||
openSubagentCreateDialog: () => void,
|
||||
openAgentsManagerDialog: () => void,
|
||||
toggleVimEnabled: () => Promise<boolean>,
|
||||
@@ -404,6 +405,9 @@ export const useSlashCommandProcessor = (
|
||||
case 'settings':
|
||||
openSettingsDialog();
|
||||
return { type: 'handled' };
|
||||
case 'model':
|
||||
openModelSelectionDialog();
|
||||
return { type: 'handled' };
|
||||
case 'subagent_create':
|
||||
openSubagentCreateDialog();
|
||||
return { type: 'handled' };
|
||||
@@ -663,6 +667,7 @@ export const useSlashCommandProcessor = (
|
||||
setSessionShellAllowlist,
|
||||
setIsProcessing,
|
||||
setConfirmationRequest,
|
||||
openModelSelectionDialog,
|
||||
session.stats,
|
||||
],
|
||||
);
|
||||
|
||||
@@ -56,6 +56,12 @@ const MockedUserPromptEvent = vi.hoisted(() =>
|
||||
);
|
||||
const mockParseAndFormatApiError = vi.hoisted(() => vi.fn());
|
||||
|
||||
// Vision auto-switch mocks (hoisted)
|
||||
const mockHandleVisionSwitch = vi.hoisted(() =>
|
||||
vi.fn().mockResolvedValue({ shouldProceed: true }),
|
||||
);
|
||||
const mockRestoreOriginalModel = vi.hoisted(() => vi.fn());
|
||||
|
||||
vi.mock('@qwen-code/qwen-code-core', async (importOriginal) => {
|
||||
const actualCoreModule = (await importOriginal()) as any;
|
||||
return {
|
||||
@@ -76,6 +82,13 @@ vi.mock('./useReactToolScheduler.js', async (importOriginal) => {
|
||||
};
|
||||
});
|
||||
|
||||
vi.mock('./useVisionAutoSwitch.js', () => ({
|
||||
useVisionAutoSwitch: vi.fn(() => ({
|
||||
handleVisionSwitch: mockHandleVisionSwitch,
|
||||
restoreOriginalModel: mockRestoreOriginalModel,
|
||||
})),
|
||||
}));
|
||||
|
||||
vi.mock('./useKeypress.js', () => ({
|
||||
useKeypress: vi.fn(),
|
||||
}));
|
||||
@@ -199,6 +212,7 @@ describe('useGeminiStream', () => {
|
||||
getContentGeneratorConfig: vi
|
||||
.fn()
|
||||
.mockReturnValue(contentGeneratorConfig),
|
||||
getMaxSessionTurns: vi.fn(() => 50),
|
||||
} as unknown as Config;
|
||||
mockOnDebugMessage = vi.fn();
|
||||
mockHandleSlashCommand = vi.fn().mockResolvedValue(false);
|
||||
@@ -1551,6 +1565,7 @@ describe('useGeminiStream', () => {
|
||||
expect.any(String), // Argument 3: The prompt_id string
|
||||
);
|
||||
});
|
||||
|
||||
describe('Thought Reset', () => {
|
||||
it('should reset thought to null when starting a new prompt', async () => {
|
||||
// First, simulate a response with a thought
|
||||
@@ -1900,4 +1915,166 @@ describe('useGeminiStream', () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
// --- New tests focused on recent modifications ---
|
||||
describe('Vision Auto Switch Integration', () => {
|
||||
it('should call handleVisionSwitch and proceed to send when allowed', async () => {
|
||||
mockHandleVisionSwitch.mockResolvedValueOnce({ shouldProceed: true });
|
||||
mockSendMessageStream.mockReturnValue(
|
||||
(async function* () {
|
||||
yield { type: ServerGeminiEventType.Content, value: 'ok' };
|
||||
yield { type: ServerGeminiEventType.Finished, value: 'STOP' };
|
||||
})(),
|
||||
);
|
||||
|
||||
const { result } = renderHook(() =>
|
||||
useGeminiStream(
|
||||
new MockedGeminiClientClass(mockConfig),
|
||||
[],
|
||||
mockAddItem,
|
||||
mockConfig,
|
||||
mockOnDebugMessage,
|
||||
mockHandleSlashCommand,
|
||||
false,
|
||||
() => 'vscode' as EditorType,
|
||||
() => {},
|
||||
() => Promise.resolve(),
|
||||
false,
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
),
|
||||
);
|
||||
|
||||
await act(async () => {
|
||||
await result.current.submitQuery('image prompt');
|
||||
});
|
||||
|
||||
await waitFor(() => {
|
||||
expect(mockHandleVisionSwitch).toHaveBeenCalled();
|
||||
expect(mockSendMessageStream).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
it('should gate submission when handleVisionSwitch returns shouldProceed=false', async () => {
|
||||
mockHandleVisionSwitch.mockResolvedValueOnce({ shouldProceed: false });
|
||||
|
||||
const { result } = renderHook(() =>
|
||||
useGeminiStream(
|
||||
new MockedGeminiClientClass(mockConfig),
|
||||
[],
|
||||
mockAddItem,
|
||||
mockConfig,
|
||||
mockOnDebugMessage,
|
||||
mockHandleSlashCommand,
|
||||
false,
|
||||
() => 'vscode' as EditorType,
|
||||
() => {},
|
||||
() => Promise.resolve(),
|
||||
false,
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
),
|
||||
);
|
||||
|
||||
await act(async () => {
|
||||
await result.current.submitQuery('vision-gated');
|
||||
});
|
||||
|
||||
// No call to API, no restoreOriginalModel needed since no override occurred
|
||||
expect(mockSendMessageStream).not.toHaveBeenCalled();
|
||||
expect(mockRestoreOriginalModel).not.toHaveBeenCalled();
|
||||
|
||||
// Next call allowed (flag reset path)
|
||||
mockHandleVisionSwitch.mockResolvedValueOnce({ shouldProceed: true });
|
||||
mockSendMessageStream.mockReturnValue(
|
||||
(async function* () {
|
||||
yield { type: ServerGeminiEventType.Content, value: 'ok' };
|
||||
yield { type: ServerGeminiEventType.Finished, value: 'STOP' };
|
||||
})(),
|
||||
);
|
||||
await act(async () => {
|
||||
await result.current.submitQuery('after-gate');
|
||||
});
|
||||
await waitFor(() => {
|
||||
expect(mockSendMessageStream).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('Model restore on completion and errors', () => {
|
||||
it('should restore model after successful stream completion', async () => {
|
||||
mockSendMessageStream.mockReturnValue(
|
||||
(async function* () {
|
||||
yield { type: ServerGeminiEventType.Content, value: 'content' };
|
||||
yield { type: ServerGeminiEventType.Finished, value: 'STOP' };
|
||||
})(),
|
||||
);
|
||||
|
||||
const { result } = renderHook(() =>
|
||||
useGeminiStream(
|
||||
new MockedGeminiClientClass(mockConfig),
|
||||
[],
|
||||
mockAddItem,
|
||||
mockConfig,
|
||||
mockOnDebugMessage,
|
||||
mockHandleSlashCommand,
|
||||
false,
|
||||
() => 'vscode' as EditorType,
|
||||
() => {},
|
||||
() => Promise.resolve(),
|
||||
false,
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
),
|
||||
);
|
||||
|
||||
await act(async () => {
|
||||
await result.current.submitQuery('restore-success');
|
||||
});
|
||||
|
||||
await waitFor(() => {
|
||||
expect(mockRestoreOriginalModel).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
it('should restore model when an error occurs during streaming', async () => {
|
||||
const testError = new Error('stream failure');
|
||||
mockSendMessageStream.mockReturnValue(
|
||||
(async function* () {
|
||||
yield { type: ServerGeminiEventType.Content, value: 'content' };
|
||||
throw testError;
|
||||
})(),
|
||||
);
|
||||
|
||||
const { result } = renderHook(() =>
|
||||
useGeminiStream(
|
||||
new MockedGeminiClientClass(mockConfig),
|
||||
[],
|
||||
mockAddItem,
|
||||
mockConfig,
|
||||
mockOnDebugMessage,
|
||||
mockHandleSlashCommand,
|
||||
false,
|
||||
() => 'vscode' as EditorType,
|
||||
() => {},
|
||||
() => Promise.resolve(),
|
||||
false,
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
),
|
||||
);
|
||||
|
||||
await act(async () => {
|
||||
await result.current.submitQuery('restore-error');
|
||||
});
|
||||
|
||||
await waitFor(() => {
|
||||
expect(mockRestoreOriginalModel).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -42,6 +42,7 @@ import type {
|
||||
import { StreamingState, MessageType, ToolCallStatus } from '../types.js';
|
||||
import { isAtCommand, isSlashCommand } from '../utils/commandUtils.js';
|
||||
import { useShellCommandProcessor } from './shellCommandProcessor.js';
|
||||
import { useVisionAutoSwitch } from './useVisionAutoSwitch.js';
|
||||
import { handleAtCommand } from './atCommandProcessor.js';
|
||||
import { findLastSafeSplitPoint } from '../utils/markdownUtilities.js';
|
||||
import { useStateAndRef } from './useStateAndRef.js';
|
||||
@@ -88,6 +89,12 @@ export const useGeminiStream = (
|
||||
setModelSwitchedFromQuotaError: React.Dispatch<React.SetStateAction<boolean>>,
|
||||
onEditorClose: () => void,
|
||||
onCancelSubmit: () => void,
|
||||
visionModelPreviewEnabled: boolean = false,
|
||||
onVisionSwitchRequired?: (query: PartListUnion) => Promise<{
|
||||
modelOverride?: string;
|
||||
persistSessionModel?: string;
|
||||
showGuidance?: boolean;
|
||||
}>,
|
||||
) => {
|
||||
const [initError, setInitError] = useState<string | null>(null);
|
||||
const abortControllerRef = useRef<AbortController | null>(null);
|
||||
@@ -155,6 +162,13 @@ export const useGeminiStream = (
|
||||
geminiClient,
|
||||
);
|
||||
|
||||
const { handleVisionSwitch, restoreOriginalModel } = useVisionAutoSwitch(
|
||||
config,
|
||||
addItem,
|
||||
visionModelPreviewEnabled,
|
||||
onVisionSwitchRequired,
|
||||
);
|
||||
|
||||
const streamingState = useMemo(() => {
|
||||
if (toolCalls.some((tc) => tc.status === 'awaiting_approval')) {
|
||||
return StreamingState.WaitingForConfirmation;
|
||||
@@ -715,6 +729,20 @@ export const useGeminiStream = (
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle vision switch requirement
|
||||
const visionSwitchResult = await handleVisionSwitch(
|
||||
queryToSend,
|
||||
userMessageTimestamp,
|
||||
options?.isContinuation || false,
|
||||
);
|
||||
|
||||
if (!visionSwitchResult.shouldProceed) {
|
||||
isSubmittingQueryRef.current = false;
|
||||
return;
|
||||
}
|
||||
|
||||
const finalQueryToSend = queryToSend;
|
||||
|
||||
if (!options?.isContinuation) {
|
||||
startNewPrompt();
|
||||
setThought(null); // Reset thought when starting a new prompt
|
||||
@@ -725,7 +753,7 @@ export const useGeminiStream = (
|
||||
|
||||
try {
|
||||
const stream = geminiClient.sendMessageStream(
|
||||
queryToSend,
|
||||
finalQueryToSend,
|
||||
abortSignal,
|
||||
prompt_id!,
|
||||
);
|
||||
@@ -736,6 +764,8 @@ export const useGeminiStream = (
|
||||
);
|
||||
|
||||
if (processingStatus === StreamProcessingStatus.UserCancelled) {
|
||||
// Restore original model if it was temporarily overridden
|
||||
restoreOriginalModel();
|
||||
isSubmittingQueryRef.current = false;
|
||||
return;
|
||||
}
|
||||
@@ -748,7 +778,13 @@ export const useGeminiStream = (
|
||||
loopDetectedRef.current = false;
|
||||
handleLoopDetectedEvent();
|
||||
}
|
||||
|
||||
// Restore original model if it was temporarily overridden
|
||||
restoreOriginalModel();
|
||||
} catch (error: unknown) {
|
||||
// Restore original model if it was temporarily overridden
|
||||
restoreOriginalModel();
|
||||
|
||||
if (error instanceof UnauthorizedError) {
|
||||
onAuthError();
|
||||
} else if (!isNodeError(error) || error.name !== 'AbortError') {
|
||||
@@ -786,6 +822,8 @@ export const useGeminiStream = (
|
||||
startNewPrompt,
|
||||
getPromptCount,
|
||||
handleLoopDetectedEvent,
|
||||
handleVisionSwitch,
|
||||
restoreOriginalModel,
|
||||
],
|
||||
);
|
||||
|
||||
|
||||
374
packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts
Normal file
374
packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts
Normal file
@@ -0,0 +1,374 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { renderHook, act } from '@testing-library/react';
|
||||
import type { Part, PartListUnion } from '@google/genai';
|
||||
import { AuthType, type Config } from '@qwen-code/qwen-code-core';
|
||||
import {
|
||||
shouldOfferVisionSwitch,
|
||||
processVisionSwitchOutcome,
|
||||
getVisionSwitchGuidanceMessage,
|
||||
useVisionAutoSwitch,
|
||||
} from './useVisionAutoSwitch.js';
|
||||
import { VisionSwitchOutcome } from '../components/ModelSwitchDialog.js';
|
||||
import { MessageType } from '../types.js';
|
||||
import { getDefaultVisionModel } from '../models/availableModels.js';
|
||||
|
||||
describe('useVisionAutoSwitch helpers', () => {
|
||||
describe('shouldOfferVisionSwitch', () => {
|
||||
it('returns false when authType is not QWEN_OAUTH', () => {
|
||||
const parts: PartListUnion = [
|
||||
{ inlineData: { mimeType: 'image/png', data: '...' } },
|
||||
];
|
||||
const result = shouldOfferVisionSwitch(
|
||||
parts,
|
||||
AuthType.USE_GEMINI,
|
||||
'qwen3-coder-plus',
|
||||
true,
|
||||
);
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
|
||||
it('returns false when current model is already a vision model', () => {
|
||||
const parts: PartListUnion = [
|
||||
{ inlineData: { mimeType: 'image/png', data: '...' } },
|
||||
];
|
||||
const result = shouldOfferVisionSwitch(
|
||||
parts,
|
||||
AuthType.QWEN_OAUTH,
|
||||
'qwen-vl-max-latest',
|
||||
true,
|
||||
);
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
|
||||
it('returns true when image parts exist, QWEN_OAUTH, and model is not vision', () => {
|
||||
const parts: PartListUnion = [
|
||||
{ text: 'hello' },
|
||||
{ inlineData: { mimeType: 'image/jpeg', data: '...' } },
|
||||
];
|
||||
const result = shouldOfferVisionSwitch(
|
||||
parts,
|
||||
AuthType.QWEN_OAUTH,
|
||||
'qwen3-coder-plus',
|
||||
true,
|
||||
);
|
||||
expect(result).toBe(true);
|
||||
});
|
||||
|
||||
it('detects image when provided as a single Part object (non-array)', () => {
|
||||
const singleImagePart: PartListUnion = {
|
||||
fileData: { mimeType: 'image/gif', fileUri: 'file://image.gif' },
|
||||
} as Part;
|
||||
const result = shouldOfferVisionSwitch(
|
||||
singleImagePart,
|
||||
AuthType.QWEN_OAUTH,
|
||||
'qwen3-coder-plus',
|
||||
true,
|
||||
);
|
||||
expect(result).toBe(true);
|
||||
});
|
||||
|
||||
it('returns false when parts contain no images', () => {
|
||||
const parts: PartListUnion = [{ text: 'just text' }];
|
||||
const result = shouldOfferVisionSwitch(
|
||||
parts,
|
||||
AuthType.QWEN_OAUTH,
|
||||
'qwen3-coder-plus',
|
||||
true,
|
||||
);
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
|
||||
it('returns false when parts is a plain string', () => {
|
||||
const parts: PartListUnion = 'plain text';
|
||||
const result = shouldOfferVisionSwitch(
|
||||
parts,
|
||||
AuthType.QWEN_OAUTH,
|
||||
'qwen3-coder-plus',
|
||||
true,
|
||||
);
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
|
||||
it('returns false when visionModelPreviewEnabled is false', () => {
|
||||
const parts: PartListUnion = [
|
||||
{ inlineData: { mimeType: 'image/png', data: '...' } },
|
||||
];
|
||||
const result = shouldOfferVisionSwitch(
|
||||
parts,
|
||||
AuthType.QWEN_OAUTH,
|
||||
'qwen3-coder-plus',
|
||||
false,
|
||||
);
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('processVisionSwitchOutcome', () => {
|
||||
it('maps SwitchOnce to a one-time model override', () => {
|
||||
const vl = getDefaultVisionModel();
|
||||
const result = processVisionSwitchOutcome(VisionSwitchOutcome.SwitchOnce);
|
||||
expect(result).toEqual({ modelOverride: vl });
|
||||
});
|
||||
|
||||
it('maps SwitchSessionToVL to a persistent session model', () => {
|
||||
const vl = getDefaultVisionModel();
|
||||
const result = processVisionSwitchOutcome(
|
||||
VisionSwitchOutcome.SwitchSessionToVL,
|
||||
);
|
||||
expect(result).toEqual({ persistSessionModel: vl });
|
||||
});
|
||||
|
||||
it('maps DisallowWithGuidance to showGuidance', () => {
|
||||
const result = processVisionSwitchOutcome(
|
||||
VisionSwitchOutcome.DisallowWithGuidance,
|
||||
);
|
||||
expect(result).toEqual({ showGuidance: true });
|
||||
});
|
||||
});
|
||||
|
||||
describe('getVisionSwitchGuidanceMessage', () => {
|
||||
it('returns the expected guidance message', () => {
|
||||
const vl = getDefaultVisionModel();
|
||||
const expected =
|
||||
'To use images with your query, you can:\n' +
|
||||
`• Use /model set ${vl} to switch to a vision-capable model\n` +
|
||||
'• Or remove the image and provide a text description instead';
|
||||
expect(getVisionSwitchGuidanceMessage()).toBe(expected);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('useVisionAutoSwitch hook', () => {
|
||||
type AddItemFn = (
|
||||
item: { type: MessageType; text: string },
|
||||
ts: number,
|
||||
) => any;
|
||||
|
||||
const createMockConfig = (authType: AuthType, initialModel: string) => {
|
||||
let currentModel = initialModel;
|
||||
const mockConfig: Partial<Config> = {
|
||||
getModel: vi.fn(() => currentModel),
|
||||
setModel: vi.fn((m: string) => {
|
||||
currentModel = m;
|
||||
}),
|
||||
getContentGeneratorConfig: vi.fn(() => ({
|
||||
authType,
|
||||
model: currentModel,
|
||||
apiKey: 'test-key',
|
||||
vertexai: false,
|
||||
})),
|
||||
};
|
||||
return mockConfig as Config;
|
||||
};
|
||||
|
||||
let addItem: AddItemFn;
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
addItem = vi.fn();
|
||||
});
|
||||
|
||||
it('returns shouldProceed=true immediately for continuations', async () => {
|
||||
const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
|
||||
const { result } = renderHook(() =>
|
||||
useVisionAutoSwitch(config, addItem as any, true, vi.fn()),
|
||||
);
|
||||
|
||||
const parts: PartListUnion = [
|
||||
{ inlineData: { mimeType: 'image/png', data: '...' } },
|
||||
];
|
||||
let res: any;
|
||||
await act(async () => {
|
||||
res = await result.current.handleVisionSwitch(parts, Date.now(), true);
|
||||
});
|
||||
expect(res).toEqual({ shouldProceed: true });
|
||||
expect(addItem).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('does nothing when authType is not QWEN_OAUTH', async () => {
|
||||
const config = createMockConfig(AuthType.USE_GEMINI, 'qwen3-coder-plus');
|
||||
const onVisionSwitchRequired = vi.fn();
|
||||
const { result } = renderHook(() =>
|
||||
useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
|
||||
);
|
||||
|
||||
const parts: PartListUnion = [
|
||||
{ inlineData: { mimeType: 'image/png', data: '...' } },
|
||||
];
|
||||
let res: any;
|
||||
await act(async () => {
|
||||
res = await result.current.handleVisionSwitch(parts, 123, false);
|
||||
});
|
||||
expect(res).toEqual({ shouldProceed: true });
|
||||
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('does nothing when there are no image parts', async () => {
|
||||
const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
|
||||
const onVisionSwitchRequired = vi.fn();
|
||||
const { result } = renderHook(() =>
|
||||
useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
|
||||
);
|
||||
|
||||
const parts: PartListUnion = [{ text: 'no images here' }];
|
||||
let res: any;
|
||||
await act(async () => {
|
||||
res = await result.current.handleVisionSwitch(parts, 456, false);
|
||||
});
|
||||
expect(res).toEqual({ shouldProceed: true });
|
||||
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('shows guidance and blocks when dialog returns showGuidance', async () => {
|
||||
const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
|
||||
const onVisionSwitchRequired = vi
|
||||
.fn()
|
||||
.mockResolvedValue({ showGuidance: true });
|
||||
const { result } = renderHook(() =>
|
||||
useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
|
||||
);
|
||||
|
||||
const parts: PartListUnion = [
|
||||
{ inlineData: { mimeType: 'image/png', data: '...' } },
|
||||
];
|
||||
|
||||
const userTs = 1010;
|
||||
let res: any;
|
||||
await act(async () => {
|
||||
res = await result.current.handleVisionSwitch(parts, userTs, false);
|
||||
});
|
||||
|
||||
expect(addItem).toHaveBeenCalledWith(
|
||||
{ type: MessageType.INFO, text: getVisionSwitchGuidanceMessage() },
|
||||
userTs,
|
||||
);
|
||||
expect(res).toEqual({ shouldProceed: false });
|
||||
expect(config.setModel).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('applies a one-time override and returns originalModel, then restores', async () => {
|
||||
const initialModel = 'qwen3-coder-plus';
|
||||
const config = createMockConfig(AuthType.QWEN_OAUTH, initialModel);
|
||||
const onVisionSwitchRequired = vi
|
||||
.fn()
|
||||
.mockResolvedValue({ modelOverride: 'qwen-vl-max-latest' });
|
||||
const { result } = renderHook(() =>
|
||||
useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
|
||||
);
|
||||
|
||||
const parts: PartListUnion = [
|
||||
{ inlineData: { mimeType: 'image/png', data: '...' } },
|
||||
];
|
||||
|
||||
let res: any;
|
||||
await act(async () => {
|
||||
res = await result.current.handleVisionSwitch(parts, 2020, false);
|
||||
});
|
||||
|
||||
expect(res).toEqual({ shouldProceed: true, originalModel: initialModel });
|
||||
expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
|
||||
|
||||
// Now restore
|
||||
act(() => {
|
||||
result.current.restoreOriginalModel();
|
||||
});
|
||||
expect(config.setModel).toHaveBeenLastCalledWith(initialModel);
|
||||
});
|
||||
|
||||
it('persists session model when dialog requests persistence', async () => {
  const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
  const onVisionSwitchRequired = vi
    .fn()
    .mockResolvedValue({ persistSessionModel: 'qwen-vl-max-latest' });
  const { result } = renderHook(() =>
    useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
  );

  const parts: PartListUnion = [
    { inlineData: { mimeType: 'image/png', data: '...' } },
  ];

  let res: any;
  await act(async () => {
    res = await result.current.handleVisionSwitch(parts, 3030, false);
  });

  // A persisted switch proceeds without reporting an originalModel.
  expect(res).toEqual({ shouldProceed: true });
  expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');

  // Restore should be a no-op since no one-time override was used
  act(() => {
    result.current.restoreOriginalModel();
  });

  // Last call should still be the persisted model set. Use the matcher
  // rather than popping from mock.calls, which would mutate the recorded
  // call history of the mock.
  expect(config.setModel).toHaveBeenLastCalledWith('qwen-vl-max-latest');
});
|
||||
|
||||
it('returns shouldProceed=true when dialog returns no special flags', async () => {
|
||||
const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
|
||||
const onVisionSwitchRequired = vi.fn().mockResolvedValue({});
|
||||
const { result } = renderHook(() =>
|
||||
useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
|
||||
);
|
||||
|
||||
const parts: PartListUnion = [
|
||||
{ inlineData: { mimeType: 'image/png', data: '...' } },
|
||||
];
|
||||
let res: any;
|
||||
await act(async () => {
|
||||
res = await result.current.handleVisionSwitch(parts, 4040, false);
|
||||
});
|
||||
expect(res).toEqual({ shouldProceed: true });
|
||||
expect(config.setModel).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('blocks when dialog throws or is cancelled', async () => {
|
||||
const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
|
||||
const onVisionSwitchRequired = vi.fn().mockRejectedValue(new Error('x'));
|
||||
const { result } = renderHook(() =>
|
||||
useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
|
||||
);
|
||||
|
||||
const parts: PartListUnion = [
|
||||
{ inlineData: { mimeType: 'image/png', data: '...' } },
|
||||
];
|
||||
let res: any;
|
||||
await act(async () => {
|
||||
res = await result.current.handleVisionSwitch(parts, 5050, false);
|
||||
});
|
||||
expect(res).toEqual({ shouldProceed: false });
|
||||
expect(config.setModel).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('does nothing when visionModelPreviewEnabled is false', async () => {
|
||||
const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
|
||||
const onVisionSwitchRequired = vi.fn();
|
||||
const { result } = renderHook(() =>
|
||||
useVisionAutoSwitch(
|
||||
config,
|
||||
addItem as any,
|
||||
false,
|
||||
onVisionSwitchRequired,
|
||||
),
|
||||
);
|
||||
|
||||
const parts: PartListUnion = [
|
||||
{ inlineData: { mimeType: 'image/png', data: '...' } },
|
||||
];
|
||||
let res: any;
|
||||
await act(async () => {
|
||||
res = await result.current.handleVisionSwitch(parts, 6060, false);
|
||||
});
|
||||
expect(res).toEqual({ shouldProceed: true });
|
||||
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
304
packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
Normal file
304
packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
Normal file
@@ -0,0 +1,304 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { type PartListUnion, type Part } from '@google/genai';
|
||||
import { AuthType, type Config } from '@qwen-code/qwen-code-core';
|
||||
import { useCallback, useRef } from 'react';
|
||||
import { VisionSwitchOutcome } from '../components/ModelSwitchDialog.js';
|
||||
import {
|
||||
getDefaultVisionModel,
|
||||
isVisionModel,
|
||||
} from '../models/availableModels.js';
|
||||
import { MessageType } from '../types.js';
|
||||
import type { UseHistoryManagerReturn } from './useHistoryManager.js';
|
||||
import {
|
||||
isSupportedImageMimeType,
|
||||
getUnsupportedImageFormatWarning,
|
||||
} from '@qwen-code/qwen-code-core';
|
||||
|
||||
/**
 * Reports whether a query payload contains at least one image part.
 *
 * @param parts - The query payload: a plain string, a single part, or an
 *   array mixing strings and parts.
 * @returns `true` when any non-string part is an image according to
 *   `isImagePart`; `false` for plain strings, text-only payloads and
 *   null payloads.
 */
function hasImageParts(parts: PartListUnion): boolean {
  // A bare string is text only.
  if (typeof parts === 'string') {
    return false;
  }

  if (Array.isArray(parts)) {
    // String entries are text; only structured parts can carry image data.
    return parts.some((part) => typeof part !== 'string' && isImagePart(part));
  }

  // A single structured part. `typeof null` is also 'object', so exclude it
  // explicitly: the `in` checks inside isImagePart would throw on null.
  if (typeof parts === 'object' && parts !== null) {
    return isImagePart(parts);
  }

  return false;
}
|
||||
|
||||
/**
 * Reports whether a single part carries image data.
 *
 * A part counts as an image when its `inlineData` or its `fileData` declares
 * a MIME type whose top-level type is `image`. The comparison is
 * case-insensitive, so `IMAGE/PNG` is recognised as well as `image/png`.
 *
 * @param part - The part to inspect.
 * @returns `true` when either MIME type starts with `image/`.
 */
function isImagePart(part: Part): boolean {
  const declaredMimeTypes = [
    'inlineData' in part ? part.inlineData?.mimeType : undefined,
    'fileData' in part ? part.fileData?.mimeType : undefined,
  ];

  return declaredMimeTypes.some(
    (mimeType) => mimeType?.toLowerCase().startsWith('image/') === true,
  );
}
|
||||
|
||||
/**
 * Scans a query payload for image parts and collects the MIME types of those
 * whose format is not supported.
 *
 * Both `inlineData` and `fileData` of every part are inspected independently,
 * so an unsupported inline image is still reported when the same part also
 * carries file data. The `image/` prefix is matched case-insensitively and the
 * lower-cased MIME type is what gets passed to `isSupportedImageMimeType`.
 *
 * @param parts - The query payload: a plain string, a single part, or an
 *   array mixing strings and parts.
 * @returns `hasImages` is `true` when at least one image MIME type was found;
 *   `unsupportedMimeTypes` lists, as originally spelled and in encounter
 *   order, every image MIME type rejected by `isSupportedImageMimeType`;
 *   `hasUnsupportedFormats` is `true` when that list is non-empty.
 */
function checkImageFormatsSupport(parts: PartListUnion): {
  hasImages: boolean;
  hasUnsupportedFormats: boolean;
  unsupportedMimeTypes: string[];
} {
  const unsupportedMimeTypes: string[] = [];
  let hasImages = false;

  // A bare string is text only.
  if (typeof parts === 'string') {
    return {
      hasImages: false,
      hasUnsupportedFormats: false,
      unsupportedMimeTypes: [],
    };
  }

  const partsArray = Array.isArray(parts) ? parts : [parts];

  for (const part of partsArray) {
    if (typeof part === 'string') continue;

    // Look at both carriers; neither is allowed to mask the other.
    const declaredMimeTypes = [
      'inlineData' in part ? part.inlineData?.mimeType : undefined,
      'fileData' in part ? part.fileData?.mimeType : undefined,
    ];

    for (const mimeType of declaredMimeTypes) {
      if (!mimeType) continue;

      const normalized = mimeType.toLowerCase();
      if (!normalized.startsWith('image/')) continue;

      hasImages = true;
      if (!isSupportedImageMimeType(normalized)) {
        unsupportedMimeTypes.push(mimeType);
      }
    }
  }

  return {
    hasImages,
    hasUnsupportedFormats: unsupportedMimeTypes.length > 0,
    unsupportedMimeTypes,
  };
}
|
||||
|
||||
/**
 * Decides whether the user should be offered a switch to a vision model for
 * the given query.
 *
 * The offer is made only when all of the following hold: the auth type is
 * `AuthType.QWEN_OAUTH`, the vision model preview is enabled, the current
 * model is not already a vision model, and the query contains image parts.
 *
 * @param parts - The query payload to inspect for images.
 * @param authType - The active authentication type.
 * @param currentModel - Identifier of the model currently in use.
 * @param visionModelPreviewEnabled - Whether the vision model preview is
 *   enabled; defaults to `false`, in which case no switch is ever offered.
 * @returns `true` when a vision switch should be offered.
 */
export function shouldOfferVisionSwitch(
  parts: PartListUnion,
  authType: AuthType,
  currentModel: string,
  visionModelPreviewEnabled: boolean = false,
): boolean {
  // Only trigger for qwen-oauth.
  if (authType !== AuthType.QWEN_OAUTH) {
    return false;
  }

  // If vision model preview is disabled, never offer vision switch.
  if (!visionModelPreviewEnabled) {
    return false;
  }

  // If the current model is already a vision model, no need to switch.
  if (isVisionModel(currentModel)) {
    return false;
  }

  // Otherwise the decision rests on whether the message carries images.
  return hasImageParts(parts);
}
|
||||
|
||||
/**
 * Decision produced when a vision switch has been offered.
 * All fields are optional; consumers in this file check `showGuidance`
 * first, then `modelOverride`, then `persistSessionModel`.
 */
export interface VisionSwitchResult {
  /** Model to use as a one-time override. */
  modelOverride?: string;

  /** Model to switch the session to persistently. */
  persistSessionModel?: string;

  /** When set, guidance is shown instead of switching models. */
  showGuidance?: boolean;
}
|
||||
|
||||
/**
 * Translates a vision switch outcome into the result object consumed by
 * the vision switch handling.
 *
 * @param outcome - The outcome selected for the vision switch.
 * @returns A one-time override for `SwitchOnce`, a persistent session model
 *   for `SwitchSessionToVL`, and a guidance request for every other outcome.
 */
export function processVisionSwitchOutcome(
  outcome: VisionSwitchOutcome,
): VisionSwitchResult {
  const visionModel = getDefaultVisionModel();

  if (outcome === VisionSwitchOutcome.SwitchOnce) {
    return { modelOverride: visionModel };
  }

  if (outcome === VisionSwitchOutcome.SwitchSessionToVL) {
    return { persistSessionModel: visionModel };
  }

  // DisallowWithGuidance and any unrecognized outcome both ask for guidance.
  return { showGuidance: true };
}
|
||||
|
||||
/**
 * Builds the guidance text shown when a vision switch is disallowed.
 *
 * @returns A three-line message naming the default vision model and the
 *   two options available to the user.
 */
export function getVisionSwitchGuidanceMessage(): string {
  const visionModel = getDefaultVisionModel();
  const messageLines = [
    'To use images with your query, you can:',
    `• Use /model set ${visionModel} to switch to a vision-capable model`,
    '• Or remove the image and provide a text description instead',
  ];
  return messageLines.join('\n');
}
|
||||
|
||||
/**
 * Result of running the vision switch handling for a single query.
 */
export interface VisionSwitchHandlingResult {
  /** Whether the caller should go ahead and send the query. */
  shouldProceed: boolean;

  /** Model that was active before a one-time override was applied, if any. */
  originalModel?: string;
}
|
||||
|
||||
/**
 * Custom hook for handling vision model auto-switching.
 *
 * @param config - Provides the content generator config and the active
 *   model, and is used to change the active model.
 * @param addItem - History callback used to surface informational messages.
 * @param visionModelPreviewEnabled - Forwarded to `shouldOfferVisionSwitch`;
 *   when `false` (the default) no switch is ever offered.
 * @param onVisionSwitchRequired - Invoked to obtain a decision when a switch
 *   should be offered. When omitted, every query proceeds unchanged.
 * @returns `handleVisionSwitch`, to be called before sending a query, and
 *   `restoreOriginalModel`, which undoes a one-time model override.
 */
export function useVisionAutoSwitch(
  config: Config,
  addItem: UseHistoryManagerReturn['addItem'],
  visionModelPreviewEnabled: boolean = false,
  onVisionSwitchRequired?: (
    query: PartListUnion,
  ) => Promise<VisionSwitchResult>,
) {
  // Remembers the model that was active before a one-time override so that
  // restoreOriginalModel can put it back. `null` means nothing to restore.
  const originalModelRef = useRef<string | null>(null);

  const handleVisionSwitch = useCallback(
    async (
      query: PartListUnion,
      userMessageTimestamp: number,
      isContinuation: boolean,
    ): Promise<VisionSwitchHandlingResult> => {
      // Skip vision switch handling for continuations or if no handler provided
      if (isContinuation || !onVisionSwitchRequired) {
        return { shouldProceed: true };
      }

      const contentGeneratorConfig = config.getContentGeneratorConfig();

      // Only handle qwen-oauth auth type
      if (contentGeneratorConfig?.authType !== AuthType.QWEN_OAUTH) {
        return { shouldProceed: true };
      }

      // Warn about unsupported image formats first; processing continues
      // after the warning has been added to the history.
      const formatCheck = checkImageFormatsSupport(query);
      if (formatCheck.hasUnsupportedFormats) {
        addItem(
          {
            type: MessageType.INFO,
            text: getUnsupportedImageFormatWarning(),
          },
          userMessageTimestamp,
        );
      }

      // Check if vision switch is needed
      const switchNeeded = shouldOfferVisionSwitch(
        query,
        contentGeneratorConfig.authType,
        config.getModel(),
        visionModelPreviewEnabled,
      );
      if (!switchNeeded) {
        return { shouldProceed: true };
      }

      try {
        const visionSwitchResult = await onVisionSwitchRequired(query);

        if (visionSwitchResult.showGuidance) {
          // Show guidance and don't proceed with the request
          addItem(
            {
              type: MessageType.INFO,
              text: getVisionSwitchGuidanceMessage(),
            },
            userMessageTimestamp,
          );
          return { shouldProceed: false };
        }

        if (visionSwitchResult.modelOverride) {
          // One-time model override: snapshot the current model before
          // replacing it so it can be restored afterwards.
          const previousModel = config.getModel();
          originalModelRef.current = previousModel;
          config.setModel(visionSwitchResult.modelOverride);
          return {
            shouldProceed: true,
            originalModel: previousModel,
          };
        }

        if (visionSwitchResult.persistSessionModel) {
          // Persistent session model change: nothing to restore later.
          config.setModel(visionSwitchResult.persistSessionModel);
        }

        return { shouldProceed: true };
      } catch {
        // If vision switch dialog was cancelled or errored, don't proceed
        return { shouldProceed: false };
      }
    },
    [config, addItem, visionModelPreviewEnabled, onVisionSwitchRequired],
  );

  const restoreOriginalModel = useCallback(() => {
    const previousModel = originalModelRef.current;
    // Compare against null explicitly: a truthiness test would leave an
    // empty-string snapshot neither restored nor cleared.
    if (previousModel !== null) {
      config.setModel(previousModel);
      originalModelRef.current = null;
    }
  }, [config]);

  return {
    handleVisionSwitch,
    restoreOriginalModel,
  };
}
|
||||
Reference in New Issue
Block a user