feat: add yolo mode support to auto vision model switch (#652)

* feat: add yolo mode support to auto vision model switch

* feat: add cli args & env variables for switch behavior

* fix: use dedicated model names and settings

* docs: add vision model instructions

* fix: failed test case

* fix: setModel failure
This commit is contained in:
Mingholy
2025-09-24 10:21:09 +08:00
committed by GitHub
parent 5ecb4a2430
commit 48d8587bf9
26 changed files with 1133 additions and 122 deletions

View File

@@ -566,7 +566,9 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => {
}
// Switch model for future use but return false to stop current retry
config.setModel(fallbackModel);
config.setModel(fallbackModel).catch((error) => {
console.error('Failed to switch to fallback model:', error);
});
config.setFallbackMode(true);
logFlashFallback(
config,
@@ -650,17 +652,28 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => {
}, []);
const handleModelSelect = useCallback(
(modelId: string) => {
config.setModel(modelId);
setCurrentModel(modelId);
setIsModelSelectionDialogOpen(false);
addItem(
{
type: MessageType.INFO,
text: `Switched model to \`${modelId}\` for this session.`,
},
Date.now(),
);
async (modelId: string) => {
try {
await config.setModel(modelId);
setCurrentModel(modelId);
setIsModelSelectionDialogOpen(false);
addItem(
{
type: MessageType.INFO,
text: `Switched model to \`${modelId}\` for this session.`,
},
Date.now(),
);
} catch (error) {
console.error('Failed to switch model:', error);
addItem(
{
type: MessageType.ERROR,
text: `Failed to switch to model \`${modelId}\`. Please try again.`,
},
Date.now(),
);
}
},
[config, setCurrentModel, addItem],
);
@@ -670,7 +683,7 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => {
if (!contentGeneratorConfig) return [];
const visionModelPreviewEnabled =
settings.merged.experimental?.visionModelPreview ?? false;
settings.merged.experimental?.visionModelPreview ?? true;
switch (contentGeneratorConfig.authType) {
case AuthType.QWEN_OAUTH:
@@ -759,7 +772,7 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => {
setModelSwitchedFromQuotaError,
refreshStatic,
() => cancelHandlerRef.current(),
settings.merged.experimental?.visionModelPreview ?? false,
settings.merged.experimental?.visionModelPreview ?? true,
handleVisionSwitchRequired,
);

View File

@@ -46,8 +46,8 @@ describe('ModelSwitchDialog', () => {
value: VisionSwitchOutcome.SwitchSessionToVL,
},
{
label: 'Do not switch, show guidance',
value: VisionSwitchOutcome.DisallowWithGuidance,
label: 'Continue with current model',
value: VisionSwitchOutcome.ContinueWithCurrentModel,
},
];
@@ -81,18 +81,18 @@ describe('ModelSwitchDialog', () => {
);
});
it('should call onSelect with DisallowWithGuidance when third option is selected', () => {
it('should call onSelect with ContinueWithCurrentModel when third option is selected', () => {
render(<ModelSwitchDialog onSelect={mockOnSelect} />);
const onSelectCallback = mockRadioButtonSelect.mock.calls[0][0].onSelect;
onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance);
onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel);
expect(mockOnSelect).toHaveBeenCalledWith(
VisionSwitchOutcome.DisallowWithGuidance,
VisionSwitchOutcome.ContinueWithCurrentModel,
);
});
it('should setup escape key handler to call onSelect with DisallowWithGuidance', () => {
it('should setup escape key handler to call onSelect with ContinueWithCurrentModel', () => {
render(<ModelSwitchDialog onSelect={mockOnSelect} />);
expect(mockUseKeypress).toHaveBeenCalledWith(expect.any(Function), {
@@ -104,7 +104,7 @@ describe('ModelSwitchDialog', () => {
keypressHandler({ name: 'escape' });
expect(mockOnSelect).toHaveBeenCalledWith(
VisionSwitchOutcome.DisallowWithGuidance,
VisionSwitchOutcome.ContinueWithCurrentModel,
);
});
@@ -126,13 +126,9 @@ describe('ModelSwitchDialog', () => {
describe('VisionSwitchOutcome enum', () => {
it('should have correct enum values', () => {
expect(VisionSwitchOutcome.SwitchOnce).toBe('switch_once');
expect(VisionSwitchOutcome.SwitchSessionToVL).toBe(
'switch_session_to_vl',
);
expect(VisionSwitchOutcome.DisallowWithGuidance).toBe(
'disallow_with_guidance',
);
expect(VisionSwitchOutcome.SwitchOnce).toBe('once');
expect(VisionSwitchOutcome.SwitchSessionToVL).toBe('session');
expect(VisionSwitchOutcome.ContinueWithCurrentModel).toBe('persist');
});
});
@@ -144,7 +140,7 @@ describe('ModelSwitchDialog', () => {
// Call multiple times
onSelectCallback(VisionSwitchOutcome.SwitchOnce);
onSelectCallback(VisionSwitchOutcome.SwitchSessionToVL);
onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance);
onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel);
expect(mockOnSelect).toHaveBeenCalledTimes(3);
expect(mockOnSelect).toHaveBeenNthCalledWith(
@@ -157,7 +153,7 @@ describe('ModelSwitchDialog', () => {
);
expect(mockOnSelect).toHaveBeenNthCalledWith(
3,
VisionSwitchOutcome.DisallowWithGuidance,
VisionSwitchOutcome.ContinueWithCurrentModel,
);
});
@@ -179,7 +175,7 @@ describe('ModelSwitchDialog', () => {
expect(mockOnSelect).toHaveBeenCalledTimes(2);
expect(mockOnSelect).toHaveBeenCalledWith(
VisionSwitchOutcome.DisallowWithGuidance,
VisionSwitchOutcome.ContinueWithCurrentModel,
);
});
});

View File

@@ -14,9 +14,9 @@ import {
import { useKeypress } from '../hooks/useKeypress.js';
export enum VisionSwitchOutcome {
SwitchOnce = 'switch_once',
SwitchSessionToVL = 'switch_session_to_vl',
DisallowWithGuidance = 'disallow_with_guidance',
SwitchOnce = 'once',
SwitchSessionToVL = 'session',
ContinueWithCurrentModel = 'persist',
}
export interface ModelSwitchDialogProps {
@@ -29,7 +29,7 @@ export const ModelSwitchDialog: React.FC<ModelSwitchDialogProps> = ({
useKeypress(
(key) => {
if (key.name === 'escape') {
onSelect(VisionSwitchOutcome.DisallowWithGuidance);
onSelect(VisionSwitchOutcome.ContinueWithCurrentModel);
}
},
{ isActive: true },
@@ -45,8 +45,8 @@ export const ModelSwitchDialog: React.FC<ModelSwitchDialogProps> = ({
value: VisionSwitchOutcome.SwitchSessionToVL,
},
{
label: 'Do not switch, show guidance',
value: VisionSwitchOutcome.DisallowWithGuidance,
label: 'Continue with current model',
value: VisionSwitchOutcome.ContinueWithCurrentModel,
},
];

View File

@@ -60,7 +60,9 @@ const mockParseAndFormatApiError = vi.hoisted(() => vi.fn());
const mockHandleVisionSwitch = vi.hoisted(() =>
vi.fn().mockResolvedValue({ shouldProceed: true }),
);
const mockRestoreOriginalModel = vi.hoisted(() => vi.fn());
const mockRestoreOriginalModel = vi.hoisted(() =>
vi.fn().mockResolvedValue(undefined),
);
vi.mock('@qwen-code/qwen-code-core', async (importOriginal) => {
const actualCoreModule = (await importOriginal()) as any;
@@ -301,6 +303,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
);
},
{
@@ -462,6 +466,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -541,6 +547,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -649,6 +657,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -758,6 +768,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -887,6 +899,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
cancelSubmitSpy,
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -1198,6 +1212,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -1251,6 +1267,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -1301,6 +1319,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -1349,6 +1369,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -1398,6 +1420,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -1487,6 +1511,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -1537,6 +1563,8 @@ describe('useGeminiStream', () => {
vi.fn(), // setModelSwitched
vi.fn(), // onEditorClose
vi.fn(), // onCancelSubmit
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -1602,6 +1630,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -1680,6 +1710,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -1734,6 +1766,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -1943,6 +1977,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -1975,6 +2011,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -2028,6 +2066,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);
@@ -2065,6 +2105,8 @@ describe('useGeminiStream', () => {
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
undefined, // onVisionSwitchRequired (optional)
),
);

View File

@@ -89,7 +89,7 @@ export const useGeminiStream = (
setModelSwitchedFromQuotaError: React.Dispatch<React.SetStateAction<boolean>>,
onEditorClose: () => void,
onCancelSubmit: () => void,
visionModelPreviewEnabled: boolean = false,
visionModelPreviewEnabled: boolean,
onVisionSwitchRequired?: (query: PartListUnion) => Promise<{
modelOverride?: string;
persistSessionModel?: string;
@@ -765,7 +765,9 @@ export const useGeminiStream = (
if (processingStatus === StreamProcessingStatus.UserCancelled) {
// Restore original model if it was temporarily overridden
restoreOriginalModel();
restoreOriginalModel().catch((error) => {
console.error('Failed to restore original model:', error);
});
isSubmittingQueryRef.current = false;
return;
}
@@ -780,10 +782,14 @@ export const useGeminiStream = (
}
// Restore original model if it was temporarily overridden
restoreOriginalModel();
restoreOriginalModel().catch((error) => {
console.error('Failed to restore original model:', error);
});
} catch (error: unknown) {
// Restore original model if it was temporarily overridden
restoreOriginalModel();
restoreOriginalModel().catch((error) => {
console.error('Failed to restore original model:', error);
});
if (error instanceof UnauthorizedError) {
onAuthError();

View File

@@ -8,7 +8,7 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { renderHook, act } from '@testing-library/react';
import type { Part, PartListUnion } from '@google/genai';
import { AuthType, type Config } from '@qwen-code/qwen-code-core';
import { AuthType, type Config, ApprovalMode } from '@qwen-code/qwen-code-core';
import {
shouldOfferVisionSwitch,
processVisionSwitchOutcome,
@@ -41,7 +41,7 @@ describe('useVisionAutoSwitch helpers', () => {
const result = shouldOfferVisionSwitch(
parts,
AuthType.QWEN_OAUTH,
'qwen-vl-max-latest',
'vision-model',
true,
);
expect(result).toBe(false);
@@ -108,6 +108,56 @@ describe('useVisionAutoSwitch helpers', () => {
);
expect(result).toBe(false);
});
it('returns true when image parts exist in YOLO mode context', () => {
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
const result = shouldOfferVisionSwitch(
parts,
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
true,
);
expect(result).toBe(true);
});
it('returns false when no image parts exist in YOLO mode context', () => {
const parts: PartListUnion = [{ text: 'just text' }];
const result = shouldOfferVisionSwitch(
parts,
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
true,
);
expect(result).toBe(false);
});
it('returns false when already using vision model in YOLO mode context', () => {
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
const result = shouldOfferVisionSwitch(
parts,
AuthType.QWEN_OAUTH,
'vision-model',
true,
);
expect(result).toBe(false);
});
it('returns false when authType is not QWEN_OAUTH in YOLO mode context', () => {
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
const result = shouldOfferVisionSwitch(
parts,
AuthType.USE_GEMINI,
'qwen3-coder-plus',
true,
);
expect(result).toBe(false);
});
});
describe('processVisionSwitchOutcome', () => {
@@ -125,11 +175,11 @@ describe('useVisionAutoSwitch helpers', () => {
expect(result).toEqual({ persistSessionModel: vl });
});
it('maps DisallowWithGuidance to showGuidance', () => {
it('maps ContinueWithCurrentModel to empty result', () => {
const result = processVisionSwitchOutcome(
VisionSwitchOutcome.DisallowWithGuidance,
VisionSwitchOutcome.ContinueWithCurrentModel,
);
expect(result).toEqual({ showGuidance: true });
expect(result).toEqual({});
});
});
@@ -151,13 +201,20 @@ describe('useVisionAutoSwitch hook', () => {
ts: number,
) => any;
const createMockConfig = (authType: AuthType, initialModel: string) => {
const createMockConfig = (
authType: AuthType,
initialModel: string,
approvalMode: ApprovalMode = ApprovalMode.DEFAULT,
vlmSwitchMode?: string,
) => {
let currentModel = initialModel;
const mockConfig: Partial<Config> = {
getModel: vi.fn(() => currentModel),
setModel: vi.fn((m: string) => {
setModel: vi.fn(async (m: string) => {
currentModel = m;
}),
getApprovalMode: vi.fn(() => approvalMode),
getVlmSwitchMode: vi.fn(() => vlmSwitchMode),
getContentGeneratorConfig: vi.fn(() => ({
authType,
model: currentModel,
@@ -226,11 +283,9 @@ describe('useVisionAutoSwitch hook', () => {
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
it('shows guidance and blocks when dialog returns showGuidance', async () => {
it('continues with current model when dialog returns empty result', async () => {
const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
const onVisionSwitchRequired = vi
.fn()
.mockResolvedValue({ showGuidance: true });
const onVisionSwitchRequired = vi.fn().mockResolvedValue({}); // Empty result for ContinueWithCurrentModel
const { result } = renderHook(() =>
useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
);
@@ -245,11 +300,12 @@ describe('useVisionAutoSwitch hook', () => {
res = await result.current.handleVisionSwitch(parts, userTs, false);
});
expect(addItem).toHaveBeenCalledWith(
// Should not add any guidance message
expect(addItem).not.toHaveBeenCalledWith(
{ type: MessageType.INFO, text: getVisionSwitchGuidanceMessage() },
userTs,
);
expect(res).toEqual({ shouldProceed: false });
expect(res).toEqual({ shouldProceed: true });
expect(config.setModel).not.toHaveBeenCalled();
});
@@ -258,7 +314,7 @@ describe('useVisionAutoSwitch hook', () => {
const config = createMockConfig(AuthType.QWEN_OAUTH, initialModel);
const onVisionSwitchRequired = vi
.fn()
.mockResolvedValue({ modelOverride: 'qwen-vl-max-latest' });
.mockResolvedValue({ modelOverride: 'coder-model' });
const { result } = renderHook(() =>
useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
);
@@ -273,20 +329,26 @@ describe('useVisionAutoSwitch hook', () => {
});
expect(res).toEqual({ shouldProceed: true, originalModel: initialModel });
expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
expect(config.setModel).toHaveBeenCalledWith('coder-model', {
reason: 'vision_auto_switch',
context: 'User-prompted vision switch (one-time override)',
});
// Now restore
act(() => {
result.current.restoreOriginalModel();
await act(async () => {
await result.current.restoreOriginalModel();
});
expect(config.setModel).toHaveBeenLastCalledWith(initialModel, {
reason: 'vision_auto_switch',
context: 'Restoring original model after vision switch',
});
expect(config.setModel).toHaveBeenLastCalledWith(initialModel);
});
it('persists session model when dialog requests persistence', async () => {
const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
const onVisionSwitchRequired = vi
.fn()
.mockResolvedValue({ persistSessionModel: 'qwen-vl-max-latest' });
.mockResolvedValue({ persistSessionModel: 'coder-model' });
const { result } = renderHook(() =>
useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
);
@@ -301,16 +363,17 @@ describe('useVisionAutoSwitch hook', () => {
});
expect(res).toEqual({ shouldProceed: true });
expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
expect(config.setModel).toHaveBeenCalledWith('coder-model', {
reason: 'vision_auto_switch',
context: 'User-prompted vision switch (session persistent)',
});
// Restore should be a no-op since no one-time override was used
act(() => {
result.current.restoreOriginalModel();
await act(async () => {
await result.current.restoreOriginalModel();
});
// Last call should still be the persisted model set
expect((config.setModel as any).mock.calls.pop()?.[0]).toBe(
'qwen-vl-max-latest',
);
expect((config.setModel as any).mock.calls.pop()?.[0]).toBe('coder-model');
});
it('returns shouldProceed=true when dialog returns no special flags', async () => {
@@ -371,4 +434,420 @@ describe('useVisionAutoSwitch hook', () => {
expect(res).toEqual({ shouldProceed: true });
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
describe('YOLO mode behavior', () => {
it('automatically switches to vision model in YOLO mode without showing dialog', async () => {
const initialModel = 'qwen3-coder-plus';
const config = createMockConfig(
AuthType.QWEN_OAUTH,
initialModel,
ApprovalMode.YOLO,
);
const onVisionSwitchRequired = vi.fn(); // Should not be called in YOLO mode
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
let res: any;
await act(async () => {
res = await result.current.handleVisionSwitch(parts, 7070, false);
});
// Should automatically switch without calling the dialog
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
expect(res).toEqual({
shouldProceed: true,
originalModel: initialModel,
});
expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
reason: 'vision_auto_switch',
context: 'YOLO mode auto-switch for image content',
});
});
it('does not switch in YOLO mode when no images are present', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.YOLO,
);
const onVisionSwitchRequired = vi.fn();
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [{ text: 'no images here' }];
let res: any;
await act(async () => {
res = await result.current.handleVisionSwitch(parts, 8080, false);
});
expect(res).toEqual({ shouldProceed: true });
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
expect(config.setModel).not.toHaveBeenCalled();
});
it('does not switch in YOLO mode when already using vision model', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'vision-model',
ApprovalMode.YOLO,
);
const onVisionSwitchRequired = vi.fn();
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
let res: any;
await act(async () => {
res = await result.current.handleVisionSwitch(parts, 9090, false);
});
expect(res).toEqual({ shouldProceed: true });
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
expect(config.setModel).not.toHaveBeenCalled();
});
it('restores original model after YOLO mode auto-switch', async () => {
const initialModel = 'qwen3-coder-plus';
const config = createMockConfig(
AuthType.QWEN_OAUTH,
initialModel,
ApprovalMode.YOLO,
);
const onVisionSwitchRequired = vi.fn();
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
// First, trigger the auto-switch
await act(async () => {
await result.current.handleVisionSwitch(parts, 10100, false);
});
// Verify model was switched
expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
reason: 'vision_auto_switch',
context: 'YOLO mode auto-switch for image content',
});
// Now restore the original model
await act(async () => {
await result.current.restoreOriginalModel();
});
// Verify model was restored
expect(config.setModel).toHaveBeenLastCalledWith(initialModel, {
reason: 'vision_auto_switch',
context: 'Restoring original model after vision switch',
});
});
it('does not switch in YOLO mode when authType is not QWEN_OAUTH', async () => {
const config = createMockConfig(
AuthType.USE_GEMINI,
'qwen3-coder-plus',
ApprovalMode.YOLO,
);
const onVisionSwitchRequired = vi.fn();
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
let res: any;
await act(async () => {
res = await result.current.handleVisionSwitch(parts, 11110, false);
});
expect(res).toEqual({ shouldProceed: true });
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
expect(config.setModel).not.toHaveBeenCalled();
});
it('does not switch in YOLO mode when visionModelPreviewEnabled is false', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.YOLO,
);
const onVisionSwitchRequired = vi.fn();
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
false,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
let res: any;
await act(async () => {
res = await result.current.handleVisionSwitch(parts, 12120, false);
});
expect(res).toEqual({ shouldProceed: true });
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
expect(config.setModel).not.toHaveBeenCalled();
});
it('handles multiple image formats in YOLO mode', async () => {
const initialModel = 'qwen3-coder-plus';
const config = createMockConfig(
AuthType.QWEN_OAUTH,
initialModel,
ApprovalMode.YOLO,
);
const onVisionSwitchRequired = vi.fn();
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ text: 'Here are some images:' },
{ inlineData: { mimeType: 'image/jpeg', data: '...' } },
{ fileData: { mimeType: 'image/png', fileUri: 'file://image.png' } },
{ text: 'Please analyze them.' },
];
let res: any;
await act(async () => {
res = await result.current.handleVisionSwitch(parts, 13130, false);
});
expect(res).toEqual({
shouldProceed: true,
originalModel: initialModel,
});
expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
reason: 'vision_auto_switch',
context: 'YOLO mode auto-switch for image content',
});
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
});
describe('VLM switch mode default behavior', () => {
it('should automatically switch once when vlmSwitchMode is "once"', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
'once',
);
const onVisionSwitchRequired = vi.fn(); // Should not be called
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(switchResult.originalModel).toBe('qwen3-coder-plus');
expect(config.setModel).toHaveBeenCalledWith('vision-model', {
reason: 'vision_auto_switch',
context: 'Default VLM switch mode: once (one-time override)',
});
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
it('should switch session when vlmSwitchMode is "session"', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
'session',
);
const onVisionSwitchRequired = vi.fn(); // Should not be called
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(switchResult.originalModel).toBeUndefined(); // No original model for session switch
expect(config.setModel).toHaveBeenCalledWith('vision-model', {
reason: 'vision_auto_switch',
context: 'Default VLM switch mode: session (session persistent)',
});
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
it('should continue with current model when vlmSwitchMode is "persist"', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
'persist',
);
const onVisionSwitchRequired = vi.fn(); // Should not be called
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(switchResult.originalModel).toBeUndefined();
expect(config.setModel).not.toHaveBeenCalled();
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
it('should fall back to user prompt when vlmSwitchMode is not set', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
undefined, // No default mode
);
const onVisionSwitchRequired = vi
.fn()
.mockResolvedValue({ modelOverride: 'vision-model' });
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(onVisionSwitchRequired).toHaveBeenCalledWith(parts);
});
it('should fall back to persist behavior when vlmSwitchMode has invalid value', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
'invalid-value',
);
const onVisionSwitchRequired = vi.fn(); // Should not be called
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(switchResult.originalModel).toBeUndefined();
// For invalid values, it should continue with current model (persist behavior)
expect(config.setModel).not.toHaveBeenCalled();
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
});
});

View File

@@ -5,7 +5,7 @@
*/
import { type PartListUnion, type Part } from '@google/genai';
import { AuthType, type Config } from '@qwen-code/qwen-code-core';
import { AuthType, type Config, ApprovalMode } from '@qwen-code/qwen-code-core';
import { useCallback, useRef } from 'react';
import { VisionSwitchOutcome } from '../components/ModelSwitchDialog.js';
import {
@@ -121,7 +121,7 @@ export function shouldOfferVisionSwitch(
parts: PartListUnion,
authType: AuthType,
currentModel: string,
visionModelPreviewEnabled: boolean = false,
visionModelPreviewEnabled: boolean = true,
): boolean {
// Only trigger for qwen-oauth
if (authType !== AuthType.QWEN_OAUTH) {
@@ -166,11 +166,11 @@ export function processVisionSwitchOutcome(
case VisionSwitchOutcome.SwitchSessionToVL:
return { persistSessionModel: vlModelId };
case VisionSwitchOutcome.DisallowWithGuidance:
return { showGuidance: true };
case VisionSwitchOutcome.ContinueWithCurrentModel:
return {}; // Continue with current model, no changes needed
default:
return { showGuidance: true };
return {}; // Default to continuing with current model
}
}
@@ -198,7 +198,7 @@ export interface VisionSwitchHandlingResult {
export function useVisionAutoSwitch(
config: Config,
addItem: UseHistoryManagerReturn['addItem'],
visionModelPreviewEnabled: boolean = false,
visionModelPreviewEnabled: boolean = true,
onVisionSwitchRequired?: (query: PartListUnion) => Promise<{
modelOverride?: string;
persistSessionModel?: string;
@@ -252,35 +252,91 @@ export function useVisionAutoSwitch(
return { shouldProceed: true };
}
try {
const visionSwitchResult = await onVisionSwitchRequired(query);
// In YOLO mode, automatically switch to vision model without user interaction
if (config.getApprovalMode() === ApprovalMode.YOLO) {
const vlModelId = getDefaultVisionModel();
originalModelRef.current = config.getModel();
await config.setModel(vlModelId, {
reason: 'vision_auto_switch',
context: 'YOLO mode auto-switch for image content',
});
return {
shouldProceed: true,
originalModel: originalModelRef.current,
};
}
if (visionSwitchResult.showGuidance) {
// Show guidance and don't proceed with the request
addItem(
{
type: MessageType.INFO,
text: getVisionSwitchGuidanceMessage(),
},
userMessageTimestamp,
);
return { shouldProceed: false };
// Check if there's a default VLM switch mode configured
const defaultVlmSwitchMode = config.getVlmSwitchMode();
if (defaultVlmSwitchMode) {
// Convert string value to VisionSwitchOutcome enum
let outcome: VisionSwitchOutcome;
switch (defaultVlmSwitchMode) {
case 'once':
outcome = VisionSwitchOutcome.SwitchOnce;
break;
case 'session':
outcome = VisionSwitchOutcome.SwitchSessionToVL;
break;
case 'persist':
outcome = VisionSwitchOutcome.ContinueWithCurrentModel;
break;
default:
// Invalid value: treat it like 'persist' and continue with the current model (no user prompt)
outcome = VisionSwitchOutcome.ContinueWithCurrentModel;
}
// Process the default outcome
const visionSwitchResult = processVisionSwitchOutcome(outcome);
if (visionSwitchResult.modelOverride) {
// One-time model override
originalModelRef.current = config.getModel();
config.setModel(visionSwitchResult.modelOverride);
await config.setModel(visionSwitchResult.modelOverride, {
reason: 'vision_auto_switch',
context: `Default VLM switch mode: ${defaultVlmSwitchMode} (one-time override)`,
});
return {
shouldProceed: true,
originalModel: originalModelRef.current,
};
} else if (visionSwitchResult.persistSessionModel) {
// Persistent session model change
config.setModel(visionSwitchResult.persistSessionModel);
await config.setModel(visionSwitchResult.persistSessionModel, {
reason: 'vision_auto_switch',
context: `Default VLM switch mode: ${defaultVlmSwitchMode} (session persistent)`,
});
return { shouldProceed: true };
}
// For ContinueWithCurrentModel or any other case, proceed with current model
return { shouldProceed: true };
}
try {
const visionSwitchResult = await onVisionSwitchRequired(query);
if (visionSwitchResult.modelOverride) {
// One-time model override
originalModelRef.current = config.getModel();
await config.setModel(visionSwitchResult.modelOverride, {
reason: 'vision_auto_switch',
context: 'User-prompted vision switch (one-time override)',
});
return {
shouldProceed: true,
originalModel: originalModelRef.current,
};
} else if (visionSwitchResult.persistSessionModel) {
// Persistent session model change
await config.setModel(visionSwitchResult.persistSessionModel, {
reason: 'vision_auto_switch',
context: 'User-prompted vision switch (session persistent)',
});
return { shouldProceed: true };
}
// For ContinueWithCurrentModel or any other case, proceed with current model
return { shouldProceed: true };
} catch (_error) {
// If vision switch dialog was cancelled or errored, don't proceed
@@ -290,9 +346,12 @@ export function useVisionAutoSwitch(
[config, addItem, visionModelPreviewEnabled, onVisionSwitchRequired],
);
const restoreOriginalModel = useCallback(() => {
const restoreOriginalModel = useCallback(async () => {
if (originalModelRef.current) {
config.setModel(originalModelRef.current);
await config.setModel(originalModelRef.current, {
reason: 'vision_auto_switch',
context: 'Restoring original model after vision switch',
});
originalModelRef.current = null;
}
}, [config]);

View File

@@ -10,9 +10,12 @@ export type AvailableModel = {
isVision?: boolean;
};
export const MAINLINE_VLM = 'vision-model';
export const MAINLINE_CODER = 'coder-model';
export const AVAILABLE_MODELS_QWEN: AvailableModel[] = [
{ id: 'qwen3-coder-plus', label: 'qwen3-coder-plus' },
{ id: 'qwen-vl-max-latest', label: 'qwen-vl-max', isVision: true },
{ id: MAINLINE_CODER, label: MAINLINE_CODER },
{ id: MAINLINE_VLM, label: MAINLINE_VLM, isVision: true },
];
/**
@@ -42,7 +45,7 @@ export function getOpenAIAvailableModelFromEnv(): AvailableModel | null {
* until our coding model supports multimodal.
*/
export function getDefaultVisionModel(): string {
return 'qwen-vl-max-latest';
return MAINLINE_VLM;
}
export function isVisionModel(modelId: string): boolean {