feat: add yolo mode support to auto vision model switch (#652)

* feat: add yolo mode support to auto vision model switch

* feat: add cli args & env variables for switch behavoir

* fix: use dedicated model names and settings

* docs: add vision model instructions

* fix: failed test case

* fix: setModel failure
This commit is contained in:
Mingholy
2025-09-24 10:21:09 +08:00
committed by GitHub
parent 5ecb4a2430
commit 48d8587bf9
26 changed files with 1133 additions and 122 deletions

View File

@@ -8,7 +8,7 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { renderHook, act } from '@testing-library/react';
import type { Part, PartListUnion } from '@google/genai';
import { AuthType, type Config } from '@qwen-code/qwen-code-core';
import { AuthType, type Config, ApprovalMode } from '@qwen-code/qwen-code-core';
import {
shouldOfferVisionSwitch,
processVisionSwitchOutcome,
@@ -41,7 +41,7 @@ describe('useVisionAutoSwitch helpers', () => {
const result = shouldOfferVisionSwitch(
parts,
AuthType.QWEN_OAUTH,
'qwen-vl-max-latest',
'vision-model',
true,
);
expect(result).toBe(false);
@@ -108,6 +108,56 @@ describe('useVisionAutoSwitch helpers', () => {
);
expect(result).toBe(false);
});
it('returns true when image parts exist in YOLO mode context', () => {
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
const result = shouldOfferVisionSwitch(
parts,
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
true,
);
expect(result).toBe(true);
});
it('returns false when no image parts exist in YOLO mode context', () => {
const parts: PartListUnion = [{ text: 'just text' }];
const result = shouldOfferVisionSwitch(
parts,
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
true,
);
expect(result).toBe(false);
});
it('returns false when already using vision model in YOLO mode context', () => {
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
const result = shouldOfferVisionSwitch(
parts,
AuthType.QWEN_OAUTH,
'vision-model',
true,
);
expect(result).toBe(false);
});
it('returns false when authType is not QWEN_OAUTH in YOLO mode context', () => {
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
const result = shouldOfferVisionSwitch(
parts,
AuthType.USE_GEMINI,
'qwen3-coder-plus',
true,
);
expect(result).toBe(false);
});
});
describe('processVisionSwitchOutcome', () => {
@@ -125,11 +175,11 @@ describe('useVisionAutoSwitch helpers', () => {
expect(result).toEqual({ persistSessionModel: vl });
});
it('maps DisallowWithGuidance to showGuidance', () => {
it('maps ContinueWithCurrentModel to empty result', () => {
const result = processVisionSwitchOutcome(
VisionSwitchOutcome.DisallowWithGuidance,
VisionSwitchOutcome.ContinueWithCurrentModel,
);
expect(result).toEqual({ showGuidance: true });
expect(result).toEqual({});
});
});
@@ -151,13 +201,20 @@ describe('useVisionAutoSwitch hook', () => {
ts: number,
) => any;
const createMockConfig = (authType: AuthType, initialModel: string) => {
const createMockConfig = (
authType: AuthType,
initialModel: string,
approvalMode: ApprovalMode = ApprovalMode.DEFAULT,
vlmSwitchMode?: string,
) => {
let currentModel = initialModel;
const mockConfig: Partial<Config> = {
getModel: vi.fn(() => currentModel),
setModel: vi.fn((m: string) => {
setModel: vi.fn(async (m: string) => {
currentModel = m;
}),
getApprovalMode: vi.fn(() => approvalMode),
getVlmSwitchMode: vi.fn(() => vlmSwitchMode),
getContentGeneratorConfig: vi.fn(() => ({
authType,
model: currentModel,
@@ -226,11 +283,9 @@ describe('useVisionAutoSwitch hook', () => {
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
it('shows guidance and blocks when dialog returns showGuidance', async () => {
it('continues with current model when dialog returns empty result', async () => {
const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
const onVisionSwitchRequired = vi
.fn()
.mockResolvedValue({ showGuidance: true });
const onVisionSwitchRequired = vi.fn().mockResolvedValue({}); // Empty result for ContinueWithCurrentModel
const { result } = renderHook(() =>
useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
);
@@ -245,11 +300,12 @@ describe('useVisionAutoSwitch hook', () => {
res = await result.current.handleVisionSwitch(parts, userTs, false);
});
expect(addItem).toHaveBeenCalledWith(
// Should not add any guidance message
expect(addItem).not.toHaveBeenCalledWith(
{ type: MessageType.INFO, text: getVisionSwitchGuidanceMessage() },
userTs,
);
expect(res).toEqual({ shouldProceed: false });
expect(res).toEqual({ shouldProceed: true });
expect(config.setModel).not.toHaveBeenCalled();
});
@@ -258,7 +314,7 @@ describe('useVisionAutoSwitch hook', () => {
const config = createMockConfig(AuthType.QWEN_OAUTH, initialModel);
const onVisionSwitchRequired = vi
.fn()
.mockResolvedValue({ modelOverride: 'qwen-vl-max-latest' });
.mockResolvedValue({ modelOverride: 'coder-model' });
const { result } = renderHook(() =>
useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
);
@@ -273,20 +329,26 @@ describe('useVisionAutoSwitch hook', () => {
});
expect(res).toEqual({ shouldProceed: true, originalModel: initialModel });
expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
expect(config.setModel).toHaveBeenCalledWith('coder-model', {
reason: 'vision_auto_switch',
context: 'User-prompted vision switch (one-time override)',
});
// Now restore
act(() => {
result.current.restoreOriginalModel();
await act(async () => {
await result.current.restoreOriginalModel();
});
expect(config.setModel).toHaveBeenLastCalledWith(initialModel, {
reason: 'vision_auto_switch',
context: 'Restoring original model after vision switch',
});
expect(config.setModel).toHaveBeenLastCalledWith(initialModel);
});
it('persists session model when dialog requests persistence', async () => {
const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
const onVisionSwitchRequired = vi
.fn()
.mockResolvedValue({ persistSessionModel: 'qwen-vl-max-latest' });
.mockResolvedValue({ persistSessionModel: 'coder-model' });
const { result } = renderHook(() =>
useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
);
@@ -301,16 +363,17 @@ describe('useVisionAutoSwitch hook', () => {
});
expect(res).toEqual({ shouldProceed: true });
expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
expect(config.setModel).toHaveBeenCalledWith('coder-model', {
reason: 'vision_auto_switch',
context: 'User-prompted vision switch (session persistent)',
});
// Restore should be a no-op since no one-time override was used
act(() => {
result.current.restoreOriginalModel();
await act(async () => {
await result.current.restoreOriginalModel();
});
// Last call should still be the persisted model set
expect((config.setModel as any).mock.calls.pop()?.[0]).toBe(
'qwen-vl-max-latest',
);
expect((config.setModel as any).mock.calls.pop()?.[0]).toBe('coder-model');
});
it('returns shouldProceed=true when dialog returns no special flags', async () => {
@@ -371,4 +434,420 @@ describe('useVisionAutoSwitch hook', () => {
expect(res).toEqual({ shouldProceed: true });
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
describe('YOLO mode behavior', () => {
it('automatically switches to vision model in YOLO mode without showing dialog', async () => {
const initialModel = 'qwen3-coder-plus';
const config = createMockConfig(
AuthType.QWEN_OAUTH,
initialModel,
ApprovalMode.YOLO,
);
const onVisionSwitchRequired = vi.fn(); // Should not be called in YOLO mode
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
let res: any;
await act(async () => {
res = await result.current.handleVisionSwitch(parts, 7070, false);
});
// Should automatically switch without calling the dialog
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
expect(res).toEqual({
shouldProceed: true,
originalModel: initialModel,
});
expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
reason: 'vision_auto_switch',
context: 'YOLO mode auto-switch for image content',
});
});
it('does not switch in YOLO mode when no images are present', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.YOLO,
);
const onVisionSwitchRequired = vi.fn();
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [{ text: 'no images here' }];
let res: any;
await act(async () => {
res = await result.current.handleVisionSwitch(parts, 8080, false);
});
expect(res).toEqual({ shouldProceed: true });
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
expect(config.setModel).not.toHaveBeenCalled();
});
it('does not switch in YOLO mode when already using vision model', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'vision-model',
ApprovalMode.YOLO,
);
const onVisionSwitchRequired = vi.fn();
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
let res: any;
await act(async () => {
res = await result.current.handleVisionSwitch(parts, 9090, false);
});
expect(res).toEqual({ shouldProceed: true });
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
expect(config.setModel).not.toHaveBeenCalled();
});
it('restores original model after YOLO mode auto-switch', async () => {
const initialModel = 'qwen3-coder-plus';
const config = createMockConfig(
AuthType.QWEN_OAUTH,
initialModel,
ApprovalMode.YOLO,
);
const onVisionSwitchRequired = vi.fn();
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
// First, trigger the auto-switch
await act(async () => {
await result.current.handleVisionSwitch(parts, 10100, false);
});
// Verify model was switched
expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
reason: 'vision_auto_switch',
context: 'YOLO mode auto-switch for image content',
});
// Now restore the original model
await act(async () => {
await result.current.restoreOriginalModel();
});
// Verify model was restored
expect(config.setModel).toHaveBeenLastCalledWith(initialModel, {
reason: 'vision_auto_switch',
context: 'Restoring original model after vision switch',
});
});
it('does not switch in YOLO mode when authType is not QWEN_OAUTH', async () => {
const config = createMockConfig(
AuthType.USE_GEMINI,
'qwen3-coder-plus',
ApprovalMode.YOLO,
);
const onVisionSwitchRequired = vi.fn();
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
let res: any;
await act(async () => {
res = await result.current.handleVisionSwitch(parts, 11110, false);
});
expect(res).toEqual({ shouldProceed: true });
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
expect(config.setModel).not.toHaveBeenCalled();
});
it('does not switch in YOLO mode when visionModelPreviewEnabled is false', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.YOLO,
);
const onVisionSwitchRequired = vi.fn();
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
false,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: '...' } },
];
let res: any;
await act(async () => {
res = await result.current.handleVisionSwitch(parts, 12120, false);
});
expect(res).toEqual({ shouldProceed: true });
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
expect(config.setModel).not.toHaveBeenCalled();
});
it('handles multiple image formats in YOLO mode', async () => {
const initialModel = 'qwen3-coder-plus';
const config = createMockConfig(
AuthType.QWEN_OAUTH,
initialModel,
ApprovalMode.YOLO,
);
const onVisionSwitchRequired = vi.fn();
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ text: 'Here are some images:' },
{ inlineData: { mimeType: 'image/jpeg', data: '...' } },
{ fileData: { mimeType: 'image/png', fileUri: 'file://image.png' } },
{ text: 'Please analyze them.' },
];
let res: any;
await act(async () => {
res = await result.current.handleVisionSwitch(parts, 13130, false);
});
expect(res).toEqual({
shouldProceed: true,
originalModel: initialModel,
});
expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
reason: 'vision_auto_switch',
context: 'YOLO mode auto-switch for image content',
});
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
});
describe('VLM switch mode default behavior', () => {
it('should automatically switch once when vlmSwitchMode is "once"', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
'once',
);
const onVisionSwitchRequired = vi.fn(); // Should not be called
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(switchResult.originalModel).toBe('qwen3-coder-plus');
expect(config.setModel).toHaveBeenCalledWith('vision-model', {
reason: 'vision_auto_switch',
context: 'Default VLM switch mode: once (one-time override)',
});
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
it('should switch session when vlmSwitchMode is "session"', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
'session',
);
const onVisionSwitchRequired = vi.fn(); // Should not be called
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(switchResult.originalModel).toBeUndefined(); // No original model for session switch
expect(config.setModel).toHaveBeenCalledWith('vision-model', {
reason: 'vision_auto_switch',
context: 'Default VLM switch mode: session (session persistent)',
});
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
it('should continue with current model when vlmSwitchMode is "persist"', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
'persist',
);
const onVisionSwitchRequired = vi.fn(); // Should not be called
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(switchResult.originalModel).toBeUndefined();
expect(config.setModel).not.toHaveBeenCalled();
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
it('should fall back to user prompt when vlmSwitchMode is not set', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
undefined, // No default mode
);
const onVisionSwitchRequired = vi
.fn()
.mockResolvedValue({ modelOverride: 'vision-model' });
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(onVisionSwitchRequired).toHaveBeenCalledWith(parts);
});
it('should fall back to persist behavior when vlmSwitchMode has invalid value', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
'invalid-value',
);
const onVisionSwitchRequired = vi.fn(); // Should not be called
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(switchResult.originalModel).toBeUndefined();
// For invalid values, it should continue with current model (persist behavior)
expect(config.setModel).not.toHaveBeenCalled();
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
});
});