feat: add cli args & env variables for switch behavoir

This commit is contained in:
mingholy.lmh
2025-09-23 19:14:26 +08:00
parent 85a2b8d6e0
commit e4d16adf7b
12 changed files with 555 additions and 63 deletions

View File

@@ -82,6 +82,7 @@ export interface CliArgs {
includeDirectories: string[] | undefined;
tavilyApiKey: string | undefined;
screenReader: boolean | undefined;
vlmSwitchMode: string | undefined;
}
export async function parseArguments(settings: Settings): Promise<CliArgs> {
@@ -249,6 +250,13 @@ export async function parseArguments(settings: Settings): Promise<CliArgs> {
description: 'Enable screen reader mode for accessibility.',
default: false,
})
.option('vlm-switch-mode', {
type: 'string',
choices: ['once', 'session', 'persist'],
description:
'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). Overrides settings files.',
default: process.env['VLM_SWITCH_MODE'],
})
.check((argv) => {
if (argv.prompt && argv['promptInteractive']) {
throw new Error(
@@ -524,6 +532,9 @@ export async function loadCliConfig(
argv.screenReader !== undefined
? argv.screenReader
: (settings.ui?.accessibility?.screenReader ?? false);
const vlmSwitchMode =
argv.vlmSwitchMode || settings.experimental?.vlmSwitchMode;
return new Config({
sessionId,
embeddingModel: DEFAULT_GEMINI_EMBEDDING_MODEL,
@@ -630,6 +641,7 @@ export async function loadCliConfig(
skipNextSpeakerCheck: settings.model?.skipNextSpeakerCheck,
enablePromptCompletion: settings.general?.enablePromptCompletion ?? false,
skipLoopDetection: settings.skipLoopDetection ?? false,
vlmSwitchMode,
});
}

View File

@@ -751,6 +751,16 @@ export const SETTINGS_SCHEMA = {
'Enable vision model support and auto-switching functionality. When disabled, vision models like qwen-vl-max-latest will be hidden and auto-switching will not occur.',
showInDialog: true,
},
vlmSwitchMode: {
type: 'string',
label: 'VLM Switch Mode',
category: 'Experimental',
requiresRestart: false,
default: undefined as string | undefined,
description:
'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). If not set, user will be prompted each time. This is a temporary experimental feature.',
showInDialog: false,
},
},
},

View File

@@ -46,8 +46,8 @@ describe('ModelSwitchDialog', () => {
value: VisionSwitchOutcome.SwitchSessionToVL,
},
{
label: 'Do not switch, show guidance',
value: VisionSwitchOutcome.DisallowWithGuidance,
label: 'Continue with current model',
value: VisionSwitchOutcome.ContinueWithCurrentModel,
},
];
@@ -81,18 +81,18 @@ describe('ModelSwitchDialog', () => {
);
});
it('should call onSelect with DisallowWithGuidance when third option is selected', () => {
it('should call onSelect with ContinueWithCurrentModel when third option is selected', () => {
render(<ModelSwitchDialog onSelect={mockOnSelect} />);
const onSelectCallback = mockRadioButtonSelect.mock.calls[0][0].onSelect;
onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance);
onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel);
expect(mockOnSelect).toHaveBeenCalledWith(
VisionSwitchOutcome.DisallowWithGuidance,
VisionSwitchOutcome.ContinueWithCurrentModel,
);
});
it('should setup escape key handler to call onSelect with DisallowWithGuidance', () => {
it('should setup escape key handler to call onSelect with ContinueWithCurrentModel', () => {
render(<ModelSwitchDialog onSelect={mockOnSelect} />);
expect(mockUseKeypress).toHaveBeenCalledWith(expect.any(Function), {
@@ -104,7 +104,7 @@ describe('ModelSwitchDialog', () => {
keypressHandler({ name: 'escape' });
expect(mockOnSelect).toHaveBeenCalledWith(
VisionSwitchOutcome.DisallowWithGuidance,
VisionSwitchOutcome.ContinueWithCurrentModel,
);
});
@@ -126,13 +126,9 @@ describe('ModelSwitchDialog', () => {
describe('VisionSwitchOutcome enum', () => {
it('should have correct enum values', () => {
expect(VisionSwitchOutcome.SwitchOnce).toBe('switch_once');
expect(VisionSwitchOutcome.SwitchSessionToVL).toBe(
'switch_session_to_vl',
);
expect(VisionSwitchOutcome.DisallowWithGuidance).toBe(
'disallow_with_guidance',
);
expect(VisionSwitchOutcome.SwitchOnce).toBe('once');
expect(VisionSwitchOutcome.SwitchSessionToVL).toBe('session');
expect(VisionSwitchOutcome.ContinueWithCurrentModel).toBe('persist');
});
});
@@ -144,7 +140,7 @@ describe('ModelSwitchDialog', () => {
// Call multiple times
onSelectCallback(VisionSwitchOutcome.SwitchOnce);
onSelectCallback(VisionSwitchOutcome.SwitchSessionToVL);
onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance);
onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel);
expect(mockOnSelect).toHaveBeenCalledTimes(3);
expect(mockOnSelect).toHaveBeenNthCalledWith(
@@ -157,7 +153,7 @@ describe('ModelSwitchDialog', () => {
);
expect(mockOnSelect).toHaveBeenNthCalledWith(
3,
VisionSwitchOutcome.DisallowWithGuidance,
VisionSwitchOutcome.ContinueWithCurrentModel,
);
});
@@ -179,7 +175,7 @@ describe('ModelSwitchDialog', () => {
expect(mockOnSelect).toHaveBeenCalledTimes(2);
expect(mockOnSelect).toHaveBeenCalledWith(
VisionSwitchOutcome.DisallowWithGuidance,
VisionSwitchOutcome.ContinueWithCurrentModel,
);
});
});

View File

@@ -14,9 +14,9 @@ import {
import { useKeypress } from '../hooks/useKeypress.js';
export enum VisionSwitchOutcome {
SwitchOnce = 'switch_once',
SwitchSessionToVL = 'switch_session_to_vl',
DisallowWithGuidance = 'disallow_with_guidance',
SwitchOnce = 'once',
SwitchSessionToVL = 'session',
ContinueWithCurrentModel = 'persist',
}
export interface ModelSwitchDialogProps {
@@ -29,7 +29,7 @@ export const ModelSwitchDialog: React.FC<ModelSwitchDialogProps> = ({
useKeypress(
(key) => {
if (key.name === 'escape') {
onSelect(VisionSwitchOutcome.DisallowWithGuidance);
onSelect(VisionSwitchOutcome.ContinueWithCurrentModel);
}
},
{ isActive: true },
@@ -45,8 +45,8 @@ export const ModelSwitchDialog: React.FC<ModelSwitchDialogProps> = ({
value: VisionSwitchOutcome.SwitchSessionToVL,
},
{
label: 'Do not switch, show guidance',
value: VisionSwitchOutcome.DisallowWithGuidance,
label: 'Continue with current model',
value: VisionSwitchOutcome.ContinueWithCurrentModel,
},
];

View File

@@ -175,11 +175,11 @@ describe('useVisionAutoSwitch helpers', () => {
expect(result).toEqual({ persistSessionModel: vl });
});
it('maps DisallowWithGuidance to showGuidance', () => {
it('maps ContinueWithCurrentModel to empty result', () => {
const result = processVisionSwitchOutcome(
VisionSwitchOutcome.DisallowWithGuidance,
VisionSwitchOutcome.ContinueWithCurrentModel,
);
expect(result).toEqual({ showGuidance: true });
expect(result).toEqual({});
});
});
@@ -205,6 +205,7 @@ describe('useVisionAutoSwitch hook', () => {
authType: AuthType,
initialModel: string,
approvalMode: ApprovalMode = ApprovalMode.DEFAULT,
vlmSwitchMode?: string,
) => {
let currentModel = initialModel;
const mockConfig: Partial<Config> = {
@@ -213,6 +214,7 @@ describe('useVisionAutoSwitch hook', () => {
currentModel = m;
}),
getApprovalMode: vi.fn(() => approvalMode),
getVlmSwitchMode: vi.fn(() => vlmSwitchMode),
getContentGeneratorConfig: vi.fn(() => ({
authType,
model: currentModel,
@@ -281,11 +283,9 @@ describe('useVisionAutoSwitch hook', () => {
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
it('shows guidance and blocks when dialog returns showGuidance', async () => {
it('continues with current model when dialog returns empty result', async () => {
const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
const onVisionSwitchRequired = vi
.fn()
.mockResolvedValue({ showGuidance: true });
const onVisionSwitchRequired = vi.fn().mockResolvedValue({}); // Empty result for ContinueWithCurrentModel
const { result } = renderHook(() =>
useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
);
@@ -300,11 +300,12 @@ describe('useVisionAutoSwitch hook', () => {
res = await result.current.handleVisionSwitch(parts, userTs, false);
});
expect(addItem).toHaveBeenCalledWith(
// Should not add any guidance message
expect(addItem).not.toHaveBeenCalledWith(
{ type: MessageType.INFO, text: getVisionSwitchGuidanceMessage() },
userTs,
);
expect(res).toEqual({ shouldProceed: false });
expect(res).toEqual({ shouldProceed: true });
expect(config.setModel).not.toHaveBeenCalled();
});
@@ -328,13 +329,19 @@ describe('useVisionAutoSwitch hook', () => {
});
expect(res).toEqual({ shouldProceed: true, originalModel: initialModel });
expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest', {
reason: 'vision_auto_switch',
context: 'User-prompted vision switch (one-time override)',
});
// Now restore
act(() => {
result.current.restoreOriginalModel();
});
expect(config.setModel).toHaveBeenLastCalledWith(initialModel);
expect(config.setModel).toHaveBeenLastCalledWith(initialModel, {
reason: 'vision_auto_switch',
context: 'Restoring original model after vision switch',
});
});
it('persists session model when dialog requests persistence', async () => {
@@ -356,7 +363,10 @@ describe('useVisionAutoSwitch hook', () => {
});
expect(res).toEqual({ shouldProceed: true });
expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest', {
reason: 'vision_auto_switch',
context: 'User-prompted vision switch (session persistent)',
});
// Restore should be a no-op since no one-time override was used
act(() => {
@@ -460,7 +470,10 @@ describe('useVisionAutoSwitch hook', () => {
shouldProceed: true,
originalModel: initialModel,
});
expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel());
expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
reason: 'vision_auto_switch',
context: 'YOLO mode auto-switch for image content',
});
});
it('does not switch in YOLO mode when no images are present', async () => {
@@ -548,7 +561,10 @@ describe('useVisionAutoSwitch hook', () => {
});
// Verify model was switched
expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel());
expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
reason: 'vision_auto_switch',
context: 'YOLO mode auto-switch for image content',
});
// Now restore the original model
act(() => {
@@ -556,7 +572,10 @@ describe('useVisionAutoSwitch hook', () => {
});
// Verify model was restored
expect(config.setModel).toHaveBeenLastCalledWith(initialModel);
expect(config.setModel).toHaveBeenLastCalledWith(initialModel, {
reason: 'vision_auto_switch',
context: 'Restoring original model after vision switch',
});
});
it('does not switch in YOLO mode when authType is not QWEN_OAUTH', async () => {
@@ -652,7 +671,184 @@ describe('useVisionAutoSwitch hook', () => {
shouldProceed: true,
originalModel: initialModel,
});
expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel());
expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
reason: 'vision_auto_switch',
context: 'YOLO mode auto-switch for image content',
});
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
});
describe('VLM switch mode default behavior', () => {
it('should automatically switch once when vlmSwitchMode is "once"', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
'once',
);
const onVisionSwitchRequired = vi.fn(); // Should not be called
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(switchResult.originalModel).toBe('qwen3-coder-plus');
expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest', {
reason: 'vision_auto_switch',
context: 'Default VLM switch mode: once (one-time override)',
});
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
it('should switch session when vlmSwitchMode is "session"', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
'session',
);
const onVisionSwitchRequired = vi.fn(); // Should not be called
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(switchResult.originalModel).toBeUndefined(); // No original model for session switch
expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest', {
reason: 'vision_auto_switch',
context: 'Default VLM switch mode: session (session persistent)',
});
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
it('should continue with current model when vlmSwitchMode is "persist"', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
'persist',
);
const onVisionSwitchRequired = vi.fn(); // Should not be called
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(switchResult.originalModel).toBeUndefined();
expect(config.setModel).not.toHaveBeenCalled();
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
it('should fall back to user prompt when vlmSwitchMode is not set', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
undefined, // No default mode
);
const onVisionSwitchRequired = vi
.fn()
.mockResolvedValue({ modelOverride: 'qwen-vl-max-latest' });
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(onVisionSwitchRequired).toHaveBeenCalledWith(parts);
});
it('should fall back to persist behavior when vlmSwitchMode has invalid value', async () => {
const config = createMockConfig(
AuthType.QWEN_OAUTH,
'qwen3-coder-plus',
ApprovalMode.DEFAULT,
'invalid-value',
);
const onVisionSwitchRequired = vi.fn(); // Should not be called
const { result } = renderHook(() =>
useVisionAutoSwitch(
config,
addItem as any,
true,
onVisionSwitchRequired,
),
);
const parts: PartListUnion = [
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
];
const switchResult = await result.current.handleVisionSwitch(
parts,
Date.now(),
false,
);
expect(switchResult.shouldProceed).toBe(true);
expect(switchResult.originalModel).toBeUndefined();
// For invalid values, it should continue with current model (persist behavior)
expect(config.setModel).not.toHaveBeenCalled();
expect(onVisionSwitchRequired).not.toHaveBeenCalled();
});
});

View File

@@ -166,11 +166,11 @@ export function processVisionSwitchOutcome(
case VisionSwitchOutcome.SwitchSessionToVL:
return { persistSessionModel: vlModelId };
case VisionSwitchOutcome.DisallowWithGuidance:
return { showGuidance: true };
case VisionSwitchOutcome.ContinueWithCurrentModel:
return {}; // Continue with current model, no changes needed
default:
return { showGuidance: true };
return {}; // Default to continuing with current model
}
}
@@ -256,42 +256,87 @@ export function useVisionAutoSwitch(
if (config.getApprovalMode() === ApprovalMode.YOLO) {
const vlModelId = getDefaultVisionModel();
originalModelRef.current = config.getModel();
config.setModel(vlModelId);
config.setModel(vlModelId, {
reason: 'vision_auto_switch',
context: 'YOLO mode auto-switch for image content',
});
return {
shouldProceed: true,
originalModel: originalModelRef.current,
};
}
try {
const visionSwitchResult = await onVisionSwitchRequired(query);
if (visionSwitchResult.showGuidance) {
// Show guidance and don't proceed with the request
addItem(
{
type: MessageType.INFO,
text: getVisionSwitchGuidanceMessage(),
},
userMessageTimestamp,
);
return { shouldProceed: false };
// Check if there's a default VLM switch mode configured
const defaultVlmSwitchMode = config.getVlmSwitchMode();
if (defaultVlmSwitchMode) {
// Convert string value to VisionSwitchOutcome enum
let outcome: VisionSwitchOutcome;
switch (defaultVlmSwitchMode) {
case 'once':
outcome = VisionSwitchOutcome.SwitchOnce;
break;
case 'session':
outcome = VisionSwitchOutcome.SwitchSessionToVL;
break;
case 'persist':
outcome = VisionSwitchOutcome.ContinueWithCurrentModel;
break;
default:
// Invalid value, fall back to prompting user
outcome = VisionSwitchOutcome.ContinueWithCurrentModel;
}
// Process the default outcome
const visionSwitchResult = processVisionSwitchOutcome(outcome);
if (visionSwitchResult.modelOverride) {
// One-time model override
originalModelRef.current = config.getModel();
config.setModel(visionSwitchResult.modelOverride);
config.setModel(visionSwitchResult.modelOverride, {
reason: 'vision_auto_switch',
context: `Default VLM switch mode: ${defaultVlmSwitchMode} (one-time override)`,
});
return {
shouldProceed: true,
originalModel: originalModelRef.current,
};
} else if (visionSwitchResult.persistSessionModel) {
// Persistent session model change
config.setModel(visionSwitchResult.persistSessionModel);
config.setModel(visionSwitchResult.persistSessionModel, {
reason: 'vision_auto_switch',
context: `Default VLM switch mode: ${defaultVlmSwitchMode} (session persistent)`,
});
return { shouldProceed: true };
}
// For ContinueWithCurrentModel or any other case, proceed with current model
return { shouldProceed: true };
}
try {
const visionSwitchResult = await onVisionSwitchRequired(query);
if (visionSwitchResult.modelOverride) {
// One-time model override
originalModelRef.current = config.getModel();
config.setModel(visionSwitchResult.modelOverride, {
reason: 'vision_auto_switch',
context: 'User-prompted vision switch (one-time override)',
});
return {
shouldProceed: true,
originalModel: originalModelRef.current,
};
} else if (visionSwitchResult.persistSessionModel) {
// Persistent session model change
config.setModel(visionSwitchResult.persistSessionModel, {
reason: 'vision_auto_switch',
context: 'User-prompted vision switch (session persistent)',
});
return { shouldProceed: true };
}
// For ContinueWithCurrentModel or any other case, proceed with current model
return { shouldProceed: true };
} catch (_error) {
// If vision switch dialog was cancelled or errored, don't proceed
@@ -303,7 +348,10 @@ export function useVisionAutoSwitch(
const restoreOriginalModel = useCallback(() => {
if (originalModelRef.current) {
config.setModel(originalModelRef.current);
config.setModel(originalModelRef.current, {
reason: 'vision_auto_switch',
context: 'Restoring original model after vision switch',
});
originalModelRef.current = null;
}
}, [config]);

View File

@@ -10,9 +10,12 @@ export type AvailableModel = {
isVision?: boolean;
};
export const MAINLINE_VLM = 'qwen-vl-max-latest';
export const MAINLINE_CODER = 'qwen3-coder-plus';
export const AVAILABLE_MODELS_QWEN: AvailableModel[] = [
{ id: 'qwen3-coder-plus', label: 'qwen3-coder-plus' },
{ id: 'qwen-vl-max-latest', label: 'qwen-vl-max', isVision: true },
{ id: MAINLINE_CODER, label: MAINLINE_CODER },
{ id: MAINLINE_VLM, label: MAINLINE_VLM, isVision: true },
];
/**
@@ -42,7 +45,7 @@ export function getOpenAIAvailableModelFromEnv(): AvailableModel | null {
* until our coding model supports multimodal.
*/
export function getDefaultVisionModel(): string {
return 'qwen-vl-max-latest';
return MAINLINE_VLM;
}
export function isVisionModel(modelId: string): boolean {