From e4d16adf7b8f2befd0a88b3b649280f7295b86c6 Mon Sep 17 00:00:00 2001 From: "mingholy.lmh" Date: Tue, 23 Sep 2025 19:14:26 +0800 Subject: [PATCH] feat: add cli args & env variables for switch behavior --- packages/cli/src/config/config.ts | 12 + packages/cli/src/config/settingsSchema.ts | 10 + .../ui/components/ModelSwitchDialog.test.tsx | 30 +-- .../src/ui/components/ModelSwitchDialog.tsx | 12 +- .../src/ui/hooks/useVisionAutoSwitch.test.ts | 228 ++++++++++++++++-- .../cli/src/ui/hooks/useVisionAutoSwitch.ts | 88 +++++-- packages/cli/src/ui/models/availableModels.ts | 9 +- packages/core/src/config/config.test.ts | 81 +++++++ packages/core/src/config/config.ts | 40 ++- packages/core/src/core/logger.test.ts | 80 ++++++ packages/core/src/core/logger.ts | 19 ++ packages/core/src/core/tokenLimits.ts | 9 + 12 files changed, 555 insertions(+), 63 deletions(-) diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index eaa354d6..e1ee021f 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -82,6 +82,7 @@ export interface CliArgs { includeDirectories: string[] | undefined; tavilyApiKey: string | undefined; screenReader: boolean | undefined; + vlmSwitchMode: string | undefined; } export async function parseArguments(settings: Settings): Promise { @@ -249,6 +250,13 @@ export async function parseArguments(settings: Settings): Promise { description: 'Enable screen reader mode for accessibility.', default: false, }) + .option('vlm-switch-mode', { + type: 'string', + choices: ['once', 'session', 'persist'], + description: + 'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). 
Overrides settings files.', + default: process.env['VLM_SWITCH_MODE'], + }) .check((argv) => { if (argv.prompt && argv['promptInteractive']) { throw new Error( @@ -524,6 +532,9 @@ export async function loadCliConfig( argv.screenReader !== undefined ? argv.screenReader : (settings.ui?.accessibility?.screenReader ?? false); + + const vlmSwitchMode = + argv.vlmSwitchMode || settings.experimental?.vlmSwitchMode; return new Config({ sessionId, embeddingModel: DEFAULT_GEMINI_EMBEDDING_MODEL, @@ -630,6 +641,7 @@ export async function loadCliConfig( skipNextSpeakerCheck: settings.model?.skipNextSpeakerCheck, enablePromptCompletion: settings.general?.enablePromptCompletion ?? false, skipLoopDetection: settings.skipLoopDetection ?? false, + vlmSwitchMode, }); } diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index c7f1e94e..84261893 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -751,6 +751,16 @@ export const SETTINGS_SCHEMA = { 'Enable vision model support and auto-switching functionality. When disabled, vision models like qwen-vl-max-latest will be hidden and auto-switching will not occur.', showInDialog: true, }, + vlmSwitchMode: { + type: 'string', + label: 'VLM Switch Mode', + category: 'Experimental', + requiresRestart: false, + default: undefined as string | undefined, + description: + 'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). If not set, user will be prompted each time. 
This is a temporary experimental feature.', + showInDialog: false, + }, }, }, diff --git a/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx b/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx index f26dcc55..aab45cc2 100644 --- a/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx +++ b/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx @@ -46,8 +46,8 @@ describe('ModelSwitchDialog', () => { value: VisionSwitchOutcome.SwitchSessionToVL, }, { - label: 'Do not switch, show guidance', - value: VisionSwitchOutcome.DisallowWithGuidance, + label: 'Continue with current model', + value: VisionSwitchOutcome.ContinueWithCurrentModel, }, ]; @@ -81,18 +81,18 @@ describe('ModelSwitchDialog', () => { ); }); - it('should call onSelect with DisallowWithGuidance when third option is selected', () => { + it('should call onSelect with ContinueWithCurrentModel when third option is selected', () => { render(); const onSelectCallback = mockRadioButtonSelect.mock.calls[0][0].onSelect; - onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance); + onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel); expect(mockOnSelect).toHaveBeenCalledWith( - VisionSwitchOutcome.DisallowWithGuidance, + VisionSwitchOutcome.ContinueWithCurrentModel, ); }); - it('should setup escape key handler to call onSelect with DisallowWithGuidance', () => { + it('should setup escape key handler to call onSelect with ContinueWithCurrentModel', () => { render(); expect(mockUseKeypress).toHaveBeenCalledWith(expect.any(Function), { @@ -104,7 +104,7 @@ describe('ModelSwitchDialog', () => { keypressHandler({ name: 'escape' }); expect(mockOnSelect).toHaveBeenCalledWith( - VisionSwitchOutcome.DisallowWithGuidance, + VisionSwitchOutcome.ContinueWithCurrentModel, ); }); @@ -126,13 +126,9 @@ describe('ModelSwitchDialog', () => { describe('VisionSwitchOutcome enum', () => { it('should have correct enum values', () => { - expect(VisionSwitchOutcome.SwitchOnce).toBe('switch_once'); - 
expect(VisionSwitchOutcome.SwitchSessionToVL).toBe( - 'switch_session_to_vl', - ); - expect(VisionSwitchOutcome.DisallowWithGuidance).toBe( - 'disallow_with_guidance', - ); + expect(VisionSwitchOutcome.SwitchOnce).toBe('once'); + expect(VisionSwitchOutcome.SwitchSessionToVL).toBe('session'); + expect(VisionSwitchOutcome.ContinueWithCurrentModel).toBe('persist'); }); }); @@ -144,7 +140,7 @@ describe('ModelSwitchDialog', () => { // Call multiple times onSelectCallback(VisionSwitchOutcome.SwitchOnce); onSelectCallback(VisionSwitchOutcome.SwitchSessionToVL); - onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance); + onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel); expect(mockOnSelect).toHaveBeenCalledTimes(3); expect(mockOnSelect).toHaveBeenNthCalledWith( @@ -157,7 +153,7 @@ describe('ModelSwitchDialog', () => { ); expect(mockOnSelect).toHaveBeenNthCalledWith( 3, - VisionSwitchOutcome.DisallowWithGuidance, + VisionSwitchOutcome.ContinueWithCurrentModel, ); }); @@ -179,7 +175,7 @@ describe('ModelSwitchDialog', () => { expect(mockOnSelect).toHaveBeenCalledTimes(2); expect(mockOnSelect).toHaveBeenCalledWith( - VisionSwitchOutcome.DisallowWithGuidance, + VisionSwitchOutcome.ContinueWithCurrentModel, ); }); }); diff --git a/packages/cli/src/ui/components/ModelSwitchDialog.tsx b/packages/cli/src/ui/components/ModelSwitchDialog.tsx index 1a8c73d4..f2993c47 100644 --- a/packages/cli/src/ui/components/ModelSwitchDialog.tsx +++ b/packages/cli/src/ui/components/ModelSwitchDialog.tsx @@ -14,9 +14,9 @@ import { import { useKeypress } from '../hooks/useKeypress.js'; export enum VisionSwitchOutcome { - SwitchOnce = 'switch_once', - SwitchSessionToVL = 'switch_session_to_vl', - DisallowWithGuidance = 'disallow_with_guidance', + SwitchOnce = 'once', + SwitchSessionToVL = 'session', + ContinueWithCurrentModel = 'persist', } export interface ModelSwitchDialogProps { @@ -29,7 +29,7 @@ export const ModelSwitchDialog: React.FC = ({ useKeypress( (key) => { if (key.name 
=== 'escape') { - onSelect(VisionSwitchOutcome.DisallowWithGuidance); + onSelect(VisionSwitchOutcome.ContinueWithCurrentModel); } }, { isActive: true }, @@ -45,8 +45,8 @@ export const ModelSwitchDialog: React.FC = ({ value: VisionSwitchOutcome.SwitchSessionToVL, }, { - label: 'Do not switch, show guidance', - value: VisionSwitchOutcome.DisallowWithGuidance, + label: 'Continue with current model', + value: VisionSwitchOutcome.ContinueWithCurrentModel, }, ]; diff --git a/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts b/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts index 3dcb153b..fa56a94b 100644 --- a/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts +++ b/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts @@ -175,11 +175,11 @@ describe('useVisionAutoSwitch helpers', () => { expect(result).toEqual({ persistSessionModel: vl }); }); - it('maps DisallowWithGuidance to showGuidance', () => { + it('maps ContinueWithCurrentModel to empty result', () => { const result = processVisionSwitchOutcome( - VisionSwitchOutcome.DisallowWithGuidance, + VisionSwitchOutcome.ContinueWithCurrentModel, ); - expect(result).toEqual({ showGuidance: true }); + expect(result).toEqual({}); }); }); @@ -205,6 +205,7 @@ describe('useVisionAutoSwitch hook', () => { authType: AuthType, initialModel: string, approvalMode: ApprovalMode = ApprovalMode.DEFAULT, + vlmSwitchMode?: string, ) => { let currentModel = initialModel; const mockConfig: Partial = { @@ -213,6 +214,7 @@ describe('useVisionAutoSwitch hook', () => { currentModel = m; }), getApprovalMode: vi.fn(() => approvalMode), + getVlmSwitchMode: vi.fn(() => vlmSwitchMode), getContentGeneratorConfig: vi.fn(() => ({ authType, model: currentModel, @@ -281,11 +283,9 @@ describe('useVisionAutoSwitch hook', () => { expect(onVisionSwitchRequired).not.toHaveBeenCalled(); }); - it('shows guidance and blocks when dialog returns showGuidance', async () => { + it('continues with current model when dialog returns empty result', async () 
=> { const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus'); - const onVisionSwitchRequired = vi - .fn() - .mockResolvedValue({ showGuidance: true }); + const onVisionSwitchRequired = vi.fn().mockResolvedValue({}); // Empty result for ContinueWithCurrentModel const { result } = renderHook(() => useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired), ); @@ -300,11 +300,12 @@ describe('useVisionAutoSwitch hook', () => { res = await result.current.handleVisionSwitch(parts, userTs, false); }); - expect(addItem).toHaveBeenCalledWith( + // Should not add any guidance message + expect(addItem).not.toHaveBeenCalledWith( { type: MessageType.INFO, text: getVisionSwitchGuidanceMessage() }, userTs, ); - expect(res).toEqual({ shouldProceed: false }); + expect(res).toEqual({ shouldProceed: true }); expect(config.setModel).not.toHaveBeenCalled(); }); @@ -328,13 +329,19 @@ describe('useVisionAutoSwitch hook', () => { }); expect(res).toEqual({ shouldProceed: true, originalModel: initialModel }); - expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest'); + expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest', { + reason: 'vision_auto_switch', + context: 'User-prompted vision switch (one-time override)', + }); // Now restore act(() => { result.current.restoreOriginalModel(); }); - expect(config.setModel).toHaveBeenLastCalledWith(initialModel); + expect(config.setModel).toHaveBeenLastCalledWith(initialModel, { + reason: 'vision_auto_switch', + context: 'Restoring original model after vision switch', + }); }); it('persists session model when dialog requests persistence', async () => { @@ -356,7 +363,10 @@ describe('useVisionAutoSwitch hook', () => { }); expect(res).toEqual({ shouldProceed: true }); - expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest'); + expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest', { + reason: 'vision_auto_switch', + context: 'User-prompted vision switch (session 
persistent)', + }); // Restore should be a no-op since no one-time override was used act(() => { @@ -460,7 +470,10 @@ describe('useVisionAutoSwitch hook', () => { shouldProceed: true, originalModel: initialModel, }); - expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel()); + expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), { + reason: 'vision_auto_switch', + context: 'YOLO mode auto-switch for image content', + }); }); it('does not switch in YOLO mode when no images are present', async () => { @@ -548,7 +561,10 @@ describe('useVisionAutoSwitch hook', () => { }); // Verify model was switched - expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel()); + expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), { + reason: 'vision_auto_switch', + context: 'YOLO mode auto-switch for image content', + }); // Now restore the original model act(() => { @@ -556,7 +572,10 @@ describe('useVisionAutoSwitch hook', () => { }); // Verify model was restored - expect(config.setModel).toHaveBeenLastCalledWith(initialModel); + expect(config.setModel).toHaveBeenLastCalledWith(initialModel, { + reason: 'vision_auto_switch', + context: 'Restoring original model after vision switch', + }); }); it('does not switch in YOLO mode when authType is not QWEN_OAUTH', async () => { @@ -652,7 +671,184 @@ describe('useVisionAutoSwitch hook', () => { shouldProceed: true, originalModel: initialModel, }); - expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel()); + expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), { + reason: 'vision_auto_switch', + context: 'YOLO mode auto-switch for image content', + }); + expect(onVisionSwitchRequired).not.toHaveBeenCalled(); + }); + }); + + describe('VLM switch mode default behavior', () => { + it('should automatically switch once when vlmSwitchMode is "once"', async () => { + const config = createMockConfig( + AuthType.QWEN_OAUTH, + 'qwen3-coder-plus', + 
ApprovalMode.DEFAULT, + 'once', + ); + const onVisionSwitchRequired = vi.fn(); // Should not be called + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } }, + ]; + + const switchResult = await result.current.handleVisionSwitch( + parts, + Date.now(), + false, + ); + + expect(switchResult.shouldProceed).toBe(true); + expect(switchResult.originalModel).toBe('qwen3-coder-plus'); + expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest', { + reason: 'vision_auto_switch', + context: 'Default VLM switch mode: once (one-time override)', + }); + expect(onVisionSwitchRequired).not.toHaveBeenCalled(); + }); + + it('should switch session when vlmSwitchMode is "session"', async () => { + const config = createMockConfig( + AuthType.QWEN_OAUTH, + 'qwen3-coder-plus', + ApprovalMode.DEFAULT, + 'session', + ); + const onVisionSwitchRequired = vi.fn(); // Should not be called + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } }, + ]; + + const switchResult = await result.current.handleVisionSwitch( + parts, + Date.now(), + false, + ); + + expect(switchResult.shouldProceed).toBe(true); + expect(switchResult.originalModel).toBeUndefined(); // No original model for session switch + expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest', { + reason: 'vision_auto_switch', + context: 'Default VLM switch mode: session (session persistent)', + }); + expect(onVisionSwitchRequired).not.toHaveBeenCalled(); + }); + + it('should continue with current model when vlmSwitchMode is "persist"', async () => { + const config = createMockConfig( + AuthType.QWEN_OAUTH, + 'qwen3-coder-plus', + ApprovalMode.DEFAULT, + 'persist', + 
); + const onVisionSwitchRequired = vi.fn(); // Should not be called + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } }, + ]; + + const switchResult = await result.current.handleVisionSwitch( + parts, + Date.now(), + false, + ); + + expect(switchResult.shouldProceed).toBe(true); + expect(switchResult.originalModel).toBeUndefined(); + expect(config.setModel).not.toHaveBeenCalled(); + expect(onVisionSwitchRequired).not.toHaveBeenCalled(); + }); + + it('should fall back to user prompt when vlmSwitchMode is not set', async () => { + const config = createMockConfig( + AuthType.QWEN_OAUTH, + 'qwen3-coder-plus', + ApprovalMode.DEFAULT, + undefined, // No default mode + ); + const onVisionSwitchRequired = vi + .fn() + .mockResolvedValue({ modelOverride: 'qwen-vl-max-latest' }); + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } }, + ]; + + const switchResult = await result.current.handleVisionSwitch( + parts, + Date.now(), + false, + ); + + expect(switchResult.shouldProceed).toBe(true); + expect(onVisionSwitchRequired).toHaveBeenCalledWith(parts); + }); + + it('should fall back to persist behavior when vlmSwitchMode has invalid value', async () => { + const config = createMockConfig( + AuthType.QWEN_OAUTH, + 'qwen3-coder-plus', + ApprovalMode.DEFAULT, + 'invalid-value', + ); + const onVisionSwitchRequired = vi.fn(); // Should not be called + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } }, + ]; + + const switchResult = await 
result.current.handleVisionSwitch( + parts, + Date.now(), + false, + ); + + expect(switchResult.shouldProceed).toBe(true); + expect(switchResult.originalModel).toBeUndefined(); + // For invalid values, it should continue with current model (persist behavior) + expect(config.setModel).not.toHaveBeenCalled(); expect(onVisionSwitchRequired).not.toHaveBeenCalled(); }); }); diff --git a/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts b/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts index 86a2cf18..6e201876 100644 --- a/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts +++ b/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts @@ -166,11 +166,11 @@ export function processVisionSwitchOutcome( case VisionSwitchOutcome.SwitchSessionToVL: return { persistSessionModel: vlModelId }; - case VisionSwitchOutcome.DisallowWithGuidance: - return { showGuidance: true }; + case VisionSwitchOutcome.ContinueWithCurrentModel: + return {}; // Continue with current model, no changes needed default: - return { showGuidance: true }; + return {}; // Default to continuing with current model } } @@ -256,42 +256,87 @@ export function useVisionAutoSwitch( if (config.getApprovalMode() === ApprovalMode.YOLO) { const vlModelId = getDefaultVisionModel(); originalModelRef.current = config.getModel(); - config.setModel(vlModelId); + config.setModel(vlModelId, { + reason: 'vision_auto_switch', + context: 'YOLO mode auto-switch for image content', + }); return { shouldProceed: true, originalModel: originalModelRef.current, }; } - try { - const visionSwitchResult = await onVisionSwitchRequired(query); - - if (visionSwitchResult.showGuidance) { - // Show guidance and don't proceed with the request - addItem( - { - type: MessageType.INFO, - text: getVisionSwitchGuidanceMessage(), - }, - userMessageTimestamp, - ); - return { shouldProceed: false }; + // Check if there's a default VLM switch mode configured + const defaultVlmSwitchMode = config.getVlmSwitchMode(); + if (defaultVlmSwitchMode) { + // Convert 
string value to VisionSwitchOutcome enum + let outcome: VisionSwitchOutcome; + switch (defaultVlmSwitchMode) { + case 'once': + outcome = VisionSwitchOutcome.SwitchOnce; + break; + case 'session': + outcome = VisionSwitchOutcome.SwitchSessionToVL; + break; + case 'persist': + outcome = VisionSwitchOutcome.ContinueWithCurrentModel; + break; + default: + // Invalid value: do not prompt; continue with the current model (same as 'persist') + outcome = VisionSwitchOutcome.ContinueWithCurrentModel; } + // Process the default outcome + const visionSwitchResult = processVisionSwitchOutcome(outcome); + if (visionSwitchResult.modelOverride) { // One-time model override originalModelRef.current = config.getModel(); - config.setModel(visionSwitchResult.modelOverride); + config.setModel(visionSwitchResult.modelOverride, { + reason: 'vision_auto_switch', + context: `Default VLM switch mode: ${defaultVlmSwitchMode} (one-time override)`, + }); return { shouldProceed: true, originalModel: originalModelRef.current, }; } else if (visionSwitchResult.persistSessionModel) { // Persistent session model change - config.setModel(visionSwitchResult.persistSessionModel); + config.setModel(visionSwitchResult.persistSessionModel, { + reason: 'vision_auto_switch', + context: `Default VLM switch mode: ${defaultVlmSwitchMode} (session persistent)`, + }); return { shouldProceed: true }; } + // For ContinueWithCurrentModel or any other case, proceed with current model + return { shouldProceed: true }; + } + + try { + const visionSwitchResult = await onVisionSwitchRequired(query); + + if (visionSwitchResult.modelOverride) { + // One-time model override + originalModelRef.current = config.getModel(); + config.setModel(visionSwitchResult.modelOverride, { + reason: 'vision_auto_switch', + context: 'User-prompted vision switch (one-time override)', + }); + return { + shouldProceed: true, + originalModel: originalModelRef.current, + }; + } else if (visionSwitchResult.persistSessionModel) { + // Persistent session model change + 
config.setModel(visionSwitchResult.persistSessionModel, { + reason: 'vision_auto_switch', + context: 'User-prompted vision switch (session persistent)', + }); + return { shouldProceed: true }; + } + + // For ContinueWithCurrentModel or any other case, proceed with current model return { shouldProceed: true }; } catch (_error) { // If vision switch dialog was cancelled or errored, don't proceed @@ -303,7 +348,10 @@ export function useVisionAutoSwitch( const restoreOriginalModel = useCallback(() => { if (originalModelRef.current) { - config.setModel(originalModelRef.current); + config.setModel(originalModelRef.current, { + reason: 'vision_auto_switch', + context: 'Restoring original model after vision switch', + }); originalModelRef.current = null; } }, [config]); diff --git a/packages/cli/src/ui/models/availableModels.ts b/packages/cli/src/ui/models/availableModels.ts index 7c3a1cf5..b2b643dd 100644 --- a/packages/cli/src/ui/models/availableModels.ts +++ b/packages/cli/src/ui/models/availableModels.ts @@ -10,9 +10,12 @@ export type AvailableModel = { isVision?: boolean; }; +export const MAINLINE_VLM = 'qwen-vl-max-latest'; +export const MAINLINE_CODER = 'qwen3-coder-plus'; + export const AVAILABLE_MODELS_QWEN: AvailableModel[] = [ - { id: 'qwen3-coder-plus', label: 'qwen3-coder-plus' }, - { id: 'qwen-vl-max-latest', label: 'qwen-vl-max', isVision: true }, + { id: MAINLINE_CODER, label: MAINLINE_CODER }, + { id: MAINLINE_VLM, label: MAINLINE_VLM, isVision: true }, ]; /** @@ -42,7 +45,7 @@ export function getOpenAIAvailableModelFromEnv(): AvailableModel | null { * until our coding model supports multimodal. 
*/ export function getDefaultVisionModel(): string { - return 'qwen-vl-max-latest'; + return MAINLINE_VLM; } export function isVisionModel(modelId: string): boolean { diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index 8d18b89a..e4e1cd05 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -737,4 +737,85 @@ describe('setApprovalMode with folder trust', () => { expect(() => config.setApprovalMode(ApprovalMode.AUTO_EDIT)).not.toThrow(); expect(() => config.setApprovalMode(ApprovalMode.DEFAULT)).not.toThrow(); }); + + describe('Model Switch Logging', () => { + it('should log model switch when setModel is called with different model', async () => { + const config = new Config({ + sessionId: 'test-model-switch', + targetDir: '.', + debugMode: false, + model: 'qwen3-coder-plus', + cwd: '.', + }); + + // Initialize the config to set up content generator + await config.initialize(); + + // Mock the logger's logModelSwitch method + const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch'); + + // Change the model + config.setModel('qwen-vl-max-latest', { + reason: 'vision_auto_switch', + context: 'Test model switch', + }); + + // Verify that logModelSwitch was called with correct parameters + expect(logModelSwitchSpy).toHaveBeenCalledWith({ + fromModel: 'qwen3-coder-plus', + toModel: 'qwen-vl-max-latest', + reason: 'vision_auto_switch', + context: 'Test model switch', + }); + }); + + it('should not log when setModel is called with same model', async () => { + const config = new Config({ + sessionId: 'test-same-model', + targetDir: '.', + debugMode: false, + model: 'qwen3-coder-plus', + cwd: '.', + }); + + // Initialize the config to set up content generator + await config.initialize(); + + // Mock the logger's logModelSwitch method + const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch'); + + // Set the same model + 
config.setModel('qwen3-coder-plus'); + + // Verify that logModelSwitch was not called + expect(logModelSwitchSpy).not.toHaveBeenCalled(); + }); + + it('should use default reason when no options provided', async () => { + const config = new Config({ + sessionId: 'test-default-reason', + targetDir: '.', + debugMode: false, + model: 'qwen3-coder-plus', + cwd: '.', + }); + + // Initialize the config to set up content generator + await config.initialize(); + + // Mock the logger's logModelSwitch method + const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch'); + + // Change the model without options + config.setModel('qwen-vl-max-latest'); + + // Verify that logModelSwitch was called with default reason + expect(logModelSwitchSpy).toHaveBeenCalledWith({ + fromModel: 'qwen3-coder-plus', + toModel: 'qwen-vl-max-latest', + reason: 'manual', + context: undefined, + }); + }); + }); }); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 6956fb06..83d0bce0 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -56,6 +56,7 @@ import { DEFAULT_GEMINI_FLASH_MODEL, } from './models.js'; import { Storage } from './storage.js'; +import { Logger, type ModelSwitchEvent } from '../core/logger.js'; // Re-export OAuth config type export type { AnyToolInvocation, MCPOAuthConfig }; @@ -239,6 +240,7 @@ export interface ConfigParameters { extensionManagement?: boolean; enablePromptCompletion?: boolean; skipLoopDetection?: boolean; + vlmSwitchMode?: string; } export class Config { @@ -330,9 +332,11 @@ export class Config { private readonly extensionManagement: boolean; private readonly enablePromptCompletion: boolean = false; private readonly skipLoopDetection: boolean; + private readonly vlmSwitchMode: string | undefined; private initialized: boolean = false; readonly storage: Storage; private readonly fileExclusions: FileExclusions; + private logger: Logger | null = null; constructor(params: 
ConfigParameters) { this.sessionId = params.sessionId; @@ -424,8 +428,15 @@ export class Config { this.extensionManagement = params.extensionManagement ?? false; this.storage = new Storage(this.targetDir); this.enablePromptCompletion = params.enablePromptCompletion ?? false; + this.vlmSwitchMode = params.vlmSwitchMode; this.fileExclusions = new FileExclusions(this); + // Initialize logger asynchronously + this.logger = new Logger(this.sessionId, this.storage); + this.logger.initialize().catch((error) => { + console.debug('Failed to initialize logger:', error); + }); + if (params.contextFileName) { setGeminiMdFilename(params.contextFileName); } @@ -517,11 +528,34 @@ export class Config { return this.contentGeneratorConfig?.model || this.model; } - setModel(newModel: string): void { + setModel( + newModel: string, + options?: { + reason?: ModelSwitchEvent['reason']; + context?: string; + }, + ): void { + const oldModel = this.getModel(); + if (this.contentGeneratorConfig) { this.contentGeneratorConfig.model = newModel; } + // Log the model switch if the model actually changed + if (oldModel !== newModel && this.logger) { + const switchEvent: ModelSwitchEvent = { + fromModel: oldModel, + toModel: newModel, + reason: options?.reason || 'manual', + context: options?.context, + }; + + // Log asynchronously to avoid blocking + this.logger.logModelSwitch(switchEvent).catch((error) => { + console.debug('Failed to log model switch:', error); + }); + } + // Reinitialize chat with updated configuration while preserving history const geminiClient = this.getGeminiClient(); if (geminiClient && geminiClient.isInitialized()) { @@ -938,6 +972,10 @@ export class Config { return this.skipLoopDetection; } + getVlmSwitchMode(): string | undefined { + return this.vlmSwitchMode; + } + async getGitService(): Promise { if (!this.gitService) { this.gitService = new GitService(this.targetDir, this.storage); diff --git a/packages/core/src/core/logger.test.ts 
b/packages/core/src/core/logger.test.ts index 0b506b4c..29793a33 100644 --- a/packages/core/src/core/logger.test.ts +++ b/packages/core/src/core/logger.test.ts @@ -755,4 +755,84 @@ describe('Logger', () => { expect(logger['messageId']).toBe(0); }); }); + + describe('Model Switch Logging', () => { + it('should log model switch events correctly', async () => { + const testSessionId = 'test-session-model-switch'; + const logger = new Logger(testSessionId, new Storage(process.cwd())); + await logger.initialize(); + + const modelSwitchEvent = { + fromModel: 'qwen3-coder-plus', + toModel: 'qwen-vl-max-latest', + reason: 'vision_auto_switch' as const, + context: 'YOLO mode auto-switch for image content', + }; + + await logger.logModelSwitch(modelSwitchEvent); + + // Read the log file to verify the entry was written + const logContent = await fs.readFile(TEST_LOG_FILE_PATH, 'utf-8'); + const logs: LogEntry[] = JSON.parse(logContent); + + const modelSwitchLog = logs.find( + (log) => + log.sessionId === testSessionId && + log.type === MessageSenderType.MODEL_SWITCH, + ); + + expect(modelSwitchLog).toBeDefined(); + expect(modelSwitchLog!.type).toBe(MessageSenderType.MODEL_SWITCH); + + const loggedEvent = JSON.parse(modelSwitchLog!.message); + expect(loggedEvent.fromModel).toBe('qwen3-coder-plus'); + expect(loggedEvent.toModel).toBe('qwen-vl-max-latest'); + expect(loggedEvent.reason).toBe('vision_auto_switch'); + expect(loggedEvent.context).toBe( + 'YOLO mode auto-switch for image content', + ); + }); + + it('should handle multiple model switch events', async () => { + const testSessionId = 'test-session-multiple-switches'; + const logger = new Logger(testSessionId, new Storage(process.cwd())); + await logger.initialize(); + + // Log first switch + await logger.logModelSwitch({ + fromModel: 'qwen3-coder-plus', + toModel: 'qwen-vl-max-latest', + reason: 'vision_auto_switch', + context: 'Auto-switch for image', + }); + + // Log second switch (restore) + await 
logger.logModelSwitch({ + fromModel: 'qwen-vl-max-latest', + toModel: 'qwen3-coder-plus', + reason: 'vision_auto_switch', + context: 'Restoring original model', + }); + + // Read the log file to verify both entries were written + const logContent = await fs.readFile(TEST_LOG_FILE_PATH, 'utf-8'); + const logs: LogEntry[] = JSON.parse(logContent); + + const modelSwitchLogs = logs.filter( + (log) => + log.sessionId === testSessionId && + log.type === MessageSenderType.MODEL_SWITCH, + ); + + expect(modelSwitchLogs).toHaveLength(2); + + const firstSwitch = JSON.parse(modelSwitchLogs[0].message); + expect(firstSwitch.fromModel).toBe('qwen3-coder-plus'); + expect(firstSwitch.toModel).toBe('qwen-vl-max-latest'); + + const secondSwitch = JSON.parse(modelSwitchLogs[1].message); + expect(secondSwitch.fromModel).toBe('qwen-vl-max-latest'); + expect(secondSwitch.toModel).toBe('qwen3-coder-plus'); + }); + }); }); diff --git a/packages/core/src/core/logger.ts b/packages/core/src/core/logger.ts index a837b25d..4a9604b7 100644 --- a/packages/core/src/core/logger.ts +++ b/packages/core/src/core/logger.ts @@ -13,6 +13,7 @@ const LOG_FILE_NAME = 'logs.json'; export enum MessageSenderType { USER = 'user', + MODEL_SWITCH = 'model_switch', } export interface LogEntry { @@ -23,6 +24,13 @@ export interface LogEntry { message: string; } +export interface ModelSwitchEvent { + fromModel: string; + toModel: string; + reason: 'vision_auto_switch' | 'manual' | 'fallback' | 'other'; + context?: string; +} + // This regex matches any character that is NOT a letter (a-z, A-Z), // a number (0-9), a hyphen (-), an underscore (_), or a dot (.). 
@@ -270,6 +278,17 @@ export class Logger { } } + async logModelSwitch(event: ModelSwitchEvent): Promise { + const message = JSON.stringify({ + fromModel: event.fromModel, + toModel: event.toModel, + reason: event.reason, + context: event.context, + }); + + await this.logMessage(MessageSenderType.MODEL_SWITCH, message); + } + private _checkpointPath(tag: string): string { if (!tag.length) { throw new Error('No checkpoint tag specified.'); diff --git a/packages/core/src/core/tokenLimits.ts b/packages/core/src/core/tokenLimits.ts index 67ff6a86..50ac191c 100644 --- a/packages/core/src/core/tokenLimits.ts +++ b/packages/core/src/core/tokenLimits.ts @@ -111,6 +111,9 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [ // Commercial Qwen3-Coder-Flash: 1M token context [/^qwen3-coder-flash(-.*)?$/, LIMITS['1m']], // catches "qwen3-coder-flash" and date variants + // Commercial Qwen3-Max-Preview: 256K token context + [/^qwen3-max-preview(-.*)?$/, LIMITS['256k']], // catches "qwen3-max-preview" and date variants + // Open-source Qwen3-Coder variants: 256K native [/^qwen3-coder-.*$/, LIMITS['256k']], // Open-source Qwen3 2507 variants: 256K native @@ -166,8 +169,14 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [ // Qwen3-Coder-Plus: 65,536 max output tokens [/^qwen3-coder-plus(-.*)?$/, LIMITS['64k']], + // Qwen3-Max-Preview: 65,536 max output tokens + [/^qwen3-max-preview(-.*)?$/, LIMITS['64k']], + // Qwen-VL-Max-Latest: 8,192 max output tokens [/^qwen-vl-max-latest$/, LIMITS['8k']], + + // Qwen3-VL-Plus: 8,192 max output tokens + [/^qwen3-vl-plus$/, LIMITS['8k']], ]; /**