From e4d16adf7b8f2befd0a88b3b649280f7295b86c6 Mon Sep 17 00:00:00 2001
From: "mingholy.lmh" <mingholy.lmh@alibaba-inc.com>
Date: Tue, 23 Sep 2025 19:14:26 +0800
Subject: [PATCH] feat: add cli args & env variables for switch behavior

---
 packages/cli/src/config/config.ts             |  12 +
 packages/cli/src/config/settingsSchema.ts     |  10 +
 .../ui/components/ModelSwitchDialog.test.tsx  |  30 +--
 .../src/ui/components/ModelSwitchDialog.tsx   |  12 +-
 .../src/ui/hooks/useVisionAutoSwitch.test.ts  | 228 ++++++++++++++++--
 .../cli/src/ui/hooks/useVisionAutoSwitch.ts   |  88 +++++--
 packages/cli/src/ui/models/availableModels.ts |   9 +-
 packages/core/src/config/config.test.ts       |  81 +++++++
 packages/core/src/config/config.ts            |  40 ++-
 packages/core/src/core/logger.test.ts         |  80 ++++++
 packages/core/src/core/logger.ts              |  19 ++
 packages/core/src/core/tokenLimits.ts         |   9 +
 12 files changed, 555 insertions(+), 63 deletions(-)
diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts
index eaa354d6..e1ee021f 100755
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -82,6 +82,7 @@ export interface CliArgs {
   includeDirectories: string[] | undefined;
   tavilyApiKey: string | undefined;
   screenReader: boolean | undefined;
+  vlmSwitchMode: string | undefined;
 }
 
 export async function parseArguments(settings: Settings): Promise<CliArgs> {
@@ -249,6 +250,13 @@ export async function parseArguments(settings: Settings): Promise<CliArgs> {
           description: 'Enable screen reader mode for accessibility.',
           default: false,
         })
+        .option('vlm-switch-mode', {
+          type: 'string',
+          choices: ['once', 'session', 'persist'],
+          description:
+            'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). Overrides settings files.',
+          default: process.env['VLM_SWITCH_MODE'],
+        })
         .check((argv) => {
           if (argv.prompt && argv['promptInteractive']) {
             throw new Error(
@@ -524,6 +532,9 @@ export async function loadCliConfig(
     argv.screenReader !== undefined
       ? argv.screenReader
       : (settings.ui?.accessibility?.screenReader ?? false);
+
+  const vlmSwitchMode =
+    argv.vlmSwitchMode || settings.experimental?.vlmSwitchMode;
   return new Config({
     sessionId,
     embeddingModel: DEFAULT_GEMINI_EMBEDDING_MODEL,
@@ -630,6 +641,7 @@ export async function loadCliConfig(
     skipNextSpeakerCheck: settings.model?.skipNextSpeakerCheck,
     enablePromptCompletion: settings.general?.enablePromptCompletion ?? false,
     skipLoopDetection: settings.skipLoopDetection ?? false,
+    vlmSwitchMode,
   });
 }
 
diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts
index c7f1e94e..84261893 100644
--- a/packages/cli/src/config/settingsSchema.ts
+++ b/packages/cli/src/config/settingsSchema.ts
@@ -751,6 +751,16 @@ export const SETTINGS_SCHEMA = {
           'Enable vision model support and auto-switching functionality. When disabled, vision models like qwen-vl-max-latest will be hidden and auto-switching will not occur.',
         showInDialog: true,
       },
+      vlmSwitchMode: {
+        type: 'string',
+        label: 'VLM Switch Mode',
+        category: 'Experimental',
+        requiresRestart: false,
+        default: undefined as string | undefined,
+        description:
+          'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). If not set, user will be prompted each time. This is a temporary experimental feature.',
+        showInDialog: false,
+      },
     },
   },
 
diff --git a/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx b/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx
index f26dcc55..aab45cc2 100644
--- a/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx
+++ b/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx
@@ -46,8 +46,8 @@ describe('ModelSwitchDialog', () => {
         value: VisionSwitchOutcome.SwitchSessionToVL,
       },
       {
-        label: 'Do not switch, show guidance',
-        value: VisionSwitchOutcome.DisallowWithGuidance,
+        label: 'Continue with current model',
+        value: VisionSwitchOutcome.ContinueWithCurrentModel,
       },
     ];
 
@@ -81,18 +81,18 @@ describe('ModelSwitchDialog', () => {
     );
   });
 
-  it('should call onSelect with DisallowWithGuidance when third option is selected', () => {
+  it('should call onSelect with ContinueWithCurrentModel when third option is selected', () => {
     render(<ModelSwitchDialog onSelect={mockOnSelect} />);
 
     const onSelectCallback = mockRadioButtonSelect.mock.calls[0][0].onSelect;
-    onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance);
+    onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel);
 
     expect(mockOnSelect).toHaveBeenCalledWith(
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
     );
   });
 
-  it('should setup escape key handler to call onSelect with DisallowWithGuidance', () => {
+  it('should setup escape key handler to call onSelect with ContinueWithCurrentModel', () => {
     render(<ModelSwitchDialog onSelect={mockOnSelect} />);
 
     expect(mockUseKeypress).toHaveBeenCalledWith(expect.any(Function), {
@@ -104,7 +104,7 @@ describe('ModelSwitchDialog', () => {
     keypressHandler({ name: 'escape' });
 
     expect(mockOnSelect).toHaveBeenCalledWith(
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
     );
   });
 
@@ -126,13 +126,9 @@ describe('ModelSwitchDialog', () => {
 
   describe('VisionSwitchOutcome enum', () => {
     it('should have correct enum values', () => {
-      expect(VisionSwitchOutcome.SwitchOnce).toBe('switch_once');
-      expect(VisionSwitchOutcome.SwitchSessionToVL).toBe(
-        'switch_session_to_vl',
-      );
-      expect(VisionSwitchOutcome.DisallowWithGuidance).toBe(
-        'disallow_with_guidance',
-      );
+      expect(VisionSwitchOutcome.SwitchOnce).toBe('once');
+      expect(VisionSwitchOutcome.SwitchSessionToVL).toBe('session');
+      expect(VisionSwitchOutcome.ContinueWithCurrentModel).toBe('persist');
     });
   });
 
@@ -144,7 +140,7 @@ describe('ModelSwitchDialog', () => {
     // Call multiple times
     onSelectCallback(VisionSwitchOutcome.SwitchOnce);
     onSelectCallback(VisionSwitchOutcome.SwitchSessionToVL);
-    onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance);
+    onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel);
 
     expect(mockOnSelect).toHaveBeenCalledTimes(3);
     expect(mockOnSelect).toHaveBeenNthCalledWith(
@@ -157,7 +153,7 @@ describe('ModelSwitchDialog', () => {
     );
     expect(mockOnSelect).toHaveBeenNthCalledWith(
       3,
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
     );
   });
 
@@ -179,7 +175,7 @@ describe('ModelSwitchDialog', () => {
 
     expect(mockOnSelect).toHaveBeenCalledTimes(2);
     expect(mockOnSelect).toHaveBeenCalledWith(
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
     );
   });
 });
diff --git a/packages/cli/src/ui/components/ModelSwitchDialog.tsx b/packages/cli/src/ui/components/ModelSwitchDialog.tsx
index 1a8c73d4..f2993c47 100644
--- a/packages/cli/src/ui/components/ModelSwitchDialog.tsx
+++ b/packages/cli/src/ui/components/ModelSwitchDialog.tsx
@@ -14,9 +14,9 @@ import {
 import { useKeypress } from '../hooks/useKeypress.js';
 
 export enum VisionSwitchOutcome {
-  SwitchOnce = 'switch_once',
-  SwitchSessionToVL = 'switch_session_to_vl',
-  DisallowWithGuidance = 'disallow_with_guidance',
+  SwitchOnce = 'once',
+  SwitchSessionToVL = 'session',
+  ContinueWithCurrentModel = 'persist',
 }
 
 export interface ModelSwitchDialogProps {
@@ -29,7 +29,7 @@ export const ModelSwitchDialog: React.FC<ModelSwitchDialogProps> = ({
   useKeypress(
     (key) => {
       if (key.name === 'escape') {
-        onSelect(VisionSwitchOutcome.DisallowWithGuidance);
+        onSelect(VisionSwitchOutcome.ContinueWithCurrentModel);
       }
     },
     { isActive: true },
@@ -45,8 +45,8 @@ export const ModelSwitchDialog: React.FC<ModelSwitchDialogProps> = ({
       value: VisionSwitchOutcome.SwitchSessionToVL,
     },
     {
-      label: 'Do not switch, show guidance',
-      value: VisionSwitchOutcome.DisallowWithGuidance,
+      label: 'Continue with current model',
+      value: VisionSwitchOutcome.ContinueWithCurrentModel,
     },
   ];
 
diff --git a/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts b/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts
index 3dcb153b..fa56a94b 100644
--- a/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts
+++ b/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts
@@ -175,11 +175,11 @@ describe('useVisionAutoSwitch helpers', () => {
       expect(result).toEqual({ persistSessionModel: vl });
     });
 
-    it('maps DisallowWithGuidance to showGuidance', () => {
+    it('maps ContinueWithCurrentModel to empty result', () => {
       const result = processVisionSwitchOutcome(
-        VisionSwitchOutcome.DisallowWithGuidance,
+        VisionSwitchOutcome.ContinueWithCurrentModel,
       );
-      expect(result).toEqual({ showGuidance: true });
+      expect(result).toEqual({});
     });
   });
 
@@ -205,6 +205,7 @@ describe('useVisionAutoSwitch hook', () => {
     authType: AuthType,
     initialModel: string,
     approvalMode: ApprovalMode = ApprovalMode.DEFAULT,
+    vlmSwitchMode?: string,
   ) => {
     let currentModel = initialModel;
     const mockConfig: Partial<Config> = {
@@ -213,6 +214,7 @@ describe('useVisionAutoSwitch hook', () => {
         currentModel = m;
       }),
       getApprovalMode: vi.fn(() => approvalMode),
+      getVlmSwitchMode: vi.fn(() => vlmSwitchMode),
       getContentGeneratorConfig: vi.fn(() => ({
         authType,
         model: currentModel,
@@ -281,11 +283,9 @@ describe('useVisionAutoSwitch hook', () => {
     expect(onVisionSwitchRequired).not.toHaveBeenCalled();
   });
 
-  it('shows guidance and blocks when dialog returns showGuidance', async () => {
+  it('continues with current model when dialog returns empty result', async () => {
     const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
-    const onVisionSwitchRequired = vi
-      .fn()
-      .mockResolvedValue({ showGuidance: true });
+    const onVisionSwitchRequired = vi.fn().mockResolvedValue({}); // Empty result for ContinueWithCurrentModel
     const { result } = renderHook(() =>
       useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
     );
@@ -300,11 +300,12 @@ describe('useVisionAutoSwitch hook', () => {
       res = await result.current.handleVisionSwitch(parts, userTs, false);
     });
 
-    expect(addItem).toHaveBeenCalledWith(
+    // Should not add any guidance message
+    expect(addItem).not.toHaveBeenCalledWith(
       { type: MessageType.INFO, text: getVisionSwitchGuidanceMessage() },
       userTs,
     );
-    expect(res).toEqual({ shouldProceed: false });
+    expect(res).toEqual({ shouldProceed: true });
     expect(config.setModel).not.toHaveBeenCalled();
   });
 
@@ -328,13 +329,19 @@ describe('useVisionAutoSwitch hook', () => {
     });
 
     expect(res).toEqual({ shouldProceed: true, originalModel: initialModel });
-    expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
+    expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest', {
+      reason: 'vision_auto_switch',
+      context: 'User-prompted vision switch (one-time override)',
+    });
 
     // Now restore
     act(() => {
       result.current.restoreOriginalModel();
     });
-    expect(config.setModel).toHaveBeenLastCalledWith(initialModel);
+    expect(config.setModel).toHaveBeenLastCalledWith(initialModel, {
+      reason: 'vision_auto_switch',
+      context: 'Restoring original model after vision switch',
+    });
   });
 
   it('persists session model when dialog requests persistence', async () => {
@@ -356,7 +363,10 @@ describe('useVisionAutoSwitch hook', () => {
     });
 
     expect(res).toEqual({ shouldProceed: true });
-    expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
+    expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest', {
+      reason: 'vision_auto_switch',
+      context: 'User-prompted vision switch (session persistent)',
+    });
 
     // Restore should be a no-op since no one-time override was used
     act(() => {
@@ -460,7 +470,10 @@ describe('useVisionAutoSwitch hook', () => {
         shouldProceed: true,
         originalModel: initialModel,
       });
-      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel());
+      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
+        reason: 'vision_auto_switch',
+        context: 'YOLO mode auto-switch for image content',
+      });
     });
 
     it('does not switch in YOLO mode when no images are present', async () => {
@@ -548,7 +561,10 @@ describe('useVisionAutoSwitch hook', () => {
       });
 
       // Verify model was switched
-      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel());
+      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
+        reason: 'vision_auto_switch',
+        context: 'YOLO mode auto-switch for image content',
+      });
 
       // Now restore the original model
       act(() => {
@@ -556,7 +572,10 @@ describe('useVisionAutoSwitch hook', () => {
       });
 
       // Verify model was restored
-      expect(config.setModel).toHaveBeenLastCalledWith(initialModel);
+      expect(config.setModel).toHaveBeenLastCalledWith(initialModel, {
+        reason: 'vision_auto_switch',
+        context: 'Restoring original model after vision switch',
+      });
     });
 
     it('does not switch in YOLO mode when authType is not QWEN_OAUTH', async () => {
@@ -652,7 +671,184 @@ describe('useVisionAutoSwitch hook', () => {
         shouldProceed: true,
         originalModel: initialModel,
       });
-      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel());
+      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
+        reason: 'vision_auto_switch',
+        context: 'YOLO mode auto-switch for image content',
+      });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('VLM switch mode default behavior', () => {
+    it('should automatically switch once when vlmSwitchMode is "once"', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        'once',
+      );
+      const onVisionSwitchRequired = vi.fn(); // Should not be called
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(switchResult.originalModel).toBe('qwen3-coder-plus');
+      expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest', {
+        reason: 'vision_auto_switch',
+        context: 'Default VLM switch mode: once (one-time override)',
+      });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+    });
+
+    it('should switch session when vlmSwitchMode is "session"', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        'session',
+      );
+      const onVisionSwitchRequired = vi.fn(); // Should not be called
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(switchResult.originalModel).toBeUndefined(); // No original model for session switch
+      expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest', {
+        reason: 'vision_auto_switch',
+        context: 'Default VLM switch mode: session (session persistent)',
+      });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+    });
+
+    it('should continue with current model when vlmSwitchMode is "persist"', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        'persist',
+      );
+      const onVisionSwitchRequired = vi.fn(); // Should not be called
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(switchResult.originalModel).toBeUndefined();
+      expect(config.setModel).not.toHaveBeenCalled();
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+    });
+
+    it('should fall back to user prompt when vlmSwitchMode is not set', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        undefined, // No default mode
+      );
+      const onVisionSwitchRequired = vi
+        .fn()
+        .mockResolvedValue({ modelOverride: 'qwen-vl-max-latest' });
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(onVisionSwitchRequired).toHaveBeenCalledWith(parts);
+    });
+
+    it('should fall back to persist behavior when vlmSwitchMode has invalid value', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        'invalid-value',
+      );
+      const onVisionSwitchRequired = vi.fn(); // Should not be called
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(switchResult.originalModel).toBeUndefined();
+      // For invalid values, it should continue with current model (persist behavior)
+      expect(config.setModel).not.toHaveBeenCalled();
       expect(onVisionSwitchRequired).not.toHaveBeenCalled();
     });
   });
diff --git a/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts b/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
index 86a2cf18..6e201876 100644
--- a/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
+++ b/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
@@ -166,11 +166,11 @@ export function processVisionSwitchOutcome(
     case VisionSwitchOutcome.SwitchSessionToVL:
       return { persistSessionModel: vlModelId };
 
-    case VisionSwitchOutcome.DisallowWithGuidance:
-      return { showGuidance: true };
+    case VisionSwitchOutcome.ContinueWithCurrentModel:
+      return {}; // Continue with current model, no changes needed
 
     default:
-      return { showGuidance: true };
+      return {}; // Default to continuing with current model
   }
 }
 
@@ -256,42 +256,87 @@ export function useVisionAutoSwitch(
       if (config.getApprovalMode() === ApprovalMode.YOLO) {
         const vlModelId = getDefaultVisionModel();
         originalModelRef.current = config.getModel();
-        config.setModel(vlModelId);
+        config.setModel(vlModelId, {
+          reason: 'vision_auto_switch',
+          context: 'YOLO mode auto-switch for image content',
+        });
         return {
           shouldProceed: true,
           originalModel: originalModelRef.current,
         };
       }
 
-      try {
-        const visionSwitchResult = await onVisionSwitchRequired(query);
-
-        if (visionSwitchResult.showGuidance) {
-          // Show guidance and don't proceed with the request
-          addItem(
-            {
-              type: MessageType.INFO,
-              text: getVisionSwitchGuidanceMessage(),
-            },
-            userMessageTimestamp,
-          );
-          return { shouldProceed: false };
+      // Check if there's a default VLM switch mode configured
+      const defaultVlmSwitchMode = config.getVlmSwitchMode();
+      if (defaultVlmSwitchMode) {
+        // Convert string value to VisionSwitchOutcome enum
+        let outcome: VisionSwitchOutcome;
+        switch (defaultVlmSwitchMode) {
+          case 'once':
+            outcome = VisionSwitchOutcome.SwitchOnce;
+            break;
+          case 'session':
+            outcome = VisionSwitchOutcome.SwitchSessionToVL;
+            break;
+          case 'persist':
+            outcome = VisionSwitchOutcome.ContinueWithCurrentModel;
+            break;
+          default:
+            // Invalid value: do not prompt; continue with the current model
+            outcome = VisionSwitchOutcome.ContinueWithCurrentModel;
         }
 
+        // Process the default outcome
+        const visionSwitchResult = processVisionSwitchOutcome(outcome);
+
         if (visionSwitchResult.modelOverride) {
           // One-time model override
           originalModelRef.current = config.getModel();
-          config.setModel(visionSwitchResult.modelOverride);
+          config.setModel(visionSwitchResult.modelOverride, {
+            reason: 'vision_auto_switch',
+            context: `Default VLM switch mode: ${defaultVlmSwitchMode} (one-time override)`,
+          });
           return {
             shouldProceed: true,
             originalModel: originalModelRef.current,
           };
         } else if (visionSwitchResult.persistSessionModel) {
           // Persistent session model change
-          config.setModel(visionSwitchResult.persistSessionModel);
+          config.setModel(visionSwitchResult.persistSessionModel, {
+            reason: 'vision_auto_switch',
+            context: `Default VLM switch mode: ${defaultVlmSwitchMode} (session persistent)`,
+          });
           return { shouldProceed: true };
         }
 
+        // For ContinueWithCurrentModel or any other case, proceed with current model
+        return { shouldProceed: true };
+      }
+
+      try {
+        const visionSwitchResult = await onVisionSwitchRequired(query);
+
+        if (visionSwitchResult.modelOverride) {
+          // One-time model override
+          originalModelRef.current = config.getModel();
+          config.setModel(visionSwitchResult.modelOverride, {
+            reason: 'vision_auto_switch',
+            context: 'User-prompted vision switch (one-time override)',
+          });
+          return {
+            shouldProceed: true,
+            originalModel: originalModelRef.current,
+          };
+        } else if (visionSwitchResult.persistSessionModel) {
+          // Persistent session model change
+          config.setModel(visionSwitchResult.persistSessionModel, {
+            reason: 'vision_auto_switch',
+            context: 'User-prompted vision switch (session persistent)',
+          });
+          return { shouldProceed: true };
+        }
+
+        // For ContinueWithCurrentModel or any other case, proceed with current model
         return { shouldProceed: true };
       } catch (_error) {
         // If vision switch dialog was cancelled or errored, don't proceed
@@ -303,7 +348,10 @@ export function useVisionAutoSwitch(
 
   const restoreOriginalModel = useCallback(() => {
     if (originalModelRef.current) {
-      config.setModel(originalModelRef.current);
+      config.setModel(originalModelRef.current, {
+        reason: 'vision_auto_switch',
+        context: 'Restoring original model after vision switch',
+      });
       originalModelRef.current = null;
     }
   }, [config]);
diff --git a/packages/cli/src/ui/models/availableModels.ts b/packages/cli/src/ui/models/availableModels.ts
index 7c3a1cf5..b2b643dd 100644
--- a/packages/cli/src/ui/models/availableModels.ts
+++ b/packages/cli/src/ui/models/availableModels.ts
@@ -10,9 +10,12 @@ export type AvailableModel = {
   isVision?: boolean;
 };
 
+export const MAINLINE_VLM = 'qwen-vl-max-latest';
+export const MAINLINE_CODER = 'qwen3-coder-plus';
+
 export const AVAILABLE_MODELS_QWEN: AvailableModel[] = [
-  { id: 'qwen3-coder-plus', label: 'qwen3-coder-plus' },
-  { id: 'qwen-vl-max-latest', label: 'qwen-vl-max', isVision: true },
+  { id: MAINLINE_CODER, label: MAINLINE_CODER },
+  { id: MAINLINE_VLM, label: MAINLINE_VLM, isVision: true },
 ];
 
 /**
@@ -42,7 +45,7 @@ export function getOpenAIAvailableModelFromEnv(): AvailableModel | null {
  * until our coding model supports multimodal.
  */
 export function getDefaultVisionModel(): string {
-  return 'qwen-vl-max-latest';
+  return MAINLINE_VLM;
 }
 
 export function isVisionModel(modelId: string): boolean {
diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts
index 8d18b89a..e4e1cd05 100644
--- a/packages/core/src/config/config.test.ts
+++ b/packages/core/src/config/config.test.ts
@@ -737,4 +737,85 @@ describe('setApprovalMode with folder trust', () => {
     expect(() => config.setApprovalMode(ApprovalMode.AUTO_EDIT)).not.toThrow();
     expect(() => config.setApprovalMode(ApprovalMode.DEFAULT)).not.toThrow();
   });
+
+  describe('Model Switch Logging', () => {
+    it('should log model switch when setModel is called with different model', async () => {
+      const config = new Config({
+        sessionId: 'test-model-switch',
+        targetDir: '.',
+        debugMode: false,
+        model: 'qwen3-coder-plus',
+        cwd: '.',
+      });
+
+      // Initialize the config to set up content generator
+      await config.initialize();
+
+      // Mock the logger's logModelSwitch method
+      const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch');
+
+      // Change the model
+      config.setModel('qwen-vl-max-latest', {
+        reason: 'vision_auto_switch',
+        context: 'Test model switch',
+      });
+
+      // Verify that logModelSwitch was called with correct parameters
+      expect(logModelSwitchSpy).toHaveBeenCalledWith({
+        fromModel: 'qwen3-coder-plus',
+        toModel: 'qwen-vl-max-latest',
+        reason: 'vision_auto_switch',
+        context: 'Test model switch',
+      });
+    });
+
+    it('should not log when setModel is called with same model', async () => {
+      const config = new Config({
+        sessionId: 'test-same-model',
+        targetDir: '.',
+        debugMode: false,
+        model: 'qwen3-coder-plus',
+        cwd: '.',
+      });
+
+      // Initialize the config to set up content generator
+      await config.initialize();
+
+      // Mock the logger's logModelSwitch method
+      const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch');
+
+      // Set the same model
+      config.setModel('qwen3-coder-plus');
+
+      // Verify that logModelSwitch was not called
+      expect(logModelSwitchSpy).not.toHaveBeenCalled();
+    });
+
+    it('should use default reason when no options provided', async () => {
+      const config = new Config({
+        sessionId: 'test-default-reason',
+        targetDir: '.',
+        debugMode: false,
+        model: 'qwen3-coder-plus',
+        cwd: '.',
+      });
+
+      // Initialize the config to set up content generator
+      await config.initialize();
+
+      // Mock the logger's logModelSwitch method
+      const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch');
+
+      // Change the model without options
+      config.setModel('qwen-vl-max-latest');
+
+      // Verify that logModelSwitch was called with default reason
+      expect(logModelSwitchSpy).toHaveBeenCalledWith({
+        fromModel: 'qwen3-coder-plus',
+        toModel: 'qwen-vl-max-latest',
+        reason: 'manual',
+        context: undefined,
+      });
+    });
+  });
 });
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 6956fb06..83d0bce0 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -56,6 +56,7 @@ import {
   DEFAULT_GEMINI_FLASH_MODEL,
 } from './models.js';
 import { Storage } from './storage.js';
+import { Logger, type ModelSwitchEvent } from '../core/logger.js';
 
 // Re-export OAuth config type
 export type { AnyToolInvocation, MCPOAuthConfig };
@@ -239,6 +240,7 @@ export interface ConfigParameters {
   extensionManagement?: boolean;
   enablePromptCompletion?: boolean;
   skipLoopDetection?: boolean;
+  vlmSwitchMode?: string;
 }
 
 export class Config {
@@ -330,9 +332,11 @@ export class Config {
   private readonly extensionManagement: boolean;
   private readonly enablePromptCompletion: boolean = false;
   private readonly skipLoopDetection: boolean;
+  private readonly vlmSwitchMode: string | undefined;
   private initialized: boolean = false;
   readonly storage: Storage;
   private readonly fileExclusions: FileExclusions;
+  private logger: Logger | null = null;
 
   constructor(params: ConfigParameters) {
     this.sessionId = params.sessionId;
@@ -424,8 +428,15 @@ export class Config {
     this.extensionManagement = params.extensionManagement ?? false;
     this.storage = new Storage(this.targetDir);
     this.enablePromptCompletion = params.enablePromptCompletion ?? false;
+    this.vlmSwitchMode = params.vlmSwitchMode;
     this.fileExclusions = new FileExclusions(this);
 
+    // Initialize logger asynchronously
+    this.logger = new Logger(this.sessionId, this.storage);
+    this.logger.initialize().catch((error) => {
+      console.debug('Failed to initialize logger:', error);
+    });
+
     if (params.contextFileName) {
       setGeminiMdFilename(params.contextFileName);
     }
@@ -517,11 +528,34 @@ export class Config {
     return this.contentGeneratorConfig?.model || this.model;
   }
 
-  setModel(newModel: string): void {
+  setModel(
+    newModel: string,
+    options?: {
+      reason?: ModelSwitchEvent['reason'];
+      context?: string;
+    },
+  ): void {
+    const oldModel = this.getModel();
+
     if (this.contentGeneratorConfig) {
       this.contentGeneratorConfig.model = newModel;
     }
 
+    // Log the model switch if the model actually changed
+    if (oldModel !== newModel && this.logger) {
+      const switchEvent: ModelSwitchEvent = {
+        fromModel: oldModel,
+        toModel: newModel,
+        reason: options?.reason || 'manual',
+        context: options?.context,
+      };
+
+      // Log asynchronously to avoid blocking
+      this.logger.logModelSwitch(switchEvent).catch((error) => {
+        console.debug('Failed to log model switch:', error);
+      });
+    }
+
     // Reinitialize chat with updated configuration while preserving history
     const geminiClient = this.getGeminiClient();
     if (geminiClient && geminiClient.isInitialized()) {
@@ -938,6 +972,10 @@ export class Config {
     return this.skipLoopDetection;
   }
 
+  getVlmSwitchMode(): string | undefined {
+    return this.vlmSwitchMode; // as passed in ConfigParameters; undefined if unset
+  }
+
   async getGitService(): Promise<GitService> {
     if (!this.gitService) {
       this.gitService = new GitService(this.targetDir, this.storage);
diff --git a/packages/core/src/core/logger.test.ts b/packages/core/src/core/logger.test.ts
index 0b506b4c..29793a33 100644
--- a/packages/core/src/core/logger.test.ts
+++ b/packages/core/src/core/logger.test.ts
@@ -755,4 +755,84 @@ describe('Logger', () => {
       expect(logger['messageId']).toBe(0);
     });
   });
+
+  describe('Model Switch Logging', () => {
+    it('should log model switch events correctly', async () => {
+      const testSessionId = 'test-session-model-switch';
+      const logger = new Logger(testSessionId, new Storage(process.cwd()));
+      await logger.initialize();
+
+      const modelSwitchEvent = {
+        fromModel: 'qwen3-coder-plus',
+        toModel: 'qwen-vl-max-latest',
+        reason: 'vision_auto_switch' as const, // keep the literal type, not string
+        context: 'YOLO mode auto-switch for image content',
+      };
+
+      await logger.logModelSwitch(modelSwitchEvent);
+
+      // Read the log file back and parse it as a JSON array of log entries
+      const logContent = await fs.readFile(TEST_LOG_FILE_PATH, 'utf-8');
+      const logs: LogEntry[] = JSON.parse(logContent);
+
+      const modelSwitchLog = logs.find(
+        (log) =>
+          log.sessionId === testSessionId &&
+          log.type === MessageSenderType.MODEL_SWITCH,
+      );
+
+      expect(modelSwitchLog).toBeDefined();
+      expect(modelSwitchLog!.type).toBe(MessageSenderType.MODEL_SWITCH);
+
+      const loggedEvent = JSON.parse(modelSwitchLog!.message); // message is a JSON string
+      expect(loggedEvent.fromModel).toBe('qwen3-coder-plus');
+      expect(loggedEvent.toModel).toBe('qwen-vl-max-latest');
+      expect(loggedEvent.reason).toBe('vision_auto_switch');
+      expect(loggedEvent.context).toBe(
+        'YOLO mode auto-switch for image content',
+      );
+    });
+
+    it('should handle multiple model switch events', async () => {
+      const testSessionId = 'test-session-multiple-switches';
+      const logger = new Logger(testSessionId, new Storage(process.cwd()));
+      await logger.initialize();
+
+      // Log first switch: qwen3-coder-plus -> qwen-vl-max-latest
+      await logger.logModelSwitch({
+        fromModel: 'qwen3-coder-plus',
+        toModel: 'qwen-vl-max-latest',
+        reason: 'vision_auto_switch',
+        context: 'Auto-switch for image',
+      });
+
+      // Log second switch (restore): the same pair in the reverse direction
+      await logger.logModelSwitch({
+        fromModel: 'qwen-vl-max-latest',
+        toModel: 'qwen3-coder-plus',
+        reason: 'vision_auto_switch',
+        context: 'Restoring original model',
+      });
+
+      // Read the log file back; both entries for this session should be present
+      const logContent = await fs.readFile(TEST_LOG_FILE_PATH, 'utf-8');
+      const logs: LogEntry[] = JSON.parse(logContent);
+
+      const modelSwitchLogs = logs.filter(
+        (log) =>
+          log.sessionId === testSessionId &&
+          log.type === MessageSenderType.MODEL_SWITCH,
+      );
+
+      expect(modelSwitchLogs).toHaveLength(2);
+
+      const firstSwitch = JSON.parse(modelSwitchLogs[0].message); // logged first
+      expect(firstSwitch.fromModel).toBe('qwen3-coder-plus');
+      expect(firstSwitch.toModel).toBe('qwen-vl-max-latest');
+
+      const secondSwitch = JSON.parse(modelSwitchLogs[1].message); // logged second
+      expect(secondSwitch.fromModel).toBe('qwen-vl-max-latest');
+      expect(secondSwitch.toModel).toBe('qwen3-coder-plus');
+    });
+  });
 });
diff --git a/packages/core/src/core/logger.ts b/packages/core/src/core/logger.ts
index a837b25d..4a9604b7 100644
--- a/packages/core/src/core/logger.ts
+++ b/packages/core/src/core/logger.ts
@@ -13,6 +13,7 @@ const LOG_FILE_NAME = 'logs.json';
 
 export enum MessageSenderType {
   USER = 'user',
+  MODEL_SWITCH = 'model_switch', // entry type used by Logger.logModelSwitch
 }
 
 export interface LogEntry {
@@ -23,6 +24,13 @@ export interface LogEntry {
   message: string;
 }
 
+export interface ModelSwitchEvent {
+  fromModel: string; // model in effect before the switch
+  toModel: string; // model being switched to
+  reason: 'vision_auto_switch' | 'manual' | 'fallback' | 'other';
+  context?: string; // optional free-form detail about the switch
+}
+
 // This regex matches any character that is NOT a letter (a-z, A-Z),
 // a number (0-9), a hyphen (-), an underscore (_), or a dot (.).
 
@@ -270,6 +278,17 @@ export class Logger {
     }
   }
 
+  /**
+   * Hands a model switch to logMessage as a JSON string typed MODEL_SWITCH.
+   * Only the four known event fields are serialized, in a fixed key order.
+   */
+  async logModelSwitch(event: ModelSwitchEvent): Promise<void> {
+    const { fromModel, toModel, reason, context } = event;
+    const payload = JSON.stringify({ fromModel, toModel, reason, context });
+
+    await this.logMessage(MessageSenderType.MODEL_SWITCH, payload);
+  }
+
   private _checkpointPath(tag: string): string {
     if (!tag.length) {
       throw new Error('No checkpoint tag specified.');
diff --git a/packages/core/src/core/tokenLimits.ts b/packages/core/src/core/tokenLimits.ts
index 67ff6a86..50ac191c 100644
--- a/packages/core/src/core/tokenLimits.ts
+++ b/packages/core/src/core/tokenLimits.ts
@@ -111,6 +111,9 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
   // Commercial Qwen3-Coder-Flash: 1M token context
   [/^qwen3-coder-flash(-.*)?$/, LIMITS['1m']], // catches "qwen3-coder-flash" and date variants
 
+  // Commercial Qwen3-Max-Preview: 256K token context
+  [/^qwen3-max-preview(-.*)?$/, LIMITS['256k']], // catches "qwen3-max-preview" and date variants
+
   // Open-source Qwen3-Coder variants: 256K native
   [/^qwen3-coder-.*$/, LIMITS['256k']],
   // Open-source Qwen3 2507 variants: 256K native
@@ -166,8 +169,14 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
   // Qwen3-Coder-Plus: 65,536 max output tokens
   [/^qwen3-coder-plus(-.*)?$/, LIMITS['64k']],
 
+  // Qwen3-Max-Preview: 65,536 max output tokens
+  [/^qwen3-max-preview(-.*)?$/, LIMITS['64k']],
+
   // Qwen-VL-Max-Latest: 8,192 max output tokens
   [/^qwen-vl-max-latest$/, LIMITS['8k']],
+
+  // Qwen3-VL-Plus: 8,192 max output tokens
+  [/^qwen3-vl-plus$/, LIMITS['8k']],
 ];
 
 /**