From 48d8587bf9d694a1026496fba24b834fe9f85c5c Mon Sep 17 00:00:00 2001
From: Mingholy <mingholy.lmh@gmail.com>
Date: Wed, 24 Sep 2025 10:21:09 +0800
Subject: [PATCH] feat: add yolo mode support to auto vision model switch
 (#652)

* feat: add yolo mode support to auto vision model switch

* feat: add cli args & env variables for switch behavior

* fix: use dedicated model names and settings

* docs: add vision model instructions

* fix: failed test case

* fix: setModel failure
---
 README.md                                     |  53 ++
 packages/cli/src/config/config.test.ts        |   2 +-
 packages/cli/src/config/config.ts             |  12 +
 packages/cli/src/config/settings.test.ts      |  80 ++-
 packages/cli/src/config/settings.ts           |  18 +
 packages/cli/src/config/settingsSchema.ts     |  12 +-
 packages/cli/src/ui/App.tsx                   |  41 +-
 .../ui/components/ModelSwitchDialog.test.tsx  |  30 +-
 .../src/ui/components/ModelSwitchDialog.tsx   |  12 +-
 .../cli/src/ui/hooks/useGeminiStream.test.tsx |  44 +-
 packages/cli/src/ui/hooks/useGeminiStream.ts  |  14 +-
 .../src/ui/hooks/useVisionAutoSwitch.test.ts  | 529 +++++++++++++++++-
 .../cli/src/ui/hooks/useVisionAutoSwitch.ts   | 103 +++-
 packages/cli/src/ui/models/availableModels.ts |   9 +-
 packages/core/src/config/config.test.ts       |  81 +++
 packages/core/src/config/config.ts            |  49 +-
 .../core/src/config/flashFallback.test.ts     |  12 +-
 packages/core/src/config/models.ts            |   7 +-
 packages/core/src/core/client.ts              |   2 +-
 packages/core/src/core/geminiChat.ts          |   2 +-
 packages/core/src/core/logger.test.ts         |  80 +++
 packages/core/src/core/logger.ts              |  19 +
 packages/core/src/core/prompts.ts             |   8 +
 packages/core/src/core/tokenLimits.ts         |  21 +
 packages/core/src/subagents/subagent.test.ts  |  13 +
 packages/core/src/subagents/subagent.ts       |   2 +-
 26 files changed, 1133 insertions(+), 122 deletions(-)

diff --git a/README.md b/README.md
index 40419342..4c4396ec 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,7 @@ For detailed setup instructions, see [Authorization](#authorization).
 - **Code Understanding & Editing** - Query and edit large codebases beyond traditional context window limits
 - **Workflow Automation** - Automate operational tasks like handling pull requests and complex rebases
 - **Enhanced Parser** - Adapted parser specifically optimized for Qwen-Coder models
+- **Vision Model Support** - Automatically detect images in your input and seamlessly switch to vision-capable models for multimodal analysis
 
 ## Installation
 
@@ -121,6 +122,58 @@ Create or edit `.qwen/settings.json` in your home directory:
 
 > 📝 **Note**: Session token limit applies to a single conversation, not cumulative API calls.
 
+### Vision Model Configuration
+
+Qwen Code includes intelligent vision model auto-switching that detects images in your input and can automatically switch to vision-capable models for multimodal analysis. **This feature is enabled by default** - when you include images in your queries, you'll see a dialog asking how you'd like to handle the vision model switch.
+
+#### Skip the Switch Dialog (Optional)
+
+If you don't want to see the interactive dialog each time, configure the default behavior in your `.qwen/settings.json`:
+
+```json
+{
+  "experimental": {
+    "vlmSwitchMode": "once"
+  }
+}
+```
+
+**Available modes:**
+
+- **`"once"`** - Switch to vision model for this query only, then revert
+- **`"session"`** - Switch to vision model for the entire session
+- **`"persist"`** - Continue with current model (no switching)
+- **Not set** - Show interactive dialog each time (default)
+
+#### Command Line Override
+
+You can also set the behavior via command line:
+
+```bash
+# Switch once per query
+qwen --vlm-switch-mode once
+
+# Switch for entire session
+qwen --vlm-switch-mode session
+
+# Never switch automatically
+qwen --vlm-switch-mode persist
+```
+
+#### Disable Vision Models (Optional)
+
+To completely disable vision model support, add to your `.qwen/settings.json`:
+
+```json
+{
+  "experimental": {
+    "visionModelPreview": false
+  }
+}
+```
+
+> 💡 **Tip**: In YOLO mode (`--yolo`), vision switching happens automatically without prompts when images are detected.
+
 ### Authorization
 
 Choose your preferred authentication method based on your needs:
diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts
index a4296943..8acbe717 100644
--- a/packages/cli/src/config/config.test.ts
+++ b/packages/cli/src/config/config.test.ts
@@ -1514,7 +1514,7 @@ describe('loadCliConfig model selection', () => {
       argv,
     );
 
-    expect(config.getModel()).toBe('qwen3-coder-plus');
+    expect(config.getModel()).toBe('coder-model');
   });
 
   it('always prefers model from argvs', async () => {
diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts
index eaa354d6..e1ee021f 100755
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -82,6 +82,7 @@ export interface CliArgs {
   includeDirectories: string[] | undefined;
   tavilyApiKey: string | undefined;
   screenReader: boolean | undefined;
+  vlmSwitchMode: string | undefined;
 }
 
 export async function parseArguments(settings: Settings): Promise<CliArgs> {
@@ -249,6 +250,13 @@ export async function parseArguments(settings: Settings): Promise<CliArgs> {
           description: 'Enable screen reader mode for accessibility.',
           default: false,
         })
+        .option('vlm-switch-mode', {
+          type: 'string',
+          choices: ['once', 'session', 'persist'],
+          description:
+            'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). Overrides settings files.',
+          default: process.env['VLM_SWITCH_MODE'],
+        })
         .check((argv) => {
           if (argv.prompt && argv['promptInteractive']) {
             throw new Error(
@@ -524,6 +532,9 @@ export async function loadCliConfig(
     argv.screenReader !== undefined
       ? argv.screenReader
       : (settings.ui?.accessibility?.screenReader ?? false);
+
+  const vlmSwitchMode =
+    argv.vlmSwitchMode || settings.experimental?.vlmSwitchMode;
   return new Config({
     sessionId,
     embeddingModel: DEFAULT_GEMINI_EMBEDDING_MODEL,
@@ -630,6 +641,7 @@ export async function loadCliConfig(
     skipNextSpeakerCheck: settings.model?.skipNextSpeakerCheck,
     enablePromptCompletion: settings.general?.enablePromptCompletion ?? false,
     skipLoopDetection: settings.skipLoopDetection ?? false,
+    vlmSwitchMode,
   });
 }
 
diff --git a/packages/cli/src/config/settings.test.ts b/packages/cli/src/config/settings.test.ts
index 7d0e737d..89720114 100644
--- a/packages/cli/src/config/settings.test.ts
+++ b/packages/cli/src/config/settings.test.ts
@@ -69,7 +69,11 @@ const MOCK_WORKSPACE_SETTINGS_PATH = pathActual.join(
 );
 
 // A more flexible type for test data that allows arbitrary properties.
-type TestSettings = Settings & { [key: string]: unknown };
+type TestSettings = Settings & {
+  [key: string]: unknown;
+  nested?: { [key: string]: unknown };
+  nestedObj?: { [key: string]: unknown };
+};
 
 vi.mock('fs', async (importOriginal) => {
   // Get all the functions from the real 'fs' module
@@ -137,6 +141,9 @@ describe('Settings Loading and Merging', () => {
         advanced: {
           excludedEnvVars: [],
         },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
         extensions: {
           disabled: [],
           workspacesWithMigrationNudge: [],
@@ -197,6 +204,9 @@ describe('Settings Loading and Merging', () => {
         advanced: {
           excludedEnvVars: [],
         },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
         extensions: {
           disabled: [],
           workspacesWithMigrationNudge: [],
@@ -260,6 +270,9 @@ describe('Settings Loading and Merging', () => {
         advanced: {
           excludedEnvVars: [],
         },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
         extensions: {
           disabled: [],
           workspacesWithMigrationNudge: [],
@@ -320,6 +333,9 @@ describe('Settings Loading and Merging', () => {
         advanced: {
           excludedEnvVars: [],
         },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
         extensions: {
           disabled: [],
           workspacesWithMigrationNudge: [],
@@ -385,6 +401,9 @@ describe('Settings Loading and Merging', () => {
         advanced: {
           excludedEnvVars: [],
         },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
         extensions: {
           disabled: [],
           workspacesWithMigrationNudge: [],
@@ -477,6 +496,9 @@ describe('Settings Loading and Merging', () => {
         advanced: {
           excludedEnvVars: [],
         },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
         extensions: {
           disabled: [],
           workspacesWithMigrationNudge: [],
@@ -562,6 +584,9 @@ describe('Settings Loading and Merging', () => {
         advanced: {
           excludedEnvVars: [],
         },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
         extensions: {
           disabled: [],
           workspacesWithMigrationNudge: [],
@@ -691,6 +716,9 @@ describe('Settings Loading and Merging', () => {
             '/system/dir',
           ],
         },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
         extensions: {
           disabled: [],
           workspacesWithMigrationNudge: [],
@@ -1431,6 +1459,9 @@ describe('Settings Loading and Merging', () => {
         advanced: {
           excludedEnvVars: [],
         },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
         extensions: {
           disabled: [],
           workspacesWithMigrationNudge: [],
@@ -1516,7 +1547,11 @@ describe('Settings Loading and Merging', () => {
         'workspace_endpoint_from_env/api',
       );
       expect(
-        (settings.workspace.settings as TestSettings)['nested']['value'],
+        (
+          (settings.workspace.settings as TestSettings).nested as {
+            [key: string]: unknown;
+          }
+        )['value'],
       ).toBe('workspace_endpoint_from_env');
       expect((settings.merged as TestSettings)['endpoint']).toBe(
         'workspace_endpoint_from_env/api',
@@ -1766,19 +1801,39 @@ describe('Settings Loading and Merging', () => {
       ).toBeUndefined();
 
       expect(
-        (settings.user.settings as TestSettings)['nestedObj']['nestedNull'],
+        (
+          (settings.user.settings as TestSettings).nestedObj as {
+            [key: string]: unknown;
+          }
+        )['nestedNull'],
       ).toBeNull();
       expect(
-        (settings.user.settings as TestSettings)['nestedObj']['nestedBool'],
+        (
+          (settings.user.settings as TestSettings).nestedObj as {
+            [key: string]: unknown;
+          }
+        )['nestedBool'],
       ).toBe(true);
       expect(
-        (settings.user.settings as TestSettings)['nestedObj']['nestedNum'],
+        (
+          (settings.user.settings as TestSettings).nestedObj as {
+            [key: string]: unknown;
+          }
+        )['nestedNum'],
       ).toBe(0);
       expect(
-        (settings.user.settings as TestSettings)['nestedObj']['nestedString'],
+        (
+          (settings.user.settings as TestSettings).nestedObj as {
+            [key: string]: unknown;
+          }
+        )['nestedString'],
       ).toBe('literal');
       expect(
-        (settings.user.settings as TestSettings)['nestedObj']['anotherEnv'],
+        (
+          (settings.user.settings as TestSettings).nestedObj as {
+            [key: string]: unknown;
+          }
+        )['anotherEnv'],
       ).toBe('env_string_nested_value');
 
       delete process.env['MY_ENV_STRING'];
@@ -1864,6 +1919,9 @@ describe('Settings Loading and Merging', () => {
           advanced: {
             excludedEnvVars: [],
           },
+          experimental: {},
+          contentGenerator: {},
+          systemPromptMappings: {},
           extensions: {
             disabled: [],
             workspacesWithMigrationNudge: [],
@@ -2336,14 +2394,14 @@ describe('Settings Loading and Merging', () => {
           vimMode: false,
         },
         model: {
-          maxSessionTurns: 0,
+          maxSessionTurns: -1,
         },
         context: {
           includeDirectories: [],
         },
         security: {
           folderTrust: {
-            enabled: null,
+            enabled: false,
           },
         },
       };
@@ -2352,9 +2410,9 @@ describe('Settings Loading and Merging', () => {
 
       expect(v1Settings).toEqual({
         vimMode: false,
-        maxSessionTurns: 0,
+        maxSessionTurns: -1,
         includeDirectories: [],
-        folderTrust: null,
+        folderTrust: false,
       });
     });
 
diff --git a/packages/cli/src/config/settings.ts b/packages/cli/src/config/settings.ts
index f3c5a2d6..b22df887 100644
--- a/packages/cli/src/config/settings.ts
+++ b/packages/cli/src/config/settings.ts
@@ -396,6 +396,24 @@ function mergeSettings(
         ]),
       ],
     },
+    experimental: {
+      ...(systemDefaults.experimental || {}),
+      ...(user.experimental || {}),
+      ...(safeWorkspaceWithoutFolderTrust.experimental || {}),
+      ...(system.experimental || {}),
+    },
+    contentGenerator: {
+      ...(systemDefaults.contentGenerator || {}),
+      ...(user.contentGenerator || {}),
+      ...(safeWorkspaceWithoutFolderTrust.contentGenerator || {}),
+      ...(system.contentGenerator || {}),
+    },
+    systemPromptMappings: {
+      ...(systemDefaults.systemPromptMappings || {}),
+      ...(user.systemPromptMappings || {}),
+      ...(safeWorkspaceWithoutFolderTrust.systemPromptMappings || {}),
+      ...(system.systemPromptMappings || {}),
+    },
     extensions: {
       ...(systemDefaults.extensions || {}),
       ...(user.extensions || {}),
diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts
index c7f1e94e..815b5c58 100644
--- a/packages/cli/src/config/settingsSchema.ts
+++ b/packages/cli/src/config/settingsSchema.ts
@@ -746,11 +746,21 @@ export const SETTINGS_SCHEMA = {
         label: 'Vision Model Preview',
         category: 'Experimental',
         requiresRestart: false,
-        default: false,
+        default: true,
         description:
           'Enable vision model support and auto-switching functionality. When disabled, vision models like qwen-vl-max-latest will be hidden and auto-switching will not occur.',
         showInDialog: true,
       },
+      vlmSwitchMode: {
+        type: 'string',
+        label: 'VLM Switch Mode',
+        category: 'Experimental',
+        requiresRestart: false,
+        default: undefined as string | undefined,
+        description:
+          'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). If not set, user will be prompted each time. This is a temporary experimental feature.',
+        showInDialog: false,
+      },
     },
   },
 
diff --git a/packages/cli/src/ui/App.tsx b/packages/cli/src/ui/App.tsx
index 85691182..26090018 100644
--- a/packages/cli/src/ui/App.tsx
+++ b/packages/cli/src/ui/App.tsx
@@ -566,7 +566,9 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => {
       }
 
       // Switch model for future use but return false to stop current retry
-      config.setModel(fallbackModel);
+      config.setModel(fallbackModel).catch((error) => {
+        console.error('Failed to switch to fallback model:', error);
+      });
       config.setFallbackMode(true);
       logFlashFallback(
         config,
@@ -650,17 +652,28 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => {
   }, []);
 
   const handleModelSelect = useCallback(
-    (modelId: string) => {
-      config.setModel(modelId);
-      setCurrentModel(modelId);
-      setIsModelSelectionDialogOpen(false);
-      addItem(
-        {
-          type: MessageType.INFO,
-          text: `Switched model to \`${modelId}\` for this session.`,
-        },
-        Date.now(),
-      );
+    async (modelId: string) => {
+      try {
+        await config.setModel(modelId);
+        setCurrentModel(modelId);
+        setIsModelSelectionDialogOpen(false);
+        addItem(
+          {
+            type: MessageType.INFO,
+            text: `Switched model to \`${modelId}\` for this session.`,
+          },
+          Date.now(),
+        );
+      } catch (error) {
+        console.error('Failed to switch model:', error);
+        addItem(
+          {
+            type: MessageType.ERROR,
+            text: `Failed to switch to model \`${modelId}\`. Please try again.`,
+          },
+          Date.now(),
+        );
+      }
     },
     [config, setCurrentModel, addItem],
   );
@@ -670,7 +683,7 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => {
     if (!contentGeneratorConfig) return [];
 
     const visionModelPreviewEnabled =
-      settings.merged.experimental?.visionModelPreview ?? false;
+      settings.merged.experimental?.visionModelPreview ?? true;
 
     switch (contentGeneratorConfig.authType) {
       case AuthType.QWEN_OAUTH:
@@ -759,7 +772,7 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => {
     setModelSwitchedFromQuotaError,
     refreshStatic,
     () => cancelHandlerRef.current(),
-    settings.merged.experimental?.visionModelPreview ?? false,
+    settings.merged.experimental?.visionModelPreview ?? true,
     handleVisionSwitchRequired,
   );
 
diff --git a/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx b/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx
index f26dcc55..aab45cc2 100644
--- a/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx
+++ b/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx
@@ -46,8 +46,8 @@ describe('ModelSwitchDialog', () => {
         value: VisionSwitchOutcome.SwitchSessionToVL,
       },
       {
-        label: 'Do not switch, show guidance',
-        value: VisionSwitchOutcome.DisallowWithGuidance,
+        label: 'Continue with current model',
+        value: VisionSwitchOutcome.ContinueWithCurrentModel,
       },
     ];
 
@@ -81,18 +81,18 @@ describe('ModelSwitchDialog', () => {
     );
   });
 
-  it('should call onSelect with DisallowWithGuidance when third option is selected', () => {
+  it('should call onSelect with ContinueWithCurrentModel when third option is selected', () => {
     render(<ModelSwitchDialog onSelect={mockOnSelect} />);
 
     const onSelectCallback = mockRadioButtonSelect.mock.calls[0][0].onSelect;
-    onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance);
+    onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel);
 
     expect(mockOnSelect).toHaveBeenCalledWith(
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
     );
   });
 
-  it('should setup escape key handler to call onSelect with DisallowWithGuidance', () => {
+  it('should setup escape key handler to call onSelect with ContinueWithCurrentModel', () => {
     render(<ModelSwitchDialog onSelect={mockOnSelect} />);
 
     expect(mockUseKeypress).toHaveBeenCalledWith(expect.any(Function), {
@@ -104,7 +104,7 @@ describe('ModelSwitchDialog', () => {
     keypressHandler({ name: 'escape' });
 
     expect(mockOnSelect).toHaveBeenCalledWith(
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
     );
   });
 
@@ -126,13 +126,9 @@ describe('ModelSwitchDialog', () => {
 
   describe('VisionSwitchOutcome enum', () => {
     it('should have correct enum values', () => {
-      expect(VisionSwitchOutcome.SwitchOnce).toBe('switch_once');
-      expect(VisionSwitchOutcome.SwitchSessionToVL).toBe(
-        'switch_session_to_vl',
-      );
-      expect(VisionSwitchOutcome.DisallowWithGuidance).toBe(
-        'disallow_with_guidance',
-      );
+      expect(VisionSwitchOutcome.SwitchOnce).toBe('once');
+      expect(VisionSwitchOutcome.SwitchSessionToVL).toBe('session');
+      expect(VisionSwitchOutcome.ContinueWithCurrentModel).toBe('persist');
     });
   });
 
@@ -144,7 +140,7 @@ describe('ModelSwitchDialog', () => {
     // Call multiple times
     onSelectCallback(VisionSwitchOutcome.SwitchOnce);
     onSelectCallback(VisionSwitchOutcome.SwitchSessionToVL);
-    onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance);
+    onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel);
 
     expect(mockOnSelect).toHaveBeenCalledTimes(3);
     expect(mockOnSelect).toHaveBeenNthCalledWith(
@@ -157,7 +153,7 @@ describe('ModelSwitchDialog', () => {
     );
     expect(mockOnSelect).toHaveBeenNthCalledWith(
       3,
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
     );
   });
 
@@ -179,7 +175,7 @@ describe('ModelSwitchDialog', () => {
 
     expect(mockOnSelect).toHaveBeenCalledTimes(2);
     expect(mockOnSelect).toHaveBeenCalledWith(
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
     );
   });
 });
diff --git a/packages/cli/src/ui/components/ModelSwitchDialog.tsx b/packages/cli/src/ui/components/ModelSwitchDialog.tsx
index 1a8c73d4..f2993c47 100644
--- a/packages/cli/src/ui/components/ModelSwitchDialog.tsx
+++ b/packages/cli/src/ui/components/ModelSwitchDialog.tsx
@@ -14,9 +14,9 @@ import {
 import { useKeypress } from '../hooks/useKeypress.js';
 
 export enum VisionSwitchOutcome {
-  SwitchOnce = 'switch_once',
-  SwitchSessionToVL = 'switch_session_to_vl',
-  DisallowWithGuidance = 'disallow_with_guidance',
+  SwitchOnce = 'once',
+  SwitchSessionToVL = 'session',
+  ContinueWithCurrentModel = 'persist',
 }
 
 export interface ModelSwitchDialogProps {
@@ -29,7 +29,7 @@ export const ModelSwitchDialog: React.FC<ModelSwitchDialogProps> = ({
   useKeypress(
     (key) => {
       if (key.name === 'escape') {
-        onSelect(VisionSwitchOutcome.DisallowWithGuidance);
+        onSelect(VisionSwitchOutcome.ContinueWithCurrentModel);
       }
     },
     { isActive: true },
@@ -45,8 +45,8 @@ export const ModelSwitchDialog: React.FC<ModelSwitchDialogProps> = ({
       value: VisionSwitchOutcome.SwitchSessionToVL,
     },
     {
-      label: 'Do not switch, show guidance',
-      value: VisionSwitchOutcome.DisallowWithGuidance,
+      label: 'Continue with current model',
+      value: VisionSwitchOutcome.ContinueWithCurrentModel,
     },
   ];
 
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
index 125620cf..57da20c1 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
+++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
@@ -60,7 +60,9 @@ const mockParseAndFormatApiError = vi.hoisted(() => vi.fn());
 const mockHandleVisionSwitch = vi.hoisted(() =>
   vi.fn().mockResolvedValue({ shouldProceed: true }),
 );
-const mockRestoreOriginalModel = vi.hoisted(() => vi.fn());
+const mockRestoreOriginalModel = vi.hoisted(() =>
+  vi.fn().mockResolvedValue(undefined),
+);
 
 vi.mock('@qwen-code/qwen-code-core', async (importOriginal) => {
   const actualCoreModule = (await importOriginal()) as any;
@@ -301,6 +303,8 @@ describe('useGeminiStream', () => {
           () => {},
           () => {},
           () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
         );
       },
       {
@@ -462,6 +466,8 @@ describe('useGeminiStream', () => {
         () => {},
         () => {},
         () => {},
+        false, // visionModelPreviewEnabled
+        undefined, // onVisionSwitchRequired (optional)
       ),
     );
 
@@ -541,6 +547,8 @@ describe('useGeminiStream', () => {
         () => {},
         () => {},
         () => {},
+        false, // visionModelPreviewEnabled
+        undefined, // onVisionSwitchRequired (optional)
       ),
     );
 
@@ -649,6 +657,8 @@ describe('useGeminiStream', () => {
         () => {},
         () => {},
         () => {},
+        false, // visionModelPreviewEnabled
+        undefined, // onVisionSwitchRequired (optional)
       ),
     );
 
@@ -758,6 +768,8 @@ describe('useGeminiStream', () => {
         () => {},
         () => {},
         () => {},
+        false, // visionModelPreviewEnabled
+        undefined, // onVisionSwitchRequired (optional)
       ),
     );
 
@@ -887,6 +899,8 @@ describe('useGeminiStream', () => {
           () => {},
           () => {},
           cancelSubmitSpy,
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
         ),
       );
 
@@ -1198,6 +1212,8 @@ describe('useGeminiStream', () => {
           () => {},
           () => {},
           () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
         ),
       );
 
@@ -1251,6 +1267,8 @@ describe('useGeminiStream', () => {
           () => {},
           () => {},
           () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
         ),
       );
 
@@ -1301,6 +1319,8 @@ describe('useGeminiStream', () => {
           () => {},
           () => {},
           () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
         ),
       );
 
@@ -1349,6 +1369,8 @@ describe('useGeminiStream', () => {
           () => {},
           () => {},
           () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
         ),
       );
 
@@ -1398,6 +1420,8 @@ describe('useGeminiStream', () => {
           () => {},
           () => {},
           () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
         ),
       );
 
@@ -1487,6 +1511,8 @@ describe('useGeminiStream', () => {
             () => {},
             () => {},
             () => {},
+            false, // visionModelPreviewEnabled
+            undefined, // onVisionSwitchRequired (optional)
           ),
         );
 
@@ -1537,6 +1563,8 @@ describe('useGeminiStream', () => {
         vi.fn(), // setModelSwitched
         vi.fn(), // onEditorClose
         vi.fn(), // onCancelSubmit
+        false, // visionModelPreviewEnabled
+        undefined, // onVisionSwitchRequired (optional)
       ),
     );
 
@@ -1602,6 +1630,8 @@ describe('useGeminiStream', () => {
           () => {},
           () => {},
           () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
         ),
       );
 
@@ -1680,6 +1710,8 @@ describe('useGeminiStream', () => {
           () => {},
           () => {},
           () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
         ),
       );
 
@@ -1734,6 +1766,8 @@ describe('useGeminiStream', () => {
           () => {},
           () => {},
           () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
         ),
       );
 
@@ -1943,6 +1977,8 @@ describe('useGeminiStream', () => {
           () => {},
           () => {},
           () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
         ),
       );
 
@@ -1975,6 +2011,8 @@ describe('useGeminiStream', () => {
           () => {},
           () => {},
           () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
         ),
       );
 
@@ -2028,6 +2066,8 @@ describe('useGeminiStream', () => {
           () => {},
           () => {},
           () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
         ),
       );
 
@@ -2065,6 +2105,8 @@ describe('useGeminiStream', () => {
           () => {},
           () => {},
           () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
         ),
       );
 
diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts
index 7f34eaa2..5bac2c41 100644
--- a/packages/cli/src/ui/hooks/useGeminiStream.ts
+++ b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -89,7 +89,7 @@ export const useGeminiStream = (
   setModelSwitchedFromQuotaError: React.Dispatch<React.SetStateAction<boolean>>,
   onEditorClose: () => void,
   onCancelSubmit: () => void,
-  visionModelPreviewEnabled: boolean = false,
+  visionModelPreviewEnabled: boolean,
   onVisionSwitchRequired?: (query: PartListUnion) => Promise<{
     modelOverride?: string;
     persistSessionModel?: string;
@@ -765,7 +765,9 @@ export const useGeminiStream = (
 
         if (processingStatus === StreamProcessingStatus.UserCancelled) {
           // Restore original model if it was temporarily overridden
-          restoreOriginalModel();
+          restoreOriginalModel().catch((error) => {
+            console.error('Failed to restore original model:', error);
+          });
           isSubmittingQueryRef.current = false;
           return;
         }
@@ -780,10 +782,14 @@ export const useGeminiStream = (
         }
 
         // Restore original model if it was temporarily overridden
-        restoreOriginalModel();
+        restoreOriginalModel().catch((error) => {
+          console.error('Failed to restore original model:', error);
+        });
       } catch (error: unknown) {
         // Restore original model if it was temporarily overridden
-        restoreOriginalModel();
+        restoreOriginalModel().catch((error) => {
+          console.error('Failed to restore original model:', error);
+        });
 
         if (error instanceof UnauthorizedError) {
           onAuthError();
diff --git a/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts b/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts
index dd8c6a06..c04a2404 100644
--- a/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts
+++ b/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts
@@ -8,7 +8,7 @@
 import { describe, it, expect, vi, beforeEach } from 'vitest';
 import { renderHook, act } from '@testing-library/react';
 import type { Part, PartListUnion } from '@google/genai';
-import { AuthType, type Config } from '@qwen-code/qwen-code-core';
+import { AuthType, type Config, ApprovalMode } from '@qwen-code/qwen-code-core';
 import {
   shouldOfferVisionSwitch,
   processVisionSwitchOutcome,
@@ -41,7 +41,7 @@ describe('useVisionAutoSwitch helpers', () => {
       const result = shouldOfferVisionSwitch(
         parts,
         AuthType.QWEN_OAUTH,
-        'qwen-vl-max-latest',
+        'vision-model',
         true,
       );
       expect(result).toBe(false);
@@ -108,6 +108,56 @@ describe('useVisionAutoSwitch helpers', () => {
       );
       expect(result).toBe(false);
     });
+
+    it('returns true when image parts exist in YOLO mode context', () => {
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+      const result = shouldOfferVisionSwitch(
+        parts,
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        true,
+      );
+      expect(result).toBe(true);
+    });
+
+    it('returns false when no image parts exist in YOLO mode context', () => {
+      const parts: PartListUnion = [{ text: 'just text' }];
+      const result = shouldOfferVisionSwitch(
+        parts,
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        true,
+      );
+      expect(result).toBe(false);
+    });
+
+    it('returns false when already using vision model in YOLO mode context', () => {
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+      const result = shouldOfferVisionSwitch(
+        parts,
+        AuthType.QWEN_OAUTH,
+        'vision-model',
+        true,
+      );
+      expect(result).toBe(false);
+    });
+
+    it('returns false when authType is not QWEN_OAUTH in YOLO mode context', () => {
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+      const result = shouldOfferVisionSwitch(
+        parts,
+        AuthType.USE_GEMINI,
+        'qwen3-coder-plus',
+        true,
+      );
+      expect(result).toBe(false);
+    });
   });
 
   describe('processVisionSwitchOutcome', () => {
@@ -125,11 +175,11 @@ describe('useVisionAutoSwitch helpers', () => {
       expect(result).toEqual({ persistSessionModel: vl });
     });
 
-    it('maps DisallowWithGuidance to showGuidance', () => {
+    it('maps ContinueWithCurrentModel to empty result', () => {
       const result = processVisionSwitchOutcome(
-        VisionSwitchOutcome.DisallowWithGuidance,
+        VisionSwitchOutcome.ContinueWithCurrentModel,
       );
-      expect(result).toEqual({ showGuidance: true });
+      expect(result).toEqual({});
     });
   });
 
@@ -151,13 +201,20 @@ describe('useVisionAutoSwitch hook', () => {
     ts: number,
   ) => any;
 
-  const createMockConfig = (authType: AuthType, initialModel: string) => {
+  const createMockConfig = (
+    authType: AuthType,
+    initialModel: string,
+    approvalMode: ApprovalMode = ApprovalMode.DEFAULT,
+    vlmSwitchMode?: string,
+  ) => {
     let currentModel = initialModel;
     const mockConfig: Partial<Config> = {
       getModel: vi.fn(() => currentModel),
-      setModel: vi.fn((m: string) => {
+      setModel: vi.fn(async (m: string) => {
         currentModel = m;
       }),
+      getApprovalMode: vi.fn(() => approvalMode),
+      getVlmSwitchMode: vi.fn(() => vlmSwitchMode),
       getContentGeneratorConfig: vi.fn(() => ({
         authType,
         model: currentModel,
@@ -226,11 +283,9 @@ describe('useVisionAutoSwitch hook', () => {
     expect(onVisionSwitchRequired).not.toHaveBeenCalled();
   });
 
-  it('shows guidance and blocks when dialog returns showGuidance', async () => {
+  it('continues with current model when dialog returns empty result', async () => {
     const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
-    const onVisionSwitchRequired = vi
-      .fn()
-      .mockResolvedValue({ showGuidance: true });
+    const onVisionSwitchRequired = vi.fn().mockResolvedValue({}); // Empty result for ContinueWithCurrentModel
     const { result } = renderHook(() =>
       useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
     );
@@ -245,11 +300,12 @@ describe('useVisionAutoSwitch hook', () => {
       res = await result.current.handleVisionSwitch(parts, userTs, false);
     });
 
-    expect(addItem).toHaveBeenCalledWith(
+    // Should not add any guidance message
+    expect(addItem).not.toHaveBeenCalledWith(
       { type: MessageType.INFO, text: getVisionSwitchGuidanceMessage() },
       userTs,
     );
-    expect(res).toEqual({ shouldProceed: false });
+    expect(res).toEqual({ shouldProceed: true });
     expect(config.setModel).not.toHaveBeenCalled();
   });
 
@@ -258,7 +314,7 @@ describe('useVisionAutoSwitch hook', () => {
     const config = createMockConfig(AuthType.QWEN_OAUTH, initialModel);
     const onVisionSwitchRequired = vi
       .fn()
-      .mockResolvedValue({ modelOverride: 'qwen-vl-max-latest' });
+      .mockResolvedValue({ modelOverride: 'coder-model' });
     const { result } = renderHook(() =>
       useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
     );
@@ -273,20 +329,26 @@ describe('useVisionAutoSwitch hook', () => {
     });
 
     expect(res).toEqual({ shouldProceed: true, originalModel: initialModel });
-    expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
+    expect(config.setModel).toHaveBeenCalledWith('coder-model', {
+      reason: 'vision_auto_switch',
+      context: 'User-prompted vision switch (one-time override)',
+    });
 
     // Now restore
-    act(() => {
-      result.current.restoreOriginalModel();
+    await act(async () => {
+      await result.current.restoreOriginalModel();
+    });
+    expect(config.setModel).toHaveBeenLastCalledWith(initialModel, {
+      reason: 'vision_auto_switch',
+      context: 'Restoring original model after vision switch',
     });
-    expect(config.setModel).toHaveBeenLastCalledWith(initialModel);
   });
 
   it('persists session model when dialog requests persistence', async () => {
     const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
     const onVisionSwitchRequired = vi
       .fn()
-      .mockResolvedValue({ persistSessionModel: 'qwen-vl-max-latest' });
+      .mockResolvedValue({ persistSessionModel: 'coder-model' });
     const { result } = renderHook(() =>
       useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
     );
@@ -301,16 +363,17 @@ describe('useVisionAutoSwitch hook', () => {
     });
 
     expect(res).toEqual({ shouldProceed: true });
-    expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
+    expect(config.setModel).toHaveBeenCalledWith('coder-model', {
+      reason: 'vision_auto_switch',
+      context: 'User-prompted vision switch (session persistent)',
+    });
 
     // Restore should be a no-op since no one-time override was used
-    act(() => {
-      result.current.restoreOriginalModel();
+    await act(async () => {
+      await result.current.restoreOriginalModel();
     });
     // Last call should still be the persisted model set
-    expect((config.setModel as any).mock.calls.pop()?.[0]).toBe(
-      'qwen-vl-max-latest',
-    );
+    expect((config.setModel as any).mock.calls.pop()?.[0]).toBe('coder-model');
   });
 
   it('returns shouldProceed=true when dialog returns no special flags', async () => {
@@ -371,4 +434,420 @@ describe('useVisionAutoSwitch hook', () => {
     expect(res).toEqual({ shouldProceed: true });
     expect(onVisionSwitchRequired).not.toHaveBeenCalled();
   });
+
+  describe('YOLO mode behavior', () => {
+    it('automatically switches to vision model in YOLO mode without showing dialog', async () => {
+      const initialModel = 'qwen3-coder-plus';
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        initialModel,
+        ApprovalMode.YOLO,
+      );
+      const onVisionSwitchRequired = vi.fn(); // Should not be called in YOLO mode
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+
+      let res: any;
+      await act(async () => {
+        res = await result.current.handleVisionSwitch(parts, 7070, false);
+      });
+
+      // Should automatically switch without calling the dialog
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+      expect(res).toEqual({
+        shouldProceed: true,
+        originalModel: initialModel,
+      });
+      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
+        reason: 'vision_auto_switch',
+        context: 'YOLO mode auto-switch for image content',
+      });
+    });
+
+    it('does not switch in YOLO mode when no images are present', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.YOLO,
+      );
+      const onVisionSwitchRequired = vi.fn();
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [{ text: 'no images here' }];
+
+      let res: any;
+      await act(async () => {
+        res = await result.current.handleVisionSwitch(parts, 8080, false);
+      });
+
+      expect(res).toEqual({ shouldProceed: true });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+      expect(config.setModel).not.toHaveBeenCalled();
+    });
+
+    it('does not switch in YOLO mode when already using vision model', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'vision-model',
+        ApprovalMode.YOLO,
+      );
+      const onVisionSwitchRequired = vi.fn();
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+
+      let res: any;
+      await act(async () => {
+        res = await result.current.handleVisionSwitch(parts, 9090, false);
+      });
+
+      expect(res).toEqual({ shouldProceed: true });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+      expect(config.setModel).not.toHaveBeenCalled();
+    });
+
+    it('restores original model after YOLO mode auto-switch', async () => {
+      const initialModel = 'qwen3-coder-plus';
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        initialModel,
+        ApprovalMode.YOLO,
+      );
+      const onVisionSwitchRequired = vi.fn();
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+
+      // First, trigger the auto-switch
+      await act(async () => {
+        await result.current.handleVisionSwitch(parts, 10100, false);
+      });
+
+      // Verify model was switched
+      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
+        reason: 'vision_auto_switch',
+        context: 'YOLO mode auto-switch for image content',
+      });
+
+      // Now restore the original model
+      await act(async () => {
+        await result.current.restoreOriginalModel();
+      });
+
+      // Verify model was restored
+      expect(config.setModel).toHaveBeenLastCalledWith(initialModel, {
+        reason: 'vision_auto_switch',
+        context: 'Restoring original model after vision switch',
+      });
+    });
+
+    it('does not switch in YOLO mode when authType is not QWEN_OAUTH', async () => {
+      const config = createMockConfig(
+        AuthType.USE_GEMINI,
+        'qwen3-coder-plus',
+        ApprovalMode.YOLO,
+      );
+      const onVisionSwitchRequired = vi.fn();
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+
+      let res: any;
+      await act(async () => {
+        res = await result.current.handleVisionSwitch(parts, 11110, false);
+      });
+
+      expect(res).toEqual({ shouldProceed: true });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+      expect(config.setModel).not.toHaveBeenCalled();
+    });
+
+    it('does not switch in YOLO mode when visionModelPreviewEnabled is false', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.YOLO,
+      );
+      const onVisionSwitchRequired = vi.fn();
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          false,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+
+      let res: any;
+      await act(async () => {
+        res = await result.current.handleVisionSwitch(parts, 12120, false);
+      });
+
+      expect(res).toEqual({ shouldProceed: true });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+      expect(config.setModel).not.toHaveBeenCalled();
+    });
+
+    it('handles multiple image formats in YOLO mode', async () => {
+      const initialModel = 'qwen3-coder-plus';
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        initialModel,
+        ApprovalMode.YOLO,
+      );
+      const onVisionSwitchRequired = vi.fn();
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { text: 'Here are some images:' },
+        { inlineData: { mimeType: 'image/jpeg', data: '...' } },
+        { fileData: { mimeType: 'image/png', fileUri: 'file://image.png' } },
+        { text: 'Please analyze them.' },
+      ];
+
+      let res: any;
+      await act(async () => {
+        res = await result.current.handleVisionSwitch(parts, 13130, false);
+      });
+
+      expect(res).toEqual({
+        shouldProceed: true,
+        originalModel: initialModel,
+      });
+      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
+        reason: 'vision_auto_switch',
+        context: 'YOLO mode auto-switch for image content',
+      });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('VLM switch mode default behavior', () => {
+    it('should automatically switch once when vlmSwitchMode is "once"', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        'once',
+      );
+      const onVisionSwitchRequired = vi.fn(); // Should not be called
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(switchResult.originalModel).toBe('qwen3-coder-plus');
+      expect(config.setModel).toHaveBeenCalledWith('vision-model', {
+        reason: 'vision_auto_switch',
+        context: 'Default VLM switch mode: once (one-time override)',
+      });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+    });
+
+    it('should switch session when vlmSwitchMode is "session"', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        'session',
+      );
+      const onVisionSwitchRequired = vi.fn(); // Should not be called
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(switchResult.originalModel).toBeUndefined(); // No original model for session switch
+      expect(config.setModel).toHaveBeenCalledWith('vision-model', {
+        reason: 'vision_auto_switch',
+        context: 'Default VLM switch mode: session (session persistent)',
+      });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+    });
+
+    it('should continue with current model when vlmSwitchMode is "persist"', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        'persist',
+      );
+      const onVisionSwitchRequired = vi.fn(); // Should not be called
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(switchResult.originalModel).toBeUndefined();
+      expect(config.setModel).not.toHaveBeenCalled();
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+    });
+
+    it('should fall back to user prompt when vlmSwitchMode is not set', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        undefined, // No default mode
+      );
+      const onVisionSwitchRequired = vi
+        .fn()
+        .mockResolvedValue({ modelOverride: 'vision-model' });
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(onVisionSwitchRequired).toHaveBeenCalledWith(parts);
+    });
+
+    it('should fall back to persist behavior when vlmSwitchMode has invalid value', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        'invalid-value',
+      );
+      const onVisionSwitchRequired = vi.fn(); // Should not be called
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(switchResult.originalModel).toBeUndefined();
+      // For invalid values, it should continue with current model (persist behavior)
+      expect(config.setModel).not.toHaveBeenCalled();
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+    });
+  });
 });
diff --git a/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts b/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
index d4b9629c..f489c843 100644
--- a/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
+++ b/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
@@ -5,7 +5,7 @@
  */
 
 import { type PartListUnion, type Part } from '@google/genai';
-import { AuthType, type Config } from '@qwen-code/qwen-code-core';
+import { AuthType, type Config, ApprovalMode } from '@qwen-code/qwen-code-core';
 import { useCallback, useRef } from 'react';
 import { VisionSwitchOutcome } from '../components/ModelSwitchDialog.js';
 import {
@@ -121,7 +121,7 @@ export function shouldOfferVisionSwitch(
   parts: PartListUnion,
   authType: AuthType,
   currentModel: string,
-  visionModelPreviewEnabled: boolean = false,
+  visionModelPreviewEnabled: boolean = true,
 ): boolean {
   // Only trigger for qwen-oauth
   if (authType !== AuthType.QWEN_OAUTH) {
@@ -166,11 +166,11 @@ export function processVisionSwitchOutcome(
     case VisionSwitchOutcome.SwitchSessionToVL:
       return { persistSessionModel: vlModelId };
 
-    case VisionSwitchOutcome.DisallowWithGuidance:
-      return { showGuidance: true };
+    case VisionSwitchOutcome.ContinueWithCurrentModel:
+      return {}; // Continue with current model, no changes needed
 
     default:
-      return { showGuidance: true };
+      return {}; // Default to continuing with current model
   }
 }
 
@@ -198,7 +198,7 @@ export interface VisionSwitchHandlingResult {
 export function useVisionAutoSwitch(
   config: Config,
   addItem: UseHistoryManagerReturn['addItem'],
-  visionModelPreviewEnabled: boolean = false,
+  visionModelPreviewEnabled: boolean = true,
   onVisionSwitchRequired?: (query: PartListUnion) => Promise<{
     modelOverride?: string;
     persistSessionModel?: string;
@@ -252,35 +252,91 @@ export function useVisionAutoSwitch(
         return { shouldProceed: true };
       }
 
-      try {
-        const visionSwitchResult = await onVisionSwitchRequired(query);
+      // In YOLO mode, automatically switch to vision model without user interaction
+      if (config.getApprovalMode() === ApprovalMode.YOLO) {
+        const vlModelId = getDefaultVisionModel();
+        originalModelRef.current = config.getModel();
+        await config.setModel(vlModelId, {
+          reason: 'vision_auto_switch',
+          context: 'YOLO mode auto-switch for image content',
+        });
+        return {
+          shouldProceed: true,
+          originalModel: originalModelRef.current,
+        };
+      }
 
-        if (visionSwitchResult.showGuidance) {
-          // Show guidance and don't proceed with the request
-          addItem(
-            {
-              type: MessageType.INFO,
-              text: getVisionSwitchGuidanceMessage(),
-            },
-            userMessageTimestamp,
-          );
-          return { shouldProceed: false };
+      // Check if there's a default VLM switch mode configured
+      const defaultVlmSwitchMode = config.getVlmSwitchMode();
+      if (defaultVlmSwitchMode) {
+        // Convert string value to VisionSwitchOutcome enum
+        let outcome: VisionSwitchOutcome;
+        switch (defaultVlmSwitchMode) {
+          case 'once':
+            outcome = VisionSwitchOutcome.SwitchOnce;
+            break;
+          case 'session':
+            outcome = VisionSwitchOutcome.SwitchSessionToVL;
+            break;
+          case 'persist':
+            outcome = VisionSwitchOutcome.ContinueWithCurrentModel;
+            break;
+          default:
+            // Invalid value: continue with the current model (same as 'persist'), no prompt
+            outcome = VisionSwitchOutcome.ContinueWithCurrentModel;
         }
 
+        // Process the default outcome
+        const visionSwitchResult = processVisionSwitchOutcome(outcome);
+
         if (visionSwitchResult.modelOverride) {
           // One-time model override
           originalModelRef.current = config.getModel();
-          config.setModel(visionSwitchResult.modelOverride);
+          await config.setModel(visionSwitchResult.modelOverride, {
+            reason: 'vision_auto_switch',
+            context: `Default VLM switch mode: ${defaultVlmSwitchMode} (one-time override)`,
+          });
           return {
             shouldProceed: true,
             originalModel: originalModelRef.current,
           };
         } else if (visionSwitchResult.persistSessionModel) {
           // Persistent session model change
-          config.setModel(visionSwitchResult.persistSessionModel);
+          await config.setModel(visionSwitchResult.persistSessionModel, {
+            reason: 'vision_auto_switch',
+            context: `Default VLM switch mode: ${defaultVlmSwitchMode} (session persistent)`,
+          });
           return { shouldProceed: true };
         }
 
+        // For ContinueWithCurrentModel or any other case, proceed with current model
+        return { shouldProceed: true };
+      }
+
+      try {
+        const visionSwitchResult = await onVisionSwitchRequired(query);
+
+        if (visionSwitchResult.modelOverride) {
+          // One-time model override
+          originalModelRef.current = config.getModel();
+          await config.setModel(visionSwitchResult.modelOverride, {
+            reason: 'vision_auto_switch',
+            context: 'User-prompted vision switch (one-time override)',
+          });
+          return {
+            shouldProceed: true,
+            originalModel: originalModelRef.current,
+          };
+        } else if (visionSwitchResult.persistSessionModel) {
+          // Persistent session model change
+          await config.setModel(visionSwitchResult.persistSessionModel, {
+            reason: 'vision_auto_switch',
+            context: 'User-prompted vision switch (session persistent)',
+          });
+          return { shouldProceed: true };
+        }
+
+        // For ContinueWithCurrentModel or any other case, proceed with current model
         return { shouldProceed: true };
       } catch (_error) {
         // If vision switch dialog was cancelled or errored, don't proceed
@@ -290,9 +346,12 @@ export function useVisionAutoSwitch(
     [config, addItem, visionModelPreviewEnabled, onVisionSwitchRequired],
   );
 
-  const restoreOriginalModel = useCallback(() => {
+  const restoreOriginalModel = useCallback(async () => {
     if (originalModelRef.current) {
-      config.setModel(originalModelRef.current);
+      await config.setModel(originalModelRef.current, {
+        reason: 'vision_auto_switch',
+        context: 'Restoring original model after vision switch',
+      });
       originalModelRef.current = null;
     }
   }, [config]);
diff --git a/packages/cli/src/ui/models/availableModels.ts b/packages/cli/src/ui/models/availableModels.ts
index 7c3a1cf5..9ac4d420 100644
--- a/packages/cli/src/ui/models/availableModels.ts
+++ b/packages/cli/src/ui/models/availableModels.ts
@@ -10,9 +10,12 @@ export type AvailableModel = {
   isVision?: boolean;
 };
 
+export const MAINLINE_VLM = 'vision-model';
+export const MAINLINE_CODER = 'coder-model';
+
 export const AVAILABLE_MODELS_QWEN: AvailableModel[] = [
-  { id: 'qwen3-coder-plus', label: 'qwen3-coder-plus' },
-  { id: 'qwen-vl-max-latest', label: 'qwen-vl-max', isVision: true },
+  { id: MAINLINE_CODER, label: MAINLINE_CODER },
+  { id: MAINLINE_VLM, label: MAINLINE_VLM, isVision: true },
 ];
 
 /**
@@ -42,7 +45,7 @@ export function getOpenAIAvailableModelFromEnv(): AvailableModel | null {
  * until our coding model supports multimodal.
  */
 export function getDefaultVisionModel(): string {
-  return 'qwen-vl-max-latest';
+  return MAINLINE_VLM;
 }
 
 export function isVisionModel(modelId: string): boolean {
diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts
index 8d18b89a..5d83ce20 100644
--- a/packages/core/src/config/config.test.ts
+++ b/packages/core/src/config/config.test.ts
@@ -737,4 +737,85 @@ describe('setApprovalMode with folder trust', () => {
     expect(() => config.setApprovalMode(ApprovalMode.AUTO_EDIT)).not.toThrow();
     expect(() => config.setApprovalMode(ApprovalMode.DEFAULT)).not.toThrow();
   });
+
+  describe('Model Switch Logging', () => {
+    it('should log model switch when setModel is called with different model', async () => {
+      const config = new Config({
+        sessionId: 'test-model-switch',
+        targetDir: '.',
+        debugMode: false,
+        model: 'qwen3-coder-plus',
+        cwd: '.',
+      });
+
+      // Initialize the config to set up content generator
+      await config.initialize();
+
+      // Spy on the logger's logModelSwitch method (the real implementation still runs)
+      const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch');
+
+      // Change the model
+      await config.setModel('qwen-vl-max-latest', {
+        reason: 'vision_auto_switch',
+        context: 'Test model switch',
+      });
+
+      // Verify that logModelSwitch was called with correct parameters
+      expect(logModelSwitchSpy).toHaveBeenCalledWith({
+        fromModel: 'qwen3-coder-plus',
+        toModel: 'qwen-vl-max-latest',
+        reason: 'vision_auto_switch',
+        context: 'Test model switch',
+      });
+    });
+
+    it('should not log when setModel is called with same model', async () => {
+      const config = new Config({
+        sessionId: 'test-same-model',
+        targetDir: '.',
+        debugMode: false,
+        model: 'qwen3-coder-plus',
+        cwd: '.',
+      });
+
+      // Initialize the config to set up content generator
+      await config.initialize();
+
+      // Spy on the logger's logModelSwitch method (the real implementation still runs)
+      const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch');
+
+      // Set the same model
+      await config.setModel('qwen3-coder-plus');
+
+      // Verify that logModelSwitch was not called
+      expect(logModelSwitchSpy).not.toHaveBeenCalled();
+    });
+
+    it('should use default reason when no options provided', async () => {
+      const config = new Config({
+        sessionId: 'test-default-reason',
+        targetDir: '.',
+        debugMode: false,
+        model: 'qwen3-coder-plus',
+        cwd: '.',
+      });
+
+      // Initialize the config to set up content generator
+      await config.initialize();
+
+      // Spy on the logger's logModelSwitch method (the real implementation still runs)
+      const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch');
+
+      // Change the model without options
+      await config.setModel('qwen-vl-max-latest');
+
+      // Verify that logModelSwitch was called with default reason
+      expect(logModelSwitchSpy).toHaveBeenCalledWith({
+        fromModel: 'qwen3-coder-plus',
+        toModel: 'qwen-vl-max-latest',
+        reason: 'manual',
+        context: undefined,
+      });
+    });
+  });
 });
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 6956fb06..9ff19919 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -56,6 +56,7 @@ import {
   DEFAULT_GEMINI_FLASH_MODEL,
 } from './models.js';
 import { Storage } from './storage.js';
+import { Logger, type ModelSwitchEvent } from '../core/logger.js';
 
 // Re-export OAuth config type
 export type { AnyToolInvocation, MCPOAuthConfig };
@@ -239,6 +240,7 @@ export interface ConfigParameters {
   extensionManagement?: boolean;
   enablePromptCompletion?: boolean;
   skipLoopDetection?: boolean;
+  vlmSwitchMode?: string;
 }
 
 export class Config {
@@ -330,9 +332,11 @@ export class Config {
   private readonly extensionManagement: boolean;
   private readonly enablePromptCompletion: boolean = false;
   private readonly skipLoopDetection: boolean;
+  private readonly vlmSwitchMode: string | undefined;
   private initialized: boolean = false;
   readonly storage: Storage;
   private readonly fileExclusions: FileExclusions;
+  private logger: Logger | null = null;
 
   constructor(params: ConfigParameters) {
     this.sessionId = params.sessionId;
@@ -424,8 +428,15 @@ export class Config {
     this.extensionManagement = params.extensionManagement ?? false;
     this.storage = new Storage(this.targetDir);
     this.enablePromptCompletion = params.enablePromptCompletion ?? false;
+    this.vlmSwitchMode = params.vlmSwitchMode;
     this.fileExclusions = new FileExclusions(this);
 
+    // Kick off logger initialization without awaiting it; failures are only debug-logged
+    this.logger = new Logger(this.sessionId, this.storage);
+    this.logger.initialize().catch((error) => {
+      console.debug('Failed to initialize logger:', error);
+    });
+
     if (params.contextFileName) {
       setGeminiMdFilename(params.contextFileName);
     }
@@ -517,21 +528,47 @@ export class Config {
     return this.contentGeneratorConfig?.model || this.model;
   }
 
-  setModel(newModel: string): void {
+  async setModel(
+    newModel: string,
+    options?: {
+      reason?: ModelSwitchEvent['reason']; // why the switch happened; defaults to 'manual'
+      context?: string; // optional free-form detail recorded with the log entry
+    },
+  ): Promise<void> {
+    const oldModel = this.getModel(); // snapshot before the config is mutated
+
     if (this.contentGeneratorConfig) {
       this.contentGeneratorConfig.model = newModel;
     }
 
+    // Record the switch only when the model actually changed
+    if (oldModel !== newModel && this.logger) {
+      const switchEvent: ModelSwitchEvent = {
+        fromModel: oldModel,
+        toModel: newModel,
+        reason: options?.reason ?? 'manual',
+        context: options?.context,
+      };
+
+      // Fire-and-forget: a logging failure must never block or fail the switch
+      this.logger.logModelSwitch(switchEvent).catch((error) => {
+        console.debug('Failed to log model switch:', error);
+      });
+    }
+
     // Reinitialize chat with updated configuration while preserving history
     const geminiClient = this.getGeminiClient();
     if (geminiClient && geminiClient.isInitialized()) {
-      // Use async operation but don't await to avoid blocking
-      geminiClient.reinitialize().catch((error) => {
+      // Await so the returned promise settles only after reinitialize() has finished
+      try {
+        await geminiClient.reinitialize();
+      } catch (error) {
         console.error(
           'Failed to reinitialize chat with updated config:',
           error,
         );
-      });
+        throw error; // surface the failure to the caller after logging it
+      }
     }
   }
 
@@ -938,6 +975,10 @@ export class Config {
     return this.skipLoopDetection;
   }
 
+  getVlmSwitchMode(): string | undefined {
+    return this.vlmSwitchMode; // stored as-is from ConfigParameters.vlmSwitchMode; may be undefined
+  }
+
   async getGitService(): Promise<GitService> {
     if (!this.gitService) {
       this.gitService = new GitService(this.targetDir, this.storage);
diff --git a/packages/core/src/config/flashFallback.test.ts b/packages/core/src/config/flashFallback.test.ts
index a0034ea1..4173786c 100644
--- a/packages/core/src/config/flashFallback.test.ts
+++ b/packages/core/src/config/flashFallback.test.ts
@@ -41,7 +41,7 @@ describe('Flash Model Fallback Configuration', () => {
   // with the fallback mechanism. This will be necessary we introduce more
   // intelligent model routing.
   describe('setModel', () => {
-    it('should only mark as switched if contentGeneratorConfig exists', () => {
+    it('should only mark as switched if contentGeneratorConfig exists', async () => {
       // Create config without initializing contentGeneratorConfig
       const newConfig = new Config({
         sessionId: 'test-session-2',
@@ -52,15 +52,15 @@ describe('Flash Model Fallback Configuration', () => {
       });
 
       // Should not crash when contentGeneratorConfig is undefined
-      newConfig.setModel(DEFAULT_GEMINI_FLASH_MODEL);
+      await newConfig.setModel(DEFAULT_GEMINI_FLASH_MODEL);
       expect(newConfig.isInFallbackMode()).toBe(false);
     });
   });
 
   describe('getModel', () => {
-    it('should return contentGeneratorConfig model if available', () => {
+    it('should return contentGeneratorConfig model if available', async () => {
       // Simulate initialized content generator config
-      config.setModel(DEFAULT_GEMINI_FLASH_MODEL);
+      await config.setModel(DEFAULT_GEMINI_FLASH_MODEL);
       expect(config.getModel()).toBe(DEFAULT_GEMINI_FLASH_MODEL);
     });
 
@@ -88,8 +88,8 @@ describe('Flash Model Fallback Configuration', () => {
       expect(config.isInFallbackMode()).toBe(false);
     });
 
-    it('should persist switched state throughout session', () => {
-      config.setModel(DEFAULT_GEMINI_FLASH_MODEL);
+    it('should persist switched state throughout session', async () => {
+      await config.setModel(DEFAULT_GEMINI_FLASH_MODEL);
       // Setting state for fallback mode as is expected of clients
       config.setFallbackMode(true);
       expect(config.isInFallbackMode()).toBe(true);
diff --git a/packages/core/src/config/models.ts b/packages/core/src/config/models.ts
index 2a743dad..fd548737 100644
--- a/packages/core/src/config/models.ts
+++ b/packages/core/src/config/models.ts
@@ -4,11 +4,10 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-export const DEFAULT_QWEN_MODEL = 'qwen3-coder-plus';
-// We do not have a fallback model for now, but note it here anyway.
-export const DEFAULT_QWEN_FLASH_MODEL = 'qwen3-coder-flash';
+export const DEFAULT_QWEN_MODEL = 'coder-model';
+export const DEFAULT_QWEN_FLASH_MODEL = 'coder-model'; // currently the same id as DEFAULT_QWEN_MODEL
 
-export const DEFAULT_GEMINI_MODEL = 'qwen3-coder-plus';
+export const DEFAULT_GEMINI_MODEL = 'coder-model';
 export const DEFAULT_GEMINI_FLASH_MODEL = 'gemini-2.5-flash';
 export const DEFAULT_GEMINI_FLASH_LITE_MODEL = 'gemini-2.5-flash-lite';
 
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index ae0c4205..8b965001 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -1053,7 +1053,7 @@ export class GeminiClient {
           error,
         );
         if (accepted !== false && accepted !== null) {
-          this.config.setModel(fallbackModel);
+          await this.config.setModel(fallbackModel);
           this.config.setFallbackMode(true);
           return fallbackModel;
         }
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index bf8aa804..9f541601 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -224,7 +224,7 @@ export class GeminiChat {
           error,
         );
         if (accepted !== false && accepted !== null) {
-          this.config.setModel(fallbackModel);
+          await this.config.setModel(fallbackModel);
           this.config.setFallbackMode(true);
           return fallbackModel;
         }
diff --git a/packages/core/src/core/logger.test.ts b/packages/core/src/core/logger.test.ts
index 0b506b4c..29793a33 100644
--- a/packages/core/src/core/logger.test.ts
+++ b/packages/core/src/core/logger.test.ts
@@ -755,4 +755,84 @@ describe('Logger', () => {
       expect(logger['messageId']).toBe(0);
     });
   });
+
+  describe('Model Switch Logging', () => {
+    it('should log model switch events correctly', async () => {
+      const testSessionId = 'test-session-model-switch';
+      const logger = new Logger(testSessionId, new Storage(process.cwd()));
+      await logger.initialize();
+
+      const modelSwitchEvent = {
+        fromModel: 'qwen3-coder-plus',
+        toModel: 'qwen-vl-max-latest',
+        reason: 'vision_auto_switch' as const, // keep the literal type ModelSwitchEvent needs
+        context: 'YOLO mode auto-switch for image content',
+      };
+
+      await logger.logModelSwitch(modelSwitchEvent);
+
+      // Read the log file back to verify the entry was written
+      const logContent = await fs.readFile(TEST_LOG_FILE_PATH, 'utf-8');
+      const logs: LogEntry[] = JSON.parse(logContent);
+
+      const modelSwitchLog = logs.find(
+        (log) =>
+          log.sessionId === testSessionId &&
+          log.type === MessageSenderType.MODEL_SWITCH,
+      );
+
+      expect(modelSwitchLog).toBeDefined();
+      expect(modelSwitchLog!.type).toBe(MessageSenderType.MODEL_SWITCH);
+
+      const loggedEvent = JSON.parse(modelSwitchLog!.message); // JSON-serialized event
+      expect(loggedEvent.fromModel).toBe('qwen3-coder-plus');
+      expect(loggedEvent.toModel).toBe('qwen-vl-max-latest');
+      expect(loggedEvent.reason).toBe('vision_auto_switch');
+      expect(loggedEvent.context).toBe(
+        'YOLO mode auto-switch for image content',
+      );
+    });
+
+    it('should handle multiple model switch events', async () => {
+      const testSessionId = 'test-session-multiple-switches';
+      const logger = new Logger(testSessionId, new Storage(process.cwd()));
+      await logger.initialize();
+
+      // First switch: coder model -> vision model
+      await logger.logModelSwitch({
+        fromModel: 'qwen3-coder-plus',
+        toModel: 'qwen-vl-max-latest',
+        reason: 'vision_auto_switch',
+        context: 'Auto-switch for image',
+      });
+
+      // Second switch: back to the original model
+      await logger.logModelSwitch({
+        fromModel: 'qwen-vl-max-latest',
+        toModel: 'qwen3-coder-plus',
+        reason: 'vision_auto_switch',
+        context: 'Restoring original model',
+      });
+
+      // Read the log file back; both entries must be present, in call order
+      const logContent = await fs.readFile(TEST_LOG_FILE_PATH, 'utf-8');
+      const logs: LogEntry[] = JSON.parse(logContent);
+
+      const modelSwitchLogs = logs.filter(
+        (log) =>
+          log.sessionId === testSessionId &&
+          log.type === MessageSenderType.MODEL_SWITCH,
+      );
+
+      expect(modelSwitchLogs).toHaveLength(2);
+
+      const firstSwitch = JSON.parse(modelSwitchLogs[0].message);
+      expect(firstSwitch.fromModel).toBe('qwen3-coder-plus');
+      expect(firstSwitch.toModel).toBe('qwen-vl-max-latest');
+
+      const secondSwitch = JSON.parse(modelSwitchLogs[1].message);
+      expect(secondSwitch.fromModel).toBe('qwen-vl-max-latest');
+      expect(secondSwitch.toModel).toBe('qwen3-coder-plus');
+    });
+  });
 });
diff --git a/packages/core/src/core/logger.ts b/packages/core/src/core/logger.ts
index a837b25d..4a9604b7 100644
--- a/packages/core/src/core/logger.ts
+++ b/packages/core/src/core/logger.ts
@@ -13,6 +13,7 @@ const LOG_FILE_NAME = 'logs.json';
 
 export enum MessageSenderType {
   USER = 'user',
+  MODEL_SWITCH = 'model_switch',
 }
 
 export interface LogEntry {
@@ -23,6 +24,13 @@ export interface LogEntry {
   message: string;
 }
 
+export interface ModelSwitchEvent {
+  fromModel: string; // model that was active before the switch
+  toModel: string; // model that was switched to
+  reason: 'vision_auto_switch' | 'manual' | 'fallback' | 'other'; // what triggered the switch
+  context?: string; // optional free-form detail about the switch
+}
+
 // This regex matches any character that is NOT a letter (a-z, A-Z),
 // a number (0-9), a hyphen (-), an underscore (_), or a dot (.).
 
@@ -270,6 +278,17 @@ export class Logger {
     }
   }
 
+  async logModelSwitch(event: ModelSwitchEvent): Promise<void> {
+    // Pick the known fields explicitly, in a fixed key order, so the
+    // serialized payload never carries extra properties from `event`.
+    const { fromModel, toModel, reason, context } = event;
+    const payload = JSON.stringify({ fromModel, toModel, reason, context });
+
+    // Persist it through the regular message path, tagged MODEL_SWITCH so
+    // it can be told apart from user messages.
+    await this.logMessage(MessageSenderType.MODEL_SWITCH, payload);
+  }
+
   private _checkpointPath(tag: string): string {
     if (!tag.length) {
       throw new Error('No checkpoint tag specified.');
diff --git a/packages/core/src/core/prompts.ts b/packages/core/src/core/prompts.ts
index e18987a8..f08cbf75 100644
--- a/packages/core/src/core/prompts.ts
+++ b/packages/core/src/core/prompts.ts
@@ -820,6 +820,14 @@ function getToolCallExamples(model?: string): string {
     if (/qwen[^-]*-vl/i.test(model)) {
       return qwenVlToolCallExamples;
     }
+    // Match the generic coder-model id (uses the same tool-call examples as qwen3-coder)
+    if (/coder-model/i.test(model)) {
+      return qwenCoderToolCallExamples;
+    }
+    // Match the generic vision-model id (uses the same tool-call examples as the Qwen VL models)
+    if (/vision-model/i.test(model)) {
+      return qwenVlToolCallExamples;
+    }
   }
 
   return generalToolCallExamples;
diff --git a/packages/core/src/core/tokenLimits.ts b/packages/core/src/core/tokenLimits.ts
index 67ff6a86..6a3e7e86 100644
--- a/packages/core/src/core/tokenLimits.ts
+++ b/packages/core/src/core/tokenLimits.ts
@@ -111,6 +111,12 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
   // Commercial Qwen3-Coder-Flash: 1M token context
   [/^qwen3-coder-flash(-.*)?$/, LIMITS['1m']], // catches "qwen3-coder-flash" and date variants
 
+  // Generic coder-model: same as qwen3-coder-plus (1M token context)
+  [/^coder-model$/, LIMITS['1m']],
+
+  // Commercial Qwen3-Max-Preview: 256K token context
+  [/^qwen3-max-preview(-.*)?$/, LIMITS['256k']], // catches "qwen3-max-preview" and date variants
+
   // Open-source Qwen3-Coder variants: 256K native
   [/^qwen3-coder-.*$/, LIMITS['256k']],
   // Open-source Qwen3 2507 variants: 256K native
@@ -131,6 +137,9 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
   // Qwen Vision Models
   [/^qwen-vl-max.*$/, LIMITS['128k']],
 
+  // Generic vision-model: same as qwen-vl-max (128K token context)
+  [/^vision-model$/, LIMITS['128k']],
+
   // -------------------
   // ByteDance Seed-OSS (512K)
   // -------------------
@@ -166,8 +175,20 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
   // Qwen3-Coder-Plus: 65,536 max output tokens
   [/^qwen3-coder-plus(-.*)?$/, LIMITS['64k']],
 
+  // Generic coder-model: same as qwen3-coder-plus (64K max output tokens)
+  [/^coder-model$/, LIMITS['64k']],
+
+  // Qwen3-Max-Preview: 65,536 max output tokens
+  [/^qwen3-max-preview(-.*)?$/, LIMITS['64k']],
+
   // Qwen-VL-Max-Latest: 8,192 max output tokens
   [/^qwen-vl-max-latest$/, LIMITS['8k']],
+
+  // Generic vision-model: same as qwen-vl-max-latest (8K max output tokens)
+  [/^vision-model$/, LIMITS['8k']],
+
+  // Qwen3-VL-Plus: 8,192 max output tokens
+  [/^qwen3-vl-plus$/, LIMITS['8k']],
 ];
 
 /**
diff --git a/packages/core/src/subagents/subagent.test.ts b/packages/core/src/subagents/subagent.test.ts
index 0388f3e5..eabd0a9d 100644
--- a/packages/core/src/subagents/subagent.test.ts
+++ b/packages/core/src/subagents/subagent.test.ts
@@ -72,6 +72,19 @@ async function createMockConfig(
   } as unknown as ToolRegistry;
 
   vi.spyOn(config, 'getToolRegistry').mockReturnValue(mockToolRegistry);
+
+  // Mock getContentGeneratorConfig to return a valid config
+  vi.spyOn(config, 'getContentGeneratorConfig').mockReturnValue({
+    model: DEFAULT_GEMINI_MODEL,
+    authType: AuthType.USE_GEMINI,
+  });
+
+  // Mock setModel method
+  vi.spyOn(config, 'setModel').mockResolvedValue();
+
+  // Mock getSessionId method
+  vi.spyOn(config, 'getSessionId').mockReturnValue('test-session');
+
   return { config, toolRegistry: mockToolRegistry };
 }
 
diff --git a/packages/core/src/subagents/subagent.ts b/packages/core/src/subagents/subagent.ts
index 02cf0e73..19636b3c 100644
--- a/packages/core/src/subagents/subagent.ts
+++ b/packages/core/src/subagents/subagent.ts
@@ -826,7 +826,7 @@ export class SubAgentScope {
       );
 
       if (this.modelConfig.model) {
-        this.runtimeContext.setModel(this.modelConfig.model);
+        await this.runtimeContext.setModel(this.modelConfig.model);
       }
 
       return new GeminiChat(