feat: add yolo mode support to auto vision model switch (#652)

* feat: add yolo mode support to auto vision model switch * feat: add cli args & env variables for switch behavior * fix: use dedicated model names and settings * docs: add vision model instructions * fix: failed test case * fix: setModel failure
2025-12-19 09:33:53 +00:00 · 2025-09-24 10:21:09 +08:00
parent 5ecb4a2430
commit 48d8587bf9
26 changed files with 1133 additions and 122 deletions
--- a/README.md
+++ b/README.md
@@ -54,6 +54,7 @@ For detailed setup instructions, see [Authorization](#authorization).
 - **Code Understanding & Editing** - Query and edit large codebases beyond traditional context window limits
 - **Workflow Automation** - Automate operational tasks like handling pull requests and complex rebases
 - **Enhanced Parser** - Adapted parser specifically optimized for Qwen-Coder models
+- **Vision Model Support** - Automatically detect images in your input and seamlessly switch to vision-capable models for multimodal analysis

 ## Installation

@@ -121,6 +122,58 @@ Create or edit `.qwen/settings.json` in your home directory:

 > 📝 **Note**: Session token limit applies to a single conversation, not cumulative API calls.

+### Vision Model Configuration
+
+Qwen Code includes intelligent vision model auto-switching that detects images in your input and can automatically switch to vision-capable models for multimodal analysis. **This feature is enabled by default** - when you include images in your queries, you'll see a dialog asking how you'd like to handle the vision model switch.
+
+#### Skip the Switch Dialog (Optional)
+
+If you don't want to see the interactive dialog each time, configure the default behavior in your `.qwen/settings.json`:
+
+```json
+{
+  "experimental": {
+    "vlmSwitchMode": "once"
+  }
+}
+```
+
+**Available modes:**
+
+- **`"once"`** - Switch to vision model for this query only, then revert
+- **`"session"`** - Switch to vision model for the entire session
+- **`"persist"`** - Continue with current model (no switching)
+- **Not set** - Show interactive dialog each time (default)
+
+#### Command Line Override
+
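+You can also set the behavior via the command line, or via the `VLM_SWITCH_MODE` environment variable, which is used as the default when the flag is omitted: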
+
+```bash
+# Switch once per query
+qwen --vlm-switch-mode once
+
+# Switch for entire session
+qwen --vlm-switch-mode session
+
+# Never switch automatically
+qwen --vlm-switch-mode persist
+```
+
+#### Disable Vision Models (Optional)
+
+To completely disable vision model support, add to your `.qwen/settings.json`:
+
+```json
+{
+  "experimental": {
+    "visionModelPreview": false
+  }
+}
+```
+
+> 💡 **Tip**: In YOLO mode (`--yolo`), vision switching happens automatically without prompts when images are detected.
+
 ### Authorization

 Choose your preferred authentication method based on your needs:
--- a/packages/cli/src/config/config.test.ts
+++ b/packages/cli/src/config/config.test.ts
@@ -1514,7 +1514,7 @@ describe('loadCliConfig model selection', () => {
      argv,
    );

-    expect(config.getModel()).toBe('qwen3-coder-plus');
+    expect(config.getModel()).toBe('coder-model');
  });

  it('always prefers model from argvs', async () => {
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -82,6 +82,7 @@ export interface CliArgs {
  includeDirectories: string[] | undefined;
  tavilyApiKey: string | undefined;
  screenReader: boolean | undefined;
+  vlmSwitchMode: string | undefined;
 }

 export async function parseArguments(settings: Settings): Promise<CliArgs> {
@@ -249,6 +250,13 @@ export async function parseArguments(settings: Settings): Promise<CliArgs> {
          description: 'Enable screen reader mode for accessibility.',
          default: false,
        })
+        .option('vlm-switch-mode', {
+          type: 'string',
+          choices: ['once', 'session', 'persist'],
+          description:
+            'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). Overrides settings files.',
+          default: process.env['VLM_SWITCH_MODE'],
+        })
        .check((argv) => {
          if (argv.prompt && argv['promptInteractive']) {
            throw new Error(
@@ -524,6 +532,9 @@ export async function loadCliConfig(
    argv.screenReader !== undefined
      ? argv.screenReader
      : (settings.ui?.accessibility?.screenReader ?? false);
+
+  const vlmSwitchMode =
+    argv.vlmSwitchMode || settings.experimental?.vlmSwitchMode;
  return new Config({
    sessionId,
    embeddingModel: DEFAULT_GEMINI_EMBEDDING_MODEL,
@@ -630,6 +641,7 @@ export async function loadCliConfig(
    skipNextSpeakerCheck: settings.model?.skipNextSpeakerCheck,
    enablePromptCompletion: settings.general?.enablePromptCompletion ?? false,
    skipLoopDetection: settings.skipLoopDetection ?? false,
+    vlmSwitchMode,
  });
 }

--- a/packages/cli/src/config/settings.test.ts
+++ b/packages/cli/src/config/settings.test.ts
@@ -69,7 +69,11 @@ const MOCK_WORKSPACE_SETTINGS_PATH = pathActual.join(
 );

 // A more flexible type for test data that allows arbitrary properties.
-type TestSettings = Settings & { [key: string]: unknown };
+type TestSettings = Settings & {
+  [key: string]: unknown;
+  nested?: { [key: string]: unknown };
+  nestedObj?: { [key: string]: unknown };
+};

 vi.mock('fs', async (importOriginal) => {
  // Get all the functions from the real 'fs' module
@@ -137,6 +141,9 @@ describe('Settings Loading and Merging', () => {
        advanced: {
          excludedEnvVars: [],
        },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
        extensions: {
          disabled: [],
          workspacesWithMigrationNudge: [],
@@ -197,6 +204,9 @@ describe('Settings Loading and Merging', () => {
        advanced: {
          excludedEnvVars: [],
        },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
        extensions: {
          disabled: [],
          workspacesWithMigrationNudge: [],
@@ -260,6 +270,9 @@ describe('Settings Loading and Merging', () => {
        advanced: {
          excludedEnvVars: [],
        },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
        extensions: {
          disabled: [],
          workspacesWithMigrationNudge: [],
@@ -320,6 +333,9 @@ describe('Settings Loading and Merging', () => {
        advanced: {
          excludedEnvVars: [],
        },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
        extensions: {
          disabled: [],
          workspacesWithMigrationNudge: [],
@@ -385,6 +401,9 @@ describe('Settings Loading and Merging', () => {
        advanced: {
          excludedEnvVars: [],
        },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
        extensions: {
          disabled: [],
          workspacesWithMigrationNudge: [],
@@ -477,6 +496,9 @@ describe('Settings Loading and Merging', () => {
        advanced: {
          excludedEnvVars: [],
        },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
        extensions: {
          disabled: [],
          workspacesWithMigrationNudge: [],
@@ -562,6 +584,9 @@ describe('Settings Loading and Merging', () => {
        advanced: {
          excludedEnvVars: [],
        },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
        extensions: {
          disabled: [],
          workspacesWithMigrationNudge: [],
@@ -691,6 +716,9 @@ describe('Settings Loading and Merging', () => {
            '/system/dir',
          ],
        },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
        extensions: {
          disabled: [],
          workspacesWithMigrationNudge: [],
@@ -1431,6 +1459,9 @@ describe('Settings Loading and Merging', () => {
        advanced: {
          excludedEnvVars: [],
        },
+        experimental: {},
+        contentGenerator: {},
+        systemPromptMappings: {},
        extensions: {
          disabled: [],
          workspacesWithMigrationNudge: [],
@@ -1516,7 +1547,11 @@ describe('Settings Loading and Merging', () => {
        'workspace_endpoint_from_env/api',
      );
      expect(
-        (settings.workspace.settings as TestSettings)['nested']['value'],
+        (
+          (settings.workspace.settings as TestSettings).nested as {
+            [key: string]: unknown;
+          }
+        )['value'],
      ).toBe('workspace_endpoint_from_env');
      expect((settings.merged as TestSettings)['endpoint']).toBe(
        'workspace_endpoint_from_env/api',
@@ -1766,19 +1801,39 @@ describe('Settings Loading and Merging', () => {
      ).toBeUndefined();

      expect(
-        (settings.user.settings as TestSettings)['nestedObj']['nestedNull'],
+        (
+          (settings.user.settings as TestSettings).nestedObj as {
+            [key: string]: unknown;
+          }
+        )['nestedNull'],
      ).toBeNull();
      expect(
-        (settings.user.settings as TestSettings)['nestedObj']['nestedBool'],
+        (
+          (settings.user.settings as TestSettings).nestedObj as {
+            [key: string]: unknown;
+          }
+        )['nestedBool'],
      ).toBe(true);
      expect(
-        (settings.user.settings as TestSettings)['nestedObj']['nestedNum'],
+        (
+          (settings.user.settings as TestSettings).nestedObj as {
+            [key: string]: unknown;
+          }
+        )['nestedNum'],
      ).toBe(0);
      expect(
-        (settings.user.settings as TestSettings)['nestedObj']['nestedString'],
+        (
+          (settings.user.settings as TestSettings).nestedObj as {
+            [key: string]: unknown;
+          }
+        )['nestedString'],
      ).toBe('literal');
      expect(
-        (settings.user.settings as TestSettings)['nestedObj']['anotherEnv'],
+        (
+          (settings.user.settings as TestSettings).nestedObj as {
+            [key: string]: unknown;
+          }
+        )['anotherEnv'],
      ).toBe('env_string_nested_value');

      delete process.env['MY_ENV_STRING'];
@@ -1864,6 +1919,9 @@ describe('Settings Loading and Merging', () => {
          advanced: {
            excludedEnvVars: [],
          },
+          experimental: {},
+          contentGenerator: {},
+          systemPromptMappings: {},
          extensions: {
            disabled: [],
            workspacesWithMigrationNudge: [],
@@ -2336,14 +2394,14 @@ describe('Settings Loading and Merging', () => {
          vimMode: false,
        },
        model: {
-          maxSessionTurns: 0,
+          maxSessionTurns: -1,
        },
        context: {
          includeDirectories: [],
        },
        security: {
          folderTrust: {
-            enabled: null,
+            enabled: false,
          },
        },
      };
@@ -2352,9 +2410,9 @@ describe('Settings Loading and Merging', () => {

      expect(v1Settings).toEqual({
        vimMode: false,
-        maxSessionTurns: 0,
+        maxSessionTurns: -1,
        includeDirectories: [],
-        folderTrust: null,
+        folderTrust: false,
      });
    });

--- a/packages/cli/src/config/settings.ts
+++ b/packages/cli/src/config/settings.ts
@@ -396,6 +396,24 @@ function mergeSettings(
        ]),
      ],
    },
+    experimental: {
+      ...(systemDefaults.experimental || {}),
+      ...(user.experimental || {}),
+      ...(safeWorkspaceWithoutFolderTrust.experimental || {}),
+      ...(system.experimental || {}),
+    },
+    contentGenerator: {
+      ...(systemDefaults.contentGenerator || {}),
+      ...(user.contentGenerator || {}),
+      ...(safeWorkspaceWithoutFolderTrust.contentGenerator || {}),
+      ...(system.contentGenerator || {}),
+    },
+    systemPromptMappings: {
+      ...(systemDefaults.systemPromptMappings || {}),
+      ...(user.systemPromptMappings || {}),
+      ...(safeWorkspaceWithoutFolderTrust.systemPromptMappings || {}),
+      ...(system.systemPromptMappings || {}),
+    },
    extensions: {
      ...(systemDefaults.extensions || {}),
      ...(user.extensions || {}),
--- a/packages/cli/src/config/settingsSchema.ts
+++ b/packages/cli/src/config/settingsSchema.ts
@@ -746,11 +746,21 @@ export const SETTINGS_SCHEMA = {
        label: 'Vision Model Preview',
        category: 'Experimental',
        requiresRestart: false,
-        default: false,
+        default: true,
        description:
          'Enable vision model support and auto-switching functionality. When disabled, vision models like qwen-vl-max-latest will be hidden and auto-switching will not occur.',
        showInDialog: true,
      },
+      vlmSwitchMode: {
+        type: 'string',
+        label: 'VLM Switch Mode',
+        category: 'Experimental',
+        requiresRestart: false,
+        default: undefined as string | undefined,
+        description:
+          'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). If not set, user will be prompted each time. This is a temporary experimental feature.',
+        showInDialog: false,
+      },
    },
  },

--- a/packages/cli/src/ui/App.tsx
+++ b/packages/cli/src/ui/App.tsx
@@ -566,7 +566,9 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => {
      }

      // Switch model for future use but return false to stop current retry
-      config.setModel(fallbackModel);
+      config.setModel(fallbackModel).catch((error) => {
+        console.error('Failed to switch to fallback model:', error);
+      });
      config.setFallbackMode(true);
      logFlashFallback(
        config,
@@ -650,17 +652,28 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => {
  }, []);

  const handleModelSelect = useCallback(
-    (modelId: string) => {
-      config.setModel(modelId);
-      setCurrentModel(modelId);
-      setIsModelSelectionDialogOpen(false);
-      addItem(
-        {
-          type: MessageType.INFO,
-          text: `Switched model to \`${modelId}\` for this session.`,
-        },
-        Date.now(),
-      );
+    async (modelId: string) => {
+      try {
+        await config.setModel(modelId);
+        setCurrentModel(modelId);
+        setIsModelSelectionDialogOpen(false);
+        addItem(
+          {
+            type: MessageType.INFO,
+            text: `Switched model to \`${modelId}\` for this session.`,
+          },
+          Date.now(),
+        );
+      } catch (error) {
+        console.error('Failed to switch model:', error);
+        addItem(
+          {
+            type: MessageType.ERROR,
+            text: `Failed to switch to model \`${modelId}\`. Please try again.`,
+          },
+          Date.now(),
+        );
+      }
    },
    [config, setCurrentModel, addItem],
  );
@@ -670,7 +683,7 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => {
    if (!contentGeneratorConfig) return [];

    const visionModelPreviewEnabled =
-      settings.merged.experimental?.visionModelPreview ?? false;
+      settings.merged.experimental?.visionModelPreview ?? true;

    switch (contentGeneratorConfig.authType) {
      case AuthType.QWEN_OAUTH:
@@ -759,7 +772,7 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => {
    setModelSwitchedFromQuotaError,
    refreshStatic,
    () => cancelHandlerRef.current(),
-    settings.merged.experimental?.visionModelPreview ?? false,
+    settings.merged.experimental?.visionModelPreview ?? true,
    handleVisionSwitchRequired,
  );

--- a/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx
+++ b/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx
@@ -46,8 +46,8 @@ describe('ModelSwitchDialog', () => {
        value: VisionSwitchOutcome.SwitchSessionToVL,
      },
      {
-        label: 'Do not switch, show guidance',
-        value: VisionSwitchOutcome.DisallowWithGuidance,
+        label: 'Continue with current model',
+        value: VisionSwitchOutcome.ContinueWithCurrentModel,
      },
    ];

@@ -81,18 +81,18 @@ describe('ModelSwitchDialog', () => {
    );
  });

-  it('should call onSelect with DisallowWithGuidance when third option is selected', () => {
+  it('should call onSelect with ContinueWithCurrentModel when third option is selected', () => {
    render(<ModelSwitchDialog onSelect={mockOnSelect} />);

    const onSelectCallback = mockRadioButtonSelect.mock.calls[0][0].onSelect;
-    onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance);
+    onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel);

    expect(mockOnSelect).toHaveBeenCalledWith(
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
    );
  });

-  it('should setup escape key handler to call onSelect with DisallowWithGuidance', () => {
+  it('should setup escape key handler to call onSelect with ContinueWithCurrentModel', () => {
    render(<ModelSwitchDialog onSelect={mockOnSelect} />);

    expect(mockUseKeypress).toHaveBeenCalledWith(expect.any(Function), {
@@ -104,7 +104,7 @@ describe('ModelSwitchDialog', () => {
    keypressHandler({ name: 'escape' });

    expect(mockOnSelect).toHaveBeenCalledWith(
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
    );
  });

@@ -126,13 +126,9 @@ describe('ModelSwitchDialog', () => {

  describe('VisionSwitchOutcome enum', () => {
    it('should have correct enum values', () => {
-      expect(VisionSwitchOutcome.SwitchOnce).toBe('switch_once');
-      expect(VisionSwitchOutcome.SwitchSessionToVL).toBe(
-        'switch_session_to_vl',
-      );
-      expect(VisionSwitchOutcome.DisallowWithGuidance).toBe(
-        'disallow_with_guidance',
-      );
+      expect(VisionSwitchOutcome.SwitchOnce).toBe('once');
+      expect(VisionSwitchOutcome.SwitchSessionToVL).toBe('session');
+      expect(VisionSwitchOutcome.ContinueWithCurrentModel).toBe('persist');
    });
  });

@@ -144,7 +140,7 @@ describe('ModelSwitchDialog', () => {
    // Call multiple times
    onSelectCallback(VisionSwitchOutcome.SwitchOnce);
    onSelectCallback(VisionSwitchOutcome.SwitchSessionToVL);
-    onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance);
+    onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel);

    expect(mockOnSelect).toHaveBeenCalledTimes(3);
    expect(mockOnSelect).toHaveBeenNthCalledWith(
@@ -157,7 +153,7 @@ describe('ModelSwitchDialog', () => {
    );
    expect(mockOnSelect).toHaveBeenNthCalledWith(
      3,
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
    );
  });

@@ -179,7 +175,7 @@ describe('ModelSwitchDialog', () => {

    expect(mockOnSelect).toHaveBeenCalledTimes(2);
    expect(mockOnSelect).toHaveBeenCalledWith(
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
    );
  });
 });
--- a/packages/cli/src/ui/components/ModelSwitchDialog.tsx
+++ b/packages/cli/src/ui/components/ModelSwitchDialog.tsx
@@ -14,9 +14,9 @@ import {
 import { useKeypress } from '../hooks/useKeypress.js';

 export enum VisionSwitchOutcome {
-  SwitchOnce = 'switch_once',
-  SwitchSessionToVL = 'switch_session_to_vl',
-  DisallowWithGuidance = 'disallow_with_guidance',
+  SwitchOnce = 'once',
+  SwitchSessionToVL = 'session',
+  ContinueWithCurrentModel = 'persist',
 }

 export interface ModelSwitchDialogProps {
@@ -29,7 +29,7 @@ export const ModelSwitchDialog: React.FC<ModelSwitchDialogProps> = ({
  useKeypress(
    (key) => {
      if (key.name === 'escape') {
-        onSelect(VisionSwitchOutcome.DisallowWithGuidance);
+        onSelect(VisionSwitchOutcome.ContinueWithCurrentModel);
      }
    },
    { isActive: true },
@@ -45,8 +45,8 @@ export const ModelSwitchDialog: React.FC<ModelSwitchDialogProps> = ({
      value: VisionSwitchOutcome.SwitchSessionToVL,
    },
    {
-      label: 'Do not switch, show guidance',
-      value: VisionSwitchOutcome.DisallowWithGuidance,
+      label: 'Continue with current model',
+      value: VisionSwitchOutcome.ContinueWithCurrentModel,
    },
  ];

--- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
+++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
@@ -60,7 +60,9 @@ const mockParseAndFormatApiError = vi.hoisted(() => vi.fn());
 const mockHandleVisionSwitch = vi.hoisted(() =>
  vi.fn().mockResolvedValue({ shouldProceed: true }),
 );
-const mockRestoreOriginalModel = vi.hoisted(() => vi.fn());
+const mockRestoreOriginalModel = vi.hoisted(() =>
+  vi.fn().mockResolvedValue(undefined),
+);

 vi.mock('@qwen-code/qwen-code-core', async (importOriginal) => {
  const actualCoreModule = (await importOriginal()) as any;
@@ -301,6 +303,8 @@ describe('useGeminiStream', () => {
          () => {},
          () => {},
          () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
        );
      },
      {
@@ -462,6 +466,8 @@ describe('useGeminiStream', () => {
        () => {},
        () => {},
        () => {},
+        false, // visionModelPreviewEnabled
+        undefined, // onVisionSwitchRequired (optional)
      ),
    );

@@ -541,6 +547,8 @@ describe('useGeminiStream', () => {
        () => {},
        () => {},
        () => {},
+        false, // visionModelPreviewEnabled
+        undefined, // onVisionSwitchRequired (optional)
      ),
    );

@@ -649,6 +657,8 @@ describe('useGeminiStream', () => {
        () => {},
        () => {},
        () => {},
+        false, // visionModelPreviewEnabled
+        undefined, // onVisionSwitchRequired (optional)
      ),
    );

@@ -758,6 +768,8 @@ describe('useGeminiStream', () => {
        () => {},
        () => {},
        () => {},
+        false, // visionModelPreviewEnabled
+        undefined, // onVisionSwitchRequired (optional)
      ),
    );

@@ -887,6 +899,8 @@ describe('useGeminiStream', () => {
          () => {},
          () => {},
          cancelSubmitSpy,
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
        ),
      );

@@ -1198,6 +1212,8 @@ describe('useGeminiStream', () => {
          () => {},
          () => {},
          () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
        ),
      );

@@ -1251,6 +1267,8 @@ describe('useGeminiStream', () => {
          () => {},
          () => {},
          () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
        ),
      );

@@ -1301,6 +1319,8 @@ describe('useGeminiStream', () => {
          () => {},
          () => {},
          () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
        ),
      );

@@ -1349,6 +1369,8 @@ describe('useGeminiStream', () => {
          () => {},
          () => {},
          () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
        ),
      );

@@ -1398,6 +1420,8 @@ describe('useGeminiStream', () => {
          () => {},
          () => {},
          () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
        ),
      );

@@ -1487,6 +1511,8 @@ describe('useGeminiStream', () => {
            () => {},
            () => {},
            () => {},
+            false, // visionModelPreviewEnabled
+            undefined, // onVisionSwitchRequired (optional)
          ),
        );

@@ -1537,6 +1563,8 @@ describe('useGeminiStream', () => {
        vi.fn(), // setModelSwitched
        vi.fn(), // onEditorClose
        vi.fn(), // onCancelSubmit
+        false, // visionModelPreviewEnabled
+        undefined, // onVisionSwitchRequired (optional)
      ),
    );

@@ -1602,6 +1630,8 @@ describe('useGeminiStream', () => {
          () => {},
          () => {},
          () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
        ),
      );

@@ -1680,6 +1710,8 @@ describe('useGeminiStream', () => {
          () => {},
          () => {},
          () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
        ),
      );

@@ -1734,6 +1766,8 @@ describe('useGeminiStream', () => {
          () => {},
          () => {},
          () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
        ),
      );

@@ -1943,6 +1977,8 @@ describe('useGeminiStream', () => {
          () => {},
          () => {},
          () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
        ),
      );

@@ -1975,6 +2011,8 @@ describe('useGeminiStream', () => {
          () => {},
          () => {},
          () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
        ),
      );

@@ -2028,6 +2066,8 @@ describe('useGeminiStream', () => {
          () => {},
          () => {},
          () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
        ),
      );

@@ -2065,6 +2105,8 @@ describe('useGeminiStream', () => {
          () => {},
          () => {},
          () => {},
+          false, // visionModelPreviewEnabled
+          undefined, // onVisionSwitchRequired (optional)
        ),
      );

--- a/packages/cli/src/ui/hooks/useGeminiStream.ts
+++ b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -89,7 +89,7 @@ export const useGeminiStream = (
  setModelSwitchedFromQuotaError: React.Dispatch<React.SetStateAction<boolean>>,
  onEditorClose: () => void,
  onCancelSubmit: () => void,
-  visionModelPreviewEnabled: boolean = false,
+  visionModelPreviewEnabled: boolean,
  onVisionSwitchRequired?: (query: PartListUnion) => Promise<{
    modelOverride?: string;
    persistSessionModel?: string;
@@ -765,7 +765,9 @@ export const useGeminiStream = (

        if (processingStatus === StreamProcessingStatus.UserCancelled) {
          // Restore original model if it was temporarily overridden
-          restoreOriginalModel();
+          restoreOriginalModel().catch((error) => {
+            console.error('Failed to restore original model:', error);
+          });
          isSubmittingQueryRef.current = false;
          return;
        }
@@ -780,10 +782,14 @@ export const useGeminiStream = (
        }

        // Restore original model if it was temporarily overridden
-        restoreOriginalModel();
+        restoreOriginalModel().catch((error) => {
+          console.error('Failed to restore original model:', error);
+        });
      } catch (error: unknown) {
        // Restore original model if it was temporarily overridden
-        restoreOriginalModel();
+        restoreOriginalModel().catch((error) => {
+          console.error('Failed to restore original model:', error);
+        });

        if (error instanceof UnauthorizedError) {
          onAuthError();
--- a/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts
+++ b/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts
@@ -8,7 +8,7 @@
 import { describe, it, expect, vi, beforeEach } from 'vitest';
 import { renderHook, act } from '@testing-library/react';
 import type { Part, PartListUnion } from '@google/genai';
-import { AuthType, type Config } from '@qwen-code/qwen-code-core';
+import { AuthType, type Config, ApprovalMode } from '@qwen-code/qwen-code-core';
 import {
  shouldOfferVisionSwitch,
  processVisionSwitchOutcome,
@@ -41,7 +41,7 @@ describe('useVisionAutoSwitch helpers', () => {
      const result = shouldOfferVisionSwitch(
        parts,
        AuthType.QWEN_OAUTH,
-        'qwen-vl-max-latest',
+        'vision-model',
        true,
      );
      expect(result).toBe(false);
@@ -108,6 +108,56 @@ describe('useVisionAutoSwitch helpers', () => {
      );
      expect(result).toBe(false);
    });
+
+    it('returns true when image parts exist in YOLO mode context', () => {
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+      const result = shouldOfferVisionSwitch(
+        parts,
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        true,
+      );
+      expect(result).toBe(true);
+    });
+
+    it('returns false when no image parts exist in YOLO mode context', () => {
+      const parts: PartListUnion = [{ text: 'just text' }];
+      const result = shouldOfferVisionSwitch(
+        parts,
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        true,
+      );
+      expect(result).toBe(false);
+    });
+
+    it('returns false when already using vision model in YOLO mode context', () => {
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+      const result = shouldOfferVisionSwitch(
+        parts,
+        AuthType.QWEN_OAUTH,
+        'vision-model',
+        true,
+      );
+      expect(result).toBe(false);
+    });
+
+    it('returns false when authType is not QWEN_OAUTH in YOLO mode context', () => {
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+      const result = shouldOfferVisionSwitch(
+        parts,
+        AuthType.USE_GEMINI,
+        'qwen3-coder-plus',
+        true,
+      );
+      expect(result).toBe(false);
+    });
  });

  describe('processVisionSwitchOutcome', () => {
@@ -125,11 +175,11 @@ describe('useVisionAutoSwitch helpers', () => {
      expect(result).toEqual({ persistSessionModel: vl });
    });

-    it('maps DisallowWithGuidance to showGuidance', () => {
+    it('maps ContinueWithCurrentModel to empty result', () => {
      const result = processVisionSwitchOutcome(
-        VisionSwitchOutcome.DisallowWithGuidance,
+        VisionSwitchOutcome.ContinueWithCurrentModel,
      );
-      expect(result).toEqual({ showGuidance: true });
+      expect(result).toEqual({});
    });
  });

@@ -151,13 +201,20 @@ describe('useVisionAutoSwitch hook', () => {
    ts: number,
  ) => any;

-  const createMockConfig = (authType: AuthType, initialModel: string) => {
+  const createMockConfig = (
+    authType: AuthType,
+    initialModel: string,
+    approvalMode: ApprovalMode = ApprovalMode.DEFAULT,
+    vlmSwitchMode?: string,
+  ) => {
    let currentModel = initialModel;
    const mockConfig: Partial<Config> = {
      getModel: vi.fn(() => currentModel),
-      setModel: vi.fn((m: string) => {
+      setModel: vi.fn(async (m: string) => {
        currentModel = m;
      }),
+      getApprovalMode: vi.fn(() => approvalMode),
+      getVlmSwitchMode: vi.fn(() => vlmSwitchMode),
      getContentGeneratorConfig: vi.fn(() => ({
        authType,
        model: currentModel,
@@ -226,11 +283,9 @@ describe('useVisionAutoSwitch hook', () => {
    expect(onVisionSwitchRequired).not.toHaveBeenCalled();
  });

-  it('shows guidance and blocks when dialog returns showGuidance', async () => {
+  it('continues with current model when dialog returns empty result', async () => {
    const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
-    const onVisionSwitchRequired = vi
-      .fn()
-      .mockResolvedValue({ showGuidance: true });
+    const onVisionSwitchRequired = vi.fn().mockResolvedValue({}); // Empty result for ContinueWithCurrentModel
    const { result } = renderHook(() =>
      useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
    );
@@ -245,11 +300,12 @@ describe('useVisionAutoSwitch hook', () => {
      res = await result.current.handleVisionSwitch(parts, userTs, false);
    });

-    expect(addItem).toHaveBeenCalledWith(
+    // Should not add any guidance message
+    expect(addItem).not.toHaveBeenCalledWith(
      { type: MessageType.INFO, text: getVisionSwitchGuidanceMessage() },
      userTs,
    );
-    expect(res).toEqual({ shouldProceed: false });
+    expect(res).toEqual({ shouldProceed: true });
    expect(config.setModel).not.toHaveBeenCalled();
  });

@@ -258,7 +314,7 @@ describe('useVisionAutoSwitch hook', () => {
    const config = createMockConfig(AuthType.QWEN_OAUTH, initialModel);
    const onVisionSwitchRequired = vi
      .fn()
-      .mockResolvedValue({ modelOverride: 'qwen-vl-max-latest' });
+      .mockResolvedValue({ modelOverride: 'coder-model' });
    const { result } = renderHook(() =>
      useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
    );
@@ -273,20 +329,26 @@ describe('useVisionAutoSwitch hook', () => {
    });

    expect(res).toEqual({ shouldProceed: true, originalModel: initialModel });
-    expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
+    expect(config.setModel).toHaveBeenCalledWith('coder-model', {
+      reason: 'vision_auto_switch',
+      context: 'User-prompted vision switch (one-time override)',
+    });

    // Now restore
-    act(() => {
-      result.current.restoreOriginalModel();
+    await act(async () => {
+      await result.current.restoreOriginalModel();
+    });
+    expect(config.setModel).toHaveBeenLastCalledWith(initialModel, {
+      reason: 'vision_auto_switch',
+      context: 'Restoring original model after vision switch',
    });
-    expect(config.setModel).toHaveBeenLastCalledWith(initialModel);
  });

  it('persists session model when dialog requests persistence', async () => {
    const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
    const onVisionSwitchRequired = vi
      .fn()
-      .mockResolvedValue({ persistSessionModel: 'qwen-vl-max-latest' });
+      .mockResolvedValue({ persistSessionModel: 'coder-model' });
    const { result } = renderHook(() =>
      useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
    );
@@ -301,16 +363,17 @@ describe('useVisionAutoSwitch hook', () => {
    });

    expect(res).toEqual({ shouldProceed: true });
-    expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
+    expect(config.setModel).toHaveBeenCalledWith('coder-model', {
+      reason: 'vision_auto_switch',
+      context: 'User-prompted vision switch (session persistent)',
+    });

    // Restore should be a no-op since no one-time override was used
-    act(() => {
-      result.current.restoreOriginalModel();
+    await act(async () => {
+      await result.current.restoreOriginalModel();
    });
    // Last call should still be the persisted model set
-    expect((config.setModel as any).mock.calls.pop()?.[0]).toBe(
-      'qwen-vl-max-latest',
-    );
+    expect((config.setModel as any).mock.calls.pop()?.[0]).toBe('coder-model');
  });

  it('returns shouldProceed=true when dialog returns no special flags', async () => {
@@ -371,4 +434,420 @@ describe('useVisionAutoSwitch hook', () => {
    expect(res).toEqual({ shouldProceed: true });
    expect(onVisionSwitchRequired).not.toHaveBeenCalled();
  });
+
+  describe('YOLO mode behavior', () => {
+    it('automatically switches to vision model in YOLO mode without showing dialog', async () => {
+      const initialModel = 'qwen3-coder-plus';
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        initialModel,
+        ApprovalMode.YOLO,
+      );
+      const onVisionSwitchRequired = vi.fn(); // Should not be called in YOLO mode
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+
+      let res: any;
+      await act(async () => {
+        res = await result.current.handleVisionSwitch(parts, 7070, false);
+      });
+
+      // Should automatically switch without calling the dialog
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+      expect(res).toEqual({
+        shouldProceed: true,
+        originalModel: initialModel,
+      });
+      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
+        reason: 'vision_auto_switch',
+        context: 'YOLO mode auto-switch for image content',
+      });
+    });
+
+    it('does not switch in YOLO mode when no images are present', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.YOLO,
+      );
+      const onVisionSwitchRequired = vi.fn();
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [{ text: 'no images here' }];
+
+      let res: any;
+      await act(async () => {
+        res = await result.current.handleVisionSwitch(parts, 8080, false);
+      });
+
+      expect(res).toEqual({ shouldProceed: true });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+      expect(config.setModel).not.toHaveBeenCalled();
+    });
+
+    it('does not switch in YOLO mode when already using vision model', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'vision-model',
+        ApprovalMode.YOLO,
+      );
+      const onVisionSwitchRequired = vi.fn();
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+
+      let res: any;
+      await act(async () => {
+        res = await result.current.handleVisionSwitch(parts, 9090, false);
+      });
+
+      expect(res).toEqual({ shouldProceed: true });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+      expect(config.setModel).not.toHaveBeenCalled();
+    });
+
+    it('restores original model after YOLO mode auto-switch', async () => {
+      const initialModel = 'qwen3-coder-plus';
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        initialModel,
+        ApprovalMode.YOLO,
+      );
+      const onVisionSwitchRequired = vi.fn();
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+
+      // First, trigger the auto-switch
+      await act(async () => {
+        await result.current.handleVisionSwitch(parts, 10100, false);
+      });
+
+      // Verify model was switched
+      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
+        reason: 'vision_auto_switch',
+        context: 'YOLO mode auto-switch for image content',
+      });
+
+      // Now restore the original model
+      await act(async () => {
+        await result.current.restoreOriginalModel();
+      });
+
+      // Verify model was restored
+      expect(config.setModel).toHaveBeenLastCalledWith(initialModel, {
+        reason: 'vision_auto_switch',
+        context: 'Restoring original model after vision switch',
+      });
+    });
+
+    it('does not switch in YOLO mode when authType is not QWEN_OAUTH', async () => {
+      const config = createMockConfig(
+        AuthType.USE_GEMINI,
+        'qwen3-coder-plus',
+        ApprovalMode.YOLO,
+      );
+      const onVisionSwitchRequired = vi.fn();
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+
+      let res: any;
+      await act(async () => {
+        res = await result.current.handleVisionSwitch(parts, 11110, false);
+      });
+
+      expect(res).toEqual({ shouldProceed: true });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+      expect(config.setModel).not.toHaveBeenCalled();
+    });
+
+    it('does not switch in YOLO mode when visionModelPreviewEnabled is false', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.YOLO,
+      );
+      const onVisionSwitchRequired = vi.fn();
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          false,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/png', data: '...' } },
+      ];
+
+      let res: any;
+      await act(async () => {
+        res = await result.current.handleVisionSwitch(parts, 12120, false);
+      });
+
+      expect(res).toEqual({ shouldProceed: true });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+      expect(config.setModel).not.toHaveBeenCalled();
+    });
+
+    it('handles multiple image formats in YOLO mode', async () => {
+      const initialModel = 'qwen3-coder-plus';
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        initialModel,
+        ApprovalMode.YOLO,
+      );
+      const onVisionSwitchRequired = vi.fn();
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { text: 'Here are some images:' },
+        { inlineData: { mimeType: 'image/jpeg', data: '...' } },
+        { fileData: { mimeType: 'image/png', fileUri: 'file://image.png' } },
+        { text: 'Please analyze them.' },
+      ];
+
+      let res: any;
+      await act(async () => {
+        res = await result.current.handleVisionSwitch(parts, 13130, false);
+      });
+
+      expect(res).toEqual({
+        shouldProceed: true,
+        originalModel: initialModel,
+      });
+      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
+        reason: 'vision_auto_switch',
+        context: 'YOLO mode auto-switch for image content',
+      });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('VLM switch mode default behavior', () => {
+    it('should automatically switch once when vlmSwitchMode is "once"', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        'once',
+      );
+      const onVisionSwitchRequired = vi.fn(); // Should not be called
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(switchResult.originalModel).toBe('qwen3-coder-plus');
+      expect(config.setModel).toHaveBeenCalledWith('vision-model', {
+        reason: 'vision_auto_switch',
+        context: 'Default VLM switch mode: once (one-time override)',
+      });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+    });
+
+    it('should switch session when vlmSwitchMode is "session"', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        'session',
+      );
+      const onVisionSwitchRequired = vi.fn(); // Should not be called
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(switchResult.originalModel).toBeUndefined(); // No original model for session switch
+      expect(config.setModel).toHaveBeenCalledWith('vision-model', {
+        reason: 'vision_auto_switch',
+        context: 'Default VLM switch mode: session (session persistent)',
+      });
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+    });
+
+    it('should continue with current model when vlmSwitchMode is "persist"', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        'persist',
+      );
+      const onVisionSwitchRequired = vi.fn(); // Should not be called
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(switchResult.originalModel).toBeUndefined();
+      expect(config.setModel).not.toHaveBeenCalled();
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+    });
+
+    it('should fall back to user prompt when vlmSwitchMode is not set', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        undefined, // No default mode
+      );
+      const onVisionSwitchRequired = vi
+        .fn()
+        .mockResolvedValue({ modelOverride: 'vision-model' });
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(onVisionSwitchRequired).toHaveBeenCalledWith(parts);
+    });
+
+    it('should fall back to persist behavior when vlmSwitchMode has invalid value', async () => {
+      const config = createMockConfig(
+        AuthType.QWEN_OAUTH,
+        'qwen3-coder-plus',
+        ApprovalMode.DEFAULT,
+        'invalid-value',
+      );
+      const onVisionSwitchRequired = vi.fn(); // Should not be called
+      const { result } = renderHook(() =>
+        useVisionAutoSwitch(
+          config,
+          addItem as any,
+          true,
+          onVisionSwitchRequired,
+        ),
+      );
+
+      const parts: PartListUnion = [
+        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
+      ];
+
+      const switchResult = await result.current.handleVisionSwitch(
+        parts,
+        Date.now(),
+        false,
+      );
+
+      expect(switchResult.shouldProceed).toBe(true);
+      expect(switchResult.originalModel).toBeUndefined();
+      // For invalid values, it should continue with current model (persist behavior)
+      expect(config.setModel).not.toHaveBeenCalled();
+      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
+    });
+  });
 });
--- a/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
+++ b/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
@@ -5,7 +5,7 @@
 */

 import { type PartListUnion, type Part } from '@google/genai';
-import { AuthType, type Config } from '@qwen-code/qwen-code-core';
+import { AuthType, type Config, ApprovalMode } from '@qwen-code/qwen-code-core';
 import { useCallback, useRef } from 'react';
 import { VisionSwitchOutcome } from '../components/ModelSwitchDialog.js';
 import {
@@ -121,7 +121,7 @@ export function shouldOfferVisionSwitch(
  parts: PartListUnion,
  authType: AuthType,
  currentModel: string,
-  visionModelPreviewEnabled: boolean = false,
+  visionModelPreviewEnabled: boolean = true,
 ): boolean {
  // Only trigger for qwen-oauth
  if (authType !== AuthType.QWEN_OAUTH) {
@@ -166,11 +166,11 @@ export function processVisionSwitchOutcome(
    case VisionSwitchOutcome.SwitchSessionToVL:
      return { persistSessionModel: vlModelId };

-    case VisionSwitchOutcome.DisallowWithGuidance:
-      return { showGuidance: true };
+    case VisionSwitchOutcome.ContinueWithCurrentModel:
+      return {}; // Continue with current model, no changes needed

    default:
-      return { showGuidance: true };
+      return {}; // Default to continuing with current model
  }
 }

@@ -198,7 +198,7 @@ export interface VisionSwitchHandlingResult {
 export function useVisionAutoSwitch(
  config: Config,
  addItem: UseHistoryManagerReturn['addItem'],
-  visionModelPreviewEnabled: boolean = false,
+  visionModelPreviewEnabled: boolean = true,
  onVisionSwitchRequired?: (query: PartListUnion) => Promise<{
    modelOverride?: string;
    persistSessionModel?: string;
@@ -252,35 +252,91 @@ export function useVisionAutoSwitch(
        return { shouldProceed: true };
      }

-      try {
-        const visionSwitchResult = await onVisionSwitchRequired(query);
+      // In YOLO mode, automatically switch to vision model without user interaction
+      if (config.getApprovalMode() === ApprovalMode.YOLO) {
+        const vlModelId = getDefaultVisionModel();
+        originalModelRef.current = config.getModel();
+        await config.setModel(vlModelId, {
+          reason: 'vision_auto_switch',
+          context: 'YOLO mode auto-switch for image content',
+        });
+        return {
+          shouldProceed: true,
+          originalModel: originalModelRef.current,
+        };
+      }

-        if (visionSwitchResult.showGuidance) {
-          // Show guidance and don't proceed with the request
-          addItem(
-            {
-              type: MessageType.INFO,
-              text: getVisionSwitchGuidanceMessage(),
-            },
-            userMessageTimestamp,
-          );
-          return { shouldProceed: false };
+      // Check if there's a default VLM switch mode configured
+      const defaultVlmSwitchMode = config.getVlmSwitchMode();
+      if (defaultVlmSwitchMode) {
+        // Convert string value to VisionSwitchOutcome enum
+        let outcome: VisionSwitchOutcome;
+        switch (defaultVlmSwitchMode) {
+          case 'once':
+            outcome = VisionSwitchOutcome.SwitchOnce;
+            break;
+          case 'session':
+            outcome = VisionSwitchOutcome.SwitchSessionToVL;
+            break;
+          case 'persist':
+            outcome = VisionSwitchOutcome.ContinueWithCurrentModel;
+            break;
+          default:
+            // Invalid value: treat it like 'persist' and continue with the current model (no user prompt)
+            outcome = VisionSwitchOutcome.ContinueWithCurrentModel;
        }

+        // Process the default outcome
+        const visionSwitchResult = processVisionSwitchOutcome(outcome);
+
        if (visionSwitchResult.modelOverride) {
          // One-time model override
          originalModelRef.current = config.getModel();
-          config.setModel(visionSwitchResult.modelOverride);
+          await config.setModel(visionSwitchResult.modelOverride, {
+            reason: 'vision_auto_switch',
+            context: `Default VLM switch mode: ${defaultVlmSwitchMode} (one-time override)`,
+          });
          return {
            shouldProceed: true,
            originalModel: originalModelRef.current,
          };
        } else if (visionSwitchResult.persistSessionModel) {
          // Persistent session model change
-          config.setModel(visionSwitchResult.persistSessionModel);
+          await config.setModel(visionSwitchResult.persistSessionModel, {
+            reason: 'vision_auto_switch',
+            context: `Default VLM switch mode: ${defaultVlmSwitchMode} (session persistent)`,
+          });
          return { shouldProceed: true };
        }

+        // For ContinueWithCurrentModel or any other case, proceed with current model
+        return { shouldProceed: true };
+      }
+
+      try {
+        const visionSwitchResult = await onVisionSwitchRequired(query);
+
+        if (visionSwitchResult.modelOverride) {
+          // One-time model override
+          originalModelRef.current = config.getModel();
+          await config.setModel(visionSwitchResult.modelOverride, {
+            reason: 'vision_auto_switch',
+            context: 'User-prompted vision switch (one-time override)',
+          });
+          return {
+            shouldProceed: true,
+            originalModel: originalModelRef.current,
+          };
+        } else if (visionSwitchResult.persistSessionModel) {
+          // Persistent session model change
+          await config.setModel(visionSwitchResult.persistSessionModel, {
+            reason: 'vision_auto_switch',
+            context: 'User-prompted vision switch (session persistent)',
+          });
+          return { shouldProceed: true };
+        }
+
+        // For ContinueWithCurrentModel or any other case, proceed with current model
        return { shouldProceed: true };
      } catch (_error) {
        // If vision switch dialog was cancelled or errored, don't proceed
@@ -290,9 +346,12 @@ export function useVisionAutoSwitch(
    [config, addItem, visionModelPreviewEnabled, onVisionSwitchRequired],
  );

-  const restoreOriginalModel = useCallback(() => {
+  const restoreOriginalModel = useCallback(async () => {
    if (originalModelRef.current) {
-      config.setModel(originalModelRef.current);
+      await config.setModel(originalModelRef.current, {
+        reason: 'vision_auto_switch',
+        context: 'Restoring original model after vision switch',
+      });
      originalModelRef.current = null;
    }
  }, [config]);
--- a/packages/cli/src/ui/models/availableModels.ts
+++ b/packages/cli/src/ui/models/availableModels.ts
@@ -10,9 +10,12 @@ export type AvailableModel = {
  isVision?: boolean;
 };

+export const MAINLINE_VLM = 'vision-model';
+export const MAINLINE_CODER = 'coder-model';
+
 export const AVAILABLE_MODELS_QWEN: AvailableModel[] = [
-  { id: 'qwen3-coder-plus', label: 'qwen3-coder-plus' },
-  { id: 'qwen-vl-max-latest', label: 'qwen-vl-max', isVision: true },
+  { id: MAINLINE_CODER, label: MAINLINE_CODER },
+  { id: MAINLINE_VLM, label: MAINLINE_VLM, isVision: true },
 ];

 /**
@@ -42,7 +45,7 @@ export function getOpenAIAvailableModelFromEnv(): AvailableModel | null {
 * until our coding model supports multimodal.
 */
 export function getDefaultVisionModel(): string {
-  return 'qwen-vl-max-latest';
+  return MAINLINE_VLM;
 }

 export function isVisionModel(modelId: string): boolean {
--- a/packages/core/src/config/config.test.ts
+++ b/packages/core/src/config/config.test.ts
@@ -737,4 +737,85 @@ describe('setApprovalMode with folder trust', () => {
    expect(() => config.setApprovalMode(ApprovalMode.AUTO_EDIT)).not.toThrow();
    expect(() => config.setApprovalMode(ApprovalMode.DEFAULT)).not.toThrow();
  });
+
+  describe('Model Switch Logging', () => {
+    it('should log model switch when setModel is called with different model', async () => {
+      const config = new Config({
+        sessionId: 'test-model-switch',
+        targetDir: '.',
+        debugMode: false,
+        model: 'qwen3-coder-plus',
+        cwd: '.',
+      });
+
+      // Initialize the config to set up content generator
+      await config.initialize();
+
+      // Spy on the logger's logModelSwitch method
+      const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch');
+
+      // Change the model
+      await config.setModel('qwen-vl-max-latest', {
+        reason: 'vision_auto_switch',
+        context: 'Test model switch',
+      });
+
+      // Verify that logModelSwitch was called with correct parameters
+      expect(logModelSwitchSpy).toHaveBeenCalledWith({
+        fromModel: 'qwen3-coder-plus',
+        toModel: 'qwen-vl-max-latest',
+        reason: 'vision_auto_switch',
+        context: 'Test model switch',
+      });
+    });
+
+    it('should not log when setModel is called with same model', async () => {
+      const config = new Config({
+        sessionId: 'test-same-model',
+        targetDir: '.',
+        debugMode: false,
+        model: 'qwen3-coder-plus',
+        cwd: '.',
+      });
+
+      // Initialize the config to set up content generator
+      await config.initialize();
+
+      // Spy on the logger's logModelSwitch method
+      const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch');
+
+      // Set the same model
+      await config.setModel('qwen3-coder-plus');
+
+      // Verify that logModelSwitch was not called
+      expect(logModelSwitchSpy).not.toHaveBeenCalled();
+    });
+
+    it('should use default reason when no options provided', async () => {
+      const config = new Config({
+        sessionId: 'test-default-reason',
+        targetDir: '.',
+        debugMode: false,
+        model: 'qwen3-coder-plus',
+        cwd: '.',
+      });
+
+      // Initialize the config to set up content generator
+      await config.initialize();
+
+      // Spy on the logger's logModelSwitch method
+      const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch');
+
+      // Change the model without options
+      await config.setModel('qwen-vl-max-latest');
+
+      // Verify that logModelSwitch was called with default reason
+      expect(logModelSwitchSpy).toHaveBeenCalledWith({
+        fromModel: 'qwen3-coder-plus',
+        toModel: 'qwen-vl-max-latest',
+        reason: 'manual',
+        context: undefined,
+      });
+    });
+  });
 });
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -56,6 +56,7 @@ import {
  DEFAULT_GEMINI_FLASH_MODEL,
 } from './models.js';
 import { Storage } from './storage.js';
+import { Logger, type ModelSwitchEvent } from '../core/logger.js';

 // Re-export OAuth config type
 export type { AnyToolInvocation, MCPOAuthConfig };
@@ -239,6 +240,7 @@ export interface ConfigParameters {
  extensionManagement?: boolean;
  enablePromptCompletion?: boolean;
  skipLoopDetection?: boolean;
+  vlmSwitchMode?: string;
 }

 export class Config {
@@ -330,9 +332,11 @@ export class Config {
  private readonly extensionManagement: boolean;
  private readonly enablePromptCompletion: boolean = false;
  private readonly skipLoopDetection: boolean;
+  private readonly vlmSwitchMode: string | undefined;
  private initialized: boolean = false;
  readonly storage: Storage;
  private readonly fileExclusions: FileExclusions;
+  private logger: Logger | null = null;

  constructor(params: ConfigParameters) {
    this.sessionId = params.sessionId;
@@ -424,8 +428,15 @@ export class Config {
    this.extensionManagement = params.extensionManagement ?? false;
    this.storage = new Storage(this.targetDir);
    this.enablePromptCompletion = params.enablePromptCompletion ?? false;
+    this.vlmSwitchMode = params.vlmSwitchMode;
    this.fileExclusions = new FileExclusions(this);

+    // Initialize logger asynchronously
+    this.logger = new Logger(this.sessionId, this.storage);
+    this.logger.initialize().catch((error) => {
+      console.debug('Failed to initialize logger:', error);
+    });
+
    if (params.contextFileName) {
      setGeminiMdFilename(params.contextFileName);
    }
@@ -517,21 +528,47 @@ export class Config {
    return this.contentGeneratorConfig?.model || this.model;
  }

-  setModel(newModel: string): void {
+  async setModel(
+    newModel: string,
+    options?: {
+      reason?: ModelSwitchEvent['reason'];
+      context?: string;
+    },
+  ): Promise<void> {
+    const oldModel = this.getModel();
+
    if (this.contentGeneratorConfig) {
      this.contentGeneratorConfig.model = newModel;
    }

+    // Log the model switch if the model actually changed
+    if (oldModel !== newModel && this.logger) {
+      const switchEvent: ModelSwitchEvent = {
+        fromModel: oldModel,
+        toModel: newModel,
+        reason: options?.reason || 'manual',
+        context: options?.context,
+      };
+
+      // Log asynchronously to avoid blocking
+      this.logger.logModelSwitch(switchEvent).catch((error) => {
+        console.debug('Failed to log model switch:', error);
+      });
+    }
+
    // Reinitialize chat with updated configuration while preserving history
    const geminiClient = this.getGeminiClient();
    if (geminiClient && geminiClient.isInitialized()) {
-      // Use async operation but don't await to avoid blocking
-      geminiClient.reinitialize().catch((error) => {
+      // Now await the reinitialize operation to ensure completion
+      try {
+        await geminiClient.reinitialize();
+      } catch (error) {
        console.error(
          'Failed to reinitialize chat with updated config:',
          error,
        );
-      });
+        throw error; // Re-throw to let callers handle the error
+      }
    }
  }

@@ -938,6 +975,10 @@ export class Config {
    return this.skipLoopDetection;
  }

+  getVlmSwitchMode(): string | undefined {
+    return this.vlmSwitchMode;
+  }
+
  async getGitService(): Promise<GitService> {
    if (!this.gitService) {
      this.gitService = new GitService(this.targetDir, this.storage);
--- a/packages/core/src/config/flashFallback.test.ts
+++ b/packages/core/src/config/flashFallback.test.ts
@@ -41,7 +41,7 @@ describe('Flash Model Fallback Configuration', () => {
  // with the fallback mechanism. This will be necessary we introduce more
  // intelligent model routing.
  describe('setModel', () => {
-    it('should only mark as switched if contentGeneratorConfig exists', () => {
+    it('should only mark as switched if contentGeneratorConfig exists', async () => {
      // Create config without initializing contentGeneratorConfig
      const newConfig = new Config({
        sessionId: 'test-session-2',
@@ -52,15 +52,15 @@ describe('Flash Model Fallback Configuration', () => {
      });

      // Should not crash when contentGeneratorConfig is undefined
-      newConfig.setModel(DEFAULT_GEMINI_FLASH_MODEL);
+      await newConfig.setModel(DEFAULT_GEMINI_FLASH_MODEL);
      expect(newConfig.isInFallbackMode()).toBe(false);
    });
  });

  describe('getModel', () => {
-    it('should return contentGeneratorConfig model if available', () => {
+    it('should return contentGeneratorConfig model if available', async () => {
      // Simulate initialized content generator config
-      config.setModel(DEFAULT_GEMINI_FLASH_MODEL);
+      await config.setModel(DEFAULT_GEMINI_FLASH_MODEL);
      expect(config.getModel()).toBe(DEFAULT_GEMINI_FLASH_MODEL);
    });

@@ -88,8 +88,8 @@ describe('Flash Model Fallback Configuration', () => {
      expect(config.isInFallbackMode()).toBe(false);
    });

-    it('should persist switched state throughout session', () => {
-      config.setModel(DEFAULT_GEMINI_FLASH_MODEL);
+    it('should persist switched state throughout session', async () => {
+      await config.setModel(DEFAULT_GEMINI_FLASH_MODEL);
      // Setting state for fallback mode as is expected of clients
      config.setFallbackMode(true);
      expect(config.isInFallbackMode()).toBe(true);
--- a/packages/core/src/config/models.ts
+++ b/packages/core/src/config/models.ts
@@ -4,11 +4,10 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-export const DEFAULT_QWEN_MODEL = 'qwen3-coder-plus';
-// We do not have a fallback model for now, but note it here anyway.
-export const DEFAULT_QWEN_FLASH_MODEL = 'qwen3-coder-flash';
+export const DEFAULT_QWEN_MODEL = 'coder-model'; // generic name; tokenLimits.ts gives it the same limits as qwen3-coder-plus
+export const DEFAULT_QWEN_FLASH_MODEL = 'coder-model'; // same value as DEFAULT_QWEN_MODEL, so no distinct flash model is named here

-export const DEFAULT_GEMINI_MODEL = 'qwen3-coder-plus';
+export const DEFAULT_GEMINI_MODEL = 'coder-model'; // same value as DEFAULT_QWEN_MODEL
 export const DEFAULT_GEMINI_FLASH_MODEL = 'gemini-2.5-flash';
 export const DEFAULT_GEMINI_FLASH_LITE_MODEL = 'gemini-2.5-flash-lite';

--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -1053,7 +1053,7 @@ export class GeminiClient {
          error,
        );
        if (accepted !== false && accepted !== null) {
-          this.config.setModel(fallbackModel);
+          await this.config.setModel(fallbackModel);
          this.config.setFallbackMode(true);
          return fallbackModel;
        }
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -224,7 +224,7 @@ export class GeminiChat {
          error,
        );
        if (accepted !== false && accepted !== null) {
-          this.config.setModel(fallbackModel);
+          await this.config.setModel(fallbackModel);
          this.config.setFallbackMode(true);
          return fallbackModel;
        }
--- a/packages/core/src/core/logger.test.ts
+++ b/packages/core/src/core/logger.test.ts
@@ -755,4 +755,84 @@ describe('Logger', () => {
      expect(logger['messageId']).toBe(0);
    });
  });
+
+  describe('Model Switch Logging', () => {
+    it('should log model switch events correctly', async () => {
+      const testSessionId = 'test-session-model-switch'; // dedicated id so the lookup below only matches this test's entry
+      const logger = new Logger(testSessionId, new Storage(process.cwd()));
+      await logger.initialize();
+
+      const modelSwitchEvent = {
+        fromModel: 'qwen3-coder-plus',
+        toModel: 'qwen-vl-max-latest',
+        reason: 'vision_auto_switch' as const, // keep the literal type; a widened `string` would not fit ModelSwitchEvent['reason']
+        context: 'YOLO mode auto-switch for image content',
+      };
+
+      await logger.logModelSwitch(modelSwitchEvent);
+
+      // Read the log file to verify the entry was written
+      const logContent = await fs.readFile(TEST_LOG_FILE_PATH, 'utf-8');
+      const logs: LogEntry[] = JSON.parse(logContent);
+
+      const modelSwitchLog = logs.find(
+        (log) =>
+          log.sessionId === testSessionId &&
+          log.type === MessageSenderType.MODEL_SWITCH,
+      );
+
+      expect(modelSwitchLog).toBeDefined(); // guards the non-null assertions (`!`) used below
+      expect(modelSwitchLog!.type).toBe(MessageSenderType.MODEL_SWITCH);
+
+      const loggedEvent = JSON.parse(modelSwitchLog!.message); // message is the JSON string written by logModelSwitch
+      expect(loggedEvent.fromModel).toBe('qwen3-coder-plus');
+      expect(loggedEvent.toModel).toBe('qwen-vl-max-latest');
+      expect(loggedEvent.reason).toBe('vision_auto_switch');
+      expect(loggedEvent.context).toBe(
+        'YOLO mode auto-switch for image content',
+      );
+    });
+
+    it('should handle multiple model switch events', async () => {
+      const testSessionId = 'test-session-multiple-switches'; // dedicated id so the filter below only matches this test's entries
+      const logger = new Logger(testSessionId, new Storage(process.cwd()));
+      await logger.initialize();
+
+      // Log first switch
+      await logger.logModelSwitch({
+        fromModel: 'qwen3-coder-plus',
+        toModel: 'qwen-vl-max-latest',
+        reason: 'vision_auto_switch',
+        context: 'Auto-switch for image',
+      });
+
+      // Log second switch (restore)
+      await logger.logModelSwitch({
+        fromModel: 'qwen-vl-max-latest',
+        toModel: 'qwen3-coder-plus',
+        reason: 'vision_auto_switch',
+        context: 'Restoring original model',
+      });
+
+      // Read the log file to verify both entries were written
+      const logContent = await fs.readFile(TEST_LOG_FILE_PATH, 'utf-8');
+      const logs: LogEntry[] = JSON.parse(logContent);
+
+      const modelSwitchLogs = logs.filter(
+        (log) =>
+          log.sessionId === testSessionId &&
+          log.type === MessageSenderType.MODEL_SWITCH,
+      );
+
+      expect(modelSwitchLogs).toHaveLength(2); // exactly the two switches logged above
+
+      const firstSwitch = JSON.parse(modelSwitchLogs[0].message); // index 0 must be the first call, so stored order has to match call order
+      expect(firstSwitch.fromModel).toBe('qwen3-coder-plus');
+      expect(firstSwitch.toModel).toBe('qwen-vl-max-latest');
+
+      const secondSwitch = JSON.parse(modelSwitchLogs[1].message); // the restore: from/to are swapped relative to the first entry
+      expect(secondSwitch.fromModel).toBe('qwen-vl-max-latest');
+      expect(secondSwitch.toModel).toBe('qwen3-coder-plus');
+    });
+  });
 });
--- a/packages/core/src/core/logger.ts
+++ b/packages/core/src/core/logger.ts
@@ -13,6 +13,7 @@ const LOG_FILE_NAME = 'logs.json';

 export enum MessageSenderType {
  USER = 'user',
+  MODEL_SWITCH = 'model_switch', // entry type used by Logger.logModelSwitch
 }

 export interface LogEntry {
@@ -23,6 +24,13 @@ export interface LogEntry {
  message: string;
 }

+export interface ModelSwitchEvent {
+  fromModel: string; // model name in use before the switch
+  toModel: string; // model name in use after the switch
+  reason: 'vision_auto_switch' | 'manual' | 'fallback' | 'other'; // closed set of causes; 'vision_auto_switch' is the only one exercised by the tests in this change
+  context?: string; // optional free-form note, e.g. 'Restoring original model'
+}
+
 // This regex matches any character that is NOT a letter (a-z, A-Z),
 // a number (0-9), a hyphen (-), an underscore (_), or a dot (.).

@@ -270,6 +278,17 @@ export class Logger {
    }
  }

+  /**
+   * Records a model switch as a MODEL_SWITCH log entry.
+   * The entry's message is the JSON encoding of the four event fields below.
+   */
+  async logModelSwitch(event: ModelSwitchEvent): Promise<void> {
+    // Copy the fields by name so nothing else on `event` reaches the log.
+    const { fromModel, toModel, reason, context } = event;
+    const serialized = JSON.stringify({ fromModel, toModel, reason, context });
+    await this.logMessage(MessageSenderType.MODEL_SWITCH, serialized);
+  }
+
  private _checkpointPath(tag: string): string {
    if (!tag.length) {
      throw new Error('No checkpoint tag specified.');
--- a/packages/core/src/core/prompts.ts
+++ b/packages/core/src/core/prompts.ts
@@ -820,6 +820,14 @@ function getToolCallExamples(model?: string): string {
    if (/qwen[^-]*-vl/i.test(model)) {
      return qwenVlToolCallExamples;
    }
+    // Match coder-model pattern (same as qwen3-coder)
+    if (/coder-model/i.test(model)) {
+      return qwenCoderToolCallExamples;
+    }
+    // Match vision-model pattern (same as qwen3-vl)
+    if (/vision-model/i.test(model)) {
+      return qwenVlToolCallExamples;
+    }
  }

  return generalToolCallExamples;
--- a/packages/core/src/core/tokenLimits.ts
+++ b/packages/core/src/core/tokenLimits.ts
@@ -111,6 +111,12 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
  // Commercial Qwen3-Coder-Flash: 1M token context
  [/^qwen3-coder-flash(-.*)?$/, LIMITS['1m']], // catches "qwen3-coder-flash" and date variants

+  // Generic coder-model: same as qwen3-coder-plus (1M token context)
+  [/^coder-model$/, LIMITS['1m']],
+
+  // Commercial Qwen3-Max-Preview: 256K token context
+  [/^qwen3-max-preview(-.*)?$/, LIMITS['256k']], // catches "qwen3-max-preview" and date variants
+
  // Open-source Qwen3-Coder variants: 256K native
  [/^qwen3-coder-.*$/, LIMITS['256k']],
  // Open-source Qwen3 2507 variants: 256K native
@@ -131,6 +137,9 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
  // Qwen Vision Models
  [/^qwen-vl-max.*$/, LIMITS['128k']],

+  // Generic vision-model: same as qwen-vl-max (128K token context)
+  [/^vision-model$/, LIMITS['128k']],
+
  // -------------------
  // ByteDance Seed-OSS (512K)
  // -------------------
@@ -166,8 +175,20 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
  // Qwen3-Coder-Plus: 65,536 max output tokens
  [/^qwen3-coder-plus(-.*)?$/, LIMITS['64k']],

+  // Generic coder-model: same as qwen3-coder-plus (64K max output tokens)
+  [/^coder-model$/, LIMITS['64k']],
+
+  // Qwen3-Max-Preview: 65,536 max output tokens
+  [/^qwen3-max-preview(-.*)?$/, LIMITS['64k']],
+
  // Qwen-VL-Max-Latest: 8,192 max output tokens
  [/^qwen-vl-max-latest$/, LIMITS['8k']],
+
+  // Generic vision-model: same as qwen-vl-max-latest (8K max output tokens)
+  [/^vision-model$/, LIMITS['8k']],
+
+  // Qwen3-VL-Plus: 8,192 max output tokens
+  [/^qwen3-vl-plus$/, LIMITS['8k']],
 ];

 /**
--- a/packages/core/src/subagents/subagent.test.ts
+++ b/packages/core/src/subagents/subagent.test.ts
@@ -72,6 +72,19 @@ async function createMockConfig(
  } as unknown as ToolRegistry;

  vi.spyOn(config, 'getToolRegistry').mockReturnValue(mockToolRegistry);
+
+  // Mock getContentGeneratorConfig to return a valid config
+  vi.spyOn(config, 'getContentGeneratorConfig').mockReturnValue({
+    model: DEFAULT_GEMINI_MODEL,
+    authType: AuthType.USE_GEMINI,
+  });
+
+  // Mock setModel method
+  vi.spyOn(config, 'setModel').mockResolvedValue();
+
+  // Mock getSessionId method
+  vi.spyOn(config, 'getSessionId').mockReturnValue('test-session');
+
  return { config, toolRegistry: mockToolRegistry };
 }

--- a/packages/core/src/subagents/subagent.ts
+++ b/packages/core/src/subagents/subagent.ts
@@ -826,7 +826,7 @@ export class SubAgentScope {
      );

      if (this.modelConfig.model) {
-        this.runtimeContext.setModel(this.modelConfig.model);
+        await this.runtimeContext.setModel(this.modelConfig.model);
      }

      return new GeminiChat(