feat: add cli args & env variables for switch behavior

2025-12-21 17:27:54 +00:00 · 2025-09-23 19:14:26 +08:00
parent 85a2b8d6e0
commit e4d16adf7b
12 changed files with 555 additions and 63 deletions
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -82,6 +82,7 @@ export interface CliArgs {
  includeDirectories: string[] | undefined;
  tavilyApiKey: string | undefined;
  screenReader: boolean | undefined;
  vlmSwitchMode: string | undefined;
 }
 export async function parseArguments(settings: Settings): Promise<CliArgs> {
@@ -249,6 +250,13 @@ export async function parseArguments(settings: Settings): Promise<CliArgs> {
          description: 'Enable screen reader mode for accessibility.',
          default: false,
        })
        .option('vlm-switch-mode', {
          type: 'string',
          choices: ['once', 'session', 'persist'],
          description:
            'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). Overrides settings files.',
          default: process.env['VLM_SWITCH_MODE'],
        })
        .check((argv) => {
          if (argv.prompt && argv['promptInteractive']) {
            throw new Error(
@@ -524,6 +532,9 @@ export async function loadCliConfig(
    argv.screenReader !== undefined
      ? argv.screenReader
      : (settings.ui?.accessibility?.screenReader ?? false);
  const vlmSwitchMode =
    argv.vlmSwitchMode || settings.experimental?.vlmSwitchMode;
  return new Config({
    sessionId,
    embeddingModel: DEFAULT_GEMINI_EMBEDDING_MODEL,
@@ -630,6 +641,7 @@ export async function loadCliConfig(
    skipNextSpeakerCheck: settings.model?.skipNextSpeakerCheck,
    enablePromptCompletion: settings.general?.enablePromptCompletion ?? false,
    skipLoopDetection: settings.skipLoopDetection ?? false,
    vlmSwitchMode,
  });
 }
--- a/packages/cli/src/config/settingsSchema.ts
+++ b/packages/cli/src/config/settingsSchema.ts
@@ -751,6 +751,16 @@ export const SETTINGS_SCHEMA = {
          'Enable vision model support and auto-switching functionality. When disabled, vision models like qwen-vl-max-latest will be hidden and auto-switching will not occur.',
        showInDialog: true,
      },
      vlmSwitchMode: {
        type: 'string',
        label: 'VLM Switch Mode',
        category: 'Experimental',
        requiresRestart: false,
        default: undefined as string | undefined,
        description:
          'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). If not set, user will be prompted each time. This is a temporary experimental feature.',
        showInDialog: false,
      },
    },
  },
--- a/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx
+++ b/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx
@@ -46,8 +46,8 @@ describe('ModelSwitchDialog', () => {
        value: VisionSwitchOutcome.SwitchSessionToVL,
      },
      {
-        label: 'Do not switch, show guidance',
+        label: 'Continue with current model',
-        value: VisionSwitchOutcome.DisallowWithGuidance,
+        value: VisionSwitchOutcome.ContinueWithCurrentModel,
      },
    ];
@@ -81,18 +81,18 @@ describe('ModelSwitchDialog', () => {
    );
  });
-  it('should call onSelect with DisallowWithGuidance when third option is selected', () => {
+  it('should call onSelect with ContinueWithCurrentModel when third option is selected', () => {
    render(<ModelSwitchDialog onSelect={mockOnSelect} />);
    const onSelectCallback = mockRadioButtonSelect.mock.calls[0][0].onSelect;
-    onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance);
+    onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel);
    expect(mockOnSelect).toHaveBeenCalledWith(
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
    );
  });
-  it('should setup escape key handler to call onSelect with DisallowWithGuidance', () => {
+  it('should setup escape key handler to call onSelect with ContinueWithCurrentModel', () => {
    render(<ModelSwitchDialog onSelect={mockOnSelect} />);
    expect(mockUseKeypress).toHaveBeenCalledWith(expect.any(Function), {
@@ -104,7 +104,7 @@ describe('ModelSwitchDialog', () => {
    keypressHandler({ name: 'escape' });
    expect(mockOnSelect).toHaveBeenCalledWith(
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
    );
  });
@@ -126,13 +126,9 @@ describe('ModelSwitchDialog', () => {
  describe('VisionSwitchOutcome enum', () => {
    it('should have correct enum values', () => {
-      expect(VisionSwitchOutcome.SwitchOnce).toBe('switch_once');
+      expect(VisionSwitchOutcome.SwitchOnce).toBe('once');
-      expect(VisionSwitchOutcome.SwitchSessionToVL).toBe(
+      expect(VisionSwitchOutcome.SwitchSessionToVL).toBe('session');
-        'switch_session_to_vl',
+      expect(VisionSwitchOutcome.ContinueWithCurrentModel).toBe('persist');
      );
      expect(VisionSwitchOutcome.DisallowWithGuidance).toBe(
        'disallow_with_guidance',
      );
    });
  });
@@ -144,7 +140,7 @@ describe('ModelSwitchDialog', () => {
    // Call multiple times
    onSelectCallback(VisionSwitchOutcome.SwitchOnce);
    onSelectCallback(VisionSwitchOutcome.SwitchSessionToVL);
-    onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance);
+    onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel);
    expect(mockOnSelect).toHaveBeenCalledTimes(3);
    expect(mockOnSelect).toHaveBeenNthCalledWith(
@@ -157,7 +153,7 @@ describe('ModelSwitchDialog', () => {
    );
    expect(mockOnSelect).toHaveBeenNthCalledWith(
      3,
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
    );
  });
@@ -179,7 +175,7 @@ describe('ModelSwitchDialog', () => {
    expect(mockOnSelect).toHaveBeenCalledTimes(2);
    expect(mockOnSelect).toHaveBeenCalledWith(
-      VisionSwitchOutcome.DisallowWithGuidance,
+      VisionSwitchOutcome.ContinueWithCurrentModel,
    );
  });
 });
--- a/packages/cli/src/ui/components/ModelSwitchDialog.tsx
+++ b/packages/cli/src/ui/components/ModelSwitchDialog.tsx
@@ -14,9 +14,9 @@ import {
 import { useKeypress } from '../hooks/useKeypress.js';
 export enum VisionSwitchOutcome {
-  SwitchOnce = 'switch_once',
+  SwitchOnce = 'once',
-  SwitchSessionToVL = 'switch_session_to_vl',
+  SwitchSessionToVL = 'session',
-  DisallowWithGuidance = 'disallow_with_guidance',
+  ContinueWithCurrentModel = 'persist',
 }
 export interface ModelSwitchDialogProps {
@@ -29,7 +29,7 @@ export const ModelSwitchDialog: React.FC<ModelSwitchDialogProps> = ({
  useKeypress(
    (key) => {
      if (key.name === 'escape') {
-        onSelect(VisionSwitchOutcome.DisallowWithGuidance);
+        onSelect(VisionSwitchOutcome.ContinueWithCurrentModel);
      }
    },
    { isActive: true },
@@ -45,8 +45,8 @@ export const ModelSwitchDialog: React.FC<ModelSwitchDialogProps> = ({
      value: VisionSwitchOutcome.SwitchSessionToVL,
    },
    {
-      label: 'Do not switch, show guidance',
+      label: 'Continue with current model',
-      value: VisionSwitchOutcome.DisallowWithGuidance,
+      value: VisionSwitchOutcome.ContinueWithCurrentModel,
    },
  ];
--- a/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts
+++ b/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts
@@ -175,11 +175,11 @@ describe('useVisionAutoSwitch helpers', () => {
      expect(result).toEqual({ persistSessionModel: vl });
    });
-    it('maps DisallowWithGuidance to showGuidance', () => {
+    it('maps ContinueWithCurrentModel to empty result', () => {
      const result = processVisionSwitchOutcome(
-        VisionSwitchOutcome.DisallowWithGuidance,
+        VisionSwitchOutcome.ContinueWithCurrentModel,
      );
-      expect(result).toEqual({ showGuidance: true });
+      expect(result).toEqual({});
    });
  });
@@ -205,6 +205,7 @@ describe('useVisionAutoSwitch hook', () => {
    authType: AuthType,
    initialModel: string,
    approvalMode: ApprovalMode = ApprovalMode.DEFAULT,
    vlmSwitchMode?: string,
  ) => {
    let currentModel = initialModel;
    const mockConfig: Partial<Config> = {
@@ -213,6 +214,7 @@ describe('useVisionAutoSwitch hook', () => {
        currentModel = m;
      }),
      getApprovalMode: vi.fn(() => approvalMode),
      getVlmSwitchMode: vi.fn(() => vlmSwitchMode),
      getContentGeneratorConfig: vi.fn(() => ({
        authType,
        model: currentModel,
@@ -281,11 +283,9 @@ describe('useVisionAutoSwitch hook', () => {
    expect(onVisionSwitchRequired).not.toHaveBeenCalled();
  });
-  it('shows guidance and blocks when dialog returns showGuidance', async () => {
+  it('continues with current model when dialog returns empty result', async () => {
    const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus');
-    const onVisionSwitchRequired = vi
+    const onVisionSwitchRequired = vi.fn().mockResolvedValue({}); // Empty result for ContinueWithCurrentModel
      .fn()
      .mockResolvedValue({ showGuidance: true });
    const { result } = renderHook(() =>
      useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired),
    );
@@ -300,11 +300,12 @@ describe('useVisionAutoSwitch hook', () => {
      res = await result.current.handleVisionSwitch(parts, userTs, false);
    });
-    expect(addItem).toHaveBeenCalledWith(
+    // Should not add any guidance message
    expect(addItem).not.toHaveBeenCalledWith(
      { type: MessageType.INFO, text: getVisionSwitchGuidanceMessage() },
      userTs,
    );
-    expect(res).toEqual({ shouldProceed: false });
+    expect(res).toEqual({ shouldProceed: true });
    expect(config.setModel).not.toHaveBeenCalled();
  });
@@ -328,13 +329,19 @@ describe('useVisionAutoSwitch hook', () => {
    });
    expect(res).toEqual({ shouldProceed: true, originalModel: initialModel });
-    expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
+    expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest', {
      reason: 'vision_auto_switch',
      context: 'User-prompted vision switch (one-time override)',
    });
    // Now restore
    act(() => {
      result.current.restoreOriginalModel();
    });
-    expect(config.setModel).toHaveBeenLastCalledWith(initialModel);
+    expect(config.setModel).toHaveBeenLastCalledWith(initialModel, {
      reason: 'vision_auto_switch',
      context: 'Restoring original model after vision switch',
    });
  });
  it('persists session model when dialog requests persistence', async () => {
@@ -356,7 +363,10 @@ describe('useVisionAutoSwitch hook', () => {
    });
    expect(res).toEqual({ shouldProceed: true });
-    expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest');
+    expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest', {
      reason: 'vision_auto_switch',
      context: 'User-prompted vision switch (session persistent)',
    });
    // Restore should be a no-op since no one-time override was used
    act(() => {
@@ -460,7 +470,10 @@ describe('useVisionAutoSwitch hook', () => {
        shouldProceed: true,
        originalModel: initialModel,
      });
-      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel());
+      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
        reason: 'vision_auto_switch',
        context: 'YOLO mode auto-switch for image content',
      });
    });
    it('does not switch in YOLO mode when no images are present', async () => {
@@ -548,7 +561,10 @@ describe('useVisionAutoSwitch hook', () => {
      });
      // Verify model was switched
-      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel());
+      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
        reason: 'vision_auto_switch',
        context: 'YOLO mode auto-switch for image content',
      });
      // Now restore the original model
      act(() => {
@@ -556,7 +572,10 @@ describe('useVisionAutoSwitch hook', () => {
      });
      // Verify model was restored
-      expect(config.setModel).toHaveBeenLastCalledWith(initialModel);
+      expect(config.setModel).toHaveBeenLastCalledWith(initialModel, {
        reason: 'vision_auto_switch',
        context: 'Restoring original model after vision switch',
      });
    });
    it('does not switch in YOLO mode when authType is not QWEN_OAUTH', async () => {
@@ -652,7 +671,184 @@ describe('useVisionAutoSwitch hook', () => {
        shouldProceed: true,
        originalModel: initialModel,
      });
-      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel());
+      expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), {
        reason: 'vision_auto_switch',
        context: 'YOLO mode auto-switch for image content',
      });
      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
    });
  });
  describe('VLM switch mode default behavior', () => {
    const CODER_MODEL = 'qwen3-coder-plus';
    const VL_MODEL = 'qwen-vl-max-latest';

    /**
     * Renders the hook against a mock config whose VLM switch mode is
     * `vlmSwitchMode`, then submits one inline JPEG part through
     * `handleVisionSwitch`.
     *
     * @returns The mock config, the submitted parts and the hook's result, so
     * each test can assert on whichever of them it cares about.
     */
    const submitImage = async (
      vlmSwitchMode: string | undefined,
      onVisionSwitchRequired: Parameters<typeof useVisionAutoSwitch>[3],
    ) => {
      const config = createMockConfig(
        AuthType.QWEN_OAUTH,
        CODER_MODEL,
        ApprovalMode.DEFAULT,
        vlmSwitchMode,
      );
      const { result } = renderHook(() =>
        useVisionAutoSwitch(
          config,
          addItem as any,
          true,
          onVisionSwitchRequired,
        ),
      );
      const parts: PartListUnion = [
        { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } },
      ];
      const switchResult = await result.current.handleVisionSwitch(
        parts,
        Date.now(),
        false,
      );
      return { config, parts, switchResult };
    };

    it('should automatically switch once when vlmSwitchMode is "once"', async () => {
      const onVisionSwitchRequired = vi.fn(); // Should not be called

      const { config, switchResult } = await submitImage(
        'once',
        onVisionSwitchRequired,
      );

      expect(switchResult.shouldProceed).toBe(true);
      // A one-time switch reports the model to restore afterwards.
      expect(switchResult.originalModel).toBe(CODER_MODEL);
      expect(config.setModel).toHaveBeenCalledWith(VL_MODEL, {
        reason: 'vision_auto_switch',
        context: 'Default VLM switch mode: once (one-time override)',
      });
      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
    });

    it('should switch session when vlmSwitchMode is "session"', async () => {
      const onVisionSwitchRequired = vi.fn(); // Should not be called

      const { config, switchResult } = await submitImage(
        'session',
        onVisionSwitchRequired,
      );

      expect(switchResult.shouldProceed).toBe(true);
      // No original model for session switch
      expect(switchResult.originalModel).toBeUndefined();
      expect(config.setModel).toHaveBeenCalledWith(VL_MODEL, {
        reason: 'vision_auto_switch',
        context: 'Default VLM switch mode: session (session persistent)',
      });
      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
    });

    it('should continue with current model when vlmSwitchMode is "persist"', async () => {
      const onVisionSwitchRequired = vi.fn(); // Should not be called

      const { config, switchResult } = await submitImage(
        'persist',
        onVisionSwitchRequired,
      );

      expect(switchResult.shouldProceed).toBe(true);
      expect(switchResult.originalModel).toBeUndefined();
      expect(config.setModel).not.toHaveBeenCalled();
      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
    });

    it('should fall back to user prompt when vlmSwitchMode is not set', async () => {
      const onVisionSwitchRequired = vi
        .fn()
        .mockResolvedValue({ modelOverride: VL_MODEL });

      // No default mode
      const { parts, switchResult } = await submitImage(
        undefined,
        onVisionSwitchRequired,
      );

      expect(switchResult.shouldProceed).toBe(true);
      expect(onVisionSwitchRequired).toHaveBeenCalledWith(parts);
    });

    it('should fall back to persist behavior when vlmSwitchMode has invalid value', async () => {
      const onVisionSwitchRequired = vi.fn(); // Should not be called

      const { config, switchResult } = await submitImage(
        'invalid-value',
        onVisionSwitchRequired,
      );

      expect(switchResult.shouldProceed).toBe(true);
      expect(switchResult.originalModel).toBeUndefined();
      // For invalid values, it should continue with current model (persist behavior)
      expect(config.setModel).not.toHaveBeenCalled();
      expect(onVisionSwitchRequired).not.toHaveBeenCalled();
    });
  });
--- a/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
+++ b/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts
@@ -166,11 +166,11 @@ export function processVisionSwitchOutcome(
    case VisionSwitchOutcome.SwitchSessionToVL:
      return { persistSessionModel: vlModelId };
-    case VisionSwitchOutcome.DisallowWithGuidance:
+    case VisionSwitchOutcome.ContinueWithCurrentModel:
-      return { showGuidance: true };
+      return {}; // Continue with current model, no changes needed
    default:
-      return { showGuidance: true };
+      return {}; // Default to continuing with current model
  }
 }
@@ -256,42 +256,87 @@ export function useVisionAutoSwitch(
      if (config.getApprovalMode() === ApprovalMode.YOLO) {
        const vlModelId = getDefaultVisionModel();
        originalModelRef.current = config.getModel();
-        config.setModel(vlModelId);
+        config.setModel(vlModelId, {
          reason: 'vision_auto_switch',
          context: 'YOLO mode auto-switch for image content',
        });
        return {
          shouldProceed: true,
          originalModel: originalModelRef.current,
        };
      }
-      try {
+      // Check if there's a default VLM switch mode configured
-        const visionSwitchResult = await onVisionSwitchRequired(query);
+      const defaultVlmSwitchMode = config.getVlmSwitchMode();
-
+      if (defaultVlmSwitchMode) {
-        if (visionSwitchResult.showGuidance) {
+        // Convert string value to VisionSwitchOutcome enum
-          // Show guidance and don't proceed with the request
+        let outcome: VisionSwitchOutcome;
-          addItem(
+        switch (defaultVlmSwitchMode) {
-            {
+          case 'once':
-              type: MessageType.INFO,
+            outcome = VisionSwitchOutcome.SwitchOnce;
-              text: getVisionSwitchGuidanceMessage(),
+            break;
-            },
+          case 'session':
-            userMessageTimestamp,
+            outcome = VisionSwitchOutcome.SwitchSessionToVL;
-          );
+            break;
-          return { shouldProceed: false };
+          case 'persist':
            outcome = VisionSwitchOutcome.ContinueWithCurrentModel;
            break;
          default:
            // Invalid value: treat it like 'persist' and continue with the current model (no prompt)
            outcome = VisionSwitchOutcome.ContinueWithCurrentModel;
        }
        // Process the default outcome
        const visionSwitchResult = processVisionSwitchOutcome(outcome);
        if (visionSwitchResult.modelOverride) {
          // One-time model override
          originalModelRef.current = config.getModel();
-          config.setModel(visionSwitchResult.modelOverride);
+          config.setModel(visionSwitchResult.modelOverride, {
            reason: 'vision_auto_switch',
            context: `Default VLM switch mode: ${defaultVlmSwitchMode} (one-time override)`,
          });
          return {
            shouldProceed: true,
            originalModel: originalModelRef.current,
          };
        } else if (visionSwitchResult.persistSessionModel) {
          // Persistent session model change
-          config.setModel(visionSwitchResult.persistSessionModel);
+          config.setModel(visionSwitchResult.persistSessionModel, {
            reason: 'vision_auto_switch',
            context: `Default VLM switch mode: ${defaultVlmSwitchMode} (session persistent)`,
          });
          return { shouldProceed: true };
        }
        // For ContinueWithCurrentModel or any other case, proceed with current model
        return { shouldProceed: true };
      }
      try {
        const visionSwitchResult = await onVisionSwitchRequired(query);
        if (visionSwitchResult.modelOverride) {
          // One-time model override
          originalModelRef.current = config.getModel();
          config.setModel(visionSwitchResult.modelOverride, {
            reason: 'vision_auto_switch',
            context: 'User-prompted vision switch (one-time override)',
          });
          return {
            shouldProceed: true,
            originalModel: originalModelRef.current,
          };
        } else if (visionSwitchResult.persistSessionModel) {
          // Persistent session model change
          config.setModel(visionSwitchResult.persistSessionModel, {
            reason: 'vision_auto_switch',
            context: 'User-prompted vision switch (session persistent)',
          });
          return { shouldProceed: true };
        }
        // For ContinueWithCurrentModel or any other case, proceed with current model
        return { shouldProceed: true };
      } catch (_error) {
        // If vision switch dialog was cancelled or errored, don't proceed
@@ -303,7 +348,10 @@ export function useVisionAutoSwitch(
  const restoreOriginalModel = useCallback(() => {
    if (originalModelRef.current) {
-      config.setModel(originalModelRef.current);
+      config.setModel(originalModelRef.current, {
        reason: 'vision_auto_switch',
        context: 'Restoring original model after vision switch',
      });
      originalModelRef.current = null;
    }
  }, [config]);
--- a/packages/cli/src/ui/models/availableModels.ts
+++ b/packages/cli/src/ui/models/availableModels.ts
@@ -10,9 +10,12 @@ export type AvailableModel = {
  isVision?: boolean;
 };
 export const MAINLINE_VLM = 'qwen-vl-max-latest';
 export const MAINLINE_CODER = 'qwen3-coder-plus';
 export const AVAILABLE_MODELS_QWEN: AvailableModel[] = [
-  { id: 'qwen3-coder-plus', label: 'qwen3-coder-plus' },
+  { id: MAINLINE_CODER, label: MAINLINE_CODER },
-  { id: 'qwen-vl-max-latest', label: 'qwen-vl-max', isVision: true },
+  { id: MAINLINE_VLM, label: MAINLINE_VLM, isVision: true },
 ];
 /**
@@ -42,7 +45,7 @@ export function getOpenAIAvailableModelFromEnv(): AvailableModel | null {
 * until our coding model supports multimodal.
 */
 export function getDefaultVisionModel(): string {
-  return 'qwen-vl-max-latest';
+  return MAINLINE_VLM;
 }
 export function isVisionModel(modelId: string): boolean {
--- a/packages/core/src/config/config.test.ts
+++ b/packages/core/src/config/config.test.ts
@@ -737,4 +737,85 @@ describe('setApprovalMode with folder trust', () => {
    expect(() => config.setApprovalMode(ApprovalMode.AUTO_EDIT)).not.toThrow();
    expect(() => config.setApprovalMode(ApprovalMode.DEFAULT)).not.toThrow();
  });
  describe('Model Switch Logging', () => {
    const INITIAL_MODEL = 'qwen3-coder-plus';
    const VISION_MODEL = 'qwen-vl-max-latest';

    /**
     * Builds an initialized Config that starts on INITIAL_MODEL and returns it
     * together with a spy on its logger's `logModelSwitch` method.
     */
    const setupConfigWithSpy = async (sessionId: string) => {
      const config = new Config({
        sessionId,
        targetDir: '.',
        debugMode: false,
        model: INITIAL_MODEL,
        cwd: '.',
      });
      // Initialize the config to set up content generator
      await config.initialize();
      const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch');
      return { config, logModelSwitchSpy };
    };

    it('should log model switch when setModel is called with different model', async () => {
      const { config, logModelSwitchSpy } =
        await setupConfigWithSpy('test-model-switch');

      config.setModel(VISION_MODEL, {
        reason: 'vision_auto_switch',
        context: 'Test model switch',
      });

      // The logged event carries both models plus the caller's reason/context.
      expect(logModelSwitchSpy).toHaveBeenCalledWith({
        fromModel: INITIAL_MODEL,
        toModel: VISION_MODEL,
        reason: 'vision_auto_switch',
        context: 'Test model switch',
      });
    });

    it('should not log when setModel is called with same model', async () => {
      const { config, logModelSwitchSpy } =
        await setupConfigWithSpy('test-same-model');

      // Selecting the model that is already active is not a switch.
      config.setModel(INITIAL_MODEL);

      expect(logModelSwitchSpy).not.toHaveBeenCalled();
    });

    it('should use default reason when no options provided', async () => {
      const { config, logModelSwitchSpy } = await setupConfigWithSpy(
        'test-default-reason',
      );

      // Change the model without options
      config.setModel(VISION_MODEL);

      expect(logModelSwitchSpy).toHaveBeenCalledWith({
        fromModel: INITIAL_MODEL,
        toModel: VISION_MODEL,
        reason: 'manual',
        context: undefined,
      });
    });
  });
 });
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -56,6 +56,7 @@ import {
  DEFAULT_GEMINI_FLASH_MODEL,
 } from './models.js';
 import { Storage } from './storage.js';
 import { Logger, type ModelSwitchEvent } from '../core/logger.js';
 // Re-export OAuth config type
 export type { AnyToolInvocation, MCPOAuthConfig };
@@ -239,6 +240,7 @@ export interface ConfigParameters {
  extensionManagement?: boolean;
  enablePromptCompletion?: boolean;
  skipLoopDetection?: boolean;
  vlmSwitchMode?: string;
 }
 export class Config {
@@ -330,9 +332,11 @@ export class Config {
  private readonly extensionManagement: boolean;
  private readonly enablePromptCompletion: boolean = false;
  private readonly skipLoopDetection: boolean;
  private readonly vlmSwitchMode: string | undefined;
  private initialized: boolean = false;
  readonly storage: Storage;
  private readonly fileExclusions: FileExclusions;
  private logger: Logger | null = null;
  constructor(params: ConfigParameters) {
    this.sessionId = params.sessionId;
@@ -424,8 +428,15 @@ export class Config {
    this.extensionManagement = params.extensionManagement ?? false;
    this.storage = new Storage(this.targetDir);
    this.enablePromptCompletion = params.enablePromptCompletion ?? false;
    this.vlmSwitchMode = params.vlmSwitchMode;
    this.fileExclusions = new FileExclusions(this);
    // Initialize logger asynchronously
    this.logger = new Logger(this.sessionId, this.storage);
    this.logger.initialize().catch((error) => {
      console.debug('Failed to initialize logger:', error);
    });
    if (params.contextFileName) {
      setGeminiMdFilename(params.contextFileName);
    }
@@ -517,11 +528,34 @@ export class Config {
    return this.contentGeneratorConfig?.model || this.model;
  }
-  setModel(newModel: string): void {
+  setModel(
    newModel: string,
    options?: {
      reason?: ModelSwitchEvent['reason'];
      context?: string;
    },
  ): void {
    const oldModel = this.getModel();
    if (this.contentGeneratorConfig) {
      this.contentGeneratorConfig.model = newModel;
    }
    // Log the model switch if the model actually changed
    if (oldModel !== newModel && this.logger) {
      const switchEvent: ModelSwitchEvent = {
        fromModel: oldModel,
        toModel: newModel,
        reason: options?.reason || 'manual',
        context: options?.context,
      };
      // Log asynchronously to avoid blocking
      this.logger.logModelSwitch(switchEvent).catch((error) => {
        console.debug('Failed to log model switch:', error);
      });
    }
    // Reinitialize chat with updated configuration while preserving history
    const geminiClient = this.getGeminiClient();
    if (geminiClient && geminiClient.isInitialized()) {
@@ -938,6 +972,10 @@ export class Config {
    return this.skipLoopDetection;
  }
  /**
   * Returns the VLM switch mode this Config was constructed with.
   *
   * @returns The `vlmSwitchMode` value taken from the constructor parameters,
   * or `undefined` when none was supplied.
   */
  getVlmSwitchMode(): string | undefined {
    return this.vlmSwitchMode;
  }
  async getGitService(): Promise<GitService> {
    if (!this.gitService) {
      this.gitService = new GitService(this.targetDir, this.storage);
--- a/packages/core/src/core/logger.test.ts
+++ b/packages/core/src/core/logger.test.ts
@@ -755,4 +755,84 @@ describe('Logger', () => {
      expect(logger['messageId']).toBe(0);
    });
  });
  describe('Model Switch Logging', () => {
    /** Creates a Logger for `sessionId` and waits for it to initialize. */
    const startLogger = async (sessionId: string): Promise<Logger> => {
      const logger = new Logger(sessionId, new Storage(process.cwd()));
      await logger.initialize();
      return logger;
    };

    /**
     * Reads the test log file and returns, in file order, the MODEL_SWITCH
     * entries that belong to `sessionId`.
     */
    const readModelSwitchEntries = async (
      sessionId: string,
    ): Promise<LogEntry[]> => {
      const raw = await fs.readFile(TEST_LOG_FILE_PATH, 'utf-8');
      const entries: LogEntry[] = JSON.parse(raw);
      return entries.filter(
        (entry) =>
          entry.sessionId === sessionId &&
          entry.type === MessageSenderType.MODEL_SWITCH,
      );
    };

    it('should log model switch events correctly', async () => {
      const testSessionId = 'test-session-model-switch';
      const logger = await startLogger(testSessionId);

      await logger.logModelSwitch({
        fromModel: 'qwen3-coder-plus',
        toModel: 'qwen-vl-max-latest',
        reason: 'vision_auto_switch',
        context: 'YOLO mode auto-switch for image content',
      });

      // Read the log file to verify the entry was written
      const [modelSwitchLog] = await readModelSwitchEntries(testSessionId);
      expect(modelSwitchLog).toBeDefined();
      expect(modelSwitchLog.type).toBe(MessageSenderType.MODEL_SWITCH);

      // The event itself travels as JSON inside the entry's message.
      const loggedEvent = JSON.parse(modelSwitchLog.message);
      expect(loggedEvent.fromModel).toBe('qwen3-coder-plus');
      expect(loggedEvent.toModel).toBe('qwen-vl-max-latest');
      expect(loggedEvent.reason).toBe('vision_auto_switch');
      expect(loggedEvent.context).toBe(
        'YOLO mode auto-switch for image content',
      );
    });

    it('should handle multiple model switch events', async () => {
      const testSessionId = 'test-session-multiple-switches';
      const logger = await startLogger(testSessionId);

      // Log first switch
      await logger.logModelSwitch({
        fromModel: 'qwen3-coder-plus',
        toModel: 'qwen-vl-max-latest',
        reason: 'vision_auto_switch',
        context: 'Auto-switch for image',
      });
      // Log second switch (restore)
      await logger.logModelSwitch({
        fromModel: 'qwen-vl-max-latest',
        toModel: 'qwen3-coder-plus',
        reason: 'vision_auto_switch',
        context: 'Restoring original model',
      });

      // Read the log file to verify both entries were written
      const modelSwitchLogs = await readModelSwitchEntries(testSessionId);
      expect(modelSwitchLogs).toHaveLength(2);

      const firstSwitch = JSON.parse(modelSwitchLogs[0].message);
      const secondSwitch = JSON.parse(modelSwitchLogs[1].message);
      expect(firstSwitch.fromModel).toBe('qwen3-coder-plus');
      expect(firstSwitch.toModel).toBe('qwen-vl-max-latest');
      expect(secondSwitch.fromModel).toBe('qwen-vl-max-latest');
      expect(secondSwitch.toModel).toBe('qwen3-coder-plus');
    });
  });
 });
--- a/packages/core/src/core/logger.ts
+++ b/packages/core/src/core/logger.ts
@@ -13,6 +13,7 @@ const LOG_FILE_NAME = 'logs.json';
 /** Values stored in the `type` field of a log entry. */
 export enum MessageSenderType {
  USER = 'user',
  /** Written by `Logger.logModelSwitch`; the entry's message is a JSON-encoded `ModelSwitchEvent`. */
  MODEL_SWITCH = 'model_switch',
 }
 export interface LogEntry {
@@ -23,6 +24,13 @@ export interface LogEntry {
  message: string;
 }
 /**
  * Describes one change of the active model. `Logger.logModelSwitch`
  * serializes these four fields into the message of a log entry.
  */
 export interface ModelSwitchEvent {
  /** Model that was active before the switch. */
  fromModel: string;
  /** Model that is active after the switch. */
  toModel: string;
  /** Category of the switch. */
  reason: 'vision_auto_switch' | 'manual' | 'fallback' | 'other';
  /** Optional free-form detail about why or where the switch happened. */
  context?: string;
 }
 // This regex matches any character that is NOT a letter (a-z, A-Z),
 // a number (0-9), a hyphen (-), an underscore (_), or a dot (.).
@@ -270,6 +278,17 @@ export class Logger {
    }
  }
  /**
   * Records a model switch by writing a `MODEL_SWITCH` entry through
   * `logMessage`.
   *
   * Only the four known fields of the event are copied into the JSON message,
   * so any extra properties on the passed object are not logged.
   *
   * @param event - The switch to record.
   * @returns A promise that settles once `logMessage` has finished.
   */
  async logModelSwitch(event: ModelSwitchEvent): Promise<void> {
    const { fromModel, toModel, reason, context } = event;
    // Keys are listed explicitly to keep their order in the serialized output.
    const serialized = JSON.stringify({ fromModel, toModel, reason, context });
    await this.logMessage(MessageSenderType.MODEL_SWITCH, serialized);
  }
  private _checkpointPath(tag: string): string {
    if (!tag.length) {
      throw new Error('No checkpoint tag specified.');
--- a/packages/core/src/core/tokenLimits.ts
+++ b/packages/core/src/core/tokenLimits.ts
@@ -111,6 +111,9 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
  // Commercial Qwen3-Coder-Flash: 1M token context
  [/^qwen3-coder-flash(-.*)?$/, LIMITS['1m']], // catches "qwen3-coder-flash" and date variants
  // Commercial Qwen3-Max-Preview: 256K token context
  [/^qwen3-max-preview(-.*)?$/, LIMITS['256k']], // catches "qwen3-max-preview" and date variants
  // Open-source Qwen3-Coder variants: 256K native
  [/^qwen3-coder-.*$/, LIMITS['256k']],
  // Open-source Qwen3 2507 variants: 256K native
@@ -166,8 +169,14 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
  // Qwen3-Coder-Plus: 65,536 max output tokens
  [/^qwen3-coder-plus(-.*)?$/, LIMITS['64k']],
  // Qwen3-Max-Preview: 65,536 max output tokens
  [/^qwen3-max-preview(-.*)?$/, LIMITS['64k']],
  // Qwen-VL-Max-Latest: 8,192 max output tokens
  [/^qwen-vl-max-latest$/, LIMITS['8k']],
  // Qwen3-VL-Plus: 8,192 max output tokens
  [/^qwen3-vl-plus$/, LIMITS['8k']],
 ];
 /**