From 48d8587bf9d694a1026496fba24b834fe9f85c5c Mon Sep 17 00:00:00 2001 From: Mingholy Date: Wed, 24 Sep 2025 10:21:09 +0800 Subject: [PATCH] feat: add yolo mode support to auto vision model switch (#652) * feat: add yolo mode support to auto vision model switch * feat: add cli args & env variables for switch behavior * fix: use dedicated model names and settings * docs: add vision model instructions * fix: failed test case * fix: setModel failure --- README.md | 53 ++ packages/cli/src/config/config.test.ts | 2 +- packages/cli/src/config/config.ts | 12 + packages/cli/src/config/settings.test.ts | 80 ++- packages/cli/src/config/settings.ts | 18 + packages/cli/src/config/settingsSchema.ts | 12 +- packages/cli/src/ui/App.tsx | 41 +- .../ui/components/ModelSwitchDialog.test.tsx | 30 +- .../src/ui/components/ModelSwitchDialog.tsx | 12 +- .../cli/src/ui/hooks/useGeminiStream.test.tsx | 44 +- packages/cli/src/ui/hooks/useGeminiStream.ts | 14 +- .../src/ui/hooks/useVisionAutoSwitch.test.ts | 529 +++++++++++++++++- .../cli/src/ui/hooks/useVisionAutoSwitch.ts | 103 +++- packages/cli/src/ui/models/availableModels.ts | 9 +- packages/core/src/config/config.test.ts | 81 +++ packages/core/src/config/config.ts | 49 +- .../core/src/config/flashFallback.test.ts | 12 +- packages/core/src/config/models.ts | 7 +- packages/core/src/core/client.ts | 2 +- packages/core/src/core/geminiChat.ts | 2 +- packages/core/src/core/logger.test.ts | 80 +++ packages/core/src/core/logger.ts | 19 + packages/core/src/core/prompts.ts | 8 + packages/core/src/core/tokenLimits.ts | 21 + packages/core/src/subagents/subagent.test.ts | 13 + packages/core/src/subagents/subagent.ts | 2 +- 26 files changed, 1133 insertions(+), 122 deletions(-) diff --git a/README.md b/README.md index 40419342..4c4396ec 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,7 @@ For detailed setup instructions, see [Authorization](#authorization). 
- **Code Understanding & Editing** - Query and edit large codebases beyond traditional context window limits - **Workflow Automation** - Automate operational tasks like handling pull requests and complex rebases - **Enhanced Parser** - Adapted parser specifically optimized for Qwen-Coder models +- **Vision Model Support** - Automatically detect images in your input and seamlessly switch to vision-capable models for multimodal analysis ## Installation @@ -121,6 +122,58 @@ Create or edit `.qwen/settings.json` in your home directory: > 📝 **Note**: Session token limit applies to a single conversation, not cumulative API calls. +### Vision Model Configuration + +Qwen Code includes intelligent vision model auto-switching that detects images in your input and can automatically switch to vision-capable models for multimodal analysis. **This feature is enabled by default** - when you include images in your queries, you'll see a dialog asking how you'd like to handle the vision model switch. + +#### Skip the Switch Dialog (Optional) + +If you don't want to see the interactive dialog each time, configure the default behavior in your `.qwen/settings.json`: + +```json +{ + "experimental": { + "vlmSwitchMode": "once" + } +} +``` + +**Available modes:** + +- **`"once"`** - Switch to vision model for this query only, then revert +- **`"session"`** - Switch to vision model for the entire session +- **`"persist"`** - Continue with current model (no switching) +- **Not set** - Show interactive dialog each time (default) + +#### Command Line Override + +You can also set the behavior via command line: + +```bash +# Switch once per query +qwen --vlm-switch-mode once + +# Switch for entire session +qwen --vlm-switch-mode session + +# Never switch automatically +qwen --vlm-switch-mode persist +``` + +#### Disable Vision Models (Optional) + +To completely disable vision model support, add to your `.qwen/settings.json`: + +```json +{ + "experimental": { + "visionModelPreview": false + } +} 
+``` + +> 💡 **Tip**: In YOLO mode (`--yolo`), vision switching happens automatically without prompts when images are detected. + ### Authorization Choose your preferred authentication method based on your needs: diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts index a4296943..8acbe717 100644 --- a/packages/cli/src/config/config.test.ts +++ b/packages/cli/src/config/config.test.ts @@ -1514,7 +1514,7 @@ describe('loadCliConfig model selection', () => { argv, ); - expect(config.getModel()).toBe('qwen3-coder-plus'); + expect(config.getModel()).toBe('coder-model'); }); it('always prefers model from argvs', async () => { diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index eaa354d6..e1ee021f 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -82,6 +82,7 @@ export interface CliArgs { includeDirectories: string[] | undefined; tavilyApiKey: string | undefined; screenReader: boolean | undefined; + vlmSwitchMode: string | undefined; } export async function parseArguments(settings: Settings): Promise { @@ -249,6 +250,13 @@ export async function parseArguments(settings: Settings): Promise { description: 'Enable screen reader mode for accessibility.', default: false, }) + .option('vlm-switch-mode', { + type: 'string', + choices: ['once', 'session', 'persist'], + description: + 'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). Overrides settings files.', + default: process.env['VLM_SWITCH_MODE'], + }) .check((argv) => { if (argv.prompt && argv['promptInteractive']) { throw new Error( @@ -524,6 +532,9 @@ export async function loadCliConfig( argv.screenReader !== undefined ? argv.screenReader : (settings.ui?.accessibility?.screenReader ?? 
false); + + const vlmSwitchMode = + argv.vlmSwitchMode || settings.experimental?.vlmSwitchMode; return new Config({ sessionId, embeddingModel: DEFAULT_GEMINI_EMBEDDING_MODEL, @@ -630,6 +641,7 @@ export async function loadCliConfig( skipNextSpeakerCheck: settings.model?.skipNextSpeakerCheck, enablePromptCompletion: settings.general?.enablePromptCompletion ?? false, skipLoopDetection: settings.skipLoopDetection ?? false, + vlmSwitchMode, }); } diff --git a/packages/cli/src/config/settings.test.ts b/packages/cli/src/config/settings.test.ts index 7d0e737d..89720114 100644 --- a/packages/cli/src/config/settings.test.ts +++ b/packages/cli/src/config/settings.test.ts @@ -69,7 +69,11 @@ const MOCK_WORKSPACE_SETTINGS_PATH = pathActual.join( ); // A more flexible type for test data that allows arbitrary properties. -type TestSettings = Settings & { [key: string]: unknown }; +type TestSettings = Settings & { + [key: string]: unknown; + nested?: { [key: string]: unknown }; + nestedObj?: { [key: string]: unknown }; +}; vi.mock('fs', async (importOriginal) => { // Get all the functions from the real 'fs' module @@ -137,6 +141,9 @@ describe('Settings Loading and Merging', () => { advanced: { excludedEnvVars: [], }, + experimental: {}, + contentGenerator: {}, + systemPromptMappings: {}, extensions: { disabled: [], workspacesWithMigrationNudge: [], @@ -197,6 +204,9 @@ describe('Settings Loading and Merging', () => { advanced: { excludedEnvVars: [], }, + experimental: {}, + contentGenerator: {}, + systemPromptMappings: {}, extensions: { disabled: [], workspacesWithMigrationNudge: [], @@ -260,6 +270,9 @@ describe('Settings Loading and Merging', () => { advanced: { excludedEnvVars: [], }, + experimental: {}, + contentGenerator: {}, + systemPromptMappings: {}, extensions: { disabled: [], workspacesWithMigrationNudge: [], @@ -320,6 +333,9 @@ describe('Settings Loading and Merging', () => { advanced: { excludedEnvVars: [], }, + experimental: {}, + contentGenerator: {}, + 
systemPromptMappings: {}, extensions: { disabled: [], workspacesWithMigrationNudge: [], @@ -385,6 +401,9 @@ describe('Settings Loading and Merging', () => { advanced: { excludedEnvVars: [], }, + experimental: {}, + contentGenerator: {}, + systemPromptMappings: {}, extensions: { disabled: [], workspacesWithMigrationNudge: [], @@ -477,6 +496,9 @@ describe('Settings Loading and Merging', () => { advanced: { excludedEnvVars: [], }, + experimental: {}, + contentGenerator: {}, + systemPromptMappings: {}, extensions: { disabled: [], workspacesWithMigrationNudge: [], @@ -562,6 +584,9 @@ describe('Settings Loading and Merging', () => { advanced: { excludedEnvVars: [], }, + experimental: {}, + contentGenerator: {}, + systemPromptMappings: {}, extensions: { disabled: [], workspacesWithMigrationNudge: [], @@ -691,6 +716,9 @@ describe('Settings Loading and Merging', () => { '/system/dir', ], }, + experimental: {}, + contentGenerator: {}, + systemPromptMappings: {}, extensions: { disabled: [], workspacesWithMigrationNudge: [], @@ -1431,6 +1459,9 @@ describe('Settings Loading and Merging', () => { advanced: { excludedEnvVars: [], }, + experimental: {}, + contentGenerator: {}, + systemPromptMappings: {}, extensions: { disabled: [], workspacesWithMigrationNudge: [], @@ -1516,7 +1547,11 @@ describe('Settings Loading and Merging', () => { 'workspace_endpoint_from_env/api', ); expect( - (settings.workspace.settings as TestSettings)['nested']['value'], + ( + (settings.workspace.settings as TestSettings).nested as { + [key: string]: unknown; + } + )['value'], ).toBe('workspace_endpoint_from_env'); expect((settings.merged as TestSettings)['endpoint']).toBe( 'workspace_endpoint_from_env/api', @@ -1766,19 +1801,39 @@ describe('Settings Loading and Merging', () => { ).toBeUndefined(); expect( - (settings.user.settings as TestSettings)['nestedObj']['nestedNull'], + ( + (settings.user.settings as TestSettings).nestedObj as { + [key: string]: unknown; + } + )['nestedNull'], ).toBeNull(); 
expect( - (settings.user.settings as TestSettings)['nestedObj']['nestedBool'], + ( + (settings.user.settings as TestSettings).nestedObj as { + [key: string]: unknown; + } + )['nestedBool'], ).toBe(true); expect( - (settings.user.settings as TestSettings)['nestedObj']['nestedNum'], + ( + (settings.user.settings as TestSettings).nestedObj as { + [key: string]: unknown; + } + )['nestedNum'], ).toBe(0); expect( - (settings.user.settings as TestSettings)['nestedObj']['nestedString'], + ( + (settings.user.settings as TestSettings).nestedObj as { + [key: string]: unknown; + } + )['nestedString'], ).toBe('literal'); expect( - (settings.user.settings as TestSettings)['nestedObj']['anotherEnv'], + ( + (settings.user.settings as TestSettings).nestedObj as { + [key: string]: unknown; + } + )['anotherEnv'], ).toBe('env_string_nested_value'); delete process.env['MY_ENV_STRING']; @@ -1864,6 +1919,9 @@ describe('Settings Loading and Merging', () => { advanced: { excludedEnvVars: [], }, + experimental: {}, + contentGenerator: {}, + systemPromptMappings: {}, extensions: { disabled: [], workspacesWithMigrationNudge: [], @@ -2336,14 +2394,14 @@ describe('Settings Loading and Merging', () => { vimMode: false, }, model: { - maxSessionTurns: 0, + maxSessionTurns: -1, }, context: { includeDirectories: [], }, security: { folderTrust: { - enabled: null, + enabled: false, }, }, }; @@ -2352,9 +2410,9 @@ describe('Settings Loading and Merging', () => { expect(v1Settings).toEqual({ vimMode: false, - maxSessionTurns: 0, + maxSessionTurns: -1, includeDirectories: [], - folderTrust: null, + folderTrust: false, }); }); diff --git a/packages/cli/src/config/settings.ts b/packages/cli/src/config/settings.ts index f3c5a2d6..b22df887 100644 --- a/packages/cli/src/config/settings.ts +++ b/packages/cli/src/config/settings.ts @@ -396,6 +396,24 @@ function mergeSettings( ]), ], }, + experimental: { + ...(systemDefaults.experimental || {}), + ...(user.experimental || {}), + 
...(safeWorkspaceWithoutFolderTrust.experimental || {}), + ...(system.experimental || {}), + }, + contentGenerator: { + ...(systemDefaults.contentGenerator || {}), + ...(user.contentGenerator || {}), + ...(safeWorkspaceWithoutFolderTrust.contentGenerator || {}), + ...(system.contentGenerator || {}), + }, + systemPromptMappings: { + ...(systemDefaults.systemPromptMappings || {}), + ...(user.systemPromptMappings || {}), + ...(safeWorkspaceWithoutFolderTrust.systemPromptMappings || {}), + ...(system.systemPromptMappings || {}), + }, extensions: { ...(systemDefaults.extensions || {}), ...(user.extensions || {}), diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index c7f1e94e..815b5c58 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -746,11 +746,21 @@ export const SETTINGS_SCHEMA = { label: 'Vision Model Preview', category: 'Experimental', requiresRestart: false, - default: false, + default: true, description: 'Enable vision model support and auto-switching functionality. When disabled, vision models like qwen-vl-max-latest will be hidden and auto-switching will not occur.', showInDialog: true, }, + vlmSwitchMode: { + type: 'string', + label: 'VLM Switch Mode', + category: 'Experimental', + requiresRestart: false, + default: undefined as string | undefined, + description: + 'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). If not set, user will be prompted each time. 
This is a temporary experimental feature.', + showInDialog: false, + }, }, }, diff --git a/packages/cli/src/ui/App.tsx b/packages/cli/src/ui/App.tsx index 85691182..26090018 100644 --- a/packages/cli/src/ui/App.tsx +++ b/packages/cli/src/ui/App.tsx @@ -566,7 +566,9 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => { } // Switch model for future use but return false to stop current retry - config.setModel(fallbackModel); + config.setModel(fallbackModel).catch((error) => { + console.error('Failed to switch to fallback model:', error); + }); config.setFallbackMode(true); logFlashFallback( config, @@ -650,17 +652,28 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => { }, []); const handleModelSelect = useCallback( - (modelId: string) => { - config.setModel(modelId); - setCurrentModel(modelId); - setIsModelSelectionDialogOpen(false); - addItem( - { - type: MessageType.INFO, - text: `Switched model to \`${modelId}\` for this session.`, - }, - Date.now(), - ); + async (modelId: string) => { + try { + await config.setModel(modelId); + setCurrentModel(modelId); + setIsModelSelectionDialogOpen(false); + addItem( + { + type: MessageType.INFO, + text: `Switched model to \`${modelId}\` for this session.`, + }, + Date.now(), + ); + } catch (error) { + console.error('Failed to switch model:', error); + addItem( + { + type: MessageType.ERROR, + text: `Failed to switch to model \`${modelId}\`. Please try again.`, + }, + Date.now(), + ); + } }, [config, setCurrentModel, addItem], ); @@ -670,7 +683,7 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => { if (!contentGeneratorConfig) return []; const visionModelPreviewEnabled = - settings.merged.experimental?.visionModelPreview ?? false; + settings.merged.experimental?.visionModelPreview ?? 
true; switch (contentGeneratorConfig.authType) { case AuthType.QWEN_OAUTH: @@ -759,7 +772,7 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => { setModelSwitchedFromQuotaError, refreshStatic, () => cancelHandlerRef.current(), - settings.merged.experimental?.visionModelPreview ?? false, + settings.merged.experimental?.visionModelPreview ?? true, handleVisionSwitchRequired, ); diff --git a/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx b/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx index f26dcc55..aab45cc2 100644 --- a/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx +++ b/packages/cli/src/ui/components/ModelSwitchDialog.test.tsx @@ -46,8 +46,8 @@ describe('ModelSwitchDialog', () => { value: VisionSwitchOutcome.SwitchSessionToVL, }, { - label: 'Do not switch, show guidance', - value: VisionSwitchOutcome.DisallowWithGuidance, + label: 'Continue with current model', + value: VisionSwitchOutcome.ContinueWithCurrentModel, }, ]; @@ -81,18 +81,18 @@ describe('ModelSwitchDialog', () => { ); }); - it('should call onSelect with DisallowWithGuidance when third option is selected', () => { + it('should call onSelect with ContinueWithCurrentModel when third option is selected', () => { render(); const onSelectCallback = mockRadioButtonSelect.mock.calls[0][0].onSelect; - onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance); + onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel); expect(mockOnSelect).toHaveBeenCalledWith( - VisionSwitchOutcome.DisallowWithGuidance, + VisionSwitchOutcome.ContinueWithCurrentModel, ); }); - it('should setup escape key handler to call onSelect with DisallowWithGuidance', () => { + it('should setup escape key handler to call onSelect with ContinueWithCurrentModel', () => { render(); expect(mockUseKeypress).toHaveBeenCalledWith(expect.any(Function), { @@ -104,7 +104,7 @@ describe('ModelSwitchDialog', () => { keypressHandler({ name: 'escape' }); 
expect(mockOnSelect).toHaveBeenCalledWith( - VisionSwitchOutcome.DisallowWithGuidance, + VisionSwitchOutcome.ContinueWithCurrentModel, ); }); @@ -126,13 +126,9 @@ describe('ModelSwitchDialog', () => { describe('VisionSwitchOutcome enum', () => { it('should have correct enum values', () => { - expect(VisionSwitchOutcome.SwitchOnce).toBe('switch_once'); - expect(VisionSwitchOutcome.SwitchSessionToVL).toBe( - 'switch_session_to_vl', - ); - expect(VisionSwitchOutcome.DisallowWithGuidance).toBe( - 'disallow_with_guidance', - ); + expect(VisionSwitchOutcome.SwitchOnce).toBe('once'); + expect(VisionSwitchOutcome.SwitchSessionToVL).toBe('session'); + expect(VisionSwitchOutcome.ContinueWithCurrentModel).toBe('persist'); }); }); @@ -144,7 +140,7 @@ describe('ModelSwitchDialog', () => { // Call multiple times onSelectCallback(VisionSwitchOutcome.SwitchOnce); onSelectCallback(VisionSwitchOutcome.SwitchSessionToVL); - onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance); + onSelectCallback(VisionSwitchOutcome.ContinueWithCurrentModel); expect(mockOnSelect).toHaveBeenCalledTimes(3); expect(mockOnSelect).toHaveBeenNthCalledWith( @@ -157,7 +153,7 @@ describe('ModelSwitchDialog', () => { ); expect(mockOnSelect).toHaveBeenNthCalledWith( 3, - VisionSwitchOutcome.DisallowWithGuidance, + VisionSwitchOutcome.ContinueWithCurrentModel, ); }); @@ -179,7 +175,7 @@ describe('ModelSwitchDialog', () => { expect(mockOnSelect).toHaveBeenCalledTimes(2); expect(mockOnSelect).toHaveBeenCalledWith( - VisionSwitchOutcome.DisallowWithGuidance, + VisionSwitchOutcome.ContinueWithCurrentModel, ); }); }); diff --git a/packages/cli/src/ui/components/ModelSwitchDialog.tsx b/packages/cli/src/ui/components/ModelSwitchDialog.tsx index 1a8c73d4..f2993c47 100644 --- a/packages/cli/src/ui/components/ModelSwitchDialog.tsx +++ b/packages/cli/src/ui/components/ModelSwitchDialog.tsx @@ -14,9 +14,9 @@ import { import { useKeypress } from '../hooks/useKeypress.js'; export enum VisionSwitchOutcome { - SwitchOnce = 
'switch_once', - SwitchSessionToVL = 'switch_session_to_vl', - DisallowWithGuidance = 'disallow_with_guidance', + SwitchOnce = 'once', + SwitchSessionToVL = 'session', + ContinueWithCurrentModel = 'persist', } export interface ModelSwitchDialogProps { @@ -29,7 +29,7 @@ export const ModelSwitchDialog: React.FC = ({ useKeypress( (key) => { if (key.name === 'escape') { - onSelect(VisionSwitchOutcome.DisallowWithGuidance); + onSelect(VisionSwitchOutcome.ContinueWithCurrentModel); } }, { isActive: true }, @@ -45,8 +45,8 @@ export const ModelSwitchDialog: React.FC = ({ value: VisionSwitchOutcome.SwitchSessionToVL, }, { - label: 'Do not switch, show guidance', - value: VisionSwitchOutcome.DisallowWithGuidance, + label: 'Continue with current model', + value: VisionSwitchOutcome.ContinueWithCurrentModel, }, ]; diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx index 125620cf..57da20c1 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx +++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -60,7 +60,9 @@ const mockParseAndFormatApiError = vi.hoisted(() => vi.fn()); const mockHandleVisionSwitch = vi.hoisted(() => vi.fn().mockResolvedValue({ shouldProceed: true }), ); -const mockRestoreOriginalModel = vi.hoisted(() => vi.fn()); +const mockRestoreOriginalModel = vi.hoisted(() => + vi.fn().mockResolvedValue(undefined), +); vi.mock('@qwen-code/qwen-code-core', async (importOriginal) => { const actualCoreModule = (await importOriginal()) as any; @@ -301,6 +303,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ); }, { @@ -462,6 +466,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -541,6 +547,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // 
visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -649,6 +657,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -758,6 +768,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -887,6 +899,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, cancelSubmitSpy, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -1198,6 +1212,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -1251,6 +1267,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -1301,6 +1319,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -1349,6 +1369,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -1398,6 +1420,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -1487,6 +1511,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -1537,6 +1563,8 @@ describe('useGeminiStream', () => { vi.fn(), // setModelSwitched vi.fn(), // onEditorClose vi.fn(), // onCancelSubmit + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -1602,6 +1630,8 @@ 
describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -1680,6 +1710,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -1734,6 +1766,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -1943,6 +1977,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -1975,6 +2011,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -2028,6 +2066,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); @@ -2065,6 +2105,8 @@ describe('useGeminiStream', () => { () => {}, () => {}, () => {}, + false, // visionModelPreviewEnabled + undefined, // onVisionSwitchRequired (optional) ), ); diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 7f34eaa2..5bac2c41 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -89,7 +89,7 @@ export const useGeminiStream = ( setModelSwitchedFromQuotaError: React.Dispatch>, onEditorClose: () => void, onCancelSubmit: () => void, - visionModelPreviewEnabled: boolean = false, + visionModelPreviewEnabled: boolean, onVisionSwitchRequired?: (query: PartListUnion) => Promise<{ modelOverride?: string; persistSessionModel?: string; @@ -765,7 +765,9 @@ export const useGeminiStream = ( if (processingStatus === StreamProcessingStatus.UserCancelled) { // Restore original model if it was 
temporarily overridden - restoreOriginalModel(); + restoreOriginalModel().catch((error) => { + console.error('Failed to restore original model:', error); + }); isSubmittingQueryRef.current = false; return; } @@ -780,10 +782,14 @@ export const useGeminiStream = ( } // Restore original model if it was temporarily overridden - restoreOriginalModel(); + restoreOriginalModel().catch((error) => { + console.error('Failed to restore original model:', error); + }); } catch (error: unknown) { // Restore original model if it was temporarily overridden - restoreOriginalModel(); + restoreOriginalModel().catch((error) => { + console.error('Failed to restore original model:', error); + }); if (error instanceof UnauthorizedError) { onAuthError(); diff --git a/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts b/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts index dd8c6a06..c04a2404 100644 --- a/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts +++ b/packages/cli/src/ui/hooks/useVisionAutoSwitch.test.ts @@ -8,7 +8,7 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; import { renderHook, act } from '@testing-library/react'; import type { Part, PartListUnion } from '@google/genai'; -import { AuthType, type Config } from '@qwen-code/qwen-code-core'; +import { AuthType, type Config, ApprovalMode } from '@qwen-code/qwen-code-core'; import { shouldOfferVisionSwitch, processVisionSwitchOutcome, @@ -41,7 +41,7 @@ describe('useVisionAutoSwitch helpers', () => { const result = shouldOfferVisionSwitch( parts, AuthType.QWEN_OAUTH, - 'qwen-vl-max-latest', + 'vision-model', true, ); expect(result).toBe(false); @@ -108,6 +108,56 @@ describe('useVisionAutoSwitch helpers', () => { ); expect(result).toBe(false); }); + + it('returns true when image parts exist in YOLO mode context', () => { + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/png', data: '...' 
} }, + ]; + const result = shouldOfferVisionSwitch( + parts, + AuthType.QWEN_OAUTH, + 'qwen3-coder-plus', + true, + ); + expect(result).toBe(true); + }); + + it('returns false when no image parts exist in YOLO mode context', () => { + const parts: PartListUnion = [{ text: 'just text' }]; + const result = shouldOfferVisionSwitch( + parts, + AuthType.QWEN_OAUTH, + 'qwen3-coder-plus', + true, + ); + expect(result).toBe(false); + }); + + it('returns false when already using vision model in YOLO mode context', () => { + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/png', data: '...' } }, + ]; + const result = shouldOfferVisionSwitch( + parts, + AuthType.QWEN_OAUTH, + 'vision-model', + true, + ); + expect(result).toBe(false); + }); + + it('returns false when authType is not QWEN_OAUTH in YOLO mode context', () => { + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/png', data: '...' } }, + ]; + const result = shouldOfferVisionSwitch( + parts, + AuthType.USE_GEMINI, + 'qwen3-coder-plus', + true, + ); + expect(result).toBe(false); + }); }); describe('processVisionSwitchOutcome', () => { @@ -125,11 +175,11 @@ describe('useVisionAutoSwitch helpers', () => { expect(result).toEqual({ persistSessionModel: vl }); }); - it('maps DisallowWithGuidance to showGuidance', () => { + it('maps ContinueWithCurrentModel to empty result', () => { const result = processVisionSwitchOutcome( - VisionSwitchOutcome.DisallowWithGuidance, + VisionSwitchOutcome.ContinueWithCurrentModel, ); - expect(result).toEqual({ showGuidance: true }); + expect(result).toEqual({}); }); }); @@ -151,13 +201,20 @@ describe('useVisionAutoSwitch hook', () => { ts: number, ) => any; - const createMockConfig = (authType: AuthType, initialModel: string) => { + const createMockConfig = ( + authType: AuthType, + initialModel: string, + approvalMode: ApprovalMode = ApprovalMode.DEFAULT, + vlmSwitchMode?: string, + ) => { let currentModel = initialModel; const mockConfig: Partial = { 
getModel: vi.fn(() => currentModel), - setModel: vi.fn((m: string) => { + setModel: vi.fn(async (m: string) => { currentModel = m; }), + getApprovalMode: vi.fn(() => approvalMode), + getVlmSwitchMode: vi.fn(() => vlmSwitchMode), getContentGeneratorConfig: vi.fn(() => ({ authType, model: currentModel, @@ -226,11 +283,9 @@ describe('useVisionAutoSwitch hook', () => { expect(onVisionSwitchRequired).not.toHaveBeenCalled(); }); - it('shows guidance and blocks when dialog returns showGuidance', async () => { + it('continues with current model when dialog returns empty result', async () => { const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus'); - const onVisionSwitchRequired = vi - .fn() - .mockResolvedValue({ showGuidance: true }); + const onVisionSwitchRequired = vi.fn().mockResolvedValue({}); // Empty result for ContinueWithCurrentModel const { result } = renderHook(() => useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired), ); @@ -245,11 +300,12 @@ describe('useVisionAutoSwitch hook', () => { res = await result.current.handleVisionSwitch(parts, userTs, false); }); - expect(addItem).toHaveBeenCalledWith( + // Should not add any guidance message + expect(addItem).not.toHaveBeenCalledWith( { type: MessageType.INFO, text: getVisionSwitchGuidanceMessage() }, userTs, ); - expect(res).toEqual({ shouldProceed: false }); + expect(res).toEqual({ shouldProceed: true }); expect(config.setModel).not.toHaveBeenCalled(); }); @@ -258,7 +314,7 @@ describe('useVisionAutoSwitch hook', () => { const config = createMockConfig(AuthType.QWEN_OAUTH, initialModel); const onVisionSwitchRequired = vi .fn() - .mockResolvedValue({ modelOverride: 'qwen-vl-max-latest' }); + .mockResolvedValue({ modelOverride: 'coder-model' }); const { result } = renderHook(() => useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired), ); @@ -273,20 +329,26 @@ describe('useVisionAutoSwitch hook', () => { }); expect(res).toEqual({ shouldProceed: true, 
originalModel: initialModel }); - expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest'); + expect(config.setModel).toHaveBeenCalledWith('coder-model', { + reason: 'vision_auto_switch', + context: 'User-prompted vision switch (one-time override)', + }); // Now restore - act(() => { - result.current.restoreOriginalModel(); + await act(async () => { + await result.current.restoreOriginalModel(); + }); + expect(config.setModel).toHaveBeenLastCalledWith(initialModel, { + reason: 'vision_auto_switch', + context: 'Restoring original model after vision switch', }); - expect(config.setModel).toHaveBeenLastCalledWith(initialModel); }); it('persists session model when dialog requests persistence', async () => { const config = createMockConfig(AuthType.QWEN_OAUTH, 'qwen3-coder-plus'); const onVisionSwitchRequired = vi .fn() - .mockResolvedValue({ persistSessionModel: 'qwen-vl-max-latest' }); + .mockResolvedValue({ persistSessionModel: 'coder-model' }); const { result } = renderHook(() => useVisionAutoSwitch(config, addItem as any, true, onVisionSwitchRequired), ); @@ -301,16 +363,17 @@ describe('useVisionAutoSwitch hook', () => { }); expect(res).toEqual({ shouldProceed: true }); - expect(config.setModel).toHaveBeenCalledWith('qwen-vl-max-latest'); + expect(config.setModel).toHaveBeenCalledWith('coder-model', { + reason: 'vision_auto_switch', + context: 'User-prompted vision switch (session persistent)', + }); // Restore should be a no-op since no one-time override was used - act(() => { - result.current.restoreOriginalModel(); + await act(async () => { + await result.current.restoreOriginalModel(); }); // Last call should still be the persisted model set - expect((config.setModel as any).mock.calls.pop()?.[0]).toBe( - 'qwen-vl-max-latest', - ); + expect((config.setModel as any).mock.calls.pop()?.[0]).toBe('coder-model'); }); it('returns shouldProceed=true when dialog returns no special flags', async () => { @@ -371,4 +434,420 @@ describe('useVisionAutoSwitch 
hook', () => { expect(res).toEqual({ shouldProceed: true }); expect(onVisionSwitchRequired).not.toHaveBeenCalled(); }); + + describe('YOLO mode behavior', () => { + it('automatically switches to vision model in YOLO mode without showing dialog', async () => { + const initialModel = 'qwen3-coder-plus'; + const config = createMockConfig( + AuthType.QWEN_OAUTH, + initialModel, + ApprovalMode.YOLO, + ); + const onVisionSwitchRequired = vi.fn(); // Should not be called in YOLO mode + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/png', data: '...' } }, + ]; + + let res: any; + await act(async () => { + res = await result.current.handleVisionSwitch(parts, 7070, false); + }); + + // Should automatically switch without calling the dialog + expect(onVisionSwitchRequired).not.toHaveBeenCalled(); + expect(res).toEqual({ + shouldProceed: true, + originalModel: initialModel, + }); + expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), { + reason: 'vision_auto_switch', + context: 'YOLO mode auto-switch for image content', + }); + }); + + it('does not switch in YOLO mode when no images are present', async () => { + const config = createMockConfig( + AuthType.QWEN_OAUTH, + 'qwen3-coder-plus', + ApprovalMode.YOLO, + ); + const onVisionSwitchRequired = vi.fn(); + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [{ text: 'no images here' }]; + + let res: any; + await act(async () => { + res = await result.current.handleVisionSwitch(parts, 8080, false); + }); + + expect(res).toEqual({ shouldProceed: true }); + expect(onVisionSwitchRequired).not.toHaveBeenCalled(); + expect(config.setModel).not.toHaveBeenCalled(); + }); + + it('does not switch in YOLO mode when already using vision model', async () => { + 
const config = createMockConfig( + AuthType.QWEN_OAUTH, + 'vision-model', + ApprovalMode.YOLO, + ); + const onVisionSwitchRequired = vi.fn(); + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/png', data: '...' } }, + ]; + + let res: any; + await act(async () => { + res = await result.current.handleVisionSwitch(parts, 9090, false); + }); + + expect(res).toEqual({ shouldProceed: true }); + expect(onVisionSwitchRequired).not.toHaveBeenCalled(); + expect(config.setModel).not.toHaveBeenCalled(); + }); + + it('restores original model after YOLO mode auto-switch', async () => { + const initialModel = 'qwen3-coder-plus'; + const config = createMockConfig( + AuthType.QWEN_OAUTH, + initialModel, + ApprovalMode.YOLO, + ); + const onVisionSwitchRequired = vi.fn(); + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/png', data: '...' 
} }, + ]; + + // First, trigger the auto-switch + await act(async () => { + await result.current.handleVisionSwitch(parts, 10100, false); + }); + + // Verify model was switched + expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), { + reason: 'vision_auto_switch', + context: 'YOLO mode auto-switch for image content', + }); + + // Now restore the original model + await act(async () => { + await result.current.restoreOriginalModel(); + }); + + // Verify model was restored + expect(config.setModel).toHaveBeenLastCalledWith(initialModel, { + reason: 'vision_auto_switch', + context: 'Restoring original model after vision switch', + }); + }); + + it('does not switch in YOLO mode when authType is not QWEN_OAUTH', async () => { + const config = createMockConfig( + AuthType.USE_GEMINI, + 'qwen3-coder-plus', + ApprovalMode.YOLO, + ); + const onVisionSwitchRequired = vi.fn(); + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/png', data: '...' } }, + ]; + + let res: any; + await act(async () => { + res = await result.current.handleVisionSwitch(parts, 11110, false); + }); + + expect(res).toEqual({ shouldProceed: true }); + expect(onVisionSwitchRequired).not.toHaveBeenCalled(); + expect(config.setModel).not.toHaveBeenCalled(); + }); + + it('does not switch in YOLO mode when visionModelPreviewEnabled is false', async () => { + const config = createMockConfig( + AuthType.QWEN_OAUTH, + 'qwen3-coder-plus', + ApprovalMode.YOLO, + ); + const onVisionSwitchRequired = vi.fn(); + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + false, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/png', data: '...' 
} }, + ]; + + let res: any; + await act(async () => { + res = await result.current.handleVisionSwitch(parts, 12120, false); + }); + + expect(res).toEqual({ shouldProceed: true }); + expect(onVisionSwitchRequired).not.toHaveBeenCalled(); + expect(config.setModel).not.toHaveBeenCalled(); + }); + + it('handles multiple image formats in YOLO mode', async () => { + const initialModel = 'qwen3-coder-plus'; + const config = createMockConfig( + AuthType.QWEN_OAUTH, + initialModel, + ApprovalMode.YOLO, + ); + const onVisionSwitchRequired = vi.fn(); + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { text: 'Here are some images:' }, + { inlineData: { mimeType: 'image/jpeg', data: '...' } }, + { fileData: { mimeType: 'image/png', fileUri: 'file://image.png' } }, + { text: 'Please analyze them.' }, + ]; + + let res: any; + await act(async () => { + res = await result.current.handleVisionSwitch(parts, 13130, false); + }); + + expect(res).toEqual({ + shouldProceed: true, + originalModel: initialModel, + }); + expect(config.setModel).toHaveBeenCalledWith(getDefaultVisionModel(), { + reason: 'vision_auto_switch', + context: 'YOLO mode auto-switch for image content', + }); + expect(onVisionSwitchRequired).not.toHaveBeenCalled(); + }); + }); + + describe('VLM switch mode default behavior', () => { + it('should automatically switch once when vlmSwitchMode is "once"', async () => { + const config = createMockConfig( + AuthType.QWEN_OAUTH, + 'qwen3-coder-plus', + ApprovalMode.DEFAULT, + 'once', + ); + const onVisionSwitchRequired = vi.fn(); // Should not be called + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } }, + ]; + + const switchResult = await result.current.handleVisionSwitch( + 
parts, + Date.now(), + false, + ); + + expect(switchResult.shouldProceed).toBe(true); + expect(switchResult.originalModel).toBe('qwen3-coder-plus'); + expect(config.setModel).toHaveBeenCalledWith('vision-model', { + reason: 'vision_auto_switch', + context: 'Default VLM switch mode: once (one-time override)', + }); + expect(onVisionSwitchRequired).not.toHaveBeenCalled(); + }); + + it('should switch session when vlmSwitchMode is "session"', async () => { + const config = createMockConfig( + AuthType.QWEN_OAUTH, + 'qwen3-coder-plus', + ApprovalMode.DEFAULT, + 'session', + ); + const onVisionSwitchRequired = vi.fn(); // Should not be called + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } }, + ]; + + const switchResult = await result.current.handleVisionSwitch( + parts, + Date.now(), + false, + ); + + expect(switchResult.shouldProceed).toBe(true); + expect(switchResult.originalModel).toBeUndefined(); // No original model for session switch + expect(config.setModel).toHaveBeenCalledWith('vision-model', { + reason: 'vision_auto_switch', + context: 'Default VLM switch mode: session (session persistent)', + }); + expect(onVisionSwitchRequired).not.toHaveBeenCalled(); + }); + + it('should continue with current model when vlmSwitchMode is "persist"', async () => { + const config = createMockConfig( + AuthType.QWEN_OAUTH, + 'qwen3-coder-plus', + ApprovalMode.DEFAULT, + 'persist', + ); + const onVisionSwitchRequired = vi.fn(); // Should not be called + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } }, + ]; + + const switchResult = await result.current.handleVisionSwitch( + parts, + Date.now(), + false, + ); + + 
expect(switchResult.shouldProceed).toBe(true); + expect(switchResult.originalModel).toBeUndefined(); + expect(config.setModel).not.toHaveBeenCalled(); + expect(onVisionSwitchRequired).not.toHaveBeenCalled(); + }); + + it('should fall back to user prompt when vlmSwitchMode is not set', async () => { + const config = createMockConfig( + AuthType.QWEN_OAUTH, + 'qwen3-coder-plus', + ApprovalMode.DEFAULT, + undefined, // No default mode + ); + const onVisionSwitchRequired = vi + .fn() + .mockResolvedValue({ modelOverride: 'vision-model' }); + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } }, + ]; + + const switchResult = await result.current.handleVisionSwitch( + parts, + Date.now(), + false, + ); + + expect(switchResult.shouldProceed).toBe(true); + expect(onVisionSwitchRequired).toHaveBeenCalledWith(parts); + }); + + it('should fall back to persist behavior when vlmSwitchMode has invalid value', async () => { + const config = createMockConfig( + AuthType.QWEN_OAUTH, + 'qwen3-coder-plus', + ApprovalMode.DEFAULT, + 'invalid-value', + ); + const onVisionSwitchRequired = vi.fn(); // Should not be called + const { result } = renderHook(() => + useVisionAutoSwitch( + config, + addItem as any, + true, + onVisionSwitchRequired, + ), + ); + + const parts: PartListUnion = [ + { inlineData: { mimeType: 'image/jpeg', data: 'base64data' } }, + ]; + + const switchResult = await result.current.handleVisionSwitch( + parts, + Date.now(), + false, + ); + + expect(switchResult.shouldProceed).toBe(true); + expect(switchResult.originalModel).toBeUndefined(); + // For invalid values, it should continue with current model (persist behavior) + expect(config.setModel).not.toHaveBeenCalled(); + expect(onVisionSwitchRequired).not.toHaveBeenCalled(); + }); + }); }); diff --git 
a/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts b/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts index d4b9629c..f489c843 100644 --- a/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts +++ b/packages/cli/src/ui/hooks/useVisionAutoSwitch.ts @@ -5,7 +5,7 @@ */ import { type PartListUnion, type Part } from '@google/genai'; -import { AuthType, type Config } from '@qwen-code/qwen-code-core'; +import { AuthType, type Config, ApprovalMode } from '@qwen-code/qwen-code-core'; import { useCallback, useRef } from 'react'; import { VisionSwitchOutcome } from '../components/ModelSwitchDialog.js'; import { @@ -121,7 +121,7 @@ export function shouldOfferVisionSwitch( parts: PartListUnion, authType: AuthType, currentModel: string, - visionModelPreviewEnabled: boolean = false, + visionModelPreviewEnabled: boolean = true, ): boolean { // Only trigger for qwen-oauth if (authType !== AuthType.QWEN_OAUTH) { @@ -166,11 +166,11 @@ export function processVisionSwitchOutcome( case VisionSwitchOutcome.SwitchSessionToVL: return { persistSessionModel: vlModelId }; - case VisionSwitchOutcome.DisallowWithGuidance: - return { showGuidance: true }; + case VisionSwitchOutcome.ContinueWithCurrentModel: + return {}; // Continue with current model, no changes needed default: - return { showGuidance: true }; + return {}; // Default to continuing with current model } } @@ -198,7 +198,7 @@ export interface VisionSwitchHandlingResult { export function useVisionAutoSwitch( config: Config, addItem: UseHistoryManagerReturn['addItem'], - visionModelPreviewEnabled: boolean = false, + visionModelPreviewEnabled: boolean = true, onVisionSwitchRequired?: (query: PartListUnion) => Promise<{ modelOverride?: string; persistSessionModel?: string; @@ -252,35 +252,91 @@ export function useVisionAutoSwitch( return { shouldProceed: true }; } - try { - const visionSwitchResult = await onVisionSwitchRequired(query); + // In YOLO mode, automatically switch to vision model without user interaction + if 
(config.getApprovalMode() === ApprovalMode.YOLO) { + const vlModelId = getDefaultVisionModel(); + originalModelRef.current = config.getModel(); + await config.setModel(vlModelId, { + reason: 'vision_auto_switch', + context: 'YOLO mode auto-switch for image content', + }); + return { + shouldProceed: true, + originalModel: originalModelRef.current, + }; + } - if (visionSwitchResult.showGuidance) { - // Show guidance and don't proceed with the request - addItem( - { - type: MessageType.INFO, - text: getVisionSwitchGuidanceMessage(), - }, - userMessageTimestamp, - ); - return { shouldProceed: false }; + // Check if there's a default VLM switch mode configured + const defaultVlmSwitchMode = config.getVlmSwitchMode(); + if (defaultVlmSwitchMode) { + // Convert string value to VisionSwitchOutcome enum + let outcome: VisionSwitchOutcome; + switch (defaultVlmSwitchMode) { + case 'once': + outcome = VisionSwitchOutcome.SwitchOnce; + break; + case 'session': + outcome = VisionSwitchOutcome.SwitchSessionToVL; + break; + case 'persist': + outcome = VisionSwitchOutcome.ContinueWithCurrentModel; + break; + default: + // Invalid value: treat it like 'persist' and continue with the current model (no user prompt) + outcome = VisionSwitchOutcome.ContinueWithCurrentModel; } + // Process the default outcome + const visionSwitchResult = processVisionSwitchOutcome(outcome); + if (visionSwitchResult.modelOverride) { // One-time model override originalModelRef.current = config.getModel(); - config.setModel(visionSwitchResult.modelOverride); + await config.setModel(visionSwitchResult.modelOverride, { + reason: 'vision_auto_switch', + context: `Default VLM switch mode: ${defaultVlmSwitchMode} (one-time override)`, + }); return { shouldProceed: true, originalModel: originalModelRef.current, }; } else if (visionSwitchResult.persistSessionModel) { // Persistent session model change - config.setModel(visionSwitchResult.persistSessionModel); + await config.setModel(visionSwitchResult.persistSessionModel, { + reason: 'vision_auto_switch', + 
context: `Default VLM switch mode: ${defaultVlmSwitchMode} (session persistent)`, + }); return { shouldProceed: true }; } + // For ContinueWithCurrentModel or any other case, proceed with current model + return { shouldProceed: true }; + } + + try { + const visionSwitchResult = await onVisionSwitchRequired(query); + + if (visionSwitchResult.modelOverride) { + // One-time model override + originalModelRef.current = config.getModel(); + await config.setModel(visionSwitchResult.modelOverride, { + reason: 'vision_auto_switch', + context: 'User-prompted vision switch (one-time override)', + }); + return { + shouldProceed: true, + originalModel: originalModelRef.current, + }; + } else if (visionSwitchResult.persistSessionModel) { + // Persistent session model change + await config.setModel(visionSwitchResult.persistSessionModel, { + reason: 'vision_auto_switch', + context: 'User-prompted vision switch (session persistent)', + }); + return { shouldProceed: true }; + } + + // For ContinueWithCurrentModel or any other case, proceed with current model return { shouldProceed: true }; } catch (_error) { // If vision switch dialog was cancelled or errored, don't proceed @@ -290,9 +346,12 @@ export function useVisionAutoSwitch( [config, addItem, visionModelPreviewEnabled, onVisionSwitchRequired], ); - const restoreOriginalModel = useCallback(() => { + const restoreOriginalModel = useCallback(async () => { if (originalModelRef.current) { - config.setModel(originalModelRef.current); + await config.setModel(originalModelRef.current, { + reason: 'vision_auto_switch', + context: 'Restoring original model after vision switch', + }); originalModelRef.current = null; } }, [config]); diff --git a/packages/cli/src/ui/models/availableModels.ts b/packages/cli/src/ui/models/availableModels.ts index 7c3a1cf5..9ac4d420 100644 --- a/packages/cli/src/ui/models/availableModels.ts +++ b/packages/cli/src/ui/models/availableModels.ts @@ -10,9 +10,12 @@ export type AvailableModel = { isVision?: 
boolean; }; +export const MAINLINE_VLM = 'vision-model'; +export const MAINLINE_CODER = 'coder-model'; + export const AVAILABLE_MODELS_QWEN: AvailableModel[] = [ - { id: 'qwen3-coder-plus', label: 'qwen3-coder-plus' }, - { id: 'qwen-vl-max-latest', label: 'qwen-vl-max', isVision: true }, + { id: MAINLINE_CODER, label: MAINLINE_CODER }, + { id: MAINLINE_VLM, label: MAINLINE_VLM, isVision: true }, ]; /** @@ -42,7 +45,7 @@ export function getOpenAIAvailableModelFromEnv(): AvailableModel | null { * until our coding model supports multimodal. */ export function getDefaultVisionModel(): string { - return 'qwen-vl-max-latest'; + return MAINLINE_VLM; } export function isVisionModel(modelId: string): boolean { diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index 8d18b89a..5d83ce20 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -737,4 +737,85 @@ describe('setApprovalMode with folder trust', () => { expect(() => config.setApprovalMode(ApprovalMode.AUTO_EDIT)).not.toThrow(); expect(() => config.setApprovalMode(ApprovalMode.DEFAULT)).not.toThrow(); }); + + describe('Model Switch Logging', () => { + it('should log model switch when setModel is called with different model', async () => { + const config = new Config({ + sessionId: 'test-model-switch', + targetDir: '.', + debugMode: false, + model: 'qwen3-coder-plus', + cwd: '.', + }); + + // Initialize the config to set up content generator + await config.initialize(); + + // Mock the logger's logModelSwitch method + const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch'); + + // Change the model + await config.setModel('qwen-vl-max-latest', { + reason: 'vision_auto_switch', + context: 'Test model switch', + }); + + // Verify that logModelSwitch was called with correct parameters + expect(logModelSwitchSpy).toHaveBeenCalledWith({ + fromModel: 'qwen3-coder-plus', + toModel: 'qwen-vl-max-latest', + reason: 
'vision_auto_switch', + context: 'Test model switch', + }); + }); + + it('should not log when setModel is called with same model', async () => { + const config = new Config({ + sessionId: 'test-same-model', + targetDir: '.', + debugMode: false, + model: 'qwen3-coder-plus', + cwd: '.', + }); + + // Initialize the config to set up content generator + await config.initialize(); + + // Mock the logger's logModelSwitch method + const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch'); + + // Set the same model + await config.setModel('qwen3-coder-plus'); + + // Verify that logModelSwitch was not called + expect(logModelSwitchSpy).not.toHaveBeenCalled(); + }); + + it('should use default reason when no options provided', async () => { + const config = new Config({ + sessionId: 'test-default-reason', + targetDir: '.', + debugMode: false, + model: 'qwen3-coder-plus', + cwd: '.', + }); + + // Initialize the config to set up content generator + await config.initialize(); + + // Mock the logger's logModelSwitch method + const logModelSwitchSpy = vi.spyOn(config['logger']!, 'logModelSwitch'); + + // Change the model without options + await config.setModel('qwen-vl-max-latest'); + + // Verify that logModelSwitch was called with default reason + expect(logModelSwitchSpy).toHaveBeenCalledWith({ + fromModel: 'qwen3-coder-plus', + toModel: 'qwen-vl-max-latest', + reason: 'manual', + context: undefined, + }); + }); + }); }); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 6956fb06..9ff19919 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -56,6 +56,7 @@ import { DEFAULT_GEMINI_FLASH_MODEL, } from './models.js'; import { Storage } from './storage.js'; +import { Logger, type ModelSwitchEvent } from '../core/logger.js'; // Re-export OAuth config type export type { AnyToolInvocation, MCPOAuthConfig }; @@ -239,6 +240,7 @@ export interface ConfigParameters { extensionManagement?: boolean; 
enablePromptCompletion?: boolean; skipLoopDetection?: boolean; + vlmSwitchMode?: string; } export class Config { @@ -330,9 +332,11 @@ export class Config { private readonly extensionManagement: boolean; private readonly enablePromptCompletion: boolean = false; private readonly skipLoopDetection: boolean; + private readonly vlmSwitchMode: string | undefined; private initialized: boolean = false; readonly storage: Storage; private readonly fileExclusions: FileExclusions; + private logger: Logger | null = null; constructor(params: ConfigParameters) { this.sessionId = params.sessionId; @@ -424,8 +428,15 @@ export class Config { this.extensionManagement = params.extensionManagement ?? false; this.storage = new Storage(this.targetDir); this.enablePromptCompletion = params.enablePromptCompletion ?? false; + this.vlmSwitchMode = params.vlmSwitchMode; this.fileExclusions = new FileExclusions(this); + // Initialize logger asynchronously + this.logger = new Logger(this.sessionId, this.storage); + this.logger.initialize().catch((error) => { + console.debug('Failed to initialize logger:', error); + }); + if (params.contextFileName) { setGeminiMdFilename(params.contextFileName); } @@ -517,21 +528,47 @@ export class Config { return this.contentGeneratorConfig?.model || this.model; } - setModel(newModel: string): void { + async setModel( + newModel: string, + options?: { + reason?: ModelSwitchEvent['reason']; + context?: string; + }, + ): Promise { + const oldModel = this.getModel(); + if (this.contentGeneratorConfig) { this.contentGeneratorConfig.model = newModel; } + // Log the model switch if the model actually changed + if (oldModel !== newModel && this.logger) { + const switchEvent: ModelSwitchEvent = { + fromModel: oldModel, + toModel: newModel, + reason: options?.reason || 'manual', + context: options?.context, + }; + + // Log asynchronously to avoid blocking + this.logger.logModelSwitch(switchEvent).catch((error) => { + console.debug('Failed to log model switch:', error); 
+ }); + } + // Reinitialize chat with updated configuration while preserving history const geminiClient = this.getGeminiClient(); if (geminiClient && geminiClient.isInitialized()) { - // Use async operation but don't await to avoid blocking - geminiClient.reinitialize().catch((error) => { + // Now await the reinitialize operation to ensure completion + try { + await geminiClient.reinitialize(); + } catch (error) { console.error( 'Failed to reinitialize chat with updated config:', error, ); - }); + throw error; // Re-throw to let callers handle the error + } } } @@ -938,6 +975,10 @@ export class Config { return this.skipLoopDetection; } + getVlmSwitchMode(): string | undefined { + return this.vlmSwitchMode; + } + async getGitService(): Promise { if (!this.gitService) { this.gitService = new GitService(this.targetDir, this.storage); diff --git a/packages/core/src/config/flashFallback.test.ts b/packages/core/src/config/flashFallback.test.ts index a0034ea1..4173786c 100644 --- a/packages/core/src/config/flashFallback.test.ts +++ b/packages/core/src/config/flashFallback.test.ts @@ -41,7 +41,7 @@ describe('Flash Model Fallback Configuration', () => { // with the fallback mechanism. This will be necessary we introduce more // intelligent model routing. 
describe('setModel', () => { - it('should only mark as switched if contentGeneratorConfig exists', () => { + it('should only mark as switched if contentGeneratorConfig exists', async () => { // Create config without initializing contentGeneratorConfig const newConfig = new Config({ sessionId: 'test-session-2', @@ -52,15 +52,15 @@ describe('Flash Model Fallback Configuration', () => { }); // Should not crash when contentGeneratorConfig is undefined - newConfig.setModel(DEFAULT_GEMINI_FLASH_MODEL); + await newConfig.setModel(DEFAULT_GEMINI_FLASH_MODEL); expect(newConfig.isInFallbackMode()).toBe(false); }); }); describe('getModel', () => { - it('should return contentGeneratorConfig model if available', () => { + it('should return contentGeneratorConfig model if available', async () => { // Simulate initialized content generator config - config.setModel(DEFAULT_GEMINI_FLASH_MODEL); + await config.setModel(DEFAULT_GEMINI_FLASH_MODEL); expect(config.getModel()).toBe(DEFAULT_GEMINI_FLASH_MODEL); }); @@ -88,8 +88,8 @@ describe('Flash Model Fallback Configuration', () => { expect(config.isInFallbackMode()).toBe(false); }); - it('should persist switched state throughout session', () => { - config.setModel(DEFAULT_GEMINI_FLASH_MODEL); + it('should persist switched state throughout session', async () => { + await config.setModel(DEFAULT_GEMINI_FLASH_MODEL); // Setting state for fallback mode as is expected of clients config.setFallbackMode(true); expect(config.isInFallbackMode()).toBe(true); diff --git a/packages/core/src/config/models.ts b/packages/core/src/config/models.ts index 2a743dad..fd548737 100644 --- a/packages/core/src/config/models.ts +++ b/packages/core/src/config/models.ts @@ -4,11 +4,10 @@ * SPDX-License-Identifier: Apache-2.0 */ -export const DEFAULT_QWEN_MODEL = 'qwen3-coder-plus'; -// We do not have a fallback model for now, but note it here anyway. 
-export const DEFAULT_QWEN_FLASH_MODEL = 'qwen3-coder-flash'; +export const DEFAULT_QWEN_MODEL = 'coder-model'; +export const DEFAULT_QWEN_FLASH_MODEL = 'coder-model'; -export const DEFAULT_GEMINI_MODEL = 'qwen3-coder-plus'; +export const DEFAULT_GEMINI_MODEL = 'coder-model'; export const DEFAULT_GEMINI_FLASH_MODEL = 'gemini-2.5-flash'; export const DEFAULT_GEMINI_FLASH_LITE_MODEL = 'gemini-2.5-flash-lite'; diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index ae0c4205..8b965001 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -1053,7 +1053,7 @@ export class GeminiClient { error, ); if (accepted !== false && accepted !== null) { - this.config.setModel(fallbackModel); + await this.config.setModel(fallbackModel); this.config.setFallbackMode(true); return fallbackModel; } diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index bf8aa804..9f541601 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -224,7 +224,7 @@ export class GeminiChat { error, ); if (accepted !== false && accepted !== null) { - this.config.setModel(fallbackModel); + await this.config.setModel(fallbackModel); this.config.setFallbackMode(true); return fallbackModel; } diff --git a/packages/core/src/core/logger.test.ts b/packages/core/src/core/logger.test.ts index 0b506b4c..29793a33 100644 --- a/packages/core/src/core/logger.test.ts +++ b/packages/core/src/core/logger.test.ts @@ -755,4 +755,84 @@ describe('Logger', () => { expect(logger['messageId']).toBe(0); }); }); + + describe('Model Switch Logging', () => { + it('should log model switch events correctly', async () => { + const testSessionId = 'test-session-model-switch'; + const logger = new Logger(testSessionId, new Storage(process.cwd())); + await logger.initialize(); + + const modelSwitchEvent = { + fromModel: 'qwen3-coder-plus', + toModel: 'qwen-vl-max-latest', + reason: 'vision_auto_switch' 
as const, + context: 'YOLO mode auto-switch for image content', + }; + + await logger.logModelSwitch(modelSwitchEvent); + + // Read the log file to verify the entry was written + const logContent = await fs.readFile(TEST_LOG_FILE_PATH, 'utf-8'); + const logs: LogEntry[] = JSON.parse(logContent); + + const modelSwitchLog = logs.find( + (log) => + log.sessionId === testSessionId && + log.type === MessageSenderType.MODEL_SWITCH, + ); + + expect(modelSwitchLog).toBeDefined(); + expect(modelSwitchLog!.type).toBe(MessageSenderType.MODEL_SWITCH); + + const loggedEvent = JSON.parse(modelSwitchLog!.message); + expect(loggedEvent.fromModel).toBe('qwen3-coder-plus'); + expect(loggedEvent.toModel).toBe('qwen-vl-max-latest'); + expect(loggedEvent.reason).toBe('vision_auto_switch'); + expect(loggedEvent.context).toBe( + 'YOLO mode auto-switch for image content', + ); + }); + + it('should handle multiple model switch events', async () => { + const testSessionId = 'test-session-multiple-switches'; + const logger = new Logger(testSessionId, new Storage(process.cwd())); + await logger.initialize(); + + // Log first switch + await logger.logModelSwitch({ + fromModel: 'qwen3-coder-plus', + toModel: 'qwen-vl-max-latest', + reason: 'vision_auto_switch', + context: 'Auto-switch for image', + }); + + // Log second switch (restore) + await logger.logModelSwitch({ + fromModel: 'qwen-vl-max-latest', + toModel: 'qwen3-coder-plus', + reason: 'vision_auto_switch', + context: 'Restoring original model', + }); + + // Read the log file to verify both entries were written + const logContent = await fs.readFile(TEST_LOG_FILE_PATH, 'utf-8'); + const logs: LogEntry[] = JSON.parse(logContent); + + const modelSwitchLogs = logs.filter( + (log) => + log.sessionId === testSessionId && + log.type === MessageSenderType.MODEL_SWITCH, + ); + + expect(modelSwitchLogs).toHaveLength(2); + + const firstSwitch = JSON.parse(modelSwitchLogs[0].message); + expect(firstSwitch.fromModel).toBe('qwen3-coder-plus'); + 
expect(firstSwitch.toModel).toBe('qwen-vl-max-latest'); + + const secondSwitch = JSON.parse(modelSwitchLogs[1].message); + expect(secondSwitch.fromModel).toBe('qwen-vl-max-latest'); + expect(secondSwitch.toModel).toBe('qwen3-coder-plus'); + }); + }); }); diff --git a/packages/core/src/core/logger.ts b/packages/core/src/core/logger.ts index a837b25d..4a9604b7 100644 --- a/packages/core/src/core/logger.ts +++ b/packages/core/src/core/logger.ts @@ -13,6 +13,7 @@ const LOG_FILE_NAME = 'logs.json'; export enum MessageSenderType { USER = 'user', + MODEL_SWITCH = 'model_switch', } export interface LogEntry { @@ -23,6 +24,13 @@ export interface LogEntry { message: string; } +export interface ModelSwitchEvent { + fromModel: string; + toModel: string; + reason: 'vision_auto_switch' | 'manual' | 'fallback' | 'other'; + context?: string; +} + // This regex matches any character that is NOT a letter (a-z, A-Z), // a number (0-9), a hyphen (-), an underscore (_), or a dot (.). @@ -270,6 +278,17 @@ export class Logger { } } + async logModelSwitch(event: ModelSwitchEvent): Promise { + const message = JSON.stringify({ + fromModel: event.fromModel, + toModel: event.toModel, + reason: event.reason, + context: event.context, + }); + + await this.logMessage(MessageSenderType.MODEL_SWITCH, message); + } + private _checkpointPath(tag: string): string { if (!tag.length) { throw new Error('No checkpoint tag specified.'); diff --git a/packages/core/src/core/prompts.ts b/packages/core/src/core/prompts.ts index e18987a8..f08cbf75 100644 --- a/packages/core/src/core/prompts.ts +++ b/packages/core/src/core/prompts.ts @@ -820,6 +820,14 @@ function getToolCallExamples(model?: string): string { if (/qwen[^-]*-vl/i.test(model)) { return qwenVlToolCallExamples; } + // Match coder-model pattern (same as qwen3-coder) + if (/coder-model/i.test(model)) { + return qwenCoderToolCallExamples; + } + // Match vision-model pattern (same as qwen3-vl) + if (/vision-model/i.test(model)) { + return 
qwenVlToolCallExamples; + } } return generalToolCallExamples; diff --git a/packages/core/src/core/tokenLimits.ts b/packages/core/src/core/tokenLimits.ts index 67ff6a86..6a3e7e86 100644 --- a/packages/core/src/core/tokenLimits.ts +++ b/packages/core/src/core/tokenLimits.ts @@ -111,6 +111,12 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [ // Commercial Qwen3-Coder-Flash: 1M token context [/^qwen3-coder-flash(-.*)?$/, LIMITS['1m']], // catches "qwen3-coder-flash" and date variants + // Generic coder-model: same as qwen3-coder-plus (1M token context) + [/^coder-model$/, LIMITS['1m']], + + // Commercial Qwen3-Max-Preview: 256K token context + [/^qwen3-max-preview(-.*)?$/, LIMITS['256k']], // catches "qwen3-max-preview" and date variants + // Open-source Qwen3-Coder variants: 256K native [/^qwen3-coder-.*$/, LIMITS['256k']], // Open-source Qwen3 2507 variants: 256K native @@ -131,6 +137,9 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [ // Qwen Vision Models [/^qwen-vl-max.*$/, LIMITS['128k']], + // Generic vision-model: same as qwen-vl-max (128K token context) + [/^vision-model$/, LIMITS['128k']], + // ------------------- // ByteDance Seed-OSS (512K) // ------------------- @@ -166,8 +175,20 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [ // Qwen3-Coder-Plus: 65,536 max output tokens [/^qwen3-coder-plus(-.*)?$/, LIMITS['64k']], + // Generic coder-model: same as qwen3-coder-plus (64K max output tokens) + [/^coder-model$/, LIMITS['64k']], + + // Qwen3-Max-Preview: 65,536 max output tokens + [/^qwen3-max-preview(-.*)?$/, LIMITS['64k']], + // Qwen-VL-Max-Latest: 8,192 max output tokens [/^qwen-vl-max-latest$/, LIMITS['8k']], + + // Generic vision-model: same as qwen-vl-max-latest (8K max output tokens) + [/^vision-model$/, LIMITS['8k']], + + // Qwen3-VL-Plus: 8,192 max output tokens + [/^qwen3-vl-plus$/, LIMITS['8k']], ]; /** diff --git a/packages/core/src/subagents/subagent.test.ts b/packages/core/src/subagents/subagent.test.ts index 0388f3e5..eabd0a9d 100644 
--- a/packages/core/src/subagents/subagent.test.ts +++ b/packages/core/src/subagents/subagent.test.ts @@ -72,6 +72,19 @@ async function createMockConfig( } as unknown as ToolRegistry; vi.spyOn(config, 'getToolRegistry').mockReturnValue(mockToolRegistry); + + // Mock getContentGeneratorConfig to return a valid config + vi.spyOn(config, 'getContentGeneratorConfig').mockReturnValue({ + model: DEFAULT_GEMINI_MODEL, + authType: AuthType.USE_GEMINI, + }); + + // Mock setModel method + vi.spyOn(config, 'setModel').mockResolvedValue(); + + // Mock getSessionId method + vi.spyOn(config, 'getSessionId').mockReturnValue('test-session'); + return { config, toolRegistry: mockToolRegistry }; } diff --git a/packages/core/src/subagents/subagent.ts b/packages/core/src/subagents/subagent.ts index 02cf0e73..19636b3c 100644 --- a/packages/core/src/subagents/subagent.ts +++ b/packages/core/src/subagents/subagent.ts @@ -826,7 +826,7 @@ export class SubAgentScope { ); if (this.modelConfig.model) { - this.runtimeContext.setModel(this.modelConfig.model); + await this.runtimeContext.setModel(this.modelConfig.model); } return new GeminiChat(