feat: add yolo mode support to auto vision model switch (#652)

* feat: add yolo mode support to auto vision model switch

* feat: add cli args & env variables for switch behavior

* fix: use dedicated model names and settings

* docs: add vision model instructions

* fix: failed test case

* fix: setModel failure
This commit is contained in:
Mingholy
2025-09-24 10:21:09 +08:00
committed by GitHub
parent 5ecb4a2430
commit 48d8587bf9
26 changed files with 1133 additions and 122 deletions

View File

@@ -1514,7 +1514,7 @@ describe('loadCliConfig model selection', () => {
argv,
);
expect(config.getModel()).toBe('qwen3-coder-plus');
expect(config.getModel()).toBe('coder-model');
});
it('always prefers model from argvs', async () => {

View File

@@ -82,6 +82,7 @@ export interface CliArgs {
includeDirectories: string[] | undefined;
tavilyApiKey: string | undefined;
screenReader: boolean | undefined;
vlmSwitchMode: string | undefined;
}
export async function parseArguments(settings: Settings): Promise<CliArgs> {
@@ -249,6 +250,13 @@ export async function parseArguments(settings: Settings): Promise<CliArgs> {
description: 'Enable screen reader mode for accessibility.',
default: false,
})
.option('vlm-switch-mode', {
type: 'string',
choices: ['once', 'session', 'persist'],
description:
'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). Overrides settings files.',
default: process.env['VLM_SWITCH_MODE'],
})
.check((argv) => {
if (argv.prompt && argv['promptInteractive']) {
throw new Error(
@@ -524,6 +532,9 @@ export async function loadCliConfig(
argv.screenReader !== undefined
? argv.screenReader
: (settings.ui?.accessibility?.screenReader ?? false);
const vlmSwitchMode =
argv.vlmSwitchMode || settings.experimental?.vlmSwitchMode;
return new Config({
sessionId,
embeddingModel: DEFAULT_GEMINI_EMBEDDING_MODEL,
@@ -630,6 +641,7 @@ export async function loadCliConfig(
skipNextSpeakerCheck: settings.model?.skipNextSpeakerCheck,
enablePromptCompletion: settings.general?.enablePromptCompletion ?? false,
skipLoopDetection: settings.skipLoopDetection ?? false,
vlmSwitchMode,
});
}

View File

@@ -69,7 +69,11 @@ const MOCK_WORKSPACE_SETTINGS_PATH = pathActual.join(
);
// A more flexible type for test data that allows arbitrary properties.
type TestSettings = Settings & { [key: string]: unknown };
type TestSettings = Settings & {
[key: string]: unknown;
nested?: { [key: string]: unknown };
nestedObj?: { [key: string]: unknown };
};
vi.mock('fs', async (importOriginal) => {
// Get all the functions from the real 'fs' module
@@ -137,6 +141,9 @@ describe('Settings Loading and Merging', () => {
advanced: {
excludedEnvVars: [],
},
experimental: {},
contentGenerator: {},
systemPromptMappings: {},
extensions: {
disabled: [],
workspacesWithMigrationNudge: [],
@@ -197,6 +204,9 @@ describe('Settings Loading and Merging', () => {
advanced: {
excludedEnvVars: [],
},
experimental: {},
contentGenerator: {},
systemPromptMappings: {},
extensions: {
disabled: [],
workspacesWithMigrationNudge: [],
@@ -260,6 +270,9 @@ describe('Settings Loading and Merging', () => {
advanced: {
excludedEnvVars: [],
},
experimental: {},
contentGenerator: {},
systemPromptMappings: {},
extensions: {
disabled: [],
workspacesWithMigrationNudge: [],
@@ -320,6 +333,9 @@ describe('Settings Loading and Merging', () => {
advanced: {
excludedEnvVars: [],
},
experimental: {},
contentGenerator: {},
systemPromptMappings: {},
extensions: {
disabled: [],
workspacesWithMigrationNudge: [],
@@ -385,6 +401,9 @@ describe('Settings Loading and Merging', () => {
advanced: {
excludedEnvVars: [],
},
experimental: {},
contentGenerator: {},
systemPromptMappings: {},
extensions: {
disabled: [],
workspacesWithMigrationNudge: [],
@@ -477,6 +496,9 @@ describe('Settings Loading and Merging', () => {
advanced: {
excludedEnvVars: [],
},
experimental: {},
contentGenerator: {},
systemPromptMappings: {},
extensions: {
disabled: [],
workspacesWithMigrationNudge: [],
@@ -562,6 +584,9 @@ describe('Settings Loading and Merging', () => {
advanced: {
excludedEnvVars: [],
},
experimental: {},
contentGenerator: {},
systemPromptMappings: {},
extensions: {
disabled: [],
workspacesWithMigrationNudge: [],
@@ -691,6 +716,9 @@ describe('Settings Loading and Merging', () => {
'/system/dir',
],
},
experimental: {},
contentGenerator: {},
systemPromptMappings: {},
extensions: {
disabled: [],
workspacesWithMigrationNudge: [],
@@ -1431,6 +1459,9 @@ describe('Settings Loading and Merging', () => {
advanced: {
excludedEnvVars: [],
},
experimental: {},
contentGenerator: {},
systemPromptMappings: {},
extensions: {
disabled: [],
workspacesWithMigrationNudge: [],
@@ -1516,7 +1547,11 @@ describe('Settings Loading and Merging', () => {
'workspace_endpoint_from_env/api',
);
expect(
(settings.workspace.settings as TestSettings)['nested']['value'],
(
(settings.workspace.settings as TestSettings).nested as {
[key: string]: unknown;
}
)['value'],
).toBe('workspace_endpoint_from_env');
expect((settings.merged as TestSettings)['endpoint']).toBe(
'workspace_endpoint_from_env/api',
@@ -1766,19 +1801,39 @@ describe('Settings Loading and Merging', () => {
).toBeUndefined();
expect(
(settings.user.settings as TestSettings)['nestedObj']['nestedNull'],
(
(settings.user.settings as TestSettings).nestedObj as {
[key: string]: unknown;
}
)['nestedNull'],
).toBeNull();
expect(
(settings.user.settings as TestSettings)['nestedObj']['nestedBool'],
(
(settings.user.settings as TestSettings).nestedObj as {
[key: string]: unknown;
}
)['nestedBool'],
).toBe(true);
expect(
(settings.user.settings as TestSettings)['nestedObj']['nestedNum'],
(
(settings.user.settings as TestSettings).nestedObj as {
[key: string]: unknown;
}
)['nestedNum'],
).toBe(0);
expect(
(settings.user.settings as TestSettings)['nestedObj']['nestedString'],
(
(settings.user.settings as TestSettings).nestedObj as {
[key: string]: unknown;
}
)['nestedString'],
).toBe('literal');
expect(
(settings.user.settings as TestSettings)['nestedObj']['anotherEnv'],
(
(settings.user.settings as TestSettings).nestedObj as {
[key: string]: unknown;
}
)['anotherEnv'],
).toBe('env_string_nested_value');
delete process.env['MY_ENV_STRING'];
@@ -1864,6 +1919,9 @@ describe('Settings Loading and Merging', () => {
advanced: {
excludedEnvVars: [],
},
experimental: {},
contentGenerator: {},
systemPromptMappings: {},
extensions: {
disabled: [],
workspacesWithMigrationNudge: [],
@@ -2336,14 +2394,14 @@ describe('Settings Loading and Merging', () => {
vimMode: false,
},
model: {
maxSessionTurns: 0,
maxSessionTurns: -1,
},
context: {
includeDirectories: [],
},
security: {
folderTrust: {
enabled: null,
enabled: false,
},
},
};
@@ -2352,9 +2410,9 @@ describe('Settings Loading and Merging', () => {
expect(v1Settings).toEqual({
vimMode: false,
maxSessionTurns: 0,
maxSessionTurns: -1,
includeDirectories: [],
folderTrust: null,
folderTrust: false,
});
});

View File

@@ -396,6 +396,24 @@ function mergeSettings(
]),
],
},
experimental: {
...(systemDefaults.experimental || {}),
...(user.experimental || {}),
...(safeWorkspaceWithoutFolderTrust.experimental || {}),
...(system.experimental || {}),
},
contentGenerator: {
...(systemDefaults.contentGenerator || {}),
...(user.contentGenerator || {}),
...(safeWorkspaceWithoutFolderTrust.contentGenerator || {}),
...(system.contentGenerator || {}),
},
systemPromptMappings: {
...(systemDefaults.systemPromptMappings || {}),
...(user.systemPromptMappings || {}),
...(safeWorkspaceWithoutFolderTrust.systemPromptMappings || {}),
...(system.systemPromptMappings || {}),
},
extensions: {
...(systemDefaults.extensions || {}),
...(user.extensions || {}),

View File

@@ -746,11 +746,21 @@ export const SETTINGS_SCHEMA = {
label: 'Vision Model Preview',
category: 'Experimental',
requiresRestart: false,
default: false,
default: true,
description:
'Enable vision model support and auto-switching functionality. When disabled, vision models like qwen-vl-max-latest will be hidden and auto-switching will not occur.',
showInDialog: true,
},
vlmSwitchMode: {
type: 'string',
label: 'VLM Switch Mode',
category: 'Experimental',
requiresRestart: false,
default: undefined as string | undefined,
description:
'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). If not set, user will be prompted each time. This is a temporary experimental feature.',
showInDialog: false,
},
},
},