feat: add yolo mode support to auto vision model switch (#652)

* feat: add yolo mode support to auto vision model switch

* feat: add cli args & env variables for switch behavior

* fix: use dedicated model names and settings

* docs: add vision model instructions

* fix: failed test case

* fix: setModel failure
This commit is contained in:
Mingholy
2025-09-24 10:21:09 +08:00
committed by GitHub
parent 5ecb4a2430
commit 48d8587bf9
26 changed files with 1133 additions and 122 deletions

View File

@@ -82,6 +82,7 @@ export interface CliArgs {
includeDirectories: string[] | undefined;
tavilyApiKey: string | undefined;
screenReader: boolean | undefined;
vlmSwitchMode: string | undefined;
}
export async function parseArguments(settings: Settings): Promise<CliArgs> {
@@ -249,6 +250,13 @@ export async function parseArguments(settings: Settings): Promise<CliArgs> {
description: 'Enable screen reader mode for accessibility.',
default: false,
})
.option('vlm-switch-mode', {
type: 'string',
choices: ['once', 'session', 'persist'],
description:
'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). Overrides settings files.',
default: process.env['VLM_SWITCH_MODE'],
})
.check((argv) => {
if (argv.prompt && argv['promptInteractive']) {
throw new Error(
@@ -524,6 +532,9 @@ export async function loadCliConfig(
argv.screenReader !== undefined
? argv.screenReader
: (settings.ui?.accessibility?.screenReader ?? false);
const vlmSwitchMode =
argv.vlmSwitchMode || settings.experimental?.vlmSwitchMode;
return new Config({
sessionId,
embeddingModel: DEFAULT_GEMINI_EMBEDDING_MODEL,
@@ -630,6 +641,7 @@ export async function loadCliConfig(
skipNextSpeakerCheck: settings.model?.skipNextSpeakerCheck,
enablePromptCompletion: settings.general?.enablePromptCompletion ?? false,
skipLoopDetection: settings.skipLoopDetection ?? false,
vlmSwitchMode,
});
}