Vision model support for Qwen-OAuth (#525)

* refactor: openaiContentGenerator

* refactor: optimize stream handling

* refactor: re-organize refactored files

* fix: unit test cases

* feat: `/model` command for switching to vision model

* fix: lint error

* feat: add image tokenizer to fit vlm context window

* fix: lint and type errors

* feat: add `visionModelPreview` to control default visibility of vision models

* fix: remove deprecated files

* fix: align supported image formats with bailian doc
This commit is contained in:
Mingholy
2025-09-18 13:32:00 +08:00
committed by GitHub
parent 56808ac210
commit 761833c915
41 changed files with 4083 additions and 5336 deletions

View File

@@ -0,0 +1,246 @@
/**
* @license
* Copyright 2025 Qwen
* SPDX-License-Identifier: Apache-2.0
*/
import React from 'react';
import { render } from 'ink-testing-library';
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { ModelSelectionDialog } from './ModelSelectionDialog.js';
import type { AvailableModel } from '../models/availableModels.js';
import type { RadioSelectItem } from './shared/RadioButtonSelect.js';
// Mock the useKeypress hook
const mockUseKeypress = vi.hoisted(() => vi.fn());
vi.mock('../hooks/useKeypress.js', () => ({
useKeypress: mockUseKeypress,
}));
// Mock the RadioButtonSelect component
const mockRadioButtonSelect = vi.hoisted(() => vi.fn());
vi.mock('./shared/RadioButtonSelect.js', () => ({
RadioButtonSelect: mockRadioButtonSelect,
}));
describe('ModelSelectionDialog', () => {
const mockAvailableModels: AvailableModel[] = [
{ id: 'qwen3-coder-plus', label: 'qwen3-coder-plus' },
{ id: 'qwen-vl-max-latest', label: 'qwen-vl-max', isVision: true },
{ id: 'gpt-4', label: 'GPT-4' },
];
const mockOnSelect = vi.fn();
const mockOnCancel = vi.fn();
beforeEach(() => {
vi.clearAllMocks();
// Mock RadioButtonSelect to return a simple div
mockRadioButtonSelect.mockReturnValue(
React.createElement('div', { 'data-testid': 'radio-select' }),
);
});
it('should setup escape key handler to call onCancel', () => {
render(
<ModelSelectionDialog
availableModels={mockAvailableModels}
currentModel="qwen3-coder-plus"
onSelect={mockOnSelect}
onCancel={mockOnCancel}
/>,
);
expect(mockUseKeypress).toHaveBeenCalledWith(expect.any(Function), {
isActive: true,
});
// Simulate escape key press
const keypressHandler = mockUseKeypress.mock.calls[0][0];
keypressHandler({ name: 'escape' });
expect(mockOnCancel).toHaveBeenCalled();
});
it('should not call onCancel for non-escape keys', () => {
render(
<ModelSelectionDialog
availableModels={mockAvailableModels}
currentModel="qwen3-coder-plus"
onSelect={mockOnSelect}
onCancel={mockOnCancel}
/>,
);
const keypressHandler = mockUseKeypress.mock.calls[0][0];
keypressHandler({ name: 'enter' });
expect(mockOnCancel).not.toHaveBeenCalled();
});
it('should set correct initial index for current model', () => {
render(
<ModelSelectionDialog
availableModels={mockAvailableModels}
currentModel="qwen-vl-max-latest"
onSelect={mockOnSelect}
onCancel={mockOnCancel}
/>,
);
const callArgs = mockRadioButtonSelect.mock.calls[0][0];
expect(callArgs.initialIndex).toBe(1); // qwen-vl-max-latest is at index 1
});
it('should set initial index to 0 when current model is not found', () => {
render(
<ModelSelectionDialog
availableModels={mockAvailableModels}
currentModel="non-existent-model"
onSelect={mockOnSelect}
onCancel={mockOnCancel}
/>,
);
const callArgs = mockRadioButtonSelect.mock.calls[0][0];
expect(callArgs.initialIndex).toBe(0);
});
it('should call onSelect when a model is selected', () => {
render(
<ModelSelectionDialog
availableModels={mockAvailableModels}
currentModel="qwen3-coder-plus"
onSelect={mockOnSelect}
onCancel={mockOnCancel}
/>,
);
const callArgs = mockRadioButtonSelect.mock.calls[0][0];
expect(typeof callArgs.onSelect).toBe('function');
// Simulate selection
const onSelectCallback = mockRadioButtonSelect.mock.calls[0][0].onSelect;
onSelectCallback('qwen-vl-max-latest');
expect(mockOnSelect).toHaveBeenCalledWith('qwen-vl-max-latest');
});
it('should handle empty models array', () => {
render(
<ModelSelectionDialog
availableModels={[]}
currentModel=""
onSelect={mockOnSelect}
onCancel={mockOnCancel}
/>,
);
const callArgs = mockRadioButtonSelect.mock.calls[0][0];
expect(callArgs.items).toEqual([]);
expect(callArgs.initialIndex).toBe(0);
});
it('should create correct option items with proper labels', () => {
render(
<ModelSelectionDialog
availableModels={mockAvailableModels}
currentModel="qwen3-coder-plus"
onSelect={mockOnSelect}
onCancel={mockOnCancel}
/>,
);
const expectedItems = [
{
label: 'qwen3-coder-plus (current)',
value: 'qwen3-coder-plus',
},
{
label: 'qwen-vl-max [Vision]',
value: 'qwen-vl-max-latest',
},
{
label: 'GPT-4',
value: 'gpt-4',
},
];
const callArgs = mockRadioButtonSelect.mock.calls[0][0];
expect(callArgs.items).toEqual(expectedItems);
});
it('should show vision indicator for vision models', () => {
render(
<ModelSelectionDialog
availableModels={mockAvailableModels}
currentModel="gpt-4"
onSelect={mockOnSelect}
onCancel={mockOnCancel}
/>,
);
const callArgs = mockRadioButtonSelect.mock.calls[0][0];
const visionModelItem = callArgs.items.find(
(item: RadioSelectItem<string>) => item.value === 'qwen-vl-max-latest',
);
expect(visionModelItem?.label).toContain('[Vision]');
});
it('should show current indicator for the current model', () => {
render(
<ModelSelectionDialog
availableModels={mockAvailableModels}
currentModel="qwen-vl-max-latest"
onSelect={mockOnSelect}
onCancel={mockOnCancel}
/>,
);
const callArgs = mockRadioButtonSelect.mock.calls[0][0];
const currentModelItem = callArgs.items.find(
(item: RadioSelectItem<string>) => item.value === 'qwen-vl-max-latest',
);
expect(currentModelItem?.label).toContain('(current)');
});
it('should pass isFocused prop to RadioButtonSelect', () => {
render(
<ModelSelectionDialog
availableModels={mockAvailableModels}
currentModel="qwen3-coder-plus"
onSelect={mockOnSelect}
onCancel={mockOnCancel}
/>,
);
const callArgs = mockRadioButtonSelect.mock.calls[0][0];
expect(callArgs.isFocused).toBe(true);
});
it('should handle multiple onSelect calls correctly', () => {
render(
<ModelSelectionDialog
availableModels={mockAvailableModels}
currentModel="qwen3-coder-plus"
onSelect={mockOnSelect}
onCancel={mockOnCancel}
/>,
);
const onSelectCallback = mockRadioButtonSelect.mock.calls[0][0].onSelect;
// Call multiple times
onSelectCallback('qwen3-coder-plus');
onSelectCallback('qwen-vl-max-latest');
onSelectCallback('gpt-4');
expect(mockOnSelect).toHaveBeenCalledTimes(3);
expect(mockOnSelect).toHaveBeenNthCalledWith(1, 'qwen3-coder-plus');
expect(mockOnSelect).toHaveBeenNthCalledWith(2, 'qwen-vl-max-latest');
expect(mockOnSelect).toHaveBeenNthCalledWith(3, 'gpt-4');
});
});

View File

@@ -0,0 +1,87 @@
/**
* @license
* Copyright 2025 Qwen
* SPDX-License-Identifier: Apache-2.0
*/
import type React from 'react';
import { Box, Text } from 'ink';
import { Colors } from '../colors.js';
import {
RadioButtonSelect,
type RadioSelectItem,
} from './shared/RadioButtonSelect.js';
import { useKeypress } from '../hooks/useKeypress.js';
import type { AvailableModel } from '../models/availableModels.js';
export interface ModelSelectionDialogProps {
availableModels: AvailableModel[];
currentModel: string;
onSelect: (modelId: string) => void;
onCancel: () => void;
}
export const ModelSelectionDialog: React.FC<ModelSelectionDialogProps> = ({
availableModels,
currentModel,
onSelect,
onCancel,
}) => {
useKeypress(
(key) => {
if (key.name === 'escape') {
onCancel();
}
},
{ isActive: true },
);
const options: Array<RadioSelectItem<string>> = availableModels.map(
(model) => {
const visionIndicator = model.isVision ? ' [Vision]' : '';
const currentIndicator = model.id === currentModel ? ' (current)' : '';
return {
label: `${model.label}${visionIndicator}${currentIndicator}`,
value: model.id,
};
},
);
const initialIndex = Math.max(
0,
availableModels.findIndex((model) => model.id === currentModel),
);
const handleSelect = (modelId: string) => {
onSelect(modelId);
};
return (
<Box
flexDirection="column"
borderStyle="round"
borderColor={Colors.AccentBlue}
padding={1}
width="100%"
marginLeft={1}
>
<Box flexDirection="column" marginBottom={1}>
<Text bold>Select Model</Text>
<Text>Choose a model for this session:</Text>
</Box>
<Box marginBottom={1}>
<RadioButtonSelect
items={options}
initialIndex={initialIndex}
onSelect={handleSelect}
isFocused
/>
</Box>
<Box>
<Text color={Colors.Gray}>Press Enter to select, Esc to cancel</Text>
</Box>
</Box>
);
};

View File

@@ -0,0 +1,185 @@
/**
* @license
* Copyright 2025 Qwen
* SPDX-License-Identifier: Apache-2.0
*/
import React from 'react';
import { render } from 'ink-testing-library';
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { ModelSwitchDialog, VisionSwitchOutcome } from './ModelSwitchDialog.js';
// Mock the useKeypress hook
const mockUseKeypress = vi.hoisted(() => vi.fn());
vi.mock('../hooks/useKeypress.js', () => ({
useKeypress: mockUseKeypress,
}));
// Mock the RadioButtonSelect component
const mockRadioButtonSelect = vi.hoisted(() => vi.fn());
vi.mock('./shared/RadioButtonSelect.js', () => ({
RadioButtonSelect: mockRadioButtonSelect,
}));
describe('ModelSwitchDialog', () => {
const mockOnSelect = vi.fn();
beforeEach(() => {
vi.clearAllMocks();
// Mock RadioButtonSelect to return a simple div
mockRadioButtonSelect.mockReturnValue(
React.createElement('div', { 'data-testid': 'radio-select' }),
);
});
it('should setup RadioButtonSelect with correct options', () => {
render(<ModelSwitchDialog onSelect={mockOnSelect} />);
const expectedItems = [
{
label: 'Switch for this request only',
value: VisionSwitchOutcome.SwitchOnce,
},
{
label: 'Switch session to vision model',
value: VisionSwitchOutcome.SwitchSessionToVL,
},
{
label: 'Do not switch, show guidance',
value: VisionSwitchOutcome.DisallowWithGuidance,
},
];
const callArgs = mockRadioButtonSelect.mock.calls[0][0];
expect(callArgs.items).toEqual(expectedItems);
expect(callArgs.initialIndex).toBe(0);
expect(callArgs.isFocused).toBe(true);
});
it('should call onSelect when an option is selected', () => {
render(<ModelSwitchDialog onSelect={mockOnSelect} />);
const callArgs = mockRadioButtonSelect.mock.calls[0][0];
expect(typeof callArgs.onSelect).toBe('function');
// Simulate selection of "Switch for this request only"
const onSelectCallback = mockRadioButtonSelect.mock.calls[0][0].onSelect;
onSelectCallback(VisionSwitchOutcome.SwitchOnce);
expect(mockOnSelect).toHaveBeenCalledWith(VisionSwitchOutcome.SwitchOnce);
});
it('should call onSelect with SwitchSessionToVL when second option is selected', () => {
render(<ModelSwitchDialog onSelect={mockOnSelect} />);
const onSelectCallback = mockRadioButtonSelect.mock.calls[0][0].onSelect;
onSelectCallback(VisionSwitchOutcome.SwitchSessionToVL);
expect(mockOnSelect).toHaveBeenCalledWith(
VisionSwitchOutcome.SwitchSessionToVL,
);
});
it('should call onSelect with DisallowWithGuidance when third option is selected', () => {
render(<ModelSwitchDialog onSelect={mockOnSelect} />);
const onSelectCallback = mockRadioButtonSelect.mock.calls[0][0].onSelect;
onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance);
expect(mockOnSelect).toHaveBeenCalledWith(
VisionSwitchOutcome.DisallowWithGuidance,
);
});
it('should setup escape key handler to call onSelect with DisallowWithGuidance', () => {
render(<ModelSwitchDialog onSelect={mockOnSelect} />);
expect(mockUseKeypress).toHaveBeenCalledWith(expect.any(Function), {
isActive: true,
});
// Simulate escape key press
const keypressHandler = mockUseKeypress.mock.calls[0][0];
keypressHandler({ name: 'escape' });
expect(mockOnSelect).toHaveBeenCalledWith(
VisionSwitchOutcome.DisallowWithGuidance,
);
});
it('should not call onSelect for non-escape keys', () => {
render(<ModelSwitchDialog onSelect={mockOnSelect} />);
const keypressHandler = mockUseKeypress.mock.calls[0][0];
keypressHandler({ name: 'enter' });
expect(mockOnSelect).not.toHaveBeenCalled();
});
it('should set initial index to 0 (first option)', () => {
render(<ModelSwitchDialog onSelect={mockOnSelect} />);
const callArgs = mockRadioButtonSelect.mock.calls[0][0];
expect(callArgs.initialIndex).toBe(0);
});
describe('VisionSwitchOutcome enum', () => {
it('should have correct enum values', () => {
expect(VisionSwitchOutcome.SwitchOnce).toBe('switch_once');
expect(VisionSwitchOutcome.SwitchSessionToVL).toBe(
'switch_session_to_vl',
);
expect(VisionSwitchOutcome.DisallowWithGuidance).toBe(
'disallow_with_guidance',
);
});
});
it('should handle multiple onSelect calls correctly', () => {
render(<ModelSwitchDialog onSelect={mockOnSelect} />);
const onSelectCallback = mockRadioButtonSelect.mock.calls[0][0].onSelect;
// Call multiple times
onSelectCallback(VisionSwitchOutcome.SwitchOnce);
onSelectCallback(VisionSwitchOutcome.SwitchSessionToVL);
onSelectCallback(VisionSwitchOutcome.DisallowWithGuidance);
expect(mockOnSelect).toHaveBeenCalledTimes(3);
expect(mockOnSelect).toHaveBeenNthCalledWith(
1,
VisionSwitchOutcome.SwitchOnce,
);
expect(mockOnSelect).toHaveBeenNthCalledWith(
2,
VisionSwitchOutcome.SwitchSessionToVL,
);
expect(mockOnSelect).toHaveBeenNthCalledWith(
3,
VisionSwitchOutcome.DisallowWithGuidance,
);
});
it('should pass isFocused prop to RadioButtonSelect', () => {
render(<ModelSwitchDialog onSelect={mockOnSelect} />);
const callArgs = mockRadioButtonSelect.mock.calls[0][0];
expect(callArgs.isFocused).toBe(true);
});
it('should handle escape key multiple times', () => {
render(<ModelSwitchDialog onSelect={mockOnSelect} />);
const keypressHandler = mockUseKeypress.mock.calls[0][0];
// Call escape multiple times
keypressHandler({ name: 'escape' });
keypressHandler({ name: 'escape' });
expect(mockOnSelect).toHaveBeenCalledTimes(2);
expect(mockOnSelect).toHaveBeenCalledWith(
VisionSwitchOutcome.DisallowWithGuidance,
);
});
});

View File

@@ -0,0 +1,89 @@
/**
* @license
* Copyright 2025 Qwen
* SPDX-License-Identifier: Apache-2.0
*/
import type React from 'react';
import { Box, Text } from 'ink';
import { Colors } from '../colors.js';
import {
RadioButtonSelect,
type RadioSelectItem,
} from './shared/RadioButtonSelect.js';
import { useKeypress } from '../hooks/useKeypress.js';
export enum VisionSwitchOutcome {
SwitchOnce = 'switch_once',
SwitchSessionToVL = 'switch_session_to_vl',
DisallowWithGuidance = 'disallow_with_guidance',
}
export interface ModelSwitchDialogProps {
onSelect: (outcome: VisionSwitchOutcome) => void;
}
export const ModelSwitchDialog: React.FC<ModelSwitchDialogProps> = ({
onSelect,
}) => {
useKeypress(
(key) => {
if (key.name === 'escape') {
onSelect(VisionSwitchOutcome.DisallowWithGuidance);
}
},
{ isActive: true },
);
const options: Array<RadioSelectItem<VisionSwitchOutcome>> = [
{
label: 'Switch for this request only',
value: VisionSwitchOutcome.SwitchOnce,
},
{
label: 'Switch session to vision model',
value: VisionSwitchOutcome.SwitchSessionToVL,
},
{
label: 'Do not switch, show guidance',
value: VisionSwitchOutcome.DisallowWithGuidance,
},
];
const handleSelect = (outcome: VisionSwitchOutcome) => {
onSelect(outcome);
};
return (
<Box
flexDirection="column"
borderStyle="round"
borderColor={Colors.AccentYellow}
padding={1}
width="100%"
marginLeft={1}
>
<Box flexDirection="column" marginBottom={1}>
<Text bold>Vision Model Switch Required</Text>
<Text>
Your message contains an image, but the current model doesn&apos;t
support vision.
</Text>
<Text>How would you like to proceed?</Text>
</Box>
<Box marginBottom={1}>
<RadioButtonSelect
items={options}
initialIndex={0}
onSelect={handleSelect}
isFocused
/>
</Box>
<Box>
<Text color={Colors.Gray}>Press Enter to select, Esc to cancel</Text>
</Box>
</Box>
);
};