Mirror of https://github.com/QwenLM/qwen-code.git (synced 2026-01-21 00:06:18 +00:00)

Compare commits: v0.7.2...feature/ad (4 commits)

| Author | SHA1 | Date |
|---|---|---|
| | 88bd8ddffd | |
| | af269e6474 | |
| | ef769c49bf | |
| | a684f07ff4 | |
@@ -120,6 +120,7 @@ Settings are organized into categories. All settings should be placed within the
     "generationConfig": {
       "timeout": 60000,
       "disableCacheControl": false,
+      "contextWindowSize": 128000,
       "customHeaders": {
         "X-Request-ID": "req-123",
         "X-User-ID": "user-456"
@@ -134,6 +135,46 @@ Settings are organized into categories. All settings should be placed within the
 }
 ```
+
+**contextWindowSize:**
+
+The `contextWindowSize` field allows you to manually override the automatic context window size detection. This is useful when you want to:
+
+- **Optimize performance**: Limit context size to improve response speed
+- **Control costs**: Reduce token usage to lower API call costs
+- **Handle specific requirements**: Set a custom limit when automatic detection doesn't match your needs
+- **Testing scenarios**: Use smaller context windows in test environments
+
+**Values:**
+
+- `-1` (default): Use automatic detection based on the model's capabilities
+- Positive number: Manually specify the context window size in tokens (e.g., `128000` for 128k tokens)
+
+**Example with contextWindowSize:**
+
+```json
+{
+  "model": {
+    "generationConfig": {
+      "contextWindowSize": 128000 // Override to 128k tokens
+    }
+  }
+}
+```
+
+Or use `-1` for automatic detection:
+
+```json
+{
+  "model": {
+    "generationConfig": {
+      "contextWindowSize": -1 // Auto-detect based on model (default)
+    }
+  }
+}
+```
+
+**Priority:** User-configured `contextWindowSize` > Automatic detection > Default value

 The `customHeaders` field allows you to add custom HTTP headers to all API requests. This is useful for request tracing, monitoring, API gateway routing, or when different models require different headers. If `customHeaders` is defined in `modelProviders[].generationConfig.customHeaders`, it will be used directly; otherwise, headers from `model.generationConfig.customHeaders` will be used. No merging occurs between the two levels.

 **model.openAILoggingDir examples:**
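To make the no-merge rule above concrete, here is a minimal sketch of a two-level configuration (the `modelProviders` entry is stripped down to the relevant field, and the header names are illustrative):

```json
{
  "model": {
    "generationConfig": {
      "customHeaders": { "X-Request-ID": "req-123" }
    }
  },
  "modelProviders": [
    {
      "generationConfig": {
        "customHeaders": { "X-Gateway-Route": "provider-a" }
      }
    }
  ]
}
```

With this configuration, requests routed through that provider send only `X-Gateway-Route`; the model-level `X-Request-ID` is not merged in.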
@@ -274,7 +315,7 @@ If you are experiencing performance issues with file searching (e.g., with `@` c
 | `tools.truncateToolOutputThreshold` | number | Truncate tool output if it is larger than this many characters. Applies to Shell, Grep, Glob, ReadFile and ReadManyFiles tools. | `25000` | Requires restart: Yes |
 | `tools.truncateToolOutputLines` | number | Maximum lines or entries kept when truncating tool output. Applies to Shell, Grep, Glob, ReadFile and ReadManyFiles tools. | `1000` | Requires restart: Yes |
 | `tools.autoAccept` | boolean | Controls whether the CLI automatically accepts and executes tool calls that are considered safe (e.g., read-only operations) without explicit user confirmation. If set to `true`, the CLI will bypass the confirmation prompt for tools deemed safe. | `false` | |
 | `tools.experimental.skills` | boolean | Enable experimental Agent Skills feature | `false` | |

 #### mcp
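As a sketch of how these settings might look in `settings.json`, assuming the dotted names nest under a top-level `tools` object as the category structure elsewhere in this document suggests (values are the defaults from the table, except `skills`, enabled here for illustration):

```json
{
  "tools": {
    "truncateToolOutputThreshold": 25000,
    "truncateToolOutputLines": 1000,
    "autoAccept": false,
    "experimental": {
      "skills": true
    }
  }
}
```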
@@ -690,6 +690,18 @@ const SETTINGS_SCHEMA = {
         { value: 'openapi_30', label: 'OpenAPI 3.0 Strict' },
       ],
     },
+    contextWindowSize: {
+      type: 'number',
+      label: 'Context Window Size',
+      category: 'Generation Configuration',
+      requiresRestart: false,
+      default: -1,
+      description:
+        'Override the automatic context window size detection. Set to -1 to use automatic detection based on the model. Set to a positive number to use a custom context window size.',
+      parentKey: 'generationConfig',
+      childKey: 'contextWindowSize',
+      showInDialog: true,
+    },
   },
 },
@@ -6,18 +6,22 @@

 import { Text } from 'ink';
 import { theme } from '../semantic-colors.js';
-import { tokenLimit } from '@qwen-code/qwen-code-core';
+import { tokenLimit, type Config } from '@qwen-code/qwen-code-core';

 export const ContextUsageDisplay = ({
   promptTokenCount,
   model,
   terminalWidth,
+  config,
 }: {
   promptTokenCount: number;
   model: string;
   terminalWidth: number;
+  config: Config;
 }) => {
-  const percentage = promptTokenCount / tokenLimit(model);
+  const contentGeneratorConfig = config.getContentGeneratorConfig();
+  const contextLimit = tokenLimit(model, 'input', contentGeneratorConfig);
+  const percentage = promptTokenCount / contextLimit;
   const percentageLeft = ((1 - percentage) * 100).toFixed(0);

   const label = terminalWidth < 100 ? '%' : '% context left';
@@ -43,6 +43,7 @@ const createMockConfig = (overrides = {}) => ({
   getModel: vi.fn(() => defaultProps.model),
   getTargetDir: vi.fn(() => defaultProps.targetDir),
   getDebugMode: vi.fn(() => false),
+  getContentGeneratorConfig: vi.fn(() => ({})),
   ...overrides,
 });
@@ -146,6 +146,7 @@ export const Footer: React.FC = () => {
           promptTokenCount={promptTokenCount}
           model={model}
           terminalWidth={terminalWidth}
+          config={config}
         />
       </Text>
       {showMemoryUsage && <MemoryUsageDisplay />}
@@ -91,6 +91,9 @@ export type ContentGeneratorConfig = {
   userAgent?: string;
   // Schema compliance mode for tool definitions
   schemaCompliance?: 'auto' | 'openapi_30';
+  // Context window size override. If set to a positive number, it will override
+  // the automatic detection. Set to -1 to use automatic detection.
+  contextWindowSize?: number;
   // Custom HTTP headers to be sent with requests
   customHeaders?: Record<string, string>;
 };
@@ -224,14 +224,29 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
  * or output generation based on the model and token type. It uses the same
  * normalization logic for consistency across both input and output limits.
  *
+ * If a contentGeneratorConfig is provided with a contextWindowSize > 0, that value
+ * will be used for input token limits instead of the automatic detection.
+ *
  * @param model - The model name to get the token limit for
  * @param type - The type of token limit ('input' for context window, 'output' for generation)
+ * @param contentGeneratorConfig - Optional config that may contain a contextWindowSize override
  * @returns The maximum number of tokens allowed for this model and type
  */
 export function tokenLimit(
   model: Model,
   type: TokenLimitType = 'input',
+  contentGeneratorConfig?: { contextWindowSize?: number },
 ): TokenCount {
+  // If user configured a specific context window size for input, use it
+  const configuredLimit = contentGeneratorConfig?.contextWindowSize;
+  if (
+    type === 'input' &&
+    configuredLimit !== undefined &&
+    configuredLimit > 0
+  ) {
+    return configuredLimit;
+  }
+
   const norm = normalize(model);

   // Choose the appropriate patterns based on token type
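As a usage sketch of the precedence this hunk implements (the model name is illustrative; only the third argument is new):

```typescript
// tokenLimit as extended above; 'qwen3-coder' stands in for any supported model name.
const autoLimit = tokenLimit('qwen3-coder'); // automatic detection
const overridden = tokenLimit('qwen3-coder', 'input', {
  contextWindowSize: 128_000, // positive override wins for input limits
});
const outputLimit = tokenLimit('qwen3-coder', 'output', {
  contextWindowSize: 128_000, // no effect: the override applies only to 'input'
});
```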
@@ -25,6 +25,7 @@ export const MODEL_GENERATION_CONFIG_FIELDS = [
   'disableCacheControl',
   'schemaCompliance',
   'reasoning',
+  'contextWindowSize',
   'customHeaders',
 ] as const satisfies ReadonlyArray<keyof ContentGeneratorConfig>;
@@ -118,6 +118,7 @@ describe('ChatCompressionService', () => {
   mockConfig = {
     getChatCompression: vi.fn(),
     getContentGenerator: vi.fn(),
+    getContentGeneratorConfig: vi.fn().mockReturnValue({}),
   } as unknown as Config;

   vi.mocked(tokenLimit).mockReturnValue(1000);
@@ -110,7 +110,9 @@ export class ChatCompressionService {

     // Don't compress if not forced and we are under the limit.
     if (!force) {
-      if (originalTokenCount < threshold * tokenLimit(model)) {
+      const contentGeneratorConfig = config.getContentGeneratorConfig();
+      const contextLimit = tokenLimit(model, 'input', contentGeneratorConfig);
+      if (originalTokenCount < threshold * contextLimit) {
         return {
           newHistory: null,
           info: {
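For intuition, the compression trigger reduces to simple arithmetic; a minimal sketch with illustrative numbers (the threshold value here is assumed, not taken from the repo):

```typescript
// With an assumed 0.7 threshold and a 128k context limit, compression kicks in
// once the history exceeds 0.7 * 128000 = 89600 tokens.
const threshold = 0.7;
const contextLimit = 128_000;
const originalTokenCount = 95_000;
const underLimit = originalTokenCount < threshold * contextLimit; // false -> compress
```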