Compare commits

...

4 Commits

xwj02155382  88bd8ddffd  2026-01-19 15:29:23 +08:00
docs: add contextWindowSize configuration documentation

xwj02155382  af269e6474  2026-01-19 14:51:05 +08:00
fix: add getContentGeneratorConfig mock to Footer.test.tsx

xwj02155382  ef769c49bf  2026-01-19 14:37:17 +08:00
Merge branch 'main' of github.com:QwenLM/qwen-code into feature/add-context-window-size-config

xwj02155382  a684f07ff4  2026-01-19 14:21:05 +08:00
refactor: move contextWindowSize to ContentGeneratorConfig

- Add contextWindowSize field to ContentGeneratorConfig interface
- Update tokenLimit function to accept contentGeneratorConfig parameter
- Implement priority logic: user config > auto-detection
- Update chatCompressionService to use new API via getContentGeneratorConfig()
- Add contextWindowSize to MODEL_GENERATION_CONFIG_FIELDS for config resolution
- Add contextWindowSize to CLI settings schema (model.generationConfig)
- Update UI components (Footer, ContextUsageDisplay) to use new config API
- Fix test mocks to include getContentGeneratorConfig method

This refactor avoids modifying 71+ test files by moving the config
to the generator level instead of the Config class level.

Modified files:
- packages/core/src/core/contentGenerator.ts
- packages/core/src/core/tokenLimits.ts
- packages/core/src/services/chatCompressionService.ts
- packages/core/src/services/chatCompressionService.test.ts
- packages/core/src/models/constants.ts
- packages/cli/src/config/settingsSchema.ts
- packages/cli/src/ui/components/ContextUsageDisplay.tsx
- packages/cli/src/ui/components/Footer.tsx
10 changed files with 85 additions and 4 deletions

Settings documentation

@@ -120,6 +120,7 @@ Settings are organized into categories. All settings should be placed within the
     "generationConfig": {
       "timeout": 60000,
       "disableCacheControl": false,
+      "contextWindowSize": 128000,
       "customHeaders": {
         "X-Request-ID": "req-123",
         "X-User-ID": "user-456"
@@ -134,6 +135,46 @@ Settings are organized into categories. All settings should be placed within the
   }
 ```
+
+**contextWindowSize:**
+
+The `contextWindowSize` field allows you to manually override the automatic context window size detection. This is useful when you want to:
+
+- **Optimize performance**: Limit context size to improve response speed
+- **Control costs**: Reduce token usage to lower API call costs
+- **Handle specific requirements**: Set a custom limit when automatic detection doesn't match your needs
+- **Testing scenarios**: Use smaller context windows in test environments
+
+**Values:**
+
+- `-1` (default): Use automatic detection based on the model's capabilities
+- Positive number: Manually specify the context window size in tokens (e.g., `128000` for 128k tokens)
+
+**Example with contextWindowSize:**
+
+```json
+{
+  "model": {
+    "generationConfig": {
+      "contextWindowSize": 128000 // Override to 128k tokens
+    }
+  }
+}
+```
+
+Or use `-1` for automatic detection:
+
+```json
+{
+  "model": {
+    "generationConfig": {
+      "contextWindowSize": -1 // Auto-detect based on model (default)
+    }
+  }
+}
+```
+
+**Priority:** User-configured `contextWindowSize` > Automatic detection > Default value
 
 The `customHeaders` field allows you to add custom HTTP headers to all API requests. This is useful for request tracing, monitoring, API gateway routing, or when different models require different headers. If `customHeaders` is defined in `modelProviders[].generationConfig.customHeaders`, it will be used directly; otherwise, headers from `model.generationConfig.customHeaders` will be used. No merging occurs between the two levels.
 
 **model.openAILoggingDir examples:**
@@ -274,7 +315,7 @@ If you are experiencing performance issues with file searching (e.g., with `@` c
 | `tools.truncateToolOutputThreshold` | number | Truncate tool output if it is larger than this many characters. Applies to Shell, Grep, Glob, ReadFile and ReadManyFiles tools. | `25000` | Requires restart: Yes |
 | `tools.truncateToolOutputLines` | number | Maximum lines or entries kept when truncating tool output. Applies to Shell, Grep, Glob, ReadFile and ReadManyFiles tools. | `1000` | Requires restart: Yes |
 | `tools.autoAccept` | boolean | Controls whether the CLI automatically accepts and executes tool calls that are considered safe (e.g., read-only operations) without explicit user confirmation. If set to `true`, the CLI will bypass the confirmation prompt for tools deemed safe. | `false` | |
-| `tools.experimental.skills` | boolean | Enable experimental Agent Skills feature | `false` | |
+| `tools.experimental.skills` | boolean | Enable experimental Agent Skills feature | `false` | |
 
 #### mcp

packages/cli/src/config/settingsSchema.ts

@@ -690,6 +690,18 @@ const SETTINGS_SCHEMA = {
           { value: 'openapi_30', label: 'OpenAPI 3.0 Strict' },
         ],
       },
+      contextWindowSize: {
+        type: 'number',
+        label: 'Context Window Size',
+        category: 'Generation Configuration',
+        requiresRestart: false,
+        default: -1,
+        description:
+          'Override the automatic context window size detection. Set to -1 to use automatic detection based on the model. Set to a positive number to use a custom context window size.',
+        parentKey: 'generationConfig',
+        childKey: 'contextWindowSize',
+        showInDialog: true,
+      },
     },
   },
 },

packages/cli/src/ui/components/ContextUsageDisplay.tsx

@@ -6,18 +6,22 @@
 import { Text } from 'ink';
 import { theme } from '../semantic-colors.js';
-import { tokenLimit } from '@qwen-code/qwen-code-core';
+import { tokenLimit, type Config } from '@qwen-code/qwen-code-core';
 
 export const ContextUsageDisplay = ({
   promptTokenCount,
   model,
   terminalWidth,
+  config,
 }: {
   promptTokenCount: number;
   model: string;
   terminalWidth: number;
+  config: Config;
 }) => {
-  const percentage = promptTokenCount / tokenLimit(model);
+  const contentGeneratorConfig = config.getContentGeneratorConfig();
+  const contextLimit = tokenLimit(model, 'input', contentGeneratorConfig);
+  const percentage = promptTokenCount / contextLimit;
   const percentageLeft = ((1 - percentage) * 100).toFixed(0);
   const label = terminalWidth < 100 ? '%' : '% context left';
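Only the source of the limit changes here; the rendering math is untouched. For intuition, a worked example with assumed values (a configured 100000-token window and 25000 prompt tokens) renders as 75% context left:

```ts
// Assumed values for illustration.
const contextLimit = 100_000; // e.g. tokenLimit(model, 'input', { contextWindowSize: 100_000 })
const promptTokenCount = 25_000;
const percentage = promptTokenCount / contextLimit;
const percentageLeft = ((1 - percentage) * 100).toFixed(0); // '75'
```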

packages/cli/src/ui/components/Footer.test.tsx

@@ -43,6 +43,7 @@ const createMockConfig = (overrides = {}) => ({
   getModel: vi.fn(() => defaultProps.model),
   getTargetDir: vi.fn(() => defaultProps.targetDir),
   getDebugMode: vi.fn(() => false),
+  getContentGeneratorConfig: vi.fn(() => ({})),
   ...overrides,
 });
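Returning an empty object keeps the existing tests on the auto-detection path, since `tokenLimit` ignores an undefined `contextWindowSize`. A hypothetical variant inside this test file could exercise the override path instead:

```ts
// Hypothetical mock variant: drive the footer through the override path.
const config = createMockConfig({
  getContentGeneratorConfig: vi.fn(() => ({ contextWindowSize: 32000 })),
});
```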

packages/cli/src/ui/components/Footer.tsx

@@ -146,6 +146,7 @@ export const Footer: React.FC = () => {
           promptTokenCount={promptTokenCount}
           model={model}
           terminalWidth={terminalWidth}
+          config={config}
         />
       </Text>
       {showMemoryUsage && <MemoryUsageDisplay />}

packages/core/src/core/contentGenerator.ts

@@ -91,6 +91,9 @@ export type ContentGeneratorConfig = {
   userAgent?: string;
   // Schema compliance mode for tool definitions
   schemaCompliance?: 'auto' | 'openapi_30';
+  // Context window size override. If set to a positive number, it will override
+  // the automatic detection. Set to -1 to use automatic detection.
+  contextWindowSize?: number;
   // Custom HTTP headers to be sent with requests
   customHeaders?: Record<string, string>;
 };

packages/core/src/core/tokenLimits.ts

@@ -224,14 +224,29 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
  * or output generation based on the model and token type. It uses the same
  * normalization logic for consistency across both input and output limits.
  *
+ * If a contentGeneratorConfig is provided with a contextWindowSize > 0, that value
+ * will be used for input token limits instead of the automatic detection.
+ *
  * @param model - The model name to get the token limit for
  * @param type - The type of token limit ('input' for context window, 'output' for generation)
+ * @param contentGeneratorConfig - Optional config that may contain a contextWindowSize override
  * @returns The maximum number of tokens allowed for this model and type
  */
 export function tokenLimit(
   model: Model,
   type: TokenLimitType = 'input',
+  contentGeneratorConfig?: { contextWindowSize?: number },
 ): TokenCount {
+  // If user configured a specific context window size for input, use it
+  const configuredLimit = contentGeneratorConfig?.contextWindowSize;
+  if (
+    type === 'input' &&
+    configuredLimit !== undefined &&
+    configuredLimit > 0
+  ) {
+    return configuredLimit;
+  }
+
   const norm = normalize(model);
 
   // Choose the appropriate patterns based on token type
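Net effect: the override applies only to input limits and only when positive. A quick sketch of the resulting call semantics (the model name is a placeholder; auto-detected values come from the pattern tables):

```ts
import { tokenLimit } from '@qwen-code/qwen-code-core';

// 'my-model' is a placeholder model name.
tokenLimit('my-model');                                         // auto-detected input limit
tokenLimit('my-model', 'input', { contextWindowSize: 32000 });  // 32000: the override wins
tokenLimit('my-model', 'output', { contextWindowSize: 32000 }); // auto: override applies to input only
tokenLimit('my-model', 'input', { contextWindowSize: -1 });     // auto: non-positive values are ignored
```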

packages/core/src/models/constants.ts

@@ -25,6 +25,7 @@ export const MODEL_GENERATION_CONFIG_FIELDS = [
   'disableCacheControl',
   'schemaCompliance',
   'reasoning',
+  'contextWindowSize',
   'customHeaders',
 ] as const satisfies ReadonlyArray<keyof ContentGeneratorConfig>;
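Adding `contextWindowSize` to this list is what lets the field flow through generation-config resolution. Assuming it resolves the same way `customHeaders` does (a provider-level value is used directly when present, per the documentation above), a per-provider override would presumably look like the following; the surrounding provider fields are elided:

```json
{
  "modelProviders": [
    {
      // ...other provider fields elided...
      "generationConfig": {
        "contextWindowSize": 32000
      }
    }
  ]
}
```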

packages/core/src/services/chatCompressionService.test.ts

@@ -118,6 +118,7 @@ describe('ChatCompressionService', () => {
     mockConfig = {
       getChatCompression: vi.fn(),
       getContentGenerator: vi.fn(),
+      getContentGeneratorConfig: vi.fn().mockReturnValue({}),
     } as unknown as Config;
     vi.mocked(tokenLimit).mockReturnValue(1000);

packages/core/src/services/chatCompressionService.ts

@@ -110,7 +110,9 @@ export class ChatCompressionService {
     // Don't compress if not forced and we are under the limit.
     if (!force) {
-      if (originalTokenCount < threshold * tokenLimit(model)) {
+      const contentGeneratorConfig = config.getContentGeneratorConfig();
+      const contextLimit = tokenLimit(model, 'input', contentGeneratorConfig);
+      if (originalTokenCount < threshold * contextLimit) {
         return {
           newHistory: null,
           info: {
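With an explicit window, the compression trigger becomes predictable. A worked example with assumed values (threshold 0.7, a 128000-token override):

```ts
import { tokenLimit } from '@qwen-code/qwen-code-core';

// Assumed values for illustration only.
const threshold = 0.7;
const originalTokenCount = 90_000;
const contextLimit = tokenLimit('my-model', 'input', { contextWindowSize: 128_000 }); // 128000
// Compression is skipped while the count stays below 0.7 * 128000 = 89600 tokens,
// so a 90000-token history would be compressed.
const shouldSkip = originalTokenCount < threshold * contextLimit; // false
```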