Update: compress/hard constrained token usage (#136)

* setup truncation & folder structure

* fix: xml in prompt; qwen code in stats page

* fix: clear & continue logic

* preflight

* add sessionTokenLimit in README
This commit is contained in:
Fan
2025-07-30 18:14:24 +08:00
committed by GitHub
parent a08bcb2f41
commit df5c4e8079
29 changed files with 1117 additions and 386 deletions

View File

@@ -382,6 +382,8 @@ export async function loadCliConfig(
model: argv.model!,
extensionContextFilePaths,
maxSessionTurns: settings.maxSessionTurns ?? -1,
sessionTokenLimit: settings.sessionTokenLimit ?? 32000,
maxFolderItems: settings.maxFolderItems ?? 20,
listExtensions: argv.listExtensions || false,
activeExtensions: activeExtensions.map((e) => ({
name: e.config.name,

View File

@@ -85,6 +85,12 @@ export interface Settings {
// Maximum number of user/model/tool turns allowed in a session.
maxSessionTurns?: number;
// Setting for maximum token limit for conversation history before blocking requests
sessionTokenLimit?: number;
// Setting for maximum number of files and folders to show in folder structure
maxFolderItems?: number;
// Sampling parameters for content generation
sampling_params?: {
top_p?: number;

View File

@@ -60,7 +60,9 @@ export const createMockCommandContext = (
byName: {},
},
},
promptCount: 0,
} as SessionStatsState,
resetSession: vi.fn(),
},
};

View File

@@ -43,17 +43,22 @@ describe('clearCommand', () => {
expect(mockResetChat).toHaveBeenCalledTimes(1);
expect(mockContext.session.resetSession).toHaveBeenCalledTimes(1);
expect(mockContext.ui.clear).toHaveBeenCalledTimes(1);
// Check the order of operations.
const setDebugMessageOrder = (mockContext.ui.setDebugMessage as Mock).mock
.invocationCallOrder[0];
const resetChatOrder = mockResetChat.mock.invocationCallOrder[0];
const resetSessionOrder = (mockContext.session.resetSession as Mock).mock
.invocationCallOrder[0];
const clearOrder = (mockContext.ui.clear as Mock).mock
.invocationCallOrder[0];
expect(setDebugMessageOrder).toBeLessThan(resetChatOrder);
expect(resetChatOrder).toBeLessThan(clearOrder);
expect(resetChatOrder).toBeLessThan(resetSessionOrder);
expect(resetSessionOrder).toBeLessThan(clearOrder);
});
it('should not attempt to reset chat if config service is not available', async () => {
@@ -73,6 +78,7 @@ describe('clearCommand', () => {
'Clearing terminal and resetting chat.',
);
expect(mockResetChat).not.toHaveBeenCalled();
expect(nullConfigContext.session.resetSession).toHaveBeenCalledTimes(1);
expect(nullConfigContext.ui.clear).toHaveBeenCalledTimes(1);
});
});

View File

@@ -12,6 +12,7 @@ export const clearCommand: SlashCommand = {
// Handler for the clear command. Posts a debug message, then resets the chat
// (when a client is reachable), calls the session's resetSession callback,
// and finally clears the UI. The steps run in exactly this order.
action: async (context, _args) => {
context.ui.setDebugMessage('Clearing terminal and resetting chat.');
// Optional chaining: if the config service or the Gemini client is missing,
// the chat reset is skipped and the remaining steps still run.
await context.services.config?.getGeminiClient()?.resetChat();
// Runs after the awaited chat reset and before the screen is cleared.
context.session.resetSession();
context.ui.clear();
},
};

View File

@@ -38,6 +38,7 @@ export interface CommandContext {
// Session-specific data
session: {
stats: SessionStatsState;
resetSession: () => void;
};
}

View File

@@ -36,7 +36,7 @@ export const AboutBox: React.FC<AboutBoxProps> = ({
>
<Box marginBottom={1}>
<Text bold color={Colors.AccentPurple}>
About Gemini CLI
About Qwen Code
</Text>
</Box>
<Box flexDirection="row">

View File

@@ -63,7 +63,7 @@ describe('<HistoryItemDisplay />', () => {
const { lastFrame } = render(
<HistoryItemDisplay {...baseItem} item={item} />,
);
expect(lastFrame()).toContain('About Gemini CLI');
expect(lastFrame()).toContain('About Qwen Code');
});
it('renders ModelStatsDisplay for "model_stats" type', () => {

View File

@@ -50,6 +50,7 @@ interface SessionStatsContextValue {
stats: SessionStatsState;
startNewPrompt: () => void;
getPromptCount: () => number;
resetSession: () => void;
}
// --- Context Definition ---
@@ -109,13 +110,23 @@ export const SessionStatsProvider: React.FC<{ children: React.ReactNode }> = ({
[stats.promptCount],
);
// Replaces the session stats with a fresh snapshot: a new start time, the
// prompt count back at zero, and the metrics / last prompt token count as
// currently reported by uiTelemetryService.
const resetSession = useCallback(() => {
  const freshStats = {
    sessionStartTime: new Date(),
    metrics: uiTelemetryService.getMetrics(),
    lastPromptTokenCount: uiTelemetryService.getLastPromptTokenCount(),
    promptCount: 0,
  };
  setStats(freshStats);
}, []);
const value = useMemo(
() => ({
stats,
startNewPrompt,
getPromptCount,
resetSession,
}),
[stats, startNewPrompt, getPromptCount],
[stats, startNewPrompt, getPromptCount, resetSession],
);
return (

View File

@@ -172,6 +172,7 @@ export const useSlashCommandProcessor = (
},
session: {
stats: session.stats,
resetSession: session.resetSession,
},
}),
[
@@ -183,6 +184,7 @@ export const useSlashCommandProcessor = (
clearItems,
refreshStatic,
session.stats,
session.resetSession,
onDebugMessage,
],
);

View File

@@ -452,6 +452,23 @@ export const useGeminiStream = (
[addItem, config],
);
// Adds an error item to the history when the session token limit has been
// exceeded. The text shows the current token count against the limit (both
// locale-formatted) followed by the list of suggested remedies.
const handleSessionTokenLimitExceededEvent = useCallback(
  (value: { currentTokens: number; limit: number; message: string }) => {
    const usedTokens = value.currentTokens.toLocaleString();
    const allowedTokens = value.limit.toLocaleString();
    // Joined with '\n'; the empty entry yields the blank line after the headline.
    const text = [
      `🚫 Session token limit exceeded: ${usedTokens} tokens > ${allowedTokens} limit.`,
      ``,
      `💡 Solutions:`,
      ` • Start a new session: Use /clear command`,
      ` • Increase limit: Add "sessionTokenLimit": (e.g., 128000) to your settings.json`,
      ` • Compress history: Use /compress command to compress history`,
    ].join('\n');
    return addItem({ type: 'error', text }, Date.now());
  },
  [addItem],
);
const handleLoopDetectedEvent = useCallback(() => {
addItem(
{
@@ -501,6 +518,9 @@ export const useGeminiStream = (
case ServerGeminiEventType.MaxSessionTurns:
handleMaxSessionTurnsEvent();
break;
case ServerGeminiEventType.SessionTokenLimitExceeded:
handleSessionTokenLimitExceededEvent(event.value);
break;
case ServerGeminiEventType.LoopDetected:
// handle later because we want to move pending history to history
// before we add loop detected message to history
@@ -525,6 +545,7 @@ export const useGeminiStream = (
scheduleToolCalls,
handleChatCompressionEvent,
handleMaxSessionTurnsEvent,
handleSessionTokenLimitExceededEvent,
],
);