diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index 279a7e2b..b05f1245 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -4,12 +4,8 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { Config , - AuthType} from '@qwen-code/qwen-code-core'; -import { - InputFormat, - logUserPrompt, -} from '@qwen-code/qwen-code-core'; +import type { Config, AuthType } from '@qwen-code/qwen-code-core'; +import { InputFormat, logUserPrompt } from '@qwen-code/qwen-code-core'; import { render } from 'ink'; import dns from 'node:dns'; import os from 'node:os'; diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 5c871ea6..7154e6aa 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -769,11 +769,17 @@ export const useGeminiStream = ( for await (const event of stream) { switch (event.type) { case ServerGeminiEventType.Thought: - thoughtBuffer = handleThoughtEvent( - event.value, - thoughtBuffer, - userMessageTimestamp, - ); + // If the thought has a subject, it's a discrete status update rather than + // a streamed textual thought, so we update the thought state directly. + if (event.value.subject) { + setThought(event.value); + } else { + thoughtBuffer = handleThoughtEvent( + event.value, + thoughtBuffer, + userMessageTimestamp, + ); + } break; case ServerGeminiEventType.Content: geminiMessageBuffer = handleContentEvent( @@ -844,6 +850,7 @@ export const useGeminiStream = ( handleMaxSessionTurnsEvent, handleSessionTokenLimitExceededEvent, handleCitationEvent, + setThought, ], ); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index b59c4017..86a21ef2 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -16,8 +16,8 @@ import { ProxyAgent, setGlobalDispatcher } from 'undici'; import type { ContentGenerator, ContentGeneratorConfig, - - AuthType} from '../core/contentGenerator.js'; + AuthType, +} from '../core/contentGenerator.js'; import type { FallbackModelHandler } from '../fallback/types.js'; import type { MCPOAuthConfig } from '../mcp/oauth-provider.js'; import type { ShellExecutionConfig } from '../services/shellExecutionService.js'; diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts index 52886467..8ba85160 100644 --- a/packages/core/src/core/contentGenerator.ts +++ b/packages/core/src/core/contentGenerator.ts @@ -32,6 +32,8 @@ export interface ContentGenerator { countTokens(request: CountTokensParameters): Promise; embedContent(request: EmbedContentParameters): Promise; + + useSummarizedThinking(): boolean; } export enum AuthType { diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index 0b8176ff..b849fdbf 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -100,6 +100,7 @@ describe('GeminiChat', () => { countTokens: vi.fn(), embedContent: vi.fn(), batchEmbedContents: vi.fn(), + useSummarizedThinking: vi.fn().mockReturnValue(false), } as unknown as ContentGenerator; mockHandleFallback.mockClear(); @@ -718,6 +719,99 @@ describe('GeminiChat', () => { 1, ); }); + + it('should handle summarized thinking by conditionally including thoughts in history', async () => { + // Case 1: useSummarizedThinking is true -> thoughts NOT in history + vi.mocked(mockContentGenerator.useSummarizedThinking).mockReturnValue( + true, + ); + const stream1 = (async function* () { + yield { + candidates: [ + { + content: { + role: 'model', + parts: [{ thought: true, text: 'T1' }, { text: 'A1' }], + }, + finishReason: 'STOP', + }, + ], + } as unknown as GenerateContentResponse; + })(); + vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue( + stream1, + ); + + const res1 = await chat.sendMessageStream('m1', { message: 'h1' }, 'p1'); + for await (const _ of res1); + + const history1 = chat.getHistory(); + expect(history1[1].parts).toEqual([{ text: 'A1' }]); + + // Case 2: useSummarizedThinking is false -> thoughts ARE in history + chat.clearHistory(); + vi.mocked(mockContentGenerator.useSummarizedThinking).mockReturnValue( + false, + ); + const stream2 = (async function* () { + yield { + candidates: [ + { + content: { + role: 'model', + parts: [{ thought: true, text: 'T2' }, { text: 'A2' }], + }, + finishReason: 'STOP', + }, + ], + } as unknown as GenerateContentResponse; + })(); + vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue( + stream2, + ); + + const res2 = await chat.sendMessageStream('m1', { message: 'h1' }, 'p2'); + for await (const _ of res2); + + const history2 = chat.getHistory(); + expect(history2[1].parts).toEqual([ + { text: 'T2', thought: true }, + { text: 'A2' }, + ]); + }); + + it('should keep parts with thoughtSignature when consolidating history', async () => { + const stream = (async function* () { + yield { + candidates: [ + { + content: { + role: 'model', + parts: [ + { + text: 'p1', + thoughtSignature: 's1', + } as unknown as { text: string; thoughtSignature: string }, + ], + }, + finishReason: 'STOP', + }, + ], + } as unknown as GenerateContentResponse; + })(); + vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue( + stream, + ); + + const res = await chat.sendMessageStream('m1', { message: 'h1' }, 'p1'); + for await (const _ of res); + + const history = chat.getHistory(); + expect(history[1].parts![0]).toEqual({ + text: 'p1', + thoughtSignature: 's1', + }); + }); }); describe('addHistory', () => { @@ -1532,7 +1626,7 @@ describe('GeminiChat', () => { }); describe('stripThoughtsFromHistory', () => { - it('should strip thought signatures', () => { + it('should strip thoughts and thought signatures, and remove empty content objects', () => { chat.setHistory([ { role: 'user', @@ -1544,10 +1638,15 @@ describe('GeminiChat', () => { { text: 'thinking...', thought: true }, { text: 'hi' }, { - functionCall: { name: 'test', args: {} }, - }, + text: 'hidden metadata', + thoughtSignature: 'abc', + } as unknown as { text: string; thoughtSignature: string }, ], }, + { + role: 'model', + parts: [{ text: 'only thinking', thought: true }], + }, ]); chat.stripThoughtsFromHistory(); @@ -1559,7 +1658,7 @@ describe('GeminiChat', () => { }, { role: 'model', - parts: [{ text: 'hi' }, { functionCall: { name: 'test', args: {} } }], + parts: [{ text: 'hi' }, { text: 'hidden metadata' }], }, ]); }); diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index e9e4fcc2..128156eb 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -109,18 +109,24 @@ function isValidContent(content: Content): boolean { if (part === undefined || Object.keys(part).length === 0) { return false; } - if ( - !part.thought && - part.text !== undefined && - part.text === '' && - part.functionCall === undefined - ) { + if (!isValidContentPart(part)) { return false; } } return true; } +function isValidContentPart(part: Part): boolean { + const isInvalid = + !part.thought && + !part.thoughtSignature && + part.text !== undefined && + part.text === '' && + part.functionCall === undefined; + + return !isInvalid; +} + /** * Validates the history contains the correct roles. * @@ -448,15 +454,29 @@ export class GeminiChat { if (!content.parts) return content; // Filter out thought parts entirely - const filteredParts = content.parts.filter( - (part) => - !( + const filteredParts = content.parts + .filter( + (part) => + !( + part && + typeof part === 'object' && + 'thought' in part && + part.thought + ), + ) + .map((part) => { + if ( part && typeof part === 'object' && - 'thought' in part && - part.thought - ), - ); + 'thoughtSignature' in part + ) { + const newPart = { ...part }; + delete (newPart as { thoughtSignature?: string }) + .thoughtSignature; + return newPart; + } + return part; + }); return { ...content, @@ -538,11 +558,15 @@ export class GeminiChat { yield chunk; // Yield every chunk to the UI immediately. } - const thoughtParts = allModelParts.filter((part) => part.thought); - const thoughtText = thoughtParts - .map((part) => part.text) - .join('') - .trim(); + let thoughtText = ''; + // Only include thoughts if not using summarized thinking. + if (!this.config.getContentGenerator().useSummarizedThinking()) { + thoughtText = allModelParts + .filter((part) => part.thought) + .map((part) => part.text) + .join('') + .trim(); + } const contentParts = allModelParts.filter((part) => !part.thought); const consolidatedHistoryParts: Part[] = []; @@ -555,7 +579,7 @@ export class GeminiChat { isValidNonThoughtTextPart(part) ) { lastPart.text += part.text; - } else { + } else if (isValidContentPart(part)) { consolidatedHistoryParts.push(part); } } diff --git a/packages/core/src/core/geminiContentGenerator/geminiContentGenerator.ts b/packages/core/src/core/geminiContentGenerator/geminiContentGenerator.ts index eca580c8..57c7cad6 100644 --- a/packages/core/src/core/geminiContentGenerator/geminiContentGenerator.ts +++ b/packages/core/src/core/geminiContentGenerator/geminiContentGenerator.ts @@ -137,4 +137,8 @@ export class GeminiContentGenerator implements ContentGenerator { ): Promise { return this.googleGenAI.models.embedContent(request); } + + useSummarizedThinking(): boolean { + return true; + } } diff --git a/packages/core/src/core/geminiContentGenerator/loggingContentGenerator.ts b/packages/core/src/core/geminiContentGenerator/loggingContentGenerator.ts index 5bffff8c..60d0fc24 100644 --- a/packages/core/src/core/geminiContentGenerator/loggingContentGenerator.ts +++ b/packages/core/src/core/geminiContentGenerator/loggingContentGenerator.ts @@ -209,6 +209,10 @@ export class LoggingContentGenerator implements ContentGenerator { return this.wrapped.embedContent(req); } + useSummarizedThinking(): boolean { + return this.wrapped.useSummarizedThinking(); + } + private toContents(contents: ContentListUnion): Content[] { if (Array.isArray(contents)) { // it's a Content[] or a PartsUnion[] diff --git a/packages/core/src/core/openaiContentGenerator/openaiContentGenerator.ts b/packages/core/src/core/openaiContentGenerator/openaiContentGenerator.ts index ae1f43e5..4dae3f19 100644 --- a/packages/core/src/core/openaiContentGenerator/openaiContentGenerator.ts +++ b/packages/core/src/core/openaiContentGenerator/openaiContentGenerator.ts @@ -154,4 +154,8 @@ export class OpenAIContentGenerator implements ContentGenerator { ); } } + + useSummarizedThinking(): boolean { + return false; + } } diff --git a/packages/core/src/core/turn.ts b/packages/core/src/core/turn.ts index edd9b24e..13cb7373 100644 --- a/packages/core/src/core/turn.ts +++ b/packages/core/src/core/turn.ts @@ -27,7 +27,11 @@ import { toFriendlyError, } from '../utils/errors.js'; import type { GeminiChat } from './geminiChat.js'; -import { getThoughtText, type ThoughtSummary } from '../utils/thoughtUtils.js'; +import { + getThoughtText, + parseThought, + type ThoughtSummary, +} from '../utils/thoughtUtils.js'; // Define a structure for tools passed to the server export interface ServerTool { @@ -266,11 +270,11 @@ export class Turn { this.currentResponseId = resp.responseId; } - const thoughtPart = getThoughtText(resp); - if (thoughtPart) { + const thoughtText = getThoughtText(resp); + if (thoughtText) { yield { type: GeminiEventType.Thought, - value: { subject: '', description: thoughtPart }, + value: parseThought(thoughtText), }; continue; } diff --git a/packages/core/src/utils/nextSpeakerChecker.test.ts b/packages/core/src/utils/nextSpeakerChecker.test.ts index 3cdb8628..e7b7bfa3 100644 --- a/packages/core/src/utils/nextSpeakerChecker.test.ts +++ b/packages/core/src/utils/nextSpeakerChecker.test.ts @@ -61,6 +61,7 @@ describe('checkNextSpeaker', () => { generateContentStream: vi.fn(), countTokens: vi.fn(), embedContent: vi.fn(), + useSummarizedThinking: vi.fn().mockReturnValue(false), } as ContentGenerator, {} as Config, ); diff --git a/packages/core/src/utils/thoughtUtils.ts b/packages/core/src/utils/thoughtUtils.ts index 21b95532..116d5717 100644 --- a/packages/core/src/utils/thoughtUtils.ts +++ b/packages/core/src/utils/thoughtUtils.ts @@ -29,7 +29,7 @@ export function parseThought(rawText: string): ThoughtSummary { const startIndex = rawText.indexOf(START_DELIMITER); if (startIndex === -1) { // No start delimiter found, the whole text is the description. - return { subject: '', description: rawText.trim() }; + return { subject: '', description: rawText }; } const endIndex = rawText.indexOf( @@ -39,7 +39,7 @@ export function parseThought(rawText: string): ThoughtSummary { if (endIndex === -1) { // Start delimiter found but no end delimiter, so it's not a valid subject. // Treat the entire string as the description. - return { subject: '', description: rawText.trim() }; + return { subject: '', description: rawText }; } const subject = rawText