DeepSeek V3.2 Thinking Mode Integration (#1134)

tanzhenxin
2025-12-05 15:08:35 +08:00
committed by GitHub
parent a58d3f7aaf
commit 3e2a2255ee
24 changed files with 752 additions and 107 deletions

View File

@@ -245,6 +245,7 @@ describe('runNonInteractive', () => {
[{ text: 'Test input' }],
expect.any(AbortSignal),
'prompt-id-1',
{ isContinuation: false },
);
expect(processStdoutSpy).toHaveBeenCalledWith('Hello');
expect(processStdoutSpy).toHaveBeenCalledWith(' World');
@@ -293,11 +294,21 @@ describe('runNonInteractive', () => {
expect.any(AbortSignal),
undefined,
);
// Verify first call has isContinuation: false
expect(mockGeminiClient.sendMessageStream).toHaveBeenNthCalledWith(
1,
[{ text: 'Use a tool' }],
expect.any(AbortSignal),
'prompt-id-2',
{ isContinuation: false },
);
// Verify second call (after tool execution) has isContinuation: true
expect(mockGeminiClient.sendMessageStream).toHaveBeenNthCalledWith(
2,
[{ text: 'Tool response' }],
expect.any(AbortSignal),
'prompt-id-2',
{ isContinuation: true },
);
expect(processStdoutSpy).toHaveBeenCalledWith('Final answer');
expect(processStdoutSpy).toHaveBeenCalledWith('\n');
@@ -372,6 +383,7 @@ describe('runNonInteractive', () => {
],
expect.any(AbortSignal),
'prompt-id-3',
{ isContinuation: true },
);
expect(processStdoutSpy).toHaveBeenCalledWith('Sorry, let me try again.');
});
@@ -497,6 +509,7 @@ describe('runNonInteractive', () => {
processedParts,
expect.any(AbortSignal),
'prompt-id-7',
{ isContinuation: false },
);
// 6. Assert the final output is correct
@@ -528,6 +541,7 @@ describe('runNonInteractive', () => {
[{ text: 'Test input' }],
expect.any(AbortSignal),
'prompt-id-1',
{ isContinuation: false },
);
// JSON adapter emits array of messages, last one is result with stats
@@ -680,6 +694,7 @@ describe('runNonInteractive', () => {
[{ text: 'Empty response test' }],
expect.any(AbortSignal),
'prompt-id-empty',
{ isContinuation: false },
);
// JSON adapter emits array of messages, last one is result with stats
@@ -831,6 +846,7 @@ describe('runNonInteractive', () => {
[{ text: 'Prompt from command' }],
expect.any(AbortSignal),
'prompt-id-slash',
{ isContinuation: false },
);
expect(processStdoutSpy).toHaveBeenCalledWith('Response from command');
@@ -887,6 +903,7 @@ describe('runNonInteractive', () => {
[{ text: '/unknowncommand' }],
expect.any(AbortSignal),
'prompt-id-unknown',
{ isContinuation: false },
);
expect(processStdoutSpy).toHaveBeenCalledWith('Response to unknown');
@@ -1217,6 +1234,7 @@ describe('runNonInteractive', () => {
[{ text: 'Message from stream-json input' }],
expect.any(AbortSignal),
'prompt-envelope',
{ isContinuation: false },
);
});
@@ -1692,6 +1710,7 @@ describe('runNonInteractive', () => {
[{ text: 'Simple string content' }],
expect.any(AbortSignal),
'prompt-string-content',
{ isContinuation: false },
);
// UserMessage with array of text blocks
@@ -1724,6 +1743,7 @@ describe('runNonInteractive', () => {
[{ text: 'First part' }, { text: 'Second part' }],
expect.any(AbortSignal),
'prompt-blocks-content',
{ isContinuation: false },
);
});
});

View File

@@ -172,6 +172,7 @@ export async function runNonInteractive(
adapter.emitMessage(systemMessage);
}
let isFirstTurn = true;
while (true) {
turnCount++;
if (
@@ -187,7 +188,9 @@ export async function runNonInteractive(
currentMessages[0]?.parts || [],
abortController.signal,
prompt_id,
{ isContinuation: !isFirstTurn },
);
isFirstTurn = false;
// Start assistant message for this turn
if (adapter) {
@@ -207,7 +210,9 @@ export async function runNonInteractive(
}
} else {
// Text output mode - direct stdout
if (event.type === GeminiEventType.Content) {
if (event.type === GeminiEventType.Thought) {
process.stdout.write(event.value.description);
} else if (event.type === GeminiEventType.Content) {
process.stdout.write(event.value);
} else if (event.type === GeminiEventType.ToolCallRequest) {
toolCallRequests.push(event.value);
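
The change above can be summarized in a short sketch (names taken from this diff; the surrounding setup and the tool-execution tail are assumed): only the very first turn sends isContinuation: false, and in plain-text output mode thought descriptions are streamed to stdout just like regular content.

// Sketch of the continuation-aware turn loop (assumptions noted above).
let isFirstTurn = true;
while (true) {
  const stream = geminiClient.sendMessageStream(
    currentMessages[0]?.parts || [],
    abortController.signal,
    prompt_id,
    { isContinuation: !isFirstTurn }, // false only on the very first turn
  );
  isFirstTurn = false;
  for await (const event of stream) {
    if (event.type === GeminiEventType.Thought) {
      process.stdout.write(event.value.description); // reasoning text
    } else if (event.type === GeminiEventType.Content) {
      process.stdout.write(event.value);
    } else if (event.type === GeminiEventType.ToolCallRequest) {
      toolCallRequests.push(event.value);
    }
  }
  if (toolCallRequests.length === 0) break; // nothing left to run
  // ...execute the tools, then feed their responses back as the next turn.
}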

View File

@@ -15,6 +15,8 @@ import { InfoMessage } from './messages/InfoMessage.js';
import { ErrorMessage } from './messages/ErrorMessage.js';
import { ToolGroupMessage } from './messages/ToolGroupMessage.js';
import { GeminiMessageContent } from './messages/GeminiMessageContent.js';
import { GeminiThoughtMessage } from './messages/GeminiThoughtMessage.js';
import { GeminiThoughtMessageContent } from './messages/GeminiThoughtMessageContent.js';
import { CompressionMessage } from './messages/CompressionMessage.js';
import { SummaryMessage } from './messages/SummaryMessage.js';
import { WarningMessage } from './messages/WarningMessage.js';
@@ -85,6 +87,26 @@ const HistoryItemDisplayComponent: React.FC<HistoryItemDisplayProps> = ({
terminalWidth={terminalWidth}
/>
)}
{itemForDisplay.type === 'gemini_thought' && (
<GeminiThoughtMessage
text={itemForDisplay.text}
isPending={isPending}
availableTerminalHeight={
availableTerminalHeightGemini ?? availableTerminalHeight
}
terminalWidth={terminalWidth}
/>
)}
{itemForDisplay.type === 'gemini_thought_content' && (
<GeminiThoughtMessageContent
text={itemForDisplay.text}
isPending={isPending}
availableTerminalHeight={
availableTerminalHeightGemini ?? availableTerminalHeight
}
terminalWidth={terminalWidth}
/>
)}
{itemForDisplay.type === 'info' && (
<InfoMessage text={itemForDisplay.text} />
)}
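
For context, a history item of the new kind (shape per the types diff further down; the addItem call is illustrative, not part of this commit) flows through these conditionals and is rendered by GeminiThoughtMessage:

// Illustrative only — the item shape comes from HistoryItemGeminiThought:
addItem(
  { type: 'gemini_thought', text: 'Outlining the plan before editing files...' },
  userMessageTimestamp,
);
// Long thoughts that are split for performance use type 'gemini_thought_content'
// and render through GeminiThoughtMessageContent instead.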

View File

@@ -0,0 +1,48 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type React from 'react';
import { Text, Box } from 'ink';
import { MarkdownDisplay } from '../../utils/MarkdownDisplay.js';
import { theme } from '../../semantic-colors.js';
interface GeminiThoughtMessageProps {
text: string;
isPending: boolean;
availableTerminalHeight?: number;
terminalWidth: number;
}
/**
* Displays model thinking/reasoning text with a softer, dimmed style
* to visually distinguish it from regular content output.
*/
export const GeminiThoughtMessage: React.FC<GeminiThoughtMessageProps> = ({
text,
isPending,
availableTerminalHeight,
terminalWidth,
}) => {
const prefix = '✦ ';
const prefixWidth = prefix.length;
return (
<Box flexDirection="row" marginBottom={1}>
<Box width={prefixWidth}>
<Text color={theme.text.secondary}>{prefix}</Text>
</Box>
<Box flexGrow={1} flexDirection="column">
<MarkdownDisplay
text={text}
isPending={isPending}
availableTerminalHeight={availableTerminalHeight}
terminalWidth={terminalWidth}
textColor={theme.text.secondary}
/>
</Box>
</Box>
);
};
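
A minimal usage sketch (hypothetical, not part of the commit) showing the component rendered on its own with Ink:

import { render } from 'ink';
import { GeminiThoughtMessage } from './GeminiThoughtMessage.js';

render(
  <GeminiThoughtMessage
    text={'Comparing two refactoring strategies before answering...'}
    isPending={false}
    terminalWidth={80}
  />,
); // prints a dimmed, '✦ '-prefixed block via MarkdownDisplay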

View File

@@ -0,0 +1,40 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type React from 'react';
import { Box } from 'ink';
import { MarkdownDisplay } from '../../utils/MarkdownDisplay.js';
import { theme } from '../../semantic-colors.js';
interface GeminiThoughtMessageContentProps {
text: string;
isPending: boolean;
availableTerminalHeight?: number;
terminalWidth: number;
}
/**
* Continuation component for thought messages, similar to GeminiMessageContent.
* Used when a thought response gets too long and needs to be split for performance.
*/
export const GeminiThoughtMessageContent: React.FC<
GeminiThoughtMessageContentProps
> = ({ text, isPending, availableTerminalHeight, terminalWidth }) => {
const originalPrefix = '✦ ';
const prefixWidth = originalPrefix.length;
return (
<Box flexDirection="column" paddingLeft={prefixWidth} marginBottom={1}>
<MarkdownDisplay
text={text}
isPending={isPending}
availableTerminalHeight={availableTerminalHeight}
terminalWidth={terminalWidth}
textColor={theme.text.secondary}
/>
</Box>
);
};

View File

@@ -2261,6 +2261,57 @@ describe('useGeminiStream', () => {
});
});
it('should accumulate streamed thought descriptions', async () => {
mockSendMessageStream.mockReturnValue(
(async function* () {
yield {
type: ServerGeminiEventType.Thought,
value: { subject: '', description: 'thinking ' },
};
yield {
type: ServerGeminiEventType.Thought,
value: { subject: '', description: 'more' },
};
yield {
type: ServerGeminiEventType.Finished,
value: { reason: 'STOP', usageMetadata: undefined },
};
})(),
);
const { result } = renderHook(() =>
useGeminiStream(
new MockedGeminiClientClass(mockConfig),
[],
mockAddItem,
mockConfig,
mockLoadedSettings,
mockOnDebugMessage,
mockHandleSlashCommand,
false,
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
false,
() => {},
() => {},
() => {},
false, // visionModelPreviewEnabled
() => {},
80,
24,
),
);
await act(async () => {
await result.current.submitQuery('Streamed thought');
});
await waitFor(() => {
expect(result.current.thought?.description).toBe('thinking more');
});
});
it('should memoize pendingHistoryItems', () => {
mockUseReactToolScheduler.mockReturnValue([
[],

View File

@@ -497,6 +497,61 @@ export const useGeminiStream = (
[addItem, pendingHistoryItemRef, setPendingHistoryItem],
);
const mergeThought = useCallback(
(incoming: ThoughtSummary) => {
setThought((prev) => {
if (!prev) {
return incoming;
}
const subject = incoming.subject || prev.subject;
const description = `${prev.description ?? ''}${incoming.description ?? ''}`;
return { subject, description };
});
},
[setThought],
);
const handleThoughtEvent = useCallback(
(
eventValue: ThoughtSummary,
currentThoughtBuffer: string,
userMessageTimestamp: number,
): string => {
if (turnCancelledRef.current) {
return '';
}
// Extract the description text from the thought summary
const thoughtText = eventValue.description ?? '';
if (!thoughtText) {
return currentThoughtBuffer;
}
const newThoughtBuffer = currentThoughtBuffer + thoughtText;
// If we're not already showing a thought, start a new one
if (pendingHistoryItemRef.current?.type !== 'gemini_thought') {
// If there's a pending non-thought item, finalize it first
if (pendingHistoryItemRef.current) {
addItem(pendingHistoryItemRef.current, userMessageTimestamp);
}
setPendingHistoryItem({ type: 'gemini_thought', text: '' });
}
// Update the existing thought message with accumulated content
setPendingHistoryItem({
type: 'gemini_thought',
text: newThoughtBuffer,
});
// Also update the thought state for the loading indicator
mergeThought(eventValue);
return newThoughtBuffer;
},
[addItem, pendingHistoryItemRef, setPendingHistoryItem, mergeThought],
);
const handleUserCancelledEvent = useCallback(
(userMessageTimestamp: number) => {
if (turnCancelledRef.current) {
@@ -710,11 +765,16 @@ export const useGeminiStream = (
signal: AbortSignal,
): Promise<StreamProcessingStatus> => {
let geminiMessageBuffer = '';
let thoughtBuffer = '';
const toolCallRequests: ToolCallRequestInfo[] = [];
for await (const event of stream) {
switch (event.type) {
case ServerGeminiEventType.Thought:
setThought(event.value);
thoughtBuffer = handleThoughtEvent(
event.value,
thoughtBuffer,
userMessageTimestamp,
);
break;
case ServerGeminiEventType.Content:
geminiMessageBuffer = handleContentEvent(
@@ -776,6 +836,7 @@ export const useGeminiStream = (
},
[
handleContentEvent,
handleThoughtEvent,
handleUserCancelledEvent,
handleErrorEvent,
scheduleToolCalls,
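
To make the buffering concrete, here is an illustrative trace (values assumed, mirroring the test above): two streamed Thought events accumulate into one pending gemini_thought history item and one merged loading-indicator thought.

// Illustrative trace, not part of the commit:
let thoughtBuffer = '';
thoughtBuffer = handleThoughtEvent(
  { subject: '', description: 'thinking ' },
  thoughtBuffer,
  userMessageTimestamp,
); // pendingHistoryItem -> { type: 'gemini_thought', text: 'thinking ' }
thoughtBuffer = handleThoughtEvent(
  { subject: '', description: 'more' },
  thoughtBuffer,
  userMessageTimestamp,
); // pendingHistoryItem -> { type: 'gemini_thought', text: 'thinking more' }
// thought.description for the loading indicator is also 'thinking more'.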

View File

@@ -103,6 +103,16 @@ export type HistoryItemGeminiContent = HistoryItemBase & {
text: string;
};
export type HistoryItemGeminiThought = HistoryItemBase & {
type: 'gemini_thought';
text: string;
};
export type HistoryItemGeminiThoughtContent = HistoryItemBase & {
type: 'gemini_thought_content';
text: string;
};
export type HistoryItemInfo = HistoryItemBase & {
type: 'info';
text: string;
@@ -241,6 +251,8 @@ export type HistoryItemWithoutId =
| HistoryItemUserShell
| HistoryItemGemini
| HistoryItemGeminiContent
| HistoryItemGeminiThought
| HistoryItemGeminiThoughtContent
| HistoryItemInfo
| HistoryItemError
| HistoryItemWarning

View File

@@ -19,12 +19,16 @@ const UNDERLINE_TAG_END_LENGTH = 4; // For "</u>"
interface RenderInlineProps {
text: string;
textColor?: string;
}
const RenderInlineInternal: React.FC<RenderInlineProps> = ({ text }) => {
const RenderInlineInternal: React.FC<RenderInlineProps> = ({
text,
textColor = theme.text.primary,
}) => {
// Early return for plain text without markdown or URLs
if (!/[*_~`<[https?:]/.test(text)) {
return <Text color={theme.text.primary}>{text}</Text>;
return <Text color={textColor}>{text}</Text>;
}
const nodes: React.ReactNode[] = [];

View File

@@ -17,6 +17,7 @@ interface MarkdownDisplayProps {
isPending: boolean;
availableTerminalHeight?: number;
terminalWidth: number;
textColor?: string;
}
// Constants for Markdown parsing and rendering
@@ -31,6 +32,7 @@ const MarkdownDisplayInternal: React.FC<MarkdownDisplayProps> = ({
isPending,
availableTerminalHeight,
terminalWidth,
textColor = theme.text.primary,
}) => {
if (!text) return <></>;
@@ -116,7 +118,7 @@ const MarkdownDisplayInternal: React.FC<MarkdownDisplayProps> = ({
addContentBlock(
<Box key={key}>
<Text wrap="wrap">
<RenderInline text={line} />
<RenderInline text={line} textColor={textColor} />
</Text>
</Box>,
);
@@ -155,7 +157,7 @@ const MarkdownDisplayInternal: React.FC<MarkdownDisplayProps> = ({
addContentBlock(
<Box key={key}>
<Text wrap="wrap">
<RenderInline text={line} />
<RenderInline text={line} textColor={textColor} />
</Text>
</Box>,
);
@@ -173,36 +175,36 @@ const MarkdownDisplayInternal: React.FC<MarkdownDisplayProps> = ({
switch (level) {
case 1:
headerNode = (
<Text bold color={theme.text.link}>
<RenderInline text={headerText} />
<Text bold color={textColor}>
<RenderInline text={headerText} textColor={textColor} />
</Text>
);
break;
case 2:
headerNode = (
<Text bold color={theme.text.link}>
<RenderInline text={headerText} />
<Text bold color={textColor}>
<RenderInline text={headerText} textColor={textColor} />
</Text>
);
break;
case 3:
headerNode = (
<Text bold color={theme.text.primary}>
<RenderInline text={headerText} />
<Text bold color={textColor}>
<RenderInline text={headerText} textColor={textColor} />
</Text>
);
break;
case 4:
headerNode = (
<Text italic color={theme.text.secondary}>
<RenderInline text={headerText} />
<Text italic color={textColor}>
<RenderInline text={headerText} textColor={textColor} />
</Text>
);
break;
default:
headerNode = (
<Text color={theme.text.primary}>
<RenderInline text={headerText} />
<Text color={textColor}>
<RenderInline text={headerText} textColor={textColor} />
</Text>
);
break;
@@ -219,6 +221,7 @@ const MarkdownDisplayInternal: React.FC<MarkdownDisplayProps> = ({
type="ul"
marker={marker}
leadingWhitespace={leadingWhitespace}
textColor={textColor}
/>,
);
} else if (olMatch) {
@@ -232,6 +235,7 @@ const MarkdownDisplayInternal: React.FC<MarkdownDisplayProps> = ({
type="ol"
marker={marker}
leadingWhitespace={leadingWhitespace}
textColor={textColor}
/>,
);
} else {
@@ -245,8 +249,8 @@ const MarkdownDisplayInternal: React.FC<MarkdownDisplayProps> = ({
} else {
addContentBlock(
<Box key={key}>
<Text wrap="wrap" color={theme.text.primary}>
<RenderInline text={line} />
<Text wrap="wrap" color={textColor}>
<RenderInline text={line} textColor={textColor} />
</Text>
</Box>,
);
@@ -367,6 +371,7 @@ interface RenderListItemProps {
type: 'ul' | 'ol';
marker: string;
leadingWhitespace?: string;
textColor?: string;
}
const RenderListItemInternal: React.FC<RenderListItemProps> = ({
@@ -374,6 +379,7 @@ const RenderListItemInternal: React.FC<RenderListItemProps> = ({
type,
marker,
leadingWhitespace = '',
textColor = theme.text.primary,
}) => {
const prefix = type === 'ol' ? `${marker}. ` : `${marker} `;
const prefixWidth = prefix.length;
@@ -385,11 +391,11 @@ const RenderListItemInternal: React.FC<RenderListItemProps> = ({
flexDirection="row"
>
<Box width={prefixWidth}>
<Text color={theme.text.primary}>{prefix}</Text>
<Text color={textColor}>{prefix}</Text>
</Box>
<Box flexGrow={LIST_ITEM_TEXT_FLEX_GROW}>
<Text wrap="wrap" color={theme.text.primary}>
<RenderInline text={itemText} />
<Text wrap="wrap" color={textColor}>
<RenderInline text={itemText} textColor={textColor} />
</Text>
</Box>
</Box>

View File

@@ -102,7 +102,7 @@ describe('resumeHistoryUtils', () => {
]);
});
it('marks tool results as error, skips thought text, and falls back when tool is missing', () => {
it('marks tool results as error, captures thought text, and falls back when tool is missing', () => {
const conversation = {
messages: [
{
@@ -142,6 +142,11 @@ describe('resumeHistoryUtils', () => {
const items = buildResumedHistoryItems(session, makeConfig({}));
expect(items).toEqual([
{
id: expect.any(Number),
type: 'gemini_thought',
text: 'should be skipped',
},
{ id: expect.any(Number), type: 'gemini', text: 'visible text' },
{
id: expect.any(Number),

View File

@@ -17,7 +17,7 @@ import type { HistoryItem, HistoryItemWithoutId } from '../types.js';
import { ToolCallStatus } from '../types.js';
/**
* Extracts text content from a Content object's parts.
* Extracts text content from a Content object's parts (excluding thought parts).
*/
function extractTextFromParts(parts: Part[] | undefined): string {
if (!parts) return '';
@@ -34,6 +34,22 @@ function extractTextFromParts(parts: Part[] | undefined): string {
return textParts.join('\n');
}
/**
* Extracts thought text content from a Content object's parts.
* Thought parts are identified by having `thought: true`.
*/
function extractThoughtTextFromParts(parts: Part[] | undefined): string {
if (!parts) return '';
const thoughtParts: string[] = [];
for (const part of parts) {
if ('text' in part && part.text && 'thought' in part && part.thought) {
thoughtParts.push(part.text);
}
}
return thoughtParts.join('\n');
}
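// Worked example (illustrative values, not from this commit) contrasting the
// two extractors above:
//   const parts: Part[] = [
//     { text: 'planning the response', thought: true },
//     { text: 'visible answer' },
//     { functionCall: { name: 'read_file', args: {} } },
//   ];
//   extractThoughtTextFromParts(parts); // => 'planning the response'
//   extractTextFromParts(parts);        // => 'visible answer'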
/**
* Extracts function calls from a Content object's parts.
*/
@@ -187,12 +203,28 @@ function convertToHistoryItems(
case 'assistant': {
const parts = record.message?.parts as Part[] | undefined;
// Extract thought content
const thoughtText = extractThoughtTextFromParts(parts);
// Extract text content (non-function-call, non-thought)
const text = extractTextFromParts(parts);
// Extract function calls
const functionCalls = extractFunctionCalls(parts);
// If there's thought content, add it as a gemini_thought message
if (thoughtText) {
// Flush any pending tool group before thought
if (currentToolGroup.length > 0) {
items.push({
type: 'tool_group',
tools: [...currentToolGroup],
});
currentToolGroup = [];
}
items.push({ type: 'gemini_thought', text: thoughtText });
}
// If there's text content, add it as a gemini message
if (text) {
// Flush any pending tool group before text

View File

@@ -448,6 +448,7 @@ describe('Gemini Client (client.ts)', () => {
getHistory: mockGetHistory,
addHistory: vi.fn(),
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
} as unknown as GeminiChat;
});
@@ -462,6 +463,7 @@ describe('Gemini Client (client.ts)', () => {
const mockOriginalChat: Partial<GeminiChat> = {
getHistory: vi.fn((_curated?: boolean) => chatHistory),
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
};
client['chat'] = mockOriginalChat as GeminiChat;
@@ -1080,6 +1082,7 @@ describe('Gemini Client (client.ts)', () => {
const mockChat = {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
} as unknown as GeminiChat;
client['chat'] = mockChat;
@@ -1142,6 +1145,7 @@ ${JSON.stringify(
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@@ -1197,6 +1201,7 @@ ${JSON.stringify(
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@@ -1273,6 +1278,7 @@ ${JSON.stringify(
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@@ -1319,6 +1325,7 @@ ${JSON.stringify(
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@@ -1363,6 +1370,7 @@ ${JSON.stringify(
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@@ -1450,6 +1458,7 @@ ${JSON.stringify(
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@@ -1506,6 +1515,7 @@ ${JSON.stringify(
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@@ -1586,6 +1596,7 @@ ${JSON.stringify(
.mockReturnValue([
{ role: 'user', parts: [{ text: 'previous message' }] },
]),
stripThoughtsFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
});
@@ -1840,6 +1851,7 @@ ${JSON.stringify(
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]), // Default empty history
setHistory: vi.fn(),
stripThoughtsFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@@ -2180,6 +2192,7 @@ ${JSON.stringify(
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@@ -2216,6 +2229,7 @@ ${JSON.stringify(
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;
@@ -2256,6 +2270,7 @@ ${JSON.stringify(
const mockChat: Partial<GeminiChat> = {
addHistory: vi.fn(),
getHistory: vi.fn().mockReturnValue([]),
stripThoughtsFromHistory: vi.fn(),
};
client['chat'] = mockChat as GeminiChat;

View File

@@ -419,6 +419,9 @@ export class GeminiClient {
// record user message for session management
this.config.getChatRecordingService()?.recordUserMessage(request);
// strip thoughts from history before sending the message
this.stripThoughtsFromHistory();
}
this.sessionTurnCount++;
if (

View File

@@ -1541,10 +1541,10 @@ describe('GeminiChat', () => {
{
role: 'model',
parts: [
{ text: 'thinking...', thoughtSignature: 'thought-123' },
{ text: 'thinking...', thought: true },
{ text: 'hi' },
{
functionCall: { name: 'test', args: {} },
thoughtSignature: 'thought-456',
},
],
},
@@ -1559,10 +1559,7 @@ describe('GeminiChat', () => {
},
{
role: 'model',
parts: [
{ text: 'thinking...' },
{ functionCall: { name: 'test', args: {} } },
],
parts: [{ text: 'hi' }, { functionCall: { name: 'test', args: {} } }],
},
]);
});

View File

@@ -443,20 +443,28 @@ export class GeminiChat {
}
stripThoughtsFromHistory(): void {
this.history = this.history.map((content) => {
const newContent = { ...content };
if (newContent.parts) {
newContent.parts = newContent.parts.map((part) => {
if (part && typeof part === 'object' && 'thoughtSignature' in part) {
const newPart = { ...part };
delete (newPart as { thoughtSignature?: string }).thoughtSignature;
return newPart;
}
return part;
});
}
return newContent;
});
this.history = this.history
.map((content) => {
if (!content.parts) return content;
// Filter out thought parts entirely
const filteredParts = content.parts.filter(
(part) =>
!(
part &&
typeof part === 'object' &&
'thought' in part &&
part.thought
),
);
return {
...content,
parts: filteredParts,
};
})
// Remove Content objects that have no parts left after filtering
.filter((content) => content.parts && content.parts.length > 0);
}
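// Before/after illustration (assumed history contents, not from the commit):
//   history = [
//     { role: 'user', parts: [{ text: 'hi' }] },
//     { role: 'model', parts: [{ text: 'thinking...', thought: true }, { text: 'hi' }] },
//     { role: 'model', parts: [{ text: 'only a thought', thought: true }] },
//   ];
//   stripThoughtsFromHistory();
//   // => [
//   //   { role: 'user', parts: [{ text: 'hi' }] },
//   //   { role: 'model', parts: [{ text: 'hi' }] },
//   // ]  — the thought-only Content entry is dropped entirely.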
setTools(tools: Tool[]): void {
@@ -497,8 +505,6 @@ export class GeminiChat {
): AsyncGenerator<GenerateContentResponse> {
// Collect ALL parts from the model response (including thoughts for recording)
const allModelParts: Part[] = [];
// Non-thought parts for history (what we send back to the API)
const historyParts: Part[] = [];
let usageMetadata: GenerateContentResponseUsageMetadata | undefined;
let hasToolCall = false;
@@ -516,8 +522,6 @@ export class GeminiChat {
// Collect all parts for recording
allModelParts.push(...content.parts);
// Collect non-thought parts for history
historyParts.push(...content.parts.filter((part) => !part.thought));
}
}
@@ -534,9 +538,15 @@ export class GeminiChat {
yield chunk; // Yield every chunk to the UI immediately.
}
// Consolidate text parts for history (merges adjacent text parts).
const thoughtParts = allModelParts.filter((part) => part.thought);
const thoughtText = thoughtParts
.map((part) => part.text)
.join('')
.trim();
const contentParts = allModelParts.filter((part) => !part.thought);
const consolidatedHistoryParts: Part[] = [];
for (const part of historyParts) {
for (const part of contentParts) {
const lastPart =
consolidatedHistoryParts[consolidatedHistoryParts.length - 1];
if (
@@ -550,20 +560,21 @@ export class GeminiChat {
}
}
const responseText = consolidatedHistoryParts
const contentText = consolidatedHistoryParts
.filter((part) => part.text)
.map((part) => part.text)
.join('')
.trim();
// Record assistant turn with raw Content and metadata
if (responseText || hasToolCall || usageMetadata) {
if (thoughtText || contentText || hasToolCall || usageMetadata) {
this.chatRecordingService?.recordAssistantTurn({
model,
message: [
...(responseText ? [{ text: responseText }] : []),
...(thoughtText ? [{ text: thoughtText, thought: true }] : []),
...(contentText ? [{ text: contentText }] : []),
...(hasToolCall
? historyParts
? contentParts
.filter((part) => part.functionCall)
.map((part) => ({ functionCall: part.functionCall }))
: []),
@@ -579,7 +590,7 @@ export class GeminiChat {
// We throw an error only when there's no tool call AND:
// - No finish reason, OR
// - Empty response text (e.g., only thoughts with no actual content)
if (!hasToolCall && (!hasFinishReason || !responseText)) {
if (!hasToolCall && (!hasFinishReason || !contentText)) {
if (!hasFinishReason) {
throw new InvalidStreamError(
'Model stream ended without a finish reason.',
@@ -593,8 +604,13 @@ export class GeminiChat {
}
}
// Add to history (thought text is kept here; it is stripped before the next API call)
this.history.push({ role: 'model', parts: consolidatedHistoryParts });
this.history.push({
role: 'model',
parts: [
...(thoughtText ? [{ text: thoughtText, thought: true }] : []),
...consolidatedHistoryParts,
],
});
}
}

View File

@@ -8,6 +8,7 @@ import { describe, it, expect, beforeEach } from 'vitest';
import { OpenAIContentConverter } from './converter.js';
import type { StreamingToolCallParser } from './streamingToolCallParser.js';
import type { GenerateContentParameters, Content } from '@google/genai';
import type OpenAI from 'openai';
describe('OpenAIContentConverter', () => {
let converter: OpenAIContentConverter;
@@ -142,4 +143,63 @@ describe('OpenAIContentConverter', () => {
expect(toolMessage?.content).toBe('{"data":{"value":42}}');
});
});
describe('OpenAI -> Gemini reasoning content', () => {
it('should convert reasoning_content to a thought part for non-streaming responses', () => {
const response = converter.convertOpenAIResponseToGemini({
object: 'chat.completion',
id: 'chatcmpl-1',
created: 123,
model: 'gpt-test',
choices: [
{
index: 0,
message: {
role: 'assistant',
content: 'final answer',
reasoning_content: 'chain-of-thought',
},
finish_reason: 'stop',
logprobs: null,
},
],
} as unknown as OpenAI.Chat.ChatCompletion);
const parts = response.candidates?.[0]?.content?.parts;
expect(parts?.[0]).toEqual(
expect.objectContaining({ thought: true, text: 'chain-of-thought' }),
);
expect(parts?.[1]).toEqual(
expect.objectContaining({ text: 'final answer' }),
);
});
it('should convert streaming reasoning_content delta to a thought part', () => {
const chunk = converter.convertOpenAIChunkToGemini({
object: 'chat.completion.chunk',
id: 'chunk-1',
created: 456,
choices: [
{
index: 0,
delta: {
content: 'visible text',
reasoning_content: 'thinking...',
},
finish_reason: 'stop',
logprobs: null,
},
],
model: 'gpt-test',
} as unknown as OpenAI.Chat.ChatCompletionChunk);
const parts = chunk.candidates?.[0]?.content?.parts;
expect(parts?.[0]).toEqual(
expect.objectContaining({ thought: true, text: 'thinking...' }),
);
expect(parts?.[1]).toEqual(
expect.objectContaining({ text: 'visible text' }),
);
});
});
});

View File

@@ -31,6 +31,25 @@ interface ExtendedCompletionUsage extends OpenAI.CompletionUsage {
cached_tokens?: number;
}
interface ExtendedChatCompletionAssistantMessageParam
extends OpenAI.Chat.ChatCompletionAssistantMessageParam {
reasoning_content?: string | null;
}
type ExtendedChatCompletionMessageParam =
| OpenAI.Chat.ChatCompletionMessageParam
| ExtendedChatCompletionAssistantMessageParam;
export interface ExtendedCompletionMessage
extends OpenAI.Chat.ChatCompletionMessage {
reasoning_content?: string | null;
}
export interface ExtendedCompletionChunkDelta
extends OpenAI.Chat.ChatCompletionChunk.Choice.Delta {
reasoning_content?: string | null;
}
/**
* Tool call accumulator for streaming responses
*/
@@ -44,7 +63,8 @@ export interface ToolCallAccumulator {
* Parsed parts from Gemini content, categorized by type
*/
interface ParsedParts {
textParts: string[];
thoughtParts: string[];
contentParts: string[];
functionCalls: FunctionCall[];
functionResponses: FunctionResponse[];
mediaParts: Array<{
@@ -251,7 +271,7 @@ export class OpenAIContentConverter {
*/
private processContents(
contents: ContentListUnion,
messages: OpenAI.Chat.ChatCompletionMessageParam[],
messages: ExtendedChatCompletionMessageParam[],
): void {
if (Array.isArray(contents)) {
for (const content of contents) {
@@ -267,7 +287,7 @@ export class OpenAIContentConverter {
*/
private processContent(
content: ContentUnion | PartUnion,
messages: OpenAI.Chat.ChatCompletionMessageParam[],
messages: ExtendedChatCompletionMessageParam[],
): void {
if (typeof content === 'string') {
messages.push({ role: 'user' as const, content });
@@ -301,11 +321,19 @@ export class OpenAIContentConverter {
},
}));
messages.push({
const assistantMessage: ExtendedChatCompletionAssistantMessageParam = {
role: 'assistant' as const,
content: parsedParts.textParts.join('') || null,
content: parsedParts.contentParts.join('') || null,
tool_calls: toolCalls,
});
};
// Only include reasoning_content if it has actual content
const reasoningContent = parsedParts.thoughtParts.join('');
if (reasoningContent) {
assistantMessage.reasoning_content = reasoningContent;
}
messages.push(assistantMessage);
return;
}
@@ -322,7 +350,8 @@ export class OpenAIContentConverter {
* Parse Gemini parts into categorized components
*/
private parseParts(parts: Part[]): ParsedParts {
const textParts: string[] = [];
const thoughtParts: string[] = [];
const contentParts: string[] = [];
const functionCalls: FunctionCall[] = [];
const functionResponses: FunctionResponse[] = [];
const mediaParts: Array<{
@@ -334,9 +363,20 @@ export class OpenAIContentConverter {
for (const part of parts) {
if (typeof part === 'string') {
textParts.push(part);
} else if ('text' in part && part.text) {
textParts.push(part.text);
contentParts.push(part);
} else if (
'text' in part &&
part.text &&
!('thought' in part && part.thought)
) {
contentParts.push(part.text);
} else if (
'text' in part &&
part.text &&
'thought' in part &&
part.thought
) {
thoughtParts.push(part.text);
} else if ('functionCall' in part && part.functionCall) {
functionCalls.push(part.functionCall);
} else if ('functionResponse' in part && part.functionResponse) {
@@ -361,7 +401,13 @@ export class OpenAIContentConverter {
}
}
return { textParts, functionCalls, functionResponses, mediaParts };
return {
thoughtParts,
contentParts,
functionCalls,
functionResponses,
mediaParts,
};
}
private extractFunctionResponseContent(response: unknown): string {
@@ -408,14 +454,29 @@ export class OpenAIContentConverter {
*/
private createMultimodalMessage(
role: 'user' | 'assistant',
parsedParts: Pick<ParsedParts, 'textParts' | 'mediaParts'>,
): OpenAI.Chat.ChatCompletionMessageParam | null {
const { textParts, mediaParts } = parsedParts;
const content = textParts.map((text) => ({ type: 'text' as const, text }));
parsedParts: Pick<
ParsedParts,
'contentParts' | 'mediaParts' | 'thoughtParts'
>,
): ExtendedChatCompletionMessageParam | null {
const { contentParts, mediaParts, thoughtParts } = parsedParts;
const reasoningContent = thoughtParts.join('');
const content = contentParts.map((text) => ({
type: 'text' as const,
text,
}));
// If no media parts, return simple text message
if (mediaParts.length === 0) {
return content.length > 0 ? { role, content } : null;
if (content.length === 0) return null;
const message: ExtendedChatCompletionMessageParam = { role, content };
// Only include reasoning_content if it has actual content
if (reasoningContent) {
(
message as ExtendedChatCompletionAssistantMessageParam
).reasoning_content = reasoningContent;
}
return message;
}
// For assistant messages with media, convert to text only
@@ -536,6 +597,13 @@ export class OpenAIContentConverter {
const parts: Part[] = [];
// Handle reasoning content (thoughts)
const reasoningText = (choice.message as ExtendedCompletionMessage)
.reasoning_content;
if (reasoningText) {
parts.push({ text: reasoningText, thought: true });
}
// Handle text content
if (choice.message.content) {
parts.push({ text: choice.message.content });
@@ -632,6 +700,12 @@ export class OpenAIContentConverter {
if (choice) {
const parts: Part[] = [];
const reasoningText = (choice.delta as ExtendedCompletionChunkDelta)
.reasoning_content;
if (reasoningText) {
parts.push({ text: reasoningText, thought: true });
}
// Handle text content
if (choice.delta?.content) {
if (typeof choice.delta.content === 'string') {
@@ -721,6 +795,8 @@ export class OpenAIContentConverter {
const promptTokens = usage.prompt_tokens || 0;
const completionTokens = usage.completion_tokens || 0;
const totalTokens = usage.total_tokens || 0;
const thinkingTokens =
usage.completion_tokens_details?.reasoning_tokens || 0;
// Support both formats: prompt_tokens_details.cached_tokens (OpenAI standard)
// and cached_tokens (some models return it at top level)
const extendedUsage = usage as ExtendedCompletionUsage;
@@ -743,6 +819,7 @@ export class OpenAIContentConverter {
response.usageMetadata = {
promptTokenCount: finalPromptTokens,
candidatesTokenCount: finalCompletionTokens,
thoughtsTokenCount: thinkingTokens,
totalTokenCount: totalTokens,
cachedContentTokenCount: cachedTokens,
};
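
Taken together, the converter now round-trips DeepSeek-style reasoning. A hedged summary of the mapping (field names from the diff, sample values invented):

// Gemini -> OpenAI (processContent / parseParts):
//   { role: 'model', parts: [{ text: 'chain-of-thought', thought: true }, { text: 'final answer' }] }
//     -> { role: 'assistant', content: 'final answer', reasoning_content: 'chain-of-thought' }
// OpenAI -> Gemini (convertOpenAIResponseToGemini / convertOpenAIChunkToGemini):
//   { message: { content: 'final answer', reasoning_content: 'chain-of-thought' } }
//     -> parts: [{ text: 'chain-of-thought', thought: true }, { text: 'final answer' }]
// Token accounting:
//   usage.completion_tokens_details.reasoning_tokens -> usageMetadata.thoughtsTokenCount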

View File

@@ -561,11 +561,14 @@ describe('DefaultTelemetryService', () => {
choices: [
{
index: 0,
delta: { content: 'Hello' },
delta: {
content: 'Hello',
reasoning_content: 'thinking ',
},
finish_reason: null,
},
],
} as OpenAI.Chat.ChatCompletionChunk,
} as unknown as OpenAI.Chat.ChatCompletionChunk,
{
id: 'test-id',
object: 'chat.completion.chunk',
@@ -574,7 +577,10 @@ describe('DefaultTelemetryService', () => {
choices: [
{
index: 0,
delta: { content: ' world' },
delta: {
content: ' world',
reasoning_content: 'more',
},
finish_reason: 'stop',
},
],
@@ -583,7 +589,7 @@ describe('DefaultTelemetryService', () => {
completion_tokens: 5,
total_tokens: 15,
},
} as OpenAI.Chat.ChatCompletionChunk,
} as unknown as OpenAI.Chat.ChatCompletionChunk,
];
await telemetryService.logStreamingSuccess(
@@ -603,11 +609,11 @@ describe('DefaultTelemetryService', () => {
choices: [
{
index: 0,
message: {
message: expect.objectContaining({
role: 'assistant',
content: 'Hello world',
refusal: null,
},
reasoning_content: 'thinking more',
}),
finish_reason: 'stop',
logprobs: null,
},
@@ -722,11 +728,14 @@ describe('DefaultTelemetryService', () => {
choices: [
{
index: 0,
delta: { content: 'Hello' },
delta: {
content: 'Hello',
reasoning_content: 'thinking ',
},
finish_reason: null,
},
],
} as OpenAI.Chat.ChatCompletionChunk,
} as unknown as OpenAI.Chat.ChatCompletionChunk,
{
id: 'test-id',
object: 'chat.completion.chunk',
@@ -735,7 +744,10 @@ describe('DefaultTelemetryService', () => {
choices: [
{
index: 0,
delta: { content: ' world!' },
delta: {
content: ' world!',
reasoning_content: 'more',
},
finish_reason: 'stop',
},
],
@@ -744,7 +756,7 @@ describe('DefaultTelemetryService', () => {
completion_tokens: 5,
total_tokens: 15,
},
} as OpenAI.Chat.ChatCompletionChunk,
} as unknown as OpenAI.Chat.ChatCompletionChunk,
];
await telemetryService.logStreamingSuccess(
@@ -757,27 +769,14 @@ describe('DefaultTelemetryService', () => {
expect(openaiLogger.logInteraction).toHaveBeenCalledWith(
mockOpenAIRequest,
expect.objectContaining({
id: 'test-id',
object: 'chat.completion',
created: 1234567890,
model: 'gpt-4',
choices: [
{
index: 0,
message: {
role: 'assistant',
expect.objectContaining({
message: expect.objectContaining({
content: 'Hello world!',
refusal: null,
},
finish_reason: 'stop',
logprobs: null,
},
reasoning_content: 'thinking more',
}),
}),
],
usage: {
prompt_tokens: 10,
completion_tokens: 5,
total_tokens: 15,
},
}),
);
});

View File

@@ -10,6 +10,7 @@ import { ApiErrorEvent, ApiResponseEvent } from '../../telemetry/types.js';
import { OpenAILogger } from '../../utils/openaiLogger.js';
import type { GenerateContentResponse } from '@google/genai';
import type OpenAI from 'openai';
import type { ExtendedCompletionChunkDelta } from './converter.js';
export interface RequestContext {
userPromptId: string;
@@ -172,6 +173,7 @@ export class DefaultTelemetryService implements TelemetryService {
| 'content_filter'
| 'function_call'
| null = null;
let combinedReasoning = '';
let usage:
| {
prompt_tokens: number;
@@ -183,6 +185,12 @@ export class DefaultTelemetryService implements TelemetryService {
for (const chunk of chunks) {
const choice = chunk.choices?.[0];
if (choice) {
// Combine reasoning content
const reasoningContent = (choice.delta as ExtendedCompletionChunkDelta)
?.reasoning_content;
if (reasoningContent) {
combinedReasoning += reasoningContent;
}
// Combine text content
if (choice.delta?.content) {
combinedContent += choice.delta.content;
@@ -230,6 +238,11 @@ export class DefaultTelemetryService implements TelemetryService {
content: combinedContent || null,
refusal: null,
};
if (combinedReasoning) {
// Attach reasoning content if any thought tokens were streamed
(message as { reasoning_content?: string }).reasoning_content =
combinedReasoning;
}
// Add tool calls if any
if (toolCalls.length > 0) {

View File

@@ -120,6 +120,97 @@ describe('Turn', () => {
expect(turn.getDebugResponses().length).toBe(2);
});
it('should emit Thought events when a thought part is present', async () => {
const mockResponseStream = (async function* () {
yield {
type: StreamEventType.CHUNK,
value: {
candidates: [
{
content: {
role: 'model',
parts: [
{ thought: true, text: 'reasoning...' },
{ text: 'final answer' },
],
},
},
],
} as GenerateContentResponse,
};
})();
mockSendMessageStream.mockResolvedValue(mockResponseStream);
const events = [];
const reqParts: Part[] = [{ text: 'Hi' }];
for await (const event of turn.run(
'test-model',
reqParts,
new AbortController().signal,
)) {
events.push(event);
}
expect(events).toEqual([
{
type: GeminiEventType.Thought,
value: { subject: '', description: 'reasoning...' },
},
]);
});
it('should emit thought descriptions per incoming chunk', async () => {
const mockResponseStream = (async function* () {
yield {
type: StreamEventType.CHUNK,
value: {
candidates: [
{
content: {
role: 'model',
parts: [{ thought: true, text: 'part1' }],
},
},
],
} as GenerateContentResponse,
};
yield {
type: StreamEventType.CHUNK,
value: {
candidates: [
{
content: {
role: 'model',
parts: [{ thought: true, text: 'part2' }],
},
},
],
} as GenerateContentResponse,
};
})();
mockSendMessageStream.mockResolvedValue(mockResponseStream);
const events = [];
for await (const event of turn.run(
'test-model',
[{ text: 'Hi' }],
new AbortController().signal,
)) {
events.push(event);
}
expect(events).toEqual([
{
type: GeminiEventType.Thought,
value: { subject: '', description: 'part1' },
},
{
type: GeminiEventType.Thought,
value: { subject: '', description: 'part2' },
},
]);
});
it('should yield tool_call_request events for function calls', async () => {
const mockResponseStream = (async function* () {
yield {

View File

@@ -27,7 +27,7 @@ import {
toFriendlyError,
} from '../utils/errors.js';
import type { GeminiChat } from './geminiChat.js';
import { parseThought, type ThoughtSummary } from '../utils/thoughtUtils.js';
import { getThoughtText, type ThoughtSummary } from '../utils/thoughtUtils.js';
// Define a structure for tools passed to the server
export interface ServerTool {
@@ -266,12 +266,11 @@ export class Turn {
this.currentResponseId = resp.responseId;
}
const thoughtPart = resp.candidates?.[0]?.content?.parts?.[0];
if (thoughtPart?.thought) {
const thought = parseThought(thoughtPart.text ?? '');
const thoughtPart = getThoughtText(resp);
if (thoughtPart) {
yield {
type: GeminiEventType.Thought,
value: thought,
value: { subject: '', description: thoughtPart },
};
continue;
}

View File

@@ -542,6 +542,39 @@ export class SessionService {
}
}
/**
* Options for building API history from conversation.
*/
export interface BuildApiHistoryOptions {
/**
* Whether to strip thought parts from the history.
* Thought parts are content parts that have `thought: true`.
* @default true
*/
stripThoughtsFromHistory?: boolean;
}
/**
* Strips thought parts from a Content object.
* Thought parts are identified by having `thought: true`.
* Returns null if the content only contained thought parts.
*/
function stripThoughtsFromContent(content: Content): Content | null {
if (!content.parts) return content;
const filteredParts = content.parts.filter((part) => !(part as Part).thought);
// If all parts were thoughts, remove the entire content
if (filteredParts.length === 0) {
return null;
}
return {
...content,
parts: filteredParts,
};
}
/**
* Builds the model-facing chat history (Content[]) from a reconstructed
* conversation. This keeps UI history intact while applying chat compression
@@ -555,7 +588,9 @@ export class SessionService {
*/
export function buildApiHistoryFromConversation(
conversation: ConversationRecord,
options: BuildApiHistoryOptions = {},
): Content[] {
const { stripThoughtsFromHistory = true } = options;
const { messages } = conversation;
let lastCompressionIndex = -1;
@@ -585,14 +620,26 @@ export function buildApiHistoryFromConversation(
}
}
if (stripThoughtsFromHistory) {
return baseHistory
.map(stripThoughtsFromContent)
.filter((content): content is Content => content !== null);
}
return baseHistory;
}
// Fallback: return linear messages as Content[]
return messages
const result = messages
.map((record) => record.message)
.filter((message): message is Content => message !== undefined)
.map((message) => structuredClone(message));
if (stripThoughtsFromHistory) {
return result
.map(stripThoughtsFromContent)
.filter((content): content is Content => content !== null);
}
return result;
}
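// Hypothetical call sites (option name from this diff):
//   buildApiHistoryFromConversation(conversation);
//     // default: thought parts stripped, thought-only Contents dropped
//   buildApiHistoryFromConversation(conversation, { stripThoughtsFromHistory: false });
//     // keep thought parts in the rebuilt history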
/**

View File

@@ -4,6 +4,8 @@
* SPDX-License-Identifier: Apache-2.0
*/
import type { GenerateContentResponse } from '@google/genai';
export type ThoughtSummary = {
subject: string;
description: string;
@@ -52,3 +54,23 @@ export function parseThought(rawText: string): ThoughtSummary {
return { subject, description };
}
export function getThoughtText(
response: GenerateContentResponse,
): string | null {
if (response.candidates && response.candidates.length > 0) {
const candidate = response.candidates[0];
if (
candidate.content &&
candidate.content.parts &&
candidate.content.parts.length > 0
) {
return candidate.content.parts
.filter((part) => part.thought)
.map((part) => part.text ?? '')
.join('');
}
}
return null;
}
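
A quick illustration of the new helper (response shape assumed for the example):

import type { GenerateContentResponse } from '@google/genai';
import { getThoughtText } from './thoughtUtils.js';

const response = {
  candidates: [
    {
      content: {
        role: 'model',
        parts: [
          { thought: true, text: 'step 1, ' },
          { thought: true, text: 'step 2' },
          { text: 'final answer' },
        ],
      },
    },
  ],
} as GenerateContentResponse;

getThoughtText(response); // => 'step 1, step 2'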