Mirror of https://github.com/QwenLM/qwen-code.git, synced 2025-12-19 09:33:53 +00:00
feat: Show model thoughts while loading (#992)
@@ -38,6 +38,11 @@ import {
 import { ProxyAgent, setGlobalDispatcher } from 'undici';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';

+function isThinkingSupported(model: string) {
+  if (model.startsWith('gemini-2.5')) return true;
+  return false;
+}
+
 export class GeminiClient {
   private chat: Promise<GeminiChat>;
   private contentGenerator: Promise<ContentGenerator>;
@@ -164,14 +169,21 @@ export class GeminiClient {
     try {
       const userMemory = this.config.getUserMemory();
       const systemInstruction = getCoreSystemPrompt(userMemory);
-
+      const generateContentConfigWithThinking = isThinkingSupported(this.model)
+        ? {
+            ...this.generateContentConfig,
+            thinkingConfig: {
+              includeThoughts: true,
+            },
+          }
+        : this.generateContentConfig;
       return new GeminiChat(
         this.config,
         await this.contentGenerator,
         this.model,
         {
           systemInstruction,
-          ...this.generateContentConfig,
+          ...generateContentConfigWithThinking,
           tools,
         },
         history,
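The two client hunks above gate thought output on model support: only models whose names start with gemini-2.5 get a thinkingConfig with includeThoughts enabled. A minimal standalone sketch of that derivation (withThinking and the trimmed-down GenerateContentConfig type are illustrative, not part of this change):

// Illustrative sketch; the real code spreads the client's private
// generateContentConfig field rather than taking it as a parameter.
type GenerateContentConfig = {
  temperature?: number;
  thinkingConfig?: { includeThoughts: boolean };
};

function isThinkingSupported(model: string): boolean {
  return model.startsWith('gemini-2.5');
}

function withThinking(
  model: string,
  base: GenerateContentConfig,
): GenerateContentConfig {
  // Same ternary as in startChat: spread the base config and, for supported
  // models, ask the API to stream thought summaries alongside content.
  return isThinkingSupported(model)
    ? { ...base, thinkingConfig: { includeThoughts: true } }
    : base;
}

withThinking('gemini-2.5-pro', { temperature: 0 });
// => { temperature: 0, thinkingConfig: { includeThoughts: true } }
withThinking('gemini-2.0-flash', { temperature: 0 });
// => { temperature: 0 }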
@@ -417,6 +417,10 @@ export class GeminiChat {
         chunks.push(chunk);
         const content = chunk.candidates?.[0]?.content;
         if (content !== undefined) {
+          if (this.isThoughtContent(content)) {
+            yield chunk;
+            continue;
+          }
           outputContent.push(content);
         }
       }
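This hunk forwards thought chunks to the caller (yield chunk) while keeping them out of outputContent, but the isThoughtContent helper itself is not part of the excerpt. A plausible sketch of such a guard, assuming the @google/genai Part shape where thought parts carry a boolean thought flag:

import type { Content } from '@google/genai';

// Assumed implementation: a content block counts as a thought when its first
// part is flagged as a thought summary.
function isThoughtContent(content: Content | undefined): boolean {
  return !!content?.parts?.[0]?.thought;
}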
@@ -452,12 +456,19 @@ export class GeminiChat {
     modelOutput: Content[],
     automaticFunctionCallingHistory?: Content[],
   ) {
+    const nonThoughtModelOutput = modelOutput.filter(
+      (content) => !this.isThoughtContent(content),
+    );
+
     let outputContents: Content[] = [];
     if (
-      modelOutput.length > 0 &&
-      modelOutput.every((content) => content.role !== undefined)
+      nonThoughtModelOutput.length > 0 &&
+      nonThoughtModelOutput.every((content) => content.role !== undefined)
     ) {
-      outputContents = modelOutput;
+      outputContents = nonThoughtModelOutput;
+    } else if (nonThoughtModelOutput.length === 0 && modelOutput.length > 0) {
+      // This case handles when the model returns only a thought.
+      // We don't want to add an empty model response in this case.
     } else {
       // When not a function response appends an empty content when model returns empty response, so that the
       // history is always alternating between user and model.
@@ -486,7 +497,6 @@ export class GeminiChat {
       if (this.isThoughtContent(content)) {
         continue;
       }

       const lastContent =
         consolidatedOutputContents[consolidatedOutputContents.length - 1];
       if (this.isTextContent(lastContent) && this.isTextContent(content)) {
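Net effect of the two recordHistory hunks: thought content never reaches the curated history, and a response consisting only of a thought records nothing at all. A standalone restatement of that branching under the same assumed Content shape (selectHistoryContents is illustrative, not the actual method):

import type { Content } from '@google/genai';

function selectHistoryContents(modelOutput: Content[]): Content[] {
  const isThought = (c: Content) => !!c.parts?.[0]?.thought;
  const nonThought = modelOutput.filter((c) => !isThought(c));

  if (nonThought.length > 0 && nonThought.every((c) => c.role !== undefined)) {
    return nonThought; // normal case: keep only the visible model output
  }
  if (nonThought.length === 0 && modelOutput.length > 0) {
    return []; // thought-only turn: do not add an empty model response
  }
  // Empty model response: the real method appends an empty model turn here so
  // the history keeps alternating user/model; omitted in this sketch.
  return [];
}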
@@ -45,6 +45,7 @@ export enum GeminiEventType {
   Error = 'error',
   ChatCompressed = 'chat_compressed',
   UsageMetadata = 'usage_metadata',
+  Thought = 'thought',
 }

 export interface GeminiErrorEventValue {
@@ -69,11 +70,21 @@ export interface ServerToolCallConfirmationDetails {
   details: ToolCallConfirmationDetails;
 }

+export type ThoughtSummary = {
+  subject: string;
+  description: string;
+};
+
 export type ServerGeminiContentEvent = {
   type: GeminiEventType.Content;
   value: string;
 };

+export type ServerGeminiThoughtEvent = {
+  type: GeminiEventType.Thought;
+  value: ThoughtSummary;
+};
+
 export type ServerGeminiToolCallRequestEvent = {
   type: GeminiEventType.ToolCallRequest;
   value: ToolCallRequestInfo;
@@ -122,7 +133,8 @@ export type ServerGeminiStreamEvent =
   | ServerGeminiUserCancelledEvent
   | ServerGeminiErrorEvent
   | ServerGeminiChatCompressedEvent
-  | ServerGeminiUsageMetadataEvent;
+  | ServerGeminiUsageMetadataEvent
+  | ServerGeminiThoughtEvent;

 // A turn manages the agentic loop turn within the server context.
 export class Turn {
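With Thought added to GeminiEventType and to the ServerGeminiStreamEvent union, a consumer can show the thought's subject while the request is still streaming, which is what the commit title refers to. A hedged consumer-side sketch (the import path and render function are illustrative):

// Illustrative import path; the actual module layout may differ.
import { GeminiEventType, type ServerGeminiStreamEvent } from './core/turn.js';

async function render(events: AsyncIterable<ServerGeminiStreamEvent>) {
  for await (const event of events) {
    switch (event.type) {
      case GeminiEventType.Thought:
        // event.value is a ThoughtSummary; surface the subject as a status line.
        console.log(`(thinking) ${event.value.subject}`);
        break;
      case GeminiEventType.Content:
        process.stdout.write(event.value);
        break;
      default:
        break;
    }
  }
}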
@@ -160,6 +172,28 @@ export class Turn {
       }
       this.debugResponses.push(resp);

+      const thoughtPart = resp.candidates?.[0]?.content?.parts?.[0];
+      if (thoughtPart?.thought) {
+        // Thought always has a bold "subject" part enclosed in double asterisks
+        // (e.g., **Subject**). The rest of the string is considered the description.
+        const rawText = thoughtPart.text ?? '';
+        const subjectStringMatches = rawText.match(/\*\*(.*?)\*\*/s);
+        const subject = subjectStringMatches
+          ? subjectStringMatches[1].trim()
+          : '';
+        const description = rawText.replace(/\*\*(.*?)\*\*/s, '').trim();
+        const thought: ThoughtSummary = {
+          subject,
+          description,
+        };
+
+        yield {
+          type: GeminiEventType.Thought,
+          value: thought,
+        };
+        continue;
+      }
+
       const text = getResponseText(resp);
       if (text) {
         yield { type: GeminiEventType.Content, value: text };
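The subject/description split above relies on the convention that a thought summary opens with a bold **Subject** span. The same parsing as a standalone helper, with a worked example (parseThought is illustrative):

type ThoughtSummary = { subject: string; description: string };

// The first **bold** span becomes the subject; the remainder, with that span
// stripped, becomes the description.
function parseThought(rawText: string): ThoughtSummary {
  const match = rawText.match(/\*\*(.*?)\*\*/s);
  return {
    subject: match ? match[1].trim() : '',
    description: rawText.replace(/\*\*(.*?)\*\*/s, '').trim(),
  };
}

parseThought('**Planning the edit** First I will locate the relevant file.');
// => { subject: 'Planning the edit',
//      description: 'First I will locate the relevant file.' }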