Refactor read-file and support images. (#480)

2025-12-20 16:57:46 +00:00 · 2025-05-29 22:30:18 +00:00
parent f21abdd1f0
commit dab7517622
13 changed files with 1475 additions and 260 deletions
--- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
+++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx
@@ -0,0 +1,137 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi } from 'vitest';
+import { mergePartListUnions } from './useGeminiStream.js';
+import { Part, PartListUnion } from '@google/genai';
+
+// Stub out the useToolScheduler hook; every other export of the module stays real.
+vi.mock('./useToolScheduler', async () => {
+  const actual = await vi.importActual('./useToolScheduler');
+  return {
+    ...actual, // We need mapToDisplay from actual
+    useToolScheduler: vi.fn(),
+  };
+});
+
+describe('mergePartListUnions', () => { // flattening of arrays and bare parts, ordering, and empty inputs
+  it('should merge multiple PartListUnion arrays', () => {
+    const list1: PartListUnion = [{ text: 'Hello' }];
+    const list2: PartListUnion = [
+      { inlineData: { mimeType: 'image/png', data: 'abc' } },
+    ];
+    const list3: PartListUnion = [{ text: 'World' }, { text: '!' }];
+    const result = mergePartListUnions([list1, list2, list3]);
+    expect(result).toEqual([
+      { text: 'Hello' },
+      { inlineData: { mimeType: 'image/png', data: 'abc' } },
+      { text: 'World' },
+      { text: '!' },
+    ]);
+  });
+
+  it('should handle empty arrays in the input list', () => {
+    const list1: PartListUnion = [{ text: 'First' }];
+    const list2: PartListUnion = []; // empty entry contributes nothing to the result
+    const list3: PartListUnion = [{ text: 'Last' }];
+    const result = mergePartListUnions([list1, list2, list3]);
+    expect(result).toEqual([{ text: 'First' }, { text: 'Last' }]);
+  });
+
+  it('should handle a single PartListUnion array', () => {
+    const list1: PartListUnion = [
+      { text: 'One' },
+      { inlineData: { mimeType: 'image/jpeg', data: 'xyz' } },
+    ];
+    const result = mergePartListUnions([list1]);
+    expect(result).toEqual(list1); // deep equality: same contents as the only input array
+  });
+
+  it('should return an empty array if all input arrays are empty', () => {
+    const list1: PartListUnion = [];
+    const list2: PartListUnion = [];
+    const result = mergePartListUnions([list1, list2]);
+    expect(result).toEqual([]);
+  });
+
+  it('should handle input list being empty', () => {
+    const result = mergePartListUnions([]); // no entries at all
+    expect(result).toEqual([]);
+  });
+
+  it('should correctly merge when PartListUnion items are single Parts not in arrays', () => {
+    const part1: Part = { text: 'Single part 1' };
+    const part2: Part = { inlineData: { mimeType: 'image/gif', data: 'gif' } };
+    const listContainingSingleParts: PartListUnion[] = [
+      part1, // bare Part, not wrapped in an array
+      [part2], // one-element array
+      { text: 'Another single part' }, // bare Part literal
+    ];
+    const result = mergePartListUnions(listContainingSingleParts);
+    expect(result).toEqual([
+      { text: 'Single part 1' },
+      { inlineData: { mimeType: 'image/gif', data: 'gif' } },
+      { text: 'Another single part' },
+    ]);
+  });
+
+  it('should handle a mix of arrays and single parts, including empty arrays and undefined/null parts if they were possible (though PartListUnion typing restricts this)', () => { // NOTE: no undefined/null entries are actually exercised below
+    const list1: PartListUnion = [{ text: 'A' }];
+    const list2: PartListUnion = []; // empty array leaves no trace in the output
+    const part3: Part = { text: 'B' };
+    const list4: PartListUnion = [
+      { text: 'C' },
+      { inlineData: { mimeType: 'text/plain', data: 'D' } },
+    ];
+    const result = mergePartListUnions([list1, list2, part3, list4]);
+    expect(result).toEqual([
+      { text: 'A' },
+      { text: 'B' },
+      { text: 'C' },
+      { inlineData: { mimeType: 'text/plain', data: 'D' } },
+    ]);
+  });
+
+  it('should preserve the order of parts from the input arrays', () => {
+    const listA: PartListUnion = [{ text: '1' }, { text: '2' }];
+    const listB: PartListUnion = [{ text: '3' }];
+    const listC: PartListUnion = [{ text: '4' }, { text: '5' }];
+    const result = mergePartListUnions([listA, listB, listC]);
+    expect(result).toEqual([
+      { text: '1' },
+      { text: '2' },
+      { text: '3' },
+      { text: '4' },
+      { text: '5' },
+    ]);
+  });
+
+  it('should handle cases where some PartListUnion items are single Parts and others are arrays of Parts', () => {
+    const singlePart1: Part = { text: 'First single' };
+    const arrayPart1: Part[] = [
+      { text: 'Array item 1' },
+      { text: 'Array item 2' },
+    ];
+    const singlePart2: Part = {
+      inlineData: { mimeType: 'application/json', data: 'e30=' },
+    }; // 'e30=' is the base64 encoding of '{}'
+    const arrayPart2: Part[] = [{ text: 'Last array item' }];
+
+    const result = mergePartListUnions([
+      singlePart1,
+      arrayPart1,
+      singlePart2,
+      arrayPart2,
+    ]);
+    expect(result).toEqual([
+      { text: 'First single' },
+      { text: 'Array item 1' },
+      { text: 'Array item 2' },
+      { inlineData: { mimeType: 'application/json', data: 'e30=' } },
+      { text: 'Last array item' },
+    ]);
+  });
+});
--- a/packages/cli/src/ui/hooks/useGeminiStream.ts
+++ b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -41,6 +41,18 @@ import { useLogger } from './useLogger.js';
 import { useToolScheduler, mapToDisplay } from './useToolScheduler.js';
 import { GeminiChat } from '@gemini-code/server/src/core/geminiChat.js';

+export function mergePartListUnions(list: PartListUnion[]): PartListUnion { // flattens `list` by one level into a single array of parts
+  const resultParts: PartListUnion = []; // accumulates parts in input order; stays empty for an empty `list`
+  for (const item of list) {
+    if (Array.isArray(item)) {
+      resultParts.push(...item); // array entry: append each of its elements
+    } else {
+      resultParts.push(item); // non-array entry: append it as a single element
+    }
+  }
+  return resultParts;
+}
+
 enum StreamProcessingStatus {
  Completed,
  UserCancelled,
@@ -74,16 +86,16 @@ export const useGeminiStream = (
    (tools) => {
      if (tools.length) {
        addItem(mapToDisplay(tools), Date.now());
-        submitQuery(
-          tools
-            .filter(
-              (t) =>
-                t.status === 'error' ||
-                t.status === 'cancelled' ||
-                t.status === 'success',
-            )
-            .map((t) => t.response.responsePart),
-        );
+        const toolResponses = tools
+          .filter(
+            (t) =>
+              t.status === 'error' ||
+              t.status === 'cancelled' ||
+              t.status === 'success',
+          )
+          .map((t) => t.response.responseParts);
+
+        submitQuery(mergePartListUnions(toolResponses));
      }
    },
    config,
@@ -313,7 +325,7 @@ export const useGeminiStream = (
    };
    const responseInfo: ToolCallResponseInfo = {
      callId: request.callId,
-      responsePart: functionResponse,
+      responseParts: functionResponse,
      resultDisplay,
      error: new Error(declineMessage),
    };
--- a/packages/cli/src/ui/hooks/useToolScheduler.test.ts
+++ b/packages/cli/src/ui/hooks/useToolScheduler.test.ts
@@ -0,0 +1,126 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { formatLlmContentForFunctionResponse } from './useToolScheduler.js';
+import { Part, PartListUnion } from '@google/genai';
+
+describe('formatLlmContentForFunctionResponse', () => { // one case per input shape: string, single Part, array, empty
+  it('should handle simple string llmContent', () => {
+    const llmContent = 'Simple text output';
+    const { functionResponseJson, additionalParts } =
+      formatLlmContentForFunctionResponse(llmContent);
+    expect(functionResponseJson).toEqual({ output: 'Simple text output' });
+    expect(additionalParts).toEqual([]); // plain text needs no extra parts
+  });
+
+  it('should handle llmContent as a single Part with text', () => {
+    const llmContent: Part = { text: 'Text from Part object' };
+    const { functionResponseJson, additionalParts } =
+      formatLlmContentForFunctionResponse(llmContent);
+    expect(functionResponseJson).toEqual({ output: 'Text from Part object' });
+    expect(additionalParts).toEqual([]);
+  });
+
+  it('should handle llmContent as a PartListUnion array with a single text Part', () => {
+    const llmContent: PartListUnion = [{ text: 'Text from array' }];
+    const { functionResponseJson, additionalParts } =
+      formatLlmContentForFunctionResponse(llmContent);
+    expect(functionResponseJson).toEqual({ output: 'Text from array' });
+    expect(additionalParts).toEqual([]);
+  });
+
+  it('should handle llmContent with inlineData', () => {
+    const llmContent: Part = {
+      inlineData: { mimeType: 'image/png', data: 'base64...' },
+    };
+    const { functionResponseJson, additionalParts } =
+      formatLlmContentForFunctionResponse(llmContent);
+    expect(functionResponseJson).toEqual({
+      status: 'Binary content of type image/png was processed.',
+    });
+    expect(additionalParts).toEqual([llmContent]); // the binary part itself travels outside the JSON
+  });
+
+  it('should handle llmContent with fileData', () => {
+    const llmContent: Part = {
+      fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' },
+    };
+    const { functionResponseJson, additionalParts } =
+      formatLlmContentForFunctionResponse(llmContent);
+    expect(functionResponseJson).toEqual({
+      status: 'Binary content of type application/pdf was processed.',
+    });
+    expect(additionalParts).toEqual([llmContent]);
+  });
+
+  it('should handle llmContent as an array of multiple Parts (text and inlineData)', () => {
+    const llmContent: PartListUnion = [
+      { text: 'Some textual description' },
+      { inlineData: { mimeType: 'image/jpeg', data: 'base64data...' } },
+      { text: 'Another text part' },
+    ];
+    const { functionResponseJson, additionalParts } =
+      formatLlmContentForFunctionResponse(llmContent);
+    expect(functionResponseJson).toEqual({
+      status: 'Tool execution succeeded.',
+    });
+    expect(additionalParts).toEqual(llmContent); // every element, text included, is forwarded
+  });
+
+  it('should handle llmContent as an array with a single inlineData Part', () => {
+    const llmContent: PartListUnion = [
+      { inlineData: { mimeType: 'image/gif', data: 'gifdata...' } },
+    ];
+    const { functionResponseJson, additionalParts } =
+      formatLlmContentForFunctionResponse(llmContent);
+    // A one-element array is expected to behave exactly like that lone inlineData Part.
+    expect(functionResponseJson).toEqual({
+      status: 'Binary content of type image/gif was processed.',
+    });
+    expect(additionalParts).toEqual(llmContent);
+  });
+
+  it('should handle llmContent as a generic Part (not text, inlineData, or fileData)', () => {
+    // This case might represent a malformed or unexpected Part type.
+    // For example, a Part that is just an empty object or has other properties.
+    const llmContent: Part = { functionCall: { name: 'test', args: {} } }; // a functionCall part carries no text, inlineData, or fileData
+    const { functionResponseJson, additionalParts } =
+      formatLlmContentForFunctionResponse(llmContent);
+    expect(functionResponseJson).toEqual({
+      status: 'Tool execution succeeded.',
+    });
+    expect(additionalParts).toEqual([llmContent]);
+  });
+
+  it('should handle empty string llmContent', () => {
+    const llmContent = '';
+    const { functionResponseJson, additionalParts } =
+      formatLlmContentForFunctionResponse(llmContent);
+    expect(functionResponseJson).toEqual({ output: '' }); // empty string is still reported as output
+    expect(additionalParts).toEqual([]);
+  });
+
+  it('should handle llmContent as an empty array', () => {
+    const llmContent: PartListUnion = [];
+    const { functionResponseJson, additionalParts } =
+      formatLlmContentForFunctionResponse(llmContent);
+    expect(functionResponseJson).toEqual({
+      status: 'Tool execution succeeded.',
+    });
+    expect(additionalParts).toEqual([]);
+  });
+
+  it('should handle llmContent as a Part with undefined inlineData/fileData/text', () => {
+    const llmContent: Part = {}; // An empty part object
+    const { functionResponseJson, additionalParts } =
+      formatLlmContentForFunctionResponse(llmContent);
+    expect(functionResponseJson).toEqual({
+      status: 'Tool execution succeeded.',
+    });
+    expect(additionalParts).toEqual([llmContent]);
+  });
+});
--- a/packages/cli/src/ui/hooks/useToolScheduler.ts
+++ b/packages/cli/src/ui/hooks/useToolScheduler.ts
@@ -13,7 +13,7 @@ import {
  ToolCallConfirmationDetails,
  ToolResult,
 } from '@gemini-code/server';
-import { Part } from '@google/genai';
+import { Part, PartUnion, PartListUnion } from '@google/genai';
 import { useCallback, useEffect, useState } from 'react';
 import {
  HistoryItemToolGroup,
@@ -88,6 +88,60 @@ export type CompletedToolCall =
  | CancelledToolCall
  | ErroredToolCall;

+/**
+ * Formats a PartListUnion response from a tool into JSON suitable for a Gemini
+ * FunctionResponse and additional Parts to include after that response.
+ *
+ * This is required because FunctionResponse appears to only support JSON
+ * and not arbitrary parts. Including parts like inlineData or fileData
+ * directly in a FunctionResponse confuses the model resulting in a failure
+ * to interpret the multimodal content and context window exceeded errors.
+ */
+
+export function formatLlmContentForFunctionResponse(
+  llmContent: PartListUnion, // the tool's content: a string, a single Part, or an array of them
+): {
+  functionResponseJson: Record<string, string>; // JSON payload: either { output } or { status }
+  additionalParts: PartUnion[]; // parts that could not be expressed in the JSON payload
+} {
+  const additionalParts: PartUnion[] = [];
+  let functionResponseJson: Record<string, string>;
+
+  if (Array.isArray(llmContent) && llmContent.length === 1) {
+    // Ensure that length 1 arrays are treated as a single Part.
+    llmContent = llmContent[0];
+  }
+
+  if (typeof llmContent === 'string') {
+    functionResponseJson = { output: llmContent }; // plain string: passed through verbatim, no extra parts
+  } else if (Array.isArray(llmContent)) {
+    functionResponseJson = { status: 'Tool execution succeeded.' }; // empty or multi-element array: generic status
+    additionalParts.push(...llmContent); // every element is forwarded unchanged
+  } else {
+    if (
+      llmContent.inlineData !== undefined ||
+      llmContent.fileData !== undefined
+    ) {
+      // For Parts like inlineData or fileData, the functionResponse only carries a status string naming the MIME type.
+      // The actual Part will be added to additionalParts.
+      functionResponseJson = {
+        status: `Binary content of type ${llmContent.inlineData?.mimeType || llmContent.fileData?.mimeType || 'unknown'} was processed.`,
+      };
+      additionalParts.push(llmContent);
+    } else if (llmContent.text !== undefined) {
+      functionResponseJson = { output: llmContent.text }; // text Part: its text becomes the output, no extra parts
+    } else {
+      functionResponseJson = { status: 'Tool execution succeeded.' }; // any other Part: generic status
+      additionalParts.push(llmContent); // the unrecognised Part is forwarded as-is
+    }
+  }
+
+  return {
+    functionResponseJson,
+    additionalParts,
+  };
+}
+
 export function useToolScheduler(
  onComplete: (tools: CompletedToolCall[]) => void,
  config: Config,
@@ -201,7 +255,7 @@ export function useToolScheduler(
                status: 'cancelled',
                response: {
                  callId: c.request.callId,
-                  responsePart: {
+                  responseParts: {
                    functionResponse: {
                      id: c.request.callId,
                      name: c.request.name,
@@ -276,21 +330,24 @@ export function useToolScheduler(
            .execute(t.request.args, signal, onOutputChunk)
            .then((result: ToolResult) => {
              if (signal.aborted) {
+                // TODO(jacobr): avoid stringifying the LLM content.
                setToolCalls(
                  setStatus(callId, 'cancelled', String(result.llmContent)),
                );
                return;
              }
+              const { functionResponseJson, additionalParts } =
+                formatLlmContentForFunctionResponse(result.llmContent);
              const functionResponse: Part = {
                functionResponse: {
                  name: t.request.name,
                  id: callId,
-                  response: { output: result.llmContent },
+                  response: functionResponseJson,
                },
              };
              const response: ToolCallResponseInfo = {
                callId,
-                responsePart: functionResponse,
+                responseParts: [functionResponse, ...additionalParts],
                resultDisplay: result.returnDisplay,
                error: undefined,
              };
@@ -401,7 +458,7 @@ function setStatus(
            status: 'cancelled',
            response: {
              callId: t.request.callId,
-              responsePart: {
+              responseParts: {
                functionResponse: {
                  id: t.request.callId,
                  name: t.request.name,
@@ -446,7 +503,7 @@ const toolErrorResponse = (
 ): ToolCallResponseInfo => ({
  callId: request.callId,
  error,
-  responsePart: {
+  responseParts: {
    functionResponse: {
      id: request.callId,
      name: request.name,