Refactor read-file and support images. (#480)

This commit is contained in:
Jacob Richman
2025-05-29 22:30:18 +00:00
committed by GitHub
parent f21abdd1f0
commit dab7517622
13 changed files with 1475 additions and 260 deletions

View File

@@ -0,0 +1,137 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi } from 'vitest';
import { mergePartListUnions } from './useGeminiStream.js';
import { Part, PartListUnion } from '@google/genai';
// Stub out the useToolScheduler hook while keeping every other export of the
// module real (mapToDisplay from the actual module is still needed).
vi.mock('./useToolScheduler', async () => ({
  ...(await vi.importActual('./useToolScheduler')),
  useToolScheduler: vi.fn(),
}));
/**
 * Tests for mergePartListUnions. Every case passes a list whose entries are
 * either arrays of parts or single parts and asserts on the one flat array
 * that is returned.
 */
describe('mergePartListUnions', () => {
// Three arrays (text, inlineData, text) are concatenated in input order.
it('should merge multiple PartListUnion arrays', () => {
const list1: PartListUnion = [{ text: 'Hello' }];
const list2: PartListUnion = [
{ inlineData: { mimeType: 'image/png', data: 'abc' } },
];
const list3: PartListUnion = [{ text: 'World' }, { text: '!' }];
const result = mergePartListUnions([list1, list2, list3]);
expect(result).toEqual([
{ text: 'Hello' },
{ inlineData: { mimeType: 'image/png', data: 'abc' } },
{ text: 'World' },
{ text: '!' },
]);
});
// An empty array between two non-empty ones contributes nothing to the result.
it('should handle empty arrays in the input list', () => {
const list1: PartListUnion = [{ text: 'First' }];
const list2: PartListUnion = [];
const list3: PartListUnion = [{ text: 'Last' }];
const result = mergePartListUnions([list1, list2, list3]);
expect(result).toEqual([{ text: 'First' }, { text: 'Last' }]);
});
// With a single input array the result is deep-equal to that array.
it('should handle a single PartListUnion array', () => {
const list1: PartListUnion = [
{ text: 'One' },
{ inlineData: { mimeType: 'image/jpeg', data: 'xyz' } },
];
const result = mergePartListUnions([list1]);
expect(result).toEqual(list1);
});
// Only empty arrays in, empty array out.
it('should return an empty array if all input arrays are empty', () => {
const list1: PartListUnion = [];
const list2: PartListUnion = [];
const result = mergePartListUnions([list1, list2]);
expect(result).toEqual([]);
});
// The outer list itself may be empty.
it('should handle input list being empty', () => {
const result = mergePartListUnions([]);
expect(result).toEqual([]);
});
// Bare Part objects (not wrapped in an array) are appended as single items.
it('should correctly merge when PartListUnion items are single Parts not in arrays', () => {
const part1: Part = { text: 'Single part 1' };
const part2: Part = { inlineData: { mimeType: 'image/gif', data: 'gif' } };
const listContainingSingleParts: PartListUnion[] = [
part1,
[part2],
{ text: 'Another single part' },
];
const result = mergePartListUnions(listContainingSingleParts);
expect(result).toEqual([
{ text: 'Single part 1' },
{ inlineData: { mimeType: 'image/gif', data: 'gif' } },
{ text: 'Another single part' },
]);
});
// NOTE: despite the title, no undefined/null entries are passed here; the
// input only mixes non-empty arrays, one empty array and one bare Part.
it('should handle a mix of arrays and single parts, including empty arrays and undefined/null parts if they were possible (though PartListUnion typing restricts this)', () => {
const list1: PartListUnion = [{ text: 'A' }];
const list2: PartListUnion = [];
const part3: Part = { text: 'B' };
const list4: PartListUnion = [
{ text: 'C' },
{ inlineData: { mimeType: 'text/plain', data: 'D' } },
];
const result = mergePartListUnions([list1, list2, part3, list4]);
expect(result).toEqual([
{ text: 'A' },
{ text: 'B' },
{ text: 'C' },
{ inlineData: { mimeType: 'text/plain', data: 'D' } },
]);
});
// Parts must come out in the same order as the arrays (and their items) went in.
it('should preserve the order of parts from the input arrays', () => {
const listA: PartListUnion = [{ text: '1' }, { text: '2' }];
const listB: PartListUnion = [{ text: '3' }];
const listC: PartListUnion = [{ text: '4' }, { text: '5' }];
const result = mergePartListUnions([listA, listB, listC]);
expect(result).toEqual([
{ text: '1' },
{ text: '2' },
{ text: '3' },
{ text: '4' },
{ text: '5' },
]);
});
// Alternates bare Parts and Part arrays to check interleaving is kept.
it('should handle cases where some PartListUnion items are single Parts and others are arrays of Parts', () => {
const singlePart1: Part = { text: 'First single' };
const arrayPart1: Part[] = [
{ text: 'Array item 1' },
{ text: 'Array item 2' },
];
const singlePart2: Part = {
inlineData: { mimeType: 'application/json', data: 'e30=' },
}; // 'e30=' is the base64 encoding of '{}'
const arrayPart2: Part[] = [{ text: 'Last array item' }];
const result = mergePartListUnions([
singlePart1,
arrayPart1,
singlePart2,
arrayPart2,
]);
expect(result).toEqual([
{ text: 'First single' },
{ text: 'Array item 1' },
{ text: 'Array item 2' },
{ inlineData: { mimeType: 'application/json', data: 'e30=' } },
{ text: 'Last array item' },
]);
});
});

View File

@@ -41,6 +41,18 @@ import { useLogger } from './useLogger.js';
import { useToolScheduler, mapToDisplay } from './useToolScheduler.js';
import { GeminiChat } from '@gemini-code/server/src/core/geminiChat.js';
/**
 * Flattens a list of PartListUnion values into one flat list of parts.
 *
 * Entries that are arrays have their items appended individually; any other
 * entry is appended as a single item. Input order is preserved.
 *
 * @param list The PartListUnion values to combine.
 * @returns A new array holding every part from `list`, one level deep.
 */
export function mergePartListUnions(list: PartListUnion[]): PartListUnion {
  const merged: PartListUnion = [];
  for (const entry of list) {
    // Wrap a lone part so both shapes go through the same spread.
    merged.push(...(Array.isArray(entry) ? entry : [entry]));
  }
  return merged;
}
enum StreamProcessingStatus {
Completed,
UserCancelled,
@@ -74,16 +86,16 @@ export const useGeminiStream = (
(tools) => {
if (tools.length) {
addItem(mapToDisplay(tools), Date.now());
submitQuery(
tools
.filter(
(t) =>
t.status === 'error' ||
t.status === 'cancelled' ||
t.status === 'success',
)
.map((t) => t.response.responsePart),
);
const toolResponses = tools
.filter(
(t) =>
t.status === 'error' ||
t.status === 'cancelled' ||
t.status === 'success',
)
.map((t) => t.response.responseParts);
submitQuery(mergePartListUnions(toolResponses));
}
},
config,
@@ -313,7 +325,7 @@ export const useGeminiStream = (
};
const responseInfo: ToolCallResponseInfo = {
callId: request.callId,
responsePart: functionResponse,
responseParts: functionResponse,
resultDisplay,
error: new Error(declineMessage),
};

View File

@@ -0,0 +1,126 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import { formatLlmContentForFunctionResponse } from './useToolScheduler.js';
import { Part, PartListUnion } from '@google/genai';
/**
 * Tests for formatLlmContentForFunctionResponse. Each case feeds one shape of
 * llmContent (string, single Part, or array of Parts) and checks both halves
 * of the result: functionResponseJson and additionalParts.
 */
describe('formatLlmContentForFunctionResponse', () => {
// A plain string becomes { output } with nothing extra to send.
it('should handle simple string llmContent', () => {
const llmContent = 'Simple text output';
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({ output: 'Simple text output' });
expect(additionalParts).toEqual([]);
});
// A text Part is expected to produce the same result shape as a raw string.
it('should handle llmContent as a single Part with text', () => {
const llmContent: Part = { text: 'Text from Part object' };
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({ output: 'Text from Part object' });
expect(additionalParts).toEqual([]);
});
// A one-element array is expected to behave like its single element.
it('should handle llmContent as a PartListUnion array with a single text Part', () => {
const llmContent: PartListUnion = [{ text: 'Text from array' }];
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({ output: 'Text from array' });
expect(additionalParts).toEqual([]);
});
// inlineData: JSON carries only a status naming the mime type; the Part
// itself is returned in additionalParts.
it('should handle llmContent with inlineData', () => {
const llmContent: Part = {
inlineData: { mimeType: 'image/png', data: 'base64...' },
};
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({
status: 'Binary content of type image/png was processed.',
});
expect(additionalParts).toEqual([llmContent]);
});
// fileData is expected to follow the same pattern as inlineData.
it('should handle llmContent with fileData', () => {
const llmContent: Part = {
fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' },
};
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({
status: 'Binary content of type application/pdf was processed.',
});
expect(additionalParts).toEqual([llmContent]);
});
// Multi-element arrays get the generic success status and every element,
// text parts included, is returned in additionalParts.
it('should handle llmContent as an array of multiple Parts (text and inlineData)', () => {
const llmContent: PartListUnion = [
{ text: 'Some textual description' },
{ inlineData: { mimeType: 'image/jpeg', data: 'base64data...' } },
{ text: 'Another text part' },
];
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({
status: 'Tool execution succeeded.',
});
expect(additionalParts).toEqual(llmContent);
});
it('should handle llmContent as an array with a single inlineData Part', () => {
const llmContent: PartListUnion = [
{ inlineData: { mimeType: 'image/gif', data: 'gifdata...' } },
];
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
// When the array is a single Part and that part is inlineData
expect(functionResponseJson).toEqual({
status: 'Binary content of type image/gif was processed.',
});
expect(additionalParts).toEqual(llmContent);
});
it('should handle llmContent as a generic Part (not text, inlineData, or fileData)', () => {
// This case might represent a malformed or unexpected Part type.
// For example, a Part that is just an empty object or has other properties.
const llmContent: Part = { functionCall: { name: 'test', args: {} } }; // Example of a non-standard part for this context
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({
status: 'Tool execution succeeded.',
});
expect(additionalParts).toEqual([llmContent]);
});
// The empty string is still a string: it is reported as output, not as a status.
it('should handle empty string llmContent', () => {
const llmContent = '';
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({ output: '' });
expect(additionalParts).toEqual([]);
});
// An empty array yields the generic status and no additional parts.
it('should handle llmContent as an empty array', () => {
const llmContent: PartListUnion = [];
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({
status: 'Tool execution succeeded.',
});
expect(additionalParts).toEqual([]);
});
// A Part with none of text/inlineData/fileData set is passed through as an
// additional part with the generic status.
it('should handle llmContent as a Part with undefined inlineData/fileData/text', () => {
const llmContent: Part = {}; // An empty part object
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(llmContent);
expect(functionResponseJson).toEqual({
status: 'Tool execution succeeded.',
});
expect(additionalParts).toEqual([llmContent]);
});
});

View File

@@ -13,7 +13,7 @@ import {
ToolCallConfirmationDetails,
ToolResult,
} from '@gemini-code/server';
import { Part } from '@google/genai';
import { Part, PartUnion, PartListUnion } from '@google/genai';
import { useCallback, useEffect, useState } from 'react';
import {
HistoryItemToolGroup,
@@ -88,6 +88,60 @@ export type CompletedToolCall =
| CancelledToolCall
| ErroredToolCall;
/**
 * Splits a tool's PartListUnion result into the JSON payload for a Gemini
 * FunctionResponse plus the Parts that should be sent after that response.
 *
 * The split exists because FunctionResponse appears to only support JSON and
 * not arbitrary parts. Placing parts such as inlineData or fileData directly
 * inside a FunctionResponse confuses the model, which then fails to interpret
 * the multimodal content and hits context-window-exceeded errors.
 *
 * Mapping (an array of exactly one element is handled as that element):
 * - string: `{ output: <string> }`, no additional parts.
 * - Part with inlineData or fileData: a status line naming the mime type;
 *   the Part itself is returned in `additionalParts`.
 * - Part with text (and no inlineData/fileData): `{ output: <text> }`, no
 *   additional parts.
 * - any other Part, or an array whose length is not 1: a generic success
 *   status; the Part / every array element is returned in `additionalParts`.
 *
 * @param llmContent The content a tool produced for the model.
 * @returns The FunctionResponse JSON and the Parts to append after it.
 */
export function formatLlmContentForFunctionResponse(
  llmContent: PartListUnion,
): {
  functionResponseJson: Record<string, string>;
  additionalParts: PartUnion[];
} {
  // Unwrap single-element arrays so they take the same path as a lone Part.
  const content =
    Array.isArray(llmContent) && llmContent.length === 1
      ? llmContent[0]
      : llmContent;

  if (typeof content === 'string') {
    return { functionResponseJson: { output: content }, additionalParts: [] };
  }

  if (Array.isArray(content)) {
    return {
      functionResponseJson: { status: 'Tool execution succeeded.' },
      additionalParts: [...content],
    };
  }

  const carriesBinary =
    content.inlineData !== undefined || content.fileData !== undefined;
  if (carriesBinary) {
    // The JSON only carries a textual status; the binary Part itself is
    // forwarded alongside the FunctionResponse.
    const mimeType =
      content.inlineData?.mimeType || content.fileData?.mimeType || 'unknown';
    return {
      functionResponseJson: {
        status: `Binary content of type ${mimeType} was processed.`,
      },
      additionalParts: [content],
    };
  }

  if (content.text !== undefined) {
    return {
      functionResponseJson: { output: content.text },
      additionalParts: [],
    };
  }

  // Anything else is forwarded untouched with a generic status.
  return {
    functionResponseJson: { status: 'Tool execution succeeded.' },
    additionalParts: [content],
  };
}
export function useToolScheduler(
onComplete: (tools: CompletedToolCall[]) => void,
config: Config,
@@ -201,7 +255,7 @@ export function useToolScheduler(
status: 'cancelled',
response: {
callId: c.request.callId,
responsePart: {
responseParts: {
functionResponse: {
id: c.request.callId,
name: c.request.name,
@@ -276,21 +330,24 @@ export function useToolScheduler(
.execute(t.request.args, signal, onOutputChunk)
.then((result: ToolResult) => {
if (signal.aborted) {
// TODO(jacobr): avoid stringifying the LLM content.
setToolCalls(
setStatus(callId, 'cancelled', String(result.llmContent)),
);
return;
}
const { functionResponseJson, additionalParts } =
formatLlmContentForFunctionResponse(result.llmContent);
const functionResponse: Part = {
functionResponse: {
name: t.request.name,
id: callId,
response: { output: result.llmContent },
response: functionResponseJson,
},
};
const response: ToolCallResponseInfo = {
callId,
responsePart: functionResponse,
responseParts: [functionResponse, ...additionalParts],
resultDisplay: result.returnDisplay,
error: undefined,
};
@@ -401,7 +458,7 @@ function setStatus(
status: 'cancelled',
response: {
callId: t.request.callId,
responsePart: {
responseParts: {
functionResponse: {
id: t.request.callId,
name: t.request.name,
@@ -446,7 +503,7 @@ const toolErrorResponse = (
): ToolCallResponseInfo => ({
callId: request.callId,
error,
responsePart: {
responseParts: {
functionResponse: {
id: request.callId,
name: request.name,