diff --git a/packages/core/src/tools/web-search.test.ts b/packages/core/src/tools/web-search.test.ts index 0f7c1d6c..54c4b4b3 100644 --- a/packages/core/src/tools/web-search.test.ts +++ b/packages/core/src/tools/web-search.test.ts @@ -173,5 +173,77 @@ Sources: ); expect(result.sources).toHaveLength(2); }); + + it('should insert markers at correct byte positions for multibyte text', async () => { + const params: WebSearchToolParams = { query: 'multibyte query' }; + (mockGeminiClient.generateContent as Mock).mockResolvedValue({ + candidates: [ + { + content: { + role: 'model', + parts: [{ text: 'こんにちは! Gemini CLI✨️' }], + }, + groundingMetadata: { + groundingChunks: [ + { + web: { + title: 'Japanese Greeting', + uri: 'https://example.test/japanese-greeting', + }, + }, + { + web: { + title: 'google-gemini/gemini-cli', + uri: 'https://github.com/google-gemini/gemini-cli', + }, + }, + { + web: { + title: 'Gemini CLI: your open-source AI agent', + uri: 'https://blog.google/technology/developers/introducing-gemini-cli-open-source-ai-agent/', + }, + }, + ], + groundingSupports: [ + { + segment: { + // Byte range of "こんにちは!" (utf-8 encoded) + startIndex: 0, + endIndex: 16, + }, + groundingChunkIndices: [0], + }, + { + segment: { + // Byte range of "Gemini CLI✨️" (utf-8 encoded) + startIndex: 17, + endIndex: 33, + }, + groundingChunkIndices: [1, 2], + }, + ], + }, + }, + ], + }); + + const invocation = tool.build(params); + const result = await invocation.execute(abortSignal); + + const expectedLlmContent = `Web search results for "multibyte query": + +こんにちは![1] Gemini CLI✨️[2][3] + +Sources: +[1] Japanese Greeting (https://example.test/japanese-greeting) +[2] google-gemini/gemini-cli (https://github.com/google-gemini/gemini-cli) +[3] Gemini CLI: your open-source AI agent (https://blog.google/technology/developers/introducing-gemini-cli-open-source-ai-agent/)`; + + expect(result.llmContent).toBe(expectedLlmContent); + expect(result.returnDisplay).toBe( + 'Search results for "multibyte query" returned.', + ); + expect(result.sources).toHaveLength(3); + }); }); }); diff --git a/packages/core/src/tools/web-search.ts b/packages/core/src/tools/web-search.ts index 442fac4f..03404e0a 100644 --- a/packages/core/src/tools/web-search.ts +++ b/packages/core/src/tools/web-search.ts @@ -128,11 +128,28 @@ class WebSearchToolInvocation extends BaseToolInvocation< // Sort insertions by index in descending order to avoid shifting subsequent indices insertions.sort((a, b) => b.index - a.index); - const responseChars = modifiedResponseText.split(''); // Use new variable - insertions.forEach((insertion) => { - responseChars.splice(insertion.index, 0, insertion.marker); - }); - modifiedResponseText = responseChars.join(''); // Assign back to modifiedResponseText + // Use TextEncoder/TextDecoder since segment indices are UTF-8 byte positions + const encoder = new TextEncoder(); + const responseBytes = encoder.encode(modifiedResponseText); + const parts: Uint8Array[] = []; + let lastIndex = responseBytes.length; + for (const ins of insertions) { + const pos = Math.min(ins.index, lastIndex); + parts.unshift(responseBytes.subarray(pos, lastIndex)); + parts.unshift(encoder.encode(ins.marker)); + lastIndex = pos; + } + parts.unshift(responseBytes.subarray(0, lastIndex)); + + // Concatenate all parts into a single buffer + const totalLength = parts.reduce((sum, part) => sum + part.length, 0); + const finalBytes = new Uint8Array(totalLength); + let offset = 0; + for (const part of parts) { + finalBytes.set(part, offset); + offset += part.length; + } + modifiedResponseText = new TextDecoder().decode(finalBytes); } if (sourceListFormatted.length > 0) {