fix(core): citation markers misplaced in search results containing multibyte characters (#5956)

Co-authored-by: Allen Hutchison <adh@google.com>
This commit is contained in:
pokutuna
2025-08-23 01:09:16 +09:00
committed by GitHub
parent 3b29f11862
commit 56ad22b39b
2 changed files with 94 additions and 5 deletions

View File

@@ -173,5 +173,77 @@ Sources:
); );
expect(result.sources).toHaveLength(2); expect(result.sources).toHaveLength(2);
}); });
// Regression test for citation-marker placement when the model response
// contains multibyte characters. The grounding segments below are expressed
// as UTF-8 byte offsets (see the per-segment comments), so a marker placed by
// JavaScript string index would land in the wrong spot for this text.
it('should insert markers at correct byte positions for multibyte text', async () => {
const params: WebSearchToolParams = { query: 'multibyte query' };
// Stub the Gemini client so the tool receives a single candidate whose text
// mixes 3-byte Japanese characters, ASCII, and an emoji.
(mockGeminiClient.generateContent as Mock).mockResolvedValue({
candidates: [
{
content: {
role: 'model',
parts: [{ text: 'こんにちは! Gemini CLI✨️' }],
},
groundingMetadata: {
// Three sources; the expected output below cites them as [1], [2], [3],
// i.e. chunk index + 1.
groundingChunks: [
{
web: {
title: 'Japanese Greeting',
uri: 'https://example.test/japanese-greeting',
},
},
{
web: {
title: 'google-gemini/gemini-cli',
uri: 'https://github.com/google-gemini/gemini-cli',
},
},
{
web: {
title: 'Gemini CLI: your open-source AI agent',
uri: 'https://blog.google/technology/developers/introducing-gemini-cli-open-source-ai-agent/',
},
},
],
groundingSupports: [
{
segment: {
// Byte range of "こんにちは!" (utf-8 encoded)
// 5 hiragana x 3 bytes + 1 byte for "!" = 16 bytes, versus only
// 6 UTF-16 code units — which is why a string-index insert misplaces it.
startIndex: 0,
endIndex: 16,
},
groundingChunkIndices: [0],
},
{
segment: {
// Byte range of "Gemini CLI✨" (utf-8 encoded)
// Byte 16 is the separating space, so this segment starts at 17.
// "Gemini CLI" is 10 bytes (17..27); an end of 33 leaves 6 bytes for
// the emoji. NOTE(review): that matches U+2728 followed by the
// invisible U+FE0F variation selector (3 + 3 bytes); a bare U+2728
// would end at 30 — confirm the literal above still carries the selector.
startIndex: 17,
endIndex: 33,
},
// Two chunks support this segment, so two markers are expected after it.
groundingChunkIndices: [1, 2],
},
],
},
},
],
});
const invocation = tool.build(params);
const result = await invocation.execute(abortSignal);
// Each marker must appear immediately after the segment it supports:
// [1] after the greeting, [2][3] after the emoji at the very end of the text.
const expectedLlmContent = `Web search results for "multibyte query":
こんにちは![1] Gemini CLI✨️[2][3]
Sources:
[1] Japanese Greeting (https://example.test/japanese-greeting)
[2] google-gemini/gemini-cli (https://github.com/google-gemini/gemini-cli)
[3] Gemini CLI: your open-source AI agent (https://blog.google/technology/developers/introducing-gemini-cli-open-source-ai-agent/)`;
expect(result.llmContent).toBe(expectedLlmContent);
expect(result.returnDisplay).toBe(
'Search results for "multibyte query" returned.',
);
// One source entry per grounding chunk.
expect(result.sources).toHaveLength(3);
});
}); });
}); });

View File

@@ -128,11 +128,28 @@ class WebSearchToolInvocation extends BaseToolInvocation<
// Sort insertions by index in descending order to avoid shifting subsequent indices // Sort insertions by index in descending order to avoid shifting subsequent indices
insertions.sort((a, b) => b.index - a.index); insertions.sort((a, b) => b.index - a.index);
const responseChars = modifiedResponseText.split(''); // Use new variable // Use TextEncoder/TextDecoder since segment indices are UTF-8 byte positions
insertions.forEach((insertion) => { const encoder = new TextEncoder();
responseChars.splice(insertion.index, 0, insertion.marker); const responseBytes = encoder.encode(modifiedResponseText);
}); const parts: Uint8Array[] = [];
modifiedResponseText = responseChars.join(''); // Assign back to modifiedResponseText let lastIndex = responseBytes.length;
for (const ins of insertions) {
const pos = Math.min(ins.index, lastIndex);
parts.unshift(responseBytes.subarray(pos, lastIndex));
parts.unshift(encoder.encode(ins.marker));
lastIndex = pos;
}
parts.unshift(responseBytes.subarray(0, lastIndex));
// Concatenate all parts into a single buffer
const totalLength = parts.reduce((sum, part) => sum + part.length, 0);
const finalBytes = new Uint8Array(totalLength);
let offset = 0;
for (const part of parts) {
finalBytes.set(part, offset);
offset += part.length;
}
modifiedResponseText = new TextDecoder().decode(finalBytes);
} }
if (sourceListFormatted.length > 0) { if (sourceListFormatted.length > 0) {