mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-19 09:33:53 +00:00
fix(core): citation markers misplaced in search results containing multibyte characters (#5956)
Co-authored-by: Allen Hutchison <adh@google.com>
This commit is contained in:
@@ -173,5 +173,77 @@ Sources:
|
|||||||
);
|
);
|
||||||
expect(result.sources).toHaveLength(2);
|
expect(result.sources).toHaveLength(2);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should insert markers at correct byte positions for multibyte text', async () => {
|
||||||
|
const params: WebSearchToolParams = { query: 'multibyte query' };
|
||||||
|
(mockGeminiClient.generateContent as Mock).mockResolvedValue({
|
||||||
|
candidates: [
|
||||||
|
{
|
||||||
|
content: {
|
||||||
|
role: 'model',
|
||||||
|
parts: [{ text: 'こんにちは! Gemini CLI✨️' }],
|
||||||
|
},
|
||||||
|
groundingMetadata: {
|
||||||
|
groundingChunks: [
|
||||||
|
{
|
||||||
|
web: {
|
||||||
|
title: 'Japanese Greeting',
|
||||||
|
uri: 'https://example.test/japanese-greeting',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
web: {
|
||||||
|
title: 'google-gemini/gemini-cli',
|
||||||
|
uri: 'https://github.com/google-gemini/gemini-cli',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
web: {
|
||||||
|
title: 'Gemini CLI: your open-source AI agent',
|
||||||
|
uri: 'https://blog.google/technology/developers/introducing-gemini-cli-open-source-ai-agent/',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
groundingSupports: [
|
||||||
|
{
|
||||||
|
segment: {
|
||||||
|
// Byte range of "こんにちは!" (utf-8 encoded)
|
||||||
|
startIndex: 0,
|
||||||
|
endIndex: 16,
|
||||||
|
},
|
||||||
|
groundingChunkIndices: [0],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
segment: {
|
||||||
|
// Byte range of "Gemini CLI✨️" (utf-8 encoded)
|
||||||
|
startIndex: 17,
|
||||||
|
endIndex: 33,
|
||||||
|
},
|
||||||
|
groundingChunkIndices: [1, 2],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
const invocation = tool.build(params);
|
||||||
|
const result = await invocation.execute(abortSignal);
|
||||||
|
|
||||||
|
const expectedLlmContent = `Web search results for "multibyte query":
|
||||||
|
|
||||||
|
こんにちは![1] Gemini CLI✨️[2][3]
|
||||||
|
|
||||||
|
Sources:
|
||||||
|
[1] Japanese Greeting (https://example.test/japanese-greeting)
|
||||||
|
[2] google-gemini/gemini-cli (https://github.com/google-gemini/gemini-cli)
|
||||||
|
[3] Gemini CLI: your open-source AI agent (https://blog.google/technology/developers/introducing-gemini-cli-open-source-ai-agent/)`;
|
||||||
|
|
||||||
|
expect(result.llmContent).toBe(expectedLlmContent);
|
||||||
|
expect(result.returnDisplay).toBe(
|
||||||
|
'Search results for "multibyte query" returned.',
|
||||||
|
);
|
||||||
|
expect(result.sources).toHaveLength(3);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -128,11 +128,28 @@ class WebSearchToolInvocation extends BaseToolInvocation<
|
|||||||
// Sort insertions by index in descending order to avoid shifting subsequent indices
|
// Sort insertions by index in descending order to avoid shifting subsequent indices
|
||||||
insertions.sort((a, b) => b.index - a.index);
|
insertions.sort((a, b) => b.index - a.index);
|
||||||
|
|
||||||
const responseChars = modifiedResponseText.split(''); // Use new variable
|
// Use TextEncoder/TextDecoder since segment indices are UTF-8 byte positions
|
||||||
insertions.forEach((insertion) => {
|
const encoder = new TextEncoder();
|
||||||
responseChars.splice(insertion.index, 0, insertion.marker);
|
const responseBytes = encoder.encode(modifiedResponseText);
|
||||||
});
|
const parts: Uint8Array[] = [];
|
||||||
modifiedResponseText = responseChars.join(''); // Assign back to modifiedResponseText
|
let lastIndex = responseBytes.length;
|
||||||
|
for (const ins of insertions) {
|
||||||
|
const pos = Math.min(ins.index, lastIndex);
|
||||||
|
parts.unshift(responseBytes.subarray(pos, lastIndex));
|
||||||
|
parts.unshift(encoder.encode(ins.marker));
|
||||||
|
lastIndex = pos;
|
||||||
|
}
|
||||||
|
parts.unshift(responseBytes.subarray(0, lastIndex));
|
||||||
|
|
||||||
|
// Concatenate all parts into a single buffer
|
||||||
|
const totalLength = parts.reduce((sum, part) => sum + part.length, 0);
|
||||||
|
const finalBytes = new Uint8Array(totalLength);
|
||||||
|
let offset = 0;
|
||||||
|
for (const part of parts) {
|
||||||
|
finalBytes.set(part, offset);
|
||||||
|
offset += part.length;
|
||||||
|
}
|
||||||
|
modifiedResponseText = new TextDecoder().decode(finalBytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sourceListFormatted.length > 0) {
|
if (sourceListFormatted.length > 0) {
|
||||||
|
|||||||
Reference in New Issue
Block a user