mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-19 09:33:53 +00:00
fix(core): citation markers misplaced in search results containing multibyte characters (#5956)
Co-authored-by: Allen Hutchison <adh@google.com>
This commit is contained in:
@@ -173,5 +173,77 @@ Sources:
|
||||
);
|
||||
expect(result.sources).toHaveLength(2);
|
||||
});
|
||||
|
||||
it('should insert markers at correct byte positions for multibyte text', async () => {
|
||||
const params: WebSearchToolParams = { query: 'multibyte query' };
|
||||
(mockGeminiClient.generateContent as Mock).mockResolvedValue({
|
||||
candidates: [
|
||||
{
|
||||
content: {
|
||||
role: 'model',
|
||||
parts: [{ text: 'こんにちは! Gemini CLI✨️' }],
|
||||
},
|
||||
groundingMetadata: {
|
||||
groundingChunks: [
|
||||
{
|
||||
web: {
|
||||
title: 'Japanese Greeting',
|
||||
uri: 'https://example.test/japanese-greeting',
|
||||
},
|
||||
},
|
||||
{
|
||||
web: {
|
||||
title: 'google-gemini/gemini-cli',
|
||||
uri: 'https://github.com/google-gemini/gemini-cli',
|
||||
},
|
||||
},
|
||||
{
|
||||
web: {
|
||||
title: 'Gemini CLI: your open-source AI agent',
|
||||
uri: 'https://blog.google/technology/developers/introducing-gemini-cli-open-source-ai-agent/',
|
||||
},
|
||||
},
|
||||
],
|
||||
groundingSupports: [
|
||||
{
|
||||
segment: {
|
||||
// Byte range of "こんにちは!" (utf-8 encoded)
|
||||
startIndex: 0,
|
||||
endIndex: 16,
|
||||
},
|
||||
groundingChunkIndices: [0],
|
||||
},
|
||||
{
|
||||
segment: {
|
||||
// Byte range of "Gemini CLI✨️" (utf-8 encoded)
|
||||
startIndex: 17,
|
||||
endIndex: 33,
|
||||
},
|
||||
groundingChunkIndices: [1, 2],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const invocation = tool.build(params);
|
||||
const result = await invocation.execute(abortSignal);
|
||||
|
||||
const expectedLlmContent = `Web search results for "multibyte query":
|
||||
|
||||
こんにちは![1] Gemini CLI✨️[2][3]
|
||||
|
||||
Sources:
|
||||
[1] Japanese Greeting (https://example.test/japanese-greeting)
|
||||
[2] google-gemini/gemini-cli (https://github.com/google-gemini/gemini-cli)
|
||||
[3] Gemini CLI: your open-source AI agent (https://blog.google/technology/developers/introducing-gemini-cli-open-source-ai-agent/)`;
|
||||
|
||||
expect(result.llmContent).toBe(expectedLlmContent);
|
||||
expect(result.returnDisplay).toBe(
|
||||
'Search results for "multibyte query" returned.',
|
||||
);
|
||||
expect(result.sources).toHaveLength(3);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -128,11 +128,28 @@ class WebSearchToolInvocation extends BaseToolInvocation<
|
||||
// Sort insertions by index in descending order to avoid shifting subsequent indices
|
||||
insertions.sort((a, b) => b.index - a.index);
|
||||
|
||||
const responseChars = modifiedResponseText.split(''); // Use new variable
|
||||
insertions.forEach((insertion) => {
|
||||
responseChars.splice(insertion.index, 0, insertion.marker);
|
||||
});
|
||||
modifiedResponseText = responseChars.join(''); // Assign back to modifiedResponseText
|
||||
// Use TextEncoder/TextDecoder since segment indices are UTF-8 byte positions
|
||||
const encoder = new TextEncoder();
|
||||
const responseBytes = encoder.encode(modifiedResponseText);
|
||||
const parts: Uint8Array[] = [];
|
||||
let lastIndex = responseBytes.length;
|
||||
for (const ins of insertions) {
|
||||
const pos = Math.min(ins.index, lastIndex);
|
||||
parts.unshift(responseBytes.subarray(pos, lastIndex));
|
||||
parts.unshift(encoder.encode(ins.marker));
|
||||
lastIndex = pos;
|
||||
}
|
||||
parts.unshift(responseBytes.subarray(0, lastIndex));
|
||||
|
||||
// Concatenate all parts into a single buffer
|
||||
const totalLength = parts.reduce((sum, part) => sum + part.length, 0);
|
||||
const finalBytes = new Uint8Array(totalLength);
|
||||
let offset = 0;
|
||||
for (const part of parts) {
|
||||
finalBytes.set(part, offset);
|
||||
offset += part.length;
|
||||
}
|
||||
modifiedResponseText = new TextDecoder().decode(finalBytes);
|
||||
}
|
||||
|
||||
if (sourceListFormatted.length > 0) {
|
||||
|
||||
Reference in New Issue
Block a user