fix(core): citation markers misplaced in search results containing multibyte characters (#5956)

Co-authored-by: Allen Hutchison <adh@google.com>
This commit is contained in:
pokutuna
2025-08-23 01:09:16 +09:00
committed by GitHub
parent 3b29f11862
commit 56ad22b39b
2 changed files with 94 additions and 5 deletions

View File

@@ -128,11 +128,28 @@ class WebSearchToolInvocation extends BaseToolInvocation<
// Sort insertions by index in descending order to avoid shifting subsequent indices
insertions.sort((a, b) => b.index - a.index);
const responseChars = modifiedResponseText.split(''); // Use new variable
insertions.forEach((insertion) => {
responseChars.splice(insertion.index, 0, insertion.marker);
});
modifiedResponseText = responseChars.join(''); // Assign back to modifiedResponseText
// Splice the citation markers in at byte offsets instead of string indices.
// Use TextEncoder/TextDecoder since segment indices are UTF-8 byte positions:
// the text is encoded to bytes, cut at those positions, and decoded again only
// after every chunk has been joined back together.
// NOTE(review): an index that falls inside a multibyte sequence would split that
// character around the marker — confirm indices always land on character boundaries.
const encoder = new TextEncoder();
const responseBytes = encoder.encode(modifiedResponseText);
// Byte chunks of the final text, maintained in front-to-back order.
const parts: Uint8Array[] = [];
// Exclusive upper bound of the response bytes not yet copied into `parts`.
let lastIndex = responseBytes.length;
// `insertions` is sorted by descending index above, so this loop walks the
// response from its end towards its start, prepending chunks as it goes.
for (const ins of insertions) {
// Clamp so the cut point never lies past the bytes still to be copied
// (covers indices beyond the end of the text).
const pos = Math.min(ins.index, lastIndex);
// Prepend the tail chunk first and the marker second, so the marker ends up
// immediately before the bytes that start at `pos`.
parts.unshift(responseBytes.subarray(pos, lastIndex));
parts.unshift(encoder.encode(ins.marker));
lastIndex = pos;
}
// Whatever precedes the lowest insertion point goes at the very front.
parts.unshift(responseBytes.subarray(0, lastIndex));
// Concatenate all parts into a single buffer
const totalLength = parts.reduce((sum, part) => sum + part.length, 0);
const finalBytes = new Uint8Array(totalLength);
// Write position of the next chunk inside `finalBytes`.
let offset = 0;
for (const part of parts) {
finalBytes.set(part, offset);
offset += part.length;
}
// Decode once over the joined buffer to get the text with markers inserted.
modifiedResponseText = new TextDecoder().decode(finalBytes);
}
if (sourceListFormatted.length > 0) {