mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-19 09:33:53 +00:00
🎯 PR: Improve Edit Tool Reliability with Fuzzy Matching Pipeline (#1025)
This commit is contained in:
@@ -425,7 +425,9 @@ describe('EditTool', () => {
|
||||
const invocation = tool.build(params);
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
|
||||
expect(result.llmContent).toMatch(/Successfully modified file/);
|
||||
expect(result.llmContent).toMatch(
|
||||
/Showing lines \d+-\d+ of \d+ from the edited file:/,
|
||||
);
|
||||
expect(fs.readFileSync(filePath, 'utf8')).toBe(newContent);
|
||||
const display = result.returnDisplay as FileDiff;
|
||||
expect(display.fileDiff).toMatch(initialContent);
|
||||
@@ -450,6 +452,9 @@ describe('EditTool', () => {
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
|
||||
expect(result.llmContent).toMatch(/Created new file/);
|
||||
expect(result.llmContent).toMatch(
|
||||
/Showing lines \d+-\d+ of \d+ from the edited file:/,
|
||||
);
|
||||
expect(fs.existsSync(newFilePath)).toBe(true);
|
||||
expect(fs.readFileSync(newFilePath, 'utf8')).toBe(fileContent);
|
||||
|
||||
@@ -485,7 +490,7 @@ describe('EditTool', () => {
|
||||
);
|
||||
});
|
||||
|
||||
it('should return error if multiple occurrences of old_string are found', async () => {
|
||||
it('should return error if multiple occurrences of old_string are found and replace_all is false', async () => {
|
||||
fs.writeFileSync(filePath, 'multiple old old strings', 'utf8');
|
||||
const params: EditToolParams = {
|
||||
file_path: filePath,
|
||||
@@ -494,27 +499,27 @@ describe('EditTool', () => {
|
||||
};
|
||||
const invocation = tool.build(params);
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
expect(result.llmContent).toMatch(
|
||||
/Expected 1 occurrence but found 2 for old_string in file/,
|
||||
);
|
||||
expect(result.llmContent).toMatch(/replace_all was not enabled/);
|
||||
expect(result.returnDisplay).toMatch(
|
||||
/Failed to edit, expected 1 occurrence but found 2/,
|
||||
/Failed to edit because the text matches multiple locations/,
|
||||
);
|
||||
});
|
||||
|
||||
it('should successfully replace multiple occurrences when expected_replacements specified', async () => {
|
||||
it('should successfully replace multiple occurrences when replace_all is true', async () => {
|
||||
fs.writeFileSync(filePath, 'old text\nold text\nold text', 'utf8');
|
||||
const params: EditToolParams = {
|
||||
file_path: filePath,
|
||||
old_string: 'old',
|
||||
new_string: 'new',
|
||||
expected_replacements: 3,
|
||||
replace_all: true,
|
||||
};
|
||||
|
||||
const invocation = tool.build(params);
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
|
||||
expect(result.llmContent).toMatch(/Successfully modified file/);
|
||||
expect(result.llmContent).toMatch(
|
||||
/Showing lines \d+-\d+ of \d+ from the edited file/,
|
||||
);
|
||||
expect(fs.readFileSync(filePath, 'utf8')).toBe(
|
||||
'new text\nnew text\nnew text',
|
||||
);
|
||||
@@ -535,24 +540,6 @@ describe('EditTool', () => {
|
||||
});
|
||||
});
|
||||
|
||||
it('should return error if expected_replacements does not match actual occurrences', async () => {
|
||||
fs.writeFileSync(filePath, 'old text old text', 'utf8');
|
||||
const params: EditToolParams = {
|
||||
file_path: filePath,
|
||||
old_string: 'old',
|
||||
new_string: 'new',
|
||||
expected_replacements: 3, // Expecting 3 but only 2 exist
|
||||
};
|
||||
const invocation = tool.build(params);
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
expect(result.llmContent).toMatch(
|
||||
/Expected 3 occurrences but found 2 for old_string in file/,
|
||||
);
|
||||
expect(result.returnDisplay).toMatch(
|
||||
/Failed to edit, expected 3 occurrences but found 2/,
|
||||
);
|
||||
});
|
||||
|
||||
it('should return error if trying to create a file that already exists (empty old_string)', async () => {
|
||||
fs.writeFileSync(filePath, 'Existing content', 'utf8');
|
||||
const params: EditToolParams = {
|
||||
@@ -568,38 +555,6 @@ describe('EditTool', () => {
|
||||
);
|
||||
});
|
||||
|
||||
it('should include modification message when proposed content is modified', async () => {
|
||||
const initialContent = 'Line 1\nold line\nLine 3\nLine 4\nLine 5\n';
|
||||
fs.writeFileSync(filePath, initialContent, 'utf8');
|
||||
const params: EditToolParams = {
|
||||
file_path: filePath,
|
||||
old_string: 'old',
|
||||
new_string: 'new',
|
||||
modified_by_user: true,
|
||||
ai_proposed_content: 'Line 1\nAI line\nLine 3\nLine 4\nLine 5\n',
|
||||
};
|
||||
|
||||
(mockConfig.getApprovalMode as Mock).mockReturnValueOnce(
|
||||
ApprovalMode.AUTO_EDIT,
|
||||
);
|
||||
const invocation = tool.build(params);
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
|
||||
expect(result.llmContent).toMatch(
|
||||
/User modified the `new_string` content/,
|
||||
);
|
||||
expect((result.returnDisplay as FileDiff).diffStat).toStrictEqual({
|
||||
model_added_lines: 1,
|
||||
model_removed_lines: 1,
|
||||
model_added_chars: 7,
|
||||
model_removed_chars: 8,
|
||||
user_added_lines: 1,
|
||||
user_removed_lines: 1,
|
||||
user_added_chars: 8,
|
||||
user_removed_chars: 7,
|
||||
});
|
||||
});
|
||||
|
||||
it('should not include modification message when proposed content is not modified', async () => {
|
||||
const initialContent = 'This is some old text.';
|
||||
fs.writeFileSync(filePath, initialContent, 'utf8');
|
||||
@@ -723,13 +678,12 @@ describe('EditTool', () => {
|
||||
expect(result.error?.type).toBe(ToolErrorType.EDIT_NO_OCCURRENCE_FOUND);
|
||||
});
|
||||
|
||||
it('should return EXPECTED_OCCURRENCE_MISMATCH error', async () => {
|
||||
it('should return EXPECTED_OCCURRENCE_MISMATCH error when replace_all is false and text is not unique', async () => {
|
||||
fs.writeFileSync(filePath, 'one one two', 'utf8');
|
||||
const params: EditToolParams = {
|
||||
file_path: filePath,
|
||||
old_string: 'one',
|
||||
new_string: 'new',
|
||||
expected_replacements: 3,
|
||||
};
|
||||
const invocation = tool.build(params);
|
||||
const result = await invocation.execute(new AbortController().signal);
|
||||
|
||||
@@ -34,6 +34,12 @@ import type {
|
||||
} from './modifiable-tool.js';
|
||||
import { IdeClient } from '../ide/ide-client.js';
|
||||
import { safeLiteralReplace } from '../utils/textUtils.js';
|
||||
import {
|
||||
countOccurrences,
|
||||
extractEditSnippet,
|
||||
maybeAugmentOldStringForDeletion,
|
||||
normalizeEditStrings,
|
||||
} from '../utils/editHelper.js';
|
||||
|
||||
export function applyReplacement(
|
||||
currentContent: string | null,
|
||||
@@ -77,10 +83,9 @@ export interface EditToolParams {
|
||||
new_string: string;
|
||||
|
||||
/**
|
||||
* Number of replacements expected. Defaults to 1 if not specified.
|
||||
* Use when you want to replace multiple occurrences.
|
||||
* Replace every occurrence of old_string instead of requiring a unique match.
|
||||
*/
|
||||
expected_replacements?: number;
|
||||
replace_all?: boolean;
|
||||
|
||||
/**
|
||||
* Whether the edit was modified manually by the user.
|
||||
@@ -118,12 +123,12 @@ class EditToolInvocation implements ToolInvocation<EditToolParams, ToolResult> {
|
||||
* @throws File system errors if reading the file fails unexpectedly (e.g., permissions)
|
||||
*/
|
||||
private async calculateEdit(params: EditToolParams): Promise<CalculatedEdit> {
|
||||
const expectedReplacements = params.expected_replacements ?? 1;
|
||||
const replaceAll = params.replace_all ?? false;
|
||||
let currentContent: string | null = null;
|
||||
let fileExists = false;
|
||||
let isNewFile = false;
|
||||
const finalNewString = params.new_string;
|
||||
const finalOldString = params.old_string;
|
||||
let finalNewString = params.new_string;
|
||||
let finalOldString = params.old_string;
|
||||
let occurrences = 0;
|
||||
let error:
|
||||
| { display: string; raw: string; type: ToolErrorType }
|
||||
@@ -144,7 +149,15 @@ class EditToolInvocation implements ToolInvocation<EditToolParams, ToolResult> {
|
||||
fileExists = false;
|
||||
}
|
||||
|
||||
if (params.old_string === '' && !fileExists) {
|
||||
const normalizedStrings = normalizeEditStrings(
|
||||
currentContent,
|
||||
finalOldString,
|
||||
finalNewString,
|
||||
);
|
||||
finalOldString = normalizedStrings.oldString;
|
||||
finalNewString = normalizedStrings.newString;
|
||||
|
||||
if (finalOldString === '' && !fileExists) {
|
||||
// Creating a new file
|
||||
isNewFile = true;
|
||||
} else if (!fileExists) {
|
||||
@@ -155,7 +168,13 @@ class EditToolInvocation implements ToolInvocation<EditToolParams, ToolResult> {
|
||||
type: ToolErrorType.FILE_NOT_FOUND,
|
||||
};
|
||||
} else if (currentContent !== null) {
|
||||
occurrences = this.countOccurrences(currentContent, params.old_string);
|
||||
finalOldString = maybeAugmentOldStringForDeletion(
|
||||
currentContent,
|
||||
finalOldString,
|
||||
finalNewString,
|
||||
);
|
||||
|
||||
occurrences = countOccurrences(currentContent, finalOldString);
|
||||
if (params.old_string === '') {
|
||||
// Error: Trying to create a file that already exists
|
||||
error = {
|
||||
@@ -169,13 +188,10 @@ class EditToolInvocation implements ToolInvocation<EditToolParams, ToolResult> {
|
||||
raw: `Failed to edit, 0 occurrences found for old_string in ${params.file_path}. No edits made. The exact text in old_string was not found. Ensure you're not escaping content incorrectly and check whitespace, indentation, and context. Use ${ReadFileTool.Name} tool to verify.`,
|
||||
type: ToolErrorType.EDIT_NO_OCCURRENCE_FOUND,
|
||||
};
|
||||
} else if (occurrences !== expectedReplacements) {
|
||||
const occurrenceTerm =
|
||||
expectedReplacements === 1 ? 'occurrence' : 'occurrences';
|
||||
|
||||
} else if (!replaceAll && occurrences > 1) {
|
||||
error = {
|
||||
display: `Failed to edit, expected ${expectedReplacements} ${occurrenceTerm} but found ${occurrences}.`,
|
||||
raw: `Failed to edit, Expected ${expectedReplacements} ${occurrenceTerm} but found ${occurrences} for old_string in file: ${params.file_path}`,
|
||||
display: `Failed to edit because the text matches multiple locations. Provide more context or set replace_all to true.`,
|
||||
raw: `Failed to edit. Found ${occurrences} occurrences for old_string in ${params.file_path} but replace_all was not enabled.`,
|
||||
type: ToolErrorType.EDIT_EXPECTED_OCCURRENCE_MISMATCH,
|
||||
};
|
||||
} else if (finalOldString === finalNewString) {
|
||||
@@ -221,22 +237,6 @@ class EditToolInvocation implements ToolInvocation<EditToolParams, ToolResult> {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Counts occurrences of a substring in a string
|
||||
*/
|
||||
private countOccurrences(str: string, substr: string): number {
|
||||
if (substr === '') {
|
||||
return 0;
|
||||
}
|
||||
let count = 0;
|
||||
let pos = str.indexOf(substr);
|
||||
while (pos !== -1) {
|
||||
count++;
|
||||
pos = str.indexOf(substr, pos + substr.length); // Start search after the current match
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles the confirmation prompt for the Edit tool in the CLI.
|
||||
* It needs to calculate the diff to show the user.
|
||||
@@ -422,12 +422,16 @@ class EditToolInvocation implements ToolInvocation<EditToolParams, ToolResult> {
|
||||
const llmSuccessMessageParts = [
|
||||
editData.isNewFile
|
||||
? `Created new file: ${this.params.file_path} with provided content.`
|
||||
: `Successfully modified file: ${this.params.file_path} (${editData.occurrences} replacements).`,
|
||||
: `The file: ${this.params.file_path} has been updated.`,
|
||||
];
|
||||
if (this.params.modified_by_user) {
|
||||
llmSuccessMessageParts.push(
|
||||
`User modified the \`new_string\` content to be: ${this.params.new_string}.`,
|
||||
);
|
||||
|
||||
const snippetResult = extractEditSnippet(
|
||||
editData.currentContent,
|
||||
editData.newContent,
|
||||
);
|
||||
if (snippetResult) {
|
||||
const snippetText = `Showing lines ${snippetResult.startLine}-${snippetResult.endLine} of ${snippetResult.totalLines} from the edited file:\n\n---\n\n${snippetResult.content}`;
|
||||
llmSuccessMessageParts.push(snippetText);
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -470,7 +474,7 @@ export class EditTool
|
||||
super(
|
||||
EditTool.Name,
|
||||
ToolDisplayNames.EDIT,
|
||||
`Replaces text within a file. By default, replaces a single occurrence, but can replace multiple occurrences when \`expected_replacements\` is specified. This tool requires providing significant context around the change to ensure precise targeting. Always use the ${ReadFileTool.Name} tool to examine the file's current content before attempting a text replacement.
|
||||
`Replaces text within a file. By default, replaces a single occurrence. Set \`replace_all\` to true when you intend to modify every instance of \`old_string\`. This tool requires providing significant context around the change to ensure precise targeting. Always use the ${ReadFileTool.Name} tool to examine the file's current content before attempting a text replacement.
|
||||
|
||||
The user has the ability to modify the \`new_string\` content. If modified, this will be stated in the response.
|
||||
|
||||
@@ -480,7 +484,7 @@ Expectation for required parameters:
|
||||
3. \`new_string\` MUST be the exact literal text to replace \`old_string\` with (also including all whitespace, indentation, newlines, and surrounding code etc.). Ensure the resulting code is correct and idiomatic.
|
||||
4. NEVER escape \`old_string\` or \`new_string\`, that would break the exact literal text requirement.
|
||||
**Important:** If ANY of the above are not satisfied, the tool will fail. CRITICAL for \`old_string\`: Must uniquely identify the single instance to change. Include at least 3 lines of context BEFORE and AFTER the target text, matching whitespace and indentation precisely. If this string matches multiple locations, or does not match exactly, the tool will fail.
|
||||
**Multiple replacements:** Set \`expected_replacements\` to the number of occurrences you want to replace. The tool will replace ALL occurrences that match \`old_string\` exactly. Ensure the number of replacements matches your expectation.`,
|
||||
**Multiple replacements:** Set \`replace_all\` to true when you want to replace every occurrence that matches \`old_string\`.`,
|
||||
Kind.Edit,
|
||||
{
|
||||
properties: {
|
||||
@@ -491,7 +495,7 @@ Expectation for required parameters:
|
||||
},
|
||||
old_string: {
|
||||
description:
|
||||
'The exact literal text to replace, preferably unescaped. For single replacements (default), include at least 3 lines of context BEFORE and AFTER the target text, matching whitespace and indentation precisely. For multiple replacements, specify expected_replacements parameter. If this string is not the exact literal text (i.e. you escaped it) or does not match exactly, the tool will fail.',
|
||||
'The exact literal text to replace, preferably unescaped. For single replacements (default), include at least 3 lines of context BEFORE and AFTER the target text, matching whitespace and indentation precisely. If this string is not the exact literal text (i.e. you escaped it) or does not match exactly, the tool will fail.',
|
||||
type: 'string',
|
||||
},
|
||||
new_string: {
|
||||
@@ -499,11 +503,10 @@ Expectation for required parameters:
|
||||
'The exact literal text to replace `old_string` with, preferably unescaped. Provide the EXACT text. Ensure the resulting code is correct and idiomatic.',
|
||||
type: 'string',
|
||||
},
|
||||
expected_replacements: {
|
||||
type: 'number',
|
||||
replace_all: {
|
||||
type: 'boolean',
|
||||
description:
|
||||
'Number of replacements expected. Defaults to 1 if not specified. Use when you want to replace multiple occurrences.',
|
||||
minimum: 1,
|
||||
'Replace all occurrences of old_string (default false).',
|
||||
},
|
||||
},
|
||||
required: ['file_path', 'old_string', 'new_string'],
|
||||
|
||||
153
packages/core/src/utils/editHelper.test.ts
Normal file
153
packages/core/src/utils/editHelper.test.ts
Normal file
@@ -0,0 +1,153 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
countOccurrences,
|
||||
maybeAugmentOldStringForDeletion,
|
||||
normalizeEditStrings,
|
||||
} from './editHelper.js';
|
||||
|
||||
describe('normalizeEditStrings', () => {
  // Two-line fixture shared by the cases that need a "real" file on disk.
  const file = `const one = 1;
const two = 2;
`;

  // Each entry is one scenario: the inputs handed to normalizeEditStrings and
  // the exact { oldString, newString } pair it must return.
  const scenarios: Array<{
    title: string;
    fileContent: string | null;
    oldString: string;
    newString: string;
    expected: { oldString: string; newString: string };
  }> = [
    {
      title:
        'returns literal matches unchanged and trims new_string trailing whitespace',
      fileContent: file,
      oldString: 'const two = 2;',
      newString: ' const two = 42; ',
      expected: {
        oldString: 'const two = 2;',
        newString: ' const two = 42;',
      },
    },
    {
      title: 'normalizes smart quotes to match on-disk text',
      fileContent: "const greeting = 'Don't';\n",
      oldString: 'const greeting = ‘Don’t’;',
      newString: 'const greeting = “Hello”; ',
      expected: {
        oldString: "const greeting = 'Don't';",
        newString: 'const greeting = “Hello”;',
      },
    },
    {
      title: 'falls back to original strings when no match is found',
      fileContent: file,
      oldString: 'missing text',
      newString: 'replacement',
      expected: {
        oldString: 'missing text',
        newString: 'replacement',
      },
    },
    {
      title: 'still trims new_string when editing a brand-new file',
      fileContent: null,
      oldString: '',
      newString: 'new file contents ',
      expected: {
        oldString: '',
        newString: 'new file contents',
      },
    },
    {
      title: 'matches unicode dash variants',
      fileContent: 'const range = "1-2";\n',
      oldString: 'const range = "1\u20132";',
      newString: 'const range = "3\u20135"; ',
      expected: {
        oldString: 'const range = "1-2";',
        newString: 'const range = "3\u20135";',
      },
    },
    {
      title: 'matches when trailing whitespace differs only at line ends',
      fileContent: 'value = 1;\n',
      oldString: 'value = 1; \n',
      newString: 'value = 2; \n',
      expected: {
        oldString: 'value = 1;\n',
        newString: 'value = 2;\n',
      },
    },
    {
      title: 'treats non-breaking spaces as regular spaces',
      fileContent: 'const label = "hello world";\n',
      oldString: 'const label = "hello\u00a0world";',
      newString: 'const label = "hi\u00a0world";',
      expected: {
        oldString: 'const label = "hello world";',
        newString: 'const label = "hi\u00a0world";',
      },
    },
    {
      title: 'drops trailing newline from new content when the file lacks it',
      fileContent: 'console.log("hi")',
      oldString: 'console.log("hi")\n',
      newString: 'console.log("bye")\n',
      expected: {
        oldString: 'console.log("hi")',
        newString: 'console.log("bye")',
      },
    },
  ];

  for (const scenario of scenarios) {
    it(scenario.title, () => {
      const result = normalizeEditStrings(
        scenario.fileContent,
        scenario.oldString,
        scenario.newString,
      );
      expect(result).toEqual(scenario.expected);
    });
  }
});
|
||||
|
||||
describe('countOccurrences', () => {
  it('returns zero when substring empty or missing', () => {
    // An empty needle and a needle that is absent must both count as zero.
    for (const needle of ['', 'z']) {
      expect(countOccurrences('abc', needle)).toBe(0);
    }
  });

  it('counts non-overlapping occurrences', () => {
    // 'aaaa' holds 'aa' at 0 and 2; the overlapping hit at 1 is not counted.
    const total = countOccurrences('aaaa', 'aa');
    expect(total).toBe(2);
  });
});
|
||||
|
||||
describe('maybeAugmentOldStringForDeletion', () => {
  // Two newline-terminated statements; used wherever a trailing newline exists.
  const file = 'console.log("hi")\nconsole.log("bye")\n';

  it('appends newline when deleting text followed by newline', () => {
    const augmented = maybeAugmentOldStringForDeletion(
      file,
      'console.log("hi")',
      '',
    );
    expect(augmented).toBe('console.log("hi")\n');
  });

  it('leaves strings untouched when not deleting', () => {
    const unchanged = maybeAugmentOldStringForDeletion(
      file,
      'console.log("hi")',
      'replacement',
    );
    expect(unchanged).toBe('console.log("hi")');
  });

  it('does not append newline when file lacks the variant', () => {
    // The file is exactly the old string, so there is no newline to consume.
    const fileWithoutNewline = 'console.log("hi")';
    const unchanged = maybeAugmentOldStringForDeletion(
      fileWithoutNewline,
      'console.log("hi")',
      '',
    );
    expect(unchanged).toBe('console.log("hi")');
  });

  it('no-ops when the old string already ends with a newline', () => {
    const alreadyTerminated = 'console.log("bye")\n';
    expect(
      maybeAugmentOldStringForDeletion(file, alreadyTerminated, ''),
    ).toBe(alreadyTerminated);
  });
});
|
||||
499
packages/core/src/utils/editHelper.ts
Normal file
499
packages/core/src/utils/editHelper.ts
Normal file
@@ -0,0 +1,499 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* Helpers for reconciling LLM-proposed edits with on-disk text.
|
||||
*
|
||||
* The normalization pipeline intentionally stays deterministic: we first try
|
||||
* literal substring matches, then gradually relax comparison rules (smart
|
||||
* quotes, em-dashes, trailing whitespace, etc.) until we either locate the
|
||||
* exact slice from the file or conclude the edit cannot be applied.
|
||||
*/
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* Character-level normalization */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/**
 * Lookup table from typographic Unicode characters to the plain ASCII
 * character they are treated as equivalent to. Every key and every value is a
 * single UTF-16 code unit, so substituting through this table never changes
 * the length of a string.
 */
const UNICODE_EQUIVALENT_MAP: Record<string, string> = Object.fromEntries(
  [
    // Hyphen variations → ASCII hyphen-minus.
    ['-', '\u2010\u2011\u2012\u2013\u2014\u2015\u2212'],
    // Curly single quotes → straight apostrophe.
    ["'", '\u2018\u2019\u201A\u201B'],
    // Curly double quotes → straight double quote.
    ['"', '\u201C\u201D\u201E\u201F'],
    // Whitespace variants → normal space.
    [
      ' ',
      '\u00A0\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u202F\u205F\u3000',
    ],
  ].flatMap(([ascii, variants]) =>
    Array.from(variants, (variant): [string, string] => [variant, ascii]),
  ),
);

/**
 * Replaces every character that has an entry in
 * {@link UNICODE_EQUIVALENT_MAP} with its ASCII equivalent and leaves all
 * other characters untouched.
 *
 * @param text Text to normalize; may be empty.
 * @returns The normalized text.
 */
function normalizeBasicCharacters(text: string): string {
  // Array.from walks the string by code point, exactly like for...of.
  return Array.from(
    text,
    (symbol) => UNICODE_EQUIVALENT_MAP[symbol] ?? symbol,
  ).join('');
}
|
||||
|
||||
/**
 * Trims whitespace from the end of every line in {@link text} without
 * touching the line terminators themselves (`\r\n`, `\n` or `\r`).
 *
 * @param text Possibly multi-line text.
 * @returns The text with trailing whitespace removed from each line.
 */
function stripTrailingWhitespacePreserveNewlines(text: string): string {
  // Splitting on a capturing group keeps the terminators in the result:
  // even positions hold line content, odd positions hold the terminator.
  return text
    .split(/(\r\n|\n|\r)/)
    .map((piece, position) =>
      position % 2 === 0 ? piece.trimEnd() : piece,
    )
    .join('');
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* Line-based search helpers */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/**
 * Outcome of locating a needle inside file content.
 */
interface MatchedSliceResult {
  /** The span of text taken from the haystack that corresponds to the needle. */
  slice: string;
  /**
   * True when the match only succeeded after the needle's final empty line
   * (the artifact of a trailing newline) was dropped; callers then strip the
   * trailing newline from the replacement text as well.
   */
  removedTrailingFinalEmptyLine: boolean;
}
|
||||
|
||||
/**
 * Per-line transforms tried in order when comparing file lines with pattern
 * lines: first an exact comparison, then one that ignores trailing
 * whitespace. Leading whitespace (indentation) is never altered, so a match
 * cannot land on a different scope level.
 */
const LINE_COMPARISON_PASSES: Array<(value: string) => string> = [
  function keepAsIs(value) {
    return value;
  },
  function dropTrailingWhitespace(value) {
    return value.trimEnd();
  },
];
|
||||
|
||||
/**
 * Most forgiving line transform: maps typographic characters to their ASCII
 * equivalents and then ignores trailing whitespace.
 */
function normalizeLineForComparison(value: string): string {
  const asciiFolded = normalizeBasicCharacters(value);
  return asciiFolded.trimEnd();
}
|
||||
|
||||
/**
 * Finds the first index where {@link pattern} appears as a run of consecutive
 * entries within {@link lines}, comparing entries after both have been passed
 * through {@link transform}.
 *
 * @param lines Lines to search in.
 * @param pattern Consecutive lines to look for.
 * @param transform Applied to every compared line; expected to be a pure
 *   function, since pattern lines are transformed only once up front.
 * @returns The index in {@link lines} where the match starts, `0` for an
 *   empty pattern, or `null` when there is no match.
 */
function seekSequenceWithTransform(
  lines: string[],
  pattern: string[],
  transform: (value: string) => string,
): number | null {
  if (pattern.length === 0) {
    return 0;
  }

  if (pattern.length > lines.length) {
    return null;
  }

  // The transformed pattern is identical for every candidate window, so
  // compute it once instead of once per comparison.
  const transformedPattern = pattern.map((entry) => transform(entry));
  const lastStart = lines.length - pattern.length;

  for (let start = 0; start <= lastStart; start++) {
    // every() stops at the first mismatching line, like the labeled
    // `continue` it replaces.
    const windowMatches = transformedPattern.every(
      (expected, offset) => transform(lines[start + offset]) === expected,
    );
    if (windowMatches) {
      return start;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Splits {@link text} on `\n` and records where each line starts.
 *
 * @param text Text to index.
 * @returns `lines` — the pieces between `\n` characters (a `\r` before `\n`
 *   stays on its line); `offsets` — one entry per line giving the index in
 *   {@link text} where it starts, plus a final entry equal to `text.length`.
 */
function buildLineIndex(text: string): {
  lines: string[];
  offsets: number[];
} {
  const lines = text.split('\n');
  const offsets: number[] = [];

  let position = 0;
  for (const line of lines) {
    offsets.push(position);
    // Skip past the line and the '\n' that split() removed.
    position += line.length + 1;
  }
  // Sentinel: the end of the text, not one past the non-existent final '\n'.
  offsets.push(text.length);

  return { lines, offsets };
}
|
||||
|
||||
/**
 * Returns the original characters of {@link text} that make up
 * {@link lineCount} consecutive lines starting at {@link startLine}.
 *
 * @param text The full text the index was built from.
 * @param offsets Start offset of every line plus a final end-of-text entry.
 * @param lines The lines of {@link text} as split on `\n`.
 * @param startLine Index of the first line to include.
 * @param lineCount Number of lines to include.
 * @param includeTrailingNewline When true, the slice extends to the start of
 *   the line after the last included one, i.e. it also covers the newline
 *   that terminates the last line (if any).
 * @returns The slice; for `lineCount === 0` either `'\n'` or `''` depending
 *   on {@link includeTrailingNewline}.
 */
function sliceFromLines(
  text: string,
  offsets: number[],
  lines: string[],
  startLine: number,
  lineCount: number,
  includeTrailingNewline: boolean,
): string {
  if (lineCount === 0) {
    return includeTrailingNewline ? '\n' : '';
  }

  const finalLine = startLine + lineCount - 1;
  const from = offsets[startLine] ?? 0;
  const endOfFinalLine =
    (offsets[finalLine] ?? 0) + (lines[finalLine]?.length ?? 0);

  if (!includeTrailingNewline) {
    return text.slice(from, endOfFinalLine);
  }

  const followingLineStart = offsets[finalLine + 1];
  if (followingLineStart !== undefined) {
    return text.slice(from, followingLineStart);
  }

  // No offset recorded after the final line: fall back to the end of the
  // text when it is newline-terminated.
  return text.slice(from, text.endsWith('\n') ? text.length : endOfFinalLine);
}
|
||||
|
||||
/**
 * Locates {@link needle} inside {@link haystack} by comparing whole lines,
 * trying progressively more forgiving comparisons: exact, ignoring trailing
 * whitespace, and finally also folding typographic characters to ASCII.
 *
 * If the needle ends with a newline and cannot be matched as-is, a second
 * attempt is made without that final newline (for example when the file
 * itself is not newline-terminated); such a match is reported with
 * `removedTrailingFinalEmptyLine: true`.
 *
 * @param haystack Full file content.
 * @param needle Text to locate; callers filter out the empty string.
 * @returns The slice of {@link haystack} that the needle corresponds to, or
 *   `null` when no line-based match exists.
 */
function findLineBasedMatch(
  haystack: string,
  needle: string,
): MatchedSliceResult | null {
  const { lines, offsets } = buildLineIndex(haystack);
  // split() always yields at least one element, so patternLines is never empty.
  const patternLines = needle.split('\n');
  const endsWithNewline = needle.endsWith('\n');

  // Returns the index of the first match using the strictest pass that
  // produces one, or null when even the most relaxed comparison fails.
  const attemptMatch = (candidate: string[]): number | null => {
    for (const pass of LINE_COMPARISON_PASSES) {
      const idx = seekSequenceWithTransform(lines, candidate, pass);
      if (idx !== null) {
        return idx;
      }
    }
    return seekSequenceWithTransform(
      lines,
      candidate,
      normalizeLineForComparison,
    );
  };

  let matchIndex = attemptMatch(patternLines);
  if (matchIndex !== null) {
    // When the needle ends with '\n', the last pattern entry is the empty
    // string that split() leaves after that newline. It confirms that a
    // (blank) line follows in the file, but it is not content of its own:
    // slice only the real lines plus the single newline that terminates
    // them. Counting the artifact as a line as well would pull one extra
    // line break into the slice and the edit would delete a blank line.
    const contentLineCount = endsWithNewline
      ? patternLines.length - 1
      : patternLines.length;
    return {
      slice: sliceFromLines(
        haystack,
        offsets,
        lines,
        matchIndex,
        contentLineCount,
        endsWithNewline,
      ),
      removedTrailingFinalEmptyLine: false,
    };
  }

  if (endsWithNewline) {
    // Retry without the trailing empty entry so the needle can match lines
    // that are not followed by a blank line or that end the file without a
    // final newline.
    const trimmedPattern = patternLines.slice(0, -1);
    if (trimmedPattern.length === 0) {
      return null;
    }
    matchIndex = attemptMatch(trimmedPattern);
    if (matchIndex !== null) {
      return {
        slice: sliceFromLines(
          haystack,
          offsets,
          lines,
          matchIndex,
          trimmedPattern.length,
          false,
        ),
        removedTrailingFinalEmptyLine: true,
      };
    }
  }

  return null;
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* Slice discovery */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/**
 * Returns the literal slice from {@link haystack} that best corresponds to the
 * provided {@link needle}, or `null` when no match is found.
 *
 * Strategies are tried from strictest to most relaxed: an exact substring
 * search, a substring search after folding typographic characters to ASCII on
 * both sides, and finally a line-based comparison.
 */
function findMatchedSlice(
  haystack: string,
  needle: string,
): MatchedSliceResult | null {
  if (needle === '') {
    return null;
  }

  let matchStart = haystack.indexOf(needle);
  if (matchStart === -1) {
    // Character folding is applied one character at a time, so an index found
    // in the folded haystack is used directly as an index into the original.
    matchStart = normalizeBasicCharacters(haystack).indexOf(
      normalizeBasicCharacters(needle),
    );
  }

  if (matchStart === -1) {
    return findLineBasedMatch(haystack, needle);
  }

  return {
    slice: haystack.slice(matchStart, matchStart + needle.length),
    removedTrailingFinalEmptyLine: false,
  };
}
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* Replacement helpers */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/**
 * Removes a single line terminator (`\r\n`, `\n` or `\r`) from the end of
 * {@link text}; text without a trailing terminator is returned unchanged.
 */
function removeTrailingNewline(text: string): string {
  // '\r\n' is listed first so it is removed as a unit rather than as '\n'.
  const terminator = ['\r\n', '\n', '\r'].find((candidate) =>
    text.endsWith(candidate),
  );
  return terminator === undefined
    ? text
    : text.slice(0, -terminator.length);
}
|
||||
|
||||
/**
 * Mirrors on the replacement text what happened to the search text: when the
 * trailing line was dropped to obtain a match, the replacement loses its
 * trailing line terminator too; otherwise it is returned as-is.
 */
function adjustNewStringForTrailingLine(
  newString: string,
  removedTrailingLine: boolean,
): string {
  if (!removedTrailingLine) {
    return newString;
  }
  return removeTrailingNewline(newString);
}
|
||||
|
||||
/**
 * Pair of strings produced by {@link normalizeEditStrings}.
 */
export interface NormalizedEditStrings {
  /**
   * Text to search for: the slice taken from the file when a match was found,
   * otherwise the caller's original old string.
   */
  oldString: string;
  /** Replacement text with trailing whitespace stripped from each line. */
  newString: string;
}
|
||||
|
||||
/**
 * Runs the core normalization pipeline:
 * 1. Strip trailing whitespace from every line of {@link newString}.
 * 2. Try to locate {@link oldString} inside {@link fileContent}, literally
 *    first and then through relaxed matching (smart quotes, line trims, etc.).
 * 3. When located, return the slice taken from the file so later
 *    replacements operate on the exact on-disk text.
 *
 * @param fileContent Current file content, or `null` when there is no file.
 * @param oldString Text the edit wants to replace.
 * @param newString Text the edit wants to insert.
 * @returns The strings to use for the replacement. When there is no file
 *   content, {@link oldString} is empty, or nothing matches, `oldString` is
 *   returned unchanged and only the whitespace stripping applies.
 */
export function normalizeEditStrings(
  fileContent: string | null,
  oldString: string,
  newString: string,
): NormalizedEditStrings {
  const cleanedNewString = stripTrailingWhitespacePreserveNewlines(newString);

  // Matching is only attempted when there is both a file and a needle.
  const located =
    fileContent !== null && oldString !== ''
      ? findMatchedSlice(fileContent, oldString)
      : null;

  if (located === null) {
    return { oldString, newString: cleanedNewString };
  }

  return {
    oldString: located.slice,
    newString: adjustNewStringForTrailingLine(
      cleanedNewString,
      located.removedTrailingFinalEmptyLine,
    ),
  };
}
|
||||
|
||||
/**
 * When deleting text whose occurrences in the file are each followed by a
 * newline, extends the search string by that newline so the removal does not
 * leave a blank line behind.
 *
 * The newline is only appended when it does not change what is matched: the
 * augmented string must occur exactly as often as {@link oldString} itself.
 * If some occurrences are followed by a newline and others are not, the
 * original string is returned, so that uniqueness checks and "replace all"
 * still see every occurrence.
 *
 * @param fileContent Current file content, or `null` when there is no file.
 * @param oldString Text that is about to be removed.
 * @param newString Replacement text; only the empty string (a deletion)
 *   triggers augmentation.
 * @returns `oldString + '\n'` when the conditions above hold, otherwise
 *   {@link oldString} unchanged.
 */
export function maybeAugmentOldStringForDeletion(
  fileContent: string | null,
  oldString: string,
  newString: string,
): string {
  if (
    fileContent === null ||
    oldString === '' ||
    newString !== '' ||
    oldString.endsWith('\n')
  ) {
    return oldString;
  }

  const candidate = `${oldString}\n`;

  // split() cuts at successive non-overlapping matches, so the number of
  // pieces minus one is the number of occurrences.
  const candidateTotal = fileContent.split(candidate).length - 1;
  if (candidateTotal === 0) {
    return oldString;
  }
  const occurrenceTotal = fileContent.split(oldString).length - 1;

  return candidateTotal === occurrenceTotal ? candidate : oldString;
}
|
||||
|
||||
/**
 * Tallies how many times {@link substr} appears in {@link source}, scanning
 * left to right and resuming after the end of each hit so that matches never
 * overlap. An empty {@link substr} yields 0.
 *
 * @param source Text to scan.
 * @param substr Text to look for.
 * @returns Number of non-overlapping matches.
 */
export function countOccurrences(source: string, substr: string): number {
  const step = substr.length;
  if (step === 0) {
    return 0;
  }

  let total = 0;
  for (
    let pos = source.indexOf(substr);
    pos !== -1;
    pos = source.indexOf(substr, pos + step)
  ) {
    total += 1;
  }
  return total;
}
|
||||
|
||||
/**
 * Result from extracting a snippet showing the edited region.
 */
export interface EditSnippetResult {
  /** Starting line number (1-indexed) of the snippet */
  startLine: number;
  /** Ending line number (1-indexed) of the snippet */
  endLine: number;
  /** Total number of lines in the new content */
  totalLines: number;
  /** The snippet content (subset of lines from newContent) */
  content: string;
}

/** Default number of context lines shown on each side of the changed region. */
const SNIPPET_CONTEXT_LINES = 4;

/**
 * Upper bound on the span of the changed region (last changed line minus first
 * changed line); larger changes produce no snippet.
 */
const SNIPPET_MAX_LINES = 1000;

/**
 * Extracts a snippet from the edited file showing the changed region with
 * surrounding context. The old and new content are split on `\n` and compared
 * line-by-line from both ends to locate the changed region.
 *
 * @param oldContent The original file content before the edit (null for new files)
 * @param newContent The new file content after the edit
 * @param contextLines Number of context lines to show before and after the
 *   change. Defaults to {@link SNIPPET_CONTEXT_LINES}; negative or non-finite
 *   values are clamped to a non-negative integer / the default.
 * @returns Snippet information, or null if no meaningful snippet can be
 *   extracted (content unchanged, new content empty, or change too large).
 *   For new files (`oldContent === null`) the whole new content is returned.
 */
export function extractEditSnippet(
  oldContent: string | null,
  newContent: string,
  contextLines = SNIPPET_CONTEXT_LINES,
): EditSnippetResult | null {
  const newLines = newContent.split('\n');
  const totalLines = newLines.length;

  // New file: everything is "changed", so show it all.
  if (oldContent === null) {
    return {
      startLine: 1,
      endLine: totalLines,
      totalLines,
      content: newContent,
    };
  }

  // No changes case (or nothing left to show).
  if (oldContent === newContent || !newContent) {
    return null;
  }

  const context = Number.isFinite(contextLines)
    ? Math.max(0, Math.floor(contextLines))
    : SNIPPET_CONTEXT_LINES;

  const oldLines = oldContent.split('\n');

  // Find the first line that differs from the start.
  let firstDiffLine = 0;
  const minLength = Math.min(oldLines.length, newLines.length);
  while (
    firstDiffLine < minLength &&
    oldLines[firstDiffLine] === newLines[firstDiffLine]
  ) {
    firstDiffLine++;
  }

  // Find the first line that differs from the end, never walking back past
  // the common prefix found above.
  let oldEndIndex = oldLines.length - 1;
  let newEndIndex = newLines.length - 1;
  while (
    oldEndIndex >= firstDiffLine &&
    newEndIndex >= firstDiffLine &&
    oldLines[oldEndIndex] === newLines[newEndIndex]
  ) {
    oldEndIndex--;
    newEndIndex--;
  }

  // The changed region in the new content is from firstDiffLine to newEndIndex
  // (inclusive); convert to 1-indexed line numbers. For a pure deletion
  // changeEnd ends up one less than changeStart.
  const changeStart = firstDiffLine + 1;
  const changeEnd = newEndIndex + 1;

  // If the change region is too large, don't generate a snippet.
  if (changeEnd - changeStart > SNIPPET_MAX_LINES) {
    return null;
  }

  // Calculate snippet bounds with context, clamped to the file.
  const snippetStart = Math.max(1, changeStart - context);
  const snippetEnd = Math.min(totalLines, changeEnd + context);

  return {
    startLine: snippetStart,
    endLine: snippetEnd,
    totalLines,
    content: newLines.slice(snippetStart - 1, snippetEnd).join('\n'),
  };
}
|
||||
Reference in New Issue
Block a user