From 0752a31e1e308ab993d70f7dd2dae406450e49b1 Mon Sep 17 00:00:00 2001 From: tanzhenxin Date: Thu, 13 Nov 2025 19:01:09 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=AF=20PR:=20Improve=20Edit=20Tool=20Re?= =?UTF-8?q?liability=20with=20Fuzzy=20Matching=20Pipeline=20(#1025)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/tools/file-system.md | 12 +- packages/core/src/tools/edit.test.ts | 76 +--- packages/core/src/tools/edit.ts | 87 ++-- packages/core/src/utils/editHelper.test.ts | 153 +++++++ packages/core/src/utils/editHelper.ts | 499 +++++++++++++++++++++ 5 files changed, 718 insertions(+), 109 deletions(-) create mode 100644 packages/core/src/utils/editHelper.test.ts create mode 100644 packages/core/src/utils/editHelper.ts diff --git a/docs/tools/file-system.md b/docs/tools/file-system.md index 93b6ac85..3c5097df 100644 --- a/docs/tools/file-system.md +++ b/docs/tools/file-system.md @@ -132,7 +132,7 @@ grep_search(pattern="function", glob="*.js", limit=10) ## 6. `edit` (Edit) -`edit` replaces text within a file. By default, replaces a single occurrence, but can replace multiple occurrences when `expected_replacements` is specified. This tool is designed for precise, targeted changes and requires significant context around the `old_string` to ensure it modifies the correct location. +`edit` replaces text within a file. By default it requires `old_string` to match a single unique location; set `replace_all` to `true` when you intentionally want to change every occurrence. This tool is designed for precise, targeted changes and requires significant context around the `old_string` to ensure it modifies the correct location. - **Tool name:** `edit` - **Display name:** Edit @@ -144,12 +144,12 @@ grep_search(pattern="function", glob="*.js", limit=10) **CRITICAL:** This string must uniquely identify the single instance to change. 
It should include at least 3 lines of context _before_ and _after_ the target text, matching whitespace and indentation precisely. If `old_string` is empty, the tool attempts to create a new file at `file_path` with `new_string` as content. - `new_string` (string, required): The exact literal text to replace `old_string` with. - - `expected_replacements` (number, optional): The number of occurrences to replace. Defaults to `1`. + - `replace_all` (boolean, optional): Replace all occurrences of `old_string`. Defaults to `false`. - **Behavior:** - If `old_string` is empty and `file_path` does not exist, creates a new file with `new_string` as content. - - If `old_string` is provided, it reads the `file_path` and attempts to find exactly one occurrence of `old_string`. - - If one occurrence is found, it replaces it with `new_string`. + - If `old_string` is provided, it reads the `file_path` and attempts to find exactly one occurrence unless `replace_all` is true. + - If the match is unique (or `replace_all` is true), it replaces the text with `new_string`. - **Enhanced Reliability (Multi-Stage Edit Correction):** To significantly improve the success rate of edits, especially when the model-provided `old_string` might not be perfectly precise, the tool incorporates a multi-stage edit correction mechanism. - If the initial `old_string` isn't found or matches multiple locations, the tool can leverage the Qwen model to iteratively refine `old_string` (and potentially `new_string`). - This self-correction process attempts to identify the unique segment the model intended to modify, making the `edit` operation more robust even with slightly imperfect initial context. @@ -158,10 +158,10 @@ grep_search(pattern="function", glob="*.js", limit=10) - `old_string` is not empty, but the `file_path` does not exist. - `old_string` is empty, but the `file_path` already exists. - `old_string` is not found in the file after attempts to correct it. 
- - `old_string` is found multiple times, and the self-correction mechanism cannot resolve it to a single, unambiguous match. + - `old_string` is found multiple times, `replace_all` is false, and the self-correction mechanism cannot resolve it to a single, unambiguous match. - **Output (`llmContent`):** - On success: `Successfully modified file: /path/to/file.txt (1 replacements).` or `Created new file: /path/to/new_file.txt with provided content.` - - On failure: An error message explaining the reason (e.g., `Failed to edit, 0 occurrences found...`, `Failed to edit, expected 1 occurrences but found 2...`). + - On failure: An error message explaining the reason (e.g., `Failed to edit, 0 occurrences found...`, `Failed to edit because the text matches multiple locations...`). - **Confirmation:** Yes. Shows a diff of the proposed changes and asks for user approval before writing to the file. These file system tools provide a foundation for Qwen Code to understand and interact with your local project context. 
diff --git a/packages/core/src/tools/edit.test.ts b/packages/core/src/tools/edit.test.ts index b695087a..9e41b938 100644 --- a/packages/core/src/tools/edit.test.ts +++ b/packages/core/src/tools/edit.test.ts @@ -425,7 +425,9 @@ describe('EditTool', () => { const invocation = tool.build(params); const result = await invocation.execute(new AbortController().signal); - expect(result.llmContent).toMatch(/Successfully modified file/); + expect(result.llmContent).toMatch( + /Showing lines \d+-\d+ of \d+ from the edited file:/, + ); expect(fs.readFileSync(filePath, 'utf8')).toBe(newContent); const display = result.returnDisplay as FileDiff; expect(display.fileDiff).toMatch(initialContent); @@ -450,6 +452,9 @@ describe('EditTool', () => { const result = await invocation.execute(new AbortController().signal); expect(result.llmContent).toMatch(/Created new file/); + expect(result.llmContent).toMatch( + /Showing lines \d+-\d+ of \d+ from the edited file:/, + ); expect(fs.existsSync(newFilePath)).toBe(true); expect(fs.readFileSync(newFilePath, 'utf8')).toBe(fileContent); @@ -485,7 +490,7 @@ describe('EditTool', () => { ); }); - it('should return error if multiple occurrences of old_string are found', async () => { + it('should return error if multiple occurrences of old_string are found and replace_all is false', async () => { fs.writeFileSync(filePath, 'multiple old old strings', 'utf8'); const params: EditToolParams = { file_path: filePath, @@ -494,27 +499,27 @@ describe('EditTool', () => { }; const invocation = tool.build(params); const result = await invocation.execute(new AbortController().signal); - expect(result.llmContent).toMatch( - /Expected 1 occurrence but found 2 for old_string in file/, - ); + expect(result.llmContent).toMatch(/replace_all was not enabled/); expect(result.returnDisplay).toMatch( - /Failed to edit, expected 1 occurrence but found 2/, + /Failed to edit because the text matches multiple locations/, ); }); - it('should successfully replace multiple 
occurrences when expected_replacements specified', async () => { + it('should successfully replace multiple occurrences when replace_all is true', async () => { fs.writeFileSync(filePath, 'old text\nold text\nold text', 'utf8'); const params: EditToolParams = { file_path: filePath, old_string: 'old', new_string: 'new', - expected_replacements: 3, + replace_all: true, }; const invocation = tool.build(params); const result = await invocation.execute(new AbortController().signal); - expect(result.llmContent).toMatch(/Successfully modified file/); + expect(result.llmContent).toMatch( + /Showing lines \d+-\d+ of \d+ from the edited file/, + ); expect(fs.readFileSync(filePath, 'utf8')).toBe( 'new text\nnew text\nnew text', ); @@ -535,24 +540,6 @@ describe('EditTool', () => { }); }); - it('should return error if expected_replacements does not match actual occurrences', async () => { - fs.writeFileSync(filePath, 'old text old text', 'utf8'); - const params: EditToolParams = { - file_path: filePath, - old_string: 'old', - new_string: 'new', - expected_replacements: 3, // Expecting 3 but only 2 exist - }; - const invocation = tool.build(params); - const result = await invocation.execute(new AbortController().signal); - expect(result.llmContent).toMatch( - /Expected 3 occurrences but found 2 for old_string in file/, - ); - expect(result.returnDisplay).toMatch( - /Failed to edit, expected 3 occurrences but found 2/, - ); - }); - it('should return error if trying to create a file that already exists (empty old_string)', async () => { fs.writeFileSync(filePath, 'Existing content', 'utf8'); const params: EditToolParams = { @@ -568,38 +555,6 @@ describe('EditTool', () => { ); }); - it('should include modification message when proposed content is modified', async () => { - const initialContent = 'Line 1\nold line\nLine 3\nLine 4\nLine 5\n'; - fs.writeFileSync(filePath, initialContent, 'utf8'); - const params: EditToolParams = { - file_path: filePath, - old_string: 'old', - 
new_string: 'new', - modified_by_user: true, - ai_proposed_content: 'Line 1\nAI line\nLine 3\nLine 4\nLine 5\n', - }; - - (mockConfig.getApprovalMode as Mock).mockReturnValueOnce( - ApprovalMode.AUTO_EDIT, - ); - const invocation = tool.build(params); - const result = await invocation.execute(new AbortController().signal); - - expect(result.llmContent).toMatch( - /User modified the `new_string` content/, - ); - expect((result.returnDisplay as FileDiff).diffStat).toStrictEqual({ - model_added_lines: 1, - model_removed_lines: 1, - model_added_chars: 7, - model_removed_chars: 8, - user_added_lines: 1, - user_removed_lines: 1, - user_added_chars: 8, - user_removed_chars: 7, - }); - }); - it('should not include modification message when proposed content is not modified', async () => { const initialContent = 'This is some old text.'; fs.writeFileSync(filePath, initialContent, 'utf8'); @@ -723,13 +678,12 @@ describe('EditTool', () => { expect(result.error?.type).toBe(ToolErrorType.EDIT_NO_OCCURRENCE_FOUND); }); - it('should return EXPECTED_OCCURRENCE_MISMATCH error', async () => { + it('should return EXPECTED_OCCURRENCE_MISMATCH error when replace_all is false and text is not unique', async () => { fs.writeFileSync(filePath, 'one one two', 'utf8'); const params: EditToolParams = { file_path: filePath, old_string: 'one', new_string: 'new', - expected_replacements: 3, }; const invocation = tool.build(params); const result = await invocation.execute(new AbortController().signal); diff --git a/packages/core/src/tools/edit.ts b/packages/core/src/tools/edit.ts index fa94fda5..ec257290 100644 --- a/packages/core/src/tools/edit.ts +++ b/packages/core/src/tools/edit.ts @@ -34,6 +34,12 @@ import type { } from './modifiable-tool.js'; import { IdeClient } from '../ide/ide-client.js'; import { safeLiteralReplace } from '../utils/textUtils.js'; +import { + countOccurrences, + extractEditSnippet, + maybeAugmentOldStringForDeletion, + normalizeEditStrings, +} from 
'../utils/editHelper.js'; export function applyReplacement( currentContent: string | null, @@ -77,10 +83,9 @@ export interface EditToolParams { new_string: string; /** - * Number of replacements expected. Defaults to 1 if not specified. - * Use when you want to replace multiple occurrences. + * Replace every occurrence of old_string instead of requiring a unique match. */ - expected_replacements?: number; + replace_all?: boolean; /** * Whether the edit was modified manually by the user. @@ -118,12 +123,12 @@ class EditToolInvocation implements ToolInvocation { * @throws File system errors if reading the file fails unexpectedly (e.g., permissions) */ private async calculateEdit(params: EditToolParams): Promise { - const expectedReplacements = params.expected_replacements ?? 1; + const replaceAll = params.replace_all ?? false; let currentContent: string | null = null; let fileExists = false; let isNewFile = false; - const finalNewString = params.new_string; - const finalOldString = params.old_string; + let finalNewString = params.new_string; + let finalOldString = params.old_string; let occurrences = 0; let error: | { display: string; raw: string; type: ToolErrorType } @@ -144,7 +149,15 @@ class EditToolInvocation implements ToolInvocation { fileExists = false; } - if (params.old_string === '' && !fileExists) { + const normalizedStrings = normalizeEditStrings( + currentContent, + finalOldString, + finalNewString, + ); + finalOldString = normalizedStrings.oldString; + finalNewString = normalizedStrings.newString; + + if (finalOldString === '' && !fileExists) { // Creating a new file isNewFile = true; } else if (!fileExists) { @@ -155,7 +168,13 @@ class EditToolInvocation implements ToolInvocation { type: ToolErrorType.FILE_NOT_FOUND, }; } else if (currentContent !== null) { - occurrences = this.countOccurrences(currentContent, params.old_string); + finalOldString = maybeAugmentOldStringForDeletion( + currentContent, + finalOldString, + finalNewString, + ); + + 
occurrences = countOccurrences(currentContent, finalOldString); if (params.old_string === '') { // Error: Trying to create a file that already exists error = { @@ -169,13 +188,10 @@ class EditToolInvocation implements ToolInvocation { raw: `Failed to edit, 0 occurrences found for old_string in ${params.file_path}. No edits made. The exact text in old_string was not found. Ensure you're not escaping content incorrectly and check whitespace, indentation, and context. Use ${ReadFileTool.Name} tool to verify.`, type: ToolErrorType.EDIT_NO_OCCURRENCE_FOUND, }; - } else if (occurrences !== expectedReplacements) { - const occurrenceTerm = - expectedReplacements === 1 ? 'occurrence' : 'occurrences'; - + } else if (!replaceAll && occurrences > 1) { error = { - display: `Failed to edit, expected ${expectedReplacements} ${occurrenceTerm} but found ${occurrences}.`, - raw: `Failed to edit, Expected ${expectedReplacements} ${occurrenceTerm} but found ${occurrences} for old_string in file: ${params.file_path}`, + display: `Failed to edit because the text matches multiple locations. Provide more context or set replace_all to true.`, + raw: `Failed to edit. Found ${occurrences} occurrences for old_string in ${params.file_path} but replace_all was not enabled.`, type: ToolErrorType.EDIT_EXPECTED_OCCURRENCE_MISMATCH, }; } else if (finalOldString === finalNewString) { @@ -221,22 +237,6 @@ class EditToolInvocation implements ToolInvocation { }; } - /** - * Counts occurrences of a substring in a string - */ - private countOccurrences(str: string, substr: string): number { - if (substr === '') { - return 0; - } - let count = 0; - let pos = str.indexOf(substr); - while (pos !== -1) { - count++; - pos = str.indexOf(substr, pos + substr.length); // Start search after the current match - } - return count; - } - /** * Handles the confirmation prompt for the Edit tool in the CLI. * It needs to calculate the diff to show the user. 
@@ -422,12 +422,16 @@ class EditToolInvocation implements ToolInvocation { const llmSuccessMessageParts = [ editData.isNewFile ? `Created new file: ${this.params.file_path} with provided content.` - : `Successfully modified file: ${this.params.file_path} (${editData.occurrences} replacements).`, + : `The file: ${this.params.file_path} has been updated.`, ]; - if (this.params.modified_by_user) { - llmSuccessMessageParts.push( - `User modified the \`new_string\` content to be: ${this.params.new_string}.`, - ); + + const snippetResult = extractEditSnippet( + editData.currentContent, + editData.newContent, + ); + if (snippetResult) { + const snippetText = `Showing lines ${snippetResult.startLine}-${snippetResult.endLine} of ${snippetResult.totalLines} from the edited file:\n\n---\n\n${snippetResult.content}`; + llmSuccessMessageParts.push(snippetText); } return { @@ -470,7 +474,7 @@ export class EditTool super( EditTool.Name, ToolDisplayNames.EDIT, - `Replaces text within a file. By default, replaces a single occurrence, but can replace multiple occurrences when \`expected_replacements\` is specified. This tool requires providing significant context around the change to ensure precise targeting. Always use the ${ReadFileTool.Name} tool to examine the file's current content before attempting a text replacement. + `Replaces text within a file. By default, replaces a single occurrence. Set \`replace_all\` to true when you intend to modify every instance of \`old_string\`. This tool requires providing significant context around the change to ensure precise targeting. Always use the ${ReadFileTool.Name} tool to examine the file's current content before attempting a text replacement. The user has the ability to modify the \`new_string\` content. If modified, this will be stated in the response. @@ -480,7 +484,7 @@ Expectation for required parameters: 3. 
\`new_string\` MUST be the exact literal text to replace \`old_string\` with (also including all whitespace, indentation, newlines, and surrounding code etc.). Ensure the resulting code is correct and idiomatic. 4. NEVER escape \`old_string\` or \`new_string\`, that would break the exact literal text requirement. **Important:** If ANY of the above are not satisfied, the tool will fail. CRITICAL for \`old_string\`: Must uniquely identify the single instance to change. Include at least 3 lines of context BEFORE and AFTER the target text, matching whitespace and indentation precisely. If this string matches multiple locations, or does not match exactly, the tool will fail. -**Multiple replacements:** Set \`expected_replacements\` to the number of occurrences you want to replace. The tool will replace ALL occurrences that match \`old_string\` exactly. Ensure the number of replacements matches your expectation.`, +**Multiple replacements:** Set \`replace_all\` to true when you want to replace every occurrence that matches \`old_string\`.`, Kind.Edit, { properties: { @@ -491,7 +495,7 @@ Expectation for required parameters: }, old_string: { description: - 'The exact literal text to replace, preferably unescaped. For single replacements (default), include at least 3 lines of context BEFORE and AFTER the target text, matching whitespace and indentation precisely. For multiple replacements, specify expected_replacements parameter. If this string is not the exact literal text (i.e. you escaped it) or does not match exactly, the tool will fail.', + 'The exact literal text to replace, preferably unescaped. For single replacements (default), include at least 3 lines of context BEFORE and AFTER the target text, matching whitespace and indentation precisely. If this string is not the exact literal text (i.e. 
you escaped it) or does not match exactly, the tool will fail.', type: 'string', }, new_string: { @@ -499,11 +503,10 @@ Expectation for required parameters: 'The exact literal text to replace `old_string` with, preferably unescaped. Provide the EXACT text. Ensure the resulting code is correct and idiomatic.', type: 'string', }, - expected_replacements: { - type: 'number', + replace_all: { + type: 'boolean', description: - 'Number of replacements expected. Defaults to 1 if not specified. Use when you want to replace multiple occurrences.', - minimum: 1, + 'Replace all occurrences of old_string (default false).', }, }, required: ['file_path', 'old_string', 'new_string'], diff --git a/packages/core/src/utils/editHelper.test.ts b/packages/core/src/utils/editHelper.test.ts new file mode 100644 index 00000000..79fe78f6 --- /dev/null +++ b/packages/core/src/utils/editHelper.test.ts @@ -0,0 +1,153 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect, it } from 'vitest'; +import { + countOccurrences, + maybeAugmentOldStringForDeletion, + normalizeEditStrings, +} from './editHelper.js'; + +describe('normalizeEditStrings', () => { + const file = `const one = 1; +const two = 2; +`; + + it('returns literal matches unchanged and trims new_string trailing whitespace', () => { + const result = normalizeEditStrings( + file, + 'const two = 2;', + ' const two = 42; ', + ); + expect(result).toEqual({ + oldString: 'const two = 2;', + newString: ' const two = 42;', + }); + }); + + it('normalizes smart quotes to match on-disk text', () => { + const result = normalizeEditStrings( + "const greeting = 'Don't';\n", + 'const greeting = ‘Don’t’;', + 'const greeting = “Hello”; ', + ); + expect(result).toEqual({ + oldString: "const greeting = 'Don't';", + newString: 'const greeting = “Hello”;', + }); + }); + + it('falls back to original strings when no match is found', () => { + const result = normalizeEditStrings(file, 
'missing text', 'replacement'); + expect(result).toEqual({ + oldString: 'missing text', + newString: 'replacement', + }); + }); + + it('still trims new_string when editing a brand-new file', () => { + const result = normalizeEditStrings(null, '', 'new file contents '); + expect(result).toEqual({ + oldString: '', + newString: 'new file contents', + }); + }); + + it('matches unicode dash variants', () => { + const result = normalizeEditStrings( + 'const range = "1-2";\n', + 'const range = "1\u20132";', + 'const range = "3\u20135"; ', + ); + expect(result).toEqual({ + oldString: 'const range = "1-2";', + newString: 'const range = "3\u20135";', + }); + }); + + it('matches when trailing whitespace differs only at line ends', () => { + const result = normalizeEditStrings( + 'value = 1;\n', + 'value = 1; \n', + 'value = 2; \n', + ); + expect(result).toEqual({ + oldString: 'value = 1;\n', + newString: 'value = 2;\n', + }); + }); + + it('treats non-breaking spaces as regular spaces', () => { + const result = normalizeEditStrings( + 'const label = "hello world";\n', + 'const label = "hello\u00a0world";', + 'const label = "hi\u00a0world";', + ); + expect(result).toEqual({ + oldString: 'const label = "hello world";', + newString: 'const label = "hi\u00a0world";', + }); + }); + + it('drops trailing newline from new content when the file lacks it', () => { + const result = normalizeEditStrings( + 'console.log("hi")', + 'console.log("hi")\n', + 'console.log("bye")\n', + ); + expect(result).toEqual({ + oldString: 'console.log("hi")', + newString: 'console.log("bye")', + }); + }); +}); + +describe('countOccurrences', () => { + it('returns zero when substring empty or missing', () => { + expect(countOccurrences('abc', '')).toBe(0); + expect(countOccurrences('abc', 'z')).toBe(0); + }); + + it('counts non-overlapping occurrences', () => { + expect(countOccurrences('aaaa', 'aa')).toBe(2); + }); +}); + +describe('maybeAugmentOldStringForDeletion', () => { + const file = 
'console.log("hi")\nconsole.log("bye")\n'; + + it('appends newline when deleting text followed by newline', () => { + expect( + maybeAugmentOldStringForDeletion(file, 'console.log("hi")', ''), + ).toBe('console.log("hi")\n'); + }); + + it('leaves strings untouched when not deleting', () => { + expect( + maybeAugmentOldStringForDeletion( + file, + 'console.log("hi")', + 'replacement', + ), + ).toBe('console.log("hi")'); + }); + + it('does not append newline when file lacks the variant', () => { + expect( + maybeAugmentOldStringForDeletion( + 'console.log("hi")', + 'console.log("hi")', + '', + ), + ).toBe('console.log("hi")'); + }); + + it('no-ops when the old string already ends with a newline', () => { + expect( + maybeAugmentOldStringForDeletion(file, 'console.log("bye")\n', ''), + ).toBe('console.log("bye")\n'); + }); +}); diff --git a/packages/core/src/utils/editHelper.ts b/packages/core/src/utils/editHelper.ts new file mode 100644 index 00000000..6b4a388d --- /dev/null +++ b/packages/core/src/utils/editHelper.ts @@ -0,0 +1,499 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * Helpers for reconciling LLM-proposed edits with on-disk text. + * + * The normalization pipeline intentionally stays deterministic: we first try + * literal substring matches, then gradually relax comparison rules (smart + * quotes, em-dashes, trailing whitespace, etc.) until we either locate the + * exact slice from the file or conclude the edit cannot be applied. + */ + +/* -------------------------------------------------------------------------- */ +/* Character-level normalization */ +/* -------------------------------------------------------------------------- */ + +const UNICODE_EQUIVALENT_MAP: Record = { + // Hyphen variations → ASCII hyphen-minus. + '\u2010': '-', + '\u2011': '-', + '\u2012': '-', + '\u2013': '-', + '\u2014': '-', + '\u2015': '-', + '\u2212': '-', + // Curly single quotes → straight apostrophe. 
+ '\u2018': "'", + '\u2019': "'", + '\u201A': "'", + '\u201B': "'", + // Curly double quotes → straight double quote. + '\u201C': '"', + '\u201D': '"', + '\u201E': '"', + '\u201F': '"', + // Whitespace variants → normal space. + '\u00A0': ' ', + '\u2002': ' ', + '\u2003': ' ', + '\u2004': ' ', + '\u2005': ' ', + '\u2006': ' ', + '\u2007': ' ', + '\u2008': ' ', + '\u2009': ' ', + '\u200A': ' ', + '\u202F': ' ', + '\u205F': ' ', + '\u3000': ' ', +}; + +function normalizeBasicCharacters(text: string): string { + if (text === '') { + return text; + } + + let normalized = ''; + for (const char of text) { + normalized += UNICODE_EQUIVALENT_MAP[char] ?? char; + } + return normalized; +} + +/** + * Removes trailing whitespace from each line while keeping the original newline + * separators intact. + */ +function stripTrailingWhitespacePreserveNewlines(text: string): string { + const pieces = text.split(/(\r\n|\n|\r)/); + let result = ''; + + for (let i = 0; i < pieces.length; i++) { + const segment = pieces[i]; + if (segment === undefined) { + continue; + } + + if (i % 2 === 0) { + result += segment.trimEnd(); + } else { + result += segment; + } + } + + return result; +} + +/* -------------------------------------------------------------------------- */ +/* Line-based search helpers */ +/* -------------------------------------------------------------------------- */ + +interface MatchedSliceResult { + slice: string; + removedTrailingFinalEmptyLine: boolean; +} + +/** + * Comparison passes become progressively more forgiving, making it possible to + * match when only trailing whitespace differs. Leading whitespace (indentation) + * is always preserved to avoid matching at incorrect scope levels. 
+ */ +const LINE_COMPARISON_PASSES: Array<(value: string) => string> = [ + (value) => value, + (value) => value.trimEnd(), +]; + +function normalizeLineForComparison(value: string): string { + return normalizeBasicCharacters(value).trimEnd(); +} + +/** + * Finds the first index where {@link pattern} appears within {@link lines} once + * both sequences are transformed in the same way. + */ +function seekSequenceWithTransform( + lines: string[], + pattern: string[], + transform: (value: string) => string, +): number | null { + if (pattern.length === 0) { + return 0; + } + + if (pattern.length > lines.length) { + return null; + } + + outer: for (let i = 0; i <= lines.length - pattern.length; i++) { + for (let p = 0; p < pattern.length; p++) { + if (transform(lines[i + p]) !== transform(pattern[p])) { + continue outer; + } + } + return i; + } + + return null; +} + +function buildLineIndex(text: string): { + lines: string[]; + offsets: number[]; +} { + const lines = text.split('\n'); + const offsets = new Array(lines.length + 1); + let cursor = 0; + + for (let i = 0; i < lines.length; i++) { + offsets[i] = cursor; + cursor += lines[i].length; + if (i < lines.length - 1) { + cursor += 1; // Account for the newline that split() removed. + } + } + offsets[lines.length] = text.length; + + return { lines, offsets }; +} + +/** + * Reconstructs the original characters for the matched lines, optionally + * preserving the newline that follows the final line. + */ +function sliceFromLines( + text: string, + offsets: number[], + lines: string[], + startLine: number, + lineCount: number, + includeTrailingNewline: boolean, +): string { + if (lineCount === 0) { + return includeTrailingNewline ? '\n' : ''; + } + + const startIndex = offsets[startLine] ?? 0; + const lastLineIndex = startLine + lineCount - 1; + const lastLineStart = offsets[lastLineIndex] ?? 0; + let endIndex = lastLineStart + (lines[lastLineIndex]?.length ?? 
0); + + if (includeTrailingNewline) { + const nextLineStart = offsets[startLine + lineCount]; + if (nextLineStart !== undefined) { + endIndex = nextLineStart; + } else if (text.endsWith('\n')) { + endIndex = text.length; + } + } + + return text.slice(startIndex, endIndex); +} + +function findLineBasedMatch( + haystack: string, + needle: string, +): MatchedSliceResult | null { + const { lines, offsets } = buildLineIndex(haystack); + const patternLines = needle.split('\n'); + const endsWithNewline = needle.endsWith('\n'); + + if (patternLines.length === 0) { + return null; + } + + const attemptMatch = (candidate: string[]): number | null => { + for (const pass of LINE_COMPARISON_PASSES) { + const idx = seekSequenceWithTransform(lines, candidate, pass); + if (idx !== null) { + return idx; + } + } + return seekSequenceWithTransform( + lines, + candidate, + normalizeLineForComparison, + ); + }; + + let matchIndex = attemptMatch(patternLines); + if (matchIndex !== null) { + return { + slice: sliceFromLines( + haystack, + offsets, + lines, + matchIndex, + patternLines.length, + endsWithNewline, + ), + removedTrailingFinalEmptyLine: false, + }; + } + + if (patternLines.at(-1) === '') { + const trimmedPattern = patternLines.slice(0, -1); + if (trimmedPattern.length === 0) { + return null; + } + matchIndex = attemptMatch(trimmedPattern); + if (matchIndex !== null) { + return { + slice: sliceFromLines( + haystack, + offsets, + lines, + matchIndex, + trimmedPattern.length, + false, + ), + removedTrailingFinalEmptyLine: true, + }; + } + } + + return null; +} + +/* -------------------------------------------------------------------------- */ +/* Slice discovery */ +/* -------------------------------------------------------------------------- */ + +function findMatchedSlice( + haystack: string, + needle: string, +): MatchedSliceResult | null { + if (needle === '') { + return null; + } + + const literalIndex = haystack.indexOf(needle); + if (literalIndex !== -1) { + return { + 
slice: haystack.slice(literalIndex, literalIndex + needle.length), + removedTrailingFinalEmptyLine: false, + }; + } + + const normalizedHaystack = normalizeBasicCharacters(haystack); + const normalizedNeedleChars = normalizeBasicCharacters(needle); + const normalizedIndex = normalizedHaystack.indexOf(normalizedNeedleChars); + if (normalizedIndex !== -1) { + return { + slice: haystack.slice(normalizedIndex, normalizedIndex + needle.length), + removedTrailingFinalEmptyLine: false, + }; + } + + return findLineBasedMatch(haystack, needle); +} + +/** + * Returns the literal slice from {@link haystack} that best corresponds to the + * provided {@link needle}, or {@code null} when no match is found. + */ +/* -------------------------------------------------------------------------- */ +/* Replacement helpers */ +/* -------------------------------------------------------------------------- */ + +function removeTrailingNewline(text: string): string { + if (text.endsWith('\r\n')) { + return text.slice(0, -2); + } + if (text.endsWith('\n') || text.endsWith('\r')) { + return text.slice(0, -1); + } + return text; +} + +function adjustNewStringForTrailingLine( + newString: string, + removedTrailingLine: boolean, +): string { + return removedTrailingLine ? removeTrailingNewline(newString) : newString; +} + +export interface NormalizedEditStrings { + oldString: string; + newString: string; +} + +/** + * Runs the core normalization pipeline: + * 1. Strip trailing whitespace copied from numbered output. + * 2. Attempt to find the literal text inside {@link fileContent}. + * 3. If found through a relaxed match (smart quotes, line trims, etc.), + * return the canonical slice from disk so later replacements operate on + * exact bytes. 
+ */ +export function normalizeEditStrings( + fileContent: string | null, + oldString: string, + newString: string, +): NormalizedEditStrings { + const trimmedNewString = stripTrailingWhitespacePreserveNewlines(newString); + + if (fileContent === null || oldString === '') { + return { + oldString, + newString: trimmedNewString, + }; + } + + const canonicalOriginal = findMatchedSlice(fileContent, oldString); + if (canonicalOriginal !== null) { + return { + oldString: canonicalOriginal.slice, + newString: adjustNewStringForTrailingLine( + trimmedNewString, + canonicalOriginal.removedTrailingFinalEmptyLine, + ), + }; + } + + return { + oldString, + newString: trimmedNewString, + }; +} + +/** + * When deleting text and the on-disk content contains the same substring with a + * trailing newline, automatically consume that newline so the removal does not + * leave a blank line behind. + */ +export function maybeAugmentOldStringForDeletion( + fileContent: string | null, + oldString: string, + newString: string, +): string { + if ( + fileContent === null || + oldString === '' || + newString !== '' || + oldString.endsWith('\n') + ) { + return oldString; + } + + const candidate = `${oldString}\n`; + return fileContent.includes(candidate) ? candidate : oldString; +} + +/** + * Counts the number of non-overlapping occurrences of {@link substr} inside + * {@link source}. Returns 0 when the substring is empty. + */ +export function countOccurrences(source: string, substr: string): number { + if (substr === '') { + return 0; + } + + let count = 0; + let index = source.indexOf(substr); + while (index !== -1) { + count++; + index = source.indexOf(substr, index + substr.length); + } + return count; +} + +/** + * Result from extracting a snippet showing the edited region. 
/**
 * Result from extracting a snippet showing the edited region.
 */
export interface EditSnippetResult {
  /** Starting line number (1-indexed) of the snippet */
  startLine: number;
  /** Ending line number (1-indexed) of the snippet */
  endLine: number;
  /** Total number of lines in the new content */
  totalLines: number;
  /** The snippet content (subset of lines from newContent) */
  content: string;
}

/** Default number of unchanged lines shown on each side of the change. */
const SNIPPET_CONTEXT_LINES = 4;
/** Largest changed span (end line minus start line) that still gets a snippet. */
const SNIPPET_MAX_LINES = 1000;

/**
 * Extracts a snippet from the edited file showing the changed region with
 * surrounding context. This compares the old and new content line-by-line
 * from both ends to locate the changed region.
 *
 * @param oldContent The original file content before the edit (null for new files)
 * @param newContent The new file content after the edit
 * @param contextLines Number of context lines to show before and after the
 *   change. Defaults to {@link SNIPPET_CONTEXT_LINES}. Fractions are rounded
 *   down, negative values are treated as 0, and non-finite values fall back
 *   to the default. With 0 context a pure deletion produces empty content.
 * @returns Snippet information, or null if no meaningful snippet can be
 *   extracted (content unchanged, new content empty, or the changed span
 *   exceeds {@link SNIPPET_MAX_LINES}). When `oldContent` is null the whole
 *   new content is returned.
 */
export function extractEditSnippet(
  oldContent: string | null,
  newContent: string,
  contextLines: number = SNIPPET_CONTEXT_LINES,
): EditSnippetResult | null {
  const newLines = newContent.split('\n');
  const totalLines = newLines.length;

  if (oldContent === null) {
    return {
      startLine: 1,
      endLine: totalLines,
      totalLines,
      content: newContent,
    };
  }

  // No changes case
  if (oldContent === newContent || !newContent) {
    return null;
  }

  const context = Number.isFinite(contextLines)
    ? Math.max(0, Math.floor(contextLines))
    : SNIPPET_CONTEXT_LINES;

  const oldLines = oldContent.split('\n');

  // Length of the common prefix, i.e. index of the first differing line.
  let firstDiffLine = 0;
  const minLength = Math.min(oldLines.length, newLines.length);
  while (
    firstDiffLine < minLength &&
    oldLines[firstDiffLine] === newLines[firstDiffLine]
  ) {
    firstDiffLine++;
  }

  // Walk backwards over the common suffix, never crossing into the prefix.
  // For a pure deletion newEndIndex ends up at firstDiffLine - 1.
  let oldEndIndex = oldLines.length - 1;
  let newEndIndex = newLines.length - 1;
  while (
    oldEndIndex >= firstDiffLine &&
    newEndIndex >= firstDiffLine &&
    oldLines[oldEndIndex] === newLines[newEndIndex]
  ) {
    oldEndIndex--;
    newEndIndex--;
  }

  // The changed region in the new content is from firstDiffLine to
  // newEndIndex (inclusive); convert to 1-indexed line numbers.
  const changeStart = firstDiffLine + 1;
  const changeEnd = newEndIndex + 1;

  // If the change region is too large, don't generate a snippet
  if (changeEnd - changeStart > SNIPPET_MAX_LINES) {
    return null;
  }

  // Calculate snippet bounds with context, clamped to the file.
  const snippetStart = Math.max(1, changeStart - context);
  const snippetEnd = Math.min(totalLines, changeEnd + context);

  const snippetLines = newLines.slice(snippetStart - 1, snippetEnd);

  return {
    startLine: snippetStart,
    endLine: snippetEnd,
    totalLines,
    content: snippetLines.join('\n'),
  };
}