qwen-code/packages/core/src/utils/editHelper.ts

/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * Helpers for reconciling LLM-proposed edits with on-disk text.
 *
 * The normalization pipeline intentionally stays deterministic: we first try
 * literal substring matches, then gradually relax comparison rules (smart
 * quotes, em-dashes, trailing whitespace, etc.) until we either locate the
 * exact slice from the file or conclude the edit cannot be applied.
 */

/* -------------------------------------------------------------------------- */
/* Character-level normalization                                             */
/* -------------------------------------------------------------------------- */

const UNICODE_EQUIVALENT_MAP: Record<string, string> = {
  // Hyphen variations → ASCII hyphen-minus.
  '\u2010': '-',
  '\u2011': '-',
  '\u2012': '-',
  '\u2013': '-',
  '\u2014': '-',
  '\u2015': '-',
  '\u2212': '-',
  // Curly single quotes → straight apostrophe.
  '\u2018': "'",
  '\u2019': "'",
  '\u201A': "'",
  '\u201B': "'",
  // Curly double quotes → straight double quote.
  '\u201C': '"',
  '\u201D': '"',
  '\u201E': '"',
  '\u201F': '"',
  // Whitespace variants → normal space.
  '\u00A0': ' ',
  '\u2002': ' ',
  '\u2003': ' ',
  '\u2004': ' ',
  '\u2005': ' ',
  '\u2006': ' ',
  '\u2007': ' ',
  '\u2008': ' ',
  '\u2009': ' ',
  '\u200A': ' ',
  '\u202F': ' ',
  '\u205F': ' ',
  '\u3000': ' ',
};

function normalizeBasicCharacters(text: string): string {
  if (text === '') {
    return text;
  }

  let normalized = '';
  for (const char of text) {
    normalized += UNICODE_EQUIVALENT_MAP[char] ?? char;
  }
  return normalized;
}

/**
 * Removes trailing whitespace from each line while keeping the original newline
 * separators intact.
 */
function stripTrailingWhitespacePreserveNewlines(text: string): string {
  const pieces = text.split(/(\r\n|\n|\r)/);
  let result = '';

  for (let i = 0; i < pieces.length; i++) {
    const segment = pieces[i];
    if (segment === undefined) {
      continue;
    }

    if (i % 2 === 0) {
      result += segment.trimEnd();
    } else {
      result += segment;
    }
  }

  return result;
}

/* -------------------------------------------------------------------------- */
/* Line-based search helpers                                                 */
/* -------------------------------------------------------------------------- */

interface MatchedSliceResult {
  slice: string;
  removedTrailingFinalEmptyLine: boolean;
}

/**
 * Comparison passes become progressively more forgiving, making it possible to
 * match when only trailing whitespace differs. Leading whitespace (indentation)
 * is always preserved to avoid matching at incorrect scope levels.
 */
const LINE_COMPARISON_PASSES: Array<(value: string) => string> = [
  (value) => value,
  (value) => value.trimEnd(),
];

function normalizeLineForComparison(value: string): string {
  return normalizeBasicCharacters(value).trimEnd();
}

/**
 * Finds the first index where {@link pattern} appears within {@link lines} once
 * both sequences are transformed in the same way.
 */
function seekSequenceWithTransform(
  lines: string[],
  pattern: string[],
  transform: (value: string) => string,
): number | null {
  if (pattern.length === 0) {
    return 0;
  }

  if (pattern.length > lines.length) {
    return null;
  }

  outer: for (let i = 0; i <= lines.length - pattern.length; i++) {
    for (let p = 0; p < pattern.length; p++) {
      if (transform(lines[i + p]) !== transform(pattern[p])) {
        continue outer;
      }
    }
    return i;
  }

  return null;
}

function buildLineIndex(text: string): {
  lines: string[];
  offsets: number[];
} {
  const lines = text.split('\n');
  const offsets = new Array<number>(lines.length + 1);
  let cursor = 0;

  for (let i = 0; i < lines.length; i++) {
    offsets[i] = cursor;
    cursor += lines[i].length;
    if (i < lines.length - 1) {
      cursor += 1; // Account for the newline that split() removed.
    }
  }
  offsets[lines.length] = text.length;

  return { lines, offsets };
}

/**
 * Reconstructs the original characters for the matched lines, optionally
 * preserving the newline that follows the final line.
 */
function sliceFromLines(
  text: string,
  offsets: number[],
  lines: string[],
  startLine: number,
  lineCount: number,
  includeTrailingNewline: boolean,
): string {
  if (lineCount === 0) {
    return includeTrailingNewline ? '\n' : '';
  }

  const startIndex = offsets[startLine] ?? 0;
  const lastLineIndex = startLine + lineCount - 1;
  const lastLineStart = offsets[lastLineIndex] ?? 0;
  let endIndex = lastLineStart + (lines[lastLineIndex]?.length ?? 0);

  if (includeTrailingNewline) {
    const nextLineStart = offsets[startLine + lineCount];
    if (nextLineStart !== undefined) {
      endIndex = nextLineStart;
    } else if (text.endsWith('\n')) {
      endIndex = text.length;
    }
  }

  return text.slice(startIndex, endIndex);
}

function findLineBasedMatch(
  haystack: string,
  needle: string,
): MatchedSliceResult | null {
  const { lines, offsets } = buildLineIndex(haystack);
  const patternLines = needle.split('\n');
  const endsWithNewline = needle.endsWith('\n');

  if (patternLines.length === 0) {
    return null;
  }

  const attemptMatch = (candidate: string[]): number | null => {
    for (const pass of LINE_COMPARISON_PASSES) {
      const idx = seekSequenceWithTransform(lines, candidate, pass);
      if (idx !== null) {
        return idx;
      }
    }
    return seekSequenceWithTransform(
      lines,
      candidate,
      normalizeLineForComparison,
    );
  };

  let matchIndex = attemptMatch(patternLines);
  if (matchIndex !== null) {
    return {
      slice: sliceFromLines(
        haystack,
        offsets,
        lines,
        matchIndex,
        patternLines.length,
        endsWithNewline,
      ),
      removedTrailingFinalEmptyLine: false,
    };
  }

  if (patternLines.at(-1) === '') {
    const trimmedPattern = patternLines.slice(0, -1);
    if (trimmedPattern.length === 0) {
      return null;
    }
    matchIndex = attemptMatch(trimmedPattern);
    if (matchIndex !== null) {
      return {
        slice: sliceFromLines(
          haystack,
          offsets,
          lines,
          matchIndex,
          trimmedPattern.length,
          false,
        ),
        removedTrailingFinalEmptyLine: true,
      };
    }
  }

  return null;
}

/* -------------------------------------------------------------------------- */
/* Slice discovery                                                           */
/* -------------------------------------------------------------------------- */

function findMatchedSlice(
  haystack: string,
  needle: string,
): MatchedSliceResult | null {
  if (needle === '') {
    return null;
  }

  const literalIndex = haystack.indexOf(needle);
  if (literalIndex !== -1) {
    return {
      slice: haystack.slice(literalIndex, literalIndex + needle.length),
      removedTrailingFinalEmptyLine: false,
    };
  }

  const normalizedHaystack = normalizeBasicCharacters(haystack);
  const normalizedNeedleChars = normalizeBasicCharacters(needle);
  const normalizedIndex = normalizedHaystack.indexOf(normalizedNeedleChars);
  if (normalizedIndex !== -1) {
    return {
      slice: haystack.slice(normalizedIndex, normalizedIndex + needle.length),
      removedTrailingFinalEmptyLine: false,
    };
  }

  return findLineBasedMatch(haystack, needle);
}

/**
 * Returns the literal slice from {@link haystack} that best corresponds to the
 * provided {@link needle}, or {@code null} when no match is found.
 */
/* -------------------------------------------------------------------------- */
/* Replacement helpers                                                       */
/* -------------------------------------------------------------------------- */

function removeTrailingNewline(text: string): string {
  if (text.endsWith('\r\n')) {
    return text.slice(0, -2);
  }
  if (text.endsWith('\n') || text.endsWith('\r')) {
    return text.slice(0, -1);
  }
  return text;
}

function adjustNewStringForTrailingLine(
  newString: string,
  removedTrailingLine: boolean,
): string {
  return removedTrailingLine ? removeTrailingNewline(newString) : newString;
}

export interface NormalizedEditStrings {
  oldString: string;
  newString: string;
}

/**
 * Runs the core normalization pipeline:
 *   1. Strip trailing whitespace copied from numbered output.
 *   2. Attempt to find the literal text inside {@link fileContent}.
 *   3. If found through a relaxed match (smart quotes, line trims, etc.),
 *      return the canonical slice from disk so later replacements operate on
 *      exact bytes.
 */
export function normalizeEditStrings(
  fileContent: string | null,
  oldString: string,
  newString: string,
): NormalizedEditStrings {
  const trimmedNewString = stripTrailingWhitespacePreserveNewlines(newString);

  if (fileContent === null || oldString === '') {
    return {
      oldString,
      newString: trimmedNewString,
    };
  }

  const canonicalOriginal = findMatchedSlice(fileContent, oldString);
  if (canonicalOriginal !== null) {
    return {
      oldString: canonicalOriginal.slice,
      newString: adjustNewStringForTrailingLine(
        trimmedNewString,
        canonicalOriginal.removedTrailingFinalEmptyLine,
      ),
    };
  }

  return {
    oldString,
    newString: trimmedNewString,
  };
}

/**
 * When deleting text and the on-disk content contains the same substring with a
 * trailing newline, automatically consume that newline so the removal does not
 * leave a blank line behind.
 */
export function maybeAugmentOldStringForDeletion(
  fileContent: string | null,
  oldString: string,
  newString: string,
): string {
  if (
    fileContent === null ||
    oldString === '' ||
    newString !== '' ||
    oldString.endsWith('\n')
  ) {
    return oldString;
  }

  const candidate = `${oldString}\n`;
  return fileContent.includes(candidate) ? candidate : oldString;
}

/**
 * Counts the number of non-overlapping occurrences of {@link substr} inside
 * {@link source}. Returns 0 when the substring is empty.
 */
export function countOccurrences(source: string, substr: string): number {
  if (substr === '') {
    return 0;
  }

  let count = 0;
  let index = source.indexOf(substr);
  while (index !== -1) {
    count++;
    index = source.indexOf(substr, index + substr.length);
  }
  return count;
}

/**
 * Result from extracting a snippet showing the edited region.
 */
export interface EditSnippetResult {
  /** Starting line number (1-indexed) of the snippet */
  startLine: number;
  /** Ending line number (1-indexed) of the snippet */
  endLine: number;
  /** Total number of lines in the new content */
  totalLines: number;
  /** The snippet content (subset of lines from newContent) */
  content: string;
}

const SNIPPET_CONTEXT_LINES = 4;
const SNIPPET_MAX_LINES = 1000;

/**
 * Extracts a snippet from the edited file showing the changed region with
 * surrounding context. This compares the old and new content line-by-line
 * from both ends to locate the changed region.
 *
 * @param oldContent The original file content before the edit (null for new files)
 * @param newContent The new file content after the edit
 * @param contextLines Number of context lines to show before and after the change
 * @returns Snippet information, or null if no meaningful snippet can be extracted
 */
export function extractEditSnippet(
  oldContent: string | null,
  newContent: string,
): EditSnippetResult | null {
  const newLines = newContent.split('\n');
  const totalLines = newLines.length;

  if (oldContent === null) {
    return {
      startLine: 1,
      endLine: totalLines,
      totalLines,
      content: newContent,
    };
  }

  // No changes case
  if (oldContent === newContent || !newContent) {
    return null;
  }

  const oldLines = oldContent.split('\n');

  // Find the first line that differs from the start
  let firstDiffLine = 0;
  const minLength = Math.min(oldLines.length, newLines.length);

  while (firstDiffLine < minLength) {
    if (oldLines[firstDiffLine] !== newLines[firstDiffLine]) {
      break;
    }
    firstDiffLine++;
  }

  // Find the first line that differs from the end
  let oldEndIndex = oldLines.length - 1;
  let newEndIndex = newLines.length - 1;

  while (oldEndIndex >= firstDiffLine && newEndIndex >= firstDiffLine) {
    if (oldLines[oldEndIndex] !== newLines[newEndIndex]) {
      break;
    }
    oldEndIndex--;
    newEndIndex--;
  }

  // The changed region in the new content is from firstDiffLine to newEndIndex (inclusive)
  // Convert to 1-indexed line numbers
  const changeStart = firstDiffLine + 1;
  const changeEnd = newEndIndex + 1;

  // If the change region is too large, don't generate a snippet
  if (changeEnd - changeStart > SNIPPET_MAX_LINES) {
    return null;
  }

  // Calculate snippet bounds with context
  const snippetStart = Math.max(1, changeStart - SNIPPET_CONTEXT_LINES);
  const snippetEnd = Math.min(totalLines, changeEnd + SNIPPET_CONTEXT_LINES);

  const snippetLines = newLines.slice(snippetStart - 1, snippetEnd);

  return {
    startLine: snippetStart,
    endLine: snippetEnd,
    totalLines,
    content: snippetLines.join('\n'),
  };
}