fix: CLAUDE.md compatibility for GEMINI.md '@' file import behavior (#2978)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Allen Hutchison <adh@google.com>
2025-12-19 09:33:53 +00:00 · 2025-07-31 22:06:50 +05:30
parent ae86c7ba05
commit 9a6422f331
14 changed files with 1355 additions and 215 deletions
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -39,6 +39,7 @@
    "html-to-text": "^9.0.5",
    "https-proxy-agent": "^7.0.6",
    "ignore": "^7.0.0",
+    "marked": "^15.0.12",
    "micromatch": "^4.0.8",
    "open": "^10.1.2",
    "shell-quote": "^1.8.3",
--- a/packages/core/src/utils/memoryDiscovery.test.ts
+++ b/packages/core/src/utils/memoryDiscovery.test.ts
@@ -305,10 +305,12 @@ Subdir memory
      false,
      new FileDiscoveryService(projectRoot),
      [],
+      'tree',
      {
        respectGitIgnore: true,
        respectGeminiIgnore: true,
      },
+      200, // maxDirs parameter
    );

    expect(result).toEqual({
@@ -334,6 +336,7 @@ My code memory
      true,
      new FileDiscoveryService(projectRoot),
      [],
+      'tree', // importFormat
      {
        respectGitIgnore: true,
        respectGeminiIgnore: true,
--- a/packages/core/src/utils/memoryDiscovery.ts
+++ b/packages/core/src/utils/memoryDiscovery.ts
@@ -43,7 +43,7 @@ async function findProjectRoot(startDir: string): Promise<string | null> {
  while (true) {
    const gitPath = path.join(currentDir, '.git');
    try {
-      const stats = await fs.stat(gitPath);
+      const stats = await fs.lstat(gitPath);
      if (stats.isDirectory()) {
        return currentDir;
      }
@@ -230,6 +230,7 @@ async function getGeminiMdFilePathsInternal(
 async function readGeminiMdFiles(
  filePaths: string[],
  debugMode: boolean,
+  importFormat: 'flat' | 'tree' = 'tree',
 ): Promise<GeminiFileContent[]> {
  const results: GeminiFileContent[] = [];
  for (const filePath of filePaths) {
@@ -237,16 +238,19 @@ async function readGeminiMdFiles(
      const content = await fs.readFile(filePath, 'utf-8');

      // Process imports in the content
-      const processedContent = await processImports(
+      const processedResult = await processImports(
        content,
        path.dirname(filePath),
        debugMode,
+        undefined,
+        undefined,
+        importFormat,
      );

-      results.push({ filePath, content: processedContent });
+      results.push({ filePath, content: processedResult.content });
      if (debugMode)
        logger.debug(
-          `Successfully read and processed imports: ${filePath} (Length: ${processedContent.length})`,
+          `Successfully read and processed imports: ${filePath} (Length: ${processedResult.content.length})`,
        );
    } catch (error: unknown) {
      const isTestEnv = process.env.NODE_ENV === 'test' || process.env.VITEST;
@@ -293,12 +297,13 @@ export async function loadServerHierarchicalMemory(
  debugMode: boolean,
  fileService: FileDiscoveryService,
  extensionContextFilePaths: string[] = [],
+  importFormat: 'flat' | 'tree' = 'tree',
  fileFilteringOptions?: FileFilteringOptions,
  maxDirs: number = 200,
 ): Promise<{ memoryContent: string; fileCount: number }> {
  if (debugMode)
    logger.debug(
-      `Loading server hierarchical memory for CWD: ${currentWorkingDirectory}`,
+      `Loading server hierarchical memory for CWD: ${currentWorkingDirectory} (importFormat: ${importFormat})`,
    );

  // For the server, homedir() refers to the server process's home.
@@ -317,7 +322,11 @@ export async function loadServerHierarchicalMemory(
    if (debugMode) logger.debug('No GEMINI.md files found in hierarchy.');
    return { memoryContent: '', fileCount: 0 };
  }
-  const contentsWithPaths = await readGeminiMdFiles(filePaths, debugMode);
+  const contentsWithPaths = await readGeminiMdFiles(
+    filePaths,
+    debugMode,
+    importFormat,
+  );
  // Pass CWD for relative path display in concatenated content
  const combinedInstructions = concatenateInstructions(
    contentsWithPaths,
--- a/packages/core/src/utils/memoryImportProcessor.test.ts
+++ b/packages/core/src/utils/memoryImportProcessor.test.ts
--- a/packages/core/src/utils/memoryImportProcessor.ts
+++ b/packages/core/src/utils/memoryImportProcessor.ts
@@ -6,6 +6,7 @@

 import * as fs from 'fs/promises';
 import * as path from 'path';
+import { marked } from 'marked';

 // Simple console logger for import processing
 const logger = {
@@ -29,15 +30,176 @@ interface ImportState {
  currentFile?: string; // Track the current file being processed
 }

+/**
+ * A node in the import tree built while resolving '@path' imports: one file
+ * together with the files it imported directly.
+ */
+export interface MemoryFile {
+  path: string; // Path identifying this file in the tree
+  imports?: MemoryFile[]; // Direct imports, in the order they were imported
+}
+
+/**
+ * Result of processing imports: the rewritten text plus a description of
+ * which files were pulled in.
+ */
+export interface ProcessImportsResult {
+  content: string; // Content after import processing
+  importTree: MemoryFile; // Root node describing the processed file and its imports
+}
+
+/**
+ * Finds the project root by walking upward from startDir, looking for a
+ * directory that contains a `.git` directory.
+ *
+ * @param startDir - Directory to start from; it is resolved to an absolute path
+ * @returns The nearest directory at or above startDir that contains a `.git`
+ *   directory, or the resolved startDir when none is found
+ */
+async function findProjectRoot(startDir: string): Promise<string> {
+  let currentDir = path.resolve(startDir);
+  while (true) {
+    const gitPath = path.join(currentDir, '.git');
+    try {
+      const stats = await fs.lstat(gitPath); // lstat reports on the entry itself and does not follow symlinks
+      if (stats.isDirectory()) {
+        return currentDir;
+      } // NOTE(review): a .git that is a regular file or a symlink is not treated as a root here - confirm this is intended
+    } catch {
+      // lstat failed (e.g. there is no .git entry here), continue to parent
+    }
+    const parentDir = path.dirname(currentDir);
+    if (parentDir === currentDir) {
+      // dirname returned its input unchanged: reached filesystem root
+      break;
+    }
+    currentDir = parentDir;
+  }
+  // Fallback to startDir if .git not found
+  return path.resolve(startDir);
+}
+
+/**
+ * Type guard for caught values: true when err is a non-null object that has
+ * a `message` property whose value is a string.
+ *
+ * @param err - Any caught value
+ * @returns Whether err.message can be read as a string
+ */
+function hasMessage(err: unknown): err is { message: string } {
+  return (
+    typeof err === 'object' &&
+    err !== null && // typeof null is 'object', so null is excluded explicitly
+    'message' in err &&
+    typeof (err as { message: unknown }).message === 'string'
+  );
+}
+
+// Helpers for locating '@path' imports and the code regions (found with marked) in which they are ignored
+/**
+ * Finds all import statements in content without using regex.
+ *
+ * An import is an '@' that is at the very start of content or directly
+ * preceded by a space, tab, newline or carriage return, followed by a run of
+ * non-whitespace characters whose first character is '.', '/' or an ASCII
+ * letter. Code blocks are not taken into account by this function.
+ *
+ * @param content - Text to scan
+ * @returns Array of {start, _end, path} objects for each import found, in
+ *   order of appearance: start is the index of the '@', _end is the index
+ *   just past the last character of the path, and path is the text after '@'
+ */
+function findImports(
+  content: string,
+): Array<{ start: number; _end: number; path: string }> {
+  const imports: Array<{ start: number; _end: number; path: string }> = [];
+  let i = 0;
+  const len = content.length;
+
+  while (i < len) {
+    // Find next @ symbol
+    i = content.indexOf('@', i);
+    if (i === -1) break;
+
+    // Check if it's a word boundary (not part of another word)
+    if (i > 0 && !isWhitespace(content[i - 1])) {
+      i++; // step past this '@' and keep scanning
+      continue;
+    }
+
+    // Find the end of the import path (whitespace or newline)
+    let j = i + 1;
+    while (
+      j < len &&
+      !isWhitespace(content[j]) && // isWhitespace already rejects '\n' and '\r'; the two checks below repeat that
+      content[j] !== '\n' &&
+      content[j] !== '\r'
+    ) {
+      j++;
+    }
+
+    // Extract the path (everything after @)
+    const importPath = content.slice(i + 1, j);
+
+    // Basic validation (first character must be '.', '/' or an ASCII letter)
+    if (
+      importPath.length > 0 &&
+      (importPath[0] === '.' ||
+        importPath[0] === '/' ||
+        isLetter(importPath[0]))
+    ) {
+      imports.push({
+        start: i,
+        _end: j,
+        path: importPath,
+      });
+    }
+
+    i = j + 1; // content[j] is whitespace or past the end, so it cannot begin an import
+  }
+
+  return imports;
+}
+
+function isWhitespace(char: string): boolean { // true only for space, tab, line feed and carriage return
+  return char === ' ' || char === '\t' || char === '\n' || char === '\r'; // whole-string comparison: any other value (including longer strings) is false
+}
+
+function isLetter(char: string): boolean { // true when the first character of char is an ASCII letter
+  const code = char.charCodeAt(0); // only the first UTF-16 code unit is inspected
+  return (
+    (code >= 65 && code <= 90) || // A-Z
+    (code >= 97 && code <= 122)
+  ); // a-z
+}
+
+function findCodeRegions(content: string): Array<[number, number]> { // returns [start, end) offsets into content, one pair per located 'code' / 'codespan' token
+  const regions: Array<[number, number]> = [];
+  const tokens = marked.lexer(content);
+
+  /* Map from raw content to a queue of its start indices in the original content.
+   * The queue is built once per distinct raw string and holds every position
+   * at which indexOf finds that string, overlapping matches included.
+   */
+  const rawContentIndices = new Map<string, number[]>();
+
+  function walk(token: { type: string; raw: string; tokens?: unknown[] }) { // depth-first visit of a token and its nested tokens
+    if (token.type === 'code' || token.type === 'codespan') {
+      if (!rawContentIndices.has(token.raw)) {
+        const indices: number[] = [];
+        let lastIndex = -1;
+        while ((lastIndex = content.indexOf(token.raw, lastIndex + 1)) !== -1) {
+          indices.push(lastIndex);
+        }
+        rawContentIndices.set(token.raw, indices);
+      }
+
+      const indices = rawContentIndices.get(token.raw);
+      if (indices && indices.length > 0) { // a token whose raw text is not found verbatim in content yields no region
+        /* Assume tokens are processed in order of appearance.
+         * NOTE(review): positions come from a plain text search and are not
+         * checked to be code tokens themselves, so identical text elsewhere
+         * (for example inside a larger fenced block) could be dequeued in
+         * place of the real token - verify against marked's token offsets.
+         */
+        // Dequeue the next available index for this raw content.
+        const idx = indices.shift()!;
+        regions.push([idx, idx + token.raw.length]);
+      }
+    }
+
+    if ('tokens' in token && token.tokens) {
+      for (const child of token.tokens) {
+        walk(child as { type: string; raw: string; tokens?: unknown[] });
+      }
+    }
+  }
+
+  for (const token of tokens) {
+    walk(token);
+  }
+
+  return regions; // in token visiting order; not sorted or merged
+}
+
 /**
 * Processes import statements in GEMINI.md content
- * Supports @path/to/file.md syntax for importing content from other files
- *
+ * Supports @path/to/file syntax for importing content from other files
 * @param content - The content to process for imports
 * @param basePath - The directory path where the current file is located
 * @param debugMode - Whether to enable debug logging
 * @param importState - State tracking for circular import prevention
- * @returns Processed content with imports resolved
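+ * @param projectRoot - Directory that imported paths must resolve inside; when omitted, it is found by searching upward from basePath for a .git directory (falling back to basePath)
+ * @param importFormat - 'tree' (default) inlines each imported file at the point of its '@path' reference; 'flat' emits each unique file once as a delimited section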
+ * @returns Processed content with imports resolved and import tree
 */
 export async function processImports(
  content: string,
@@ -45,156 +207,198 @@ export async function processImports(
  debugMode: boolean = false,
  importState: ImportState = {
    processedFiles: new Set(),
-    maxDepth: 10,
+    maxDepth: 5,
    currentDepth: 0,
  },
-): Promise<string> {
+  projectRoot?: string,
+  importFormat: 'flat' | 'tree' = 'tree',
+): Promise<ProcessImportsResult> {
+  if (!projectRoot) {
+    projectRoot = await findProjectRoot(basePath);
+  }
+
  if (importState.currentDepth >= importState.maxDepth) {
    if (debugMode) {
      logger.warn(
        `Maximum import depth (${importState.maxDepth}) reached. Stopping import processing.`,
      );
    }
-    return content;
+    return {
+      content,
+      importTree: { path: importState.currentFile || 'unknown' },
+    };
  }

-  // Regex to match @path/to/file imports (supports any file extension)
-  // Supports both @path/to/file.md and @./path/to/file.md syntax
-  const importRegex = /@([./]?[^\s\n]+\.[^\s\n]+)/g;
+  // --- FLAT FORMAT LOGIC ---
+  if (importFormat === 'flat') {
+    // Use a queue to process files in order of first encounter, and a set to avoid duplicates
+    const flatFiles: Array<{ path: string; content: string }> = [];
+    // Track processed files across the entire operation
+    const processedFiles = new Set<string>();

-  let processedContent = content;
-  let match: RegExpExecArray | null;
+    // Helper to recursively process imports
+    async function processFlat(
+      fileContent: string,
+      fileBasePath: string,
+      filePath: string,
+      depth: number,
+    ) {
+      // Normalize the file path to ensure consistent comparison
+      const normalizedPath = path.normalize(filePath);

-  // Process all imports in the content
-  while ((match = importRegex.exec(content)) !== null) {
-    const importPath = match[1];
+      // Skip if already processed
+      if (processedFiles.has(normalizedPath)) return;

-    // Validate import path to prevent path traversal attacks
-    if (!validateImportPath(importPath, basePath, [basePath])) {
-      processedContent = processedContent.replace(
-        match[0],
-        `<!-- Import failed: ${importPath} - Path traversal attempt -->`,
-      );
-      continue;
-    }
+      // Mark as processed before processing to prevent infinite recursion
+      processedFiles.add(normalizedPath);

-    // Check if the import is for a non-md file and warn
-    if (!importPath.endsWith('.md')) {
-      logger.warn(
-        `Import processor only supports .md files. Attempting to import non-md file: ${importPath}. This will fail.`,
-      );
-      // Replace the import with a warning comment
-      processedContent = processedContent.replace(
-        match[0],
-        `<!-- Import failed: ${importPath} - Only .md files are supported -->`,
-      );
-      continue;
-    }
+      // Add this file to the flat list
+      flatFiles.push({ path: normalizedPath, content: fileContent });

-    const fullPath = path.resolve(basePath, importPath);
+      // Find imports in this file
+      const codeRegions = findCodeRegions(fileContent);
+      const imports = findImports(fileContent);

-    if (debugMode) {
-      logger.debug(`Processing import: ${importPath} -> ${fullPath}`);
-    }
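+      // Visit imports last-to-first. Nothing is spliced back into fileContent in flat mode, so this order only decides the sequence in which imported files are added to flatFiles.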
+      for (let i = imports.length - 1; i >= 0; i--) {
+        const { start, _end, path: importPath } = imports[i];

-    // Check for circular imports - if we're already processing this file
-    if (importState.currentFile === fullPath) {
-      if (debugMode) {
-        logger.warn(`Circular import detected: ${importPath}`);
-      }
-      // Replace the import with a warning comment
-      processedContent = processedContent.replace(
-        match[0],
-        `<!-- Circular import detected: ${importPath} -->`,
-      );
-      continue;
-    }
-
-    // Check if we've already processed this file in this import chain
-    if (importState.processedFiles.has(fullPath)) {
-      if (debugMode) {
-        logger.warn(`File already processed in this chain: ${importPath}`);
-      }
-      // Replace the import with a warning comment
-      processedContent = processedContent.replace(
-        match[0],
-        `<!-- File already processed: ${importPath} -->`,
-      );
-      continue;
-    }
-
-    // Check for potential circular imports by looking at the import chain
-    if (importState.currentFile) {
-      const currentFileDir = path.dirname(importState.currentFile);
-      const potentialCircularPath = path.resolve(currentFileDir, importPath);
-      if (potentialCircularPath === importState.currentFile) {
-        if (debugMode) {
-          logger.warn(`Circular import detected: ${importPath}`);
+        // Skip if inside a code region
+        if (
+          codeRegions.some(
+            ([regionStart, regionEnd]) =>
+              start >= regionStart && start < regionEnd,
+          )
+        ) {
+          continue;
+        }
+
+        // Validate import path
+        if (
+          !validateImportPath(importPath, fileBasePath, [projectRoot || ''])
+        ) {
+          continue;
+        }
+
+        const fullPath = path.resolve(fileBasePath, importPath);
+        const normalizedFullPath = path.normalize(fullPath);
+
+        // Skip if already processed
+        if (processedFiles.has(normalizedFullPath)) continue;
+
+        try {
+          await fs.access(fullPath);
+          const importedContent = await fs.readFile(fullPath, 'utf-8');
+
+          // Process the imported file
+          await processFlat(
+            importedContent,
+            path.dirname(fullPath),
+            normalizedFullPath,
+            depth + 1,
+          );
+        } catch (error) {
+          if (debugMode) {
+            logger.warn(
+              `Failed to import ${fullPath}: ${hasMessage(error) ? error.message : 'Unknown error'}`,
+            );
+          }
+          // Continue with other imports even if one fails
        }
-        // Replace the import with a warning comment
-        processedContent = processedContent.replace(
-          match[0],
-          `<!-- Circular import detected: ${importPath} -->`,
-        );
-        continue;
      }
    }

+    // Start with the root file (current file)
+    const rootPath = path.normalize(
+      importState.currentFile || path.resolve(basePath),
+    );
+    await processFlat(content, basePath, rootPath, 0);
+
+    // Concatenate all unique files in order, Claude-style
+    const flatContent = flatFiles
+      .map(
+        (f) =>
+          `--- File: ${f.path} ---\n${f.content.trim()}\n--- End of File: ${f.path} ---`,
+      )
+      .join('\n\n');
+
+    return {
+      content: flatContent,
+      importTree: { path: rootPath }, // Tree not meaningful in flat mode
+    };
+  }
+
+  // --- TREE FORMAT LOGIC (existing) ---
+  const codeRegions = findCodeRegions(content);
+  let result = '';
+  let lastIndex = 0;
+  const imports: MemoryFile[] = [];
+  const importsList = findImports(content);
+
+  for (const { start, _end, path: importPath } of importsList) {
+    // Add content before this import
+    result += content.substring(lastIndex, start);
+    lastIndex = _end;
+
+    // Skip if inside a code region
+    if (codeRegions.some(([s, e]) => start >= s && start < e)) {
+      result += `@${importPath}`;
+      continue;
+    }
+    // Validate import path to prevent path traversal attacks
+    if (!validateImportPath(importPath, basePath, [projectRoot || ''])) {
+      result += `<!-- Import failed: ${importPath} - Path traversal attempt -->`;
+      continue;
+    }
+    const fullPath = path.resolve(basePath, importPath);
+    if (importState.processedFiles.has(fullPath)) {
+      result += `<!-- File already processed: ${importPath} -->`;
+      continue;
+    }
    try {
-      // Check if the file exists
      await fs.access(fullPath);
-
-      // Read the imported file content
-      const importedContent = await fs.readFile(fullPath, 'utf-8');
-
-      if (debugMode) {
-        logger.debug(`Successfully read imported file: ${fullPath}`);
-      }
-
-      // Recursively process imports in the imported content
-      const processedImportedContent = await processImports(
-        importedContent,
+      const fileContent = await fs.readFile(fullPath, 'utf-8');
+      // Mark this file as processed for this import chain
+      const newImportState: ImportState = {
+        ...importState,
+        processedFiles: new Set(importState.processedFiles),
+        currentDepth: importState.currentDepth + 1,
+        currentFile: fullPath,
+      };
+      newImportState.processedFiles.add(fullPath);
+      const imported = await processImports(
+        fileContent,
        path.dirname(fullPath),
        debugMode,
-        {
-          ...importState,
-          processedFiles: new Set([...importState.processedFiles, fullPath]),
-          currentDepth: importState.currentDepth + 1,
-          currentFile: fullPath, // Set the current file being processed
-        },
+        newImportState,
+        projectRoot,
+        importFormat,
      );
-
-      // Replace the import statement with the processed content
-      processedContent = processedContent.replace(
-        match[0],
-        `<!-- Imported from: ${importPath} -->\n${processedImportedContent}\n<!-- End of import from: ${importPath} -->`,
-      );
-    } catch (error) {
-      const errorMessage =
-        error instanceof Error ? error.message : String(error);
-      if (debugMode) {
-        logger.error(`Failed to import ${importPath}: ${errorMessage}`);
+      result += `<!-- Imported from: ${importPath} -->\n${imported.content}\n<!-- End of import from: ${importPath} -->`;
+      imports.push(imported.importTree);
+    } catch (err: unknown) {
+      let message = 'Unknown error';
+      if (hasMessage(err)) {
+        message = err.message;
+      } else if (typeof err === 'string') {
+        message = err;
      }
-
-      // Replace the import with an error comment
-      processedContent = processedContent.replace(
-        match[0],
-        `<!-- Import failed: ${importPath} - ${errorMessage} -->`,
-      );
+      logger.error(`Failed to import ${importPath}: ${message}`);
+      result += `<!-- Import failed: ${importPath} - ${message} -->`;
    }
  }
+  // Add any remaining content after the last match
+  result += content.substring(lastIndex);

-  return processedContent;
+  return {
+    content: result,
+    importTree: {
+      path: importState.currentFile || 'unknown',
+      imports: imports.length > 0 ? imports : undefined,
+    },
+  };
 }

-/**
- * Validates import paths to ensure they are safe and within allowed directories
- *
- * @param importPath - The import path to validate
- * @param basePath - The base directory for resolving relative paths
- * @param allowedDirectories - Array of allowed directory paths
- * @returns Whether the import path is valid
- */
 export function validateImportPath(
  importPath: string,
  basePath: string,
@@ -209,6 +413,8 @@ export function validateImportPath(

  return allowedDirectories.some((allowedDir) => {
    const normalizedAllowedDir = path.resolve(allowedDir);
-    return resolvedPath.startsWith(normalizedAllowedDir);
+    const isSamePath = resolvedPath === normalizedAllowedDir;
+    const isSubPath = resolvedPath.startsWith(normalizedAllowedDir + path.sep);
+    return isSamePath || isSubPath;
  });
 }