fix: CLAUDE.md compatibility for GEMINI.md '@' file import behavior (#2978)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Allen Hutchison <adh@google.com>
This commit is contained in:
Niladri Das
2025-07-31 22:06:50 +05:30
committed by GitHub
parent ae86c7ba05
commit 9a6422f331
14 changed files with 1355 additions and 215 deletions

View File

@@ -39,6 +39,7 @@
"html-to-text": "^9.0.5",
"https-proxy-agent": "^7.0.6",
"ignore": "^7.0.0",
"marked": "^15.0.12",
"micromatch": "^4.0.8",
"open": "^10.1.2",
"shell-quote": "^1.8.3",

View File

@@ -305,10 +305,12 @@ Subdir memory
false,
new FileDiscoveryService(projectRoot),
[],
'tree',
{
respectGitIgnore: true,
respectGeminiIgnore: true,
},
200, // maxDirs parameter
);
expect(result).toEqual({
@@ -334,6 +336,7 @@ My code memory
true,
new FileDiscoveryService(projectRoot),
[],
'tree', // importFormat
{
respectGitIgnore: true,
respectGeminiIgnore: true,

View File

@@ -43,7 +43,7 @@ async function findProjectRoot(startDir: string): Promise<string | null> {
while (true) {
const gitPath = path.join(currentDir, '.git');
try {
const stats = await fs.stat(gitPath);
const stats = await fs.lstat(gitPath);
if (stats.isDirectory()) {
return currentDir;
}
@@ -230,6 +230,7 @@ async function getGeminiMdFilePathsInternal(
async function readGeminiMdFiles(
filePaths: string[],
debugMode: boolean,
importFormat: 'flat' | 'tree' = 'tree',
): Promise<GeminiFileContent[]> {
const results: GeminiFileContent[] = [];
for (const filePath of filePaths) {
@@ -237,16 +238,19 @@ async function readGeminiMdFiles(
const content = await fs.readFile(filePath, 'utf-8');
// Process imports in the content
const processedContent = await processImports(
const processedResult = await processImports(
content,
path.dirname(filePath),
debugMode,
undefined,
undefined,
importFormat,
);
results.push({ filePath, content: processedContent });
results.push({ filePath, content: processedResult.content });
if (debugMode)
logger.debug(
`Successfully read and processed imports: ${filePath} (Length: ${processedContent.length})`,
`Successfully read and processed imports: ${filePath} (Length: ${processedResult.content.length})`,
);
} catch (error: unknown) {
const isTestEnv = process.env.NODE_ENV === 'test' || process.env.VITEST;
@@ -293,12 +297,13 @@ export async function loadServerHierarchicalMemory(
debugMode: boolean,
fileService: FileDiscoveryService,
extensionContextFilePaths: string[] = [],
importFormat: 'flat' | 'tree' = 'tree',
fileFilteringOptions?: FileFilteringOptions,
maxDirs: number = 200,
): Promise<{ memoryContent: string; fileCount: number }> {
if (debugMode)
logger.debug(
`Loading server hierarchical memory for CWD: ${currentWorkingDirectory}`,
`Loading server hierarchical memory for CWD: ${currentWorkingDirectory} (importFormat: ${importFormat})`,
);
// For the server, homedir() refers to the server process's home.
@@ -317,7 +322,11 @@ export async function loadServerHierarchicalMemory(
if (debugMode) logger.debug('No GEMINI.md files found in hierarchy.');
return { memoryContent: '', fileCount: 0 };
}
const contentsWithPaths = await readGeminiMdFiles(filePaths, debugMode);
const contentsWithPaths = await readGeminiMdFiles(
filePaths,
debugMode,
importFormat,
);
// Pass CWD for relative path display in concatenated content
const combinedInstructions = concatenateInstructions(
contentsWithPaths,

File diff suppressed because it is too large Load Diff

View File

@@ -6,6 +6,7 @@
import * as fs from 'fs/promises';
import * as path from 'path';
import { marked } from 'marked';
// Simple console logger for import processing
const logger = {
@@ -29,15 +30,176 @@ interface ImportState {
currentFile?: string; // Track the current file being processed
}
/**
 * Interface representing a file in the import tree.
 *
 * Each node names one file; files it pulls in via `@path` imports hang off
 * `imports`, which makes the structure recursive.
 */
export interface MemoryFile {
  /** Path identifying this file within the import tree. */
  path: string;
  /**
   * Direct imports, in the order they were imported.
   * Optional: left unset when the file has no recorded imports.
   */
  imports?: MemoryFile[];
}
/**
 * Result of processing imports.
 *
 * Bundles the rewritten text together with a description of which files were
 * imported while producing it.
 */
export interface ProcessImportsResult {
  /** The content after import processing. */
  content: string;
  /** Root node of the import tree for the processed content. */
  importTree: MemoryFile;
}
/**
 * Locates the project root by walking upwards from `startDir` until a
 * directory containing a `.git` directory is found.
 *
 * Only a `.git` entry that is itself a directory counts (checked with
 * `lstat`); a `.git` regular file is passed over and the walk continues.
 *
 * @param startDir - Directory to start searching from; resolved to an
 *   absolute path first.
 * @returns The nearest ancestor (or `startDir` itself) holding a `.git`
 *   directory, or the resolved `startDir` when the filesystem root is reached
 *   without finding one.
 */
async function findProjectRoot(startDir: string): Promise<string> {
  const origin = path.resolve(startDir);
  let candidate = origin;

  for (;;) {
    let holdsGitDir = false;
    try {
      const entry = await fs.lstat(path.join(candidate, '.git'));
      holdsGitDir = entry.isDirectory();
    } catch {
      // No usable .git entry at this level; keep climbing.
    }
    if (holdsGitDir) {
      return candidate;
    }

    const parent = path.dirname(candidate);
    if (parent === candidate) {
      // dirname() is a fixed point at the filesystem root: nothing left to
      // search, so fall back to where we started.
      return origin;
    }
    candidate = parent;
  }
}
/**
 * Type guard for caught values: narrows `err` to an object exposing a string
 * `message` property, so the message can be read without assuming the value
 * is an `Error` instance.
 *
 * @param err - Any caught value.
 * @returns `true` when `err` is a non-null object whose `message` is a string.
 */
function hasMessage(err: unknown): err is { message: string } {
  if (typeof err !== 'object' || err === null) {
    return false;
  }
  if (!('message' in err)) {
    return false;
  }
  return typeof (err as { message: unknown }).message === 'string';
}
// Import scanning helpers. Code block and inline code regions are located separately by findCodeRegions (using marked).
/**
 * Scans `content` for `@path` import references by inspecting characters
 * directly rather than using a regular expression.
 *
 * An `@` only starts an import when it is the first character of the content
 * or is preceded by whitespace. The path is everything after the `@` up to
 * the next whitespace character (or the end of the content) and is accepted
 * only when it is non-empty and begins with `.`, `/` or an ASCII letter.
 *
 * @param content - Text to scan.
 * @returns One entry per import, in order of appearance: `start` is the index
 *   of the `@`, `_end` is the index just past the path, and `path` is the
 *   text between them without the `@`.
 */
function findImports(
  content: string,
): Array<{ start: number; _end: number; path: string }> {
  const found: Array<{ start: number; _end: number; path: string }> = [];
  const total = content.length;

  let at = content.indexOf('@');
  while (at !== -1) {
    // By default resume right after this '@' (the "not a word start" case).
    let resumeAt = at + 1;

    const atWordStart = at === 0 || isWhitespace(content.charAt(at - 1));
    if (atWordStart) {
      // Advance to the first whitespace character after the '@'.
      let stop = at + 1;
      while (stop < total && !isWhitespace(content.charAt(stop))) {
        stop += 1;
      }

      const target = content.slice(at + 1, stop);
      if (target.length > 0) {
        const lead = target.charAt(0);
        if (lead === '.' || lead === '/' || isLetter(lead)) {
          found.push({ start: at, _end: stop, path: target });
        }
      }

      // Skip the terminating whitespace character as well.
      resumeAt = stop + 1;
    }

    at = content.indexOf('@', resumeAt);
  }

  return found;
}

/**
 * Reports whether `char` is exactly one of: space, tab, line feed or
 * carriage return.
 */
function isWhitespace(char: string): boolean {
  switch (char) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}

/**
 * Reports whether the first UTF-16 code unit of `char` is an ASCII letter
 * (A-Z or a-z).
 */
function isLetter(char: string): boolean {
  const code = char.charCodeAt(0);
  const isUpper = code >= 65 && code <= 90; // A-Z
  const isLower = code >= 97 && code <= 122; // a-z
  return isUpper || isLower;
}
/**
 * Locates code regions (tokens of type `code` and `codespan` produced by
 * `marked.lexer`) inside `content`.
 *
 * Positions are recovered by searching `content` for each token's `raw` text.
 * All occurrences of a given raw string are collected once, and each matching
 * token consumes the next unused occurrence, on the assumption that tokens
 * are visited in order of appearance.
 *
 * @param content - Markdown text to analyse.
 * @returns `[start, end)` index pairs, one per code token that could be
 *   located, in the order the tokens were visited.
 */
function findCodeRegions(content: string): Array<[number, number]> {
  type LexedToken = { type: string; raw: string; tokens?: unknown[] };

  const regions: Array<[number, number]> = [];
  // raw text -> start offsets in `content` not yet claimed by a token
  const unclaimedOffsets = new Map<string, number[]>();

  // Returns the (lazily built) queue of offsets at which `raw` occurs.
  const offsetsFor = (raw: string): number[] => {
    let queue = unclaimedOffsets.get(raw);
    if (queue === undefined) {
      queue = [];
      for (
        let at = content.indexOf(raw, 0);
        at !== -1;
        at = content.indexOf(raw, at + 1)
      ) {
        queue.push(at);
      }
      unclaimedOffsets.set(raw, queue);
    }
    return queue;
  };

  // Pre-order walk: record the token itself, then descend into its children.
  const visit = (token: LexedToken): void => {
    if (token.type === 'code' || token.type === 'codespan') {
      // Claim the earliest occurrence not already used by a previous token.
      const begin = offsetsFor(token.raw).shift();
      if (begin !== undefined) {
        regions.push([begin, begin + token.raw.length]);
      }
    }
    if ('tokens' in token && token.tokens) {
      for (const child of token.tokens) {
        visit(child as LexedToken);
      }
    }
  };

  for (const token of marked.lexer(content)) {
    visit(token);
  }
  return regions;
}
/**
* Processes import statements in GEMINI.md content
* Supports @path/to/file.md syntax for importing content from other files
*
* Supports @path/to/file syntax for importing content from other files
* @param content - The content to process for imports
* @param basePath - The directory path where the current file is located
* @param debugMode - Whether to enable debug logging
* @param importState - State tracking for circular import prevention
* @returns Processed content with imports resolved
* @param projectRoot - The project root directory for allowed directories
* @param importFormat - The format of the import tree
* @returns Processed content with imports resolved and import tree
*/
export async function processImports(
content: string,
@@ -45,156 +207,198 @@ export async function processImports(
debugMode: boolean = false,
importState: ImportState = {
processedFiles: new Set(),
maxDepth: 10,
maxDepth: 5,
currentDepth: 0,
},
): Promise<string> {
projectRoot?: string,
importFormat: 'flat' | 'tree' = 'tree',
): Promise<ProcessImportsResult> {
if (!projectRoot) {
projectRoot = await findProjectRoot(basePath);
}
if (importState.currentDepth >= importState.maxDepth) {
if (debugMode) {
logger.warn(
`Maximum import depth (${importState.maxDepth}) reached. Stopping import processing.`,
);
}
return content;
return {
content,
importTree: { path: importState.currentFile || 'unknown' },
};
}
// Regex to match @path/to/file imports (supports any file extension)
// Supports both @path/to/file.md and @./path/to/file.md syntax
const importRegex = /@([./]?[^\s\n]+\.[^\s\n]+)/g;
// --- FLAT FORMAT LOGIC ---
if (importFormat === 'flat') {
// Use a queue to process files in order of first encounter, and a set to avoid duplicates
const flatFiles: Array<{ path: string; content: string }> = [];
// Track processed files across the entire operation
const processedFiles = new Set<string>();
let processedContent = content;
let match: RegExpExecArray | null;
// Helper to recursively process imports
async function processFlat(
fileContent: string,
fileBasePath: string,
filePath: string,
depth: number,
) {
// Normalize the file path to ensure consistent comparison
const normalizedPath = path.normalize(filePath);
// Process all imports in the content
while ((match = importRegex.exec(content)) !== null) {
const importPath = match[1];
// Skip if already processed
if (processedFiles.has(normalizedPath)) return;
// Validate import path to prevent path traversal attacks
if (!validateImportPath(importPath, basePath, [basePath])) {
processedContent = processedContent.replace(
match[0],
`<!-- Import failed: ${importPath} - Path traversal attempt -->`,
);
continue;
}
// Mark as processed before processing to prevent infinite recursion
processedFiles.add(normalizedPath);
// Check if the import is for a non-md file and warn
if (!importPath.endsWith('.md')) {
logger.warn(
`Import processor only supports .md files. Attempting to import non-md file: ${importPath}. This will fail.`,
);
// Replace the import with a warning comment
processedContent = processedContent.replace(
match[0],
`<!-- Import failed: ${importPath} - Only .md files are supported -->`,
);
continue;
}
// Add this file to the flat list
flatFiles.push({ path: normalizedPath, content: fileContent });
const fullPath = path.resolve(basePath, importPath);
// Find imports in this file
const codeRegions = findCodeRegions(fileContent);
const imports = findImports(fileContent);
if (debugMode) {
logger.debug(`Processing import: ${importPath} -> ${fullPath}`);
}
// Process imports in reverse order to handle indices correctly
for (let i = imports.length - 1; i >= 0; i--) {
const { start, _end, path: importPath } = imports[i];
// Check for circular imports - if we're already processing this file
if (importState.currentFile === fullPath) {
if (debugMode) {
logger.warn(`Circular import detected: ${importPath}`);
}
// Replace the import with a warning comment
processedContent = processedContent.replace(
match[0],
`<!-- Circular import detected: ${importPath} -->`,
);
continue;
}
// Check if we've already processed this file in this import chain
if (importState.processedFiles.has(fullPath)) {
if (debugMode) {
logger.warn(`File already processed in this chain: ${importPath}`);
}
// Replace the import with a warning comment
processedContent = processedContent.replace(
match[0],
`<!-- File already processed: ${importPath} -->`,
);
continue;
}
// Check for potential circular imports by looking at the import chain
if (importState.currentFile) {
const currentFileDir = path.dirname(importState.currentFile);
const potentialCircularPath = path.resolve(currentFileDir, importPath);
if (potentialCircularPath === importState.currentFile) {
if (debugMode) {
logger.warn(`Circular import detected: ${importPath}`);
// Skip if inside a code region
if (
codeRegions.some(
([regionStart, regionEnd]) =>
start >= regionStart && start < regionEnd,
)
) {
continue;
}
// Validate import path
if (
!validateImportPath(importPath, fileBasePath, [projectRoot || ''])
) {
continue;
}
const fullPath = path.resolve(fileBasePath, importPath);
const normalizedFullPath = path.normalize(fullPath);
// Skip if already processed
if (processedFiles.has(normalizedFullPath)) continue;
try {
await fs.access(fullPath);
const importedContent = await fs.readFile(fullPath, 'utf-8');
// Process the imported file
await processFlat(
importedContent,
path.dirname(fullPath),
normalizedFullPath,
depth + 1,
);
} catch (error) {
if (debugMode) {
logger.warn(
`Failed to import ${fullPath}: ${hasMessage(error) ? error.message : 'Unknown error'}`,
);
}
// Continue with other imports even if one fails
}
// Replace the import with a warning comment
processedContent = processedContent.replace(
match[0],
`<!-- Circular import detected: ${importPath} -->`,
);
continue;
}
}
// Start with the root file (current file)
const rootPath = path.normalize(
importState.currentFile || path.resolve(basePath),
);
await processFlat(content, basePath, rootPath, 0);
// Concatenate all unique files in order, Claude-style
const flatContent = flatFiles
.map(
(f) =>
`--- File: ${f.path} ---\n${f.content.trim()}\n--- End of File: ${f.path} ---`,
)
.join('\n\n');
return {
content: flatContent,
importTree: { path: rootPath }, // Tree not meaningful in flat mode
};
}
// --- TREE FORMAT LOGIC (existing) ---
const codeRegions = findCodeRegions(content);
let result = '';
let lastIndex = 0;
const imports: MemoryFile[] = [];
const importsList = findImports(content);
for (const { start, _end, path: importPath } of importsList) {
// Add content before this import
result += content.substring(lastIndex, start);
lastIndex = _end;
// Skip if inside a code region
if (codeRegions.some(([s, e]) => start >= s && start < e)) {
result += `@${importPath}`;
continue;
}
// Validate import path to prevent path traversal attacks
if (!validateImportPath(importPath, basePath, [projectRoot || ''])) {
result += `<!-- Import failed: ${importPath} - Path traversal attempt -->`;
continue;
}
const fullPath = path.resolve(basePath, importPath);
if (importState.processedFiles.has(fullPath)) {
result += `<!-- File already processed: ${importPath} -->`;
continue;
}
try {
// Check if the file exists
await fs.access(fullPath);
// Read the imported file content
const importedContent = await fs.readFile(fullPath, 'utf-8');
if (debugMode) {
logger.debug(`Successfully read imported file: ${fullPath}`);
}
// Recursively process imports in the imported content
const processedImportedContent = await processImports(
importedContent,
const fileContent = await fs.readFile(fullPath, 'utf-8');
// Mark this file as processed for this import chain
const newImportState: ImportState = {
...importState,
processedFiles: new Set(importState.processedFiles),
currentDepth: importState.currentDepth + 1,
currentFile: fullPath,
};
newImportState.processedFiles.add(fullPath);
const imported = await processImports(
fileContent,
path.dirname(fullPath),
debugMode,
{
...importState,
processedFiles: new Set([...importState.processedFiles, fullPath]),
currentDepth: importState.currentDepth + 1,
currentFile: fullPath, // Set the current file being processed
},
newImportState,
projectRoot,
importFormat,
);
// Replace the import statement with the processed content
processedContent = processedContent.replace(
match[0],
`<!-- Imported from: ${importPath} -->\n${processedImportedContent}\n<!-- End of import from: ${importPath} -->`,
);
} catch (error) {
const errorMessage =
error instanceof Error ? error.message : String(error);
if (debugMode) {
logger.error(`Failed to import ${importPath}: ${errorMessage}`);
result += `<!-- Imported from: ${importPath} -->\n${imported.content}\n<!-- End of import from: ${importPath} -->`;
imports.push(imported.importTree);
} catch (err: unknown) {
let message = 'Unknown error';
if (hasMessage(err)) {
message = err.message;
} else if (typeof err === 'string') {
message = err;
}
// Replace the import with an error comment
processedContent = processedContent.replace(
match[0],
`<!-- Import failed: ${importPath} - ${errorMessage} -->`,
);
logger.error(`Failed to import ${importPath}: ${message}`);
result += `<!-- Import failed: ${importPath} - ${message} -->`;
}
}
// Add any remaining content after the last match
result += content.substring(lastIndex);
return processedContent;
return {
content: result,
importTree: {
path: importState.currentFile || 'unknown',
imports: imports.length > 0 ? imports : undefined,
},
};
}
/**
* Validates import paths to ensure they are safe and within allowed directories
*
* @param importPath - The import path to validate
* @param basePath - The base directory for resolving relative paths
* @param allowedDirectories - Array of allowed directory paths
* @returns Whether the import path is valid
*/
export function validateImportPath(
importPath: string,
basePath: string,
@@ -209,6 +413,8 @@ export function validateImportPath(
return allowedDirectories.some((allowedDir) => {
const normalizedAllowedDir = path.resolve(allowedDir);
return resolvedPath.startsWith(normalizedAllowedDir);
const isSamePath = resolvedPath === normalizedAllowedDir;
const isSubPath = resolvedPath.startsWith(normalizedAllowedDir + path.sep);
return isSamePath || isSubPath;
});
}