feat: modular GEMINI.md imports with @file.md syntax (#1585) (#2230)

This commit is contained in:
Niladri Das
2025-06-30 04:21:47 +05:30
committed by GitHub
parent ada4061a45
commit f848d35758
5 changed files with 658 additions and 2 deletions

View File

@@ -14,6 +14,7 @@ import {
getAllGeminiMdFilenames,
} from '../tools/memoryTool.js';
import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
import { processImports } from './memoryImportProcessor.js';
// Simple console logger, similar to the one previously in CLI's config.ts
// TODO: Integrate with a more robust server-side logger if available/appropriate.
@@ -223,10 +224,18 @@ async function readGeminiMdFiles(
for (const filePath of filePaths) {
try {
const content = await fs.readFile(filePath, 'utf-8');
results.push({ filePath, content });
// Process imports in the content
const processedContent = await processImports(
content,
path.dirname(filePath),
debugMode,
);
results.push({ filePath, content: processedContent });
if (debugMode)
logger.debug(
`Successfully read: ${filePath} (Length: ${content.length})`,
`Successfully read and processed imports: ${filePath} (Length: ${processedContent.length})`,
);
} catch (error: unknown) {
const isTestEnv = process.env.NODE_ENV === 'test' || process.env.VITEST;

View File

@@ -0,0 +1,257 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import * as fs from 'fs/promises';
import * as path from 'path';
import { processImports, validateImportPath } from './memoryImportProcessor.js';
// Mock fs/promises
vi.mock('fs/promises');
const mockedFs = vi.mocked(fs);
// Mock console methods to capture warnings
const originalConsoleWarn = console.warn;
const originalConsoleError = console.error;
const originalConsoleDebug = console.debug;
// Unit tests for the import processor. `fs/promises` is mocked via vi.mock,
// so no real files are read; console methods are replaced with spies so the
// logger output can be asserted on.
describe('memoryImportProcessor', () => {
beforeEach(() => {
vi.clearAllMocks();
// Mock console methods
console.warn = vi.fn();
console.error = vi.fn();
console.debug = vi.fn();
});
afterEach(() => {
// Restore console methods
console.warn = originalConsoleWarn;
console.error = originalConsoleError;
console.debug = originalConsoleDebug;
});
describe('processImports', () => {
it('should process basic md file imports', async () => {
const content = 'Some content @./test.md more content';
const basePath = '/test/path';
const importedContent = '# Imported Content\nThis is imported.';
mockedFs.access.mockResolvedValue(undefined);
mockedFs.readFile.mockResolvedValue(importedContent);
const result = await processImports(content, basePath, true);
// The imported text is expected to be wrapped in start/end marker comments.
expect(result).toContain('<!-- Imported from: ./test.md -->');
expect(result).toContain(importedContent);
expect(result).toContain('<!-- End of import from: ./test.md -->');
expect(mockedFs.readFile).toHaveBeenCalledWith(
path.resolve(basePath, './test.md'),
'utf-8',
);
});
it('should warn and fail for non-md file imports', async () => {
const content = 'Some content @./instructions.txt more content';
const basePath = '/test/path';
const result = await processImports(content, basePath, true);
expect(console.warn).toHaveBeenCalledWith(
'[WARN] [ImportProcessor]',
'Import processor only supports .md files. Attempting to import non-md file: ./instructions.txt. This will fail.',
);
expect(result).toContain(
'<!-- Import failed: ./instructions.txt - Only .md files are supported -->',
);
// The extension check must short-circuit before any file is read.
expect(mockedFs.readFile).not.toHaveBeenCalled();
});
it('should handle circular imports', async () => {
const content = 'Content @./circular.md more content';
const basePath = '/test/path';
const circularContent = 'Circular @./main.md content';
mockedFs.access.mockResolvedValue(undefined);
// Every read returns the same text, so each imported file imports ./main.md.
mockedFs.readFile.mockResolvedValue(circularContent);
// Set up the import state to simulate we're already processing main.md
const importState = {
processedFiles: new Set<string>(),
maxDepth: 10,
currentDepth: 0,
currentFile: '/test/path/main.md', // Simulate we're processing main.md
};
const result = await processImports(content, basePath, true, importState);
// The circular import should be detected when processing the nested import
expect(result).toContain('<!-- Circular import detected: ./main.md -->');
});
it('should handle file not found errors', async () => {
const content = 'Content @./nonexistent.md more content';
const basePath = '/test/path';
// A rejected fs.access simulates the missing file.
mockedFs.access.mockRejectedValue(new Error('File not found'));
const result = await processImports(content, basePath, true);
expect(result).toContain(
'<!-- Import failed: ./nonexistent.md - File not found -->',
);
expect(console.error).toHaveBeenCalledWith(
'[ERROR] [ImportProcessor]',
'Failed to import ./nonexistent.md: File not found',
);
});
it('should respect max depth limit', async () => {
const content = 'Content @./deep.md more content';
const basePath = '/test/path';
const deepContent = 'Deep @./deeper.md content';
mockedFs.access.mockResolvedValue(undefined);
mockedFs.readFile.mockResolvedValue(deepContent);
// currentDepth already equals maxDepth, so no import may be resolved.
const importState = {
processedFiles: new Set<string>(),
maxDepth: 1,
currentDepth: 1,
};
const result = await processImports(content, basePath, true, importState);
expect(console.warn).toHaveBeenCalledWith(
'[WARN] [ImportProcessor]',
'Maximum import depth (1) reached. Stopping import processing.',
);
// The content is expected back unchanged, import directive included.
expect(result).toBe(content);
});
it('should handle nested imports recursively', async () => {
const content = 'Main @./nested.md content';
const basePath = '/test/path';
const nestedContent = 'Nested @./inner.md content';
const innerContent = 'Inner content';
mockedFs.access.mockResolvedValue(undefined);
// Reads are answered in call order: nested.md first, then inner.md.
mockedFs.readFile
.mockResolvedValueOnce(nestedContent)
.mockResolvedValueOnce(innerContent);
const result = await processImports(content, basePath, true);
expect(result).toContain('<!-- Imported from: ./nested.md -->');
expect(result).toContain('<!-- Imported from: ./inner.md -->');
expect(result).toContain(innerContent);
});
// Despite the name, this asserts that an absolute path outside basePath is
// rejected: processImports only allows paths inside basePath.
it('should handle absolute paths in imports', async () => {
const content = 'Content @/absolute/path/file.md more content';
const basePath = '/test/path';
const importedContent = 'Absolute path content';
mockedFs.access.mockResolvedValue(undefined);
mockedFs.readFile.mockResolvedValue(importedContent);
const result = await processImports(content, basePath, true);
expect(result).toContain(
'<!-- Import failed: /absolute/path/file.md - Path traversal attempt -->',
);
});
it('should handle multiple imports in same content', async () => {
const content = 'Start @./first.md middle @./second.md end';
const basePath = '/test/path';
const firstContent = 'First content';
const secondContent = 'Second content';
mockedFs.access.mockResolvedValue(undefined);
// Reads are answered in call order: first.md, then second.md.
mockedFs.readFile
.mockResolvedValueOnce(firstContent)
.mockResolvedValueOnce(secondContent);
const result = await processImports(content, basePath, true);
expect(result).toContain('<!-- Imported from: ./first.md -->');
expect(result).toContain('<!-- Imported from: ./second.md -->');
expect(result).toContain(firstContent);
expect(result).toContain(secondContent);
});
});
describe('validateImportPath', () => {
it('should reject URLs', () => {
expect(
validateImportPath('https://example.com/file.md', '/base', [
'/allowed',
]),
).toBe(false);
expect(
validateImportPath('http://example.com/file.md', '/base', ['/allowed']),
).toBe(false);
expect(
validateImportPath('file:///path/to/file.md', '/base', ['/allowed']),
).toBe(false);
});
it('should allow paths within allowed directories', () => {
expect(validateImportPath('./file.md', '/base', ['/base'])).toBe(true);
// Negative control: ../file.md resolves to /file.md, which is not under /allowed.
expect(validateImportPath('../file.md', '/base', ['/allowed'])).toBe(
false,
);
expect(
validateImportPath('/allowed/sub/file.md', '/base', ['/allowed']),
).toBe(true);
});
it('should reject paths outside allowed directories', () => {
expect(
validateImportPath('/forbidden/file.md', '/base', ['/allowed']),
).toBe(false);
expect(validateImportPath('../../../file.md', '/base', ['/base'])).toBe(
false,
);
});
it('should handle multiple allowed directories', () => {
// ./file.md resolves under /base, which is not one of the allowed directories.
expect(
validateImportPath('./file.md', '/base', ['/allowed1', '/allowed2']),
).toBe(false);
expect(
validateImportPath('/allowed1/file.md', '/base', [
'/allowed1',
'/allowed2',
]),
).toBe(true);
expect(
validateImportPath('/allowed2/file.md', '/base', [
'/allowed1',
'/allowed2',
]),
).toBe(true);
});
it('should handle relative paths correctly', () => {
expect(validateImportPath('file.md', '/base', ['/base'])).toBe(true);
expect(validateImportPath('./file.md', '/base', ['/base'])).toBe(true);
// ../file.md resolves to /file.md, which is not under /parent.
expect(validateImportPath('../file.md', '/base', ['/parent'])).toBe(
false,
);
});
it('should handle absolute paths correctly', () => {
expect(
validateImportPath('/allowed/file.md', '/base', ['/allowed']),
).toBe(true);
expect(
validateImportPath('/forbidden/file.md', '/base', ['/allowed']),
).toBe(false);
});
});
});

View File

@@ -0,0 +1,214 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import * as fs from 'fs/promises';
import * as path from 'path';
// Minimal console-backed logger for the import processor. Every message is
// prefixed with its level and the "[ImportProcessor]" tag. `console` is looked
// up at call time (not captured up front) so replaced console methods are honored.
const logger = {
  /** Forwards the arguments to console.debug behind the DEBUG prefix. */
  debug(...args: unknown[]): void {
    console.debug('[DEBUG] [ImportProcessor]', ...args);
  },
  /** Forwards the arguments to console.warn behind the WARN prefix. */
  warn(...args: unknown[]): void {
    console.warn('[WARN] [ImportProcessor]', ...args);
  },
  /** Forwards the arguments to console.error behind the ERROR prefix. */
  error(...args: unknown[]): void {
    console.error('[ERROR] [ImportProcessor]', ...args);
  },
};
/**
 * Interface for tracking import processing state to prevent circular imports
 * and unbounded recursion. A fresh copy is handed to every nested
 * processImports call.
 */
interface ImportState {
  // Resolved paths of the files already imported along the current chain;
  // an import whose resolved path is in this set is not imported again.
  processedFiles: Set<string>;
  // Import processing stops once currentDepth reaches this value.
  maxDepth: number;
  // Nesting level of the content being processed; incremented by one for
  // each nested import.
  currentDepth: number;
  // Resolved path of the file whose content is being processed; an import
  // that resolves back to it is reported as circular.
  currentFile?: string; // Track the current file being processed
}
/**
 * Processes import statements in GEMINI.md content.
 * Supports `@path/to/file.md` syntax for importing content from other files.
 *
 * Every import directive is replaced, at the position where it appears, by
 * either the recursively processed content of the referenced file (wrapped in
 * "Imported from" / "End of import from" marker comments) or an HTML comment
 * explaining why the import was not performed.
 *
 * An `@` that directly follows a word character (as in `user@example.com`) is
 * not treated as an import directive and is left untouched.
 *
 * @param content - The content to process for imports
 * @param basePath - The directory path where the current file is located
 * @param debugMode - Whether to enable debug logging
 * @param importState - State tracking for circular import prevention
 * @returns Processed content with imports resolved
 */
export async function processImports(
  content: string,
  basePath: string,
  debugMode: boolean = false,
  importState: ImportState = {
    processedFiles: new Set(),
    maxDepth: 10,
    currentDepth: 0,
  },
): Promise<string> {
  if (importState.currentDepth >= importState.maxDepth) {
    if (debugMode) {
      logger.warn(
        `Maximum import depth (${importState.maxDepth}) reached. Stopping import processing.`,
      );
    }
    return content;
  }

  // Matches @path/to/file.ext (any extension; non-.md files are rejected
  // later with an explicit message). Supports both @path/to/file.md and
  // @./path/to/file.md syntax.
  const importRegex = /@([./]?[^\s\n]+\.[^\s\n]+)/g;

  // The output is assembled from slices of the original content and the
  // replacement for each directive. Splicing by match position (instead of
  // String.prototype.replace) guarantees that:
  //   - "$&", "$'", "$1", ... inside imported content are kept literally, and
  //   - text inserted by an earlier import is never mistaken for a directive.
  const pieces: string[] = [];
  let cursor = 0;
  let match: RegExpExecArray | null;

  while ((match = importRegex.exec(content)) !== null) {
    const directive = match[0];
    const importPath = match[1];
    const start = match.index;

    // "user@example.com" is an e-mail address, not an import: skip any "@"
    // glued to a preceding word character. charAt(-1) yields '' at offset 0.
    if (/\w/.test(content.charAt(start - 1))) {
      continue;
    }

    const replacement = await resolveImportDirective(
      importPath,
      basePath,
      debugMode,
      importState,
    );
    pieces.push(content.slice(cursor, start), replacement);
    cursor = start + directive.length;
  }

  pieces.push(content.slice(cursor));
  return pieces.join('');
}

/**
 * Computes the text that replaces a single `@importPath` directive: the
 * wrapped, recursively processed file content on success, otherwise an HTML
 * comment describing the failure. Never throws; read errors are reported in
 * the returned comment.
 */
async function resolveImportDirective(
  importPath: string,
  basePath: string,
  debugMode: boolean,
  importState: ImportState,
): Promise<string> {
  // Validate import path to prevent path traversal attacks
  if (!validateImportPath(importPath, basePath, [basePath])) {
    return `<!-- Import failed: ${importPath} - Path traversal attempt -->`;
  }

  // Only markdown files may be imported; this warning is emitted regardless
  // of debugMode.
  if (!importPath.endsWith('.md')) {
    logger.warn(
      `Import processor only supports .md files. Attempting to import non-md file: ${importPath}. This will fail.`,
    );
    return `<!-- Import failed: ${importPath} - Only .md files are supported -->`;
  }

  const fullPath = path.resolve(basePath, importPath);
  if (debugMode) {
    logger.debug(`Processing import: ${importPath} -> ${fullPath}`);
  }

  // Circular import: the file being processed imports itself.
  if (importState.currentFile === fullPath) {
    if (debugMode) {
      logger.warn(`Circular import detected: ${importPath}`);
    }
    return `<!-- Circular import detected: ${importPath} -->`;
  }

  // The target was already imported earlier in this import chain.
  if (importState.processedFiles.has(fullPath)) {
    if (debugMode) {
      logger.warn(`File already processed in this chain: ${importPath}`);
    }
    return `<!-- File already processed: ${importPath} -->`;
  }

  // Same self-import check, but resolved against the current file's own
  // directory in case the caller supplied a basePath that differs from it.
  if (importState.currentFile) {
    const currentFileDir = path.dirname(importState.currentFile);
    const potentialCircularPath = path.resolve(currentFileDir, importPath);
    if (potentialCircularPath === importState.currentFile) {
      if (debugMode) {
        logger.warn(`Circular import detected: ${importPath}`);
      }
      return `<!-- Circular import detected: ${importPath} -->`;
    }
  }

  try {
    // Check if the file exists
    await fs.access(fullPath);
    // Read the imported file content
    const importedContent = await fs.readFile(fullPath, 'utf-8');
    if (debugMode) {
      logger.debug(`Successfully read imported file: ${fullPath}`);
    }

    // Recursively process imports in the imported content. The state is
    // copied so sibling imports do not see each other's chain.
    const processedImportedContent = await processImports(
      importedContent,
      path.dirname(fullPath),
      debugMode,
      {
        ...importState,
        processedFiles: new Set([...importState.processedFiles, fullPath]),
        currentDepth: importState.currentDepth + 1,
        currentFile: fullPath, // Set the current file being processed
      },
    );

    return `<!-- Imported from: ${importPath} -->\n${processedImportedContent}\n<!-- End of import from: ${importPath} -->`;
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    if (debugMode) {
      logger.error(`Failed to import ${importPath}: ${errorMessage}`);
    }
    return `<!-- Import failed: ${importPath} - ${errorMessage} -->`;
  }
}
/**
 * Validates import paths to ensure they are safe and within allowed directories.
 *
 * URL-style paths (`file://`, `http://`, `https://`, in any letter case) are
 * always rejected. Any other path is resolved against `basePath` and accepted
 * only when the result is one of the allowed directories or lies beneath one
 * of them. Containment is decided on whole path segments, so `/base-evil` is
 * not considered to be inside `/base`.
 *
 * @param importPath - The import path to validate
 * @param basePath - The base directory for resolving relative paths
 * @param allowedDirectories - Array of allowed directory paths
 * @returns Whether the import path is valid
 */
export function validateImportPath(
  importPath: string,
  basePath: string,
  allowedDirectories: string[],
): boolean {
  // Reject URLs (scheme match is case-insensitive: "HTTPS://" is still a URL)
  if (/^(file|https?):\/\//i.test(importPath)) {
    return false;
  }

  const resolvedPath = path.resolve(basePath, importPath);

  return allowedDirectories.some((allowedDir) => {
    // The relative path from the allowed directory to the target tells us
    // whether the target is inside it: it must not climb out via a leading
    // ".." segment, and must not be absolute (different drive on Windows).
    const relative = path.relative(path.resolve(allowedDir), resolvedPath);
    const escapesAllowedDir =
      relative === '..' ||
      relative.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relative);
    return !escapesAllowedDir;
  });
}