feat: modular GEMINI.md imports with @file.md syntax (#1585) (#2230)

2025-12-19 09:33:53 +00:00 · 2025-06-30 04:21:47 +05:30
parent ada4061a45
commit f848d35758
5 changed files with 658 additions and 2 deletions
--- a/packages/core/src/utils/memoryDiscovery.ts
+++ b/packages/core/src/utils/memoryDiscovery.ts
@@ -14,6 +14,7 @@ import {
  getAllGeminiMdFilenames,
 } from '../tools/memoryTool.js';
 import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
+import { processImports } from './memoryImportProcessor.js';

 // Simple console logger, similar to the one previously in CLI's config.ts
 // TODO: Integrate with a more robust server-side logger if available/appropriate.
@@ -223,10 +224,18 @@ async function readGeminiMdFiles(
  for (const filePath of filePaths) {
    try {
      const content = await fs.readFile(filePath, 'utf-8');
-      results.push({ filePath, content });
+
+      // Process imports in the content
+      const processedContent = await processImports(
+        content,
+        path.dirname(filePath),
+        debugMode,
+      );
+
+      results.push({ filePath, content: processedContent });
      if (debugMode)
        logger.debug(
-          `Successfully read: ${filePath} (Length: ${content.length})`,
+          `Successfully read and processed imports: ${filePath} (Length: ${processedContent.length})`,
        );
    } catch (error: unknown) {
      const isTestEnv = process.env.NODE_ENV === 'test' || process.env.VITEST;
--- a/packages/core/src/utils/memoryImportProcessor.test.ts
+++ b/packages/core/src/utils/memoryImportProcessor.test.ts
@@ -0,0 +1,257 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import { processImports, validateImportPath } from './memoryImportProcessor.js';
+
+// Replace fs/promises with a mock (no factory is given, so presumably vitest
+// auto-mocks it); each test configures access/readFile as it needs.
+vi.mock('fs/promises');
+const mockedFs = vi.mocked(fs);
+
+// Keep references to the real console methods so afterEach can restore them
+// after beforeEach swaps in vi.fn() stubs.
+const originalConsoleWarn = console.warn;
+const originalConsoleError = console.error;
+const originalConsoleDebug = console.debug;
+
+// Test suite for processImports and validateImportPath. fs/promises is mocked
+// and console.warn/error/debug are stubbed so logger output can be asserted.
+describe('memoryImportProcessor', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+    // Replace console methods with fresh stubs for every test
+    console.warn = vi.fn();
+    console.error = vi.fn();
+    console.debug = vi.fn();
+  });
+
+  afterEach(() => {
+    // Restore the real console methods saved at module level
+    console.warn = originalConsoleWarn;
+    console.error = originalConsoleError;
+    console.debug = originalConsoleDebug;
+  });
+
+  describe('processImports', () => {
+    // Happy path: the import token is replaced by the file content wrapped in
+    // "Imported from" / "End of import from" marker comments.
+    it('should process basic md file imports', async () => {
+      const content = 'Some content @./test.md more content';
+      const basePath = '/test/path';
+      const importedContent = '# Imported Content\nThis is imported.';
+
+      mockedFs.access.mockResolvedValue(undefined);
+      mockedFs.readFile.mockResolvedValue(importedContent);
+
+      const result = await processImports(content, basePath, true);
+
+      expect(result).toContain('<!-- Imported from: ./test.md -->');
+      expect(result).toContain(importedContent);
+      expect(result).toContain('<!-- End of import from: ./test.md -->');
+      expect(mockedFs.readFile).toHaveBeenCalledWith(
+        path.resolve(basePath, './test.md'),
+        'utf-8',
+      );
+    });
+
+    // Non-.md targets are rejected with a warning before any file is read.
+    it('should warn and fail for non-md file imports', async () => {
+      const content = 'Some content @./instructions.txt more content';
+      const basePath = '/test/path';
+
+      const result = await processImports(content, basePath, true);
+
+      expect(console.warn).toHaveBeenCalledWith(
+        '[WARN] [ImportProcessor]',
+        'Import processor only supports .md files. Attempting to import non-md file: ./instructions.txt. This will fail.',
+      );
+      expect(result).toContain(
+        '<!-- Import failed: ./instructions.txt - Only .md files are supported -->',
+      );
+      expect(mockedFs.readFile).not.toHaveBeenCalled();
+    });
+
+    // readFile returns the same content for every path, so circular.md imports
+    // main.md and the (mocked) main.md imports itself.
+    it('should handle circular imports', async () => {
+      const content = 'Content @./circular.md more content';
+      const basePath = '/test/path';
+      const circularContent = 'Circular @./main.md content';
+
+      mockedFs.access.mockResolvedValue(undefined);
+      mockedFs.readFile.mockResolvedValue(circularContent);
+
+      // Set up the import state to simulate we're already processing main.md
+      // NOTE(review): main.md is not in processedFiles, so it is still read
+      // once; detection fires on the self-import inside that nested read.
+      const importState = {
+        processedFiles: new Set<string>(),
+        maxDepth: 10,
+        currentDepth: 0,
+        currentFile: '/test/path/main.md', // Simulate we're processing main.md
+      };
+
+      const result = await processImports(content, basePath, true, importState);
+
+      // The circular import should be detected when processing the nested import
+      expect(result).toContain('<!-- Circular import detected: ./main.md -->');
+    });
+
+    // A rejected fs.access surfaces its message both in the replacement
+    // comment and (because debugMode is true) in the error log.
+    it('should handle file not found errors', async () => {
+      const content = 'Content @./nonexistent.md more content';
+      const basePath = '/test/path';
+
+      mockedFs.access.mockRejectedValue(new Error('File not found'));
+
+      const result = await processImports(content, basePath, true);
+
+      expect(result).toContain(
+        '<!-- Import failed: ./nonexistent.md - File not found -->',
+      );
+      expect(console.error).toHaveBeenCalledWith(
+        '[ERROR] [ImportProcessor]',
+        'Failed to import ./nonexistent.md: File not found',
+      );
+    });
+
+    // currentDepth already equals maxDepth, so the content comes back
+    // untouched; the fs mocks and deepContent below are never consulted.
+    it('should respect max depth limit', async () => {
+      const content = 'Content @./deep.md more content';
+      const basePath = '/test/path';
+      const deepContent = 'Deep @./deeper.md content';
+
+      mockedFs.access.mockResolvedValue(undefined);
+      mockedFs.readFile.mockResolvedValue(deepContent);
+
+      const importState = {
+        processedFiles: new Set<string>(),
+        maxDepth: 1,
+        currentDepth: 1,
+      };
+
+      const result = await processImports(content, basePath, true, importState);
+
+      expect(console.warn).toHaveBeenCalledWith(
+        '[WARN] [ImportProcessor]',
+        'Maximum import depth (1) reached. Stopping import processing.',
+      );
+      expect(result).toBe(content);
+    });
+
+    // readFile is consumed in call order: first nested.md, then inner.md.
+    it('should handle nested imports recursively', async () => {
+      const content = 'Main @./nested.md content';
+      const basePath = '/test/path';
+      const nestedContent = 'Nested @./inner.md content';
+      const innerContent = 'Inner content';
+
+      mockedFs.access.mockResolvedValue(undefined);
+      mockedFs.readFile
+        .mockResolvedValueOnce(nestedContent)
+        .mockResolvedValueOnce(innerContent);
+
+      const result = await processImports(content, basePath, true);
+
+      expect(result).toContain('<!-- Imported from: ./nested.md -->');
+      expect(result).toContain('<!-- Imported from: ./inner.md -->');
+      expect(result).toContain(innerContent);
+    });
+
+    // NOTE(review): despite the title, this asserts that an absolute path
+    // outside basePath is rejected; the fs mocks below are never reached.
+    it('should handle absolute paths in imports', async () => {
+      const content = 'Content @/absolute/path/file.md more content';
+      const basePath = '/test/path';
+      const importedContent = 'Absolute path content';
+
+      mockedFs.access.mockResolvedValue(undefined);
+      mockedFs.readFile.mockResolvedValue(importedContent);
+
+      const result = await processImports(content, basePath, true);
+
+      expect(result).toContain(
+        '<!-- Import failed: /absolute/path/file.md - Path traversal attempt -->',
+      );
+    });
+
+    // Imports are processed one after another, so the first readFile result
+    // belongs to first.md and the second to second.md.
+    it('should handle multiple imports in same content', async () => {
+      const content = 'Start @./first.md middle @./second.md end';
+      const basePath = '/test/path';
+      const firstContent = 'First content';
+      const secondContent = 'Second content';
+
+      mockedFs.access.mockResolvedValue(undefined);
+      mockedFs.readFile
+        .mockResolvedValueOnce(firstContent)
+        .mockResolvedValueOnce(secondContent);
+
+      const result = await processImports(content, basePath, true);
+
+      expect(result).toContain('<!-- Imported from: ./first.md -->');
+      expect(result).toContain('<!-- Imported from: ./second.md -->');
+      expect(result).toContain(firstContent);
+      expect(result).toContain(secondContent);
+    });
+  });
+
+  describe('validateImportPath', () => {
+    // file://, http:// and https:// prefixes are refused outright.
+    it('should reject URLs', () => {
+      expect(
+        validateImportPath('https://example.com/file.md', '/base', [
+          '/allowed',
+        ]),
+      ).toBe(false);
+      expect(
+        validateImportPath('http://example.com/file.md', '/base', ['/allowed']),
+      ).toBe(false);
+      expect(
+        validateImportPath('file:///path/to/file.md', '/base', ['/allowed']),
+      ).toBe(false);
+    });
+
+    // NOTE(review): the middle case is a rejection ('../file.md' from '/base'
+    // resolves to '/file.md', outside '/allowed') despite the title.
+    it('should allow paths within allowed directories', () => {
+      expect(validateImportPath('./file.md', '/base', ['/base'])).toBe(true);
+      expect(validateImportPath('../file.md', '/base', ['/allowed'])).toBe(
+        false,
+      );
+      expect(
+        validateImportPath('/allowed/sub/file.md', '/base', ['/allowed']),
+      ).toBe(true);
+    });
+
+    it('should reject paths outside allowed directories', () => {
+      expect(
+        validateImportPath('/forbidden/file.md', '/base', ['/allowed']),
+      ).toBe(false);
+      expect(validateImportPath('../../../file.md', '/base', ['/base'])).toBe(
+        false,
+      );
+    });
+
+    // './file.md' resolves under '/base', which is in neither allowed
+    // directory; the absolute paths each fall under one of them.
+    it('should handle multiple allowed directories', () => {
+      expect(
+        validateImportPath('./file.md', '/base', ['/allowed1', '/allowed2']),
+      ).toBe(false);
+      expect(
+        validateImportPath('/allowed1/file.md', '/base', [
+          '/allowed1',
+          '/allowed2',
+        ]),
+      ).toBe(true);
+      expect(
+        validateImportPath('/allowed2/file.md', '/base', [
+          '/allowed1',
+          '/allowed2',
+        ]),
+      ).toBe(true);
+    });
+
+    // Relative paths are resolved against basePath before the check.
+    it('should handle relative paths correctly', () => {
+      expect(validateImportPath('file.md', '/base', ['/base'])).toBe(true);
+      expect(validateImportPath('./file.md', '/base', ['/base'])).toBe(true);
+      expect(validateImportPath('../file.md', '/base', ['/parent'])).toBe(
+        false,
+      );
+    });
+
+    // Absolute paths ignore basePath and are judged on their own location.
+    it('should handle absolute paths correctly', () => {
+      expect(
+        validateImportPath('/allowed/file.md', '/base', ['/allowed']),
+      ).toBe(true);
+      expect(
+        validateImportPath('/forbidden/file.md', '/base', ['/allowed']),
+      ).toBe(false);
+    });
+  });
+});
--- a/packages/core/src/utils/memoryImportProcessor.ts
+++ b/packages/core/src/utils/memoryImportProcessor.ts
@@ -0,0 +1,214 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import * as fs from 'fs/promises';
+import * as path from 'path';
+
+// Simple console logger for import processing.
+// Each method forwards its arguments to the matching console method, prefixed
+// with a level tag and the "[ImportProcessor]" component tag. Arguments are
+// typed as `unknown[]` because they are only passed through, never inspected.
+const logger = {
+  debug: (...args: unknown[]) =>
+    console.debug('[DEBUG] [ImportProcessor]', ...args),
+  warn: (...args: unknown[]) =>
+    console.warn('[WARN] [ImportProcessor]', ...args),
+  error: (...args: unknown[]) =>
+    console.error('[ERROR] [ImportProcessor]', ...args),
+};
+
+/**
+ * Interface for tracking import processing state to prevent circular imports.
+ * processImports passes an updated copy of this state to each nested call.
+ */
+interface ImportState {
+  /** Resolved paths of the files imported so far along the current chain. */
+  processedFiles: Set<string>;
+  /** processImports returns its input untouched once currentDepth reaches this. */
+  maxDepth: number;
+  /** Nesting level of the current call; each nested import adds one. */
+  currentDepth: number;
+  /** Resolved path of the file whose content is being processed, if known. */
+  currentFile?: string;
+}
+
+/**
+ * Processes import statements in GEMINI.md content.
+ * Supports @path/to/file.md syntax for importing content from other files.
+ *
+ * Each import token is replaced in place, either by the imported file's
+ * (recursively processed) content wrapped in marker comments, or by an HTML
+ * comment explaining why the import was skipped: path validation failure,
+ * non-.md target, circular or repeated import, or a read error.
+ *
+ * The result is assembled from the match offsets in `content`, so imported
+ * text is inserted verbatim (sequences such as `$&` or `$'` are not expanded
+ * as replacement patterns) and a substitution can never land inside text that
+ * an earlier import already inserted.
+ *
+ * Failures are never thrown to the caller; they are reported inline. The
+ * non-.md warning is always logged; other diagnostics only in debug mode.
+ *
+ * @param content - The content to process for imports
+ * @param basePath - The directory path where the current file is located
+ * @param debugMode - Whether to enable debug logging
+ * @param importState - State tracking for circular import prevention
+ * @returns Processed content with imports resolved
+ */
+export async function processImports(
+  content: string,
+  basePath: string,
+  debugMode: boolean = false,
+  importState: ImportState = {
+    processedFiles: new Set(),
+    maxDepth: 10,
+    currentDepth: 0,
+  },
+): Promise<string> {
+  if (importState.currentDepth >= importState.maxDepth) {
+    if (debugMode) {
+      logger.warn(
+        `Maximum import depth (${importState.maxDepth}) reached. Stopping import processing.`,
+      );
+    }
+    return content;
+  }
+
+  // Regex to match @path/to/file imports (supports any file extension)
+  // Supports both @path/to/file.md and @./path/to/file.md syntax
+  const importRegex = /@([./]?[^\s\n]+\.[^\s\n]+)/g;
+
+  // Output pieces: untouched slices of `content` interleaved with the
+  // replacement for each import token, in source order.
+  const segments: string[] = [];
+  // Offset in `content` just past the last token that was substituted.
+  let cursor = 0;
+
+  // Emits the text between the previous token and `found`, then the
+  // replacement, and advances the cursor past the token.
+  const substitute = (found: RegExpExecArray, replacement: string): void => {
+    segments.push(content.slice(cursor, found.index), replacement);
+    cursor = found.index + found[0].length;
+  };
+
+  let match: RegExpExecArray | null;
+
+  // Process all imports in the content
+  while ((match = importRegex.exec(content)) !== null) {
+    const importPath = match[1];
+
+    // Validate import path to prevent path traversal attacks
+    if (!validateImportPath(importPath, basePath, [basePath])) {
+      substitute(
+        match,
+        `<!-- Import failed: ${importPath} - Path traversal attempt -->`,
+      );
+      continue;
+    }
+
+    // Check if the import is for a non-md file and warn
+    if (!importPath.endsWith('.md')) {
+      logger.warn(
+        `Import processor only supports .md files. Attempting to import non-md file: ${importPath}. This will fail.`,
+      );
+      // Replace the import with a warning comment
+      substitute(
+        match,
+        `<!-- Import failed: ${importPath} - Only .md files are supported -->`,
+      );
+      continue;
+    }
+
+    const fullPath = path.resolve(basePath, importPath);
+
+    if (debugMode) {
+      logger.debug(`Processing import: ${importPath} -> ${fullPath}`);
+    }
+
+    // Check for circular imports - if we're already processing this file
+    if (importState.currentFile === fullPath) {
+      if (debugMode) {
+        logger.warn(`Circular import detected: ${importPath}`);
+      }
+      // Replace the import with a warning comment
+      substitute(match, `<!-- Circular import detected: ${importPath} -->`);
+      continue;
+    }
+
+    // Check if we've already processed this file in this import chain
+    if (importState.processedFiles.has(fullPath)) {
+      if (debugMode) {
+        logger.warn(`File already processed in this chain: ${importPath}`);
+      }
+      // Replace the import with a warning comment
+      substitute(match, `<!-- File already processed: ${importPath} -->`);
+      continue;
+    }
+
+    // Check for potential circular imports by resolving the import relative to
+    // the current file's own directory (which a caller-supplied state may set
+    // to something other than basePath).
+    if (importState.currentFile) {
+      const currentFileDir = path.dirname(importState.currentFile);
+      const potentialCircularPath = path.resolve(currentFileDir, importPath);
+      if (potentialCircularPath === importState.currentFile) {
+        if (debugMode) {
+          logger.warn(`Circular import detected: ${importPath}`);
+        }
+        // Replace the import with a warning comment
+        substitute(match, `<!-- Circular import detected: ${importPath} -->`);
+        continue;
+      }
+    }
+
+    try {
+      // Check if the file exists
+      await fs.access(fullPath);
+
+      // Read the imported file content
+      const importedContent = await fs.readFile(fullPath, 'utf-8');
+
+      if (debugMode) {
+        logger.debug(`Successfully read imported file: ${fullPath}`);
+      }
+
+      // Recursively process imports in the imported content. The nested call
+      // gets its own copy of the visited set, so sibling imports in this file
+      // do not see each other's chains.
+      const processedImportedContent = await processImports(
+        importedContent,
+        path.dirname(fullPath),
+        debugMode,
+        {
+          ...importState,
+          processedFiles: new Set([...importState.processedFiles, fullPath]),
+          currentDepth: importState.currentDepth + 1,
+          currentFile: fullPath, // Set the current file being processed
+        },
+      );
+
+      // Replace the import statement with the processed content
+      substitute(
+        match,
+        `<!-- Imported from: ${importPath} -->\n${processedImportedContent}\n<!-- End of import from: ${importPath} -->`,
+      );
+    } catch (error) {
+      const errorMessage =
+        error instanceof Error ? error.message : String(error);
+      if (debugMode) {
+        logger.error(`Failed to import ${importPath}: ${errorMessage}`);
+      }
+
+      // Replace the import with an error comment
+      substitute(
+        match,
+        `<!-- Import failed: ${importPath} - ${errorMessage} -->`,
+      );
+    }
+  }
+
+  // Append whatever follows the last import token (or the whole content when
+  // no import was found).
+  segments.push(content.slice(cursor));
+  return segments.join('');
+}
+
+/**
+ * Validates import paths to ensure they are safe and within allowed directories.
+ *
+ * URLs (file://, http://, https://) are always rejected. Any other path is
+ * resolved against `basePath` and accepted only when it equals, or is nested
+ * under, one of `allowedDirectories`. Containment is checked per path segment,
+ * so a sibling such as `/base-other` is not treated as being inside `/base`.
+ *
+ * @param importPath - The import path to validate
+ * @param basePath - The base directory for resolving relative paths
+ * @param allowedDirectories - Array of allowed directory paths
+ * @returns Whether the import path is valid
+ */
+export function validateImportPath(
+  importPath: string,
+  basePath: string,
+  allowedDirectories: string[],
+): boolean {
+  // Reject URLs
+  if (/^(file|https?):\/\//.test(importPath)) {
+    return false;
+  }
+
+  const resolvedPath = path.resolve(basePath, importPath);
+
+  return allowedDirectories.some((allowedDir) => {
+    // Express the target relative to the allowed directory: it is inside only
+    // if that relative path does not climb out (`..`) and is not absolute
+    // (which path.relative yields for a different Windows drive).
+    const relativePath = path.relative(path.resolve(allowedDir), resolvedPath);
+    const climbsOut =
+      relativePath === '..' || relativePath.startsWith(`..${path.sep}`);
+    return !climbsOut && !path.isAbsolute(relativePath);
+  });
+}