diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts index 917c7446..87afca43 100644 --- a/packages/cli/src/config/config.test.ts +++ b/packages/cli/src/config/config.test.ts @@ -4,11 +4,11 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { describe, it, expect, vi, beforeEach, afterEach, Mock } from 'vitest'; import * as os from 'os'; import * as path from 'path'; import { ShellTool, EditTool, WriteFileTool } from '@google/gemini-cli-core'; -import { loadCliConfig, parseArguments } from './config.js'; +import { loadCliConfig, parseArguments, CliArgs } from './config.js'; import { Settings } from './settings.js'; import { Extension } from './extension.js'; import * as ServerConfig from '@google/gemini-cli-core'; @@ -637,6 +637,7 @@ describe('Hierarchical Memory Loading (config.ts) - Placeholder Suite', () => { const settings: Settings = {}; const extensions: Extension[] = [ { + path: '/path/to/ext1', config: { name: 'ext1', version: '1.0.0', @@ -644,6 +645,7 @@ describe('Hierarchical Memory Loading (config.ts) - Placeholder Suite', () => { contextFiles: ['/path/to/ext1/GEMINI.md'], }, { + path: '/path/to/ext2', config: { name: 'ext2', version: '1.0.0', @@ -651,6 +653,7 @@ describe('Hierarchical Memory Loading (config.ts) - Placeholder Suite', () => { contextFiles: [], }, { + path: '/path/to/ext3', config: { name: 'ext3', version: '1.0.0', @@ -716,6 +719,7 @@ describe('mergeMcpServers', () => { }; const extensions: Extension[] = [ { + path: '/path/to/ext1', config: { name: 'ext1', version: '1.0.0', @@ -752,6 +756,7 @@ describe('mergeExcludeTools', () => { const settings: Settings = { excludeTools: ['tool1', 'tool2'] }; const extensions: Extension[] = [ { + path: '/path/to/ext1', config: { name: 'ext1', version: '1.0.0', @@ -760,6 +765,7 @@ describe('mergeExcludeTools', () => { contextFiles: [], }, { + path: '/path/to/ext2', config: { name: 'ext2', 
version: '1.0.0', @@ -786,6 +792,7 @@ describe('mergeExcludeTools', () => { const settings: Settings = { excludeTools: ['tool1', 'tool2'] }; const extensions: Extension[] = [ { + path: '/path/to/ext1', config: { name: 'ext1', version: '1.0.0', @@ -812,6 +819,7 @@ describe('mergeExcludeTools', () => { const settings: Settings = { excludeTools: ['tool1'] }; const extensions: Extension[] = [ { + path: '/path/to/ext1', config: { name: 'ext1', version: '1.0.0', @@ -820,6 +828,7 @@ describe('mergeExcludeTools', () => { contextFiles: [], }, { + path: '/path/to/ext2', config: { name: 'ext2', version: '1.0.0', @@ -893,6 +902,7 @@ describe('mergeExcludeTools', () => { const settings: Settings = {}; const extensions: Extension[] = [ { + path: '/path/to/ext', config: { name: 'ext1', version: '1.0.0', @@ -919,6 +929,7 @@ describe('mergeExcludeTools', () => { const settings: Settings = { excludeTools: ['tool1'] }; const extensions: Extension[] = [ { + path: '/path/to/ext', config: { name: 'ext1', version: '1.0.0', @@ -1133,7 +1144,12 @@ describe('Approval mode tool exclusion logic', () => { const extensions: Extension[] = []; await expect( - loadCliConfig(settings, extensions, 'test-session', invalidArgv), + loadCliConfig( + settings, + extensions, + 'test-session', + invalidArgv as CliArgs, + ), ).rejects.toThrow( 'Invalid approval mode: invalid_mode. 
Valid values are: yolo, auto_edit, default', ); @@ -1288,10 +1304,12 @@ describe('loadCliConfig with allowed-mcp-server-names', () => { describe('loadCliConfig extensions', () => { const mockExtensions: Extension[] = [ { + path: '/path/to/ext1', config: { name: 'ext1', version: '1.0.0' }, contextFiles: ['/path/to/ext1.md'], }, { + path: '/path/to/ext2', config: { name: 'ext2', version: '1.0.0' }, contextFiles: ['/path/to/ext2.md'], }, @@ -1894,14 +1912,12 @@ describe('loadCliConfig trustedFolder', () => { description, } of testCases) { it(`should be correct for: ${description}`, async () => { - (isWorkspaceTrusted as vi.Mock).mockImplementation( - (settings: Settings) => { - const featureIsEnabled = - (settings.folderTrustFeature ?? false) && - (settings.folderTrust ?? true); - return featureIsEnabled ? mockTrustValue : true; - }, - ); + (isWorkspaceTrusted as Mock).mockImplementation((settings: Settings) => { + const featureIsEnabled = + (settings.folderTrustFeature ?? false) && + (settings.folderTrust ?? true); + return featureIsEnabled ? 
mockTrustValue : true; + }); const argv = await parseArguments(); const settings: Settings = { folderTrustFeature, folderTrust }; const config = await loadCliConfig(settings, [], 'test-session', argv); diff --git a/packages/cli/src/ui/hooks/atCommandProcessor.test.ts b/packages/cli/src/ui/hooks/atCommandProcessor.test.ts index a0f82bcb..b83cbde3 100644 --- a/packages/cli/src/ui/hooks/atCommandProcessor.test.ts +++ b/packages/cli/src/ui/hooks/atCommandProcessor.test.ts @@ -13,6 +13,8 @@ import { ReadManyFilesTool, StandardFileSystemService, ToolRegistry, + COMMON_IGNORE_PATTERNS, + DEFAULT_FILE_EXCLUDES, } from '@google/gemini-cli-core'; import * as os from 'os'; import { ToolCallStatus } from '../types.js'; @@ -69,6 +71,13 @@ describe('handleAtCommand', () => { getPromptsByServer: () => [], }), getDebugMode: () => false, + getFileExclusions: () => ({ + getCoreIgnorePatterns: () => COMMON_IGNORE_PATTERNS, + getDefaultExcludePatterns: () => DEFAULT_FILE_EXCLUDES, + getGlobExcludes: () => COMMON_IGNORE_PATTERNS, + buildExcludePatterns: () => DEFAULT_FILE_EXCLUDES, + getReadManyFilesExcludes: () => DEFAULT_FILE_EXCLUDES, + }), getUsageStatisticsEnabled: () => false, } as unknown as Config; diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 1219fd3d..9e197dd1 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -54,6 +54,7 @@ import { IdeConnectionEvent, IdeConnectionType } from '../telemetry/types.js'; export type { MCPOAuthConfig }; import { WorkspaceContext } from '../utils/workspaceContext.js'; import { Storage } from './storage.js'; +import { FileExclusions } from '../utils/ignorePatterns.js'; export enum ApprovalMode { DEFAULT = 'default', @@ -277,6 +278,7 @@ export class Config { private readonly enablePromptCompletion: boolean = false; private initialized: boolean = false; readonly storage: Storage; + private readonly fileExclusions: FileExclusions; constructor(params: 
ConfigParameters) { this.sessionId = params.sessionId; @@ -349,6 +351,7 @@ export class Config { this.skipNextSpeakerCheck = params.skipNextSpeakerCheck ?? false; this.storage = new Storage(this.targetDir); this.enablePromptCompletion = params.enablePromptCompletion ?? false; + this.fileExclusions = new FileExclusions(this); if (params.contextFileName) { setGeminiMdFilename(params.contextFileName); @@ -621,6 +624,21 @@ export class Config { }; } + /** + * Gets custom file exclusion patterns from configuration. + * TODO: This is a placeholder implementation. In the future, this could + * read from settings files, CLI arguments, or environment variables. + */ + getCustomExcludes(): string[] { + // Placeholder implementation - returns empty array for now + // Future implementation could read from: + // - User settings file + // - Project-specific configuration + // - Environment variables + // - CLI arguments + return []; + } + getCheckpointingEnabled(): boolean { return this.checkpointing; } @@ -766,6 +784,10 @@ export class Config { return this.gitService; } + getFileExclusions(): FileExclusions { + return this.fileExclusions; + } + async createToolRegistry(): Promise { const registry = new ToolRegistry(this); diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 855b36fc..40e6b6e0 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -44,6 +44,7 @@ export * from './utils/formatters.js'; export * from './utils/generateContentResponseUtilities.js'; export * from './utils/filesearch/fileSearch.js'; export * from './utils/errorParsing.js'; +export * from './utils/ignorePatterns.js'; // Export services export * from './services/fileDiscoveryService.js'; diff --git a/packages/core/src/tools/glob.test.ts b/packages/core/src/tools/glob.test.ts index 905c5776..99fed36d 100644 --- a/packages/core/src/tools/glob.test.ts +++ b/packages/core/src/tools/glob.test.ts @@ -29,6 +29,9 @@ describe('GlobTool', () => { 
getFileFilteringRespectGitIgnore: () => true, getTargetDir: () => tempRootDir, getWorkspaceContext: () => createMockWorkspaceContext(tempRootDir), + getFileExclusions: () => ({ + getGlobExcludes: () => [], + }), } as unknown as Config; beforeEach(async () => { diff --git a/packages/core/src/tools/glob.ts b/packages/core/src/tools/glob.ts index 2bd26fbd..e107dd1c 100644 --- a/packages/core/src/tools/glob.ts +++ b/packages/core/src/tools/glob.ts @@ -155,7 +155,7 @@ class GlobToolInvocation extends BaseToolInvocation< stat: true, nocase: !this.params.case_sensitive, dot: true, - ignore: ['**/node_modules/**', '**/.git/**'], + ignore: this.config.getFileExclusions().getGlobExcludes(), follow: false, signal, })) as GlobPath[]; diff --git a/packages/core/src/tools/grep.test.ts b/packages/core/src/tools/grep.test.ts index a152c88c..bebecf78 100644 --- a/packages/core/src/tools/grep.test.ts +++ b/packages/core/src/tools/grep.test.ts @@ -39,6 +39,9 @@ describe('GrepTool', () => { const mockConfig = { getTargetDir: () => tempRootDir, getWorkspaceContext: () => createMockWorkspaceContext(tempRootDir), + getFileExclusions: () => ({ + getGlobExcludes: () => [], + }), } as unknown as Config; beforeEach(async () => { @@ -258,6 +261,9 @@ describe('GrepTool', () => { getTargetDir: () => tempRootDir, getWorkspaceContext: () => createMockWorkspaceContext(tempRootDir, [secondDir]), + getFileExclusions: () => ({ + getGlobExcludes: () => [], + }), } as unknown as Config; const multiDirGrepTool = new GrepTool(multiDirConfig); @@ -308,6 +314,9 @@ describe('GrepTool', () => { getTargetDir: () => tempRootDir, getWorkspaceContext: () => createMockWorkspaceContext(tempRootDir, [secondDir]), + getFileExclusions: () => ({ + getGlobExcludes: () => [], + }), } as unknown as Config; const multiDirGrepTool = new GrepTool(multiDirConfig); @@ -367,6 +376,9 @@ describe('GrepTool', () => { getTargetDir: () => tempRootDir, getWorkspaceContext: () => createMockWorkspaceContext(tempRootDir, 
['/another/dir']), + getFileExclusions: () => ({ + getGlobExcludes: () => [], + }), } as unknown as Config; const multiDirGrepTool = new GrepTool(multiDirConfig); diff --git a/packages/core/src/tools/grep.ts b/packages/core/src/tools/grep.ts index 4a0b8af4..9d7646c3 100644 --- a/packages/core/src/tools/grep.ts +++ b/packages/core/src/tools/grep.ts @@ -21,6 +21,7 @@ import { makeRelative, shortenPath } from '../utils/paths.js'; import { getErrorMessage, isNodeError } from '../utils/errors.js'; import { isGitRepository } from '../utils/gitUtils.js'; import { Config } from '../config/config.js'; +import { FileExclusions } from '../utils/ignorePatterns.js'; import { ToolErrorType } from './tool-error.js'; // --- Interfaces --- @@ -58,11 +59,14 @@ class GrepToolInvocation extends BaseToolInvocation< GrepToolParams, ToolResult > { + private readonly fileExclusions: FileExclusions; + constructor( private readonly config: Config, params: GrepToolParams, ) { super(params); + this.fileExclusions = config.getFileExclusions(); } /** @@ -281,7 +285,6 @@ class GrepToolInvocation extends BaseToolInvocation< /** * Gets a description of the grep operation - * @param params Parameters for the grep operation * @returns A string describing the grep */ getDescription(): string { @@ -391,7 +394,27 @@ class GrepToolInvocation extends BaseToolInvocation< if (grepAvailable) { strategyUsed = 'system grep'; const grepArgs = ['-r', '-n', '-H', '-E']; - const commonExcludes = ['.git', 'node_modules', 'bower_components']; + // Extract directory names from exclusion patterns for grep --exclude-dir + const globExcludes = this.fileExclusions.getGlobExcludes(); + const commonExcludes = globExcludes + .map((pattern) => { + let dir = pattern; + if (dir.startsWith('**/')) { + dir = dir.substring(3); + } + if (dir.endsWith('/**')) { + dir = dir.slice(0, -3); + } else if (dir.endsWith('/')) { + dir = dir.slice(0, -1); + } + + // Only consider patterns that are likely directories. 
This filters out file patterns. + if (dir && !dir.includes('/') && !dir.includes('*')) { + return dir; + } + return null; + }) + .filter((dir): dir is string => !!dir); commonExcludes.forEach((dir) => grepArgs.push(`--exclude-dir=${dir}`)); if (include) { grepArgs.push(`--include=${include}`); @@ -474,13 +497,7 @@ class GrepToolInvocation extends BaseToolInvocation< ); strategyUsed = 'javascript fallback'; const globPattern = include ? include : '**/*'; - const ignorePatterns = [ - '.git/**', - 'node_modules/**', - 'bower_components/**', - '.svn/**', - '.hg/**', - ]; // Use glob patterns for ignores here + const ignorePatterns = this.fileExclusions.getGlobExcludes(); const filesStream = globStream(globPattern, { cwd: absolutePath, diff --git a/packages/core/src/tools/read-many-files.test.ts b/packages/core/src/tools/read-many-files.test.ts index 3145dd26..afe1c5ed 100644 --- a/packages/core/src/tools/read-many-files.test.ts +++ b/packages/core/src/tools/read-many-files.test.ts @@ -16,6 +16,10 @@ import { Config } from '../config/config.js'; import { WorkspaceContext } from '../utils/workspaceContext.js'; import { StandardFileSystemService } from '../services/fileSystemService.js'; import { ToolErrorType } from './tool-error.js'; +import { + COMMON_IGNORE_PATTERNS, + DEFAULT_FILE_EXCLUDES, +} from '../utils/ignorePatterns.js'; import * as glob from 'glob'; vi.mock('glob', { spy: true }); @@ -77,6 +81,13 @@ describe('ReadManyFilesTool', () => { getTargetDir: () => tempRootDir, getWorkspaceDirs: () => [tempRootDir], getWorkspaceContext: () => new WorkspaceContext(tempRootDir), + getFileExclusions: () => ({ + getCoreIgnorePatterns: () => COMMON_IGNORE_PATTERNS, + getDefaultExcludePatterns: () => DEFAULT_FILE_EXCLUDES, + getGlobExcludes: () => COMMON_IGNORE_PATTERNS, + buildExcludePatterns: () => DEFAULT_FILE_EXCLUDES, + getReadManyFilesExcludes: () => DEFAULT_FILE_EXCLUDES, + }), } as Partial as Config; tool = new ReadManyFilesTool(mockConfig); @@ -484,6 +495,13 @@ 
describe('ReadManyFilesTool', () => { }), getWorkspaceContext: () => new WorkspaceContext(tempDir1, [tempDir2]), getTargetDir: () => tempDir1, + getFileExclusions: () => ({ + getCoreIgnorePatterns: () => COMMON_IGNORE_PATTERNS, + getDefaultExcludePatterns: () => [], + getGlobExcludes: () => COMMON_IGNORE_PATTERNS, + buildExcludePatterns: () => [], + getReadManyFilesExcludes: () => [], + }), } as Partial as Config; tool = new ReadManyFilesTool(mockConfig); diff --git a/packages/core/src/tools/read-many-files.ts b/packages/core/src/tools/read-many-files.ts index 9f3a1161..ad697017 100644 --- a/packages/core/src/tools/read-many-files.ts +++ b/packages/core/src/tools/read-many-files.ts @@ -15,7 +15,6 @@ import { getErrorMessage } from '../utils/errors.js'; import * as fs from 'fs'; import * as path from 'path'; import { glob, escape } from 'glob'; -import { getCurrentGeminiMdFilename } from './memoryTool.js'; import { detectFileType, processSingleFileContent, @@ -98,49 +97,13 @@ type FileProcessingResult = }; /** - * Default exclusion patterns for commonly ignored directories and binary file types. - * These are compatible with glob ignore patterns. + * Creates the default exclusion patterns including dynamic patterns. + * This combines the shared patterns with dynamic patterns like GEMINI.md. * TODO(adh): Consider making this configurable or extendable through a command line argument. - * TODO(adh): Look into sharing this list with the glob tool. 
*/ -const DEFAULT_EXCLUDES: string[] = [ - '**/node_modules/**', - '**/.git/**', - '**/.vscode/**', - '**/.idea/**', - '**/dist/**', - '**/build/**', - '**/coverage/**', - '**/__pycache__/**', - '**/*.pyc', - '**/*.pyo', - '**/*.bin', - '**/*.exe', - '**/*.dll', - '**/*.so', - '**/*.dylib', - '**/*.class', - '**/*.jar', - '**/*.war', - '**/*.zip', - '**/*.tar', - '**/*.gz', - '**/*.bz2', - '**/*.rar', - '**/*.7z', - '**/*.doc', - '**/*.docx', - '**/*.xls', - '**/*.xlsx', - '**/*.ppt', - '**/*.pptx', - '**/*.odt', - '**/*.ods', - '**/*.odp', - '**/*.DS_Store', - '**/.env', - `**/${getCurrentGeminiMdFilename()}`, -]; +function getDefaultExcludes(config?: Config): string[] { + return config?.getFileExclusions().getReadManyFilesExcludes() ?? []; +} const DEFAULT_OUTPUT_SEPARATOR_FORMAT = '--- {filePath} ---'; const DEFAULT_OUTPUT_TERMINATOR = '\n--- End of content ---'; @@ -172,7 +135,11 @@ ${this.config.getTargetDir()} .getGeminiIgnorePatterns(); const finalExclusionPatternsForDescription: string[] = paramUseDefaultExcludes - ? [...DEFAULT_EXCLUDES, ...paramExcludes, ...geminiIgnorePatterns] + ? [ + ...getDefaultExcludes(this.config), + ...paramExcludes, + ...geminiIgnorePatterns, + ] : [...paramExcludes, ...geminiIgnorePatterns]; let excludeDesc = `Excluding: ${ @@ -230,7 +197,7 @@ ${finalExclusionPatternsForDescription const contentParts: PartListUnion = []; const effectiveExcludes = useDefaultExcludes - ? [...DEFAULT_EXCLUDES, ...exclude] + ? 
[...getDefaultExcludes(this.config), ...exclude] : [...exclude]; const searchPatterns = [...inputPatterns, ...include]; diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts index f0b491ed..07aa2b67 100644 --- a/packages/core/src/utils/fileUtils.ts +++ b/packages/core/src/utils/fileUtils.ts @@ -10,6 +10,7 @@ import { PartUnion } from '@google/genai'; import mime from 'mime-types'; import { FileSystemService } from '../services/fileSystemService.js'; import { ToolErrorType } from '../tools/tool-error.js'; +import { BINARY_EXTENSIONS } from './ignorePatterns.js'; // Constants for text file processing const DEFAULT_MAX_LINES_TEXT_FILE = 2000; @@ -153,38 +154,7 @@ export async function detectFileType( // Stricter binary check for common non-text extensions before content check // These are often not well-covered by mime-types or might be misidentified. - if ( - [ - '.zip', - '.tar', - '.gz', - '.exe', - '.dll', - '.so', - '.class', - '.jar', - '.war', - '.7z', - '.doc', - '.docx', - '.xls', - '.xlsx', - '.ppt', - '.pptx', - '.odt', - '.ods', - '.odp', - '.bin', - '.dat', - '.obj', - '.o', - '.a', - '.lib', - '.wasm', - '.pyc', - '.pyo', - ].includes(ext) - ) { + if (BINARY_EXTENSIONS.includes(ext)) { return 'binary'; } diff --git a/packages/core/src/utils/ignorePatterns.test.ts b/packages/core/src/utils/ignorePatterns.test.ts new file mode 100644 index 00000000..96822379 --- /dev/null +++ b/packages/core/src/utils/ignorePatterns.test.ts @@ -0,0 +1,319 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi } from 'vitest'; +import { + FileExclusions, + BINARY_EXTENSIONS, + extractExtensionsFromPatterns, +} from './ignorePatterns.js'; +import { Config } from '../config/config.js'; + +// Mock the memoryTool module +vi.mock('../tools/memoryTool.js', () => ({ + getCurrentGeminiMdFilename: vi.fn(() => 'GEMINI.md'), +})); + +describe('FileExclusions', () => { + 
describe('getCoreIgnorePatterns', () => { + it('should return basic ignore patterns', () => { + const excluder = new FileExclusions(); + const patterns = excluder.getCoreIgnorePatterns(); + + expect(patterns).toContain('**/node_modules/**'); + expect(patterns).toContain('**/.git/**'); + expect(patterns).toContain('**/bower_components/**'); + expect(patterns).toContain('**/.svn/**'); + expect(patterns).toContain('**/.hg/**'); + expect(patterns).toHaveLength(5); + }); + }); + + describe('getDefaultExcludePatterns', () => { + it('should return comprehensive patterns by default', () => { + const excluder = new FileExclusions(); + const patterns = excluder.getDefaultExcludePatterns(); + + // Should include core patterns + expect(patterns).toContain('**/node_modules/**'); + expect(patterns).toContain('**/.git/**'); + + // Should include directory excludes + expect(patterns).toContain('**/.vscode/**'); + expect(patterns).toContain('**/dist/**'); + expect(patterns).toContain('**/build/**'); + + // Should include binary patterns + expect(patterns).toContain('**/*.exe'); + expect(patterns).toContain('**/*.jar'); + + // Should include system files + expect(patterns).toContain('**/.DS_Store'); + expect(patterns).toContain('**/.env'); + + // Should include dynamic patterns + expect(patterns).toContain('**/GEMINI.md'); + }); + + it('should respect includeDefaults option', () => { + const excluder = new FileExclusions(); + const patterns = excluder.getDefaultExcludePatterns({ + includeDefaults: false, + includeDynamicPatterns: false, + }); + + expect(patterns).not.toContain('**/node_modules/**'); + expect(patterns).not.toContain('**/.git/**'); + expect(patterns).not.toContain('**/GEMINI.md'); + expect(patterns).toHaveLength(0); + }); + + it('should include custom patterns', () => { + const excluder = new FileExclusions(); + const patterns = excluder.getDefaultExcludePatterns({ + customPatterns: ['**/custom/**', '**/*.custom'], + }); + + expect(patterns).toContain('**/custom/**'); 
+ expect(patterns).toContain('**/*.custom'); + }); + + it('should include runtime patterns', () => { + const excluder = new FileExclusions(); + const patterns = excluder.getDefaultExcludePatterns({ + runtimePatterns: ['**/temp/**', '**/*.tmp'], + }); + + expect(patterns).toContain('**/temp/**'); + expect(patterns).toContain('**/*.tmp'); + }); + + it('should respect includeDynamicPatterns option', () => { + const excluder = new FileExclusions(); + const patternsWithDynamic = excluder.getDefaultExcludePatterns({ + includeDynamicPatterns: true, + }); + const patternsWithoutDynamic = excluder.getDefaultExcludePatterns({ + includeDynamicPatterns: false, + }); + + expect(patternsWithDynamic).toContain('**/GEMINI.md'); + expect(patternsWithoutDynamic).not.toContain('**/GEMINI.md'); + }); + }); + + describe('getReadManyFilesExcludes', () => { + it('should provide legacy compatibility', () => { + const excluder = new FileExclusions(); + const patterns = excluder.getReadManyFilesExcludes(['**/*.log']); + + // Should include all default patterns + expect(patterns).toContain('**/node_modules/**'); + expect(patterns).toContain('**/.git/**'); + expect(patterns).toContain('**/GEMINI.md'); + + // Should include additional excludes + expect(patterns).toContain('**/*.log'); + }); + }); + + describe('getGlobExcludes', () => { + it('should return core patterns for glob operations', () => { + const excluder = new FileExclusions(); + const patterns = excluder.getGlobExcludes(); + + expect(patterns).toContain('**/node_modules/**'); + expect(patterns).toContain('**/.git/**'); + expect(patterns).toContain('**/bower_components/**'); + expect(patterns).toContain('**/.svn/**'); + expect(patterns).toContain('**/.hg/**'); + + // Should not include comprehensive patterns by default + expect(patterns).toHaveLength(5); + }); + + it('should include additional excludes', () => { + const excluder = new FileExclusions(); + const patterns = excluder.getGlobExcludes(['**/temp/**']); + + 
expect(patterns).toContain('**/node_modules/**'); + expect(patterns).toContain('**/.git/**'); + expect(patterns).toContain('**/temp/**'); + }); + }); + + describe('with Config', () => { + it('should use config custom excludes when available', () => { + const mockConfig = { + getCustomExcludes: vi.fn(() => ['**/config-exclude/**']), + } as unknown as Config; + + const excluder = new FileExclusions(mockConfig); + const patterns = excluder.getDefaultExcludePatterns(); + + expect(patterns).toContain('**/config-exclude/**'); + expect(mockConfig.getCustomExcludes).toHaveBeenCalled(); + }); + + it('should handle config without getCustomExcludes method', () => { + const mockConfig = {} as Config; + + const excluder = new FileExclusions(mockConfig); + const patterns = excluder.getDefaultExcludePatterns(); + + // Should not throw and should include default patterns + expect(patterns).toContain('**/node_modules/**'); + expect(patterns.length).toBeGreaterThan(0); + }); + + it('should include config custom excludes in glob patterns', () => { + const mockConfig = { + getCustomExcludes: vi.fn(() => ['**/config-glob/**']), + } as unknown as Config; + + const excluder = new FileExclusions(mockConfig); + const patterns = excluder.getGlobExcludes(); + + expect(patterns).toContain('**/node_modules/**'); + expect(patterns).toContain('**/.git/**'); + expect(patterns).toContain('**/config-glob/**'); + }); + }); + + describe('buildExcludePatterns', () => { + it('should be an alias for getDefaultExcludePatterns', () => { + const excluder = new FileExclusions(); + const options = { + includeDefaults: true, + customPatterns: ['**/test/**'], + runtimePatterns: ['**/runtime/**'], + }; + + const defaultPatterns = excluder.getDefaultExcludePatterns(options); + const buildPatterns = excluder.buildExcludePatterns(options); + + expect(buildPatterns).toEqual(defaultPatterns); + }); + }); +}); + +describe('BINARY_EXTENSIONS', () => { + it('should include common binary file extensions', () => { + 
expect(BINARY_EXTENSIONS).toContain('.exe'); + expect(BINARY_EXTENSIONS).toContain('.dll'); + expect(BINARY_EXTENSIONS).toContain('.jar'); + expect(BINARY_EXTENSIONS).toContain('.zip'); + }); + + it('should include additional binary extensions', () => { + expect(BINARY_EXTENSIONS).toContain('.dat'); + expect(BINARY_EXTENSIONS).toContain('.obj'); + expect(BINARY_EXTENSIONS).toContain('.wasm'); + }); + + it('should include media file extensions', () => { + expect(BINARY_EXTENSIONS).toContain('.pdf'); + expect(BINARY_EXTENSIONS).toContain('.png'); + expect(BINARY_EXTENSIONS).toContain('.jpg'); + }); + + it('should be sorted', () => { + const sortedExtensions = [...BINARY_EXTENSIONS].sort(); + expect(BINARY_EXTENSIONS).toEqual(sortedExtensions); + }); + + it('should not contain invalid extensions from brace patterns', () => { + // If brace expansion was not handled correctly, we would see invalid extensions like '.{jpg,png}' + const invalidExtensions = BINARY_EXTENSIONS.filter( + (ext) => ext.includes('{') || ext.includes('}'), + ); + expect(invalidExtensions).toHaveLength(0); + }); +}); + +describe('extractExtensionsFromPatterns', () => { + it('should extract simple extensions', () => { + const patterns = ['**/*.exe', '**/*.jar', '**/*.zip']; + const result = extractExtensionsFromPatterns(patterns); + + expect(result).toEqual(['.exe', '.jar', '.zip']); + }); + + it('should handle brace expansion patterns', () => { + const patterns = ['**/*.{js,ts}', '**/*.{jpg,png}']; + const result = extractExtensionsFromPatterns(patterns); + + expect(result).toContain('.js'); + expect(result).toContain('.ts'); + expect(result).toContain('.jpg'); + expect(result).toContain('.png'); + expect(result).not.toContain('.{js,ts}'); + expect(result).not.toContain('.{jpg,png}'); + }); + + it('should combine simple and brace expansion patterns', () => { + const patterns = ['**/*.exe', '**/*.{js,ts}', '**/*.pdf']; + const result = extractExtensionsFromPatterns(patterns); + + 
expect(result).toContain('.exe'); + expect(result).toContain('.js'); + expect(result).toContain('.ts'); + expect(result).toContain('.pdf'); + }); + + it('should handle empty brace expansion', () => { + const patterns = ['**/*.{}', '**/*.{,}']; + const result = extractExtensionsFromPatterns(patterns); + + // Empty extensions should be filtered out + expect(result).toHaveLength(0); + }); + + it('should ignore invalid patterns', () => { + const patterns = ['no-asterisk.exe', '**/*no-dot', '**/*.{unclosed']; + const result = extractExtensionsFromPatterns(patterns); + + expect(result).toHaveLength(0); + }); + + it('should remove duplicates and sort results', () => { + const patterns = ['**/*.js', '**/*.{js,ts}', '**/*.ts']; + const result = extractExtensionsFromPatterns(patterns); + + expect(result).toEqual(['.js', '.ts']); + }); + + it('should handle complex brace patterns with multiple extensions', () => { + const patterns = ['**/*.{html,css,js,jsx,ts,tsx}']; + const result = extractExtensionsFromPatterns(patterns); + + expect(result).toEqual(['.css', '.html', '.js', '.jsx', '.ts', '.tsx']); + }); + + it('should handle compound extensions correctly using path.extname', () => { + const patterns = ['**/*.tar.gz', '**/*.min.js', '**/*.d.ts']; + const result = extractExtensionsFromPatterns(patterns); + + // Should extract the final extension part only + expect(result).toEqual(['.gz', '.js', '.ts']); + }); + + it('should handle dotfiles correctly', () => { + const patterns = ['**/*.gitignore', '**/*.profile', '**/*.bashrc']; + const result = extractExtensionsFromPatterns(patterns); + + // Dotfiles should be extracted properly + expect(result).toEqual(['.bashrc', '.gitignore', '.profile']); + }); + + it('should handle edge cases with path.extname', () => { + const patterns = ['**/*.hidden.', '**/*.config.json']; + const result = extractExtensionsFromPatterns(patterns); + + // Should handle edge cases properly (trailing dots are filtered out) + 
expect(result).toEqual(['.json']); + }); +}); diff --git a/packages/core/src/utils/ignorePatterns.ts b/packages/core/src/utils/ignorePatterns.ts new file mode 100644 index 00000000..3b6cbc05 --- /dev/null +++ b/packages/core/src/utils/ignorePatterns.ts @@ -0,0 +1,276 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import path from 'path'; +import { Config } from '../config/config.js'; +import { getCurrentGeminiMdFilename } from '../tools/memoryTool.js'; + +/** + * Common ignore patterns used across multiple tools for basic exclusions. + * These are the most commonly ignored directories in development projects. + */ +export const COMMON_IGNORE_PATTERNS: string[] = [ + '**/node_modules/**', + '**/.git/**', + '**/bower_components/**', + '**/.svn/**', + '**/.hg/**', +]; + +/** + * Binary file extension patterns that are typically excluded from text processing. + */ +export const BINARY_FILE_PATTERNS: string[] = [ + '**/*.bin', + '**/*.exe', + '**/*.dll', + '**/*.so', + '**/*.dylib', + '**/*.class', + '**/*.jar', + '**/*.war', + '**/*.zip', + '**/*.tar', + '**/*.gz', + '**/*.bz2', + '**/*.rar', + '**/*.7z', + '**/*.doc', + '**/*.docx', + '**/*.xls', + '**/*.xlsx', + '**/*.ppt', + '**/*.pptx', + '**/*.odt', + '**/*.ods', + '**/*.odp', +]; + +/** + * Media file patterns that require special handling in tools like read-many-files. + * These files can be processed as inlineData when explicitly requested. + */ +export const MEDIA_FILE_PATTERNS: string[] = [ + '**/*.pdf', + '**/*.png', + '**/*.jpg', + '**/*.jpeg', + '**/*.gif', + '**/*.webp', + '**/*.bmp', + '**/*.svg', +]; + +/** + * Common directory patterns that are typically ignored in development projects. + */ +export const COMMON_DIRECTORY_EXCLUDES: string[] = [ + '**/.vscode/**', + '**/.idea/**', + '**/dist/**', + '**/build/**', + '**/coverage/**', + '**/__pycache__/**', +]; + +/** + * Python-specific patterns. 
/**
 * Python-specific patterns: `.pyc` and `.pyo` files at any depth.
 */
export const PYTHON_EXCLUDES: string[] = [
  '**/*.pyc',
  '**/*.pyo',
];

/**
 * System and environment file patterns (`.DS_Store` and `.env` at any depth).
 */
export const SYSTEM_FILE_EXCLUDES: string[] = [
  '**/.DS_Store',
  '**/.env',
];

/**
 * The full default exclusion list: the concatenation, in this order, of the
 * common ignore patterns, common directory excludes, binary file patterns,
 * Python excludes and system file excludes.
 * The entries are glob ignore patterns.
 * Note: media files (PDF, images) are deliberately left out because
 * read-many-files handles them separately.
 */
export const DEFAULT_FILE_EXCLUDES: string[] = COMMON_IGNORE_PATTERNS.concat(
  COMMON_DIRECTORY_EXCLUDES,
  BINARY_FILE_PATTERNS,
  PYTHON_EXCLUDES,
  SYSTEM_FILE_EXCLUDES,
);

/**
 * Knobs that control which groups of exclusion patterns are assembled by
 * {@link FileExclusions.getDefaultExcludePatterns}.
 */
export interface ExcludeOptions {
  /**
   * Include `DEFAULT_FILE_EXCLUDES` in the result. Treated as `true` when
   * omitted.
   */
  includeDefaults?: boolean;

  /**
   * Extra patterns supplied by the caller; appended after any patterns
   * obtained from the config object.
   */
  customPatterns?: string[];

  /**
   * Extra patterns supplied at runtime (e.g., from CLI arguments); appended
   * last.
   */
  runtimePatterns?: string[];

  /**
   * Include patterns computed at call time, such as the current Gemini MD
   * filename. Treated as `true` when omitted.
   */
  includeDynamicPatterns?: boolean;
}

/**
 * Single place where tools obtain their file exclusion patterns.
 * The optional `Config` passed at construction may contribute additional
 * excludes through `getCustomExcludes`, when that method exists.
 */
export class FileExclusions {
  constructor(private config?: Config) {}

  /**
   * Returns a fresh copy of `COMMON_IGNORE_PATTERNS`, the minimal set meant
   * for basic operations such as glob.
   */
  getCoreIgnorePatterns(): string[] {
    return COMMON_IGNORE_PATTERNS.slice();
  }

  /**
   * Assembles an exclusion list for operations like read-many-files.
   *
   * Groups are appended in a fixed order: defaults, dynamic patterns,
   * config-provided excludes, `customPatterns`, then `runtimePatterns`.
   * Duplicates are not removed.
   *
   * @param options Selects which groups take part; see {@link ExcludeOptions}.
   * @returns A newly built array of glob patterns.
   */
  getDefaultExcludePatterns(options: ExcludeOptions = {}): string[] {
    const {
      includeDefaults: withDefaults = true,
      customPatterns: callerPatterns = [],
      runtimePatterns: latePatterns = [],
      includeDynamicPatterns: withDynamic = true,
    } = options;

    const defaults = withDefaults ? DEFAULT_FILE_EXCLUDES : [];

    // Dynamic group: currently just the active Gemini MD filename.
    const dynamic = withDynamic ? [`**/${getCurrentGeminiMdFilename()}`] : [];

    // TODO: getCustomExcludes method needs to be implemented in Config interface
    // The optional call yields nothing when the method is absent.
    const fromConfig = this.config
      ? (this.config.getCustomExcludes?.() ?? [])
      : [];

    return [
      ...defaults,
      ...dynamic,
      ...fromConfig,
      ...callerPatterns,
      ...latePatterns,
    ];
  }

  /**
   * Exclusion list for the read-many-files tool, kept for compatibility with
   * the former getDefaultExcludes() function: defaults and dynamic patterns
   * are on, and `additionalExcludes` are passed through as runtime patterns.
   */
  getReadManyFilesExcludes(additionalExcludes: string[] = []): string[] {
    const options: ExcludeOptions = {
      includeDefaults: true,
      includeDynamicPatterns: true,
      runtimePatterns: additionalExcludes,
    };
    return this.getDefaultExcludePatterns(options);
  }

  /**
   * Exclusion list for the glob tool: the core patterns, followed by any
   * config-provided excludes, followed by `additionalExcludes`.
   */
  getGlobExcludes(additionalExcludes: string[] = []): string[] {
    const core = this.getCoreIgnorePatterns();

    // TODO: getCustomExcludes method needs to be implemented in Config interface
    const fromConfig = this.config?.getCustomExcludes?.() ?? [];

    return [...core, ...fromConfig, ...additionalExcludes];
  }

  /**
   * Fully customizable entry point; forwards `options` unchanged to
   * {@link FileExclusions.getDefaultExcludePatterns}.
   */
  buildExcludePatterns(options: ExcludeOptions): string[] {
    return this.getDefaultExcludePatterns(options);
  }
}

/**
 * Derives file extensions from glob patterns.
 *
 * Only patterns containing `*.` are considered, and only the text after the
 * last `*` is inspected. `glob/*.exe` yields `.exe`; a brace list such as
 * `glob/*.{js,ts}` yields `.js` and `.ts`.
 *
 * @param patterns Glob patterns to scan.
 * @returns The distinct extensions found, sorted with the default string order.
 */
export function extractExtensionsFromPatterns(patterns: string[]): string[] {
  const found = new Set<string>();

  for (const pattern of patterns) {
    const starAt = pattern.lastIndexOf('*.');
    if (starAt === -1) {
      continue;
    }

    // Everything from the dot that follows the last '*'.
    const suffix = pattern.substring(starAt + 1);

    // Brace list, e.g. `.{jpg,png}`: one extension per comma-separated entry.
    if (suffix.startsWith('.{') && suffix.endsWith('}')) {
      for (const piece of suffix.slice(2, -1).split(',')) {
        const candidate = `.${piece.trim()}`;
        // Empty entries collapse to a bare dot and are dropped.
        if (candidate !== '.') {
          found.add(candidate);
        }
      }
      continue;
    }

    // Anything else must be a plain suffix: no path separator, no braces.
    const hasDisallowedChar = ['/', '{', '}'].some((ch) => suffix.includes(ch));
    if (!suffix.startsWith('.') || hasDisallowedChar) {
      continue;
    }

    // path.extname on a dummy file name keeps only the final extension,
    // e.g. '.tar.gz' -> '.gz'; fall back to the raw suffix if it is empty.
    const candidate = path.extname(`dummy${suffix}`) || suffix;

    // Reject a bare dot and anything with a further dot after the first char.
    if (candidate !== '.' && !candidate.includes('.', 1)) {
      found.add(candidate);
    }
  }

  return [...found].sort();
}

/**
 * Sorted list of extensions treated as binary for quick lookup: those derived
 * from BINARY_FILE_PATTERNS, MEDIA_FILE_PATTERNS and PYTHON_EXCLUDES, plus a
 * few extra extensions that have no corresponding pattern.
 */
export const BINARY_EXTENSIONS: string[] = extractExtensionsFromPatterns(
  BINARY_FILE_PATTERNS.concat(MEDIA_FILE_PATTERNS, PYTHON_EXCLUDES),
)
  // Additional binary extensions not in the main patterns
  .concat(['.dat', '.obj', '.o', '.a', '.lib', '.wasm'])
  .sort();