feat(core): share file list patterns between glob and grep tools (#6359)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Arya Gummadi <aryagummadi@google.com>
This commit is contained in:
sangwook
2025-08-23 13:35:00 +09:00
committed by GitHub
parent f55b294570
commit 494a996ff8
13 changed files with 727 additions and 97 deletions

View File

@@ -54,6 +54,7 @@ import { IdeConnectionEvent, IdeConnectionType } from '../telemetry/types.js';
export type { MCPOAuthConfig };
import { WorkspaceContext } from '../utils/workspaceContext.js';
import { Storage } from './storage.js';
import { FileExclusions } from '../utils/ignorePatterns.js';
export enum ApprovalMode {
DEFAULT = 'default',
@@ -277,6 +278,7 @@ export class Config {
private readonly enablePromptCompletion: boolean = false;
private initialized: boolean = false;
readonly storage: Storage;
private readonly fileExclusions: FileExclusions;
constructor(params: ConfigParameters) {
this.sessionId = params.sessionId;
@@ -349,6 +351,7 @@ export class Config {
this.skipNextSpeakerCheck = params.skipNextSpeakerCheck ?? false;
this.storage = new Storage(this.targetDir);
this.enablePromptCompletion = params.enablePromptCompletion ?? false;
this.fileExclusions = new FileExclusions(this);
if (params.contextFileName) {
setGeminiMdFilename(params.contextFileName);
@@ -621,6 +624,21 @@ export class Config {
};
}
/**
* Returns additional exclusion patterns supplied through configuration.
*
* FileExclusions appends whatever this returns to its own pattern lists.
* This is currently a placeholder: no configuration source is wired up, so
* the result is always a new empty array.
*
* TODO: Read the patterns from settings files, CLI arguments, or
* environment variables.
*
* @returns The configured exclusion patterns (currently always empty).
*/
getCustomExcludes(): string[] {
// Placeholder implementation - returns empty array for now.
// Candidate sources for a future implementation:
// - User settings file
// - Project-specific configuration
// - Environment variables
// - CLI arguments
return [];
}
getCheckpointingEnabled(): boolean {
return this.checkpointing;
}
@@ -766,6 +784,10 @@ export class Config {
return this.gitService;
}
/**
* Returns the FileExclusions instance that was created for this Config in
* the constructor.
*/
getFileExclusions(): FileExclusions {
return this.fileExclusions;
}
async createToolRegistry(): Promise<ToolRegistry> {
const registry = new ToolRegistry(this);

View File

@@ -44,6 +44,7 @@ export * from './utils/formatters.js';
export * from './utils/generateContentResponseUtilities.js';
export * from './utils/filesearch/fileSearch.js';
export * from './utils/errorParsing.js';
export * from './utils/ignorePatterns.js';
// Export services
export * from './services/fileDiscoveryService.js';

View File

@@ -29,6 +29,9 @@ describe('GlobTool', () => {
getFileFilteringRespectGitIgnore: () => true,
getTargetDir: () => tempRootDir,
getWorkspaceContext: () => createMockWorkspaceContext(tempRootDir),
getFileExclusions: () => ({
getGlobExcludes: () => [],
}),
} as unknown as Config;
beforeEach(async () => {

View File

@@ -155,7 +155,7 @@ class GlobToolInvocation extends BaseToolInvocation<
stat: true,
nocase: !this.params.case_sensitive,
dot: true,
ignore: ['**/node_modules/**', '**/.git/**'],
ignore: this.config.getFileExclusions().getGlobExcludes(),
follow: false,
signal,
})) as GlobPath[];

View File

@@ -39,6 +39,9 @@ describe('GrepTool', () => {
const mockConfig = {
getTargetDir: () => tempRootDir,
getWorkspaceContext: () => createMockWorkspaceContext(tempRootDir),
getFileExclusions: () => ({
getGlobExcludes: () => [],
}),
} as unknown as Config;
beforeEach(async () => {
@@ -258,6 +261,9 @@ describe('GrepTool', () => {
getTargetDir: () => tempRootDir,
getWorkspaceContext: () =>
createMockWorkspaceContext(tempRootDir, [secondDir]),
getFileExclusions: () => ({
getGlobExcludes: () => [],
}),
} as unknown as Config;
const multiDirGrepTool = new GrepTool(multiDirConfig);
@@ -308,6 +314,9 @@ describe('GrepTool', () => {
getTargetDir: () => tempRootDir,
getWorkspaceContext: () =>
createMockWorkspaceContext(tempRootDir, [secondDir]),
getFileExclusions: () => ({
getGlobExcludes: () => [],
}),
} as unknown as Config;
const multiDirGrepTool = new GrepTool(multiDirConfig);
@@ -367,6 +376,9 @@ describe('GrepTool', () => {
getTargetDir: () => tempRootDir,
getWorkspaceContext: () =>
createMockWorkspaceContext(tempRootDir, ['/another/dir']),
getFileExclusions: () => ({
getGlobExcludes: () => [],
}),
} as unknown as Config;
const multiDirGrepTool = new GrepTool(multiDirConfig);

View File

@@ -21,6 +21,7 @@ import { makeRelative, shortenPath } from '../utils/paths.js';
import { getErrorMessage, isNodeError } from '../utils/errors.js';
import { isGitRepository } from '../utils/gitUtils.js';
import { Config } from '../config/config.js';
import { FileExclusions } from '../utils/ignorePatterns.js';
import { ToolErrorType } from './tool-error.js';
// --- Interfaces ---
@@ -58,11 +59,14 @@ class GrepToolInvocation extends BaseToolInvocation<
GrepToolParams,
ToolResult
> {
private readonly fileExclusions: FileExclusions;
/**
* @param config Configuration object; also the source of the file exclusions.
* @param params Grep parameters, forwarded unchanged to the base invocation.
*/
constructor(
private readonly config: Config,
params: GrepToolParams,
) {
super(params);
// Look the exclusions up once and keep them on the instance.
this.fileExclusions = config.getFileExclusions();
}
/**
@@ -281,7 +285,6 @@ class GrepToolInvocation extends BaseToolInvocation<
/**
* Gets a description of the grep operation
* @param params Parameters for the grep operation
* @returns A string describing the grep
*/
getDescription(): string {
@@ -391,7 +394,27 @@ class GrepToolInvocation extends BaseToolInvocation<
if (grepAvailable) {
strategyUsed = 'system grep';
const grepArgs = ['-r', '-n', '-H', '-E'];
const commonExcludes = ['.git', 'node_modules', 'bower_components'];
// Extract directory names from exclusion patterns for grep --exclude-dir
const globExcludes = this.fileExclusions.getGlobExcludes();
const commonExcludes = globExcludes
.map((pattern) => {
let dir = pattern;
if (dir.startsWith('**/')) {
dir = dir.substring(3);
}
if (dir.endsWith('/**')) {
dir = dir.slice(0, -3);
} else if (dir.endsWith('/')) {
dir = dir.slice(0, -1);
}
// Only consider patterns that are likely directories. This filters out file patterns.
if (dir && !dir.includes('/') && !dir.includes('*')) {
return dir;
}
return null;
})
.filter((dir): dir is string => !!dir);
commonExcludes.forEach((dir) => grepArgs.push(`--exclude-dir=${dir}`));
if (include) {
grepArgs.push(`--include=${include}`);
@@ -474,13 +497,7 @@ class GrepToolInvocation extends BaseToolInvocation<
);
strategyUsed = 'javascript fallback';
const globPattern = include ? include : '**/*';
const ignorePatterns = [
'.git/**',
'node_modules/**',
'bower_components/**',
'.svn/**',
'.hg/**',
]; // Use glob patterns for ignores here
const ignorePatterns = this.fileExclusions.getGlobExcludes();
const filesStream = globStream(globPattern, {
cwd: absolutePath,

View File

@@ -16,6 +16,10 @@ import { Config } from '../config/config.js';
import { WorkspaceContext } from '../utils/workspaceContext.js';
import { StandardFileSystemService } from '../services/fileSystemService.js';
import { ToolErrorType } from './tool-error.js';
import {
COMMON_IGNORE_PATTERNS,
DEFAULT_FILE_EXCLUDES,
} from '../utils/ignorePatterns.js';
import * as glob from 'glob';
vi.mock('glob', { spy: true });
@@ -77,6 +81,13 @@ describe('ReadManyFilesTool', () => {
getTargetDir: () => tempRootDir,
getWorkspaceDirs: () => [tempRootDir],
getWorkspaceContext: () => new WorkspaceContext(tempRootDir),
getFileExclusions: () => ({
getCoreIgnorePatterns: () => COMMON_IGNORE_PATTERNS,
getDefaultExcludePatterns: () => DEFAULT_FILE_EXCLUDES,
getGlobExcludes: () => COMMON_IGNORE_PATTERNS,
buildExcludePatterns: () => DEFAULT_FILE_EXCLUDES,
getReadManyFilesExcludes: () => DEFAULT_FILE_EXCLUDES,
}),
} as Partial<Config> as Config;
tool = new ReadManyFilesTool(mockConfig);
@@ -484,6 +495,13 @@ describe('ReadManyFilesTool', () => {
}),
getWorkspaceContext: () => new WorkspaceContext(tempDir1, [tempDir2]),
getTargetDir: () => tempDir1,
getFileExclusions: () => ({
getCoreIgnorePatterns: () => COMMON_IGNORE_PATTERNS,
getDefaultExcludePatterns: () => [],
getGlobExcludes: () => COMMON_IGNORE_PATTERNS,
buildExcludePatterns: () => [],
getReadManyFilesExcludes: () => [],
}),
} as Partial<Config> as Config;
tool = new ReadManyFilesTool(mockConfig);

View File

@@ -15,7 +15,6 @@ import { getErrorMessage } from '../utils/errors.js';
import * as fs from 'fs';
import * as path from 'path';
import { glob, escape } from 'glob';
import { getCurrentGeminiMdFilename } from './memoryTool.js';
import {
detectFileType,
processSingleFileContent,
@@ -98,49 +97,13 @@ type FileProcessingResult =
};
/**
* Default exclusion patterns for commonly ignored directories and binary file types.
* These are compatible with glob ignore patterns.
* Creates the default exclusion patterns including dynamic patterns.
* This combines the shared patterns with dynamic patterns like GEMINI.md.
* TODO(adh): Consider making this configurable or extendable through a command line argument.
* TODO(adh): Look into sharing this list with the glob tool.
*/
const DEFAULT_EXCLUDES: string[] = [
'**/node_modules/**',
'**/.git/**',
'**/.vscode/**',
'**/.idea/**',
'**/dist/**',
'**/build/**',
'**/coverage/**',
'**/__pycache__/**',
'**/*.pyc',
'**/*.pyo',
'**/*.bin',
'**/*.exe',
'**/*.dll',
'**/*.so',
'**/*.dylib',
'**/*.class',
'**/*.jar',
'**/*.war',
'**/*.zip',
'**/*.tar',
'**/*.gz',
'**/*.bz2',
'**/*.rar',
'**/*.7z',
'**/*.doc',
'**/*.docx',
'**/*.xls',
'**/*.xlsx',
'**/*.ppt',
'**/*.pptx',
'**/*.odt',
'**/*.ods',
'**/*.odp',
'**/*.DS_Store',
'**/.env',
`**/${getCurrentGeminiMdFilename()}`,
];
/**
 * Resolves the default exclusion patterns for read-many-files from the
 * config's FileExclusions.
 *
 * @param config Optional configuration; when absent no defaults apply.
 * @returns The default exclusion patterns, or an empty array without a config.
 */
function getDefaultExcludes(config?: Config): string[] {
  if (config == null) {
    return [];
  }
  const fromConfig = config.getFileExclusions().getReadManyFilesExcludes();
  return fromConfig ?? [];
}
const DEFAULT_OUTPUT_SEPARATOR_FORMAT = '--- {filePath} ---';
const DEFAULT_OUTPUT_TERMINATOR = '\n--- End of content ---';
@@ -172,7 +135,11 @@ ${this.config.getTargetDir()}
.getGeminiIgnorePatterns();
const finalExclusionPatternsForDescription: string[] =
paramUseDefaultExcludes
? [...DEFAULT_EXCLUDES, ...paramExcludes, ...geminiIgnorePatterns]
? [
...getDefaultExcludes(this.config),
...paramExcludes,
...geminiIgnorePatterns,
]
: [...paramExcludes, ...geminiIgnorePatterns];
let excludeDesc = `Excluding: ${
@@ -230,7 +197,7 @@ ${finalExclusionPatternsForDescription
const contentParts: PartListUnion = [];
const effectiveExcludes = useDefaultExcludes
? [...DEFAULT_EXCLUDES, ...exclude]
? [...getDefaultExcludes(this.config), ...exclude]
: [...exclude];
const searchPatterns = [...inputPatterns, ...include];

View File

@@ -10,6 +10,7 @@ import { PartUnion } from '@google/genai';
import mime from 'mime-types';
import { FileSystemService } from '../services/fileSystemService.js';
import { ToolErrorType } from '../tools/tool-error.js';
import { BINARY_EXTENSIONS } from './ignorePatterns.js';
// Constants for text file processing
const DEFAULT_MAX_LINES_TEXT_FILE = 2000;
@@ -153,38 +154,7 @@ export async function detectFileType(
// Stricter binary check for common non-text extensions before content check
// These are often not well-covered by mime-types or might be misidentified.
if (
[
'.zip',
'.tar',
'.gz',
'.exe',
'.dll',
'.so',
'.class',
'.jar',
'.war',
'.7z',
'.doc',
'.docx',
'.xls',
'.xlsx',
'.ppt',
'.pptx',
'.odt',
'.ods',
'.odp',
'.bin',
'.dat',
'.obj',
'.o',
'.a',
'.lib',
'.wasm',
'.pyc',
'.pyo',
].includes(ext)
) {
if (BINARY_EXTENSIONS.includes(ext)) {
return 'binary';
}

View File

@@ -0,0 +1,319 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi } from 'vitest';
import {
FileExclusions,
BINARY_EXTENSIONS,
extractExtensionsFromPatterns,
} from './ignorePatterns.js';
import { Config } from '../config/config.js';
// Mock the memoryTool module so the context filename is pinned to
// 'GEMINI.md'; the dynamic-pattern assertions in this file rely on that value.
vi.mock('../tools/memoryTool.js', () => ({
getCurrentGeminiMdFilename: vi.fn(() => 'GEMINI.md'),
}));
describe('FileExclusions', () => {
  // Asserts that every entry of `expected` is present in `actual`.
  const expectContainsAll = (actual: string[], expected: string[]): void => {
    expected.forEach((entry) => expect(actual).toContain(entry));
  };

  // Asserts that no entry of `unexpected` is present in `actual`.
  const expectContainsNone = (actual: string[], unexpected: string[]): void => {
    unexpected.forEach((entry) => expect(actual).not.toContain(entry));
  };

  // The five core directory patterns shared by all tools.
  const corePatterns = [
    '**/node_modules/**',
    '**/.git/**',
    '**/bower_components/**',
    '**/.svn/**',
    '**/.hg/**',
  ];

  describe('getCoreIgnorePatterns', () => {
    it('should return basic ignore patterns', () => {
      const result = new FileExclusions().getCoreIgnorePatterns();

      expectContainsAll(result, corePatterns);
      expect(result).toHaveLength(5);
    });
  });

  describe('getDefaultExcludePatterns', () => {
    it('should return comprehensive patterns by default', () => {
      const result = new FileExclusions().getDefaultExcludePatterns();

      expectContainsAll(result, [
        // Core patterns
        '**/node_modules/**',
        '**/.git/**',
        // Directory excludes
        '**/.vscode/**',
        '**/dist/**',
        '**/build/**',
        // Binary patterns
        '**/*.exe',
        '**/*.jar',
        // System files
        '**/.DS_Store',
        '**/.env',
        // Dynamic patterns
        '**/GEMINI.md',
      ]);
    });

    it('should respect includeDefaults option', () => {
      const result = new FileExclusions().getDefaultExcludePatterns({
        includeDefaults: false,
        includeDynamicPatterns: false,
      });

      expectContainsNone(result, [
        '**/node_modules/**',
        '**/.git/**',
        '**/GEMINI.md',
      ]);
      expect(result).toHaveLength(0);
    });

    it('should include custom patterns', () => {
      const customPatterns = ['**/custom/**', '**/*.custom'];
      const result = new FileExclusions().getDefaultExcludePatterns({
        customPatterns,
      });

      expectContainsAll(result, ['**/custom/**', '**/*.custom']);
    });

    it('should include runtime patterns', () => {
      const runtimePatterns = ['**/temp/**', '**/*.tmp'];
      const result = new FileExclusions().getDefaultExcludePatterns({
        runtimePatterns,
      });

      expectContainsAll(result, ['**/temp/**', '**/*.tmp']);
    });

    it('should respect includeDynamicPatterns option', () => {
      const excluder = new FileExclusions();
      const withDynamic = excluder.getDefaultExcludePatterns({
        includeDynamicPatterns: true,
      });
      const withoutDynamic = excluder.getDefaultExcludePatterns({
        includeDynamicPatterns: false,
      });

      expect(withDynamic).toContain('**/GEMINI.md');
      expect(withoutDynamic).not.toContain('**/GEMINI.md');
    });
  });

  describe('getReadManyFilesExcludes', () => {
    it('should provide legacy compatibility', () => {
      const result = new FileExclusions().getReadManyFilesExcludes([
        '**/*.log',
      ]);

      expectContainsAll(result, [
        // All default patterns, including the dynamic one
        '**/node_modules/**',
        '**/.git/**',
        '**/GEMINI.md',
        // The additional exclude passed in
        '**/*.log',
      ]);
    });
  });

  describe('getGlobExcludes', () => {
    it('should return core patterns for glob operations', () => {
      const result = new FileExclusions().getGlobExcludes();

      expectContainsAll(result, corePatterns);
      // Only the core set: the comprehensive defaults are not included.
      expect(result).toHaveLength(5);
    });

    it('should include additional excludes', () => {
      const result = new FileExclusions().getGlobExcludes(['**/temp/**']);

      expectContainsAll(result, [
        '**/node_modules/**',
        '**/.git/**',
        '**/temp/**',
      ]);
    });
  });

  describe('with Config', () => {
    it('should use config custom excludes when available', () => {
      const getCustomExcludes = vi.fn(() => ['**/config-exclude/**']);
      const config = { getCustomExcludes } as unknown as Config;

      const result = new FileExclusions(config).getDefaultExcludePatterns();

      expect(result).toContain('**/config-exclude/**');
      expect(getCustomExcludes).toHaveBeenCalled();
    });

    it('should handle config without getCustomExcludes method', () => {
      const config = {} as Config;

      const result = new FileExclusions(config).getDefaultExcludePatterns();

      // Must not throw, and the defaults must still be present.
      expect(result).toContain('**/node_modules/**');
      expect(result.length).toBeGreaterThan(0);
    });

    it('should include config custom excludes in glob patterns', () => {
      const getCustomExcludes = vi.fn(() => ['**/config-glob/**']);
      const config = { getCustomExcludes } as unknown as Config;

      const result = new FileExclusions(config).getGlobExcludes();

      expectContainsAll(result, [
        '**/node_modules/**',
        '**/.git/**',
        '**/config-glob/**',
      ]);
    });
  });

  describe('buildExcludePatterns', () => {
    it('should be an alias for getDefaultExcludePatterns', () => {
      const excluder = new FileExclusions();
      const options = {
        includeDefaults: true,
        customPatterns: ['**/test/**'],
        runtimePatterns: ['**/runtime/**'],
      };

      const expected = excluder.getDefaultExcludePatterns(options);

      expect(excluder.buildExcludePatterns(options)).toEqual(expected);
    });
  });
});
describe('BINARY_EXTENSIONS', () => {
  // Asserts that each listed extension is part of BINARY_EXTENSIONS.
  const expectListed = (extensions: string[]): void => {
    for (const ext of extensions) {
      expect(BINARY_EXTENSIONS).toContain(ext);
    }
  };

  it('should include common binary file extensions', () => {
    expectListed(['.exe', '.dll', '.jar', '.zip']);
  });

  it('should include additional binary extensions', () => {
    expectListed(['.dat', '.obj', '.wasm']);
  });

  it('should include media file extensions', () => {
    expectListed(['.pdf', '.png', '.jpg']);
  });

  it('should be sorted', () => {
    expect(BINARY_EXTENSIONS).toEqual(BINARY_EXTENSIONS.slice().sort());
  });

  it('should not contain invalid extensions from brace patterns', () => {
    // Unhandled brace expansion would leak entries such as '.{jpg,png}'.
    const leaked = BINARY_EXTENSIONS.some((ext) => /[{}]/.test(ext));
    expect(leaked).toBe(false);
  });
});
describe('extractExtensionsFromPatterns', () => {
  it('should extract simple extensions', () => {
    expect(
      extractExtensionsFromPatterns(['**/*.exe', '**/*.jar', '**/*.zip']),
    ).toEqual(['.exe', '.jar', '.zip']);
  });

  it('should handle brace expansion patterns', () => {
    const extensions = extractExtensionsFromPatterns([
      '**/*.{js,ts}',
      '**/*.{jpg,png}',
    ]);

    for (const expected of ['.js', '.ts', '.jpg', '.png']) {
      expect(extensions).toContain(expected);
    }
    for (const unexpanded of ['.{js,ts}', '.{jpg,png}']) {
      expect(extensions).not.toContain(unexpanded);
    }
  });

  it('should combine simple and brace expansion patterns', () => {
    const extensions = extractExtensionsFromPatterns([
      '**/*.exe',
      '**/*.{js,ts}',
      '**/*.pdf',
    ]);

    for (const expected of ['.exe', '.js', '.ts', '.pdf']) {
      expect(extensions).toContain(expected);
    }
  });

  it('should handle empty brace expansion', () => {
    // Empty alternatives carry no extension and must be dropped.
    expect(
      extractExtensionsFromPatterns(['**/*.{}', '**/*.{,}']),
    ).toHaveLength(0);
  });

  it('should ignore invalid patterns', () => {
    expect(
      extractExtensionsFromPatterns([
        'no-asterisk.exe',
        '**/*no-dot',
        '**/*.{unclosed',
      ]),
    ).toHaveLength(0);
  });

  it('should remove duplicates and sort results', () => {
    expect(
      extractExtensionsFromPatterns(['**/*.js', '**/*.{js,ts}', '**/*.ts']),
    ).toEqual(['.js', '.ts']);
  });

  it('should handle complex brace patterns with multiple extensions', () => {
    expect(
      extractExtensionsFromPatterns(['**/*.{html,css,js,jsx,ts,tsx}']),
    ).toEqual(['.css', '.html', '.js', '.jsx', '.ts', '.tsx']);
  });

  it('should handle compound extensions correctly using path.extname', () => {
    // Only the final extension segment is kept.
    expect(
      extractExtensionsFromPatterns([
        '**/*.tar.gz',
        '**/*.min.js',
        '**/*.d.ts',
      ]),
    ).toEqual(['.gz', '.js', '.ts']);
  });

  it('should handle dotfiles correctly', () => {
    expect(
      extractExtensionsFromPatterns([
        '**/*.gitignore',
        '**/*.profile',
        '**/*.bashrc',
      ]),
    ).toEqual(['.bashrc', '.gitignore', '.profile']);
  });

  it('should handle edge cases with path.extname', () => {
    // A trailing dot yields no extension and is filtered out.
    expect(
      extractExtensionsFromPatterns(['**/*.hidden.', '**/*.config.json']),
    ).toEqual(['.json']);
  });
});

View File

@@ -0,0 +1,276 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import path from 'path';
import { Config } from '../config/config.js';
import { getCurrentGeminiMdFilename } from '../tools/memoryTool.js';
/**
 * Core ignore patterns shared by multiple tools: dependency folders and
 * version-control metadata directories. Each directory name below is
 * expanded into a glob that matches it at any depth.
 */
export const COMMON_IGNORE_PATTERNS: string[] = [
  'node_modules',
  '.git',
  'bower_components',
  '.svn',
  '.hg',
].map((dirName) => `**/${dirName}/**`);
/**
 * Glob patterns for binary file types that are typically excluded from text
 * processing. Each bare extension below is expanded into a glob that matches
 * files with that extension at any depth.
 */
export const BINARY_FILE_PATTERNS: string[] = [
  // Executables, libraries and raw binaries
  'bin',
  'exe',
  'dll',
  'so',
  'dylib',
  // JVM artifacts
  'class',
  'jar',
  'war',
  // Archives
  'zip',
  'tar',
  'gz',
  'bz2',
  'rar',
  '7z',
  // Office documents
  'doc',
  'docx',
  'xls',
  'xlsx',
  'ppt',
  'pptx',
  'odt',
  'ods',
  'odp',
].map((extension) => `**/*.${extension}`);
/**
 * Glob patterns for media files (PDF and common image formats).
 * These are deliberately left out of DEFAULT_FILE_EXCLUDES, while their
 * extensions are still folded into BINARY_EXTENSIONS.
 * NOTE(review): the intent appears to be that read-many-files handles these
 * specially when they are explicitly requested - confirm against that tool.
 */
export const MEDIA_FILE_PATTERNS: string[] = [
  'pdf',
  'png',
  'jpg',
  'jpeg',
  'gif',
  'webp',
  'bmp',
  'svg',
].map((extension) => `**/*.${extension}`);
/**
 * Editor, build-output and cache directories that are typically ignored in
 * development projects. Each directory name below is expanded into a glob
 * that matches it at any depth.
 */
export const COMMON_DIRECTORY_EXCLUDES: string[] = [
  '.vscode',
  '.idea',
  'dist',
  'build',
  'coverage',
  '__pycache__',
].map((dirName) => `**/${dirName}/**`);
/**
 * Python-specific exclusions: compiled bytecode files at any depth.
 */
export const PYTHON_EXCLUDES: string[] = ['pyc', 'pyo'].map(
  (extension) => `**/*.${extension}`,
);
/**
 * System and environment files, matched by exact file name at any depth.
 */
export const SYSTEM_FILE_EXCLUDES: string[] = ['.DS_Store', '.env'].map(
  (fileName) => `**/${fileName}`,
);
/**
 * Comprehensive default exclusion list, built by concatenating (in order)
 * the core ignore patterns, common directory excludes, binary file patterns,
 * Python excludes and system file excludes.
 * The entries are compatible with glob ignore patterns.
 * Note: MEDIA_FILE_PATTERNS (PDF, images) are intentionally not part of this
 * list, since those files need special handling in read-many-files.
 */
export const DEFAULT_FILE_EXCLUDES: string[] = COMMON_IGNORE_PATTERNS.concat(
  COMMON_DIRECTORY_EXCLUDES,
  BINARY_FILE_PATTERNS,
  PYTHON_EXCLUDES,
  SYSTEM_FILE_EXCLUDES,
);
/**
* Options for configuring file exclusion patterns.
* Accepted by FileExclusions.getDefaultExcludePatterns and
* FileExclusions.buildExcludePatterns; every field is optional.
*/
export interface ExcludeOptions {
/**
* Whether to start from DEFAULT_FILE_EXCLUDES. Defaults to true.
*/
includeDefaults?: boolean;
/**
* Additional custom patterns supplied by the caller. They are appended
* after any patterns returned by the config's getCustomExcludes().
* Defaults to an empty list.
*/
customPatterns?: string[];
/**
* Additional patterns provided at runtime (e.g., from CLI arguments).
* They are appended last. Defaults to an empty list.
*/
runtimePatterns?: string[];
/**
* Whether to add a pattern for the current Gemini MD filename, as reported
* by getCurrentGeminiMdFilename(). Defaults to true.
*/
includeDynamicPatterns?: boolean;
}
/**
 * Single source of file exclusion patterns for the file tools.
 * Combines the shared pattern constants with optional patterns from Config
 * and with caller-supplied patterns.
 */
export class FileExclusions {
  /**
   * @param config Optional configuration. When present, its
   *   getCustomExcludes() result is merged into the returned pattern lists.
   */
  constructor(private config?: Config) {}

  /**
   * Returns a fresh copy of the minimal core ignore patterns
   * (COMMON_IGNORE_PATTERNS), so callers may mutate the result freely.
   */
  getCoreIgnorePatterns(): string[] {
    return COMMON_IGNORE_PATTERNS.slice();
  }

  /**
   * Builds the comprehensive exclusion list used by operations such as
   * read-many-files.
   *
   * The result is assembled in this order: default excludes, the dynamic
   * Gemini MD filename pattern, config custom excludes, `customPatterns`,
   * then `runtimePatterns`. Duplicates are not removed.
   *
   * @param options Toggles and extra patterns; see ExcludeOptions.
   * @returns A new array of glob ignore patterns.
   */
  getDefaultExcludePatterns(options: ExcludeOptions = {}): string[] {
    const {
      includeDefaults: withDefaults = true,
      customPatterns: callerPatterns = [],
      runtimePatterns: lateRuntimePatterns = [],
      includeDynamicPatterns: withDynamic = true,
    } = options;

    const defaults = withDefaults ? DEFAULT_FILE_EXCLUDES : [];

    // The context filename can change at runtime, so it is resolved per call.
    const dynamic = withDynamic ? [`**/${getCurrentGeminiMdFilename()}`] : [];

    // The optional call tolerates a missing config as well as config objects
    // (e.g. partial test doubles) that lack getCustomExcludes.
    const fromConfig = this.config?.getCustomExcludes?.() ?? [];

    return [
      ...defaults,
      ...dynamic,
      ...fromConfig,
      ...callerPatterns,
      ...lateRuntimePatterns,
    ];
  }

  /**
   * Returns the exclusion list for the read-many-files tool: all defaults,
   * the dynamic pattern, config excludes, then `additionalExcludes`.
   *
   * @param additionalExcludes Extra patterns appended at the end.
   */
  getReadManyFilesExcludes(additionalExcludes: string[] = []): string[] {
    const options: ExcludeOptions = {
      includeDefaults: true,
      runtimePatterns: additionalExcludes,
      includeDynamicPatterns: true,
    };
    return this.getDefaultExcludePatterns(options);
  }

  /**
   * Returns the exclusion list for glob-style operations: the core ignore
   * patterns, followed by config custom excludes, followed by
   * `additionalExcludes`.
   *
   * @param additionalExcludes Extra patterns appended at the end.
   */
  getGlobExcludes(additionalExcludes: string[] = []): string[] {
    const core = this.getCoreIgnorePatterns();
    // Same tolerant lookup as in getDefaultExcludePatterns.
    const fromConfig = this.config?.getCustomExcludes?.() ?? [];
    return [...core, ...fromConfig, ...additionalExcludes];
  }

  /**
   * Alias of getDefaultExcludePatterns for callers that pass explicit options.
   *
   * @param options Toggles and extra patterns; see ExcludeOptions.
   */
  buildExcludePatterns(options: ExcludeOptions): string[] {
    return this.getDefaultExcludePatterns(options);
  }
}
// Characters that mark a candidate as a glob expression (wildcard, character
// class, brace/extglob group, path separator) rather than a literal extension.
const GLOB_META_CHARS = /[*?[\]{}()\/\\]/;

/**
 * Reduces one literal candidate such as `.exe` or `.tar.gz` to the extension
 * that `path.extname` reports for a file with that suffix.
 *
 * @returns The final extension (e.g. `.gz`), or `undefined` when the
 *   candidate is empty, ends in a dot, or still contains glob syntax.
 */
function toFinalExtension(candidate: string): string | undefined {
  if (!candidate.startsWith('.') || GLOB_META_CHARS.test(candidate)) {
    return undefined;
  }
  // extname on a dummy file name maps '.tar.gz' to '.gz', keeps '.profile'
  // as '.profile', and returns just '.' for a trailing dot.
  const ext = path.extname(`dummy${candidate}`);
  return ext.length > 1 ? ext : undefined;
}

/**
 * Extracts literal file extensions from glob patterns.
 *
 * - `glob/*.exe` yields `.exe`.
 * - A single trailing brace group, `glob/*.{js,ts}`, yields `.js` and `.ts`.
 * - Compound suffixes keep only the final segment (`.tar.gz` gives `.gz`),
 *   both for plain patterns and for brace alternatives, so results are
 *   directly comparable with `path.extname(file)`.
 * - Patterns whose extension part is not literal (wildcards such as `*.*`,
 *   character classes, nested or unclosed braces, path separators) are
 *   skipped instead of leaking glob syntax into the result.
 *
 * @param patterns Glob patterns to inspect; patterns without `*.` are ignored.
 * @returns The unique extensions, each with a leading dot, sorted ascending.
 */
export function extractExtensionsFromPatterns(patterns: string[]): string[] {
  const extensions = new Set<string>();

  for (const pattern of patterns) {
    const marker = pattern.lastIndexOf('*.');
    if (marker === -1) {
      continue;
    }

    // Everything after the last '*', i.e. a string that starts with '.'.
    const extPart = pattern.substring(marker + 1);

    // Expand one trailing brace group into its alternatives.
    const candidates =
      extPart.startsWith('.{') && extPart.endsWith('}')
        ? extPart
            .slice(2, -1)
            .split(',')
            .map((alternative) => `.${alternative.trim()}`)
        : [extPart];

    for (const candidate of candidates) {
      const ext = toFinalExtension(candidate);
      if (ext !== undefined) {
        extensions.add(ext);
      }
    }
  }

  return Array.from(extensions).sort();
}
/**
 * Sorted list of extensions (with leading dot) treated as binary for quick
 * lookup. It is derived from BINARY_FILE_PATTERNS, MEDIA_FILE_PATTERNS and
 * PYTHON_EXCLUDES, plus a few extra extensions that have no exclude pattern.
 */
export const BINARY_EXTENSIONS: string[] = extractExtensionsFromPatterns(
  BINARY_FILE_PATTERNS.concat(MEDIA_FILE_PATTERNS, PYTHON_EXCLUDES),
)
  // Additional binary extensions not covered by the pattern lists.
  .concat(['.dat', '.obj', '.o', '.a', '.lib', '.wasm'])
  .sort();