Files
qwen-code/packages/core/src/utils/ignorePatterns.ts
2025-08-25 22:04:53 +00:00

277 lines
7.6 KiB
TypeScript

/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import path from 'node:path';
import type { Config } from '../config/config.js';
import { getCurrentGeminiMdFilename } from '../tools/memoryTool.js';
/**
 * Baseline ignore globs shared by several tools: package-manager and
 * version-control directories, each matched at any depth together with
 * everything beneath it.
 */
export const COMMON_IGNORE_PATTERNS: string[] = [
  'node_modules',
  '.git',
  'bower_components',
  '.svn',
  '.hg',
].map((directory) => `**/${directory}/**`);
/**
 * Globs for binary file types (executables, libraries, archives, office
 * documents) that are normally left out of text processing. Every entry has
 * the form `**\/*.<ext>` (shown escaped here), built from the bare extension.
 */
export const BINARY_FILE_PATTERNS: string[] = [
  // Executables, libraries and compiled artifacts
  'bin',
  'exe',
  'dll',
  'so',
  'dylib',
  'class',
  // Archives
  'jar',
  'war',
  'zip',
  'tar',
  'gz',
  'bz2',
  'rar',
  '7z',
  // Office documents
  'doc',
  'docx',
  'xls',
  'xlsx',
  'ppt',
  'pptx',
  'odt',
  'ods',
  'odp',
].map((extension) => `**/*.${extension}`);
/**
 * Globs for media files (PDF and common image formats). They are kept apart
 * from the binary list because tools such as read-many-files treat them
 * specially: they can be processed as inlineData when explicitly requested.
 */
export const MEDIA_FILE_PATTERNS: string[] = [
  'pdf',
  'png',
  'jpg',
  'jpeg',
  'gif',
  'webp',
  'bmp',
  'svg',
].map((extension) => `**/*.${extension}`);
/**
 * Globs for editor, build-output and cache directories that development
 * projects usually ignore, each matched at any depth with all of its contents.
 */
export const COMMON_DIRECTORY_EXCLUDES: string[] = [
  '.vscode',
  '.idea',
  'dist',
  'build',
  'coverage',
  '__pycache__',
].map((directory) => `**/${directory}/**`);
/**
 * Globs for Python-specific files (`.pyc` and `.pyo`).
 */
export const PYTHON_EXCLUDES: string[] = ['pyc', 'pyo'].map(
  (extension) => `**/*.${extension}`,
);
/**
 * Globs for system and environment files, matched by exact file name at any
 * depth.
 */
export const SYSTEM_FILE_EXCLUDES: string[] = ['.DS_Store', '.env'].map(
  (fileName) => `**/${fileName}`,
);
/**
 * The full default exclusion list, usable as glob ignore patterns.
 *
 * Concatenation order: common ignore directories, common directory excludes,
 * binary file patterns, Python excludes, then system file excludes.
 *
 * Media patterns (PDF, images) are intentionally absent because
 * read-many-files handles them separately.
 */
export const DEFAULT_FILE_EXCLUDES: string[] = COMMON_IGNORE_PATTERNS.concat(
  COMMON_DIRECTORY_EXCLUDES,
  BINARY_FILE_PATTERNS,
  PYTHON_EXCLUDES,
  SYSTEM_FILE_EXCLUDES,
);
/**
 * Options for configuring file exclusion patterns.
 *
 * Consumed by FileExclusions.getDefaultExcludePatterns, which assembles the
 * result in this order: defaults, dynamic patterns, excludes supplied by the
 * Config object (if any), `customPatterns`, then `runtimePatterns`.
 */
export interface ExcludeOptions {
/**
 * Whether to include default exclusion patterns (DEFAULT_FILE_EXCLUDES).
 * Defaults to true when omitted.
 */
includeDefaults?: boolean;
/**
 * Additional custom patterns from configuration. Appended after any excludes
 * obtained from the Config object. Defaults to an empty list when omitted.
 */
customPatterns?: string[];
/**
 * Additional patterns provided at runtime (e.g., from CLI arguments).
 * Appended last. Defaults to an empty list when omitted.
 */
runtimePatterns?: string[];
/**
 * Whether to include dynamic patterns, i.e. a glob matching the file name
 * returned by getCurrentGeminiMdFilename() at any depth.
 * Defaults to true when omitted.
 */
includeDynamicPatterns?: boolean;
}
/**
 * Central place for assembling file-exclusion glob lists for the different
 * tools. An optional Config may contribute extra excludes; without one, only
 * the built-in lists and the caller-supplied patterns are used.
 */
export class FileExclusions {
  constructor(private config?: Config) {}

  /**
   * Returns a fresh copy of the minimal ignore list (COMMON_IGNORE_PATTERNS)
   * for basic operations such as glob.
   */
  getCoreIgnorePatterns(): string[] {
    return COMMON_IGNORE_PATTERNS.slice();
  }

  /**
   * Assembles the comprehensive exclusion list used by operations such as
   * read-many-files.
   *
   * Resulting order: default excludes (unless disabled), the dynamic pattern
   * for the current Gemini MD file name (unless disabled), excludes from the
   * Config object, `customPatterns`, and finally `runtimePatterns`.
   *
   * @param options Toggles and extra patterns; every field is optional.
   * @returns A newly created array of glob patterns (duplicates are kept).
   */
  getDefaultExcludePatterns(options: ExcludeOptions = {}): string[] {
    const {
      includeDefaults: withDefaults = true,
      customPatterns: fromOptions = [],
      runtimePatterns: fromRuntime = [],
      includeDynamicPatterns: withDynamic = true,
    } = options;

    const baseline = withDefaults ? DEFAULT_FILE_EXCLUDES : [];

    // The memory file name is looked up on every call.
    const dynamic = withDynamic ? [`**/${getCurrentGeminiMdFilename()}`] : [];

    // TODO: getCustomExcludes method needs to be implemented in Config interface
    const fromConfig = this.config
      ? (this.config.getCustomExcludes?.() ?? [])
      : [];

    return [
      ...baseline,
      ...dynamic,
      ...fromConfig,
      ...fromOptions,
      ...fromRuntime,
    ];
  }

  /**
   * Exclusion list for the read-many-files tool: all defaults and dynamic
   * patterns, with `additionalExcludes` appended as runtime patterns.
   * Per the original note, this is meant to match the behavior of the former
   * getDefaultExcludes() function.
   *
   * @param additionalExcludes Extra patterns to append at the end.
   */
  getReadManyFilesExcludes(additionalExcludes: string[] = []): string[] {
    const readManyOptions: ExcludeOptions = {
      includeDefaults: true,
      includeDynamicPatterns: true,
      runtimePatterns: additionalExcludes,
    };
    return this.getDefaultExcludePatterns(readManyOptions);
  }

  /**
   * Exclusion list for glob tool operations: the core ignore patterns,
   * followed by excludes from the Config object, followed by
   * `additionalExcludes`.
   *
   * @param additionalExcludes Extra patterns to append at the end.
   */
  getGlobExcludes(additionalExcludes: string[] = []): string[] {
    const essentials = this.getCoreIgnorePatterns();
    // TODO: getCustomExcludes method needs to be implemented in Config interface
    const fromConfig = this.config?.getCustomExcludes?.() ?? [];
    return [...essentials, ...fromConfig, ...additionalExcludes];
  }

  /**
   * Fully customizable variant for advanced callers; delegates directly to
   * getDefaultExcludePatterns with the given options.
   *
   * @param options Toggles and extra patterns to apply.
   */
  buildExcludePatterns(options: ExcludeOptions): string[] {
    return this.getDefaultExcludePatterns(options);
  }
}
/**
 * Turns one extension candidate (always starting with a dot, e.g. `.exe` or
 * `.tar.gz`) into a single extension, or returns `undefined` when the
 * candidate cannot stand for a literal extension.
 */
function normalizeExtensionCandidate(candidate: string): string | undefined {
  if (!candidate.startsWith('.')) {
    return undefined;
  }
  // A path separator, a stray brace or a glob wildcard means the text after
  // `*.` is not a plain extension (e.g. `*.txt/**`, `*.{unclosed`, `*.*`,
  // `*.py[co]`), so it must not be reported as one.
  for (const meta of '/{}*?[]') {
    if (candidate.includes(meta)) {
      return undefined;
    }
  }
  // path.extname on a dummy file name reduces compound extensions to their
  // final segment ('.tar.gz' -> '.gz') and keeps dotfile-style names intact
  // ('.profile' -> '.profile').
  const extracted = path.extname(`dummy${candidate}`);
  // Fall back to the raw candidate if extname yields nothing, then drop a
  // bare '.' and anything that would still contain an inner dot.
  const result = extracted || candidate;
  if (result === '.' || result.substring(1).includes('.')) {
    return undefined;
  }
  return result;
}

/**
 * Extracts file extensions from glob patterns.
 *
 * - `**\/*.exe` (shown escaped) yields `.exe`.
 * - Brace expansion such as `*.{js,ts}` yields `.js` and `.ts`; whitespace
 *   around alternatives is ignored.
 * - Compound extensions are reduced to their last segment, both for simple
 *   patterns (`*.tar.gz` -> `.gz`) and inside braces
 *   (`*.{tar.gz,zip}` -> `.gz`, `.zip`), so results are comparable with
 *   `path.extname` output.
 * - Patterns without `*.`, with a path segment after the extension, with
 *   unbalanced braces, or whose "extension" contains glob wildcards
 *   (`*`, `?`, `[`, `]`) contribute nothing.
 *
 * @param patterns Glob patterns to inspect; the array is not modified.
 * @returns The distinct extensions (each with a leading dot), sorted
 *   ascending with the default string ordering.
 */
export function extractExtensionsFromPatterns(patterns: string[]): string[] {
  const extensions = new Set<string>();

  for (const pattern of patterns) {
    const starDotIndex = pattern.lastIndexOf('*.');
    if (starDotIndex === -1) {
      continue;
    }
    // Everything from the dot that follows the last '*' onwards.
    const extPart = pattern.substring(starDotIndex + 1);

    // `.{jpg,png}` expands to one candidate per alternative; anything else is
    // a single candidate. Every candidate goes through the same normalization
    // so brace and non-brace patterns give consistent results.
    const isBraceList = extPart.startsWith('.{') && extPart.endsWith('}');
    const candidates = isBraceList
      ? extPart
          .slice(2, -1)
          .split(',')
          .map((alternative) => `.${alternative.trim()}`)
      : [extPart];

    for (const candidate of candidates) {
      const extension = normalizeExtensionCandidate(candidate);
      if (extension !== undefined) {
        extensions.add(extension);
      }
    }
  }

  return Array.from(extensions).sort();
}
/**
 * Sorted list of extensions treated as binary, for quick lookup.
 *
 * Derived from the binary, media and Python bytecode patterns via
 * extractExtensionsFromPatterns, plus a few extra extensions that none of
 * those pattern lists cover.
 */
export const BINARY_EXTENSIONS: string[] = extractExtensionsFromPatterns(
  BINARY_FILE_PATTERNS.concat(MEDIA_FILE_PATTERNS, PYTHON_EXCLUDES),
)
  // Additional binary extensions not in the main patterns
  .concat(['.dat', '.obj', '.o', '.a', '.lib', '.wasm'])
  .sort();