mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-22 01:37:50 +00:00
277 lines
7.6 KiB
TypeScript
277 lines
7.6 KiB
TypeScript
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
|
|
|
|
import path from 'node:path';
|
|
import type { Config } from '../config/config.js';
|
|
import { getCurrentGeminiMdFilename } from '../tools/memoryTool.js';
|
|
|
|
/**
 * Common ignore patterns used across multiple tools for basic exclusions.
 * These are the most commonly ignored directories in development projects:
 * dependency directories (`node_modules`, `bower_components`) and
 * version-control metadata directories (`.git`, `.svn`, `.hg`).
 */
export const COMMON_IGNORE_PATTERNS: string[] = [
  '**/node_modules/**',
  '**/.git/**',
  '**/bower_components/**',
  '**/.svn/**',
  '**/.hg/**',
];
|
|
|
|
/**
 * Binary file extension patterns that are typically excluded from text processing.
 * Grouped below as: executables and libraries, JVM artifacts, archives,
 * and office documents.
 */
export const BINARY_FILE_PATTERNS: string[] = [
  // Executables and native libraries
  '**/*.bin',
  '**/*.exe',
  '**/*.dll',
  '**/*.so',
  '**/*.dylib',
  // JVM artifacts
  '**/*.class',
  '**/*.jar',
  '**/*.war',
  // Archives and compressed files
  '**/*.zip',
  '**/*.tar',
  '**/*.gz',
  '**/*.bz2',
  '**/*.rar',
  '**/*.7z',
  // Office documents
  '**/*.doc',
  '**/*.docx',
  '**/*.xls',
  '**/*.xlsx',
  '**/*.ppt',
  '**/*.pptx',
  '**/*.odt',
  '**/*.ods',
  '**/*.odp',
];
|
|
|
|
/**
 * Media file patterns that require special handling in tools like read-many-files.
 * These files can be processed as inlineData when explicitly requested.
 */
export const MEDIA_FILE_PATTERNS: string[] = [
  '**/*.pdf',
  '**/*.png',
  '**/*.jpg',
  '**/*.jpeg',
  '**/*.gif',
  '**/*.webp',
  '**/*.bmp',
  '**/*.svg',
];
|
|
|
|
/**
 * Common directory patterns that are typically ignored in development projects:
 * editor/IDE settings, build output, coverage reports and Python bytecode caches.
 */
export const COMMON_DIRECTORY_EXCLUDES: string[] = [
  '**/.vscode/**',
  '**/.idea/**',
  '**/dist/**',
  '**/build/**',
  '**/coverage/**',
  '**/__pycache__/**',
];
|
|
|
|
/**
 * Python-specific patterns (`.pyc` and `.pyo` files).
 */
export const PYTHON_EXCLUDES: string[] = ['**/*.pyc', '**/*.pyo'];
|
|
|
|
/**
 * System and environment file patterns (`.DS_Store` and `.env` files).
 */
export const SYSTEM_FILE_EXCLUDES: string[] = ['**/.DS_Store', '**/.env'];
|
|
|
|
/**
 * Comprehensive file exclusion patterns combining all common ignore patterns.
 * These patterns are compatible with glob ignore patterns.
 *
 * The list is the concatenation, in this order, of COMMON_IGNORE_PATTERNS,
 * COMMON_DIRECTORY_EXCLUDES, BINARY_FILE_PATTERNS, PYTHON_EXCLUDES and
 * SYSTEM_FILE_EXCLUDES.
 *
 * Note: Media files (PDF, images) are not excluded here as they need special
 * handling in read-many-files.
 */
export const DEFAULT_FILE_EXCLUDES: string[] = [
  ...COMMON_IGNORE_PATTERNS,
  ...COMMON_DIRECTORY_EXCLUDES,
  ...BINARY_FILE_PATTERNS,
  ...PYTHON_EXCLUDES,
  ...SYSTEM_FILE_EXCLUDES,
];
|
|
|
|
/**
 * Options for configuring file exclusion patterns.
 * Every field is optional; omitted fields fall back to the defaults applied by
 * `FileExclusions.getDefaultExcludePatterns`.
 */
export interface ExcludeOptions {
  /**
   * Whether to include default exclusion patterns. Defaults to true.
   */
  includeDefaults?: boolean;

  /**
   * Additional custom patterns from configuration.
   */
  customPatterns?: string[];

  /**
   * Additional patterns provided at runtime (e.g., from CLI arguments).
   */
  runtimePatterns?: string[];

  /**
   * Whether to include dynamic patterns like the current Gemini MD filename. Defaults to true.
   */
  includeDynamicPatterns?: boolean;
}
|
|
|
|
/**
 * Centralized file exclusion utility that provides configurable and extensible
 * file exclusion patterns for different tools and use cases.
 *
 * Every method returns a newly built array, so callers may mutate the result
 * without affecting the shared module-level pattern lists.
 */
export class FileExclusions {
  /**
   * @param config Optional configuration object. When present and exposing
   *   `getCustomExcludes()`, its patterns are merged into the results.
   */
  constructor(private readonly config?: Config) {}

  /**
   * Gets core ignore patterns for basic file operations like glob.
   * These are the minimal essential patterns that should almost always be excluded.
   *
   * @returns A copy of COMMON_IGNORE_PATTERNS.
   */
  getCoreIgnorePatterns(): string[] {
    return COMMON_IGNORE_PATTERNS.slice();
  }

  /**
   * Gets comprehensive default exclusion patterns for operations like read-many-files.
   * Includes all standard exclusions: directories, binary files, system files, etc.
   *
   * Patterns are emitted in this order: defaults (if enabled), the dynamic
   * Gemini MD filename pattern (if enabled), patterns from the config object,
   * `customPatterns`, then `runtimePatterns`. No de-duplication is performed.
   *
   * @param options Toggles and extra patterns; see {@link ExcludeOptions}.
   * @returns The combined list of exclusion glob patterns.
   */
  getDefaultExcludePatterns(options: ExcludeOptions = {}): string[] {
    const {
      includeDefaults: wantDefaults = true,
      customPatterns: callerPatterns = [],
      runtimePatterns: cliPatterns = [],
      includeDynamicPatterns: wantDynamic = true,
    } = options;

    const basePatterns = wantDefaults ? DEFAULT_FILE_EXCLUDES : [];

    // Dynamic patterns: currently only the active Gemini MD filename, matched
    // in any directory.
    const dynamicPatterns = wantDynamic
      ? [`**/${getCurrentGeminiMdFilename()}`]
      : [];

    return [
      ...basePatterns,
      ...dynamicPatterns,
      ...this.getConfigExcludes(),
      ...callerPatterns,
      ...cliPatterns,
    ];
  }

  /**
   * Gets exclude patterns for read-many-files tool with legacy compatibility.
   * Per the original note, this maintains the same behavior as the previous
   * getDefaultExcludes() function.
   *
   * @param additionalExcludes Extra patterns appended after all others.
   * @returns Defaults, dynamic and config patterns followed by `additionalExcludes`.
   */
  getReadManyFilesExcludes(additionalExcludes: string[] = []): string[] {
    return this.getDefaultExcludePatterns({
      includeDefaults: true,
      runtimePatterns: additionalExcludes,
      includeDynamicPatterns: true,
    });
  }

  /**
   * Gets exclude patterns for glob tool operations.
   * Uses core patterns by default but can be extended with additional patterns.
   *
   * @param additionalExcludes Extra patterns appended after all others.
   * @returns Core patterns, then config patterns, then `additionalExcludes`.
   */
  getGlobExcludes(additionalExcludes: string[] = []): string[] {
    return [
      ...this.getCoreIgnorePatterns(),
      ...this.getConfigExcludes(),
      ...additionalExcludes,
    ];
  }

  /**
   * Builds exclude patterns with full customization options.
   * This is the most flexible method for advanced use cases; it delegates
   * directly to {@link FileExclusions.getDefaultExcludePatterns}.
   *
   * @param options Toggles and extra patterns; see {@link ExcludeOptions}.
   * @returns The combined list of exclusion glob patterns.
   */
  buildExcludePatterns(options: ExcludeOptions): string[] {
    return this.getDefaultExcludePatterns(options);
  }

  /**
   * Reads custom exclude patterns from the config object, if any.
   * Returns an empty list when no config was supplied or when the config does
   * not provide `getCustomExcludes`.
   */
  private getConfigExcludes(): string[] {
    // TODO: getCustomExcludes method needs to be implemented in Config interface
    return this.config?.getCustomExcludes?.() ?? [];
  }
}
|
|
|
|
/**
 * Extracts file extensions from glob patterns.
 *
 * Only the text following the last `*.` of each pattern is examined:
 * - Converts patterns like glob/*.exe to .exe
 * - Handles brace expansion like glob/*.{js,ts} to .js and .ts
 * - Compound suffixes keep only the final extension (glob/*.tar.gz gives .gz)
 *
 * Patterns without `*.`, suffixes containing a path separator or an
 * unbalanced brace, and empty extensions (a bare `.`) contribute nothing.
 *
 * @param patterns Glob patterns to inspect; the array is not modified.
 * @returns The unique extensions (each with a leading dot), sorted ascending.
 */
export function extractExtensionsFromPatterns(
  patterns: readonly string[],
): string[] {
  // Characters that disqualify a suffix from being a plain extension.
  const disqualifying = ['/', '{', '}'];
  const found = new Set<string>();

  for (const pattern of patterns) {
    const starDot = pattern.lastIndexOf('*.');
    if (starDot === -1) {
      continue;
    }

    // Skip the '*' so the suffix always begins with '.', e.g. '.exe'.
    const suffix = pattern.substring(starDot + 1);

    // Handle brace expansion e.g. `**/*.{jpg,png}`
    if (suffix.startsWith('.{') && suffix.endsWith('}')) {
      const alternatives = suffix.slice(2, -1).split(','); // 'jpg,png' -> parts
      for (const alternative of alternatives) {
        const ext = `.${alternative.trim()}`;
        if (ext !== '.') {
          found.add(ext);
        }
      }
      continue;
    }

    // Handle simple/compound/dotfile extensions
    if (disqualifying.some((ch) => suffix.includes(ch))) {
      continue;
    }

    // Using path.extname on a dummy file handles various cases like
    // '.tar.gz' -> '.gz' and '.profile' -> '.profile' correctly.
    // If extname returns empty, fall back to the original suffix.
    const candidate = path.extname(`dummy${suffix}`) || suffix;

    // Drop a bare '.' and anything that still contains an inner dot.
    if (candidate !== '.' && !candidate.substring(1).includes('.')) {
      found.add(candidate);
    }
  }

  return Array.from(found).sort();
}
|
|
|
|
/**
 * Binary file extensions for quick lookup.
 *
 * Extracted from BINARY_FILE_PATTERNS, MEDIA_FILE_PATTERNS and PYTHON_EXCLUDES,
 * plus additional extensions not covered by those patterns. The combined list
 * is de-duplicated and sorted ascending.
 */
export const BINARY_EXTENSIONS: string[] = Array.from(
  new Set([
    ...extractExtensionsFromPatterns([
      ...BINARY_FILE_PATTERNS,
      ...MEDIA_FILE_PATTERNS,
      ...PYTHON_EXCLUDES,
    ]),
    // Additional binary extensions not in the main patterns
    '.dat',
    '.obj',
    '.o',
    '.a',
    '.lib',
    '.wasm',
  ]),
).sort();
|