mirror of https://github.com/QwenLM/qwen-code.git (synced 2025-12-20 16:57:46 +00:00)
feat(core, cli): Introduce high-performance FileSearch engine (#5136)
Co-authored-by: Jacob Richman <jacob314@gmail.com>

@@ -34,6 +34,7 @@
    "chardet": "^2.1.0",
    "diff": "^7.0.0",
    "dotenv": "^17.1.0",
    "fdir": "^6.4.6",
    "glob": "^10.4.5",
    "google-auth-library": "^9.11.0",
    "html-to-text": "^9.0.5",
@@ -42,6 +43,7 @@
    "marked": "^15.0.12",
    "micromatch": "^4.0.8",
    "open": "^10.1.2",
    "picomatch": "^4.0.1",
    "shell-quote": "^1.8.3",
    "simple-git": "^3.28.0",
    "strip-ansi": "^7.1.0",
@@ -49,10 +51,12 @@
    "ws": "^8.18.0"
  },
  "devDependencies": {
    "@google/gemini-cli-test-utils": "file:../test-utils",
    "@types/diff": "^7.0.2",
    "@types/dotenv": "^6.1.1",
    "@types/micromatch": "^4.0.8",
    "@types/minimatch": "^5.1.2",
    "@types/picomatch": "^4.0.1",
    "@types/ws": "^8.5.10",
    "typescript": "^5.3.3",
    "vitest": "^3.1.1"

@@ -40,6 +40,7 @@ export * from './utils/shell-utils.js';
export * from './utils/systemEncoding.js';
export * from './utils/textUtils.js';
export * from './utils/formatters.js';
export * from './utils/filesearch/fileSearch.js';

// Export services
export * from './services/fileDiscoveryService.js';

packages/core/src/utils/filesearch/crawlCache.test.ts (new file, 112 lines)
@@ -0,0 +1,112 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, it, expect, vi, afterEach, beforeEach } from 'vitest';
import { getCacheKey, read, write, clear } from './crawlCache.js';

describe('CrawlCache', () => {
  describe('getCacheKey', () => {
    it('should generate a consistent hash', () => {
      const key1 = getCacheKey('/foo', 'bar');
      const key2 = getCacheKey('/foo', 'bar');
      expect(key1).toBe(key2);
    });

    it('should generate a different hash for different directories', () => {
      const key1 = getCacheKey('/foo', 'bar');
      const key2 = getCacheKey('/bar', 'bar');
      expect(key1).not.toBe(key2);
    });

    it('should generate a different hash for different ignore content', () => {
      const key1 = getCacheKey('/foo', 'bar');
      const key2 = getCacheKey('/foo', 'baz');
      expect(key1).not.toBe(key2);
    });
  });

  describe('in-memory cache operations', () => {
    beforeEach(() => {
      // Ensure a clean slate before each test
      clear();
    });

    afterEach(() => {
      // Restore real timers after each test that uses fake ones
      vi.useRealTimers();
    });

    it('should write and read data from the cache', () => {
      const key = 'test-key';
      const data = ['foo', 'bar'];
      write(key, data, 10000); // 10 second TTL
      const cachedData = read(key);
      expect(cachedData).toEqual(data);
    });

    it('should return undefined for a nonexistent key', () => {
      const cachedData = read('nonexistent-key');
      expect(cachedData).toBeUndefined();
    });

    it('should clear the cache', () => {
      const key = 'test-key';
      const data = ['foo', 'bar'];
      write(key, data, 10000);
      clear();
      const cachedData = read(key);
      expect(cachedData).toBeUndefined();
    });

    it('should automatically evict a cache entry after its TTL expires', async () => {
      vi.useFakeTimers();
      const key = 'ttl-key';
      const data = ['foo'];
      const ttl = 5000; // 5 seconds

      write(key, data, ttl);

      // Should exist immediately after writing
      expect(read(key)).toEqual(data);

      // Advance time just before expiration
      await vi.advanceTimersByTimeAsync(ttl - 1);
      expect(read(key)).toEqual(data);

      // Advance time past expiration
      await vi.advanceTimersByTimeAsync(1);
      expect(read(key)).toBeUndefined();
    });

    it('should reset the timer when an entry is updated', async () => {
      vi.useFakeTimers();
      const key = 'update-key';
      const initialData = ['initial'];
      const updatedData = ['updated'];
      const ttl = 5000; // 5 seconds

      // Write initial data
      write(key, initialData, ttl);

      // Advance time, but not enough to expire
      await vi.advanceTimersByTimeAsync(3000);
      expect(read(key)).toEqual(initialData);

      // Update the data, which should reset the timer
      write(key, updatedData, ttl);
      expect(read(key)).toEqual(updatedData);

      // Advance time again. If the timer wasn't reset, the total elapsed
      // time (3000 + 3000 = 6000) would cause an eviction.
      await vi.advanceTimersByTimeAsync(3000);
      expect(read(key)).toEqual(updatedData);

      // Advance past the new expiration time
      await vi.advanceTimersByTimeAsync(2001);
      expect(read(key)).toBeUndefined();
    });
  });
});

packages/core/src/utils/filesearch/crawlCache.ts (new file, 65 lines)
@@ -0,0 +1,65 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import crypto from 'node:crypto';

const crawlCache = new Map<string, string[]>();
const cacheTimers = new Map<string, NodeJS.Timeout>();

/**
 * Generates a unique cache key based on the project directory and the content
 * of ignore files. This ensures that the cache is invalidated if the project
 * or ignore rules change.
 */
export const getCacheKey = (
  directory: string,
  ignoreContent: string,
): string => {
  const hash = crypto.createHash('sha256');
  hash.update(directory);
  hash.update(ignoreContent);
  return hash.digest('hex');
};

/**
 * Reads cached data from the in-memory cache.
 * Returns undefined if the key is not found.
 */
export const read = (key: string): string[] | undefined => crawlCache.get(key);

/**
 * Writes data to the in-memory cache and sets a timer to evict it after the TTL.
 */
export const write = (key: string, results: string[], ttlMs: number): void => {
  // Clear any existing timer for this key to prevent premature deletion
  if (cacheTimers.has(key)) {
    clearTimeout(cacheTimers.get(key)!);
  }

  // Store the new data
  crawlCache.set(key, results);

  // Set a timer to automatically delete the cache entry after the TTL
  const timerId = setTimeout(() => {
    crawlCache.delete(key);
    cacheTimers.delete(key);
  }, ttlMs);

  // Store the timer handle so we can clear it if the entry is updated
  cacheTimers.set(key, timerId);
};

/**
 * Clears the entire cache and all active timers.
 * Primarily used for testing.
 */
export const clear = (): void => {
  for (const timerId of cacheTimers.values()) {
    clearTimeout(timerId);
  }
  crawlCache.clear();
  cacheTimers.clear();
};
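
A minimal usage sketch of the module above (the paths and 30-second TTL are made up, not from the commit): the key binds cached crawl results to both the directory and the ignore-rule fingerprint, so changing either produces a different key and a cache miss.

import { getCacheKey, read, write } from './crawlCache.js';

const key = getCacheKey('/my/project', 'node_modules/\ndist/');
if (read(key) === undefined) {
  write(key, ['src/', 'src/main.ts'], 30_000); // cache a crawl result for 30s
}
read(key); // -> ['src/', 'src/main.ts'] until the TTL timer evicts the entry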

packages/core/src/utils/filesearch/fileSearch.test.ts (new file, 642 lines)
@@ -0,0 +1,642 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import * as fs from 'fs/promises';
import * as path from 'path';
import * as cache from './crawlCache.js';
import { FileSearch, AbortError, filter } from './fileSearch.js';
import { createTmpDir, cleanupTmpDir } from '@google/gemini-cli-test-utils';

type FileSearchWithPrivateMethods = FileSearch & {
  performCrawl: () => Promise<void>;
};

describe('FileSearch', () => {
  let tmpDir: string;
  afterEach(async () => {
    if (tmpDir) {
      await cleanupTmpDir(tmpDir);
    }
    vi.restoreAllMocks();
  });

  it('should use .geminiignore rules', async () => {
    tmpDir = await createTmpDir({
      '.geminiignore': 'dist/',
      dist: ['ignored.js'],
      src: ['not-ignored.js'],
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: false,
      useGeminiignore: true,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
    });

    await fileSearch.initialize();
    const results = await fileSearch.search('');

    expect(results).toEqual(['src/', '.geminiignore', 'src/not-ignored.js']);
  });

  it('should combine .gitignore and .geminiignore rules', async () => {
    tmpDir = await createTmpDir({
      '.gitignore': 'dist/',
      '.geminiignore': 'build/',
      dist: ['ignored-by-git.js'],
      build: ['ignored-by-gemini.js'],
      src: ['not-ignored.js'],
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: true,
      useGeminiignore: true,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
    });

    await fileSearch.initialize();
    const results = await fileSearch.search('');

    expect(results).toEqual([
      'src/',
      '.geminiignore',
      '.gitignore',
      'src/not-ignored.js',
    ]);
  });

  it('should use ignoreDirs option', async () => {
    tmpDir = await createTmpDir({
      logs: ['some.log'],
      src: ['main.js'],
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: false,
      useGeminiignore: false,
      ignoreDirs: ['logs'],
      cache: false,
      cacheTtl: 0,
    });

    await fileSearch.initialize();
    const results = await fileSearch.search('');

    expect(results).toEqual(['src/', 'src/main.js']);
  });

  it('should handle negated directories', async () => {
    tmpDir = await createTmpDir({
      '.gitignore': ['build/**', '!build/public', '!build/public/**'].join(
        '\n',
      ),
      build: {
        'private.js': '',
        public: ['index.html'],
      },
      src: ['main.js'],
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: true,
      useGeminiignore: false,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
    });

    await fileSearch.initialize();
    const results = await fileSearch.search('');

    expect(results).toEqual([
      'build/',
      'build/public/',
      'src/',
      '.gitignore',
      'build/public/index.html',
      'src/main.js',
    ]);
  });

  it('should filter results with a search pattern', async () => {
    tmpDir = await createTmpDir({
      src: {
        'main.js': '',
        'util.ts': '',
        'style.css': '',
      },
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: false,
      useGeminiignore: false,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
    });

    await fileSearch.initialize();
    const results = await fileSearch.search('**/*.js');

    expect(results).toEqual(['src/main.js']);
  });

  it('should handle root-level file negation', async () => {
    tmpDir = await createTmpDir({
      '.gitignore': ['*.mk', '!Foo.mk'].join('\n'),
      'bar.mk': '',
      'Foo.mk': '',
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: true,
      useGeminiignore: false,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
    });

    await fileSearch.initialize();
    const results = await fileSearch.search('');

    expect(results).toEqual(['.gitignore', 'Foo.mk']);
  });

  it('should handle directory negation with glob', async () => {
    tmpDir = await createTmpDir({
      '.gitignore': [
        'third_party/**',
        '!third_party/foo',
        '!third_party/foo/bar',
        '!third_party/foo/bar/baz_buffer',
      ].join('\n'),
      third_party: {
        foo: {
          bar: {
            baz_buffer: '',
          },
        },
        ignore_this: '',
      },
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: true,
      useGeminiignore: false,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
    });

    await fileSearch.initialize();
    const results = await fileSearch.search('');

    expect(results).toEqual([
      'third_party/',
      'third_party/foo/',
      'third_party/foo/bar/',
      '.gitignore',
      'third_party/foo/bar/baz_buffer',
    ]);
  });

  it('should correctly handle negated patterns in .gitignore', async () => {
    tmpDir = await createTmpDir({
      '.gitignore': ['dist/**', '!dist/keep.js'].join('\n'),
      dist: ['ignore.js', 'keep.js'],
      src: ['main.js'],
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: true,
      useGeminiignore: false,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
    });

    await fileSearch.initialize();
    const results = await fileSearch.search('');

    expect(results).toEqual([
      'dist/',
      'src/',
      '.gitignore',
      'dist/keep.js',
      'src/main.js',
    ]);
  });

  // New test cases start here

  it('should initialize correctly when ignore files are missing', async () => {
    tmpDir = await createTmpDir({
      src: ['file1.js'],
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: true,
      useGeminiignore: true,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
    });

    // Expect no errors to be thrown during initialization
    await expect(fileSearch.initialize()).resolves.toBeUndefined();
    const results = await fileSearch.search('');
    expect(results).toEqual(['src/', 'src/file1.js']);
  });

  it('should respect maxResults option in search', async () => {
    tmpDir = await createTmpDir({
      src: {
        'file1.js': '',
        'file2.js': '',
        'file3.js': '',
        'file4.js': '',
      },
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: false,
      useGeminiignore: false,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
    });

    await fileSearch.initialize();
    const results = await fileSearch.search('**/*.js', { maxResults: 2 });

    expect(results).toEqual(['src/file1.js', 'src/file2.js']); // Assuming alphabetical sort
  });

  it('should return empty array when no matches are found', async () => {
    tmpDir = await createTmpDir({
      src: ['file1.js'],
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: false,
      useGeminiignore: false,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
    });

    await fileSearch.initialize();
    const results = await fileSearch.search('nonexistent-file.xyz');

    expect(results).toEqual([]);
  });

  it('should throw AbortError when filter is aborted', async () => {
    const controller = new AbortController();
    const dummyPaths = Array.from({ length: 5000 }, (_, i) => `file${i}.js`); // Large array to ensure yielding

    const filterPromise = filter(dummyPaths, '*.js', controller.signal);

    // Abort after a short delay to ensure filter has started
    setTimeout(() => controller.abort(), 1);

    await expect(filterPromise).rejects.toThrow(AbortError);
  });

  describe('with in-memory cache', () => {
    beforeEach(() => {
      cache.clear();
    });

    afterEach(() => {
      vi.useRealTimers();
    });

    it('should throw an error if search is called before initialization', async () => {
      tmpDir = await createTmpDir({});
      const fileSearch = new FileSearch({
        projectRoot: tmpDir,
        useGitignore: false,
        useGeminiignore: false,
        ignoreDirs: [],
        cache: false,
        cacheTtl: 0,
      });

      await expect(fileSearch.search('')).rejects.toThrow(
        'Engine not initialized. Call initialize() first.',
      );
    });

    it('should hit the cache for subsequent searches', async () => {
      tmpDir = await createTmpDir({ 'file1.js': '' });
      const getOptions = () => ({
        projectRoot: tmpDir,
        useGitignore: false,
        useGeminiignore: false,
        ignoreDirs: [],
        cache: true,
        cacheTtl: 10,
      });

      const fs1 = new FileSearch(getOptions());
      const crawlSpy1 = vi.spyOn(
        fs1 as FileSearchWithPrivateMethods,
        'performCrawl',
      );
      await fs1.initialize();
      expect(crawlSpy1).toHaveBeenCalledTimes(1);

      // Second search should hit the cache because the options are identical
      const fs2 = new FileSearch(getOptions());
      const crawlSpy2 = vi.spyOn(
        fs2 as FileSearchWithPrivateMethods,
        'performCrawl',
      );
      await fs2.initialize();
      expect(crawlSpy2).not.toHaveBeenCalled();
    });

    it('should miss the cache when ignore rules change', async () => {
      tmpDir = await createTmpDir({
        '.gitignore': 'a.txt',
        'a.txt': '',
        'b.txt': '',
      });
      const options = {
        projectRoot: tmpDir,
        useGitignore: true,
        useGeminiignore: false,
        ignoreDirs: [],
        cache: true,
        cacheTtl: 10000,
      };

      // Initial search to populate the cache
      const fs1 = new FileSearch(options);
      const crawlSpy1 = vi.spyOn(
        fs1 as FileSearchWithPrivateMethods,
        'performCrawl',
      );
      await fs1.initialize();
      const results1 = await fs1.search('');
      expect(crawlSpy1).toHaveBeenCalledTimes(1);
      expect(results1).toEqual(['.gitignore', 'b.txt']);

      // Modify the ignore file
      await fs.writeFile(path.join(tmpDir, '.gitignore'), 'b.txt');

      // Second search should miss the cache and trigger a recrawl
      const fs2 = new FileSearch(options);
      const crawlSpy2 = vi.spyOn(
        fs2 as FileSearchWithPrivateMethods,
        'performCrawl',
      );
      await fs2.initialize();
      const results2 = await fs2.search('');
      expect(crawlSpy2).toHaveBeenCalledTimes(1);
      expect(results2).toEqual(['.gitignore', 'a.txt']);
    });

    it('should miss the cache after TTL expires', async () => {
      vi.useFakeTimers();
      tmpDir = await createTmpDir({ 'file1.js': '' });
      const options = {
        projectRoot: tmpDir,
        useGitignore: false,
        useGeminiignore: false,
        ignoreDirs: [],
        cache: true,
        cacheTtl: 10, // 10 seconds
      };

      // Initial search to populate the cache
      const fs1 = new FileSearch(options);
      await fs1.initialize();

      // Advance time past the TTL
      await vi.advanceTimersByTimeAsync(11000);

      // Second search should miss the cache and trigger a recrawl
      const fs2 = new FileSearch(options);
      const crawlSpy = vi.spyOn(
        fs2 as FileSearchWithPrivateMethods,
        'performCrawl',
      );
      await fs2.initialize();

      expect(crawlSpy).toHaveBeenCalledTimes(1);
    });
  });

  it('should handle empty or commented-only ignore files', async () => {
    tmpDir = await createTmpDir({
      '.gitignore': '# This is a comment\n\n \n',
      src: ['main.js'],
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: true,
      useGeminiignore: false,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
    });

    await fileSearch.initialize();
    const results = await fileSearch.search('');

    expect(results).toEqual(['src/', '.gitignore', 'src/main.js']);
  });

  it('should always ignore the .git directory', async () => {
    tmpDir = await createTmpDir({
      '.git': ['config', 'HEAD'],
      src: ['main.js'],
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: false, // Explicitly disable .gitignore to isolate this rule
      useGeminiignore: false,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
    });

    await fileSearch.initialize();
    const results = await fileSearch.search('');

    expect(results).toEqual(['src/', 'src/main.js']);
  });

  it('should be cancellable via AbortSignal', async () => {
    const largeDir: Record<string, string> = {};
    for (let i = 0; i < 100; i++) {
      largeDir[`file${i}.js`] = '';
    }
    tmpDir = await createTmpDir(largeDir);

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: false,
      useGeminiignore: false,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
    });

    await fileSearch.initialize();

    const controller = new AbortController();
    const searchPromise = fileSearch.search('**/*.js', {
      signal: controller.signal,
    });

    // Yield to allow the search to start before aborting.
    await new Promise((resolve) => setImmediate(resolve));

    controller.abort();

    await expect(searchPromise).rejects.toThrow(AbortError);
  });

  it('should leverage ResultCache for bestBaseQuery optimization', async () => {
    tmpDir = await createTmpDir({
      src: {
        'foo.js': '',
        'bar.ts': '',
        nested: {
          'baz.js': '',
        },
      },
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: false,
      useGeminiignore: false,
      ignoreDirs: [],
      cache: true, // Enable caching for this test
      cacheTtl: 0,
    });

    await fileSearch.initialize();

    // Perform a broad search to prime the cache
    const broadResults = await fileSearch.search('src/**');
    expect(broadResults).toEqual([
      'src/',
      'src/nested/',
      'src/bar.ts',
      'src/foo.js',
      'src/nested/baz.js',
    ]);

    // Perform a more specific search that should leverage the broad search's cached results
    const specificResults = await fileSearch.search('src/**/*.js');
    expect(specificResults).toEqual(['src/foo.js', 'src/nested/baz.js']);

    // Although we can't directly inspect ResultCache.hits/misses from here,
    // the correctness of specificResults after a broad search implicitly
    // verifies that the caching mechanism, including bestBaseQuery, is working.
  });

  it('should be case-insensitive by default', async () => {
    tmpDir = await createTmpDir({
      'File1.Js': '',
      'file2.js': '',
      'FILE3.JS': '',
      'other.txt': '',
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: false,
      useGeminiignore: false,
      ignoreDirs: [],
      cache: false,
      cacheTtl: 0,
    });

    await fileSearch.initialize();

    // Search with a lowercase pattern
    let results = await fileSearch.search('file*.js');
    expect(results).toHaveLength(3);
    expect(results).toEqual(
      expect.arrayContaining(['File1.Js', 'file2.js', 'FILE3.JS']),
    );

    // Search with an uppercase pattern
    results = await fileSearch.search('FILE*.JS');
    expect(results).toHaveLength(3);
    expect(results).toEqual(
      expect.arrayContaining(['File1.Js', 'file2.js', 'FILE3.JS']),
    );

    // Search with a mixed-case pattern
    results = await fileSearch.search('FiLe*.Js');
    expect(results).toHaveLength(3);
    expect(results).toEqual(
      expect.arrayContaining(['File1.Js', 'file2.js', 'FILE3.JS']),
    );
  });

  it('should respect maxResults even when the cache returns an exact match', async () => {
    tmpDir = await createTmpDir({
      'file1.js': '',
      'file2.js': '',
      'file3.js': '',
      'file4.js': '',
      'file5.js': '',
    });

    const fileSearch = new FileSearch({
      projectRoot: tmpDir,
      useGitignore: false,
      useGeminiignore: false,
      ignoreDirs: [],
      cache: true, // Ensure caching is enabled
      cacheTtl: 10000,
    });

    await fileSearch.initialize();

    // 1. Perform a broad search to populate the cache with an exact match.
    const initialResults = await fileSearch.search('*.js');
    expect(initialResults).toEqual([
      'file1.js',
      'file2.js',
      'file3.js',
      'file4.js',
      'file5.js',
    ]);

    // 2. Perform the same search again, but this time with a maxResults limit.
    const limitedResults = await fileSearch.search('*.js', { maxResults: 2 });

    // 3. Assert that the maxResults limit was respected, even with a cache hit.
    expect(limitedResults).toEqual(['file1.js', 'file2.js']);
  });
});

packages/core/src/utils/filesearch/fileSearch.ts (new file, 269 lines)
@@ -0,0 +1,269 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import path from 'node:path';
import fs from 'node:fs';
import { fdir } from 'fdir';
import picomatch from 'picomatch';
import { Ignore } from './ignore.js';
import { ResultCache } from './result-cache.js';
import * as cache from './crawlCache.js';

export type FileSearchOptions = {
  projectRoot: string;
  ignoreDirs: string[];
  useGitignore: boolean;
  useGeminiignore: boolean;
  cache: boolean;
  cacheTtl: number;
};

export class AbortError extends Error {
  constructor(message = 'Search aborted') {
    super(message);
    this.name = 'AbortError';
  }
}

/**
 * Filters a list of paths based on a given pattern.
 * @param allPaths The list of all paths to filter.
 * @param pattern The picomatch pattern to filter by.
 * @param signal An AbortSignal to cancel the operation.
 * @returns A promise that resolves to the filtered and sorted list of paths.
 */
export async function filter(
  allPaths: string[],
  pattern: string,
  signal: AbortSignal | undefined,
): Promise<string[]> {
  const patternFilter = picomatch(pattern, {
    dot: true,
    contains: true,
    nocase: true,
  });

  const results: string[] = [];
  for (const [i, p] of allPaths.entries()) {
    // Yield control to the event loop periodically to prevent blocking.
    if (i % 1000 === 0) {
      await new Promise((resolve) => setImmediate(resolve));
      if (signal?.aborted) {
        throw new AbortError();
      }
    }

    if (patternFilter(p)) {
      results.push(p);
    }
  }

  results.sort((a, b) => {
    const aIsDir = a.endsWith('/');
    const bIsDir = b.endsWith('/');

    if (aIsDir && !bIsDir) return -1;
    if (!aIsDir && bIsDir) return 1;

    // This is 40% faster than localeCompare and the only thing we would really
    // gain from localeCompare is case-sensitive sort.
    return a < b ? -1 : a > b ? 1 : 0;
  });

  return results;
}

export type SearchOptions = {
  signal?: AbortSignal;
  maxResults?: number;
};

/**
 * Provides a fast and efficient way to search for files within a project,
 * respecting .gitignore and .geminiignore rules, and utilizing caching
 * for improved performance.
 */
export class FileSearch {
  private readonly absoluteDir: string;
  private readonly ignore: Ignore = new Ignore();
  private resultCache: ResultCache | undefined;
  private allFiles: string[] = [];

  /**
   * Constructs a new `FileSearch` instance.
   * @param options Configuration options for the file search.
   */
  constructor(private readonly options: FileSearchOptions) {
    this.absoluteDir = path.resolve(options.projectRoot);
  }

  /**
   * Initializes the file search engine by loading ignore rules, crawling the
   * file system, and building the in-memory cache. This method must be called
   * before performing any searches.
   */
  async initialize(): Promise<void> {
    this.loadIgnoreRules();
    await this.crawlFiles();
    this.buildResultCache();
  }

  /**
   * Searches for files matching a given pattern.
   * @param pattern The picomatch pattern to search for (e.g., '*.js', 'src/**').
   * @param options Search options, including an AbortSignal and maxResults.
   * @returns A promise that resolves to a list of matching file paths, relative
   *   to the project root.
   */
  async search(
    pattern: string,
    options: SearchOptions = {},
  ): Promise<string[]> {
    if (!this.resultCache) {
      throw new Error('Engine not initialized. Call initialize() first.');
    }

    pattern = pattern || '*';

    const { files: candidates, isExactMatch } =
      await this.resultCache!.get(pattern);

    let filteredCandidates;
    if (isExactMatch) {
      filteredCandidates = candidates;
    } else {
      // Apply the user's picomatch pattern filter
      filteredCandidates = await filter(candidates, pattern, options.signal);
      this.resultCache!.set(pattern, filteredCandidates);
    }

    // Trade-off: We apply a two-stage filtering process.
    // 1. During the file system crawl (`performCrawl`), we only apply directory-level
    //    ignore rules (e.g., `node_modules/`, `dist/`). This is because applying
    //    a full ignore filter (which includes file-specific patterns like `*.log`)
    //    during the crawl can significantly slow down `fdir`.
    // 2. Here, in the `search` method, we apply the full ignore filter
    //    (including file patterns) to the `filteredCandidates` (which have already
    //    been filtered by the user's search pattern and sorted). For autocomplete,
    //    the number of displayed results is small (MAX_SUGGESTIONS_TO_SHOW),
    //    so applying the full filter to this truncated list is much more efficient
    //    than applying it to every file during the initial crawl.
    const fileFilter = this.ignore.getFileFilter();
    const results: string[] = [];
    for (const [i, candidate] of filteredCandidates.entries()) {
      // Yield to the event loop to avoid blocking on large result sets.
      if (i % 1000 === 0) {
        await new Promise((resolve) => setImmediate(resolve));
        if (options.signal?.aborted) {
          throw new AbortError();
        }
      }

      if (results.length >= (options.maxResults ?? Infinity)) {
        break;
      }
      // The `ignore` library throws an error if the path is '.', so we skip it.
      if (candidate === '.') {
        continue;
      }
      if (!fileFilter(candidate)) {
        results.push(candidate);
      }
    }
    return results;
  }

  /**
   * Loads ignore rules from .gitignore and .geminiignore files, and applies
   * any additional ignore directories specified in the options.
   */
  private loadIgnoreRules(): void {
    if (this.options.useGitignore) {
      const gitignorePath = path.join(this.absoluteDir, '.gitignore');
      if (fs.existsSync(gitignorePath)) {
        this.ignore.add(fs.readFileSync(gitignorePath, 'utf8'));
      }
    }

    if (this.options.useGeminiignore) {
      const geminiignorePath = path.join(this.absoluteDir, '.geminiignore');
      if (fs.existsSync(geminiignorePath)) {
        this.ignore.add(fs.readFileSync(geminiignorePath, 'utf8'));
      }
    }

    const ignoreDirs = ['.git', ...this.options.ignoreDirs];
    this.ignore.add(
      ignoreDirs.map((dir) => {
        if (dir.endsWith('/')) {
          return dir;
        }
        return `${dir}/`;
      }),
    );
  }

  /**
   * Crawls the file system to get a list of all files and directories,
   * optionally using a cache for faster initialization.
   */
  private async crawlFiles(): Promise<void> {
    if (this.options.cache) {
      const cacheKey = cache.getCacheKey(
        this.absoluteDir,
        this.ignore.getFingerprint(),
      );
      const cachedResults = cache.read(cacheKey);

      if (cachedResults) {
        this.allFiles = cachedResults;
        return;
      }
    }

    this.allFiles = await this.performCrawl();

    if (this.options.cache) {
      const cacheKey = cache.getCacheKey(
        this.absoluteDir,
        this.ignore.getFingerprint(),
      );
      cache.write(cacheKey, this.allFiles, this.options.cacheTtl * 1000);
    }
  }

  /**
   * Performs the actual file system crawl using `fdir`, applying directory
   * ignore rules.
   * @returns A promise that resolves to a list of all files and directories.
   */
  private async performCrawl(): Promise<string[]> {
    const dirFilter = this.ignore.getDirectoryFilter();

    // We use `fdir` for fast file system traversal. A key performance
    // optimization for large workspaces is to exclude entire directories
    // early in the traversal process. This is why we apply directory-specific
    // ignore rules (e.g., `node_modules/`, `dist/`) directly to `fdir`'s
    // exclude filter.
    const api = new fdir()
      .withRelativePaths()
      .withDirs()
      .withPathSeparator('/') // Always use unix style paths
      .exclude((_, dirPath) => {
        const relativePath = path.relative(this.absoluteDir, dirPath);
        return dirFilter(`${relativePath}/`);
      });

    return api.crawl(this.absoluteDir).withPromise();
  }

  /**
   * Builds the in-memory cache for fast pattern matching.
   */
  private buildResultCache(): void {
    this.resultCache = new ResultCache(this.allFiles, this.absoluteDir);
  }
}
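
A hypothetical caller of the engine above (option values are illustrative, not from the commit): initialize() loads ignore rules, crawls the project or reuses the crawl cache, and builds the ResultCache; search() then filters with picomatch and applies the file-level ignore rules last.

import { FileSearch } from './fileSearch.js';

async function demo(): Promise<void> {
  const engine = new FileSearch({
    projectRoot: '/my/project',       // illustrative path
    ignoreDirs: ['node_modules'],
    useGitignore: true,
    useGeminiignore: true,
    cache: true,
    cacheTtl: 30, // seconds; multiplied by 1000 before reaching crawlCache.write
  });

  await engine.initialize(); // must run first, or search() throws
  const hits = await engine.search('src/**/*.ts', { maxResults: 20 });
  console.log(hits); // paths relative to the project root, directories first
}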

packages/core/src/utils/filesearch/ignore.test.ts (new file, 65 lines)
@@ -0,0 +1,65 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, it, expect } from 'vitest';
import { Ignore } from './ignore.js';

describe('Ignore', () => {
  describe('getDirectoryFilter', () => {
    it('should ignore directories matching directory patterns', () => {
      const ig = new Ignore().add(['foo/', 'bar/']);
      const dirFilter = ig.getDirectoryFilter();
      expect(dirFilter('foo/')).toBe(true);
      expect(dirFilter('bar/')).toBe(true);
      expect(dirFilter('baz/')).toBe(false);
    });

    it('should not ignore directories with file patterns', () => {
      const ig = new Ignore().add(['foo.js', '*.log']);
      const dirFilter = ig.getDirectoryFilter();
      expect(dirFilter('foo.js')).toBe(false);
      expect(dirFilter('foo.log')).toBe(false);
    });
  });

  describe('getFileFilter', () => {
    it('should not ignore files with directory patterns', () => {
      const ig = new Ignore().add(['foo/', 'bar/']);
      const fileFilter = ig.getFileFilter();
      expect(fileFilter('foo')).toBe(false);
      expect(fileFilter('foo/file.txt')).toBe(false);
    });

    it('should ignore files matching file patterns', () => {
      const ig = new Ignore().add(['*.log', 'foo.js']);
      const fileFilter = ig.getFileFilter();
      expect(fileFilter('foo.log')).toBe(true);
      expect(fileFilter('foo.js')).toBe(true);
      expect(fileFilter('bar.txt')).toBe(false);
    });
  });

  it('should accumulate patterns across multiple add() calls', () => {
    const ig = new Ignore().add('foo.js');
    ig.add('bar.js');
    const fileFilter = ig.getFileFilter();
    expect(fileFilter('foo.js')).toBe(true);
    expect(fileFilter('bar.js')).toBe(true);
    expect(fileFilter('baz.js')).toBe(false);
  });

  it('should return a stable and consistent fingerprint', () => {
    const ig1 = new Ignore().add(['foo', '!bar']);
    const ig2 = new Ignore().add('foo\n!bar');

    // Fingerprints should be identical for the same rules.
    expect(ig1.getFingerprint()).toBe(ig2.getFingerprint());

    // Adding a new rule should change the fingerprint.
    ig2.add('baz');
    expect(ig1.getFingerprint()).not.toBe(ig2.getFingerprint());
  });
});

packages/core/src/utils/filesearch/ignore.ts (new file, 93 lines)
@@ -0,0 +1,93 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import ignore from 'ignore';
import picomatch from 'picomatch';

const hasFileExtension = picomatch('**/*[*.]*');

export class Ignore {
  private readonly allPatterns: string[] = [];
  private dirIgnorer = ignore();
  private fileIgnorer = ignore();

  /**
   * Adds one or more ignore patterns.
   * @param patterns A single pattern string or an array of pattern strings.
   *   Each pattern can be a glob-like string similar to .gitignore rules.
   * @returns The `Ignore` instance for chaining.
   */
  add(patterns: string | string[]): this {
    if (typeof patterns === 'string') {
      patterns = patterns.split(/\r?\n/);
    }

    for (const p of patterns) {
      const pattern = p.trim();

      if (pattern === '' || pattern.startsWith('#')) {
        continue;
      }

      this.allPatterns.push(pattern);

      const isPositiveDirPattern =
        pattern.endsWith('/') && !pattern.startsWith('!');

      if (isPositiveDirPattern) {
        this.dirIgnorer.add(pattern);
      } else {
        // An ambiguous pattern (e.g., "build") could match a file or a
        // directory. To optimize the file system crawl, we use a heuristic:
        // patterns without a dot in the last segment are included in the
        // directory exclusion check.
        //
        // This heuristic can fail. For example, an ignore pattern of "my.assets"
        // intended to exclude a directory will not be treated as a directory
        // pattern because it contains a ".". This results in crawling a
        // directory that should have been excluded, reducing efficiency.
        // Correctness is still maintained. The incorrectly crawled directory
        // will be filtered out by the final ignore check.
        //
        // For maximum crawl efficiency, users should explicitly mark directory
        // patterns with a trailing slash (e.g., "my.assets/").
        this.fileIgnorer.add(pattern);
        if (!hasFileExtension(pattern)) {
          this.dirIgnorer.add(pattern);
        }
      }
    }

    return this;
  }

  /**
   * Returns a predicate that matches explicit directory ignore patterns
   * (patterns ending with '/').
   * @returns {(dirPath: string) => boolean}
   */
  getDirectoryFilter(): (dirPath: string) => boolean {
    return (dirPath: string) => this.dirIgnorer.ignores(dirPath);
  }

  /**
   * Returns a predicate that matches file ignore patterns (all patterns not
   * ending with '/'). Note: This may also match directories if a file pattern
   * matches a directory name, but all explicit directory patterns are handled
   * by getDirectoryFilter.
   * @returns {(filePath: string) => boolean}
   */
  getFileFilter(): (filePath: string) => boolean {
    return (filePath: string) => this.fileIgnorer.ignores(filePath);
  }

  /**
   * Returns a string representing the current set of ignore patterns.
   * This can be used to generate a unique identifier for the ignore
   * configuration, useful for caching purposes.
   * @returns A string fingerprint of the ignore patterns.
   */
  getFingerprint(): string {
    return this.allPatterns.join('\n');
  }
}
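
A small sketch of how the two predicates divide the work (the patterns are illustrative): trailing-slash patterns feed the directory filter that prunes the fdir crawl early, while everything else goes to the file filter that runs over the final candidate list.

import { Ignore } from './ignore.js';

const ig = new Ignore().add(['dist/', '*.log', '# comments are skipped']);
ig.getDirectoryFilter()('dist/');  // true  -> whole subtree skipped during the crawl
ig.getFileFilter()('debug.log');   // true  -> dropped from search results
ig.getFileFilter()('src/main.ts'); // false -> kept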

packages/core/src/utils/filesearch/result-cache.test.ts (new file, 56 lines)
@@ -0,0 +1,56 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import path from 'node:path';
import { test, expect } from 'vitest';
import { ResultCache } from './result-cache.js';

test('ResultCache basic usage', async () => {
  const files = [
    'foo.txt',
    'bar.js',
    'baz.md',
    'subdir/file.txt',
    'subdir/other.js',
    'subdir/nested/file.md',
  ];
  const cache = new ResultCache(files, path.resolve('.'));
  const { files: resultFiles, isExactMatch } = await cache.get('*.js');
  expect(resultFiles).toEqual(files);
  expect(isExactMatch).toBe(false);
});

test('ResultCache cache hit/miss', async () => {
  const files = ['foo.txt', 'bar.js', 'baz.md'];
  const cache = new ResultCache(files, path.resolve('.'));
  // First call: miss
  const { files: result1Files, isExactMatch: isExactMatch1 } =
    await cache.get('*.js');
  expect(result1Files).toEqual(files);
  expect(isExactMatch1).toBe(false);

  // Simulate FileSearch applying the filter and setting the result
  cache.set('*.js', ['bar.js']);

  // Second call: hit
  const { files: result2Files, isExactMatch: isExactMatch2 } =
    await cache.get('*.js');
  expect(result2Files).toEqual(['bar.js']);
  expect(isExactMatch2).toBe(true);
});

test('ResultCache best base query', async () => {
  const files = ['foo.txt', 'foobar.js', 'baz.md'];
  const cache = new ResultCache(files, path.resolve('.'));

  // Cache a broader query
  cache.set('foo', ['foo.txt', 'foobar.js']);

  // Search for a more specific query that starts with the broader one
  const { files: resultFiles, isExactMatch } = await cache.get('foobar');
  expect(resultFiles).toEqual(['foo.txt', 'foobar.js']);
  expect(isExactMatch).toBe(false);
});

packages/core/src/utils/filesearch/result-cache.ts (new file, 70 lines)
@@ -0,0 +1,70 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * Implements an in-memory cache for file search results.
 * This cache optimizes subsequent searches by leveraging previously computed results.
 */
export class ResultCache {
  private readonly cache: Map<string, string[]>;
  private hits = 0;
  private misses = 0;

  constructor(
    private readonly allFiles: string[],
    private readonly absoluteDir: string,
  ) {
    this.cache = new Map();
  }

  /**
   * Retrieves cached search results for a given query, or provides a base set
   * of files to search from.
   * @param query The search query pattern.
   * @returns An object containing the files to search and a boolean indicating
   *   if the result is an exact cache hit.
   */
  async get(
    query: string,
  ): Promise<{ files: string[]; isExactMatch: boolean }> {
    const isCacheHit = this.cache.has(query);

    if (isCacheHit) {
      this.hits++;
      return { files: this.cache.get(query)!, isExactMatch: true };
    }

    this.misses++;

    // This is the core optimization of the memory cache.
    // If a user first searches for "foo", and then for "foobar",
    // we don't need to search through all files again. We can start
    // from the results of the "foo" search.
    // This finds the most specific, already-cached query that is a prefix
    // of the current query.
    let bestBaseQuery = '';
    for (const key of this.cache.keys()) {
      if (query.startsWith(key) && key.length > bestBaseQuery.length) {
        bestBaseQuery = key;
      }
    }

    const filesToSearch = bestBaseQuery
      ? this.cache.get(bestBaseQuery)!
      : this.allFiles;

    return { files: filesToSearch, isExactMatch: false };
  }

  /**
   * Stores search results in the cache.
   * @param query The search query pattern.
   * @param results The matching file paths to cache.
   */
  set(query: string, results: string[]): void {
    this.cache.set(query, results);
  }
}
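
A toy sketch of the prefix-reuse path in get() (the file names are made up): a cached broad query becomes the candidate base for any longer query that starts with it, and isExactMatch tells the caller whether it still has to run its own filter, mirroring how FileSearch.search() uses this class.

import { ResultCache } from './result-cache.js';

async function demo(): Promise<void> {
  const cache = new ResultCache(['foo.txt', 'foobar.js', 'baz.md'], '/project');
  cache.set('foo', ['foo.txt', 'foobar.js']); // prime with a broad query
  const { files, isExactMatch } = await cache.get('foobar');
  // files -> ['foo.txt', 'foobar.js']: a narrowed base set, not final results
  // isExactMatch -> false: the caller still applies the 'foobar' filter
}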