Merge branch 'main' into web-search

This commit is contained in:
pomelo-nwu
2025-11-03 17:34:03 +08:00
36 changed files with 1766 additions and 760 deletions

View File

@@ -23,6 +23,7 @@ import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.j
import type { ChildProcess } from 'node:child_process';
import { spawn } from 'node:child_process';
import { ensureRipgrepPath } from '../utils/ripgrepUtils.js';
import { DEFAULT_FILE_FILTERING_OPTIONS } from '../config/constants.js';
// Mock ripgrepUtils
vi.mock('../utils/ripgrepUtils.js', () => ({
@@ -42,11 +43,17 @@ function createMockSpawn(
outputData?: string;
exitCode?: number;
signal?: string;
onCall?: (
command: string,
args: readonly string[],
spawnOptions?: unknown,
) => void;
} = {},
) {
const { outputData, exitCode = 0, signal } = options;
const { outputData, exitCode = 0, signal, onCall } = options;
return () => {
return (command: string, args: readonly string[], spawnOptions?: unknown) => {
onCall?.(command, args, spawnOptions);
const mockProcess = {
stdout: {
on: vi.fn(),
@@ -87,19 +94,29 @@ function createMockSpawn(
describe('RipGrepTool', () => {
let tempRootDir: string;
let grepTool: RipGrepTool;
let fileExclusionsMock: { getGlobExcludes: () => string[] };
const abortSignal = new AbortController().signal;
const mockConfig = {
getTargetDir: () => tempRootDir,
getWorkspaceContext: () => createMockWorkspaceContext(tempRootDir),
getWorkingDir: () => tempRootDir,
getDebugMode: () => false,
getUseBuiltinRipgrep: () => true,
} as unknown as Config;
beforeEach(async () => {
vi.clearAllMocks();
(ensureRipgrepPath as Mock).mockResolvedValue('/mock/path/to/rg');
mockSpawn.mockClear();
mockSpawn.mockReset();
tempRootDir = await fs.mkdtemp(path.join(os.tmpdir(), 'grep-tool-root-'));
fileExclusionsMock = {
getGlobExcludes: vi.fn().mockReturnValue([]),
};
Object.assign(mockConfig, {
getFileExclusions: () => fileExclusionsMock,
getFileFilteringOptions: () => DEFAULT_FILE_FILTERING_OPTIONS,
});
grepTool = new RipGrepTool(mockConfig);
// Create some test files and directories
@@ -137,11 +154,11 @@ describe('RipGrepTool', () => {
expect(grepTool.validateToolParams(params)).toBeNull();
});
it('should return null for valid params (pattern, path, and include)', () => {
it('should return null for valid params (pattern, path, and glob)', () => {
const params: RipGrepToolParams = {
pattern: 'hello',
path: '.',
include: '*.txt',
glob: '*.txt',
};
expect(grepTool.validateToolParams(params)).toBeNull();
});
@@ -153,9 +170,11 @@ describe('RipGrepTool', () => {
);
});
it('should return null for what would be an invalid regex pattern', () => {
it('should surface an error for invalid regex pattern', () => {
const params: RipGrepToolParams = { pattern: '[[' };
expect(grepTool.validateToolParams(params)).toBeNull();
expect(grepTool.validateToolParams(params)).toContain(
'Invalid regular expression pattern: [[',
);
});
it('should return error if path does not exist', () => {
@@ -194,13 +213,11 @@ describe('RipGrepTool', () => {
expect(result.llmContent).toContain(
'Found 3 matches for pattern "world" in the workspace directory',
);
expect(result.llmContent).toContain('File: fileA.txt');
expect(result.llmContent).toContain('L1: hello world');
expect(result.llmContent).toContain('L2: second line with world');
expect(result.llmContent).toContain('fileA.txt:1:hello world');
expect(result.llmContent).toContain('fileA.txt:2:second line with world');
expect(result.llmContent).toContain(
`File: ${path.join('sub', 'fileC.txt')}`,
'sub/fileC.txt:1:another world in sub dir',
);
expect(result.llmContent).toContain('L1: another world in sub dir');
expect(result.returnDisplay).toBe('Found 3 matches');
});
@@ -219,12 +236,33 @@ describe('RipGrepTool', () => {
expect(result.llmContent).toContain(
'Found 1 match for pattern "world" in path "sub"',
);
expect(result.llmContent).toContain('File: fileC.txt'); // Path relative to 'sub'
expect(result.llmContent).toContain('L1: another world in sub dir');
expect(result.llmContent).toContain(
'fileC.txt:1:another world in sub dir',
);
expect(result.returnDisplay).toBe('Found 1 match');
});
it('should find matches with an include glob', async () => {
it('should use target directory when path is not provided', async () => {
  // With no explicit `path`, the final ripgrep argument (the search root)
  // is expected to be the configured target directory (tempRootDir).
  const verifySearchRoot = (_command: string, rgArgs: readonly string[]) => {
    expect(rgArgs[rgArgs.length - 1]).toBe(tempRootDir);
  };
  mockSpawn.mockImplementationOnce(
    createMockSpawn({
      outputData: `fileA.txt:1:hello world${EOL}`,
      exitCode: 0,
      onCall: verifySearchRoot,
    }),
  );

  const searchParams: RipGrepToolParams = { pattern: 'world' };
  const outcome = await grepTool.build(searchParams).execute(abortSignal);

  expect(outcome.llmContent).toContain(
    'Found 1 match for pattern "world" in the workspace directory',
  );
});
it('should find matches with a glob filter', async () => {
// Setup specific mock for this test
mockSpawn.mockImplementationOnce(
createMockSpawn({
@@ -233,20 +271,19 @@ describe('RipGrepTool', () => {
}),
);
const params: RipGrepToolParams = { pattern: 'hello', include: '*.js' };
const params: RipGrepToolParams = { pattern: 'hello', glob: '*.js' };
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain(
'Found 1 match for pattern "hello" in the workspace directory (filter: "*.js"):',
);
expect(result.llmContent).toContain('File: fileB.js');
expect(result.llmContent).toContain(
'L2: function baz() { return "hello"; }',
'fileB.js:2:function baz() { return "hello"; }',
);
expect(result.returnDisplay).toBe('Found 1 match');
});
it('should find matches with an include glob and path', async () => {
it('should find matches with a glob filter and path', async () => {
await fs.writeFile(
path.join(tempRootDir, 'sub', 'another.js'),
'const greeting = "hello";',
@@ -291,18 +328,115 @@ describe('RipGrepTool', () => {
const params: RipGrepToolParams = {
pattern: 'hello',
path: 'sub',
include: '*.js',
glob: '*.js',
};
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain(
'Found 1 match for pattern "hello" in path "sub" (filter: "*.js")',
);
expect(result.llmContent).toContain('File: another.js');
expect(result.llmContent).toContain('L1: const greeting = "hello";');
expect(result.llmContent).toContain(
'another.js:1:const greeting = "hello";',
);
expect(result.returnDisplay).toBe('Found 1 match');
});
it('should pass .qwenignore to ripgrep when respected', async () => {
  // Create the ignore file the tool is expected to forward to ripgrep.
  const qwenIgnoreFile = path.join(tempRootDir, '.qwenignore');
  await fs.writeFile(qwenIgnoreFile, 'ignored.txt\n');

  mockSpawn.mockImplementationOnce(
    createMockSpawn({
      // No output and exit code 1: the tool reports this as "no matches".
      exitCode: 1,
      onCall: (_command, rgArgs) => {
        expect(rgArgs).toContain('--ignore-file');
        expect(rgArgs).toContain(qwenIgnoreFile);
      },
    }),
  );

  const searchParams: RipGrepToolParams = { pattern: 'secret' };
  const outcome = await grepTool.build(searchParams).execute(abortSignal);

  expect(outcome.llmContent).toContain(
    'No matches found for pattern "secret" in the workspace directory.',
  );
  expect(outcome.returnDisplay).toBe('No matches found');
});
it('should include .qwenignore matches when disabled in config', async () => {
  // kept.txt is listed in .qwenignore, but the config below turns
  // .qwenignore handling off, so the ignore file must not reach ripgrep.
  const qwenIgnoreFile = path.join(tempRootDir, '.qwenignore');
  await fs.writeFile(qwenIgnoreFile, 'kept.txt\n');
  await fs.writeFile(path.join(tempRootDir, 'kept.txt'), 'keep me');

  const filteringOverride = {
    respectGitIgnore: true,
    respectQwenIgnore: false,
  };
  Object.assign(mockConfig, {
    getFileFilteringOptions: () => filteringOverride,
  });

  mockSpawn.mockImplementationOnce(
    createMockSpawn({
      outputData: `kept.txt:1:keep me${EOL}`,
      exitCode: 0,
      onCall: (_command, rgArgs) => {
        expect(rgArgs).not.toContain('--ignore-file');
        expect(rgArgs).not.toContain(qwenIgnoreFile);
      },
    }),
  );

  const searchParams: RipGrepToolParams = { pattern: 'keep' };
  const outcome = await grepTool.build(searchParams).execute(abortSignal);

  expect(outcome.llmContent).toContain(
    'Found 1 match for pattern "keep" in the workspace directory:',
  );
  expect(outcome.llmContent).toContain('kept.txt:1:keep me');
  expect(outcome.returnDisplay).toBe('Found 1 match');
});
it('should disable gitignore when configured', async () => {
  Object.assign(mockConfig, {
    getFileFilteringOptions: () => ({
      respectGitIgnore: false,
      respectQwenIgnore: true,
    }),
  });

  // Record the ripgrep arguments instead of asserting inside the spawn
  // callback: an assertion that throws in there is raised inside the tool's
  // own execution path, and if spawn is never invoked it would not run at
  // all, so the test could pass without checking anything.
  const recordedArgs: Array<readonly string[]> = [];
  mockSpawn.mockImplementationOnce(
    createMockSpawn({
      exitCode: 1,
      onCall: (_command, rgArgs) => {
        recordedArgs.push(rgArgs);
      },
    }),
  );

  const params: RipGrepToolParams = { pattern: 'ignored' };
  const invocation = grepTool.build(params);
  const result = await invocation.execute(abortSignal);

  // ripgrep must have been spawned exactly once, with VCS ignores disabled.
  expect(recordedArgs).toHaveLength(1);
  expect(recordedArgs[0]).toContain('--no-ignore-vcs');
  // Exit code 1 with no output is reported as an ordinary "no matches" result.
  expect(result.returnDisplay).toBe('No matches found');
});
it('should truncate llm content when exceeding maximum length', async () => {
  // A single match line that is far longer than the 20,000-character cap
  // asserted below.
  const oversizedLine = `fileA.txt:1:${'a'.repeat(25_000)}`;
  mockSpawn.mockImplementationOnce(
    createMockSpawn({
      outputData: `${oversizedLine}${EOL}`,
      exitCode: 0,
    }),
  );

  const searchParams: RipGrepToolParams = { pattern: 'a+' };
  const outcome = await grepTool.build(searchParams).execute(abortSignal);

  const renderedContent = String(outcome.llmContent);
  expect(renderedContent.length).toBeLessThanOrEqual(20_000);
  expect(outcome.llmContent).toMatch(/\[\d+ lines? truncated\] \.\.\./);
  expect(outcome.returnDisplay).toContain('truncated');
});
it('should return "No matches found" when pattern does not exist', async () => {
// Setup specific mock for no matches
mockSpawn.mockImplementationOnce(
@@ -320,19 +454,10 @@ describe('RipGrepTool', () => {
expect(result.returnDisplay).toBe('No matches found');
});
it('should return an error from ripgrep for invalid regex pattern', async () => {
mockSpawn.mockImplementationOnce(
createMockSpawn({
exitCode: 2,
}),
);
it('should throw validation error for invalid regex pattern', async () => {
const params: RipGrepToolParams = { pattern: '[[' };
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('ripgrep exited with code 2');
expect(result.returnDisplay).toContain(
'Error: ripgrep exited with code 2',
expect(() => grepTool.build(params)).toThrow(
'Invalid regular expression pattern: [[',
);
});
@@ -379,8 +504,7 @@ describe('RipGrepTool', () => {
expect(result.llmContent).toContain(
'Found 1 match for pattern "foo.*bar" in the workspace directory:',
);
expect(result.llmContent).toContain('File: fileB.js');
expect(result.llmContent).toContain('L1: const foo = "bar";');
expect(result.llmContent).toContain('fileB.js:1:const foo = "bar";');
});
it('should be case-insensitive by default (JS fallback)', async () => {
@@ -430,11 +554,9 @@ describe('RipGrepTool', () => {
expect(result.llmContent).toContain(
'Found 2 matches for pattern "HELLO" in the workspace directory:',
);
expect(result.llmContent).toContain('File: fileA.txt');
expect(result.llmContent).toContain('L1: hello world');
expect(result.llmContent).toContain('File: fileB.js');
expect(result.llmContent).toContain('fileA.txt:1:hello world');
expect(result.llmContent).toContain(
'L2: function baz() { return "hello"; }',
'fileB.js:2:function baz() { return "hello"; }',
);
});
@@ -462,191 +584,6 @@ describe('RipGrepTool', () => {
});
});
describe('multi-directory workspace', () => {
it('should search across all workspace directories when no path is specified', async () => {
// Create additional directory with test files
const secondDir = await fs.mkdtemp(
path.join(os.tmpdir(), 'grep-tool-second-'),
);
await fs.writeFile(
path.join(secondDir, 'other.txt'),
'hello from second directory\nworld in second',
);
await fs.writeFile(
path.join(secondDir, 'another.js'),
'function world() { return "test"; }',
);
// Create a mock config with multiple directories
const multiDirConfig = {
getTargetDir: () => tempRootDir,
getWorkspaceContext: () =>
createMockWorkspaceContext(tempRootDir, [secondDir]),
getDebugMode: () => false,
} as unknown as Config;
// Setup specific mock for this test - multi-directory search for 'world'
// Mock will be called twice - once for each directory
let callCount = 0;
mockSpawn.mockImplementation(() => {
callCount++;
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
setTimeout(() => {
const stdoutDataHandler = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
const closeHandler = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
let outputData = '';
if (callCount === 1) {
// First directory (tempRootDir)
outputData =
[
'fileA.txt:1:hello world',
'fileA.txt:2:second line with world',
'sub/fileC.txt:1:another world in sub dir',
].join(EOL) + EOL;
} else if (callCount === 2) {
// Second directory (secondDir)
outputData =
[
'other.txt:2:world in second',
'another.js:1:function world() { return "test"; }',
].join(EOL) + EOL;
}
if (stdoutDataHandler && outputData) {
stdoutDataHandler(Buffer.from(outputData));
}
if (closeHandler) {
closeHandler(0);
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
const multiDirGrepTool = new RipGrepTool(multiDirConfig);
const params: RipGrepToolParams = { pattern: 'world' };
const invocation = multiDirGrepTool.build(params);
const result = await invocation.execute(abortSignal);
// Should find matches in both directories
expect(result.llmContent).toContain(
'Found 5 matches for pattern "world"',
);
// Matches from first directory
expect(result.llmContent).toContain('fileA.txt');
expect(result.llmContent).toContain('L1: hello world');
expect(result.llmContent).toContain('L2: second line with world');
expect(result.llmContent).toContain('fileC.txt');
expect(result.llmContent).toContain('L1: another world in sub dir');
// Matches from both directories
expect(result.llmContent).toContain('other.txt');
expect(result.llmContent).toContain('L2: world in second');
expect(result.llmContent).toContain('another.js');
expect(result.llmContent).toContain('L1: function world()');
// Clean up
await fs.rm(secondDir, { recursive: true, force: true });
mockSpawn.mockClear();
});
it('should search only specified path within workspace directories', async () => {
// Create additional directory
const secondDir = await fs.mkdtemp(
path.join(os.tmpdir(), 'grep-tool-second-'),
);
await fs.mkdir(path.join(secondDir, 'sub'));
await fs.writeFile(
path.join(secondDir, 'sub', 'test.txt'),
'hello from second sub directory',
);
// Create a mock config with multiple directories
const multiDirConfig = {
getTargetDir: () => tempRootDir,
getWorkspaceContext: () =>
createMockWorkspaceContext(tempRootDir, [secondDir]),
getDebugMode: () => false,
} as unknown as Config;
// Setup specific mock for this test - searching in 'sub' should only return matches from that directory
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
on: vi.fn(),
removeListener: vi.fn(),
},
stderr: {
on: vi.fn(),
removeListener: vi.fn(),
},
on: vi.fn(),
removeListener: vi.fn(),
kill: vi.fn(),
};
setTimeout(() => {
const onData = mockProcess.stdout.on.mock.calls.find(
(call) => call[0] === 'data',
)?.[1];
const onClose = mockProcess.on.mock.calls.find(
(call) => call[0] === 'close',
)?.[1];
if (onData) {
onData(Buffer.from(`fileC.txt:1:another world in sub dir${EOL}`));
}
if (onClose) {
onClose(0);
}
}, 0);
return mockProcess as unknown as ChildProcess;
});
const multiDirGrepTool = new RipGrepTool(multiDirConfig);
// Search only in the 'sub' directory of the first workspace
const params: RipGrepToolParams = { pattern: 'world', path: 'sub' };
const invocation = multiDirGrepTool.build(params);
const result = await invocation.execute(abortSignal);
// Should only find matches in the specified sub directory
expect(result.llmContent).toContain(
'Found 1 match for pattern "world" in path "sub"',
);
expect(result.llmContent).toContain('File: fileC.txt');
expect(result.llmContent).toContain('L1: another world in sub dir');
// Should not contain matches from second directory
expect(result.llmContent).not.toContain('test.txt');
// Clean up
await fs.rm(secondDir, { recursive: true, force: true });
});
});
describe('abort signal handling', () => {
it('should handle AbortSignal during search', async () => {
const controller = new AbortController();
@@ -1062,8 +999,8 @@ describe('RipGrepTool', () => {
});
});
describe('include pattern filtering', () => {
it('should handle multiple file extensions in include pattern', async () => {
describe('glob pattern filtering', () => {
it('should handle multiple file extensions in glob pattern', async () => {
await fs.writeFile(
path.join(tempRootDir, 'test.ts'),
'typescript content',
@@ -1075,7 +1012,7 @@ describe('RipGrepTool', () => {
);
await fs.writeFile(path.join(tempRootDir, 'test.txt'), 'text content');
// Setup specific mock for this test - include pattern should filter to only ts/tsx files
// Setup specific mock for this test - glob pattern should filter to only ts/tsx files
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
@@ -1116,7 +1053,7 @@ describe('RipGrepTool', () => {
const params: RipGrepToolParams = {
pattern: 'content',
include: '*.{ts,tsx}',
glob: '*.{ts,tsx}',
};
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
@@ -1127,7 +1064,7 @@ describe('RipGrepTool', () => {
expect(result.llmContent).not.toContain('test.txt');
});
it('should handle directory patterns in include', async () => {
it('should handle directory patterns in glob', async () => {
await fs.mkdir(path.join(tempRootDir, 'src'), { recursive: true });
await fs.writeFile(
path.join(tempRootDir, 'src', 'main.ts'),
@@ -1135,7 +1072,7 @@ describe('RipGrepTool', () => {
);
await fs.writeFile(path.join(tempRootDir, 'other.ts'), 'other code');
// Setup specific mock for this test - include pattern should filter to only src/** files
// Setup specific mock for this test - glob pattern should filter to only src/** files
mockSpawn.mockImplementationOnce(() => {
const mockProcess = {
stdout: {
@@ -1172,7 +1109,7 @@ describe('RipGrepTool', () => {
const params: RipGrepToolParams = {
pattern: 'code',
include: 'src/**',
glob: 'src/**',
};
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
@@ -1189,10 +1126,10 @@ describe('RipGrepTool', () => {
expect(invocation.getDescription()).toBe("'testPattern'");
});
it('should generate correct description with pattern and include', () => {
it('should generate correct description with pattern and glob', () => {
const params: RipGrepToolParams = {
pattern: 'testPattern',
include: '*.ts',
glob: '*.ts',
};
const invocation = grepTool.build(params);
expect(invocation.getDescription()).toBe("'testPattern' in *.ts");
@@ -1211,29 +1148,18 @@ describe('RipGrepTool', () => {
expect(invocation.getDescription()).toContain(path.join('src', 'app'));
});
it('should indicate searching across all workspace directories when no path specified', () => {
// Create a mock config with multiple directories
const multiDirConfig = {
getTargetDir: () => tempRootDir,
getWorkspaceContext: () =>
createMockWorkspaceContext(tempRootDir, ['/another/dir']),
getDebugMode: () => false,
} as unknown as Config;
const multiDirGrepTool = new RipGrepTool(multiDirConfig);
it('should generate correct description with default search path', () => {
const params: RipGrepToolParams = { pattern: 'testPattern' };
const invocation = multiDirGrepTool.build(params);
expect(invocation.getDescription()).toBe(
"'testPattern' across all workspace directories",
);
const invocation = grepTool.build(params);
expect(invocation.getDescription()).toBe("'testPattern'");
});
it('should generate correct description with pattern, include, and path', async () => {
it('should generate correct description with pattern, glob, and path', async () => {
const dirPath = path.join(tempRootDir, 'src', 'app');
await fs.mkdir(dirPath, { recursive: true });
const params: RipGrepToolParams = {
pattern: 'testPattern',
include: '*.ts',
glob: '*.ts',
path: path.join('src', 'app'),
};
const invocation = grepTool.build(params);

View File

@@ -10,16 +10,19 @@ import { EOL } from 'node:os';
import { spawn } from 'node:child_process';
import type { ToolInvocation, ToolResult } from './tools.js';
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
import { SchemaValidator } from '../utils/schemaValidator.js';
import { ToolNames } from './tool-names.js';
import { makeRelative, shortenPath } from '../utils/paths.js';
import { getErrorMessage, isNodeError } from '../utils/errors.js';
import type { Config } from '../config/config.js';
import { ensureRipgrepPath } from '../utils/ripgrepUtils.js';
import { SchemaValidator } from '../utils/schemaValidator.js';
import type { FileFilteringOptions } from '../config/constants.js';
import { DEFAULT_FILE_FILTERING_OPTIONS } from '../config/constants.js';
const DEFAULT_TOTAL_MAX_MATCHES = 20000;
const MAX_LLM_CONTENT_LENGTH = 20_000;
/**
* Parameters for the GrepTool
* Parameters for the GrepTool (Simplified)
*/
export interface RipGrepToolParams {
/**
@@ -33,18 +36,14 @@ export interface RipGrepToolParams {
path?: string;
/**
* File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}")
* Glob pattern to filter files (e.g. "*.js", "*.{ts,tsx}")
*/
include?: string;
}
glob?: string;
/**
* Result object for a single grep match
*/
interface GrepMatch {
filePath: string;
lineNumber: number;
line: string;
/**
* Maximum number of matching lines to return (optional, shows all if not specified)
*/
limit?: number;
}
class GrepToolInvocation extends BaseToolInvocation<
@@ -61,18 +60,15 @@ class GrepToolInvocation extends BaseToolInvocation<
/**
* Checks if a path is within the root directory and resolves it.
* @param relativePath Path relative to the root directory (or undefined for root).
* @returns The absolute path if valid and exists, or null if no path specified (to search all directories).
* @returns The absolute path to search within.
* @throws {Error} If path is outside root, doesn't exist, or isn't a directory.
*/
private resolveAndValidatePath(relativePath?: string): string | null {
// If no path specified, return null to indicate searching all workspace directories
if (!relativePath) {
return null;
}
private resolveAndValidatePath(relativePath?: string): string {
const targetDir = this.config.getTargetDir();
const targetPath = relativePath
? path.resolve(targetDir, relativePath)
: targetDir;
const targetPath = path.resolve(this.config.getTargetDir(), relativePath);
// Security Check: Ensure the resolved path is within workspace boundaries
const workspaceContext = this.config.getWorkspaceContext();
if (!workspaceContext.isPathWithinWorkspace(targetPath)) {
const directories = workspaceContext.getDirectories();
@@ -81,7 +77,10 @@ class GrepToolInvocation extends BaseToolInvocation<
);
}
// Check existence and type after resolving
return this.ensureDirectory(targetPath);
}
private ensureDirectory(targetPath: string): string {
try {
const stats = fs.statSync(targetPath);
if (!stats.isDirectory()) {
@@ -101,104 +100,81 @@ class GrepToolInvocation extends BaseToolInvocation<
async execute(signal: AbortSignal): Promise<ToolResult> {
try {
const workspaceContext = this.config.getWorkspaceContext();
const searchDirAbs = this.resolveAndValidatePath(this.params.path);
const searchDirDisplay = this.params.path || '.';
// Determine which directories to search
let searchDirectories: readonly string[];
if (searchDirAbs === null) {
// No path specified - search all workspace directories
searchDirectories = workspaceContext.getDirectories();
} else {
// Specific path provided - search only that directory
searchDirectories = [searchDirAbs];
}
// Get raw ripgrep output
const rawOutput = await this.performRipgrepSearch({
pattern: this.params.pattern,
path: searchDirAbs,
glob: this.params.glob,
signal,
});
let allMatches: GrepMatch[] = [];
const totalMaxMatches = DEFAULT_TOTAL_MAX_MATCHES;
// Build search description
const searchLocationDescription = this.params.path
? `in path "${searchDirDisplay}"`
: `in the workspace directory`;
if (this.config.getDebugMode()) {
console.log(`[GrepTool] Total result limit: ${totalMaxMatches}`);
}
const filterDescription = this.params.glob
? ` (filter: "${this.params.glob}")`
: '';
for (const searchDir of searchDirectories) {
const searchResult = await this.performRipgrepSearch({
pattern: this.params.pattern,
path: searchDir,
include: this.params.include,
signal,
});
if (searchDirectories.length > 1) {
const dirName = path.basename(searchDir);
searchResult.forEach((match) => {
match.filePath = path.join(dirName, match.filePath);
});
}
allMatches = allMatches.concat(searchResult);
if (allMatches.length >= totalMaxMatches) {
allMatches = allMatches.slice(0, totalMaxMatches);
break;
}
}
let searchLocationDescription: string;
if (searchDirAbs === null) {
const numDirs = workspaceContext.getDirectories().length;
searchLocationDescription =
numDirs > 1
? `across ${numDirs} workspace directories`
: `in the workspace directory`;
} else {
searchLocationDescription = `in path "${searchDirDisplay}"`;
}
if (allMatches.length === 0) {
const noMatchMsg = `No matches found for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}.`;
// Check if we have any matches
if (!rawOutput.trim()) {
const noMatchMsg = `No matches found for pattern "${this.params.pattern}" ${searchLocationDescription}${filterDescription}.`;
return { llmContent: noMatchMsg, returnDisplay: `No matches found` };
}
const wasTruncated = allMatches.length >= totalMaxMatches;
// Split into lines and count total matches
const allLines = rawOutput.split(EOL).filter((line) => line.trim());
const totalMatches = allLines.length;
const matchTerm = totalMatches === 1 ? 'match' : 'matches';
const matchesByFile = allMatches.reduce(
(acc, match) => {
const fileKey = match.filePath;
if (!acc[fileKey]) {
acc[fileKey] = [];
}
acc[fileKey].push(match);
acc[fileKey].sort((a, b) => a.lineNumber - b.lineNumber);
return acc;
},
{} as Record<string, GrepMatch[]>,
);
// Build header early to calculate available space
const header = `Found ${totalMatches} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${filterDescription}:\n---\n`;
const maxTruncationNoticeLength = 100; // "[... N more matches truncated]"
const maxGrepOutputLength =
MAX_LLM_CONTENT_LENGTH - header.length - maxTruncationNoticeLength;
const matchCount = allMatches.length;
const matchTerm = matchCount === 1 ? 'match' : 'matches';
let llmContent = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}`;
if (wasTruncated) {
llmContent += ` (results limited to ${totalMaxMatches} matches for performance)`;
// Apply line limit first (if specified)
let truncatedByLineLimit = false;
let linesToInclude = allLines;
if (
this.params.limit !== undefined &&
allLines.length > this.params.limit
) {
linesToInclude = allLines.slice(0, this.params.limit);
truncatedByLineLimit = true;
}
llmContent += `:\n---\n`;
// Join lines back into grep output
let grepOutput = linesToInclude.join(EOL);
for (const filePath in matchesByFile) {
llmContent += `File: ${filePath}\n`;
matchesByFile[filePath].forEach((match) => {
const trimmedLine = match.line.trim();
llmContent += `L${match.lineNumber}: ${trimmedLine}\n`;
});
llmContent += '---\n';
// Apply character limit as safety net
let truncatedByCharLimit = false;
if (grepOutput.length > maxGrepOutputLength) {
grepOutput = grepOutput.slice(0, maxGrepOutputLength) + '...';
truncatedByCharLimit = true;
}
let displayMessage = `Found ${matchCount} ${matchTerm}`;
if (wasTruncated) {
displayMessage += ` (limited)`;
// Count how many lines we actually included after character truncation
const finalLines = grepOutput.split(EOL).filter((line) => line.trim());
const includedLines = finalLines.length;
// Build result
let llmContent = header + grepOutput;
// Add truncation notice if needed
if (truncatedByLineLimit || truncatedByCharLimit) {
const omittedMatches = totalMatches - includedLines;
llmContent += ` [${omittedMatches} ${omittedMatches === 1 ? 'line' : 'lines'} truncated] ...`;
}
// Build display message (show real count, not truncated)
let displayMessage = `Found ${totalMatches} ${matchTerm}`;
if (truncatedByLineLimit || truncatedByCharLimit) {
displayMessage += ` (truncated)`;
}
return {
@@ -215,53 +191,15 @@ class GrepToolInvocation extends BaseToolInvocation<
}
}
private parseRipgrepOutput(output: string, basePath: string): GrepMatch[] {
const results: GrepMatch[] = [];
if (!output) return results;
const lines = output.split(EOL);
for (const line of lines) {
if (!line.trim()) continue;
const firstColonIndex = line.indexOf(':');
if (firstColonIndex === -1) continue;
const secondColonIndex = line.indexOf(':', firstColonIndex + 1);
if (secondColonIndex === -1) continue;
const filePathRaw = line.substring(0, firstColonIndex);
const lineNumberStr = line.substring(
firstColonIndex + 1,
secondColonIndex,
);
const lineContent = line.substring(secondColonIndex + 1);
const lineNumber = parseInt(lineNumberStr, 10);
if (!isNaN(lineNumber)) {
const absoluteFilePath = path.resolve(basePath, filePathRaw);
const relativeFilePath = path.relative(basePath, absoluteFilePath);
results.push({
filePath: relativeFilePath || path.basename(absoluteFilePath),
lineNumber,
line: lineContent,
});
}
}
return results;
}
private async performRipgrepSearch(options: {
pattern: string;
path: string;
include?: string;
glob?: string;
signal: AbortSignal;
}): Promise<GrepMatch[]> {
const { pattern, path: absolutePath, include } = options;
}): Promise<string> {
const { pattern, path: absolutePath, glob } = options;
const rgArgs = [
const rgArgs: string[] = [
'--line-number',
'--no-heading',
'--with-filename',
@@ -270,29 +208,34 @@ class GrepToolInvocation extends BaseToolInvocation<
pattern,
];
if (include) {
rgArgs.push('--glob', include);
// Add file exclusions from .gitignore and .qwenignore
const filteringOptions = this.getFileFilteringOptions();
if (!filteringOptions.respectGitIgnore) {
rgArgs.push('--no-ignore-vcs');
}
const excludes = [
'.git',
'node_modules',
'bower_components',
'*.log',
'*.tmp',
'build',
'dist',
'coverage',
];
excludes.forEach((exclude) => {
rgArgs.push('--glob', `!${exclude}`);
});
if (filteringOptions.respectQwenIgnore) {
const qwenIgnorePath = path.join(
this.config.getTargetDir(),
'.qwenignore',
);
if (fs.existsSync(qwenIgnorePath)) {
rgArgs.push('--ignore-file', qwenIgnorePath);
}
}
// Add glob pattern if provided
if (glob) {
rgArgs.push('--glob', glob);
}
rgArgs.push('--threads', '4');
rgArgs.push(absolutePath);
try {
const rgPath = await ensureRipgrepPath();
const rgPath = this.config.getUseBuiltinRipgrep()
? await ensureRipgrepPath()
: 'rg';
const output = await new Promise<string>((resolve, reject) => {
const child = spawn(rgPath, rgArgs, {
windowsHide: true,
@@ -334,22 +277,33 @@ class GrepToolInvocation extends BaseToolInvocation<
});
});
return this.parseRipgrepOutput(output, absolutePath);
return output;
} catch (error: unknown) {
console.error(`GrepLogic: ripgrep failed: ${getErrorMessage(error)}`);
throw error;
}
}
/**
 * Resolves the effective file-filtering options for a search.
 *
 * Reads the options from the config when it exposes
 * `getFileFilteringOptions`, and substitutes the matching value from
 * `DEFAULT_FILE_FILTERING_OPTIONS` for each flag that is null or undefined.
 *
 * @returns Options with both `respectGitIgnore` and `respectQwenIgnore` set.
 */
private getFileFilteringOptions(): FileFilteringOptions {
  const configured = this.config.getFileFilteringOptions?.();
  const respectGitIgnore =
    configured?.respectGitIgnore ??
    DEFAULT_FILE_FILTERING_OPTIONS.respectGitIgnore;
  const respectQwenIgnore =
    configured?.respectQwenIgnore ??
    DEFAULT_FILE_FILTERING_OPTIONS.respectQwenIgnore;
  return { respectGitIgnore, respectQwenIgnore };
}
/**
* Gets a description of the grep operation
* @param params Parameters for the grep operation
* @returns A string describing the grep
*/
getDescription(): string {
let description = `'${this.params.pattern}'`;
if (this.params.include) {
description += ` in ${this.params.include}`;
if (this.params.glob) {
description += ` in ${this.params.glob}`;
}
if (this.params.path) {
const resolvedPath = path.resolve(
@@ -381,36 +335,41 @@ class GrepToolInvocation extends BaseToolInvocation<
}
/**
* Implementation of the Grep tool logic (moved from CLI)
* Implementation of the Grep tool logic
*/
export class RipGrepTool extends BaseDeclarativeTool<
RipGrepToolParams,
ToolResult
> {
static readonly Name = 'search_file_content';
static readonly Name = ToolNames.GREP;
constructor(private readonly config: Config) {
super(
RipGrepTool.Name,
'SearchText',
'Searches for a regular expression pattern within the content of files in a specified directory (or current working directory). Can filter files by a glob pattern. Returns the lines containing matches, along with their file paths and line numbers. Total results limited to 20,000 matches like VSCode.',
'Grep',
'A powerful search tool built on ripgrep\n\n Usage:\n - ALWAYS use Grep for search tasks. NEVER invoke `grep` or `rg` as a Bash command. The Grep tool has been optimized for correct permissions and access.\n - Supports full regex syntax (e.g., "log.*Error", "function\\s+\\w+")\n - Filter files with glob parameter (e.g., "*.js", "**/*.tsx")\n - Use Task tool for open-ended searches requiring multiple rounds\n - Pattern syntax: Uses ripgrep (not grep) - special regex characters need escaping (use `interface\\{\\}` to find `interface{}` in Go code)\n',
Kind.Search,
{
properties: {
pattern: {
description:
"The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').",
type: 'string',
description:
'The regular expression pattern to search for in file contents',
},
glob: {
type: 'string',
description:
'Glob pattern to filter files (e.g. "*.js", "*.{ts,tsx}") - maps to rg --glob',
},
path: {
description:
'Optional: The absolute path to the directory to search within. If omitted, searches the current working directory.',
type: 'string',
description:
'File or directory to search in (rg PATH). Defaults to current working directory.',
},
include: {
limit: {
type: 'number',
description:
"Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).",
type: 'string',
'Limit output to first N lines/entries. Optional - shows all matches if not specified.',
},
},
required: ['pattern'],
@@ -422,13 +381,13 @@ export class RipGrepTool extends BaseDeclarativeTool<
/**
* Checks if a path is within the root directory and resolves it.
* @param relativePath Path relative to the root directory (or undefined for root).
* @returns The absolute path if valid and exists, or null if no path specified (to search all directories).
* @returns The absolute path to search within.
* @throws {Error} If path is outside root, doesn't exist, or isn't a directory.
*/
private resolveAndValidatePath(relativePath?: string): string | null {
// If no path specified, return null to indicate searching all workspace directories
private resolveAndValidatePath(relativePath?: string): string {
// If no path specified, search within the workspace root directory
if (!relativePath) {
return null;
return this.config.getTargetDir();
}
const targetPath = path.resolve(this.config.getTargetDir(), relativePath);
@@ -465,7 +424,9 @@ export class RipGrepTool extends BaseDeclarativeTool<
* @param params Parameters to validate
* @returns An error message string if invalid, null otherwise
*/
override validateToolParams(params: RipGrepToolParams): string | null {
protected override validateToolParamValues(
params: RipGrepToolParams,
): string | null {
const errors = SchemaValidator.validate(
this.schema.parametersJsonSchema,
params,
@@ -474,6 +435,13 @@ export class RipGrepTool extends BaseDeclarativeTool<
return errors;
}
// Validate pattern is a valid regex
try {
new RegExp(params.pattern);
} catch (error) {
return `Invalid regular expression pattern: ${params.pattern}. Error: ${getErrorMessage(error)}`;
}
// Only validate path if one is provided
if (params.path) {
try {

View File

@@ -14,7 +14,7 @@ export const ToolNames = {
WRITE_FILE: 'write_file',
READ_FILE: 'read_file',
READ_MANY_FILES: 'read_many_files',
GREP: 'search_file_content',
GREP: 'grep_search',
GLOB: 'glob',
SHELL: 'run_shell_command',
TODO_WRITE: 'todo_write',