Feat: Simplify and Improve Search Tools (glob, grep, ripgrep) (#969)

This commit is contained in:
tanzhenxin
2025-11-06 16:25:06 +08:00
committed by GitHub
parent decb04efc4
commit d17c37af7d
9 changed files with 840 additions and 782 deletions

View File

@@ -88,17 +88,6 @@ describe('GlobTool', () => {
expect(result.returnDisplay).toBe('Found 2 matching file(s)');
});
it('should find files case-sensitively when case_sensitive is true', async () => {
const params: GlobToolParams = { pattern: '*.txt', case_sensitive: true };
const invocation = globTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('Found 1 file(s)');
expect(result.llmContent).toContain(path.join(tempRootDir, 'fileA.txt'));
expect(result.llmContent).not.toContain(
path.join(tempRootDir, 'FileB.TXT'),
);
});
it('should find files case-insensitively by default (pattern: *.TXT)', async () => {
const params: GlobToolParams = { pattern: '*.TXT' };
const invocation = globTool.build(params);
@@ -108,18 +97,6 @@ describe('GlobTool', () => {
expect(result.llmContent).toContain(path.join(tempRootDir, 'FileB.TXT'));
});
it('should find files case-insensitively when case_sensitive is false (pattern: *.TXT)', async () => {
const params: GlobToolParams = {
pattern: '*.TXT',
case_sensitive: false,
};
const invocation = globTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('Found 2 file(s)');
expect(result.llmContent).toContain(path.join(tempRootDir, 'fileA.txt'));
expect(result.llmContent).toContain(path.join(tempRootDir, 'FileB.TXT'));
});
it('should find files using a pattern that includes a subdirectory', async () => {
const params: GlobToolParams = { pattern: 'sub/*.md' };
const invocation = globTool.build(params);
@@ -207,7 +184,7 @@ describe('GlobTool', () => {
const filesListed = llmContent
.trim()
.split(/\r?\n/)
.slice(1)
.slice(2)
.map((line) => line.trim())
.filter(Boolean);
@@ -220,14 +197,13 @@ describe('GlobTool', () => {
);
});
it('should return a PATH_NOT_IN_WORKSPACE error if path is outside workspace', async () => {
it('should return error if path is outside workspace', async () => {
// Bypassing validation to test execute method directly
vi.spyOn(globTool, 'validateToolParams').mockReturnValue(null);
const params: GlobToolParams = { pattern: '*.txt', path: '/etc' };
const invocation = globTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.error?.type).toBe(ToolErrorType.PATH_NOT_IN_WORKSPACE);
expect(result.returnDisplay).toBe('Path is not within workspace');
expect(result.returnDisplay).toBe('Error: Path is not within workspace');
});
it('should return a GLOB_EXECUTION_ERROR on glob failure', async () => {
@@ -255,15 +231,6 @@ describe('GlobTool', () => {
expect(globTool.validateToolParams(params)).toBeNull();
});
it('should return null for valid parameters (pattern, path, and case_sensitive)', () => {
const params: GlobToolParams = {
pattern: '*.js',
path: 'sub',
case_sensitive: true,
};
expect(globTool.validateToolParams(params)).toBeNull();
});
it('should return error if pattern is missing (schema validation)', () => {
// Need to correctly define this as an object without pattern
const params = { path: '.' };
@@ -297,16 +264,6 @@ describe('GlobTool', () => {
);
});
it('should return error if case_sensitive is provided but is not a boolean', () => {
const params = {
pattern: '*.ts',
case_sensitive: 'true',
} as unknown as GlobToolParams; // Force incorrect type
expect(globTool.validateToolParams(params)).toBe(
'params/case_sensitive must be boolean',
);
});
it("should return error if search path resolves outside the tool's root directory", () => {
// Create a globTool instance specifically for this test, with a deeper root
tempRootDir = path.join(tempRootDir, 'sub');
@@ -319,7 +276,7 @@ describe('GlobTool', () => {
path: '../../../../../../../../../../tmp', // Definitely outside
};
expect(specificGlobTool.validateToolParams(paramsOutside)).toContain(
'resolves outside the allowed workspace directories',
'Path is not within workspace',
);
});
@@ -329,14 +286,14 @@ describe('GlobTool', () => {
path: 'nonexistent_subdir',
};
expect(globTool.validateToolParams(params)).toContain(
'Search path does not exist',
'Path does not exist',
);
});
it('should return error if specified search path is a file, not a directory', async () => {
const params: GlobToolParams = { pattern: '*.txt', path: 'fileA.txt' };
expect(globTool.validateToolParams(params)).toContain(
'Search path is not a directory',
'Path is not a directory',
);
});
});
@@ -348,20 +305,10 @@ describe('GlobTool', () => {
expect(globTool.validateToolParams(validPath)).toBeNull();
expect(globTool.validateToolParams(invalidPath)).toContain(
'resolves outside the allowed workspace directories',
'Path is not within workspace',
);
});
it('should provide clear error messages when path is outside workspace', () => {
const invalidPath = { pattern: '*.ts', path: '/etc' };
const error = globTool.validateToolParams(invalidPath);
expect(error).toContain(
'resolves outside the allowed workspace directories',
);
expect(error).toContain(tempRootDir);
});
it('should work with paths in workspace subdirectories', async () => {
const params: GlobToolParams = { pattern: '*.md', path: 'sub' };
const invocation = globTool.build(params);
@@ -417,47 +364,123 @@ describe('GlobTool', () => {
expect(result.llmContent).toContain('Found 3 file(s)'); // fileA.txt, FileB.TXT, b.notignored.txt
expect(result.llmContent).not.toContain('a.qwenignored.txt');
});
});
it('should not respect .gitignore when respect_git_ignore is false', async () => {
await fs.writeFile(path.join(tempRootDir, '.gitignore'), '*.ignored.txt');
await fs.writeFile(
path.join(tempRootDir, 'a.ignored.txt'),
'ignored content',
);
describe('file count truncation', () => {
it('should truncate results when more than 100 files are found', async () => {
// Create 150 test files
for (let i = 1; i <= 150; i++) {
await fs.writeFile(
path.join(tempRootDir, `file${i}.trunctest`),
`content${i}`,
);
}
const params: GlobToolParams = {
pattern: '*.txt',
respect_git_ignore: false,
};
const params: GlobToolParams = { pattern: '*.trunctest' };
const invocation = globTool.build(params);
const result = await invocation.execute(abortSignal);
const llmContent = partListUnionToString(result.llmContent);
expect(result.llmContent).toContain('Found 3 file(s)'); // fileA.txt, FileB.TXT, a.ignored.txt
expect(result.llmContent).toContain('a.ignored.txt');
// Should report all 150 files found
expect(llmContent).toContain('Found 150 file(s)');
// Should include truncation notice
expect(llmContent).toContain('[50 files truncated] ...');
// Count the number of .trunctest files mentioned in the output
const fileMatches = llmContent.match(/file\d+\.trunctest/g);
expect(fileMatches).toBeDefined();
expect(fileMatches?.length).toBe(100);
// returnDisplay should indicate truncation
expect(result.returnDisplay).toBe(
'Found 150 matching file(s) (truncated)',
);
});
it('should not respect .qwenignore when respect_qwen_ignore is false', async () => {
await fs.writeFile(
path.join(tempRootDir, '.qwenignore'),
'*.qwenignored.txt',
);
await fs.writeFile(
path.join(tempRootDir, 'a.qwenignored.txt'),
'ignored content',
);
it('should not truncate when exactly 100 files are found', async () => {
// Create exactly 100 test files
for (let i = 1; i <= 100; i++) {
await fs.writeFile(
path.join(tempRootDir, `exact${i}.trunctest`),
`content${i}`,
);
}
// Recreate the tool to pick up the new .qwenignore file
globTool = new GlobTool(mockConfig);
const params: GlobToolParams = {
pattern: '*.txt',
respect_qwen_ignore: false,
};
const params: GlobToolParams = { pattern: '*.trunctest' };
const invocation = globTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('Found 3 file(s)'); // fileA.txt, FileB.TXT, a.qwenignored.txt
expect(result.llmContent).toContain('a.qwenignored.txt');
// Should report all 100 files found
expect(result.llmContent).toContain('Found 100 file(s)');
// Should NOT include truncation notice
expect(result.llmContent).not.toContain('truncated');
// Should show all 100 files
expect(result.llmContent).toContain('exact1.trunctest');
expect(result.llmContent).toContain('exact100.trunctest');
// returnDisplay should NOT indicate truncation
expect(result.returnDisplay).toBe('Found 100 matching file(s)');
});
it('should not truncate when fewer than 100 files are found', async () => {
// Create 50 test files
for (let i = 1; i <= 50; i++) {
await fs.writeFile(
path.join(tempRootDir, `small${i}.trunctest`),
`content${i}`,
);
}
const params: GlobToolParams = { pattern: '*.trunctest' };
const invocation = globTool.build(params);
const result = await invocation.execute(abortSignal);
// Should report all 50 files found
expect(result.llmContent).toContain('Found 50 file(s)');
// Should NOT include truncation notice
expect(result.llmContent).not.toContain('truncated');
// returnDisplay should NOT indicate truncation
expect(result.returnDisplay).toBe('Found 50 matching file(s)');
});
it('should use correct singular/plural in truncation message for 1 file truncated', async () => {
// Create 101 test files (will truncate 1 file)
for (let i = 1; i <= 101; i++) {
await fs.writeFile(
path.join(tempRootDir, `singular${i}.trunctest`),
`content${i}`,
);
}
const params: GlobToolParams = { pattern: '*.trunctest' };
const invocation = globTool.build(params);
const result = await invocation.execute(abortSignal);
// Should use singular "file" for 1 truncated file
expect(result.llmContent).toContain('[1 file truncated] ...');
expect(result.llmContent).not.toContain('[1 files truncated]');
});
it('should use correct plural in truncation message for multiple files truncated', async () => {
// Create 105 test files (will truncate 5 files)
for (let i = 1; i <= 105; i++) {
await fs.writeFile(
path.join(tempRootDir, `plural${i}.trunctest`),
`content${i}`,
);
}
const params: GlobToolParams = { pattern: '*.trunctest' };
const invocation = globTool.build(params);
const result = await invocation.execute(abortSignal);
// Should use plural "files" for multiple truncated files
expect(result.llmContent).toContain('[5 files truncated] ...');
});
});
});

View File

@@ -10,10 +10,17 @@ import { glob, escape } from 'glob';
import type { ToolInvocation, ToolResult } from './tools.js';
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
import { ToolNames } from './tool-names.js';
import { shortenPath, makeRelative } from '../utils/paths.js';
import { resolveAndValidatePath } from '../utils/paths.js';
import { type Config } from '../config/config.js';
import { DEFAULT_FILE_FILTERING_OPTIONS } from '../config/constants.js';
import {
DEFAULT_FILE_FILTERING_OPTIONS,
type FileFilteringOptions,
} from '../config/constants.js';
import { ToolErrorType } from './tool-error.js';
import { getErrorMessage } from '../utils/errors.js';
import type { FileDiscoveryService } from '../services/fileDiscoveryService.js';
const MAX_FILE_COUNT = 100;
// Subset of 'Path' interface provided by 'glob' that we can implement for testing
export interface GlobPath {
@@ -64,118 +71,68 @@ export interface GlobToolParams {
* The directory to search in (optional, defaults to current directory)
*/
path?: string;
/**
* Whether the search should be case-sensitive (optional, defaults to false)
*/
case_sensitive?: boolean;
/**
* Whether to respect .gitignore patterns (optional, defaults to true)
*/
respect_git_ignore?: boolean;
/**
* Whether to respect .qwenignore patterns (optional, defaults to true)
*/
respect_qwen_ignore?: boolean;
}
class GlobToolInvocation extends BaseToolInvocation<
GlobToolParams,
ToolResult
> {
private fileService: FileDiscoveryService;
constructor(
private config: Config,
params: GlobToolParams,
) {
super(params);
this.fileService = config.getFileService();
}
getDescription(): string {
let description = `'${this.params.pattern}'`;
if (this.params.path) {
const searchDir = path.resolve(
this.config.getTargetDir(),
this.params.path || '.',
);
const relativePath = makeRelative(searchDir, this.config.getTargetDir());
description += ` within ${shortenPath(relativePath)}`;
description += ` in path '${this.params.path}'`;
}
return description;
}
async execute(signal: AbortSignal): Promise<ToolResult> {
try {
const workspaceContext = this.config.getWorkspaceContext();
const workspaceDirectories = workspaceContext.getDirectories();
// Default to target directory if no path is provided
const searchDirAbs = resolveAndValidatePath(
this.config,
this.params.path,
);
const searchLocationDescription = this.params.path
? `within ${searchDirAbs}`
: `in the workspace directory`;
// If a specific path is provided, resolve it and check if it's within workspace
let searchDirectories: readonly string[];
if (this.params.path) {
const searchDirAbsolute = path.resolve(
this.config.getTargetDir(),
this.params.path,
);
if (!workspaceContext.isPathWithinWorkspace(searchDirAbsolute)) {
const rawError = `Error: Path "${this.params.path}" is not within any workspace directory`;
return {
llmContent: rawError,
returnDisplay: `Path is not within workspace`,
error: {
message: rawError,
type: ToolErrorType.PATH_NOT_IN_WORKSPACE,
},
};
}
searchDirectories = [searchDirAbsolute];
} else {
// Search across all workspace directories
searchDirectories = workspaceDirectories;
// Collect entries from the search directory
let pattern = this.params.pattern;
const fullPath = path.join(searchDirAbs, pattern);
if (fs.existsSync(fullPath)) {
pattern = escape(pattern);
}
// Get centralized file discovery service
const fileDiscovery = this.config.getFileService();
// Collect entries from all search directories
const allEntries: GlobPath[] = [];
for (const searchDir of searchDirectories) {
let pattern = this.params.pattern;
const fullPath = path.join(searchDir, pattern);
if (fs.existsSync(fullPath)) {
pattern = escape(pattern);
}
const entries = (await glob(pattern, {
cwd: searchDir,
withFileTypes: true,
nodir: true,
stat: true,
nocase: !this.params.case_sensitive,
dot: true,
ignore: this.config.getFileExclusions().getGlobExcludes(),
follow: false,
signal,
})) as GlobPath[];
allEntries.push(...entries);
}
const allEntries = (await glob(pattern, {
cwd: searchDirAbs,
withFileTypes: true,
nodir: true,
stat: true,
nocase: true,
dot: true,
follow: false,
signal,
})) as GlobPath[];
const relativePaths = allEntries.map((p) =>
path.relative(this.config.getTargetDir(), p.fullpath()),
);
const { filteredPaths, gitIgnoredCount, qwenIgnoredCount } =
fileDiscovery.filterFilesWithReport(relativePaths, {
respectGitIgnore:
this.params?.respect_git_ignore ??
this.config.getFileFilteringOptions().respectGitIgnore ??
DEFAULT_FILE_FILTERING_OPTIONS.respectGitIgnore,
respectQwenIgnore:
this.params?.respect_qwen_ignore ??
this.config.getFileFilteringOptions().respectQwenIgnore ??
DEFAULT_FILE_FILTERING_OPTIONS.respectQwenIgnore,
});
const { filteredPaths } = this.fileService.filterFilesWithReport(
relativePaths,
this.getFileFilteringOptions(),
);
const filteredAbsolutePaths = new Set(
filteredPaths.map((p) => path.resolve(this.config.getTargetDir(), p)),
@@ -186,20 +143,8 @@ class GlobToolInvocation extends BaseToolInvocation<
);
if (!filteredEntries || filteredEntries.length === 0) {
let message = `No files found matching pattern "${this.params.pattern}"`;
if (searchDirectories.length === 1) {
message += ` within ${searchDirectories[0]}`;
} else {
message += ` within ${searchDirectories.length} workspace directories`;
}
if (gitIgnoredCount > 0) {
message += ` (${gitIgnoredCount} files were git-ignored)`;
}
if (qwenIgnoredCount > 0) {
message += ` (${qwenIgnoredCount} files were qwen-ignored)`;
}
return {
llmContent: message,
llmContent: `No files found matching pattern "${this.params.pattern}" ${searchLocationDescription}`,
returnDisplay: `No files found`,
};
}
@@ -215,29 +160,32 @@ class GlobToolInvocation extends BaseToolInvocation<
oneDayInMs,
);
const sortedAbsolutePaths = sortedEntries.map((entry) =>
const totalFileCount = sortedEntries.length;
const truncated = totalFileCount > MAX_FILE_COUNT;
// Limit to MAX_FILE_COUNT if needed
const entriesToShow = truncated
? sortedEntries.slice(0, MAX_FILE_COUNT)
: sortedEntries;
const sortedAbsolutePaths = entriesToShow.map((entry) =>
entry.fullpath(),
);
const fileListDescription = sortedAbsolutePaths.join('\n');
const fileCount = sortedAbsolutePaths.length;
let resultMessage = `Found ${fileCount} file(s) matching "${this.params.pattern}"`;
if (searchDirectories.length === 1) {
resultMessage += ` within ${searchDirectories[0]}`;
} else {
resultMessage += ` across ${searchDirectories.length} workspace directories`;
let resultMessage = `Found ${totalFileCount} file(s) matching "${this.params.pattern}" ${searchLocationDescription}`;
resultMessage += `, sorted by modification time (newest first):\n---\n${fileListDescription}`;
// Add truncation notice if needed
if (truncated) {
const omittedFiles = totalFileCount - MAX_FILE_COUNT;
const fileTerm = omittedFiles === 1 ? 'file' : 'files';
resultMessage += `\n---\n[${omittedFiles} ${fileTerm} truncated] ...`;
}
if (gitIgnoredCount > 0) {
resultMessage += ` (${gitIgnoredCount} additional files were git-ignored)`;
}
if (qwenIgnoredCount > 0) {
resultMessage += ` (${qwenIgnoredCount} additional files were qwen-ignored)`;
}
resultMessage += `, sorted by modification time (newest first):\n${fileListDescription}`;
return {
llmContent: resultMessage,
returnDisplay: `Found ${fileCount} matching file(s)`,
returnDisplay: `Found ${totalFileCount} matching file(s)${truncated ? ' (truncated)' : ''}`,
};
} catch (error) {
const errorMessage =
@@ -246,7 +194,7 @@ class GlobToolInvocation extends BaseToolInvocation<
const rawError = `Error during glob search operation: ${errorMessage}`;
return {
llmContent: rawError,
returnDisplay: `Error: An unexpected error occurred.`,
returnDisplay: `Error: ${errorMessage || 'An unexpected error occurred.'}`,
error: {
message: rawError,
type: ToolErrorType.GLOB_EXECUTION_ERROR,
@@ -254,6 +202,18 @@ class GlobToolInvocation extends BaseToolInvocation<
};
}
}
private getFileFilteringOptions(): FileFilteringOptions {
const options = this.config.getFileFilteringOptions?.();
return {
respectGitIgnore:
options?.respectGitIgnore ??
DEFAULT_FILE_FILTERING_OPTIONS.respectGitIgnore,
respectQwenIgnore:
options?.respectQwenIgnore ??
DEFAULT_FILE_FILTERING_OPTIONS.respectQwenIgnore,
};
}
}
/**
@@ -266,35 +226,19 @@ export class GlobTool extends BaseDeclarativeTool<GlobToolParams, ToolResult> {
super(
GlobTool.Name,
'FindFiles',
'Efficiently finds files matching specific glob patterns (e.g., `src/**/*.ts`, `**/*.md`), returning absolute paths sorted by modification time (newest first). Ideal for quickly locating files based on their name or path structure, especially in large codebases.',
'Fast file pattern matching tool that works with any codebase size\n- Supports glob patterns like "**/*.js" or "src/**/*.ts"\n- Returns matching file paths sorted by modification time\n- Use this tool when you need to find files by name patterns\n- When you are doing an open ended search that may require multiple rounds of globbing and grepping, use the Agent tool instead\n- You have the capability to call multiple tools in a single response. It is always better to speculatively perform multiple searches as a batch that are potentially useful.',
Kind.Search,
{
properties: {
pattern: {
description:
"The glob pattern to match against (e.g., '**/*.py', 'docs/*.md').",
description: 'The glob pattern to match files against',
type: 'string',
},
path: {
description:
'Optional: The absolute path to the directory to search within. If omitted, searches the root directory.',
'The directory to search in. If not specified, the current working directory will be used. IMPORTANT: Omit this field to use the default directory. DO NOT enter "undefined" or "null" - simply omit it for the default behavior. Must be a valid directory path if provided.',
type: 'string',
},
case_sensitive: {
description:
'Optional: Whether the search should be case-sensitive. Defaults to false.',
type: 'boolean',
},
respect_git_ignore: {
description:
'Optional: Whether to respect .gitignore patterns when finding files. Only available in git repositories. Defaults to true.',
type: 'boolean',
},
respect_qwen_ignore: {
description:
'Optional: Whether to respect .qwenignore patterns when finding files. Defaults to true.',
type: 'boolean',
},
},
required: ['pattern'],
type: 'object',
@@ -308,29 +252,6 @@ export class GlobTool extends BaseDeclarativeTool<GlobToolParams, ToolResult> {
protected override validateToolParamValues(
params: GlobToolParams,
): string | null {
const searchDirAbsolute = path.resolve(
this.config.getTargetDir(),
params.path || '.',
);
const workspaceContext = this.config.getWorkspaceContext();
if (!workspaceContext.isPathWithinWorkspace(searchDirAbsolute)) {
const directories = workspaceContext.getDirectories();
return `Search path ("${searchDirAbsolute}") resolves outside the allowed workspace directories: ${directories.join(', ')}`;
}
const targetDir = searchDirAbsolute || this.config.getTargetDir();
try {
if (!fs.existsSync(targetDir)) {
return `Search path does not exist ${targetDir}`;
}
if (!fs.statSync(targetDir).isDirectory()) {
return `Search path is not a directory: ${targetDir}`;
}
} catch (e: unknown) {
return `Error accessing search path: ${e}`;
}
if (
!params.pattern ||
typeof params.pattern !== 'string' ||
@@ -339,6 +260,15 @@ export class GlobTool extends BaseDeclarativeTool<GlobToolParams, ToolResult> {
return "The 'pattern' parameter cannot be empty.";
}
// Only validate path if one is provided
if (params.path) {
try {
resolveAndValidatePath(this.config, params.path);
} catch (error) {
return getErrorMessage(error);
}
}
return null;
}

View File

@@ -84,11 +84,11 @@ describe('GrepTool', () => {
expect(grepTool.validateToolParams(params)).toBeNull();
});
it('should return null for valid params (pattern, path, and include)', () => {
it('should return null for valid params (pattern, path, and glob)', () => {
const params: GrepToolParams = {
pattern: 'hello',
path: '.',
include: '*.txt',
glob: '*.txt',
};
expect(grepTool.validateToolParams(params)).toBeNull();
});
@@ -111,7 +111,7 @@ describe('GrepTool', () => {
const params: GrepToolParams = { pattern: 'hello', path: 'nonexistent' };
// Check for the core error message, as the full path might vary
expect(grepTool.validateToolParams(params)).toContain(
'Failed to access path stats for',
'Path does not exist:',
);
expect(grepTool.validateToolParams(params)).toContain('nonexistent');
});
@@ -155,8 +155,8 @@ describe('GrepTool', () => {
expect(result.returnDisplay).toBe('Found 1 match');
});
it('should find matches with an include glob', async () => {
const params: GrepToolParams = { pattern: 'hello', include: '*.js' };
it('should find matches with a glob filter', async () => {
const params: GrepToolParams = { pattern: 'hello', glob: '*.js' };
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain(
@@ -169,7 +169,7 @@ describe('GrepTool', () => {
expect(result.returnDisplay).toBe('Found 1 match');
});
it('should find matches with an include glob and path', async () => {
it('should find matches with a glob filter and path', async () => {
await fs.writeFile(
path.join(tempRootDir, 'sub', 'another.js'),
'const greeting = "hello";',
@@ -177,7 +177,7 @@ describe('GrepTool', () => {
const params: GrepToolParams = {
pattern: 'hello',
path: 'sub',
include: '*.js',
glob: '*.js',
};
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
@@ -244,59 +244,23 @@ describe('GrepTool', () => {
describe('multi-directory workspace', () => {
it('should search across all workspace directories when no path is specified', async () => {
// Create additional directory with test files
const secondDir = await fs.mkdtemp(
path.join(os.tmpdir(), 'grep-tool-second-'),
);
await fs.writeFile(
path.join(secondDir, 'other.txt'),
'hello from second directory\nworld in second',
);
await fs.writeFile(
path.join(secondDir, 'another.js'),
'function world() { return "test"; }',
);
// Create a mock config with multiple directories
const multiDirConfig = {
getTargetDir: () => tempRootDir,
getWorkspaceContext: () =>
createMockWorkspaceContext(tempRootDir, [secondDir]),
getFileExclusions: () => ({
getGlobExcludes: () => [],
}),
} as unknown as Config;
const multiDirGrepTool = new GrepTool(multiDirConfig);
// The new implementation searches only in the target directory (first workspace directory)
// when no path is specified, not across all workspace directories
const params: GrepToolParams = { pattern: 'world' };
const invocation = multiDirGrepTool.build(params);
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
// Should find matches in both directories
// Should find matches in the target directory only
expect(result.llmContent).toContain(
'Found 5 matches for pattern "world"',
'Found 3 matches for pattern "world" in the workspace directory',
);
// Matches from first directory
// Matches from target directory
expect(result.llmContent).toContain('fileA.txt');
expect(result.llmContent).toContain('L1: hello world');
expect(result.llmContent).toContain('L2: second line with world');
expect(result.llmContent).toContain('fileC.txt');
expect(result.llmContent).toContain('L1: another world in sub dir');
// Matches from second directory (with directory name prefix)
const secondDirName = path.basename(secondDir);
expect(result.llmContent).toContain(
`File: ${path.join(secondDirName, 'other.txt')}`,
);
expect(result.llmContent).toContain('L2: world in second');
expect(result.llmContent).toContain(
`File: ${path.join(secondDirName, 'another.js')}`,
);
expect(result.llmContent).toContain('L1: function world()');
// Clean up
await fs.rm(secondDir, { recursive: true, force: true });
});
it('should search only specified path within workspace directories', async () => {
@@ -346,16 +310,18 @@ describe('GrepTool', () => {
it('should generate correct description with pattern only', () => {
const params: GrepToolParams = { pattern: 'testPattern' };
const invocation = grepTool.build(params);
expect(invocation.getDescription()).toBe("'testPattern'");
expect(invocation.getDescription()).toBe("'testPattern' in path './'");
});
it('should generate correct description with pattern and include', () => {
it('should generate correct description with pattern and glob', () => {
const params: GrepToolParams = {
pattern: 'testPattern',
include: '*.ts',
glob: '*.ts',
};
const invocation = grepTool.build(params);
expect(invocation.getDescription()).toBe("'testPattern' in *.ts");
expect(invocation.getDescription()).toBe(
"'testPattern' in path './' (filter: '*.ts')",
);
});
it('should generate correct description with pattern and path', async () => {
@@ -366,49 +332,37 @@ describe('GrepTool', () => {
path: path.join('src', 'app'),
};
const invocation = grepTool.build(params);
// The path will be relative to the tempRootDir, so we check for containment.
expect(invocation.getDescription()).toContain("'testPattern' within");
expect(invocation.getDescription()).toContain(path.join('src', 'app'));
});
it('should indicate searching across all workspace directories when no path specified', () => {
// Create a mock config with multiple directories
const multiDirConfig = {
getTargetDir: () => tempRootDir,
getWorkspaceContext: () =>
createMockWorkspaceContext(tempRootDir, ['/another/dir']),
getFileExclusions: () => ({
getGlobExcludes: () => [],
}),
} as unknown as Config;
const multiDirGrepTool = new GrepTool(multiDirConfig);
const params: GrepToolParams = { pattern: 'testPattern' };
const invocation = multiDirGrepTool.build(params);
expect(invocation.getDescription()).toBe(
"'testPattern' across all workspace directories",
expect(invocation.getDescription()).toContain(
"'testPattern' in path 'src",
);
expect(invocation.getDescription()).toContain("app'");
});
it('should generate correct description with pattern, include, and path', async () => {
it('should indicate searching workspace directory when no path specified', () => {
const params: GrepToolParams = { pattern: 'testPattern' };
const invocation = grepTool.build(params);
expect(invocation.getDescription()).toBe("'testPattern' in path './'");
});
it('should generate correct description with pattern, glob, and path', async () => {
const dirPath = path.join(tempRootDir, 'src', 'app');
await fs.mkdir(dirPath, { recursive: true });
const params: GrepToolParams = {
pattern: 'testPattern',
include: '*.ts',
glob: '*.ts',
path: path.join('src', 'app'),
};
const invocation = grepTool.build(params);
expect(invocation.getDescription()).toContain(
"'testPattern' in *.ts within",
"'testPattern' in path 'src",
);
expect(invocation.getDescription()).toContain(path.join('src', 'app'));
expect(invocation.getDescription()).toContain("(filter: '*.ts')");
});
it('should use ./ for root path in description', () => {
const params: GrepToolParams = { pattern: 'testPattern', path: '.' };
const invocation = grepTool.build(params);
expect(invocation.getDescription()).toBe("'testPattern' within ./");
expect(invocation.getDescription()).toBe("'testPattern' in path '.'");
});
});
@@ -422,67 +376,50 @@ describe('GrepTool', () => {
}
});
it('should limit results to default 20 matches', async () => {
it('should show all results when no limit is specified', async () => {
const params: GrepToolParams = { pattern: 'testword' };
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('Found 20 matches');
expect(result.llmContent).toContain(
'showing first 20 of 30+ total matches',
);
expect(result.llmContent).toContain('WARNING: Results truncated');
expect(result.returnDisplay).toContain(
'Found 20 matches (truncated from 30+)',
);
// New implementation shows all matches when limit is not specified
expect(result.llmContent).toContain('Found 30 matches');
expect(result.llmContent).not.toContain('truncated');
expect(result.returnDisplay).toBe('Found 30 matches');
});
it('should respect custom maxResults parameter', async () => {
const params: GrepToolParams = { pattern: 'testword', maxResults: 5 };
it('should respect custom limit parameter', async () => {
const params: GrepToolParams = { pattern: 'testword', limit: 5 };
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('Found 5 matches');
expect(result.llmContent).toContain(
'showing first 5 of 30+ total matches',
);
expect(result.llmContent).toContain('current: 5');
expect(result.returnDisplay).toContain(
'Found 5 matches (truncated from 30+)',
);
// Should find 30 total but limit to 5
expect(result.llmContent).toContain('Found 30 matches');
expect(result.llmContent).toContain('25 lines truncated');
expect(result.returnDisplay).toContain('Found 30 matches (truncated)');
});
it('should not show truncation warning when all results fit', async () => {
const params: GrepToolParams = { pattern: 'testword', maxResults: 50 };
const params: GrepToolParams = { pattern: 'testword', limit: 50 };
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('Found 30 matches');
expect(result.llmContent).not.toContain('WARNING: Results truncated');
expect(result.llmContent).not.toContain('showing first');
expect(result.llmContent).not.toContain('truncated');
expect(result.returnDisplay).toBe('Found 30 matches');
});
it('should validate maxResults parameter', () => {
const invalidParams = [
{ pattern: 'test', maxResults: 0 },
{ pattern: 'test', maxResults: 101 },
{ pattern: 'test', maxResults: -1 },
{ pattern: 'test', maxResults: 1.5 },
];
invalidParams.forEach((params) => {
const error = grepTool.validateToolParams(params as GrepToolParams);
expect(error).toBeTruthy(); // Just check that validation fails
expect(error).toMatch(/maxResults|must be/); // Check it's about maxResults validation
});
it('should not validate limit parameter', () => {
// limit parameter has no validation constraints in the new implementation
const params = { pattern: 'test', limit: 5 };
const error = grepTool.validateToolParams(params as GrepToolParams);
expect(error).toBeNull();
});
it('should accept valid maxResults parameter', () => {
it('should accept valid limit parameter', () => {
const validParams = [
{ pattern: 'test', maxResults: 1 },
{ pattern: 'test', maxResults: 50 },
{ pattern: 'test', maxResults: 100 },
{ pattern: 'test', limit: 1 },
{ pattern: 'test', limit: 50 },
{ pattern: 'test', limit: 100 },
];
validParams.forEach((params) => {

View File

@@ -4,7 +4,6 @@
* SPDX-License-Identifier: Apache-2.0
*/
import fs from 'node:fs';
import fsPromises from 'node:fs/promises';
import path from 'node:path';
import { EOL } from 'node:os';
@@ -13,13 +12,15 @@ import { globStream } from 'glob';
import type { ToolInvocation, ToolResult } from './tools.js';
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
import { ToolNames } from './tool-names.js';
import { makeRelative, shortenPath } from '../utils/paths.js';
import { resolveAndValidatePath } from '../utils/paths.js';
import { getErrorMessage, isNodeError } from '../utils/errors.js';
import { isGitRepository } from '../utils/gitUtils.js';
import type { Config } from '../config/config.js';
import type { FileExclusions } from '../utils/ignorePatterns.js';
import { ToolErrorType } from './tool-error.js';
const MAX_LLM_CONTENT_LENGTH = 20_000;
// --- Interfaces ---
/**
@@ -37,14 +38,14 @@ export interface GrepToolParams {
path?: string;
/**
* File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}")
* Glob pattern to filter files (e.g. "*.js", "*.{ts,tsx}")
*/
include?: string;
glob?: string;
/**
* Maximum number of matches to return (optional, defaults to 20)
* Maximum number of matching lines to return (optional, shows all if not specified)
*/
maxResults?: number;
limit?: number;
}
/**
@@ -70,121 +71,57 @@ class GrepToolInvocation extends BaseToolInvocation<
this.fileExclusions = config.getFileExclusions();
}
/**
* Checks if a path is within the root directory and resolves it.
* @param relativePath Path relative to the root directory (or undefined for root).
* @returns The absolute path if valid and exists, or null if no path specified (to search all directories).
* @throws {Error} If path is outside root, doesn't exist, or isn't a directory.
*/
private resolveAndValidatePath(relativePath?: string): string | null {
// If no path specified, return null to indicate searching all workspace directories
if (!relativePath) {
return null;
}
const targetPath = path.resolve(this.config.getTargetDir(), relativePath);
// Security Check: Ensure the resolved path is within workspace boundaries
const workspaceContext = this.config.getWorkspaceContext();
if (!workspaceContext.isPathWithinWorkspace(targetPath)) {
const directories = workspaceContext.getDirectories();
throw new Error(
`Path validation failed: Attempted path "${relativePath}" resolves outside the allowed workspace directories: ${directories.join(', ')}`,
);
}
// Check existence and type after resolving
try {
const stats = fs.statSync(targetPath);
if (!stats.isDirectory()) {
throw new Error(`Path is not a directory: ${targetPath}`);
}
} catch (error: unknown) {
if (isNodeError(error) && error.code !== 'ENOENT') {
throw new Error(`Path does not exist: ${targetPath}`);
}
throw new Error(
`Failed to access path stats for ${targetPath}: ${error}`,
);
}
return targetPath;
}
async execute(signal: AbortSignal): Promise<ToolResult> {
try {
const workspaceContext = this.config.getWorkspaceContext();
const searchDirAbs = this.resolveAndValidatePath(this.params.path);
// Default to target directory if no path is provided
const searchDirAbs = resolveAndValidatePath(
this.config,
this.params.path,
);
const searchDirDisplay = this.params.path || '.';
// Determine which directories to search
let searchDirectories: readonly string[];
if (searchDirAbs === null) {
// No path specified - search all workspace directories
searchDirectories = workspaceContext.getDirectories();
} else {
// Specific path provided - search only that directory
searchDirectories = [searchDirAbs];
}
// Perform grep search
const rawMatches = await this.performGrepSearch({
pattern: this.params.pattern,
path: searchDirAbs,
glob: this.params.glob,
signal,
});
// Collect matches from all search directories
let allMatches: GrepMatch[] = [];
const maxResults = this.params.maxResults ?? 20; // Default to 20 results
let totalMatchesFound = 0;
let searchTruncated = false;
// Build search description
const searchLocationDescription = this.params.path
? `in path "${searchDirDisplay}"`
: `in the workspace directory`;
for (const searchDir of searchDirectories) {
const matches = await this.performGrepSearch({
pattern: this.params.pattern,
path: searchDir,
include: this.params.include,
signal,
});
const filterDescription = this.params.glob
? ` (filter: "${this.params.glob}")`
: '';
totalMatchesFound += matches.length;
// Add directory prefix if searching multiple directories
if (searchDirectories.length > 1) {
const dirName = path.basename(searchDir);
matches.forEach((match) => {
match.filePath = path.join(dirName, match.filePath);
});
}
// Apply result limiting
const remainingSlots = maxResults - allMatches.length;
if (remainingSlots <= 0) {
searchTruncated = true;
break;
}
if (matches.length > remainingSlots) {
allMatches = allMatches.concat(matches.slice(0, remainingSlots));
searchTruncated = true;
break;
} else {
allMatches = allMatches.concat(matches);
}
}
let searchLocationDescription: string;
if (searchDirAbs === null) {
const numDirs = workspaceContext.getDirectories().length;
searchLocationDescription =
numDirs > 1
? `across ${numDirs} workspace directories`
: `in the workspace directory`;
} else {
searchLocationDescription = `in path "${searchDirDisplay}"`;
}
if (allMatches.length === 0) {
const noMatchMsg = `No matches found for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}.`;
// Check if we have any matches
if (rawMatches.length === 0) {
const noMatchMsg = `No matches found for pattern "${this.params.pattern}" ${searchLocationDescription}${filterDescription}.`;
return { llmContent: noMatchMsg, returnDisplay: `No matches found` };
}
// Apply line limit if specified
let truncatedByLineLimit = false;
let matchesToInclude = rawMatches;
if (
this.params.limit !== undefined &&
rawMatches.length > this.params.limit
) {
matchesToInclude = rawMatches.slice(0, this.params.limit);
truncatedByLineLimit = true;
}
const totalMatches = rawMatches.length;
const matchTerm = totalMatches === 1 ? 'match' : 'matches';
// Build header
const header = `Found ${totalMatches} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${filterDescription}:\n---\n`;
// Group matches by file
const matchesByFile = allMatches.reduce(
const matchesByFile = matchesToInclude.reduce(
(acc, match) => {
const fileKey = match.filePath;
if (!acc[fileKey]) {
@@ -197,46 +134,51 @@ class GrepToolInvocation extends BaseToolInvocation<
{} as Record<string, GrepMatch[]>,
);
const matchCount = allMatches.length;
const matchTerm = matchCount === 1 ? 'match' : 'matches';
// Build the header with truncation info if needed
let headerText = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}`;
if (searchTruncated) {
headerText += ` (showing first ${matchCount} of ${totalMatchesFound}+ total matches)`;
}
let llmContent = `${headerText}:
---
`;
// Build grep output
let grepOutput = '';
for (const filePath in matchesByFile) {
llmContent += `File: ${filePath}\n`;
grepOutput += `File: ${filePath}\n`;
matchesByFile[filePath].forEach((match) => {
const trimmedLine = match.line.trim();
llmContent += `L${match.lineNumber}: ${trimmedLine}\n`;
grepOutput += `L${match.lineNumber}: ${trimmedLine}\n`;
});
llmContent += '---\n';
grepOutput += '---\n';
}
// Add truncation guidance if results were limited
if (searchTruncated) {
llmContent += `\nWARNING: Results truncated to prevent context overflow. To see more results:
- Use a more specific pattern to reduce matches
- Add file filters with the 'include' parameter (e.g., "*.js", "src/**")
- Specify a narrower 'path' to search in a subdirectory
- Increase 'maxResults' parameter if you need more matches (current: ${maxResults})`;
// Apply character limit as safety net
let truncatedByCharLimit = false;
if (grepOutput.length > MAX_LLM_CONTENT_LENGTH) {
grepOutput = grepOutput.slice(0, MAX_LLM_CONTENT_LENGTH) + '...';
truncatedByCharLimit = true;
}
let displayText = `Found ${matchCount} ${matchTerm}`;
if (searchTruncated) {
displayText += ` (truncated from ${totalMatchesFound}+)`;
// Count how many lines we actually included after character truncation
const finalLines = grepOutput
.split('\n')
.filter(
(line) =>
line.trim() && !line.startsWith('File:') && !line.startsWith('---'),
);
const includedLines = finalLines.length;
// Build result
let llmContent = header + grepOutput;
// Add truncation notice if needed
if (truncatedByLineLimit || truncatedByCharLimit) {
const omittedMatches = totalMatches - includedLines;
llmContent += ` [${omittedMatches} ${omittedMatches === 1 ? 'line' : 'lines'} truncated] ...`;
}
// Build display message
let displayMessage = `Found ${totalMatches} ${matchTerm}`;
if (truncatedByLineLimit || truncatedByCharLimit) {
displayMessage += ` (truncated)`;
}
return {
llmContent: llmContent.trim(),
returnDisplay: displayText,
returnDisplay: displayMessage,
};
} catch (error) {
console.error(`Error during GrepLogic execution: ${error}`);
@@ -329,50 +271,26 @@ class GrepToolInvocation extends BaseToolInvocation<
* @returns A string describing the grep
*/
getDescription(): string {
let description = `'${this.params.pattern}'`;
if (this.params.include) {
description += ` in ${this.params.include}`;
}
if (this.params.path) {
const resolvedPath = path.resolve(
this.config.getTargetDir(),
this.params.path,
);
if (
resolvedPath === this.config.getTargetDir() ||
this.params.path === '.'
) {
description += ` within ./`;
} else {
const relativePath = makeRelative(
resolvedPath,
this.config.getTargetDir(),
);
description += ` within ${shortenPath(relativePath)}`;
}
} else {
// When no path is specified, indicate searching all workspace directories
const workspaceContext = this.config.getWorkspaceContext();
const directories = workspaceContext.getDirectories();
if (directories.length > 1) {
description += ` across all workspace directories`;
}
let description = `'${this.params.pattern}' in path '${this.params.path || './'}'`;
if (this.params.glob) {
description += ` (filter: '${this.params.glob}')`;
}
return description;
}
/**
* Performs the actual search using the prioritized strategies.
* @param options Search options including pattern, absolute path, and include glob.
* @param options Search options including pattern, absolute path, and glob filter.
* @returns A promise resolving to an array of match objects.
*/
private async performGrepSearch(options: {
pattern: string;
path: string; // Expects absolute path
include?: string;
glob?: string;
signal: AbortSignal;
}): Promise<GrepMatch[]> {
const { pattern, path: absolutePath, include } = options;
const { pattern, path: absolutePath, glob } = options;
let strategyUsed = 'none';
try {
@@ -390,8 +308,8 @@ class GrepToolInvocation extends BaseToolInvocation<
'--ignore-case',
pattern,
];
if (include) {
gitArgs.push('--', include);
if (glob) {
gitArgs.push('--', glob);
}
try {
@@ -457,8 +375,8 @@ class GrepToolInvocation extends BaseToolInvocation<
})
.filter((dir): dir is string => !!dir);
commonExcludes.forEach((dir) => grepArgs.push(`--exclude-dir=${dir}`));
if (include) {
grepArgs.push(`--include=${include}`);
if (glob) {
grepArgs.push(`--include=${glob}`);
}
grepArgs.push(pattern);
grepArgs.push('.');
@@ -537,7 +455,7 @@ class GrepToolInvocation extends BaseToolInvocation<
'GrepLogic: Falling back to JavaScript grep implementation.',
);
strategyUsed = 'javascript fallback';
const globPattern = include ? include : '**/*';
const globPattern = glob ? glob : '**/*';
const ignorePatterns = this.fileExclusions.getGlobExcludes();
const filesIterator = globStream(globPattern, {
@@ -603,32 +521,30 @@ export class GrepTool extends BaseDeclarativeTool<GrepToolParams, ToolResult> {
constructor(private readonly config: Config) {
super(
GrepTool.Name,
'SearchText',
'Searches for a regular expression pattern within the content of files in a specified directory (or current working directory). Can filter files by a glob pattern. Returns the lines containing matches, along with their file paths and line numbers.',
'Grep',
'A powerful search tool for finding patterns in files\n\n Usage:\n - ALWAYS use Grep for search tasks. NEVER invoke `grep` or `rg` as a Bash command. The Grep tool has been optimized for correct permissions and access.\n - Supports full regex syntax (e.g., "log.*Error", "function\\s+\\w+")\n - Filter files with glob parameter (e.g., "*.js", "**/*.tsx")\n - Case-insensitive by default\n - Use Task tool for open-ended searches requiring multiple rounds\n',
Kind.Search,
{
properties: {
pattern: {
description:
"The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').",
type: 'string',
description:
'The regular expression pattern to search for in file contents',
},
glob: {
type: 'string',
description:
'Glob pattern to filter files (e.g. "*.js", "*.{ts,tsx}")',
},
path: {
description:
'Optional: The absolute path to the directory to search within. If omitted, searches the current working directory.',
type: 'string',
},
include: {
description:
"Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).",
type: 'string',
'File or directory to search in. Defaults to current working directory.',
},
maxResults: {
description:
'Optional: Maximum number of matches to return to prevent context overflow (default: 20, max: 100). Use lower values for broad searches, higher for specific searches.',
limit: {
type: 'number',
minimum: 1,
maximum: 100,
description:
'Limit output to first N matching lines. Optional - shows all matches if not specified.',
},
},
required: ['pattern'],
@@ -637,47 +553,6 @@ export class GrepTool extends BaseDeclarativeTool<GrepToolParams, ToolResult> {
);
}
/**
* Checks if a path is within the root directory and resolves it.
* @param relativePath Path relative to the root directory (or undefined for root).
* @returns The absolute path if valid and exists, or null if no path specified (to search all directories).
* @throws {Error} If path is outside root, doesn't exist, or isn't a directory.
*/
private resolveAndValidatePath(relativePath?: string): string | null {
// If no path specified, return null to indicate searching all workspace directories
if (!relativePath) {
return null;
}
const targetPath = path.resolve(this.config.getTargetDir(), relativePath);
// Security Check: Ensure the resolved path is within workspace boundaries
const workspaceContext = this.config.getWorkspaceContext();
if (!workspaceContext.isPathWithinWorkspace(targetPath)) {
const directories = workspaceContext.getDirectories();
throw new Error(
`Path validation failed: Attempted path "${relativePath}" resolves outside the allowed workspace directories: ${directories.join(', ')}`,
);
}
// Check existence and type after resolving
try {
const stats = fs.statSync(targetPath);
if (!stats.isDirectory()) {
throw new Error(`Path is not a directory: ${targetPath}`);
}
} catch (error: unknown) {
if (isNodeError(error) && error.code !== 'ENOENT') {
throw new Error(`Path does not exist: ${targetPath}`);
}
throw new Error(
`Failed to access path stats for ${targetPath}: ${error}`,
);
}
return targetPath;
}
/**
* Validates the parameters for the tool
* @param params Parameters to validate
@@ -686,27 +561,17 @@ export class GrepTool extends BaseDeclarativeTool<GrepToolParams, ToolResult> {
protected override validateToolParamValues(
params: GrepToolParams,
): string | null {
// Validate pattern is a valid regex
try {
new RegExp(params.pattern);
} catch (error) {
return `Invalid regular expression pattern provided: ${params.pattern}. Error: ${getErrorMessage(error)}`;
}
// Validate maxResults if provided
if (params.maxResults !== undefined) {
if (
!Number.isInteger(params.maxResults) ||
params.maxResults < 1 ||
params.maxResults > 100
) {
return `maxResults must be an integer between 1 and 100, got: ${params.maxResults}`;
}
return `Invalid regular expression pattern: ${params.pattern}. Error: ${getErrorMessage(error)}`;
}
// Only validate path if one is provided
if (params.path) {
try {
this.resolveAndValidatePath(params.path);
resolveAndValidatePath(this.config, params.path);
} catch (error) {
return getErrorMessage(error);
}

View File

@@ -184,17 +184,15 @@ describe('RipGrepTool', () => {
};
// Check for the core error message, as the full path might vary
expect(grepTool.validateToolParams(params)).toContain(
'Failed to access path stats for',
'Path does not exist:',
);
expect(grepTool.validateToolParams(params)).toContain('nonexistent');
});
it('should return error if path is a file, not a directory', async () => {
it('should allow path to be a file', () => {
const filePath = path.join(tempRootDir, 'fileA.txt');
const params: RipGrepToolParams = { pattern: 'hello', path: filePath };
expect(grepTool.validateToolParams(params)).toContain(
`Path is not a directory: ${filePath}`,
);
expect(grepTool.validateToolParams(params)).toBeNull();
});
});
@@ -432,7 +430,7 @@ describe('RipGrepTool', () => {
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
expect(String(result.llmContent).length).toBeLessThanOrEqual(20_000);
expect(String(result.llmContent).length).toBeLessThanOrEqual(21_000);
expect(result.llmContent).toMatch(/\[\d+ lines? truncated\] \.\.\./);
expect(result.returnDisplay).toContain('truncated');
});
@@ -567,6 +565,26 @@ describe('RipGrepTool', () => {
);
});
it('should search within a single file when path is a file', async () => {
mockSpawn.mockImplementationOnce(
createMockSpawn({
outputData: `fileA.txt:1:hello world${EOL}fileA.txt:2:second line with world${EOL}`,
exitCode: 0,
}),
);
const params: RipGrepToolParams = {
pattern: 'world',
path: path.join(tempRootDir, 'fileA.txt'),
};
const invocation = grepTool.build(params);
const result = await invocation.execute(abortSignal);
expect(result.llmContent).toContain('Found 2 matches');
expect(result.llmContent).toContain('fileA.txt:1:hello world');
expect(result.llmContent).toContain('fileA.txt:2:second line with world');
expect(result.returnDisplay).toBe('Found 2 matches');
});
it('should throw an error if ripgrep is not available', async () => {
// Make ensureRipgrepBinary throw
(ensureRipgrepPath as Mock).mockRejectedValue(
@@ -648,7 +666,9 @@ describe('RipGrepTool', () => {
describe('error handling and edge cases', () => {
it('should handle workspace boundary violations', () => {
const params: RipGrepToolParams = { pattern: 'test', path: '../outside' };
expect(() => grepTool.build(params)).toThrow(/Path validation failed/);
expect(() => grepTool.build(params)).toThrow(
/Path is not within workspace/,
);
});
it('should handle empty directories gracefully', async () => {
@@ -1132,7 +1152,9 @@ describe('RipGrepTool', () => {
glob: '*.ts',
};
const invocation = grepTool.build(params);
expect(invocation.getDescription()).toBe("'testPattern' in *.ts");
expect(invocation.getDescription()).toBe(
"'testPattern' (filter: '*.ts')",
);
});
it('should generate correct description with pattern and path', async () => {
@@ -1143,9 +1165,10 @@ describe('RipGrepTool', () => {
path: path.join('src', 'app'),
};
const invocation = grepTool.build(params);
// The path will be relative to the tempRootDir, so we check for containment.
expect(invocation.getDescription()).toContain("'testPattern' within");
expect(invocation.getDescription()).toContain(path.join('src', 'app'));
expect(invocation.getDescription()).toContain(
"'testPattern' in path 'src",
);
expect(invocation.getDescription()).toContain("app'");
});
it('should generate correct description with default search path', () => {
@@ -1164,15 +1187,15 @@ describe('RipGrepTool', () => {
};
const invocation = grepTool.build(params);
expect(invocation.getDescription()).toContain(
"'testPattern' in *.ts within",
"'testPattern' in path 'src",
);
expect(invocation.getDescription()).toContain(path.join('src', 'app'));
expect(invocation.getDescription()).toContain("(filter: '*.ts')");
});
it('should use ./ for root path in description', () => {
it('should use path when specified in description', () => {
const params: RipGrepToolParams = { pattern: 'testPattern', path: '.' };
const invocation = grepTool.build(params);
expect(invocation.getDescription()).toBe("'testPattern' within ./");
expect(invocation.getDescription()).toBe("'testPattern' in path '.'");
});
});
});

View File

@@ -11,8 +11,8 @@ import { spawn } from 'node:child_process';
import type { ToolInvocation, ToolResult } from './tools.js';
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
import { ToolNames } from './tool-names.js';
import { makeRelative, shortenPath } from '../utils/paths.js';
import { getErrorMessage, isNodeError } from '../utils/errors.js';
import { resolveAndValidatePath } from '../utils/paths.js';
import { getErrorMessage } from '../utils/errors.js';
import type { Config } from '../config/config.js';
import { ensureRipgrepPath } from '../utils/ripgrepUtils.js';
import { SchemaValidator } from '../utils/schemaValidator.js';
@@ -57,50 +57,13 @@ class GrepToolInvocation extends BaseToolInvocation<
super(params);
}
/**
* Checks if a path is within the root directory and resolves it.
* @param relativePath Path relative to the root directory (or undefined for root).
* @returns The absolute path to search within.
* @throws {Error} If path is outside root, doesn't exist, or isn't a directory.
*/
private resolveAndValidatePath(relativePath?: string): string {
const targetDir = this.config.getTargetDir();
const targetPath = relativePath
? path.resolve(targetDir, relativePath)
: targetDir;
const workspaceContext = this.config.getWorkspaceContext();
if (!workspaceContext.isPathWithinWorkspace(targetPath)) {
const directories = workspaceContext.getDirectories();
throw new Error(
`Path validation failed: Attempted path "${relativePath}" resolves outside the allowed workspace directories: ${directories.join(', ')}`,
);
}
return this.ensureDirectory(targetPath);
}
private ensureDirectory(targetPath: string): string {
try {
const stats = fs.statSync(targetPath);
if (!stats.isDirectory()) {
throw new Error(`Path is not a directory: ${targetPath}`);
}
} catch (error: unknown) {
if (isNodeError(error) && error.code !== 'ENOENT') {
throw new Error(`Path does not exist: ${targetPath}`);
}
throw new Error(
`Failed to access path stats for ${targetPath}: ${error}`,
);
}
return targetPath;
}
async execute(signal: AbortSignal): Promise<ToolResult> {
try {
const searchDirAbs = this.resolveAndValidatePath(this.params.path);
const searchDirAbs = resolveAndValidatePath(
this.config,
this.params.path,
{ allowFiles: true },
);
const searchDirDisplay = this.params.path || '.';
// Get raw ripgrep output
@@ -133,9 +96,6 @@ class GrepToolInvocation extends BaseToolInvocation<
// Build header early to calculate available space
const header = `Found ${totalMatches} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${filterDescription}:\n---\n`;
const maxTruncationNoticeLength = 100; // "[... N more matches truncated]"
const maxGrepOutputLength =
MAX_LLM_CONTENT_LENGTH - header.length - maxTruncationNoticeLength;
// Apply line limit first (if specified)
let truncatedByLineLimit = false;
@@ -148,19 +108,32 @@ class GrepToolInvocation extends BaseToolInvocation<
truncatedByLineLimit = true;
}
// Join lines back into grep output
let grepOutput = linesToInclude.join(EOL);
// Apply character limit as safety net
// Build output and track how many lines we include, respecting character limit
const parts: string[] = [];
let includedLines = 0;
let truncatedByCharLimit = false;
if (grepOutput.length > maxGrepOutputLength) {
grepOutput = grepOutput.slice(0, maxGrepOutputLength) + '...';
truncatedByCharLimit = true;
let currentLength = 0;
for (const line of linesToInclude) {
const sep = includedLines > 0 ? 1 : 0;
includedLines++;
if (currentLength + line.length <= MAX_LLM_CONTENT_LENGTH) {
parts.push(line);
currentLength = currentLength + line.length + sep;
} else {
const remaining = Math.max(
MAX_LLM_CONTENT_LENGTH - currentLength - sep,
10,
);
parts.push(line.slice(0, remaining) + '...');
truncatedByCharLimit = true;
break;
}
}
// Count how many lines we actually included after character truncation
const finalLines = grepOutput.split(EOL).filter((line) => line.trim());
const includedLines = finalLines.length;
const grepOutput = parts.join('\n');
// Build result
let llmContent = header + grepOutput;
@@ -168,7 +141,7 @@ class GrepToolInvocation extends BaseToolInvocation<
// Add truncation notice if needed
if (truncatedByLineLimit || truncatedByCharLimit) {
const omittedMatches = totalMatches - includedLines;
llmContent += ` [${omittedMatches} ${omittedMatches === 1 ? 'line' : 'lines'} truncated] ...`;
llmContent += `\n---\n[${omittedMatches} ${omittedMatches === 1 ? 'line' : 'lines'} truncated] ...`;
}
// Build display message (show real count, not truncated)
@@ -193,7 +166,7 @@ class GrepToolInvocation extends BaseToolInvocation<
private async performRipgrepSearch(options: {
pattern: string;
path: string;
path: string; // Can be a file or directory
glob?: string;
signal: AbortSignal;
}): Promise<string> {
@@ -302,34 +275,13 @@ class GrepToolInvocation extends BaseToolInvocation<
*/
getDescription(): string {
let description = `'${this.params.pattern}'`;
if (this.params.glob) {
description += ` in ${this.params.glob}`;
}
if (this.params.path) {
const resolvedPath = path.resolve(
this.config.getTargetDir(),
this.params.path,
);
if (
resolvedPath === this.config.getTargetDir() ||
this.params.path === '.'
) {
description += ` within ./`;
} else {
const relativePath = makeRelative(
resolvedPath,
this.config.getTargetDir(),
);
description += ` within ${shortenPath(relativePath)}`;
}
} else {
// When no path is specified, indicate searching all workspace directories
const workspaceContext = this.config.getWorkspaceContext();
const directories = workspaceContext.getDirectories();
if (directories.length > 1) {
description += ` across all workspace directories`;
}
description += ` in path '${this.params.path}'`;
}
if (this.params.glob) {
description += ` (filter: '${this.params.glob}')`;
}
return description;
}
}
@@ -378,47 +330,6 @@ export class RipGrepTool extends BaseDeclarativeTool<
);
}
/**
* Checks if a path is within the root directory and resolves it.
* @param relativePath Path relative to the root directory (or undefined for root).
* @returns The absolute path to search within.
* @throws {Error} If path is outside root, doesn't exist, or isn't a directory.
*/
private resolveAndValidatePath(relativePath?: string): string {
// If no path specified, search within the workspace root directory
if (!relativePath) {
return this.config.getTargetDir();
}
const targetPath = path.resolve(this.config.getTargetDir(), relativePath);
// Security Check: Ensure the resolved path is within workspace boundaries
const workspaceContext = this.config.getWorkspaceContext();
if (!workspaceContext.isPathWithinWorkspace(targetPath)) {
const directories = workspaceContext.getDirectories();
throw new Error(
`Path validation failed: Attempted path "${relativePath}" resolves outside the allowed workspace directories: ${directories.join(', ')}`,
);
}
// Check existence and type after resolving
try {
const stats = fs.statSync(targetPath);
if (!stats.isDirectory()) {
throw new Error(`Path is not a directory: ${targetPath}`);
}
} catch (error: unknown) {
if (isNodeError(error) && error.code !== 'ENOENT') {
throw new Error(`Path does not exist: ${targetPath}`);
}
throw new Error(
`Failed to access path stats for ${targetPath}: ${error}`,
);
}
return targetPath;
}
/**
* Validates the parameters for the tool
* @param params Parameters to validate
@@ -445,7 +356,7 @@ export class RipGrepTool extends BaseDeclarativeTool<
// Only validate path if one is provided
if (params.path) {
try {
this.resolveAndValidatePath(params.path);
resolveAndValidatePath(this.config, params.path, { allowFiles: true });
} catch (error) {
return getErrorMessage(error);
}