diff --git a/docs/tools/file-system.md b/docs/tools/file-system.md index 7bf90d06..7ce38359 100644 --- a/docs/tools/file-system.md +++ b/docs/tools/file-system.md @@ -68,72 +68,66 @@ Qwen Code provides a comprehensive suite of tools for interacting with the local - **File:** `glob.ts` - **Parameters:** - `pattern` (string, required): The glob pattern to match against (e.g., `"*.py"`, `"src/**/*.js"`). - - `path` (string, optional): The absolute path to the directory to search within. If omitted, searches the tool's root directory. - - `case_sensitive` (boolean, optional): Whether the search should be case-sensitive. Defaults to `false`. - - `respect_git_ignore` (boolean, optional): Whether to respect .gitignore patterns when finding files. Defaults to `true`. + - `path` (string, optional): The directory to search in. If not specified, the current working directory will be used. - **Behavior:** - Searches for files matching the glob pattern within the specified directory. - Returns a list of absolute paths, sorted with the most recently modified files first. - - Ignores common nuisance directories like `node_modules` and `.git` by default. -- **Output (`llmContent`):** A message like: `Found 5 file(s) matching "*.ts" within src, sorted by modification time (newest first):\nsrc/file1.ts\nsrc/subdir/file2.ts...` + - Respects .gitignore and .qwenignore patterns by default. + - Limits results to 100 files to prevent context overflow. +- **Output (`llmContent`):** A message like: `Found 5 file(s) matching "*.ts" within /path/to/search/dir, sorted by modification time (newest first):\n---\n/path/to/file1.ts\n/path/to/subdir/file2.ts\n---\n[95 files truncated] ...` - **Confirmation:** No. -## 5. `search_file_content` (SearchText) +## 5. `grep_search` (Grep) -`search_file_content` searches for a regular expression pattern within the content of files in a specified directory. Can filter files by a glob pattern. Returns the lines containing matches, along with their file paths and line numbers. +`grep_search` searches for a regular expression pattern within the content of files in a specified directory. Can filter files by a glob pattern. Returns the lines containing matches, along with their file paths and line numbers. -- **Tool name:** `search_file_content` -- **Display name:** SearchText -- **File:** `grep.ts` +- **Tool name:** `grep_search` +- **Display name:** Grep +- **File:** `ripGrep.ts` (with `grep.ts` as fallback) - **Parameters:** - - `pattern` (string, required): The regular expression (regex) to search for (e.g., `"function\s+myFunction"`). - - `path` (string, optional): The absolute path to the directory to search within. Defaults to the current working directory. - - `include` (string, optional): A glob pattern to filter which files are searched (e.g., `"*.js"`, `"src/**/*.{ts,tsx}"`). If omitted, searches most files (respecting common ignores). - - `maxResults` (number, optional): Maximum number of matches to return to prevent context overflow (default: 20, max: 100). Use lower values for broad searches, higher for specific searches. + - `pattern` (string, required): The regular expression pattern to search for in file contents (e.g., `"function\\s+myFunction"`, `"log.*Error"`). + - `path` (string, optional): File or directory to search in. Defaults to current working directory. + - `glob` (string, optional): Glob pattern to filter files (e.g. `"*.js"`, `"src/**/*.{ts,tsx}"`). + - `limit` (number, optional): Limit output to first N matching lines. Optional - shows all matches if not specified. - **Behavior:** - - Uses `git grep` if available in a Git repository for speed; otherwise, falls back to system `grep` or a JavaScript-based search. - - Returns a list of matching lines, each prefixed with its file path (relative to the search directory) and line number. - - Limits results to a maximum of 20 matches by default to prevent context overflow. When results are truncated, shows a clear warning with guidance on refining searches. + - Uses ripgrep for fast search when available; otherwise falls back to a JavaScript-based search implementation. + - Returns matching lines with file paths and line numbers. + - Case-insensitive by default. + - Respects .gitignore and .qwenignore patterns. + - Limits output to prevent context overflow. - **Output (`llmContent`):** A formatted string of matches, e.g.: ``` Found 3 matches for pattern "myFunction" in path "." (filter: "*.ts"): --- - File: src/utils.ts - L15: export function myFunction() { - L22: myFunction.call(); - --- - File: src/index.ts - L5: import { myFunction } from './utils'; + src/utils.ts:15:export function myFunction() { + src/utils.ts:22: myFunction.call(); + src/index.ts:5:import { myFunction } from './utils'; --- - WARNING: Results truncated to prevent context overflow. To see more results: - - Use a more specific pattern to reduce matches - - Add file filters with the 'include' parameter (e.g., "*.js", "src/**") - - Specify a narrower 'path' to search in a subdirectory - - Increase 'maxResults' parameter if you need more matches (current: 20) + [0 lines truncated] ... ``` - **Confirmation:** No. -### `search_file_content` examples +### `grep_search` examples Search for a pattern with default result limiting: ``` -search_file_content(pattern="function\s+myFunction", path="src") +grep_search(pattern="function\\s+myFunction", path="src") ``` Search for a pattern with custom result limiting: ``` -search_file_content(pattern="function", path="src", maxResults=50) +grep_search(pattern="function", path="src", limit=50) ``` Search for a pattern with file filtering and custom result limiting: ``` -search_file_content(pattern="function", include="*.js", maxResults=10) +grep_search(pattern="function", glob="*.js", limit=10) ``` ## 6. `edit` (Edit) diff --git a/packages/core/src/tools/glob.test.ts b/packages/core/src/tools/glob.test.ts index 164eb7e0..0cf1f9e3 100644 --- a/packages/core/src/tools/glob.test.ts +++ b/packages/core/src/tools/glob.test.ts @@ -88,17 +88,6 @@ describe('GlobTool', () => { expect(result.returnDisplay).toBe('Found 2 matching file(s)'); }); - it('should find files case-sensitively when case_sensitive is true', async () => { - const params: GlobToolParams = { pattern: '*.txt', case_sensitive: true }; - const invocation = globTool.build(params); - const result = await invocation.execute(abortSignal); - expect(result.llmContent).toContain('Found 1 file(s)'); - expect(result.llmContent).toContain(path.join(tempRootDir, 'fileA.txt')); - expect(result.llmContent).not.toContain( - path.join(tempRootDir, 'FileB.TXT'), - ); - }); - it('should find files case-insensitively by default (pattern: *.TXT)', async () => { const params: GlobToolParams = { pattern: '*.TXT' }; const invocation = globTool.build(params); @@ -108,18 +97,6 @@ describe('GlobTool', () => { expect(result.llmContent).toContain(path.join(tempRootDir, 'FileB.TXT')); }); - it('should find files case-insensitively when case_sensitive is false (pattern: *.TXT)', async () => { - const params: GlobToolParams = { - pattern: '*.TXT', - case_sensitive: false, - }; - const invocation = globTool.build(params); - const result = await invocation.execute(abortSignal); - expect(result.llmContent).toContain('Found 2 file(s)'); - expect(result.llmContent).toContain(path.join(tempRootDir, 'fileA.txt')); - expect(result.llmContent).toContain(path.join(tempRootDir, 'FileB.TXT')); - }); - it('should find files using a pattern that includes a subdirectory', async () => { const params: GlobToolParams = { pattern: 'sub/*.md' }; const invocation = globTool.build(params); @@ -207,7 +184,7 @@ describe('GlobTool', () => { const filesListed = llmContent .trim() .split(/\r?\n/) - .slice(1) + .slice(2) .map((line) => line.trim()) .filter(Boolean); @@ -220,14 +197,13 @@ describe('GlobTool', () => { ); }); - it('should return a PATH_NOT_IN_WORKSPACE error if path is outside workspace', async () => { + it('should return error if path is outside workspace', async () => { // Bypassing validation to test execute method directly vi.spyOn(globTool, 'validateToolParams').mockReturnValue(null); const params: GlobToolParams = { pattern: '*.txt', path: '/etc' }; const invocation = globTool.build(params); const result = await invocation.execute(abortSignal); - expect(result.error?.type).toBe(ToolErrorType.PATH_NOT_IN_WORKSPACE); - expect(result.returnDisplay).toBe('Path is not within workspace'); + expect(result.returnDisplay).toBe('Error: Path is not within workspace'); }); it('should return a GLOB_EXECUTION_ERROR on glob failure', async () => { @@ -255,15 +231,6 @@ describe('GlobTool', () => { expect(globTool.validateToolParams(params)).toBeNull(); }); - it('should return null for valid parameters (pattern, path, and case_sensitive)', () => { - const params: GlobToolParams = { - pattern: '*.js', - path: 'sub', - case_sensitive: true, - }; - expect(globTool.validateToolParams(params)).toBeNull(); - }); - it('should return error if pattern is missing (schema validation)', () => { // Need to correctly define this as an object without pattern const params = { path: '.' }; @@ -297,16 +264,6 @@ describe('GlobTool', () => { ); }); - it('should return error if case_sensitive is provided but is not a boolean', () => { - const params = { - pattern: '*.ts', - case_sensitive: 'true', - } as unknown as GlobToolParams; // Force incorrect type - expect(globTool.validateToolParams(params)).toBe( - 'params/case_sensitive must be boolean', - ); - }); - it("should return error if search path resolves outside the tool's root directory", () => { // Create a globTool instance specifically for this test, with a deeper root tempRootDir = path.join(tempRootDir, 'sub'); @@ -319,7 +276,7 @@ describe('GlobTool', () => { path: '../../../../../../../../../../tmp', // Definitely outside }; expect(specificGlobTool.validateToolParams(paramsOutside)).toContain( - 'resolves outside the allowed workspace directories', + 'Path is not within workspace', ); }); @@ -329,14 +286,14 @@ describe('GlobTool', () => { path: 'nonexistent_subdir', }; expect(globTool.validateToolParams(params)).toContain( - 'Search path does not exist', + 'Path does not exist', ); }); it('should return error if specified search path is a file, not a directory', async () => { const params: GlobToolParams = { pattern: '*.txt', path: 'fileA.txt' }; expect(globTool.validateToolParams(params)).toContain( - 'Search path is not a directory', + 'Path is not a directory', ); }); }); @@ -348,20 +305,10 @@ describe('GlobTool', () => { expect(globTool.validateToolParams(validPath)).toBeNull(); expect(globTool.validateToolParams(invalidPath)).toContain( - 'resolves outside the allowed workspace directories', + 'Path is not within workspace', ); }); - it('should provide clear error messages when path is outside workspace', () => { - const invalidPath = { pattern: '*.ts', path: '/etc' }; - const error = globTool.validateToolParams(invalidPath); - - expect(error).toContain( - 'resolves outside the allowed workspace directories', - ); - expect(error).toContain(tempRootDir); - }); - it('should work with paths in workspace subdirectories', async () => { const params: GlobToolParams = { pattern: '*.md', path: 'sub' }; const invocation = globTool.build(params); @@ -417,47 +364,123 @@ describe('GlobTool', () => { expect(result.llmContent).toContain('Found 3 file(s)'); // fileA.txt, FileB.TXT, b.notignored.txt expect(result.llmContent).not.toContain('a.qwenignored.txt'); }); + }); - it('should not respect .gitignore when respect_git_ignore is false', async () => { - await fs.writeFile(path.join(tempRootDir, '.gitignore'), '*.ignored.txt'); - await fs.writeFile( - path.join(tempRootDir, 'a.ignored.txt'), - 'ignored content', - ); + describe('file count truncation', () => { + it('should truncate results when more than 100 files are found', async () => { + // Create 150 test files + for (let i = 1; i <= 150; i++) { + await fs.writeFile( + path.join(tempRootDir, `file${i}.trunctest`), + `content${i}`, + ); + } - const params: GlobToolParams = { - pattern: '*.txt', - respect_git_ignore: false, - }; + const params: GlobToolParams = { pattern: '*.trunctest' }; const invocation = globTool.build(params); const result = await invocation.execute(abortSignal); + const llmContent = partListUnionToString(result.llmContent); - expect(result.llmContent).toContain('Found 3 file(s)'); // fileA.txt, FileB.TXT, a.ignored.txt - expect(result.llmContent).toContain('a.ignored.txt'); + // Should report all 150 files found + expect(llmContent).toContain('Found 150 file(s)'); + + // Should include truncation notice + expect(llmContent).toContain('[50 files truncated] ...'); + + // Count the number of .trunctest files mentioned in the output + const fileMatches = llmContent.match(/file\d+\.trunctest/g); + expect(fileMatches).toBeDefined(); + expect(fileMatches?.length).toBe(100); + + // returnDisplay should indicate truncation + expect(result.returnDisplay).toBe( + 'Found 150 matching file(s) (truncated)', + ); }); - it('should not respect .qwenignore when respect_qwen_ignore is false', async () => { - await fs.writeFile( - path.join(tempRootDir, '.qwenignore'), - '*.qwenignored.txt', - ); - await fs.writeFile( - path.join(tempRootDir, 'a.qwenignored.txt'), - 'ignored content', - ); + it('should not truncate when exactly 100 files are found', async () => { + // Create exactly 100 test files + for (let i = 1; i <= 100; i++) { + await fs.writeFile( + path.join(tempRootDir, `exact${i}.trunctest`), + `content${i}`, + ); + } - // Recreate the tool to pick up the new .qwenignore file - globTool = new GlobTool(mockConfig); - - const params: GlobToolParams = { - pattern: '*.txt', - respect_qwen_ignore: false, - }; + const params: GlobToolParams = { pattern: '*.trunctest' }; const invocation = globTool.build(params); const result = await invocation.execute(abortSignal); - expect(result.llmContent).toContain('Found 3 file(s)'); // fileA.txt, FileB.TXT, a.qwenignored.txt - expect(result.llmContent).toContain('a.qwenignored.txt'); + // Should report all 100 files found + expect(result.llmContent).toContain('Found 100 file(s)'); + + // Should NOT include truncation notice + expect(result.llmContent).not.toContain('truncated'); + + // Should show all 100 files + expect(result.llmContent).toContain('exact1.trunctest'); + expect(result.llmContent).toContain('exact100.trunctest'); + + // returnDisplay should NOT indicate truncation + expect(result.returnDisplay).toBe('Found 100 matching file(s)'); + }); + + it('should not truncate when fewer than 100 files are found', async () => { + // Create 50 test files + for (let i = 1; i <= 50; i++) { + await fs.writeFile( + path.join(tempRootDir, `small${i}.trunctest`), + `content${i}`, + ); + } + + const params: GlobToolParams = { pattern: '*.trunctest' }; + const invocation = globTool.build(params); + const result = await invocation.execute(abortSignal); + + // Should report all 50 files found + expect(result.llmContent).toContain('Found 50 file(s)'); + + // Should NOT include truncation notice + expect(result.llmContent).not.toContain('truncated'); + + // returnDisplay should NOT indicate truncation + expect(result.returnDisplay).toBe('Found 50 matching file(s)'); + }); + + it('should use correct singular/plural in truncation message for 1 file truncated', async () => { + // Create 101 test files (will truncate 1 file) + for (let i = 1; i <= 101; i++) { + await fs.writeFile( + path.join(tempRootDir, `singular${i}.trunctest`), + `content${i}`, + ); + } + + const params: GlobToolParams = { pattern: '*.trunctest' }; + const invocation = globTool.build(params); + const result = await invocation.execute(abortSignal); + + // Should use singular "file" for 1 truncated file + expect(result.llmContent).toContain('[1 file truncated] ...'); + expect(result.llmContent).not.toContain('[1 files truncated]'); + }); + + it('should use correct plural in truncation message for multiple files truncated', async () => { + // Create 105 test files (will truncate 5 files) + for (let i = 1; i <= 105; i++) { + await fs.writeFile( + path.join(tempRootDir, `plural${i}.trunctest`), + `content${i}`, + ); + } + + const params: GlobToolParams = { pattern: '*.trunctest' }; + const invocation = globTool.build(params); + const result = await invocation.execute(abortSignal); + + // Should use plural "files" for multiple truncated files + expect(result.llmContent).toContain('[5 files truncated] ...'); }); }); }); diff --git a/packages/core/src/tools/glob.ts b/packages/core/src/tools/glob.ts index 4826c859..2e9fa58e 100644 --- a/packages/core/src/tools/glob.ts +++ b/packages/core/src/tools/glob.ts @@ -10,10 +10,17 @@ import { glob, escape } from 'glob'; import type { ToolInvocation, ToolResult } from './tools.js'; import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; import { ToolNames } from './tool-names.js'; -import { shortenPath, makeRelative } from '../utils/paths.js'; +import { resolveAndValidatePath } from '../utils/paths.js'; import { type Config } from '../config/config.js'; -import { DEFAULT_FILE_FILTERING_OPTIONS } from '../config/constants.js'; +import { + DEFAULT_FILE_FILTERING_OPTIONS, + type FileFilteringOptions, +} from '../config/constants.js'; import { ToolErrorType } from './tool-error.js'; +import { getErrorMessage } from '../utils/errors.js'; +import type { FileDiscoveryService } from '../services/fileDiscoveryService.js'; + +const MAX_FILE_COUNT = 100; // Subset of 'Path' interface provided by 'glob' that we can implement for testing export interface GlobPath { @@ -64,118 +71,68 @@ export interface GlobToolParams { * The directory to search in (optional, defaults to current directory) */ path?: string; - - /** - * Whether the search should be case-sensitive (optional, defaults to false) - */ - case_sensitive?: boolean; - - /** - * Whether to respect .gitignore patterns (optional, defaults to true) - */ - respect_git_ignore?: boolean; - - /** - * Whether to respect .qwenignore patterns (optional, defaults to true) - */ - respect_qwen_ignore?: boolean; } class GlobToolInvocation extends BaseToolInvocation< GlobToolParams, ToolResult > { + private fileService: FileDiscoveryService; + constructor( private config: Config, params: GlobToolParams, ) { super(params); + this.fileService = config.getFileService(); } getDescription(): string { let description = `'${this.params.pattern}'`; if (this.params.path) { - const searchDir = path.resolve( - this.config.getTargetDir(), - this.params.path || '.', - ); - const relativePath = makeRelative(searchDir, this.config.getTargetDir()); - description += ` within ${shortenPath(relativePath)}`; + description += ` in path '${this.params.path}'`; } + return description; } async execute(signal: AbortSignal): Promise { try { - const workspaceContext = this.config.getWorkspaceContext(); - const workspaceDirectories = workspaceContext.getDirectories(); + // Default to target directory if no path is provided + const searchDirAbs = resolveAndValidatePath( + this.config, + this.params.path, + ); + const searchLocationDescription = this.params.path + ? `within ${searchDirAbs}` + : `in the workspace directory`; - // If a specific path is provided, resolve it and check if it's within workspace - let searchDirectories: readonly string[]; - if (this.params.path) { - const searchDirAbsolute = path.resolve( - this.config.getTargetDir(), - this.params.path, - ); - if (!workspaceContext.isPathWithinWorkspace(searchDirAbsolute)) { - const rawError = `Error: Path "${this.params.path}" is not within any workspace directory`; - return { - llmContent: rawError, - returnDisplay: `Path is not within workspace`, - error: { - message: rawError, - type: ToolErrorType.PATH_NOT_IN_WORKSPACE, - }, - }; - } - searchDirectories = [searchDirAbsolute]; - } else { - // Search across all workspace directories - searchDirectories = workspaceDirectories; + // Collect entries from the search directory + let pattern = this.params.pattern; + const fullPath = path.join(searchDirAbs, pattern); + if (fs.existsSync(fullPath)) { + pattern = escape(pattern); } - // Get centralized file discovery service - const fileDiscovery = this.config.getFileService(); - - // Collect entries from all search directories - const allEntries: GlobPath[] = []; - for (const searchDir of searchDirectories) { - let pattern = this.params.pattern; - const fullPath = path.join(searchDir, pattern); - if (fs.existsSync(fullPath)) { - pattern = escape(pattern); - } - - const entries = (await glob(pattern, { - cwd: searchDir, - withFileTypes: true, - nodir: true, - stat: true, - nocase: !this.params.case_sensitive, - dot: true, - ignore: this.config.getFileExclusions().getGlobExcludes(), - follow: false, - signal, - })) as GlobPath[]; - - allEntries.push(...entries); - } + const allEntries = (await glob(pattern, { + cwd: searchDirAbs, + withFileTypes: true, + nodir: true, + stat: true, + nocase: true, + dot: true, + follow: false, + signal, + })) as GlobPath[]; const relativePaths = allEntries.map((p) => path.relative(this.config.getTargetDir(), p.fullpath()), ); - const { filteredPaths, gitIgnoredCount, qwenIgnoredCount } = - fileDiscovery.filterFilesWithReport(relativePaths, { - respectGitIgnore: - this.params?.respect_git_ignore ?? - this.config.getFileFilteringOptions().respectGitIgnore ?? - DEFAULT_FILE_FILTERING_OPTIONS.respectGitIgnore, - respectQwenIgnore: - this.params?.respect_qwen_ignore ?? - this.config.getFileFilteringOptions().respectQwenIgnore ?? - DEFAULT_FILE_FILTERING_OPTIONS.respectQwenIgnore, - }); + const { filteredPaths } = this.fileService.filterFilesWithReport( + relativePaths, + this.getFileFilteringOptions(), + ); const filteredAbsolutePaths = new Set( filteredPaths.map((p) => path.resolve(this.config.getTargetDir(), p)), @@ -186,20 +143,8 @@ class GlobToolInvocation extends BaseToolInvocation< ); if (!filteredEntries || filteredEntries.length === 0) { - let message = `No files found matching pattern "${this.params.pattern}"`; - if (searchDirectories.length === 1) { - message += ` within ${searchDirectories[0]}`; - } else { - message += ` within ${searchDirectories.length} workspace directories`; - } - if (gitIgnoredCount > 0) { - message += ` (${gitIgnoredCount} files were git-ignored)`; - } - if (qwenIgnoredCount > 0) { - message += ` (${qwenIgnoredCount} files were qwen-ignored)`; - } return { - llmContent: message, + llmContent: `No files found matching pattern "${this.params.pattern}" ${searchLocationDescription}`, returnDisplay: `No files found`, }; } @@ -215,29 +160,32 @@ class GlobToolInvocation extends BaseToolInvocation< oneDayInMs, ); - const sortedAbsolutePaths = sortedEntries.map((entry) => + const totalFileCount = sortedEntries.length; + const truncated = totalFileCount > MAX_FILE_COUNT; + + // Limit to MAX_FILE_COUNT if needed + const entriesToShow = truncated + ? sortedEntries.slice(0, MAX_FILE_COUNT) + : sortedEntries; + + const sortedAbsolutePaths = entriesToShow.map((entry) => entry.fullpath(), ); const fileListDescription = sortedAbsolutePaths.join('\n'); - const fileCount = sortedAbsolutePaths.length; - let resultMessage = `Found ${fileCount} file(s) matching "${this.params.pattern}"`; - if (searchDirectories.length === 1) { - resultMessage += ` within ${searchDirectories[0]}`; - } else { - resultMessage += ` across ${searchDirectories.length} workspace directories`; + let resultMessage = `Found ${totalFileCount} file(s) matching "${this.params.pattern}" ${searchLocationDescription}`; + resultMessage += `, sorted by modification time (newest first):\n---\n${fileListDescription}`; + + // Add truncation notice if needed + if (truncated) { + const omittedFiles = totalFileCount - MAX_FILE_COUNT; + const fileTerm = omittedFiles === 1 ? 'file' : 'files'; + resultMessage += `\n---\n[${omittedFiles} ${fileTerm} truncated] ...`; } - if (gitIgnoredCount > 0) { - resultMessage += ` (${gitIgnoredCount} additional files were git-ignored)`; - } - if (qwenIgnoredCount > 0) { - resultMessage += ` (${qwenIgnoredCount} additional files were qwen-ignored)`; - } - resultMessage += `, sorted by modification time (newest first):\n${fileListDescription}`; return { llmContent: resultMessage, - returnDisplay: `Found ${fileCount} matching file(s)`, + returnDisplay: `Found ${totalFileCount} matching file(s)${truncated ? ' (truncated)' : ''}`, }; } catch (error) { const errorMessage = @@ -246,7 +194,7 @@ class GlobToolInvocation extends BaseToolInvocation< const rawError = `Error during glob search operation: ${errorMessage}`; return { llmContent: rawError, - returnDisplay: `Error: An unexpected error occurred.`, + returnDisplay: `Error: ${errorMessage || 'An unexpected error occurred.'}`, error: { message: rawError, type: ToolErrorType.GLOB_EXECUTION_ERROR, @@ -254,6 +202,18 @@ class GlobToolInvocation extends BaseToolInvocation< }; } } + + private getFileFilteringOptions(): FileFilteringOptions { + const options = this.config.getFileFilteringOptions?.(); + return { + respectGitIgnore: + options?.respectGitIgnore ?? + DEFAULT_FILE_FILTERING_OPTIONS.respectGitIgnore, + respectQwenIgnore: + options?.respectQwenIgnore ?? + DEFAULT_FILE_FILTERING_OPTIONS.respectQwenIgnore, + }; + } } /** @@ -266,35 +226,19 @@ export class GlobTool extends BaseDeclarativeTool { super( GlobTool.Name, 'FindFiles', - 'Efficiently finds files matching specific glob patterns (e.g., `src/**/*.ts`, `**/*.md`), returning absolute paths sorted by modification time (newest first). Ideal for quickly locating files based on their name or path structure, especially in large codebases.', + 'Fast file pattern matching tool that works with any codebase size\n- Supports glob patterns like "**/*.js" or "src/**/*.ts"\n- Returns matching file paths sorted by modification time\n- Use this tool when you need to find files by name patterns\n- When you are doing an open ended search that may require multiple rounds of globbing and grepping, use the Agent tool instead\n- You have the capability to call multiple tools in a single response. It is always better to speculatively perform multiple searches as a batch that are potentially useful.', Kind.Search, { properties: { pattern: { - description: - "The glob pattern to match against (e.g., '**/*.py', 'docs/*.md').", + description: 'The glob pattern to match files against', type: 'string', }, path: { description: - 'Optional: The absolute path to the directory to search within. If omitted, searches the root directory.', + 'The directory to search in. If not specified, the current working directory will be used. IMPORTANT: Omit this field to use the default directory. DO NOT enter "undefined" or "null" - simply omit it for the default behavior. Must be a valid directory path if provided.', type: 'string', }, - case_sensitive: { - description: - 'Optional: Whether the search should be case-sensitive. Defaults to false.', - type: 'boolean', - }, - respect_git_ignore: { - description: - 'Optional: Whether to respect .gitignore patterns when finding files. Only available in git repositories. Defaults to true.', - type: 'boolean', - }, - respect_qwen_ignore: { - description: - 'Optional: Whether to respect .qwenignore patterns when finding files. Defaults to true.', - type: 'boolean', - }, }, required: ['pattern'], type: 'object', @@ -308,29 +252,6 @@ export class GlobTool extends BaseDeclarativeTool { protected override validateToolParamValues( params: GlobToolParams, ): string | null { - const searchDirAbsolute = path.resolve( - this.config.getTargetDir(), - params.path || '.', - ); - - const workspaceContext = this.config.getWorkspaceContext(); - if (!workspaceContext.isPathWithinWorkspace(searchDirAbsolute)) { - const directories = workspaceContext.getDirectories(); - return `Search path ("${searchDirAbsolute}") resolves outside the allowed workspace directories: ${directories.join(', ')}`; - } - - const targetDir = searchDirAbsolute || this.config.getTargetDir(); - try { - if (!fs.existsSync(targetDir)) { - return `Search path does not exist ${targetDir}`; - } - if (!fs.statSync(targetDir).isDirectory()) { - return `Search path is not a directory: ${targetDir}`; - } - } catch (e: unknown) { - return `Error accessing search path: ${e}`; - } - if ( !params.pattern || typeof params.pattern !== 'string' || @@ -339,6 +260,15 @@ export class GlobTool extends BaseDeclarativeTool { return "The 'pattern' parameter cannot be empty."; } + // Only validate path if one is provided + if (params.path) { + try { + resolveAndValidatePath(this.config, params.path); + } catch (error) { + return getErrorMessage(error); + } + } + return null; } diff --git a/packages/core/src/tools/grep.test.ts b/packages/core/src/tools/grep.test.ts index f0707908..497fbb7d 100644 --- a/packages/core/src/tools/grep.test.ts +++ b/packages/core/src/tools/grep.test.ts @@ -84,11 +84,11 @@ describe('GrepTool', () => { expect(grepTool.validateToolParams(params)).toBeNull(); }); - it('should return null for valid params (pattern, path, and include)', () => { + it('should return null for valid params (pattern, path, and glob)', () => { const params: GrepToolParams = { pattern: 'hello', path: '.', - include: '*.txt', + glob: '*.txt', }; expect(grepTool.validateToolParams(params)).toBeNull(); }); @@ -111,7 +111,7 @@ describe('GrepTool', () => { const params: GrepToolParams = { pattern: 'hello', path: 'nonexistent' }; // Check for the core error message, as the full path might vary expect(grepTool.validateToolParams(params)).toContain( - 'Failed to access path stats for', + 'Path does not exist:', ); expect(grepTool.validateToolParams(params)).toContain('nonexistent'); }); @@ -155,8 +155,8 @@ describe('GrepTool', () => { expect(result.returnDisplay).toBe('Found 1 match'); }); - it('should find matches with an include glob', async () => { - const params: GrepToolParams = { pattern: 'hello', include: '*.js' }; + it('should find matches with a glob filter', async () => { + const params: GrepToolParams = { pattern: 'hello', glob: '*.js' }; const invocation = grepTool.build(params); const result = await invocation.execute(abortSignal); expect(result.llmContent).toContain( @@ -169,7 +169,7 @@ describe('GrepTool', () => { expect(result.returnDisplay).toBe('Found 1 match'); }); - it('should find matches with an include glob and path', async () => { + it('should find matches with a glob filter and path', async () => { await fs.writeFile( path.join(tempRootDir, 'sub', 'another.js'), 'const greeting = "hello";', @@ -177,7 +177,7 @@ describe('GrepTool', () => { const params: GrepToolParams = { pattern: 'hello', path: 'sub', - include: '*.js', + glob: '*.js', }; const invocation = grepTool.build(params); const result = await invocation.execute(abortSignal); @@ -244,59 +244,23 @@ describe('GrepTool', () => { describe('multi-directory workspace', () => { it('should search across all workspace directories when no path is specified', async () => { - // Create additional directory with test files - const secondDir = await fs.mkdtemp( - path.join(os.tmpdir(), 'grep-tool-second-'), - ); - await fs.writeFile( - path.join(secondDir, 'other.txt'), - 'hello from second directory\nworld in second', - ); - await fs.writeFile( - path.join(secondDir, 'another.js'), - 'function world() { return "test"; }', - ); - - // Create a mock config with multiple directories - const multiDirConfig = { - getTargetDir: () => tempRootDir, - getWorkspaceContext: () => - createMockWorkspaceContext(tempRootDir, [secondDir]), - getFileExclusions: () => ({ - getGlobExcludes: () => [], - }), - } as unknown as Config; - - const multiDirGrepTool = new GrepTool(multiDirConfig); + // The new implementation searches only in the target directory (first workspace directory) + // when no path is specified, not across all workspace directories const params: GrepToolParams = { pattern: 'world' }; - const invocation = multiDirGrepTool.build(params); + const invocation = grepTool.build(params); const result = await invocation.execute(abortSignal); - // Should find matches in both directories + // Should find matches in the target directory only expect(result.llmContent).toContain( - 'Found 5 matches for pattern "world"', + 'Found 3 matches for pattern "world" in the workspace directory', ); - // Matches from first directory + // Matches from target directory expect(result.llmContent).toContain('fileA.txt'); expect(result.llmContent).toContain('L1: hello world'); expect(result.llmContent).toContain('L2: second line with world'); expect(result.llmContent).toContain('fileC.txt'); expect(result.llmContent).toContain('L1: another world in sub dir'); - - // Matches from second directory (with directory name prefix) - const secondDirName = path.basename(secondDir); - expect(result.llmContent).toContain( - `File: ${path.join(secondDirName, 'other.txt')}`, - ); - expect(result.llmContent).toContain('L2: world in second'); - expect(result.llmContent).toContain( - `File: ${path.join(secondDirName, 'another.js')}`, - ); - expect(result.llmContent).toContain('L1: function world()'); - - // Clean up - await fs.rm(secondDir, { recursive: true, force: true }); }); it('should search only specified path within workspace directories', async () => { @@ -346,16 +310,18 @@ describe('GrepTool', () => { it('should generate correct description with pattern only', () => { const params: GrepToolParams = { pattern: 'testPattern' }; const invocation = grepTool.build(params); - expect(invocation.getDescription()).toBe("'testPattern'"); + expect(invocation.getDescription()).toBe("'testPattern' in path './'"); }); - it('should generate correct description with pattern and include', () => { + it('should generate correct description with pattern and glob', () => { const params: GrepToolParams = { pattern: 'testPattern', - include: '*.ts', + glob: '*.ts', }; const invocation = grepTool.build(params); - expect(invocation.getDescription()).toBe("'testPattern' in *.ts"); + expect(invocation.getDescription()).toBe( + "'testPattern' in path './' (filter: '*.ts')", + ); }); it('should generate correct description with pattern and path', async () => { @@ -366,49 +332,37 @@ describe('GrepTool', () => { path: path.join('src', 'app'), }; const invocation = grepTool.build(params); - // The path will be relative to the tempRootDir, so we check for containment. - expect(invocation.getDescription()).toContain("'testPattern' within"); - expect(invocation.getDescription()).toContain(path.join('src', 'app')); - }); - - it('should indicate searching across all workspace directories when no path specified', () => { - // Create a mock config with multiple directories - const multiDirConfig = { - getTargetDir: () => tempRootDir, - getWorkspaceContext: () => - createMockWorkspaceContext(tempRootDir, ['/another/dir']), - getFileExclusions: () => ({ - getGlobExcludes: () => [], - }), - } as unknown as Config; - - const multiDirGrepTool = new GrepTool(multiDirConfig); - const params: GrepToolParams = { pattern: 'testPattern' }; - const invocation = multiDirGrepTool.build(params); - expect(invocation.getDescription()).toBe( - "'testPattern' across all workspace directories", + expect(invocation.getDescription()).toContain( + "'testPattern' in path 'src", ); + expect(invocation.getDescription()).toContain("app'"); }); - it('should generate correct description with pattern, include, and path', async () => { + it('should indicate searching workspace directory when no path specified', () => { + const params: GrepToolParams = { pattern: 'testPattern' }; + const invocation = grepTool.build(params); + expect(invocation.getDescription()).toBe("'testPattern' in path './'"); + }); + + it('should generate correct description with pattern, glob, and path', async () => { const dirPath = path.join(tempRootDir, 'src', 'app'); await fs.mkdir(dirPath, { recursive: true }); const params: GrepToolParams = { pattern: 'testPattern', - include: '*.ts', + glob: '*.ts', path: path.join('src', 'app'), }; const invocation = grepTool.build(params); expect(invocation.getDescription()).toContain( - "'testPattern' in *.ts within", + "'testPattern' in path 'src", ); - expect(invocation.getDescription()).toContain(path.join('src', 'app')); + expect(invocation.getDescription()).toContain("(filter: '*.ts')"); }); it('should use ./ for root path in description', () => { const params: GrepToolParams = { pattern: 'testPattern', path: '.' }; const invocation = grepTool.build(params); - expect(invocation.getDescription()).toBe("'testPattern' within ./"); + expect(invocation.getDescription()).toBe("'testPattern' in path '.'"); }); }); @@ -422,67 +376,50 @@ describe('GrepTool', () => { } }); - it('should limit results to default 20 matches', async () => { + it('should show all results when no limit is specified', async () => { const params: GrepToolParams = { pattern: 'testword' }; const invocation = grepTool.build(params); const result = await invocation.execute(abortSignal); - expect(result.llmContent).toContain('Found 20 matches'); - expect(result.llmContent).toContain( - 'showing first 20 of 30+ total matches', - ); - expect(result.llmContent).toContain('WARNING: Results truncated'); - expect(result.returnDisplay).toContain( - 'Found 20 matches (truncated from 30+)', - ); + // New implementation shows all matches when limit is not specified + expect(result.llmContent).toContain('Found 30 matches'); + expect(result.llmContent).not.toContain('truncated'); + expect(result.returnDisplay).toBe('Found 30 matches'); }); - it('should respect custom maxResults parameter', async () => { - const params: GrepToolParams = { pattern: 'testword', maxResults: 5 }; + it('should respect custom limit parameter', async () => { + const params: GrepToolParams = { pattern: 'testword', limit: 5 }; const invocation = grepTool.build(params); const result = await invocation.execute(abortSignal); - expect(result.llmContent).toContain('Found 5 matches'); - expect(result.llmContent).toContain( - 'showing first 5 of 30+ total matches', - ); - expect(result.llmContent).toContain('current: 5'); - expect(result.returnDisplay).toContain( - 'Found 5 matches (truncated from 30+)', - ); + // Should find 30 total but limit to 5 + expect(result.llmContent).toContain('Found 30 matches'); + expect(result.llmContent).toContain('25 lines truncated'); + expect(result.returnDisplay).toContain('Found 30 matches (truncated)'); }); it('should not show truncation warning when all results fit', async () => { - const params: GrepToolParams = { pattern: 'testword', maxResults: 50 }; + const params: GrepToolParams = { pattern: 'testword', limit: 50 }; const invocation = grepTool.build(params); const result = await invocation.execute(abortSignal); expect(result.llmContent).toContain('Found 30 matches'); - expect(result.llmContent).not.toContain('WARNING: Results truncated'); - expect(result.llmContent).not.toContain('showing first'); + expect(result.llmContent).not.toContain('truncated'); expect(result.returnDisplay).toBe('Found 30 matches'); }); - it('should validate maxResults parameter', () => { - const invalidParams = [ - { pattern: 'test', maxResults: 0 }, - { pattern: 'test', maxResults: 101 }, - { pattern: 'test', maxResults: -1 }, - { pattern: 'test', maxResults: 1.5 }, - ]; - - invalidParams.forEach((params) => { - const error = grepTool.validateToolParams(params as GrepToolParams); - expect(error).toBeTruthy(); // Just check that validation fails - expect(error).toMatch(/maxResults|must be/); // Check it's about maxResults validation - }); + it('should not validate limit parameter', () => { + // limit parameter has no validation constraints in the new implementation + const params = { pattern: 'test', limit: 5 }; + const error = grepTool.validateToolParams(params as GrepToolParams); + expect(error).toBeNull(); }); - it('should accept valid maxResults parameter', () => { + it('should accept valid limit parameter', () => { const validParams = [ - { pattern: 'test', maxResults: 1 }, - { pattern: 'test', maxResults: 50 }, - { pattern: 'test', maxResults: 100 }, + { pattern: 'test', limit: 1 }, + { pattern: 'test', limit: 50 }, + { pattern: 'test', limit: 100 }, ]; validParams.forEach((params) => { diff --git a/packages/core/src/tools/grep.ts b/packages/core/src/tools/grep.ts index 08f651ac..1aed46c0 100644 --- a/packages/core/src/tools/grep.ts +++ b/packages/core/src/tools/grep.ts @@ -4,7 +4,6 @@ * SPDX-License-Identifier: Apache-2.0 */ -import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; import { EOL } from 'node:os'; @@ -13,13 +12,15 @@ import { globStream } from 'glob'; import type { ToolInvocation, ToolResult } from './tools.js'; import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; import { ToolNames } from './tool-names.js'; -import { makeRelative, shortenPath } from '../utils/paths.js'; +import { resolveAndValidatePath } from '../utils/paths.js'; import { getErrorMessage, isNodeError } from '../utils/errors.js'; import { isGitRepository } from '../utils/gitUtils.js'; import type { Config } from '../config/config.js'; import type { FileExclusions } from '../utils/ignorePatterns.js'; import { ToolErrorType } from './tool-error.js'; +const MAX_LLM_CONTENT_LENGTH = 20_000; + // --- Interfaces --- /** @@ -37,14 +38,14 @@ export interface GrepToolParams { path?: string; /** - * File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}") + * Glob pattern to filter files (e.g. "*.js", "*.{ts,tsx}") */ - include?: string; + glob?: string; /** - * Maximum number of matches to return (optional, defaults to 20) + * Maximum number of matching lines to return (optional, shows all if not specified) */ - maxResults?: number; + limit?: number; } /** @@ -70,121 +71,57 @@ class GrepToolInvocation extends BaseToolInvocation< this.fileExclusions = config.getFileExclusions(); } - /** - * Checks if a path is within the root directory and resolves it. - * @param relativePath Path relative to the root directory (or undefined for root). - * @returns The absolute path if valid and exists, or null if no path specified (to search all directories). - * @throws {Error} If path is outside root, doesn't exist, or isn't a directory. - */ - private resolveAndValidatePath(relativePath?: string): string | null { - // If no path specified, return null to indicate searching all workspace directories - if (!relativePath) { - return null; - } - - const targetPath = path.resolve(this.config.getTargetDir(), relativePath); - - // Security Check: Ensure the resolved path is within workspace boundaries - const workspaceContext = this.config.getWorkspaceContext(); - if (!workspaceContext.isPathWithinWorkspace(targetPath)) { - const directories = workspaceContext.getDirectories(); - throw new Error( - `Path validation failed: Attempted path "${relativePath}" resolves outside the allowed workspace directories: ${directories.join(', ')}`, - ); - } - - // Check existence and type after resolving - try { - const stats = fs.statSync(targetPath); - if (!stats.isDirectory()) { - throw new Error(`Path is not a directory: ${targetPath}`); - } - } catch (error: unknown) { - if (isNodeError(error) && error.code !== 'ENOENT') { - throw new Error(`Path does not exist: ${targetPath}`); - } - throw new Error( - `Failed to access path stats for ${targetPath}: ${error}`, - ); - } - - return targetPath; - } - async execute(signal: AbortSignal): Promise { try { - const workspaceContext = this.config.getWorkspaceContext(); - const searchDirAbs = this.resolveAndValidatePath(this.params.path); + // Default to target directory if no path is provided + const searchDirAbs = resolveAndValidatePath( + this.config, + this.params.path, + ); const searchDirDisplay = this.params.path || '.'; - // Determine which directories to search - let searchDirectories: readonly string[]; - if (searchDirAbs === null) { - // No path specified - search all workspace directories - searchDirectories = workspaceContext.getDirectories(); - } else { - // Specific path provided - search only that directory - searchDirectories = [searchDirAbs]; - } + // Perform grep search + const rawMatches = await this.performGrepSearch({ + pattern: this.params.pattern, + path: searchDirAbs, + glob: this.params.glob, + signal, + }); - // Collect matches from all search directories - let allMatches: GrepMatch[] = []; - const maxResults = this.params.maxResults ?? 20; // Default to 20 results - let totalMatchesFound = 0; - let searchTruncated = false; + // Build search description + const searchLocationDescription = this.params.path + ? `in path "${searchDirDisplay}"` + : `in the workspace directory`; - for (const searchDir of searchDirectories) { - const matches = await this.performGrepSearch({ - pattern: this.params.pattern, - path: searchDir, - include: this.params.include, - signal, - }); + const filterDescription = this.params.glob + ? ` (filter: "${this.params.glob}")` + : ''; - totalMatchesFound += matches.length; - - // Add directory prefix if searching multiple directories - if (searchDirectories.length > 1) { - const dirName = path.basename(searchDir); - matches.forEach((match) => { - match.filePath = path.join(dirName, match.filePath); - }); - } - - // Apply result limiting - const remainingSlots = maxResults - allMatches.length; - if (remainingSlots <= 0) { - searchTruncated = true; - break; - } - - if (matches.length > remainingSlots) { - allMatches = allMatches.concat(matches.slice(0, remainingSlots)); - searchTruncated = true; - break; - } else { - allMatches = allMatches.concat(matches); - } - } - - let searchLocationDescription: string; - if (searchDirAbs === null) { - const numDirs = workspaceContext.getDirectories().length; - searchLocationDescription = - numDirs > 1 - ? `across ${numDirs} workspace directories` - : `in the workspace directory`; - } else { - searchLocationDescription = `in path "${searchDirDisplay}"`; - } - - if (allMatches.length === 0) { - const noMatchMsg = `No matches found for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}.`; + // Check if we have any matches + if (rawMatches.length === 0) { + const noMatchMsg = `No matches found for pattern "${this.params.pattern}" ${searchLocationDescription}${filterDescription}.`; return { llmContent: noMatchMsg, returnDisplay: `No matches found` }; } + // Apply line limit if specified + let truncatedByLineLimit = false; + let matchesToInclude = rawMatches; + if ( + this.params.limit !== undefined && + rawMatches.length > this.params.limit + ) { + matchesToInclude = rawMatches.slice(0, this.params.limit); + truncatedByLineLimit = true; + } + + const totalMatches = rawMatches.length; + const matchTerm = totalMatches === 1 ? 'match' : 'matches'; + + // Build header + const header = `Found ${totalMatches} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${filterDescription}:\n---\n`; + // Group matches by file - const matchesByFile = allMatches.reduce( + const matchesByFile = matchesToInclude.reduce( (acc, match) => { const fileKey = match.filePath; if (!acc[fileKey]) { @@ -197,46 +134,51 @@ class GrepToolInvocation extends BaseToolInvocation< {} as Record, ); - const matchCount = allMatches.length; - const matchTerm = matchCount === 1 ? 'match' : 'matches'; - - // Build the header with truncation info if needed - let headerText = `Found ${matchCount} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${this.params.include ? ` (filter: "${this.params.include}")` : ''}`; - - if (searchTruncated) { - headerText += ` (showing first ${matchCount} of ${totalMatchesFound}+ total matches)`; - } - - let llmContent = `${headerText}: ---- -`; - + // Build grep output + let grepOutput = ''; for (const filePath in matchesByFile) { - llmContent += `File: ${filePath}\n`; + grepOutput += `File: ${filePath}\n`; matchesByFile[filePath].forEach((match) => { const trimmedLine = match.line.trim(); - llmContent += `L${match.lineNumber}: ${trimmedLine}\n`; + grepOutput += `L${match.lineNumber}: ${trimmedLine}\n`; }); - llmContent += '---\n'; + grepOutput += '---\n'; } - // Add truncation guidance if results were limited - if (searchTruncated) { - llmContent += `\nWARNING: Results truncated to prevent context overflow. To see more results: -- Use a more specific pattern to reduce matches -- Add file filters with the 'include' parameter (e.g., "*.js", "src/**") -- Specify a narrower 'path' to search in a subdirectory -- Increase 'maxResults' parameter if you need more matches (current: ${maxResults})`; + // Apply character limit as safety net + let truncatedByCharLimit = false; + if (grepOutput.length > MAX_LLM_CONTENT_LENGTH) { + grepOutput = grepOutput.slice(0, MAX_LLM_CONTENT_LENGTH) + '...'; + truncatedByCharLimit = true; } - let displayText = `Found ${matchCount} ${matchTerm}`; - if (searchTruncated) { - displayText += ` (truncated from ${totalMatchesFound}+)`; + // Count how many lines we actually included after character truncation + const finalLines = grepOutput + .split('\n') + .filter( + (line) => + line.trim() && !line.startsWith('File:') && !line.startsWith('---'), + ); + const includedLines = finalLines.length; + + // Build result + let llmContent = header + grepOutput; + + // Add truncation notice if needed + if (truncatedByLineLimit || truncatedByCharLimit) { + const omittedMatches = totalMatches - includedLines; + llmContent += ` [${omittedMatches} ${omittedMatches === 1 ? 'line' : 'lines'} truncated] ...`; + } + + // Build display message + let displayMessage = `Found ${totalMatches} ${matchTerm}`; + if (truncatedByLineLimit || truncatedByCharLimit) { + displayMessage += ` (truncated)`; } return { llmContent: llmContent.trim(), - returnDisplay: displayText, + returnDisplay: displayMessage, }; } catch (error) { console.error(`Error during GrepLogic execution: ${error}`); @@ -329,50 +271,26 @@ class GrepToolInvocation extends BaseToolInvocation< * @returns A string describing the grep */ getDescription(): string { - let description = `'${this.params.pattern}'`; - if (this.params.include) { - description += ` in ${this.params.include}`; - } - if (this.params.path) { - const resolvedPath = path.resolve( - this.config.getTargetDir(), - this.params.path, - ); - if ( - resolvedPath === this.config.getTargetDir() || - this.params.path === '.' - ) { - description += ` within ./`; - } else { - const relativePath = makeRelative( - resolvedPath, - this.config.getTargetDir(), - ); - description += ` within ${shortenPath(relativePath)}`; - } - } else { - // When no path is specified, indicate searching all workspace directories - const workspaceContext = this.config.getWorkspaceContext(); - const directories = workspaceContext.getDirectories(); - if (directories.length > 1) { - description += ` across all workspace directories`; - } + let description = `'${this.params.pattern}' in path '${this.params.path || './'}'`; + if (this.params.glob) { + description += ` (filter: '${this.params.glob}')`; } + return description; } /** * Performs the actual search using the prioritized strategies. - * @param options Search options including pattern, absolute path, and include glob. + * @param options Search options including pattern, absolute path, and glob filter. * @returns A promise resolving to an array of match objects. */ private async performGrepSearch(options: { pattern: string; path: string; // Expects absolute path - include?: string; + glob?: string; signal: AbortSignal; }): Promise { - const { pattern, path: absolutePath, include } = options; + const { pattern, path: absolutePath, glob } = options; let strategyUsed = 'none'; try { @@ -390,8 +308,8 @@ class GrepToolInvocation extends BaseToolInvocation< '--ignore-case', pattern, ]; - if (include) { - gitArgs.push('--', include); + if (glob) { + gitArgs.push('--', glob); } try { @@ -457,8 +375,8 @@ class GrepToolInvocation extends BaseToolInvocation< }) .filter((dir): dir is string => !!dir); commonExcludes.forEach((dir) => grepArgs.push(`--exclude-dir=${dir}`)); - if (include) { - grepArgs.push(`--include=${include}`); + if (glob) { + grepArgs.push(`--include=${glob}`); } grepArgs.push(pattern); grepArgs.push('.'); @@ -537,7 +455,7 @@ class GrepToolInvocation extends BaseToolInvocation< 'GrepLogic: Falling back to JavaScript grep implementation.', ); strategyUsed = 'javascript fallback'; - const globPattern = include ? include : '**/*'; + const globPattern = glob ? glob : '**/*'; const ignorePatterns = this.fileExclusions.getGlobExcludes(); const filesIterator = globStream(globPattern, { @@ -603,32 +521,30 @@ export class GrepTool extends BaseDeclarativeTool { constructor(private readonly config: Config) { super( GrepTool.Name, - 'SearchText', - 'Searches for a regular expression pattern within the content of files in a specified directory (or current working directory). Can filter files by a glob pattern. Returns the lines containing matches, along with their file paths and line numbers.', + 'Grep', + 'A powerful search tool for finding patterns in files\n\n Usage:\n - ALWAYS use Grep for search tasks. NEVER invoke `grep` or `rg` as a Bash command. The Grep tool has been optimized for correct permissions and access.\n - Supports full regex syntax (e.g., "log.*Error", "function\\s+\\w+")\n - Filter files with glob parameter (e.g., "*.js", "**/*.tsx")\n - Case-insensitive by default\n - Use Task tool for open-ended searches requiring multiple rounds\n', Kind.Search, { properties: { pattern: { - description: - "The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').", type: 'string', + description: + 'The regular expression pattern to search for in file contents', + }, + glob: { + type: 'string', + description: + 'Glob pattern to filter files (e.g. "*.js", "*.{ts,tsx}")', }, path: { - description: - 'Optional: The absolute path to the directory to search within. If omitted, searches the current working directory.', type: 'string', - }, - include: { description: - "Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).", - type: 'string', + 'File or directory to search in. Defaults to current working directory.', }, - maxResults: { - description: - 'Optional: Maximum number of matches to return to prevent context overflow (default: 20, max: 100). Use lower values for broad searches, higher for specific searches.', + limit: { type: 'number', - minimum: 1, - maximum: 100, + description: + 'Limit output to first N matching lines. Optional - shows all matches if not specified.', }, }, required: ['pattern'], @@ -637,47 +553,6 @@ export class GrepTool extends BaseDeclarativeTool { ); } - /** - * Checks if a path is within the root directory and resolves it. - * @param relativePath Path relative to the root directory (or undefined for root). - * @returns The absolute path if valid and exists, or null if no path specified (to search all directories). - * @throws {Error} If path is outside root, doesn't exist, or isn't a directory. - */ - private resolveAndValidatePath(relativePath?: string): string | null { - // If no path specified, return null to indicate searching all workspace directories - if (!relativePath) { - return null; - } - - const targetPath = path.resolve(this.config.getTargetDir(), relativePath); - - // Security Check: Ensure the resolved path is within workspace boundaries - const workspaceContext = this.config.getWorkspaceContext(); - if (!workspaceContext.isPathWithinWorkspace(targetPath)) { - const directories = workspaceContext.getDirectories(); - throw new Error( - `Path validation failed: Attempted path "${relativePath}" resolves outside the allowed workspace directories: ${directories.join(', ')}`, - ); - } - - // Check existence and type after resolving - try { - const stats = fs.statSync(targetPath); - if (!stats.isDirectory()) { - throw new Error(`Path is not a directory: ${targetPath}`); - } - } catch (error: unknown) { - if (isNodeError(error) && error.code !== 'ENOENT') { - throw new Error(`Path does not exist: ${targetPath}`); - } - throw new Error( - `Failed to access path stats for ${targetPath}: ${error}`, - ); - } - - return targetPath; - } - /** * Validates the parameters for the tool * @param params Parameters to validate @@ -686,27 +561,17 @@ export class GrepTool extends BaseDeclarativeTool { protected override validateToolParamValues( params: GrepToolParams, ): string | null { + // Validate pattern is a valid regex try { new RegExp(params.pattern); } catch (error) { - return `Invalid regular expression pattern provided: ${params.pattern}. Error: ${getErrorMessage(error)}`; - } - - // Validate maxResults if provided - if (params.maxResults !== undefined) { - if ( - !Number.isInteger(params.maxResults) || - params.maxResults < 1 || - params.maxResults > 100 - ) { - return `maxResults must be an integer between 1 and 100, got: ${params.maxResults}`; - } + return `Invalid regular expression pattern: ${params.pattern}. Error: ${getErrorMessage(error)}`; } // Only validate path if one is provided if (params.path) { try { - this.resolveAndValidatePath(params.path); + resolveAndValidatePath(this.config, params.path); } catch (error) { return getErrorMessage(error); } diff --git a/packages/core/src/tools/ripGrep.test.ts b/packages/core/src/tools/ripGrep.test.ts index 19ac5ce3..b8ed191f 100644 --- a/packages/core/src/tools/ripGrep.test.ts +++ b/packages/core/src/tools/ripGrep.test.ts @@ -184,17 +184,15 @@ describe('RipGrepTool', () => { }; // Check for the core error message, as the full path might vary expect(grepTool.validateToolParams(params)).toContain( - 'Failed to access path stats for', + 'Path does not exist:', ); expect(grepTool.validateToolParams(params)).toContain('nonexistent'); }); - it('should return error if path is a file, not a directory', async () => { + it('should allow path to be a file', () => { const filePath = path.join(tempRootDir, 'fileA.txt'); const params: RipGrepToolParams = { pattern: 'hello', path: filePath }; - expect(grepTool.validateToolParams(params)).toContain( - `Path is not a directory: ${filePath}`, - ); + expect(grepTool.validateToolParams(params)).toBeNull(); }); }); @@ -432,7 +430,7 @@ describe('RipGrepTool', () => { const invocation = grepTool.build(params); const result = await invocation.execute(abortSignal); - expect(String(result.llmContent).length).toBeLessThanOrEqual(20_000); + expect(String(result.llmContent).length).toBeLessThanOrEqual(21_000); expect(result.llmContent).toMatch(/\[\d+ lines? truncated\] \.\.\./); expect(result.returnDisplay).toContain('truncated'); }); @@ -567,6 +565,26 @@ describe('RipGrepTool', () => { ); }); + it('should search within a single file when path is a file', async () => { + mockSpawn.mockImplementationOnce( + createMockSpawn({ + outputData: `fileA.txt:1:hello world${EOL}fileA.txt:2:second line with world${EOL}`, + exitCode: 0, + }), + ); + + const params: RipGrepToolParams = { + pattern: 'world', + path: path.join(tempRootDir, 'fileA.txt'), + }; + const invocation = grepTool.build(params); + const result = await invocation.execute(abortSignal); + expect(result.llmContent).toContain('Found 2 matches'); + expect(result.llmContent).toContain('fileA.txt:1:hello world'); + expect(result.llmContent).toContain('fileA.txt:2:second line with world'); + expect(result.returnDisplay).toBe('Found 2 matches'); + }); + it('should throw an error if ripgrep is not available', async () => { // Make ensureRipgrepBinary throw (ensureRipgrepPath as Mock).mockRejectedValue( @@ -648,7 +666,9 @@ describe('RipGrepTool', () => { describe('error handling and edge cases', () => { it('should handle workspace boundary violations', () => { const params: RipGrepToolParams = { pattern: 'test', path: '../outside' }; - expect(() => grepTool.build(params)).toThrow(/Path validation failed/); + expect(() => grepTool.build(params)).toThrow( + /Path is not within workspace/, + ); }); it('should handle empty directories gracefully', async () => { @@ -1132,7 +1152,9 @@ describe('RipGrepTool', () => { glob: '*.ts', }; const invocation = grepTool.build(params); - expect(invocation.getDescription()).toBe("'testPattern' in *.ts"); + expect(invocation.getDescription()).toBe( + "'testPattern' (filter: '*.ts')", + ); }); it('should generate correct description with pattern and path', async () => { @@ -1143,9 +1165,10 @@ describe('RipGrepTool', () => { path: path.join('src', 'app'), }; const invocation = grepTool.build(params); - // The path will be relative to the tempRootDir, so we check for containment. - expect(invocation.getDescription()).toContain("'testPattern' within"); - expect(invocation.getDescription()).toContain(path.join('src', 'app')); + expect(invocation.getDescription()).toContain( + "'testPattern' in path 'src", + ); + expect(invocation.getDescription()).toContain("app'"); }); it('should generate correct description with default search path', () => { @@ -1164,15 +1187,15 @@ describe('RipGrepTool', () => { }; const invocation = grepTool.build(params); expect(invocation.getDescription()).toContain( - "'testPattern' in *.ts within", + "'testPattern' in path 'src", ); - expect(invocation.getDescription()).toContain(path.join('src', 'app')); + expect(invocation.getDescription()).toContain("(filter: '*.ts')"); }); - it('should use ./ for root path in description', () => { + it('should use path when specified in description', () => { const params: RipGrepToolParams = { pattern: 'testPattern', path: '.' }; const invocation = grepTool.build(params); - expect(invocation.getDescription()).toBe("'testPattern' within ./"); + expect(invocation.getDescription()).toBe("'testPattern' in path '.'"); }); }); }); diff --git a/packages/core/src/tools/ripGrep.ts b/packages/core/src/tools/ripGrep.ts index 073db8e9..c119de5b 100644 --- a/packages/core/src/tools/ripGrep.ts +++ b/packages/core/src/tools/ripGrep.ts @@ -11,8 +11,8 @@ import { spawn } from 'node:child_process'; import type { ToolInvocation, ToolResult } from './tools.js'; import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; import { ToolNames } from './tool-names.js'; -import { makeRelative, shortenPath } from '../utils/paths.js'; -import { getErrorMessage, isNodeError } from '../utils/errors.js'; +import { resolveAndValidatePath } from '../utils/paths.js'; +import { getErrorMessage } from '../utils/errors.js'; import type { Config } from '../config/config.js'; import { ensureRipgrepPath } from '../utils/ripgrepUtils.js'; import { SchemaValidator } from '../utils/schemaValidator.js'; @@ -57,50 +57,13 @@ class GrepToolInvocation extends BaseToolInvocation< super(params); } - /** - * Checks if a path is within the root directory and resolves it. - * @param relativePath Path relative to the root directory (or undefined for root). - * @returns The absolute path to search within. - * @throws {Error} If path is outside root, doesn't exist, or isn't a directory. - */ - private resolveAndValidatePath(relativePath?: string): string { - const targetDir = this.config.getTargetDir(); - const targetPath = relativePath - ? path.resolve(targetDir, relativePath) - : targetDir; - - const workspaceContext = this.config.getWorkspaceContext(); - if (!workspaceContext.isPathWithinWorkspace(targetPath)) { - const directories = workspaceContext.getDirectories(); - throw new Error( - `Path validation failed: Attempted path "${relativePath}" resolves outside the allowed workspace directories: ${directories.join(', ')}`, - ); - } - - return this.ensureDirectory(targetPath); - } - - private ensureDirectory(targetPath: string): string { - try { - const stats = fs.statSync(targetPath); - if (!stats.isDirectory()) { - throw new Error(`Path is not a directory: ${targetPath}`); - } - } catch (error: unknown) { - if (isNodeError(error) && error.code !== 'ENOENT') { - throw new Error(`Path does not exist: ${targetPath}`); - } - throw new Error( - `Failed to access path stats for ${targetPath}: ${error}`, - ); - } - - return targetPath; - } - async execute(signal: AbortSignal): Promise { try { - const searchDirAbs = this.resolveAndValidatePath(this.params.path); + const searchDirAbs = resolveAndValidatePath( + this.config, + this.params.path, + { allowFiles: true }, + ); const searchDirDisplay = this.params.path || '.'; // Get raw ripgrep output @@ -133,9 +96,6 @@ class GrepToolInvocation extends BaseToolInvocation< // Build header early to calculate available space const header = `Found ${totalMatches} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${filterDescription}:\n---\n`; - const maxTruncationNoticeLength = 100; // "[... N more matches truncated]" - const maxGrepOutputLength = - MAX_LLM_CONTENT_LENGTH - header.length - maxTruncationNoticeLength; // Apply line limit first (if specified) let truncatedByLineLimit = false; @@ -148,19 +108,32 @@ class GrepToolInvocation extends BaseToolInvocation< truncatedByLineLimit = true; } - // Join lines back into grep output - let grepOutput = linesToInclude.join(EOL); - - // Apply character limit as safety net + // Build output and track how many lines we include, respecting character limit + const parts: string[] = []; + let includedLines = 0; let truncatedByCharLimit = false; - if (grepOutput.length > maxGrepOutputLength) { - grepOutput = grepOutput.slice(0, maxGrepOutputLength) + '...'; - truncatedByCharLimit = true; + let currentLength = 0; + + for (const line of linesToInclude) { + const sep = includedLines > 0 ? 1 : 0; + + includedLines++; + + if (currentLength + line.length <= MAX_LLM_CONTENT_LENGTH) { + parts.push(line); + currentLength = currentLength + line.length + sep; + } else { + const remaining = Math.max( + MAX_LLM_CONTENT_LENGTH - currentLength - sep, + 10, + ); + parts.push(line.slice(0, remaining) + '...'); + truncatedByCharLimit = true; + break; + } } - // Count how many lines we actually included after character truncation - const finalLines = grepOutput.split(EOL).filter((line) => line.trim()); - const includedLines = finalLines.length; + const grepOutput = parts.join('\n'); // Build result let llmContent = header + grepOutput; @@ -168,7 +141,7 @@ class GrepToolInvocation extends BaseToolInvocation< // Add truncation notice if needed if (truncatedByLineLimit || truncatedByCharLimit) { const omittedMatches = totalMatches - includedLines; - llmContent += ` [${omittedMatches} ${omittedMatches === 1 ? 'line' : 'lines'} truncated] ...`; + llmContent += `\n---\n[${omittedMatches} ${omittedMatches === 1 ? 'line' : 'lines'} truncated] ...`; } // Build display message (show real count, not truncated) @@ -193,7 +166,7 @@ class GrepToolInvocation extends BaseToolInvocation< private async performRipgrepSearch(options: { pattern: string; - path: string; + path: string; // Can be a file or directory glob?: string; signal: AbortSignal; }): Promise { @@ -302,34 +275,13 @@ class GrepToolInvocation extends BaseToolInvocation< */ getDescription(): string { let description = `'${this.params.pattern}'`; - if (this.params.glob) { - description += ` in ${this.params.glob}`; - } if (this.params.path) { - const resolvedPath = path.resolve( - this.config.getTargetDir(), - this.params.path, - ); - if ( - resolvedPath === this.config.getTargetDir() || - this.params.path === '.' - ) { - description += ` within ./`; - } else { - const relativePath = makeRelative( - resolvedPath, - this.config.getTargetDir(), - ); - description += ` within ${shortenPath(relativePath)}`; - } - } else { - // When no path is specified, indicate searching all workspace directories - const workspaceContext = this.config.getWorkspaceContext(); - const directories = workspaceContext.getDirectories(); - if (directories.length > 1) { - description += ` across all workspace directories`; - } + description += ` in path '${this.params.path}'`; } + if (this.params.glob) { + description += ` (filter: '${this.params.glob}')`; + } + return description; } } @@ -378,47 +330,6 @@ export class RipGrepTool extends BaseDeclarativeTool< ); } - /** - * Checks if a path is within the root directory and resolves it. - * @param relativePath Path relative to the root directory (or undefined for root). - * @returns The absolute path to search within. - * @throws {Error} If path is outside root, doesn't exist, or isn't a directory. - */ - private resolveAndValidatePath(relativePath?: string): string { - // If no path specified, search within the workspace root directory - if (!relativePath) { - return this.config.getTargetDir(); - } - - const targetPath = path.resolve(this.config.getTargetDir(), relativePath); - - // Security Check: Ensure the resolved path is within workspace boundaries - const workspaceContext = this.config.getWorkspaceContext(); - if (!workspaceContext.isPathWithinWorkspace(targetPath)) { - const directories = workspaceContext.getDirectories(); - throw new Error( - `Path validation failed: Attempted path "${relativePath}" resolves outside the allowed workspace directories: ${directories.join(', ')}`, - ); - } - - // Check existence and type after resolving - try { - const stats = fs.statSync(targetPath); - if (!stats.isDirectory()) { - throw new Error(`Path is not a directory: ${targetPath}`); - } - } catch (error: unknown) { - if (isNodeError(error) && error.code !== 'ENOENT') { - throw new Error(`Path does not exist: ${targetPath}`); - } - throw new Error( - `Failed to access path stats for ${targetPath}: ${error}`, - ); - } - - return targetPath; - } - /** * Validates the parameters for the tool * @param params Parameters to validate @@ -445,7 +356,7 @@ export class RipGrepTool extends BaseDeclarativeTool< // Only validate path if one is provided if (params.path) { try { - this.resolveAndValidatePath(params.path); + resolveAndValidatePath(this.config, params.path, { allowFiles: true }); } catch (error) { return getErrorMessage(error); } diff --git a/packages/core/src/utils/paths.test.ts b/packages/core/src/utils/paths.test.ts index 0e964672..6359ba81 100644 --- a/packages/core/src/utils/paths.test.ts +++ b/packages/core/src/utils/paths.test.ts @@ -4,8 +4,53 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect, beforeAll, afterAll } from 'vitest'; -import { escapePath, unescapePath, isSubpath } from './paths.js'; +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { describe, it, expect, beforeAll, afterAll, vi } from 'vitest'; +import { + escapePath, + resolvePath, + validatePath, + resolveAndValidatePath, + unescapePath, + isSubpath, +} from './paths.js'; +import type { Config } from '../config/config.js'; + +function createConfigStub({ + targetDir, + allowedDirectories, +}: { + targetDir: string; + allowedDirectories: string[]; +}): Config { + const resolvedTargetDir = path.resolve(targetDir); + const resolvedDirectories = allowedDirectories.map((dir) => + path.resolve(dir), + ); + + const workspaceContext = { + isPathWithinWorkspace(testPath: string) { + const resolvedPath = path.resolve(testPath); + return resolvedDirectories.some((dir) => { + const relative = path.relative(dir, resolvedPath); + return ( + relative === '' || + (!relative.startsWith('..') && !path.isAbsolute(relative)) + ); + }); + }, + getDirectories() { + return resolvedDirectories; + }, + }; + + return { + getTargetDir: () => resolvedTargetDir, + getWorkspaceContext: () => workspaceContext, + } as unknown as Config; +} describe('escapePath', () => { it('should escape spaces', () => { @@ -314,3 +359,240 @@ describe('isSubpath on Windows', () => { expect(isSubpath('Users\\Test\\file.txt', 'Users\\Test')).toBe(false); }); }); + +describe('resolvePath', () => { + it('resolves relative paths against the provided base directory', () => { + const result = resolvePath('/home/user/project', 'src/main.ts'); + expect(result).toBe(path.resolve('/home/user/project', 'src/main.ts')); + }); + + it('resolves relative paths against cwd when baseDir is undefined', () => { + const cwd = process.cwd(); + const result = resolvePath(undefined, 'src/main.ts'); + expect(result).toBe(path.resolve(cwd, 'src/main.ts')); + }); + + it('returns absolute paths unchanged', () => { + const absolutePath = '/absolute/path/to/file.ts'; + const result = resolvePath('/some/base', absolutePath); + expect(result).toBe(absolutePath); + }); + + it('expands tilde to home directory', () => { + const homeDir = os.homedir(); + const result = resolvePath(undefined, '~'); + expect(result).toBe(homeDir); + }); + + it('expands tilde-prefixed paths to home directory', () => { + const homeDir = os.homedir(); + const result = resolvePath(undefined, '~/documents/file.txt'); + expect(result).toBe(path.join(homeDir, 'documents/file.txt')); + }); + + it('uses baseDir when provided for relative paths', () => { + const baseDir = '/custom/base'; + const result = resolvePath(baseDir, './relative/path'); + expect(result).toBe(path.resolve(baseDir, './relative/path')); + }); + + it('handles tilde expansion regardless of baseDir', () => { + const homeDir = os.homedir(); + const result = resolvePath('/some/base', '~/file.txt'); + expect(result).toBe(path.join(homeDir, 'file.txt')); + }); + + it('handles dot paths correctly', () => { + const result = resolvePath('/base/dir', '.'); + expect(result).toBe(path.resolve('/base/dir', '.')); + }); + + it('handles parent directory references', () => { + const result = resolvePath('/base/dir/subdir', '..'); + expect(result).toBe(path.resolve('/base/dir/subdir', '..')); + }); +}); + +describe('validatePath', () => { + let workspaceRoot: string; + let config: Config; + + beforeAll(() => { + workspaceRoot = fs.mkdtempSync( + path.join(os.tmpdir(), 'validate-path-test-'), + ); + fs.mkdirSync(path.join(workspaceRoot, 'subdir')); + config = createConfigStub({ + targetDir: workspaceRoot, + allowedDirectories: [workspaceRoot], + }); + }); + + afterAll(() => { + fs.rmSync(workspaceRoot, { recursive: true, force: true }); + }); + + it('validates paths within workspace boundaries', () => { + const validPath = path.join(workspaceRoot, 'subdir'); + expect(() => validatePath(config, validPath)).not.toThrow(); + }); + + it('throws when path is outside workspace boundaries', () => { + const outsidePath = path.join(os.tmpdir(), 'outside'); + expect(() => validatePath(config, outsidePath)).toThrowError( + /Path is not within workspace/, + ); + }); + + it('throws when path does not exist', () => { + const nonExistentPath = path.join(workspaceRoot, 'nonexistent'); + expect(() => validatePath(config, nonExistentPath)).toThrowError( + /Path does not exist:/, + ); + }); + + it('throws when path is a file, not a directory (default behavior)', () => { + const filePath = path.join(workspaceRoot, 'test-file.txt'); + fs.writeFileSync(filePath, 'content'); + try { + expect(() => validatePath(config, filePath)).toThrowError( + /Path is not a directory/, + ); + } finally { + fs.rmSync(filePath); + } + }); + + it('allows files when allowFiles option is true', () => { + const filePath = path.join(workspaceRoot, 'test-file.txt'); + fs.writeFileSync(filePath, 'content'); + try { + expect(() => + validatePath(config, filePath, { allowFiles: true }), + ).not.toThrow(); + } finally { + fs.rmSync(filePath); + } + }); + + it('validates paths at workspace root', () => { + expect(() => validatePath(config, workspaceRoot)).not.toThrow(); + }); + + it('validates paths in allowed directories', () => { + const extraDir = fs.mkdtempSync(path.join(os.tmpdir(), 'validate-extra-')); + try { + const configWithExtra = createConfigStub({ + targetDir: workspaceRoot, + allowedDirectories: [workspaceRoot, extraDir], + }); + expect(() => validatePath(configWithExtra, extraDir)).not.toThrow(); + } finally { + fs.rmSync(extraDir, { recursive: true, force: true }); + } + }); +}); + +describe('resolveAndValidatePath', () => { + let workspaceRoot: string; + let config: Config; + + beforeAll(() => { + workspaceRoot = fs.mkdtempSync( + path.join(os.tmpdir(), 'resolve-and-validate-'), + ); + fs.mkdirSync(path.join(workspaceRoot, 'subdir')); + config = createConfigStub({ + targetDir: workspaceRoot, + allowedDirectories: [workspaceRoot], + }); + }); + + afterAll(() => { + fs.rmSync(workspaceRoot, { recursive: true, force: true }); + }); + + it('returns the target directory when no path is provided', () => { + expect(resolveAndValidatePath(config)).toBe(workspaceRoot); + }); + + it('resolves relative paths within the workspace', () => { + const expected = path.join(workspaceRoot, 'subdir'); + expect(resolveAndValidatePath(config, 'subdir')).toBe(expected); + }); + + it('allows absolute paths that are permitted by the workspace context', () => { + const extraDir = fs.mkdtempSync( + path.join(os.tmpdir(), 'resolve-and-validate-extra-'), + ); + try { + const configWithExtra = createConfigStub({ + targetDir: workspaceRoot, + allowedDirectories: [workspaceRoot, extraDir], + }); + expect(resolveAndValidatePath(configWithExtra, extraDir)).toBe(extraDir); + } finally { + fs.rmSync(extraDir, { recursive: true, force: true }); + } + }); + + it('expands tilde-prefixed paths using the home directory', () => { + const fakeHome = fs.mkdtempSync( + path.join(os.tmpdir(), 'resolve-and-validate-home-'), + ); + const homeSubdir = path.join(fakeHome, 'project'); + fs.mkdirSync(homeSubdir); + + const homedirSpy = vi.spyOn(os, 'homedir').mockReturnValue(fakeHome); + try { + const configWithHome = createConfigStub({ + targetDir: workspaceRoot, + allowedDirectories: [workspaceRoot, fakeHome], + }); + expect(resolveAndValidatePath(configWithHome, '~/project')).toBe( + homeSubdir, + ); + expect(resolveAndValidatePath(configWithHome, '~')).toBe(fakeHome); + } finally { + homedirSpy.mockRestore(); + fs.rmSync(fakeHome, { recursive: true, force: true }); + } + }); + + it('throws when the path resolves outside of the workspace', () => { + expect(() => resolveAndValidatePath(config, '../outside')).toThrowError( + /Path is not within workspace/, + ); + }); + + it('throws when the path does not exist', () => { + expect(() => resolveAndValidatePath(config, 'missing')).toThrowError( + /Path does not exist:/, + ); + }); + + it('throws when the path points to a file (default behavior)', () => { + const filePath = path.join(workspaceRoot, 'file.txt'); + fs.writeFileSync(filePath, 'content'); + try { + expect(() => resolveAndValidatePath(config, 'file.txt')).toThrowError( + `Path is not a directory: ${filePath}`, + ); + } finally { + fs.rmSync(filePath); + } + }); + + it('allows file paths when allowFiles option is true', () => { + const filePath = path.join(workspaceRoot, 'file.txt'); + fs.writeFileSync(filePath, 'content'); + try { + const result = resolveAndValidatePath(config, 'file.txt', { + allowFiles: true, + }); + expect(result).toBe(filePath); + } finally { + fs.rmSync(filePath); + } + }); +}); diff --git a/packages/core/src/utils/paths.ts b/packages/core/src/utils/paths.ts index 3bf301c8..c5986c68 100644 --- a/packages/core/src/utils/paths.ts +++ b/packages/core/src/utils/paths.ts @@ -4,9 +4,12 @@ * SPDX-License-Identifier: Apache-2.0 */ +import fs from 'node:fs'; import path from 'node:path'; import os from 'node:os'; import * as crypto from 'node:crypto'; +import type { Config } from '../config/config.js'; +import { isNodeError } from './errors.js'; export const QWEN_DIR = '.qwen'; export const GOOGLE_ACCOUNTS_FILENAME = 'google_accounts.json'; @@ -191,3 +194,93 @@ export function isSubpath(parentPath: string, childPath: string): boolean { !pathModule.isAbsolute(relative) ); } + +/** + * Resolves a path with tilde (~) expansion and relative path resolution. + * Handles tilde expansion for home directory and resolves relative paths + * against the provided base directory or current working directory. + * + * @param baseDir The base directory to resolve relative paths against (defaults to current working directory) + * @param relativePath The path to resolve (can be relative, absolute, or tilde-prefixed) + * @returns The resolved absolute path + */ +export function resolvePath( + baseDir: string | undefined = process.cwd(), + relativePath: string, +): string { + const homeDir = os.homedir(); + + if (relativePath === '~') { + return homeDir; + } else if (relativePath.startsWith('~/')) { + return path.join(homeDir, relativePath.slice(2)); + } else if (path.isAbsolute(relativePath)) { + return relativePath; + } else { + return path.resolve(baseDir, relativePath); + } +} + +export interface PathValidationOptions { + /** + * If true, allows both files and directories. If false (default), only allows directories. + */ + allowFiles?: boolean; +} + +/** + * Validates that a resolved path exists within the workspace boundaries. + * + * @param config The configuration object containing workspace context + * @param resolvedPath The absolute path to validate + * @param options Validation options + * @throws Error if the path is outside workspace boundaries, doesn't exist, or is not a directory (when allowFiles is false) + */ +export function validatePath( + config: Config, + resolvedPath: string, + options: PathValidationOptions = {}, +): void { + const { allowFiles = false } = options; + const workspaceContext = config.getWorkspaceContext(); + + if (!workspaceContext.isPathWithinWorkspace(resolvedPath)) { + throw new Error('Path is not within workspace'); + } + + try { + const stats = fs.statSync(resolvedPath); + if (!allowFiles && !stats.isDirectory()) { + throw new Error(`Path is not a directory: ${resolvedPath}`); + } + } catch (error: unknown) { + if (isNodeError(error) && error.code === 'ENOENT') { + throw new Error(`Path does not exist: ${resolvedPath}`); + } + throw error; + } +} + +/** + * Resolves a path relative to the workspace root and verifies that it exists + * within the workspace boundaries defined in the config. + * + * @param config The configuration object + * @param relativePath The relative path to resolve (optional, defaults to target directory) + * @param options Validation options (e.g., allowFiles to permit file paths) + */ +export function resolveAndValidatePath( + config: Config, + relativePath?: string, + options: PathValidationOptions = {}, +): string { + const targetDir = config.getTargetDir(); + + if (!relativePath) { + return targetDir; + } + + const resolvedPath = resolvePath(targetDir, relativePath); + validatePath(config, resolvedPath, options); + return resolvedPath; +}