mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-19 09:33:53 +00:00
589 lines
19 KiB
TypeScript
589 lines
19 KiB
TypeScript
/**
|
|
* @license
|
|
* Copyright 2025 Google LLC
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
import fsPromises from 'node:fs/promises';
|
|
import path from 'node:path';
|
|
import { EOL } from 'node:os';
|
|
import { spawn } from 'node:child_process';
|
|
import { globStream } from 'glob';
|
|
import type { ToolInvocation, ToolResult } from './tools.js';
|
|
import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js';
|
|
import { ToolNames } from './tool-names.js';
|
|
import { resolveAndValidatePath } from '../utils/paths.js';
|
|
import { getErrorMessage, isNodeError } from '../utils/errors.js';
|
|
import { isGitRepository } from '../utils/gitUtils.js';
|
|
import type { Config } from '../config/config.js';
|
|
import type { FileExclusions } from '../utils/ignorePatterns.js';
|
|
import { ToolErrorType } from './tool-error.js';
|
|
|
|
// Upper bound, in characters, on the formatted match listing (header excluded)
// that execute() returns to the model; longer listings are cut and flagged as truncated.
const MAX_LLM_CONTENT_LENGTH = 20_000;
|
|
|
|
// --- Interfaces ---
|
|
|
|
/**
|
|
* Parameters for the GrepTool
|
|
*/
|
|
export interface GrepToolParams {
  /** Regular expression to look for inside file contents. */
  pattern: string;

  /** Optional directory to search in; when omitted the search runs from the default location relative to the root. */
  path?: string;

  /** Optional glob restricting which files are searched, e.g. "*.js" or "*.{ts,tsx}". */
  glob?: string;

  /** Optional cap on the number of matching lines returned; every match is returned when omitted. */
  limit?: number;
}
|
|
|
|
/**
|
|
* Result object for a single grep match
|
|
*/
|
|
interface GrepMatch {
  /** Path of the file containing the match, as reported by the search strategy. */
  filePath: string;
  /** Line number of the matching line within the file. */
  lineNumber: number;
  /** Text of the matching line. */
  line: string;
}
|
|
|
|
class GrepToolInvocation extends BaseToolInvocation<
|
|
GrepToolParams,
|
|
ToolResult
|
|
> {
|
|
private readonly fileExclusions: FileExclusions;
|
|
|
|
/**
 * Binds one set of grep parameters to an invocation.
 *
 * @param config Configuration used for path resolution; also supplies the
 *   file exclusions consulted by the search strategies.
 * @param params Parameters of this grep call, forwarded to the base class.
 */
constructor(
  private readonly config: Config,
  params: GrepToolParams,
) {
  super(params);
  // Capture the exclusions once so every strategy filters the same way.
  this.fileExclusions = this.config.getFileExclusions();
}
|
|
|
|
/**
 * Runs the grep search and formats the matches for the model and the UI.
 *
 * Matches are grouped per file and ordered by line number inside each file.
 * Output is bounded first by the optional `limit` parameter (number of
 * matching lines) and then by MAX_LLM_CONTENT_LENGTH characters; when either
 * bound cuts results, a truncation notice is appended.
 *
 * @param signal Abort signal forwarded to the underlying search.
 * @returns The formatted result. Failures are reported through the `error`
 *   field of the returned object rather than thrown.
 */
async execute(signal: AbortSignal): Promise<ToolResult> {
  try {
    // The raw (possibly undefined) path parameter is resolved and validated
    // by the shared helper.
    const searchDirAbs = resolveAndValidatePath(
      this.config,
      this.params.path,
    );
    const searchDirDisplay = this.params.path || '.';

    const rawMatches = await this.performGrepSearch({
      pattern: this.params.pattern,
      path: searchDirAbs,
      glob: this.params.glob,
      signal,
    });

    const searchLocationDescription = this.params.path
      ? `in path "${searchDirDisplay}"`
      : `in the workspace directory`;

    const filterDescription = this.params.glob
      ? ` (filter: "${this.params.glob}")`
      : '';

    if (rawMatches.length === 0) {
      const noMatchMsg = `No matches found for pattern "${this.params.pattern}" ${searchLocationDescription}${filterDescription}.`;
      return { llmContent: noMatchMsg, returnDisplay: `No matches found` };
    }

    // Normalize the limit: a negative value would make slice() drop matches
    // from the tail, and a fractional one is meaningless as a line count.
    // Non-finite values (NaN, Infinity) mean "no limit".
    const { limit } = this.params;
    const lineLimit =
      limit !== undefined && Number.isFinite(limit)
        ? Math.max(0, Math.floor(limit))
        : undefined;
    const truncatedByLineLimit =
      lineLimit !== undefined && rawMatches.length > lineLimit;
    const matchesToInclude = truncatedByLineLimit
      ? rawMatches.slice(0, lineLimit)
      : rawMatches;

    const totalMatches = rawMatches.length;
    const matchTerm = totalMatches === 1 ? 'match' : 'matches';

    const header = `Found ${totalMatches} ${matchTerm} for pattern "${this.params.pattern}" ${searchLocationDescription}${filterDescription}:\n---\n`;

    // Group matches by file. A Map keeps first-seen file order even for file
    // names that look like integers or like Object.prototype members.
    const matchesByFile = new Map<string, GrepMatch[]>();
    for (const match of matchesToInclude) {
      const fileMatches = matchesByFile.get(match.filePath);
      if (fileMatches) {
        fileMatches.push(match);
      } else {
        matchesByFile.set(match.filePath, [match]);
      }
    }

    let grepOutput = '';
    for (const [filePath, fileMatches] of matchesByFile) {
      // Sort once per file instead of after every insertion.
      fileMatches.sort((a, b) => a.lineNumber - b.lineNumber);
      grepOutput += `File: ${filePath}\n`;
      for (const match of fileMatches) {
        grepOutput += `L${match.lineNumber}: ${match.line.trim()}\n`;
      }
      grepOutput += '---\n';
    }

    // Character limit as a safety net on top of the line limit.
    let truncatedByCharLimit = false;
    if (grepOutput.length > MAX_LLM_CONTENT_LENGTH) {
      grepOutput = grepOutput.slice(0, MAX_LLM_CONTENT_LENGTH) + '...';
      truncatedByCharLimit = true;
    }

    // Count the match lines that survived truncation. Only lines shaped like
    // "L<number>: " are matches; this keeps a cut-off "File:" header or the
    // trailing "..." marker from being counted as an included match.
    const includedLines = grepOutput
      .split('\n')
      .filter((line) => /^L\d+: /.test(line)).length;

    let llmContent = header + grepOutput;
    let displayMessage = `Found ${totalMatches} ${matchTerm}`;

    if (truncatedByLineLimit || truncatedByCharLimit) {
      const omittedMatches = totalMatches - includedLines;
      llmContent += ` [${omittedMatches} ${omittedMatches === 1 ? 'line' : 'lines'} truncated] ...`;
      displayMessage += ` (truncated)`;
    }

    return {
      llmContent: llmContent.trim(),
      returnDisplay: displayMessage,
    };
  } catch (error) {
    console.error(`Error during GrepLogic execution: ${error}`);
    const errorMessage = getErrorMessage(error);
    return {
      llmContent: `Error during grep search operation: ${errorMessage}`,
      returnDisplay: `Error: ${errorMessage}`,
      error: {
        message: errorMessage,
        type: ToolErrorType.GREP_EXECUTION_ERROR,
      },
    };
  }
}
|
|
|
|
/**
 * Checks whether a command can be found on the system's PATH.
 *
 * On Windows this runs `where <command>` through the shell. Elsewhere it
 * runs `command -v <command>` inside `sh -c`: `command` is a shell builtin
 * and is not guaranteed to exist as a standalone executable, so spawning it
 * directly can fail with ENOENT even when the probed tool is installed.
 * The command name is handed to the shell as a positional parameter, never
 * interpolated into the script text.
 *
 * @param command The command name (e.g., 'git', 'grep').
 * @returns A promise resolving to true when the probe exits with code 0,
 *   false when it exits non-zero or cannot be started.
 */
private isCommandAvailable(command: string): Promise<boolean> {
  return new Promise((resolve) => {
    const isWindows = process.platform === 'win32';
    const probeCommand = isWindows ? 'where' : 'sh';
    const probeArgs = isWindows
      ? [command]
      : ['-c', 'command -v "$1"', 'sh', command];
    try {
      const child = spawn(probeCommand, probeArgs, {
        stdio: 'ignore',
        shell: isWindows,
      });
      child.on('close', (code) => resolve(code === 0));
      child.on('error', () => resolve(false));
    } catch {
      // spawn can throw synchronously (e.g. invalid arguments); treat the
      // command as unavailable rather than failing the whole search.
      resolve(false);
    }
  });
}
|
|
|
|
/**
 * Parses the standard output of grep-like commands (git grep, system grep).
 *
 * Each record is expected on its own line as `filePath:lineNumber:lineContent`.
 * The path ends at the first `:<digits>:` sequence, so colons inside the
 * line content, and colons inside a file path that are not followed by a
 * purely numeric segment, are handled. Lines that do not fit the format are
 * skipped.
 *
 * Lines are split on LF or CRLF regardless of platform, because the tools
 * may emit LF-terminated output even where the OS default is CRLF.
 *
 * @param output The raw stdout string.
 * @param basePath The absolute directory the search was run from; reported
 *   paths are resolved against it and returned relative to it.
 * @returns Array of match objects, in output order.
 */
private parseGrepOutput(output: string, basePath: string): GrepMatch[] {
  const results: GrepMatch[] = [];
  if (!output) return results;

  // [\s\S] instead of "." so stray line-terminator characters inside a line
  // (a lone \r, U+2028) do not make the record unparseable.
  const recordPattern = /^([\s\S]+?):(\d+):([\s\S]*)$/;

  for (const line of output.split(/\r?\n/)) {
    if (!line.trim()) continue;

    const record = recordPattern.exec(line);
    if (!record) continue; // Malformed

    const [, filePathRaw = '', lineNumberStr = '', lineContent = ''] = record;
    const lineNumber = parseInt(lineNumberStr, 10);
    if (isNaN(lineNumber)) continue;

    const absoluteFilePath = path.resolve(basePath, filePathRaw);
    const relativeFilePath = path.relative(basePath, absoluteFilePath);

    results.push({
      // An empty relative path means the match resolved to basePath itself.
      filePath: relativeFilePath || path.basename(absoluteFilePath),
      lineNumber,
      line: lineContent,
    });
  }
  return results;
}
|
|
|
|
/**
 * Builds a short human-readable summary of this grep call: the pattern, the
 * target path ('./' when none was given) and, if present, the glob filter.
 * @returns The summary string.
 */
getDescription(): string {
  const targetPath = this.params.path || './';
  const summary = `'${this.params.pattern}' in path '${targetPath}'`;
  return this.params.glob
    ? `${summary} (filter: '${this.params.glob}')`
    : summary;
}
|
|
|
|
/**
 * Performs the actual search, trying strategies in priority order:
 * `git grep` (only when the directory is a git repository and git is
 * available), then system `grep`, then a pure JavaScript scan.
 *
 * A strategy that fails is logged at debug level and the next one is tried,
 * unless the search was aborted, in which case the failure is rethrown so an
 * aborted search does not restart under another strategy. All three
 * strategies match case-insensitively.
 *
 * @param options Search options: regex pattern, absolute directory path,
 *   optional glob filter and the abort signal.
 * @returns A promise resolving to an array of match objects.
 * @throws Rethrows (after logging) any error not absorbed by a fallback.
 */
private async performGrepSearch(options: {
  pattern: string;
  path: string; // Expects absolute path
  glob?: string;
  signal: AbortSignal;
}): Promise<GrepMatch[]> {
  const { pattern, path: absolutePath, glob, signal } = options;
  let strategyUsed = 'none';

  try {
    // --- Strategy 1: git grep ---
    const isGit = isGitRepository(absolutePath);
    const gitAvailable = isGit && (await this.isCommandAvailable('git'));

    if (gitAvailable) {
      strategyUsed = 'git grep';
      const gitArgs = [
        'grep',
        '--untracked',
        '-n',
        '-E',
        '--ignore-case',
        // Pass the pattern through -e so a pattern beginning with "-" is
        // never parsed as a git grep option.
        '-e',
        pattern,
      ];
      if (glob) {
        gitArgs.push('--', glob);
      }

      try {
        const output = await new Promise<string>((resolve, reject) => {
          const child = spawn('git', gitArgs, {
            cwd: absolutePath,
            windowsHide: true,
            signal, // kills the child when the search is aborted
          });
          const stdoutChunks: Buffer[] = [];
          const stderrChunks: Buffer[] = [];

          child.stdout.on('data', (chunk) => stdoutChunks.push(chunk));
          child.stderr.on('data', (chunk) => stderrChunks.push(chunk));
          child.on('error', (err) =>
            reject(new Error(`Failed to start git grep: ${err.message}`)),
          );
          child.on('close', (code) => {
            const stdoutData = Buffer.concat(stdoutChunks).toString('utf8');
            const stderrData = Buffer.concat(stderrChunks).toString('utf8');
            if (code === 0) resolve(stdoutData);
            else if (code === 1)
              resolve(''); // No matches
            else
              reject(
                new Error(`git grep exited with code ${code}: ${stderrData}`),
              );
          });
        });
        return this.parseGrepOutput(output, absolutePath);
      } catch (gitError: unknown) {
        if (signal.aborted) throw gitError;
        console.debug(
          `GrepLogic: git grep failed: ${getErrorMessage(
            gitError,
          )}. Falling back...`,
        );
      }
    }

    // --- Strategy 2: System grep ---
    const grepAvailable = await this.isCommandAvailable('grep');
    if (grepAvailable) {
      strategyUsed = 'system grep';
      // -i keeps this strategy case-insensitive like the other two.
      const grepArgs = ['-r', '-n', '-H', '-E', '-i'];

      // Derive plain directory names from the exclusion globs for
      // grep --exclude-dir; anything that still contains a slash or a
      // wildcard after stripping is treated as a file pattern and skipped.
      const globExcludes = this.fileExclusions.getGlobExcludes();
      const commonExcludes = globExcludes
        .map((excludePattern) => {
          let dir = excludePattern;
          if (dir.startsWith('**/')) {
            dir = dir.substring(3);
          }
          if (dir.endsWith('/**')) {
            dir = dir.slice(0, -3);
          } else if (dir.endsWith('/')) {
            dir = dir.slice(0, -1);
          }

          if (dir && !dir.includes('/') && !dir.includes('*')) {
            return dir;
          }
          return null;
        })
        .filter((dir): dir is string => !!dir);
      commonExcludes.forEach((dir) => grepArgs.push(`--exclude-dir=${dir}`));
      if (glob) {
        grepArgs.push(`--include=${glob}`);
      }
      // -e prevents a pattern beginning with "-" from being read as an option.
      grepArgs.push('-e', pattern);
      grepArgs.push('.');

      try {
        const output = await new Promise<string>((resolve, reject) => {
          const child = spawn('grep', grepArgs, {
            cwd: absolutePath,
            windowsHide: true,
            signal, // kills the child when the search is aborted
          });
          const stdoutChunks: Buffer[] = [];
          const stderrChunks: Buffer[] = [];

          const onData = (chunk: Buffer) => stdoutChunks.push(chunk);
          const onStderr = (chunk: Buffer) => {
            const stderrStr = chunk.toString();
            // Suppress common harmless stderr messages
            if (
              !stderrStr.includes('Permission denied') &&
              !/grep:.*: Is a directory/i.test(stderrStr)
            ) {
              stderrChunks.push(chunk);
            }
          };
          const onError = (err: Error) => {
            cleanup();
            reject(new Error(`Failed to start system grep: ${err.message}`));
          };
          const onClose = (code: number | null) => {
            const stdoutData = Buffer.concat(stdoutChunks).toString('utf8');
            const stderrData = Buffer.concat(stderrChunks)
              .toString('utf8')
              .trim();
            cleanup();
            if (code === 0) resolve(stdoutData);
            else if (code === 1)
              resolve(''); // No matches
            else {
              if (stderrData)
                reject(
                  new Error(
                    `System grep exited with code ${code}: ${stderrData}`,
                  ),
                );
              else resolve(''); // Exit code > 1 but no stderr, likely just suppressed errors
            }
          };

          // Detach every listener once the process has settled. There is
          // no IPC channel on this child, so nothing needs disconnecting.
          const cleanup = () => {
            child.stdout.removeListener('data', onData);
            child.stderr.removeListener('data', onStderr);
            child.removeListener('error', onError);
            child.removeListener('close', onClose);
          };

          child.stdout.on('data', onData);
          child.stderr.on('data', onStderr);
          child.on('error', onError);
          child.on('close', onClose);
        });
        return this.parseGrepOutput(output, absolutePath);
      } catch (grepError: unknown) {
        if (signal.aborted) throw grepError;
        console.debug(
          `GrepLogic: System grep failed: ${getErrorMessage(
            grepError,
          )}. Falling back...`,
        );
      }
    }

    // --- Strategy 3: Pure JavaScript Fallback ---
    console.debug(
      'GrepLogic: Falling back to JavaScript grep implementation.',
    );
    strategyUsed = 'javascript fallback';
    const globPattern = glob ? glob : '**/*';
    const ignorePatterns = this.fileExclusions.getGlobExcludes();

    const filesIterator = globStream(globPattern, {
      cwd: absolutePath,
      dot: true,
      ignore: ignorePatterns,
      absolute: true,
      nodir: true,
      signal,
    });

    const regex = new RegExp(pattern, 'i');
    const allMatches: GrepMatch[] = [];

    for await (const filePath of filesIterator) {
      const fileAbsolutePath = filePath as string;
      try {
        const content = await fsPromises.readFile(fileAbsolutePath, 'utf8');
        const lines = content.split(/\r?\n/);
        lines.forEach((line, index) => {
          if (regex.test(line)) {
            allMatches.push({
              filePath:
                path.relative(absolutePath, fileAbsolutePath) ||
                path.basename(fileAbsolutePath),
              lineNumber: index + 1,
              line,
            });
          }
        });
      } catch (readError: unknown) {
        // Unreadable files never fail the search. A file that vanished
        // (ENOENT) is skipped silently; other read errors are logged.
        if (!isNodeError(readError) || readError.code !== 'ENOENT') {
          console.debug(
            `GrepLogic: Could not read/process ${fileAbsolutePath}: ${getErrorMessage(
              readError,
            )}`,
          );
        }
      }
    }

    return allMatches;
  } catch (error: unknown) {
    console.error(
      `GrepLogic: Error in performGrepSearch (Strategy: ${strategyUsed}): ${getErrorMessage(
        error,
      )}`,
    );
    throw error; // Re-throw
  }
}
|
|
}
|
|
|
|
// --- GrepLogic Class ---
|
|
|
|
/**
 * Declarative definition of the Grep tool: registers its name, description
 * and parameter schema, validates incoming parameters, and creates the
 * invocation object that performs the search.
 */
export class GrepTool extends BaseDeclarativeTool<GrepToolParams, ToolResult> {
  static readonly Name = ToolNames.GREP;

  /**
   * @param config Configuration used to validate search paths and handed to
   *   every invocation this tool creates.
   */
  constructor(private readonly config: Config) {
    super(
      GrepTool.Name,
      'Grep',
      'A powerful search tool for finding patterns in files\n\n Usage:\n - ALWAYS use Grep for search tasks. NEVER invoke `grep` or `rg` as a Bash command. The Grep tool has been optimized for correct permissions and access.\n - Supports full regex syntax (e.g., "log.*Error", "function\\s+\\w+")\n - Filter files with glob parameter (e.g., "*.js", "**/*.tsx")\n - Case-insensitive by default\n - Use Task tool for open-ended searches requiring multiple rounds\n',
      Kind.Search,
      {
        properties: {
          pattern: {
            type: 'string',
            description:
              'The regular expression pattern to search for in file contents',
          },
          glob: {
            type: 'string',
            description:
              'Glob pattern to filter files (e.g. "*.js", "*.{ts,tsx}")',
          },
          path: {
            type: 'string',
            description:
              'File or directory to search in. Defaults to current working directory.',
          },
          limit: {
            type: 'number',
            description:
              'Limit output to first N matching lines. Optional - shows all matches if not specified.',
          },
        },
        required: ['pattern'],
        type: 'object',
      },
    );
  }

  /**
   * Checks the parameter values before an invocation is created.
   *
   * The pattern must compile as a JavaScript regular expression, and the
   * path, when one is given, must pass path resolution and validation.
   *
   * @param params Parameters to validate.
   * @returns An error message when a value is invalid, otherwise null.
   */
  protected override validateToolParamValues(
    params: GrepToolParams,
  ): string | null {
    const { pattern, path: requestedPath } = params;

    try {
      new RegExp(pattern);
    } catch (error) {
      return `Invalid regular expression pattern: ${pattern}. Error: ${getErrorMessage(error)}`;
    }

    // Nothing more to check when no path was supplied.
    if (!requestedPath) {
      return null;
    }

    try {
      resolveAndValidatePath(this.config, requestedPath);
    } catch (error) {
      return getErrorMessage(error);
    }
    return null;
  }

  /**
   * Creates the invocation that runs a search with the given parameters.
   * @param params Parameters for the grep call.
   * @returns A new invocation bound to this tool's configuration.
   */
  protected createInvocation(
    params: GrepToolParams,
  ): ToolInvocation<GrepToolParams, ToolResult> {
    const invocation = new GrepToolInvocation(this.config, params);
    return invocation;
  }
}
|