Replace spawn with execFile for memory-safe command execution (#1068)

This commit is contained in:
tanzhenxin
2025-11-20 15:04:00 +08:00
committed by GitHub
parent a15b84e2a1
commit 442a9aed58
20 changed files with 620 additions and 969 deletions

View File

@@ -4,30 +4,12 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach, vi, type Mock } from 'vitest';
import {
canUseRipgrep,
getRipgrepCommand,
getBuiltinRipgrep,
} from './ripgrepUtils.js';
import { fileExists } from './fileUtils.js';
import { describe, it, expect } from 'vitest';
import { getBuiltinRipgrep } from './ripgrepUtils.js';
import path from 'node:path';
// Mock fileUtils
vi.mock('./fileUtils.js', async (importOriginal) => {
const actual = await importOriginal<typeof import('./fileUtils.js')>();
return {
...actual,
fileExists: vi.fn(),
};
});
describe('ripgrepUtils', () => {
beforeEach(() => {
vi.clearAllMocks();
});
describe('getBulltinRipgrepPath', () => {
describe('getBuiltinRipgrep', () => {
it('should return path with .exe extension on Windows', () => {
const originalPlatform = process.platform;
const originalArch = process.arch;
@@ -150,99 +132,4 @@ describe('ripgrepUtils', () => {
Object.defineProperty(process, 'arch', { value: originalArch });
});
});
describe('canUseRipgrep', () => {
it('should return true if ripgrep binary exists (builtin)', async () => {
(fileExists as Mock).mockResolvedValue(true);
const result = await canUseRipgrep(true);
expect(result).toBe(true);
expect(fileExists).toHaveBeenCalledOnce();
});
it('should return true if ripgrep binary exists (default)', async () => {
(fileExists as Mock).mockResolvedValue(true);
const result = await canUseRipgrep();
expect(result).toBe(true);
expect(fileExists).toHaveBeenCalledOnce();
});
});
describe('ensureRipgrepPath', () => {
it('should return bundled ripgrep path if binary exists (useBuiltin=true)', async () => {
(fileExists as Mock).mockResolvedValue(true);
const rgPath = await getRipgrepCommand(true);
expect(rgPath).toBeDefined();
expect(rgPath).toContain('rg');
expect(rgPath).not.toBe('rg'); // Should be full path, not just 'rg'
expect(fileExists).toHaveBeenCalledOnce();
expect(fileExists).toHaveBeenCalledWith(rgPath);
});
it('should return bundled ripgrep path if binary exists (default)', async () => {
(fileExists as Mock).mockResolvedValue(true);
const rgPath = await getRipgrepCommand();
expect(rgPath).toBeDefined();
expect(rgPath).toContain('rg');
expect(fileExists).toHaveBeenCalledOnce();
});
it('should fall back to system rg if bundled binary does not exist', async () => {
(fileExists as Mock).mockResolvedValue(false);
// When useBuiltin is true but bundled binary doesn't exist,
// it should fall back to checking system rg
// The test result depends on whether system rg is actually available
const rgPath = await getRipgrepCommand(true);
expect(fileExists).toHaveBeenCalledOnce();
// If system rg is available, it should return 'rg' (or 'rg.exe' on Windows)
// This test will pass if system ripgrep is installed
expect(rgPath).toBeDefined();
});
it('should use system rg when useBuiltin=false', async () => {
// When useBuiltin is false, should skip bundled check and go straight to system rg
const rgPath = await getRipgrepCommand(false);
// Should not check for bundled binary
expect(fileExists).not.toHaveBeenCalled();
// If system rg is available, it should return 'rg' (or 'rg.exe' on Windows)
expect(rgPath).toBeDefined();
});
it('should throw error if neither bundled nor system ripgrep is available', async () => {
// This test only makes sense in an environment where system rg is not installed
// We'll skip this test in CI/local environments where rg might be available
// Instead, we test the error message format
const originalPlatform = process.platform;
// Use an unsupported platform to trigger the error path
Object.defineProperty(process, 'platform', { value: 'freebsd' });
try {
await getRipgrepCommand();
// If we get here without error, system rg was available, which is fine
} catch (error) {
expect(error).toBeInstanceOf(Error);
const errorMessage = (error as Error).message;
// Should contain helpful error information
expect(
errorMessage.includes('Ripgrep binary not found') ||
errorMessage.includes('Failed to locate ripgrep') ||
errorMessage.includes('Unsupported platform'),
).toBe(true);
}
// Restore original value
Object.defineProperty(process, 'platform', { value: originalPlatform });
});
});
});

View File

@@ -6,7 +6,53 @@
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { execFile } from 'node:child_process';
import { fileExists } from './fileUtils.js';
import { execCommand, isCommandAvailable } from './shell-utils.js';
const RIPGREP_COMMAND = 'rg';
const RIPGREP_BUFFER_LIMIT = 20_000_000; // Keep buffers aligned with the original bundle.
const RIPGREP_TEST_TIMEOUT_MS = 5_000;
const RIPGREP_RUN_TIMEOUT_MS = 10_000;
const RIPGREP_WSL_TIMEOUT_MS = 60_000;
type RipgrepMode = 'builtin' | 'system';
interface RipgrepSelection {
mode: RipgrepMode;
command: string;
}
interface RipgrepHealth {
working: boolean;
lastTested: number;
selection: RipgrepSelection;
}
export interface RipgrepRunResult {
/**
* The stdout output from ripgrep
*/
stdout: string;
/**
* Whether the results were truncated due to buffer overflow or signal termination
*/
truncated: boolean;
/**
* Any error that occurred during execution (non-fatal errors like no matches won't populate this)
*/
error?: Error;
}
let cachedSelection: RipgrepSelection | null = null;
let cachedHealth: RipgrepHealth | null = null;
let macSigningAttempted = false;
function wslTimeout(): number {
return process.platform === 'linux' && process.env['WSL_INTEROP']
? RIPGREP_WSL_TIMEOUT_MS
: RIPGREP_RUN_TIMEOUT_MS;
}
// Get the directory of the current module
const __filename = fileURLToPath(import.meta.url);
@@ -88,59 +134,201 @@ export function getBuiltinRipgrep(): string | null {
return vendorPath;
}
/**
* Checks if system ripgrep is available and returns the command to use
* @returns The ripgrep command ('rg' or 'rg.exe') if available, or null if not found
*/
export async function getSystemRipgrep(): Promise<string | null> {
try {
const { spawn } = await import('node:child_process');
const rgCommand = process.platform === 'win32' ? 'rg.exe' : 'rg';
const isAvailable = await new Promise<boolean>((resolve) => {
const proc = spawn(rgCommand, ['--version']);
proc.on('error', () => resolve(false));
proc.on('exit', (code) => resolve(code === 0));
});
return isAvailable ? rgCommand : null;
} catch (_error) {
return null;
}
}
/**
* Checks if ripgrep binary exists and returns its path
* @param useBuiltin If true, tries bundled ripgrep first, then falls back to system ripgrep.
* If false, only checks for system ripgrep.
* @returns The path to ripgrep binary ('rg' or 'rg.exe' for system ripgrep, or full path for bundled), or null if not available
* @throws {Error} If an error occurs while resolving the ripgrep binary.
*/
export async function getRipgrepCommand(
export async function resolveRipgrep(
useBuiltin: boolean = true,
): Promise<string | null> {
try {
if (useBuiltin) {
// Try bundled ripgrep first
const rgPath = getBuiltinRipgrep();
if (rgPath && (await fileExists(rgPath))) {
return rgPath;
}
// Fallback to system rg if bundled binary is not available
}
): Promise<RipgrepSelection | null> {
if (cachedSelection) return cachedSelection;
// Check for system ripgrep
return await getSystemRipgrep();
} catch (_error) {
return null;
if (useBuiltin) {
// Try bundled ripgrep first
const rgPath = getBuiltinRipgrep();
if (rgPath && (await fileExists(rgPath))) {
cachedSelection = { mode: 'builtin', command: rgPath };
return cachedSelection;
}
// Fallback to system rg if bundled binary is not available
}
const { available, error } = isCommandAvailable(RIPGREP_COMMAND);
if (available) {
cachedSelection = { mode: 'system', command: RIPGREP_COMMAND };
return cachedSelection;
}
if (error) {
throw error;
}
return null;
}
/**
* Ensures that ripgrep is healthy by checking its version.
* @param selection The ripgrep selection to check.
* @throws {Error} If ripgrep is not found or is not healthy.
*/
export async function ensureRipgrepHealthy(
selection: RipgrepSelection,
): Promise<void> {
if (
cachedHealth &&
cachedHealth.selection.command === selection.command &&
cachedHealth.working
)
return;
try {
const { stdout, code } = await execCommand(
selection.command,
['--version'],
{
timeout: RIPGREP_TEST_TIMEOUT_MS,
},
);
const working = code === 0 && stdout.startsWith('ripgrep');
cachedHealth = { working, lastTested: Date.now(), selection };
} catch (error) {
cachedHealth = { working: false, lastTested: Date.now(), selection };
throw error;
}
}
export async function ensureMacBinarySigned(
selection: RipgrepSelection,
): Promise<void> {
if (process.platform !== 'darwin') return;
if (macSigningAttempted) return;
macSigningAttempted = true;
if (selection.mode !== 'builtin') return;
const binaryPath = selection.command;
const inspect = await execCommand('codesign', ['-vv', '-d', binaryPath], {
preserveOutputOnError: false,
});
const alreadySigned =
inspect.stdout
?.split('\n')
.some((line) => line.includes('linker-signed')) ?? false;
if (!alreadySigned) return;
await execCommand('codesign', [
'--sign',
'-',
'--force',
'--preserve-metadata=entitlements,requirements,flags,runtime',
binaryPath,
]);
await execCommand('xattr', ['-d', 'com.apple.quarantine', binaryPath]);
}
/**
* Checks if ripgrep binary is available
* @param useBuiltin If true, tries bundled ripgrep first, then falls back to system ripgrep.
* If false, only checks for system ripgrep.
* @returns True if ripgrep is available, false otherwise.
* @throws {Error} If an error occurs while resolving the ripgrep binary.
*/
export async function canUseRipgrep(
useBuiltin: boolean = true,
): Promise<boolean> {
const rgPath = await getRipgrepCommand(useBuiltin);
return rgPath !== null;
const selection = await resolveRipgrep(useBuiltin);
if (!selection) {
return false;
}
await ensureRipgrepHealthy(selection);
return true;
}
/**
* Runs ripgrep with the provided arguments
* @param args The arguments to pass to ripgrep
* @param signal The signal to abort the ripgrep process
* @returns The result of running ripgrep
* @throws {Error} If an error occurs while running ripgrep.
*/
export async function runRipgrep(
args: string[],
signal?: AbortSignal,
): Promise<RipgrepRunResult> {
const selection = await resolveRipgrep();
if (!selection) {
throw new Error('ripgrep not found.');
}
await ensureRipgrepHealthy(selection);
return new Promise<RipgrepRunResult>((resolve) => {
const child = execFile(
selection.command,
args,
{
maxBuffer: RIPGREP_BUFFER_LIMIT,
timeout: wslTimeout(),
signal,
},
(error, stdout = '', stderr = '') => {
if (!error) {
// Success case
resolve({
stdout,
truncated: false,
});
return;
}
// Exit code 1 = no matches found (not an error)
// The error.code from execFile can be string | number | undefined | null
const errorCode = (
error as Error & { code?: string | number | undefined | null }
).code;
if (errorCode === 1) {
resolve({ stdout: '', truncated: false });
return;
}
// Detect various error conditions
const wasKilled =
error.signal === 'SIGTERM' || error.name === 'AbortError';
const overflow = errorCode === 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER';
const syntaxError = errorCode === 2;
const truncated = wasKilled || overflow;
let partialOutput = stdout;
// If killed or overflow with partial output, remove the last potentially incomplete line
if (truncated && partialOutput.length > 0) {
const lines = partialOutput.split('\n');
if (lines.length > 0) {
lines.pop();
partialOutput = lines.join('\n');
}
}
// Log warnings for abnormal exits (except syntax errors)
if (!syntaxError && truncated) {
console.warn(
`ripgrep exited abnormally (signal=${error.signal} code=${error.code}) with stderr:\n${stderr.trim() || '(empty)'}`,
);
}
resolve({
stdout: partialOutput,
truncated,
error: error instanceof Error ? error : undefined,
});
},
);
// Handle spawn errors
child.on('error', (err) =>
resolve({ stdout: '', truncated: false, error: err }),
);
});
}

View File

@@ -10,7 +10,12 @@ import os from 'node:os';
import { quote } from 'shell-quote';
import { doesToolInvocationMatch } from './tool-utils.js';
import { isShellCommandReadOnly } from './shellReadOnlyChecker.js';
import { spawn, type SpawnOptionsWithoutStdio } from 'node:child_process';
import {
execFile,
execFileSync,
type ExecFileOptions,
} from 'node:child_process';
import { accessSync, constants as fsConstants } from 'node:fs';
const SHELL_TOOL_NAMES = ['run_shell_command', 'ShellTool'];
@@ -455,46 +460,101 @@ export function checkCommandPermissions(
}
/**
* Determines whether a given shell command is allowed to execute based on
* the tool's configuration including allowlists and blocklists.
* Executes a command with the given arguments without using a shell.
*
* This function operates in "default allow" mode. It is a wrapper around
* `checkCommandPermissions`.
* This is a wrapper around Node.js's `execFile`, which spawns a process
* directly without invoking a shell, making it safer than `exec`.
* It's suitable for short-running commands with limited output.
*
* @param command The shell command string to validate.
* @param config The application configuration.
* @returns An object with 'allowed' boolean and optional 'reason' string if not allowed.
* @param command The command to execute (e.g., 'git', 'osascript').
* @param args Array of arguments to pass to the command.
* @param options Optional spawn options including:
* - preserveOutputOnError: If false (default), rejects on error.
* If true, resolves with output and error code.
* - Other standard spawn options (e.g., cwd, env).
* @returns A promise that resolves with stdout, stderr strings, and exit code.
* @throws Rejects with an error if the command fails (unless preserveOutputOnError is true).
*/
export const spawnAsync = (
export function execCommand(
command: string,
args: string[],
options?: SpawnOptionsWithoutStdio,
): Promise<{ stdout: string; stderr: string }> =>
new Promise((resolve, reject) => {
const child = spawn(command, args, options);
let stdout = '';
let stderr = '';
child.stdout.on('data', (data) => {
stdout += data.toString();
});
child.stderr.on('data', (data) => {
stderr += data.toString();
});
child.on('close', (code) => {
if (code === 0) {
resolve({ stdout, stderr });
} else {
reject(new Error(`Command failed with exit code ${code}:\n${stderr}`));
}
});
child.on('error', (err) => {
reject(err);
});
options: { preserveOutputOnError?: boolean } & ExecFileOptions = {},
): Promise<{ stdout: string; stderr: string; code: number }> {
return new Promise((resolve, reject) => {
const child = execFile(
command,
args,
{ encoding: 'utf8', ...options },
(error, stdout, stderr) => {
if (error) {
if (!options.preserveOutputOnError) {
reject(error);
} else {
resolve({
stdout: stdout ?? '',
stderr: stderr ?? '',
code: typeof error.code === 'number' ? error.code : 1,
});
}
return;
}
resolve({ stdout: stdout ?? '', stderr: stderr ?? '', code: 0 });
},
);
child.on('error', reject);
});
}
/**
* Resolves the path of a command in the system's PATH.
* @param {string} command The command name (e.g., 'git', 'grep').
* @returns {path: string | null; error?: Error} The path of the command, or null if it is not found and any error that occurred.
*/
export function resolveCommandPath(command: string): {
path: string | null;
error?: Error;
} {
try {
const isWin = process.platform === 'win32';
const checkCommand = isWin ? 'where' : 'command';
const checkArgs = isWin ? [command] : ['-v', command];
let result: string | null = null;
try {
result = execFileSync(checkCommand, checkArgs, {
encoding: 'utf8',
shell: isWin,
}).trim();
} catch {
console.warn(`Command ${checkCommand} not found`);
}
if (!result) return { path: null, error: undefined };
if (!isWin) {
accessSync(result, fsConstants.X_OK);
}
return { path: result, error: undefined };
} catch (error) {
return {
path: null,
error: error instanceof Error ? error : new Error(String(error)),
};
}
}
/**
* Checks if a command is available in the system's PATH.
* @param {string} command The command name (e.g., 'git', 'grep').
* @returns {available: boolean; error?: Error} The availability of the command and any error that occurred.
*/
export function isCommandAvailable(command: string): {
available: boolean;
error?: Error;
} {
const { path, error } = resolveCommandPath(command);
return { available: path !== null, error };
}
export function isCommandAllowed(
command: string,