Replace spawn with execFile for memory-safe command execution (#1068)

2025-12-21 17:27:54 +00:00 · 2025-11-20 15:04:00 +08:00
parent a15b84e2a1
commit 442a9aed58
20 changed files with 620 additions and 969 deletions
--- a/packages/core/src/utils/ripgrepUtils.test.ts
+++ b/packages/core/src/utils/ripgrepUtils.test.ts
@@ -4,30 +4,12 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-import { describe, it, expect, beforeEach, vi, type Mock } from 'vitest';
-import {
-  canUseRipgrep,
-  getRipgrepCommand,
-  getBuiltinRipgrep,
-} from './ripgrepUtils.js';
-import { fileExists } from './fileUtils.js';
+import { describe, it, expect } from 'vitest';
+import { getBuiltinRipgrep } from './ripgrepUtils.js';
 import path from 'node:path';

-// Mock fileUtils
-vi.mock('./fileUtils.js', async (importOriginal) => {
-  const actual = await importOriginal<typeof import('./fileUtils.js')>();
-  return {
-    ...actual,
-    fileExists: vi.fn(),
-  };
-});
-
 describe('ripgrepUtils', () => {
-  beforeEach(() => {
-    vi.clearAllMocks();
-  });
-
-  describe('getBulltinRipgrepPath', () => {
+  describe('getBuiltinRipgrep', () => {
    it('should return path with .exe extension on Windows', () => {
      const originalPlatform = process.platform;
      const originalArch = process.arch;
@@ -150,99 +132,4 @@ describe('ripgrepUtils', () => {
      Object.defineProperty(process, 'arch', { value: originalArch });
    });
  });
-
-  describe('canUseRipgrep', () => {
-    it('should return true if ripgrep binary exists (builtin)', async () => {
-      (fileExists as Mock).mockResolvedValue(true);
-
-      const result = await canUseRipgrep(true);
-
-      expect(result).toBe(true);
-      expect(fileExists).toHaveBeenCalledOnce();
-    });
-
-    it('should return true if ripgrep binary exists (default)', async () => {
-      (fileExists as Mock).mockResolvedValue(true);
-
-      const result = await canUseRipgrep();
-
-      expect(result).toBe(true);
-      expect(fileExists).toHaveBeenCalledOnce();
-    });
-  });
-
-  describe('ensureRipgrepPath', () => {
-    it('should return bundled ripgrep path if binary exists (useBuiltin=true)', async () => {
-      (fileExists as Mock).mockResolvedValue(true);
-
-      const rgPath = await getRipgrepCommand(true);
-
-      expect(rgPath).toBeDefined();
-      expect(rgPath).toContain('rg');
-      expect(rgPath).not.toBe('rg'); // Should be full path, not just 'rg'
-      expect(fileExists).toHaveBeenCalledOnce();
-      expect(fileExists).toHaveBeenCalledWith(rgPath);
-    });
-
-    it('should return bundled ripgrep path if binary exists (default)', async () => {
-      (fileExists as Mock).mockResolvedValue(true);
-
-      const rgPath = await getRipgrepCommand();
-
-      expect(rgPath).toBeDefined();
-      expect(rgPath).toContain('rg');
-      expect(fileExists).toHaveBeenCalledOnce();
-    });
-
-    it('should fall back to system rg if bundled binary does not exist', async () => {
-      (fileExists as Mock).mockResolvedValue(false);
-      // When useBuiltin is true but bundled binary doesn't exist,
-      // it should fall back to checking system rg
-      // The test result depends on whether system rg is actually available
-
-      const rgPath = await getRipgrepCommand(true);
-
-      expect(fileExists).toHaveBeenCalledOnce();
-      // If system rg is available, it should return 'rg' (or 'rg.exe' on Windows)
-      // This test will pass if system ripgrep is installed
-      expect(rgPath).toBeDefined();
-    });
-
-    it('should use system rg when useBuiltin=false', async () => {
-      // When useBuiltin is false, should skip bundled check and go straight to system rg
-      const rgPath = await getRipgrepCommand(false);
-
-      // Should not check for bundled binary
-      expect(fileExists).not.toHaveBeenCalled();
-      // If system rg is available, it should return 'rg' (or 'rg.exe' on Windows)
-      expect(rgPath).toBeDefined();
-    });
-
-    it('should throw error if neither bundled nor system ripgrep is available', async () => {
-      // This test only makes sense in an environment where system rg is not installed
-      // We'll skip this test in CI/local environments where rg might be available
-      // Instead, we test the error message format
-      const originalPlatform = process.platform;
-
-      // Use an unsupported platform to trigger the error path
-      Object.defineProperty(process, 'platform', { value: 'freebsd' });
-
-      try {
-        await getRipgrepCommand();
-        // If we get here without error, system rg was available, which is fine
-      } catch (error) {
-        expect(error).toBeInstanceOf(Error);
-        const errorMessage = (error as Error).message;
-        // Should contain helpful error information
-        expect(
-          errorMessage.includes('Ripgrep binary not found') ||
-            errorMessage.includes('Failed to locate ripgrep') ||
-            errorMessage.includes('Unsupported platform'),
-        ).toBe(true);
-      }
-
-      // Restore original value
-      Object.defineProperty(process, 'platform', { value: originalPlatform });
-    });
-  });
 });
--- a/packages/core/src/utils/ripgrepUtils.ts
+++ b/packages/core/src/utils/ripgrepUtils.ts
@@ -6,7 +6,53 @@

 import path from 'node:path';
 import { fileURLToPath } from 'node:url';
+import { execFile } from 'node:child_process';
 import { fileExists } from './fileUtils.js';
+import { execCommand, isCommandAvailable } from './shell-utils.js';
+
+const RIPGREP_COMMAND = 'rg';
+const RIPGREP_BUFFER_LIMIT = 20_000_000; // Keep buffers aligned with the original bundle.
+const RIPGREP_TEST_TIMEOUT_MS = 5_000;
+const RIPGREP_RUN_TIMEOUT_MS = 10_000;
+const RIPGREP_WSL_TIMEOUT_MS = 60_000;
+
+type RipgrepMode = 'builtin' | 'system';
+
+interface RipgrepSelection {
+  mode: RipgrepMode;
+  command: string;
+}
+
+interface RipgrepHealth {
+  working: boolean;
+  lastTested: number;
+  selection: RipgrepSelection;
+}
+
+export interface RipgrepRunResult {
+  /**
+   * The stdout output from ripgrep
+   */
+  stdout: string;
+  /**
+   * Whether the results were truncated due to buffer overflow or signal termination
+   */
+  truncated: boolean;
+  /**
+   * Any error that occurred during execution (non-fatal errors like no matches won't populate this)
+   */
+  error?: Error;
+}
+
+let cachedSelection: RipgrepSelection | null = null;
+let cachedHealth: RipgrepHealth | null = null;
+let macSigningAttempted = false;
+
+function wslTimeout(): number {
+  return process.platform === 'linux' && process.env['WSL_INTEROP']
+    ? RIPGREP_WSL_TIMEOUT_MS
+    : RIPGREP_RUN_TIMEOUT_MS;
+}

 // Get the directory of the current module
 const __filename = fileURLToPath(import.meta.url);
@@ -88,59 +134,201 @@ export function getBuiltinRipgrep(): string | null {
  return vendorPath;
 }

-/**
- * Checks if system ripgrep is available and returns the command to use
- * @returns The ripgrep command ('rg' or 'rg.exe') if available, or null if not found
- */
-export async function getSystemRipgrep(): Promise<string | null> {
-  try {
-    const { spawn } = await import('node:child_process');
-    const rgCommand = process.platform === 'win32' ? 'rg.exe' : 'rg';
-    const isAvailable = await new Promise<boolean>((resolve) => {
-      const proc = spawn(rgCommand, ['--version']);
-      proc.on('error', () => resolve(false));
-      proc.on('exit', (code) => resolve(code === 0));
-    });
-    return isAvailable ? rgCommand : null;
-  } catch (_error) {
-    return null;
-  }
-}
-
 /**
 * Checks if ripgrep binary exists and returns its path
 * @param useBuiltin If true, tries bundled ripgrep first, then falls back to system ripgrep.
 *                   If false, only checks for system ripgrep.
 * @returns The path to ripgrep binary ('rg' or 'rg.exe' for system ripgrep, or full path for bundled), or null if not available
+ * @throws {Error} If an error occurs while resolving the ripgrep binary.
 */
-export async function getRipgrepCommand(
+export async function resolveRipgrep(
  useBuiltin: boolean = true,
-): Promise<string | null> {
-  try {
-    if (useBuiltin) {
-      // Try bundled ripgrep first
-      const rgPath = getBuiltinRipgrep();
-      if (rgPath && (await fileExists(rgPath))) {
-        return rgPath;
-      }
-      // Fallback to system rg if bundled binary is not available
-    }
+): Promise<RipgrepSelection | null> {
+  if (cachedSelection) return cachedSelection;

-    // Check for system ripgrep
-    return await getSystemRipgrep();
-  } catch (_error) {
-    return null;
+  if (useBuiltin) {
+    // Try bundled ripgrep first
+    const rgPath = getBuiltinRipgrep();
+    if (rgPath && (await fileExists(rgPath))) {
+      cachedSelection = { mode: 'builtin', command: rgPath };
+      return cachedSelection;
+    }
+    // Fallback to system rg if bundled binary is not available
  }
+
+  const { available, error } = isCommandAvailable(RIPGREP_COMMAND);
+  if (available) {
+    cachedSelection = { mode: 'system', command: RIPGREP_COMMAND };
+    return cachedSelection;
+  }
+
+  if (error) {
+    throw error;
+  }
+
+  return null;
+}
+
+/**
+ * Ensures that ripgrep is healthy by checking its version.
+ * @param selection The ripgrep selection to check.
+ * @throws {Error} If ripgrep is not found or is not healthy.
+ */
+export async function ensureRipgrepHealthy(
+  selection: RipgrepSelection,
+): Promise<void> {
+  if (
+    cachedHealth &&
+    cachedHealth.selection.command === selection.command &&
+    cachedHealth.working
+  )
+    return;
+
+  try {
+    const { stdout, code } = await execCommand(
+      selection.command,
+      ['--version'],
+      {
+        timeout: RIPGREP_TEST_TIMEOUT_MS,
+      },
+    );
+    const working = code === 0 && stdout.startsWith('ripgrep');
+    cachedHealth = { working, lastTested: Date.now(), selection };
+  } catch (error) {
+    cachedHealth = { working: false, lastTested: Date.now(), selection };
+    throw error;
+  }
+}
+
+export async function ensureMacBinarySigned(
+  selection: RipgrepSelection,
+): Promise<void> {
+  if (process.platform !== 'darwin') return;
+  if (macSigningAttempted) return;
+  macSigningAttempted = true;
+
+  if (selection.mode !== 'builtin') return;
+  const binaryPath = selection.command;
+
+  const inspect = await execCommand('codesign', ['-vv', '-d', binaryPath], {
+    preserveOutputOnError: false,
+  });
+  const alreadySigned =
+    inspect.stdout
+      ?.split('\n')
+      .some((line) => line.includes('linker-signed')) ?? false;
+  if (!alreadySigned) return;
+
+  await execCommand('codesign', [
+    '--sign',
+    '-',
+    '--force',
+    '--preserve-metadata=entitlements,requirements,flags,runtime',
+    binaryPath,
+  ]);
+  await execCommand('xattr', ['-d', 'com.apple.quarantine', binaryPath]);
 }

 /**
 * Checks if ripgrep binary is available
 * @param useBuiltin If true, tries bundled ripgrep first, then falls back to system ripgrep.
 *                   If false, only checks for system ripgrep.
+ * @returns True if ripgrep is available, false otherwise.
+ * @throws {Error} If an error occurs while resolving the ripgrep binary.
 */
 export async function canUseRipgrep(
  useBuiltin: boolean = true,
 ): Promise<boolean> {
-  const rgPath = await getRipgrepCommand(useBuiltin);
-  return rgPath !== null;
+  const selection = await resolveRipgrep(useBuiltin);
+  if (!selection) {
+    return false;
+  }
+  await ensureRipgrepHealthy(selection);
+  return true;
+}
+
+/**
+ * Runs ripgrep with the provided arguments
+ * @param args The arguments to pass to ripgrep
+ * @param signal The signal to abort the ripgrep process
+ * @returns The result of running ripgrep
+ * @throws {Error} If an error occurs while running ripgrep.
+ */
+export async function runRipgrep(
+  args: string[],
+  signal?: AbortSignal,
+): Promise<RipgrepRunResult> {
+  const selection = await resolveRipgrep();
+  if (!selection) {
+    throw new Error('ripgrep not found.');
+  }
+  await ensureRipgrepHealthy(selection);
+
+  return new Promise<RipgrepRunResult>((resolve) => {
+    const child = execFile(
+      selection.command,
+      args,
+      {
+        maxBuffer: RIPGREP_BUFFER_LIMIT,
+        timeout: wslTimeout(),
+        signal,
+      },
+      (error, stdout = '', stderr = '') => {
+        if (!error) {
+          // Success case
+          resolve({
+            stdout,
+            truncated: false,
+          });
+          return;
+        }
+
+        // Exit code 1 = no matches found (not an error)
+        // The error.code from execFile can be string | number | undefined | null
+        const errorCode = (
+          error as Error & { code?: string | number | undefined | null }
+        ).code;
+        if (errorCode === 1) {
+          resolve({ stdout: '', truncated: false });
+          return;
+        }
+
+        // Detect various error conditions
+        const wasKilled =
+          error.signal === 'SIGTERM' || error.name === 'AbortError';
+        const overflow = errorCode === 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER';
+        const syntaxError = errorCode === 2;
+
+        const truncated = wasKilled || overflow;
+        let partialOutput = stdout;
+
+        // If killed or overflow with partial output, remove the last potentially incomplete line
+        if (truncated && partialOutput.length > 0) {
+          const lines = partialOutput.split('\n');
+          if (lines.length > 0) {
+            lines.pop();
+            partialOutput = lines.join('\n');
+          }
+        }
+
+        // Log warnings for abnormal exits (except syntax errors)
+        if (!syntaxError && truncated) {
+          console.warn(
+            `ripgrep exited abnormally (signal=${error.signal} code=${error.code}) with stderr:\n${stderr.trim() || '(empty)'}`,
+          );
+        }
+
+        resolve({
+          stdout: partialOutput,
+          truncated,
+          error: error instanceof Error ? error : undefined,
+        });
+      },
+    );
+
+    // Handle spawn errors
+    child.on('error', (err) =>
+      resolve({ stdout: '', truncated: false, error: err }),
+    );
+  });
 }
--- a/packages/core/src/utils/shell-utils.ts
+++ b/packages/core/src/utils/shell-utils.ts
@@ -10,7 +10,12 @@ import os from 'node:os';
 import { quote } from 'shell-quote';
 import { doesToolInvocationMatch } from './tool-utils.js';
 import { isShellCommandReadOnly } from './shellReadOnlyChecker.js';
-import { spawn, type SpawnOptionsWithoutStdio } from 'node:child_process';
+import {
+  execFile,
+  execFileSync,
+  type ExecFileOptions,
+} from 'node:child_process';
+import { accessSync, constants as fsConstants } from 'node:fs';

 const SHELL_TOOL_NAMES = ['run_shell_command', 'ShellTool'];

@@ -455,46 +460,101 @@ export function checkCommandPermissions(
 }

 /**
- * Determines whether a given shell command is allowed to execute based on
- * the tool's configuration including allowlists and blocklists.
+ * Executes a command with the given arguments without using a shell.
 *
- * This function operates in "default allow" mode. It is a wrapper around
- * `checkCommandPermissions`.
+ * This is a wrapper around Node.js's `execFile`, which spawns a process
+ * directly without invoking a shell, making it safer than `exec`.
+ * It's suitable for short-running commands with limited output.
 *
- * @param command The shell command string to validate.
- * @param config The application configuration.
- * @returns An object with 'allowed' boolean and optional 'reason' string if not allowed.
+ * @param command The command to execute (e.g., 'git', 'osascript').
+ * @param args Array of arguments to pass to the command.
+ * @param options Optional spawn options including:
+ *   - preserveOutputOnError: If false (default), rejects on error.
+ *                           If true, resolves with output and error code.
+ *   - Other standard spawn options (e.g., cwd, env).
+ * @returns A promise that resolves with stdout, stderr strings, and exit code.
+ * @throws Rejects with an error if the command fails (unless preserveOutputOnError is true).
 */
-export const spawnAsync = (
+export function execCommand(
  command: string,
  args: string[],
-  options?: SpawnOptionsWithoutStdio,
-): Promise<{ stdout: string; stderr: string }> =>
-  new Promise((resolve, reject) => {
-    const child = spawn(command, args, options);
-    let stdout = '';
-    let stderr = '';
-
-    child.stdout.on('data', (data) => {
-      stdout += data.toString();
-    });
-
-    child.stderr.on('data', (data) => {
-      stderr += data.toString();
-    });
-
-    child.on('close', (code) => {
-      if (code === 0) {
-        resolve({ stdout, stderr });
-      } else {
-        reject(new Error(`Command failed with exit code ${code}:\n${stderr}`));
-      }
-    });
-
-    child.on('error', (err) => {
-      reject(err);
-    });
+  options: { preserveOutputOnError?: boolean } & ExecFileOptions = {},
+): Promise<{ stdout: string; stderr: string; code: number }> {
+  return new Promise((resolve, reject) => {
+    const child = execFile(
+      command,
+      args,
+      { encoding: 'utf8', ...options },
+      (error, stdout, stderr) => {
+        if (error) {
+          if (!options.preserveOutputOnError) {
+            reject(error);
+          } else {
+            resolve({
+              stdout: stdout ?? '',
+              stderr: stderr ?? '',
+              code: typeof error.code === 'number' ? error.code : 1,
+            });
+          }
+          return;
+        }
+        resolve({ stdout: stdout ?? '', stderr: stderr ?? '', code: 0 });
+      },
+    );
+    child.on('error', reject);
  });
+}
+
+/**
+ * Resolves the path of a command in the system's PATH.
+ * @param {string} command The command name (e.g., 'git', 'grep').
+ * @returns {path: string | null; error?: Error} The path of the command, or null if it is not found and any error that occurred.
+ */
+export function resolveCommandPath(command: string): {
+  path: string | null;
+  error?: Error;
+} {
+  try {
+    const isWin = process.platform === 'win32';
+
+    const checkCommand = isWin ? 'where' : 'command';
+    const checkArgs = isWin ? [command] : ['-v', command];
+
+    let result: string | null = null;
+    try {
+      result = execFileSync(checkCommand, checkArgs, {
+        encoding: 'utf8',
+        shell: isWin,
+      }).trim();
+    } catch {
+      console.warn(`Command ${checkCommand} not found`);
+    }
+
+    if (!result) return { path: null, error: undefined };
+    if (!isWin) {
+      accessSync(result, fsConstants.X_OK);
+    }
+    return { path: result, error: undefined };
+  } catch (error) {
+    return {
+      path: null,
+      error: error instanceof Error ? error : new Error(String(error)),
+    };
+  }
+}
+
+/**
+ * Checks if a command is available in the system's PATH.
+ * @param {string} command The command name (e.g., 'git', 'grep').
+ * @returns {available: boolean; error?: Error} The availability of the command and any error that occurred.
+ */
+export function isCommandAvailable(command: string): {
+  available: boolean;
+  error?: Error;
+} {
+  const { path, error } = resolveCommandPath(command);
+  return { available: path !== null, error };
+}

 export function isCommandAllowed(
  command: string,