feat(core): refactor shell execution to use node-pty (#6088)

2025-12-20 16:57:46 +00:00 · 2025-08-14 13:40:12 -07:00
parent 48af0456c1
commit 980091cbc2
16 changed files with 453 additions and 409 deletions
--- a/packages/core/src/tools/shell.test.ts
+++ b/packages/core/src/tools/shell.test.ts
@@ -66,7 +66,6 @@ describe('ShellTool', () => {
      Buffer.from('abcdef', 'hex'),
    );

-    // Capture the output callback to simulate streaming events from the service
    mockShellExecutionService.mockImplementation((_cmd, _cwd, callback) => {
      mockShellOutputCallback = callback;
      return {
@@ -123,8 +122,6 @@ describe('ShellTool', () => {
      const fullResult: ShellExecutionResult = {
        rawOutput: Buffer.from(result.output || ''),
        output: 'Success',
-        stdout: 'Success',
-        stderr: '',
        exitCode: 0,
        signal: null,
        error: null,
@@ -141,7 +138,7 @@ describe('ShellTool', () => {
      resolveShellExecution({ pid: 54321 });

      vi.mocked(fs.existsSync).mockReturnValue(true);
-      vi.mocked(fs.readFileSync).mockReturnValue('54321\n54322\n'); // Service PID and background PID
+      vi.mocked(fs.readFileSync).mockReturnValue('54321\n54322\n');

      const result = await promise;

@@ -152,6 +149,8 @@ describe('ShellTool', () => {
        expect.any(String),
        expect.any(Function),
        mockAbortSignal,
+        undefined,
+        undefined,
      );
      expect(result.llmContent).toContain('Background PIDs: 54322');
      expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalledWith(tmpFile);
@@ -164,8 +163,6 @@ describe('ShellTool', () => {
      resolveShellExecution({
        rawOutput: Buffer.from(''),
        output: '',
-        stdout: '',
-        stderr: '',
        exitCode: 0,
        signal: null,
        error: null,
@@ -178,6 +175,8 @@ describe('ShellTool', () => {
        expect.any(String),
        expect.any(Function),
        mockAbortSignal,
+        undefined,
+        undefined,
      );
    });

@@ -189,16 +188,13 @@ describe('ShellTool', () => {
        error,
        exitCode: 1,
        output: 'err',
-        stderr: 'err',
        rawOutput: Buffer.from('err'),
-        stdout: '',
        signal: null,
        aborted: false,
        pid: 12345,
      });

      const result = await promise;
-      // The final llmContent should contain the user's command, not the wrapper
      expect(result.llmContent).toContain('Error: wrapped command failed');
      expect(result.llmContent).not.toContain('pgrep');
    });
@@ -231,8 +227,6 @@ describe('ShellTool', () => {
      resolveExecutionPromise({
        output: 'long output',
        rawOutput: Buffer.from('long output'),
-        stdout: 'long output',
-        stderr: '',
        exitCode: 0,
        signal: null,
        error: null,
@@ -257,7 +251,7 @@ describe('ShellTool', () => {
      mockShellExecutionService.mockImplementation(() => {
        throw error;
      });
-      vi.mocked(fs.existsSync).mockReturnValue(true); // Pretend the file exists
+      vi.mocked(fs.existsSync).mockReturnValue(true);

      const invocation = shellTool.build({ command: 'a-command' });
      await expect(invocation.execute(mockAbortSignal)).rejects.toThrow(error);
@@ -280,33 +274,26 @@ describe('ShellTool', () => {
        const invocation = shellTool.build({ command: 'stream' });
        const promise = invocation.execute(mockAbortSignal, updateOutputMock);

-        // First chunk, should be throttled.
        mockShellOutputCallback({
          type: 'data',
-          stream: 'stdout',
          chunk: 'hello ',
        });
        expect(updateOutputMock).not.toHaveBeenCalled();

-        // Advance time past the throttle interval.
        await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);

-        // Send a second chunk. THIS event triggers the update with the CUMULATIVE content.
        mockShellOutputCallback({
          type: 'data',
-          stream: 'stderr',
-          chunk: 'world',
+          chunk: 'hello world',
        });

        // It should have been called once now with the combined output.
        expect(updateOutputMock).toHaveBeenCalledOnce();
-        expect(updateOutputMock).toHaveBeenCalledWith('hello \nworld');
+        expect(updateOutputMock).toHaveBeenCalledWith('hello world');

        resolveExecutionPromise({
          rawOutput: Buffer.from(''),
          output: '',
-          stdout: '',
-          stderr: '',
          exitCode: 0,
          signal: null,
          error: null,
@@ -332,16 +319,13 @@ describe('ShellTool', () => {
        });
        expect(updateOutputMock).toHaveBeenCalledOnce();

-        // Advance time past the throttle interval.
        await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);

-        // Send a SECOND progress event. This one will trigger the flush.
        mockShellOutputCallback({
          type: 'binary_progress',
          bytesReceived: 2048,
        });

-        // Now it should be called a second time with the latest progress.
        expect(updateOutputMock).toHaveBeenCalledTimes(2);
        expect(updateOutputMock).toHaveBeenLastCalledWith(
          '[Receiving binary output... 2.0 KB received]',
@@ -350,8 +334,6 @@ describe('ShellTool', () => {
        resolveExecutionPromise({
          rawOutput: Buffer.from(''),
          output: '',
-          stdout: '',
-          stderr: '',
          exitCode: 0,
          signal: null,
          error: null,
--- a/packages/core/src/tools/shell.ts
+++ b/packages/core/src/tools/shell.ts
@@ -97,6 +97,8 @@ class ShellToolInvocation extends BaseToolInvocation<
  async execute(
    signal: AbortSignal,
    updateOutput?: (output: string) => void,
+    terminalColumns?: number,
+    terminalRows?: number,
  ): Promise<ToolResult> {
    const strippedCommand = stripShellWrapper(this.params.command);

@@ -129,9 +131,7 @@ class ShellToolInvocation extends BaseToolInvocation<
        this.params.directory || '',
      );

-      let cumulativeStdout = '';
-      let cumulativeStderr = '';
-
+      let cumulativeOutput = '';
      let lastUpdateTime = Date.now();
      let isBinaryStream = false;

@@ -148,15 +148,9 @@ class ShellToolInvocation extends BaseToolInvocation<

          switch (event.type) {
            case 'data':
-              if (isBinaryStream) break; // Don't process text if we are in binary mode
-              if (event.stream === 'stdout') {
-                cumulativeStdout += event.chunk;
-              } else {
-                cumulativeStderr += event.chunk;
-              }
-              currentDisplayOutput =
-                cumulativeStdout +
-                (cumulativeStderr ? `\n${cumulativeStderr}` : '');
+              if (isBinaryStream) break;
+              cumulativeOutput = event.chunk;
+              currentDisplayOutput = cumulativeOutput;
              if (Date.now() - lastUpdateTime > OUTPUT_UPDATE_INTERVAL_MS) {
                shouldUpdate = true;
              }
@@ -187,6 +181,8 @@ class ShellToolInvocation extends BaseToolInvocation<
          }
        },
        signal,
+        terminalColumns,
+        terminalRows,
      );

      const result = await resultPromise;
@@ -218,7 +214,7 @@ class ShellToolInvocation extends BaseToolInvocation<
      if (result.aborted) {
        llmContent = 'Command was cancelled by user before it could complete.';
        if (result.output.trim()) {
-          llmContent += ` Below is the output (on stdout and stderr) before it was cancelled:\n${result.output}`;
+          llmContent += ` Below is the output before it was cancelled:\n${result.output}`;
        } else {
          llmContent += ' There was no output before it was cancelled.';
        }
@@ -232,8 +228,7 @@ class ShellToolInvocation extends BaseToolInvocation<
        llmContent = [
          `Command: ${this.params.command}`,
          `Directory: ${this.params.directory || '(root)'}`,
-          `Stdout: ${result.stdout || '(empty)'}`,
-          `Stderr: ${result.stderr || '(empty)'}`,
+          `Output: ${result.output || '(empty)'}`,
          `Error: ${finalError}`, // Use the cleaned error string.
          `Exit Code: ${result.exitCode ?? '(none)'}`,
          `Signal: ${result.signal ?? '(none)'}`,
--- a/packages/core/src/tools/tools.ts
+++ b/packages/core/src/tools/tools.ts
@@ -50,6 +50,8 @@ export interface ToolInvocation<
  execute(
    signal: AbortSignal,
    updateOutput?: (output: string) => void,
+    terminalColumns?: number,
+    terminalRows?: number,
  ): Promise<TResult>;
 }

@@ -78,6 +80,8 @@ export abstract class BaseToolInvocation<
  abstract execute(
    signal: AbortSignal,
    updateOutput?: (output: string) => void,
+    terminalColumns?: number,
+    terminalRows?: number,
  ): Promise<TResult>;
 }

@@ -117,8 +121,16 @@ export class LegacyToolInvocation<
  execute(
    signal: AbortSignal,
    updateOutput?: (output: string) => void,
+    terminalColumns?: number,
+    terminalRows?: number,
  ): Promise<TResult> {
-    return this.legacyTool.execute(this.params, signal, updateOutput);
+    return this.legacyTool.execute(
+      this.params,
+      signal,
+      updateOutput,
+      terminalColumns,
+      terminalRows,
+    );
  }
 }

@@ -232,9 +244,16 @@ export abstract class DeclarativeTool<
    params: TParams,
    signal: AbortSignal,
    updateOutput?: (output: string) => void,
+    terminalColumns?: number,
+    terminalRows?: number,
  ): Promise<TResult> {
    const invocation = this.build(params);
-    return invocation.execute(signal, updateOutput);
+    return invocation.execute(
+      signal,
+      updateOutput,
+      terminalColumns,
+      terminalRows,
+    );
  }
 }

@@ -373,6 +392,8 @@ export abstract class BaseTool<
    params: TParams,
    signal: AbortSignal,
    updateOutput?: (output: string) => void,
+    terminalColumns?: number,
+    terminalRows?: number,
  ): Promise<TResult>;
 }