feat(core): refactor shell execution to use node-pty (#6088)

This commit is contained in:
Gal Zahavi
2025-08-14 13:40:12 -07:00
committed by GitHub
parent 48af0456c1
commit 980091cbc2
16 changed files with 453 additions and 409 deletions

View File

@@ -66,7 +66,6 @@ describe('ShellTool', () => {
Buffer.from('abcdef', 'hex'),
);
// Capture the output callback to simulate streaming events from the service
mockShellExecutionService.mockImplementation((_cmd, _cwd, callback) => {
mockShellOutputCallback = callback;
return {
@@ -123,8 +122,6 @@ describe('ShellTool', () => {
const fullResult: ShellExecutionResult = {
rawOutput: Buffer.from(result.output || ''),
output: 'Success',
stdout: 'Success',
stderr: '',
exitCode: 0,
signal: null,
error: null,
@@ -141,7 +138,7 @@ describe('ShellTool', () => {
resolveShellExecution({ pid: 54321 });
vi.mocked(fs.existsSync).mockReturnValue(true);
vi.mocked(fs.readFileSync).mockReturnValue('54321\n54322\n'); // Service PID and background PID
vi.mocked(fs.readFileSync).mockReturnValue('54321\n54322\n');
const result = await promise;
@@ -152,6 +149,8 @@ describe('ShellTool', () => {
expect.any(String),
expect.any(Function),
mockAbortSignal,
undefined,
undefined,
);
expect(result.llmContent).toContain('Background PIDs: 54322');
expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalledWith(tmpFile);
@@ -164,8 +163,6 @@ describe('ShellTool', () => {
resolveShellExecution({
rawOutput: Buffer.from(''),
output: '',
stdout: '',
stderr: '',
exitCode: 0,
signal: null,
error: null,
@@ -178,6 +175,8 @@ describe('ShellTool', () => {
expect.any(String),
expect.any(Function),
mockAbortSignal,
undefined,
undefined,
);
});
@@ -189,16 +188,13 @@ describe('ShellTool', () => {
error,
exitCode: 1,
output: 'err',
stderr: 'err',
rawOutput: Buffer.from('err'),
stdout: '',
signal: null,
aborted: false,
pid: 12345,
});
const result = await promise;
// The final llmContent should contain the user's command, not the wrapper
expect(result.llmContent).toContain('Error: wrapped command failed');
expect(result.llmContent).not.toContain('pgrep');
});
@@ -231,8 +227,6 @@ describe('ShellTool', () => {
resolveExecutionPromise({
output: 'long output',
rawOutput: Buffer.from('long output'),
stdout: 'long output',
stderr: '',
exitCode: 0,
signal: null,
error: null,
@@ -257,7 +251,7 @@ describe('ShellTool', () => {
mockShellExecutionService.mockImplementation(() => {
throw error;
});
vi.mocked(fs.existsSync).mockReturnValue(true); // Pretend the file exists
vi.mocked(fs.existsSync).mockReturnValue(true);
const invocation = shellTool.build({ command: 'a-command' });
await expect(invocation.execute(mockAbortSignal)).rejects.toThrow(error);
@@ -280,33 +274,26 @@ describe('ShellTool', () => {
const invocation = shellTool.build({ command: 'stream' });
const promise = invocation.execute(mockAbortSignal, updateOutputMock);
// First chunk, should be throttled.
mockShellOutputCallback({
type: 'data',
stream: 'stdout',
chunk: 'hello ',
});
expect(updateOutputMock).not.toHaveBeenCalled();
// Advance time past the throttle interval.
await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);
// Send a second chunk. THIS event triggers the update with the CUMULATIVE content.
mockShellOutputCallback({
type: 'data',
stream: 'stderr',
chunk: 'world',
chunk: 'hello world',
});
// It should have been called once now with the combined output.
expect(updateOutputMock).toHaveBeenCalledOnce();
expect(updateOutputMock).toHaveBeenCalledWith('hello \nworld');
expect(updateOutputMock).toHaveBeenCalledWith('hello world');
resolveExecutionPromise({
rawOutput: Buffer.from(''),
output: '',
stdout: '',
stderr: '',
exitCode: 0,
signal: null,
error: null,
@@ -332,16 +319,13 @@ describe('ShellTool', () => {
});
expect(updateOutputMock).toHaveBeenCalledOnce();
// Advance time past the throttle interval.
await vi.advanceTimersByTimeAsync(OUTPUT_UPDATE_INTERVAL_MS + 1);
// Send a SECOND progress event. This one will trigger the flush.
mockShellOutputCallback({
type: 'binary_progress',
bytesReceived: 2048,
});
// Now it should be called a second time with the latest progress.
expect(updateOutputMock).toHaveBeenCalledTimes(2);
expect(updateOutputMock).toHaveBeenLastCalledWith(
'[Receiving binary output... 2.0 KB received]',
@@ -350,8 +334,6 @@ describe('ShellTool', () => {
resolveExecutionPromise({
rawOutput: Buffer.from(''),
output: '',
stdout: '',
stderr: '',
exitCode: 0,
signal: null,
error: null,

View File

@@ -97,6 +97,8 @@ class ShellToolInvocation extends BaseToolInvocation<
async execute(
signal: AbortSignal,
updateOutput?: (output: string) => void,
terminalColumns?: number,
terminalRows?: number,
): Promise<ToolResult> {
const strippedCommand = stripShellWrapper(this.params.command);
@@ -129,9 +131,7 @@ class ShellToolInvocation extends BaseToolInvocation<
this.params.directory || '',
);
let cumulativeStdout = '';
let cumulativeStderr = '';
let cumulativeOutput = '';
let lastUpdateTime = Date.now();
let isBinaryStream = false;
@@ -148,15 +148,9 @@ class ShellToolInvocation extends BaseToolInvocation<
switch (event.type) {
case 'data':
if (isBinaryStream) break; // Don't process text if we are in binary mode
if (event.stream === 'stdout') {
cumulativeStdout += event.chunk;
} else {
cumulativeStderr += event.chunk;
}
currentDisplayOutput =
cumulativeStdout +
(cumulativeStderr ? `\n${cumulativeStderr}` : '');
if (isBinaryStream) break;
cumulativeOutput = event.chunk;
currentDisplayOutput = cumulativeOutput;
if (Date.now() - lastUpdateTime > OUTPUT_UPDATE_INTERVAL_MS) {
shouldUpdate = true;
}
@@ -187,6 +181,8 @@ class ShellToolInvocation extends BaseToolInvocation<
}
},
signal,
terminalColumns,
terminalRows,
);
const result = await resultPromise;
@@ -218,7 +214,7 @@ class ShellToolInvocation extends BaseToolInvocation<
if (result.aborted) {
llmContent = 'Command was cancelled by user before it could complete.';
if (result.output.trim()) {
llmContent += ` Below is the output (on stdout and stderr) before it was cancelled:\n${result.output}`;
llmContent += ` Below is the output before it was cancelled:\n${result.output}`;
} else {
llmContent += ' There was no output before it was cancelled.';
}
@@ -232,8 +228,7 @@ class ShellToolInvocation extends BaseToolInvocation<
llmContent = [
`Command: ${this.params.command}`,
`Directory: ${this.params.directory || '(root)'}`,
`Stdout: ${result.stdout || '(empty)'}`,
`Stderr: ${result.stderr || '(empty)'}`,
`Output: ${result.output || '(empty)'}`,
`Error: ${finalError}`, // Use the cleaned error string.
`Exit Code: ${result.exitCode ?? '(none)'}`,
`Signal: ${result.signal ?? '(none)'}`,

View File

@@ -50,6 +50,8 @@ export interface ToolInvocation<
execute(
signal: AbortSignal,
updateOutput?: (output: string) => void,
terminalColumns?: number,
terminalRows?: number,
): Promise<TResult>;
}
@@ -78,6 +80,8 @@ export abstract class BaseToolInvocation<
abstract execute(
signal: AbortSignal,
updateOutput?: (output: string) => void,
terminalColumns?: number,
terminalRows?: number,
): Promise<TResult>;
}
@@ -117,8 +121,16 @@ export class LegacyToolInvocation<
execute(
signal: AbortSignal,
updateOutput?: (output: string) => void,
terminalColumns?: number,
terminalRows?: number,
): Promise<TResult> {
return this.legacyTool.execute(this.params, signal, updateOutput);
return this.legacyTool.execute(
this.params,
signal,
updateOutput,
terminalColumns,
terminalRows,
);
}
}
@@ -232,9 +244,16 @@ export abstract class DeclarativeTool<
params: TParams,
signal: AbortSignal,
updateOutput?: (output: string) => void,
terminalColumns?: number,
terminalRows?: number,
): Promise<TResult> {
const invocation = this.build(params);
return invocation.execute(signal, updateOutput);
return invocation.execute(
signal,
updateOutput,
terminalColumns,
terminalRows,
);
}
}
@@ -373,6 +392,8 @@ export abstract class BaseTool<
params: TParams,
signal: AbortSignal,
updateOutput?: (output: string) => void,
terminalColumns?: number,
terminalRows?: number,
): Promise<TResult>;
}