fix(tests): refactor integration tests to be less flaky (#4890)

Co-authored-by: matt korwel <matt.korwel@gmail.com>
This commit is contained in:
Allen Hutchison
2025-08-01 14:33:33 -07:00
committed by GitHub
parent dccca91fc9
commit 387706607d
12 changed files with 1073 additions and 115 deletions

View File

@@ -6,25 +6,84 @@
import { strict as assert } from 'assert';
import { test } from 'node:test';
import { TestRig } from './test-helper.js';
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
// NOTE(review): this span appears to be a diff hunk whose +/- markers were
// stripped, so two revisions of the same test are interleaved: a synchronous
// `test('reads a file', (t) => ...)` form and an async
// `test('should be able to read a file', ...)` form. As written there are two
// `test(` openers and a single `});` closer — confirm which revision is
// intended before treating this as runnable code.
test('reads a file', (t) => {
test('should be able to read a file', async () => {
const rig = new TestRig();
// Two setup calls are present: one passes the test context's name and is not
// awaited, the other passes a literal name and is awaited.
rig.setup(t.name);
await rig.setup('should be able to read a file');
// Fixture file whose contents ('hello world') the prompts below ask for.
rig.createFile('test.txt', 'hello world');
// Non-awaited run; its return value is checked for 'hello' further down.
const output = rig.run(`read the file name test.txt`);
// Awaited run with a more explicit prompt; `result` feeds the checks below.
const result = await rig.run(
`read the file test.txt and show me its contents`,
);
// Case-insensitive substring check on the non-awaited run's output.
assert.ok(output.toLowerCase().includes('hello'));
// Truthy when a 'read_file' tool call was observed (asserted below).
const foundToolCall = await rig.waitForToolCall('read_file');
// Print diagnostics before asserting, so they are emitted ahead of a failure:
// triggered when the tool call is missing or `result` lacks 'hello world'.
if (!foundToolCall || !result.includes('hello world')) {
printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
'Contains hello world': result.includes('hello world'),
});
}
// Hard requirement: the read_file tool call must have been found.
assert.ok(foundToolCall, 'Expected to find a read_file tool call');
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(result, 'hello world', 'File read test');
});
// NOTE(review): this span appears to be a diff hunk whose +/- markers were
// stripped, so two revisions of the same test are interleaved: a synchronous
// `test('writes a file', (t) => ...)` form and an async
// `test('should be able to write a file', ...)` form. As written there are two
// `test(` openers and a single `});` closer — confirm which revision is
// intended before treating this as runnable code.
test('writes a file', (t) => {
test('should be able to write a file', async () => {
const rig = new TestRig();
// Two setup calls are present: one passes the test context's name and is not
// awaited, the other passes a literal name and is awaited.
rig.setup(t.name);
await rig.setup('should be able to write a file');
// Start from an empty file so any 'hello' found later was written by the run.
rig.createFile('test.txt', '');
// The same prompt is issued twice: once without await (return value
// discarded) and once awaited, with the output kept in `result`.
rig.run(`edit test.txt to have a hello world message`);
const result = await rig.run(`edit test.txt to have a hello world message`);
// Accept multiple valid tools for editing files
const foundToolCall = await rig.waitForAnyToolCall([
'write_file',
'edit',
'replace',
]);
// Print diagnostics before asserting, so they are emitted ahead of a failure.
if (!foundToolCall) {
printDebugInfo(rig, result);
}
assert.ok(
foundToolCall,
'Expected to find a write_file, edit, or replace tool call',
);
// Validate model output - will throw if no output
validateModelOutput(result, null, 'File write test');
const fileContent = rig.readFile('test.txt');
// NOTE(review): this message-less assertion runs before the debugging block
// below, so on a mismatch it would throw before any diagnostics are printed.
// It looks like it belongs to the earlier revision — confirm.
assert.ok(fileContent.toLowerCase().includes('hello'));
// On a content mismatch, dump the logged tool-call arguments for inspection.
// Only 'write_file' entries are collected here, although 'edit' and 'replace'
// are also accepted as valid tool calls above.
if (!fileContent.toLowerCase().includes('hello')) {
const writeCalls = rig
.readToolLogs()
.filter((t) => t.toolRequest.name === 'write_file')
.map((t) => t.toolRequest.args);
printDebugInfo(rig, result, {
'File content mismatch': true,
'Expected to contain': 'hello',
'Actual content': fileContent,
'Write tool calls': JSON.stringify(writeCalls),
});
}
// Case-insensitive check that the file now contains 'hello'.
assert.ok(
fileContent.toLowerCase().includes('hello'),
'Expected file to contain hello',
);
// Success message is printed only when VERBOSE is exactly the string 'true'.
if (process.env.VERBOSE === 'true') {
console.log('File written successfully with hello message.');
}
});