fix(tests): refactor integration tests to be less flaky (#4890)

Co-authored-by: matt korwel <matt.korwel@gmail.com>
This commit is contained in:
Allen Hutchison
2025-08-01 14:33:33 -07:00
committed by GitHub
parent dccca91fc9
commit 387706607d
12 changed files with 1073 additions and 115 deletions

View File

@@ -6,16 +6,63 @@
import { test } from 'node:test';
import { strict as assert } from 'assert';
import { TestRig } from './test-helper.js';
import {
TestRig,
createToolCallErrorMessage,
printDebugInfo,
validateModelOutput,
} from './test-helper.js';
test('should be able to write a file', async (t) => {
test('should be able to write a file', async () => {
const rig = new TestRig();
rig.setup(t.name);
await rig.setup('should be able to write a file');
const prompt = `show me an example of using the write tool. put a dad joke in dad.txt`;
await rig.run(prompt);
const result = await rig.run(prompt);
const foundToolCall = await rig.waitForToolCall('write_file');
// Add debugging information
if (!foundToolCall) {
printDebugInfo(rig, result);
}
const allTools = rig.readToolLogs();
assert.ok(
foundToolCall,
createToolCallErrorMessage(
'write_file',
allTools.map((t) => t.toolRequest.name),
result,
),
);
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(result, 'dad.txt', 'Write file test');
const newFilePath = 'dad.txt';
const newFileContent = rig.readFile(newFilePath);
assert.notEqual(newFileContent, '');
// Add debugging for file content
if (newFileContent === '') {
console.error('File was created but is empty');
console.error(
'Tool calls:',
rig.readToolLogs().map((t) => ({
name: t.toolRequest.name,
args: t.toolRequest.args,
})),
);
}
assert.notEqual(newFileContent, '', 'Expected file to have content');
// Log success info if verbose
if (process.env.VERBOSE === 'true') {
console.log(
'File created successfully with content:',
newFileContent.substring(0, 100) + '...',
);
}
});