fix(tests): refactor integration tests to be less flaky (#4890)

Co-authored-by: matt korwel <matt.korwel@gmail.com>
This commit is contained in:
Allen Hutchison
2025-08-01 14:33:33 -07:00
committed by GitHub
parent dccca91fc9
commit 387706607d
12 changed files with 1073 additions and 115 deletions

View File

@@ -6,16 +6,36 @@
// NOTE(review): this span looks like a rendered diff whose +/- markers were
// stripped, so what appear to be removed and added lines sit side by side
// (two imports from './test-helper.js', two `test(...)` openers, two
// `rig.setup(...)` calls). It is not runnable as written — confirm against
// the actual commit before editing any of the code below.
import { test } from 'node:test';
import { strict as assert } from 'assert';
// Presumably the old import (TestRig only) followed by its replacement, which
// also pulls in the printDebugInfo and validateModelOutput helpers — TODO confirm.
import { TestRig } from './test-helper.js';
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
// Integration test: asks the model to remember a favorite color, then checks
// that a `save_memory` tool call was made and that the output mentions it.
// The first opener takes the test context `t`; the second takes no argument.
test('should be able to save to memory', async (t) => {
test('should be able to save to memory', async () => {
const rig = new TestRig();
// Two setup variants: one passes `t.name` without awaiting, the other awaits
// setup with the test name spelled out as a literal.
rig.setup(t.name);
await rig.setup('should be able to save to memory');
const prompt = `remember that my favorite color is blue.
what is my favorite color? tell me that and surround it with $ symbol`;
const result = await rig.run(prompt);
// Strict check: requires the exact '$blue$' form in the lowercased output.
// NOTE(review): likely the removed assertion, superseded by the checks below — confirm.
assert.ok(result.toLowerCase().includes('$blue$'));
// Wait for the rig to report a `save_memory` tool call.
const foundToolCall = await rig.waitForToolCall('save_memory');
// Add debugging information
// Only runs when the tool call was not found or the lowercased output lacks
// 'blue', i.e. right before the assertion/validation below would complain.
if (!foundToolCall || !result.toLowerCase().includes('blue')) {
const allTools = printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
'Contains blue': result.toLowerCase().includes('blue'),
});
// The value returned by printDebugInfo is used as a list of entries carrying
// `toolRequest.name` / `toolRequest.args`; log only the save_memory ones.
console.error(
'Memory tool calls:',
allTools
.filter((t) => t.toolRequest.name === 'save_memory')
.map((t) => t.toolRequest.args),
);
}
// Hard requirement of the test: the save_memory tool must have been called.
assert.ok(foundToolCall, 'Expected to find a save_memory tool call');
// Validate model output - will throw if no output, warn if missing expected content
validateModelOutput(result, 'blue', 'Save memory test');
});