Mirror of https://github.com/QwenLM/qwen-code.git (synced 2025-12-20 16:57:46 +00:00)
fix(tests): refactor integration tests to be less flaky (#4890)
Co-authored-by: matt korwel <matt.korwel@gmail.com>
This commit is contained in:
@@ -6,17 +6,45 @@
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'assert';
|
||||
import { TestRig } from './test-helper.js';
|
||||
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
||||
|
||||
test.skip('should be able to read multiple files', async (t) => {
|
||||
test('should be able to read multiple files', async () => {
|
||||
const rig = new TestRig();
|
||||
rig.setup(t.name);
|
||||
await rig.setup('should be able to read multiple files');
|
||||
rig.createFile('file1.txt', 'file 1 content');
|
||||
rig.createFile('file2.txt', 'file 2 content');
|
||||
|
||||
const prompt = `Read the files in this directory, list them and print them to the screen`;
|
||||
const prompt = `Please use read_many_files to read file1.txt and file2.txt and show me what's in them`;
|
||||
|
||||
const result = await rig.run(prompt);
|
||||
|
||||
assert.ok(result.includes('file 1 content'));
|
||||
assert.ok(result.includes('file 2 content'));
|
||||
// Check for either read_many_files or multiple read_file calls
|
||||
const allTools = rig.readToolLogs();
|
||||
const readManyFilesCall = await rig.waitForToolCall('read_many_files');
|
||||
const readFileCalls = allTools.filter(
|
||||
(t) => t.toolRequest.name === 'read_file',
|
||||
);
|
||||
|
||||
// Accept either read_many_files OR at least 2 read_file calls
|
||||
const foundValidPattern = readManyFilesCall || readFileCalls.length >= 2;
|
||||
|
||||
// Add debugging information
|
||||
if (!foundValidPattern) {
|
||||
printDebugInfo(rig, result, {
|
||||
'read_many_files called': readManyFilesCall,
|
||||
'read_file calls': readFileCalls.length,
|
||||
});
|
||||
}
|
||||
|
||||
assert.ok(
|
||||
foundValidPattern,
|
||||
'Expected to find either read_many_files or multiple read_file tool calls',
|
||||
);
|
||||
|
||||
// Validate model output - will throw if no output, warn if missing expected content
|
||||
validateModelOutput(
|
||||
result,
|
||||
['file 1 content', 'file 2 content'],
|
||||
'Read many files test',
|
||||
);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user