mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-19 09:33:53 +00:00
257 lines
7.8 KiB
TypeScript
257 lines
7.8 KiB
TypeScript
/**
|
|
* @license
|
|
* Copyright 2025 Google LLC
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
import { describe, it, expect } from 'vitest';
|
|
import { existsSync } from 'node:fs';
|
|
import * as path from 'node:path';
|
|
import { TestRig, printDebugInfo, validateModelOutput } from './test-helper.js';
|
|
|
|
describe('file-system', () => {
|
|
/**
 * Creates `test.txt` containing "hello world", asks the model to show its
 * contents, and checks that a `read_file` tool call was found and that the
 * model output passes validation against the expected text.
 */
it('should be able to read a file', async () => {
  const rig = new TestRig();
  await rig.setup('should be able to read a file');
  rig.createFile('test.txt', 'hello world');

  const prompt = `read the file test.txt and show me its contents`;
  const result = await rig.run(prompt);
  const foundToolCall = await rig.waitForToolCall('read_file');

  // Evaluate the content check once and reuse it for both the guard and the
  // diagnostic payload.
  const mentionsHelloWorld = result.includes('hello world');

  // Emit diagnostics before asserting so a failure below comes with context.
  if (!(foundToolCall && mentionsHelloWorld)) {
    printDebugInfo(rig, result, {
      'Found tool call': foundToolCall,
      'Contains hello world': mentionsHelloWorld,
    });
  }

  expect(foundToolCall, 'Expected to find a read_file tool call').toBeTruthy();

  // Per the helper's usage note in this suite: throws if there is no output,
  // warns if the expected content is missing.
  validateModelOutput(result, 'hello world', 'File read test');
});
|
|
|
|
/**
 * Creates an empty `test.txt`, asks the model to put a hello-world message in
 * it, and checks that an accepted editing tool was called and that the file
 * ends up containing "hello" (case-insensitive).
 */
it('should be able to write a file', async () => {
  const rig = new TestRig();
  await rig.setup('should be able to write a file');
  rig.createFile('test.txt', '');

  const result = await rig.run(`edit test.txt to have a hello world message`);

  // Accept multiple valid tools for editing files. The same list drives the
  // diagnostics below so they cannot drift from what the test accepts.
  const acceptedTools = ['write_file', 'edit'];
  const foundToolCall = await rig.waitForAnyToolCall(acceptedTools);

  // Emit diagnostics before asserting so a failure below comes with context.
  if (!foundToolCall) {
    printDebugInfo(rig, result);
  }

  expect(
    foundToolCall,
    'Expected to find a write_file or edit tool call',
  ).toBeTruthy();

  // Validate model output - will throw if no output
  validateModelOutput(result, null, 'File write test');

  const fileContent = rig.readFile('test.txt');
  const containsHello = fileContent.toLowerCase().includes('hello');

  if (!containsHello) {
    // Collect every accepted editing call (not only write_file) together with
    // its tool name, so an `edit`-based attempt is visible in the debug dump.
    const writeCalls = rig
      .readToolLogs()
      .filter((t) => acceptedTools.includes(t.toolRequest.name))
      .map((t) => ({ name: t.toolRequest.name, args: t.toolRequest.args }));

    printDebugInfo(rig, result, {
      'File content mismatch': true,
      'Expected to contain': 'hello',
      'Actual content': fileContent,
      'Write tool calls': JSON.stringify(writeCalls),
    });
  }

  expect(containsHello, 'Expected file to contain hello').toBeTruthy();

  // Log success info if verbose
  if (process.env['VERBOSE'] === 'true') {
    console.log('File written successfully with hello message.');
  }
});
|
|
|
|
/**
 * Asks the model to write "hello" to a file whose name contains spaces, then
 * checks that a `write_file` tool call was found and that the file content is
 * exactly "hello".
 */
it('should correctly handle file paths with spaces', async () => {
  const spacedName = 'my test file.txt';
  const rig = new TestRig();
  await rig.setup('should correctly handle file paths with spaces');

  const output = await rig.run(`write "hello" to "${spacedName}"`);
  const sawWriteFile = await rig.waitForToolCall('write_file');

  // Emit diagnostics before asserting so a failure below comes with context.
  if (!sawWriteFile) {
    printDebugInfo(rig, output);
  }
  expect(sawWriteFile, 'Expected to find a write_file tool call').toBeTruthy();

  // Strict equality: the file must hold exactly the requested text.
  expect(rig.readFile(spacedName)).toBe('hello');
});
|
|
|
|
/**
 * Seeds `version.txt` with "1.0.0", asks the model to read it and write
 * "1.0.1" back, then checks the tool logs for one read call and one
 * write-type call and verifies the final file content.
 */
it('should perform a read-then-write sequence', async () => {
  const rig = new TestRig();
  await rig.setup('should perform a read-then-write sequence');
  const fileName = 'version.txt';
  rig.createFile(fileName, '1.0.0');

  const prompt = `Read the version from ${fileName} and write the next version 1.0.1 back to the file.`;
  const result = await rig.run(prompt);

  await rig.waitForTelemetryReady();
  const toolLogs = rig.readToolLogs();

  // Any of these counts as the "write" half of the sequence. `edit` is
  // included because the write-a-file test in this suite already accepts it
  // as a valid editing tool; `replace` is kept for compatibility.
  const writeToolNames = ['write_file', 'edit', 'replace'];

  const readCall = toolLogs.find(
    (log) => log.toolRequest.name === 'read_file',
  );
  const writeCall = toolLogs.find((log) =>
    writeToolNames.includes(log.toolRequest.name),
  );

  // Emit diagnostics before asserting so a failure below comes with context.
  if (!readCall || !writeCall) {
    printDebugInfo(rig, result, { readCall, writeCall });
  }

  expect(readCall, 'Expected to find a read_file tool call').toBeDefined();
  expect(
    writeCall,
    'Expected to find a write_file, edit or replace tool call',
  ).toBeDefined();

  // The tool calls alone are not enough: the file must hold the new version.
  const newFileContent = rig.readFile(fileName);
  expect(newFileContent).toBe('1.0.1');
});
|
|
|
|
/**
 * (Currently skipped.) Seeds a file in which "test line" appears twice, asks
 * the model to replace it with "new line", and checks that an editing tool
 * call was found, that at least one such call is logged as successful, and
 * that both occurrences were rewritten.
 */
it.skip('should replace multiple instances of a string', async () => {
  const rig = new TestRig();
  await rig.setup('should replace multiple instances of a string');
  const fileName = 'ambiguous.txt';
  const fileContent = 'Hey there, \ntest line\ntest line';
  const expectedContent = 'Hey there, \nnew line\nnew line';
  rig.createFile(fileName, fileContent);

  const result = await rig.run(
    `replace "test line" with "new line" in ${fileName}`,
  );

  // Tools that may legitimately perform the change. `edit` is included
  // because the write-a-file test in this suite already accepts it as a valid
  // editing tool; `replace` is kept for compatibility.
  const editToolNames = ['replace', 'edit', 'write_file'];

  const foundToolCall = await rig.waitForAnyToolCall(editToolNames);
  if (!foundToolCall) {
    printDebugInfo(rig, result);
  }
  expect(
    foundToolCall,
    'Expected to find a replace, edit or write_file tool call',
  ).toBeTruthy();

  // A call being present is not enough; at least one must be logged as
  // successful.
  const toolLogs = rig.readToolLogs();
  const successfulEdit = toolLogs.some(
    (log) =>
      editToolNames.includes(log.toolRequest.name) && log.toolRequest.success,
  );
  if (!successfulEdit) {
    console.error(
      'Expected a successful edit tool call, but none was found.',
    );
    printDebugInfo(rig, result);
  }
  expect(successfulEdit, 'Expected a successful edit tool call').toBeTruthy();

  const newFileContent = rig.readFile(fileName);
  expect(newFileContent).toBe(expectedContent);
});
|
|
|
|
/**
 * Asks the model to edit a file that does not exist and verifies the failure
 * is safe: a read attempt (if any) must have failed, `write_file` must not
 * have been called, no in-place edit may be logged as successful, and the
 * file must not exist afterwards.
 */
it('should fail safely when trying to edit a non-existent file', async () => {
  const rig = new TestRig();
  await rig.setup(
    'should fail safely when trying to edit a non-existent file',
  );
  const fileName = 'non_existent.txt';

  const result = await rig.run(`In ${fileName}, replace "a" with "b"`);

  await rig.waitForTelemetryReady();
  const toolLogs = rig.readToolLogs();

  // In-place editing tools. `edit` is included because the write-a-file test
  // in this suite already accepts it as a valid editing tool; checking only
  // `replace` would let a successful `edit` call slip past this guard.
  const inPlaceEditTools = ['replace', 'edit'];

  const readAttempt = toolLogs.find(
    (log) => log.toolRequest.name === 'read_file',
  );
  const writeAttempt = toolLogs.find(
    (log) => log.toolRequest.name === 'write_file',
  );
  const successfulReplace = toolLogs.find(
    (log) =>
      inPlaceEditTools.includes(log.toolRequest.name) &&
      log.toolRequest.success,
  );

  // The model can either investigate (and fail) or do nothing.
  // If it chose to investigate by reading, that read must have failed.
  if (readAttempt) {
    if (readAttempt.toolRequest.success) {
      console.error(
        'A read_file attempt succeeded for a non-existent file when it should have failed.',
      );
      printDebugInfo(rig, result);
    }
    expect(
      readAttempt.toolRequest.success,
      'If model tries to read the file, that attempt must fail',
    ).toBe(false);
  }

  // CRITICAL: Verify that no matter what the model did, it never successfully
  // wrote or replaced anything.
  if (writeAttempt) {
    console.error(
      'A write_file attempt was made when no file should be written.',
    );
    printDebugInfo(rig, result);
  }
  expect(
    writeAttempt,
    'write_file should not have been called',
  ).toBeUndefined();

  if (successfulReplace) {
    console.error('A successful replace occurred when it should not have.');
    printDebugInfo(rig, result);
  }
  expect(
    successfulReplace,
    'A successful replace should not have occurred',
  ).toBeUndefined();

  // Final verification: ensure the file was not created.
  const filePath = path.join(rig.testDir!, fileName);
  const fileExists = existsSync(filePath);
  expect(fileExists, 'The non-existent file should not be created').toBe(
    false,
  );
});
|
|
});
|