mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-22 17:57:46 +00:00
fix lint
This commit is contained in:
2
.github/workflows/terminal-bench.yml
vendored
2
.github/workflows/terminal-bench.yml
vendored
@@ -9,7 +9,7 @@ on:
|
||||
type: 'string'
|
||||
default: 'latest'
|
||||
release:
|
||||
types: [published]
|
||||
types: ['published']
|
||||
|
||||
jobs:
|
||||
terminal-bench:
|
||||
|
||||
@@ -4,7 +4,7 @@ services:
|
||||
dockerfile: Dockerfile
|
||||
image: ${T_BENCH_TASK_DOCKER_CLIENT_IMAGE_NAME}
|
||||
container_name: ${T_BENCH_TASK_DOCKER_CLIENT_CONTAINER_NAME}
|
||||
command: [ "sh", "-c", "sleep infinity" ]
|
||||
command: ['sh', '-c', 'sleep infinity']
|
||||
environment:
|
||||
- TEST_DIR=${T_BENCH_TEST_DIR}
|
||||
volumes:
|
||||
|
||||
@@ -110,7 +110,9 @@ describe('terminal-bench integration', () => {
|
||||
}
|
||||
|
||||
describe.each(testTasks)('Task: %s', (taskId) => {
|
||||
it(`should complete ${taskId} task with oracle agent`, async () => {
|
||||
it(
|
||||
`should complete ${taskId} task with oracle agent`,
|
||||
async () => {
|
||||
rig.setup(`terminal-bench-oracle-${taskId}`);
|
||||
|
||||
const outputPath = join(outputBase, `oracle-${taskId}`);
|
||||
@@ -153,16 +155,24 @@ describe('terminal-bench integration', () => {
|
||||
stderr += data.toString();
|
||||
});
|
||||
|
||||
const to = setTimeout(() => {
|
||||
const to = setTimeout(
|
||||
() => {
|
||||
child.kill();
|
||||
reject(new Error(`Process timeout for ${taskId}`));
|
||||
}, Math.max(60_000, DEFAULT_TIMEOUT_MS - 60_000)); // Leave 1 minute buffer
|
||||
},
|
||||
Math.max(60_000, DEFAULT_TIMEOUT_MS - 60_000),
|
||||
); // Leave 1 minute buffer
|
||||
|
||||
child.on('close', (code) => {
|
||||
clearTimeout(to);
|
||||
if (code !== 0) {
|
||||
console.error(`oracle agent failed for ${taskId} with stderr:`, stderr);
|
||||
reject(new Error(`Process exited with code ${code}: ${stderr}`));
|
||||
console.error(
|
||||
`oracle agent failed for ${taskId} with stderr:`,
|
||||
stderr,
|
||||
);
|
||||
reject(
|
||||
new Error(`Process exited with code ${code}: ${stderr}`),
|
||||
);
|
||||
} else {
|
||||
resolve(stdout);
|
||||
}
|
||||
@@ -196,9 +206,13 @@ describe('terminal-bench integration', () => {
|
||||
console.error(`Oracle agent failed for ${taskId}:`, error);
|
||||
throw error;
|
||||
}
|
||||
}, DEFAULT_TIMEOUT_MS);
|
||||
},
|
||||
DEFAULT_TIMEOUT_MS,
|
||||
);
|
||||
|
||||
it(`should complete ${taskId} task with qwen-code agent`, async () => {
|
||||
it(
|
||||
`should complete ${taskId} task with qwen-code agent`,
|
||||
async () => {
|
||||
rig.setup(`terminal-bench-qwen-${taskId}`);
|
||||
|
||||
const outputPath = join(outputBase, `qwen-${taskId}`);
|
||||
@@ -256,7 +270,10 @@ describe('terminal-bench integration', () => {
|
||||
|
||||
child.on('close', (code) => {
|
||||
if (code !== 0) {
|
||||
console.error(`qwen-code agent failed for ${taskId} with stderr:`, stderr);
|
||||
console.error(
|
||||
`qwen-code agent failed for ${taskId} with stderr:`,
|
||||
stderr,
|
||||
);
|
||||
reject(new Error(`Process exited with code ${code}: ${stderr}`));
|
||||
} else {
|
||||
resolve(stdout);
|
||||
@@ -269,10 +286,13 @@ describe('terminal-bench integration', () => {
|
||||
});
|
||||
|
||||
// Set timeout based on task
|
||||
setTimeout(() => {
|
||||
setTimeout(
|
||||
() => {
|
||||
child.kill();
|
||||
reject(new Error(`Process timeout for ${taskId}`));
|
||||
}, Math.max(60_000, DEFAULT_TIMEOUT_MS - 60_000)); // Leave 1 minute buffer
|
||||
},
|
||||
Math.max(60_000, DEFAULT_TIMEOUT_MS - 60_000),
|
||||
); // Leave 1 minute buffer
|
||||
}).catch((error) => {
|
||||
// This is expected if API key is not configured correctly
|
||||
if (error instanceof Error && error.message?.includes('API')) {
|
||||
@@ -301,6 +321,8 @@ describe('terminal-bench integration', () => {
|
||||
expect(results).toHaveProperty('accuracy');
|
||||
expect(results.n_resolved).toBeGreaterThan(0); // At least one task should be resolved
|
||||
expect(results.accuracy).toBeGreaterThan(0); // Accuracy should be greater than 0
|
||||
}, DEFAULT_TIMEOUT_MS);
|
||||
},
|
||||
DEFAULT_TIMEOUT_MS,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user