fix lint

2025-12-22 17:57:46 +00:00 · 2025-09-05 16:24:09 +08:00
parent c00287ac45
commit f64bf8dede
3 changed files with 175 additions and 153 deletions
--- a/.github/workflows/terminal-bench.yml
+++ b/.github/workflows/terminal-bench.yml
@@ -9,7 +9,7 @@ on:
        type: 'string'
        default: 'latest'
  release:
-    types: [published]
+    types: ['published']

 jobs:
  terminal-bench:
--- a/integration-tests/terminal-bench/ci-tasks/swe-bench-astropy-1/docker-compose.yaml
+++ b/integration-tests/terminal-bench/ci-tasks/swe-bench-astropy-1/docker-compose.yaml
@@ -4,7 +4,7 @@ services:
      dockerfile: Dockerfile
    image: ${T_BENCH_TASK_DOCKER_CLIENT_IMAGE_NAME}
    container_name: ${T_BENCH_TASK_DOCKER_CLIENT_CONTAINER_NAME}
-    command: [ "sh", "-c", "sleep infinity" ]
+    command: ['sh', '-c', 'sleep infinity']
    environment:
      - TEST_DIR=${T_BENCH_TEST_DIR}
    volumes:
--- a/integration-tests/terminal-bench/terminal-bench.test.ts
+++ b/integration-tests/terminal-bench/terminal-bench.test.ts
@@ -110,40 +110,155 @@ describe('terminal-bench integration', () => {
  }

  describe.each(testTasks)('Task: %s', (taskId) => {
-    it(`should complete ${taskId} task with oracle agent`, async () => {
-      rig.setup(`terminal-bench-oracle-${taskId}`);
+    it(
+      `should complete ${taskId} task with oracle agent`,
+      async () => {
+        rig.setup(`terminal-bench-oracle-${taskId}`);

-      const outputPath = join(outputBase, `oracle-${taskId}`);
+        const outputPath = join(outputBase, `oracle-${taskId}`);

-      // Check if ci-tasks exists
-      if (!existsSync(ciTasksPath)) {
-        console.error(`CI tasks directory does not exist: ${ciTasksPath}`);
-        throw new Error(
-          'CI tasks not found. Please ensure ci-tasks directory is present.',
-        );
-      }
+        // Check if ci-tasks exists
+        if (!existsSync(ciTasksPath)) {
+          console.error(`CI tasks directory does not exist: ${ciTasksPath}`);
+          throw new Error(
+            'CI tasks not found. Please ensure ci-tasks directory is present.',
+          );
+        }

-      // Run oracle agent on the task using ci-tasks dataset (non-blocking)
-      const args = [
-        'run',
-        '--agent',
-        'oracle',
-        '--dataset-path',
-        ciTasksPath,
-        '--task-id',
-        taskId,
-        '--output-path',
-        outputPath,
-        '--n-concurrent',
-        '1',
-      ];
+        // Run oracle agent on the task using ci-tasks dataset (non-blocking)
+        const args = [
+          'run',
+          '--agent',
+          'oracle',
+          '--dataset-path',
+          ciTasksPath,
+          '--task-id',
+          taskId,
+          '--output-path',
+          outputPath,
+          '--n-concurrent',
+          '1',
+        ];

-      try {
+        try {
+          const result = await new Promise<string>((resolve, reject) => {
+            let stdout = '';
+            let stderr = '';
+
+            const child = spawn('tb', args, { env: { ...process.env } });
+
+            child.stdout?.on('data', (data) => {
+              stdout += data.toString();
+            });
+
+            child.stderr?.on('data', (data) => {
+              stderr += data.toString();
+            });
+
+            const to = setTimeout(
+              () => {
+                child.kill();
+                reject(new Error(`Process timeout for ${taskId}`));
+              },
+              Math.max(60_000, DEFAULT_TIMEOUT_MS - 60_000),
+            ); // Leave 1 minute buffer
+
+            child.on('close', (code) => {
+              clearTimeout(to);
+              if (code !== 0) {
+                console.error(
+                  `oracle agent failed for ${taskId} with stderr:`,
+                  stderr,
+                );
+                reject(
+                  new Error(`Process exited with code ${code}: ${stderr}`),
+                );
+              } else {
+                resolve(stdout);
+              }
+            });
+
+            child.on('error', (error) => {
+              clearTimeout(to);
+              console.error('Failed to start process:', error);
+              reject(error);
+            });
+          });
+
+          // Check if the run succeeded
+          expect(result).toContain('Results Summary');
+
+          // Check if results file was created
+          // Terminal-bench creates results in a timestamped subdirectory
+          const dirs = execSync(`ls -d ${outputPath}/*/`, { encoding: 'utf-8' })
+            .trim()
+            .split('\n');
+          const latestDir = dirs[dirs.length - 1]; // Get the latest run directory
+          const resultsFile = join(latestDir, 'results.json');
+
+          expect(existsSync(resultsFile)).toBe(true);
+
+          const results = JSON.parse(readFileSync(resultsFile, 'utf-8'));
+          expect(results.accuracy).toBe(1.0); // Oracle should always succeed
+          expect(results.n_resolved).toBe(1);
+          expect(results.n_unresolved).toBe(0);
+        } catch (error) {
+          console.error(`Oracle agent failed for ${taskId}:`, error);
+          throw error;
+        }
+      },
+      DEFAULT_TIMEOUT_MS,
+    );
+
+    it(
+      `should complete ${taskId} task with qwen-code agent`,
+      async () => {
+        rig.setup(`terminal-bench-qwen-${taskId}`);
+
+        const outputPath = join(outputBase, `qwen-${taskId}`);
+
+        // Check if API key is available
+        const apiKey = process.env['OPENAI_API_KEY'];
+        if (!apiKey) {
+          throw new Error(
+            'OPENAI_API_KEY environment variable is not set. This test requires an API key to run the qwen-code agent.',
+          );
+        }
+
+        // Run qwen-code agent using spawn to avoid blocking event loop
+        const args = [
+          'run',
+          '--agent-import-path',
+          'integration-tests.terminal-bench.qwen_code:QwenCodeAgent',
+          '--agent-kwarg',
+          `api_key=${apiKey}`,
+          '--agent-kwarg',
+          `version=${process.env['QWEN_CODE_VERSION'] || 'latest'}`,
+          '--dataset-path',
+          ciTasksPath,
+          '--task-id',
+          taskId,
+          '--output-path',
+          outputPath,
+          '--n-concurrent',
+          '1',
+        ];
+
+        const env = {
+          ...process.env,
+          OPENAI_API_KEY: apiKey,
+          OPENAI_MODEL: process.env['OPENAI_MODEL'] || 'qwen3-coder-plus',
+          OPENAI_BASE_URL:
+            process.env['OPENAI_BASE_URL'] ||
+            'https://dashscope.aliyuncs.com/compatible-mode/v1',
+        };
+
+        // Use spawn with promise to avoid blocking
        const result = await new Promise<string>((resolve, reject) => {
          let stdout = '';
          let stderr = '';

-          const child = spawn('tb', args, { env: { ...process.env } });
+          const child = spawn('tb', args, { env });

          child.stdout?.on('data', (data) => {
            stdout += data.toString();
@@ -153,15 +268,12 @@ describe('terminal-bench integration', () => {
            stderr += data.toString();
          });

-          const to = setTimeout(() => {
-            child.kill();
-            reject(new Error(`Process timeout for ${taskId}`));
-          }, Math.max(60_000, DEFAULT_TIMEOUT_MS - 60_000)); // Leave 1 minute buffer
-
          child.on('close', (code) => {
-            clearTimeout(to);
            if (code !== 0) {
-              console.error(`oracle agent failed for ${taskId} with stderr:`, stderr);
+              console.error(
+                `qwen-code agent failed for ${taskId} with stderr:`,
+                stderr,
+              );
              reject(new Error(`Process exited with code ${code}: ${stderr}`));
            } else {
              resolve(stdout);
@@ -169,138 +281,48 @@ describe('terminal-bench integration', () => {
          });

          child.on('error', (error) => {
-            clearTimeout(to);
            console.error('Failed to start process:', error);
            reject(error);
          });
+
+          // Set timeout based on task
+          setTimeout(
+            () => {
+              child.kill();
+              reject(new Error(`Process timeout for ${taskId}`));
+            },
+            Math.max(60_000, DEFAULT_TIMEOUT_MS - 60_000),
+          ); // Leave 1 minute buffer
+        }).catch((error) => {
+          // This is expected if API key is not configured correctly
+          if (error instanceof Error && error.message?.includes('API')) {
+            console.warn('API configuration issue - skipping test');
+            return '';
+          }
+          throw error;
        });

-        // Check if the run succeeded
+        if (!result) return; // Skip if API configuration issue
+
+        // Check if the run completed
        expect(result).toContain('Results Summary');

-        // Check if results file was created
-        // Terminal-bench creates results in a timestamped subdirectory
+        // Check results file in timestamped subdirectory
        const dirs = execSync(`ls -d ${outputPath}/*/`, { encoding: 'utf-8' })
          .trim()
          .split('\n');
-        const latestDir = dirs[dirs.length - 1]; // Get the latest run directory
+        const latestDir = dirs[dirs.length - 1];
        const resultsFile = join(latestDir, 'results.json');

        expect(existsSync(resultsFile)).toBe(true);

        const results = JSON.parse(readFileSync(resultsFile, 'utf-8'));
-        expect(results.accuracy).toBe(1.0); // Oracle should always succeed
-        expect(results.n_resolved).toBe(1);
-        expect(results.n_unresolved).toBe(0);
-      } catch (error) {
-        console.error(`Oracle agent failed for ${taskId}:`, error);
-        throw error;
-      }
-    }, DEFAULT_TIMEOUT_MS);
-
-    it(`should complete ${taskId} task with qwen-code agent`, async () => {
-      rig.setup(`terminal-bench-qwen-${taskId}`);
-
-      const outputPath = join(outputBase, `qwen-${taskId}`);
-
-      // Check if API key is available
-      const apiKey = process.env['OPENAI_API_KEY'];
-      if (!apiKey) {
-        throw new Error(
-          'OPENAI_API_KEY environment variable is not set. This test requires an API key to run the qwen-code agent.',
-        );
-      }
-
-      // Run qwen-code agent using spawn to avoid blocking event loop
-      const args = [
-        'run',
-        '--agent-import-path',
-        'integration-tests.terminal-bench.qwen_code:QwenCodeAgent',
-        '--agent-kwarg',
-        `api_key=${apiKey}`,
-        '--agent-kwarg',
-        `version=${process.env['QWEN_CODE_VERSION'] || 'latest'}`,
-        '--dataset-path',
-        ciTasksPath,
-        '--task-id',
-        taskId,
-        '--output-path',
-        outputPath,
-        '--n-concurrent',
-        '1',
-      ];
-
-      const env = {
-        ...process.env,
-        OPENAI_API_KEY: apiKey,
-        OPENAI_MODEL: process.env['OPENAI_MODEL'] || 'qwen3-coder-plus',
-        OPENAI_BASE_URL:
-          process.env['OPENAI_BASE_URL'] ||
-          'https://dashscope.aliyuncs.com/compatible-mode/v1',
-      };
-
-      // Use spawn with promise to avoid blocking
-      const result = await new Promise<string>((resolve, reject) => {
-        let stdout = '';
-        let stderr = '';
-
-        const child = spawn('tb', args, { env });
-
-        child.stdout?.on('data', (data) => {
-          stdout += data.toString();
-        });
-
-        child.stderr?.on('data', (data) => {
-          stderr += data.toString();
-        });
-
-        child.on('close', (code) => {
-          if (code !== 0) {
-            console.error(`qwen-code agent failed for ${taskId} with stderr:`, stderr);
-            reject(new Error(`Process exited with code ${code}: ${stderr}`));
-          } else {
-            resolve(stdout);
-          }
-        });
-
-        child.on('error', (error) => {
-          console.error('Failed to start process:', error);
-          reject(error);
-        });
-
-        // Set timeout based on task
-        setTimeout(() => {
-          child.kill();
-          reject(new Error(`Process timeout for ${taskId}`));
-        }, Math.max(60_000, DEFAULT_TIMEOUT_MS - 60_000)); // Leave 1 minute buffer
-      }).catch((error) => {
-        // This is expected if API key is not configured correctly
-        if (error instanceof Error && error.message?.includes('API')) {
-          console.warn('API configuration issue - skipping test');
-          return '';
-        }
-        throw error;
-      });
-
-      if (!result) return; // Skip if API configuration issue
-
-      // Check if the run completed
-      expect(result).toContain('Results Summary');
-
-      // Check results file in timestamped subdirectory
-      const dirs = execSync(`ls -d ${outputPath}/*/`, { encoding: 'utf-8' })
-        .trim()
-        .split('\n');
-      const latestDir = dirs[dirs.length - 1];
-      const resultsFile = join(latestDir, 'results.json');
-
-      expect(existsSync(resultsFile)).toBe(true);
-
-      const results = JSON.parse(readFileSync(resultsFile, 'utf-8'));
-      // Check that the task actually completed successfully
-      expect(results).toHaveProperty('accuracy');
-      expect(results.n_resolved).toBeGreaterThan(0); // At least one task should be resolved
-      expect(results.accuracy).toBeGreaterThan(0); // Accuracy should be greater than 0
-    }, DEFAULT_TIMEOUT_MS);
+        // Check that the task actually completed successfully
+        expect(results).toHaveProperty('accuracy');
+        expect(results.n_resolved).toBeGreaterThan(0); // At least one task should be resolved
+        expect(results.accuracy).toBeGreaterThan(0); // Accuracy should be greater than 0
+      },
+      DEFAULT_TIMEOUT_MS,
+    );
  });
 });