# Terminal Bench Tests
#
# Builds the project and, for every task in the matrix, runs the Terminal
# Bench npm scripts: first the "oracle" script, then the "qwen" script.
# Whatever the runs leave behind is uploaded as a per-task artifact.
name: 'Terminal Bench Tests'

on:
  # Any push to a branch whose name starts with `feat/tbench`.
  push:
    branches:
      - 'feat/tbench*'
  # Manual runs from the Actions UI / API.
  workflow_dispatch:
    inputs:
      # NOTE(review): no step in this workflow reads `inputs.version`;
      # confirm whether it should select the build under test or be removed.
      version:
        description: 'The version to test.'
        required: true
        type: 'string'
        default: 'latest'
  # Every published release.
  release:
    types: ['published']

# Least privilege for the GITHUB_TOKEN: the only consumer in this workflow is
# the checkout step, which needs read access to the repository contents.
permissions:
  contents: 'read'

jobs:
  terminal-bench:
    name: 'Terminal Bench (Task: ${{ matrix.task_id }})'
    runs-on: 'ubuntu-latest'
    strategy:
      # Keep the other matrix tasks running when one of them fails.
      fail-fast: false
      matrix:
        task_id:
          - 'hello-world'
          - 'swe-bench-astropy-1'
    steps:
      - name: 'Checkout'
        uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8'  # ratchet:actions/checkout@v5
        with:
          submodules: 'recursive'

      - name: 'Install uv and set the python version'
        uses: 'astral-sh/setup-uv@557e51de59eb14aaaba2ed9621916900a91d50c6'  # v6
        with:
          python-version: '3.12'

      - name: 'Set up Node.js 20.x'
        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020'  # ratchet:actions/setup-node@v4
        with:
          node-version: '20.x'
          cache: 'npm'
          cache-dependency-path: 'package-lock.json'
          registry-url: 'https://registry.npmjs.org/'

      # Raise npm's retry count and timeouts (values are in milliseconds) so
      # that a throttled or slow registry does not fail the install outright.
      - name: 'Configure npm for rate limiting'
        run: |-
          npm config set fetch-retry-mintimeout 20000
          npm config set fetch-retry-maxtimeout 120000
          npm config set fetch-retries 5
          npm config set fetch-timeout 300000

      - name: 'Install dependencies'
        run: |-
          npm ci --prefer-offline --no-audit --progress=false

      - name: 'Build project'
        run: |-
          npm run build

      # Oracle run for the current matrix task. If it fails, the Qwen step
      # below is skipped (default step behaviour); artifacts still upload.
      - name: 'Run Terminal Bench Oracle (task: ${{ matrix.task_id }})'
        run: 'npm run test:terminal-bench:oracle'
        timeout-minutes: 30
        env:
          CI: 'true'
          NODE_ENV: 'test'
          VERBOSE: 'true'
          KEEP_OUTPUT: 'true'
          TB_TASK_ID: '${{ matrix.task_id }}'
          TB_TIMEOUT_MINUTES: '30'

      # Qwen run for the same task. The OPENAI_* settings come from
      # repository secrets and are exposed to this step only.
      - name: 'Run Terminal Bench Qwen (task: ${{ matrix.task_id }})'
        run: 'npm run test:terminal-bench:qwen'
        timeout-minutes: 30
        env:
          OPENAI_API_KEY: '${{ secrets.OPENAI_API_KEY }}'
          OPENAI_BASE_URL: '${{ secrets.OPENAI_BASE_URL }}'
          OPENAI_MODEL: '${{ secrets.OPENAI_MODEL }}'
          CI: 'true'
          NODE_ENV: 'test'
          VERBOSE: 'true'
          KEEP_OUTPUT: 'true'
          TB_TASK_ID: '${{ matrix.task_id }}'
          TB_TIMEOUT_MINUTES: '30'

      # Runs even when a previous step failed, so the output of a failing
      # task is still available for inspection.
      - name: 'Upload test artifacts'
        if: 'always()'
        uses: 'actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b'  # ratchet:actions/upload-artifact@v4
        with:
          name: 'terminal-bench-${{ matrix.task_id }}-output'
          path: |-
            .integration-tests/**
            !.integration-tests/**/*.lock
            !.integration-tests/**/tb.lock
            integration-tests/**/*.log
          # upload-artifact v4.4.0 and later skip dot-prefixed files and
          # directories unless this is enabled; without it the
          # `.integration-tests` tree above is left out of the artifact.
          include-hidden-files: true
          if-no-files-found: 'warn'
          retention-days: 7