# Mirror of https://github.com/QwenLM/qwen-code.git
# Synced 2025-12-19 09:33:53 +00:00
#
# Commit message:
#   * integrate terminal bench
#   * fix ci
#   * add ci
#   * fix ci
#   * fix ci
#   * parallel ci
#   * handle timeout
#   * fix lint
#   * trigger
#
# File: 97 lines, 2.9 KiB, YAML
name: 'Terminal Bench Tests'

# Events that start this workflow.
on:
  # Pushes to any branch whose name begins with `feat/tbench`.
  push:
    branches:
      - 'feat/tbench*'

  # Manual runs (Actions UI or API).
  # NOTE(review): no step visible in this workflow reads `inputs.version`;
  # confirm whether it is consumed elsewhere or can be dropped.
  workflow_dispatch:
    inputs:
      version:
        description: 'The version to test.'
        required: true
        type: 'string'
        default: 'latest'

  # Releases, at the moment they are published.
  release:
    types:
      - 'published'

jobs:
  # One job per Terminal Bench task; each matrix entry runs the oracle
  # script and then the qwen script for its task, and uploads the output.
  terminal-bench:
    name: 'Terminal Bench (Task: ${{ matrix.task_id }})'
    runs-on: 'ubuntu-latest'
    strategy:
      # Let the remaining tasks finish even if one of them fails.
      fail-fast: false
      matrix:
        task_id:
          - 'hello-world'
          - 'swe-bench-astropy-1'
    steps:
      - name: 'Checkout'
        uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5
        with:
          submodules: 'recursive'

      - name: 'Install uv and set the python version'
        uses: 'astral-sh/setup-uv@557e51de59eb14aaaba2ed9621916900a91d50c6' # v6
        with:
          # Quoted so the version stays a string rather than a float.
          python-version: '3.12'

      - name: 'Set up Node.js 20.x'
        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4
        with:
          node-version: '20.x'
          cache: 'npm'
          cache-dependency-path: 'package-lock.json'
          registry-url: 'https://registry.npmjs.org/'

      # Set npm's fetch retry/timeout settings explicitly before installing.
      - name: 'Configure npm for rate limiting'
        run: |-
          npm config set fetch-retry-mintimeout 20000
          npm config set fetch-retry-maxtimeout 120000
          npm config set fetch-retries 5
          npm config set fetch-timeout 300000

      - name: 'Install dependencies'
        run: |-
          npm ci --prefer-offline --no-audit --progress=false

      - name: 'Build project'
        run: |-
          npm run build

      # Runs the `test:terminal-bench:oracle` npm script for this task.
      # No model credentials are passed to this step.
      - name: 'Run Terminal Bench Oracle (task: ${{ matrix.task_id }})'
        run: 'npm run test:terminal-bench:oracle'
        timeout-minutes: 30
        env:
          CI: 'true'
          NODE_ENV: 'test'
          VERBOSE: 'true'
          KEEP_OUTPUT: 'true'
          TB_TASK_ID: '${{ matrix.task_id }}'
          TB_TIMEOUT_MINUTES: '30'

      # Runs the `test:terminal-bench:qwen` npm script for this task, with
      # the model endpoint and credentials supplied from repository secrets.
      - name: 'Run Terminal Bench Qwen (task: ${{ matrix.task_id }})'
        run: 'npm run test:terminal-bench:qwen'
        timeout-minutes: 30
        env:
          OPENAI_API_KEY: '${{ secrets.OPENAI_API_KEY }}'
          OPENAI_BASE_URL: '${{ secrets.OPENAI_BASE_URL }}'
          OPENAI_MODEL: '${{ secrets.OPENAI_MODEL }}'
          CI: 'true'
          NODE_ENV: 'test'
          VERBOSE: 'true'
          KEEP_OUTPUT: 'true'
          TB_TASK_ID: '${{ matrix.task_id }}'
          TB_TIMEOUT_MINUTES: '30'

      # `always()` keeps the upload running when an earlier step failed or
      # timed out, so the output of a failing task is still collected.
      - name: 'Upload test artifacts'
        if: 'always()'
        uses: 'actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b' # ratchet:actions/upload-artifact@v4
        with:
          name: 'terminal-bench-${{ matrix.task_id }}-output'
          path: |
            .integration-tests/**
            !.integration-tests/**/*.lock
            !.integration-tests/**/tb.lock
            integration-tests/**/*.log
          # upload-artifact >= v4.4.0 skips hidden files and folders by
          # default. `.integration-tests` is dot-prefixed, so without this
          # flag its contents would be left out of the artifact.
          include-hidden-files: true
          if-no-files-found: 'warn'
          retention-days: 7