mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
change(ci): slice files in matrix job
avoid duplicating work, avoid file discovery on each job
This commit is contained in:
parent
1a75387fa8
commit
dd0e4ab81a
2 changed files with 144 additions and 92 deletions
75
.github/workflows/tests.yml
vendored
75
.github/workflows/tests.yml
vendored
|
|
@ -21,25 +21,7 @@ jobs:
|
|||
name: "Generate slices"
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
slices: ${{ steps.matrix.outputs.slices }}
|
||||
slice_count: ${{ steps.matrix.outputs.slice_count }}
|
||||
steps:
|
||||
- name: Generate test slices
|
||||
id: matrix
|
||||
run: |
|
||||
COUNT="${{ inputs.slice_count }}"
|
||||
SLICES=$(python3 -c "import json; print(json.dumps({'slice': list(range(1, $COUNT + 1))}))")
|
||||
echo "slices=$SLICES" >> "$GITHUB_OUTPUT"
|
||||
echo "slice_count=$COUNT" >> "$GITHUB_OUTPUT"
|
||||
|
||||
test:
|
||||
name: Run tests slice
|
||||
needs: generate
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJSON(needs.generate.outputs.slices) }}
|
||||
matrix: ${{ steps.matrix.outputs.matrix }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
|
@ -48,13 +30,26 @@ jobs:
|
|||
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: test_durations.json
|
||||
# main always writes a new suffix, but jobs pick the latest one with the same prefix
|
||||
# quote from https://docs.github.com/en/actions/reference/workflows-and-actions/dependency-caching#cache-hits-and-misses
|
||||
# If you provide restore-keys, the cache action sequentially searches for any caches that match the list of restore-keys.
|
||||
# If there are no exact matches, the action searches for partial matches of the restore keys.
|
||||
# When the action finds a partial match, the most recent cache is restored to the path directory.
|
||||
key: test-durations
|
||||
|
||||
- name: Generate test slices
|
||||
id: matrix
|
||||
run: |
|
||||
MATRIX=$(python3 scripts/run_tests_parallel.py --generate-slices ${{ inputs.slice_count }})
|
||||
echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
|
||||
|
||||
test:
|
||||
name: Run tests slice ${{ matrix.slice.index }}/${{ inputs.slice_count }}
|
||||
needs: generate
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJSON(needs.generate.outputs.matrix) }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install ripgrep (prebuilt binary)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
|
@ -99,33 +94,19 @@ jobs:
|
|||
# re-download, keeping the persisted cache small and fast to restore.
|
||||
run: uv cache prune --ci
|
||||
|
||||
- name: Run tests (slice ${{ matrix.slice }}/${{ needs.generate.outputs.slice_count }})
|
||||
# Per-file isolation via scripts/run_tests.sh: discovers
|
||||
# every test_*.py file under tests/ (excluding integration/ + e2e/),
|
||||
# then runs `python -m pytest <file>` in a freshly-spawned subprocess
|
||||
- name: Run tests (slice ${{ matrix.slice.index }}/${{ inputs.slice_count }})
|
||||
# Per-file isolation via scripts/run_tests.sh: each test file runs
|
||||
# in its own freshly-spawned `python -m pytest <file>` subprocess
|
||||
# with bounded parallelism. No xdist, no shared workers, no
|
||||
# module-level state leakage between files.
|
||||
#
|
||||
# Why per-file (not per-test): per-test spawn cost (~250ms × 17k
|
||||
# tests = 70min CPU minimum) blew the wall-clock budget. Per-file
|
||||
# spawn (~250ms × ~850 files = ~3.5min) fits while still giving
|
||||
# every file a fresh interpreter — the only isolation boundary
|
||||
# that matters in practice (cross-file leakage was the original
|
||||
# flake source; intra-file is the test author's responsibility).
|
||||
#
|
||||
# Why drop xdist entirely: xdist's persistent workers accumulate
|
||||
# state across files, which is exactly the leakage we wanted to
|
||||
# fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
|
||||
# the job with cleaner semantics.
|
||||
#
|
||||
# Matrix slicing (--slice I/N): files are distributed across N
|
||||
# jobs by cached duration (LPT algorithm) so each job gets
|
||||
# roughly equal wall time. Without a cache, files default to 2s
|
||||
# estimate and get split roughly evenly by count — still correct,
|
||||
# just not perfectly balanced.
|
||||
# File list is pre-computed by the generate job (--generate-slices)
|
||||
# which runs LPT distribution once and passes the file list to each
|
||||
# matrix job via --files. Previously each job re-discovered files and
|
||||
# re-ran LPT independently — redundant N times.
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
scripts/run_tests.sh --slice ${{ matrix.slice }}/${{ needs.generate.outputs.slice_count }}
|
||||
scripts/run_tests.sh --files '${{ matrix.slice.files }}'
|
||||
env:
|
||||
# Ensure tests don't accidentally call real APIs
|
||||
OPENROUTER_API_KEY: ""
|
||||
|
|
@ -135,7 +116,7 @@ jobs:
|
|||
- name: Upload per-slice durations
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: test-durations-slice-${{ matrix.slice }}
|
||||
name: test-durations-slice-${{ matrix.slice.index }}
|
||||
path: test_durations.json
|
||||
retention-days: 1
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue