change(ci): slice files in matrix job

avoid duplicating work, avoid file discovery on each job
This commit is contained in:
ethernet 2026-06-26 21:52:44 -04:00
parent 1a75387fa8
commit dd0e4ab81a
2 changed files with 144 additions and 92 deletions

View file

@ -21,25 +21,7 @@ jobs:
name: "Generate slices"
runs-on: ubuntu-latest
outputs:
slices: ${{ steps.matrix.outputs.slices }}
slice_count: ${{ steps.matrix.outputs.slice_count }}
steps:
- name: Generate test slices
id: matrix
run: |
COUNT="${{ inputs.slice_count }}"
SLICES=$(python3 -c "import json; print(json.dumps({'slice': list(range(1, $COUNT + 1))}))")
echo "slices=$SLICES" >> "$GITHUB_OUTPUT"
echo "slice_count=$COUNT" >> "$GITHUB_OUTPUT"
test:
name: Run tests slice
needs: generate
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix: ${{ fromJSON(needs.generate.outputs.slices) }}
matrix: ${{ steps.matrix.outputs.matrix }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@ -48,13 +30,26 @@ jobs:
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: test_durations.json
# main always writes a new suffix, but jobs pick the latest one with the same prefix
# quote from https://docs.github.com/en/actions/reference/workflows-and-actions/dependency-caching#cache-hits-and-misses
# If you provide restore-keys, the cache action sequentially searches for any caches that match the list of restore-keys.
# If there are no exact matches, the action searches for partial matches of the restore keys.
# When the action finds a partial match, the most recent cache is restored to the path directory.
key: test-durations
- name: Generate test slices
id: matrix
run: |
MATRIX=$(python3 scripts/run_tests_parallel.py --generate-slices ${{ inputs.slice_count }})
echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
test:
name: Run tests slice ${{ matrix.slice.index }}/${{ inputs.slice_count }}
needs: generate
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix: ${{ fromJSON(needs.generate.outputs.matrix) }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install ripgrep (prebuilt binary)
run: |
set -euo pipefail
@ -99,33 +94,19 @@ jobs:
# re-download, keeping the persisted cache small and fast to restore.
run: uv cache prune --ci
- name: Run tests (slice ${{ matrix.slice }}/${{ needs.generate.outputs.slice_count }})
# Per-file isolation via scripts/run_tests.sh: discovers
# every test_*.py file under tests/ (excluding integration/ + e2e/),
# then runs `python -m pytest <file>` in a freshly-spawned subprocess
- name: Run tests (slice ${{ matrix.slice.index }}/${{ inputs.slice_count }})
# Per-file isolation via scripts/run_tests.sh: each test file runs
# in its own freshly-spawned `python -m pytest <file>` subprocess
# with bounded parallelism. No xdist, no shared workers, no
# module-level state leakage between files.
#
# Why per-file (not per-test): per-test spawn cost (~250ms × 17k
# tests = 70min CPU minimum) blew the wall-clock budget. Per-file
# spawn (~250ms × ~850 files = ~3.5min) fits while still giving
# every file a fresh interpreter — the only isolation boundary
# that matters in practice (cross-file leakage was the original
# flake source; intra-file is the test author's responsibility).
#
# Why drop xdist entirely: xdist's persistent workers accumulate
# state across files, which is exactly the leakage we wanted to
# fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
# the job with cleaner semantics.
#
# Matrix slicing (--slice I/N): files are distributed across N
# jobs by cached duration (LPT algorithm) so each job gets
# roughly equal wall time. Without a cache, files default to 2s
# estimate and get split roughly evenly by count — still correct,
# just not perfectly balanced.
# File list is pre-computed by the generate job (--generate-slices)
# which runs LPT distribution once and passes the file list to each
# matrix job via --files. Previously each job re-discovered files and
# re-ran LPT independently — redundant N times.
run: |
source .venv/bin/activate
scripts/run_tests.sh --slice ${{ matrix.slice }}/${{ needs.generate.outputs.slice_count }}
scripts/run_tests.sh --files '${{ matrix.slice.files }}'
env:
# Ensure tests don't accidentally call real APIs
OPENROUTER_API_KEY: ""
@ -135,7 +116,7 @@ jobs:
- name: Upload per-slice durations
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: test-durations-slice-${{ matrix.slice }}
name: test-durations-slice-${{ matrix.slice.index }}
path: test_durations.json
retention-days: 1