mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
change(ci): slice files in matrix job
avoid duplicating work, avoid file discovery on each job
This commit is contained in:
parent
1a75387fa8
commit
dd0e4ab81a
2 changed files with 144 additions and 92 deletions
75
.github/workflows/tests.yml
vendored
75
.github/workflows/tests.yml
vendored
|
|
@ -21,25 +21,7 @@ jobs:
|
|||
name: "Generate slices"
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
slices: ${{ steps.matrix.outputs.slices }}
|
||||
slice_count: ${{ steps.matrix.outputs.slice_count }}
|
||||
steps:
|
||||
- name: Generate test slices
|
||||
id: matrix
|
||||
run: |
|
||||
COUNT="${{ inputs.slice_count }}"
|
||||
SLICES=$(python3 -c "import json; print(json.dumps({'slice': list(range(1, $COUNT + 1))}))")
|
||||
echo "slices=$SLICES" >> "$GITHUB_OUTPUT"
|
||||
echo "slice_count=$COUNT" >> "$GITHUB_OUTPUT"
|
||||
|
||||
test:
|
||||
name: Run tests slice
|
||||
needs: generate
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJSON(needs.generate.outputs.slices) }}
|
||||
matrix: ${{ steps.matrix.outputs.matrix }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
|
@ -48,13 +30,26 @@ jobs:
|
|||
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: test_durations.json
|
||||
# main always writes a new suffix, but jobs pick the latest one with the same prefix
|
||||
# quote from https://docs.github.com/en/actions/reference/workflows-and-actions/dependency-caching#cache-hits-and-misses
|
||||
# If you provide restore-keys, the cache action sequentially searches for any caches that match the list of restore-keys.
|
||||
# If there are no exact matches, the action searches for partial matches of the restore keys.
|
||||
# When the action finds a partial match, the most recent cache is restored to the path directory.
|
||||
key: test-durations
|
||||
|
||||
- name: Generate test slices
|
||||
id: matrix
|
||||
run: |
|
||||
MATRIX=$(python3 scripts/run_tests_parallel.py --generate-slices ${{ inputs.slice_count }})
|
||||
echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
|
||||
|
||||
test:
|
||||
name: Run tests slice ${{ matrix.slice.index }}/${{ inputs.slice_count }}
|
||||
needs: generate
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJSON(needs.generate.outputs.matrix) }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install ripgrep (prebuilt binary)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
|
@ -99,33 +94,19 @@ jobs:
|
|||
# re-download, keeping the persisted cache small and fast to restore.
|
||||
run: uv cache prune --ci
|
||||
|
||||
- name: Run tests (slice ${{ matrix.slice }}/${{ needs.generate.outputs.slice_count }})
|
||||
# Per-file isolation via scripts/run_tests.sh: discovers
|
||||
# every test_*.py file under tests/ (excluding integration/ + e2e/),
|
||||
# then runs `python -m pytest <file>` in a freshly-spawned subprocess
|
||||
- name: Run tests (slice ${{ matrix.slice.index }}/${{ inputs.slice_count }})
|
||||
# Per-file isolation via scripts/run_tests.sh: each test file runs
|
||||
# in its own freshly-spawned `python -m pytest <file>` subprocess
|
||||
# with bounded parallelism. No xdist, no shared workers, no
|
||||
# module-level state leakage between files.
|
||||
#
|
||||
# Why per-file (not per-test): per-test spawn cost (~250ms × 17k
|
||||
# tests = 70min CPU minimum) blew the wall-clock budget. Per-file
|
||||
# spawn (~250ms × ~850 files = ~3.5min) fits while still giving
|
||||
# every file a fresh interpreter — the only isolation boundary
|
||||
# that matters in practice (cross-file leakage was the original
|
||||
# flake source; intra-file is the test author's responsibility).
|
||||
#
|
||||
# Why drop xdist entirely: xdist's persistent workers accumulate
|
||||
# state across files, which is exactly the leakage we wanted to
|
||||
# fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
|
||||
# the job with cleaner semantics.
|
||||
#
|
||||
# Matrix slicing (--slice I/N): files are distributed across N
|
||||
# jobs by cached duration (LPT algorithm) so each job gets
|
||||
# roughly equal wall time. Without a cache, files default to 2s
|
||||
# estimate and get split roughly evenly by count — still correct,
|
||||
# just not perfectly balanced.
|
||||
# File list is pre-computed by the generate job (--generate-slices)
|
||||
# which runs LPT distribution once and passes the file list to each
|
||||
# matrix job via --files. Previously each job re-discovered files and
|
||||
# re-ran LPT independently — redundant N times.
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
scripts/run_tests.sh --slice ${{ matrix.slice }}/${{ needs.generate.outputs.slice_count }}
|
||||
scripts/run_tests.sh --files '${{ matrix.slice.files }}'
|
||||
env:
|
||||
# Ensure tests don't accidentally call real APIs
|
||||
OPENROUTER_API_KEY: ""
|
||||
|
|
@ -135,7 +116,7 @@ jobs:
|
|||
- name: Upload per-slice durations
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: test-durations-slice-${{ matrix.slice }}
|
||||
name: test-durations-slice-${{ matrix.slice.index }}
|
||||
path: test_durations.json
|
||||
retention-days: 1
|
||||
|
||||
|
|
|
|||
|
|
@ -493,38 +493,27 @@ def _save_durations(
|
|||
path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n")
|
||||
|
||||
|
||||
def _slice_files(
|
||||
def _compute_lpt_slices(
|
||||
files: List[Path],
|
||||
slice_index: int,
|
||||
slice_count: int,
|
||||
durations: dict[str, float],
|
||||
repo_root: Path,
|
||||
) -> List[Path]:
|
||||
"""Return the subset of *files* belonging to slice *slice_index*.
|
||||
) -> List[List[Path]]:
|
||||
"""Distribute files across N slices using LPT (Longest Processing Time first).
|
||||
|
||||
Uses **Longest Processing Time first** (LPT) distribution: sort files
|
||||
by estimated duration descending, then greedily assign each file to
|
||||
the slice with the smallest accumulated time so far. This minimizes
|
||||
the makespan (max slice duration) and keeps CI jobs balanced.
|
||||
Sorts files by estimated duration descending, then greedily assigns each
|
||||
file to the slice with the smallest accumulated time so far. This
|
||||
minimizes the makespan (max slice duration) and keeps CI jobs balanced.
|
||||
|
||||
Files with no cached duration get a default estimate of 2.0s (roughly
|
||||
the P50 from profiling). This means first-time ``--slice`` runs
|
||||
(no cache) still get reasonable distribution, and new files don't
|
||||
all land in one slice.
|
||||
the P50 from profiling). This means first-time runs (no cache) still
|
||||
get reasonable distribution, and new files don't all land in one slice.
|
||||
|
||||
``slice_index`` is 1-indexed (1..slice_count) for ergonomics —
|
||||
``--slice 1/4`` reads more naturally than ``--slice 0/4``.
|
||||
Returns a list of N file-lists, one per slice (0-indexed).
|
||||
"""
|
||||
if slice_count < 2:
|
||||
return files
|
||||
if not (1 <= slice_index <= slice_count):
|
||||
print(
|
||||
f"error: --slice index must be 1..{slice_count}, got {slice_index}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(2)
|
||||
return [files]
|
||||
|
||||
# Build (file, estimated_duration) pairs.
|
||||
default_dur = 2.0
|
||||
file_durs: List[Tuple[Path, float]] = []
|
||||
for f in files:
|
||||
|
|
@ -541,15 +530,47 @@ def _slice_files(
|
|||
bucket_totals: List[float] = [0.0] * slice_count
|
||||
|
||||
for f, dur in file_durs:
|
||||
# Find the least-loaded bucket.
|
||||
min_idx = min(range(slice_count), key=lambda i: bucket_totals[i])
|
||||
bucket_files[min_idx].append(f)
|
||||
bucket_totals[min_idx] += dur
|
||||
|
||||
# Print slice summary for visibility.
|
||||
return bucket_files
|
||||
|
||||
|
||||
def _slice_files(
|
||||
files: List[Path],
|
||||
slice_index: int,
|
||||
slice_count: int,
|
||||
durations: dict[str, float],
|
||||
repo_root: Path,
|
||||
) -> List[Path]:
|
||||
"""Return the subset of *files* belonging to slice *slice_index*.
|
||||
|
||||
Uses :func:`_compute_lpt_slices` for LPT distribution.
|
||||
|
||||
``slice_index`` is 1-indexed (1..slice_count) for ergonomics —
|
||||
``--slice 1/4`` reads more naturally than ``--slice 0/4``.
|
||||
"""
|
||||
if slice_count < 2:
|
||||
return files
|
||||
if not (1 <= slice_index <= slice_count):
|
||||
print(
|
||||
f"error: --slice index must be 1..{slice_count}, got {slice_index}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(2)
|
||||
|
||||
bucket_files = _compute_lpt_slices(files, slice_count, durations, repo_root)
|
||||
|
||||
target = bucket_files[slice_index - 1]
|
||||
target_dur = bucket_totals[slice_index - 1]
|
||||
total_dur = sum(bucket_totals)
|
||||
target_dur = sum(
|
||||
durations.get(_format_file(f, repo_root), 2.0) for f in target
|
||||
)
|
||||
total_dur = sum(
|
||||
durations.get(_format_file(f, repo_root), 2.0)
|
||||
for bucket in bucket_files
|
||||
for f in bucket
|
||||
)
|
||||
print(
|
||||
f"Slice {slice_index}/{slice_count}: {len(target)} files "
|
||||
f"(~{target_dur:.0f}s estimated of {total_dur:.0f}s total)",
|
||||
|
|
@ -604,6 +625,27 @@ def main() -> int:
|
|||
"Env: HERMES_TEST_SLICE (format: I/N)."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--generate-slices",
|
||||
metavar="N",
|
||||
type=int,
|
||||
help=(
|
||||
"Discover test files, distribute them across N slices using "
|
||||
"LPT on cached durations, and print a JSON matrix to stdout "
|
||||
"then exit (no tests run). The JSON has the shape "
|
||||
"'{\"slices\": [{\"index\": 1, \"files\": [\"tests/foo.py\", ...]}, ...]}' "
|
||||
"so the CI generate job can feed it directly into a matrix."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--files",
|
||||
metavar="LIST",
|
||||
help=(
|
||||
"Explicit colon-separated list of test files to run. Bypasses "
|
||||
"discovery entirely — used by CI matrix jobs that receive their "
|
||||
"file list from the generate job."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"paths_positional",
|
||||
nargs="*",
|
||||
|
|
@ -642,26 +684,48 @@ def main() -> int:
|
|||
|
||||
repo_root = Path(__file__).resolve().parent.parent
|
||||
|
||||
# Resolve discovery roots: positional path args override --paths if any
|
||||
# were supplied, otherwise --paths (which itself defaults to 'tests').
|
||||
if args.paths_positional:
|
||||
# Positionals can be directories OR explicit .py files. Either is
|
||||
# fine — _discover_files handles both via rglob('test_*.py') for
|
||||
# dirs and direct inclusion for files.
|
||||
roots = [repo_root / p for p in args.paths_positional]
|
||||
# --files: explicit file list from the CI generate job — skip discovery.
|
||||
if args.files:
|
||||
files = [repo_root / f for f in args.files.split(":") if f.strip()]
|
||||
roots = []
|
||||
else:
|
||||
roots = [repo_root / p for p in args.paths.split(":") if p]
|
||||
# Resolve discovery roots: positional path args override --paths if any
|
||||
# were supplied, otherwise --paths (which itself defaults to 'tests').
|
||||
if args.paths_positional:
|
||||
roots = [repo_root / p for p in args.paths_positional]
|
||||
else:
|
||||
roots = [repo_root / p for p in args.paths.split(":") if p]
|
||||
|
||||
if args.include_integration:
|
||||
# Caller takes responsibility — typically used via explicit -k filter.
|
||||
global _SKIP_PARTS # noqa: PLW0603 — config knob
|
||||
_SKIP_PARTS = set()
|
||||
if args.include_integration:
|
||||
# Caller takes responsibility — typically used via explicit -k filter.
|
||||
global _SKIP_PARTS # noqa: PLW0603 — config knob
|
||||
_SKIP_PARTS = set()
|
||||
|
||||
files = _discover_files(roots)
|
||||
|
||||
files = _discover_files(roots)
|
||||
if not files:
|
||||
print(f"No test files discovered under {[str(r) for r in roots]}", file=sys.stderr)
|
||||
print(f"No test files to run", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# --generate-slices: compute LPT distribution and emit JSON, then exit.
|
||||
if args.generate_slices is not None:
|
||||
durations = _load_durations(repo_root)
|
||||
slices = _compute_lpt_slices(
|
||||
files, args.generate_slices, durations, repo_root
|
||||
)
|
||||
matrix = {
|
||||
"slice": [
|
||||
{
|
||||
"index": i + 1,
|
||||
"files": ":".join(_format_file(f, repo_root) for f in bucket),
|
||||
}
|
||||
for i, bucket in enumerate(slices)
|
||||
]
|
||||
}
|
||||
# Print to stdout so the CI step can capture it with $().
|
||||
print(json.dumps(matrix))
|
||||
return 0
|
||||
|
||||
# Count individual tests per file
|
||||
test_counts = _approximately_count_tests(files, repo_root)
|
||||
approx_total_tests = sum(test_counts.values())
|
||||
|
|
@ -675,12 +739,19 @@ def main() -> int:
|
|||
test_counts = {f: test_counts[f] for f in files if f in test_counts}
|
||||
approx_total_tests = sum(test_counts.values())
|
||||
|
||||
print(
|
||||
f"Discovered {len(files)} test files (~{approx_total_tests} tests) under "
|
||||
f"{[str(r.relative_to(repo_root)) if r.is_relative_to(repo_root) else str(r) for r in roots]}; "
|
||||
f"running with -j {args.jobs}",
|
||||
flush=True,
|
||||
)
|
||||
if roots:
|
||||
roots_str = [str(r.relative_to(repo_root)) if r.is_relative_to(repo_root) else str(r) for r in roots]
|
||||
print(
|
||||
f"Discovered {len(files)} test files (~{approx_total_tests} tests) under "
|
||||
f"{roots_str}; running with -j {args.jobs}",
|
||||
flush=True,
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f"Running {len(files)} test files (~{approx_total_tests} tests) "
|
||||
f"with -j {args.jobs}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
# Capture and print on completion (out-of-order is fine — keeps the
|
||||
# terminal clean rather than interleaving N parallel pytest outputs).
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue