From 45540cfb5ef1e30c71d46166a171d88101e8fcb7 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Fri, 19 Jun 2026 16:46:11 -0500 Subject: [PATCH] ci: run only the lanes a PR affects (python/frontend/site) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Heavy PR checks run on every PR because the workflows deliberately avoid `on.paths` filters — a path-gated workflow leaves its required check pending forever when no matching file changes, blocking merge. So a docs-only PR still spins up the TypeScript matrix, the full Python suite, and ruff/ty. Keep every workflow triggering on every PR (checks always report) but gate the expensive *steps* on what the PR touches. Skipping a step (not the job) leaves the job green, so required checks never hang — the same idiom already proven in contributor-check.yml. A classifier (scripts/ci/classify_changes.py) maps the PR diff to three lanes — python, frontend, site — surfaced as step outputs by a composite action (.github/actions/detect-changes). Fail-open: an empty diff or any .github/ change runs everything; python is a denylist (skipped only when every file is provably prose or a frontend-only package); skills/**/SKILL.md counts as python-relevant since the skill-doc tests read that tree. Non-PR events always run the full pipeline. 
--- .github/actions/detect-changes/action.yml | 48 ++++++++++++++++ .github/workflows/docs-site-checks.yml | 17 ++++++ .github/workflows/lint.yml | 33 ++++++++++- .github/workflows/tests.yml | 30 ++++++++++ .github/workflows/typecheck.yml | 25 +++++++-- scripts/ci/classify_changes.py | 68 +++++++++++++++++++++++ tests/ci/test_classify_changes.py | 56 +++++++++++++++++++ 7 files changed, 272 insertions(+), 5 deletions(-) create mode 100644 .github/actions/detect-changes/action.yml create mode 100644 scripts/ci/classify_changes.py create mode 100644 tests/ci/test_classify_changes.py diff --git a/.github/actions/detect-changes/action.yml b/.github/actions/detect-changes/action.yml new file mode 100644 index 00000000000..6a67530d7f2 --- /dev/null +++ b/.github/actions/detect-changes/action.yml @@ -0,0 +1,48 @@ +name: Detect affected areas +description: >- + Classify a PR's changed files into CI work categories (python, frontend, + site) so heavy jobs can skip work they cannot be affected by. Outputs are + always "true" on push/dispatch events and fail open (everything "true") when + the diff cannot be computed — a skipped category must never be a false + negative. + +# The caller must check out the repo with `fetch-depth: 0` BEFORE using this +# action, so both the PR base and head commits are present for `git diff`. + +outputs: + python: + description: Run Python tests / ruff / ty / windows-footguns. + value: ${{ steps.classify.outputs.python }} + frontend: + description: Run the TypeScript typecheck matrix + desktop build. + value: ${{ steps.classify.outputs.frontend }} + site: + description: Build the Docusaurus docs site. 
+ value: ${{ steps.classify.outputs.site }} + +runs: + using: composite + steps: + - name: Classify changed files + id: classify + shell: bash + env: + EVENT_NAME: ${{ github.event_name }} + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: | + set -euo pipefail + # Only pull_request events are gated. Other events (push, release, + # dispatch) leave CHANGED empty, so the classifier fails open and every + # lane runs — post-merge / on-demand validation is never weakened. + if [ "$EVENT_NAME" = "pull_request" ]; then + # Three-dot diff = what the PR introduces vs its merge base, matching + # how a reviewer reads it. An uncomputable diff (shallow clone, etc.) + # yields an empty list, which the classifier also fails open on. + CHANGED="$(git diff --name-only "${BASE_SHA}...${HEAD_SHA}" || true)" + fi + echo "Changed files:" + printf '%s\n' "${CHANGED:-(none)}" + # Caller already checked out the repo, so the classifier is at its + # repo-relative path. It is the single source of the fail-open default. + printf '%s\n' "${CHANGED:-}" | python3 scripts/ci/classify_changes.py diff --git a/.github/workflows/docs-site-checks.yml b/.github/workflows/docs-site-checks.yml index 975028afe23..53f8dce93f0 100644 --- a/.github/workflows/docs-site-checks.yml +++ b/.github/workflows/docs-site-checks.yml @@ -17,34 +17,51 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 # full history so detect-changes can diff base...head + + # Skip the site build on PRs that touch nothing the docs site is built + # from (website/, skills/, optional-skills/). The job still reports green + # (only the steps below are skipped) so the required check never hangs. 
+ - name: Detect affected areas + id: changes + uses: ./.github/actions/detect-changes - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 + if: steps.changes.outputs.site == 'true' with: node-version: 22 cache: npm cache-dependency-path: website/package-lock.json - name: Install website dependencies + if: steps.changes.outputs.site == 'true' run: npm ci working-directory: website - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + if: steps.changes.outputs.site == 'true' with: python-version: "3.11" - name: Install ascii-guard + if: steps.changes.outputs.site == 'true' run: python -m pip install ascii-guard==2.3.0 pyyaml==6.0.3 - name: Extract skill metadata for dashboard + if: steps.changes.outputs.site == 'true' run: python3 website/scripts/extract-skills.py - name: Regenerate per-skill docs pages + catalogs + if: steps.changes.outputs.site == 'true' run: python3 website/scripts/generate-skill-docs.py - name: Lint docs diagrams + if: steps.changes.outputs.site == 'true' run: npm run lint:diagrams working-directory: website - name: Build Docusaurus + if: steps.changes.outputs.site == 'true' run: npm run build working-directory: website diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index f2765823a0b..30e0ca68f8e 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -41,16 +41,26 @@ jobs: with: fetch-depth: 0 # need full history for merge-base + worktree + # Skip linting on PRs with no Python changes. The job still reports + # green (only the steps below are skipped) so the required check never + # hangs the way an `on.paths` filter would. 
+ - name: Detect affected areas + id: changes + uses: ./.github/actions/detect-changes + - name: Install uv + if: steps.changes.outputs.python == 'true' uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5 - name: Install ruff + ty + if: steps.changes.outputs.python == 'true' run: | uv tool install ruff uv tool install ty - name: Determine base ref id: base + if: steps.changes.outputs.python == 'true' run: | # For PRs, diff against the merge base with the target branch. # For pushes to main, diff against the previous commit on main. @@ -67,6 +77,7 @@ jobs: echo "Base ref: ${BASE_REF}" - name: Run ruff + ty on HEAD + if: steps.changes.outputs.python == 'true' run: | mkdir -p .lint-reports/head ruff check --output-format json --exit-zero \ @@ -77,6 +88,7 @@ jobs: echo "HEAD ty: $(wc -c < .lint-reports/head/ty.json) bytes" - name: Run ruff + ty on base (via git worktree) + if: steps.changes.outputs.python == 'true' run: | mkdir -p .lint-reports/base # Use a worktree so we don't clobber the main checkout. 
If the base @@ -103,6 +115,7 @@ jobs: echo "base ty: $(wc -c < .lint-reports/base/ty.json) bytes" - name: Generate diff summary + if: steps.changes.outputs.python == 'true' run: | python scripts/lint_diff.py \ --base-ruff .lint-reports/base/ruff.json \ @@ -115,6 +128,7 @@ jobs: cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY" - name: Upload reports as artifact + if: steps.changes.outputs.python == 'true' uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: lint-reports @@ -122,7 +136,7 @@ jobs: retention-days: 14 - name: Post / update PR comment - if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository + if: steps.changes.outputs.python == 'true' && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository continue-on-error: true uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7 with: @@ -167,14 +181,23 @@ jobs: steps: - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 # full history so detect-changes can diff base...head + + - name: Detect affected areas + id: changes + uses: ./.github/actions/detect-changes - name: Install uv + if: steps.changes.outputs.python == 'true' uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5 - name: Install ruff + if: steps.changes.outputs.python == 'true' run: uv tool install ruff - name: ruff check . + if: steps.changes.outputs.python == 'true' # No --exit-zero, no || true. Exit code propagates to the job, # which propagates to the required-check gate. 
run: | @@ -191,11 +214,19 @@ jobs: steps: - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 # full history so detect-changes can diff base...head + + - name: Detect affected areas + id: changes + uses: ./.github/actions/detect-changes - name: Set up Python + if: steps.changes.outputs.python == 'true' uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v5 with: python-version: "3.11" - name: Run footgun checker + if: steps.changes.outputs.python == 'true' run: python scripts/check-windows-footguns.py --all diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c1f59c5094a..c4dae1166dd 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -31,8 +31,18 @@ jobs: steps: - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 # full history so detect-changes can diff base...head + + # On PRs that touch no Python, every step below is skipped and the job + # reports green. The check still runs (no `on.paths` filter), so the + # required status never hangs. + - name: Detect affected areas + id: changes + uses: ./.github/actions/detect-changes - name: Restore duration cache + if: steps.changes.outputs.python == 'true' uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 with: path: test_durations.json @@ -44,6 +54,7 @@ jobs: key: test-durations - name: Install ripgrep (prebuilt binary) + if: steps.changes.outputs.python == 'true' run: | set -euo pipefail RG_VERSION=15.1.0 @@ -58,6 +69,7 @@ jobs: rg --version - name: Install uv + if: steps.changes.outputs.python == 'true' uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5 with: # Persist uv's download/wheel cache (~/.cache/uv) across runs. 
@@ -71,9 +83,11 @@ jobs: uv.lock - name: Set up Python 3.11 + if: steps.changes.outputs.python == 'true' run: uv python install 3.11 - name: Install dependencies + if: steps.changes.outputs.python == 'true' # `uv sync --locked` installs the exact pinned set from uv.lock (and # fails if the lock is out of sync with pyproject.toml), giving a # reproducible env. It also creates .venv itself, so no separate @@ -81,11 +95,13 @@ jobs: run: uv sync --locked --python 3.11 --extra all --extra dev - name: Minimize uv cache + if: steps.changes.outputs.python == 'true' # Optimized for CI: prunes pre-built wheels that are cheap to # re-download, keeping the persisted cache small and fast to restore. run: uv cache prune --ci - name: Run tests (slice ${{ matrix.slice }}/6) + if: steps.changes.outputs.python == 'true' # Per-file isolation via scripts/run_tests_parallel.py: discovers # every test_*.py file under tests/ (excluding integration/ + e2e/), # then runs `python -m pytest <file>` in a freshly-spawned subprocess @@ -119,6 +135,7 @@ jobs: NOUS_API_KEY: "" - name: Upload per-slice durations + if: steps.changes.outputs.python == 'true' uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: test-durations-slice-${{ matrix.slice }} @@ -164,8 +181,15 @@ jobs: steps: - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 # full history so detect-changes can diff base...head + + - name: Detect affected areas + id: changes + uses: ./.github/actions/detect-changes - name: Install ripgrep (prebuilt binary) + if: steps.changes.outputs.python == 'true' run: | set -euo pipefail RG_VERSION=15.1.0 @@ -180,6 +204,7 @@ jobs: rg --version - name: Install uv + if: steps.changes.outputs.python == 'true' uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5 with: # Persist uv's download/wheel cache (~/.cache/uv) across runs. 
@@ -193,9 +218,11 @@ jobs: uv.lock - name: Set up Python 3.11 + if: steps.changes.outputs.python == 'true' run: uv python install 3.11 - name: Install dependencies + if: steps.changes.outputs.python == 'true' # `uv sync --locked` installs the exact pinned set from uv.lock (and # fails if the lock is out of sync with pyproject.toml), giving a # reproducible env. It also creates .venv itself, so no separate @@ -203,16 +230,19 @@ jobs: run: uv sync --locked --python 3.11 --extra all --extra dev - name: Minimize uv cache + if: steps.changes.outputs.python == 'true' # Optimized for CI: prunes pre-built wheels that are cheap to # re-download, keeping the persisted cache small and fast to restore. run: uv cache prune --ci - name: Packaged-wheel i18n smoke test + if: steps.changes.outputs.python == 'true' run: | source .venv/bin/activate python -m pytest -m integration tests/test_wheel_locales_e2e.py -v - name: Run e2e tests + if: steps.changes.outputs.python == 'true' run: | source .venv/bin/activate python -m pytest tests/e2e/ -v --tb=short diff --git a/.github/workflows/typecheck.yml b/.github/workflows/typecheck.yml index 29994e3e295..aeb7c35cdc8 100644 --- a/.github/workflows/typecheck.yml +++ b/.github/workflows/typecheck.yml @@ -20,12 +20,22 @@ jobs: fail-fast: false # report all failures, not just the first one steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 # full history so detect-changes can diff base...head + # Skip the install + typecheck on PRs that touch no TypeScript. The job + # still runs and reports green (only the steps below are skipped), so the + # required check never hangs the way an `on.paths` filter would. 
+ - id: changes + uses: ./.github/actions/detect-changes - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 + if: steps.changes.outputs.frontend == 'true' with: node-version: 22 cache: npm - - run: npm ci - - run: npm run --prefix ${{ matrix.package }} typecheck + - if: steps.changes.outputs.frontend == 'true' + run: npm ci + - if: steps.changes.outputs.frontend == 'true' + run: npm run --prefix ${{ matrix.package }} typecheck # Production build of the desktop renderer. `typecheck` runs `tsc` only, # which does NOT exercise Vite/Rolldown module resolution — so an @@ -37,9 +47,16 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 # full history so detect-changes can diff base...head + - id: changes + uses: ./.github/actions/detect-changes - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 + if: steps.changes.outputs.frontend == 'true' with: node-version: 22 cache: npm - - run: npm ci - - run: npm run --prefix apps/desktop build + - if: steps.changes.outputs.frontend == 'true' + run: npm ci + - if: steps.changes.outputs.frontend == 'true' + run: npm run --prefix apps/desktop build diff --git a/scripts/ci/classify_changes.py b/scripts/ci/classify_changes.py new file mode 100644 index 00000000000..2c3c8b5cb3e --- /dev/null +++ b/scripts/ci/classify_changes.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +"""Classify a PR's changed files into CI work lanes. + +Reads newline-separated changed paths on stdin and writes ``key=value`` +booleans (one per lane) to ``$GITHUB_OUTPUT`` and stdout. The +``detect-changes`` composite action consumes them so steps gate on +``if: steps.changes.outputs.<lane> == 'true'``. + +Lanes: ``python`` (pytest / ruff / ty / footguns), ``frontend`` (TS typecheck +matrix + desktop build), ``site`` (Docusaurus + generated skill docs). Docker +is not a lane — it builds on push-to-main and release only, never per-PR. 
+ +Contract — *fail open, never closed*. We may run a lane we didn't need, but +must never skip one a change could break: + +* An empty diff, or any ``.github/`` change, runs everything. +* ``python`` is a denylist: skipped only when *every* file is provably prose + or a frontend-only package; an unrecognized path keeps it on. +* ``skills/`` (incl. ``SKILL.md``) is python-relevant — the skill-doc tests + read that tree, so a doc-looking edit can still break Python. +""" + +from __future__ import annotations + +import os +import sys + +_FRONTEND = ("ui-tui/", "web/", "apps/") # TS typecheck-matrix packages +_ROOT_NPM = {"package.json", "package-lock.json"} # shifts every package's tree +_SITE = ("website/", "skills/", "optional-skills/") # docs site + skill pages +# Prose/frontend trees that can't touch Python. skills/ is excluded on purpose. +_PY_SKIP = ("docs/", "website/") + _FRONTEND + + +def _is_docs(p: str) -> bool: + if p.startswith(("skills/", "optional-skills/")): + return False + return p.endswith((".md", ".mdx")) or p.startswith("docs/") or p.startswith("LICENSE") + + +def _py_irrelevant(p: str) -> bool: + return _is_docs(p) or p in _ROOT_NPM or p.startswith(_PY_SKIP) + + +def classify(files: list[str]) -> dict[str, bool]: + """Map changed paths to ``{lane: should_run}``.""" + files = [f.strip() for f in files if f.strip()] + if not files or any(f.startswith(".github/") for f in files): + return dict.fromkeys(("python", "frontend", "site"), True) + return { + "python": any(not _py_irrelevant(f) for f in files), + "frontend": any(f.startswith(_FRONTEND) or f in _ROOT_NPM for f in files), + "site": any(f.startswith(_SITE) for f in files), + } + + +def main() -> int: + lanes = classify(sys.stdin.read().splitlines()) + out = "\n".join(f"{k}={str(v).lower()}" for k, v in lanes.items()) + if dest := os.environ.get("GITHUB_OUTPUT"): + with open(dest, "a", encoding="utf-8") as fh: + fh.write(out + "\n") + print(out) # echo for local runs + CI step logs + return 0 
+ + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/ci/test_classify_changes.py b/tests/ci/test_classify_changes.py new file mode 100644 index 00000000000..5a4b474c6af --- /dev/null +++ b/tests/ci/test_classify_changes.py @@ -0,0 +1,56 @@ +"""Contract tests for scripts/ci/classify_changes.py. + +Each case asserts the *relationship* between a changed-file set and the lanes +that must run — the safety contract of the gating, not a snapshot. Governing +invariant: fail open. We may run a lane we didn't need, never skip one a +change could have broken. +""" + +from __future__ import annotations + +import importlib.util +from pathlib import Path + +import pytest + +_PATH = Path(__file__).resolve().parents[2] / "scripts" / "ci" / "classify_changes.py" +_spec = importlib.util.spec_from_file_location("classify_changes", _PATH) +_mod = importlib.util.module_from_spec(_spec) +_spec.loader.exec_module(_mod) +classify = _mod.classify + +ALL = {"python": True, "frontend": True, "site": True} + + +def _lanes(python=False, frontend=False, site=False) -> dict[str, bool]: + return {"python": python, "frontend": frontend, "site": site} + + +CASES = { + "docs-only → nothing heavy": (["README.md", "docs/guide.md"], _lanes()), + "python source → python": (["run_agent.py"], _lanes(python=True)), + "dep manifest → python": (["pyproject.toml"], _lanes(python=True)), + "uv.lock → python": (["uv.lock"], _lanes(python=True)), + "ts package → frontend": (["apps/desktop/src/app.tsx"], _lanes(frontend=True)), + "ui-tui → frontend": (["ui-tui/src/entry.ts"], _lanes(frontend=True)), + # Lockfile bump shifts every TS package's tree, but not the Python suite. + "root lockfile → frontend, not python": (["package-lock.json"], _lanes(frontend=True)), + "website → site": (["website/docs/intro.md"], _lanes(site=True)), + # SKILL.md reads like docs, but the skill-doc tests read skills/, so a + # skill edit must still run Python. 
+ "skill md → python + site": (["skills/github/SKILL.md"], _lanes(python=True, site=True)), + # Unknown top-level file keeps Python on rather than risk a silent skip. + "unknown toplevel → python": (["Makefile"], _lanes(python=True)), + "mixed docs+python → python": (["README.md", "agent/x.py"], _lanes(python=True)), + "mixed docs+frontend → frontend": (["README.md", "apps/x.tsx"], _lanes(frontend=True)), + # Fail open: CI-config / empty / blank diffs run everything. + ".github change → all": ([".github/workflows/tests.yml"], ALL), + "action change → all": ([".github/actions/detect-changes/action.yml"], ALL), + "empty diff → all": ([], ALL), + "blank lines → all": (["", " "], ALL), +} + + +@pytest.mark.parametrize("files,expected", CASES.values(), ids=CASES.keys()) +def test_classify(files, expected): + assert classify(files) == expected