mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
ci(docker): run tests/docker/ in build-amd64 against the freshly-built image
The new tests/docker/ suite (added by this PR) was being picked up by the
sharded pytest matrix in tests.yml, where its session-scoped `built_image`
fixture issued a 3-7min `docker build` under tests/docker/conftest.py's
180s pytest-timeout cap. Every test in the directory failed in fixture
setup across all 6 shards.
Fix the suite so it actually runs (not skips):
1. Wire the docker tests into docker-publish.yml's build-amd64 job, right
after the existing smoke test. The image is already loaded into the
local daemon as `nousresearch/hermes-agent:test`; set
HERMES_TEST_IMAGE to that and the fixture's pre-built-image branch
short-circuits the rebuild. 21 tests run in ~90s locally against a
prebuilt image, no rebuild cost on top of the existing build step.
2. Exclude tests/docker/ from scripts/run_tests_parallel.py's default
discovery so the sharded matrix in tests.yml stops trying to build
the image. Explicit positional paths (`pytest tests/docker/` or
`scripts/run_tests.sh tests/docker/`) still pick the suite up — the
skip rule honors directory-level user intent, matching the existing
per-file override pattern.
The dedicated docker-tests step runs on every PR that touches docker
code (the existing path filters on docker-publish.yml already cover
`tests/docker/**` via `**/*.py`), so the suite gates real changes.
(cherry picked from commit 4c481860ce)
This commit is contained in:
parent
c524b8a4dc
commit
da8b2e95fd
2 changed files with 80 additions and 13 deletions
50
.github/workflows/docker-publish.yml
vendored
50
.github/workflows/docker-publish.yml
vendored
|
|
@ -80,6 +80,56 @@ jobs:
|
|||
with:
|
||||
image: ${{ env.IMAGE_NAME }}:test
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Run the docker-integration test suite against the freshly-built
|
||||
# image already loaded into the local daemon (`:test`). These tests
|
||||
# are excluded from the sharded `tests.yml :: test` matrix on purpose
|
||||
# (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
|
||||
# shard would otherwise reach the session-scoped ``built_image``
|
||||
# fixture in ``tests/docker/conftest.py`` and start a 3-7min
|
||||
# ``docker build`` under a 180s pytest-timeout cap — guaranteed to
|
||||
# die in fixture setup.
|
||||
#
|
||||
# Piggybacking here avoids a second image build: the smoke test
|
||||
# already proved the image loads + runs, so the daemon has it under
|
||||
# `${IMAGE_NAME}:test` and we just point ``HERMES_TEST_IMAGE`` at
|
||||
# that. The fixture's ``HERMES_TEST_IMAGE`` branch (see
|
||||
# tests/docker/conftest.py:62-63) short-circuits the rebuild.
|
||||
#
|
||||
# Why this job and not a standalone one: the image is 5GB+; passing
|
||||
# it between jobs via ``docker save``/``upload-artifact`` is slower
|
||||
# than the build itself. Reusing the existing daemon state is the
|
||||
# cheapest path to coverage on every PR that touches docker code.
|
||||
# ---------------------------------------------------------------------
|
||||
- name: Install uv (for docker tests)
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Set up Python 3.11 (for docker tests)
|
||||
run: uv python install 3.11
|
||||
|
||||
- name: Install Python dependencies (for docker tests)
|
||||
run: |
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
# ``dev`` extra pulls in pytest, pytest-asyncio, pytest-timeout —
|
||||
# everything tests/docker/ needs. We deliberately avoid ``all``
|
||||
# here because the docker tests only drive the container via
|
||||
# subprocess and don't import hermes_agent's optional deps.
|
||||
uv pip install -e ".[dev]"
|
||||
|
||||
- name: Run docker integration tests
|
||||
env:
|
||||
# Skip rebuild; use the image already loaded by the build step.
|
||||
HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
|
||||
# Match the policy in tests.yml :: test job — no accidental
|
||||
# real-API calls from inside the harness.
|
||||
OPENROUTER_API_KEY: ""
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/docker/ -v --tb=short
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
|
|
|
|||
|
|
@ -52,18 +52,23 @@ from typing import Dict, List, Tuple
|
|||
# Default test discovery roots.
|
||||
_DEFAULT_ROOTS = ["tests"]
|
||||
|
||||
# Directories to skip during discovery — the e2e + integration suites
|
||||
# require real services and are run separately. Match exactly the
|
||||
# ``--ignore=`` flags the previous CI command used.
|
||||
# Directories to skip during discovery — these suites require real
|
||||
# external services (a model gateway, a docker daemon with a prebuilt
|
||||
# image, etc.) and are run in their own dedicated CI jobs:
|
||||
#
|
||||
# ``docker`` joined this list in the salvage of PR #30136: the new
|
||||
# tests/docker/ harness builds the real Dockerfile in a session
|
||||
# fixture and runs ``docker run`` against it. On a CI runner where
|
||||
# Docker IS available (ubuntu-latest), the build can exceed
|
||||
# pytest-timeout's 180s ceiling and surface as a setup-timeout
|
||||
# instead of a real test failure. The harness has its own dedicated
|
||||
# action (.github/actions/hermes-smoke-test) plus the docker-lint
|
||||
# workflow; it is NOT meant to run in the regular ``test (N)`` shards.
|
||||
# tests/e2e/ — .github/workflows/tests.yml :: e2e job
|
||||
# tests/integration/ — historical; legacy --ignore flags
|
||||
# tests/docker/ — .github/workflows/docker-publish.yml ::
|
||||
# build-amd64 job (runs against the freshly-loaded
|
||||
# nousresearch/hermes-agent:test image, via
|
||||
# ``HERMES_TEST_IMAGE`` so the fixture skips
|
||||
# rebuild). The full pytest-shard runner can't
|
||||
# host these because the session-scoped
|
||||
# ``built_image`` fixture would do a 3-7min
|
||||
# ``docker build`` inside a 180s per-test
|
||||
# pytest-timeout cap (set by tests/docker/conftest.py),
|
||||
# so the build is guaranteed to die in fixture
|
||||
# setup. The dedicated job sidesteps both costs.
|
||||
_SKIP_PARTS = {"integration", "e2e", "docker"}
|
||||
|
||||
# Per-file wall-clock cap. Generous default — pytest-timeout still
|
||||
|
|
@ -145,7 +150,10 @@ def _discover_files(roots: List[Path]) -> List[Path]:
|
|||
|
||||
Exclude any file whose path contains a component in ``_SKIP_PARTS``,
|
||||
UNLESS the user explicitly named it as a root (in which case the
|
||||
user's intent overrides the skip filter).
|
||||
user's intent overrides the skip filter). This makes
|
||||
``scripts/run_tests.sh tests/docker/`` work locally the same way
|
||||
``pytest tests/docker/`` does — the CI-level skip exists to keep
|
||||
the sharded matrix from blowing up, not to block targeted runs.
|
||||
"""
|
||||
seen: set[Path] = set()
|
||||
out: List[Path] = []
|
||||
|
|
@ -160,8 +168,17 @@ def _discover_files(roots: List[Path]) -> List[Path]:
|
|||
seen.add(real)
|
||||
out.append(root)
|
||||
continue
|
||||
# If the explicit root itself sits inside a skipped dir (e.g.
|
||||
# the user said ``tests/docker``), the user has overridden the
|
||||
# skip for that subtree. Compute the set of skip-parts the user
|
||||
# opted into, and only filter files whose path crosses a
|
||||
# skip-part *outside* that opt-in.
|
||||
root_skip_overrides = {
|
||||
part for part in root.parts if part in _SKIP_PARTS
|
||||
}
|
||||
effective_skips = _SKIP_PARTS - root_skip_overrides
|
||||
for path in root.rglob("test_*.py"):
|
||||
if any(part in _SKIP_PARTS for part in path.parts):
|
||||
if any(part in effective_skips for part in path.parts):
|
||||
continue
|
||||
real = path.resolve()
|
||||
if real in seen:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue