From f0cb04921709b473b1e8f3a979b7fc384db37f11 Mon Sep 17 00:00:00 2001 From: ethernet Date: Thu, 25 Jun 2026 13:40:17 -0400 Subject: [PATCH] change(ci): migrate docker smoketests to real tests --- .github/actions/hermes-smoke-test/action.yml | 50 --------------- .github/workflows/docker.yml | 67 +++++++++----------- tests/docker/test_smoke.py | 60 ++++++++++++++++++ 3 files changed, 91 insertions(+), 86 deletions(-) delete mode 100644 .github/actions/hermes-smoke-test/action.yml create mode 100644 tests/docker/test_smoke.py diff --git a/.github/actions/hermes-smoke-test/action.yml b/.github/actions/hermes-smoke-test/action.yml deleted file mode 100644 index 8b79c4bf34d..00000000000 --- a/.github/actions/hermes-smoke-test/action.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Hermes smoke test -description: > - Run the image's built-in entrypoint against `--help` and `dashboard --help` - to catch basic runtime regressions before publishing. Requires the image - to already be loaded into the local Docker daemon under `image`. - - Works identically on amd64 and arm64 runners. - -inputs: - image: - description: Fully-qualified image tag (e.g. nousresearch/hermes-agent:test) - required: true - -runs: - using: composite - steps: - - name: Ensure /tmp/hermes-test is hermes-writable - shell: bash - run: | - # The image runs as the hermes user (UID 10000). GitHub Actions - # creates /tmp/hermes-test root-owned by default, which hermes - # can't write to — chown it to match the in-container UID before - # bind-mounting. Real users doing `docker run -v ~/.hermes:...` - # with their own UID hit the same issue and have their own - # remediations (HERMES_UID env var, or chown locally). - mkdir -p /tmp/hermes-test - sudo chown -R 10000:10000 /tmp/hermes-test - - - name: hermes --help - shell: bash - run: | - # Use the image's real ENTRYPOINT (/init + main-wrapper.sh) so - # this exercises the actual production startup path. 
PR #30136 - # review caught that an --entrypoint override here had been - # silently neutered by the s6-overlay migration — stage2-hook - # ignores its CMD args, so the smoke test was a no-op. - docker run --rm \ - -v /tmp/hermes-test:/opt/data \ - "${{ inputs.image }}" --help - - - name: hermes dashboard --help - shell: bash - run: | - # Regression guard for #9153: dashboard was present in source but - # missing from the published image. If this fails, something in - # the Dockerfile is excluding the dashboard subcommand from the - # installed package. - docker run --rm \ - -v /tmp/hermes-test:/opt/data \ - "${{ inputs.image }}" dashboard --help diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 83d6eac261c..fd899ece4ee 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -24,11 +24,7 @@ env: IMAGE_NAME: nousresearch/hermes-agent jobs: - # --------------------------------------------------------------------------- - # Build amd64 natively. This job also runs the smoke tests (basic --help - # and the dashboard subcommand regression guard from #9153), because amd64 - # is the only arch we can `load` into the local daemon on an amd64 runner. - # --------------------------------------------------------------------------- + # Build, test, and optionally push the amd64 image. build-amd64: # Only run on the upstream repository, not on forks if: github.repository == 'NousResearch/hermes-agent' @@ -40,16 +36,16 @@ jobs: - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - # The image build + smoke test + integration tests run on every event + # The image build + integration tests run on every event # (PRs, push-to-main, release). Publish steps below are gated to # push-to-main / release only. - name: Set up Docker Buildx uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3 - # Build once, load into the local daemon for smoke testing. 
Cached + # Build once, load into the local daemon for testing. Cached # to gha with a per-arch scope; the push step below reuses every # layer from this build. - - name: Build image (amd64, smoke test) + - name: Build image (amd64) uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0 with: context: . @@ -62,24 +58,12 @@ jobs: cache-from: type=gha,scope=docker-amd64 cache-to: type=gha,mode=max,scope=docker-amd64 - - name: Smoke test image - uses: ./.github/actions/hermes-smoke-test - with: - image: ${{ env.IMAGE_NAME }}:test - - # --------------------------------------------------------------------- # Run the docker-integration test suite against the freshly-built - # image already loaded into the local daemon (`:test`). These tests - # are excluded from the sharded `tests.yml :: test` matrix on purpose - # (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each - # shard would otherwise reach the session-scoped ``built_image`` - # fixture in ``tests/docker/conftest.py`` and start a 3-7min - # ``docker build`` — guaranteed to - # die in fixture setup. + # image already loaded into the local daemon (`:test`). # - # Piggybacking here avoids a second image build: the smoke test - # already proved the image loads + runs, so the daemon has it under - # `${IMAGE_NAME}:test` and we just point ``HERMES_TEST_IMAGE`` at + # Piggybacking here avoids a second image build: the build step + # already loaded the image into the daemon under + # `${IMAGE_NAME}:test`, so we just point ``HERMES_TEST_IMAGE`` at # that. The fixture's ``HERMES_TEST_IMAGE`` branch (see # tests/docker/conftest.py:62-63) short-circuits the rebuild. 
# @@ -112,7 +96,6 @@ jobs: OPENAI_API_KEY: "" NOUS_API_KEY: "" run: | - source .venv/bin/activate scripts/run_tests.sh tests/docker/ --file-timeout 300 -- -v --tb=short - name: Log in to Docker Hub @@ -160,10 +143,7 @@ jobs: retention-days: 1 # --------------------------------------------------------------------------- - # Build arm64 natively on GitHub's free arm64 runner. This replaces the - # previous QEMU-emulated arm64 build, which was ~5-10x slower and shared - # a cache scope with amd64. Matches the amd64 job's shape: build+load, - # smoke test, then on push/release push by digest. + # Build, test, and optionally push the arm64 image. # --------------------------------------------------------------------------- build-arm64: if: github.repository == 'NousResearch/hermes-agent' @@ -183,7 +163,7 @@ jobs: # push/release. Uses the workflow's GITHUB_TOKEN, which is valid for # the whole job — unlike the gha cache backend's short-lived Azure SAS # token, which expired mid-build on slow cold-cache arm64 runs and - # crashed the build before the smoke test (the reason the gha cache + # crashed the build before the tests ran (the reason the gha cache # was removed from arm64 PRs in the first place). - name: Log in to ghcr.io (build cache) uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0 @@ -192,7 +172,7 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - # Build once, load into the local daemon for smoke testing, then push + # Build once, load into the local daemon for testing, then push # by digest below. Reads AND writes the registry-backed cache so the # push reuses layers from this build and the next build starts warm. # @@ -200,7 +180,7 @@ jobs: # cache that previously broke here: its credential is the job-lifetime # GITHUB_TOKEN, not a short-lived SAS token, so the cold-build-outlives- # token failure mode cannot recur. 
- - name: Build image (arm64, smoke test, cached publish) + - name: Build image (arm64, cached publish) uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0 with: context: . @@ -213,10 +193,25 @@ jobs: cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64 cache-to: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64,mode=max - - name: Smoke test image - uses: ./.github/actions/hermes-smoke-test - with: - image: ${{ env.IMAGE_NAME }}:test + - name: Install uv for docker tests + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5 + + - name: Set up Python 3.11 for docker tests + run: uv python install 3.11 + + - name: Install Python dependencies for docker tests + run: | + uv sync --locked --python 3.11 --extra dev + + - name: Run docker tests + env: + # Skip rebuild; use the image already loaded by the build step. + HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test + OPENROUTER_API_KEY: "" + OPENAI_API_KEY: "" + NOUS_API_KEY: "" + run: | + scripts/run_tests.sh tests/docker/test_smoke.py --file-timeout 300 -- -v --tb=short - name: Log in to Docker Hub if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release' diff --git a/tests/docker/test_smoke.py b/tests/docker/test_smoke.py new file mode 100644 index 00000000000..9b9eed1d2ea --- /dev/null +++ b/tests/docker/test_smoke.py @@ -0,0 +1,60 @@ +"""Runtime smoke tests for the Docker image entrypoint and subcommands. + +Converted from the former ``.github/actions/hermes-smoke-test`` composite +action. These tests exercise the image's real ENTRYPOINT (``/init`` + +``main-wrapper.sh``) via ``docker run --rm IMAGE --help`` and +``docker run --rm IMAGE dashboard --help`` to catch basic runtime +regressions before publishing. + +The harness expects the ``built_image`` fixture from +``tests/docker/conftest.py``. When Docker isn't available every test +here is skipped at collection time. 
+""" +from __future__ import annotations + +import subprocess + + +def test_hermes_help(built_image: str) -> None: + """``docker run --rm IMAGE --help`` must exit 0. + + Uses the image's real ENTRYPOINT (``/init`` + ``main-wrapper.sh``) + so this exercises the actual production startup path. PR #30136 + review caught that an ``--entrypoint`` override in the old composite + action had been silently neutered by the s6-overlay migration — + ``stage2-hook`` ignores CMD args passed after an overridden + entrypoint, so the smoke test was a no-op. + """ + r = subprocess.run( + ["docker", "run", "--rm", built_image, "--help"], + capture_output=True, text=True, timeout=60, + ) + assert r.returncode == 0, ( + f"hermes --help failed (exit {r.returncode}): " + f"stdout={r.stdout[-2000:]!r} stderr={r.stderr[-2000:]!r}" + ) + assert "Traceback" not in r.stderr, ( + f"hermes --help produced a traceback: {r.stderr[-2000:]!r}" + ) + + +def test_dashboard_subcommand_present(built_image: str) -> None: + """``docker run --rm IMAGE dashboard --help`` must exit 0. + + Regression guard for #9153: the ``dashboard`` subcommand was present + in source but missing from the published image. If this fails, + something in the Dockerfile is excluding the dashboard subcommand + from the installed package. + """ + r = subprocess.run( + ["docker", "run", "--rm", built_image, "dashboard", "--help"], + capture_output=True, text=True, timeout=60, + ) + assert r.returncode == 0, ( + f"hermes dashboard --help failed (exit {r.returncode}): " + f"stdout={r.stdout[-2000:]!r} stderr={r.stderr[-2000:]!r}" + ) + combined = (r.stdout + r.stderr).lower() + assert "dashboard" in combined or "usage" in combined, ( + f"dashboard --help output unexpected: {combined[-2000:]!r}" + )