change(ci): migrate docker smoketests to real tests

This commit is contained in:
ethernet 2026-06-25 13:40:17 -04:00
parent 2bd17221b7
commit f0cb049217
3 changed files with 91 additions and 86 deletions

View file

@ -24,11 +24,7 @@ env:
IMAGE_NAME: nousresearch/hermes-agent
jobs:
# ---------------------------------------------------------------------------
# Build amd64 natively. This job also runs the smoke tests (basic --help
# and the dashboard subcommand regression guard from #9153), because amd64
# is the only arch we can `load` into the local daemon on an amd64 runner.
# ---------------------------------------------------------------------------
# Build, test, and optionally push the amd64 image.
build-amd64:
# Only run on the upstream repository, not on forks
if: github.repository == 'NousResearch/hermes-agent'
@ -40,16 +36,16 @@ jobs:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
# The image build + smoke test + integration tests run on every event
# The image build + integration tests run on every event
# (PRs, push-to-main, release). Publish steps below are gated to
# push-to-main / release only.
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
# Build once, load into the local daemon for smoke testing. Cached
# Build once, load into the local daemon for testing. Cached
# to gha with a per-arch scope; the push step below reuses every
# layer from this build.
- name: Build image (amd64, smoke test)
- name: Build image (amd64)
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
@ -62,24 +58,12 @@ jobs:
cache-from: type=gha,scope=docker-amd64
cache-to: type=gha,mode=max,scope=docker-amd64
- name: Smoke test image
uses: ./.github/actions/hermes-smoke-test
with:
image: ${{ env.IMAGE_NAME }}:test
# ---------------------------------------------------------------------
# Run the docker-integration test suite against the freshly-built
# image already loaded into the local daemon (`:test`). These tests
# are excluded from the sharded `tests.yml :: test` matrix on purpose
# (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
# shard would otherwise reach the session-scoped ``built_image``
# fixture in ``tests/docker/conftest.py`` and start a 3-7min
# ``docker build`` — guaranteed to
# die in fixture setup.
# image already loaded into the local daemon (`:test`).
#
# Piggybacking here avoids a second image build: the smoke test
# already proved the image loads + runs, so the daemon has it under
# `${IMAGE_NAME}:test` and we just point ``HERMES_TEST_IMAGE`` at
# Piggybacking here avoids a second image build: the build step
# already loaded the image into the daemon under
# `${IMAGE_NAME}:test`, so we just point ``HERMES_TEST_IMAGE`` at
# that. The fixture's ``HERMES_TEST_IMAGE`` branch (see
# tests/docker/conftest.py:62-63) short-circuits the rebuild.
#
@ -112,7 +96,6 @@ jobs:
OPENAI_API_KEY: ""
NOUS_API_KEY: ""
run: |
source .venv/bin/activate
scripts/run_tests.sh tests/docker/ --file-timeout 300 -- -v --tb=short
- name: Log in to Docker Hub
@ -160,10 +143,7 @@ jobs:
retention-days: 1
# ---------------------------------------------------------------------------
# Build arm64 natively on GitHub's free arm64 runner. This replaces the
# previous QEMU-emulated arm64 build, which was ~5-10x slower and shared
# a cache scope with amd64. Matches the amd64 job's shape: build+load,
# smoke test, then on push/release push by digest.
# Build, test, and optionally push the arm64 image.
# ---------------------------------------------------------------------------
build-arm64:
if: github.repository == 'NousResearch/hermes-agent'
@ -183,7 +163,7 @@ jobs:
# push/release. Uses the workflow's GITHUB_TOKEN, which is valid for
# the whole job — unlike the gha cache backend's short-lived Azure SAS
# token, which expired mid-build on slow cold-cache arm64 runs and
# crashed the build before the smoke test (the reason the gha cache
# crashed the build before the tests ran (the reason the gha cache
# was removed from arm64 PRs in the first place).
- name: Log in to ghcr.io (build cache)
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
@ -192,7 +172,7 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# Build once, load into the local daemon for smoke testing, then push
# Build once, load into the local daemon for testing, then push
# by digest below. Reads AND writes the registry-backed cache so the
# push reuses layers from this build and the next build starts warm.
#
@ -200,7 +180,7 @@ jobs:
# cache that previously broke here: its credential is the job-lifetime
# GITHUB_TOKEN, not a short-lived SAS token, so the cold-build-outlives-
# token failure mode cannot recur.
- name: Build image (arm64, smoke test, cached publish)
- name: Build image (arm64, cached publish)
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
@ -213,10 +193,25 @@ jobs:
cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64
cache-to: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64,mode=max
- name: Smoke test image
uses: ./.github/actions/hermes-smoke-test
with:
image: ${{ env.IMAGE_NAME }}:test
- name: Install uv for docker tests
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
- name: Set up Python 3.11 for docker tests
run: uv python install 3.11
- name: Install Python dependencies for docker tests
run: |
uv sync --locked --python 3.11 --extra dev
- name: Run docker tests
env:
# Skip rebuild; use the image already loaded by the build step.
HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
OPENROUTER_API_KEY: ""
OPENAI_API_KEY: ""
NOUS_API_KEY: ""
run: |
scripts/run_tests.sh tests/docker/test_smoke.py --file-timeout 300 -- -v --tb=short
- name: Log in to Docker Hub
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'