hermes-agent/tests/docker/test_dump_build_sha.py
Ben 66489f38c7 fix(docker): bake build-time git SHA into the image
`hermes dump` and the startup banner both call `git rev-parse HEAD` to
report the running commit, but `.dockerignore` line 2 excludes `.git` —
so inside the published image `hermes dump` shows
`version: ... [(unknown)]` and the banner drops its `· upstream <sha>`
suffix entirely.  That makes support triage from container bug reports
impossible: we can't tell which commit the user is actually running.

Fix: thread the build-time SHA through as a Docker build-arg, write it
to `/opt/hermes/.hermes_build_sha` in the image, and have a new
`hermes_cli/build_info.get_build_sha()` read it as a fallback after the
existing live-git lookup fails.  Output format is unchanged in both
callsites — same 8-char short SHA whether resolved live or baked.

Wiring:
  - Dockerfile: `ARG HERMES_GIT_SHA=` + write-file step after the source
    copy.  Empty/missing arg → no file written → callers fall through to
    live git (so local `docker build` without --build-arg is unchanged).
  - docker-publish.yml: passes `HERMES_GIT_SHA=${{ github.sha }}` on all
    four build-push-action steps (amd64/arm64, smoke-test + final push).
  - dump.py:_get_git_commit() / banner.py:get_git_banner_state(): try
    live git first, fall back to baked SHA, then to legacy `(unknown)`
    / None.  Banner returns `upstream == local, ahead=0` because a built
    image is by definition pinned to one commit.

Coverage:
  - Unit tests cover build_info (file present/absent/empty/error,
    truncation, whitespace), dump (live-git wins, both fallbacks,
    identical output-format regression guard), and banner (no-repo +
    baked, no-repo + no-sha, shallow-clone fallback).
  - tests/docker/test_dump_build_sha.py is an integration regression
    guard that runs against the real image, reads
    `/opt/hermes/.hermes_build_sha`, and asserts `hermes dump` surfaces
    its content (or stays at `(unknown)` if no file).
  - Verified end-to-end: `docker build --build-arg HERMES_GIT_SHA=abc...`
    → `docker run ... dump` reports `[abc12345]`; without the build-arg
    it reports `[(unknown)]` as before.
2026-05-28 15:14:05 +10:00

104 lines
4 KiB
Python

"""Regression test: ``hermes dump`` reports a real git SHA inside the container.
Background: ``.dockerignore`` excludes ``.git``, so ``git rev-parse HEAD``
fails inside the published image and ``hermes dump`` used to report
``version: ... [(unknown)]``. The Dockerfile now writes the build-time
``$HERMES_GIT_SHA`` build-arg to ``/opt/hermes/.hermes_build_sha`` and
``hermes_cli/build_info.py`` reads it as a fallback.
CI (``.github/workflows/docker-publish.yml``) always sets the build-arg
to ``${{ github.sha }}``. Local ``docker build`` (the ``built_image``
fixture in ``tests/docker/conftest.py``) does NOT — so locally the file
is absent and ``hermes dump`` correctly falls back to ``(unknown)``.
This test handles both cases:
* If ``/opt/hermes/.hermes_build_sha`` exists in the image, assert that
``hermes dump`` surfaces its content as the version SHA (not
``(unknown)``).
* If the file is absent, assert the legacy behaviour (``(unknown)``)
still holds — defensive guard against the helper accidentally
reporting bogus data from somewhere else.
"""
from __future__ import annotations
import re
import subprocess
_VERSION_LINE = re.compile(r"^version:\s+(?P<rest>.+)$", re.MULTILINE)
_SHA_BRACKET = re.compile(r"\[(?P<sha>[^\]]+)\]\s*$")
def _run_dump(image: str) -> str:
"""Return the stdout of ``docker run <image> dump``.
Relies on Docker's anonymous VOLUME for ``/opt/data`` (declared by the
Dockerfile) so the container's hermes user (UID 10000) can bootstrap
its config. Anonymous volumes are auto-cleaned by ``--rm``, so unlike
a host bind-mount we don't have to chown anything to UID 10000 (which
would break cleanup on non-root hosts).
"""
r = subprocess.run(
["docker", "run", "--rm", image, "dump"],
capture_output=True, text=True, timeout=120,
)
assert r.returncode == 0, (
f"hermes dump exited {r.returncode}: "
f"stderr={r.stderr[-1000:]!r}\nstdout={r.stdout[-1000:]!r}"
)
return r.stdout
def _read_baked_sha_from_image(image: str) -> str | None:
"""Return the ``/opt/hermes/.hermes_build_sha`` content, or None if absent."""
r = subprocess.run(
[
"docker", "run", "--rm", "--entrypoint", "cat", image,
"/opt/hermes/.hermes_build_sha",
],
capture_output=True, text=True, timeout=30,
)
if r.returncode != 0:
return None
return r.stdout.strip() or None
def test_dump_reports_baked_sha_when_present(built_image: str) -> None:
"""When the image was built with ``HERMES_GIT_SHA``, dump must surface it.
Together with the smoke-test action (which exercises ``--help``), this
closes the regression loop for the missing-sha bug: any future change
that breaks the baked-file -> dump pipeline will fail CI here.
"""
baked = _read_baked_sha_from_image(built_image)
stdout = _run_dump(built_image)
match = _VERSION_LINE.search(stdout)
assert match, f"no `version:` line in dump output:\n{stdout[:2000]}"
sha_match = _SHA_BRACKET.search(match.group("rest"))
assert sha_match, (
f"`version:` line missing [<sha>] bracket: {match.group('rest')!r}"
)
reported = sha_match.group("sha")
if baked is None:
# Local-build path: no build-arg was passed. Verify the legacy
# fallback ``(unknown)`` is intact — guards against the helper
# ever inventing a SHA from thin air.
assert reported == "(unknown)", (
f"expected '(unknown)' when no SHA baked, got {reported!r}"
)
return
# CI path: build-arg was set, baked file exists. ``hermes dump``
# truncates to 8 chars via ``git rev-parse --short=8`` semantics.
assert reported != "(unknown)", (
"baked SHA file present in image but dump still reported "
f"'(unknown)' — the build-info fallback is broken. "
f"Baked file content: {baked!r}"
)
assert reported == baked[:8], (
f"dump reported {reported!r} but baked file contained {baked!r} "
f"(expected first 8 chars: {baked[:8]!r})"
)