diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index bdbea5c9c05..553a8b521ea 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -71,6 +71,8 @@ jobs: load: true platforms: linux/amd64 tags: ${{ env.IMAGE_NAME }}:test + build-args: | + HERMES_GIT_SHA=${{ github.sha }} cache-from: type=gha,scope=docker-amd64 cache-to: type=gha,mode=max,scope=docker-amd64 @@ -149,6 +151,8 @@ jobs: platforms: linux/amd64 labels: | org.opencontainers.image.revision=${{ github.sha }} + build-args: | + HERMES_GIT_SHA=${{ github.sha }} outputs: type=image,name=${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true cache-from: type=gha,scope=docker-amd64 cache-to: type=gha,mode=max,scope=docker-amd64 @@ -203,6 +207,8 @@ jobs: load: true platforms: linux/arm64 tags: ${{ env.IMAGE_NAME }}:test + build-args: | + HERMES_GIT_SHA=${{ github.sha }} cache-from: type=gha,scope=docker-arm64 cache-to: type=gha,mode=max,scope=docker-arm64 @@ -228,6 +234,8 @@ jobs: platforms: linux/arm64 labels: | org.opencontainers.image.revision=${{ github.sha }} + build-args: | + HERMES_GIT_SHA=${{ github.sha }} outputs: type=image,name=${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true cache-from: type=gha,scope=docker-arm64 cache-to: type=gha,mode=max,scope=docker-arm64 diff --git a/Dockerfile b/Dockerfile index e15fd3dda5a..f04909cc10e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -187,6 +187,29 @@ RUN chmod -R a+rX /opt/hermes && \ # this a fast (~1s) egg-link creation with no resolution or downloads. RUN uv pip install --no-cache-dir --no-deps -e "." +# ---------- Bake build-time git revision ---------- +# .dockerignore excludes .git, so `git rev-parse HEAD` from inside the +# container always returns nothing — meaning `hermes dump` reports +# "(unknown)" and the startup banner drops its `· upstream ` suffix. +# That makes support triage from container bug reports impossible: +# we can't tell which commit the user is actually running. +# +# Fix: write the commit SHA passed via the HERMES_GIT_SHA build-arg to +# /opt/hermes/.hermes_build_sha at build time, and have +# hermes_cli/build_info.py read it at runtime. Both `hermes dump` and +# banner.get_git_banner_state() try the baked SHA first, then fall back +# to live `git rev-parse` for source installs (unchanged behaviour). +# +# The arg is optional — local `docker build` without --build-arg simply +# omits the file, and the runtime falls back to live-git lookup. CI +# (.github/workflows/docker-publish.yml) passes ${{ github.sha }} so +# every published image has it. +ARG HERMES_GIT_SHA= +RUN if [ -n "${HERMES_GIT_SHA}" ]; then \ + printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \ + chown hermes:hermes /opt/hermes/.hermes_build_sha; \ + fi + # ---------- s6-overlay service wiring ---------- # Static services declared at build time: main-hermes + dashboard. # Per-profile gateway services are registered dynamically at runtime by diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index ef592beb7fd..dbbff246848 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -300,14 +300,42 @@ def _git_short_hash(repo_dir: Path, rev: str) -> Optional[str]: def get_git_banner_state(repo_dir: Optional[Path] = None) -> Optional[dict]: - """Return upstream/local git hashes for the startup banner.""" + """Return upstream/local git hashes for the startup banner. + + For source installs and dev images this runs ``git rev-parse`` against + the active checkout. When no checkout is available — the canonical case + is the published Docker image, which excludes ``.git`` from the build + context — we fall back to the baked-in build SHA (see + ``hermes_cli/build_info.py``) and return it as a frozen + ``upstream == local`` state with ``ahead=0``. A built image is by + definition pinned to one commit, so "ahead" is always zero and the + banner correctly shows ``· upstream `` with no carried-commits + annotation. + """ repo_dir = repo_dir or _resolve_repo_dir() if repo_dir is None: + # No git checkout — try the baked build SHA (Docker image path). + try: + from hermes_cli.build_info import get_build_sha + baked = get_build_sha(short=8) + if baked: + return {"upstream": baked, "local": baked, "ahead": 0} + except Exception: + pass return None upstream = _git_short_hash(repo_dir, "origin/main") local = _git_short_hash(repo_dir, "HEAD") if not upstream or not local: + # Live-git lookup failed (e.g. shallow clone without origin/main). + # Fall back to the baked build SHA if available. + try: + from hermes_cli.build_info import get_build_sha + baked = get_build_sha(short=8) + if baked: + return {"upstream": baked, "local": baked, "ahead": 0} + except Exception: + pass return None ahead = 0 diff --git a/hermes_cli/build_info.py b/hermes_cli/build_info.py new file mode 100644 index 00000000000..e4cc6f09974 --- /dev/null +++ b/hermes_cli/build_info.py @@ -0,0 +1,51 @@ +""" +Baked-in build metadata for Hermes Agent. + +Source installs report their git revision live via ``git rev-parse`` (see +``hermes_cli/dump.py`` and ``hermes_cli/banner.py``). That doesn't work inside +the published Docker image because ``.dockerignore`` excludes ``.git``, so +those callsites fall back to ``"(unknown)"`` / drop the banner suffix entirely. + +To make ``hermes dump`` and the startup banner identify the exact commit the +image was built from, the Docker build writes the build-time ``$HERMES_GIT_SHA`` +arg into ``/.hermes_build_sha``. This module is the single +read-side helper consumed by both callsites — keeping the lookup in one place +so the file path and missing-file behaviour stay consistent. + +Behaviour: + +- Returns ``None`` when the file is absent. Source installs and dev images + built without the ``HERMES_GIT_SHA`` build-arg fall through to live-git + resolution in the caller, so non-Docker installs are unaffected. +- Returns ``None`` on any IO / decoding error. The build-sha is a nice-to-have + for support triage; nothing in the CLI is allowed to crash because of it. +- Truncates to ``short`` characters (default 8) to match the format used by + ``git rev-parse --short=8`` throughout the codebase. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Optional + +# Path is resolved relative to this module so it works regardless of cwd — +# matches the pattern used by ``banner._resolve_repo_dir``. +_BUILD_SHA_FILE = Path(__file__).parent.parent / ".hermes_build_sha" + + +def get_build_sha(short: int = 8) -> Optional[str]: + """Return the baked-in build SHA, truncated to ``short`` chars, or None. + + Reads ``/.hermes_build_sha`` if present. The file is + written by the Dockerfile's ``HERMES_GIT_SHA`` build-arg and contains + the full 40-character commit hash on a single line. + """ + try: + if not _BUILD_SHA_FILE.is_file(): + return None + sha = _BUILD_SHA_FILE.read_text(encoding="utf-8").strip() + except Exception: + return None + if not sha: + return None + return sha[:short] if short and short > 0 else sha diff --git a/hermes_cli/dump.py b/hermes_cli/dump.py index ded5bb10fa1..98de32bcdea 100644 --- a/hermes_cli/dump.py +++ b/hermes_cli/dump.py @@ -20,7 +20,15 @@ from agent.skill_utils import is_excluded_skill_path def _get_git_commit(project_root: Path) -> str: - """Return short git commit hash, or '(unknown)'.""" + """Return short git commit hash, or '(unknown)'. + + Source installs and dev images resolve this live via ``git rev-parse``. + The published Docker image excludes ``.git`` from the build context, so + that lookup always fails — we fall back to the baked-in build SHA written + to ``/.hermes_build_sha`` by the Dockerfile's + ``HERMES_GIT_SHA`` build-arg (see ``hermes_cli/build_info.py``). + The output format is identical regardless of source. + """ try: result = subprocess.run( ["git", "rev-parse", "--short=8", "HEAD"], @@ -28,9 +36,23 @@ def _get_git_commit(project_root: Path) -> str: cwd=str(project_root), ) if result.returncode == 0: - return result.stdout.strip() + value = result.stdout.strip() + if value: + return value except Exception: pass + + # Fall back to the build-time baked SHA (populated in published Docker + # images, absent otherwise). Defers the import so the dump module + # stays cheap on non-dump code paths. + try: + from hermes_cli.build_info import get_build_sha + baked = get_build_sha(short=8) + if baked: + return baked + except Exception: + pass + return "(unknown)" diff --git a/tests/docker/test_dump_build_sha.py b/tests/docker/test_dump_build_sha.py new file mode 100644 index 00000000000..c84a372e823 --- /dev/null +++ b/tests/docker/test_dump_build_sha.py @@ -0,0 +1,104 @@ +"""Regression test: ``hermes dump`` reports a real git SHA inside the container. + +Background: ``.dockerignore`` excludes ``.git``, so ``git rev-parse HEAD`` +fails inside the published image and ``hermes dump`` used to report +``version: ... [(unknown)]``. The Dockerfile now writes the build-time +``$HERMES_GIT_SHA`` build-arg to ``/opt/hermes/.hermes_build_sha`` and +``hermes_cli/build_info.py`` reads it as a fallback. + +CI (``.github/workflows/docker-publish.yml``) always sets the build-arg +to ``${{ github.sha }}``. Local ``docker build`` (the ``built_image`` +fixture in ``tests/docker/conftest.py``) does NOT — so locally the file +is absent and ``hermes dump`` correctly falls back to ``(unknown)``. + +This test handles both cases: + +* If ``/opt/hermes/.hermes_build_sha`` exists in the image, assert that + ``hermes dump`` surfaces its content as the version SHA (not + ``(unknown)``). +* If the file is absent, assert the legacy behaviour (``(unknown)``) + still holds — defensive guard against the helper accidentally + reporting bogus data from somewhere else. +""" +from __future__ import annotations + +import re +import subprocess + + +_VERSION_LINE = re.compile(r"^version:\s+(?P.+)$", re.MULTILINE) +_SHA_BRACKET = re.compile(r"\[(?P[^\]]+)\]\s*$") + + +def _run_dump(image: str) -> str: + """Return the stdout of ``docker run dump``. + + Relies on Docker's anonymous VOLUME for ``/opt/data`` (declared by the + Dockerfile) so the container's hermes user (UID 10000) can bootstrap + its config. Anonymous volumes are auto-cleaned by ``--rm``, so unlike + a host bind-mount we don't have to chown anything to UID 10000 (which + would break cleanup on non-root hosts). + """ + r = subprocess.run( + ["docker", "run", "--rm", image, "dump"], + capture_output=True, text=True, timeout=120, + ) + assert r.returncode == 0, ( + f"hermes dump exited {r.returncode}: " + f"stderr={r.stderr[-1000:]!r}\nstdout={r.stdout[-1000:]!r}" + ) + return r.stdout + + +def _read_baked_sha_from_image(image: str) -> str | None: + """Return the ``/opt/hermes/.hermes_build_sha`` content, or None if absent.""" + r = subprocess.run( + [ + "docker", "run", "--rm", "--entrypoint", "cat", image, + "/opt/hermes/.hermes_build_sha", + ], + capture_output=True, text=True, timeout=30, + ) + if r.returncode != 0: + return None + return r.stdout.strip() or None + + +def test_dump_reports_baked_sha_when_present(built_image: str) -> None: + """When the image was built with ``HERMES_GIT_SHA``, dump must surface it. + + Together with the smoke-test action (which exercises ``--help``), this + closes the regression loop for the missing-sha bug: any future change + that breaks the baked-file -> dump pipeline will fail CI here. + """ + baked = _read_baked_sha_from_image(built_image) + stdout = _run_dump(built_image) + + match = _VERSION_LINE.search(stdout) + assert match, f"no `version:` line in dump output:\n{stdout[:2000]}" + sha_match = _SHA_BRACKET.search(match.group("rest")) + assert sha_match, ( + f"`version:` line missing [] bracket: {match.group('rest')!r}" + ) + reported = sha_match.group("sha") + + if baked is None: + # Local-build path: no build-arg was passed. Verify the legacy + # fallback ``(unknown)`` is intact — guards against the helper + # ever inventing a SHA from thin air. + assert reported == "(unknown)", ( + f"expected '(unknown)' when no SHA baked, got {reported!r}" + ) + return + + # CI path: build-arg was set, baked file exists. ``hermes dump`` + # truncates to 8 chars via ``git rev-parse --short=8`` semantics. + assert reported != "(unknown)", ( + "baked SHA file present in image but dump still reported " + f"'(unknown)' — the build-info fallback is broken. " + f"Baked file content: {baked!r}" + ) + assert reported == baked[:8], ( + f"dump reported {reported!r} but baked file contained {baked!r} " + f"(expected first 8 chars: {baked[:8]!r})" + ) diff --git a/tests/hermes_cli/test_banner_git_state.py b/tests/hermes_cli/test_banner_git_state.py index 6556145e8f1..17e9aea7f71 100644 --- a/tests/hermes_cli/test_banner_git_state.py +++ b/tests/hermes_cli/test_banner_git_state.py @@ -61,3 +61,56 @@ def test_get_git_banner_state_reads_origin_and_head(tmp_path): state = banner.get_git_banner_state(repo_dir) assert state == {"upstream": "b2f477a3", "local": "af8aad31", "ahead": 3} + + +def test_get_git_banner_state_falls_back_to_build_sha_when_no_repo(): + """Docker image case: no .git checkout — baked build SHA fills the gap. + + ``_resolve_repo_dir`` returns None when neither the running code's + parent nor ``$HERMES_HOME/hermes-agent/`` is a git repo (the canonical + case inside the published container, where .git is dockerignored). + The banner should still report the build SHA so support bug reports + can identify the running commit. + """ + from hermes_cli import banner + + with patch.object(banner, "_resolve_repo_dir", return_value=None), \ + patch("hermes_cli.build_info.get_build_sha", return_value="abcdef12"): + state = banner.get_git_banner_state() + + assert state == {"upstream": "abcdef12", "local": "abcdef12", "ahead": 0} + + +def test_get_git_banner_state_returns_none_when_no_repo_and_no_build_sha(): + """Pip-installed wheel with neither git checkout nor baked SHA → None. + + Banner correctly omits the upstream/local suffix in this case. + """ + from hermes_cli import banner + + with patch.object(banner, "_resolve_repo_dir", return_value=None), \ + patch("hermes_cli.build_info.get_build_sha", return_value=None): + state = banner.get_git_banner_state() + + assert state is None + + +def test_get_git_banner_state_falls_back_when_live_git_returns_nothing(tmp_path): + """Shallow clone without origin/main → still surface build SHA if baked. + + Some install paths (e.g. ``git clone --depth 1`` without a remote) have + a ``.git`` directory but ``git rev-parse origin/main`` fails. When that + happens AND a baked SHA exists, return the baked one instead of None. + """ + from hermes_cli import banner + + repo_dir = tmp_path / "repo" + (repo_dir / ".git").mkdir(parents=True) + + # All git invocations fail (returncode=1, empty stdout). + failed = MagicMock(returncode=1, stdout="") + with patch("hermes_cli.banner.subprocess.run", return_value=failed), \ + patch("hermes_cli.build_info.get_build_sha", return_value="cafef00d"): + state = banner.get_git_banner_state(repo_dir) + + assert state == {"upstream": "cafef00d", "local": "cafef00d", "ahead": 0} diff --git a/tests/hermes_cli/test_build_info.py b/tests/hermes_cli/test_build_info.py new file mode 100644 index 00000000000..994c13e1dcf --- /dev/null +++ b/tests/hermes_cli/test_build_info.py @@ -0,0 +1,78 @@ +"""Tests for hermes_cli.build_info — baked-in build SHA resolution. + +The build SHA is written by the Dockerfile's ``HERMES_GIT_SHA`` build-arg +into ``/.hermes_build_sha``. These tests cover the read-side +helper: missing file, malformed file, truncation, and error tolerance. +""" + +from pathlib import Path +from unittest.mock import patch + + +def test_get_build_sha_returns_none_when_file_absent(tmp_path): + """Source installs: no file present → None, callers fall back to git.""" + from hermes_cli import build_info + + missing = tmp_path / ".hermes_build_sha" # never created + + with patch.object(build_info, "_BUILD_SHA_FILE", missing): + assert build_info.get_build_sha() is None + + +def test_get_build_sha_reads_baked_file(tmp_path): + """Docker image case: file exists with full 40-char SHA → truncated to 8.""" + from hermes_cli import build_info + + sha_file = tmp_path / ".hermes_build_sha" + sha_file.write_text("abcdef1234567890abcdef1234567890abcdef12\n") + + with patch.object(build_info, "_BUILD_SHA_FILE", sha_file): + assert build_info.get_build_sha() == "abcdef12" + + +def test_get_build_sha_respects_short_argument(tmp_path): + """``short=N`` truncates to N chars; ``short<=0`` returns full SHA.""" + from hermes_cli import build_info + + sha_file = tmp_path / ".hermes_build_sha" + full_sha = "abcdef1234567890abcdef1234567890abcdef12" + sha_file.write_text(full_sha + "\n") + + with patch.object(build_info, "_BUILD_SHA_FILE", sha_file): + assert build_info.get_build_sha(short=12) == "abcdef123456" + assert build_info.get_build_sha(short=0) == full_sha + assert build_info.get_build_sha(short=-1) == full_sha + + +def test_get_build_sha_strips_whitespace(tmp_path): + """The Dockerfile uses ``printf '%s\\n'`` — strip the trailing newline.""" + from hermes_cli import build_info + + sha_file = tmp_path / ".hermes_build_sha" + sha_file.write_text(" abcdef1234567890\n\n") + + with patch.object(build_info, "_BUILD_SHA_FILE", sha_file): + assert build_info.get_build_sha() == "abcdef12" + + +def test_get_build_sha_returns_none_for_empty_file(tmp_path): + """A whitespace-only file is treated as absent.""" + from hermes_cli import build_info + + sha_file = tmp_path / ".hermes_build_sha" + sha_file.write_text(" \n\n") + + with patch.object(build_info, "_BUILD_SHA_FILE", sha_file): + assert build_info.get_build_sha() is None + + +def test_get_build_sha_swallows_read_errors(tmp_path): + """Any IO exception from the read returns None — never raises.""" + from hermes_cli import build_info + + sha_file = tmp_path / ".hermes_build_sha" + sha_file.write_text("abcdef1234567890\n") + + with patch.object(build_info, "_BUILD_SHA_FILE", sha_file), \ + patch.object(Path, "read_text", side_effect=OSError("boom")): + assert build_info.get_build_sha() is None diff --git a/tests/hermes_cli/test_dump_git_commit.py b/tests/hermes_cli/test_dump_git_commit.py new file mode 100644 index 00000000000..264ad22a585 --- /dev/null +++ b/tests/hermes_cli/test_dump_git_commit.py @@ -0,0 +1,118 @@ +"""Tests for hermes_cli.dump._get_git_commit — git SHA resolution for ``hermes dump``. + +``hermes dump`` prints the running commit so support bug reports identify the +exact version. Source installs resolve it live via ``git rev-parse``; the +published Docker image excludes ``.git`` and falls back to the baked SHA +written by the Dockerfile's ``HERMES_GIT_SHA`` build-arg. + +These tests cover both paths plus the failure modes (no git, no baked file). +""" + +from unittest.mock import MagicMock, patch + + +def test_get_git_commit_uses_live_git_when_available(tmp_path): + """Source install: ``git rev-parse --short=8 HEAD`` wins; no fallback.""" + from hermes_cli import dump + + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + + git_result = MagicMock(returncode=0, stdout="deadbeef\n") + # build_info should NOT be consulted when live git succeeds. + with patch("hermes_cli.dump.subprocess.run", return_value=git_result) as mock_run, \ + patch("hermes_cli.build_info.get_build_sha") as mock_build: + commit = dump._get_git_commit(repo_dir) + + assert commit == "deadbeef" + mock_run.assert_called_once() + mock_build.assert_not_called() + + +def test_get_git_commit_falls_back_to_build_sha_when_live_git_fails(tmp_path): + """Docker image case: live git returns non-zero → use baked SHA.""" + from hermes_cli import dump + + repo_dir = tmp_path / "no-git-here" + repo_dir.mkdir() + + failed = MagicMock(returncode=128, stdout="") + with patch("hermes_cli.dump.subprocess.run", return_value=failed), \ + patch("hermes_cli.build_info.get_build_sha", return_value="cafef00d"): + commit = dump._get_git_commit(repo_dir) + + assert commit == "cafef00d" + + +def test_get_git_commit_falls_back_when_git_returns_empty_stdout(tmp_path): + """Edge case: git exits 0 but prints nothing — still try the baked SHA.""" + from hermes_cli import dump + + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + + empty = MagicMock(returncode=0, stdout="\n") + with patch("hermes_cli.dump.subprocess.run", return_value=empty), \ + patch("hermes_cli.build_info.get_build_sha", return_value="abcdef12"): + commit = dump._get_git_commit(repo_dir) + + assert commit == "abcdef12" + + +def test_get_git_commit_falls_back_when_git_raises(tmp_path): + """git binary missing (e.g. minimal container w/o git) → baked SHA path.""" + from hermes_cli import dump + + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + + with patch("hermes_cli.dump.subprocess.run", side_effect=FileNotFoundError("git")), \ + patch("hermes_cli.build_info.get_build_sha", return_value="feedface"): + commit = dump._get_git_commit(repo_dir) + + assert commit == "feedface" + + +def test_get_git_commit_returns_unknown_when_neither_source_available(tmp_path): + """Pip-installed wheel: no git, no baked SHA → '(unknown)' (legacy contract).""" + from hermes_cli import dump + + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + + failed = MagicMock(returncode=128, stdout="") + with patch("hermes_cli.dump.subprocess.run", return_value=failed), \ + patch("hermes_cli.build_info.get_build_sha", return_value=None): + commit = dump._get_git_commit(repo_dir) + + assert commit == "(unknown)" + + +def test_get_git_commit_output_format_identical_between_sources(tmp_path): + """Regression guard: live-git and baked-SHA outputs share the same shape. + + Ben explicitly asked for identical output between Docker and source installs + so support tooling that parses ``hermes dump`` doesn't have to special-case + container builds. Both paths must return a bare 8-char SHA — no prefix, + no suffix, no annotation. + """ + from hermes_cli import dump + + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + + # Live-git path. + git_result = MagicMock(returncode=0, stdout="b2f477a3\n") + with patch("hermes_cli.dump.subprocess.run", return_value=git_result): + live = dump._get_git_commit(repo_dir) + + # Baked-SHA path. + failed = MagicMock(returncode=128, stdout="") + with patch("hermes_cli.dump.subprocess.run", return_value=failed), \ + patch("hermes_cli.build_info.get_build_sha", return_value="b2f477a3"): + baked = dump._get_git_commit(repo_dir) + + assert live == baked == "b2f477a3" + # Same length, same charset — no decoration in either branch. + assert len(live) == 8 + assert all(c in "0123456789abcdef" for c in live)