From b34532319548708160f1ebccb525e9bb7436eb0b Mon Sep 17 00:00:00 2001 From: Ben Barclay Date: Thu, 28 May 2026 13:06:11 +1000 Subject: [PATCH] fix(docker): tee supervised gateway stdout to docker logs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #33583 (the gateway-run-supervised redirect). Before this fix, the supervised gateway's stdout (most visibly the "Hermes Gateway Starting…" rich-console banner) was swallowed by `s6-log` into the rotated file at `${HERMES_HOME}/logs/gateways//current` and never reached `docker logs`. Operational signal lived in two places: * **docker logs** — saw stderr (Python `logging` defaults to stderr), so warnings/errors were visible. * **the rotated file** — saw stdout (rich banners, `print()` output, third-party libs that wrote to fd 1). This was surprising for users coming from the pre-s6 image, where `docker run … gateway run` produced a single unified stream in `docker logs`. They'd see partial output, conclude something was broken, and dig around for the missing pieces. Fix: add the `1` s6-log action directive before the file destination so each line is forwarded to s6-log's stdout — which propagates up the s6-supervise pipeline to /init's stdout = container stdout = `docker logs`. The file destination is preserved as a second destination, so the rotated log (with ISO 8601 timestamps) still exists for `hermes logs` and for survival across container restarts. Trade-off considered: timestamps. Putting `T` between `1` and the file destination (not before `1`) means: * docker logs sees raw lines — Python's logging formatter has its own timestamps, and `docker logs --timestamps` adds another layer when desired. No double-stamping in the common reading path. * The persisted file gets s6-log's ISO 8601 timestamp so even output that lacked a Python-logger timestamp (rich banners, third-party raw prints) is correlatable in `current`. Verification: * New unit-test assertion in `test_service_manager.py` locks the `s6-log 1` directive into the rendered run-script. Mutation- tested by reverting to the pre-fix script (no `1`); the assert catches it cleanly. * New docker-harness test `test_supervised_gateway_stdout_reaches_docker_logs` builds the image, runs `docker run … gateway run`, and asserts the unique `⚕` banner glyph reaches `docker logs`. Also verifies the rotated file still contains the banner (no regression on the existing file destination). Mutation-tested end-to-end: built a deliberately-broken image without the `1` directive and the test failed exactly as designed, citing the banner present in `current` but absent from `docker logs`. * `website/docs/user-guide/docker.md` gains a new `:::note Where gateway logs go` admonition documenting both destinations and the audit-log file at `${HERMES_HOME}/logs/container-boot.log`. Existing functionality preserved: every other docker-harness test still passes against the new image. Unit-test sweep across `tests/hermes_cli/` (5561 tests) is green. --- hermes_cli/service_manager.py | 34 ++++++++++- tests/docker/test_gateway_run_supervised.py | 66 +++++++++++++++++++++ tests/hermes_cli/test_service_manager.py | 10 ++++ website/docs/user-guide/docker.md | 9 +++ 4 files changed, 118 insertions(+), 1 deletion(-) diff --git a/hermes_cli/service_manager.py b/hermes_cli/service_manager.py index b894ca764a8..1037760b69b 100644 --- a/hermes_cli/service_manager.py +++ b/hermes_cli/service_manager.py @@ -628,6 +628,38 @@ class S6ServiceManager: — so a container started with ``-e HERMES_HOME=/data/hermes`` gets its logs under /data/hermes/logs/..., not the build-time default. + + Output routing — the script is two action directives, applied + per line, in order: + + 1. ``1`` (forward to stdout) — propagates the line up the + s6-supervise pipeline to /init's stdout, which is the + container's stdout, which is ``docker logs``. Without + this, supervised stdout would be terminated inside + s6-log and never reach the container's log stream; + users would have to ``docker exec`` and ``tail`` the + file just to see startup banners. (Python's ``logging`` + module defaults to stderr, which s6-supervise leaves + unfiltered — so warnings/errors already reach docker + logs. This change is specifically about the rich-console + banner output and other plain stdout writes.) + 2. ``T `` — also write a timestamped copy to the + rotated log directory (``current`` + archived ``@*.s`` + files). This is what ``hermes logs`` reads and what + persists across container restarts via the volume mount. + + ``T`` is non-sticky: it only prefixes lines for the next + action directive. We deliberately put ``T`` between ``1`` + and the log dir (not before ``1``) so: + + * ``docker logs`` shows raw lines — Python's logging + formatter has its own timestamps, and ``docker logs + --timestamps`` adds a third layer when desired. No + double-stamping in the most common reading path. + * The persisted file gets s6-log's own ISO 8601 timestamp + so even output that lacked a Python-logger timestamp + (rich banners, third-party libs' raw prints) is + correlatable in ``current``. """ import shlex prof = shlex.quote(profile) @@ -638,7 +670,7 @@ class S6ServiceManager: f'log_dir="$HERMES_HOME/logs/gateways/{prof}"\n' f'mkdir -p "$log_dir"\n' f'chown -R hermes:hermes "$log_dir" 2>/dev/null || true\n' - f'exec s6-setuidgid hermes s6-log n10 s1000000 T "$log_dir"\n' + f'exec s6-setuidgid hermes s6-log 1 n10 s1000000 T "$log_dir"\n' ) # -- lifecycle --------------------------------------------------------- diff --git a/tests/docker/test_gateway_run_supervised.py b/tests/docker/test_gateway_run_supervised.py index aec2257b0e2..91314d5b2f1 100644 --- a/tests/docker/test_gateway_run_supervised.py +++ b/tests/docker/test_gateway_run_supervised.py @@ -327,3 +327,69 @@ def test_dashboard_supervised_when_env_set( assert _svstat_wants_up(container_name, "dashboard"), ( f"dashboard slot not up: {_svstat(container_name, 'dashboard')!r}" ) + + +def test_supervised_gateway_stdout_reaches_docker_logs( + built_image: str, container_name: str, +) -> None: + """The supervised gateway's stdout — including the rich-console + startup banner — must reach ``docker logs``, not just the rotated + log file under ``${HERMES_HOME}/logs/gateways//current``. + + Without the ``1`` action directive in ``_render_log_run``, s6-log + swallows the gateway's stdout into the file and ``docker logs`` + only sees stderr (Python ``logging`` defaults to stderr). That's + a poor user experience: the iconic "Hermes Gateway Starting…" + banner with the ⚕ symbol is the most visible "yes, your gateway + started" signal, and forcing users to ``docker exec`` + ``tail`` + the log file just to see it is friction users don't expect. + + With the ``1`` directive, s6-log forwards every line to its own + stdout (which propagates up through the s6-supervise pipeline to + /init's stdout = container stdout = ``docker logs``) AND also + writes a timestamped copy to the rotated file. Best of both. + + We assert by looking for the literal banner glyph (``⚕``) — a + distinctive character that won't appear in stderr-routed + Python-logging output, so its presence in ``docker logs`` proves + the stdout-tee is working. + """ + subprocess.run( + ["docker", "run", "-d", "--name", container_name, built_image, + "gateway", "run"], + check=True, capture_output=True, timeout=30, + ) + # Banner is printed during gateway startup — give it time to + # initialize past the imports + config-load phase. + time.sleep(8) + + logs = subprocess.run( + ["docker", "logs", container_name], + capture_output=True, text=True, timeout=10, + ) + combined = logs.stdout + logs.stderr + + # The banner ⚕ symbol is the load-bearing assertion — it's unique + # to gateway startup stdout output and won't appear in stderr + # (Python logging) or s6 boot messages. + assert "⚕" in combined or "Hermes Gateway Starting" in combined, ( + "Supervised gateway's stdout banner did not reach docker logs. " + "This means the `1` action directive in _render_log_run isn't " + "forwarding stdout to /init. " + f"docker logs (last 2000 chars):\n{combined[-2000:]}\n" + f"file contents:\n{_sh(container_name, 'cat /opt/data/logs/gateways/default/current').stdout}" + ) + + # Cross-check: the same banner must also be in the rotated log + # file (we kept the file destination, just added stdout). The + # file version has s6-log's ISO 8601 timestamp prefix; the + # docker logs version is raw. + file_contents = _sh( + container_name, "cat /opt/data/logs/gateways/default/current", + ).stdout + assert "⚕" in file_contents or "Hermes Gateway Starting" in file_contents, ( + "Banner also missing from rotated log file — the file " + "destination may have been dropped by the new s6-log script. " + f"File contents:\n{file_contents}" + ) + diff --git a/tests/hermes_cli/test_service_manager.py b/tests/hermes_cli/test_service_manager.py index c627a9044fa..a5e2913bb4b 100644 --- a/tests/hermes_cli/test_service_manager.py +++ b/tests/hermes_cli/test_service_manager.py @@ -555,6 +555,16 @@ def test_s6_register_creates_service_dir_and_triggers_scan( assert "/opt/data/logs/gateways/coder" not in log_text, ( "log_dir was hard-coded; must use ${HERMES_HOME} at run time" ) + # `1` action directive forwards lines to stdout BEFORE the file + # destination so the supervised gateway's stdout (including the + # rich-console banner and plain print() output) reaches docker + # logs, not just the rotated file. See _render_log_run's docstring + # for the full output-routing rationale. + assert "s6-log 1 " in log_text, ( + "log/run must include the `1` action directive before the file " + "destination so supervised stdout reaches docker logs. Saw: " + f"{log_text!r}" + ) # s6-svscanctl -a was invoked against the scandir assert any( diff --git a/website/docs/user-guide/docker.md b/website/docs/user-guide/docker.md index fefde13a5b6..c81cadbf588 100644 --- a/website/docs/user-guide/docker.md +++ b/website/docs/user-guide/docker.md @@ -49,6 +49,15 @@ You'll see a one-line breadcrumb in `docker logs` confirming the upgrade. To opt This behavior applies to the s6-based image only. Earlier (tini-based) images still run `gateway run` as the foreground main process. ::: +:::note Where gateway logs go +Inside the s6 image, the supervised gateway's output is tee'd to two destinations: + +- **`docker logs `** — every line in real time (raw, no extra prefix). This is the same stream you'd get from a foreground gateway, so existing `docker logs --follow` / `--timestamps` / log-shipper integrations work unchanged. +- **`${HERMES_HOME}/logs/gateways//current`** (mapped to `~/.hermes/logs/gateways//current` on the host via the volume mount) — rotated, with an ISO 8601 timestamp prepended per line. Rotation is 10 archives × 1 MB each, so it can't fill the disk. This is what `hermes logs` reads and what survives container restarts. + +The per-profile reconciler keeps a separate audit log at `${HERMES_HOME}/logs/container-boot.log` — one line per profile per container boot, recording whether each gateway was restored to its prior state. +::: + Note: the API server is gated on `API_SERVER_ENABLED=true`. To expose it beyond `127.0.0.1` inside the container, also set `API_SERVER_HOST=0.0.0.0` and an `API_SERVER_KEY` (minimum 8 characters — generate one with `openssl rand -hex 32`). Example: ```sh