From 61ee2dbfdb407af8a5a649683dc4966272a94b53 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 14 Jun 2026 20:47:05 -0700 Subject: [PATCH] fix(s6): make profile gateway log parent writable (#46291) * fix(gateway): chown logs/gateways parent so late-added profiles can log The per-profile log service script created $HERMES_HOME/logs/gateways/ via 'mkdir -p' but only chowned the leaf logs/gateways/. When the first log service boots in root context, the gateways/ parent stays root:root; every profile registered later runs its log service as the dropped hermes user, 'mkdir -p' fails with EACCES, and s6-log enters a sub-second fatal crash-loop flooding the container log. The stage2 recursive heal does not catch it either: it is gated on needs_chown, which is false when the top-level $HERMES_HOME is already hermes-owned. Two complementary fixes: - service_manager._render_log_run: chown the gateways/ parent (non-recursively) before the leaf chown. Runs on every root-context boot, so it also heals volumes already poisoned by older images. - docker/stage2-hook.sh: seed logs/gateways in the as_hermes mkdir -p block; cont-init runs before any service starts, so the parent already exists hermes-owned when the first log/run does 'mkdir -p'. The needs_chown repair loop needs no twin entry: it already chowns logs/ recursively, which covers logs/gateways. Fixes #45258 * chore(release): map salvaged contributor --------- Co-authored-by: tangtaizhong666 --- docker/stage2-hook.sh | 1 + hermes_cli/service_manager.py | 9 +++ scripts/release.py | 1 + tests/hermes_cli/test_service_manager.py | 35 ++++++++++++ tests/tools/test_stage2_hook_log_dir_seed.py | 60 ++++++++++++++++++++ 5 files changed, 106 insertions(+) create mode 100644 tests/tools/test_stage2_hook_log_dir_seed.py diff --git a/docker/stage2-hook.sh b/docker/stage2-hook.sh index a63879ea34c..b00c15a7081 100755 --- a/docker/stage2-hook.sh +++ b/docker/stage2-hook.sh @@ -316,6 +316,7 @@ as_hermes mkdir -p \ "$HERMES_HOME/cron" \ "$HERMES_HOME/sessions" \ "$HERMES_HOME/logs" \ + "$HERMES_HOME/logs/gateways" \ "$HERMES_HOME/hooks" \ "$HERMES_HOME/memories" \ "$HERMES_HOME/skills" \ diff --git a/hermes_cli/service_manager.py b/hermes_cli/service_manager.py index 129e66e506f..fee11a5e286 100644 --- a/hermes_cli/service_manager.py +++ b/hermes_cli/service_manager.py @@ -689,6 +689,15 @@ class S6ServiceManager: f': "${{HERMES_HOME:=/opt/data}}"\n' f'log_dir="$HERMES_HOME/logs/gateways/{prof}"\n' f'mkdir -p "$log_dir"\n' + # The gateways/ parent must be chowned too (non-recursively): + # `mkdir -p` creates it root-owned on a root-context boot, and a + # leaf-only chown leaves it that way — every profile registered + # later then runs its log service as hermes and crash-loops on + # `mkdir: Permission denied`. The parent chown runs on every + # root-context boot, so it also heals volumes already poisoned + # by older images. Non-recursive on purpose: sibling profile + # dirs are each managed by their own log/run. See #45258. + f'chown hermes:hermes "$HERMES_HOME/logs/gateways" 2>/dev/null || true\n' f'chown -R hermes:hermes "$log_dir" 2>/dev/null || true\n' # Skip the drop when already non-root (CAP_SETGID). f'[ "$(id -u)" = 0 ] || exec s6-log 1 n10 s1000000 T "$log_dir"\n' diff --git a/scripts/release.py b/scripts/release.py index 6fe9ebb051c..83fd033233d 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -84,6 +84,7 @@ AUTHOR_MAP = { "290859878+synapsesx@users.noreply.github.com": "synapsesx", "157689911+itsflownium@users.noreply.github.com": "itsflownium", "dirtyren@users.noreply.github.com": "dirtyren", + "tangtaizhong792@gmail.com": "tangtaizong666", "github@aldo.pw": "aldoeliacim", "max@c60spaceship.com": "MaxFreedomPollard", "achaljhawar03@gmail.com": "achaljhawar", diff --git a/tests/hermes_cli/test_service_manager.py b/tests/hermes_cli/test_service_manager.py index bf157ef25b6..3e1f9bbece7 100644 --- a/tests/hermes_cli/test_service_manager.py +++ b/tests/hermes_cli/test_service_manager.py @@ -950,3 +950,38 @@ def test_s6_stop_tolerates_marker_write_failure(monkeypatch, s6_scandir): mgr.stop("gateway-coder") # must not raise assert any(cmd[0] == "s6-svc" and "-d" in cmd for cmd in svc_calls) + + +def test_s6_log_run_chowns_gateways_parent(s6_scandir, fake_subprocess_run) -> None: + """The log/run script must chown the logs/gateways/ parent, not just the leaf. + + Regression guard for #45258: `mkdir -p` creates the gateways/ parent + root-owned on a root-context boot, and a leaf-only chown leaves it that + way. Every profile registered later then runs its log service as the + dropped hermes user and s6-log crash-loops on `mkdir: Permission denied`. + """ + mgr = S6ServiceManager(scandir=s6_scandir) + mgr.register_profile_gateway("coder") + + log_text = (s6_scandir / "gateway-coder" / "log" / "run").read_text() + + parent_chown = 'chown hermes:hermes "$HERMES_HOME/logs/gateways"' + assert parent_chown in log_text, ( + "log/run must chown the logs/gateways parent so profiles added " + f"after a root-context boot can create their leaf dirs. Saw: {log_text!r}" + ) + # Non-recursive on purpose: sibling profile leaf dirs are each managed + # by their own log/run; a recursive parent chown would race them. + assert 'chown -R hermes:hermes "$HERMES_HOME/logs/gateways"' not in log_text + + # Ordering: mkdir creates the parent, then the parent chown repairs its + # ownership, then the leaf chown — all before s6-log execs. + mkdir_idx = log_text.index('mkdir -p "$log_dir"') + parent_idx = log_text.index(parent_chown) + leaf_idx = log_text.index('chown -R hermes:hermes "$log_dir"') + exec_idx = log_text.index("s6-log 1 ") + assert mkdir_idx < parent_idx < leaf_idx < exec_idx + + # The parent path must be a runtime env expansion, never a baked-in + # absolute path (same contract as the log_dir itself). + assert '/opt/data/logs/gateways"' not in log_text diff --git a/tests/tools/test_stage2_hook_log_dir_seed.py b/tests/tools/test_stage2_hook_log_dir_seed.py new file mode 100644 index 00000000000..a0affa34bc3 --- /dev/null +++ b/tests/tools/test_stage2_hook_log_dir_seed.py @@ -0,0 +1,60 @@ +"""Contract test: the s6-overlay stage2 hook seeds $HERMES_HOME/logs/gateways +as the hermes user. + +Regression guard for #45258: the per-profile gateway log service +(`gateway-/log/run`) creates `logs/gateways/` via `mkdir -p` but only +chowns the leaf `logs/gateways/`. If the first log service to boot +runs in root context, the `gateways/` parent is created root-owned and stays +that way; every profile registered later runs its log service as the dropped +hermes user and s6-log crash-loops on `mkdir: Permission denied`. + +Seeding `logs/gateways` in stage2 (cont-init runs before any service starts) +guarantees the parent already exists hermes-owned by the time the first +log/run executes its `mkdir -p`. +""" +from __future__ import annotations + +import re +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parents[2] +STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh" + + +@pytest.fixture(scope="module") +def stage2_text() -> str: + if not STAGE2_HOOK.exists(): + pytest.skip("docker/stage2-hook.sh not present in this checkout") + return STAGE2_HOOK.read_text() + + +def _seed_mkdir_block(text: str) -> str: + """Extract the `as_hermes mkdir -p \\ ...` seed block.""" + m = re.search(r"as_hermes mkdir -p \\\n(?:[^\n]*\\\n)*[^\n]*\n", text) + assert m, "stage2-hook.sh must contain the as_hermes mkdir -p seed block" + return m.group(0) + + +def test_logs_gateways_is_seeded(stage2_text: str) -> None: + block = _seed_mkdir_block(stage2_text) + assert '"$HERMES_HOME/logs/gateways"' in block, ( + "logs/gateways must be seeded hermes-owned in stage2 so profiles " + "added after first boot can create their log dirs (#45258)" + ) + # The parent must also be seeded so mkdir -p inside the block never + # creates logs/ implicitly with surprising ownership. + assert '"$HERMES_HOME/logs"' in block + + +def test_logs_subtree_is_healed_when_chown_needed(stage2_text: str) -> None: + """The needs_chown repair loop must cover the logs subtree recursively — + that is what makes the seed entry above sufficient (no separate + logs/gateways loop entry needed).""" + m = re.search(r"for sub in ([^;]*); do", stage2_text) + assert m, "stage2-hook.sh must contain the needs_chown subdir repair loop" + assert "logs" in m.group(1).split(), ( + "the needs_chown loop must recursively chown logs/ — it covers " + "logs/gateways, so the seed list does not need a loop twin" + )