fix(s6): make profile gateway log parent writable (#46291)

* fix(gateway): chown logs/gateways parent so late-added profiles can log

The per-profile log service script created $HERMES_HOME/logs/gateways/
via 'mkdir -p' but only chowned the leaf logs/gateways/<profile>. When
the first log service boots in root context, the gateways/ parent stays
root:root; every profile registered later runs its log service as the
dropped hermes user, 'mkdir -p' fails with EACCES, and s6-log enters a
sub-second fatal crash-loop flooding the container log. The stage2
recursive heal does not catch it either: it is gated on needs_chown,
which is false when the top-level $HERMES_HOME is already hermes-owned.

Two complementary fixes:

- service_manager._render_log_run: chown the gateways/ parent
  (non-recursively) before the leaf chown. Runs on every root-context
  boot, so it also heals volumes already poisoned by older images.
- docker/stage2-hook.sh: seed logs/gateways in the as_hermes mkdir -p
  block; cont-init runs before any service starts, so the parent
  already exists hermes-owned when the first log/run does 'mkdir -p'.

The needs_chown repair loop needs no twin entry: it already chowns
logs/ recursively, which covers logs/gateways.

Fixes #45258

* chore(release): map salvaged contributor

---------

Co-authored-by: tangtaizhong666 <tangtaizhong792@gmail.com>
This commit is contained in:
Teknium 2026-06-14 20:47:05 -07:00 committed by GitHub
parent f795513782
commit 61ee2dbfdb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 106 additions and 0 deletions

View file

@ -316,6 +316,7 @@ as_hermes mkdir -p \
"$HERMES_HOME/cron" \
"$HERMES_HOME/sessions" \
"$HERMES_HOME/logs" \
"$HERMES_HOME/logs/gateways" \
"$HERMES_HOME/hooks" \
"$HERMES_HOME/memories" \
"$HERMES_HOME/skills" \

View file

@ -689,6 +689,15 @@ class S6ServiceManager:
f': "${{HERMES_HOME:=/opt/data}}"\n'
f'log_dir="$HERMES_HOME/logs/gateways/{prof}"\n'
f'mkdir -p "$log_dir"\n'
# The gateways/ parent must be chowned too (non-recursively):
# `mkdir -p` creates it root-owned on a root-context boot, and a
# leaf-only chown leaves it that way — every profile registered
# later then runs its log service as hermes and crash-loops on
# `mkdir: Permission denied`. The parent chown runs on every
# root-context boot, so it also heals volumes already poisoned
# by older images. Non-recursive on purpose: sibling profile
# dirs are each managed by their own log/run. See #45258.
f'chown hermes:hermes "$HERMES_HOME/logs/gateways" 2>/dev/null || true\n'
f'chown -R hermes:hermes "$log_dir" 2>/dev/null || true\n'
# Skip the drop when already non-root (CAP_SETGID).
f'[ "$(id -u)" = 0 ] || exec s6-log 1 n10 s1000000 T "$log_dir"\n'

View file

@ -84,6 +84,7 @@ AUTHOR_MAP = {
"290859878+synapsesx@users.noreply.github.com": "synapsesx",
"157689911+itsflownium@users.noreply.github.com": "itsflownium",
"dirtyren@users.noreply.github.com": "dirtyren",
"tangtaizhong792@gmail.com": "tangtaizong666",
"github@aldo.pw": "aldoeliacim",
"max@c60spaceship.com": "MaxFreedomPollard",
"achaljhawar03@gmail.com": "achaljhawar",

View file

@ -950,3 +950,38 @@ def test_s6_stop_tolerates_marker_write_failure(monkeypatch, s6_scandir):
mgr.stop("gateway-coder") # must not raise
assert any(cmd[0] == "s6-svc" and "-d" in cmd for cmd in svc_calls)
def test_s6_log_run_chowns_gateways_parent(s6_scandir, fake_subprocess_run) -> None:
"""The log/run script must chown the logs/gateways/ parent, not just the leaf.
Regression guard for #45258: `mkdir -p` creates the gateways/ parent
root-owned on a root-context boot, and a leaf-only chown leaves it that
way. Every profile registered later then runs its log service as the
dropped hermes user and s6-log crash-loops on `mkdir: Permission denied`.
"""
mgr = S6ServiceManager(scandir=s6_scandir)
mgr.register_profile_gateway("coder")
log_text = (s6_scandir / "gateway-coder" / "log" / "run").read_text()
parent_chown = 'chown hermes:hermes "$HERMES_HOME/logs/gateways"'
assert parent_chown in log_text, (
"log/run must chown the logs/gateways parent so profiles added "
f"after a root-context boot can create their leaf dirs. Saw: {log_text!r}"
)
# Non-recursive on purpose: sibling profile leaf dirs are each managed
# by their own log/run; a recursive parent chown would race them.
assert 'chown -R hermes:hermes "$HERMES_HOME/logs/gateways"' not in log_text
# Ordering: mkdir creates the parent, then the parent chown repairs its
# ownership, then the leaf chown — all before s6-log execs.
mkdir_idx = log_text.index('mkdir -p "$log_dir"')
parent_idx = log_text.index(parent_chown)
leaf_idx = log_text.index('chown -R hermes:hermes "$log_dir"')
exec_idx = log_text.index("s6-log 1 ")
assert mkdir_idx < parent_idx < leaf_idx < exec_idx
# The parent path must be a runtime env expansion, never a baked-in
# absolute path (same contract as the log_dir itself).
assert '/opt/data/logs/gateways"' not in log_text

View file

@ -0,0 +1,60 @@
"""Contract test: the s6-overlay stage2 hook seeds $HERMES_HOME/logs/gateways
as the hermes user.
Regression guard for #45258: the per-profile gateway log service
(`gateway-<profile>/log/run`) creates `logs/gateways/` via `mkdir -p` but only
chowns the leaf `logs/gateways/<profile>`. If the first log service to boot
runs in root context, the `gateways/` parent is created root-owned and stays
that way; every profile registered later runs its log service as the dropped
hermes user and s6-log crash-loops on `mkdir: Permission denied`.
Seeding `logs/gateways` in stage2 (cont-init runs before any service starts)
guarantees the parent already exists hermes-owned by the time the first
log/run executes its `mkdir -p`.
"""
from __future__ import annotations
import re
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parents[2]
STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
@pytest.fixture(scope="module")
def stage2_text() -> str:
if not STAGE2_HOOK.exists():
pytest.skip("docker/stage2-hook.sh not present in this checkout")
return STAGE2_HOOK.read_text()
def _seed_mkdir_block(text: str) -> str:
"""Extract the `as_hermes mkdir -p \\ ...` seed block."""
m = re.search(r"as_hermes mkdir -p \\\n(?:[^\n]*\\\n)*[^\n]*\n", text)
assert m, "stage2-hook.sh must contain the as_hermes mkdir -p seed block"
return m.group(0)
def test_logs_gateways_is_seeded(stage2_text: str) -> None:
block = _seed_mkdir_block(stage2_text)
assert '"$HERMES_HOME/logs/gateways"' in block, (
"logs/gateways must be seeded hermes-owned in stage2 so profiles "
"added after first boot can create their log dirs (#45258)"
)
# The parent must also be seeded so mkdir -p inside the block never
# creates logs/ implicitly with surprising ownership.
assert '"$HERMES_HOME/logs"' in block
def test_logs_subtree_is_healed_when_chown_needed(stage2_text: str) -> None:
"""The needs_chown repair loop must cover the logs subtree recursively —
that is what makes the seed entry above sufficient (no separate
logs/gateways loop entry needed)."""
m = re.search(r"for sub in ([^;]*); do", stage2_text)
assert m, "stage2-hook.sh must contain the needs_chown subdir repair loop"
assert "logs" in m.group(1).split(), (
"the needs_chown loop must recursively chown logs/ — it covers "
"logs/gateways, so the seed list does not need a loop twin"
)