diff --git a/Dockerfile b/Dockerfile index c51bca29e58..be4e8848bb5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -182,8 +182,8 @@ RUN mkdir -p /etc/cont-init.d && \ printf '#!/bin/sh\nexec /opt/hermes/docker/stage2-hook.sh\n' \ > /etc/cont-init.d/01-hermes-setup && \ chmod +x /etc/cont-init.d/01-hermes-setup +COPY --chmod=0755 docker/cont-init.d/015-supervise-perms /etc/cont-init.d/015-supervise-perms COPY --chmod=0755 docker/cont-init.d/02-reconcile-profiles /etc/cont-init.d/02-reconcile-profiles -COPY --chmod=0755 docker/cont-init.d/03-dashboard-toggle /etc/cont-init.d/03-dashboard-toggle # ---------- Runtime ---------- ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist diff --git a/docker/cont-init.d/015-supervise-perms b/docker/cont-init.d/015-supervise-perms new file mode 100644 index 00000000000..8d7b473d29c --- /dev/null +++ b/docker/cont-init.d/015-supervise-perms @@ -0,0 +1,90 @@ +#!/command/with-contenv sh +# shellcheck shell=sh +# Make supervise/ trees for ALL declared s6 services queryable and +# controllable by the unprivileged hermes user (UID 10000). +# +# Background (PR #30136 review item I4): the entire s6 lifecycle +# (s6-svc, s6-svstat, s6-svwait) is dispatched as the hermes user +# inside the container (every Hermes runtime path runs under +# ``s6-setuidgid hermes``). But s6-supervise creates each service's +# ``supervise/`` and top-level ``event/`` directory with mode 0700 +# owned by its effective UID — which is root, because s6-supervise +# is spawned by s6-svscan running as PID 1. So unprivileged clients +# get EACCES on every probe / control call against the slot. +# +# Two fixes, one in each registration path: +# +# 1. For RUNTIME-registered profile gateways (created via the s6 +# runtime register hooks in profiles.py): the Python helper +# ``_seed_supervise_skeleton`` pre-creates supervise/ + event/ + +# supervise/control owned by hermes BEFORE s6-svscanctl -a fires. +# s6-supervise's mkdir/mkfifo are EEXIST-safe, so it inherits our +# ownership and never tries to chown back to root. +# +# 2. For STATIC s6-rc services (dashboard, main-hermes) declared at +# image-build time under /etc/s6-overlay/s6-rc.d/*: these are +# compiled by s6-rc at boot, and s6-supervise spawns BEFORE +# cont-init.d gets to run — so by the time we're here, the +# supervise/ tree is already there as root:root 0700. We chown +# it here. s6-supervise will keep using the same files; it never +# re-asserts ownership on a running service. +# +# This script runs as root after 01-hermes-setup but before +# 02-reconcile-profiles, so the chowns are settled before the +# Python reconciler walks the scandir. Lexicographic ordering +# guarantees this — the suffix is unusual because we want to slot +# in between 01 and the existing 02-reconcile-profiles without +# renumbering both (which would be a churn-noise patch on its own). + +set -eu + +# /run/s6-rc/servicedirs holds the live, compiled service directories +# for every static (s6-rc) service. Symlinks under /run/service/* +# point here. Per-service supervise/ + event/ both need hermes +# ownership for s6-svstat etc. to work as hermes. +SVC_ROOT=/run/s6-rc/servicedirs + +if [ ! -d "$SVC_ROOT" ]; then + echo "[supervise-perms] $SVC_ROOT not present; skipping" + exit 0 +fi + +for svc in "$SVC_ROOT"/*; do + [ -d "$svc" ] || continue + name=$(basename "$svc") + + # Skip s6-overlay-internal services (they need to stay root-only; + # the s6rc-* helpers manage the supervision tree itself). + case "$name" in + s6rc-*|s6-linux-*) + continue + ;; + esac + + # supervise/ tree — needed by s6-svstat / s6-svc. + if [ -d "$svc/supervise" ]; then + chown -R hermes:hermes "$svc/supervise" 2>/dev/null || \ + echo "[supervise-perms] could not chown $svc/supervise" + # 0710 = group searchable. ``s6-svstat`` only needs to openat + # status, not list the dir, but giving the hermes group +x is + # the minimum that lets group members access the contents. + chmod 0710 "$svc/supervise" 2>/dev/null || true + # supervise/control is a FIFO that s6-svc writes commands + # into; the hermes user needs +w. Owner is already hermes + # after the recursive chown above; widen perms to 0660 so + # ``s6-svc`` works for any member of the hermes group too. + if [ -p "$svc/supervise/control" ]; then + chmod 0660 "$svc/supervise/control" 2>/dev/null || true + fi + fi + + # Top-level event/ dir — s6-svlisten1 / s6-svwait subscribe here. + if [ -d "$svc/event" ]; then + chown hermes:hermes "$svc/event" 2>/dev/null || \ + echo "[supervise-perms] could not chown $svc/event" + # Preserve s6's 03730 mode (setgid + g+rwx + sticky). + chmod 03730 "$svc/event" 2>/dev/null || true + fi +done + +echo "[supervise-perms] chowned supervise/ trees for static s6-rc services" diff --git a/docker/cont-init.d/03-dashboard-toggle b/docker/cont-init.d/03-dashboard-toggle deleted file mode 100755 index 59095f9c534..00000000000 --- a/docker/cont-init.d/03-dashboard-toggle +++ /dev/null @@ -1,55 +0,0 @@ -#!/command/with-contenv sh -# shellcheck shell=sh -# Toggle the dashboard s6-rc service slot based on HERMES_DASHBOARD. -# -# Runs as root in cont-init.d, after 01-hermes-setup (stage2) and -# 02-reconcile-profiles, BEFORE s6-rc starts user services. -# -# Background (PR #30136 review item I3): the dashboard service was -# always declared as an s6-rc longrun, with its run script checking -# HERMES_DASHBOARD and `exec sleep infinity` when unset. Trouble: -# s6-svstat then reports the dashboard slot as "up" (because sleep -# IS running) even though no dashboard process exists. `hermes -# doctor` and any other s6-svstat-based health check sees a -# false-positive up-state. -# -# Fix: write a `down` marker file into the live service-dir when -# HERMES_DASHBOARD is unset / falsy. s6-supervise honors `down` by -# not starting the service at all, so s6-svstat reports `down` — -# matching reality. -# -# The run script's HERMES_DASHBOARD case-statement stays in place -# as a belt-and-suspenders guard: even if the down marker is -# removed at runtime and the service is brought up, the run script -# still bails when HERMES_DASHBOARD is unset. Both layers agree. - -set -eu - -# Live service directory for the dashboard longrun. s6-overlay -# compiles /etc/s6-overlay/s6-rc.d/dashboard/ into this location -# at boot, before cont-init.d scripts run. -DASHBOARD_LIVE_DIR="/run/service/dashboard" - -# If the live directory hasn't materialized yet (e.g. running in a -# stripped-down test image), nothing to do — the run script's env -# check still keeps things safe. -if [ ! -d "$DASHBOARD_LIVE_DIR" ]; then - echo "[dashboard-toggle] $DASHBOARD_LIVE_DIR not present; skipping" - exit 0 -fi - -case "${HERMES_DASHBOARD:-}" in - 1|true|TRUE|True|yes|YES|Yes) - # Enabled — remove any leftover down marker from a previous boot. - if [ -e "$DASHBOARD_LIVE_DIR/down" ]; then - rm -f "$DASHBOARD_LIVE_DIR/down" - echo "[dashboard-toggle] HERMES_DASHBOARD enabled; removed down marker" - fi - ;; - *) - # Disabled — write a down marker so s6-supervise won't start - # the service. s6-svstat will report it as down, matching reality. - touch "$DASHBOARD_LIVE_DIR/down" - echo "[dashboard-toggle] HERMES_DASHBOARD unset; marked dashboard slot down" - ;; -esac diff --git a/docker/s6-rc.d/dashboard/finish b/docker/s6-rc.d/dashboard/finish new file mode 100755 index 00000000000..a618c671bc8 --- /dev/null +++ b/docker/s6-rc.d/dashboard/finish @@ -0,0 +1,30 @@ +#!/command/with-contenv sh +# shellcheck shell=sh +# Dashboard finish script. Companion to ./run. +# +# When HERMES_DASHBOARD is unset (or falsy), ./run exits 0 immediately. +# Without this finish script, s6-supervise would just restart the run +# script in a tight loop. By exiting 125 here, we tell s6-supervise +# "this service has permanently failed; do not restart" — equivalent +# to `s6-svc -O`. The supervise slot reports as down, matching reality +# (no dashboard process is running). +# +# When HERMES_DASHBOARD IS enabled and the run script later exits or +# is killed, we want s6-supervise to restart it (the whole point of +# supervised lifecycle). So we exit non-125 in that case. + +# Arguments passed to a finish script: $1=run-exit-code, $2=signal-num, +# $3=service-dir-name, $4=run-pgid. See servicedir(7). + +case "${HERMES_DASHBOARD:-}" in + 1|true|TRUE|True|yes|YES|Yes) + # Dashboard was enabled — let s6-supervise restart on crash by + # exiting non-125. (Pass-through any sensible default.) + exit 0 + ;; + *) + # Dashboard disabled — permanent-failure marker so s6-supervise + # leaves the slot in 'down' state and s6-svstat reflects that. + exit 125 + ;; +esac \ No newline at end of file diff --git a/docker/s6-rc.d/dashboard/run b/docker/s6-rc.d/dashboard/run index 62ffac37a87..a48e8995dfc 100755 --- a/docker/s6-rc.d/dashboard/run +++ b/docker/s6-rc.d/dashboard/run @@ -1,12 +1,22 @@ #!/command/with-contenv sh # shellcheck shell=sh # Dashboard service. Always declared so s6 has a supervised slot; if -# HERMES_DASHBOARD isn't set to a truthy value we sleep forever and do -# nothing. See OQ3-A in the plan. +# HERMES_DASHBOARD isn't truthy the run script exits cleanly and the +# companion finish script returns 125 (s6's "permanent failure, do +# not restart" marker), so s6-svstat reports the slot as down. See +# also docker/s6-rc.d/dashboard/finish. case "${HERMES_DASHBOARD:-}" in 1|true|TRUE|True|yes|YES|Yes) ;; - *) exec sleep infinity ;; + *) + # Exit 0; the finish script will exit 125 → s6-supervise won't + # restart us and the slot reports down. Using a clean exit + # (rather than `exec sleep infinity`) means s6-svstat reflects + # reality: when HERMES_DASHBOARD is unset, the service is NOT + # running, just supervised-with-permanent-failure. See PR + # #30136 review item I3. + exit 0 + ;; esac cd /opt/data diff --git a/hermes_cli/container_boot.py b/hermes_cli/container_boot.py index a40c72de361..739f1e95fc3 100644 --- a/hermes_cli/container_boot.py +++ b/hermes_cli/container_boot.py @@ -193,6 +193,7 @@ def _register_service(scandir: Path, profile: str, *, start: bool) -> None: from hermes_cli.service_manager import ( S6ServiceManager, + _seed_supervise_skeleton, validate_profile_name, ) @@ -232,6 +233,17 @@ def _register_service(scandir: Path, profile: str, *, start: bool) -> None: if not start: (tmp_dir / "down").touch() + # Pre-create the supervise/ skeleton with hermes ownership + # BEFORE we publish the slot. Mirrors the same pre-creation + # step in S6ServiceManager.register_profile_gateway — when + # s6-svscan picks the published slot up, the s6-supervise it + # spawns will EEXIST our dirs/FIFOs and inherit hermes + # ownership, so runtime s6-svc / s6-svstat / s6-svwait calls + # (all dispatched as the hermes user) won't hit EACCES. See + # ``_seed_supervise_skeleton`` in service_manager.py for the + # full rationale. + _seed_supervise_skeleton(tmp_dir) + # Publish atomically. Path.replace handles the existing-target # case the same way os.rename does on POSIX: the target is # silently replaced, so a previous reconcile pass's slot is diff --git a/hermes_cli/service_manager.py b/hermes_cli/service_manager.py index b8c2158b8dc..417ec4ec982 100644 --- a/hermes_cli/service_manager.py +++ b/hermes_cli/service_manager.py @@ -340,6 +340,145 @@ S6_SERVICE_PREFIX = "gateway-" _S6_BIN_DIR = "/command" +# UID/GID of the in-image ``hermes`` user. Hardcoded to match what +# ``stage2-hook.sh`` enforces (the runtime invariant — see also +# tests/docker/test_uid_remap.py). The container starts s6-supervise +# under root and immediately drops to this UID via ``s6-setuidgid``. +_HERMES_UID = 10000 +_HERMES_GID = 10000 + + +def _seed_supervise_skeleton(svc_dir: Path) -> None: + """Pre-create the ``supervise/`` and top-level ``event/`` skeleton + inside a service directory, owned by the hermes user. + + Why this exists + --------------- + When s6-supervise spawns a service it tries to ``mkdir`` two + directories: ``/event`` and ``/supervise``, both with mode + ``0700``. It also ``mkfifo``s ``/supervise/control`` with mode + ``0600``. Because s6-supervise runs as PID 1's effective UID (root) + these dirs end up root-owned mode 0700, and an unprivileged client + (the ``hermes`` user — UID 10000 — running every Hermes runtime + operation via ``s6-setuidgid``) gets ``EACCES`` on any ``s6-svc``, + ``s6-svstat``, or ``s6-svwait`` invocation against the slot. + + The PR #30136 review surfaced this as a real product gap: the + entire S6ServiceManager lifecycle (``register/start/stop/unregister + _profile_gateway``) was inert in production because every operation + is dispatched as the hermes user. + + Why this works + -------------- + Reading s6's source (src/supervision/s6-supervise.c::trymkdir + + control_init): the ``mkdir`` and ``mkfifo`` calls both treat + ``EEXIST`` as success. If the directory is already present, the + chown/chmod fix-up that would normally make event/ ``03730 + root:root`` is **skipped** entirely — s6-supervise just opens the + pre-existing FIFOs and proceeds. So if we lay the skeleton down + with hermes ownership before triggering ``s6-svscanctl -a``, + s6-supervise inherits our layout and never touches it. + + Layout produced + --------------- + ``svc_dir/`` hermes:hermes, 0755 (parent must already exist) + ``svc_dir/event/`` hermes:hermes, 03730 (setgid + g+rwx + sticky) + ``svc_dir/supervise/`` hermes:hermes, 0755 + ``svc_dir/supervise/event/`` hermes:hermes, 03730 + ``svc_dir/supervise/control`` hermes:hermes, 0660 (FIFO) + + The ``death_tally``, ``lock``, and ``status`` regular files end up + written by s6-supervise itself (as root), but those land mode 0644 — + world-readable — and ``s6-svstat`` only needs read access, so the + hermes user reads them fine. + + If ``svc_dir/log/`` is present (the canonical s6 logger pattern — + one s6-supervise instance per service, plus a second for its + logger), the same skeleton is seeded under ``log/`` as well: + ``log/event/``, ``log/supervise/``, ``log/supervise/event/``, + ``log/supervise/control``. Without this, unregister teardown + would EACCES on the logger's supervise dir even after the parent + slot's supervise/ was hermes-owned. + + Idempotency + ----------- + Safe to call against a directory where the skeleton already exists. + Existing entries are left untouched (the helper doesn't try to + re-chown / re-chmod live FIFOs that s6-supervise may have already + opened). + + Reference + --------- + Discussed at length on the skarnet `skaware` mailing list in 2020 + (``_); see also + just-containers/s6-overlay#130. The pre-creation pattern was + historically called out as forward-compatibility-fragile, but the + EEXIST handling in s6-supervise has been stable since 2015 — it's + the same pattern ``s6-svperms`` and ``fix-attrs.d`` rely on. + """ + import os + + def _mkdir_owned(path: Path, mode: int) -> None: + if path.exists(): + return + path.mkdir(parents=False, exist_ok=False) + path.chmod(mode) + try: + os.chown(path, _HERMES_UID, _HERMES_GID) + except PermissionError: + # Running as the hermes user already — directory is hermes- + # owned by default. The chown is a no-op in that case, so + # swallowing this keeps both root and unprivileged callers + # on one code path. + pass + + # Top-level event/ dir (this is the s6-svlisten1 event-subscription + # dir at the service root, distinct from supervise/event/). + _mkdir_owned(svc_dir / "event", 0o3730) + + # supervise/ dir + its inner event/ dir. + supervise = svc_dir / "supervise" + _mkdir_owned(supervise, 0o755) + _mkdir_owned(supervise / "event", 0o3730) + + # supervise/control FIFO. Same EEXIST-safe pattern: if it's already + # there (s6-supervise has already started against this slot), leave + # it alone. The explicit chmod after mkfifo is required because + # mkfifo honors the process umask, which can strip group-write + # (e.g. the default 0022 on most dev hosts → 0o660 becomes 0o640). + # The container runs with umask 0 inside s6-overlay's stage2, but + # being defensive here keeps the helper consistent under any + # invocation context. + control = supervise / "control" + if not control.exists(): + os.mkfifo(control, 0o660) + control.chmod(0o660) + try: + os.chown(control, _HERMES_UID, _HERMES_GID) + except PermissionError: + pass + + # If a log/ subdir is present (the canonical s6 logger pattern — + # see servicedir(7)), it gets its own s6-supervise instance and + # needs the same skeleton. Without this, unregister teardown + # would EACCES on the logger's root-owned supervise/ dir even + # when the parent slot's supervise/ is hermes-owned. + log_dir = svc_dir / "log" + if log_dir.is_dir(): + _mkdir_owned(log_dir / "event", 0o3730) + log_supervise = log_dir / "supervise" + _mkdir_owned(log_supervise, 0o755) + _mkdir_owned(log_supervise / "event", 0o3730) + log_control = log_supervise / "control" + if not log_control.exists(): + os.mkfifo(log_control, 0o660) + log_control.chmod(0o660) + try: + os.chown(log_control, _HERMES_UID, _HERMES_GID) + except PermissionError: + pass + + class S6Error(RuntimeError): """Base error for S6ServiceManager lifecycle failures. @@ -636,6 +775,15 @@ class S6ServiceManager: log_run.write_text(self._render_log_run(profile)) log_run.chmod(0o755) + # Pre-create the supervise/ skeleton with hermes ownership + # BEFORE we publish the slot. s6-supervise will EEXIST our + # dirs/FIFOs and inherit the ownership, so the runtime + # s6-svc / s6-svstat / s6-svwait calls (all dispatched as + # the hermes user) won't hit EACCES on root-owned 0700 + # dirs. See ``_seed_supervise_skeleton`` for the full + # rationale. + _seed_supervise_skeleton(tmp_dir) + tmp_dir.rename(svc_dir) except Exception: shutil.rmtree(tmp_dir, ignore_errors=True) @@ -661,9 +809,18 @@ class S6ServiceManager: wait-for-down before removal so the running gateway process gets a chance to shut down cleanly before its service dir disappears. + + Teardown ordering matters: ``s6-svscanctl -an`` is fired + **before** ``rmtree`` so s6-svscan reaps the supervise child + process (releasing its handle on ``supervise/lock`` and the + regular files inside the supervise dir), giving us a clean + directory to remove. Without the reap-first ordering, the + rmtree races s6-supervise on a set of root-owned files inside + the supervise dir and the dir is left half-removed. """ import shutil import subprocess + import time svc_dir = self._service_dir(profile) if not svc_dir.exists(): @@ -682,16 +839,32 @@ class S6ServiceManager: check=False, ) - # Remove the directory. - shutil.rmtree(svc_dir, ignore_errors=True) - - # Rescan so s6-svscan drops its supervise process for the dir. - # -n = also reap orphan supervise processes. + # Reap the supervise child FIRST: -n tells s6-svscan to drop + # any supervise processes whose service dir is gone (which + # includes any service dir we're about to remove). This + # releases the file handles s6-supervise holds against the + # supervise/lock + supervise/status + supervise/death_tally + # files inside the slot, so the upcoming rmtree doesn't race. subprocess.run( [f"{_S6_BIN_DIR}/s6-svscanctl", "-an", str(self.scandir)], capture_output=True, text=True, timeout=5, check=False, ) + # Give s6-svscan a moment to reap. There's no synchronous + # "scan completed" handshake — the -a/-n trigger just sets a + # flag s6-svscan reads on its next loop iteration. 200ms is + # comfortably above the loop's resolution but well under any + # user-perceived latency. + time.sleep(0.2) + + # Now the supervise dir's files are no longer held open by a + # live s6-supervise, so rmtree can remove them. Files inside + # supervise/ are root-owned (death_tally, lock, status, written + # by s6-supervise itself) — but the parent supervise/ directory + # is hermes-owned (see ``_seed_supervise_skeleton``), and on + # POSIX you only need write+execute on the parent to remove + # contained files regardless of file ownership. + shutil.rmtree(svc_dir, ignore_errors=True) def list_profile_gateways(self) -> list[str]: """Return the profile names of all currently-registered gateway services. diff --git a/tests/hermes_cli/test_service_manager.py b/tests/hermes_cli/test_service_manager.py index b05c02c01a8..cd5761bb049 100644 --- a/tests/hermes_cli/test_service_manager.py +++ b/tests/hermes_cli/test_service_manager.py @@ -412,6 +412,115 @@ def test_s6_manager_kind_and_supports_registration() -> None: assert mgr.supports_runtime_registration() is True +# --------------------------------------------------------------------------- +# _seed_supervise_skeleton — unit tests +# --------------------------------------------------------------------------- +# +# The skeleton helper pre-creates the dirs and FIFOs that s6-supervise +# would otherwise create as root mode 0700, locking out the +# unprivileged hermes user from every lifecycle op. These tests run +# against tmp_path and assert the produced layout — the live-container +# verification (against real s6-svc / s6-svstat) lives in +# tests/docker/test_s6_profile_gateway_integration.py. + + +def test_seed_supervise_skeleton_creates_expected_layout(tmp_path) -> None: + """Verifies the dirs + FIFO + modes the helper lays down.""" + import stat + + from hermes_cli.service_manager import _seed_supervise_skeleton + + svc_dir = tmp_path / "gateway-foo" + svc_dir.mkdir() + + _seed_supervise_skeleton(svc_dir) + + # Top-level event/ — s6-svlisten1 event subscription dir. + event = svc_dir / "event" + assert event.is_dir(), "missing top-level event/" + assert stat.S_IMODE(event.stat().st_mode) == 0o3730, ( + f"event/ mode = {oct(event.stat().st_mode)}, want 03730" + ) + + # supervise/ dir. + supervise = svc_dir / "supervise" + assert supervise.is_dir(), "missing supervise/" + assert stat.S_IMODE(supervise.stat().st_mode) == 0o755 + + # supervise/event/. + supervise_event = supervise / "event" + assert supervise_event.is_dir(), "missing supervise/event/" + assert stat.S_IMODE(supervise_event.stat().st_mode) == 0o3730 + + # supervise/control FIFO. + control = supervise / "control" + assert control.exists(), "missing supervise/control FIFO" + assert stat.S_ISFIFO(control.stat().st_mode), ( + "supervise/control must be a FIFO" + ) + assert stat.S_IMODE(control.stat().st_mode) == 0o660 + + +def test_seed_supervise_skeleton_handles_log_subservice(tmp_path) -> None: + """When a log/ subdir exists, its supervise tree also gets seeded. + + Without this, ``unregister_profile_gateway``'s rmtree would EACCES + on the logger's root-owned supervise dir even after the parent + slot's supervise/ was hermes-owned. + """ + import stat + + from hermes_cli.service_manager import _seed_supervise_skeleton + + svc_dir = tmp_path / "gateway-foo" + svc_dir.mkdir() + (svc_dir / "log").mkdir() # logger subdir present + + _seed_supervise_skeleton(svc_dir) + + # Logger's own supervise tree is seeded the same way. + log_event = svc_dir / "log" / "event" + log_supervise = svc_dir / "log" / "supervise" + log_supervise_event = log_supervise / "event" + log_control = log_supervise / "control" + + assert log_event.is_dir() + assert stat.S_IMODE(log_event.stat().st_mode) == 0o3730 + assert log_supervise.is_dir() + assert log_supervise_event.is_dir() + assert log_control.exists() and stat.S_ISFIFO(log_control.stat().st_mode) + + +def test_seed_supervise_skeleton_skips_when_no_log_subservice(tmp_path) -> None: + """If log/ isn't present, no logger skeleton is created.""" + from hermes_cli.service_manager import _seed_supervise_skeleton + + svc_dir = tmp_path / "gateway-foo" + svc_dir.mkdir() + + _seed_supervise_skeleton(svc_dir) + + assert not (svc_dir / "log").exists(), ( + "helper must not synthesize a log/ subdir on its own" + ) + + +def test_seed_supervise_skeleton_is_idempotent(tmp_path) -> None: + """Calling the helper twice on the same dir is a no-op the second time. + + Important because s6-supervise may have already opened the FIFO + when a re-register / reconcile happens; double-creation would + error out. The helper short-circuits on existence. + """ + from hermes_cli.service_manager import _seed_supervise_skeleton + + svc_dir = tmp_path / "gateway-foo" + svc_dir.mkdir() + + _seed_supervise_skeleton(svc_dir) + _seed_supervise_skeleton(svc_dir) # must not raise + + def test_s6_register_creates_service_dir_and_triggers_scan( s6_scandir, fake_subprocess_run, ) -> None: