From a36221ed91745dcb3c25254fafc7df5720e49ad5 Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 21 May 2026 17:05:32 +1000 Subject: [PATCH] docs(s6): document container supervision; doctor + skill + user-guide updates Phase 5 of the s6-overlay supervision plan. Documentation + small diagnostic cleanups; no behavior changes. website/docs/user-guide/docker.md: - Replace the old 'entrypoint script does the bootstrap' section with the s6-overlay boot flow (cont-init.d/01-hermes-setup, cont-init.d/02-reconcile-profiles, static main-hermes + dashboard services, ENTRYPOINT-as-main-program pattern). - Add a 'Per-profile gateway supervision' subsection covering the new lifecycle commands, restart semantics, log persistence, and 'Manager: s6 (container supervisor)' status reporting. - Add 'Breaking change vs. pre-s6 images' callout naming the /init ENTRYPOINT and pointing affected wrappers at the pin workaround. website/docs/user-guide/profiles.md: - Add a note under 'Persistent services' pointing container users at the docker.md section explaining s6 supervision inside the image. Host-side systemd/launchd documentation is unchanged. skills/software-development/hermes-s6-container-supervision/SKILL.md: - New maintainer skill covering the supervision-tree map, file layout, the Architecture B rationale (cont-init.d args + halt exit-code propagation), quick recipes, and the 8 pitfalls we hit while implementing the plan (PATH-without-/command, root-owned profile dirs, SOUL.md as marker, the '143' anti-pattern, etc.). hermes_cli/doctor.py: - _check_gateway_service_linger skips on s6 (the linger concept doesn't apply inside the container). - New _check_s6_supervision section reports main-hermes/dashboard state and per-profile-gateway count (registered vs supervised up), only inside the s6 container. Host doctor output unchanged. - External Tools / Docker check no longer emits a 'docker not found' warning inside the container; prints an explanatory info line instead. 
Still respects an explicit TERMINAL_ENV=docker (in case the user mounted /var/run/docker.sock). hermes_cli/gateway.py: - Document _container_systemd_operational more precisely: it's NOT for our Hermes Docker image (s6-overlay handles that via detect_service_manager() == 's6'). It still covers systemd-nspawn / k8s-with-systemd-init cases, so leaving it in place is correct; the docstring just makes that explicit. Test harness (verification, no test changes in this commit): 19 passed, 0 xfailed. 66 service-manager / container-boot / profiles-s6-hooks / gateway-s6-dispatch unit tests still green. 61 doctor tests still green. Hadolint + shellcheck clean. Refs: docs/plans/2026-05-07-s6-overlay-dynamic-subagent-gateways.md --- hermes_cli/doctor.py | 86 ++++++++- hermes_cli/gateway.py | 12 +- .../hermes-s6-container-supervision/SKILL.md | 176 ++++++++++++++++++ website/docs/user-guide/docker.md | 49 +++-- website/docs/user-guide/profiles.md | 4 + 5 files changed, 314 insertions(+), 13 deletions(-) create mode 100644 skills/software-development/hermes-s6-container-supervision/SKILL.md diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index df75ac68664..9cac0678cef 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -207,14 +207,69 @@ def _fail_and_issue(text: str, detail: str, fix: str, issues: list[str]) -> None issues.append(fix) +def _check_s6_supervision(issues: list[str]) -> None: + """Inside a container under our s6 /init, surface what s6 sees. + + Runs as a counterpart to :func:`_check_gateway_service_linger` for + the systemd-on-host case. No-op everywhere except in the s6 + container so host runs aren't cluttered with irrelevant output. 
+ + Reports: + - Whether the main-hermes and dashboard static services are up + - How many per-profile gateway slots are registered (via + ``S6ServiceManager.list_profile_gateways()``) and how many are + currently supervised as ``up`` + """ + try: + from hermes_cli.service_manager import ( + S6ServiceManager, + detect_service_manager, + ) + except Exception: + return + + if detect_service_manager() != "s6": + return + + _section("s6 Supervision") + + mgr = S6ServiceManager() + + # Static services. They live under /run/service/ via s6-rc symlinks, + # so the same s6-svstat probe works. + for static in ("main-hermes", "dashboard"): + if mgr.is_running(static): + check_ok(f"{static}: up") + else: + check_info(f"{static}: down (expected if not enabled via env)") + + profiles = mgr.list_profile_gateways() + if not profiles: + check_info("No per-profile gateways registered yet — create one with `hermes profile create `") + return + + up_count = sum(1 for p in profiles if mgr.is_running(f"gateway-{p}")) + check_ok( + f"Per-profile gateways: {up_count}/{len(profiles)} supervised up" + + (f" ({', '.join(sorted(profiles))})" if len(profiles) <= 8 else "") + ) + + def _check_gateway_service_linger(issues: list[str]) -> None: - """Warn when a systemd user gateway service will stop after logout.""" + """Warn when a systemd user gateway service will stop after logout. + + Skipped inside a container running under s6 — the linger concept + (user-systemd surviving SSH logout) doesn't apply there, and the + s6 supervision state is surfaced separately by + ``_check_s6_supervision``. 
+ """ try: from hermes_cli.gateway import ( get_systemd_linger_status, get_systemd_unit_path, is_linux, ) + from hermes_cli.service_manager import detect_service_manager except Exception as e: check_warn("Gateway service linger", f"(could not import gateway helpers: {e})") return @@ -222,6 +277,12 @@ def _check_gateway_service_linger(issues: list[str]) -> None: if not is_linux(): return + # Inside a container under our s6 /init, _check_s6_supervision + # reports the live supervision state; the linger warning would be + # confusing here (no systemd, no logout, no "lingering" concept). + if detect_service_manager() == "s6": + return + unit_path = get_systemd_unit_path() if not unit_path.exists(): return @@ -984,6 +1045,7 @@ def run_doctor(args): pass _check_gateway_service_linger(issues) + _check_s6_supervision(issues) if sys.platform != "win32": _section("Command Installation") @@ -1076,6 +1138,26 @@ def run_doctor(args): # Docker (optional) terminal_env = os.getenv("TERMINAL_ENV", "local") + try: + from hermes_constants import is_container as _is_container + running_in_container = _is_container() + except Exception: + running_in_container = False + + if running_in_container: + # Inside our container the Docker terminal backend is not + # configured by default (Docker-in-Docker isn't set up); the + # local backend is the intended one. Skip the noisy "docker + # not found" warning. If the user has explicitly chosen + # TERMINAL_ENV=docker inside the container they likely mounted + # /var/run/docker.sock, so fall through to the normal check. + if terminal_env != "docker": + check_info( + "Running inside a container — using local terminal backend " + "(docker-in-docker is not configured by default)" + ) + # Skip to next section; Docker isn't relevant here. 
+ terminal_env = "local" if terminal_env == "docker": if _safe_which("docker"): # Check if docker daemon is running @@ -1098,6 +1180,8 @@ def run_doctor(args): check_ok("docker", "(optional)") elif _is_termux(): check_info("Docker backend is not available inside Termux (expected on Android)") + elif running_in_container: + pass # already explained above else: check_warn("docker not found", "(optional)") diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index d9f397437fa..e68fac0a4f4 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -1214,7 +1214,17 @@ def _systemd_operational(system: bool = False) -> bool: def _container_systemd_operational() -> bool: - """Return True when a container exposes working user or system systemd.""" + """Return True when a container exposes working user or system systemd. + + This is NOT our Hermes Docker image — that one runs s6-overlay as + PID 1 (since Phase 2 of the s6-overlay supervision plan) and is + detected via ``service_manager.detect_service_manager() == "s6"``. + This function handles the "container managed by something else" + case: systemd-nspawn, certain k8s pods, containers built FROM + systemd-bearing distros where the user has wired systemd as their + init. In those environments systemctl behaves identically to the + host case, so we fall through to the normal systemd code paths. 
+ """ if _systemd_operational(system=False): return True if _systemd_operational(system=True): diff --git a/skills/software-development/hermes-s6-container-supervision/SKILL.md b/skills/software-development/hermes-s6-container-supervision/SKILL.md new file mode 100644 index 00000000000..934b26bc181 --- /dev/null +++ b/skills/software-development/hermes-s6-container-supervision/SKILL.md @@ -0,0 +1,176 @@ +--- +name: hermes-s6-container-supervision +description: Modify, debug, or extend the s6-overlay supervision tree inside the Hermes Agent Docker image — adding new services, debugging profile gateways, understanding the Architecture B main-program pattern. +version: 1.0.0 +author: Hermes Agent +license: MIT +metadata: + hermes: + tags: [docker, s6, supervision, gateway, profiles] + related_skills: [hermes-agent, hermes-agent-dev] +--- + +# Hermes s6-overlay Container Supervision + +## When to use this skill + +Load this skill when you're working on: +- Adding or removing a static service in the Hermes Docker image (something that should be supervised at every container start, like the dashboard) +- Diagnosing why a per-profile gateway isn't starting, restarting, or surviving `docker restart` +- Understanding why the container's CMD is `/opt/hermes/docker/main-wrapper.sh` and how leading-dash args reach the user's program +- Modifying `cont-init.d` boot scripts (UID remap, volume seeding, profile reconciliation) +- Changing the rendered run-script for per-profile gateways (Phase 4) + +If you're just running the Hermes Agent and want to use Docker, see `website/docs/user-guide/docker.md` instead. 
+ +## Architecture at a glance + +``` +/init ← PID 1 (s6-overlay v3.2.3.0) +├── cont-init.d ← oneshot setup, runs as root +│ ├── 01-hermes-setup ← docker/stage2-hook.sh +│ │ ├── UID/GID remap +│ │ ├── chown /opt/data +│ │ ├── chown /opt/data/profiles (every boot) +│ │ ├── seed .env / config.yaml / SOUL.md +│ │ └── skills_sync.py +│ └── 02-reconcile-profiles ← hermes_cli.container_boot +│ ├── chown /run/service (hermes-writable for runtime register) +│ └── walk $HERMES_HOME/profiles//gateway_state.json +│ → recreate /run/service/gateway-/ +│ → auto-start only those with prior_state == "running" +│ +├── s6-rc.d (static services, in /etc/s6-overlay/s6-rc.d/) +│ ├── main-hermes/run ← exec sleep infinity (no-op slot) +│ └── dashboard/run ← if HERMES_DASHBOARD=1, runs `hermes dashboard` +│ +├── /run/service (s6-svscan watches; tmpfs) +│ ├── gateway-coder/ ← runtime-registered per-profile +│ │ ├── type ("longrun") +│ │ ├── run ("#!/command/with-contenv sh ... exec s6-setuidgid hermes hermes -p coder gateway run") +│ │ ├── down (marker — present means "registered but don't auto-start") +│ │ └── log/run (s6-log → $HERMES_HOME/logs/gateways/coder/current) +│ └── ... +│ +└── CMD ("main program") ← /opt/hermes/docker/main-wrapper.sh + └── routes user args: bare exec | hermes subcommand | hermes (no args) + — exec'd by /init with stdin/stdout/stderr inherited (TTY for --tui) +``` + +## Key files + +| Path | Role | +|---|---| +| `Dockerfile` | s6-overlay install + cont-init.d wiring + `ENTRYPOINT ["/init", "/opt/hermes/docker/main-wrapper.sh"]` | +| `docker/stage2-hook.sh` | The "old entrypoint logic" — UID remap, chown, seed, skills sync. Runs as cont-init.d/01-hermes-setup. | +| `docker/cont-init.d/02-reconcile-profiles` | Calls `hermes_cli.container_boot` on every boot to restore profile gateway slots from the persistent volume. | +| `docker/main-wrapper.sh` | The container's CMD. Routes user args, drops to hermes via `s6-setuidgid`, exec's the chosen program. 
| +| `docker/s6-rc.d/main-hermes/run` | No-op `sleep infinity` — slot exists so the s6-rc user bundle is valid; main hermes runs as the CMD, not as a supervised service. | +| `docker/s6-rc.d/dashboard/run` | Conditional service — `exec sleep infinity` unless `HERMES_DASHBOARD` is truthy. | +| `docker/entrypoint.sh` | Back-compat shim that `exec`s the stage2 hook. External scripts that hard-coded the old entrypoint path still work. | +| `hermes_cli/service_manager.py` | `S6ServiceManager`: `register_profile_gateway`, `unregister_profile_gateway`, `start/stop/restart/is_running`, `list_profile_gateways`. | +| `hermes_cli/container_boot.py` | `reconcile_profile_gateways()` — walks persistent profiles, regenerates s6 slots, emits `container-boot.log`. | +| `hermes_cli/gateway.py::_dispatch_via_service_manager_if_s6` | Intercepts `hermes gateway start/stop/restart` and routes to s6 when running in a container. | + +## Why Architecture B (CMD as main program, not s6-supervised) + +The original plan (v1–v3) called for main hermes to run as a supervised s6-rc service. Two real s6-overlay v3 mechanics blocked that: + +1. **cont-init.d scripts receive no CMD args** — so the stage2 hook can't parse `docker run chat -q "hi"` to set `HERMES_ARGS` for a service `run` script to consume. +2. **`/run/s6/basedir/bin/halt` does NOT propagate the exit code** written to `/run/s6-linux-init-container-results/exitcode`. Containers always exit 143 (SIGTERM) regardless. Confirmed by skarnet (s6 author) in [issue #477](https://github.com/just-containers/s6-overlay/issues/477): _"if you want a container shutdown, you need to either have your CMD exit, or, if you have no CMD, write the container exit code you want then call halt"_. + +So we use the s6-overlay-native CMD pattern: `ENTRYPOINT ["/init", "/opt/hermes/docker/main-wrapper.sh"]`. 
/init prepends the wrapper to user args automatically — so `docker run <image> --version` becomes `/init main-wrapper.sh --version`, and `--version` doesn't get intercepted by /init's POSIX shell. The wrapper drops to hermes via `s6-setuidgid`, then exec's the chosen program. The program's exit code becomes the container exit code, exactly matching the pre-s6 tini contract. + +Trade-off: main hermes is unsupervised under s6. That exactly matches its behavior under tini (the pre-s6 image). Dashboard supervision is the only **new** guarantee — and per-profile gateways under `/run/service/` get full supervision. + +## Quick recipes + +### Verify s6 is PID 1 in a running container + +```sh +docker exec <container> sh -c 'cat /proc/1/comm; readlink /proc/1/exe' +# Expect: s6-svscan or init / /package/admin/s6/.../s6-svscan +``` + +### Inspect a profile gateway service + +```sh +# /command/ isn't on docker-exec PATH — use absolute path +docker exec <container> /command/s6-svstat /run/service/gateway-<name> +# "up (pid …) … seconds" → running +# "down (exitcode N) … seconds, normally up, want up, …" → s6 wants it up but the process keeps exiting (crash loop) +# "down … normally up, ready …" → user stopped it +``` + +### Bring a service up/down manually + +```sh +docker exec <container> /command/s6-svc -u /run/service/gateway-<name> # up +docker exec <container> /command/s6-svc -d /run/service/gateway-<name> # down +docker exec <container> /command/s6-svc -t /run/service/gateway-<name> # SIGTERM (restart) +``` + +### Watch the cont-init reconciler log + +```sh +docker exec <container> tail -n 50 /opt/data/logs/container-boot.log +# 2026-05-21T06:18:05+0000 profile=coder prior_state=running action=started +# 2026-05-21T06:18:05+0000 profile=writer prior_state=stopped action=registered +``` + +### Add a new static service + +1. Create `docker/s6-rc.d/<name>/type` with `longrun\n` and `docker/s6-rc.d/<name>/run` (use `#!/command/with-contenv sh` + `# shellcheck shell=sh`). +2. Drop to hermes via `s6-setuidgid hermes` at the top of run (unless you specifically need root). +3. 
Create empty `docker/s6-rc.d/<name>/dependencies.d/base` so it waits for the base bundle. +4. Create empty `docker/s6-rc.d/user/contents.d/<name>` so it joins the user bundle. +5. The `COPY docker/s6-rc.d/` in the Dockerfile picks it up automatically — no other changes. + +### Change the per-profile gateway run command + +Edit `S6ServiceManager._render_run_script` in `hermes_cli/service_manager.py`. The function is also called by `hermes_cli/container_boot.py::_register_service` during boot reconciliation, so it's the single source of truth. Update the corresponding assertion in `tests/hermes_cli/test_service_manager.py::test_s6_register_creates_service_dir_and_triggers_scan`. + +### Run the docker test harness + +```sh +docker build -t hermes-agent-harness:latest . +HERMES_TEST_IMAGE=hermes-agent-harness:latest scripts/run_tests.sh tests/docker/ -v +# Expect 19 passed, 0 xfailed against the s6 image +``` + +The harness lives in `tests/docker/` and skips when Docker isn't available. The per-test timeout is bumped to 180s (see `tests/docker/conftest.py`). + +## Common pitfalls + +### "command not found" via `docker exec` + +`/command/` (where s6-overlay puts its binaries) is on PATH only for processes spawned by the supervision tree — services, cont-init.d, main-wrapper.sh. `docker exec <container> s6-svstat …` will fail with "command not found"; always use the absolute path `/command/s6-svstat`. The `hermes` binary works because the Dockerfile adds `/opt/hermes/.venv/bin` to the runtime `ENV PATH`. + +### Profile directory ownership + +The cont-init reconciler runs as hermes (`s6-setuidgid hermes` in `02-reconcile-profiles`). If a profile dir ends up root-owned (e.g. because `docker exec <container> hermes profile create …` ran as root by default), the reconciler can't read SOUL.md and fails with `PermissionError`. Mitigation: `stage2-hook.sh` chowns `$HERMES_HOME/profiles` to hermes on **every** boot, idempotently. Don't remove that block. 
+ +### Files written by `docker exec` are root-owned + +`docker exec` defaults to root. Either pass `--user hermes` or rely on the stage2 chown sweep on the next boot. Don't write files under `$HERMES_HOME/profiles/<name>/` as root manually — the next reconcile pass will sweep them but in-flight operations may hit permission errors. + +### Service slot exists but s6-svstat says "s6-supervise not running" + +The service directory is on tmpfs and was wiped on container restart. Either the cont-init reconciler hasn't run yet (give it a moment after `docker restart`) or it failed. Check `docker logs <container> | grep '02-reconcile'`. + +### Gateway starts then immediately exits (`down (exitcode 1)` in svstat) + +Most likely the profile has no model or auth configured. The service slot is correct — the gateway itself is unconfigured. Run `hermes -p <name> setup` first. The s6 supervisor will keep restarting it; that's the desired behavior (when you fix the config, the next attempt succeeds and stays up). + +### Reconciler skipped a profile + +The reconciler keys on the **presence of `SOUL.md`** as the "real profile" marker. `hermes profile create` always seeds it. If a profile dir is missing SOUL.md (stray directory, partial restore, backup-in-progress), the reconciler skips it intentionally. Add a `SOUL.md` (even empty) to opt back in. + +### "Help, the container exits 143!" + +Check whether something is invoking `s6-svscanctl -t` or `/run/s6/basedir/bin/halt` — both cause /init to begin stage 3 shutdown but return 143 (SIGTERM) rather than the desired exit code. This was the Phase 2 architecture pivot from A to B. For container shutdown with a real exit code, you must let the CMD (main-wrapper.sh) exit normally; do **not** try to control exit from a finish script. + +## Related skills + +- `hermes-agent-dev`: General hermes-agent codebase navigation +- `hermes-tool-quirks`: Specific Hermes-tool workarounds (sed/grep/etc.) — load when debugging the s6 stack's interaction with hermes built-in tools. 
diff --git a/website/docs/user-guide/docker.md b/website/docs/user-guide/docker.md index 2cd931751da..615bafc9a5a 100644 --- a/website/docs/user-guide/docker.md +++ b/website/docs/user-guide/docker.md @@ -260,24 +260,51 @@ The official image is based on `debian:13.4` and includes: - Python 3 with all Hermes dependencies (`uv pip install -e ".[all]"`) - Node.js + npm (for browser automation and WhatsApp bridge) - Playwright with Chromium (`npx playwright install --with-deps chromium --only-shell`) -- ripgrep, ffmpeg, git, and tini as system utilities +- ripgrep, ffmpeg, git, and `xz-utils` as system utilities - **`docker-cli`** — so agents running inside the container can drive the host's Docker daemon (bind-mount `/var/run/docker.sock` to opt in) for `docker build`, `docker run`, container inspection, etc. - **`openssh-client`** — enables the [SSH terminal backend](/docs/user-guide/configuration#ssh-backend) from inside the container. The SSH backend shells out to the system `ssh` binary; without this, it failed silently in containerized installs. - The WhatsApp bridge (`scripts/whatsapp-bridge/`) +- **[`s6-overlay`](https://github.com/just-containers/s6-overlay) v3** as PID 1 (replaces the older `tini`) — supervises the dashboard and per-profile gateways with auto-restart on crash, reaps zombie subprocesses, and forwards signals. -The entrypoint script (`docker/entrypoint.sh`) bootstraps the data volume on first run: -- Creates the directory structure (`sessions/`, `memories/`, `skills/`, etc.) -- Copies `.env.example` → `.env` if no `.env` exists -- Copies default `config.yaml` if missing -- Copies default `SOUL.md` if missing -- Syncs bundled skills using a manifest-based approach (preserves user edits) -- Optionally launches `hermes dashboard` as a background side-process when `HERMES_DASHBOARD=1` (see [Running the dashboard](#running-the-dashboard)) -- Then runs `hermes` with whatever arguments you pass +The container's `ENTRYPOINT` is s6-overlay's `/init`. 
On boot it: +1. Runs `/etc/cont-init.d/01-hermes-setup` (= `docker/stage2-hook.sh`) as root: optional UID/GID remap, fixes volume ownership, seeds `.env` / `config.yaml` / `SOUL.md` on first boot, syncs bundled skills. +2. Runs `/etc/cont-init.d/02-reconcile-profiles` (= `hermes_cli.container_boot`): walks `$HERMES_HOME/profiles/<name>/`, recreates the per-profile gateway s6 service slot under `/run/service/gateway-<name>/`, and auto-starts only those whose last recorded state was `running` (see [Per-profile gateway supervision](#per-profile-gateway-supervision)). +3. Starts the static `main-hermes` and `dashboard` s6-rc services. +4. Exec's the container's CMD as the main program (`/opt/hermes/docker/main-wrapper.sh`), which routes the arguments the user passed to `docker run`: + - no args → `hermes` (the default) + - first arg is an executable on PATH (e.g. `sleep`, `bash`) → exec it directly + - anything else → `hermes <args>` (subcommand passthrough) + The container exits when this main program exits, with its exit code. -:::warning -Do not override the image entrypoint unless you keep `/opt/hermes/docker/entrypoint.sh` in the command chain. The entrypoint drops root privileges to the `hermes` user before gateway state files are created. Starting `hermes gateway run` as root inside the official image is refused by default because it can leave root-owned files in `/opt/data` and break later dashboard or gateway starts. Set `HERMES_ALLOW_ROOT_GATEWAY=1` only when you intentionally accept that risk. +:::warning Breaking change vs. pre-s6 images +The container ENTRYPOINT is now `/init` (s6-overlay), not `/usr/bin/tini`. All five documented `docker run` invocation patterns (no args, `chat -q "…"`, `sleep infinity`, `bash`, `--tui`) behave identically to the tini-based image. If you have a downstream wrapper that depended on tini-specific signal behavior or hard-coded `/usr/bin/tini --` invocation, pin to the previous image tag. 
::: +:::warning Privilege model +Do not override the image entrypoint unless you keep `/init` (or, equivalently, the legacy `docker/entrypoint.sh` shim that forwards to the stage2 hook) in the command chain. s6-overlay's `/init` runs as root so it can chown the volume on first boot, then drops to the `hermes` user via `s6-setuidgid` for every supervised service AND for the main program. Starting `hermes gateway run` as root inside the official image is refused by default because it can leave root-owned files in `/opt/data` and break later dashboard or gateway starts. Set `HERMES_ALLOW_ROOT_GATEWAY=1` only when you intentionally accept that risk. +::: + +### Per-profile gateway supervision + +Inside the container, each profile created with `hermes profile create <name>` automatically gets an s6-supervised gateway service registered at `/run/service/gateway-<name>/`. The lifecycle commands you'd run on the host work the same way: + +```sh +hermes profile create coder # registers gateway-coder s6 slot +hermes -p coder gateway start # s6-svc -u → supervised gateway +hermes -p coder gateway stop # s6-svc -d → service down +hermes -p coder gateway restart # s6-svc -t → SIGTERM the gateway; s6-supervise restarts it +hermes profile delete coder # tears down the s6 slot +``` + +**Supervision benefits over the pre-s6 image:** + +- Gateway crashes are auto-restarted by `s6-supervise` after a ~1s backoff. +- Dashboard crashes are auto-restarted (set `HERMES_DASHBOARD=1` to start it). +- `docker restart` preserves running gateways: the cont-init reconciler reads `$HERMES_HOME/profiles/<name>/gateway_state.json` and brings the slot back up if the last recorded state was `running`. Stopped gateways stay stopped. +- Per-profile gateway logs persist under `$HERMES_HOME/logs/gateways/<name>/current` (rotated by `s6-log`), and the reconciler's actions are appended to `$HERMES_HOME/logs/container-boot.log` per boot. + +`hermes status` inside the container reports `Manager: s6 (container supervisor)`. 
Use `/command/s6-svstat /run/service/gateway-<name>` for the raw supervisor view (note `/command/` is on PATH for supervision-tree processes only; pass the absolute path when calling from `docker exec`). + ## Upgrading Pull the latest image and recreate the container. Your data directory is untouched. diff --git a/website/docs/user-guide/profiles.md b/website/docs/user-guide/profiles.md index 73ea0a8cadd..dfbd1d95e5f 100644 --- a/website/docs/user-guide/profiles.md +++ b/website/docs/user-guide/profiles.md @@ -172,6 +172,10 @@ assistant gateway install # creates hermes-gateway-assistant service Each profile gets its own service name. They run independently. +:::note Inside the official Docker image +Per-profile gateways are supervised by [s6-overlay](https://github.com/just-containers/s6-overlay) (PID 1 in the container), so `hermes profile create <name>` automatically registers an s6 service slot at `/run/service/gateway-<name>/`. `hermes -p <name> gateway start/stop/restart` dispatches to `s6-svc` instead of spawning a bare process — crashes are auto-restarted and `docker restart` preserves the previously-running set of gateways. See [Per-profile gateway supervision](/docs/user-guide/docker#per-profile-gateway-supervision) for details. +::: + ## Configuring profiles Each profile has its own: