mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-06 07:51:53 +00:00
fix(service_manager): s6 detection works for unprivileged hermes user
PR #30136 review surfaced two issues, both rooted in the same audit gap: docker integration tests were running as root, not the unprivileged `hermes` user (UID 10000) that the runtime actually uses via `s6-setuidgid hermes`. Anything that probed PID-1 state or wrote to the s6 control surface worked as root in the tests but was inert in production. Fixes: 1. `_s6_running()` previously called `Path("/proc/1/exe").resolve()`, which is root-only readable. For UID 10000 the symlink yields PermissionError, `resolve()` silently returns the unresolved path, and `exe.name == "exe"` — so detection always returned False, the service-manager runtime-registration path was inert, and every `hermes profile create` / `hermes -p X gateway start` silently skipped the s6 hook. Replace with `/proc/1/comm` (world-readable) + `/run/s6/basedir` (s6-overlay-specific) — both required, fail closed. 2. `02-reconcile-profiles` now also chowns `/run/service/.s6-svscan/` {control,lock} to hermes so `s6-svscanctl -a/-an` works without root. Previously the directory chown stopped at `/run/service` and the FIFO inside stayed root-owned, so `register_profile_gateway` from hermes failed at the rescan-trigger step with EACCES — the wrapper in profiles.py caught the exception and printed a swallowed warning, so profile creation appeared to succeed while the slot was rolled back. Audit changes to flush this class of bug next time: - Add `docker_exec` / `docker_exec_sh` helpers to `tests/docker/conftest.py` that default to `-u hermes`. The module docstring explains why and flags `user="root"` as opt-in only for tests that explicitly need root (none currently do). - Refactor every `docker exec` call in tests/docker/ through the new helpers (test_dashboard.py, test_zombie_reaping.py, test_profile_gateway.py, test_container_restart.py, test_s6_profile_gateway_integration.py). - Add 5 unit tests covering `_s6_running` under various probe states (both signals present; comm wrong; basedir missing; PermissionError on /proc/1/comm; missing /proc — non-Linux). The PermissionError test is the explicit regression guard for the original bug. Known follow-up: the per-service `supervise/control` FIFO inside each `/run/service/gateway-<profile>/supervise/` is created root-owned by s6-supervise (which runs as root because s6-svscan is PID 1). `s6-svc -u/-d/-t` from the hermes user will get EACCES on those. The audit under `-u hermes` will reveal this in lifecycle tests — surfacing the issue cleanly so it can be fixed in a focused follow-up (likely via a small SUID helper or a polling chown loop in cont-init.d). The detection + svscanctl fixes here are independent and complete on their own.
This commit is contained in:
parent
a6f7171a5e
commit
2f8ceeab9a
9 changed files with 241 additions and 53 deletions
|
|
@ -69,6 +69,101 @@ def test_detect_service_manager_returns_known_value() -> None:
|
|||
assert result in ("systemd", "launchd", "windows", "s6", "none")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _s6_running — must work for unprivileged users, not just root
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _patch_s6_paths(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
*,
|
||||
comm: str | OSError | None,
|
||||
basedir_is_dir: bool,
|
||||
) -> None:
|
||||
"""Stub /proc/1/comm and /run/s6/basedir for _s6_running tests."""
|
||||
from pathlib import Path as _Path
|
||||
|
||||
real_read_text = _Path.read_text
|
||||
real_is_dir = _Path.is_dir
|
||||
|
||||
def fake_read_text(self, *args, **kwargs): # type: ignore[override]
|
||||
if str(self) == "/proc/1/comm":
|
||||
if isinstance(comm, OSError):
|
||||
raise comm
|
||||
if comm is None:
|
||||
raise FileNotFoundError(2, "No such file or directory")
|
||||
return comm + "\n"
|
||||
return real_read_text(self, *args, **kwargs)
|
||||
|
||||
def fake_is_dir(self): # type: ignore[override]
|
||||
if str(self) == "/run/s6/basedir":
|
||||
return basedir_is_dir
|
||||
return real_is_dir(self)
|
||||
|
||||
monkeypatch.setattr(_Path, "read_text", fake_read_text)
|
||||
monkeypatch.setattr(_Path, "is_dir", fake_is_dir)
|
||||
|
||||
|
||||
def test_s6_running_true_when_comm_and_basedir_match(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
from hermes_cli.service_manager import _s6_running
|
||||
|
||||
_patch_s6_paths(monkeypatch, comm="s6-svscan", basedir_is_dir=True)
|
||||
assert _s6_running() is True
|
||||
|
||||
|
||||
def test_s6_running_false_when_comm_is_wrong(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
from hermes_cli.service_manager import _s6_running
|
||||
|
||||
# systemd as PID 1, basedir present from some stray s6 install
|
||||
_patch_s6_paths(monkeypatch, comm="systemd", basedir_is_dir=True)
|
||||
assert _s6_running() is False
|
||||
|
||||
|
||||
def test_s6_running_false_when_basedir_missing(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
from hermes_cli.service_manager import _s6_running
|
||||
|
||||
# The comm matches but the basedir is missing — e.g. an unrelated
|
||||
# process happens to be named "s6-svscan"
|
||||
_patch_s6_paths(monkeypatch, comm="s6-svscan", basedir_is_dir=False)
|
||||
assert _s6_running() is False
|
||||
|
||||
|
||||
def test_s6_running_false_when_comm_unreadable(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""Regression: /proc/1/exe was unreadable to UID 10000 and
|
||||
resolve() silently returned the unresolved path, making detection
|
||||
always-False inside the container under the hermes user. The new
|
||||
probe must FAIL CLOSED — not raise — when /proc/1/comm can't be
|
||||
read.
|
||||
"""
|
||||
from hermes_cli.service_manager import _s6_running
|
||||
|
||||
_patch_s6_paths(
|
||||
monkeypatch,
|
||||
comm=PermissionError(13, "Permission denied"),
|
||||
basedir_is_dir=True,
|
||||
)
|
||||
assert _s6_running() is False
|
||||
|
||||
|
||||
def test_s6_running_handles_missing_proc(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""On macOS / Windows / WSL-without-procfs, /proc/1/comm doesn't
|
||||
exist. Must return False, not raise."""
|
||||
from hermes_cli.service_manager import _s6_running
|
||||
|
||||
_patch_s6_paths(monkeypatch, comm=None, basedir_is_dir=False)
|
||||
assert _s6_running() is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Backend wrappers — kind + registration unsupported on hosts
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue