mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-03 07:21:54 +00:00
Phase 3 of the s6-overlay supervision plan. Implements the runtime-
registration surface from D4 — only the s6 backend supports
register_profile_gateway / unregister_profile_gateway /
list_profile_gateways; host backends continue to raise
NotImplementedError. No caller yet (Phase 4 wires in the profile
create/delete hooks).
Key implementation notes:
- Service directory shape: /run/service/gateway-<profile>/{type,run,log/run}.
Atomic register: write to gateway-<profile>.tmp, fsync via
os.rename. Cleanup on rescan failure.
- Run script uses #!/command/with-contenv sh so HERMES_HOME and any
extra_env arrive at exec time. The hermes -p <profile> gateway
start --foreground --port <port> command is wrapped in
s6-setuidgid hermes for the per-service privilege drop (OQ2-A).
- Log script (OQ8-C): persists via s6-log to
${HERMES_HOME}/logs/gateways/<profile>/. CRITICAL — HERMES_HOME is
a runtime env-var expansion in the rendered script, NOT a Python
f-string substitution. Negative-asserted in
test_s6_register_creates_service_dir_and_triggers_scan so
regressions are caught.
- PATH gotcha: /command/ is only on PATH for processes spawned by
the supervision tree (services, cont-init.d). `docker exec` and
profile-create hooks don't get it. S6ServiceManager calls all
s6-* binaries via absolute path through the new _S6_BIN_DIR
constant so callers don't have to fix up env vars.
- validate_profile_name rejects path-traversal, leading-dash (s6
would parse as a flag), uppercase, whitespace, and names >251
chars (s6-svscan default name_max).
Test coverage:
- 13 new unit tests in tests/hermes_cli/test_service_manager.py
(kind detection, run-script content, env quoting, register
rollback on rescan failure, unregister idempotence, list filter,
lifecycle dispatch, svstat parsing). Total: 36 passing.
- 2 new in-container integration tests in
tests/docker/test_s6_profile_gateway_integration.py validating
end-to-end registration against a real s6 supervision tree.
Docker harness: 14 passed, 2 xfailed (Phase 4 target unchanged).
Refs: docs/plans/2026-05-07-s6-overlay-dynamic-subagent-gateways.md
124 lines
4.7 KiB
Python
124 lines
4.7 KiB
Python
"""Harness: in-container integration tests for S6ServiceManager.

The unit tests in tests/hermes_cli/test_service_manager.py exercise the
class against a tmp-path scandir with a stubbed ``subprocess.run``.
These tests run the real class inside a real container against the
real s6-svc / s6-svscanctl binaries, validating end-to-end.

Phase 3 only registers the service slot — it doesn't depend on the
gateway actually starting (the binary will refuse to start without a
valid profile config). The full register → start → supervised-restart
→ unregister cycle is covered by Phase 4 once profile create/delete
hooks land.
"""
|
|
from __future__ import annotations
|
|
|
|
import subprocess
|
|
import time
|
|
|
|
|
|
# Python source passed to ``python3 -c`` inside the container. It registers
# the "phase3test" profile gateway on port 9301 and prints a REGISTERED
# marker that the tests look for on stdout.
_REGISTER_SCRIPT = "\n".join((
    "",
    "import sys",
    'sys.path.insert(0, "/opt/hermes")',
    "from hermes_cli.service_manager import S6ServiceManager",
    'S6ServiceManager().register_profile_gateway("phase3test", port=9301)',
    "# Don't worry about whether the gateway actually starts — we only care",
    "# that the supervision slot was created. The gateway run script will",
    "# likely error out (no profile config exists) but that's expected.",
    'print("REGISTERED")',
    "",
))
|
|
|
|
# Python source passed to ``python3 -c`` inside the container. It
# unregisters the "phase3test" profile gateway and prints an UNREGISTERED
# marker that the tests look for on stdout.
_UNREGISTER_SCRIPT = "\n".join((
    "",
    "import sys",
    'sys.path.insert(0, "/opt/hermes")',
    "from hermes_cli.service_manager import S6ServiceManager",
    'S6ServiceManager().unregister_profile_gateway("phase3test")',
    'print("UNREGISTERED")',
    "",
))
|
|
|
|
|
|
def _exec(container: str, *args: str, timeout: int = 30) -> subprocess.CompletedProcess:
    """Run a command inside ``container`` through ``docker exec``.

    Args:
        container: Name of the container to execute in.
        *args: The command and its arguments, appended after
            ``docker exec <container>``.
        timeout: Seconds to wait before ``subprocess.run`` gives up.

    Returns:
        The completed process with stdout/stderr captured as text. The
        exit status is not checked here; callers inspect ``returncode``.
    """
    command = ["docker", "exec", container]
    command.extend(args)
    return subprocess.run(
        command,
        capture_output=True,
        text=True,
        timeout=timeout,
    )
|
|
|
|
|
|
def test_s6_register_creates_service_dir_in_live_container(
    built_image: str, container_name: str,
) -> None:
    """Registering a profile gateway creates its s6 service slot.

    ``S6ServiceManager.register_profile_gateway`` must create
    ``/run/service/gateway-<profile>/`` (with ``run`` and ``log/run``) and
    trigger an s6-svscan rescan against the real s6 supervision tree, so
    that ``s6-svstat`` and ``list_profile_gateways`` both see the service.
    """
    service_dir = "/run/service/gateway-phase3test"

    launch_cmd = [
        "docker", "run", "-d", "--name", container_name, built_image,
        "sleep", "120",
    ]
    subprocess.run(launch_cmd, check=True, capture_output=True, timeout=30)
    # Give the supervision tree a moment to come up.
    time.sleep(3)

    registered = _exec(
        container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30,
    )
    assert "REGISTERED" in registered.stdout, (
        f"register failed: stderr={registered.stderr!r} stdout={registered.stdout!r}"
    )

    # Service directory exists with the expected structure. Each entry is
    # (``test`` flag, path to probe, failure message).
    expected_layout = (
        ("-d", service_dir, "service directory not created"),
        ("-f", f"{service_dir}/run", "run script not created"),
        ("-f", f"{service_dir}/log/run", "log/run script not created"),
    )
    for flag, path, failure in expected_layout:
        probe = _exec(container_name, "test", flag, path)
        assert probe.returncode == 0, failure

    # s6-svscan picked it up — s6-svstat works against the dir.
    # `docker exec` doesn't put /command/ on PATH (only the supervision
    # tree does), so call s6-svstat by absolute path.
    svstat = _exec(container_name, "/command/s6-svstat", service_dir)
    assert svstat.returncode == 0, (
        f"s6-svstat failed: {svstat.stderr or svstat.stdout}"
    )

    # list_profile_gateways picks it up.
    list_snippet = (
        "from hermes_cli.service_manager import S6ServiceManager;"
        "print(S6ServiceManager().list_profile_gateways())"
    )
    listing = _exec(container_name, "python3", "-c", list_snippet)
    assert "phase3test" in listing.stdout, f"list output: {listing.stdout!r}"
|
|
|
|
|
|
def test_s6_unregister_removes_service_dir_in_live_container(
    built_image: str, container_name: str,
) -> None:
    """Unregistering a profile gateway removes its s6 service slot.

    ``unregister_profile_gateway`` must stop the service, remove the
    directory, and trigger s6-svscan rescan so the supervise process
    is dropped. The test registers ``phase3test`` first, unregisters it,
    then checks that the service directory is gone and that
    ``list_profile_gateways`` no longer reports the profile.
    """
    subprocess.run(
        ["docker", "run", "-d", "--name", container_name, built_image,
         "sleep", "120"],
        check=True, capture_output=True, timeout=30,
    )
    # Give the supervision tree a moment to come up.
    time.sleep(3)

    # First register so we have something to unregister.
    r = _exec(container_name, "python3", "-c", _REGISTER_SCRIPT, timeout=30)
    assert "REGISTERED" in r.stdout, (
        f"register failed: stderr={r.stderr!r} stdout={r.stdout!r}"
    )

    # Then unregister.
    r = _exec(container_name, "python3", "-c", _UNREGISTER_SCRIPT, timeout=30)
    assert "UNREGISTERED" in r.stdout, (
        f"unregister failed: stderr={r.stderr!r} stdout={r.stdout!r}"
    )

    # Directory is gone.
    r = _exec(container_name, "test", "-d", "/run/service/gateway-phase3test")
    assert r.returncode != 0, "service directory still exists after unregister"

    # list_profile_gateways no longer includes it.
    r = _exec(container_name, "python3", "-c", (
        "from hermes_cli.service_manager import S6ServiceManager;"
        "print(S6ServiceManager().list_profile_gateways())"
    ))
    # A listing command that crashes prints nothing on stdout, which would
    # satisfy the negative check below vacuously — require success first.
    assert r.returncode == 0, (
        f"list failed: stderr={r.stderr!r} stdout={r.stdout!r}"
    )
    assert "phase3test" not in r.stdout, f"list output: {r.stdout!r}"
|