mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
After the supervise-perms fix lands, the s6 lifecycle actually works
for the hermes user — hermes -p <profile> gateway start now genuinely
brings the supervised gateway up rather than silently no-op'ing on
EACCES. That exposes a latent bug in this test's assertion: it
expected 'want up' to appear literally in s6-svstat output, but
s6-svstat elides redundancies — when the slot is currently up AND
s6 wants it up, the output is just 'up (pid N pgid N) X seconds';
the explicit 'want up' token only appears when current ≠ wanted
(e.g. 'down (exitcode 1) … , want up' on a crash-loop).
Add a small helper _svstat_wants_up() that reads the want-state
correctly across both spellings:
* 'up …' → wanted up (unless explicit 'want down')
* 'down …, want up' → wanted up explicitly
* 'down …' → wanted down
Both stop and start assertions now use the helper. Also rewords
the module docstring to acknowledge that the supervised process
may succeed OR crash-loop depending on environment, but the want-
state contract holds either way.
(cherry picked from commit 02c933aedc)
138 lines
5.1 KiB
Python
138 lines
5.1 KiB
Python
"""Harness: per-profile gateway start/stop inside the container.
|
||
|
||
Phase 4 wires `hermes -p <profile> gateway start/stop` through the s6
|
||
ServiceManager dispatch path inside the container — so the lifecycle
|
||
commands now bring up an s6-supervised gateway rather than refusing
|
||
with the pre-Phase-4 informational message.
|
||
|
||
These tests were marked ``xfail(strict=True)`` through Phase 0–3 and
|
||
flip to plain ``test_…`` once Phase 4 lands (now).
|
||
|
||
NB: The harness profile has no model/auth configured. Depending on
|
||
how the gateway run script handles missing config, the supervised
|
||
process may either spin up successfully (and svstat reports ``up``)
|
||
or exit fast and get throttled by s6 (and svstat reports ``down …,
|
||
want up``). Both states are valid "user asked for gateway up" results
|
||
— what we assert is the *want* intent the lifecycle command set, NOT
|
||
the supervised process's health. ``s6-svc -u`` records ``want up`` in
|
||
the supervise/status file regardless of the run-script outcome.
|
||
|
||
Every ``docker exec`` here runs as the unprivileged ``hermes`` user
|
||
(via :func:`docker_exec_sh` in conftest); see the conftest module
|
||
docstring.
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import subprocess
|
||
import time
|
||
|
||
from tests.docker.conftest import docker_exec_sh
|
||
|
||
# Profile name shared by every test in this module. It is interpolated
# into the ``hermes`` CLI invocations and into the s6 slot path
# ``/run/service/gateway-<PROFILE>``.
PROFILE = "test-harness-profile"
|
||
|
||
|
||
def _sh(
    container: str,
    command: str,
    timeout: int = 30,
) -> subprocess.CompletedProcess[str]:
    """Run ``command`` inside ``container`` through ``docker_exec_sh``.

    This is a thin pass-through that gives the tests in this module a
    single call shape. ``timeout`` is forwarded unchanged as a keyword
    argument, and the helper's result is returned as-is.
    """
    completed = docker_exec_sh(container, command, timeout=timeout)
    return completed
|
||
|
||
|
||
def _svstat(container: str) -> str:
    """Fetch the raw ``s6-svstat`` text for the test profile's slot.

    The binary is invoked by its absolute path ``/command/s6-svstat``
    because ``/command/`` isn't on PATH for docker-exec sessions.

    Returns the command's stdout, or an empty string when the command
    exits with a non-zero status.
    """
    slot_dir = f"/run/service/gateway-{PROFILE}"
    result = _sh(container, f"/command/s6-svstat {slot_dir}")
    if result.returncode != 0:
        return ""
    return result.stdout
|
||
|
||
|
||
def _svstat_wants_up(container: str) -> bool:
    """Report whether s6's want-intent for the profile's slot is "up".

    s6-svstat drops information that would be redundant: for a slot
    that is currently up and also wanted up, the literal ``want up``
    token is omitted because the leading ``up`` already implies it.
    When the slot is down but s6 intends to bring it back, the output
    carries an explicit ``, want up``. Both shapes therefore have to
    be recognised:

    * leading ``up`` and no ``want down`` -> wanted up
    * anything else containing ``want up`` -> wanted up
    * everything else (including empty output) -> not wanted up
    """
    status = _svstat(container)
    tokens = status.split()
    if not tokens:
        # No output at all (or only whitespace): nothing to interpret.
        return False
    if tokens[0] == "up":
        # Running right now; only an explicit ``want down`` overrides
        # the implied want-up.
        return "want down" not in status
    # Not currently up: the want-up intent is only present when it is
    # spelled out explicitly.
    return "want up" in status
|
||
|
||
|
||
def test_profile_create_then_gateway_start(
    built_image: str, container_name: str,
) -> None:
    """Create a profile, start its gateway, then stop it again.

    Checks, in order: profile creation succeeds and registers an s6
    service slot; ``gateway start`` succeeds and leaves the slot's
    want-state at "up"; ``gateway stop`` succeeds and leaves the
    want-state no longer "up".
    """
    docker_run = [
        "docker", "run", "-d", "--name", container_name, built_image,
        "sleep", "120",
    ]
    subprocess.run(docker_run, check=True, capture_output=True, timeout=30)
    # Fixed pause before the first exec — presumably lets the
    # container's init finish coming up (TODO confirm).
    time.sleep(3)

    created = _sh(container_name, f"hermes profile create {PROFILE}")
    assert created.returncode == 0, f"profile create failed: {created.stderr}"

    # Profile create's s6-register hook should have produced a service slot.
    slot_check = _sh(container_name, f"test -d /run/service/gateway-{PROFILE}")
    assert slot_check.returncode == 0, "s6 service slot not created on profile create"

    started = _sh(
        container_name, f"hermes -p {PROFILE} gateway start", timeout=60,
    )
    assert started.returncode == 0, (
        f"gateway start failed: stderr={started.stderr!r} stdout={started.stdout!r}"
    )

    # Once started, s6's intent is "up". The supervised gateway may
    # still spin-fail (the test profile has no model/auth), but the
    # supervision-state contract holds either way, which is why
    # ``_svstat_wants_up`` accepts both ``up …`` and
    # ``down …, want up``.
    time.sleep(2)
    wanted_after_start = _svstat_wants_up(container_name)
    assert wanted_after_start, (
        f"slot want-state is not up after gateway start: "
        f"{_svstat(container_name)!r}"
    )

    stopped = _sh(
        container_name, f"hermes -p {PROFILE} gateway stop", timeout=30,
    )
    assert stopped.returncode == 0

    time.sleep(2)
    wanted_after_stop = _svstat_wants_up(container_name)
    assert not wanted_after_stop, (
        f"slot want-state still up after gateway stop: "
        f"{_svstat(container_name)!r}"
    )
|
||
|
||
|
||
def test_profile_delete_stops_gateway(
    built_image: str, container_name: str,
) -> None:
    """Deleting a profile should stop its gateway and remove the s6
    service slot.

    The setup steps (profile create, slot registration, gateway start)
    are asserted explicitly. Without those checks, a failed setup would
    leave no slot behind, and the final "slot is gone" assertion would
    pass without the delete path having removed anything.
    """
    subprocess.run(
        ["docker", "run", "-d", "--name", container_name, built_image,
         "sleep", "120"],
        check=True, capture_output=True, timeout=30,
    )
    time.sleep(3)

    r = _sh(container_name, f"hermes profile create {PROFILE}")
    assert r.returncode == 0, f"profile create failed: {r.stderr}"

    # Precondition: the slot must exist before delete, otherwise the
    # post-delete absence check below proves nothing.
    r = _sh(container_name, f"test -d /run/service/gateway-{PROFILE}")
    assert r.returncode == 0, "s6 service slot not created on profile create"

    r = _sh(container_name, f"hermes -p {PROFILE} gateway start", timeout=60)
    assert r.returncode == 0, (
        f"gateway start failed: stderr={r.stderr!r} stdout={r.stdout!r}"
    )
    time.sleep(3)

    r = _sh(
        container_name,
        f"hermes profile delete {PROFILE} --yes",
        timeout=30,
    )
    assert r.returncode == 0, f"profile delete failed: {r.stderr}"

    time.sleep(2)
    # Service slot should be gone.
    r = _sh(container_name, f"test -d /run/service/gateway-{PROFILE}")
    assert r.returncode != 0, "s6 service slot still present after profile delete"
|