mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-03 07:21:54 +00:00
fix(gateway): route --all stop/restart through s6 under container
PR #30136 review caught that `hermes gateway stop --all` and `... restart --all` were broken under s6. The Phase 4 dispatcher was gated on `not stop_all` (and the symmetric restart_all), so `--all` fell through to `kill_gateway_processes(all_profiles=True)`. pkill SIGTERMed every gateway, s6-supervise observed the crashes, and restarted every gateway ~1s later — net effect: `--all` *kicked* gateways instead of *stopping* them. Add `_dispatch_all_via_service_manager_if_s6(action)` that iterates `mgr.list_profile_gateways()` and routes stop/restart through each service slot. s6's `want up`/`want down` flips correctly, so a stop persists. Partial failures are surfaced per-profile with a running success count; the host pkill path is only reached when s6 isn't in play. `start --all` isn't a CLI surface — the helper rejects it and returns False (host code path can take over).
This commit is contained in:
parent
8ae959adb6
commit
efd3569739
2 changed files with 206 additions and 2 deletions
|
|
@ -115,3 +115,147 @@ def test_dispatch_defaults_profile_to_default(
|
|||
)
|
||||
assert gw._dispatch_via_service_manager_if_s6("start") is True
|
||||
assert rec.calls == [("start", "gateway-default")]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _dispatch_all_via_service_manager_if_s6 — --all under s6
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _ListingRecorder(_CallRecorder):
|
||||
"""_CallRecorder that also exposes a profile list."""
|
||||
|
||||
def __init__(self, profiles: list[str]) -> None:
|
||||
super().__init__()
|
||||
self._profiles = profiles
|
||||
|
||||
def list_profile_gateways(self) -> list[str]:
|
||||
return list(self._profiles)
|
||||
|
||||
|
||||
def test_dispatch_all_returns_false_on_host(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
from hermes_cli import gateway as gw
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.service_manager.detect_service_manager", lambda: "systemd",
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.service_manager.get_service_manager",
|
||||
lambda: pytest.fail("manager should not be constructed on host"),
|
||||
)
|
||||
assert gw._dispatch_all_via_service_manager_if_s6("stop") is False
|
||||
|
||||
|
||||
def test_dispatch_all_iterates_every_profile_on_stop(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
capsys: pytest.CaptureFixture,
|
||||
) -> None:
|
||||
from hermes_cli import gateway as gw
|
||||
rec = _ListingRecorder(["coder", "writer", "assistant"])
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.service_manager.detect_service_manager", lambda: "s6",
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.service_manager.get_service_manager", lambda: rec,
|
||||
)
|
||||
assert gw._dispatch_all_via_service_manager_if_s6("stop") is True
|
||||
assert rec.calls == [
|
||||
("stop", "gateway-coder"),
|
||||
("stop", "gateway-writer"),
|
||||
("stop", "gateway-assistant"),
|
||||
]
|
||||
out = capsys.readouterr().out
|
||||
assert "Stopped 3 profile gateway(s)" in out
|
||||
|
||||
|
||||
def test_dispatch_all_iterates_every_profile_on_restart(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
capsys: pytest.CaptureFixture,
|
||||
) -> None:
|
||||
from hermes_cli import gateway as gw
|
||||
rec = _ListingRecorder(["coder", "writer"])
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.service_manager.detect_service_manager", lambda: "s6",
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.service_manager.get_service_manager", lambda: rec,
|
||||
)
|
||||
assert gw._dispatch_all_via_service_manager_if_s6("restart") is True
|
||||
assert rec.calls == [
|
||||
("restart", "gateway-coder"),
|
||||
("restart", "gateway-writer"),
|
||||
]
|
||||
out = capsys.readouterr().out
|
||||
assert "Restarted 2 profile gateway(s)" in out
|
||||
|
||||
|
||||
def test_dispatch_all_handles_partial_failure(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
capsys: pytest.CaptureFixture,
|
||||
) -> None:
|
||||
"""A failure on one profile must not skip the others; the helper
|
||||
reports each failure and the success count."""
|
||||
from hermes_cli import gateway as gw
|
||||
|
||||
class _FailOnWriter(_ListingRecorder):
|
||||
def stop(self, name: str) -> None:
|
||||
if name == "gateway-writer":
|
||||
raise RuntimeError("supervise FIFO permission denied")
|
||||
super().stop(name)
|
||||
|
||||
rec = _FailOnWriter(["coder", "writer", "assistant"])
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.service_manager.detect_service_manager", lambda: "s6",
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.service_manager.get_service_manager", lambda: rec,
|
||||
)
|
||||
assert gw._dispatch_all_via_service_manager_if_s6("stop") is True
|
||||
# The two successful ones were called; writer raised before recording.
|
||||
assert ("stop", "gateway-coder") in rec.calls
|
||||
assert ("stop", "gateway-assistant") in rec.calls
|
||||
assert ("stop", "gateway-writer") not in rec.calls
|
||||
out = capsys.readouterr().out
|
||||
assert "Stopped 2 profile gateway(s)" in out
|
||||
assert "Could not stop gateway-writer" in out
|
||||
assert "supervise FIFO permission denied" in out
|
||||
|
||||
|
||||
def test_dispatch_all_empty_list_reports_and_returns_true(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
capsys: pytest.CaptureFixture,
|
||||
) -> None:
|
||||
"""With no profile gateways registered the helper still claims the
|
||||
dispatch (returns True) and prints a friendly message — the host
|
||||
fallback would just pkill nothing, which isn't useful inside a
|
||||
container."""
|
||||
from hermes_cli import gateway as gw
|
||||
rec = _ListingRecorder([])
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.service_manager.detect_service_manager", lambda: "s6",
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.service_manager.get_service_manager", lambda: rec,
|
||||
)
|
||||
assert gw._dispatch_all_via_service_manager_if_s6("stop") is True
|
||||
assert rec.calls == []
|
||||
assert "No profile gateways" in capsys.readouterr().out
|
||||
|
||||
|
||||
def test_dispatch_all_unknown_action_returns_false(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""`start --all` is not a supported CLI surface; the helper must
|
||||
fall through to the host code path rather than no-op."""
|
||||
from hermes_cli import gateway as gw
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.service_manager.detect_service_manager", lambda: "s6",
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.service_manager.get_service_manager",
|
||||
lambda: pytest.fail(
|
||||
"manager should not be constructed for unsupported --all action",
|
||||
),
|
||||
)
|
||||
assert gw._dispatch_all_via_service_manager_if_s6("start") is False
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue