mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
add generic gateway startup readiness checks
This commit is contained in:
parent
10494b42a1
commit
bad9fe2452
9 changed files with 637 additions and 8 deletions
|
|
@ -125,6 +125,25 @@ async def test_gateway_stop_service_restart_sets_named_exit_code():
|
|||
assert runner._exit_code == GATEWAY_SERVICE_RESTART_EXIT_CODE
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_gateway_stop_emits_shutdown_hook_after_drain(monkeypatch):
    """``stop()`` for a service restart awaits ``gateway:shutdown`` exactly once.

    The hook context must carry the restart flags passed to ``stop()`` plus
    ``detached_restart`` set to ``False``.
    """
    runner, adapter = make_restart_runner()
    adapter.disconnect = AsyncMock()
    runner.hooks.emit = AsyncMock()

    expected_context = {
        "restart": True,
        "service_restart": True,
        "detached_restart": False,
    }

    # Replace the status-file helpers with mocks for the duration of stop().
    with patch("gateway.status.remove_pid_file"):
        with patch("gateway.status.write_runtime_status"):
            await runner.stop(restart=True, service_restart=True)

    runner.hooks.emit.assert_awaited_once_with("gateway:shutdown", expected_context)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_drain_active_agents_throttles_status_updates():
|
||||
runner, _adapter = make_restart_runner()
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import pytest
|
|||
from gateway.hooks import HookRegistry
|
||||
|
||||
|
||||
def _create_hook(hooks_dir, hook_name, events, handler_code):
|
||||
def _create_hook(hooks_dir, hook_name, events, handler_code, *, manifest_extra=""):
|
||||
"""Helper to create a hook directory with HOOK.yaml and handler.py."""
|
||||
hook_dir = hooks_dir / hook_name
|
||||
hook_dir.mkdir(parents=True)
|
||||
|
|
@ -17,6 +17,7 @@ def _create_hook(hooks_dir, hook_name, events, handler_code):
|
|||
f"name: {hook_name}\n"
|
||||
f"description: Test hook\n"
|
||||
f"events: {events}\n"
|
||||
f"{manifest_extra}"
|
||||
)
|
||||
(hook_dir / "handler.py").write_text(handler_code)
|
||||
return hook_dir
|
||||
|
|
@ -112,6 +113,24 @@ class TestDiscoverAndLoad:
|
|||
|
||||
assert len(reg.loaded_hooks) == 2
|
||||
|
||||
def test_preserves_optional_startup_readiness_metadata(self, tmp_path):
    """A ``startup_readiness`` mapping in the manifest is kept on the loaded hook entry."""
    readiness_manifest = "startup_readiness:\n id: beam-runtime\n required: false\n"
    _create_hook(
        tmp_path,
        "ready-hook",
        '["gateway:startup"]',
        "def handle(e, c): pass\n",
        manifest_extra=readiness_manifest,
    )

    registry = HookRegistry()
    # Point discovery at the temp directory and keep builtin hooks out of the result.
    with patch("gateway.hooks.HOOKS_DIR", tmp_path):
        with _patch_no_builtins(registry):
            registry.discover_and_load()

    expected_readiness = {
        "id": "beam-runtime",
        "required": False,
    }
    assert registry.loaded_hooks[0]["startup_readiness"] == expected_readiness
|
||||
|
||||
|
||||
class TestEmit:
|
||||
@pytest.mark.asyncio
|
||||
|
|
|
|||
|
|
@ -132,6 +132,68 @@ async def test_runner_records_connected_platform_state_on_success(monkeypatch, t
|
|||
assert state["platforms"]["discord"]["error_message"] is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_runner_discovers_plugins_before_loading_hooks(monkeypatch, tmp_path):
    """``start()`` calls plugin discovery before ``hooks.discover_and_load``."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    discord_config = PlatformConfig(enabled=True, token="***")
    runner = GatewayRunner(
        GatewayConfig(
            platforms={Platform.DISCORD: discord_config},
            sessions_dir=tmp_path / "sessions",
        )
    )
    order: list[str] = []

    def _make_adapter(platform, platform_config):
        return _SuccessfulAdapter()

    def _record_plugins():
        order.append("plugins")

    def _record_hooks():
        order.append("hooks")

    monkeypatch.setattr(runner, "_create_adapter", _make_adapter)
    monkeypatch.setattr("hermes_cli.plugins.discover_plugins", _record_plugins)
    monkeypatch.setattr(runner.hooks, "discover_and_load", _record_hooks)
    monkeypatch.setattr(runner.hooks, "emit", AsyncMock())

    started = await runner.start()

    assert started is True
    # The recorded sequence is the call order of the two stubs.
    assert order == ["plugins", "hooks"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_runner_initializes_startup_checks_before_gateway_startup_emit(monkeypatch, tmp_path):
    """Startup checks are already ``pending`` in runtime status when ``gateway:startup`` fires.

    The assertions run inside the stub that replaces ``hooks.emit``.
    NOTE(review): if ``start()`` swallows exceptions raised by ``emit``, or
    never calls it, this test would pass without verifying anything - confirm.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    discord_config = PlatformConfig(enabled=True, token="***")
    runner = GatewayRunner(
        GatewayConfig(
            platforms={Platform.DISCORD: discord_config},
            sessions_dir=tmp_path / "sessions",
        )
    )

    # Pre-seed one loaded hook that declares a required readiness check.
    beam_hook = {
        "name": "beam-runtime",
        "events": ["gateway:startup"],
        "path": str(tmp_path / "hook"),
        "startup_readiness": {
            "id": "beam-runtime",
            "required": True,
        },
    }
    runner.hooks._loaded_hooks = [beam_hook]

    def _make_adapter(platform, platform_config):
        return _SuccessfulAdapter()

    def _do_nothing():
        return None

    monkeypatch.setattr(runner, "_create_adapter", _make_adapter)
    monkeypatch.setattr("hermes_cli.plugins.discover_plugins", _do_nothing)
    monkeypatch.setattr(runner.hooks, "discover_and_load", _do_nothing)

    async def _verify_checks_on_emit(event_type, context):
        runtime = read_runtime_status()
        assert event_type == "gateway:startup"
        assert runtime["startup_checks"]["beam-runtime"]["state"] == "pending"
        assert runtime["startup_checks"]["beam-runtime"]["required"] is True

    monkeypatch.setattr(runner.hooks, "emit", _verify_checks_on_emit)

    started = await runner.start()

    assert started is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_start_gateway_verbosity_imports_redacting_formatter(monkeypatch, tmp_path):
|
||||
"""Verbosity != None must not crash with NameError on RedactingFormatter (#8044)."""
|
||||
|
|
|
|||
|
|
@ -132,6 +132,72 @@ class TestGatewayRuntimeStatus:
|
|||
assert payload["platforms"]["discord"]["error_code"] is None
|
||||
assert payload["platforms"]["discord"]["error_message"] is None
|
||||
|
||||
def test_reset_startup_checks_replaces_previous_run_entries(self, tmp_path, monkeypatch):
    """``reset_startup_checks`` drops earlier entries and seeds the new ones as ``pending``."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    # Simulate a previous run that left a finished check in the status file.
    stale_checks = {
        "old-check": {
            "state": "ready",
            "required": True,
            "source": "old-hook",
            "detail": None,
        }
    }
    status.write_runtime_status(gateway_state="running", startup_checks=stale_checks)

    new_hook = {
        "name": "new-hook",
        "startup_readiness": {
            "id": "new-check",
            "required": False,
        },
    }
    status.reset_startup_checks([new_hook])

    checks = status.read_runtime_status()["startup_checks"]
    assert set(checks) == {"new-check"}

    entry = checks["new-check"]
    assert entry["state"] == "pending"
    assert entry["required"] is False
    assert entry["source"] == "new-hook"
|
||||
|
||||
def test_mark_startup_check_ready_persists_detail(self, tmp_path, monkeypatch):
    """Marking a check ready stores both the ``ready`` state and the supplied detail."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    beam_hook = {
        "name": "beam",
        "startup_readiness": {
            "id": "beam-runtime",
            "required": True,
        },
    }
    status.reset_startup_checks([beam_hook])

    status.mark_startup_check_ready("beam-runtime", detail="ready for RPC")

    entry = status.read_runtime_status()["startup_checks"]["beam-runtime"]
    assert entry["state"] == "ready"
    assert entry["detail"] == "ready for RPC"
|
||||
|
||||
def test_mark_startup_check_failed_creates_missing_entry(self, tmp_path, monkeypatch):
    """Failing a check that was never registered still records a full entry."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    # No reset_startup_checks() call first: the entry does not exist yet.
    status.mark_startup_check_failed(
        "late-hook",
        detail="startup hook crashed",
        required=False,
        source="late-hook",
    )

    entry = status.read_runtime_status()["startup_checks"]["late-hook"]
    assert entry["state"] == "failed"
    assert entry["required"] is False
    assert entry["source"] == "late-hook"
    assert entry["detail"] == "startup hook crashed"
|
||||
|
||||
|
||||
class TestTerminatePid:
|
||||
def test_force_uses_taskkill_on_windows(self, monkeypatch):
|
||||
|
|
|
|||
|
|
@ -6,12 +6,21 @@ from pathlib import Path
|
|||
from types import SimpleNamespace
|
||||
|
||||
import hermes_cli.gateway as gateway_cli
|
||||
import pytest
|
||||
from gateway.restart import (
|
||||
DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT,
|
||||
GATEWAY_SERVICE_RESTART_EXIT_CODE,
|
||||
)
|
||||
|
||||
|
||||
_REAL_AWAIT_SERVICE_READY = gateway_cli._await_service_ready_or_exit
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _stub_service_readiness(monkeypatch):
    """Replace ``_await_service_ready_or_exit`` with a no-op for every test in this module."""

    def _skip_readiness_wait(**kwargs):
        return None

    monkeypatch.setattr(gateway_cli, "_await_service_ready_or_exit", _skip_readiness_wait)
|
||||
|
||||
|
||||
class TestSystemdServiceRefresh:
|
||||
def test_systemd_install_repairs_outdated_unit_without_force(self, tmp_path, monkeypatch):
|
||||
unit_path = tmp_path / "hermes-gateway.service"
|
||||
|
|
@ -82,6 +91,30 @@ class TestSystemdServiceRefresh:
|
|||
["systemctl", "--user", "reload-or-restart", gateway_cli.get_service_name()],
|
||||
]
|
||||
|
||||
def test_systemd_start_waits_for_readiness_before_reporting_success(self, monkeypatch):
    """``systemd_start`` refreshes the unit, runs ``systemctl start``, then awaits readiness."""
    calls = []

    def _user_scope(system=False):
        return False

    def _record_refresh(system=False):
        calls.append(("refresh", system))

    def _record_systemctl(cmd, system=False, check=True, timeout=30, **kwargs):
        calls.append((tuple(cmd), system, timeout))

    def _record_ready(**kwargs):
        calls.append(("ready", kwargs))

    monkeypatch.setattr(gateway_cli, "_select_systemd_scope", _user_scope)
    monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", _record_refresh)
    monkeypatch.setattr(gateway_cli, "_run_systemctl", _record_systemctl)
    monkeypatch.setattr(gateway_cli, "_await_service_ready_or_exit", _record_ready)

    gateway_cli.systemd_start()

    # The list order is the call order of the three recording stubs.
    expected_calls = [
        ("refresh", False),
        (("start", gateway_cli.get_service_name()), False, 30),
        ("ready", {"action": "start"}),
    ]
    assert calls == expected_calls
|
||||
|
||||
|
||||
class TestGeneratedSystemdUnits:
|
||||
def test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self):
|
||||
|
|
@ -268,6 +301,32 @@ class TestLaunchdServiceRecovery:
|
|||
["launchctl", "kickstart", target],
|
||||
]
|
||||
|
||||
def test_launchd_start_waits_for_readiness_before_reporting_success(self, tmp_path, monkeypatch):
    """``launchd_start`` kickstarts the service and then awaits readiness."""
    plist_path = tmp_path / "ai.hermes.gateway.plist"
    plist_path.write_text(gateway_cli.generate_launchd_plist(), encoding="utf-8")
    label = gateway_cli.get_launchd_label()
    calls = []

    def _plist_location():
        return plist_path

    def _skip_refresh():
        return None

    def _fake_run(cmd, check=False, **kwargs):
        # Record the command and report success, like a zero-exit subprocess.
        calls.append(cmd)
        return SimpleNamespace(returncode=0, stdout="", stderr="")

    def _record_ready(**kwargs):
        calls.append(("ready", kwargs))

    monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", _plist_location)
    monkeypatch.setattr(gateway_cli, "refresh_launchd_plist_if_needed", _skip_refresh)
    monkeypatch.setattr(gateway_cli.subprocess, "run", _fake_run)
    monkeypatch.setattr(gateway_cli, "_await_service_ready_or_exit", _record_ready)

    gateway_cli.launchd_start()

    kickstart_target = f"{gateway_cli._launchd_domain()}/{label}"
    expected_calls = [
        ["launchctl", "kickstart", kickstart_target],
        ("ready", {"action": "start"}),
    ]
    assert calls == expected_calls
|
||||
|
||||
def test_launchd_restart_drains_running_gateway_before_kickstart(self, monkeypatch):
|
||||
calls = []
|
||||
target = f"{gateway_cli._launchd_domain()}/{gateway_cli.get_launchd_label()}"
|
||||
|
|
@ -315,7 +374,7 @@ class TestLaunchdServiceRecovery:
|
|||
gateway_cli.launchd_restart()
|
||||
|
||||
assert calls == [("self", 321)]
|
||||
assert "restart requested" in capsys.readouterr().out.lower()
|
||||
assert "service restarted" in capsys.readouterr().out.lower()
|
||||
|
||||
def test_launchd_stop_uses_bootout_not_kill(self, monkeypatch):
|
||||
"""launchd_stop must bootout the service so KeepAlive doesn't respawn it."""
|
||||
|
|
@ -393,6 +452,109 @@ class TestLaunchdServiceRecovery:
|
|||
assert "not loaded" in output.lower()
|
||||
|
||||
|
||||
class TestGatewayServiceReadiness:
    """Tests for ``_wait_for_service_readiness`` and ``_await_service_ready_or_exit``.

    Each test stubs ``gateway.status.get_running_pid`` and
    ``gateway.status.read_runtime_status`` so no real gateway process or
    status file is involved.
    """

    def test_wait_for_service_readiness_accepts_running_gateway_without_checks(self, monkeypatch):
        """A running gateway with a matching pid and no startup checks yields no warnings."""
        monkeypatch.setattr("gateway.status.get_running_pid", lambda: 123)
        monkeypatch.setattr(
            "gateway.status.read_runtime_status",
            lambda: {"pid": 123, "gateway_state": "running", "startup_checks": {}},
        )

        warnings = gateway_cli._wait_for_service_readiness(action="start", timeout=0.1, poll_interval=0.0)

        assert warnings == []

    def test_wait_for_service_readiness_ignores_stale_runtime_state_until_pid_matches(self, monkeypatch):
        """A status payload whose pid differs from the running pid is not accepted as ready."""
        # First read reports another pid (999); second read matches pid 123.
        runtime_states = iter(
            [
                {"pid": 999, "gateway_state": "running", "startup_checks": {}},
                {"pid": 123, "gateway_state": "running", "startup_checks": {}},
            ]
        )

        monkeypatch.setattr("gateway.status.get_running_pid", lambda: 123)
        monkeypatch.setattr("gateway.status.read_runtime_status", lambda: next(runtime_states))

        warnings = gateway_cli._wait_for_service_readiness(action="start", timeout=0.1, poll_interval=0.0)

        assert warnings == []

    def test_wait_for_service_readiness_returns_optional_pending_warnings(self, monkeypatch):
        """A non-required check that is still pending is returned as a warning string."""
        monkeypatch.setattr("gateway.status.get_running_pid", lambda: 123)
        monkeypatch.setattr(
            "gateway.status.read_runtime_status",
            lambda: {
                "pid": 123,
                "gateway_state": "running",
                "startup_checks": {
                    "optional-check": {
                        "state": "pending",
                        "required": False,
                        "source": "test-hook",
                        "detail": "still warming",
                    }
                },
            },
        )

        warnings = gateway_cli._wait_for_service_readiness(action="start", timeout=0.1, poll_interval=0.0)

        assert warnings == ["pending: optional-check (test-hook): still warming"]

    def test_wait_for_service_readiness_fails_when_required_check_fails(self, monkeypatch):
        """A required check in the ``failed`` state raises ``RuntimeError`` naming the check."""
        monkeypatch.setattr("gateway.status.get_running_pid", lambda: 123)
        monkeypatch.setattr(
            "gateway.status.read_runtime_status",
            lambda: {
                "pid": 123,
                "gateway_state": "running",
                "startup_checks": {
                    "beam-runtime": {
                        "state": "failed",
                        "required": True,
                        "source": "beam",
                        "detail": "RPC boot failed",
                    }
                },
            },
        )

        with pytest.raises(RuntimeError, match=r"required startup checks failed: beam-runtime \(beam\): RPC boot failed"):
            gateway_cli._wait_for_service_readiness(action="start", timeout=0.1, poll_interval=0.0)

    def test_wait_for_service_readiness_times_out_on_pending_required_check(self, monkeypatch):
        """A required check that stays ``pending`` raises a timeout ``RuntimeError``."""
        monkeypatch.setattr("gateway.status.get_running_pid", lambda: 123)
        monkeypatch.setattr(
            "gateway.status.read_runtime_status",
            lambda: {
                "pid": 123,
                "gateway_state": "running",
                "startup_checks": {
                    "beam-runtime": {
                        "state": "pending",
                        "required": True,
                        "source": "beam",
                        "detail": "waiting for runtime",
                    }
                },
            },
        )

        with pytest.raises(RuntimeError, match=r"timed out waiting for required startup checks: beam-runtime \(beam\): waiting for runtime"):
            gateway_cli._wait_for_service_readiness(action="start", timeout=0.01, poll_interval=0.0)

    def test_await_service_ready_or_exit_raises_system_exit_when_not_ready(self, monkeypatch):
        """A readiness failure is converted into ``SystemExit`` with exit code 1."""
        # Restore the real function captured at import time before the autouse stub ran.
        monkeypatch.setattr(gateway_cli, "_await_service_ready_or_exit", _REAL_AWAIT_SERVICE_READY)

        def _raise_not_ready(**kwargs):
            raise RuntimeError("not ready")

        monkeypatch.setattr(gateway_cli, "_wait_for_service_readiness", _raise_not_ready)

        with pytest.raises(SystemExit) as exc_info:
            gateway_cli._await_service_ready_or_exit(action="start")

        # Compare the code exactly: ``match="1"`` is a regex search and would
        # also accept exit codes such as 10 or 21.
        assert exc_info.value.code == 1
|
||||
|
||||
|
||||
class TestGatewayServiceDetection:
|
||||
def test_supports_systemd_services_requires_systemctl_binary(self, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
|
||||
|
|
@ -475,7 +637,7 @@ class TestGatewaySystemServiceRouting:
|
|||
gateway_cli.systemd_restart()
|
||||
|
||||
assert calls == [("refresh", False), ("self", 654)]
|
||||
assert "restart requested" in capsys.readouterr().out.lower()
|
||||
assert "service restarted" in capsys.readouterr().out.lower()
|
||||
|
||||
def test_gateway_install_passes_system_flags(self, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue