From a3f878ba41ea29e6121b5d53c67d73a78279a29d Mon Sep 17 00:00:00 2001
From: Season
Date: Thu, 23 Apr 2026 19:23:13 +0800
Subject: [PATCH] Add gateway runtime checks to doctor

Add a "Runtime Health" section to the doctor command so it reports on the
live gateway instead of only on static configuration.

hermes_cli/doctor.py
- check_info() takes an optional dimmed detail string, like check_warn()
  and check_fail().
- _check_runtime_health() runs right after the systemd linger check. It
  reports whether the gateway process is running, flags an installed but
  stopped service, flags a process running outside the installed service,
  and surfaces systemd auto-restart / failed unit states.
- While the gateway is running it also renders the recorded gateway state
  and one line per configured platform; the retrying, fatal and
  disconnected platform states are appended to the issue list.
- Enabled cron jobs count as a reason the gateway is needed.
- _check_systemd_runtime_state() returns before importing gateway.restart
  when no systemd unit properties were collected.

hermes_cli/gateway.py
- GatewayRuntimeHealth bundles the runtime snapshot, the configured
  platforms, the fields read from the runtime status and the systemd unit
  properties.
- get_gateway_runtime_health() builds it, keeps only platforms that are
  configured, and drops platform entries (and a stale "running" state)
  when no gateway process is running.

tests
- Cover the doctor rendering and issue paths and the health normalisation,
  including a runtime status payload that is not a mapping.
---
 hermes_cli/doctor.py                          | 227 ++++++++++-
 hermes_cli/gateway.py                         |  73 ++++
 tests/hermes_cli/test_doctor.py               | 352 ++++++++++++++++++
 .../hermes_cli/test_gateway_runtime_health.py | 137 ++++++-
 4 files changed, 786 insertions(+), 3 deletions(-)

diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 064b1d68d1e..c7d516a3270 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -125,8 +125,8 @@ def check_warn(text: str, detail: str = ""):
 def check_fail(text: str, detail: str = ""):
     print(f" {color('✗', Colors.RED)} {text}" + (f" {color(detail, Colors.DIM)}" if detail else ""))
 
-def check_info(text: str):
-    print(f" {color('→', Colors.CYAN)} {text}")
+def check_info(text: str, detail: str = ""):
+    print(f" {color('→', Colors.CYAN)} {text}" + (f" {color(detail, Colors.DIM)}" if detail else ""))
 
 
 def _check_gateway_service_linger(issues: list[str]) -> None:
@@ -162,6 +162,228 @@ def _check_gateway_service_linger(issues: list[str]) -> None:
         check_warn("Could not verify systemd linger", f"({linger_detail})")
 
 
+_PLATFORM_ALERT_STATES = frozenset({"retrying", "fatal", "disconnected"})
+
+
+def _count_active_cron_jobs() -> int:
+    """Return the number of enabled cron jobs that depend on the gateway."""
+    try:
+        # Lazy import keeps doctor usable if cron helpers are unavailable.
+        from cron.jobs import list_jobs
+
+        return len(list_jobs(include_disabled=False))
+    except Exception:
+        return 0
+
+
+def _append_runtime_issue(
+    issues: list[str],
+    warning: str,
+    issue: str,
+    detail: str = "",
+) -> None:
+    check_warn(warning, detail)
+    issues.append(issue)
+
+
+def _platform_state_detail(platform_state: dict) -> str:
+    message = platform_state.get("error_message") or platform_state.get("error_code")
+    return f"({message})" if message else ""
+
+
+def _check_systemd_runtime_state(health, issues: list[str]) -> None:
+    props = health.systemd_unit or {}
+    if not props:
+        return
+
+    # Lazy import avoids loading gateway restart machinery unless systemd state is rendered.
+    from gateway.restart import GATEWAY_SERVICE_RESTART_EXIT_CODE
+
+    active_state = props.get("ActiveState")
+    sub_state = props.get("SubState")
+    result = props.get("Result")
+    exec_status = props.get("ExecMainStatus")
+
+    if active_state == "activating" and sub_state == "auto-restart":
+        _append_runtime_issue(
+            issues,
+            "Gateway service is auto-restarting",
+            "Gateway service is auto-restarting — inspect logs with 'hermes gateway status --deep'",
+        )
+    elif active_state == "failed" and exec_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE):
+        _append_runtime_issue(
+            issues,
+            "Gateway service failed during planned restart",
+            "Gateway service is stuck after a planned restart — run 'hermes gateway status --deep'",
+            f"(ExecMainStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE})",
+        )
+    elif active_state == "failed":
+        detail = f"(Result={result})" if result else ""
+        _append_runtime_issue(
+            issues,
+            "Gateway service failed",
+            "Gateway service failed — inspect logs with 'hermes gateway status --deep'",
+            detail,
+        )
+
+
+def _runtime_updated_detail(updated_at: str | None) -> str:
+    return f"(updated {updated_at})" if updated_at else ""
+
+
+def _paren_list(parts: list[str]) -> str:
+    return f"({'; '.join(parts)})" if parts else ""
+
+
+def _check_runtime_health(issues: list[str]) -> None:
+    """Check live gateway and delivery-surface runtime health."""
+    print()
+    print(color("◆ Runtime Health", Colors.CYAN, Colors.BOLD))
+
+    try:
+        # Lazy import keeps doctor usable if gateway helpers fail to import.
+        from hermes_cli.gateway import _format_gateway_pids, get_gateway_runtime_health
+
+        health = get_gateway_runtime_health()
+    except Exception as e:
+        check_warn("Runtime health unavailable", f"({e})")
+        return
+
+    snapshot = health.snapshot
+    configured_platforms = health.configured_platforms
+    active_cron_jobs = _count_active_cron_jobs()
+    gateway_needed = bool(configured_platforms or active_cron_jobs)
+
+    if not gateway_needed and not snapshot.running and not snapshot.service_installed:
+        check_info("No long-lived gateway-managed runtime configured")
+        return
+
+    if snapshot.running:
+        detail_parts = []
+        if snapshot.gateway_pids:
+            detail_parts.append(f"PID(s): {_format_gateway_pids(snapshot.gateway_pids, limit=None)}")
+        if snapshot.manager:
+            detail_parts.append(snapshot.manager)
+        check_ok("Gateway process running", _paren_list(detail_parts))
+    else:
+        if configured_platforms:
+            detail_parts = [f"configured: {', '.join(configured_platforms)}"]
+            issue = "Start the gateway so configured platforms can receive messages"
+            if health.gateway_state == "startup_failed" and health.exit_reason:
+                detail_parts.append(f"last startup issue: {health.exit_reason}")
+                issue = f"{issue}; last startup issue: {health.exit_reason}"
+            _append_runtime_issue(
+                issues,
+                "Gateway is not running",
+                issue,
+                _paren_list(detail_parts),
+            )
+        if active_cron_jobs:
+            _append_runtime_issue(
+                issues,
+                "Gateway is not running — scheduled jobs will not fire automatically",
+                "Start the gateway so scheduled jobs can fire automatically",
+                f"({active_cron_jobs} active job(s))",
+            )
+
+    if (
+        snapshot.service_installed
+        and not snapshot.service_running
+        and not snapshot.has_process_service_mismatch
+    ):
+        if gateway_needed:
+            _append_runtime_issue(
+                issues,
+                "Gateway service installed but stopped",
+                "Start the installed gateway service with 'hermes gateway start'",
+            )
+        else:
+            check_info(
+                "Gateway service installed but stopped",
+                "(no configured delivery surfaces or scheduled jobs)",
+            )
+
+    if snapshot.has_process_service_mismatch:
+        pid_detail = _format_gateway_pids(snapshot.gateway_pids, limit=None)
+        _append_runtime_issue(
+            issues,
+            "Gateway process is running but the installed service is not active",
+            "Gateway process is not service-managed — stop the manual process or start the service",
+            f"(PID(s): {pid_detail})",
+        )
+
+    _check_systemd_runtime_state(health, issues)
+
+    if not snapshot.running:
+        return
+
+    if not configured_platforms and not active_cron_jobs:
+        check_info("No configured delivery surfaces or scheduled jobs to check")
+
+    if active_cron_jobs:
+        check_ok(
+            "Scheduled jobs can fire automatically",
+            f"({active_cron_jobs} active job(s))",
+        )
+
+    if not health.runtime_status_available:
+        check_warn("Gateway runtime status unavailable", f"({_DHH}/gateway_state.json missing or unreadable)")
+    else:
+        gateway_state = health.gateway_state
+        updated_detail = _runtime_updated_detail(health.updated_at)
+        if gateway_state == "running":
+            check_ok("Gateway runtime state running", updated_detail)
+        elif gateway_state == "draining":
+            check_info("Gateway runtime state draining", updated_detail)
+        elif gateway_state == "startup_failed":
+            reason = health.exit_reason or "unknown startup issue"
+            detail_parts = [reason]
+            if health.updated_at:
+                detail_parts.append(f"updated {health.updated_at}")
+            _append_runtime_issue(
+                issues,
+                "Gateway startup failed",
+                f"Gateway startup failed: {reason}",
+                _paren_list(detail_parts),
+            )
+        elif gateway_state:
+            check_info(f"Gateway runtime state {gateway_state}", updated_detail)
+        else:
+            check_warn("Gateway runtime state unknown")
+
+    if configured_platforms and health.runtime_status_available:
+        for platform in configured_platforms:
+            platform_state = health.platforms.get(platform)
+            if not platform_state:
+                _append_runtime_issue(
+                    issues,
+                    f"{platform} runtime health unknown",
+                    f"{platform} is configured but missing from gateway runtime status",
+                )
+                continue
+
+            state = platform_state.get("state")
+            if state == "connected":
+                check_ok(f"{platform} connected")
+            elif state == "connecting":
+                check_info(f"{platform} connecting")
+            elif state in _PLATFORM_ALERT_STATES:
+                _append_runtime_issue(
+                    issues,
+                    f"{platform} {state}",
+                    f"{platform} runtime state is {state}",
+                    _platform_state_detail(platform_state),
+                )
+            elif state:
+                check_info(f"{platform} {state}")
+            else:
+                _append_runtime_issue(
+                    issues,
+                    f"{platform} runtime health unknown",
+                    f"{platform} runtime state is missing",
+                )
+
+
 def run_doctor(args):
     """Run diagnostic checks."""
     should_fix = getattr(args, 'fix', False)
@@ -613,6 +835,7 @@ def run_doctor(args):
             pass
 
     _check_gateway_service_linger(issues)
+    _check_runtime_health(issues)
 
     # =========================================================================
     # Check: Command installation (hermes bin symlink)
diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index 8b360087cfe..4136eb7bd63 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -12,6 +12,7 @@ import subprocess
 import sys
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Any
 
 PROJECT_ROOT = Path(__file__).parent.parent.resolve()
 
@@ -59,6 +60,19 @@ class GatewayRuntimeSnapshot:
     def has_process_service_mismatch(self) -> bool:
         return self.service_installed and self.running and not self.service_running
 
+
+@dataclass(frozen=True)
+class GatewayRuntimeHealth:
+    snapshot: GatewayRuntimeSnapshot
+    configured_platforms: tuple[str, ...]
+    runtime_status_available: bool
+    gateway_state: str | None
+    exit_reason: str | None
+    platforms: dict[str, dict[str, Any]]
+    updated_at: str | None
+    systemd_unit: dict[str, str]
+
+
 def _get_service_pids() -> set:
     """Return PIDs currently managed by systemd or launchd gateway services.
 
@@ -532,6 +546,65 @@ def get_gateway_runtime_snapshot(system: bool = False) -> GatewayRuntimeSnapshot
     )
 
 
+def get_gateway_runtime_health(system: bool = False) -> GatewayRuntimeHealth:
+    """Return normalized gateway runtime health for diagnostics surfaces."""
+    snapshot = get_gateway_runtime_snapshot(system=system)
+
+    try:
+        from gateway.config import load_gateway_config
+
+        configured_platforms = tuple(
+            platform.value for platform in load_gateway_config().get_connected_platforms()
+        )
+    except Exception:
+        configured_platforms = ()
+
+    try:
+        from gateway.status import read_runtime_status
+
+        runtime_status = read_runtime_status()
+    except Exception:
+        runtime_status = None
+
+    runtime_status_available = isinstance(runtime_status, dict)
+    gateway_state = runtime_status.get("gateway_state") if runtime_status_available else None
+    exit_reason = runtime_status.get("exit_reason") if runtime_status_available else None
+    updated_at = runtime_status.get("updated_at") if runtime_status_available else None
+    raw_platforms = runtime_status.get("platforms", {}) if runtime_status_available else {}
+    if not isinstance(raw_platforms, dict):
+        raw_platforms = {}
+
+    configured_set = set(configured_platforms)
+    platforms: dict[str, dict[str, Any]] = {}
+    for platform, payload in raw_platforms.items():
+        slug = str(platform)
+        if slug not in configured_set or not isinstance(payload, dict):
+            continue
+        platforms[slug] = dict(payload)
+
+    if not snapshot.running:
+        platforms = {}
+        if gateway_state == "running":
+            gateway_state = "stopped"
+
+    systemd_unit = (
+        _read_systemd_unit_properties(system=system)
+        if supports_systemd_services() and snapshot.service_installed
+        else {}
+    )
+
+    return GatewayRuntimeHealth(
+        snapshot=snapshot,
+        configured_platforms=configured_platforms,
+        runtime_status_available=runtime_status_available,
+        gateway_state=gateway_state,
+        exit_reason=exit_reason,
+        platforms=platforms,
+        updated_at=updated_at,
+        systemd_unit=systemd_unit,
+    )
+
+
 def _format_gateway_pids(pids: tuple[int, ...] | list[int], *, limit: int | None = 3) -> str:
     rendered = [str(pid) for pid in pids[:limit] if pid > 0] if limit is not None else [str(pid) for pid in pids if pid > 0]
     if limit is not None and len(pids) > limit:
diff --git a/tests/hermes_cli/test_doctor.py b/tests/hermes_cli/test_doctor.py
index 948cafaf71c..95c4931a660 100644
--- a/tests/hermes_cli/test_doctor.py
+++ b/tests/hermes_cli/test_doctor.py
@@ -12,6 +12,9 @@ import hermes_cli.doctor as doctor
 import hermes_cli.gateway as gateway_cli
 from hermes_cli import doctor as doctor_mod
 from hermes_cli.doctor import _has_provider_env_config
+from hermes_cli.gateway import GatewayRuntimeHealth, GatewayRuntimeSnapshot
+
+_DEFAULT_RUNTIME_UPDATED_AT = "2026-04-23T00:00:00+00:00"
 
 
 class TestDoctorPlatformHints:
@@ -159,6 +162,355 @@ def test_check_gateway_service_linger_skips_when_service_not_installed(monkeypat
     assert issues == []
 
 
+def _gateway_health(
+    *,
+    snapshot=None,
+    configured=(),
+    runtime_status_available=True,
+    gateway_state="running",
+    exit_reason=None,
+    platforms=None,
+    systemd_unit=None,
+    updated_at=_DEFAULT_RUNTIME_UPDATED_AT,
+):
+    if not runtime_status_available and updated_at == _DEFAULT_RUNTIME_UPDATED_AT:
+        updated_at = None
+    return GatewayRuntimeHealth(
+        snapshot=snapshot or GatewayRuntimeSnapshot(
+            manager="manual process",
+            gateway_pids=(1234,),
+        ),
+        configured_platforms=tuple(configured),
+        runtime_status_available=runtime_status_available,
+        gateway_state=gateway_state,
+        exit_reason=exit_reason,
+        platforms=platforms or {},
+        updated_at=updated_at,
+        systemd_unit=systemd_unit or {},
+    )
+
+
+def _run_runtime_check(monkeypatch, capsys, health, *, active_cron_jobs=0):
+    monkeypatch.setattr(gateway_cli, "get_gateway_runtime_health", lambda: health)
+    monkeypatch.setattr(doctor, "_count_active_cron_jobs", lambda: active_cron_jobs)
+    issues = []
+    doctor._check_runtime_health(issues)
+    return capsys.readouterr().out, issues
+
+
+def test_runtime_health_no_gateway_configured_is_info_only(monkeypatch, capsys):
+    health = _gateway_health(
+        snapshot=GatewayRuntimeSnapshot(manager="manual process"),
+        configured=(),
+        runtime_status_available=False,
+        gateway_state=None,
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "No long-lived gateway-managed runtime configured" in out
+    assert issues == []
+
+
+def test_runtime_health_gateway_not_running_adds_one_liveness_issue(monkeypatch, capsys):
+    health = _gateway_health(
+        snapshot=GatewayRuntimeSnapshot(manager="manual process"),
+        configured=("telegram",),
+        gateway_state="stopped",
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "Gateway is not running" in out
+    assert len(issues) == 1
+    assert issues[0] == "Start the gateway so configured platforms can receive messages"
+
+
+def test_runtime_health_gateway_not_running_includes_startup_failure(monkeypatch, capsys):
+    health = _gateway_health(
+        snapshot=GatewayRuntimeSnapshot(manager="manual process"),
+        configured=("telegram",),
+        gateway_state="startup_failed",
+        exit_reason="telegram conflict",
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "last startup issue: telegram conflict" in out
+    assert issues == [
+        "Start the gateway so configured platforms can receive messages; last startup issue: telegram conflict"
+    ]
+
+
+def test_runtime_health_missing_status_file_does_not_emit_platform_issues(monkeypatch, capsys):
+    health = _gateway_health(
+        configured=("telegram", "discord", "slack"),
+        runtime_status_available=False,
+        gateway_state=None,
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "Gateway runtime status unavailable" in out
+    assert "runtime health unknown" not in out
+    assert issues == []
+
+
+def test_runtime_health_startup_failed_adds_issue(monkeypatch, capsys):
+    health = _gateway_health(
+        configured=("telegram",),
+        gateway_state="startup_failed",
+        exit_reason="telegram conflict",
+        platforms={"telegram": {"state": "connected"}},
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "Gateway startup failed" in out
+    assert "telegram conflict" in out
+    assert issues == ["Gateway startup failed: telegram conflict"]
+
+
+def test_runtime_health_platform_retrying_adds_issue(monkeypatch, capsys):
+    health = _gateway_health(
+        configured=("telegram",),
+        platforms={
+            "telegram": {
+                "state": "retrying",
+                "error_message": "another poller is active",
+            }
+        },
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "telegram retrying" in out
+    assert "another poller is active" in out
+    assert issues == ["telegram runtime state is retrying"]
+
+
+def test_runtime_health_unknown_non_alert_platform_state_is_info_only(monkeypatch, capsys):
+    health = _gateway_health(
+        configured=("telegram",),
+        platforms={"telegram": {"state": "idle"}},
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "telegram idle" in out
+    assert issues == []
+
+
+def test_runtime_health_missing_configured_platform_entry_adds_issue(monkeypatch, capsys):
+    health = _gateway_health(configured=("telegram",), platforms={})
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "telegram runtime health unknown" in out
+    assert issues == ["telegram is configured but missing from gateway runtime status"]
+
+
+def test_runtime_health_transient_states_are_info_only(monkeypatch, capsys):
+    health = _gateway_health(
+        configured=("telegram",),
+        gateway_state="draining",
+        platforms={"telegram": {"state": "connecting"}},
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "Gateway runtime state draining" in out
+    assert "telegram connecting" in out
+    assert issues == []
+
+
+def test_runtime_health_cron_jobs_without_gateway_adds_issue(monkeypatch, capsys):
+    health = _gateway_health(
+        snapshot=GatewayRuntimeSnapshot(manager="manual process"),
+        configured=(),
+        runtime_status_available=False,
+        gateway_state=None,
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health, active_cron_jobs=2)
+
+    assert "scheduled jobs will not fire automatically" in out
+    assert issues == ["Start the gateway so scheduled jobs can fire automatically"]
+
+
+def test_runtime_health_cron_jobs_with_gateway_are_ok(monkeypatch, capsys):
+    health = _gateway_health(configured=(), platforms={})
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health, active_cron_jobs=2)
+
+    assert "Scheduled jobs can fire automatically" in out
+    assert "scheduled jobs will not fire automatically" not in out
+    assert issues == []
+
+
+def test_runtime_health_renders_updated_at_for_running_state(monkeypatch, capsys):
+    health = _gateway_health(
+        configured=("telegram",),
+        platforms={"telegram": {"state": "connected"}},
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "updated 2026-04-23T00:00:00+00:00" in out
+    assert issues == []
+
+
+def test_runtime_health_running_state_without_updated_at_has_no_empty_detail(monkeypatch, capsys):
+    health = _gateway_health(
+        configured=("telegram",),
+        platforms={"telegram": {"state": "connected"}},
+        updated_at=None,
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "Gateway runtime state running" in out
+    assert "updated " not in out
+    assert issues == []
+
+
+def test_runtime_health_running_gateway_with_no_surfaces_is_info_only(monkeypatch, capsys):
+    health = _gateway_health(configured=(), platforms={})
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "Gateway process running" in out
+    assert "No configured delivery surfaces or scheduled jobs to check" in out
+    assert issues == []
+
+
+def test_runtime_health_unknown_runtime_state_is_warn_only(monkeypatch, capsys):
+    health = _gateway_health(
+        configured=(),
+        runtime_status_available=True,
+        gateway_state=None,
+        platforms={},
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "Gateway runtime state unknown" in out
+    assert issues == []
+
+
+def test_runtime_health_stopped_service_without_consumers_is_info_only(monkeypatch, capsys):
+    health = _gateway_health(
+        snapshot=GatewayRuntimeSnapshot(
+            manager="systemd (user)",
+            service_installed=True,
+            service_running=False,
+        ),
+        configured=(),
+        runtime_status_available=False,
+        gateway_state=None,
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "Gateway service installed but stopped" in out
+    assert issues == []
+
+
+def test_runtime_health_stopped_service_with_configured_platform_adds_issue(monkeypatch, capsys):
+    health = _gateway_health(
+        snapshot=GatewayRuntimeSnapshot(
+            manager="systemd (user)",
+            service_installed=True,
+            service_running=False,
+        ),
+        configured=("telegram",),
+        gateway_state="stopped",
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "Gateway service installed but stopped" in out
+    assert "Start the installed gateway service with 'hermes gateway start'" in issues
+
+
+def test_runtime_health_service_process_mismatch_adds_issue(monkeypatch, capsys):
+    health = _gateway_health(
+        snapshot=GatewayRuntimeSnapshot(
+            manager="systemd (user)",
+            service_installed=True,
+            service_running=False,
+            gateway_pids=(1234,),
+        ),
+        configured=(),
+        platforms={},
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "installed service is not active" in out
+    assert issues == [
+        "Gateway process is not service-managed — stop the manual process or start the service"
+    ]
+
+
+def test_runtime_health_service_process_mismatch_suppresses_stopped_service_issue(monkeypatch, capsys):
+    health = _gateway_health(
+        snapshot=GatewayRuntimeSnapshot(
+            manager="systemd (user)",
+            service_installed=True,
+            service_running=False,
+            gateway_pids=(1234,),
+        ),
+        configured=("telegram",),
+        platforms={"telegram": {"state": "connected"}},
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert "Gateway process is running but the installed service is not active" in out
+    assert "Gateway service installed but stopped" not in out
+    assert issues == [
+        "Gateway process is not service-managed — stop the manual process or start the service"
+    ]
+
+
+@pytest.mark.parametrize(
+    ("systemd_unit", "expected"),
+    [
+        (
+            {"ActiveState": "activating", "SubState": "auto-restart"},
+            "Gateway service is auto-restarting",
+        ),
+        (
+            {"ActiveState": "failed", "Result": "exit-code", "ExecMainStatus": "1"},
+            "Gateway service failed",
+        ),
+    ],
+)
+def test_runtime_health_systemd_failure_states_add_issue(
+    monkeypatch,
+    capsys,
+    systemd_unit,
+    expected,
+):
+    health = _gateway_health(
+        snapshot=GatewayRuntimeSnapshot(
+            manager="systemd (user)",
+            service_installed=True,
+            service_running=False,
+        ),
+        configured=(),
+        gateway_state="stopped",
+        systemd_unit=systemd_unit,
+    )
+
+    out, issues = _run_runtime_check(monkeypatch, capsys, health)
+
+    assert expected in out
+    assert len(issues) == 1
+    assert expected in issues[0]
+
+
 # ── Memory provider section (doctor should only check the *active* provider) ──
 
 
diff --git a/tests/hermes_cli/test_gateway_runtime_health.py b/tests/hermes_cli/test_gateway_runtime_health.py
index 15c0705cfe9..134c806002c 100644
--- a/tests/hermes_cli/test_gateway_runtime_health.py
+++ b/tests/hermes_cli/test_gateway_runtime_health.py
@@ -1,4 +1,139 @@
-from hermes_cli.gateway import _runtime_health_lines
+from types import SimpleNamespace
+
+import hermes_cli.gateway as gateway_cli
+from hermes_cli.gateway import (
+    GatewayRuntimeSnapshot,
+    get_gateway_runtime_health,
+    _runtime_health_lines,
+)
+
+
+def _platform(slug: str):
+    return SimpleNamespace(value=slug)
+
+
+def _config(*platforms: str):
+    return SimpleNamespace(
+        get_connected_platforms=lambda: [_platform(platform) for platform in platforms]
+    )
+
+
+def _snapshot(*, running: bool = True, service_installed: bool = False, service_running: bool = False):
+    return GatewayRuntimeSnapshot(
+        manager="manual process",
+        service_installed=service_installed,
+        service_running=service_running,
+        gateway_pids=(1234,) if running and not service_running else (),
+    )
+
+
+def _patch_runtime_health_deps(monkeypatch, *, snapshot=None, config=None, status=None):
+    monkeypatch.setattr(
+        gateway_cli,
+        "get_gateway_runtime_snapshot",
+        lambda system=False: snapshot or _snapshot(),
+    )
+    monkeypatch.setattr(
+        "gateway.config.load_gateway_config",
+        lambda: config or _config(),
+    )
+    monkeypatch.setattr("gateway.status.read_runtime_status", lambda: status)
+    monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: False)
+
+
+def test_gateway_runtime_health_handles_missing_status(monkeypatch):
+    _patch_runtime_health_deps(
+        monkeypatch,
+        snapshot=_snapshot(running=True),
+        config=_config("telegram"),
+        status=None,
+    )
+
+    health = get_gateway_runtime_health()
+
+    assert health.runtime_status_available is False
+    assert health.gateway_state is None
+    assert health.platforms == {}
+
+
+def test_gateway_runtime_health_treats_unparseable_status_as_unavailable(monkeypatch):
+    _patch_runtime_health_deps(
+        monkeypatch,
+        snapshot=_snapshot(running=True),
+        config=_config("telegram"),
+        status=["not", "a", "mapping"],
+    )
+
+    health = get_gateway_runtime_health()
+
+    assert health.runtime_status_available is False
+    assert health.gateway_state is None
+
+
+def test_gateway_runtime_health_filters_configured_platforms(monkeypatch):
+    _patch_runtime_health_deps(
+        monkeypatch,
+        snapshot=_snapshot(running=True),
+        config=_config("telegram", "discord"),
+        status={
+            "gateway_state": "running",
+            "updated_at": "2026-04-23T00:00:00+00:00",
+            "platforms": {
+                "telegram": {"state": "connected"},
+                "discord": {"state": "connecting"},
+                "slack": {"state": "connected"},
+            },
+        },
+    )
+
+    health = get_gateway_runtime_health()
+
+    assert health.runtime_status_available is True
+    assert health.gateway_state == "running"
+    assert health.updated_at == "2026-04-23T00:00:00+00:00"
+    assert set(health.platforms) == {"telegram", "discord"}
+
+
+def test_gateway_runtime_health_drops_stale_platforms_when_not_running(monkeypatch):
+    _patch_runtime_health_deps(
+        monkeypatch,
+        snapshot=_snapshot(running=False),
+        config=_config("telegram"),
+        status={
+            "gateway_state": "running",
+            "platforms": {"telegram": {"state": "connected"}},
+        },
+    )
+
+    health = get_gateway_runtime_health()
+
+    assert health.gateway_state == "stopped"
+    assert health.platforms == {}
+
+
+def test_gateway_runtime_health_exposes_systemd_properties(monkeypatch):
+    _patch_runtime_health_deps(
+        monkeypatch,
+        snapshot=_snapshot(running=False, service_installed=True),
+        config=_config("telegram"),
+        status={"gateway_state": "stopped", "platforms": {}},
+    )
+    monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
+    monkeypatch.setattr(
+        gateway_cli,
+        "_read_systemd_unit_properties",
+        lambda system=False: {
+            "ActiveState": "activating",
+            "SubState": "auto-restart",
+        },
+    )
+
+    health = get_gateway_runtime_health()
+
+    assert health.systemd_unit == {
+        "ActiveState": "activating",
+        "SubState": "auto-restart",
+    }
 
 
 def test_runtime_health_lines_include_fatal_platform_and_startup_reason(monkeypatch):