mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
refactor(gateway): route all active_agents coercion through parse_active_agents; harden drain-timeout fallback
Second cleanup pass (simplify-code review of the first follow-up):

- write_runtime_status now clamps active_agents via parse_active_agents instead of an inline max(0, int(...)). Removes the duplicated clamp the helper's docstring acknowledged AND closes a write-side ValueError gap (a non-numeric active_agents previously raised; now degrades to 0).
- hermes_cli/gateway.py draining-status line routes its active-agents count through parse_active_agents too — the third coercion site of the same persisted field, now consistent and non-raising with the two HTTP surfaces.
- web_server.py /api/status: the drain-timeout resolver fallback now catches ImportError specifically and falls back to DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT (a real float) instead of a blanket 'except Exception -> None'. None would have violated the surfaced field's int/float contract and stripped NAS's poll-deadline hint silently.
- Dropped a redundant 'if runtime else 0' branch (parse_active_agents already handles the empty/None case) and tightened the parse_active_agents docstring to describe the actual single-contract role (write + both reads).
This commit is contained in:
parent
b577f25100
commit
4d7bb382b0
3 changed files with 16 additions and 10 deletions
|
|
@ -595,7 +595,7 @@ def write_runtime_status(
|
|||
if restart_requested is not _UNSET:
|
||||
payload["restart_requested"] = bool(restart_requested)
|
||||
if active_agents is not _UNSET:
|
||||
payload["active_agents"] = max(0, int(active_agents))
|
||||
payload["active_agents"] = parse_active_agents(active_agents)
|
||||
if served_profiles is not _UNSET:
|
||||
# Profiles this gateway multiplexes (multi-profile mode). Absent/empty
|
||||
# for a single-profile gateway. Lets `hermes status` show per-profile
|
||||
|
|
@ -624,11 +624,11 @@ def read_runtime_status() -> Optional[dict[str, Any]]:
|
|||
def parse_active_agents(raw: Any) -> int:
|
||||
"""Coerce a persisted ``active_agents`` value to a clamped non-negative int.
|
||||
|
||||
The status file is written atomically but can still hold an
|
||||
absent/None/garbage ``active_agents`` after a partial write or a manual
|
||||
edit. Both HTTP surfaces (``/api/status`` and ``/health/detailed``) read it
|
||||
through this single helper so the field they expose is consistent and never
|
||||
negative. Mirrors the write-side clamp in ``write_runtime_status``.
|
||||
The shared coercion for the in-flight gateway-turn count. Used on the WRITE
|
||||
side (``write_runtime_status``) and by both HTTP read surfaces
|
||||
(``/api/status`` and ``/health/detailed``) so the count is clamped to a
|
||||
single contract — never negative, never raising on a manually-edited or
|
||||
otherwise non-numeric value (degrades to ``0``).
|
||||
"""
|
||||
try:
|
||||
return max(0, int(raw))
|
||||
|
|
|
|||
|
|
@ -4573,7 +4573,9 @@ def _runtime_health_lines() -> list[str]:
|
|||
lines.append(f"⚠ Last startup issue: {exit_reason}")
|
||||
elif gateway_state == "draining":
|
||||
action = "restart" if restart_requested else "shutdown"
|
||||
count = int(active_agents or 0)
|
||||
from gateway.status import parse_active_agents
|
||||
|
||||
count = parse_active_agents(active_agents)
|
||||
lines.append(f"⏳ Gateway draining for {action} ({count} active agent(s))")
|
||||
elif gateway_state == "stopped" and exit_reason:
|
||||
lines.append(f"⚠ Last shutdown reason: {exit_reason}")
|
||||
|
|
|
|||
|
|
@ -1844,7 +1844,7 @@ async def get_status(profile: Optional[str] = None):
|
|||
# liveness via the single shared contract in gateway.status. Liveness
|
||||
# keys off gateway_running (a live PID/health probe), NEVER
|
||||
# gateway_updated_at — a healthy idle gateway never advances that.
|
||||
active_agents = parse_active_agents(runtime.get("active_agents", 0)) if runtime else 0
|
||||
active_agents = parse_active_agents((runtime or {}).get("active_agents", 0))
|
||||
gateway_busy = derive_gateway_busy(
|
||||
gateway_running=gateway_running,
|
||||
gateway_state=gateway_state,
|
||||
|
|
@ -1862,8 +1862,12 @@ async def get_status(profile: Optional[str] = None):
|
|||
from hermes_cli.gateway import _get_restart_drain_timeout
|
||||
|
||||
restart_drain_timeout = _get_restart_drain_timeout()
|
||||
except Exception:
|
||||
restart_drain_timeout = None
|
||||
except ImportError:
|
||||
# Resolver moved/renamed — fall back to the real default so the
|
||||
# field stays a numeric poll-deadline hint, never None.
|
||||
from gateway.restart import DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
|
||||
|
||||
restart_drain_timeout = DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
|
||||
|
||||
# Dashboard auth gate (Phase 7): surface whether the gate is engaged
|
||||
# and which providers are registered so ``hermes status`` and the
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue