From 06d907dc4e7c1db101ae6fe90cd4c0ff5dba9588 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Wed, 17 Jun 2026 05:19:43 -0700 Subject: [PATCH] fix(dashboard): only run runtime-pid liveness fallback against local status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit get_runtime_status_running_pid() validates liveness with a local os.kill(pid, 0) probe. In /api/status the runtime record can be the REMOTE health-probe body (cross-container), whose PID belongs to another host and is display-only — probing it locally is wrong and trips the test live-system guard (os.kill on a PID outside the test subtree). Run the fallback only against the local read_runtime_status() record. --- hermes_cli/web_server.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index efab7f455d1..70f39162cf8 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -1668,13 +1668,20 @@ async def get_status(profile: Optional[str] = None): # Prefer the detailed health endpoint response (has full state) when the # local runtime status file is absent or stale (cross-container). - runtime = read_runtime_status() + local_runtime = read_runtime_status() + runtime = local_runtime if runtime is None and remote_health_body and remote_health_body.get("gateway_state"): runtime = remote_health_body - runtime_pid = get_runtime_status_running_pid(runtime) - if not gateway_running and runtime_pid is not None: - gateway_running = True - gateway_pid = runtime_pid + # The runtime-status PID fallback validates liveness with a local + # os.kill() probe, so it must only run against the LOCAL status file — + # never the remote health body, whose PID belongs to another host and + # is display-only. (Running os.kill on a remote PID is both wrong and + # trips the test live-system guard.) + if not gateway_running and local_runtime is not None: + runtime_pid = get_runtime_status_running_pid(local_runtime) + if runtime_pid is not None: + gateway_running = True + gateway_pid = runtime_pid if runtime: gateway_state = runtime.get("gateway_state")