fix(dashboard): only run runtime-pid liveness fallback against local status

get_runtime_status_running_pid() validates liveness with a local
os.kill(pid, 0) probe. In /api/status the runtime record can be the
REMOTE health-probe body (cross-container), whose PID belongs to another
host and is display-only. Probing that PID locally is wrong — on this
host it may name an unrelated process or none at all — and it also trips
the test live-system guard (os.kill on a PID outside the test subtree).
Run the fallback only against the local read_runtime_status() record.
This commit is contained in:
teknium1 2026-06-17 05:19:43 -07:00 committed by Teknium
parent dc86d48a3e
commit 06d907dc4e

View file

@@ -1668,13 +1668,20 @@ async def get_status(profile: Optional[str] = None):
# Prefer the detailed health endpoint response (has full state) when the
# local runtime status file is absent or stale (cross-container).
runtime = read_runtime_status()
local_runtime = read_runtime_status()
runtime = local_runtime
if runtime is None and remote_health_body and remote_health_body.get("gateway_state"):
runtime = remote_health_body
runtime_pid = get_runtime_status_running_pid(runtime)
if not gateway_running and runtime_pid is not None:
gateway_running = True
gateway_pid = runtime_pid
# The runtime-status PID fallback validates liveness with a local
# os.kill() probe, so it must only run against the LOCAL status file —
# never the remote health body, whose PID belongs to another host and
# is display-only. (Running os.kill on a remote PID is both wrong and
# trips the test live-system guard.)
if not gateway_running and local_runtime is not None:
runtime_pid = get_runtime_status_running_pid(local_runtime)
if runtime_pid is not None:
gateway_running = True
gateway_pid = runtime_pid
if runtime:
gateway_state = runtime.get("gateway_state")