fix(gateway): actively reap no-systemd gateway orphan before restart

Builds on @wgu9's runtime-tracking fix: now that find_gateway_pids() can
see a no-supervisor `gateway restart` runtime, have stop_profile_gateway()
fall back to an orphan-aware, profile-scoped reap (SIGTERM then SIGKILL)
when the pidfile/runtime record is missing or stale. Closes the duplicate-
accumulation path in #51325 — a follow-up restart now kills the prior
orphan instead of stacking another listener on :8644. The reap is gated
on `not supports_systemd_services()`, so a transient `gateway restart`
argv on supervised hosts is never killed.

Also adds the AUTHOR_MAP entry for the salvaged contributor.
This commit is contained in:
teknium1 2026-06-23 21:55:06 -07:00 committed by Teknium
parent 044996e403
commit 3d56807fbd
3 changed files with 124 additions and 1 deletions

View file

@ -1,6 +1,7 @@
"""Tests for hermes_cli.gateway."""
import argparse
import signal
import sys
from types import ModuleType, SimpleNamespace
@ -838,6 +839,53 @@ def test_find_gateway_pids_includes_restart_managers_without_systemd(monkeypatch
assert calls == [(set(), True, True)]
def test_reap_unsupervised_orphans_noop_on_systemd_hosts(monkeypatch):
    """With systemd support reported, the reaper neither scans nor signals.

    A `gateway restart` argv on a supervised host is transient, so the
    orphan reap must return False without touching any process.
    """
    signals_sent = []

    def record_kill(pid, sig):
        signals_sent.append((pid, sig))

    def forbidden_scan(*a, **k):
        # Reaching the PID scan at all on a supervised host is a failure.
        raise AssertionError("scanned on systemd host")

    monkeypatch.setattr(gateway, "supports_systemd_services", lambda: True)
    monkeypatch.setattr(gateway.os, "kill", record_kill)
    monkeypatch.setattr(gateway, "find_gateway_pids", forbidden_scan)

    reaped = gateway._reap_unsupervised_gateway_orphans()

    assert reaped is False
    assert signals_sent == []
def test_reap_unsupervised_orphans_sigterms_then_sigkills_survivor(monkeypatch):
    """No-systemd host: the orphan is sent SIGTERM, then SIGKILL if it survives."""
    orphan_pid = 708
    delivered = []

    def fake_find_gateway_pids(exclude_pids=None):
        return [orphan_pid]

    def fake_kill(pid, sig):
        delivered.append((pid, sig))

    # Scripted clock: the second reading is already far beyond the first, so
    # the drain window is treated as expired after a single check. Once the
    # script is exhausted the clock stays pinned at 200.0.
    clock = iter([0.0, 100.0, 200.0])

    def fake_monotonic():
        return next(clock, 200.0)

    monkeypatch.setattr(gateway, "supports_systemd_services", lambda: False)
    monkeypatch.setattr(gateway, "find_gateway_pids", fake_find_gateway_pids)
    monkeypatch.setattr("gateway.status.write_planned_stop_marker", lambda pid: True)
    # The orphan ignores SIGTERM (as in the field report): it is always
    # reported alive, which is what forces the follow-up SIGKILL.
    monkeypatch.setattr("gateway.status._pid_exists", lambda pid: True)
    monkeypatch.setattr(gateway.os, "kill", fake_kill)
    # Never actually sleep while waiting on the orphan.
    monkeypatch.setattr(gateway.time, "sleep", lambda _s: None)
    monkeypatch.setattr(gateway.time, "monotonic", fake_monotonic)

    reaped = gateway._reap_unsupervised_gateway_orphans()

    assert reaped is True
    assert (orphan_pid, signal.SIGTERM) in delivered
    assert (orphan_pid, signal.SIGKILL) in delivered
def test_reap_unsupervised_orphans_returns_false_when_none_found(monkeypatch):
    """No-systemd host with an empty PID scan: returns False, sends no signal."""
    signals_sent = []

    def record_kill(pid, sig):
        signals_sent.append((pid, sig))

    def empty_scan(exclude_pids=None):
        return []

    monkeypatch.setattr(gateway, "supports_systemd_services", lambda: False)
    monkeypatch.setattr(gateway, "find_gateway_pids", empty_scan)
    monkeypatch.setattr(gateway.os, "kill", record_kill)

    reaped = gateway._reap_unsupervised_gateway_orphans()

    assert reaped is False
    assert signals_sent == []
def test_scan_gateway_pids_detects_windows_hermes_exe_case_variants(monkeypatch):
monkeypatch.setattr(gateway, "is_windows", lambda: True)
monkeypatch.setattr(gateway, "_get_ancestor_pids", lambda: set())