mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(gateway): launchd_stop uses bootout so KeepAlive doesn't respawn (#7119)
launchd_stop() previously used `launchctl kill SIGTERM` which only signals the process. Because the plist has KeepAlive.SuccessfulExit=false, launchd immediately respawns the gateway — making `hermes gateway stop` a no-op that prints '✓ Service stopped' while the service keeps running. Switch to `launchctl bootout` which unloads the service definition so KeepAlive can't trigger. The process exits and stays stopped until `hermes gateway start` (which already handles re-bootstrapping unloaded jobs via error codes 3/113). Also adds _wait_for_gateway_exit() after bootout to ensure the process is fully gone before returning, and tolerates 'already unloaded' errors. Fixes: .env changes not taking effect after gateway stop+restart on macOS. The root cause was that stop didn't actually stop — the respawned process loaded the old env before the user's restart command ran.
This commit is contained in:
parent
957485876b
commit
437feabb74
2 changed files with 70 additions and 1 deletions
|
|
@@ -1196,7 +1196,19 @@ def launchd_start():
|
|||
|
||||
def launchd_stop():
    """Stop the launchd-managed gateway service and keep it stopped.

    The service is unloaded with ``launchctl bootout`` instead of being
    signalled, after which ``_wait_for_gateway_exit`` is called before the
    confirmation line is printed.

    Raises:
        subprocess.CalledProcessError: ``launchctl bootout`` exited with a
            status other than 3 or 113.
        subprocess.TimeoutExpired: ``launchctl bootout`` did not return
            within 90 seconds.
    """
    label = get_launchd_label()
    target = f"{_launchd_domain()}/{label}"
    # bootout unloads the service definition so KeepAlive doesn't respawn
    # the process. A plain `kill SIGTERM` only signals the process — launchd
    # immediately restarts it because KeepAlive.SuccessfulExit = false.
    # `hermes gateway start` re-bootstraps when it detects the job is unloaded.
    try:
        subprocess.run(["launchctl", "bootout", target], check=True, timeout=90)
    except subprocess.CalledProcessError as e:
        # Exit codes 3 / 113 mean the job is already unloaded — nothing to
        # stop. Any other failure is unexpected and must reach the caller.
        if e.returncode not in (3, 113):
            raise
    # NOTE(review): force_after presumably escalates to a forced kill after
    # that many seconds — confirm against _wait_for_gateway_exit.
    _wait_for_gateway_exit(timeout=10.0, force_after=5.0)
    print("✓ Service stopped")
|
||||
|
||||
def _wait_for_gateway_exit(timeout: float = 10.0, force_after: float = 5.0):
|
||||
|
|
|
|||
|
|
@@ -234,6 +234,63 @@ class TestLaunchdServiceRecovery:
|
|||
["launchctl", "kickstart", target],
|
||||
]
|
||||
|
||||
def test_launchd_stop_uses_bootout_not_kill(self, monkeypatch):
    """Stopping must issue a single `launchctl bootout` so KeepAlive can't respawn the job."""
    service_label = gateway_cli.get_launchd_label()
    service_domain = gateway_cli._launchd_domain()
    expected_command = ["launchctl", "bootout", f"{service_domain}/{service_label}"]

    # Every command handed to subprocess.run is recorded here, in call order.
    recorded_commands = []

    def fake_run(cmd, check=False, **kwargs):
        recorded_commands.append(cmd)
        return SimpleNamespace(returncode=0, stdout="", stderr="")

    # Stub out both the launchctl invocation and the post-stop wait.
    monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
    monkeypatch.setattr(gateway_cli, "_wait_for_gateway_exit", lambda **kw: None)

    gateway_cli.launchd_stop()

    # Exactly one command, and it is the bootout — no `kill` anywhere.
    assert recorded_commands == [expected_command]
|
||||
|
||||
def test_launchd_stop_tolerates_already_unloaded(self, monkeypatch, capsys):
    """launchd_stop silently handles exit codes 3/113 (job not loaded)."""

    def make_fake_run(exit_code):
        # Build a subprocess.run stand-in whose bootout fails with exit_code.
        def fake_run(cmd, check=False, **kwargs):
            if "bootout" in cmd:
                raise gateway_cli.subprocess.CalledProcessError(
                    exit_code, cmd, stderr="Could not find service"
                )
            return SimpleNamespace(returncode=0, stdout="", stderr="")

        return fake_run

    monkeypatch.setattr(gateway_cli, "_wait_for_gateway_exit", lambda **kw: None)

    # Exercise both "already unloaded" codes the docstring promises.
    for exit_code in (3, 113):
        monkeypatch.setattr(gateway_cli.subprocess, "run", make_fake_run(exit_code))

        # Should not raise — these exit codes mean already unloaded
        gateway_cli.launchd_stop()

        # readouterr() drains the capture, so each pass checks only its own output.
        output = capsys.readouterr().out
        assert "stopped" in output.lower()
|
||||
|
||||
def test_launchd_stop_waits_for_process_exit(self, monkeypatch):
    """After bootout, launchd_stop must call _wait_for_gateway_exit exactly once."""
    # Keyword arguments of every _wait_for_gateway_exit call, in order.
    wait_invocations = []

    # launchctl always "succeeds"; only the wait call is under test.
    monkeypatch.setattr(
        gateway_cli.subprocess,
        "run",
        lambda cmd, check=False, **kwargs: SimpleNamespace(returncode=0, stdout="", stderr=""),
    )
    monkeypatch.setattr(
        gateway_cli,
        "_wait_for_gateway_exit",
        lambda **kwargs: wait_invocations.append(kwargs),
    )

    gateway_cli.launchd_stop()

    # One call, with the expected timeout and force-kill threshold.
    assert wait_invocations == [{"timeout": 10.0, "force_after": 5.0}]
|
||||
|
||||
def test_launchd_status_reports_local_stale_plist_when_unloaded(self, tmp_path, monkeypatch, capsys):
|
||||
plist_path = tmp_path / "ai.hermes.gateway.plist"
|
||||
plist_path.write_text("<plist>old content</plist>", encoding="utf-8")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue