mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
fix: refresh systemd unit on gateway boot (not just start/restart) (#19684)
The resilient restart settings from PR #18639 only took effect when the gateway was started via `hermes gateway start` or `hermes gateway restart` — both of which call refresh_systemd_unit_if_needed() which writes the new unit and runs daemon-reload. However, when the gateway self-restarts via exit-code-75 (stale-code detection after `hermes update`, or the /restart command), systemd respawns the process directly without going through any CLI function. The unit file on disk stays stale, and systemd keeps using the old cached settings (StartLimitBurst=5, RestartSec=30) until someone manually runs `hermes gateway restart`. This meant that after PR #18639 was deployed, users who never ran `hermes gateway restart` manually were still vulnerable to the permanent-death-on-network-outage bug. Fix: call refresh_systemd_unit_if_needed() at the top of run_gateway() (the foreground entry point that systemd's ExecStart invokes). This ensures that on every boot — whether triggered by systemd restart, exit-75 respawn, or manual foreground run — the unit definition and daemon state are current. The call is best-effort (exceptions caught) and a no-op when the unit is already current (one stat + string compare).
This commit is contained in:
parent
33f554d83c
commit
af6f9bc2a1
2 changed files with 48 additions and 0 deletions
|
|
@ -107,6 +107,40 @@ class TestSystemdServiceRefresh:
|
|||
]
|
||||
|
||||
|
||||
def test_run_gateway_refreshes_outdated_unit_on_boot(self, tmp_path, monkeypatch):
|
||||
"""run_gateway() should refresh the systemd unit on boot so that
|
||||
restart settings take effect even when the process was respawned
|
||||
via exit-code-75 (bypassing `hermes gateway restart`)."""
|
||||
unit_path = tmp_path / "hermes-gateway.service"
|
||||
unit_path.write_text("old unit\n", encoding="utf-8")
|
||||
|
||||
monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path)
|
||||
monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n")
|
||||
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
||||
|
||||
calls = []
|
||||
|
||||
def fake_run(cmd, check=True, **kwargs):
|
||||
calls.append(cmd)
|
||||
return SimpleNamespace(returncode=0, stdout="", stderr="")
|
||||
|
||||
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
||||
|
||||
# Prevent run_gateway from actually starting the gateway
|
||||
def fake_start_gateway(**kwargs):
|
||||
import asyncio
|
||||
f = asyncio.Future()
|
||||
f.set_result(True)
|
||||
return f
|
||||
|
||||
monkeypatch.setattr("gateway.run.start_gateway", fake_start_gateway)
|
||||
|
||||
gateway_cli.run_gateway()
|
||||
|
||||
assert unit_path.read_text(encoding="utf-8") == "new unit\n"
|
||||
assert ["systemctl", "--user", "daemon-reload"] in calls
|
||||
|
||||
|
||||
class TestGeneratedSystemdUnits:
|
||||
def test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self):
|
||||
unit = gateway_cli.generate_systemd_unit(system=False)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue