From 4d18717b6c798d4f6bab9e736c6ed10c5a8365f4 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Mon, 8 Jun 2026 00:20:08 -0700
Subject: [PATCH] fix(gateway): drop --replace from systemd unit templates (#41892)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Under systemd's Restart=always, --replace turns every restart into a
self-kill loop: the new instance reads gateway.pid, kills the previous
process, writes its own PID, and on the next restart the cycle repeats.
A process supervisor owns the lifecycle — --replace is for manual
one-shot takeovers and fights the supervisor.

Remove --replace from both the system-level and user-level systemd
ExecStart lines. The --replace flag stays available for manual
'hermes gateway run --replace' and on the macOS launchd fallback path
(#23387), which is a deliberate manual takeover, not a supervised unit.

Also drop RestartMaxDelaySec / RestartSteps from the templates — they
require systemd v255+ and are silently ignored on older versions. The
_strip_optional_systemd_directives normalizer stays so existing installs
whose on-disk unit still carries those directives aren't flagged as
outdated.

Credit: reported and diagnosed by @Skippy-the-Magnificent-one
(PR #37145); reimplemented here under project authorship because the
original commit was authored under a non-existent email.
---
 hermes_cli/gateway.py                    | 8 ++------
 tests/hermes_cli/test_gateway_service.py | 7 ++++++-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index 335505a1e1c..5ff74259185 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -2409,7 +2409,7 @@ StartLimitIntervalSec=0
 Type=simple
 User={username}
 Group={group_name}
-ExecStart={python_path} -m hermes_cli.main{f" {profile_arg}" if profile_arg else ""} gateway run --replace
+ExecStart={python_path} -m hermes_cli.main{f" {profile_arg}" if profile_arg else ""} gateway run
 WorkingDirectory={working_dir}
 Environment="HOME={home_dir}"
 Environment="USER={username}"
@@ -2419,8 +2419,6 @@ Environment="VIRTUAL_ENV={venv_dir}"
 Environment="HERMES_HOME={hermes_home}"
 Restart=always
 RestartSec=5
-RestartMaxDelaySec=300
-RestartSteps=5
 RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
 KillMode=mixed
 KillSignal=SIGTERM
@@ -2447,15 +2445,13 @@ StartLimitIntervalSec=0
 
 [Service]
 Type=simple
-ExecStart={python_path} -m hermes_cli.main{f" {profile_arg}" if profile_arg else ""} gateway run --replace
+ExecStart={python_path} -m hermes_cli.main{f" {profile_arg}" if profile_arg else ""} gateway run
 WorkingDirectory={working_dir}
 Environment="PATH={sane_path}"
 Environment="VIRTUAL_ENV={venv_dir}"
 Environment="HERMES_HOME={hermes_home}"
 Restart=always
 RestartSec=5
-RestartMaxDelaySec=300
-RestartSteps=5
 RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
 KillMode=mixed
 KillSignal=SIGTERM
diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py
index 18e89fa408d..0b897af01f8 100644
--- a/tests/hermes_cli/test_gateway_service.py
+++ b/tests/hermes_cli/test_gateway_service.py
@@ -1772,7 +1772,12 @@ class TestProfileArg:
         monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: profile_dir)
         unit = gateway_cli.generate_systemd_unit(system=False)
         assert "--profile mybot" in unit
-        assert "gateway run --replace" in unit
+        assert "gateway run" in unit
+        # Under a process supervisor (Restart=always), --replace makes each
+        # restart kill its predecessor → self-kill loop. The systemd unit must
+        # NOT use --replace; the supervisor owns the lifecycle. (--replace stays
+        # on the manual launchd fallback path — see test_launchd_plist_includes_profile.)
+        assert "--replace" not in unit
 
     def test_launchd_plist_includes_profile(self, tmp_path, monkeypatch):
         """generate_launchd_plist should include --profile in ProgramArguments for named profiles."""