From 326b146d68bcd58c777a7c71808dbf6e58566617 Mon Sep 17 00:00:00 2001 From: Teknium Date: Sat, 21 Mar 2026 09:26:39 -0700 Subject: [PATCH] fix: prevent systemd restart storm on gateway connection failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cherry-picked from PR #2319 by @itenev. When the gateway fails to connect (e.g. PrivilegedIntentsRequired, missing token), the unit's RestartSec=10 spaces restarts too far apart to ever trip systemd's default start rate limit (5 starts per 10 s), so the service restarts indefinitely, flooding logs and triggering platform-side rate limits. - StartLimitIntervalSec=600 + StartLimitBurst=5 in [Unit] (max 5 start attempts per 10 min) - RestartSec: 10 → 30 - Applied to both templates in gateway.py and to the template in scripts/hermes-gateway --- hermes_cli/gateway.py | 8 ++++++-- scripts/hermes-gateway | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index fb2de2d10..c3315f8d0 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -420,6 +420,8 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) Description={SERVICE_DESCRIPTION} After=network-online.target Wants=network-online.target +StartLimitIntervalSec=600 +StartLimitBurst=5 [Service] Type=simple @@ -434,7 +436,7 @@ Environment="PATH={sane_path}" Environment="VIRTUAL_ENV={venv_dir}" Environment="HERMES_HOME={hermes_home}" Restart=on-failure -RestartSec=10 +RestartSec=30 KillMode=mixed KillSignal=SIGTERM TimeoutStopSec=60 @@ -448,6 +450,8 @@ WantedBy=multi-user.target return f"""[Unit] Description={SERVICE_DESCRIPTION} After=network.target +StartLimitIntervalSec=600 +StartLimitBurst=5 [Service] Type=simple @@ -457,7 +461,7 @@ Environment="PATH={sane_path}" Environment="VIRTUAL_ENV={venv_dir}" Environment="HERMES_HOME={hermes_home}" Restart=on-failure -RestartSec=10 +RestartSec=30 KillMode=mixed KillSignal=SIGTERM TimeoutStopSec=60 diff --git a/scripts/hermes-gateway b/scripts/hermes-gateway index 59fa1056f..b0d45810e 
100755 --- a/scripts/hermes-gateway +++ b/scripts/hermes-gateway @@ -82,13 +82,15 @@ def generate_systemd_unit() -> str: return f"""[Unit] Description={SERVICE_DESCRIPTION} After=network.target +StartLimitIntervalSec=600 +StartLimitBurst=5 [Service] Type=simple ExecStart={python_path} {script_path} run WorkingDirectory={working_dir} Restart=on-failure -RestartSec=10 +RestartSec=30 StandardOutput=journal StandardError=journal