mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: prevent systemd restart storm on gateway connection failure
Cherry-picked from PR #2319 by @itenev. When the gateway fails to connect (e.g. PrivilegedIntentsRequired, missing token), systemd's default RestartSec=10 with no start rate limit causes rapid reconnect storms flooding logs and triggering platform-side rate limits. - StartLimitIntervalSec=600 + StartLimitBurst=5 in [Unit] (max 5 restarts per 10 min) - RestartSec: 10 → 30 - Applied to both templates in gateway.py and scripts/hermes-gateway
This commit is contained in:
parent
3ba6043c62
commit
326b146d68
2 changed files with 9 additions and 3 deletions
|
|
@ -420,6 +420,8 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
|
|||
Description={SERVICE_DESCRIPTION}
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
StartLimitIntervalSec=600
|
||||
StartLimitBurst=5
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
|
|
@ -434,7 +436,7 @@ Environment="PATH={sane_path}"
|
|||
Environment="VIRTUAL_ENV={venv_dir}"
|
||||
Environment="HERMES_HOME={hermes_home}"
|
||||
Restart=on-failure
|
||||
RestartSec=10
|
||||
RestartSec=30
|
||||
KillMode=mixed
|
||||
KillSignal=SIGTERM
|
||||
TimeoutStopSec=60
|
||||
|
|
@ -448,6 +450,8 @@ WantedBy=multi-user.target
|
|||
return f"""[Unit]
|
||||
Description={SERVICE_DESCRIPTION}
|
||||
After=network.target
|
||||
StartLimitIntervalSec=600
|
||||
StartLimitBurst=5
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
|
|
@ -457,7 +461,7 @@ Environment="PATH={sane_path}"
|
|||
Environment="VIRTUAL_ENV={venv_dir}"
|
||||
Environment="HERMES_HOME={hermes_home}"
|
||||
Restart=on-failure
|
||||
RestartSec=10
|
||||
RestartSec=30
|
||||
KillMode=mixed
|
||||
KillSignal=SIGTERM
|
||||
TimeoutStopSec=60
|
||||
|
|
|
|||
|
|
@ -82,13 +82,15 @@ def generate_systemd_unit() -> str:
|
|||
return f"""[Unit]
|
||||
Description={SERVICE_DESCRIPTION}
|
||||
After=network.target
|
||||
StartLimitIntervalSec=600
|
||||
StartLimitBurst=5
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart={python_path} {script_path} run
|
||||
WorkingDirectory={working_dir}
|
||||
Restart=on-failure
|
||||
RestartSec=10
|
||||
RestartSec=30
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue