mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-10 03:22:05 +00:00
fix(gateway): also catch restart TimeoutExpired; friendly message
Some checks are pending
Deploy Site / deploy-vercel (push) Waiting to run
Deploy Site / deploy-docs (push) Waiting to run
Docker Build and Publish / build-amd64 (push) Waiting to run
Docker Build and Publish / build-arm64 (push) Waiting to run
Docker Build and Publish / merge (push) Blocked by required conditions
Docker Build and Publish / move-latest (push) Blocked by required conditions
Lint (ruff + ty) / ruff + ty diff (push) Waiting to run
Lint (ruff + ty) / ruff enforcement (blocking) (push) Waiting to run
Lint (ruff + ty) / Windows footguns (blocking) (push) Waiting to run
Nix / nix (macos-latest) (push) Waiting to run
Nix / nix (ubuntu-latest) (push) Waiting to run
OSV-Scanner / Scan lockfiles (push) Waiting to run
Build Skills Index / build-index (push) Waiting to run
Build Skills Index / deploy-with-index (push) Blocked by required conditions
Tests / test (push) Waiting to run
Tests / e2e (push) Waiting to run
uv.lock check / uv lock --check (push) Waiting to run
Some checks are pending
Deploy Site / deploy-vercel (push) Waiting to run
Deploy Site / deploy-docs (push) Waiting to run
Docker Build and Publish / build-amd64 (push) Waiting to run
Docker Build and Publish / build-arm64 (push) Waiting to run
Docker Build and Publish / merge (push) Blocked by required conditions
Docker Build and Publish / move-latest (push) Blocked by required conditions
Lint (ruff + ty) / ruff + ty diff (push) Waiting to run
Lint (ruff + ty) / ruff enforcement (blocking) (push) Waiting to run
Lint (ruff + ty) / Windows footguns (blocking) (push) Waiting to run
Nix / nix (macos-latest) (push) Waiting to run
Nix / nix (ubuntu-latest) (push) Waiting to run
OSV-Scanner / Scan lockfiles (push) Waiting to run
Build Skills Index / build-index (push) Waiting to run
Build Skills Index / deploy-with-index (push) Blocked by required conditions
Tests / test (push) Waiting to run
Tests / e2e (push) Waiting to run
uv.lock check / uv lock --check (push) Waiting to run
Extends #19994 to the restart path. The dashboard spawns 'hermes gateway restart' in the background; when a wedged adapter websocket pushes the drain past the 90s CLI timeout, the dashboard previously surfaced a raw subprocess.TimeoutExpired traceback. This change mirrors systemd_stop()'s TimeoutExpired catch at both forcing-restart sites in systemd_restart(), and adds a test that exercises the no-active-pid branch end-to-end.
This commit is contained in:
parent
dccf1fb6e0
commit
78b0008f44
2 changed files with 53 additions and 0 deletions
|
|
@ -2456,6 +2456,13 @@ def systemd_restart(system: bool = False):
|
|||
_print_systemd_start_limit_wait(system=system)
|
||||
return
|
||||
raise
|
||||
except subprocess.TimeoutExpired:
|
||||
label = _service_scope_label(system)
|
||||
print(
|
||||
f"Gateway {label} service is still restarting after 90s; "
|
||||
"check `hermes gateway status` or logs for final state."
|
||||
)
|
||||
return
|
||||
_wait_for_systemd_service_restart(system=system, previous_pid=pid)
|
||||
return
|
||||
|
||||
|
|
@ -2475,6 +2482,13 @@ def systemd_restart(system: bool = False):
|
|||
_print_systemd_start_limit_wait(system=system)
|
||||
return
|
||||
raise
|
||||
except subprocess.TimeoutExpired:
|
||||
label = _service_scope_label(system)
|
||||
print(
|
||||
f"Gateway {label} service is still restarting after 90s; "
|
||||
"check `hermes gateway status` or logs for final state."
|
||||
)
|
||||
return
|
||||
_wait_for_systemd_service_restart(system=system, previous_pid=pid)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -164,6 +164,45 @@ class TestSystemdServiceRefresh:
|
|||
assert "still stopping after 90s" in output
|
||||
assert "hermes gateway status" in output
|
||||
|
||||
def test_systemd_restart_timeout_prints_status_guidance(self, monkeypatch, capsys):
    """A timed-out `hermes gateway restart` prints guidance, not a traceback.

    Every helper that `systemd_restart()` consults is stubbed so that no
    running pid is reported, and the fake `_run_systemctl` raises
    `subprocess.TimeoutExpired` for everything except `reset-failed`.
    The call must return normally and its stdout must contain the
    "still restarting after 90s" message together with the
    `hermes gateway status` hint.
    """

    def timing_out_systemctl(args, **kwargs):
        # Only the reset-failed pre-step is allowed to succeed; any other
        # systemctl invocation simulates the CLI timeout.
        is_reset_failed = bool(args) and args[0] == "reset-failed"
        if not is_reset_failed:
            raise subprocess.TimeoutExpired(args, kwargs.get("timeout"))
        return SimpleNamespace(returncode=0, stdout="", stderr="")

    # Replacement callables for gateway_cli, keyed by attribute name.
    gateway_stubs = {
        "_select_systemd_scope": lambda system=False: False,
        "_require_service_installed": lambda action, system=False: None,
        "_preflight_user_systemd": lambda: None,
        "refresh_systemd_unit_if_needed": lambda system=False: None,
        "_systemd_main_pid": lambda system=False: None,
        "_recover_pending_systemd_restart": lambda system=False, previous_pid=None: False,
        "_systemd_service_is_start_limited": lambda system=False: False,
        "_run_systemctl": timing_out_systemctl,
    }
    for attr_name, stub in gateway_stubs.items():
        monkeypatch.setattr(gateway_cli, attr_name, stub)
    monkeypatch.setattr(status, "get_running_pid", lambda cleanup_stale=True: None)

    gateway_cli.systemd_restart()

    captured = capsys.readouterr()
    assert "still restarting after 90s" in captured.out
    assert "hermes gateway status" in captured.out
|
||||
|
||||
def test_run_gateway_refreshes_outdated_unit_on_boot(self, tmp_path, monkeypatch):
|
||||
"""run_gateway() should refresh the systemd unit on boot so that
|
||||
restart settings take effect even when the process was respawned
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue