mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-15 04:12:25 +00:00
fix(gateway): also catch restart TimeoutExpired; friendly message
Some checks failed
Deploy Site / deploy-vercel (push) Waiting to run
Deploy Site / deploy-docs (push) Waiting to run
Docker Build and Publish / build-amd64 (push) Waiting to run
Docker Build and Publish / build-arm64 (push) Waiting to run
Docker Build and Publish / merge (push) Blocked by required conditions
Docker Build and Publish / move-latest (push) Blocked by required conditions
Lint (ruff + ty) / ruff + ty diff (push) Waiting to run
Lint (ruff + ty) / ruff enforcement (blocking) (push) Waiting to run
Lint (ruff + ty) / Windows footguns (blocking) (push) Waiting to run
Nix / nix (macos-latest) (push) Waiting to run
Nix / nix (ubuntu-latest) (push) Waiting to run
OSV-Scanner / Scan lockfiles (push) Waiting to run
Tests / test (push) Waiting to run
Tests / e2e (push) Waiting to run
uv.lock check / uv lock --check (push) Waiting to run
Build Skills Index / build-index (push) Has been cancelled
Build Skills Index / deploy-with-index (push) Has been cancelled
Some checks failed
Deploy Site / deploy-vercel (push) Waiting to run
Deploy Site / deploy-docs (push) Waiting to run
Docker Build and Publish / build-amd64 (push) Waiting to run
Docker Build and Publish / build-arm64 (push) Waiting to run
Docker Build and Publish / merge (push) Blocked by required conditions
Docker Build and Publish / move-latest (push) Blocked by required conditions
Lint (ruff + ty) / ruff + ty diff (push) Waiting to run
Lint (ruff + ty) / ruff enforcement (blocking) (push) Waiting to run
Lint (ruff + ty) / Windows footguns (blocking) (push) Waiting to run
Nix / nix (macos-latest) (push) Waiting to run
Nix / nix (ubuntu-latest) (push) Waiting to run
OSV-Scanner / Scan lockfiles (push) Waiting to run
Tests / test (push) Waiting to run
Tests / e2e (push) Waiting to run
uv.lock check / uv lock --check (push) Waiting to run
Build Skills Index / build-index (push) Has been cancelled
Build Skills Index / deploy-with-index (push) Has been cancelled
Extends #19994 to the restart path. The dashboard spawns `hermes gateway restart` in the background; when a wedged adapter websocket pushes the drain past the 90s CLI timeout, the dashboard previously surfaced a raw subprocess.TimeoutExpired traceback. This change mirrors systemd_stop()'s TimeoutExpired catch onto both forcing-restart sites in systemd_restart(), and adds a test that exercises the no-active-pid branch end-to-end.
This commit is contained in:
parent
dccf1fb6e0
commit
78b0008f44
2 changed files with 53 additions and 0 deletions
|
|
@ -2456,6 +2456,13 @@ def systemd_restart(system: bool = False):
|
||||||
_print_systemd_start_limit_wait(system=system)
|
_print_systemd_start_limit_wait(system=system)
|
||||||
return
|
return
|
||||||
raise
|
raise
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
label = _service_scope_label(system)
|
||||||
|
print(
|
||||||
|
f"Gateway {label} service is still restarting after 90s; "
|
||||||
|
"check `hermes gateway status` or logs for final state."
|
||||||
|
)
|
||||||
|
return
|
||||||
_wait_for_systemd_service_restart(system=system, previous_pid=pid)
|
_wait_for_systemd_service_restart(system=system, previous_pid=pid)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
@ -2475,6 +2482,13 @@ def systemd_restart(system: bool = False):
|
||||||
_print_systemd_start_limit_wait(system=system)
|
_print_systemd_start_limit_wait(system=system)
|
||||||
return
|
return
|
||||||
raise
|
raise
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
label = _service_scope_label(system)
|
||||||
|
print(
|
||||||
|
f"Gateway {label} service is still restarting after 90s; "
|
||||||
|
"check `hermes gateway status` or logs for final state."
|
||||||
|
)
|
||||||
|
return
|
||||||
_wait_for_systemd_service_restart(system=system, previous_pid=pid)
|
_wait_for_systemd_service_restart(system=system, previous_pid=pid)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -164,6 +164,45 @@ class TestSystemdServiceRefresh:
|
||||||
assert "still stopping after 90s" in output
|
assert "still stopping after 90s" in output
|
||||||
assert "hermes gateway status" in output
|
assert "hermes gateway status" in output
|
||||||
|
|
||||||
|
def test_systemd_restart_timeout_prints_status_guidance(self, monkeypatch, capsys):
    """A timed-out `hermes gateway restart` prints guidance, not a traceback.

    Follow-up to issue #19937: the dashboard runs `hermes gateway restart`
    in the background, and a wedged adapter websocket can push the drain
    past the 90s CLI timeout. The TimeoutExpired failure mode that was
    handled for stop applies to restart as well, so the command must print
    a friendly status hint instead of letting the exception escape.

    Both PID lookups are stubbed to return None, so this drives the
    no-active-pid branch of ``systemd_restart()``.
    """

    def timing_out_systemctl(args, **kwargs):
        # reset-failed is a pre-step (check=False, 30s) — let it pass.
        is_reset_failed = bool(args) and args[0] == "reset-failed"
        if is_reset_failed:
            return SimpleNamespace(returncode=0, stdout="", stderr="")
        # Every other systemctl invocation behaves as if it timed out.
        raise subprocess.TimeoutExpired(args, kwargs.get("timeout"))

    # Stand-ins for the gateway_cli helpers; the patches are independent
    # of one another, so they are applied from a single table.
    gateway_cli_stubs = {
        "_select_systemd_scope": lambda system=False: False,
        "_require_service_installed": lambda action, system=False: None,
        "_preflight_user_systemd": lambda: None,
        "refresh_systemd_unit_if_needed": lambda system=False: None,
        "_systemd_main_pid": lambda system=False: None,
        "_recover_pending_systemd_restart": (
            lambda system=False, previous_pid=None: False
        ),
        "_systemd_service_is_start_limited": lambda system=False: False,
        "_run_systemctl": timing_out_systemctl,
    }
    for attr_name, stub in gateway_cli_stubs.items():
        monkeypatch.setattr(gateway_cli, attr_name, stub)

    # No gateway process is reported as running.
    monkeypatch.setattr(status, "get_running_pid", lambda cleanup_stale=True: None)

    gateway_cli.systemd_restart()

    captured = capsys.readouterr()
    assert "still restarting after 90s" in captured.out
    assert "hermes gateway status" in captured.out
|
||||||
|
|
||||||
def test_run_gateway_refreshes_outdated_unit_on_boot(self, tmp_path, monkeypatch):
|
def test_run_gateway_refreshes_outdated_unit_on_boot(self, tmp_path, monkeypatch):
|
||||||
"""run_gateway() should refresh the systemd unit on boot so that
|
"""run_gateway() should refresh the systemd unit on boot so that
|
||||||
restart settings take effect even when the process was respawned
|
restart settings take effect even when the process was respawned
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue