mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-26 01:01:40 +00:00
fix(gateway): recover stale pid and planned restart state
This commit is contained in:
parent
284e084bcc
commit
b52123eb15
7 changed files with 646 additions and 79 deletions
|
|
@@ -77,8 +77,10 @@ class TestSystemdServiceRefresh:
|
|||
gateway_cli.systemd_restart()
|
||||
|
||||
assert unit_path.read_text(encoding="utf-8") == "new unit\n"
|
||||
assert calls[:2] == [
|
||||
assert calls[:4] == [
|
||||
["systemctl", "--user", "daemon-reload"],
|
||||
["systemctl", "--user", "show", gateway_cli.get_service_name(), "--no-pager", "--property", "ActiveState,SubState,Result,ExecMainStatus"],
|
||||
["systemctl", "--user", "reset-failed", gateway_cli.get_service_name()],
|
||||
["systemctl", "--user", "reload-or-restart", gateway_cli.get_service_name()],
|
||||
]
|
||||
|
||||
|
|
@@ -474,13 +476,21 @@ class TestGatewaySystemServiceRouting:
|
|||
raise ProcessLookupError()
|
||||
monkeypatch.setattr(os, "kill", fake_kill)
|
||||
|
||||
# Simulate systemctl is-active returning "active" with a new PID
|
||||
# Simulate systemctl reset-failed/start followed by an active unit
|
||||
new_pid = [None]
|
||||
def fake_subprocess_run(cmd, **kwargs):
|
||||
if "is-active" in cmd:
|
||||
result = SimpleNamespace(stdout="active\n", returncode=0)
|
||||
new_pid[0] = 999 # new PID
|
||||
return result
|
||||
if "reset-failed" in cmd:
|
||||
calls.append(("reset-failed", cmd))
|
||||
return SimpleNamespace(stdout="", returncode=0)
|
||||
if "start" in cmd:
|
||||
calls.append(("start", cmd))
|
||||
return SimpleNamespace(stdout="", returncode=0)
|
||||
if "show" in cmd:
|
||||
new_pid[0] = 999
|
||||
return SimpleNamespace(
|
||||
stdout="ActiveState=active\nSubState=running\nResult=success\nExecMainStatus=0\n",
|
||||
returncode=0,
|
||||
)
|
||||
raise AssertionError(f"Unexpected systemctl call: {cmd}")
|
||||
|
||||
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_subprocess_run)
|
||||
|
|
@@ -494,9 +504,131 @@ class TestGatewaySystemServiceRouting:
|
|||
gateway_cli.systemd_restart()
|
||||
|
||||
assert ("self", 654) in calls
|
||||
assert any(call[0] == "reset-failed" for call in calls)
|
||||
assert any(call[0] == "start" for call in calls)
|
||||
out = capsys.readouterr().out.lower()
|
||||
assert "restarted" in out
|
||||
|
||||
def test_systemd_restart_recovers_failed_planned_restart(self, monkeypatch, capsys):
    """Check ``systemd_restart`` when a planned restart left the unit failed.

    Set-up performed here:

    * the runtime status stub reports ``restart_requested`` with a
      ``stopped`` gateway;
    * the self-restart request stub always returns ``False``;
    * the fake ``show`` output reports a failed unit whose
      ``ExecMainStatus`` is ``GATEWAY_SERVICE_RESTART_EXIT_CODE`` until a
      ``start`` command has been seen, and an active/running unit after.

    The test passes when both a ``reset-failed`` and a ``start`` command
    were issued and the captured stdout contains "restarted".
    """
    service_up = False  # flipped once the fake sees a "start" command
    issued = []

    failed_props = (
        "ActiveState=failed\n"
        "SubState=failed\n"
        "Result=exit-code\n"
        f"ExecMainStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}\n"
    )
    running_props = "ActiveState=active\nSubState=running\nResult=success\nExecMainStatus=0\n"

    def fake_subprocess_run(cmd, **kwargs):
        nonlocal service_up
        # "show" is checked first, exactly as the property query must win
        # over the other keywords.
        if "show" in cmd:
            props = running_props if service_up else failed_props
            return SimpleNamespace(stdout=props, returncode=0)
        if "reset-failed" in cmd:
            issued.append(("reset-failed", cmd))
        elif "start" in cmd:
            service_up = True
            issued.append(("start", cmd))
        else:
            raise AssertionError(f"Unexpected command: {cmd}")
        return SimpleNamespace(stdout="", returncode=0)

    def fake_running_pid():
        # A PID only exists once the unit has been started.
        if service_up:
            return 999
        return None

    monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
    monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None)
    monkeypatch.setattr(gateway_cli, "_request_gateway_self_restart", lambda pid: False)
    monkeypatch.setattr(
        "gateway.status.read_runtime_status",
        lambda: {"restart_requested": True, "gateway_state": "stopped"},
    )
    monkeypatch.setattr("gateway.status.get_running_pid", fake_running_pid)
    monkeypatch.setattr(gateway_cli.subprocess, "run", fake_subprocess_run)

    gateway_cli.systemd_restart()

    issued_kinds = {kind for kind, _ in issued}
    assert "reset-failed" in issued_kinds
    assert "start" in issued_kinds
    printed = capsys.readouterr().out.lower()
    assert "restarted" in printed
|
||||
|
||||
def test_systemd_status_surfaces_planned_restart_failure(self, monkeypatch, capsys):
    """Check that ``systemd_status`` prints the stuck-planned-restart warning.

    The unit-properties stub reports a failed unit whose ``ExecMainStatus``
    equals ``GATEWAY_SERVICE_RESTART_EXIT_CODE``; the fake ``_run_systemctl``
    only answers ``status`` and ``is-active`` for the service name and
    raises on anything else.  The expected stdout fragment is
    "Planned restart is stuck in systemd failed state".
    """
    failed_properties = {
        "ActiveState": "failed",
        "SubState": "failed",
        "Result": "exit-code",
        "ExecMainStatus": str(GATEWAY_SERVICE_RESTART_EXIT_CODE),
    }
    unit = SimpleNamespace(exists=lambda: True)
    calls = []

    def fake_run_systemctl(args, **kwargs):
        calls.append(args)
        verb_and_unit = args[:2]
        if verb_and_unit == ["status", gateway_cli.get_service_name()]:
            return SimpleNamespace(returncode=0, stdout="", stderr="")
        if verb_and_unit == ["is-active", gateway_cli.get_service_name()]:
            return SimpleNamespace(returncode=3, stdout="failed\n", stderr="")
        raise AssertionError(f"Unexpected args: {args}")

    # attribute on gateway_cli -> replacement used for this test
    stubs = {
        "_select_systemd_scope": lambda system=False: False,
        "get_systemd_unit_path": lambda system=False: unit,
        "has_conflicting_systemd_units": lambda: False,
        "has_legacy_hermes_units": lambda: False,
        "systemd_unit_is_current": lambda system=False: True,
        "_runtime_health_lines": lambda: ["⚠ Last shutdown reason: Gateway restart requested"],
        "get_systemd_linger_status": lambda: (True, ""),
        # hand out a fresh dict on every call, as a literal would
        "_read_systemd_unit_properties": lambda system=False: dict(failed_properties),
        "_run_systemctl": fake_run_systemctl,
    }
    for attr_name, replacement in stubs.items():
        monkeypatch.setattr(gateway_cli, attr_name, replacement)

    gateway_cli.systemd_status()

    printed = capsys.readouterr().out
    assert "Planned restart is stuck in systemd failed state" in printed
|
||||
|
||||
def test_gateway_status_dispatches_full_flag(self, monkeypatch):
    """Check that ``gateway status`` hands ``full=True`` to ``systemd_status``.

    The platform probes are stubbed so systemd services are reported as
    supported (not Termux, not macOS), only the user unit "exists", and the
    runtime snapshot describes an installed but not running user service.
    ``systemd_status`` is replaced by a recorder; exactly one call with
    ``(deep, system, full) == (False, False, True)`` is expected.
    """
    user_unit = SimpleNamespace(exists=lambda: True)
    system_unit = SimpleNamespace(exists=lambda: False)
    calls = []

    def fake_unit_path(system=False):
        if system:
            return system_unit
        return user_unit

    def fake_snapshot(system=False):
        return gateway_cli.GatewayRuntimeSnapshot(
            manager="systemd (user)",
            service_installed=True,
            service_running=False,
            gateway_pids=(),
            service_scope="user",
        )

    def record_status(deep=False, system=False, full=False):
        calls.append((deep, system, full))

    for attr_name, replacement in (
        ("supports_systemd_services", lambda: True),
        ("is_termux", lambda: False),
        ("is_macos", lambda: False),
        ("get_systemd_unit_path", fake_unit_path),
        ("get_gateway_runtime_snapshot", fake_snapshot),
        ("systemd_status", record_status),
    ):
        monkeypatch.setattr(gateway_cli, attr_name, replacement)

    cli_args = SimpleNamespace(gateway_command="status", deep=False, system=False, full=True)
    gateway_cli.gateway_command(cli_args)

    assert calls == [(False, False, True)]
|
||||
|
||||
def test_gateway_install_passes_system_flags(self, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
||||
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
||||
|
|
@@ -547,11 +679,15 @@ class TestGatewaySystemServiceRouting:
|
|||
)
|
||||
|
||||
calls = []
|
||||
monkeypatch.setattr(gateway_cli, "systemd_status", lambda deep=False, system=False: calls.append((deep, system)))
|
||||
monkeypatch.setattr(
|
||||
gateway_cli,
|
||||
"systemd_status",
|
||||
lambda deep=False, system=False, full=False: calls.append((deep, system, full)),
|
||||
)
|
||||
|
||||
gateway_cli.gateway_command(SimpleNamespace(gateway_command="status", deep=False, system=False))
|
||||
|
||||
assert calls == [(False, False)]
|
||||
assert calls == [(False, False, False)]
|
||||
|
||||
def test_gateway_status_reports_manual_process_when_service_is_stopped(self, monkeypatch, capsys):
|
||||
user_unit = SimpleNamespace(exists=lambda: True)
|
||||
|
|
@@ -565,7 +701,11 @@ class TestGatewaySystemServiceRouting:
|
|||
"get_systemd_unit_path",
|
||||
lambda system=False: system_unit if system else user_unit,
|
||||
)
|
||||
monkeypatch.setattr(gateway_cli, "systemd_status", lambda deep=False, system=False: print("service stopped"))
|
||||
monkeypatch.setattr(
|
||||
gateway_cli,
|
||||
"systemd_status",
|
||||
lambda deep=False, system=False, full=False: print("service stopped"),
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
gateway_cli,
|
||||
"get_gateway_runtime_snapshot",
|
||||
|
|
@@ -1570,6 +1710,23 @@ class TestMigrateLegacyCommand:
|
|||
|
||||
assert called == {"interactive": False, "dry_run": False}
|
||||
|
||||
|
||||
class TestGatewayStatusParser:
    """Command-line parsing checks for ``gateway status``."""

    def test_gateway_status_subparser_accepts_full_flag(self):
        """Run ``gateway status -l --help`` in a subprocess and expect a clean exit.

        The CLI is launched as ``python -m hermes_cli.main`` from
        ``gateway_cli.PROJECT_ROOT``; the test requires exit status 0 and no
        "unrecognized arguments" text on stderr.

        NOTE(review): if the CLI is argparse-based, ``--help`` prints help and
        exits 0 during parsing, possibly before an unknown ``-l`` would be
        reported -- confirm this test actually fails when the flag is removed.
        """
        import subprocess
        import sys

        argv = [sys.executable, "-m", "hermes_cli.main", "gateway", "status", "-l", "--help"]
        completed = subprocess.run(
            argv,
            cwd=str(gateway_cli.PROJECT_ROOT),
            capture_output=True,
            text=True,
            timeout=15,
        )

        assert completed.returncode == 0
        assert "unrecognized arguments" not in completed.stderr
|
||||
|
||||
def test_gateway_command_migrate_legacy_dry_run_passes_through(
|
||||
self, monkeypatch
|
||||
):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue