mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-13 03:52:00 +00:00
fix(gateway): wait for systemd restart readiness
This commit is contained in:
parent
3cdbf334d5
commit
d797755a1c
4 changed files with 587 additions and 78 deletions
|
|
@ -1,4 +1,5 @@
|
|||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
|
@ -70,6 +71,15 @@ import gateway.platforms.discord as discord_platform # noqa: E402
|
|||
from gateway.platforms.discord import DiscordAdapter # noqa: E402
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _speed_up_command_sync_mutation_pacing(monkeypatch):
    """Make the command-sync mutation interval 0.0 seconds for every test.

    Applied automatically (``autouse``); a test that wants to observe pacing
    can re-patch the same attribute with its own interval.
    """

    def _zero_interval(self):
        return 0.0

    monkeypatch.setattr(
        DiscordAdapter,
        "_command_sync_mutation_interval_seconds",
        _zero_interval,
    )
|
||||
|
||||
|
||||
class FakeTree:
|
||||
def __init__(self):
|
||||
self.sync = AsyncMock(return_value=[])
|
||||
|
|
@ -536,6 +546,136 @@ async def test_post_connect_initialization_skips_sync_when_policy_off(monkeypatc
|
|||
fake_tree.sync.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_post_connect_initialization_skips_same_fingerprint_after_success(tmp_path, monkeypatch):
    """Run post-connect initialization twice and expect a single command sync.

    The fake tree reports the same single desired command on every call, so
    the assertions check that remote commands are fetched once and that the
    command is upserted once across both runs.
    """
    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
    # Point the Hermes home lookup at the per-test temporary directory.
    monkeypatch.setattr("hermes_constants.get_hermes_home", lambda: tmp_path)

    class _DesiredCommand:
        def to_dict(self, tree):
            return {
                "name": "status",
                "description": "Show Hermes status",
                "type": 1,
                "options": [],
            }

    def _desired_commands():
        # A fresh command object per call, like the original lambda.
        return [_DesiredCommand()]

    tree = SimpleNamespace(
        get_commands=_desired_commands,
        fetch_commands=AsyncMock(return_value=[]),
    )
    http = SimpleNamespace(
        **{
            method_name: AsyncMock()
            for method_name in (
                "upsert_global_command",
                "edit_global_command",
                "delete_global_command",
            )
        }
    )
    bot_user = SimpleNamespace(id=999)
    adapter._client = SimpleNamespace(
        tree=tree,
        http=http,
        application_id=999,
        user=bot_user,
    )

    for _ in range(2):
        await adapter._run_post_connect_initialization()

    tree.fetch_commands.assert_awaited_once()
    http.upsert_global_command.assert_awaited_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_post_connect_initialization_respects_discord_retry_after(tmp_path, monkeypatch):
    """A sync failure carrying ``retry_after`` must hold off the next attempt.

    The patched sync raises an error with ``retry_after = 123.0``. After two
    initialization runs the sync must have been awaited only once, and the
    state file must record the retry window for application id ``999``.
    """
    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
    # Point the Hermes home lookup at the per-test temporary directory.
    monkeypatch.setattr("hermes_constants.get_hermes_home", lambda: tmp_path)

    class _DesiredCommand:
        def to_dict(self, tree):
            return {
                "name": "status",
                "description": "Show Hermes status",
                "type": 1,
                "options": [],
            }

    def _desired_commands():
        return [_DesiredCommand()]

    adapter._client = SimpleNamespace(
        tree=SimpleNamespace(get_commands=_desired_commands),
        application_id=999,
        user=SimpleNamespace(id=999),
    )

    class _DiscordRateLimit(RuntimeError):
        retry_after = 123.0

    rate_limit_error = _DiscordRateLimit("discord rate limited")
    sync = AsyncMock(side_effect=rate_limit_error)
    monkeypatch.setattr(adapter, "_safe_sync_slash_commands", sync)

    for _ in range(2):
        await adapter._run_post_connect_initialization()

    sync.assert_awaited_once()

    state_path = tmp_path / discord_platform._DISCORD_COMMAND_SYNC_STATE_FILE
    persisted = json.loads(state_path.read_text())
    entry = persisted["999"]
    assert entry["retry_after"] == 123.0
    assert entry["retry_after_until"] > entry["last_attempt_at"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_safe_sync_slash_commands_paces_mutation_writes(monkeypatch):
    """Two command creations with a 1.25s interval must sleep exactly once.

    ``asyncio.sleep`` as seen by the Discord platform module is replaced with
    a recorder, so the test does not actually wait.
    """
    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))

    def _interval(self):
        return 1.25

    monkeypatch.setattr(
        DiscordAdapter,
        "_command_sync_mutation_interval_seconds",
        _interval,
    )

    sleeps = []

    async def _record_sleep(delay):
        sleeps.append(delay)

    monkeypatch.setattr(discord_platform.asyncio, "sleep", _record_sleep)

    class _DesiredCommand:
        def __init__(self, payload):
            self._payload = payload

        def to_dict(self, tree):
            assert tree is not None
            # Return a copy so callers cannot mutate the stored payload.
            return dict(self._payload)

    desired_one = {
        "name": "status",
        "description": "Show Hermes status",
        "type": 1,
        "options": [],
    }
    desired_two = {
        "name": "debug",
        "description": "Generate a debug report",
        "type": 1,
        "options": [],
    }

    def _desired_commands():
        return [_DesiredCommand(desired_one), _DesiredCommand(desired_two)]

    tree = SimpleNamespace(
        get_commands=_desired_commands,
        fetch_commands=AsyncMock(return_value=[]),
    )
    http = SimpleNamespace(
        **{
            method_name: AsyncMock()
            for method_name in (
                "upsert_global_command",
                "edit_global_command",
                "delete_global_command",
            )
        }
    )
    adapter._client = SimpleNamespace(
        tree=tree,
        http=http,
        application_id=999,
        user=SimpleNamespace(id=999),
    )

    summary = await adapter._safe_sync_slash_commands()

    assert summary["created"] == 2
    assert http.upsert_global_command.await_count == 2
    assert sleeps == [1.25]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_safe_sync_reads_permission_attrs_from_existing_command():
|
||||
"""Regression: AppCommand.to_dict() in discord.py does NOT include
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
import os
|
||||
import pwd
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
|
|
@ -90,6 +91,13 @@ class TestSystemdServiceRefresh:
|
|||
monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n")
|
||||
|
||||
calls = []
|
||||
monkeypatch.setattr("gateway.status.get_running_pid", lambda: None)
|
||||
monkeypatch.setattr(gateway_cli, "_recover_pending_systemd_restart", lambda system=False, previous_pid=None: False)
|
||||
monkeypatch.setattr(
|
||||
gateway_cli,
|
||||
"_wait_for_systemd_service_restart",
|
||||
lambda system=False, previous_pid=None: calls.append(("wait", system, previous_pid)) or True,
|
||||
)
|
||||
|
||||
def fake_run(cmd, check=True, **kwargs):
|
||||
calls.append(cmd)
|
||||
|
|
@ -100,11 +108,12 @@ class TestSystemdServiceRefresh:
|
|||
gateway_cli.systemd_restart()
|
||||
|
||||
assert unit_path.read_text(encoding="utf-8") == "new unit\n"
|
||||
assert calls[:4] == [
|
||||
assert calls[:5] == [
|
||||
["systemctl", "--user", "daemon-reload"],
|
||||
["systemctl", "--user", "show", gateway_cli.get_service_name(), "--no-pager", "--property", "ActiveState,SubState,Result,ExecMainStatus"],
|
||||
["systemctl", "--user", "show", gateway_cli.get_service_name(), "--no-pager", "--property", "ActiveState,SubState,Result,ExecMainStatus,MainPID"],
|
||||
["systemctl", "--user", "reset-failed", gateway_cli.get_service_name()],
|
||||
["systemctl", "--user", "reload-or-restart", gateway_cli.get_service_name()],
|
||||
["systemctl", "--user", "restart", gateway_cli.get_service_name()],
|
||||
("wait", False, None),
|
||||
]
|
||||
|
||||
def test_systemd_stop_marks_running_gateway_as_planned_stop(self, monkeypatch):
|
||||
|
|
@ -611,62 +620,141 @@ class TestGatewayServiceDetection:
|
|||
assert gateway_cli._is_service_running() is False
|
||||
|
||||
class TestGatewaySystemServiceRouting:
|
||||
def test_systemd_restart_self_requests_graceful_restart_and_waits(self, monkeypatch, capsys):
|
||||
def test_systemd_restart_gracefully_restarts_running_service_and_waits(self, monkeypatch, capsys):
|
||||
calls = []
|
||||
|
||||
monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
|
||||
monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
|
||||
monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: calls.append(("refresh", system)))
|
||||
monkeypatch.setattr(gateway_cli, "_get_restart_drain_timeout", lambda: 12.0)
|
||||
monkeypatch.setattr(
|
||||
"gateway.status.get_running_pid",
|
||||
lambda: 654,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
gateway_cli,
|
||||
"_request_gateway_self_restart",
|
||||
lambda pid: calls.append(("self", pid)) or True,
|
||||
"_graceful_restart_via_sigusr1",
|
||||
lambda pid, timeout: calls.append(("graceful", pid, timeout)) or True,
|
||||
)
|
||||
|
||||
# Simulate: old process dies immediately, new process becomes active
|
||||
kill_call_count = [0]
|
||||
def fake_kill(pid, sig):
|
||||
kill_call_count[0] += 1
|
||||
if kill_call_count[0] >= 2: # first call checks, second = dead
|
||||
raise ProcessLookupError()
|
||||
monkeypatch.setattr(os, "kill", fake_kill)
|
||||
|
||||
# Simulate systemctl reset-failed/start followed by an active unit
|
||||
new_pid = [None]
|
||||
# Simulate systemctl reset-failed/restart followed by an active unit.
|
||||
# A plain start does not break systemd's auto-restart timer once the
|
||||
# old gateway has exited with the planned restart code.
|
||||
def fake_subprocess_run(cmd, **kwargs):
|
||||
if "reset-failed" in cmd:
|
||||
calls.append(("reset-failed", cmd))
|
||||
return SimpleNamespace(stdout="", returncode=0)
|
||||
if "start" in cmd:
|
||||
calls.append(("start", cmd))
|
||||
if "restart" in cmd:
|
||||
calls.append(("restart", cmd))
|
||||
return SimpleNamespace(stdout="", returncode=0)
|
||||
if "show" in cmd:
|
||||
new_pid[0] = 999
|
||||
return SimpleNamespace(
|
||||
stdout="ActiveState=active\nSubState=running\nResult=success\nExecMainStatus=0\n",
|
||||
returncode=0,
|
||||
)
|
||||
raise AssertionError(f"Unexpected systemctl call: {cmd}")
|
||||
|
||||
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_subprocess_run)
|
||||
# get_running_pid returns new PID after restart
|
||||
pid_calls = [0]
|
||||
def fake_get_pid():
|
||||
pid_calls[0] += 1
|
||||
return 999 if pid_calls[0] > 1 else 654
|
||||
monkeypatch.setattr("gateway.status.get_running_pid", fake_get_pid)
|
||||
monkeypatch.setattr(
|
||||
gateway_cli,
|
||||
"_wait_for_systemd_service_restart",
|
||||
lambda system=False, previous_pid=None: calls.append(("wait", system, previous_pid)) or True,
|
||||
)
|
||||
|
||||
gateway_cli.systemd_restart()
|
||||
|
||||
assert ("self", 654) in calls
|
||||
assert ("graceful", 654, 17.0) in calls
|
||||
assert any(call[0] == "reset-failed" for call in calls)
|
||||
assert any(call[0] == "start" for call in calls)
|
||||
assert any(call[0] == "restart" for call in calls)
|
||||
assert ("wait", False, 654) in calls
|
||||
out = capsys.readouterr().out.lower()
|
||||
assert "restarted" in out
|
||||
assert "restarting gracefully" in out
|
||||
|
||||
def test_systemd_restart_uses_systemd_main_pid_when_pid_file_is_missing(self, monkeypatch, capsys):
    """Use the unit's ``MainPID`` when ``get_running_pid`` returns ``None``.

    With a drain timeout of 10.0 the graceful restart is expected to be
    requested for pid 777 with a 15.0 timeout, followed by a wait that is
    given 777 as the previous pid.
    """
    calls = []

    def _unit_properties(system=False):
        return {
            "ActiveState": "active",
            "SubState": "running",
            "Result": "success",
            "ExecMainStatus": "0",
            "MainPID": "777",
        }

    def _graceful_restart(pid, timeout):
        calls.append(("graceful", pid, timeout))
        return True

    def _systemctl(args, **kwargs):
        calls.append(args)
        return SimpleNamespace(stdout="", returncode=0)

    def _wait_for_restart(system=False, previous_pid=None):
        calls.append(("wait", system, previous_pid))
        return True

    monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
    monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
    monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None)
    monkeypatch.setattr(gateway_cli, "_get_restart_drain_timeout", lambda: 10.0)
    # No PID file: the status helper reports no running gateway.
    monkeypatch.setattr("gateway.status.get_running_pid", lambda: None)
    monkeypatch.setattr(gateway_cli, "_read_systemd_unit_properties", _unit_properties)
    monkeypatch.setattr(gateway_cli, "_graceful_restart_via_sigusr1", _graceful_restart)
    monkeypatch.setattr(gateway_cli, "_run_systemctl", _systemctl)
    monkeypatch.setattr(gateway_cli, "_wait_for_systemd_service_restart", _wait_for_restart)

    gateway_cli.systemd_restart()

    assert ("graceful", 777, 15.0) in calls
    assert ("wait", False, 777) in calls
    printed = capsys.readouterr().out.lower()
    assert "restarting gracefully (pid 777)" in printed
|
||||
|
||||
def test_wait_for_systemd_restart_waits_for_runtime_running(self, monkeypatch, capsys):
    """The wait helper returns True once the new pid reports ``running``.

    systemd reports an active unit with ``MainPID`` 999 (different from the
    previous pid 777) and the runtime status for that pid is ``running``.
    """

    def _unit_properties(system=False):
        return {
            "ActiveState": "active",
            "SubState": "running",
            "Result": "success",
            "ExecMainStatus": "0",
            "MainPID": "999",
        }

    def _runtime_status(pid):
        return {"pid": pid, "gateway_state": "running"}

    monkeypatch.setattr(gateway_cli, "_read_systemd_unit_properties", _unit_properties)
    monkeypatch.setattr("gateway.status.get_running_pid", lambda: None)
    monkeypatch.setattr(gateway_cli, "_gateway_runtime_status_for_pid", _runtime_status)

    restarted = gateway_cli._wait_for_systemd_service_restart(previous_pid=777, timeout=0.1)

    assert restarted is True
    printed = capsys.readouterr().out.lower()
    assert "restarted (pid 999)" in printed
|
||||
|
||||
def test_systemd_restart_reports_start_limit_hit(self, monkeypatch, capsys):
    """A ``start-limit-hit`` restart failure is reported with a recovery hint.

    The fake ``systemctl`` reports an inactive unit, accepts ``reset-failed``
    and fails ``restart`` with a ``CalledProcessError`` whose stderr mentions
    ``start-limit-hit``. The printed output must mention the rate limit and
    ``reset-failed``.
    """
    calls = []

    monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
    monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
    monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None)
    monkeypatch.setattr("gateway.status.get_running_pid", lambda: None)
    monkeypatch.setattr(
        gateway_cli,
        "_recover_pending_systemd_restart",
        lambda system=False, previous_pid=None: False,
    )

    def fake_run_systemctl(args, **kwargs):
        calls.append(args)
        verb = args[0]
        if verb == "restart":
            raise subprocess.CalledProcessError(
                1,
                ["systemctl", "--user", *args],
                stderr="Job failed. See result 'start-limit-hit'.",
            )
        if verb == "show":
            return SimpleNamespace(
                stdout="ActiveState=inactive\nSubState=dead\nResult=success\nExecMainStatus=0\nMainPID=0\n",
                stderr="",
                returncode=0,
            )
        if verb == "reset-failed":
            return SimpleNamespace(stdout="", stderr="", returncode=0)
        # Any other systemctl verb is unexpected for this scenario.
        raise AssertionError(f"Unexpected args: {args}")

    monkeypatch.setattr(gateway_cli, "_run_systemctl", fake_run_systemctl)

    gateway_cli.systemd_restart()

    assert ["restart", gateway_cli.get_service_name()] in calls
    printed = capsys.readouterr().out.lower()
    assert "rate-limited by systemd" in printed
    assert "reset-failed" in printed
|
||||
|
||||
def test_systemd_restart_recovers_failed_planned_restart(self, monkeypatch, capsys):
|
||||
monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
|
||||
|
|
@ -711,6 +799,11 @@ class TestGatewaySystemServiceRouting:
|
|||
"gateway.status.get_running_pid",
|
||||
lambda: 999 if started["value"] else None,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
gateway_cli,
|
||||
"_gateway_runtime_status_for_pid",
|
||||
lambda pid: {"pid": pid, "gateway_state": "running"},
|
||||
)
|
||||
|
||||
gateway_cli.systemd_restart()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue