mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
When /update runs via Telegram, hermes update --gateway is spawned inside
the gateway's systemd cgroup. The update process itself calls
systemctl restart hermes-gateway, which tears down the cgroup with
KillMode=mixed — SIGKILL to all remaining processes. The wrapping bash
shell is killed before it can execute the exit-code epilogue, so
.update_exit_code is never created. The new gateway's update watcher
then polls for 30 minutes and sends a spurious timeout message.
Fix: write .update_exit_code from Python inside cmd_update() immediately
after the git pull + pip install succeed ("Update complete!"), before
attempting the gateway restart. The shell epilogue still writes it too
(idempotent overwrite), but now the marker exists even when the process
is killed mid-restart.
917 lines
37 KiB
Python
917 lines
37 KiB
Python
"""Tests for cmd_update gateway auto-restart — systemd + launchd coverage.
|
|
|
|
Ensures ``hermes update`` correctly detects running gateways managed by
|
|
systemd (Linux) or launchd (macOS) and restarts/informs the user properly,
|
|
rather than leaving zombie processes or telling users to manually restart
|
|
when launchd will auto-respawn.
|
|
"""
|
|
|
|
import subprocess
|
|
from types import SimpleNamespace
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
import pytest
|
|
|
|
import hermes_cli.gateway as gateway_cli
|
|
from hermes_cli.main import cmd_update
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _make_run_side_effect(
|
|
branch="main",
|
|
verify_ok=True,
|
|
commit_count="3",
|
|
systemd_active=False,
|
|
system_service_active=False,
|
|
system_restart_rc=0,
|
|
launchctl_loaded=False,
|
|
):
|
|
"""Build a subprocess.run side_effect that simulates git + service commands."""
|
|
|
|
def side_effect(cmd, **kwargs):
|
|
joined = " ".join(str(c) for c in cmd)
|
|
|
|
# git rev-parse --abbrev-ref HEAD
|
|
if "rev-parse" in joined and "--abbrev-ref" in joined:
|
|
return subprocess.CompletedProcess(cmd, 0, stdout=f"{branch}\n", stderr="")
|
|
|
|
# git rev-parse --verify origin/{branch}
|
|
if "rev-parse" in joined and "--verify" in joined:
|
|
rc = 0 if verify_ok else 128
|
|
return subprocess.CompletedProcess(cmd, rc, stdout="", stderr="")
|
|
|
|
# git rev-list HEAD..origin/{branch} --count
|
|
if "rev-list" in joined:
|
|
return subprocess.CompletedProcess(cmd, 0, stdout=f"{commit_count}\n", stderr="")
|
|
|
|
# systemctl list-units hermes-gateway* — discover all gateway services
|
|
if "systemctl" in joined and "list-units" in joined:
|
|
if "--user" in joined and systemd_active:
|
|
return subprocess.CompletedProcess(
|
|
cmd, 0,
|
|
stdout="hermes-gateway.service loaded active running Hermes Gateway\n",
|
|
stderr="",
|
|
)
|
|
elif "--user" not in joined and system_service_active:
|
|
return subprocess.CompletedProcess(
|
|
cmd, 0,
|
|
stdout="hermes-gateway.service loaded active running Hermes Gateway\n",
|
|
stderr="",
|
|
)
|
|
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
|
|
|
# systemctl is-active — distinguish --user from system scope
|
|
if "systemctl" in joined and "is-active" in joined:
|
|
if "--user" in joined:
|
|
if systemd_active:
|
|
return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
|
|
return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="")
|
|
else:
|
|
# System-level check (no --user)
|
|
if system_service_active:
|
|
return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
|
|
return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="")
|
|
|
|
# systemctl restart — distinguish --user from system scope
|
|
if "systemctl" in joined and "restart" in joined:
|
|
if "--user" not in joined and system_service_active:
|
|
stderr = "" if system_restart_rc == 0 else "Failed to restart: Permission denied"
|
|
return subprocess.CompletedProcess(cmd, system_restart_rc, stdout="", stderr=stderr)
|
|
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
|
|
|
# launchctl list ai.hermes.gateway
|
|
if "launchctl" in joined and "list" in joined:
|
|
if launchctl_loaded:
|
|
return subprocess.CompletedProcess(cmd, 0, stdout="PID\tStatus\tLabel\n123\t0\tai.hermes.gateway\n", stderr="")
|
|
return subprocess.CompletedProcess(cmd, 113, stdout="", stderr="Could not find service")
|
|
|
|
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
|
|
|
return side_effect
|
|
|
|
|
|
@pytest.fixture
|
|
def mock_args():
|
|
return SimpleNamespace()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Launchd plist includes --replace
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestLaunchdPlistReplace:
|
|
"""The generated launchd plist must include --replace so respawned
|
|
gateways kill stale instances."""
|
|
|
|
def test_plist_contains_replace_flag(self):
|
|
plist = gateway_cli.generate_launchd_plist()
|
|
assert "--replace" in plist
|
|
|
|
def test_plist_program_arguments_order(self):
|
|
"""--replace comes after 'run' in the ProgramArguments."""
|
|
plist = gateway_cli.generate_launchd_plist()
|
|
lines = [line.strip() for line in plist.splitlines()]
|
|
# Find 'run' and '--replace' in the string entries
|
|
string_values = [
|
|
line.replace("<string>", "").replace("</string>", "")
|
|
for line in lines
|
|
if "<string>" in line and "</string>" in line
|
|
]
|
|
assert "run" in string_values
|
|
assert "--replace" in string_values
|
|
run_idx = string_values.index("run")
|
|
replace_idx = string_values.index("--replace")
|
|
assert replace_idx == run_idx + 1
|
|
|
|
|
|
class TestLaunchdPlistPath:
|
|
def test_plist_contains_environment_variables(self):
|
|
plist = gateway_cli.generate_launchd_plist()
|
|
assert "<key>EnvironmentVariables</key>" in plist
|
|
assert "<key>PATH</key>" in plist
|
|
assert "<key>VIRTUAL_ENV</key>" in plist
|
|
assert "<key>HERMES_HOME</key>" in plist
|
|
|
|
def test_plist_path_includes_venv_bin(self):
|
|
plist = gateway_cli.generate_launchd_plist()
|
|
detected = gateway_cli._detect_venv_dir()
|
|
venv_bin = str(detected / "bin") if detected else str(gateway_cli.PROJECT_ROOT / "venv" / "bin")
|
|
assert venv_bin in plist
|
|
|
|
def test_plist_path_starts_with_venv_bin(self):
|
|
plist = gateway_cli.generate_launchd_plist()
|
|
lines = plist.splitlines()
|
|
for i, line in enumerate(lines):
|
|
if "<key>PATH</key>" in line.strip():
|
|
path_value = lines[i + 1].strip()
|
|
path_value = path_value.replace("<string>", "").replace("</string>", "")
|
|
detected = gateway_cli._detect_venv_dir()
|
|
venv_bin = str(detected / "bin") if detected else str(gateway_cli.PROJECT_ROOT / "venv" / "bin")
|
|
assert path_value.startswith(venv_bin + ":")
|
|
break
|
|
else:
|
|
raise AssertionError("PATH key not found in plist")
|
|
|
|
def test_plist_path_includes_node_modules_bin(self):
|
|
plist = gateway_cli.generate_launchd_plist()
|
|
node_bin = str(gateway_cli.PROJECT_ROOT / "node_modules" / ".bin")
|
|
lines = plist.splitlines()
|
|
for i, line in enumerate(lines):
|
|
if "<key>PATH</key>" in line.strip():
|
|
path_value = lines[i + 1].strip()
|
|
path_value = path_value.replace("<string>", "").replace("</string>", "")
|
|
assert node_bin in path_value.split(":")
|
|
break
|
|
else:
|
|
raise AssertionError("PATH key not found in plist")
|
|
|
|
def test_plist_path_includes_current_env_path(self, monkeypatch):
|
|
monkeypatch.setenv("PATH", "/custom/bin:/usr/bin:/bin")
|
|
plist = gateway_cli.generate_launchd_plist()
|
|
assert "/custom/bin" in plist
|
|
|
|
def test_plist_path_deduplicates_venv_bin_when_already_in_path(self, monkeypatch):
|
|
detected = gateway_cli._detect_venv_dir()
|
|
venv_bin = str(detected / "bin") if detected else str(gateway_cli.PROJECT_ROOT / "venv" / "bin")
|
|
monkeypatch.setenv("PATH", f"{venv_bin}:/usr/bin:/bin")
|
|
plist = gateway_cli.generate_launchd_plist()
|
|
lines = plist.splitlines()
|
|
for i, line in enumerate(lines):
|
|
if "<key>PATH</key>" in line.strip():
|
|
path_value = lines[i + 1].strip()
|
|
path_value = path_value.replace("<string>", "").replace("</string>", "")
|
|
parts = path_value.split(":")
|
|
assert parts.count(venv_bin) == 1
|
|
break
|
|
else:
|
|
raise AssertionError("PATH key not found in plist")
|
|
|
|
|
|
class TestLaunchdPlistCurrentness:
|
|
def test_launchd_plist_is_current_ignores_path_drift(self, tmp_path, monkeypatch):
|
|
plist_path = tmp_path / "ai.hermes.gateway.plist"
|
|
monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
|
|
|
|
monkeypatch.setenv("PATH", "/custom/bin:/usr/bin:/bin")
|
|
plist_path.write_text(gateway_cli.generate_launchd_plist(), encoding="utf-8")
|
|
|
|
monkeypatch.setenv("PATH", "/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin")
|
|
|
|
assert gateway_cli.launchd_plist_is_current() is True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# cmd_update — macOS launchd detection
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestLaunchdPlistRefresh:
|
|
"""refresh_launchd_plist_if_needed rewrites stale plists (like systemd's
|
|
refresh_systemd_unit_if_needed)."""
|
|
|
|
def test_refresh_rewrites_stale_plist(self, tmp_path, monkeypatch):
|
|
plist_path = tmp_path / "ai.hermes.gateway.plist"
|
|
plist_path.write_text("<plist>old content</plist>")
|
|
|
|
monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
|
|
|
|
calls = []
|
|
def fake_run(cmd, check=False, **kwargs):
|
|
calls.append(cmd)
|
|
return SimpleNamespace(returncode=0, stdout="", stderr="")
|
|
|
|
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
|
|
|
result = gateway_cli.refresh_launchd_plist_if_needed()
|
|
|
|
assert result is True
|
|
# Plist should now contain the generated content (which includes --replace)
|
|
assert "--replace" in plist_path.read_text()
|
|
# Should have booted out then bootstrapped
|
|
assert any("bootout" in str(c) for c in calls)
|
|
assert any("bootstrap" in str(c) for c in calls)
|
|
|
|
def test_refresh_skips_when_current(self, tmp_path, monkeypatch):
|
|
plist_path = tmp_path / "ai.hermes.gateway.plist"
|
|
monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
|
|
|
|
# Write the current expected content
|
|
plist_path.write_text(gateway_cli.generate_launchd_plist())
|
|
|
|
calls = []
|
|
monkeypatch.setattr(
|
|
gateway_cli.subprocess, "run",
|
|
lambda cmd, **kw: calls.append(cmd) or SimpleNamespace(returncode=0),
|
|
)
|
|
|
|
result = gateway_cli.refresh_launchd_plist_if_needed()
|
|
|
|
assert result is False
|
|
assert len(calls) == 0 # No launchctl calls needed
|
|
|
|
def test_refresh_skips_when_no_plist(self, tmp_path, monkeypatch):
|
|
plist_path = tmp_path / "nonexistent.plist"
|
|
monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
|
|
|
|
result = gateway_cli.refresh_launchd_plist_if_needed()
|
|
assert result is False
|
|
|
|
def test_launchd_start_calls_refresh(self, tmp_path, monkeypatch):
|
|
"""launchd_start refreshes the plist before starting."""
|
|
plist_path = tmp_path / "ai.hermes.gateway.plist"
|
|
plist_path.write_text("<plist>old</plist>")
|
|
monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
|
|
|
|
calls = []
|
|
def fake_run(cmd, check=False, **kwargs):
|
|
calls.append(cmd)
|
|
return SimpleNamespace(returncode=0, stdout="", stderr="")
|
|
|
|
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
|
|
|
gateway_cli.launchd_start()
|
|
|
|
# First calls should be refresh (bootout/bootstrap), then kickstart
|
|
cmd_strs = [" ".join(c) for c in calls]
|
|
assert any("bootout" in s for s in cmd_strs)
|
|
assert any("kickstart" in s for s in cmd_strs)
|
|
|
|
def test_launchd_start_recreates_missing_plist_and_loads_service(self, tmp_path, monkeypatch):
|
|
"""launchd_start self-heals when the plist file is missing entirely."""
|
|
plist_path = tmp_path / "ai.hermes.gateway.plist"
|
|
assert not plist_path.exists()
|
|
|
|
monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
|
|
|
|
calls = []
|
|
def fake_run(cmd, check=False, **kwargs):
|
|
calls.append(cmd)
|
|
return SimpleNamespace(returncode=0, stdout="", stderr="")
|
|
|
|
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
|
|
|
gateway_cli.launchd_start()
|
|
|
|
# Should have created the plist
|
|
assert plist_path.exists()
|
|
assert "--replace" in plist_path.read_text()
|
|
|
|
cmd_strs = [" ".join(c) for c in calls]
|
|
# Should bootstrap the new plist, then kickstart
|
|
assert any("bootstrap" in s for s in cmd_strs)
|
|
assert any("kickstart" in s for s in cmd_strs)
|
|
# Should NOT call bootout (nothing to bootout)
|
|
assert not any("bootout" in s for s in cmd_strs)
|
|
|
|
|
|
class TestCmdUpdateLaunchdRestart:
|
|
"""cmd_update correctly detects and handles launchd on macOS."""
|
|
|
|
@patch("shutil.which", return_value=None)
|
|
@patch("subprocess.run")
|
|
def test_update_detects_launchd_and_skips_manual_restart_message(
|
|
self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch,
|
|
):
|
|
"""When launchd is running the gateway, update should print
|
|
'auto-restart via launchd' instead of 'Restart it with: hermes gateway run'."""
|
|
# Create a fake launchd plist so is_macos + plist.exists() passes
|
|
plist_path = tmp_path / "ai.hermes.gateway.plist"
|
|
plist_path.write_text("<plist/>")
|
|
|
|
monkeypatch.setattr(
|
|
gateway_cli, "is_macos", lambda: True,
|
|
)
|
|
monkeypatch.setattr(
|
|
gateway_cli, "get_launchd_plist_path", lambda: plist_path,
|
|
)
|
|
|
|
mock_run.side_effect = _make_run_side_effect(
|
|
commit_count="3",
|
|
launchctl_loaded=True,
|
|
)
|
|
|
|
# Mock launchd_restart + find_gateway_pids (new code discovers all gateways)
|
|
with patch.object(gateway_cli, "launchd_restart") as mock_launchd_restart, \
|
|
patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
|
|
cmd_update(mock_args)
|
|
|
|
captured = capsys.readouterr().out
|
|
assert "Restarted" in captured
|
|
assert "Restart manually: hermes gateway run" not in captured
|
|
mock_launchd_restart.assert_called_once_with()
|
|
|
|
@patch("shutil.which", return_value=None)
|
|
@patch("subprocess.run")
|
|
def test_update_without_launchd_shows_manual_restart(
|
|
self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch,
|
|
):
|
|
"""When no service manager is running but manual gateway is found, show manual restart hint."""
|
|
monkeypatch.setattr(
|
|
gateway_cli, "is_macos", lambda: True,
|
|
)
|
|
plist_path = tmp_path / "ai.hermes.gateway.plist"
|
|
# plist does NOT exist — no launchd service
|
|
monkeypatch.setattr(
|
|
gateway_cli, "get_launchd_plist_path", lambda: plist_path,
|
|
)
|
|
|
|
mock_run.side_effect = _make_run_side_effect(
|
|
commit_count="3",
|
|
launchctl_loaded=False,
|
|
)
|
|
|
|
# Simulate a manual gateway process found by find_gateway_pids
|
|
with patch.object(gateway_cli, "find_gateway_pids", return_value=[12345]), \
|
|
patch("os.kill"):
|
|
cmd_update(mock_args)
|
|
|
|
captured = capsys.readouterr().out
|
|
assert "Restart manually: hermes gateway run" in captured
|
|
|
|
@patch("shutil.which", return_value=None)
|
|
@patch("subprocess.run")
|
|
def test_update_with_systemd_still_restarts_via_systemd(
|
|
self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
|
|
):
|
|
"""On Linux with systemd active, update should restart via systemctl."""
|
|
monkeypatch.setattr(
|
|
gateway_cli, "is_macos", lambda: False,
|
|
)
|
|
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
|
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
|
|
|
mock_run.side_effect = _make_run_side_effect(
|
|
commit_count="3",
|
|
systemd_active=True,
|
|
)
|
|
|
|
with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
|
|
cmd_update(mock_args)
|
|
|
|
captured = capsys.readouterr().out
|
|
assert "Restarted hermes-gateway" in captured
|
|
# Verify systemctl restart was called
|
|
restart_calls = [
|
|
c for c in mock_run.call_args_list
|
|
if "restart" in " ".join(str(a) for a in c.args[0])
|
|
and "systemctl" in " ".join(str(a) for a in c.args[0])
|
|
]
|
|
assert len(restart_calls) == 1
|
|
|
|
@patch("shutil.which", return_value=None)
|
|
@patch("subprocess.run")
|
|
def test_update_no_gateway_running_skips_restart(
|
|
self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
|
|
):
|
|
"""When no gateway is running, update should skip the restart section entirely."""
|
|
monkeypatch.setattr(
|
|
gateway_cli, "is_macos", lambda: False,
|
|
)
|
|
|
|
mock_run.side_effect = _make_run_side_effect(
|
|
commit_count="3",
|
|
systemd_active=False,
|
|
)
|
|
|
|
with patch("gateway.status.get_running_pid", return_value=None):
|
|
cmd_update(mock_args)
|
|
|
|
captured = capsys.readouterr().out
|
|
assert "Stopped gateway" not in captured
|
|
assert "Gateway restarted" not in captured
|
|
assert "Gateway restarted via launchd" not in captured
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# cmd_update — system-level systemd service detection
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestCmdUpdateSystemService:
|
|
"""cmd_update detects system-level gateway services where --user fails."""
|
|
|
|
@patch("shutil.which", return_value=None)
|
|
@patch("subprocess.run")
|
|
def test_update_detects_system_service_and_restarts(
|
|
self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
|
|
):
|
|
"""When user systemd is inactive but a system service exists, restart via system scope."""
|
|
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
|
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
|
|
|
mock_run.side_effect = _make_run_side_effect(
|
|
commit_count="3",
|
|
systemd_active=False,
|
|
system_service_active=True,
|
|
)
|
|
|
|
with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
|
|
cmd_update(mock_args)
|
|
|
|
captured = capsys.readouterr().out
|
|
assert "Restarted hermes-gateway" in captured
|
|
# Verify systemctl restart (no --user) was called
|
|
restart_calls = [
|
|
c for c in mock_run.call_args_list
|
|
if "restart" in " ".join(str(a) for a in c.args[0])
|
|
and "systemctl" in " ".join(str(a) for a in c.args[0])
|
|
and "--user" not in " ".join(str(a) for a in c.args[0])
|
|
]
|
|
assert len(restart_calls) == 1
|
|
|
|
@patch("shutil.which", return_value=None)
|
|
@patch("subprocess.run")
|
|
def test_update_system_service_restart_failure_shows_error(
|
|
self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
|
|
):
|
|
"""When system service restart fails, show the failure message."""
|
|
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
|
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
|
|
|
mock_run.side_effect = _make_run_side_effect(
|
|
commit_count="3",
|
|
systemd_active=False,
|
|
system_service_active=True,
|
|
system_restart_rc=1,
|
|
)
|
|
|
|
with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
|
|
cmd_update(mock_args)
|
|
|
|
captured = capsys.readouterr().out
|
|
assert "Failed to restart" in captured
|
|
|
|
@patch("shutil.which", return_value=None)
|
|
@patch("subprocess.run")
|
|
def test_user_service_takes_priority_over_system(
|
|
self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
|
|
):
|
|
"""When both user and system services are active, both are restarted."""
|
|
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
|
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
|
|
|
mock_run.side_effect = _make_run_side_effect(
|
|
commit_count="3",
|
|
systemd_active=True,
|
|
system_service_active=True,
|
|
)
|
|
|
|
with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
|
|
cmd_update(mock_args)
|
|
|
|
captured = capsys.readouterr().out
|
|
# Both scopes are discovered and restarted
|
|
assert "Restarted hermes-gateway" in captured
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Service PID exclusion — the core bug fix
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestServicePidExclusion:
|
|
"""After restarting a service, the stale-process sweep must NOT kill
|
|
the freshly-spawned service PID. This was the root cause of the bug
|
|
where ``hermes update`` would restart the gateway and immediately kill it.
|
|
"""
|
|
|
|
@patch("shutil.which", return_value=None)
|
|
@patch("subprocess.run")
|
|
def test_update_launchd_does_not_kill_service_pid(
|
|
self, mock_run, _mock_which, mock_args, capsys, monkeypatch, tmp_path,
|
|
):
|
|
"""After launchd restart, the sweep must exclude the service PID."""
|
|
plist_path = tmp_path / "ai.hermes.gateway.plist"
|
|
plist_path.write_text("<plist/>")
|
|
|
|
monkeypatch.setattr(gateway_cli, "is_macos", lambda: True)
|
|
monkeypatch.setattr(gateway_cli, "is_linux", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
|
|
|
|
# The service PID that launchd manages after restart
|
|
SERVICE_PID = 42000
|
|
|
|
mock_run.side_effect = _make_run_side_effect(
|
|
commit_count="3",
|
|
launchctl_loaded=True,
|
|
)
|
|
|
|
# Simulate find_gateway_pids returning the service PID (the bug scenario)
|
|
# and _get_service_pids returning the same PID to exclude it
|
|
with patch.object(
|
|
gateway_cli, "_get_service_pids", return_value={SERVICE_PID}
|
|
), patch.object(
|
|
gateway_cli, "find_gateway_pids",
|
|
side_effect=lambda exclude_pids=None, all_profiles=False: (
|
|
[SERVICE_PID] if not exclude_pids else
|
|
[p for p in [SERVICE_PID] if p not in exclude_pids]
|
|
),
|
|
), patch("os.kill") as mock_kill:
|
|
cmd_update(mock_args)
|
|
|
|
captured = capsys.readouterr().out
|
|
# Service was restarted
|
|
assert "Restarted" in captured
|
|
# The service PID should NOT have been killed by the manual sweep
|
|
kill_calls = [
|
|
c for c in mock_kill.call_args_list
|
|
if c.args[0] == SERVICE_PID
|
|
]
|
|
assert len(kill_calls) == 0, (
|
|
f"Service PID {SERVICE_PID} was killed by the manual sweep — "
|
|
f"this is the bug where update restarts then immediately kills the gateway"
|
|
)
|
|
# Should NOT show manual restart message
|
|
assert "Restart manually" not in captured
|
|
|
|
@patch("shutil.which", return_value=None)
|
|
@patch("subprocess.run")
|
|
def test_update_systemd_does_not_kill_service_pid(
|
|
self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
|
|
):
|
|
"""After systemd restart, the sweep must exclude the service PID."""
|
|
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
|
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
|
|
|
SERVICE_PID = 55000
|
|
|
|
mock_run.side_effect = _make_run_side_effect(
|
|
commit_count="3",
|
|
systemd_active=True,
|
|
)
|
|
|
|
with patch.object(
|
|
gateway_cli, "_get_service_pids", return_value={SERVICE_PID}
|
|
), patch.object(
|
|
gateway_cli, "find_gateway_pids",
|
|
side_effect=lambda exclude_pids=None, all_profiles=False: (
|
|
[SERVICE_PID] if not exclude_pids else
|
|
[p for p in [SERVICE_PID] if p not in exclude_pids]
|
|
),
|
|
), patch("os.kill") as mock_kill:
|
|
cmd_update(mock_args)
|
|
|
|
captured = capsys.readouterr().out
|
|
assert "Restarted hermes-gateway" in captured
|
|
# Service PID must not be killed
|
|
kill_calls = [
|
|
c for c in mock_kill.call_args_list
|
|
if c.args[0] == SERVICE_PID
|
|
]
|
|
assert len(kill_calls) == 0
|
|
assert "Restart manually" not in captured
|
|
|
|
@patch("shutil.which", return_value=None)
|
|
@patch("subprocess.run")
|
|
def test_update_kills_manual_pid_but_not_service_pid(
|
|
self, mock_run, _mock_which, mock_args, capsys, monkeypatch, tmp_path,
|
|
):
|
|
"""When both a service PID and a manual PID exist, only the manual one
|
|
is killed."""
|
|
plist_path = tmp_path / "ai.hermes.gateway.plist"
|
|
plist_path.write_text("<plist/>")
|
|
|
|
monkeypatch.setattr(gateway_cli, "is_macos", lambda: True)
|
|
monkeypatch.setattr(gateway_cli, "is_linux", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
|
|
|
|
SERVICE_PID = 42000
|
|
MANUAL_PID = 42999
|
|
|
|
mock_run.side_effect = _make_run_side_effect(
|
|
commit_count="3",
|
|
launchctl_loaded=True,
|
|
)
|
|
|
|
def fake_find(exclude_pids=None, all_profiles=False):
|
|
_exclude = exclude_pids or set()
|
|
return [p for p in [SERVICE_PID, MANUAL_PID] if p not in _exclude]
|
|
|
|
with patch.object(
|
|
gateway_cli, "_get_service_pids", return_value={SERVICE_PID}
|
|
), patch.object(
|
|
gateway_cli, "find_gateway_pids", side_effect=fake_find,
|
|
), patch("os.kill") as mock_kill:
|
|
cmd_update(mock_args)
|
|
|
|
captured = capsys.readouterr().out
|
|
assert "Restarted" in captured
|
|
# Manual PID should be killed
|
|
manual_kills = [c for c in mock_kill.call_args_list if c.args[0] == MANUAL_PID]
|
|
assert len(manual_kills) == 1
|
|
# Service PID should NOT be killed
|
|
service_kills = [c for c in mock_kill.call_args_list if c.args[0] == SERVICE_PID]
|
|
assert len(service_kills) == 0
|
|
# Should show manual stop message since manual PID was killed
|
|
assert "Stopped 1 manual gateway" in captured
|
|
|
|
|
|
class TestGetServicePids:
|
|
"""Unit tests for _get_service_pids()."""
|
|
|
|
def test_returns_systemd_main_pid(self, monkeypatch):
|
|
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
|
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
|
|
|
def fake_run(cmd, **kwargs):
|
|
joined = " ".join(str(c) for c in cmd)
|
|
if "list-units" in joined:
|
|
return subprocess.CompletedProcess(
|
|
cmd, 0,
|
|
stdout="hermes-gateway.service loaded active running Hermes Gateway\n",
|
|
stderr="",
|
|
)
|
|
if "show" in joined and "MainPID" in joined:
|
|
return subprocess.CompletedProcess(cmd, 0, stdout="12345\n", stderr="")
|
|
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
|
|
|
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
|
|
|
pids = gateway_cli._get_service_pids()
|
|
assert 12345 in pids
|
|
|
|
def test_returns_launchd_pid(self, monkeypatch):
|
|
monkeypatch.setattr(gateway_cli, "is_linux", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "is_macos", lambda: True)
|
|
monkeypatch.setattr(gateway_cli, "get_launchd_label", lambda: "ai.hermes.gateway")
|
|
|
|
def fake_run(cmd, **kwargs):
|
|
joined = " ".join(str(c) for c in cmd)
|
|
if "launchctl" in joined and "list" in joined:
|
|
return subprocess.CompletedProcess(
|
|
cmd, 0,
|
|
stdout="PID\tStatus\tLabel\n67890\t0\tai.hermes.gateway\n",
|
|
stderr="",
|
|
)
|
|
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
|
|
|
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
|
|
|
pids = gateway_cli._get_service_pids()
|
|
assert 67890 in pids
|
|
|
|
def test_returns_empty_when_no_services(self, monkeypatch):
|
|
monkeypatch.setattr(gateway_cli, "is_linux", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
|
|
|
pids = gateway_cli._get_service_pids()
|
|
assert pids == set()
|
|
|
|
def test_excludes_zero_pid(self, monkeypatch):
|
|
"""systemd returns MainPID=0 for stopped services; skip those."""
|
|
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
|
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
|
|
|
def fake_run(cmd, **kwargs):
|
|
joined = " ".join(str(c) for c in cmd)
|
|
if "list-units" in joined:
|
|
return subprocess.CompletedProcess(
|
|
cmd, 0,
|
|
stdout="hermes-gateway.service loaded inactive dead Hermes Gateway\n",
|
|
stderr="",
|
|
)
|
|
if "show" in joined and "MainPID" in joined:
|
|
return subprocess.CompletedProcess(cmd, 0, stdout="0\n", stderr="")
|
|
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
|
|
|
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
|
|
|
pids = gateway_cli._get_service_pids()
|
|
assert 0 not in pids
|
|
assert pids == set()
|
|
|
|
|
|
class TestFindGatewayPidsExclude:
|
|
"""find_gateway_pids respects exclude_pids."""
|
|
|
|
def test_excludes_specified_pids(self, monkeypatch):
|
|
monkeypatch.setattr(gateway_cli, "is_windows", lambda: False)
|
|
|
|
def fake_run(cmd, **kwargs):
|
|
return subprocess.CompletedProcess(
|
|
cmd, 0,
|
|
stdout=(
|
|
"user 100 0.0 0.0 0 0 ? S 00:00 0:00 python gateway/run.py\n"
|
|
"user 200 0.0 0.0 0 0 ? S 00:00 0:00 python gateway/run.py\n"
|
|
),
|
|
stderr="",
|
|
)
|
|
|
|
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
|
monkeypatch.setattr("os.getpid", lambda: 999)
|
|
|
|
pids = gateway_cli.find_gateway_pids(exclude_pids={100})
|
|
assert 100 not in pids
|
|
assert 200 in pids
|
|
|
|
def test_no_exclude_returns_all(self, monkeypatch):
|
|
monkeypatch.setattr(gateway_cli, "is_windows", lambda: False)
|
|
|
|
def fake_run(cmd, **kwargs):
|
|
return subprocess.CompletedProcess(
|
|
cmd, 0,
|
|
stdout=(
|
|
"user 100 0.0 0.0 0 0 ? S 00:00 0:00 python gateway/run.py\n"
|
|
"user 200 0.0 0.0 0 0 ? S 00:00 0:00 python gateway/run.py\n"
|
|
),
|
|
stderr="",
|
|
)
|
|
|
|
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
|
monkeypatch.setattr("os.getpid", lambda: 999)
|
|
|
|
pids = gateway_cli.find_gateway_pids()
|
|
assert 100 in pids
|
|
assert 200 in pids
|
|
|
|
def test_filters_to_current_profile(self, monkeypatch, tmp_path):
|
|
profile_dir = tmp_path / ".hermes" / "profiles" / "orcha"
|
|
profile_dir.mkdir(parents=True)
|
|
monkeypatch.setattr(gateway_cli, "is_windows", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: profile_dir)
|
|
|
|
def fake_run(cmd, **kwargs):
|
|
return subprocess.CompletedProcess(
|
|
cmd, 0,
|
|
stdout=(
|
|
"100 /Users/dgrieco/.hermes/hermes-agent/venv/bin/python -m hermes_cli.main --profile orcha gateway run --replace\n"
|
|
"200 /Users/dgrieco/.hermes/hermes-agent/venv/bin/python -m hermes_cli.main --profile other gateway run --replace\n"
|
|
),
|
|
stderr="",
|
|
)
|
|
|
|
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
|
monkeypatch.setattr("os.getpid", lambda: 999)
|
|
monkeypatch.setattr(gateway_cli, "_get_service_pids", lambda: set())
|
|
monkeypatch.setattr(gateway_cli, "_profile_arg", lambda hermes_home=None: "--profile orcha")
|
|
|
|
pids = gateway_cli.find_gateway_pids()
|
|
|
|
assert pids == [100]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Gateway mode writes exit code before restart (#8300)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestGatewayModeWritesExitCodeEarly:
|
|
"""When running as ``hermes update --gateway``, the exit code marker must be
|
|
written *before* the gateway restart attempt. Without this, systemd's
|
|
``KillMode=mixed`` kills the update process (and its wrapping shell) during
|
|
the cgroup teardown, so the shell epilogue that normally writes the exit
|
|
code never executes. The new gateway's update watcher then polls for 30
|
|
minutes and sends a spurious timeout message.
|
|
"""
|
|
|
|
@patch("shutil.which", return_value=None)
|
|
@patch("subprocess.run")
|
|
def test_exit_code_written_in_gateway_mode(
|
|
self, mock_run, _mock_which, capsys, tmp_path, monkeypatch,
|
|
):
|
|
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
|
|
|
# Point HERMES_HOME at a temp dir so the marker file lands there
|
|
hermes_home = tmp_path / ".hermes"
|
|
hermes_home.mkdir()
|
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
|
import hermes_cli.config as _cfg
|
|
monkeypatch.setattr(_cfg, "get_hermes_home", lambda: hermes_home)
|
|
# Also patch the module-level ref used by cmd_update
|
|
import hermes_cli.main as _main_mod
|
|
monkeypatch.setattr(_main_mod, "get_hermes_home", lambda: hermes_home)
|
|
|
|
mock_run.side_effect = _make_run_side_effect(commit_count="1")
|
|
|
|
args = SimpleNamespace(gateway=True)
|
|
|
|
with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
|
|
cmd_update(args)
|
|
|
|
exit_code_path = hermes_home / ".update_exit_code"
|
|
assert exit_code_path.exists(), ".update_exit_code not written in gateway mode"
|
|
assert exit_code_path.read_text() == "0"
|
|
|
|
@patch("shutil.which", return_value=None)
|
|
@patch("subprocess.run")
|
|
def test_exit_code_not_written_in_normal_mode(
|
|
self, mock_run, _mock_which, capsys, tmp_path, monkeypatch,
|
|
):
|
|
"""Non-gateway mode should NOT write the exit code (the shell does it)."""
|
|
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
|
|
|
hermes_home = tmp_path / ".hermes"
|
|
hermes_home.mkdir()
|
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
|
import hermes_cli.config as _cfg
|
|
monkeypatch.setattr(_cfg, "get_hermes_home", lambda: hermes_home)
|
|
import hermes_cli.main as _main_mod
|
|
monkeypatch.setattr(_main_mod, "get_hermes_home", lambda: hermes_home)
|
|
|
|
mock_run.side_effect = _make_run_side_effect(commit_count="1")
|
|
|
|
args = SimpleNamespace(gateway=False)
|
|
|
|
with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
|
|
cmd_update(args)
|
|
|
|
exit_code_path = hermes_home / ".update_exit_code"
|
|
assert not exit_code_path.exists(), ".update_exit_code should not be written outside gateway mode"
|
|
|
|
@patch("shutil.which", return_value=None)
|
|
@patch("subprocess.run")
|
|
def test_exit_code_written_before_restart_call(
|
|
self, mock_run, _mock_which, capsys, tmp_path, monkeypatch,
|
|
):
|
|
"""Exit code must exist BEFORE systemctl restart is called."""
|
|
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
|
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
|
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
|
|
|
hermes_home = tmp_path / ".hermes"
|
|
hermes_home.mkdir()
|
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
|
import hermes_cli.config as _cfg
|
|
monkeypatch.setattr(_cfg, "get_hermes_home", lambda: hermes_home)
|
|
import hermes_cli.main as _main_mod
|
|
monkeypatch.setattr(_main_mod, "get_hermes_home", lambda: hermes_home)
|
|
|
|
exit_code_path = hermes_home / ".update_exit_code"
|
|
|
|
# Track whether exit code exists when systemctl restart is called
|
|
exit_code_existed_at_restart = []
|
|
|
|
original_side_effect = _make_run_side_effect(
|
|
commit_count="1", systemd_active=True,
|
|
)
|
|
|
|
def tracking_side_effect(cmd, **kwargs):
|
|
joined = " ".join(str(c) for c in cmd)
|
|
if "systemctl" in joined and "restart" in joined:
|
|
exit_code_existed_at_restart.append(exit_code_path.exists())
|
|
return original_side_effect(cmd, **kwargs)
|
|
|
|
mock_run.side_effect = tracking_side_effect
|
|
|
|
args = SimpleNamespace(gateway=True)
|
|
|
|
with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
|
|
cmd_update(args)
|
|
|
|
assert exit_code_existed_at_restart, "systemctl restart was never called"
|
|
assert exit_code_existed_at_restart[0] is True, \
|
|
".update_exit_code must exist BEFORE systemctl restart (cgroup kill race)"
|