diff --git a/gateway/run.py b/gateway/run.py index 3428c59f7f..3e6f39be37 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -4617,8 +4617,8 @@ class GatewayRunner: async def _handle_update_command(self, event: MessageEvent) -> str: """Handle /update command — update Hermes Agent to the latest version. - Spawns ``hermes update`` in a separate systemd scope so it survives the - gateway restart that ``hermes update`` may trigger at the end. Marker + Spawns ``hermes update`` in a detached session (via ``setsid``) so it + survives the gateway restart that ``hermes update`` may trigger. Marker files are written so either the current gateway process or the next one can notify the user when the update finishes. """ @@ -4658,28 +4658,28 @@ class GatewayRunner: pending_path.write_text(json.dumps(pending)) exit_code_path.unlink(missing_ok=True) - # Spawn `hermes update` in a separate cgroup so it survives gateway - # restart. systemd-run --user --scope creates a transient scope unit. + # Spawn `hermes update` detached so it survives gateway restart. + # Use setsid for portable session detach (works under system services + # where systemd-run --user fails due to missing D-Bus session). hermes_cmd_str = " ".join(shlex.quote(part) for part in hermes_cmd) update_cmd = ( f"{hermes_cmd_str} update > {shlex.quote(str(output_path))} 2>&1; " f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}" ) try: - systemd_run = shutil.which("systemd-run") - if systemd_run: + setsid_bin = shutil.which("setsid") + if setsid_bin: + # Preferred: setsid creates a new session, fully detached subprocess.Popen( - [systemd_run, "--user", "--scope", - "--unit=hermes-update", "--", - "bash", "-c", update_cmd], + [setsid_bin, "bash", "-c", update_cmd], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, start_new_session=True, ) else: - # Fallback: best-effort detach with start_new_session + # Fallback: start_new_session=True calls os.setsid() in child subprocess.Popen( - ["bash", "-c", f"nohup {update_cmd} &"], + ["bash", "-c", update_cmd], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, start_new_session=True, diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 763bcea4ef..9dca210563 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -3165,6 +3165,7 @@ def cmd_update(args): _gw_service_name = get_service_name() existing_pid = get_running_pid() has_systemd_service = False + has_system_service = False has_launchd_service = False try: @@ -3177,6 +3178,19 @@ def cmd_update(args): except (FileNotFoundError, subprocess.TimeoutExpired): pass + # Also check for a system-level service (hermes gateway install --system). + # This covers gateways running under system systemd where --user + # fails due to missing D-Bus session. + if not has_systemd_service and is_linux(): + try: + check = subprocess.run( + ["systemctl", "is-active", _gw_service_name], + capture_output=True, text=True, timeout=5, + ) + has_system_service = check.stdout.strip() == "active" + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + # Check for macOS launchd service if is_macos(): try: @@ -3191,7 +3205,7 @@ def cmd_update(args): except (FileNotFoundError, subprocess.TimeoutExpired): pass - if existing_pid or has_systemd_service or has_launchd_service: + if existing_pid or has_systemd_service or has_system_service or has_launchd_service: print() # When a service manager is handling the gateway, let it @@ -3232,6 +3246,21 @@ def cmd_update(args): print(" hermes gateway restart") else: print(" Try manually: hermes gateway restart") + elif has_system_service: + # System-level service (hermes gateway install --system). + # No D-Bus session needed — systemctl without --user talks + # directly to the system manager over /run/systemd/private. + print("→ Restarting system gateway service...") + restart = subprocess.run( + ["systemctl", "restart", _gw_service_name], + capture_output=True, text=True, timeout=15, + ) + if restart.returncode == 0: + print("✓ Gateway restarted (system service).") + else: + print(f"⚠ Gateway restart failed: {restart.stderr.strip()}") + print(" System services may require root. Try:") + print(f" sudo systemctl restart {_gw_service_name}") elif has_launchd_service: # Refresh the plist first (picks up --replace and other # changes from the update we just pulled). diff --git a/tests/gateway/test_update_command.py b/tests/gateway/test_update_command.py index e8fb3ddc19..0fc774a0ab 100644 --- a/tests/gateway/test_update_command.py +++ b/tests/gateway/test_update_command.py @@ -202,7 +202,7 @@ class TestHandleUpdateCommand: with patch("gateway.run._hermes_home", hermes_home), \ patch("gateway.run.__file__", fake_file), \ - patch("shutil.which", side_effect=lambda x: "/usr/bin/hermes" if x == "hermes" else "/usr/bin/systemd-run"), \ + patch("shutil.which", side_effect=lambda x: "/usr/bin/hermes" if x == "hermes" else "/usr/bin/setsid"), \ patch("subprocess.Popen"): result = await runner._handle_update_command(event) @@ -215,8 +215,8 @@ class TestHandleUpdateCommand: assert not (hermes_home / ".update_exit_code").exists() @pytest.mark.asyncio - async def test_spawns_systemd_run(self, tmp_path): - """Uses systemd-run when available.""" + async def test_spawns_setsid(self, tmp_path): + """Uses setsid when available.""" runner = _make_runner() event = _make_event() @@ -236,16 +236,16 @@ class TestHandleUpdateCommand: patch("subprocess.Popen", mock_popen): result = await runner._handle_update_command(event) - # Verify systemd-run was used + # Verify setsid was used call_args = mock_popen.call_args[0][0] - assert call_args[0] == "/usr/bin/systemd-run" - assert "--scope" in call_args + assert call_args[0] == "/usr/bin/setsid" + assert call_args[1] == "bash" assert ".update_exit_code" in call_args[-1] assert "Starting Hermes update" in result @pytest.mark.asyncio - async def test_fallback_nohup_when_no_systemd_run(self, tmp_path): - """Falls back to nohup when systemd-run is not available.""" + async def test_fallback_when_no_setsid(self, tmp_path): + """Falls back to start_new_session=True when setsid is not available.""" runner = _make_runner() event = _make_event() @@ -260,24 +260,27 @@ class TestHandleUpdateCommand: mock_popen = MagicMock() - def which_no_systemd(x): + def which_no_setsid(x): if x == "hermes": return "/usr/bin/hermes" - if x == "systemd-run": + if x == "setsid": return None return None with patch("gateway.run._hermes_home", hermes_home), \ patch("gateway.run.__file__", fake_file), \ - patch("shutil.which", side_effect=which_no_systemd), \ + patch("shutil.which", side_effect=which_no_setsid), \ patch("subprocess.Popen", mock_popen): result = await runner._handle_update_command(event) - # Verify bash -c nohup fallback was used + # Verify plain bash -c fallback (no nohup, no setsid) call_args = mock_popen.call_args[0][0] assert call_args[0] == "bash" - assert "nohup" in call_args[2] + assert "nohup" not in call_args[2] assert ".update_exit_code" in call_args[2] + # start_new_session=True should be in kwargs + call_kwargs = mock_popen.call_args[1] + assert call_kwargs.get("start_new_session") is True assert "Starting Hermes update" in result @pytest.mark.asyncio diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py index 89ac842196..1d6b064af6 100644 --- a/tests/hermes_cli/test_update_gateway_restart.py +++ b/tests/hermes_cli/test_update_gateway_restart.py @@ -25,6 +25,8 @@ def _make_run_side_effect( verify_ok=True, commit_count="3", systemd_active=False, + system_service_active=False, + system_restart_rc=0, launchctl_loaded=False, ): """Build a subprocess.run side_effect that simulates git + service commands.""" @@ -45,14 +47,23 @@ def _make_run_side_effect( if "rev-list" in joined: return subprocess.CompletedProcess(cmd, 0, stdout=f"{commit_count}\n", stderr="") - # systemctl --user is-active + # systemctl is-active — distinguish --user from system scope if "systemctl" in joined and "is-active" in joined: - if systemd_active: - return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="") - return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="") + if "--user" in joined: + if systemd_active: + return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="") + return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="") + else: + # System-level check (no --user) + if system_service_active: + return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="") + return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="") - # systemctl --user restart + # systemctl restart — distinguish --user from system scope if "systemctl" in joined and "restart" in joined: + if "--user" not in joined and system_service_active: + stderr = "" if system_restart_rc == 0 else "Failed to restart: Permission denied" + return subprocess.CompletedProcess(cmd, system_restart_rc, stdout="", stderr=stderr) return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") # launchctl list ai.hermes.gateway @@ -393,3 +404,91 @@ class TestCmdUpdateLaunchdRestart: assert "Stopped gateway" not in captured assert "Gateway restarted" not in captured assert "Gateway restarted via launchd" not in captured + + +# --------------------------------------------------------------------------- +# cmd_update — system-level systemd service detection +# --------------------------------------------------------------------------- + + +class TestCmdUpdateSystemService: + """cmd_update detects system-level gateway services where --user fails.""" + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_detects_system_service_and_restarts( + self, mock_run, _mock_which, mock_args, capsys, monkeypatch, + ): + """When user systemd is inactive but a system service exists, restart via system scope.""" + monkeypatch.setattr(gateway_cli, "is_macos", lambda: False) + monkeypatch.setattr(gateway_cli, "is_linux", lambda: True) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + systemd_active=False, + system_service_active=True, + ) + + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "system gateway service" in captured.lower() + assert "Gateway restarted (system service)" in captured + # Verify systemctl restart (no --user) was called + restart_calls = [ + c for c in mock_run.call_args_list + if "restart" in " ".join(str(a) for a in c.args[0]) + and "systemctl" in " ".join(str(a) for a in c.args[0]) + and "--user" not in " ".join(str(a) for a in c.args[0]) + ] + assert len(restart_calls) == 1 + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_system_service_restart_failure_shows_sudo_hint( + self, mock_run, _mock_which, mock_args, capsys, monkeypatch, + ): + """When system service restart fails (e.g. no root), show sudo hint.""" + monkeypatch.setattr(gateway_cli, "is_macos", lambda: False) + monkeypatch.setattr(gateway_cli, "is_linux", lambda: True) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + systemd_active=False, + system_service_active=True, + system_restart_rc=1, + ) + + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "sudo systemctl restart" in captured + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_user_service_takes_priority_over_system( + self, mock_run, _mock_which, mock_args, capsys, monkeypatch, + ): + """When both user and system services are active, user wins.""" + monkeypatch.setattr(gateway_cli, "is_macos", lambda: False) + monkeypatch.setattr(gateway_cli, "is_linux", lambda: True) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + systemd_active=True, + system_service_active=True, + ) + + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"), \ + patch("os.kill"): + cmd_update(mock_args) + + captured = capsys.readouterr().out + # Should restart via user service, not system + assert "Gateway restarted." in captured + assert "(system service)" not in captured