class UserSystemdUnavailableError(RuntimeError):
    """Raised when ``systemctl --user`` cannot reach the user D-Bus session.

    Typically hit on fresh RHEL/Debian SSH sessions where linger is disabled
    and no user@.service is running, so ``/run/user/$UID/bus`` never exists.
    Carries a user-facing remediation message in ``args[0]``.
    """


def _user_dbus_socket_path() -> Path:
    """Return the expected per-user D-Bus socket path (regardless of existence).

    Uses ``$XDG_RUNTIME_DIR`` when it is set and non-empty, otherwise falls
    back to ``/run/user/<uid>``.
    """
    runtime_dir = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
    return Path(runtime_dir) / "bus"


def _current_username() -> str:
    """Return the login name used in loginctl commands and remediation hints.

    ``getpass.getuser()`` raises when neither the environment nor the password
    database yields a name.  In that case the numeric UID is returned instead,
    so the preflight can still produce its remediation message rather than a
    traceback.
    """
    import getpass

    try:
        return getpass.getuser()
    except (KeyError, ImportError, OSError):
        return str(os.getuid())


def _wait_for_user_dbus_socket(timeout: float = 3.0) -> bool:
    """Poll for the user D-Bus socket to appear, up to ``timeout`` seconds.

    Linger-enabled user@.service can take a second or two to spawn the socket
    after ``loginctl enable-linger`` runs.

    Args:
        timeout: Maximum time to wait, in seconds.  A value of zero or less
            performs exactly one check without sleeping.

    Returns:
        True once the socket exists, False if it is still missing when the
        timeout expires.  Every True result is preceded by a call to
        ``_ensure_user_systemd_env()``, including when the socket is found on
        the very last check.
    """
    import time

    deadline = time.monotonic() + timeout
    while True:
        if _user_dbus_socket_path().exists():
            _ensure_user_systemd_env()
            return True
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # Never sleep past the deadline.
        time.sleep(min(0.2, remaining))


def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
    """Ensure ``systemctl --user`` will reach the user D-Bus session bus.

    No-op when the bus socket is already there (the common case on desktops
    and linger-enabled servers).  On fresh SSH sessions where the socket is
    missing:

    * If linger is already enabled, wait briefly for user@.service to spawn
      the socket.
    * If linger is disabled and ``auto_enable_linger`` is True, try
      ``loginctl enable-linger $USER`` (works as non-root when polkit permits
      it, otherwise needs sudo).
    * If the socket is still missing afterwards, raise
      :class:`UserSystemdUnavailableError` with a precise remediation message.

    Callers should treat the exception as a terminal condition for user-scope
    systemd operations and surface the message to the user.

    Args:
        auto_enable_linger: When False, ``loginctl enable-linger`` is never
            invoked; a missing socket goes straight to the error.

    Raises:
        UserSystemdUnavailableError: The socket is missing and could not be
            brought up.
    """
    # NOTE(review): only the D-Bus ``bus`` socket is probed here.  systemctl
    # may also be able to reach the user manager through
    # $XDG_RUNTIME_DIR/systemd/private when that socket is absent — confirm
    # this check cannot reject a setup where ``systemctl --user`` works.
    _ensure_user_systemd_env()
    if _user_dbus_socket_path().exists():
        return

    username = _current_username()
    linger_enabled, linger_detail = get_systemd_linger_status()

    if linger_enabled is True:
        if _wait_for_user_dbus_socket(timeout=3.0):
            return
        # Linger is on but socket still missing — unusual; report it.
        _raise_user_systemd_unavailable(
            username,
            reason="User D-Bus socket is missing even though linger is enabled.",
            fix_hint=(
                f" systemctl start user@{os.getuid()}.service\n"
                " (may require sudo; try again after the command succeeds)"
            ),
        )

    if auto_enable_linger and shutil.which("loginctl"):
        try:
            result = subprocess.run(
                ["loginctl", "enable-linger", username],
                capture_output=True,
                text=True,
                check=False,
                timeout=30,
            )
        except (OSError, subprocess.SubprocessError) as exc:
            # Launch failures (OSError) and timeouts (SubprocessError).
            _raise_user_systemd_unavailable(
                username,
                reason=f"loginctl enable-linger failed ({exc}).",
                fix_hint=f" sudo loginctl enable-linger {username}",
            )
        else:
            if result.returncode == 0:
                if _wait_for_user_dbus_socket(timeout=5.0):
                    print(f"✓ Enabled linger for {username} — user D-Bus now available")
                    return
                # enable-linger succeeded but the socket never appeared.
                _raise_user_systemd_unavailable(
                    username,
                    reason="Linger was enabled, but the user D-Bus socket did not appear.",
                    fix_hint=(
                        " Log out and log back in, then re-run the command.\n"
                        f" Or reboot and run: systemctl --user start {get_service_name()}"
                    ),
                )
            detail = (result.stderr or result.stdout or f"exit {result.returncode}").strip()
            _raise_user_systemd_unavailable(
                username,
                reason=f"loginctl enable-linger was denied: {detail}",
                fix_hint=f" sudo loginctl enable-linger {username}",
            )

    # Reached when auto-enable is off or loginctl is not on PATH.
    _raise_user_systemd_unavailable(
        username,
        reason=(
            "User D-Bus session is not available "
            f"({linger_detail or 'linger disabled'})."
        ),
        fix_hint=f" sudo loginctl enable-linger {username}",
    )


def _raise_user_systemd_unavailable(username: str, *, reason: str, fix_hint: str) -> None:
    """Build a user-facing error message and raise UserSystemdUnavailableError.

    This function never returns normally; callers rely on that to end their
    control flow.

    Args:
        username: Accepted for call-site symmetry; not interpolated into the
            message (callers embed it in ``fix_hint`` themselves).
        reason: First line of the message — what went wrong.
        fix_hint: Pre-formatted command(s) shown under "To fix:".

    Raises:
        UserSystemdUnavailableError: Always.
    """
    # NOTE(review): leading whitespace inside these literals is reproduced as
    # it appears in the reviewed patch text — confirm the intended indentation.
    msg = (
        f"{reason}\n"
        " systemctl --user cannot reach the user D-Bus session in this shell.\n"
        "\n"
        " To fix:\n"
        f"{fix_hint}\n"
        "\n"
        " Alternative: run the gateway in the foreground (stays up until\n"
        " you exit / close the terminal):\n"
        " hermes gateway run"
    )
    raise UserSystemdUnavailableError(msg)
+ _preflight_user_systemd() refresh_systemd_unit_if_needed(system=system) _run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30) print(f"✓ {_service_scope_label(system).capitalize()} service started") @@ -1642,6 +1782,8 @@ def systemd_restart(system: bool = False): system = _select_systemd_scope(system) if system: _require_root_for_system_service("restart") + else: + _preflight_user_systemd() refresh_systemd_unit_if_needed(system=system) from gateway.status import get_running_pid @@ -3516,6 +3658,10 @@ def gateway_setup(): systemd_start() elif is_macos(): launchd_start() + except UserSystemdUnavailableError as e: + print_error(" Failed to start — user systemd not reachable:") + for line in str(e).splitlines(): + print(f" {line}") except subprocess.CalledProcessError as e: print_error(f" Failed to start: {e}") else: @@ -3580,6 +3726,10 @@ def gateway_setup(): else: stop_profile_gateway() print_info("Start manually: hermes gateway") + except UserSystemdUnavailableError as e: + print_error(" Restart failed — user systemd not reachable:") + for line in str(e).splitlines(): + print(f" {line}") except subprocess.CalledProcessError as e: print_error(f" Restart failed: {e}") elif service_installed: @@ -3589,6 +3739,10 @@ def gateway_setup(): systemd_start() elif is_macos(): launchd_start() + except UserSystemdUnavailableError as e: + print_error(" Start failed — user systemd not reachable:") + for line in str(e).splitlines(): + print(f" {line}") except subprocess.CalledProcessError as e: print_error(f" Start failed: {e}") else: @@ -3612,6 +3766,10 @@ def gateway_setup(): systemd_start(system=installed_scope == "system") else: launchd_start() + except UserSystemdUnavailableError as e: + print_error(" Start failed — user systemd not reachable:") + for line in str(e).splitlines(): + print(f" {line}") except subprocess.CalledProcessError as e: print_error(f" Start failed: {e}") except subprocess.CalledProcessError as e: @@ -3649,6 +3807,18 @@ def 
gateway_setup(): def gateway_command(args): """Handle gateway subcommands.""" + try: + return _gateway_command_inner(args) + except UserSystemdUnavailableError as e: + # Clean, actionable message instead of a traceback when the user D-Bus + # session is unreachable (fresh SSH shell, no linger, container, etc.). + print_error("User systemd not reachable:") + for line in str(e).splitlines(): + print(f" {line}") + sys.exit(1) + + +def _gateway_command_inner(args): subcmd = getattr(args, 'gateway_command', None) # Default to run if no subcommand diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 1fe5ae058..362961689 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -2334,6 +2334,7 @@ def setup_gateway(config: dict): launchd_install, launchd_start, launchd_restart, + UserSystemdUnavailableError, ) service_installed = _is_service_installed() @@ -2357,6 +2358,10 @@ def setup_gateway(config: dict): systemd_restart() elif _is_macos: launchd_restart() + except UserSystemdUnavailableError as e: + print_error(" Restart failed — user systemd not reachable:") + for line in str(e).splitlines(): + print(f" {line}") except Exception as e: print_error(f" Restart failed: {e}") elif service_installed: @@ -2366,6 +2371,10 @@ def setup_gateway(config: dict): systemd_start() elif _is_macos: launchd_start() + except UserSystemdUnavailableError as e: + print_error(" Start failed — user systemd not reachable:") + for line in str(e).splitlines(): + print(f" {line}") except Exception as e: print_error(f" Start failed: {e}") elif supports_service_manager: @@ -2389,6 +2398,10 @@ def setup_gateway(config: dict): systemd_start(system=installed_scope == "system") elif _is_macos: launchd_start() + except UserSystemdUnavailableError as e: + print_error(" Start failed — user systemd not reachable:") + for line in str(e).splitlines(): + print(f" {line}") except Exception as e: print_error(f" Start failed: {e}") except Exception as e: diff --git 
import pytest


class TestPreflightUserSystemd:
    """Tests for _preflight_user_systemd() — D-Bus reachability before systemctl --user.

    Covers issue #5130 / Rick's RHEL 9.6 SSH scenario: setup tries to start the
    gateway via ``systemctl --user start`` in a shell with no user D-Bus session,
    which previously failed with a raw ``CalledProcessError`` and no remediation.
    """

    @pytest.fixture(autouse=True)
    def _isolate_from_host(self, monkeypatch):
        """Keep every test independent of the machine it runs on."""
        # The real helper assigns to os.environ directly, which monkeypatch
        # would not undo after the test.
        monkeypatch.setattr(gateway_cli, "_ensure_user_systemd_env", lambda: None)
        monkeypatch.setattr("getpass.getuser", lambda: "tester")

    @staticmethod
    def _set_socket_exists(monkeypatch, exists):
        """Make _user_dbus_socket_path() return a stand-in with a fixed exists()."""
        fake_path = SimpleNamespace(exists=lambda: exists)
        monkeypatch.setattr(gateway_cli, "_user_dbus_socket_path", lambda: fake_path)

    @staticmethod
    def _stub_loginctl(monkeypatch, *, returncode=0, stderr="", raises=None):
        """Pretend loginctl is installed; return the list of recorded commands."""
        calls = []

        def fake_run(cmd, **kwargs):
            calls.append(list(cmd))
            if raises is not None:
                raise raises
            return SimpleNamespace(returncode=returncode, stdout="", stderr=stderr)

        monkeypatch.setattr(gateway_cli.shutil, "which", lambda _: "/usr/bin/loginctl")
        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
        return calls

    def test_noop_when_bus_socket_exists(self, monkeypatch):
        """Socket already there (desktop / linger + prior login) → no-op."""
        self._set_socket_exists(monkeypatch, True)
        calls = self._stub_loginctl(monkeypatch)

        # Should not raise, no subprocess calls needed.
        gateway_cli._preflight_user_systemd()

        assert calls == []

    def test_raises_when_linger_disabled_and_loginctl_denied(self, monkeypatch):
        """Rick's scenario: no D-Bus, no linger, non-root SSH → clear error."""
        self._set_socket_exists(monkeypatch, False)
        monkeypatch.setattr(
            gateway_cli, "get_systemd_linger_status", lambda: (False, ""),
        )
        calls = self._stub_loginctl(
            monkeypatch, returncode=1, stderr="Interactive authentication required.",
        )

        with pytest.raises(gateway_cli.UserSystemdUnavailableError) as exc_info:
            gateway_cli._preflight_user_systemd()

        msg = str(exc_info.value)
        assert calls == [["loginctl", "enable-linger", "tester"]]
        assert "sudo loginctl enable-linger" in msg
        assert "hermes gateway run" in msg  # foreground fallback mentioned
        assert "Interactive authentication required" in msg

    def test_raises_when_loginctl_missing(self, monkeypatch):
        """No loginctl binary at all → error still names the manual fix."""
        self._set_socket_exists(monkeypatch, False)
        monkeypatch.setattr(
            gateway_cli, "get_systemd_linger_status",
            lambda: (None, "loginctl not found"),
        )
        monkeypatch.setattr(gateway_cli.shutil, "which", lambda _: None)

        with pytest.raises(gateway_cli.UserSystemdUnavailableError) as exc_info:
            gateway_cli._preflight_user_systemd()

        assert "sudo loginctl enable-linger" in str(exc_info.value)

    def test_linger_enabled_but_socket_still_missing(self, monkeypatch):
        """Edge case: linger says yes but the bus socket never came up."""
        self._set_socket_exists(monkeypatch, False)
        monkeypatch.setattr(
            gateway_cli, "get_systemd_linger_status", lambda: (True, ""),
        )
        monkeypatch.setattr(
            gateway_cli, "_wait_for_user_dbus_socket", lambda timeout=3.0: False,
        )

        with pytest.raises(gateway_cli.UserSystemdUnavailableError) as exc_info:
            gateway_cli._preflight_user_systemd()

        assert "linger is enabled" in str(exc_info.value)

    def test_enable_linger_succeeds_and_socket_appears(self, monkeypatch, capsys):
        """Happy remediation path: polkit allows enable-linger, socket spawns."""
        self._set_socket_exists(monkeypatch, False)
        monkeypatch.setattr(
            gateway_cli, "get_systemd_linger_status", lambda: (False, ""),
        )
        self._stub_loginctl(monkeypatch, returncode=0)
        monkeypatch.setattr(
            gateway_cli, "_wait_for_user_dbus_socket",
            lambda timeout=5.0: True,
        )

        # Should not raise.
        gateway_cli._preflight_user_systemd()

        out = capsys.readouterr().out
        assert "Enabled linger" in out

    def test_enable_linger_succeeds_but_socket_never_appears(self, monkeypatch):
        """enable-linger exits 0 yet the socket stays missing → specific error."""
        self._set_socket_exists(monkeypatch, False)
        monkeypatch.setattr(
            gateway_cli, "get_systemd_linger_status", lambda: (False, ""),
        )
        self._stub_loginctl(monkeypatch, returncode=0)
        monkeypatch.setattr(
            gateway_cli, "_wait_for_user_dbus_socket",
            lambda timeout=5.0: False,
        )

        with pytest.raises(gateway_cli.UserSystemdUnavailableError) as exc_info:
            gateway_cli._preflight_user_systemd()

        assert "did not appear" in str(exc_info.value)

    def test_loginctl_invocation_failure_is_reported(self, monkeypatch):
        """A loginctl timeout surfaces as the remediation error, not a traceback."""
        self._set_socket_exists(monkeypatch, False)
        monkeypatch.setattr(
            gateway_cli, "get_systemd_linger_status", lambda: (False, ""),
        )
        timeout_exc = gateway_cli.subprocess.TimeoutExpired(cmd="loginctl", timeout=30)
        self._stub_loginctl(monkeypatch, raises=timeout_exc)

        with pytest.raises(gateway_cli.UserSystemdUnavailableError) as exc_info:
            gateway_cli._preflight_user_systemd()

        msg = str(exc_info.value)
        assert "loginctl enable-linger failed" in msg
        assert "sudo loginctl enable-linger" in msg

    def test_auto_enable_disabled_never_runs_loginctl(self, monkeypatch):
        """auto_enable_linger=False goes straight to the error."""
        self._set_socket_exists(monkeypatch, False)
        monkeypatch.setattr(
            gateway_cli, "get_systemd_linger_status", lambda: (False, ""),
        )
        calls = self._stub_loginctl(monkeypatch)

        with pytest.raises(gateway_cli.UserSystemdUnavailableError) as exc_info:
            gateway_cli._preflight_user_systemd(auto_enable_linger=False)

        assert calls == []
        assert "linger disabled" in str(exc_info.value)