mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
fix(gateway): don't dead-end setup wizard when only system-scope unit is installed
The setup wizard dropped non-root users at a bare shell prompt when trying to start a system-scope gateway service. Previously _require_root_for_system_service called sys.exit(1), which the wizard's `except Exception` guards cannot catch (SystemExit is a BaseException). Users with a pre-existing /etc/systemd/system unit (e.g. from an earlier `sudo hermes setup` run) hit this whenever they re-ran `hermes setup` as a regular user. - Convert _require_root_for_system_service to raise a typed SystemScopeRequiresRootError (RuntimeError subclass) instead of sys.exit(1). The direct CLI path (`hermes gateway install|start|stop| restart|uninstall` without sudo) still exits 1 cleanly via a new catch at the top of gateway_command, matching the existing UserSystemdUnavailableError pattern. - Add _system_scope_wizard_would_need_root() pre-check and _print_system_scope_remediation() helper. Both setup wizards (hermes_cli/setup.py and hermes_cli/gateway.py::gateway_setup) now detect the dead-end before prompting and print actionable guidance: either `sudo systemctl start <service>` this time, or uninstall the system unit and install a per-user one. - Defense-in-depth: all 5 wizard prompt sites also catch SystemScopeRequiresRootError and fall back to the remediation helper if the pre-check is bypassed (race, etc.). Tests: 12 new tests in TestSystemScopeRequiresRootError, TestSystemScopeWizardPreCheck, TestSystemScopeRemediationOutput, and TestGatewayCommandCatchesSystemScopeError covering the exception contract, pre-check matrix (root vs non-root, system-only vs user-present vs none vs explicit system=True), remediation output for each action, and the direct-CLI exit-1 path.
This commit is contained in:
parent
04cf4788cc
commit
3cdbf334d5
3 changed files with 285 additions and 7 deletions
|
|
@ -967,6 +967,27 @@ class UserSystemdUnavailableError(RuntimeError):
|
|||
"""
|
||||
|
||||
|
||||
class SystemScopeRequiresRootError(RuntimeError):
|
||||
"""Raised when a system-scope gateway operation is attempted as non-root.
|
||||
|
||||
System-scope units live in ``/etc/systemd/system/`` and require root for
|
||||
install / uninstall / start / stop / restart via ``systemctl``. The
|
||||
previous behavior was ``sys.exit(1)`` which blew past the wizard's
|
||||
``except Exception`` guards and dumped the user at a bare shell prompt
|
||||
with no guidance. Raising a typed exception lets callers that can
|
||||
recover (the setup wizard) print actionable remediation instead, while
|
||||
``gateway_command`` still exits 1 with the same message for the direct
|
||||
CLI path.
|
||||
|
||||
``args[0]`` carries the user-facing message, ``args[1]`` the action name.
|
||||
``str(e)`` returns only the message (not the tuple repr) so format
|
||||
strings like ``f"Failed: {e}"`` render cleanly.
|
||||
"""
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.args[0] if self.args else ""
|
||||
|
||||
|
||||
def _user_dbus_socket_path() -> Path:
|
||||
"""Return the expected per-user D-Bus socket path (regardless of existence)."""
|
||||
xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
|
||||
|
|
@ -1382,8 +1403,10 @@ def print_systemd_scope_conflict_warning() -> None:
|
|||
|
||||
def _require_root_for_system_service(action: str) -> None:
|
||||
if os.geteuid() != 0:
|
||||
print(f"System gateway {action} requires root. Re-run with sudo.")
|
||||
sys.exit(1)
|
||||
raise SystemScopeRequiresRootError(
|
||||
f"System gateway {action} requires root. Re-run with sudo.",
|
||||
action,
|
||||
)
|
||||
|
||||
|
||||
def _system_service_identity(run_as_user: str | None = None) -> tuple[str, str, str]:
|
||||
|
|
@ -1930,6 +1953,47 @@ def _select_systemd_scope(system: bool = False) -> bool:
|
|||
return get_systemd_unit_path(system=True).exists() and not get_systemd_unit_path(system=False).exists()
|
||||
|
||||
|
||||
def _system_scope_wizard_would_need_root(system: bool = False) -> bool:
|
||||
"""True when the setup wizard is about to trigger a system-scope operation
|
||||
as a non-root user.
|
||||
|
||||
Replicates the decision ``_select_systemd_scope`` makes inside
|
||||
``systemd_start`` / ``systemd_restart`` / ``systemd_stop`` so the wizard
|
||||
can detect the dead-end BEFORE prompting, rather than letting
|
||||
``SystemScopeRequiresRootError`` propagate out and leave the user
|
||||
staring at a bare shell.
|
||||
"""
|
||||
if os.geteuid() == 0:
|
||||
return False
|
||||
return _select_systemd_scope(system=system)
|
||||
|
||||
|
||||
def _print_system_scope_remediation(action: str) -> None:
|
||||
"""Print actionable remediation when the wizard skips a system-scope
|
||||
prompt because the user isn't root. Keeps the wizard flowing instead of
|
||||
aborting.
|
||||
"""
|
||||
svc = get_service_name()
|
||||
print_warning(
|
||||
f"Gateway is installed as a system-wide service — "
|
||||
f"{action} requires root."
|
||||
)
|
||||
print_info(" Options:")
|
||||
print_info(f" 1. {action.capitalize()} it this time:")
|
||||
if action == "start":
|
||||
print_info(f" sudo systemctl start {svc}")
|
||||
elif action == "stop":
|
||||
print_info(f" sudo systemctl stop {svc}")
|
||||
elif action == "restart":
|
||||
print_info(f" sudo systemctl restart {svc}")
|
||||
else:
|
||||
print_info(f" sudo systemctl {action} {svc}")
|
||||
print_info(" 2. Switch to a per-user service (recommended for personal use):")
|
||||
print_info(" sudo hermes gateway uninstall --system")
|
||||
print_info(" hermes gateway install")
|
||||
print_info(" hermes gateway start")
|
||||
|
||||
|
||||
def _get_restart_drain_timeout() -> float:
|
||||
"""Return the configured gateway restart drain timeout in seconds."""
|
||||
raw = os.getenv("HERMES_RESTART_DRAIN_TIMEOUT", "").strip()
|
||||
|
|
@ -4115,7 +4179,9 @@ def gateway_setup():
|
|||
print_success("Gateway service is installed and running.")
|
||||
elif service_installed:
|
||||
print_warning("Gateway service is installed but not running.")
|
||||
if prompt_yes_no(" Start it now?", True):
|
||||
if supports_systemd_services() and _system_scope_wizard_would_need_root():
|
||||
_print_system_scope_remediation("start")
|
||||
elif prompt_yes_no(" Start it now?", True):
|
||||
try:
|
||||
if supports_systemd_services():
|
||||
systemd_start()
|
||||
|
|
@ -4125,6 +4191,12 @@ def gateway_setup():
|
|||
print_error(" Failed to start — user systemd not reachable:")
|
||||
for line in str(e).splitlines():
|
||||
print(f" {line}")
|
||||
except SystemScopeRequiresRootError as e:
|
||||
# Defense in depth: the pre-check above should have caught
|
||||
# this, but handle the race/edge case gracefully instead of
|
||||
# letting the exception escape the wizard.
|
||||
print_error(f" Failed to start: {e}")
|
||||
_print_system_scope_remediation("start")
|
||||
except subprocess.CalledProcessError as e:
|
||||
print_error(f" Failed to start: {e}")
|
||||
else:
|
||||
|
|
@ -4174,7 +4246,9 @@ def gateway_setup():
|
|||
service_running = _is_service_running()
|
||||
|
||||
if service_running:
|
||||
if prompt_yes_no(" Restart the gateway to pick up changes?", True):
|
||||
if supports_systemd_services() and _system_scope_wizard_would_need_root():
|
||||
_print_system_scope_remediation("restart")
|
||||
elif prompt_yes_no(" Restart the gateway to pick up changes?", True):
|
||||
try:
|
||||
if supports_systemd_services():
|
||||
systemd_restart()
|
||||
|
|
@ -4187,10 +4261,15 @@ def gateway_setup():
|
|||
print_error(" Restart failed — user systemd not reachable:")
|
||||
for line in str(e).splitlines():
|
||||
print(f" {line}")
|
||||
except SystemScopeRequiresRootError as e:
|
||||
print_error(f" Restart failed: {e}")
|
||||
_print_system_scope_remediation("restart")
|
||||
except subprocess.CalledProcessError as e:
|
||||
print_error(f" Restart failed: {e}")
|
||||
elif service_installed:
|
||||
if prompt_yes_no(" Start the gateway service?", True):
|
||||
if supports_systemd_services() and _system_scope_wizard_would_need_root():
|
||||
_print_system_scope_remediation("start")
|
||||
elif prompt_yes_no(" Start the gateway service?", True):
|
||||
try:
|
||||
if supports_systemd_services():
|
||||
systemd_start()
|
||||
|
|
@ -4200,6 +4279,9 @@ def gateway_setup():
|
|||
print_error(" Start failed — user systemd not reachable:")
|
||||
for line in str(e).splitlines():
|
||||
print(f" {line}")
|
||||
except SystemScopeRequiresRootError as e:
|
||||
print_error(f" Start failed: {e}")
|
||||
_print_system_scope_remediation("start")
|
||||
except subprocess.CalledProcessError as e:
|
||||
print_error(f" Start failed: {e}")
|
||||
else:
|
||||
|
|
@ -4273,6 +4355,14 @@ def gateway_command(args):
|
|||
for line in str(e).splitlines():
|
||||
print(f" {line}")
|
||||
sys.exit(1)
|
||||
except SystemScopeRequiresRootError as e:
|
||||
# The direct ``hermes gateway install|uninstall|start|stop|restart``
|
||||
# path lands here when the user typed a system-scope action without
|
||||
# sudo. Same exit code as before — just gives the wizard a way to
|
||||
# intercept the same condition with friendlier guidance before the
|
||||
# error is raised.
|
||||
print(str(e))
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _gateway_command_inner(args):
|
||||
|
|
|
|||
|
|
@ -2462,6 +2462,9 @@ def setup_gateway(config: dict):
|
|||
launchd_start,
|
||||
launchd_restart,
|
||||
UserSystemdUnavailableError,
|
||||
SystemScopeRequiresRootError,
|
||||
_system_scope_wizard_would_need_root,
|
||||
_print_system_scope_remediation,
|
||||
)
|
||||
|
||||
service_installed = _is_service_installed()
|
||||
|
|
@ -2479,7 +2482,9 @@ def setup_gateway(config: dict):
|
|||
print()
|
||||
|
||||
if service_running:
|
||||
if prompt_yes_no(" Restart the gateway to pick up changes?", True):
|
||||
if supports_systemd and _system_scope_wizard_would_need_root():
|
||||
_print_system_scope_remediation("restart")
|
||||
elif prompt_yes_no(" Restart the gateway to pick up changes?", True):
|
||||
try:
|
||||
if supports_systemd:
|
||||
systemd_restart()
|
||||
|
|
@ -2489,10 +2494,19 @@ def setup_gateway(config: dict):
|
|||
print_error(" Restart failed — user systemd not reachable:")
|
||||
for line in str(e).splitlines():
|
||||
print(f" {line}")
|
||||
except SystemScopeRequiresRootError as e:
|
||||
# Defense in depth: the pre-check above should have
|
||||
# caught this, but a race (unit file appearing mid-run)
|
||||
# could still land here. Previously this exited the
|
||||
# whole wizard via sys.exit(1).
|
||||
print_error(f" Restart failed: {e}")
|
||||
_print_system_scope_remediation("restart")
|
||||
except Exception as e:
|
||||
print_error(f" Restart failed: {e}")
|
||||
elif service_installed:
|
||||
if prompt_yes_no(" Start the gateway service?", True):
|
||||
if supports_systemd and _system_scope_wizard_would_need_root():
|
||||
_print_system_scope_remediation("start")
|
||||
elif prompt_yes_no(" Start the gateway service?", True):
|
||||
try:
|
||||
if supports_systemd:
|
||||
systemd_start()
|
||||
|
|
@ -2502,6 +2516,9 @@ def setup_gateway(config: dict):
|
|||
print_error(" Start failed — user systemd not reachable:")
|
||||
for line in str(e).splitlines():
|
||||
print(f" {line}")
|
||||
except SystemScopeRequiresRootError as e:
|
||||
print_error(f" Start failed: {e}")
|
||||
_print_system_scope_remediation("start")
|
||||
except Exception as e:
|
||||
print_error(f" Start failed: {e}")
|
||||
elif supports_service_manager:
|
||||
|
|
@ -2529,6 +2546,9 @@ def setup_gateway(config: dict):
|
|||
print_error(" Start failed — user systemd not reachable:")
|
||||
for line in str(e).splitlines():
|
||||
print(f" {line}")
|
||||
except SystemScopeRequiresRootError as e:
|
||||
print_error(f" Start failed: {e}")
|
||||
_print_system_scope_remediation("start")
|
||||
except Exception as e:
|
||||
print_error(f" Start failed: {e}")
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -2177,3 +2177,171 @@ class TestSystemdInstallOffersLegacyRemoval:
|
|||
|
||||
assert prompt_called["count"] == 0
|
||||
assert remove_called["invoked"] is False
|
||||
|
||||
|
||||
class TestSystemScopeRequiresRootError:
|
||||
"""Tests for the SystemScopeRequiresRootError replacement of sys.exit(1).
|
||||
|
||||
Before this change, ``_require_root_for_system_service`` called
|
||||
``sys.exit(1)`` when non-root code tried a system-scope systemd
|
||||
operation. The wizard's ``except Exception`` guards don't catch
|
||||
``SystemExit`` (it's a ``BaseException`` subclass), so the user was
|
||||
dumped at a bare shell prompt mid-setup. The fix raises a typed
|
||||
exception instead, which the wizard intercepts and handles with
|
||||
actionable remediation.
|
||||
"""
|
||||
|
||||
def test_require_root_raises_when_non_root(self, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
|
||||
|
||||
with pytest.raises(gateway_cli.SystemScopeRequiresRootError) as excinfo:
|
||||
gateway_cli._require_root_for_system_service("start")
|
||||
|
||||
assert excinfo.value.args[0] == "System gateway start requires root. Re-run with sudo."
|
||||
assert excinfo.value.args[1] == "start"
|
||||
# str(e) renders only the message, not the tuple repr, so that
|
||||
# wizard format strings like f"Failed: {e}" print cleanly.
|
||||
assert str(excinfo.value) == "System gateway start requires root. Re-run with sudo."
|
||||
assert f"Failed: {excinfo.value}" == "Failed: System gateway start requires root. Re-run with sudo."
|
||||
|
||||
def test_require_root_noop_when_root(self, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 0)
|
||||
|
||||
# Should not raise, should not exit
|
||||
gateway_cli._require_root_for_system_service("start")
|
||||
|
||||
def test_error_is_runtime_error_subclass(self):
|
||||
"""Wizards use ``except Exception`` guards — the error must be a
|
||||
``RuntimeError`` (catchable by ``Exception``), NOT a ``SystemExit``
|
||||
(``BaseException``), so the wizard can recover from it.
|
||||
"""
|
||||
err = gateway_cli.SystemScopeRequiresRootError("msg", "start")
|
||||
assert isinstance(err, RuntimeError)
|
||||
assert isinstance(err, Exception)
|
||||
assert not isinstance(err, SystemExit)
|
||||
|
||||
|
||||
class TestSystemScopeWizardPreCheck:
|
||||
"""Tests for _system_scope_wizard_would_need_root — the guard the
|
||||
wizard uses to detect the dead-end BEFORE prompting the user to start
|
||||
a service that will fail without sudo.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _setup_units(tmp_path, monkeypatch, system_present: bool, user_present: bool):
|
||||
sys_dir = tmp_path / "sys"
|
||||
usr_dir = tmp_path / "usr"
|
||||
sys_dir.mkdir()
|
||||
usr_dir.mkdir()
|
||||
if system_present:
|
||||
(sys_dir / "hermes-gateway.service").write_text("[Unit]\n")
|
||||
if user_present:
|
||||
(usr_dir / "hermes-gateway.service").write_text("[Unit]\n")
|
||||
monkeypatch.setattr(
|
||||
gateway_cli,
|
||||
"get_systemd_unit_path",
|
||||
lambda system=False: (sys_dir if system else usr_dir) / "hermes-gateway.service",
|
||||
)
|
||||
|
||||
def test_non_root_with_only_system_unit_returns_true(self, tmp_path, monkeypatch):
|
||||
self._setup_units(tmp_path, monkeypatch, system_present=True, user_present=False)
|
||||
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
|
||||
|
||||
assert gateway_cli._system_scope_wizard_would_need_root() is True
|
||||
|
||||
def test_root_never_needs_root(self, tmp_path, monkeypatch):
|
||||
self._setup_units(tmp_path, monkeypatch, system_present=True, user_present=False)
|
||||
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 0)
|
||||
|
||||
assert gateway_cli._system_scope_wizard_would_need_root() is False
|
||||
|
||||
def test_non_root_with_user_unit_present_returns_false(self, tmp_path, monkeypatch):
|
||||
# User-scope unit present — user can start it themselves, no sudo needed.
|
||||
self._setup_units(tmp_path, monkeypatch, system_present=True, user_present=True)
|
||||
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
|
||||
|
||||
assert gateway_cli._system_scope_wizard_would_need_root() is False
|
||||
|
||||
def test_non_root_with_no_units_returns_false(self, tmp_path, monkeypatch):
|
||||
self._setup_units(tmp_path, monkeypatch, system_present=False, user_present=False)
|
||||
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
|
||||
|
||||
assert gateway_cli._system_scope_wizard_would_need_root() is False
|
||||
|
||||
def test_non_root_with_explicit_system_arg_returns_true(self, tmp_path, monkeypatch):
|
||||
# Caller passed system=True explicitly (e.g. ``hermes gateway start --system``).
|
||||
self._setup_units(tmp_path, monkeypatch, system_present=False, user_present=False)
|
||||
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
|
||||
|
||||
assert gateway_cli._system_scope_wizard_would_need_root(system=True) is True
|
||||
|
||||
|
||||
class TestSystemScopeRemediationOutput:
|
||||
"""Tests for _print_system_scope_remediation — the actionable guidance
|
||||
shown when the wizard detects a system-scope-only setup as non-root.
|
||||
"""
|
||||
|
||||
def test_start_remediation_mentions_sudo_systemctl_and_uninstall(self, capsys, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli, "get_service_name", lambda: "hermes-gateway")
|
||||
|
||||
gateway_cli._print_system_scope_remediation("start")
|
||||
out = capsys.readouterr().out
|
||||
|
||||
assert "system-wide service" in out
|
||||
assert "start requires root" in out
|
||||
assert "sudo systemctl start hermes-gateway" in out
|
||||
assert "sudo hermes gateway uninstall --system" in out
|
||||
assert "hermes gateway install" in out
|
||||
|
||||
def test_restart_remediation_uses_systemctl_restart(self, capsys, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli, "get_service_name", lambda: "hermes-gateway")
|
||||
|
||||
gateway_cli._print_system_scope_remediation("restart")
|
||||
out = capsys.readouterr().out
|
||||
|
||||
assert "restart requires root" in out
|
||||
assert "sudo systemctl restart hermes-gateway" in out
|
||||
|
||||
def test_stop_remediation_uses_systemctl_stop(self, capsys, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli, "get_service_name", lambda: "hermes-gateway")
|
||||
|
||||
gateway_cli._print_system_scope_remediation("stop")
|
||||
out = capsys.readouterr().out
|
||||
|
||||
assert "stop requires root" in out
|
||||
assert "sudo systemctl stop hermes-gateway" in out
|
||||
|
||||
|
||||
class TestGatewayCommandCatchesSystemScopeError:
|
||||
"""The direct CLI path (``hermes gateway start --system`` etc.) must
|
||||
still exit 1 with a clean message when non-root. The top-level
|
||||
``gateway_command`` catches ``SystemScopeRequiresRootError`` and
|
||||
converts it back to ``sys.exit(1)``, preserving existing CLI behavior.
|
||||
"""
|
||||
|
||||
def test_non_root_system_start_exits_one_with_clean_message(self, tmp_path, monkeypatch, capsys):
|
||||
sys_dir = tmp_path / "sys"
|
||||
usr_dir = tmp_path / "usr"
|
||||
sys_dir.mkdir()
|
||||
usr_dir.mkdir()
|
||||
(sys_dir / "hermes-gateway.service").write_text("[Unit]\n")
|
||||
monkeypatch.setattr(
|
||||
gateway_cli,
|
||||
"get_systemd_unit_path",
|
||||
lambda system=False: (sys_dir if system else usr_dir) / "hermes-gateway.service",
|
||||
)
|
||||
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
|
||||
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
||||
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
||||
monkeypatch.setattr(gateway_cli, "kill_gateway_processes", lambda **kw: 0)
|
||||
|
||||
args = SimpleNamespace(gateway_command="start", system=True, all=False)
|
||||
|
||||
with pytest.raises(SystemExit) as excinfo:
|
||||
gateway_cli.gateway_command(args)
|
||||
|
||||
assert excinfo.value.code == 1
|
||||
out = capsys.readouterr().out
|
||||
# Renders the message, NOT the ``('msg', 'action')`` tuple repr
|
||||
assert "System gateway start requires root. Re-run with sudo." in out
|
||||
assert "('" not in out # no tuple repr leaking through
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue