diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 0f5f4d15f..689164e15 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -100,6 +100,59 @@ def _get_service_pids() -> set: return pids +def _get_parent_pid(pid: int) -> int | None: + """Return the parent PID for ``pid``, or ``None`` when unavailable.""" + if pid <= 1: + return None + try: + result = subprocess.run( + ["ps", "-o", "ppid=", "-p", str(pid)], + capture_output=True, + text=True, + timeout=5, + ) + except (FileNotFoundError, subprocess.TimeoutExpired): + return None + if result.returncode != 0: + return None + raw = result.stdout.strip() + if not raw: + return None + try: + parent_pid = int(raw.splitlines()[-1].strip()) + except ValueError: + return None + return parent_pid if parent_pid > 0 else None + + +def _is_pid_ancestor_of_current_process(target_pid: int) -> bool: + """Return True when ``target_pid`` is this process or one of its ancestors.""" + if target_pid <= 0: + return False + + pid = os.getpid() + seen: set[int] = set() + while pid and pid not in seen: + if pid == target_pid: + return True + seen.add(pid) + pid = _get_parent_pid(pid) or 0 + return False + + +def _request_gateway_self_restart(pid: int) -> bool: + """Ask a running gateway ancestor to restart itself asynchronously.""" + if not hasattr(signal, "SIGUSR1"): + return False + if not _is_pid_ancestor_of_current_process(pid): + return False + try: + os.kill(pid, signal.SIGUSR1) + except (ProcessLookupError, PermissionError, OSError): + return False + return True + + def find_gateway_pids(exclude_pids: set | None = None) -> list: """Find PIDs of running gateway processes. 
class TestLaunchdServiceRecovery:
    # Excerpt: only the test method that is fully visible in this change.

    def test_launchd_restart_self_requests_graceful_restart_without_kickstart(self, monkeypatch, capsys):
        """launchd_restart returns after the self-restart request, without running launchctl."""
        calls = []

        def fake_running_pid():
            return 321

        def fake_self_restart(pid):
            # Record the request and report that it was accepted.
            calls.append(("self", pid))
            return True

        def forbid_subprocess(*args, **kwargs):
            raise AssertionError("launchctl should not run")

        monkeypatch.setattr("gateway.status.get_running_pid", fake_running_pid)
        monkeypatch.setattr(gateway_cli, "_request_gateway_self_restart", fake_self_restart)
        monkeypatch.setattr(gateway_cli.subprocess, "run", forbid_subprocess)

        gateway_cli.launchd_restart()

        printed = capsys.readouterr().out
        assert calls == [("self", 321)]
        assert "restart requested" in printed.lower()


class TestGatewaySystemServiceRouting:
    # Excerpt: only the test method that is fully visible in this change.

    def test_systemd_restart_self_requests_graceful_restart_without_reload_or_restart(self, monkeypatch, capsys):
        """systemd_restart refreshes the unit, requests a self-restart, and skips systemctl."""
        calls = []

        def fake_select_scope(system=False):
            return False

        def fake_refresh(system=False):
            calls.append(("refresh", system))

        def fake_running_pid():
            return 654

        def fake_self_restart(pid):
            # Record the request and report that it was accepted.
            calls.append(("self", pid))
            return True

        def forbid_subprocess(*args, **kwargs):
            raise AssertionError("systemctl should not run")

        monkeypatch.setattr(gateway_cli, "_select_systemd_scope", fake_select_scope)
        monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", fake_refresh)
        monkeypatch.setattr("gateway.status.get_running_pid", fake_running_pid)
        monkeypatch.setattr(gateway_cli, "_request_gateway_self_restart", fake_self_restart)
        monkeypatch.setattr(gateway_cli.subprocess, "run", forbid_subprocess)

        gateway_cli.systemd_restart()

        printed = capsys.readouterr().out
        assert calls == [("refresh", False), ("self", 654)]
        assert "restart requested" in printed.lower()