diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 9811749e3ca..63ea644ec0a 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -3891,6 +3891,16 @@ def launchd_restart(): print("✓ Service restart requested") return if pid is not None: + # Announce the drain BEFORE waiting on it. This wait can run for + # the full drain budget (180s by default) while the old gateway + # finishes in-flight agent runs, and it streams into surfaces with + # no other feedback — the desktop updater's live output most of + # all, where a silent stop here reads as "update stuck" (#44515). + # Mirrors the systemd branch's "draining (up to Ns)..." line. + print( + f"→ Stopping gateway (PID {pid}) — draining in-flight runs " + f"(up to {drain_timeout:.0f}s)..." + ) try: terminate_pid(pid, force=False) except (ProcessLookupError, PermissionError, OSError): diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 916d33bba1a..a9ee8865728 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -10178,6 +10178,14 @@ def _cmd_update_impl(args, gateway_mode: bool): # gateway doesn't support SIGUSR1 or doesn't exit within # the drain budget, fall back to SIGTERM — the watcher # still sees the exit and relaunches either way. + # Announce the drain first: this wait can hold for the full + # budget per gateway with no other output, and on surfaces + # that stream update progress (the desktop updater most of + # all) the silence reads as a hung update (#44515). + print( + f" → {proc.profile}: draining gateway PID {pid} " + f"(up to {int(_drain_budget)}s)..." + ) drained = _graceful_restart_via_sigusr1( pid, drain_timeout=_drain_budget, diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py index 6dd504a5f70..56a6181b316 100644 --- a/tests/hermes_cli/test_gateway_service.py +++ b/tests/hermes_cli/test_gateway_service.py @@ -774,7 +774,7 @@ class TestLaunchdServiceRecovery: ["launchctl", "kickstart", target], ] - def test_launchd_restart_drains_running_gateway_before_kickstart(self, monkeypatch): + def test_launchd_restart_drains_running_gateway_before_kickstart(self, monkeypatch, capsys): calls = [] target = f"{gateway_cli._launchd_domain()}/{gateway_cli.get_launchd_label()}" @@ -799,6 +799,12 @@ class TestLaunchdServiceRecovery: ("term", 321, False), ["launchctl", "kickstart", "-k", target], ] + # The drain can silently hold for the full budget (180s default); the + # desktop updater streams this output as its only progress feedback, + # so the stop must be announced BEFORE the wait (#44515). + out = capsys.readouterr().out + assert "draining in-flight runs" in out + assert "up to 12s" in out def test_launchd_restart_self_requests_graceful_restart_without_kickstart(self, monkeypatch, capsys): calls = []