From 463bf2be25baf88809817c869a3985e917af2dd5 Mon Sep 17 00:00:00 2001 From: AIalliAI <285906080+AIalliAI@users.noreply.github.com> Date: Thu, 11 Jun 2026 23:45:15 +0000 Subject: [PATCH] fix(update): announce gateway drain waits so desktop updates don't look hung MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On macOS, the desktop updater's stage 1 (hermes update --gateway) ends by restarting running gateways. launchd_restart() SIGTERMs the gateway and silently waits up to agent.restart_drain_timeout (default 180s) for the drain; the manual profile-gateway loop waits its drain budget per gateway the same way. Neither path prints anything before the wait, so the desktop updater's live output goes dead for minutes right after '✓ Update complete!' — users read it as a hung update and force-kill their gateway processes to make it move (#44515). The systemd branch already announces its drain ('draining (up to Ns)...'); launchd and the manual loop did not. Print the stop/drain (with PID and budget) before the wait in both paths, mirroring the systemd branch, and assert the message in the existing launchd drain test. Fixes #44515 --- hermes_cli/gateway.py | 10 ++++++++++ hermes_cli/main.py | 8 ++++++++ tests/hermes_cli/test_gateway_service.py | 8 +++++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 9811749e3ca..63ea644ec0a 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -3891,6 +3891,16 @@ def launchd_restart(): print("✓ Service restart requested") return if pid is not None: + # Announce the drain BEFORE waiting on it. This wait can run for + # the full drain budget (180s by default) while the old gateway + # finishes in-flight agent runs, and it streams into surfaces with + # no other feedback — the desktop updater's live output most of + # all, where a silent stop here reads as "update stuck" (#44515). + # Mirrors the systemd branch's "draining (up to Ns)..." line. + print( + f"→ Stopping gateway (PID {pid}) — draining in-flight runs " + f"(up to {drain_timeout:.0f}s)..." + ) try: terminate_pid(pid, force=False) except (ProcessLookupError, PermissionError, OSError): diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 916d33bba1a..a9ee8865728 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -10178,6 +10178,14 @@ def _cmd_update_impl(args, gateway_mode: bool): # gateway doesn't support SIGUSR1 or doesn't exit within # the drain budget, fall back to SIGTERM — the watcher # still sees the exit and relaunches either way. + # Announce the drain first: this wait can hold for the full + # budget per gateway with no other output, and on surfaces + # that stream update progress (the desktop updater most of + # all) the silence reads as a hung update (#44515). + print( + f" → {proc.profile}: draining gateway PID {pid} " + f"(up to {int(_drain_budget)}s)..." + ) drained = _graceful_restart_via_sigusr1( pid, drain_timeout=_drain_budget, diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py index 6dd504a5f70..56a6181b316 100644 --- a/tests/hermes_cli/test_gateway_service.py +++ b/tests/hermes_cli/test_gateway_service.py @@ -774,7 +774,7 @@ class TestLaunchdServiceRecovery: ["launchctl", "kickstart", target], ] - def test_launchd_restart_drains_running_gateway_before_kickstart(self, monkeypatch): + def test_launchd_restart_drains_running_gateway_before_kickstart(self, monkeypatch, capsys): calls = [] target = f"{gateway_cli._launchd_domain()}/{gateway_cli.get_launchd_label()}" @@ -799,6 +799,12 @@ class TestLaunchdServiceRecovery: ("term", 321, False), ["launchctl", "kickstart", "-k", target], ] + # The drain can silently hold for the full budget (180s default); the + # desktop updater streams this output as its only progress feedback, + # so the stop must be announced BEFORE the wait (#44515). + out = capsys.readouterr().out + assert "draining in-flight runs" in out + assert "up to 12s" in out def test_launchd_restart_self_requests_graceful_restart_without_kickstart(self, monkeypatch, capsys): calls = []