From ef6492b6484aff843aa86598c9ef68b9eecf3038 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Mon, 22 Jun 2026 06:02:31 -0700
Subject: [PATCH] fix(gateway): cold-start installed Windows gateway after update when none was running (#50804)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The post-update gateway resume path (`_resume_windows_gateways_after_update`) only relaunched gateways that were *running* when the update began — it enumerates live PIDs in `_pause_windows_gateways_for_update` and respawns exactly those. A gateway that had already died between updates (e.g. it was launched attached to a terminal/TUI that later closed, taking the child with it) was never brought back: the Startup-folder / Scheduled-Task autostart entry only fires on the next login, not after an in-place update.

So a Desktop-GUI update (which runs `hermes update --yes --gateway`) on a box whose gateway had quietly died would complete with no gateway running, and the user had no indication anything should have come up.

Fix: when no gateway is running at pause time but an autostart entry is installed (`gateway_windows.is_installed()` — an explicit "I want a gateway" signal), return a `cold_start_if_installed` token. The resume step then does a fresh detached spawn via `gateway_windows._spawn_detached()` — the same windowless `pythonw` + `CREATE_BREAKAWAY_FROM_JOB` path `hermes gateway start` uses. It re-checks liveness immediately before spawning so a concurrent start (autostart entry firing) can't produce a duplicate.

Gateway-less users (no autostart entry) get nothing forced on them — the pause step still returns None for them. POSIX is unaffected: enabled systemd units already restart via `Restart=always`.

Windows-only; best-effort throughout (logs at debug and no-ops on any error).

Tests: pause returns the cold-start token only when installed, returns None when not installed, resume cold-starts on the token, and resume skips the cold-start when a gateway is already running. --- hermes_cli/main.py | 73 +++++++++++ .../test_update_concurrent_quarantine.py | 114 ++++++++++++++++++ 2 files changed, 187 insertions(+) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index df6c7329c15..6222de6bb00 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -8431,6 +8431,31 @@ def _pause_windows_gateways_for_update() -> dict | None: logger.debug("Could not discover Windows gateway PIDs before update: %s", exc) return None if not running_pids: + # No gateway is running right now, but the user may have installed an + # autostart entry (Scheduled Task or Startup-folder login item) — that + # is an explicit "I want a gateway" signal. A gateway that died between + # updates (e.g. the spawning terminal/TUI closed, taking its child with + # it) would otherwise never come back: the autostart entry only fires on + # the next login, and the update flow's resume path only relaunched + # gateways that were running when the update began. Cold-start one after + # the update so an installed gateway is actually up post-update. Users + # who run gateway-less (no autostart entry) get nothing forced on them. + try: + from hermes_cli import gateway_windows + + if gateway_windows.is_installed(): + return { + "resume_needed": True, + "profiles": {}, + "unmapped_pids": [], + "unmapped": [], + "cold_start_if_installed": True, + } + except Exception as exc: + logger.debug( + "Could not check Windows gateway autostart state before update: %s", + exc, + ) return None profile_processes = {} @@ -8508,6 +8533,51 @@ def _pause_windows_gateways_for_update() -> dict | None: } +def _cold_start_windows_gateway_after_update() -> None: + """Start a fresh detached gateway after update when one is installed but down. 
+ + Invoked from ``_resume_windows_gateways_after_update`` for the + ``cold_start_if_installed`` case: no gateway was running when the update + began, but an autostart entry (Scheduled Task / Startup-folder login item) + is installed, signalling the user wants a gateway. Unlike the relaunch + paths — which watch an old PID and respawn once it exits — this is a direct + fresh spawn via the same windowless ``pythonw`` + breakaway path that + ``hermes gateway start`` uses (``gateway_windows._spawn_detached``). + + Best-effort and idempotent: re-checks that nothing is running first so a + concurrent start (e.g. the autostart entry firing) can't produce a + duplicate gateway. + """ + if not _is_windows(): + return + try: + from hermes_cli import gateway_windows + from hermes_cli.gateway import find_gateway_pids + except Exception as exc: + logger.debug("Could not load Windows gateway cold-start helpers: %s", exc) + return + + # Re-check liveness right before spawning — between pause and resume the + # autostart entry may have already brought a gateway up, or a leftover + # process may have re-registered. Don't double-start. 
+ try: + if list(find_gateway_pids(all_profiles=True)): + return + except Exception as exc: + logger.debug("Could not re-check gateway liveness before cold-start: %s", exc) + return + + try: + pid = gateway_windows._spawn_detached() + except Exception as exc: + logger.debug("Could not cold-start Windows gateway after update: %s", exc) + return + + if pid: + print() + print(f" ✓ Starting Windows gateway after update (PID {pid})") + + def _resume_windows_gateways_after_update(token: dict | None) -> None: """Restart Windows profile gateways previously paused for update.""" if not token or not token.get("resume_needed"): @@ -8518,7 +8588,10 @@ def _resume_windows_gateways_after_update(token: dict | None) -> None: profiles = token.get("profiles") or {} unmapped = token.get("unmapped") or [] + cold_start = bool(token.get("cold_start_if_installed")) if not profiles and not any(u.get("argv") for u in unmapped): + if cold_start: + _cold_start_windows_gateway_after_update() return try: diff --git a/tests/hermes_cli/test_update_concurrent_quarantine.py b/tests/hermes_cli/test_update_concurrent_quarantine.py index efb2e1e5fca..5345319bb49 100644 --- a/tests/hermes_cli/test_update_concurrent_quarantine.py +++ b/tests/hermes_cli/test_update_concurrent_quarantine.py @@ -597,6 +597,120 @@ def test_resume_windows_gateways_after_update_respawns_unmapped_by_cmdline( assert "Restarting 1 unmapped Windows gateway process(es)" in out +@patch.object(cli_main, "_is_windows", return_value=True) +def test_pause_returns_cold_start_token_when_installed_but_none_running( + _winp, + monkeypatch, +): + """No gateway running + autostart entry installed → cold-start token. + + A gateway that died between updates (spawning terminal/TUI closed) leaves + nothing for the resume path to relaunch, but the installed autostart entry + is an explicit "I want a gateway" signal. The pause step must return a + token that tells resume to cold-start one. 
+ """ + import hermes_cli.gateway as gateway_mod + from hermes_cli import gateway_windows + + monkeypatch.setattr(gateway_mod, "find_gateway_pids", lambda **_k: []) + monkeypatch.setattr(gateway_windows, "is_installed", lambda: True) + + token = cli_main._pause_windows_gateways_for_update() + + assert token == { + "resume_needed": True, + "profiles": {}, + "unmapped_pids": [], + "unmapped": [], + "cold_start_if_installed": True, + } + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_pause_returns_none_when_nothing_running_and_not_installed( + _winp, + monkeypatch, +): + """No gateway running + no autostart entry → no token (gateway-less user). + + Users who deliberately run without a gateway must not get one forced on + them by an update. + """ + import hermes_cli.gateway as gateway_mod + from hermes_cli import gateway_windows + + monkeypatch.setattr(gateway_mod, "find_gateway_pids", lambda **_k: []) + monkeypatch.setattr(gateway_windows, "is_installed", lambda: False) + + assert cli_main._pause_windows_gateways_for_update() is None + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_resume_cold_starts_gateway_when_token_requests_it( + _winp, + monkeypatch, + capsys, +): + """cold_start_if_installed token + nothing running → fresh detached spawn.""" + import hermes_cli.gateway as gateway_mod + from hermes_cli import gateway_windows + + monkeypatch.setattr(gateway_mod, "find_gateway_pids", lambda **_k: []) + spawned = [] + monkeypatch.setattr( + gateway_windows, + "_spawn_detached", + lambda: spawned.append(True) or 4242, + ) + + token = { + "resume_needed": True, + "profiles": {}, + "unmapped_pids": [], + "unmapped": [], + "cold_start_if_installed": True, + } + + cli_main._resume_windows_gateways_after_update(token) + + assert token["resume_needed"] is False + assert spawned == [True] + assert "Starting Windows gateway after update (PID 4242)" in capsys.readouterr().out + + +@patch.object(cli_main, "_is_windows", 
return_value=True) +def test_resume_cold_start_skips_when_gateway_already_running( + _winp, + monkeypatch, + capsys, +): + """Don't double-start: if a gateway came up between pause and resume + (e.g. the autostart entry fired), the cold-start must no-op.""" + import hermes_cli.gateway as gateway_mod + from hermes_cli import gateway_windows + + monkeypatch.setattr(gateway_mod, "find_gateway_pids", lambda **_k: [9001]) + spawned = [] + monkeypatch.setattr( + gateway_windows, + "_spawn_detached", + lambda: spawned.append(True) or 4242, + ) + + token = { + "resume_needed": True, + "profiles": {}, + "unmapped_pids": [], + "unmapped": [], + "cold_start_if_installed": True, + } + + cli_main._resume_windows_gateways_after_update(token) + + assert spawned == [] + assert "Starting Windows gateway after update" not in capsys.readouterr().out + + # --------------------------------------------------------------------------- # cmd_update integration — concurrent-instance gate # ---------------------------------------------------------------------------