def _write_update_planned_stop_marker(profile_path: Path, pid: int) -> bool:
    """Write a planned-stop marker into a specific profile home.

    The marker is ``<profile_path>/.gateway-planned-stop.json`` and records
    the target gateway PID, that process's start time, the PID of this
    (stopping) process and a UTC timestamp.

    Writing the marker is best-effort. Any failure -- helper import, process
    inspection, serialization or filesystem -- is logged at debug level and
    reported through the return value, so a marker problem can never abort
    the surrounding update.

    Args:
        profile_path: Profile home directory that receives the marker.
        pid: PID of the gateway process that is about to be stopped.

    Returns:
        ``True`` when the marker was written, ``False`` otherwise.
    """
    try:
        from datetime import timezone

        from gateway.status import _get_process_start_time
        from utils import atomic_json_write

        record = {
            "target_pid": pid,
            "target_start_time": _get_process_start_time(pid),
            "stopper_pid": os.getpid(),
            "written_at": datetime.now(timezone.utc).isoformat(),
        }
        atomic_json_write(
            Path(profile_path) / ".gateway-planned-stop.json",
            record,
            indent=None,
            separators=(",", ":"),
        )
    except Exception as exc:
        # Deliberately broad: callers ignore the result and do not guard the
        # call, so anything escaping here would kill the update itself.
        logger.debug(
            "Could not write planned-stop marker for gateway PID %s: %s",
            pid,
            exc,
        )
        return False
    return True


def _wait_for_windows_update_gateway_exit(
    pids: list[int], *, timeout: float
) -> set[int]:
    """Wait for the given gateway PIDs to exit, returning survivors.

    Polls the PIDs roughly every 0.25 seconds until all of them are gone or
    ``timeout`` seconds have elapsed, then probes the remaining PIDs one last
    time.

    Args:
        pids: Gateway PIDs to watch. An empty list returns immediately.
        timeout: Maximum number of seconds to wait; negative values are
            treated as ``0``.

    Returns:
        The set of PIDs that still exist after the wait.
    """
    if not pids:
        return set()

    from gateway.status import _pid_exists

    def _still_running(pid: int) -> bool:
        # A probe that raises is treated as "process gone", so an
        # uninspectable PID is never reported as a survivor.
        try:
            return bool(_pid_exists(pid))
        except Exception:
            return False

    remaining = set(pids)
    deadline = _time.monotonic() + max(timeout, 0.0)
    while remaining and _time.monotonic() < deadline:
        remaining = {pid for pid in remaining if _still_running(pid)}
        if remaining:
            # Never sleep past the deadline; the caller's timeout is a cap.
            time_left = deadline - _time.monotonic()
            if time_left > 0:
                _time.sleep(min(0.25, time_left))

    # Final probe: a process may have exited during the last sleep.
    return {pid for pid in remaining if _still_running(pid)}


def _pause_windows_gateways_for_update() -> dict | None:
    """Stop running Windows gateways before mutating the checkout or venv.

    Windows scheduled/startup gateways run through pythonw.exe, so the generic
    hermes.exe concurrent-instance guard does not see them. They still import
    from the checkout and can keep files locked while ``git`` or ``uv`` updates
    the install. Stop only PIDs that the gateway discovery code identifies.

    Gateways that map to a profile get a planned-stop marker and are given the
    restart drain timeout to exit; whatever is still alive afterwards, plus
    every gateway PID without a profile mapping, is force-terminated.

    Returns:
        ``None`` when not on Windows, when discovery is unavailable or fails,
        or when no gateway is running. Otherwise a token dict for
        ``_resume_windows_gateways_after_update`` with the keys
        ``resume_needed`` (``True``), ``profiles`` (profile name -> old PID)
        and ``unmapped_pids`` (PIDs that cannot be restarted automatically).
    """
    if not _is_windows():
        return None

    try:
        from gateway.status import terminate_pid
        from hermes_cli.gateway import (
            _get_restart_drain_timeout,
            find_gateway_pids,
            find_profile_gateway_processes,
        )
    except Exception as exc:
        logger.debug("Could not prepare Windows gateway pause for update: %s", exc)
        return None

    try:
        # dict.fromkeys de-duplicates while keeping discovery order.
        running_pids = list(dict.fromkeys(find_gateway_pids(all_profiles=True)))
    except Exception as exc:
        logger.debug("Could not discover Windows gateway PIDs before update: %s", exc)
        return None
    if not running_pids:
        return None

    profile_processes = {}
    try:
        profile_processes = {
            proc.pid: proc for proc in find_profile_gateway_processes()
        }
    except Exception as exc:
        # Without a mapping every PID is handled as "unmapped" below.
        logger.debug("Could not map Windows gateway PIDs to profiles: %s", exc)

    profiles: dict[str, int] = {}
    mapped_pids: list[int] = []
    unmapped_pids = []
    for pid in running_pids:
        proc = profile_processes.get(pid)
        if proc is None:
            unmapped_pids.append(pid)
            continue
        profiles[str(proc.profile)] = int(pid)
        mapped_pids.append(int(pid))
        # The writer converts the path inside its own guard, so an unusable
        # profile path is reported as a failed marker instead of raising here.
        # NOTE(review): no stop request is sent to mapped PIDs at this point;
        # presumably the gateway reacts to the marker on its own -- confirm.
        _write_update_planned_stop_marker(proc.path, int(pid))

    print("→ Stopping Windows gateway process(es) before updating Hermes...")
    try:
        drain_timeout = max(float(_get_restart_drain_timeout()), 1.0)
    except Exception:
        drain_timeout = 10.0
    survivors = _wait_for_windows_update_gateway_exit(
        mapped_pids,
        timeout=drain_timeout,
    )

    force_killed = []
    for pid in sorted(set(survivors).union(unmapped_pids)):
        try:
            terminate_pid(int(pid), force=True)
        except ProcessLookupError:
            # Already gone between discovery and termination.
            continue
        except OSError as exc:
            logger.debug("Could not force-stop gateway PID %s: %s", pid, exc)
            continue
        force_killed.append(int(pid))

    if profiles:
        print(f" ✓ Paused gateway profile(s): {', '.join(sorted(profiles))}")
    if force_killed:
        print(f" → Force-stopped {len(force_killed)} gateway process(es)")

    if unmapped_pids:
        print(
            f" → Stopped {len(unmapped_pids)} gateway process(es) without profile mapping"
        )
        print(" Restart manually after update: hermes gateway run")

    return {
        "resume_needed": True,
        "profiles": profiles,
        "unmapped_pids": unmapped_pids,
    }


def _resume_windows_gateways_after_update(token: dict | None) -> None:
    """Restart Windows profile gateways previously paused for update.

    The call is idempotent per token: the first call clears
    ``resume_needed`` before doing anything else, so later calls with the same
    token are no-ops. The update command relies on this -- it registers this
    function with ``atexit`` right after pausing and also calls it explicitly
    on its ZIP-update, "already up to date" and normal completion paths.

    Args:
        token: Value returned by ``_pause_windows_gateways_for_update``;
            ``None`` or a token without ``resume_needed`` does nothing.
    """
    if not token or not token.get("resume_needed"):
        return
    token["resume_needed"] = False
    if not _is_windows():
        return

    profiles = token.get("profiles") or {}
    if not profiles:
        return

    try:
        from hermes_cli.gateway import launch_detached_profile_gateway_restart
    except Exception as exc:
        logger.debug("Could not load Windows gateway restart helper: %s", exc)
        return

    relaunched = []
    not_restarted = []
    for profile, old_pid in sorted(profiles.items()):
        try:
            started = launch_detached_profile_gateway_restart(
                str(profile), int(old_pid)
            )
        except Exception as exc:
            logger.debug(
                "Could not restart Windows gateway profile %s after update: %s",
                profile,
                exc,
            )
            started = False
        (relaunched if started else not_restarted).append(str(profile))

    if relaunched:
        print()
        print(f" ✓ Restarting Windows gateway profile(s): {', '.join(relaunched)}")
    if not_restarted:
        # These gateways were stopped by the updater; do not leave the user
        # guessing why they are down.
        print(
            f" ⚠ Could not restart Windows gateway profile(s): {', '.join(not_restarted)}"
        )
use_zip_update = False @@ -8294,7 +8479,10 @@ def _cmd_update_impl(args, gateway_mode: bool): if use_zip_update: # ZIP-based update for Windows when git is broken - _update_via_zip(args) + try: + _update_via_zip(args) + finally: + _resume_windows_gateways_after_update(_windows_gateway_resume) return # Fetch and pull @@ -8431,6 +8619,7 @@ def _cmd_update_impl(args, gateway_mode: bool): check=False, ) print("✓ Already up to date!") + _resume_windows_gateways_after_update(_windows_gateway_resume) return print(f"→ Found {commit_count} new commit(s)") @@ -9627,6 +9816,8 @@ def _cmd_update_impl(args, gateway_mode: bool): except Exception as e: logger.debug("Gateway restart during update failed: %s", e) + _resume_windows_gateways_after_update(_windows_gateway_resume) + # Warn if legacy Hermes gateway unit files are still installed. # When both hermes.service (from a pre-rename install) and the # current hermes-gateway.service are enabled, they SIGTERM-fight diff --git a/tests/hermes_cli/test_update_concurrent_quarantine.py b/tests/hermes_cli/test_update_concurrent_quarantine.py index fe14856fd7e..0ee3f938cf2 100644 --- a/tests/hermes_cli/test_update_concurrent_quarantine.py +++ b/tests/hermes_cli/test_update_concurrent_quarantine.py @@ -7,6 +7,7 @@ Windows-specific code paths can be exercised on any host. 
from __future__ import annotations

import json
import os
import sys
import types


# ---------------------------------------------------------------------------
# Windows gateway pause/resume before update mutation
# ---------------------------------------------------------------------------


@patch.object(cli_main, "_is_windows", return_value=True)
def test_pause_windows_gateways_for_update_stops_profile_and_unmapped_pids(
    _winp,
    monkeypatch,
    tmp_path,
    capsys,
):
    """Pause waits on the profile-mapped PID and force-stops the unmapped one.

    PID 101 belongs to profile ``work``; PID 202 has no profile mapping. The
    wait helper is stubbed to report no survivors, so only 202 may be
    terminated, and the planned-stop marker must land in the profile home.
    """
    import gateway.status as status_mod
    import hermes_cli.gateway as gateway_mod

    work_home = tmp_path / "profiles" / "work"
    work_home.mkdir(parents=True)
    mapped_gateway = SimpleNamespace(profile="work", path=work_home, pid=101)

    def discover_pids(**_kwargs):
        return [101, 202]

    def discover_profile_processes(**_kwargs):
        return [mapped_gateway]

    def drain_timeout():
        return 0.1

    waited_for = []

    def record_wait(pids, *, timeout):
        waited_for.extend(pids)
        return set()

    terminated = []

    def record_terminate(pid, force=False):
        terminated.append((pid, force))

    monkeypatch.setattr(gateway_mod, "find_gateway_pids", discover_pids)
    monkeypatch.setattr(
        gateway_mod, "find_profile_gateway_processes", discover_profile_processes
    )
    monkeypatch.setattr(gateway_mod, "_get_restart_drain_timeout", drain_timeout)
    monkeypatch.setattr(
        cli_main, "_wait_for_windows_update_gateway_exit", record_wait
    )
    monkeypatch.setattr(status_mod, "terminate_pid", record_terminate)

    token = cli_main._pause_windows_gateways_for_update()

    expected_token = {
        "resume_needed": True,
        "profiles": {"work": 101},
        "unmapped_pids": [202],
    }
    assert token == expected_token
    assert waited_for == [101]
    assert terminated == [(202, True)]

    marker_path = work_home / ".gateway-planned-stop.json"
    marker = json.loads(marker_path.read_text())
    assert marker["target_pid"] == 101
    assert marker["stopper_pid"] == os.getpid()

    stdout = capsys.readouterr().out
    assert "Paused gateway profile(s): work" in stdout
    assert "without profile mapping" in stdout


@patch.object(cli_main, "_is_windows", return_value=True)
def test_resume_windows_gateways_after_update_relaunches_paused_profiles(
    _winp,
    monkeypatch,
    capsys,
):
    """Resume relaunches every paused profile in name order and spends the token."""
    import hermes_cli.gateway as gateway_mod

    relaunched = []

    def record_relaunch(profile, old_pid):
        relaunched.append((profile, old_pid))
        return True

    monkeypatch.setattr(
        gateway_mod, "launch_detached_profile_gateway_restart", record_relaunch
    )

    token = {
        "resume_needed": True,
        "profiles": {"default": 101, "work": 202},
        "unmapped_pids": [],
    }

    cli_main._resume_windows_gateways_after_update(token)

    assert token["resume_needed"] is False
    assert relaunched == [("default", 101), ("work", 202)]

    stdout = capsys.readouterr().out
    assert "Restarting Windows gateway profile(s): default, work" in stdout


# ---------------------------------------------------------------------------
# cmd_update integration — concurrent-instance gate
# ---------------------------------------------------------------------------