mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-15 09:21:36 +00:00
fix(update): stop Windows gateways before mutating install
This commit is contained in:
parent
957a8ffa88
commit
78c11d99e3
2 changed files with 284 additions and 1 deletions
|
|
@ -8030,6 +8030,182 @@ def _run_pre_update_backup(args) -> None:
|
|||
print()
|
||||
|
||||
|
||||
def _write_update_planned_stop_marker(profile_path: Path, pid: int) -> bool:
|
||||
"""Write a planned-stop marker into a specific profile home."""
|
||||
try:
|
||||
from datetime import timezone
|
||||
|
||||
from gateway.status import _get_process_start_time
|
||||
from utils import atomic_json_write
|
||||
|
||||
record = {
|
||||
"target_pid": pid,
|
||||
"target_start_time": _get_process_start_time(pid),
|
||||
"stopper_pid": os.getpid(),
|
||||
"written_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
atomic_json_write(
|
||||
Path(profile_path) / ".gateway-planned-stop.json",
|
||||
record,
|
||||
indent=None,
|
||||
separators=(",", ":"),
|
||||
)
|
||||
return True
|
||||
except (OSError, PermissionError):
|
||||
return False
|
||||
|
||||
|
||||
def _wait_for_windows_update_gateway_exit(
|
||||
pids: list[int], *, timeout: float
|
||||
) -> set[int]:
|
||||
"""Wait for the given gateway PIDs to exit, returning survivors."""
|
||||
if not pids:
|
||||
return set()
|
||||
|
||||
from gateway.status import _pid_exists
|
||||
|
||||
remaining = set(pids)
|
||||
deadline = _time.monotonic() + max(timeout, 0.0)
|
||||
while remaining and _time.monotonic() < deadline:
|
||||
for pid in list(remaining):
|
||||
try:
|
||||
if not _pid_exists(pid):
|
||||
remaining.discard(pid)
|
||||
except Exception:
|
||||
remaining.discard(pid)
|
||||
if remaining:
|
||||
_time.sleep(0.25)
|
||||
|
||||
survivors: set[int] = set()
|
||||
for pid in remaining:
|
||||
try:
|
||||
if _pid_exists(pid):
|
||||
survivors.add(pid)
|
||||
except Exception:
|
||||
pass
|
||||
return survivors
|
||||
|
||||
|
||||
def _pause_windows_gateways_for_update() -> dict | None:
    """Halt live Windows gateway processes ahead of an install mutation.

    Gateways launched by the Windows scheduler/startup run under
    ``pythonw.exe``, which the generic ``hermes.exe`` concurrent-instance
    guard does not see.  They import from the checkout and can hold file
    locks while ``git`` or ``uv`` rewrites the install, so they are stopped
    first.  Only PIDs reported by the gateway discovery helpers are touched.

    Steps:
      1. Discover gateway PIDs across all profiles (de-duplicated, order
         preserved).
      2. For every PID that maps to a profile, write a planned-stop marker
         into that profile's home, then wait up to the restart drain timeout
         (at least 1s; 10s when it cannot be determined) for those PIDs to
         exit.
      3. Force-terminate the survivors together with every PID that has no
         profile mapping.

    Returns:
        ``None`` on non-Windows hosts, when the helper modules cannot be
        imported, when PID discovery fails, or when no gateway is running.
        Otherwise a token dict for ``_resume_windows_gateways_after_update``
        with ``"resume_needed"`` (``True``), ``"profiles"`` (profile name to
        PID) and ``"unmapped_pids"``.
    """
    if not _is_windows():
        return None

    try:
        from gateway.status import terminate_pid
        from hermes_cli.gateway import (
            _get_restart_drain_timeout,
            find_gateway_pids,
            find_profile_gateway_processes,
        )
    except Exception as exc:
        logger.debug("Could not prepare Windows gateway pause for update: %s", exc)
        return None

    try:
        discovered = find_gateway_pids(all_profiles=True)
        # dict.fromkeys drops duplicates while keeping discovery order.
        running_pids = list(dict.fromkeys(discovered))
    except Exception as exc:
        logger.debug("Could not discover Windows gateway PIDs before update: %s", exc)
        return None
    if not running_pids:
        return None

    # PID -> profile process record; stays empty when the mapping lookup
    # fails, in which case every PID is handled as "unmapped" below.
    by_pid = {}
    try:
        by_pid = {entry.pid: entry for entry in find_profile_gateway_processes()}
    except Exception as exc:
        logger.debug("Could not map Windows gateway PIDs to profiles: %s", exc)

    profiles: dict[str, int] = {}
    mapped_pids = []
    for pid in running_pids:
        entry = by_pid.get(pid)
        if entry is not None:
            numeric_pid = int(pid)
            profiles[str(entry.profile)] = numeric_pid
            mapped_pids.append(numeric_pid)
            _write_update_planned_stop_marker(Path(entry.path), numeric_pid)

    print("→ Stopping Windows gateway process(es) before updating Hermes...")
    try:
        drain_timeout = max(float(_get_restart_drain_timeout()), 1.0)
    except Exception:
        drain_timeout = 10.0

    # NOTE(review): mapped gateways are only given the marker and the drain
    # window here; presumably they observe the marker and exit on their own.
    survivors = _wait_for_windows_update_gateway_exit(
        mapped_pids,
        timeout=drain_timeout,
    )
    unmapped_pids = [pid for pid in running_pids if pid not in by_pid]

    force_killed = []
    for pid in sorted({*survivors, *unmapped_pids}):
        victim = int(pid)
        try:
            terminate_pid(victim, force=True)
        except (ProcessLookupError, PermissionError, OSError):
            # Already gone or not killable by us; carry on with the rest.
            continue
        force_killed.append(victim)

    if profiles:
        print(f" ✓ Paused gateway profile(s): {', '.join(sorted(profiles))}")
    if force_killed:
        print(f" → Force-stopped {len(force_killed)} gateway process(es)")

    if unmapped_pids:
        print(
            f" → Stopped {len(unmapped_pids)} gateway process(es) without profile mapping"
        )
        print(" Restart manually after update: hermes gateway run")

    return {
        "resume_needed": True,
        "profiles": profiles,
        "unmapped_pids": unmapped_pids,
    }
|
||||
|
||||
|
||||
def _resume_windows_gateways_after_update(token: dict | None) -> None:
|
||||
"""Restart Windows profile gateways previously paused for update."""
|
||||
if not token or not token.get("resume_needed"):
|
||||
return
|
||||
token["resume_needed"] = False
|
||||
if not _is_windows():
|
||||
return
|
||||
|
||||
profiles = token.get("profiles") or {}
|
||||
if not profiles:
|
||||
return
|
||||
|
||||
try:
|
||||
from hermes_cli.gateway import launch_detached_profile_gateway_restart
|
||||
except Exception as exc:
|
||||
logger.debug("Could not load Windows gateway restart helper: %s", exc)
|
||||
return
|
||||
|
||||
relaunched = []
|
||||
for profile, old_pid in sorted(profiles.items()):
|
||||
try:
|
||||
if launch_detached_profile_gateway_restart(str(profile), int(old_pid)):
|
||||
relaunched.append(str(profile))
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"Could not restart Windows gateway profile %s after update: %s",
|
||||
profile,
|
||||
exc,
|
||||
)
|
||||
|
||||
if relaunched:
|
||||
print()
|
||||
print(f" ✓ Restarting Windows gateway profile(s): {', '.join(relaunched)}")
|
||||
|
||||
|
||||
def _discard_lockfile_churn(git_cmd, repo_root):
|
||||
"""Restore tracked ``package-lock.json`` files that npm dirtied locally.
|
||||
|
||||
|
|
@ -8232,6 +8408,15 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
|||
# always roll back to the exact state they had before this update.
|
||||
_run_pre_update_backup(args)
|
||||
|
||||
_windows_gateway_resume = _pause_windows_gateways_for_update()
|
||||
if _windows_gateway_resume:
|
||||
import atexit as _atexit
|
||||
|
||||
_atexit.register(
|
||||
_resume_windows_gateways_after_update,
|
||||
_windows_gateway_resume,
|
||||
)
|
||||
|
||||
# Try git-based update first, fall back to ZIP download on Windows
|
||||
# when git file I/O is broken (antivirus, NTFS filter drivers, etc.)
|
||||
use_zip_update = False
|
||||
|
|
@ -8294,7 +8479,10 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
|||
|
||||
if use_zip_update:
|
||||
# ZIP-based update for Windows when git is broken
|
||||
_update_via_zip(args)
|
||||
try:
|
||||
_update_via_zip(args)
|
||||
finally:
|
||||
_resume_windows_gateways_after_update(_windows_gateway_resume)
|
||||
return
|
||||
|
||||
# Fetch and pull
|
||||
|
|
@ -8431,6 +8619,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
|||
check=False,
|
||||
)
|
||||
print("✓ Already up to date!")
|
||||
_resume_windows_gateways_after_update(_windows_gateway_resume)
|
||||
return
|
||||
|
||||
print(f"→ Found {commit_count} new commit(s)")
|
||||
|
|
@ -9627,6 +9816,8 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
|||
except Exception as e:
|
||||
logger.debug("Gateway restart during update failed: %s", e)
|
||||
|
||||
_resume_windows_gateways_after_update(_windows_gateway_resume)
|
||||
|
||||
# Warn if legacy Hermes gateway unit files are still installed.
|
||||
# When both hermes.service (from a pre-rename install) and the
|
||||
# current hermes-gateway.service are enabled, they SIGTERM-fight
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ Windows-specific code paths can be exercised on any host.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import types
|
||||
|
|
@ -446,6 +447,97 @@ def test_quarantine_actionable_warning_when_everything_fails(
|
|||
assert "Hermes Desktop" in captured or "gateway" in captured.lower()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Windows gateway pause/resume before update mutation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@patch.object(cli_main, "_is_windows", return_value=True)
def test_pause_windows_gateways_for_update_stops_profile_and_unmapped_pids(
    _winp,
    monkeypatch,
    tmp_path,
    capsys,
):
    """Pausing marks and waits on profile gateways and force-kills unmapped ones.

    PID 101 belongs to the ``work`` profile, PID 202 has no profile mapping.
    The stubbed wait reports no survivors, so only 202 may be force-terminated.
    """
    import gateway.status as status_mod
    import hermes_cli.gateway as gateway_mod

    work_home = tmp_path / "profiles" / "work"
    work_home.mkdir(parents=True)
    work_gateway = SimpleNamespace(profile="work", path=work_home, pid=101)

    waited_for = []
    terminated = []

    def fake_wait(pids, *, timeout):
        # Record which PIDs were drained and report that all of them exited.
        waited_for.extend(pids)
        return set()

    def fake_terminate(pid, force=False):
        terminated.append((pid, force))

    monkeypatch.setattr(gateway_mod, "find_gateway_pids", lambda **_k: [101, 202])
    monkeypatch.setattr(
        gateway_mod,
        "find_profile_gateway_processes",
        lambda **_k: [work_gateway],
    )
    monkeypatch.setattr(gateway_mod, "_get_restart_drain_timeout", lambda: 0.1)
    monkeypatch.setattr(cli_main, "_wait_for_windows_update_gateway_exit", fake_wait)
    monkeypatch.setattr(status_mod, "terminate_pid", fake_terminate)

    token = cli_main._pause_windows_gateways_for_update()

    expected_token = {
        "resume_needed": True,
        "profiles": {"work": 101},
        "unmapped_pids": [202],
    }
    assert token == expected_token
    assert waited_for == [101]
    assert terminated == [(202, True)]

    # The planned-stop marker lands in the profile home and names both PIDs.
    marker_file = work_home / ".gateway-planned-stop.json"
    marker = json.loads(marker_file.read_text())
    assert marker["target_pid"] == 101
    assert marker["stopper_pid"] == os.getpid()

    output = capsys.readouterr().out
    assert "Paused gateway profile(s): work" in output
    assert "without profile mapping" in output
|
||||
|
||||
|
||||
@patch.object(cli_main, "_is_windows", return_value=True)
def test_resume_windows_gateways_after_update_relaunches_paused_profiles(
    _winp,
    monkeypatch,
    capsys,
):
    """Resuming relaunches every paused profile in sorted order and consumes the token."""
    import hermes_cli.gateway as gateway_mod

    launches = []

    def fake_relaunch(profile, old_pid):
        # Record the request and report a successful launch.
        launches.append((profile, old_pid))
        return True

    monkeypatch.setattr(
        gateway_mod,
        "launch_detached_profile_gateway_restart",
        fake_relaunch,
    )

    token = {
        "resume_needed": True,
        "profiles": {"default": 101, "work": 202},
        "unmapped_pids": [],
    }

    cli_main._resume_windows_gateways_after_update(token)

    assert token["resume_needed"] is False
    assert launches == [("default", 101), ("work", 202)]
    output = capsys.readouterr().out
    assert "Restarting Windows gateway profile(s): default, work" in output
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_update integration — concurrent-instance gate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue