From 2a7308b7c4c4e76648211d5953395bd8ecd0ad64 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 19 May 2026 11:10:51 -0700 Subject: [PATCH] fix(update): quarantine hermes.exe vs concurrent Windows instance (#26670) (#26677) * fix(update): detect concurrent hermes.exe on Windows; retry + restart-defer quarantine Closes #26670. When 'hermes update' runs on Windows with another hermes.exe alive (most commonly the Hermes Desktop Electron app's spawned backend) _quarantine_running_hermes_exe() fails to rename the venv shim with [WinError 32]. uv pip install -e . then exits 2, the git-pull fast path is silently abandoned, and the ZIP fallback runs (and fails the same way) before eventually succeeding. This change implements three of the five proposed fixes from the issue: 1. Concurrent-instance detection (preferred fix). _detect_concurrent_hermes_instances() uses psutil to enumerate processes whose .exe is one of our venv shims (hermes.exe / hermes-gateway.exe), excluding the caller's PID. When any match exists, cmd_update prints an actionable message naming the blocking PIDs and exits 2 BEFORE any destructive work. New --force flag bypasses the gate. 2. Retry + restart-deferred fallback. _quarantine_running_hermes_exe() now retries the rename up to 4 times with 100/250/500/1000 ms backoff (covers the transient AV-scanner-handle case). If all retries fail, it schedules the replacement via MoveFileExW with the OS deferred-rename flag so the new shim can land at the original path and the update completes; the old image is fully unloaded after the user's next system restart. 3. Actionable warning text. The old 'Could not quarantine: [WinError 32]' warning is replaced with one that names the likely culprits (Hermes Desktop, REPLs, gateway, AV) and points to the new --force flag. Tests: - 13 new tests in tests/hermes_cli/test_update_concurrent_quarantine.py covering: psutil-based enumeration, self-pid exclusion, case-insensitive matching of .EXE, no-psutil graceful degradation, off-Windows no-op, helpful warning formatting, retry-then-succeed, restart-deferred fallback, cmd_update abort + exit code 2, and --force bypass. - New autouse fixture in tests/hermes_cli/conftest.py defaults _detect_concurrent_hermes_instances to [] so the rest of the suite isn't tripped by the developer's own running hermes.exe. Opt-out marker 'real_concurrent_gate' registered in pyproject.toml. - Updating docs page (website/docs/getting-started/updating.md) gains a short section explaining the new Windows error and remediation. * chore: refresh uv.lock to match pyproject.toml exact pins aiohttp 3.13.4 -> 3.13.3 (matches pyproject pin: aiohttp==3.13.3) anthropic 0.87.0 -> 0.86.0 (matches pyproject pin: anthropic==0.86.0) hermes-agent 0.13.0 -> 0.14.0 (matches pyproject version) CI's uv lock --check was failing on the merged state because main drifted: pyproject.toml uses exact == pins for those two deps and the hermes-agent version was bumped to 0.14.0 but the lockfile still had 0.13.0. --- hermes_cli/main.py | 224 +++++++++++- pyproject.toml | 1 + tests/hermes_cli/conftest.py | 27 ++ .../test_update_concurrent_quarantine.py | 328 ++++++++++++++++++ website/docs/getting-started/updating.md | 20 ++ 5 files changed, 591 insertions(+), 9 deletions(-) create mode 100644 tests/hermes_cli/test_update_concurrent_quarantine.py diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 4ae845d2e53..8f24360aadd 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -7199,7 +7199,95 @@ def _hermes_exe_shims(scripts_dir: Path) -> list[Path]: ] -def _quarantine_running_hermes_exe(scripts_dir: Path) -> list[tuple[Path, Path]]: +def _detect_concurrent_hermes_instances( + scripts_dir: Path, *, exclude_pid: int | None = None +) -> list[tuple[int, str]]: + """Find other live processes whose .exe is one of our entry-point shims. + + Windows blocks DELETE/REPLACE on a running .exe — and even RENAME on the + same .exe when another process opened it without ``FILE_SHARE_DELETE``. + The Hermes Desktop Electron app spawns ``hermes.EXE`` as a backend child, + so during ``hermes update`` the user-invoked process and the desktop's + child both hold the same file. The quarantine rename then fails with + ``[WinError 32]`` and uv inherits the lock. + + This helper enumerates processes whose ``exe`` matches one of the venv's + shims (``hermes.exe`` / ``hermes-gateway.exe``) and returns ``(pid, + process_name)`` pairs. The caller's own PID is excluded so the running + ``hermes update`` invocation never reports itself. + + Returns an empty list off-Windows, on missing psutil, or when no other + instances exist. Never raises — process enumeration is best-effort. + """ + if not _is_windows(): + return [] + + try: + import psutil + except Exception: + return [] + + if exclude_pid is None: + exclude_pid = os.getpid() + + # Resolve every shim path to its canonical form once for cheap comparison. + shim_paths: set[str] = set() + for shim in _hermes_exe_shims(scripts_dir): + try: + shim_paths.add(str(shim.resolve()).lower()) + except OSError: + shim_paths.add(str(shim).lower()) + if not shim_paths: + return [] + + matches: list[tuple[int, str]] = [] + try: + proc_iter = psutil.process_iter(["pid", "exe", "name"]) + except Exception: + return [] + + for proc in proc_iter: + try: + info = proc.info + except Exception: + continue + pid = info.get("pid") + exe = info.get("exe") + if not exe or pid is None or pid == exclude_pid: + continue + try: + exe_norm = str(Path(exe).resolve()).lower() + except (OSError, ValueError): + exe_norm = str(exe).lower() + if exe_norm in shim_paths: + name = info.get("name") or Path(exe).name + matches.append((int(pid), str(name))) + + return matches + + +def _format_concurrent_instances_message( + matches: list[tuple[int, str]], scripts_dir: Path +) -> str: + """Build a human-readable explanation + remediation hint for the user.""" + shim = scripts_dir / "hermes.exe" + lines = ["✗ Another hermes.exe is running:"] + for pid, name in matches: + lines.append(f" PID {pid} {name}") + lines.append("") + lines.append(f" Updating now would fail to overwrite {shim} because") + lines.append(" Windows blocks REPLACE on a running executable.") + lines.append("") + lines.append(" Close Hermes Desktop, exit any open `hermes` REPLs, and") + lines.append(" stop the gateway (`hermes gateway stop`) before retrying.") + lines.append(" Override with `hermes update --force` if you've already") + lines.append(" confirmed those processes will not write to the venv.") + return "\n".join(lines) + + +def _quarantine_running_hermes_exe( + scripts_dir: Path, *, max_attempts: int = 4 +) -> list[tuple[Path, Path]]: """Pre-empt Windows file lock on the running ``hermes.exe``. Windows allows RENAMING a mapped/running executable (the kernel tracks the @@ -7212,29 +7300,129 @@ def _quarantine_running_hermes_exe(scripts_dir: Path) -> list[tuple[Path, Path]] fresh shims at the original paths. The ``.old`` files are cleaned up on the next hermes invocation by ``_cleanup_quarantined_exes``. + Rename can still fail when *another* process has opened the .exe without + ``FILE_SHARE_DELETE`` — typically AV real-time scanners with transient + handles (recovers in <1s), or the Hermes Desktop backend child process + (won't recover until the user closes it). We mitigate: + + 1. Retry up to ``max_attempts`` times with exponential backoff + (100/250/500/1000 ms). Handles the AV-scanner case. + 2. If all retries fail, schedule the .exe for replacement on next + reboot via ``MoveFileExW(MOVEFILE_DELAY_UNTIL_REBOOT)``. This still + lets uv create a fresh shim at the original path (Windows will keep + the old file's content under a new name until the reboot), so the + update can complete; the user just needs to reboot to fully unload + the stale image. + 3. Print a clear warning naming the most likely culprit (running + Hermes Desktop / gateway / REPL) and pointing to ``--force``. + Returns the list of (original, quarantined) pairs so the caller can roll - back if the install itself fails before uv writes a replacement. + back if the install itself fails before uv writes a replacement. Pairs + where we used ``MOVEFILE_DELAY_UNTIL_REBOOT`` are NOT returned — they + are already deferred and roll-back is meaningless. """ moved: list[tuple[Path, Path]] = [] if not _is_windows(): return moved import time + stamp = int(time.time() * 1000) + # Backoff schedule: first attempt is immediate, subsequent ones sleep. + # 100ms / 250ms / 500ms covers the typical AV scanner re-scan window. + backoff_ms = [0, 100, 250, 500, 1000] + attempts = max(1, min(max_attempts, len(backoff_ms))) + for shim in _hermes_exe_shims(scripts_dir): if not shim.exists(): continue target = shim.with_suffix(shim.suffix + f".old.{stamp}") - try: - shim.rename(target) - moved.append((shim, target)) - except OSError as e: - # Best-effort: keep going. uv's failure later will surface the - # real error; this is a heuristic, not a hard guarantee. - print(f" ⚠ Could not quarantine {shim.name}: {e}") + + last_exc: OSError | None = None + for attempt in range(attempts): + delay = backoff_ms[attempt] / 1000.0 + if delay: + time.sleep(delay) + try: + shim.rename(target) + moved.append((shim, target)) + last_exc = None + break + except OSError as e: + last_exc = e + continue + + if last_exc is None: + continue + + # All in-process renames failed. Try MoveFileEx with + # MOVEFILE_DELAY_UNTIL_REBOOT as a last resort. This succeeds in the + # exact case where the inline rename failed (another process holds + # the handle without share-delete), at the cost of requiring a + # reboot to fully reclaim the old .exe. + scheduled = _schedule_replace_on_reboot(shim, target) + if scheduled: + print( + f" ⚠ {shim.name} is locked by another process; scheduled " + f"replacement on next reboot." + ) + print( + " The new shim was written at the same path, but a " + "reboot is needed to fully unload the old one." + ) + # Do NOT append to ``moved``: we don't want roll-back to undo a + # reboot-deferred operation. + continue + + # Truly couldn't budge the .exe. Print an actionable warning and let + # uv try its luck — sometimes uv's own retry handling pulls through. + print( + f" ⚠ Could not quarantine {shim.name} ({last_exc.__class__.__name__}: " + f"another process is holding it open)." + ) + print( + " Close Hermes Desktop, exit other `hermes` REPLs, stop the " + "gateway, or pause AV scanning, then re-run `hermes update`." + ) + return moved +def _schedule_replace_on_reboot(shim: Path, quarantine_target: Path) -> bool: + """Schedule ``shim`` -> ``quarantine_target`` via PendingFileRenameOperations. + + Uses Win32 ``MoveFileExW`` with ``MOVEFILE_REPLACE_EXISTING | + MOVEFILE_DELAY_UNTIL_REBOOT``. The OS persists the rename in + ``HKLM\\System\\CurrentControlSet\\Control\\Session Manager\\ + PendingFileRenameOperations`` and applies it before any user-mode code + runs on next boot — at which point no process can hold the .exe. + + Returns ``True`` if the schedule call succeeded, ``False`` otherwise + (non-Windows, ctypes failure, lack of privilege, etc.). Never raises. + """ + if not _is_windows(): + return False + try: + import ctypes + from ctypes import wintypes + + MOVEFILE_REPLACE_EXISTING = 0x1 + MOVEFILE_DELAY_UNTIL_REBOOT = 0x4 + + MoveFileExW = ctypes.windll.kernel32.MoveFileExW + MoveFileExW.argtypes = [wintypes.LPCWSTR, wintypes.LPCWSTR, wintypes.DWORD] + MoveFileExW.restype = wintypes.BOOL + + ok = MoveFileExW( + str(shim), + str(quarantine_target), + MOVEFILE_REPLACE_EXISTING | MOVEFILE_DELAY_UNTIL_REBOOT, + ) + return bool(ok) + except Exception: + return False + + def _restore_quarantined_exes(moved: list[tuple[Path, Path]]) -> None: """Roll back ``_quarantine_running_hermes_exe`` if uv didn't write replacements.""" for original, quarantined in moved: @@ -8020,6 +8208,18 @@ def _cmd_update_impl(args, gateway_mode: bool): print("⚕ Updating Hermes Agent...") print() + # On Windows, abort early if another hermes.exe is holding the venv shim + # open. Continuing would result in a string of WinError 32 warnings and + # then either a deferred-rename leftover or a failed git-pull fast path + # that silently falls back to the slower ZIP route. See issue #26670. + if _is_windows() and not getattr(args, "force", False): + scripts_dir = _venv_scripts_dir() + if scripts_dir is not None: + concurrent = _detect_concurrent_hermes_instances(scripts_dir) + if concurrent: + print(_format_concurrent_instances_message(concurrent, scripts_dir)) + sys.exit(2) + # Pre-update backup — runs before any git/file mutation so users can # always roll back to the exact state they had before this update. _run_pre_update_backup(args) @@ -12351,6 +12551,12 @@ Examples: default=False, help="Assume yes for interactive prompts (config migration, stash restore). API-key entry is skipped; run 'hermes config migrate' separately for those.", ) + update_parser.add_argument( + "--force", + action="store_true", + default=False, + help="Windows: proceed with the update even when another hermes.exe is detected. The concurrent process will likely cause WinError 32 warnings and may leave a reboot-deferred .exe replacement.", + ) update_parser.set_defaults(func=cmd_update) # ========================================================================= diff --git a/pyproject.toml b/pyproject.toml index 4580b4003a6..d9b0363db32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -226,6 +226,7 @@ include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gat testpaths = ["tests"] markers = [ "integration: marks tests requiring external services (API keys, Modal, etc.)", + "real_concurrent_gate: opt out of the autouse stub that disables _detect_concurrent_hermes_instances", ] addopts = "-m 'not integration' -n auto" diff --git a/tests/hermes_cli/conftest.py b/tests/hermes_cli/conftest.py index 531f033e7e0..3eee1b2f32f 100644 --- a/tests/hermes_cli/conftest.py +++ b/tests/hermes_cli/conftest.py @@ -17,3 +17,30 @@ def all_assignees_spawnable(monkeypatch): """ from hermes_cli import profiles monkeypatch.setattr(profiles, "profile_exists", lambda name: True) + + +@pytest.fixture(autouse=True) +def _suppress_concurrent_hermes_gate(request, monkeypatch): + """Default ``_detect_concurrent_hermes_instances`` to ``[]`` for every test. + + The Windows update path now refuses to proceed when another + ``hermes.exe`` is detected (issue #26670). On a developer's Windows + machine running the test suite via ``hermes`` itself, this would + flag the running agent as a concurrent instance and abort every + ``cmd_update`` test. Tests that want to exercise the gate explicitly + re-patch ``_detect_concurrent_hermes_instances`` with their own + return value — autouse here gives a clean default without touching + the rest of the suite. + + Tests that need to call the REAL function (e.g. unit tests for the + helper itself) opt out with ``@pytest.mark.real_concurrent_gate``. + """ + if request.node.get_closest_marker("real_concurrent_gate"): + return + try: + from hermes_cli import main as _cli_main + except Exception: + return + monkeypatch.setattr( + _cli_main, "_detect_concurrent_hermes_instances", lambda *_a, **_k: [] + ) diff --git a/tests/hermes_cli/test_update_concurrent_quarantine.py b/tests/hermes_cli/test_update_concurrent_quarantine.py new file mode 100644 index 00000000000..dbf1f3ee5f8 --- /dev/null +++ b/tests/hermes_cli/test_update_concurrent_quarantine.py @@ -0,0 +1,328 @@ +"""Tests for issue #26670 — concurrent hermes.exe detection and improved +quarantine retry / reboot-deferred fallback during `hermes update` on Windows. + +These tests force ``_is_windows`` to return ``True`` via patching so the +Windows-specific code paths can be exercised on any host. +""" + +from __future__ import annotations + +import os +import sys +import types +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +from hermes_cli import main as cli_main + + +# Tests in this module either exercise the REAL _detect_concurrent_hermes_instances +# helper (and need the autouse stub in tests/hermes_cli/conftest.py disabled), +# or supply their own explicit return value via patch.object. Mark the whole +# module so the conftest fixture skips its default stub. +pytestmark = pytest.mark.real_concurrent_gate + + +# --------------------------------------------------------------------------- +# _detect_concurrent_hermes_instances +# --------------------------------------------------------------------------- + + +def _make_proc(pid: int, exe: str, name: str = "hermes.exe"): + """Build a duck-typed psutil Process stand-in with the .info dict.""" + proc = MagicMock() + proc.info = {"pid": pid, "exe": exe, "name": name} + return proc + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_detect_concurrent_returns_empty_when_no_other_processes(_winp, tmp_path): + scripts_dir = tmp_path + (scripts_dir / "hermes.exe").write_bytes(b"") + (scripts_dir / "hermes-gateway.exe").write_bytes(b"") + + fake_psutil = types.SimpleNamespace(process_iter=lambda attrs: iter([])) + with patch.dict(sys.modules, {"psutil": fake_psutil}): + result = cli_main._detect_concurrent_hermes_instances(scripts_dir) + + assert result == [] + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_detect_concurrent_excludes_self_pid(_winp, tmp_path): + scripts_dir = tmp_path + shim = scripts_dir / "hermes.exe" + shim.write_bytes(b"") + my_pid = os.getpid() + + procs = [_make_proc(my_pid, str(shim), "hermes.exe")] + fake_psutil = types.SimpleNamespace(process_iter=lambda attrs: iter(procs)) + with patch.dict(sys.modules, {"psutil": fake_psutil}): + result = cli_main._detect_concurrent_hermes_instances(scripts_dir) + + assert result == [] + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_detect_concurrent_finds_other_hermes_process(_winp, tmp_path): + scripts_dir = tmp_path + shim = scripts_dir / "hermes.exe" + shim.write_bytes(b"") + + other_pid = os.getpid() + 1 + procs = [ + _make_proc(other_pid, str(shim), "hermes.exe"), + _make_proc(os.getpid() + 2, r"C:\\Windows\\System32\\notepad.exe", "notepad.exe"), + ] + fake_psutil = types.SimpleNamespace(process_iter=lambda attrs: iter(procs)) + with patch.dict(sys.modules, {"psutil": fake_psutil}): + result = cli_main._detect_concurrent_hermes_instances(scripts_dir) + + assert result == [(other_pid, "hermes.exe")] + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_detect_concurrent_matches_case_insensitively(_winp, tmp_path): + scripts_dir = tmp_path + shim = scripts_dir / "hermes.exe" + shim.write_bytes(b"") + + # Simulate the desktop spawning hermes.EXE (uppercase ext) from same path + upper = str(shim).replace("hermes.exe", "HERMES.EXE") + procs = [_make_proc(9999, upper, "HERMES.EXE")] + fake_psutil = types.SimpleNamespace(process_iter=lambda attrs: iter(procs)) + with patch.dict(sys.modules, {"psutil": fake_psutil}): + result = cli_main._detect_concurrent_hermes_instances(scripts_dir) + + assert result == [(9999, "HERMES.EXE")] + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_detect_concurrent_no_psutil_returns_empty(_winp, tmp_path): + scripts_dir = tmp_path + (scripts_dir / "hermes.exe").write_bytes(b"") + + # Block psutil import — simulate environment without it. + with patch.dict(sys.modules, {"psutil": None}): + result = cli_main._detect_concurrent_hermes_instances(scripts_dir) + + assert result == [] + + +@patch.object(cli_main, "_is_windows", return_value=False) +def test_detect_concurrent_is_noop_off_windows(_winp, tmp_path): + """No process enumeration off-Windows; the file-lock issue is Windows-only.""" + assert cli_main._detect_concurrent_hermes_instances(tmp_path) == [] + + +# --------------------------------------------------------------------------- +# _format_concurrent_instances_message +# --------------------------------------------------------------------------- + + +def test_format_message_mentions_pids_and_remediation(tmp_path): + matches = [(1234, "hermes.exe"), (5678, "hermes.exe")] + msg = cli_main._format_concurrent_instances_message(matches, tmp_path) + + assert "1234" in msg + assert "5678" in msg + assert "hermes.exe" in msg + assert "Hermes Desktop" in msg + assert "--force" in msg + # Mentions the file that would have been overwritten + assert str(tmp_path / "hermes.exe") in msg + + +# --------------------------------------------------------------------------- +# _quarantine_running_hermes_exe — retry + reboot-deferred fallback +# --------------------------------------------------------------------------- + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_quarantine_succeeds_first_attempt(_winp, tmp_path): + """When the rename works immediately, no warning, single rename pair returned.""" + shim = tmp_path / "hermes.exe" + shim.write_bytes(b"old") + + pairs = cli_main._quarantine_running_hermes_exe(tmp_path) + + assert len(pairs) == 1 + orig, quarantine = pairs[0] + assert orig == shim + assert quarantine.name.startswith("hermes.exe.old.") + assert quarantine.exists() + assert not shim.exists() + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_quarantine_retries_then_succeeds(_winp, tmp_path, monkeypatch): + """A transient OSError on the first attempt should not be fatal.""" + shim = tmp_path / "hermes.exe" + shim.write_bytes(b"old") + + original_rename = Path.rename + call_count = {"n": 0} + + def flaky_rename(self, target): + call_count["n"] += 1 + if call_count["n"] == 1: + raise OSError(32, "share violation (simulated AV scan)") + return original_rename(self, target) + + # Speed up the test: avoid actual sleeps in the backoff schedule. + monkeypatch.setattr(cli_main, "_hermes_exe_shims", lambda d: [shim]) + with patch.object(Path, "rename", flaky_rename), patch( + "time.sleep", lambda *_a, **_k: None + ): + pairs = cli_main._quarantine_running_hermes_exe(tmp_path) + + assert call_count["n"] >= 2 + assert len(pairs) == 1 + assert not shim.exists() + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_quarantine_falls_back_to_reboot_schedule(_winp, tmp_path, capsys, monkeypatch): + """When every retry fails, we schedule via MoveFileEx and warn helpfully.""" + shim = tmp_path / "hermes.exe" + shim.write_bytes(b"locked") + + def always_fails(self, target): + raise OSError(32, "The process cannot access the file (simulated lock)") + + scheduled_calls: list[tuple[Path, Path]] = [] + + def fake_schedule(s: Path, q: Path) -> bool: + scheduled_calls.append((s, q)) + return True + + monkeypatch.setattr(cli_main, "_hermes_exe_shims", lambda d: [shim]) + with patch.object(Path, "rename", always_fails), patch.object( + cli_main, "_schedule_replace_on_reboot", fake_schedule + ), patch("time.sleep", lambda *_a, **_k: None): + pairs = cli_main._quarantine_running_hermes_exe(tmp_path) + + captured = capsys.readouterr().out + + # The reboot-deferred path was used. + assert scheduled_calls and scheduled_calls[0][0] == shim + # It is NOT added to the returned roll-back list (the issue calls this + # out — don't undo a deferred operation). + assert pairs == [] + # The user got a clear message, not raw [WinError 32]. + assert "scheduled" in captured.lower() + assert "reboot" in captured.lower() + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_quarantine_actionable_warning_when_everything_fails( + _winp, tmp_path, capsys, monkeypatch +): + """When even MoveFileEx fails we should print remediation hints, not a bare error.""" + shim = tmp_path / "hermes.exe" + shim.write_bytes(b"locked") + + def always_fails(self, target): + raise OSError(32, "share violation") + + monkeypatch.setattr(cli_main, "_hermes_exe_shims", lambda d: [shim]) + with patch.object(Path, "rename", always_fails), patch.object( + cli_main, "_schedule_replace_on_reboot", lambda *_a, **_k: False + ), patch("time.sleep", lambda *_a, **_k: None): + pairs = cli_main._quarantine_running_hermes_exe(tmp_path) + + captured = capsys.readouterr().out + assert pairs == [] + # New message format: no raw "[WinError 32]" dump; instead names the cause + # and tells the user what to do. + assert "another process" in captured.lower() + assert "Hermes Desktop" in captured or "gateway" in captured.lower() + + +# --------------------------------------------------------------------------- +# cmd_update integration — concurrent-instance gate +# --------------------------------------------------------------------------- + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_cmd_update_aborts_on_concurrent_instance(_winp, tmp_path, capsys): + """If another hermes.exe is running, the update bails out before + touching the working tree (exit code 2).""" + scripts_dir = tmp_path / "Scripts" + scripts_dir.mkdir() + + args = SimpleNamespace( + check=False, + gateway=False, + yes=False, + force=False, + backup=False, + no_backup=True, + ) + + with patch.object( + cli_main, "_venv_scripts_dir", return_value=scripts_dir + ), patch.object( + cli_main, + "_detect_concurrent_hermes_instances", + return_value=[(4242, "hermes.exe")], + ), patch.object( + cli_main, "_run_pre_update_backup" + ) as mock_backup, patch.object( + cli_main, "_install_hangup_protection", return_value={} + ), patch.object( + cli_main, "_finalize_update_output" + ): + with pytest.raises(SystemExit) as excinfo: + cli_main.cmd_update(args) + + assert excinfo.value.code == 2 + # The pre-update backup runs AFTER the concurrent check; should not have + # been invoked. + mock_backup.assert_not_called() + + captured = capsys.readouterr().out + assert "4242" in captured + assert "--force" in captured + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_cmd_update_force_bypasses_concurrent_check(_winp, tmp_path): + """--force lets the update proceed past the concurrent-instance gate + (subsequent steps are mocked so we only verify the gate is skipped).""" + scripts_dir = tmp_path / "Scripts" + scripts_dir.mkdir() + + args = SimpleNamespace( + check=False, + gateway=False, + yes=False, + force=True, # ← the bypass + backup=False, + no_backup=True, + ) + + detect = MagicMock(return_value=[(9, "hermes.exe")]) + + # Short-circuit out of _cmd_update_impl via a sentinel raise immediately + # AFTER the gate. _run_pre_update_backup is the first call after the gate. + sentinel = RuntimeError("reached post-gate body") + with patch.object( + cli_main, "_venv_scripts_dir", return_value=scripts_dir + ), patch.object( + cli_main, "_detect_concurrent_hermes_instances", detect + ), patch.object( + cli_main, "_run_pre_update_backup", side_effect=sentinel + ), patch.object( + cli_main, "_install_hangup_protection", return_value={} + ), patch.object( + cli_main, "_finalize_update_output" + ): + with pytest.raises(RuntimeError, match="reached post-gate body"): + cli_main.cmd_update(args) + + # When --force is set, we should not have even consulted psutil. + detect.assert_not_called() diff --git a/website/docs/getting-started/updating.md b/website/docs/getting-started/updating.md index d4ced41a4d7..4a6c9b4ba92 100644 --- a/website/docs/getting-started/updating.md +++ b/website/docs/getting-started/updating.md @@ -69,6 +69,26 @@ updates: `--backup` was the always-on behavior in earlier builds, but it was adding minutes to every update on large homes, so it's now opt-in. The lightweight pairing-data snapshot above still runs unconditionally. +### Windows: another `hermes.exe` is running + +On Windows, `hermes update` will refuse to run if it detects another `hermes.exe` process holding the venv's entry-point executable open — most commonly the Hermes Desktop app's spawned backend, an open `hermes` REPL in another terminal, or a running gateway: + +``` +$ hermes update +✗ Another hermes.exe is running: + PID 12345 hermes.exe + + Updating now would fail to overwrite ...\venv\Scripts\hermes.exe because + Windows blocks REPLACE on a running executable. + + Close Hermes Desktop, exit any open `hermes` REPLs, and + stop the gateway (`hermes gateway stop`) before retrying. + Override with `hermes update --force` if you've already + confirmed those processes will not write to the venv. +``` + +Close the listed processes and re-run. If you're sure the concurrent process won't interfere (rare — usually only useful when an antivirus shim is mis-attributed), pass `--force` to skip the check. In that case the updater will still retry the `.exe` rename with exponential backoff and, on stubborn locks, schedule the replacement for next reboot via `MoveFileEx(MOVEFILE_DELAY_UNTIL_REBOOT)` so the update can complete. + Expected output looks like: ```