mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
fix(gateway): cold-start installed Windows gateway after update when none was running (#50804)
The post-update gateway resume path (`_resume_windows_gateways_after_update`) only relaunched gateways that were *running* when the update began — it enumerates live PIDs in `_pause_windows_gateways_for_update` and respawns exactly those. A gateway that had already died between updates (e.g. it was launched attached to a terminal/TUI that later closed, taking the child with it) was never brought back: the Startup-folder / Scheduled-Task autostart entry only fires on the next login, not after an in-place update. So a Desktop-GUI update (which runs `hermes update --yes --gateway`) on a box whose gateway had quietly died would complete with no gateway running, and the user had no indication anything should have come up.

Fix: when no gateway is running at pause time but an autostart entry is installed (`gateway_windows.is_installed()` — an explicit "I want a gateway" signal), return a `cold_start_if_installed` token. The resume step then does a fresh detached spawn via `gateway_windows._spawn_detached()` — the same windowless `pythonw` + `CREATE_BREAKAWAY_FROM_JOB` path `hermes gateway start` uses. It re-checks liveness immediately before spawning so a concurrent start (autostart entry firing) can't produce a duplicate.

Gateway-less users (no autostart entry) get nothing forced on them — the pause step still returns None for them. POSIX is unaffected: enabled systemd units already restart via `Restart=always`. Windows-only; best-effort throughout (logs at debug and no-ops on any error).

Tests: pause returns the cold-start token only when installed, returns None when not installed, resume cold-starts on the token, and resume skips the cold-start when a gateway is already running.
This commit is contained in:
parent
da498ed99b
commit
ef6492b648
2 changed files with 187 additions and 0 deletions
|
|
@ -8431,6 +8431,31 @@ def _pause_windows_gateways_for_update() -> dict | None:
|
|||
logger.debug("Could not discover Windows gateway PIDs before update: %s", exc)
|
||||
return None
|
||||
if not running_pids:
|
||||
# No gateway is running right now, but the user may have installed an
|
||||
# autostart entry (Scheduled Task or Startup-folder login item) — that
|
||||
# is an explicit "I want a gateway" signal. A gateway that died between
|
||||
# updates (e.g. the spawning terminal/TUI closed, taking its child with
|
||||
# it) would otherwise never come back: the autostart entry only fires on
|
||||
# the next login, and the update flow's resume path only relaunched
|
||||
# gateways that were running when the update began. Cold-start one after
|
||||
# the update so an installed gateway is actually up post-update. Users
|
||||
# who run gateway-less (no autostart entry) get nothing forced on them.
|
||||
try:
|
||||
from hermes_cli import gateway_windows
|
||||
|
||||
if gateway_windows.is_installed():
|
||||
return {
|
||||
"resume_needed": True,
|
||||
"profiles": {},
|
||||
"unmapped_pids": [],
|
||||
"unmapped": [],
|
||||
"cold_start_if_installed": True,
|
||||
}
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"Could not check Windows gateway autostart state before update: %s",
|
||||
exc,
|
||||
)
|
||||
return None
|
||||
|
||||
profile_processes = {}
|
||||
|
|
@ -8508,6 +8533,51 @@ def _pause_windows_gateways_for_update() -> dict | None:
|
|||
}
|
||||
|
||||
|
||||
def _cold_start_windows_gateway_after_update() -> None:
    """Start a fresh detached gateway after update when one is installed but down.

    Invoked from ``_resume_windows_gateways_after_update`` for the
    ``cold_start_if_installed`` case: no gateway was running when the update
    began, but an autostart entry (Scheduled Task / Startup-folder login item)
    is installed, signalling the user wants a gateway. Unlike the relaunch
    paths — which watch an old PID and respawn once it exits — this is a direct
    fresh spawn via the same windowless ``pythonw`` + breakaway path that
    ``hermes gateway start`` uses (``gateway_windows._spawn_detached``).

    Best-effort and idempotent: re-checks that nothing is running first so a
    concurrent start (e.g. the autostart entry firing) can't produce a
    duplicate gateway. Never raises; every path that ends without a spawn
    (helper import failure, liveness check failure, gateway already running,
    spawn failure, spawn returning no PID) is logged at debug level.
    """
    if not _is_windows():
        return

    # Imported lazily so a broken/missing gateway module cannot break the
    # update flow itself.
    try:
        from hermes_cli import gateway_windows
        from hermes_cli.gateway import find_gateway_pids
    except Exception as exc:
        logger.debug("Could not load Windows gateway cold-start helpers: %s", exc)
        return

    # Re-check liveness right before spawning — between pause and resume the
    # autostart entry may have already brought a gateway up, or a leftover
    # process may have re-registered. Don't double-start.
    try:
        live_pids = list(find_gateway_pids(all_profiles=True))
    except Exception as exc:
        logger.debug("Could not re-check gateway liveness before cold-start: %s", exc)
        return
    if live_pids:
        logger.debug(
            "Skipping Windows gateway cold-start; gateway already running: %s",
            live_pids,
        )
        return

    try:
        pid = gateway_windows._spawn_detached()
    except Exception as exc:
        logger.debug("Could not cold-start Windows gateway after update: %s", exc)
        return

    if not pid:
        # The spawn helper did not raise but gave us no PID to report; leave a
        # trace instead of silently doing nothing.
        logger.debug("Windows gateway cold-start did not return a PID")
        return

    print()
    print(f" ✓ Starting Windows gateway after update (PID {pid})")
|
||||
|
||||
|
||||
def _resume_windows_gateways_after_update(token: dict | None) -> None:
|
||||
"""Restart Windows profile gateways previously paused for update."""
|
||||
if not token or not token.get("resume_needed"):
|
||||
|
|
@ -8518,7 +8588,10 @@ def _resume_windows_gateways_after_update(token: dict | None) -> None:
|
|||
|
||||
profiles = token.get("profiles") or {}
|
||||
unmapped = token.get("unmapped") or []
|
||||
cold_start = bool(token.get("cold_start_if_installed"))
|
||||
if not profiles and not any(u.get("argv") for u in unmapped):
|
||||
if cold_start:
|
||||
_cold_start_windows_gateway_after_update()
|
||||
return
|
||||
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -597,6 +597,120 @@ def test_resume_windows_gateways_after_update_respawns_unmapped_by_cmdline(
|
|||
assert "Restarting 1 unmapped Windows gateway process(es)" in out
|
||||
|
||||
|
||||
@patch.object(cli_main, "_is_windows", return_value=True)
def test_pause_returns_cold_start_token_when_installed_but_none_running(
    _winp,
    monkeypatch,
):
    """Installed autostart entry with no live gateway yields a cold-start token.

    When the gateway died between updates there is nothing for the resume path
    to relaunch, yet the installed autostart entry still says the user wants a
    gateway. The pause step must therefore hand back a token instructing the
    resume step to cold-start one.
    """
    import hermes_cli.gateway as gateway_mod
    from hermes_cli import gateway_windows

    # Autostart entry present, but no gateway process alive.
    monkeypatch.setattr(gateway_windows, "is_installed", lambda: True)
    monkeypatch.setattr(gateway_mod, "find_gateway_pids", lambda **_kwargs: [])

    expected_token = {
        "resume_needed": True,
        "profiles": {},
        "unmapped_pids": [],
        "unmapped": [],
        "cold_start_if_installed": True,
    }

    assert cli_main._pause_windows_gateways_for_update() == expected_token
|
||||
|
||||
|
||||
@patch.object(cli_main, "_is_windows", return_value=True)
def test_pause_returns_none_when_nothing_running_and_not_installed(
    _winp,
    monkeypatch,
):
    """Nothing running and nothing installed means no token at all.

    A user who deliberately runs gateway-less must not have a gateway forced
    on them by an update.
    """
    import hermes_cli.gateway as gateway_mod
    from hermes_cli import gateway_windows

    # Neither a live gateway process nor an autostart entry exists.
    monkeypatch.setattr(gateway_windows, "is_installed", lambda: False)
    monkeypatch.setattr(gateway_mod, "find_gateway_pids", lambda **_kwargs: [])

    token = cli_main._pause_windows_gateways_for_update()

    assert token is None
|
||||
|
||||
|
||||
@patch.object(cli_main, "_is_windows", return_value=True)
def test_resume_cold_starts_gateway_when_token_requests_it(
    _winp,
    monkeypatch,
    capsys,
):
    """A cold-start token with no live gateway triggers one detached spawn."""
    import hermes_cli.gateway as gateway_mod
    from hermes_cli import gateway_windows

    spawned = []

    def fake_spawn_detached():
        # Record the call and hand back a recognisable PID.
        spawned.append(True)
        return 4242

    monkeypatch.setattr(gateway_mod, "find_gateway_pids", lambda **_kwargs: [])
    monkeypatch.setattr(gateway_windows, "_spawn_detached", fake_spawn_detached)

    token = {
        "resume_needed": True,
        "profiles": {},
        "unmapped_pids": [],
        "unmapped": [],
        "cold_start_if_installed": True,
    }

    cli_main._resume_windows_gateways_after_update(token)
    stdout = capsys.readouterr().out

    assert token["resume_needed"] is False
    assert spawned == [True]
    assert "Starting Windows gateway after update (PID 4242)" in stdout
|
||||
|
||||
|
||||
@patch.object(cli_main, "_is_windows", return_value=True)
def test_resume_cold_start_skips_when_gateway_already_running(
    _winp,
    monkeypatch,
    capsys,
):
    """The cold-start is a no-op when a gateway is already alive at resume.

    Guards against a double start, e.g. when the autostart entry fired
    between the pause and resume steps.
    """
    import hermes_cli.gateway as gateway_mod
    from hermes_cli import gateway_windows

    spawned = []

    def fake_spawn_detached():
        # Must never be reached in this scenario; record it if it is.
        spawned.append(True)
        return 4242

    # A gateway (PID 9001) is reported as already running.
    monkeypatch.setattr(gateway_mod, "find_gateway_pids", lambda **_kwargs: [9001])
    monkeypatch.setattr(gateway_windows, "_spawn_detached", fake_spawn_detached)

    token = {
        "resume_needed": True,
        "profiles": {},
        "unmapped_pids": [],
        "unmapped": [],
        "cold_start_if_installed": True,
    }

    cli_main._resume_windows_gateways_after_update(token)
    stdout = capsys.readouterr().out

    assert spawned == []
    assert "Starting Windows gateway after update" not in stdout
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_update integration — concurrent-instance gate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue