fix(gateway): prevent Windows Telegram /restart leaving gateway stopped

This commit is contained in:
Martin 2026-05-14 22:22:29 +02:00 committed by Teknium
parent 1d378605dd
commit 417a653d9e
4 changed files with 198 additions and 14 deletions

View file

@ -5275,10 +5275,13 @@ def _gateway_command_inner(args):
launchd_start()
elif is_windows():
from hermes_cli import gateway_windows
if gateway_windows.is_installed():
gateway_windows.start()
else:
run_gateway(verbose=0)
# On Windows, even without a registered Scheduled Task / Startup
# entry, gateway_windows.start() uses the safe detached
# pythonw.exe launcher. Do not fall back to run_gateway() here:
# when invoked from a gateway-hosted agent/tool call, foreground
# run_gateway() is tied to the very gateway process we just
# stopped and can die before the replacement is stable.
gateway_windows.start()
else:
run_gateway(verbose=0)
return
@ -5299,13 +5302,19 @@ def _gateway_command_inner(args):
pass
elif is_windows():
from hermes_cli import gateway_windows
if gateway_windows.is_installed():
service_configured = True
try:
gateway_windows.restart()
service_available = True
except (subprocess.CalledProcessError, RuntimeError):
pass
# Prefer the Windows-specific restart path: it supports both
# registered Scheduled Task / Startup installs and no-service
# detached restarts. In the normal successful Telegram-triggered
# restart flow, this avoids the generic foreground run_gateway()
# path that can be reaped with the old gateway process. If the
# Windows backend raises, intentionally preserve the existing
# generic failure fallback below.
service_configured = gateway_windows.is_installed()
try:
gateway_windows.restart()
return
except (subprocess.CalledProcessError, RuntimeError, OSError):
pass
if not service_available:
# systemd/launchd restart failed — check if linger is the issue

View file

@ -42,7 +42,7 @@ _SCHTASKS_TIMEOUT_S = 15
_SCHTASKS_NO_OUTPUT_TIMEOUT_S = 30
# Patterns in schtasks stderr that mean "fall back to the Startup folder".
_FALLBACK_PATTERNS = re.compile(
r"(access is denied|acceso denegado|schtasks timed out|schtasks produced no output)",
r"(access is denied|acceso denegado|přístup byl odepřen|schtasks timed out|schtasks produced no output)",
re.IGNORECASE,
)
@ -344,6 +344,56 @@ def _derive_venv_pythonw(python_exe: str) -> str:
return python_exe
def _read_pyvenv_cfg(venv_dir: Path) -> dict[str, str]:
cfg_path = venv_dir / "pyvenv.cfg"
try:
lines = cfg_path.read_text(encoding="utf-8").splitlines()
except OSError:
return {}
parsed: dict[str, str] = {}
for raw in lines:
if "=" not in raw:
continue
key, value = raw.split("=", 1)
parsed[key.strip().lower()] = value.strip()
return parsed
def _resolve_detached_python(python_exe: str) -> tuple[str, Path, list[str]]:
    """Choose the interpreter and extra import paths for a detached launch.

    Returns ``(windowed_python, venv_dir, extra_pythonpath)``.

    The venv directory is taken to be two levels above *python_exe*. When that
    venv's ``pyvenv.cfg`` carries a ``uv`` key and a non-empty ``home``, and
    both ``<home>\\pythonw.exe`` and ``<venv>\\Lib\\site-packages`` exist, the
    base ``pythonw.exe`` is returned together with the venv's site-packages
    path so the caller can expose it via ``PYTHONPATH``. The reason: a
    uv-created ``venv\\Scripts\\pythonw.exe`` launcher starts hidden but then
    respawns the base interpreter as console ``python.exe``, which opens a
    visible Windows Terminal tab.

    In every other case the result of ``_derive_venv_pythonw`` is returned
    with no extra paths.
    """
    venv_dir = Path(python_exe).parent.parent
    venv_launcher = _derive_venv_pythonw(python_exe)
    venv_cfg = _read_pyvenv_cfg(venv_dir)
    default_choice = (venv_launcher, venv_dir, [])

    base_home = venv_cfg.get("home", "")
    if "uv" not in venv_cfg or not base_home:
        return default_choice

    base_pythonw = Path(base_home) / "pythonw.exe"
    site_packages = venv_dir / "Lib" / "site-packages"
    if not (base_pythonw.exists() and site_packages.exists()):
        return default_choice

    return (str(base_pythonw), venv_dir, [str(site_packages)])
def _prepend_pythonpath(env_overlay: dict[str, str], entries: list[str]) -> None:
clean_entries = [entry for entry in entries if entry]
if not clean_entries:
return
existing = os.environ.get("PYTHONPATH", "")
if existing:
clean_entries.append(existing)
env_overlay["PYTHONPATH"] = os.pathsep.join(clean_entries)
def _build_gateway_argv() -> tuple[list[str], str, dict[str, str]]:
"""Build (argv, working_dir, env_overlay) for the gateway subprocess.
@ -359,7 +409,7 @@ def _build_gateway_argv() -> tuple[list[str], str, dict[str, str]]:
get_python_path,
)
python_exe = _derive_venv_pythonw(get_python_path())
python_exe, venv_dir, extra_pythonpath = _resolve_detached_python(get_python_path())
working_dir = str(PROJECT_ROOT)
hermes_home = str(Path(get_hermes_home()).resolve())
profile_arg = _profile_arg(hermes_home)
@ -373,8 +423,9 @@ def _build_gateway_argv() -> tuple[list[str], str, dict[str, str]]:
"HERMES_HOME": hermes_home,
"PYTHONIOENCODING": "utf-8",
"HERMES_GATEWAY_DETACHED": "1",
"VIRTUAL_ENV": str(Path(python_exe).resolve().parent.parent),
"VIRTUAL_ENV": str(venv_dir),
}
_prepend_pythonpath(env_overlay, [working_dir, *extra_pythonpath] if extra_pythonpath else [])
return argv, working_dir, env_overlay

View file

@ -268,6 +268,67 @@ def test_gateway_start_in_container_with_operational_systemd_uses_systemd(monkey
assert calls == [False]
def test_gateway_restart_on_windows_without_service_uses_detached_backend(monkeypatch):
    """A Windows restart with no installed service goes through the Windows backend.

    Scenario being guarded: an agent hosted by the gateway (e.g. via Telegram)
    runs `hermes gateway restart` through the terminal tool. The generic
    manual path would stop the gateway and then call run_gateway() in that
    same foreground subprocess, which on Windows can be reaped along with its
    gateway parent and leave nothing running. The Windows backend instead
    restarts through detached pythonw.exe, even when no Scheduled Task /
    Startup item is installed.
    """
    import hermes_cli.gateway_windows as gateway_windows

    restart_log = []

    def record_restart():
        restart_log.append("restart")

    def forbid_run_gateway(*args, **kwargs):
        pytest.fail("Windows restart must not use foreground run_gateway()")

    def forbid_manual_stop():
        pytest.fail("Windows restart must not use generic manual stop fallback")

    # Pretend to be a Windows host with no systemd/launchd support.
    monkeypatch.setattr(gateway, "supports_systemd_services", lambda: False)
    monkeypatch.setattr(gateway, "is_macos", lambda: False)
    monkeypatch.setattr(gateway, "is_windows", lambda: True)

    # No service is registered, yet restart() must still be the chosen path.
    monkeypatch.setattr(gateway_windows, "is_installed", lambda: False)
    monkeypatch.setattr(gateway_windows, "restart", record_restart)

    # Any use of the generic manual fallback fails the test immediately.
    monkeypatch.setattr(gateway, "run_gateway", forbid_run_gateway)
    monkeypatch.setattr(gateway, "stop_profile_gateway", forbid_manual_stop)

    restart_args = SimpleNamespace(gateway_command="restart", system=False, all=False)
    gateway.gateway_command(restart_args)

    assert restart_log == ["restart"]
def test_gateway_restart_on_windows_preserves_failure_fallback(monkeypatch):
    """When the Windows backend raises, the generic stop/wait/run path still runs."""
    import hermes_cli.gateway_windows as gateway_windows

    events = []

    def failing_restart():
        events.append("restart")
        raise OSError("simulated detached backend failure")

    def fake_stop():
        events.append("stop")
        return False

    def fake_wait(*args, **kwargs):
        events.append("wait")

    def fake_run(*args, **kwargs):
        events.append("run")

    # Pretend to be a Windows host with no systemd/launchd support.
    monkeypatch.setattr(gateway, "supports_systemd_services", lambda: False)
    monkeypatch.setattr(gateway, "is_macos", lambda: False)
    monkeypatch.setattr(gateway, "is_windows", lambda: True)

    # The Windows backend is not installed and its restart() blows up.
    monkeypatch.setattr(gateway_windows, "is_installed", lambda: False)
    monkeypatch.setattr(gateway_windows, "restart", failing_restart)

    # Record each step of the generic fallback in order.
    monkeypatch.setattr(gateway, "stop_profile_gateway", fake_stop)
    monkeypatch.setattr(gateway, "_wait_for_gateway_exit", fake_wait)
    monkeypatch.setattr(gateway, "run_gateway", fake_run)

    restart_args = SimpleNamespace(gateway_command="restart", system=False, all=False)
    gateway.gateway_command(restart_args)

    assert events == ["restart", "stop", "wait", "run"]
def test_systemd_status_warns_when_linger_disabled(monkeypatch, tmp_path, capsys):
unit_path = tmp_path / "hermes-gateway.service"
unit_path.write_text("[Unit]\n")

View file

@ -0,0 +1,63 @@
"""Tests for the Windows gateway backend."""
import pytest
import hermes_cli.gateway_windows as gateway_windows
_FALLBACK_DETAILS = [
    "ERROR: Access is denied.",
    "ERROR: Acceso denegado.",
    "ERROR: Přístup byl odepřen.",
    "schtasks timed out after 15s",
    "schtasks produced no output",
]


@pytest.mark.parametrize("detail", _FALLBACK_DETAILS)
def test_schtasks_fallback_patterns_cover_localized_access_denied(detail):
    """Access-denied (in several locales), timeout and no-output details trigger fallback."""
    decision = gateway_windows._should_fall_back(1, detail)
    assert decision is True
def test_schtasks_fallback_does_not_hide_unknown_errors():
    """A schtasks error outside the known patterns must not trigger the fallback."""
    unknown_error = "ERROR: The system cannot find the file specified."
    decision = gateway_windows._should_fall_back(1, unknown_error)
    assert decision is False
def test_build_gateway_argv_uses_base_pythonw_for_uv_venv_launcher(monkeypatch, tmp_path):
    """For a uv-made venv, the gateway argv targets the base pythonw.exe.

    The venv's own pythonw launcher is avoided because it respawns console
    python.exe.
    """
    import hermes_cli.gateway as gateway

    # Lay out a fake project with a uv-style venv plus a separate base install.
    project = tmp_path / "project"
    venv_dir = project / "venv"
    scripts = venv_dir / "Scripts"
    site_packages = venv_dir / "Lib" / "site-packages"
    base = tmp_path / "uv" / "python" / "cpython-3.11-windows-x86_64-none"
    for directory in (scripts, site_packages, base):
        directory.mkdir(parents=True)

    venv_python = scripts / "python.exe"
    venv_pythonw = scripts / "pythonw.exe"
    base_pythonw = base / "pythonw.exe"
    for exe in (venv_python, venv_pythonw, base_pythonw):
        exe.write_text("", encoding="utf-8")

    # The "uv" key in pyvenv.cfg is what marks this venv as uv-created.
    (venv_dir / "pyvenv.cfg").write_text(
        f"home = {base}\nimplementation = CPython\nuv = 0.11.14\nversion_info = 3.11.15\n",
        encoding="utf-8",
    )

    monkeypatch.setattr(gateway_windows.sys, "platform", "win32")
    monkeypatch.setattr(gateway, "PROJECT_ROOT", project)
    monkeypatch.setattr(gateway, "get_python_path", lambda: str(venv_python))
    monkeypatch.setattr(gateway, "_profile_arg", lambda hermes_home: "")
    monkeypatch.setattr("hermes_cli.config.get_hermes_home", lambda: str(tmp_path / "hermes-home"))

    argv, cwd, env_overlay = gateway_windows._build_gateway_argv()

    assert argv[:3] == [str(base_pythonw), "-m", "hermes_cli.main"]
    assert cwd == str(project)
    assert env_overlay["VIRTUAL_ENV"] == str(venv_dir)

    # Both the repo root and the venv's site-packages must be importable.
    pythonpath_entries = env_overlay["PYTHONPATH"].split(gateway_windows.os.pathsep)
    assert str(project) in pythonpath_entries
    assert str(site_packages) in pythonpath_entries