fix(windows): harden gateway scheduled task (#45610)

* fix(windows): harden gateway scheduled task

* fix(windows): launch gateway scheduled task via console-less wscript

The Scheduled Task ran the gateway through cmd.exe, which allocates a
console. During logon Windows broadcasts CTRL_CLOSE_EVENT to console
process groups, reaping cmd.exe and the half-initialized gateway with
STATUS_CONTROL_C_EXIT (0xC000013A) - which Task Scheduler treats as a
user cancel, so RestartOnFailure never fires and the gateway vanishes on
every reboot (issue #45599 root cause #1).

Add a console-less .vbs launcher (wscript.exe -> pythonw.exe, both
GUI-subsystem) mirroring the gateway.cmd env + argv, and point the task
action at it. The .cmd stays for the Startup-folder fallback and /Run.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Jeff <jeffrobodie@gmail.com>
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
lEWFkRAD 2026-06-23 19:07:52 -04:00 committed by GitHub
parent 0ba1dfed78
commit 433db17c0a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 299 additions and 28 deletions

View file

@ -38,6 +38,7 @@ import subprocess
import sys
import time
from pathlib import Path
from xml.sax.saxutils import escape
# Short timeouts: schtasks occasionally wedges and we don't want to hang forever.
_SCHTASKS_TIMEOUT_S = 15
@ -51,6 +52,9 @@ _ACCESS_DENIED_PATTERN = re.compile(r"(access is denied|acceso denegado)", re.IG
_TASK_NAME_DEFAULT = "Hermes_Gateway"
_TASK_DESCRIPTION = "Hermes Agent Gateway - Messaging Platform Integration"
_TASK_LOGON_DELAY = "PT30S"
_TASK_RESTART_INTERVAL = "PT1M"
_TASK_RESTART_COUNT = 999
def _schtasks_encoding() -> str:
@ -358,12 +362,13 @@ def _build_gateway_cmd_script(
lines.append(f'set "HERMES_HOME={hermes_home}"')
lines.append('set "PYTHONIOENCODING=utf-8"')
lines.append('set "HERMES_GATEWAY_DETACHED=1"')
pythonw_path, venv_dir, extra_pythonpath = _resolve_detached_python(python_path)
# VIRTUAL_ENV lets the gateway's own python detection find the venv
# if someone imports hermes_constants-based logic during startup.
venv_dir = str(Path(python_path).resolve().parent.parent)
lines.append(f'set "VIRTUAL_ENV={venv_dir}"')
pythonpath_entries = [str(Path(__file__).resolve().parent.parent), *extra_pythonpath]
lines.append(f'set "PYTHONPATH={";".join([*pythonpath_entries, "%PYTHONPATH%"])}"')
pythonw_path = _derive_venv_pythonw(python_path)
prog_args = [pythonw_path, "-m", "hermes_cli.main"]
if profile_arg:
prog_args.extend(profile_arg.split())
@ -379,6 +384,78 @@ def _build_gateway_cmd_script(
return "\r\n".join(lines) + "\r\n"
def _quote_vbs_string(value: str) -> str:
"""Quote a value as a VBScript double-quoted string literal.
VBScript escapes an embedded double-quote by doubling it. A newline cannot
appear inside a literal, so refuse it (same guard as ``_quote_cmd_script_arg``).
"""
if "\r" in value or "\n" in value:
raise ValueError(f"refusing to quote VBScript value containing newline: {value!r}")
return '"' + value.replace('"', '""') + '"'
def _build_gateway_vbs_script(
    python_path: str,
    working_dir: str,
    hermes_home: str,
    profile_arg: str,
) -> str:
    """Build a console-less ``gateway.vbs`` launcher (CRLF-terminated).

    The Scheduled Task runs this through ``wscript.exe`` instead of ``cmd.exe``.

    Why: issue #45599 root cause #1. Driving the gateway through ``cmd.exe``
    allocates a console, and during logon Windows broadcasts ``CTRL_CLOSE_EVENT``
    to console process groups, reaping cmd.exe and the half-initialized gateway
    with ``STATUS_CONTROL_C_EXIT`` (``0xC000013A``). Task Scheduler treats that
    code as a user cancel, so the ``RestartOnFailure`` policy never fires and the
    gateway silently disappears on every reboot.

    ``wscript.exe`` and ``pythonw.exe`` are both GUI-subsystem executables with
    no console, so this launcher receives no console control events. It mirrors
    ``_build_gateway_cmd_script`` (same env + argv via ``_resolve_detached_python``)
    but sets the environment on the WScript.Shell process and ``Run``s pythonw
    directly - no cmd.exe anywhere in the chain.

    Args:
        python_path: Interpreter path handed to ``_resolve_detached_python``.
        working_dir: Directory assigned to ``sh.CurrentDirectory`` before launch.
        hermes_home: Value exported as ``HERMES_HOME``.
        profile_arg: Optional extra CLI arguments (whitespace-split) inserted
            before ``gateway run``; empty string for none.

    Returns:
        The full VBScript source, every line terminated by CRLF.

    Raises:
        ValueError: Propagated from ``_quote_vbs_string`` when an embedded value
            contains a carriage return or line feed.
    """
    pythonw_path, venv_dir, extra_pythonpath = _resolve_detached_python(python_path)

    prog_args = [pythonw_path, "-m", "hermes_cli.main"]
    if profile_arg:
        prog_args.extend(profile_arg.split())
    prog_args.extend(["gateway", "run"])
    # list2cmdline gives CreateProcess-correct quoting for WScript.Shell.Run.
    command_line = subprocess.list2cmdline(prog_args)

    # The generated script only ever executes on Windows, so the PYTHONPATH
    # separator must be ";" regardless of the OS rendering it. os.pathsep would
    # emit ":" when this builder runs on a POSIX host (e.g. the test suite) and
    # would disagree with the ";" the .cmd wrapper hard-codes.
    windows_pathsep = ";"
    repo_root = str(Path(__file__).resolve().parent.parent)
    static_pythonpath = windows_pathsep.join([repo_root, *extra_pythonpath])

    quote = _quote_vbs_string
    lines = [
        f"' {_TASK_DESCRIPTION}",
        "Option Explicit",
        "Dim sh, env, existing_pp",
        'Set sh = CreateObject("WScript.Shell")',
        'Set env = sh.Environment("PROCESS")',
        f"env.Item({quote('HERMES_HOME')}) = {quote(hermes_home)}",
        f"env.Item({quote('PYTHONIOENCODING')}) = {quote('utf-8')}",
        f"env.Item({quote('HERMES_GATEWAY_DETACHED')}) = {quote('1')}",
        f"env.Item({quote('VIRTUAL_ENV')}) = {quote(str(venv_dir))}",
        # Mirror the cmd wrapper's ``PYTHONPATH=<static>;%PYTHONPATH%``: chain onto
        # whatever PYTHONPATH the task environment already carries, at runtime.
        f"existing_pp = env.Item({quote('PYTHONPATH')})",
        "If Len(existing_pp) > 0 Then",
        f" env.Item({quote('PYTHONPATH')}) = {quote(static_pythonpath + windows_pathsep)} & existing_pp",
        "Else",
        f" env.Item({quote('PYTHONPATH')}) = {quote(static_pythonpath)}",
        "End If",
        f"sh.CurrentDirectory = {quote(working_dir)}",
        # Window style 0 = hidden; bWaitOnReturn False = detached/async. pythonw is
        # GUI-subsystem so no console is ever created for the gateway either.
        f"sh.Run {quote(command_line)}, 0, False",
    ]
    return "\r\n".join(lines) + "\r\n"
def _build_startup_launcher(script_path: Path) -> str:
"""The tiny .cmd that goes in the Startup folder. Just minimizes and chains.
@ -425,6 +502,15 @@ def _write_task_script() -> Path:
tmp = script_path.with_suffix(".tmp")
tmp.write_text(content, encoding="utf-8", newline="")
tmp.replace(script_path)
# Also render the console-less .vbs launcher the Scheduled Task runs via
# wscript.exe (issue #45599 fix A). The .cmd above stays for the
# Startup-folder fallback and direct /Run paths.
vbs_content = _build_gateway_vbs_script(python_path, working_dir, hermes_home, profile_arg)
vbs_path = script_path.with_suffix(".vbs")
vbs_tmp = vbs_path.with_name(vbs_path.name + ".tmp")
vbs_tmp.write_text(vbs_content, encoding="utf-8", newline="")
vbs_tmp.replace(vbs_path)
return script_path
@ -443,6 +529,74 @@ def _resolve_task_user() -> str | None:
return f"{domain}\\{username}" if domain else username
def _build_scheduled_task_xml(task_name: str, launcher_path: Path, user: str | None) -> str:
    """Render the Task Scheduler XML definition used to register the gateway task.

    The action runs ``wscript.exe //B //Nologo "<launcher_path>"``, so
    ``launcher_path`` is expected to be the console-less ``.vbs`` launcher - not
    the ``.cmd`` (see ``_build_gateway_vbs_script`` / issue #45599 root cause #1).

    The definition carries a logon trigger delayed by ``_TASK_LOGON_DELAY`` and a
    ``RestartOnFailure`` policy built from ``_TASK_RESTART_INTERVAL`` and
    ``_TASK_RESTART_COUNT``; battery, idle and network conditions are all set to
    ``false`` and ``ExecutionTimeLimit`` is ``PT0S``.

    Args:
        task_name: Accepted for signature symmetry with the caller; the rendered
            XML does not reference it.
        launcher_path: Script passed to ``wscript.exe``; XML-escaped before use.
        user: Account written as ``<UserId>``; the element is omitted when falsy.

    Returns:
        The XML document as text (declared ``UTF-16``; encoding is the caller's job).
    """
    description = escape(_TASK_DESCRIPTION)
    launcher = escape(str(launcher_path))
    if user:
        user_id_element = f"\n <UserId>{escape(user)}</UserId>"
    else:
        user_id_element = ""
    return f"""<?xml version="1.0" encoding="UTF-16"?>
<Task version="1.4" xmlns="http://schemas.microsoft.com/windows/2004/02/mit/task">
<RegistrationInfo>
<Description>{description}</Description>
</RegistrationInfo>
<Triggers>
<LogonTrigger>
<Enabled>true</Enabled>
<Delay>{_TASK_LOGON_DELAY}</Delay>
</LogonTrigger>
</Triggers>
<Principals>
<Principal id="Author">{user_id_element}
<LogonType>InteractiveToken</LogonType>
<RunLevel>LeastPrivilege</RunLevel>
</Principal>
</Principals>
<Settings>
<MultipleInstancesPolicy>IgnoreNew</MultipleInstancesPolicy>
<DisallowStartIfOnBatteries>false</DisallowStartIfOnBatteries>
<StopIfGoingOnBatteries>false</StopIfGoingOnBatteries>
<AllowHardTerminate>true</AllowHardTerminate>
<StartWhenAvailable>true</StartWhenAvailable>
<RunOnlyIfNetworkAvailable>false</RunOnlyIfNetworkAvailable>
<IdleSettings>
<StopOnIdleEnd>false</StopOnIdleEnd>
<RestartOnIdle>false</RestartOnIdle>
</IdleSettings>
<AllowStartOnDemand>true</AllowStartOnDemand>
<Enabled>true</Enabled>
<Hidden>false</Hidden>
<RunOnlyIfIdle>false</RunOnlyIfIdle>
<WakeToRun>false</WakeToRun>
<ExecutionTimeLimit>PT0S</ExecutionTimeLimit>
<Priority>7</Priority>
<RestartOnFailure>
<Interval>{_TASK_RESTART_INTERVAL}</Interval>
<Count>{_TASK_RESTART_COUNT}</Count>
</RestartOnFailure>
</Settings>
<Actions Context="Author">
<Exec>
<Command>wscript.exe</Command>
<Arguments>//B //Nologo "{launcher}"</Arguments>
</Exec>
</Actions>
</Task>
"""
def _write_scheduled_task_xml(task_name: str, launcher_path: Path, user: str | None) -> Path:
    """Write the task definition next to the launcher and return its path.

    The file name is ``launcher_path`` with its suffix replaced by ``.task.xml``.
    The content comes from ``_build_scheduled_task_xml`` and is written as UTF-16
    (matching the encoding the XML declaration announces) with no newline
    translation.

    Args:
        task_name: Forwarded to ``_build_scheduled_task_xml``.
        launcher_path: Launcher script the task will run; also fixes the output
            location.
        user: Forwarded to ``_build_scheduled_task_xml``.

    Returns:
        Path of the XML file that was written.
    """
    destination = launcher_path.with_suffix(".task.xml")
    document = _build_scheduled_task_xml(task_name, launcher_path, user)
    destination.write_text(document, encoding="utf-16", newline="")
    return destination
def _install_scheduled_task(task_name: str, script_path: Path) -> tuple[bool, str]:
"""Create or replace the Scheduled Task. Returns (success, detail).
@ -451,8 +605,6 @@ def _install_scheduled_task(task_name: str, script_path: Path) -> tuple[bool, st
preserves those stale triggers and can make the gateway relaunch every
minute. Delete+create gives us a clean ONLOGON task every install.
"""
quoted_script = _quote_schtasks_arg(str(script_path))
delete_code, delete_out, delete_err = _exec_schtasks(["/Delete", "/F", "/TN", task_name])
delete_detail = (delete_err or delete_out or "").strip()
if delete_code != 0 and delete_detail and "cannot find" not in delete_detail.lower():
@ -460,32 +612,28 @@ def _install_scheduled_task(task_name: str, script_path: Path) -> tuple[bool, st
return (False, f"schtasks /Delete failed (code {delete_code}): {delete_detail}")
# Non-fatal: /Create /F below may still replace it. Keep the detail in
# the final error if creation also fails.
# password" variant; if that fails, retry without /RU /NP /IT.
base = [
"/Create",
"/F",
"/SC",
"ONLOGON",
"/RL",
"LIMITED",
"/TN",
task_name,
"/TR",
quoted_script,
]
user = _resolve_task_user()
variants = []
if user:
variants.append([*base, "/RU", user, "/NP", "/IT"])
# The Scheduled Task launches the console-less .vbs (issue #45599 fix A), not
# the .cmd. The .cmd stays for the Startup-folder fallback and direct /Run.
launcher_path = script_path.with_suffix(".vbs")
xml_path = _write_scheduled_task_xml(task_name, launcher_path, user)
base = ["/Create", "/F", "/TN", task_name, "/XML", str(xml_path)]
variants = [[*base, "/RU", user, "/NP", "/IT"]] if user else []
variants.append(base)
last_code = 1
last_err = ""
for argv in variants:
code, out, err = _exec_schtasks(argv)
if code == 0:
return (True, f"Created Scheduled Task {task_name!r}")
last_code, last_err = code, (err or out or "")
try:
for argv in variants:
code, out, err = _exec_schtasks(argv)
if code == 0:
return (True, f"Created Scheduled Task {task_name!r}")
last_code, last_err = code, (err or out or "")
finally:
try:
xml_path.unlink(missing_ok=True)
except OSError:
pass
if delete_detail and "cannot find" not in delete_detail.lower():
last_err = f"{last_err.strip()} (delete detail: {delete_detail})"
return (False, f"schtasks /Create failed (code {last_code}): {last_err.strip()}")

View file

@ -190,7 +190,11 @@ def _arrange_startup_fallback(monkeypatch, tmp_path, running_pids):
def test_gateway_cmd_script_uses_pythonw_without_replace_or_start_churn(monkeypatch):
"""Scheduled Task wrapper should launch pythonw once and avoid replace loops."""
monkeypatch.setattr(gateway_windows, "_derive_venv_pythonw", lambda exe: exe.replace("python.exe", "pythonw.exe"))
monkeypatch.setattr(
gateway_windows,
"_resolve_detached_python",
lambda exe: (exe.replace("python.exe", "pythonw.exe"), r"C:\\Hermes\\hermes-agent\\venv", []),
)
content = gateway_windows._build_gateway_cmd_script(
r"C:\\Hermes\\hermes-agent\\venv\\Scripts\\python.exe",
@ -206,6 +210,41 @@ def test_gateway_cmd_script_uses_pythonw_without_replace_or_start_churn(monkeypa
assert "exit /b 0" in content
def test_gateway_cmd_script_uses_uv_safe_base_pythonw(monkeypatch, tmp_path):
    """The .cmd wrapper should share the detached uv-venv workaround.

    Builds a fake uv-managed venv (``pyvenv.cfg`` carrying a ``uv =`` line and a
    ``home`` pointing at a base interpreter directory) and checks that the
    rendered script launches the base ``pythonw.exe``, exports the venv as
    ``VIRTUAL_ENV``, mentions the venv's site-packages, and never references the
    venv's own ``pythonw.exe``.
    """
    venv_root = tmp_path / "project" / "venv"
    scripts_dir = venv_root / "Scripts"
    site_packages_dir = venv_root / "Lib" / "site-packages"
    home_dir = tmp_path / "hermes-home"
    base_dir = tmp_path / "uv" / "python" / "cpython-3.11-windows-x86_64-none"

    for directory in (scripts_dir, site_packages_dir, base_dir):
        directory.mkdir(parents=True)
    home_dir.mkdir()

    venv_python = scripts_dir / "python.exe"
    venv_pythonw = scripts_dir / "pythonw.exe"
    base_pythonw = base_dir / "pythonw.exe"
    for executable in (venv_python, venv_pythonw, base_pythonw):
        executable.write_text("", encoding="utf-8")

    pyvenv_cfg = venv_root / "pyvenv.cfg"
    pyvenv_cfg.write_text(
        f"home = {base_dir}\nimplementation = CPython\nuv = 0.11.14\nversion_info = 3.11.15\n",
        encoding="utf-8",
    )

    script = gateway_windows._build_gateway_cmd_script(
        str(venv_python),
        str(home_dir),
        str(home_dir),
        "",
    )

    assert str(base_pythonw) in script
    assert f'set "VIRTUAL_ENV={venv_root}"' in script
    assert str(site_packages_dir) in script
    assert str(venv_pythonw) not in script
def test_elevated_gateway_command_uses_pythonw_hidden_console(monkeypatch):
"""UAC handoff should not leave a second elevated cmd.exe window open."""
calls = []
@ -239,14 +278,18 @@ def test_install_scheduled_task_recreates_instead_of_change(monkeypatch, tmp_pat
"""Install must delete+create so stale minute-repeat task settings are not preserved."""
calls = []
script_path = tmp_path / "Hermes_Gateway_alice.cmd"
xml_seen = {}
monkeypatch.setattr(gateway_windows, "_assert_windows", lambda: None)
monkeypatch.setattr(gateway_windows, "_resolve_task_user", lambda: r"DOMAIN\\alice")
def fake_schtasks(args):
calls.append(tuple(args))
if args[0] == "/Delete":
return (0, "SUCCESS", "")
if args[0] == "/Create":
xml_path = Path(args[args.index("/XML") + 1])
xml_seen["text"] = xml_path.read_text(encoding="utf-16")
return (0, "SUCCESS", "")
raise AssertionError(f"unexpected schtasks args: {args}")
@ -257,8 +300,88 @@ def test_install_scheduled_task_recreates_instead_of_change(monkeypatch, tmp_pat
assert "/Change" not in [arg for call in calls for arg in call]
assert calls[0][:4] == ("/Delete", "/F", "/TN", "Hermes_Gateway_alice")
assert calls[1][0] == "/Create"
assert "/SC" in calls[1]
assert "ONLOGON" in calls[1]
assert "/XML" in calls[1]
assert "/SC" not in calls[1]
assert "<Delay>PT30S</Delay>" in xml_seen["text"]
assert "<StartWhenAvailable>true</StartWhenAvailable>" in xml_seen["text"]
assert "<StopOnIdleEnd>false</StopOnIdleEnd>" in xml_seen["text"]
assert "<DisallowStartIfOnBatteries>false</DisallowStartIfOnBatteries>" in xml_seen["text"]
assert "<StopIfGoingOnBatteries>false</StopIfGoingOnBatteries>" in xml_seen["text"]
assert "<ExecutionTimeLimit>PT0S</ExecutionTimeLimit>" in xml_seen["text"]
assert "<RestartOnFailure>" in xml_seen["text"]
assert "<Count>999</Count>" in xml_seen["text"]
# Scheduled Task launches the console-less .vbs via wscript.exe, never cmd.exe
# (issue #45599 fix A: no console -> no logon CTRL_CLOSE_EVENT / 0xC000013A).
assert "<Command>wscript.exe</Command>" in xml_seen["text"]
assert "//B //Nologo" in xml_seen["text"]
assert "Hermes_Gateway_alice.vbs" in xml_seen["text"]
assert "cmd.exe" not in xml_seen["text"]
def test_gateway_vbs_script_is_console_less(monkeypatch):
    """The .vbs launcher never touches cmd.exe and Runs pythonw hidden.

    Issue #45599 fix A: no console means no logon CTRL_CLOSE_EVENT / 0xC000013A.
    """

    def fake_resolve(exe):
        return (r"C:\venv\Scripts\pythonw.exe", Path(r"C:\venv"), [])

    monkeypatch.setattr(gateway_windows, "_resolve_detached_python", fake_resolve)

    script = gateway_windows._build_gateway_vbs_script(
        r"C:\venv\Scripts\python.exe",
        r"C:\Hermes",
        r"C:\Hermes",
        "--profile work",
    )

    assert "cmd.exe" not in script.lower()

    required_fragments = (
        'CreateObject("WScript.Shell")',
        "pythonw.exe",
        "hermes_cli.main",
        "gateway run",
        ", 0, False",  # hidden window, detached/async
    )
    for fragment in required_fragments:
        assert fragment in script

    exported_variables = (
        "HERMES_HOME",
        "PYTHONIOENCODING",
        "HERMES_GATEWAY_DETACHED",
        "VIRTUAL_ENV",
        "PYTHONPATH",
    )
    for variable in exported_variables:
        assert variable in script

    assert "--profile" in script
    assert "work" in script
    assert script.endswith("\r\n")
def test_gateway_vbs_script_quotes_spaced_paths(monkeypatch):
    """Paths containing spaces survive both quoting layers intact."""

    def fake_resolve(exe):
        return (r"C:\Program Files\Py\pythonw.exe", Path(r"C:\v env"), [])

    monkeypatch.setattr(gateway_windows, "_resolve_detached_python", fake_resolve)

    script = gateway_windows._build_gateway_vbs_script(
        r"C:\Program Files\Py\python.exe",
        r"C:\work dir",
        r"C:\h home",
        "",
    )

    # list2cmdline wraps the spaced exe in quotes; _quote_vbs_string then doubles
    # each of those quotes inside the VBScript literal.
    doubled_exe = '""C:\\Program Files\\Py\\pythonw.exe""'
    working_dir_line = 'sh.CurrentDirectory = "C:\\work dir"'
    assert doubled_exe in script
    assert working_dir_line in script
def test_gateway_vbs_script_pythonpath_chains_runtime_value(monkeypatch):
    """PYTHONPATH is prepended to the task env's existing value, like ;%PYTHONPATH%."""

    def fake_resolve(exe):
        return (r"C:\v\pythonw.exe", Path(r"C:\v"), [r"C:\v\Lib\site-packages"])

    monkeypatch.setattr(gateway_windows, "_resolve_detached_python", fake_resolve)

    script = gateway_windows._build_gateway_vbs_script(
        r"C:\v\python.exe",
        r"C:\w",
        r"C:\h",
        "",
    )

    expected_fragments = (
        'existing_pp = env.Item("PYTHONPATH")',
        "If Len(existing_pp) > 0 Then",
        r"C:\v\Lib\site-packages",
    )
    for fragment in expected_fragments:
        assert fragment in script
def test_quote_vbs_string_doubles_quotes_and_rejects_newlines():
    """Quoting wraps in double quotes, doubles embedded ones, and refuses newlines."""
    quote = gateway_windows._quote_vbs_string

    expectations = {
        "plain": '"plain"',
        'a"b': '"a""b"',
    }
    for raw, rendered in expectations.items():
        assert quote(raw) == rendered

    with pytest.raises(ValueError):
        quote("line1\nline2")
def test_install_scheduled_task_success_start_now_uses_direct_spawn_not_task_run(monkeypatch, tmp_path, capsys):