From 433db17c0a8d5581b4fb38289539fc1ee5cc7696 Mon Sep 17 00:00:00 2001 From: lEWFkRAD Date: Tue, 23 Jun 2026 19:07:52 -0400 Subject: [PATCH] fix(windows): harden gateway scheduled task (#45610) * fix(windows): harden gateway scheduled task * fix(windows): launch gateway scheduled task via console-less wscript The Scheduled Task ran the gateway through cmd.exe, which allocates a console. During logon Windows broadcasts CTRL_CLOSE_EVENT to console process groups, reaping cmd.exe and the half-initialized gateway with STATUS_CONTROL_C_EXIT (0xC000013A) - which Task Scheduler treats as a user cancel, so RestartOnFailure never fires and the gateway vanishes on every reboot (issue #45599 root cause #1). Add a console-less .vbs launcher (wscript.exe -> pythonw.exe, both GUI-subsystem) mirroring the gateway.cmd env + argv, and point the task action at it. The .cmd stays for the Startup-folder fallback and /Run. Co-Authored-By: Claude Opus 4.8 (1M context) --------- Co-authored-by: Jeff Co-authored-by: Claude Opus 4.8 (1M context) --- hermes_cli/gateway_windows.py | 198 ++++++++++++++++++++--- tests/hermes_cli/test_gateway_windows.py | 129 ++++++++++++++- 2 files changed, 299 insertions(+), 28 deletions(-) diff --git a/hermes_cli/gateway_windows.py b/hermes_cli/gateway_windows.py index 466031bfaa7..994ab6e1c50 100644 --- a/hermes_cli/gateway_windows.py +++ b/hermes_cli/gateway_windows.py @@ -38,6 +38,7 @@ import subprocess import sys import time from pathlib import Path +from xml.sax.saxutils import escape # Short timeouts: schtasks occasionally wedges and we don't want to hang forever. _SCHTASKS_TIMEOUT_S = 15 @@ -51,6 +52,9 @@ _ACCESS_DENIED_PATTERN = re.compile(r"(access is denied|acceso denegado)", re.IG _TASK_NAME_DEFAULT = "Hermes_Gateway" _TASK_DESCRIPTION = "Hermes Agent Gateway - Messaging Platform Integration" +_TASK_LOGON_DELAY = "PT30S" +_TASK_RESTART_INTERVAL = "PT1M" +_TASK_RESTART_COUNT = 999 def _schtasks_encoding() -> str: @@ -358,12 +362,13 @@ def _build_gateway_cmd_script( lines.append(f'set "HERMES_HOME={hermes_home}"') lines.append('set "PYTHONIOENCODING=utf-8"') lines.append('set "HERMES_GATEWAY_DETACHED=1"') + pythonw_path, venv_dir, extra_pythonpath = _resolve_detached_python(python_path) # VIRTUAL_ENV lets the gateway's own python detection find the venv # if someone imports hermes_constants-based logic during startup. - venv_dir = str(Path(python_path).resolve().parent.parent) lines.append(f'set "VIRTUAL_ENV={venv_dir}"') + pythonpath_entries = [str(Path(__file__).resolve().parent.parent), *extra_pythonpath] + lines.append(f'set "PYTHONPATH={";".join([*pythonpath_entries, "%PYTHONPATH%"])}"') - pythonw_path = _derive_venv_pythonw(python_path) prog_args = [pythonw_path, "-m", "hermes_cli.main"] if profile_arg: prog_args.extend(profile_arg.split()) @@ -379,6 +384,78 @@ def _build_gateway_cmd_script( return "\r\n".join(lines) + "\r\n" +def _quote_vbs_string(value: str) -> str: + """Quote a value as a VBScript double-quoted string literal. + + VBScript escapes an embedded double-quote by doubling it. A newline cannot + appear inside a literal, so refuse it (same guard as ``_quote_cmd_script_arg``). + """ + if "\r" in value or "\n" in value: + raise ValueError(f"refusing to quote VBScript value containing newline: {value!r}") + return '"' + value.replace('"', '""') + '"' + + +def _build_gateway_vbs_script( + python_path: str, + working_dir: str, + hermes_home: str, + profile_arg: str, +) -> str: + """Build a console-less ``gateway.vbs`` launcher (CRLF-terminated). + + The Scheduled Task runs this through ``wscript.exe`` instead of ``cmd.exe``. + + Why: issue #45599 root cause #1. Driving the gateway through ``cmd.exe`` + allocates a console, and during logon Windows broadcasts ``CTRL_CLOSE_EVENT`` + to console process groups — reaping cmd.exe and the half-initialized gateway + with ``STATUS_CONTROL_C_EXIT`` (``0xC000013A``). Task Scheduler treats that + code as a user cancel, so the ``RestartOnFailure`` policy never fires and the + gateway silently disappears on every reboot. + + ``wscript.exe`` and ``pythonw.exe`` are both GUI-subsystem executables with + no console, so this launcher receives no console control events. It mirrors + ``_build_gateway_cmd_script`` (same env + argv via ``_resolve_detached_python``) + but sets the environment on the WScript.Shell process and ``Run``s pythonw + directly — no cmd.exe anywhere in the chain. + """ + pythonw_path, venv_dir, extra_pythonpath = _resolve_detached_python(python_path) + + prog_args = [pythonw_path, "-m", "hermes_cli.main"] + if profile_arg: + prog_args.extend(profile_arg.split()) + prog_args.extend(["gateway", "run"]) + # list2cmdline gives CreateProcess-correct quoting for WScript.Shell.Run. + command_line = subprocess.list2cmdline(prog_args) + + repo_root = str(Path(__file__).resolve().parent.parent) + static_pythonpath = os.pathsep.join([repo_root, *extra_pythonpath]) + + lines = [ + f"' {_TASK_DESCRIPTION}", + "Option Explicit", + "Dim sh, env, existing_pp", + 'Set sh = CreateObject("WScript.Shell")', + 'Set env = sh.Environment("PROCESS")', + f"env.Item({_quote_vbs_string('HERMES_HOME')}) = {_quote_vbs_string(hermes_home)}", + f"env.Item({_quote_vbs_string('PYTHONIOENCODING')}) = {_quote_vbs_string('utf-8')}", + f"env.Item({_quote_vbs_string('HERMES_GATEWAY_DETACHED')}) = {_quote_vbs_string('1')}", + f"env.Item({_quote_vbs_string('VIRTUAL_ENV')}) = {_quote_vbs_string(str(venv_dir))}", + # Mirror the cmd wrapper's ``PYTHONPATH=;%PYTHONPATH%``: chain onto + # whatever PYTHONPATH the task environment already carries, at runtime. + f"existing_pp = env.Item({_quote_vbs_string('PYTHONPATH')})", + "If Len(existing_pp) > 0 Then", + f" env.Item({_quote_vbs_string('PYTHONPATH')}) = {_quote_vbs_string(static_pythonpath + os.pathsep)} & existing_pp", + "Else", + f" env.Item({_quote_vbs_string('PYTHONPATH')}) = {_quote_vbs_string(static_pythonpath)}", + "End If", + f"sh.CurrentDirectory = {_quote_vbs_string(working_dir)}", + # Window style 0 = hidden; bWaitOnReturn False = detached/async. pythonw is + # GUI-subsystem so no console is ever created for the gateway either. + f"sh.Run {_quote_vbs_string(command_line)}, 0, False", + ] + return "\r\n".join(lines) + "\r\n" + + def _build_startup_launcher(script_path: Path) -> str: """The tiny .cmd that goes in the Startup folder. Just minimizes and chains. @@ -425,6 +502,15 @@ def _write_task_script() -> Path: tmp = script_path.with_suffix(".tmp") tmp.write_text(content, encoding="utf-8", newline="") tmp.replace(script_path) + + # Also render the console-less .vbs launcher the Scheduled Task runs via + # wscript.exe (issue #45599 fix A). The .cmd above stays for the + # Startup-folder fallback and direct /Run paths. + vbs_content = _build_gateway_vbs_script(python_path, working_dir, hermes_home, profile_arg) + vbs_path = script_path.with_suffix(".vbs") + vbs_tmp = vbs_path.with_name(vbs_path.name + ".tmp") + vbs_tmp.write_text(vbs_content, encoding="utf-8", newline="") + vbs_tmp.replace(vbs_path) return script_path @@ -443,6 +529,74 @@ def _resolve_task_user() -> str | None: return f"{domain}\\{username}" if domain else username +def _build_scheduled_task_xml(task_name: str, launcher_path: Path, user: str | None) -> str: + """Render a Task Scheduler XML definition with safe long-running defaults. + + ``launcher_path`` is the console-less ``.vbs`` the task runs via + ``wscript.exe`` — not the ``.cmd`` (see ``_build_gateway_vbs_script`` / + issue #45599 root cause #1). + """ + user_principal = f"\n {escape(user)}" if user else "" + return f""" + + + {escape(_TASK_DESCRIPTION)} + + + + true + {_TASK_LOGON_DELAY} + + + + {user_principal} + InteractiveToken + LeastPrivilege + + + + IgnoreNew + false + false + true + true + false + + false + false + + true + true + false + false + false + PT0S + 7 + + {_TASK_RESTART_INTERVAL} + {_TASK_RESTART_COUNT} + + + + + wscript.exe + //B //Nologo "{escape(str(launcher_path))}" + + + +""" + + +def _write_scheduled_task_xml(task_name: str, launcher_path: Path, user: str | None) -> Path: + xml_path = launcher_path.with_suffix(".task.xml") + xml_path.write_text( + _build_scheduled_task_xml(task_name, launcher_path, user), + encoding="utf-16", + newline="", + ) + return xml_path + + def _install_scheduled_task(task_name: str, script_path: Path) -> tuple[bool, str]: """Create or replace the Scheduled Task. Returns (success, detail). @@ -451,8 +605,6 @@ def _install_scheduled_task(task_name: str, script_path: Path) -> tuple[bool, st preserves those stale triggers and can make the gateway relaunch every minute. Delete+create gives us a clean ONLOGON task every install. """ - quoted_script = _quote_schtasks_arg(str(script_path)) - delete_code, delete_out, delete_err = _exec_schtasks(["/Delete", "/F", "/TN", task_name]) delete_detail = (delete_err or delete_out or "").strip() if delete_code != 0 and delete_detail and "cannot find" not in delete_detail.lower(): @@ -460,32 +612,28 @@ def _install_scheduled_task(task_name: str, script_path: Path) -> tuple[bool, st return (False, f"schtasks /Delete failed (code {delete_code}): {delete_detail}") # Non-fatal: /Create /F below may still replace it. Keep the detail in # the final error if creation also fails. - # password" variant; if that fails, retry without /RU /NP /IT. - base = [ - "/Create", - "/F", - "/SC", - "ONLOGON", - "/RL", - "LIMITED", - "/TN", - task_name, - "/TR", - quoted_script, - ] user = _resolve_task_user() - variants = [] - if user: - variants.append([*base, "/RU", user, "/NP", "/IT"]) + # The Scheduled Task launches the console-less .vbs (issue #45599 fix A), not + # the .cmd. The .cmd stays for the Startup-folder fallback and direct /Run. + launcher_path = script_path.with_suffix(".vbs") + xml_path = _write_scheduled_task_xml(task_name, launcher_path, user) + base = ["/Create", "/F", "/TN", task_name, "/XML", str(xml_path)] + variants = [[*base, "/RU", user, "/NP", "/IT"]] if user else [] variants.append(base) last_code = 1 last_err = "" - for argv in variants: - code, out, err = _exec_schtasks(argv) - if code == 0: - return (True, f"Created Scheduled Task {task_name!r}") - last_code, last_err = code, (err or out or "") + try: + for argv in variants: + code, out, err = _exec_schtasks(argv) + if code == 0: + return (True, f"Created Scheduled Task {task_name!r}") + last_code, last_err = code, (err or out or "") + finally: + try: + xml_path.unlink(missing_ok=True) + except OSError: + pass if delete_detail and "cannot find" not in delete_detail.lower(): last_err = f"{last_err.strip()} (delete detail: {delete_detail})" return (False, f"schtasks /Create failed (code {last_code}): {last_err.strip()}") diff --git a/tests/hermes_cli/test_gateway_windows.py b/tests/hermes_cli/test_gateway_windows.py index 43f2b01dbf9..c327039fcfd 100644 --- a/tests/hermes_cli/test_gateway_windows.py +++ b/tests/hermes_cli/test_gateway_windows.py @@ -190,7 +190,11 @@ def _arrange_startup_fallback(monkeypatch, tmp_path, running_pids): def test_gateway_cmd_script_uses_pythonw_without_replace_or_start_churn(monkeypatch): """Scheduled Task wrapper should launch pythonw once and avoid replace loops.""" - monkeypatch.setattr(gateway_windows, "_derive_venv_pythonw", lambda exe: exe.replace("python.exe", "pythonw.exe")) + monkeypatch.setattr( + gateway_windows, + "_resolve_detached_python", + lambda exe: (exe.replace("python.exe", "pythonw.exe"), r"C:\\Hermes\\hermes-agent\\venv", []), + ) content = gateway_windows._build_gateway_cmd_script( r"C:\\Hermes\\hermes-agent\\venv\\Scripts\\python.exe", @@ -206,6 +210,41 @@ def test_gateway_cmd_script_uses_pythonw_without_replace_or_start_churn(monkeypa assert "exit /b 0" in content +def test_gateway_cmd_script_uses_uv_safe_base_pythonw(monkeypatch, tmp_path): + """Scheduled Task wrapper should share the detached uv-venv workaround.""" + project = tmp_path / "project" + scripts = project / "venv" / "Scripts" + site_packages = project / "venv" / "Lib" / "site-packages" + hermes_home = tmp_path / "hermes-home" + base = tmp_path / "uv" / "python" / "cpython-3.11-windows-x86_64-none" + scripts.mkdir(parents=True) + site_packages.mkdir(parents=True) + hermes_home.mkdir() + base.mkdir(parents=True) + + venv_python = scripts / "python.exe" + venv_pythonw = scripts / "pythonw.exe" + base_pythonw = base / "pythonw.exe" + for exe in (venv_python, venv_pythonw, base_pythonw): + exe.write_text("", encoding="utf-8") + (project / "venv" / "pyvenv.cfg").write_text( + f"home = {base}\nimplementation = CPython\nuv = 0.11.14\nversion_info = 3.11.15\n", + encoding="utf-8", + ) + + content = gateway_windows._build_gateway_cmd_script( + str(venv_python), + str(hermes_home), + str(hermes_home), + "", + ) + + assert str(base_pythonw) in content + assert f'set "VIRTUAL_ENV={project / "venv"}"' in content + assert str(site_packages) in content + assert str(venv_pythonw) not in content + + def test_elevated_gateway_command_uses_pythonw_hidden_console(monkeypatch): """UAC handoff should not leave a second elevated cmd.exe window open.""" calls = [] @@ -239,14 +278,18 @@ def test_install_scheduled_task_recreates_instead_of_change(monkeypatch, tmp_pat """Install must delete+create so stale minute-repeat task settings are not preserved.""" calls = [] script_path = tmp_path / "Hermes_Gateway_alice.cmd" + xml_seen = {} monkeypatch.setattr(gateway_windows, "_assert_windows", lambda: None) + monkeypatch.setattr(gateway_windows, "_resolve_task_user", lambda: r"DOMAIN\\alice") def fake_schtasks(args): calls.append(tuple(args)) if args[0] == "/Delete": return (0, "SUCCESS", "") if args[0] == "/Create": + xml_path = Path(args[args.index("/XML") + 1]) + xml_seen["text"] = xml_path.read_text(encoding="utf-16") return (0, "SUCCESS", "") raise AssertionError(f"unexpected schtasks args: {args}") @@ -257,8 +300,88 @@ def test_install_scheduled_task_recreates_instead_of_change(monkeypatch, tmp_pat assert "/Change" not in [arg for call in calls for arg in call] assert calls[0][:4] == ("/Delete", "/F", "/TN", "Hermes_Gateway_alice") assert calls[1][0] == "/Create" - assert "/SC" in calls[1] - assert "ONLOGON" in calls[1] + assert "/XML" in calls[1] + assert "/SC" not in calls[1] + assert "PT30S" in xml_seen["text"] + assert "true" in xml_seen["text"] + assert "false" in xml_seen["text"] + assert "false" in xml_seen["text"] + assert "false" in xml_seen["text"] + assert "PT0S" in xml_seen["text"] + assert "" in xml_seen["text"] + assert "999" in xml_seen["text"] + # Scheduled Task launches the console-less .vbs via wscript.exe, never cmd.exe + # (issue #45599 fix A: no console -> no logon CTRL_CLOSE_EVENT / 0xC000013A). + assert "wscript.exe" in xml_seen["text"] + assert "//B //Nologo" in xml_seen["text"] + assert "Hermes_Gateway_alice.vbs" in xml_seen["text"] + assert "cmd.exe" not in xml_seen["text"] + + +def test_gateway_vbs_script_is_console_less(monkeypatch): + """The .vbs launcher must avoid cmd.exe entirely and Run pythonw hidden + (issue #45599 fix A: no console -> no logon CTRL_CLOSE_EVENT / 0xC000013A).""" + monkeypatch.setattr( + gateway_windows, + "_resolve_detached_python", + lambda exe: (r"C:\venv\Scripts\pythonw.exe", Path(r"C:\venv"), []), + ) + content = gateway_windows._build_gateway_vbs_script( + r"C:\venv\Scripts\python.exe", + r"C:\Hermes", + r"C:\Hermes", + "--profile work", + ) + assert "cmd.exe" not in content.lower() + assert 'CreateObject("WScript.Shell")' in content + assert "pythonw.exe" in content + assert "hermes_cli.main" in content + assert "gateway run" in content + assert ", 0, False" in content # hidden window, detached/async + for var in ("HERMES_HOME", "PYTHONIOENCODING", "HERMES_GATEWAY_DETACHED", "VIRTUAL_ENV", "PYTHONPATH"): + assert var in content + assert "--profile" in content and "work" in content + assert content.endswith("\r\n") + + +def test_gateway_vbs_script_quotes_spaced_paths(monkeypatch): + """Spaced exe/dir paths stay correctly quoted through the VBScript literal.""" + monkeypatch.setattr( + gateway_windows, + "_resolve_detached_python", + lambda exe: (r"C:\Program Files\Py\pythonw.exe", Path(r"C:\v env"), []), + ) + content = gateway_windows._build_gateway_vbs_script( + r"C:\Program Files\Py\python.exe", + r"C:\work dir", + r"C:\h home", + "", + ) + # list2cmdline quotes the spaced exe; _quote_vbs_string doubles those quotes. + assert '""C:\\Program Files\\Py\\pythonw.exe""' in content + assert 'sh.CurrentDirectory = "C:\\work dir"' in content + + +def test_gateway_vbs_script_pythonpath_chains_runtime_value(monkeypatch): + """PYTHONPATH chains onto the task env's existing value, like ;%PYTHONPATH%.""" + monkeypatch.setattr( + gateway_windows, + "_resolve_detached_python", + lambda exe: (r"C:\v\pythonw.exe", Path(r"C:\v"), [r"C:\v\Lib\site-packages"]), + ) + content = gateway_windows._build_gateway_vbs_script( + r"C:\v\python.exe", r"C:\w", r"C:\h", "", + ) + assert 'existing_pp = env.Item("PYTHONPATH")' in content + assert "If Len(existing_pp) > 0 Then" in content + assert r"C:\v\Lib\site-packages" in content + + +def test_quote_vbs_string_doubles_quotes_and_rejects_newlines(): + assert gateway_windows._quote_vbs_string("plain") == '"plain"' + assert gateway_windows._quote_vbs_string('a"b') == '"a""b"' + with pytest.raises(ValueError): + gateway_windows._quote_vbs_string("line1\nline2") def test_install_scheduled_task_success_start_now_uses_direct_spawn_not_task_run(monkeypatch, tmp_path, capsys):