From acc0a81624686f90300828cf71cf00a9cec0b645 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 8 May 2026 11:06:14 -0700 Subject: [PATCH] fix(windows): browser tool + spurious SIGINT from subprocess spawning Three related Windows-only fixes that together make the browser toolset actually usable on Windows. Symptom chain: user invokes browser_navigate -> tool returns {"success": false, "error": "Daemon process exited during startup with no error output"} and the CLI exits mid-turn with the session summary. Root cause (3 layers): 1. tools/browser_tool.py::_find_agent_browser() resolved node_modules/.bin/agent-browser to the extensionless POSIX shell shim via Path.exists(). On Windows, CreateProcessW cannot execute that script (WinError 193 "not a valid Win32 application"). Fix: delegate to shutil.which with path=node_modules/.bin so PATHEXT picks up agent-browser.CMD on Windows and the extensionless shim stays correct on POSIX. 2. Windows Terminal / Win32 delivers a spurious CTRL_C_EVENT to the parent hermes.exe whenever a background thread spawns a .cmd subprocess. Python 3.11's default SIGINT handler raises KeyboardInterrupt in MainThread, which unwinds prompt_toolkit's app.run() -> cli.py::run()'s finally block calls _run_cleanup() -> _emergency_cleanup_all_sessions -> spawns a concurrent _run_browser_command("close", ...) on the same session the agent thread just opened. Two agent-browser processes race on the same --session name, the daemon startup loses, and the tool returns the "Daemon process exited during startup" error. Fix: install a Windows-only SIGINT handler that absorbs the signal silently. Real user Ctrl+C still routes through prompt_toolkit's own c-c keybinding at the TUI layer, which is how Claude Code handles the same quirk (driving cancellation via the TUI key handler, not signals). 3. In tools/browser_tool.py, both Popen sites now pass creationflags=CREATE_NO_WINDOW | STARTF_USESTDHANDLES with close_fds=True on Windows. CREATE_NO_WINDOW suppresses the .cmd console flash; STARTF_USESTDHANDLES + close_fds ensures the child inherits only our three chosen handles (DEVNULL stdin, temp-file stdout/stderr) and no leaked parent console handles that could confuse agent-browser's native daemon spawn. Notably we do NOT add CREATE_NEW_PROCESS_GROUP - on Python 3.11 Windows the flag interacts badly with asyncio's ProactorEventLoop and makes things worse. Verified end-to-end on Windows 10 / Windows Terminal / PowerShell: browser_navigate to https://example.com returns {"success": true, "title": "Example Domain"} and the CLI stays alive for follow-up tool calls and assistant turns. Refs: earlier Windows quirks commits 1cebb3bad (Ctrl+Enter newline), 26f5af52a (environment hints), aefd1a37f (Playwright Chromium). --- cli.py | 31 ++++++++++++++++++ tools/browser_tool.py | 75 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 100 insertions(+), 6 deletions(-) diff --git a/cli.py b/cli.py index cf5abcba14..f8dae5ac55 100644 --- a/cli.py +++ b/cli.py @@ -678,6 +678,7 @@ def _run_cleanup(): if _cleanup_done: return _cleanup_done = True + try: _cleanup_all_terminals() except Exception: @@ -12256,6 +12257,36 @@ class HermesCLI: _signal.signal(_signal.SIGTERM, _signal_handler) if hasattr(_signal, 'SIGHUP'): _signal.signal(_signal.SIGHUP, _signal_handler) + + # Windows: install a SIGINT handler that absorbs the signal + # instead of letting Python's default handler raise + # KeyboardInterrupt in MainThread. Windows Terminal / Win32 + # delivers spurious CTRL_C_EVENT to the hermes process when + # child processes are spawned from background threads (agent + # subprocess Popen path). The default Python SIGINT handler + # would then unwind prompt_toolkit's app.run(), trigger + # _run_cleanup mid-turn, and close browser sessions mid-open + # — causing "Daemon process exited during startup" errors. + # + # The handler is a silent no-op. Real user Ctrl+C still works + # because prompt_toolkit binds c-c at the TUI layer and never + # reaches this OS-signal path. This matches how Claude Code + # handles the same Windows quirk (cancellation is driven by + # the TUI key handler, not by OS signals). + # + # POSIX: leave the default SIGINT handler alone. prompt_toolkit + # installs its own handler there and it works as expected. + if sys.platform == "win32": + def _sigint_absorb(signum, frame): + # Absorb silently. Do NOT call agent.interrupt() here: + # Windows fires spurious CTRL_C_EVENT whenever a + # background thread spawns a .cmd subprocess, and + # interrupt() would inject a fake user message each + # time. Real user Ctrl+C routes through prompt_toolkit's + # own c-c key binding at the TUI layer (same pattern as + # Claude Code's Windows handling). + return + _signal.signal(_signal.SIGINT, _sigint_absorb) except Exception: pass # Signal handlers may fail in restricted environments diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 214bd25b7f..1d6a732134 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -737,9 +737,45 @@ def _run_chrome_fallback_command( stdout_fd = os.open(stdout_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600) stderr_fd = os.open(stderr_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600) try: + # On Windows, launch the child in a new process group so parent + # console Ctrl+C doesn't kill it with STATUS_CONTROL_C_EXIT + # (0xC000013A = rc 3221225786), AND insulate its stdio + handle + # inheritance from the parent. + # + # Additional Windows hardening beyond CREATE_NEW_PROCESS_GROUP: + # * STARTF_USESTDHANDLES + explicit handles → CreateProcess hands + # the child ONLY our three chosen handles (DEVNULL stdin + + # temp-file stdout/stderr). Without this, some parents leak + # console handles that break downstream grandchild spawns — the + # agent-browser Rust binary spawns a detached daemon grandchild, + # and that grandchild's CreateProcess dies silently + # ("Daemon process exited during startup with no error output") + # when inherited parent handles are in a weird state. Observed + # in the Hermes CLI where sys.stdout and sys.stderr both report + # fileno=1 (stderr dup'd onto stdout at the OS level). + # * close_fds=True → block inheritance of every other handle. + # (Default on POSIX; must be explicit on Windows for stdio.) + _popen_extra: dict = {} + if os.name == "nt": + # CREATE_NO_WINDOW → don't attach a console (cmd.exe would + # otherwise briefly allocate one for the .cmd shim). + # Do NOT add CREATE_NEW_PROCESS_GROUP: on Python 3.11 Windows + # it interacts with asyncio's ProactorEventLoop such that the + # subprocess creation cancels the running loop task, which + # surfaces as KeyboardInterrupt in app.run() and tears down + # the CLI mid-turn. The agent thread's subprocess spawn + # unwound MainThread's prompt_toolkit loop that way — see + # diag log: "asyncio.CancelledError → KeyboardInterrupt". + _CREATE_NO_WINDOW = 0x08000000 + _popen_extra["creationflags"] = _CREATE_NO_WINDOW + _popen_extra["close_fds"] = True + _si = subprocess.STARTUPINFO() + _si.dwFlags |= subprocess.STARTF_USESTDHANDLES + _popen_extra["startupinfo"] = _si proc = subprocess.Popen( full, stdout=stdout_fd, stderr=stderr_fd, stdin=subprocess.DEVNULL, env=browser_env, + **_popen_extra, ) finally: os.close(stdout_fd) @@ -1637,13 +1673,22 @@ def _find_agent_browser() -> str: _agent_browser_resolved = True return which_result - # Check local node_modules/.bin/ (npm install in repo root) + # Check local node_modules/.bin/ (npm install in repo root). + # On Windows, npm drops three shims in .bin: an extensionless POSIX shell + # script (for Git Bash / WSL), `agent-browser.cmd` (for cmd/PowerShell), + # and `agent-browser.ps1` (for PowerShell). CreateProcess (used by Python's + # subprocess on Windows) cannot execute the extensionless shim — it raises + # WinError 193 "%1 is not a valid Win32 application". We must resolve to the + # `.cmd` shim instead. `shutil.which` consults PATHEXT, so we delegate to it + # with an explicit path so POSIX hosts still pick the extensionless shim. repo_root = Path(__file__).parent.parent - local_bin = repo_root / "node_modules" / ".bin" / "agent-browser" - if local_bin.exists(): - _cached_agent_browser = str(local_bin) - _agent_browser_resolved = True - return _cached_agent_browser + local_bin_dir = repo_root / "node_modules" / ".bin" + if local_bin_dir.is_dir(): + local_which = shutil.which("agent-browser", path=str(local_bin_dir)) + if local_which: + _cached_agent_browser = local_which + _agent_browser_resolved = True + return _cached_agent_browser # Check common npx locations (also search the extended fallback PATH) npx_path = shutil.which("npx") @@ -1858,12 +1903,30 @@ def _run_browser_command( stdout_fd = os.open(stdout_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600) stderr_fd = os.open(stderr_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600) try: + # See matching comment at the other Popen site above — on + # Windows we put agent-browser in its own process group, force + # STARTF_USESTDHANDLES so CreateProcess hands the child ONLY our + # three explicit handles (no leaked parent-console handles to + # confuse the Rust binary's daemon-spawn), and close_fds=True to + # block inheritance of everything else. + _popen_extra: dict = {} + if os.name == "nt": + # See matching block at the other Popen site — CREATE_NO_WINDOW + # only, NO CREATE_NEW_PROCESS_GROUP (cancels asyncio loop task + # on Python 3.11 Windows → KeyboardInterrupt in CLI MainThread). + _CREATE_NO_WINDOW = 0x08000000 + _popen_extra["creationflags"] = _CREATE_NO_WINDOW + _popen_extra["close_fds"] = True + _si = subprocess.STARTUPINFO() + _si.dwFlags |= subprocess.STARTF_USESTDHANDLES + _popen_extra["startupinfo"] = _si proc = subprocess.Popen( cmd_parts, stdout=stdout_fd, stderr=stderr_fd, stdin=subprocess.DEVNULL, env=browser_env, + **_popen_extra, ) finally: os.close(stdout_fd)