mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-12 03:42:08 +00:00
fix(windows): browser tool + spurious SIGINT from subprocess spawning
Three related Windows-only fixes that together make the browser toolset
actually usable on Windows. Symptom chain: user invokes browser_navigate
-> tool returns {"success": false, "error": "Daemon process exited
during startup with no error output"} and the CLI exits mid-turn with
the session summary.
Root cause (3 layers):
1. tools/browser_tool.py::_find_agent_browser() resolved
node_modules/.bin/agent-browser to the extensionless POSIX shell
shim via Path.exists(). On Windows, CreateProcessW cannot execute
that script (WinError 193 "not a valid Win32 application"). Fix:
delegate to shutil.which with path=node_modules/.bin so PATHEXT
picks up agent-browser.CMD on Windows and the extensionless shim
stays correct on POSIX.
2. Windows Terminal / Win32 delivers a spurious CTRL_C_EVENT to the
parent hermes.exe whenever a background thread spawns a .cmd
subprocess. Python 3.11's default SIGINT handler raises
KeyboardInterrupt in MainThread, which unwinds prompt_toolkit's
app.run() -> cli.py::run()'s finally block calls _run_cleanup()
-> _emergency_cleanup_all_sessions -> spawns a concurrent
_run_browser_command("close", ...) on the same session the agent
thread just opened. Two agent-browser processes race on the same
--session name, the daemon startup loses, and the tool returns
the "Daemon process exited during startup" error. Fix: install a
Windows-only SIGINT handler that absorbs the signal silently.
Real user Ctrl+C still routes through prompt_toolkit's own c-c
keybinding at the TUI layer, which is how Claude Code handles the
same quirk (driving cancellation via the TUI key handler, not
signals).
3. In tools/browser_tool.py, both Popen sites now pass
creationflags=CREATE_NO_WINDOW | STARTF_USESTDHANDLES with
close_fds=True on Windows. CREATE_NO_WINDOW suppresses the .cmd
console flash; STARTF_USESTDHANDLES + close_fds ensures the child
inherits only our three chosen handles (DEVNULL stdin, temp-file
stdout/stderr) and no leaked parent console handles that could
confuse agent-browser's native daemon spawn. Notably we do NOT
add CREATE_NEW_PROCESS_GROUP - on Python 3.11 Windows the flag
interacts badly with asyncio's ProactorEventLoop and makes things
worse.
Verified end-to-end on Windows 10 / Windows Terminal / PowerShell:
browser_navigate to https://example.com returns
{"success": true, "title": "Example Domain"} and the CLI stays alive
for follow-up tool calls and assistant turns.
Refs: earlier Windows quirks commits 1cebb3bad (Ctrl+Enter newline),
26f5af52a (environment hints), aefd1a37f (Playwright Chromium).
This commit is contained in:
parent
c34884ea20
commit
acc0a81624
2 changed files with 100 additions and 6 deletions
31
cli.py
31
cli.py
|
|
@ -678,6 +678,7 @@ def _run_cleanup():
|
||||||
if _cleanup_done:
|
if _cleanup_done:
|
||||||
return
|
return
|
||||||
_cleanup_done = True
|
_cleanup_done = True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
_cleanup_all_terminals()
|
_cleanup_all_terminals()
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
@ -12256,6 +12257,36 @@ class HermesCLI:
|
||||||
_signal.signal(_signal.SIGTERM, _signal_handler)
|
_signal.signal(_signal.SIGTERM, _signal_handler)
|
||||||
if hasattr(_signal, 'SIGHUP'):
|
if hasattr(_signal, 'SIGHUP'):
|
||||||
_signal.signal(_signal.SIGHUP, _signal_handler)
|
_signal.signal(_signal.SIGHUP, _signal_handler)
|
||||||
|
|
||||||
|
# Windows: install a SIGINT handler that absorbs the signal
|
||||||
|
# instead of letting Python's default handler raise
|
||||||
|
# KeyboardInterrupt in MainThread. Windows Terminal / Win32
|
||||||
|
# delivers spurious CTRL_C_EVENT to the hermes process when
|
||||||
|
# child processes are spawned from background threads (agent
|
||||||
|
# subprocess Popen path). The default Python SIGINT handler
|
||||||
|
# would then unwind prompt_toolkit's app.run(), trigger
|
||||||
|
# _run_cleanup mid-turn, and close browser sessions mid-open
|
||||||
|
# — causing "Daemon process exited during startup" errors.
|
||||||
|
#
|
||||||
|
# The handler is a silent no-op. Real user Ctrl+C still works
|
||||||
|
# because prompt_toolkit binds c-c at the TUI layer and never
|
||||||
|
# reaches this OS-signal path. This matches how Claude Code
|
||||||
|
# handles the same Windows quirk (cancellation is driven by
|
||||||
|
# the TUI key handler, not by OS signals).
|
||||||
|
#
|
||||||
|
# POSIX: leave the default SIGINT handler alone. prompt_toolkit
|
||||||
|
# installs its own handler there and it works as expected.
|
||||||
|
if sys.platform == "win32":
|
||||||
|
def _sigint_absorb(signum, frame):
|
||||||
|
# Absorb silently. Do NOT call agent.interrupt() here:
|
||||||
|
# Windows fires spurious CTRL_C_EVENT whenever a
|
||||||
|
# background thread spawns a .cmd subprocess, and
|
||||||
|
# interrupt() would inject a fake user message each
|
||||||
|
# time. Real user Ctrl+C routes through prompt_toolkit's
|
||||||
|
# own c-c key binding at the TUI layer (same pattern as
|
||||||
|
# Claude Code's Windows handling).
|
||||||
|
return
|
||||||
|
_signal.signal(_signal.SIGINT, _sigint_absorb)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass # Signal handlers may fail in restricted environments
|
pass # Signal handlers may fail in restricted environments
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -737,9 +737,45 @@ def _run_chrome_fallback_command(
|
||||||
stdout_fd = os.open(stdout_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
stdout_fd = os.open(stdout_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
||||||
stderr_fd = os.open(stderr_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
stderr_fd = os.open(stderr_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
||||||
try:
|
try:
|
||||||
|
# On Windows, launch the child in a new process group so parent
|
||||||
|
# console Ctrl+C doesn't kill it with STATUS_CONTROL_C_EXIT
|
||||||
|
# (0xC000013A = rc 3221225786), AND insulate its stdio + handle
|
||||||
|
# inheritance from the parent.
|
||||||
|
#
|
||||||
|
# Additional Windows hardening beyond CREATE_NEW_PROCESS_GROUP:
|
||||||
|
# * STARTF_USESTDHANDLES + explicit handles → CreateProcess hands
|
||||||
|
# the child ONLY our three chosen handles (DEVNULL stdin +
|
||||||
|
# temp-file stdout/stderr). Without this, some parents leak
|
||||||
|
# console handles that break downstream grandchild spawns — the
|
||||||
|
# agent-browser Rust binary spawns a detached daemon grandchild,
|
||||||
|
# and that grandchild's CreateProcess dies silently
|
||||||
|
# ("Daemon process exited during startup with no error output")
|
||||||
|
# when inherited parent handles are in a weird state. Observed
|
||||||
|
# in the Hermes CLI where sys.stdout and sys.stderr both report
|
||||||
|
# fileno=1 (stderr dup'd onto stdout at the OS level).
|
||||||
|
# * close_fds=True → block inheritance of every other handle.
|
||||||
|
# (Default on POSIX; must be explicit on Windows for stdio.)
|
||||||
|
_popen_extra: dict = {}
|
||||||
|
if os.name == "nt":
|
||||||
|
# CREATE_NO_WINDOW → don't attach a console (cmd.exe would
|
||||||
|
# otherwise briefly allocate one for the .cmd shim).
|
||||||
|
# Do NOT add CREATE_NEW_PROCESS_GROUP: on Python 3.11 Windows
|
||||||
|
# it interacts with asyncio's ProactorEventLoop such that the
|
||||||
|
# subprocess creation cancels the running loop task, which
|
||||||
|
# surfaces as KeyboardInterrupt in app.run() and tears down
|
||||||
|
# the CLI mid-turn. The agent thread's subprocess spawn
|
||||||
|
# unwound MainThread's prompt_toolkit loop that way — see
|
||||||
|
# diag log: "asyncio.CancelledError → KeyboardInterrupt".
|
||||||
|
_CREATE_NO_WINDOW = 0x08000000
|
||||||
|
_popen_extra["creationflags"] = _CREATE_NO_WINDOW
|
||||||
|
_popen_extra["close_fds"] = True
|
||||||
|
_si = subprocess.STARTUPINFO()
|
||||||
|
_si.dwFlags |= subprocess.STARTF_USESTDHANDLES
|
||||||
|
_popen_extra["startupinfo"] = _si
|
||||||
proc = subprocess.Popen(
|
proc = subprocess.Popen(
|
||||||
full, stdout=stdout_fd, stderr=stderr_fd,
|
full, stdout=stdout_fd, stderr=stderr_fd,
|
||||||
stdin=subprocess.DEVNULL, env=browser_env,
|
stdin=subprocess.DEVNULL, env=browser_env,
|
||||||
|
**_popen_extra,
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
os.close(stdout_fd)
|
os.close(stdout_fd)
|
||||||
|
|
@ -1637,13 +1673,22 @@ def _find_agent_browser() -> str:
|
||||||
_agent_browser_resolved = True
|
_agent_browser_resolved = True
|
||||||
return which_result
|
return which_result
|
||||||
|
|
||||||
# Check local node_modules/.bin/ (npm install in repo root)
|
# Check local node_modules/.bin/ (npm install in repo root).
|
||||||
|
# On Windows, npm drops three shims in .bin: an extensionless POSIX shell
|
||||||
|
# script (for Git Bash / WSL), `agent-browser.cmd` (for cmd/PowerShell),
|
||||||
|
# and `agent-browser.ps1` (for PowerShell). CreateProcess (used by Python's
|
||||||
|
# subprocess on Windows) cannot execute the extensionless shim — it raises
|
||||||
|
# WinError 193 "%1 is not a valid Win32 application". We must resolve to the
|
||||||
|
# `.cmd` shim instead. `shutil.which` consults PATHEXT, so we delegate to it
|
||||||
|
# with an explicit path so POSIX hosts still pick the extensionless shim.
|
||||||
repo_root = Path(__file__).parent.parent
|
repo_root = Path(__file__).parent.parent
|
||||||
local_bin = repo_root / "node_modules" / ".bin" / "agent-browser"
|
local_bin_dir = repo_root / "node_modules" / ".bin"
|
||||||
if local_bin.exists():
|
if local_bin_dir.is_dir():
|
||||||
_cached_agent_browser = str(local_bin)
|
local_which = shutil.which("agent-browser", path=str(local_bin_dir))
|
||||||
_agent_browser_resolved = True
|
if local_which:
|
||||||
return _cached_agent_browser
|
_cached_agent_browser = local_which
|
||||||
|
_agent_browser_resolved = True
|
||||||
|
return _cached_agent_browser
|
||||||
|
|
||||||
# Check common npx locations (also search the extended fallback PATH)
|
# Check common npx locations (also search the extended fallback PATH)
|
||||||
npx_path = shutil.which("npx")
|
npx_path = shutil.which("npx")
|
||||||
|
|
@ -1858,12 +1903,30 @@ def _run_browser_command(
|
||||||
stdout_fd = os.open(stdout_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
stdout_fd = os.open(stdout_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
||||||
stderr_fd = os.open(stderr_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
stderr_fd = os.open(stderr_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
||||||
try:
|
try:
|
||||||
|
# See matching comment at the other Popen site above — on
|
||||||
|
# Windows we put agent-browser in its own process group, force
|
||||||
|
# STARTF_USESTDHANDLES so CreateProcess hands the child ONLY our
|
||||||
|
# three explicit handles (no leaked parent-console handles to
|
||||||
|
# confuse the Rust binary's daemon-spawn), and close_fds=True to
|
||||||
|
# block inheritance of everything else.
|
||||||
|
_popen_extra: dict = {}
|
||||||
|
if os.name == "nt":
|
||||||
|
# See matching block at the other Popen site — CREATE_NO_WINDOW
|
||||||
|
# only, NO CREATE_NEW_PROCESS_GROUP (cancels asyncio loop task
|
||||||
|
# on Python 3.11 Windows → KeyboardInterrupt in CLI MainThread).
|
||||||
|
_CREATE_NO_WINDOW = 0x08000000
|
||||||
|
_popen_extra["creationflags"] = _CREATE_NO_WINDOW
|
||||||
|
_popen_extra["close_fds"] = True
|
||||||
|
_si = subprocess.STARTUPINFO()
|
||||||
|
_si.dwFlags |= subprocess.STARTF_USESTDHANDLES
|
||||||
|
_popen_extra["startupinfo"] = _si
|
||||||
proc = subprocess.Popen(
|
proc = subprocess.Popen(
|
||||||
cmd_parts,
|
cmd_parts,
|
||||||
stdout=stdout_fd,
|
stdout=stdout_fd,
|
||||||
stderr=stderr_fd,
|
stderr=stderr_fd,
|
||||||
stdin=subprocess.DEVNULL,
|
stdin=subprocess.DEVNULL,
|
||||||
env=browser_env,
|
env=browser_env,
|
||||||
|
**_popen_extra,
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
os.close(stdout_fd)
|
os.close(stdout_fd)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue