mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(process_registry): use taskkill /T /F for tree-kill on Windows
The Windows branch of `_terminate_host_pid` early-returned after
`os.kill(pid, SIGTERM)` (which Python maps to `TerminateProcess` for
the target handle only), leaving descendant processes — e.g. Chromium
renderer/GPU/network helpers spawned by an `agent-browser` daemon —
running on Windows even after the preceding commit fixed POSIX.
The right Windows primitive is `taskkill /PID <pid> /T /F`:
`/T` walks the tree, `/F` force-terminates. Same approach
`gateway.status.terminate_pid(force=True)` already uses for the
gateway's own shutdown path; reuse the same shape here.
Why NOT extend the POSIX psutil tree-walk to Windows:
1. Windows doesn't maintain a Unix-style process tree. `psutil.
Process.children(recursive=True)` walks PPID links that go stale
when intermediate processes exit, so enumeration is best-effort
and silently misses orphaned descendants. The whole bug we're
fixing is orphaned descendants.
2. `psutil.Process.terminate()` on Windows is `TerminateProcess()`
for one handle — same single-PID scope as the existing
`os.kill`. The existing comment in `gateway/status.py::
terminate_pid` warns this explicitly: 'os.kill SIGTERM is not
equivalent to a tree-killing hard stop' on Windows.
3. Headless Chromium has no GUI window, so the softer
`taskkill /T` without `/F` (which sends WM_CLOSE) won't reach
it either. `/F` is required.
POSIX path is unchanged. The taskkill subprocess uses the same
`creationflags=windows_hide_flags()` pattern other Windows shellouts
in this codebase use. `FileNotFoundError` / `TimeoutExpired` /
`OSError` fall back to bare `os.kill(SIGTERM)` as cheap insurance.
Tests cover the Windows branch via the codebase's standard
`monkeypatch _IS_WINDOWS` pattern (`references/windows-native-
support.md`), plus POSIX tree-walk order, NoSuchProcess swallow,
and the OSError fallback path. 7 new tests, all green on Linux CI.
This commit is contained in:
parent
22f3f5a75a
commit
7ce6b504a2
2 changed files with 203 additions and 2 deletions
|
|
@ -1007,3 +1007,163 @@ def test_drain_notifications_empty_queue():
|
|||
|
||||
results = process_registry.drain_notifications()
|
||||
assert results == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _terminate_host_pid — cross-platform process-tree termination
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTerminateHostPidWindows:
|
||||
"""Windows branch uses ``taskkill /T /F`` — the documented MS tree-kill
|
||||
primitive. We can't use psutil's ``children(recursive=True)`` /
|
||||
``.terminate()`` path on Windows because (1) Windows doesn't maintain
|
||||
a Unix-style process tree so the walk is unreliable, and (2)
|
||||
``Process.terminate()`` on Windows is ``TerminateProcess()`` for the
|
||||
target handle only, not the tree.
|
||||
"""
|
||||
|
||||
def test_windows_invokes_taskkill_with_tree_and_force_flags(self, monkeypatch):
|
||||
"""The Windows branch must shell out to ``taskkill /PID N /T /F``."""
|
||||
from tools import process_registry as pr
|
||||
|
||||
captured = {}
|
||||
|
||||
def fake_run(args, **kwargs):
|
||||
captured["args"] = args
|
||||
captured["kwargs"] = kwargs
|
||||
return MagicMock(returncode=0, stderr="", stdout="")
|
||||
|
||||
monkeypatch.setattr(pr, "_IS_WINDOWS", True)
|
||||
monkeypatch.setattr(pr.subprocess, "run", fake_run)
|
||||
|
||||
pr.ProcessRegistry._terminate_host_pid(12345)
|
||||
|
||||
assert captured["args"][0] == "taskkill"
|
||||
assert "/PID" in captured["args"]
|
||||
assert "12345" in captured["args"]
|
||||
assert "/T" in captured["args"], "Tree flag required to reach descendants"
|
||||
assert "/F" in captured["args"], "Force flag required for headless Chromium"
|
||||
|
||||
def test_windows_falls_back_to_os_kill_when_taskkill_missing(self, monkeypatch):
|
||||
"""If ``taskkill.exe`` is somehow unavailable, fall back to a bare
|
||||
``os.kill(pid, SIGTERM)`` so we at least try to kill the parent."""
|
||||
from tools import process_registry as pr
|
||||
|
||||
kill_calls = []
|
||||
|
||||
def fake_run(*args, **kwargs):
|
||||
raise FileNotFoundError("taskkill not found")
|
||||
|
||||
def fake_kill(pid, sig):
|
||||
kill_calls.append((pid, sig))
|
||||
|
||||
monkeypatch.setattr(pr, "_IS_WINDOWS", True)
|
||||
monkeypatch.setattr(pr.subprocess, "run", fake_run)
|
||||
monkeypatch.setattr(pr.os, "kill", fake_kill)
|
||||
|
||||
pr.ProcessRegistry._terminate_host_pid(12345)
|
||||
|
||||
assert kill_calls == [(12345, signal.SIGTERM)]
|
||||
|
||||
def test_windows_does_not_call_psutil(self, monkeypatch):
|
||||
"""The Windows branch must NOT exercise the psutil tree-walk
|
||||
(it's unreliable on Windows — see the function docstring)."""
|
||||
from tools import process_registry as pr
|
||||
import psutil
|
||||
|
||||
psutil_calls = []
|
||||
|
||||
class _BoomProcess:
|
||||
def __init__(self, pid):
|
||||
psutil_calls.append(("Process", pid))
|
||||
|
||||
def children(self, recursive=False):
|
||||
psutil_calls.append(("children", recursive))
|
||||
return []
|
||||
|
||||
def terminate(self):
|
||||
psutil_calls.append(("terminate",))
|
||||
|
||||
def fake_run(args, **kwargs):
|
||||
return MagicMock(returncode=0, stderr="", stdout="")
|
||||
|
||||
monkeypatch.setattr(pr, "_IS_WINDOWS", True)
|
||||
monkeypatch.setattr(pr.subprocess, "run", fake_run)
|
||||
monkeypatch.setattr(psutil, "Process", _BoomProcess)
|
||||
|
||||
pr.ProcessRegistry._terminate_host_pid(12345)
|
||||
|
||||
assert psutil_calls == [], (
|
||||
f"Windows branch must not touch psutil, but saw {psutil_calls!r}"
|
||||
)
|
||||
|
||||
|
||||
class TestTerminateHostPidPosix:
|
||||
"""POSIX branch walks the tree via psutil and SIGTERMs children first."""
|
||||
|
||||
def test_posix_walks_tree_and_terminates_children_then_parent(self, monkeypatch):
|
||||
from tools import process_registry as pr
|
||||
import psutil
|
||||
|
||||
terminate_order = []
|
||||
|
||||
class _FakeChild:
|
||||
def __init__(self, pid):
|
||||
self.pid = pid
|
||||
|
||||
def terminate(self):
|
||||
terminate_order.append(self.pid)
|
||||
|
||||
class _FakeParent:
|
||||
def __init__(self, pid):
|
||||
self.pid = pid
|
||||
|
||||
def children(self, recursive=False):
|
||||
assert recursive is True
|
||||
return [_FakeChild(101), _FakeChild(102), _FakeChild(103)]
|
||||
|
||||
def terminate(self):
|
||||
terminate_order.append(self.pid)
|
||||
|
||||
monkeypatch.setattr(pr, "_IS_WINDOWS", False)
|
||||
monkeypatch.setattr(psutil, "Process", _FakeParent)
|
||||
|
||||
pr.ProcessRegistry._terminate_host_pid(12345)
|
||||
|
||||
assert terminate_order == [101, 102, 103, 12345], (
|
||||
"Children must be terminated before the parent"
|
||||
)
|
||||
|
||||
def test_posix_no_such_process_swallowed(self, monkeypatch):
|
||||
from tools import process_registry as pr
|
||||
import psutil
|
||||
|
||||
def boom(pid):
|
||||
raise psutil.NoSuchProcess(pid)
|
||||
|
||||
monkeypatch.setattr(pr, "_IS_WINDOWS", False)
|
||||
monkeypatch.setattr(psutil, "Process", boom)
|
||||
|
||||
# Must not raise.
|
||||
pr.ProcessRegistry._terminate_host_pid(999999999)
|
||||
|
||||
def test_posix_oserror_falls_back_to_os_kill(self, monkeypatch):
|
||||
from tools import process_registry as pr
|
||||
import psutil
|
||||
|
||||
def boom(pid):
|
||||
raise PermissionError("can't read /proc")
|
||||
|
||||
kill_calls = []
|
||||
|
||||
def fake_kill(pid, sig):
|
||||
kill_calls.append((pid, sig))
|
||||
|
||||
monkeypatch.setattr(pr, "_IS_WINDOWS", False)
|
||||
monkeypatch.setattr(psutil, "Process", boom)
|
||||
monkeypatch.setattr(pr.os, "kill", fake_kill)
|
||||
|
||||
pr.ProcessRegistry._terminate_host_pid(12345)
|
||||
|
||||
assert kill_calls == [(12345, signal.SIGTERM)]
|
||||
|
|
|
|||
|
|
@ -434,9 +434,50 @@ class ProcessRegistry:
|
|||
|
||||
@staticmethod
|
||||
def _terminate_host_pid(pid: int) -> None:
|
||||
"""Terminate a host-visible PID without requiring the original process handle."""
|
||||
"""Terminate a host-visible PID and its descendants.
|
||||
|
||||
POSIX: walks the process tree with ``psutil`` and SIGTERMs
|
||||
children before the parent so subprocess trees (e.g. Chromium
|
||||
renderers/GPU helpers spawned by an ``agent-browser`` daemon)
|
||||
don't get reparented to init and survive cleanup.
|
||||
|
||||
Windows: shells out to ``taskkill /PID <pid> /T /F``. This is
|
||||
the documented Microsoft primitive for tree-kill and matches the
|
||||
existing convention in ``gateway.status.terminate_pid``. We can't
|
||||
reuse the POSIX psutil path on Windows because:
|
||||
|
||||
1. Windows doesn't maintain a Unix-style process tree —
|
||||
``psutil.Process.children(recursive=True)`` walks PPID
|
||||
links that go stale when intermediate processes exit, so
|
||||
enumeration is best-effort and misses orphaned descendants.
|
||||
2. ``psutil.Process.terminate()`` on Windows is
|
||||
``TerminateProcess()`` which kills only the target handle
|
||||
and is a hard kill — there is no Windows equivalent of a
|
||||
SIGTERM that cascades through a process group. (See the
|
||||
warning in ``gateway/status.py::terminate_pid``: "os.kill
|
||||
with SIGTERM is not equivalent to a tree-killing hard stop"
|
||||
on Windows.) Headless Chromium has no GUI window, so the
|
||||
softer ``taskkill /T`` without ``/F`` won't reach it either.
|
||||
|
||||
``psutil`` is a hard dependency (see ``pyproject.toml``); the
|
||||
bare-``os.kill`` fallback covers OSError / PermissionError on
|
||||
POSIX and a missing ``taskkill.exe`` on Windows (effectively
|
||||
unreachable on real Windows installs, but cheap insurance).
|
||||
"""
|
||||
if _IS_WINDOWS:
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
try:
|
||||
subprocess.run(
|
||||
["taskkill", "/PID", str(pid), "/T", "/F"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
creationflags=windows_hide_flags(),
|
||||
)
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
|
||||
try:
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
except (OSError, ProcessLookupError, PermissionError):
|
||||
pass
|
||||
return
|
||||
|
||||
import psutil
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue