From 7ce6b504a269ac3f9aed5b406b7a18c432e2fdb5 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Sat, 23 May 2026 17:55:29 -0700 Subject: [PATCH] fix(process_registry): use taskkill /T /F for tree-kill on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Windows branch of `_terminate_host_pid` early-returned after `os.kill(pid, SIGTERM)` (which Python maps to `TerminateProcess` for the target handle only), leaving descendant processes — e.g. Chromium renderer/GPU/network helpers spawned by an `agent-browser` daemon — running on Windows even after the preceding commit fixed POSIX. The right Windows primitive is `taskkill /PID /T /F`: `/T` walks the tree, `/F` force-terminates. Same approach `gateway.status.terminate_pid(force=True)` already uses for the gateway's own shutdown path; reuse the same shape here. Why NOT extend the POSIX psutil tree-walk to Windows: 1. Windows doesn't maintain a Unix-style process tree. `psutil. Process.children(recursive=True)` walks PPID links that go stale when intermediate processes exit, so enumeration is best-effort and silently misses orphaned descendants. The whole bug we're fixing is orphaned descendants. 2. `psutil.Process.terminate()` on Windows is `TerminateProcess()` for one handle — same single-PID scope as the existing `os.kill`. The existing comment in `gateway/status.py:: terminate_pid` warns this explicitly: 'os.kill SIGTERM is not equivalent to a tree-killing hard stop' on Windows. 3. Headless Chromium has no GUI window, so the softer `taskkill /T` without `/F` (which sends WM_CLOSE) won't reach it either. `/F` is required. POSIX path is unchanged. The taskkill subprocess uses the same `creationflags=windows_hide_flags()` pattern other Windows shellouts in this codebase use. `FileNotFoundError` / `TimeoutExpired` / `OSError` fall back to bare `os.kill(SIGTERM)` as cheap insurance. Tests cover the Windows branch via the codebase's standard `monkeypatch _IS_WINDOWS` pattern (`references/windows-native- support.md`), plus POSIX tree-walk order, NoSuchProcess swallow, and the OSError fallback path. 7 new tests, all green on Linux CI. --- tests/tools/test_process_registry.py | 160 +++++++++++++++++++++++++++ tools/process_registry.py | 45 +++++++- 2 files changed, 203 insertions(+), 2 deletions(-) diff --git a/tests/tools/test_process_registry.py b/tests/tools/test_process_registry.py index 3ac5bdfd1f1..10e4421e5f0 100644 --- a/tests/tools/test_process_registry.py +++ b/tests/tools/test_process_registry.py @@ -1007,3 +1007,163 @@ def test_drain_notifications_empty_queue(): results = process_registry.drain_notifications() assert results == [] + + +# --------------------------------------------------------------------------- +# _terminate_host_pid — cross-platform process-tree termination +# --------------------------------------------------------------------------- + + +class TestTerminateHostPidWindows: + """Windows branch uses ``taskkill /T /F`` — the documented MS tree-kill + primitive. We can't use psutil's ``children(recursive=True)`` / + ``.terminate()`` path on Windows because (1) Windows doesn't maintain + a Unix-style process tree so the walk is unreliable, and (2) + ``Process.terminate()`` on Windows is ``TerminateProcess()`` for the + target handle only, not the tree. + """ + + def test_windows_invokes_taskkill_with_tree_and_force_flags(self, monkeypatch): + """The Windows branch must shell out to ``taskkill /PID N /T /F``.""" + from tools import process_registry as pr + + captured = {} + + def fake_run(args, **kwargs): + captured["args"] = args + captured["kwargs"] = kwargs + return MagicMock(returncode=0, stderr="", stdout="") + + monkeypatch.setattr(pr, "_IS_WINDOWS", True) + monkeypatch.setattr(pr.subprocess, "run", fake_run) + + pr.ProcessRegistry._terminate_host_pid(12345) + + assert captured["args"][0] == "taskkill" + assert "/PID" in captured["args"] + assert "12345" in captured["args"] + assert "/T" in captured["args"], "Tree flag required to reach descendants" + assert "/F" in captured["args"], "Force flag required for headless Chromium" + + def test_windows_falls_back_to_os_kill_when_taskkill_missing(self, monkeypatch): + """If ``taskkill.exe`` is somehow unavailable, fall back to a bare + ``os.kill(pid, SIGTERM)`` so we at least try to kill the parent.""" + from tools import process_registry as pr + + kill_calls = [] + + def fake_run(*args, **kwargs): + raise FileNotFoundError("taskkill not found") + + def fake_kill(pid, sig): + kill_calls.append((pid, sig)) + + monkeypatch.setattr(pr, "_IS_WINDOWS", True) + monkeypatch.setattr(pr.subprocess, "run", fake_run) + monkeypatch.setattr(pr.os, "kill", fake_kill) + + pr.ProcessRegistry._terminate_host_pid(12345) + + assert kill_calls == [(12345, signal.SIGTERM)] + + def test_windows_does_not_call_psutil(self, monkeypatch): + """The Windows branch must NOT exercise the psutil tree-walk + (it's unreliable on Windows — see the function docstring).""" + from tools import process_registry as pr + import psutil + + psutil_calls = [] + + class _BoomProcess: + def __init__(self, pid): + psutil_calls.append(("Process", pid)) + + def children(self, recursive=False): + psutil_calls.append(("children", recursive)) + return [] + + def terminate(self): + psutil_calls.append(("terminate",)) + + def fake_run(args, **kwargs): + return MagicMock(returncode=0, stderr="", stdout="") + + monkeypatch.setattr(pr, "_IS_WINDOWS", True) + monkeypatch.setattr(pr.subprocess, "run", fake_run) + monkeypatch.setattr(psutil, "Process", _BoomProcess) + + pr.ProcessRegistry._terminate_host_pid(12345) + + assert psutil_calls == [], ( + f"Windows branch must not touch psutil, but saw {psutil_calls!r}" + ) + + +class TestTerminateHostPidPosix: + """POSIX branch walks the tree via psutil and SIGTERMs children first.""" + + def test_posix_walks_tree_and_terminates_children_then_parent(self, monkeypatch): + from tools import process_registry as pr + import psutil + + terminate_order = [] + + class _FakeChild: + def __init__(self, pid): + self.pid = pid + + def terminate(self): + terminate_order.append(self.pid) + + class _FakeParent: + def __init__(self, pid): + self.pid = pid + + def children(self, recursive=False): + assert recursive is True + return [_FakeChild(101), _FakeChild(102), _FakeChild(103)] + + def terminate(self): + terminate_order.append(self.pid) + + monkeypatch.setattr(pr, "_IS_WINDOWS", False) + monkeypatch.setattr(psutil, "Process", _FakeParent) + + pr.ProcessRegistry._terminate_host_pid(12345) + + assert terminate_order == [101, 102, 103, 12345], ( + "Children must be terminated before the parent" + ) + + def test_posix_no_such_process_swallowed(self, monkeypatch): + from tools import process_registry as pr + import psutil + + def boom(pid): + raise psutil.NoSuchProcess(pid) + + monkeypatch.setattr(pr, "_IS_WINDOWS", False) + monkeypatch.setattr(psutil, "Process", boom) + + # Must not raise. + pr.ProcessRegistry._terminate_host_pid(999999999) + + def test_posix_oserror_falls_back_to_os_kill(self, monkeypatch): + from tools import process_registry as pr + import psutil + + def boom(pid): + raise PermissionError("can't read /proc") + + kill_calls = [] + + def fake_kill(pid, sig): + kill_calls.append((pid, sig)) + + monkeypatch.setattr(pr, "_IS_WINDOWS", False) + monkeypatch.setattr(psutil, "Process", boom) + monkeypatch.setattr(pr.os, "kill", fake_kill) + + pr.ProcessRegistry._terminate_host_pid(12345) + + assert kill_calls == [(12345, signal.SIGTERM)] diff --git a/tools/process_registry.py b/tools/process_registry.py index 771ebf0b474..38c35b3c5a0 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -434,9 +434,50 @@ class ProcessRegistry: @staticmethod def _terminate_host_pid(pid: int) -> None: - """Terminate a host-visible PID without requiring the original process handle.""" + """Terminate a host-visible PID and its descendants. + + POSIX: walks the process tree with ``psutil`` and SIGTERMs + children before the parent so subprocess trees (e.g. Chromium + renderers/GPU helpers spawned by an ``agent-browser`` daemon) + don't get reparented to init and survive cleanup. + + Windows: shells out to ``taskkill /PID /T /F``. This is + the documented Microsoft primitive for tree-kill and matches the + existing convention in ``gateway.status.terminate_pid``. We can't + reuse the POSIX psutil path on Windows because: + + 1. Windows doesn't maintain a Unix-style process tree — + ``psutil.Process.children(recursive=True)`` walks PPID + links that go stale when intermediate processes exit, so + enumeration is best-effort and misses orphaned descendants. + 2. ``psutil.Process.terminate()`` on Windows is + ``TerminateProcess()`` which kills only the target handle + and is a hard kill — there is no Windows equivalent of a + SIGTERM that cascades through a process group. (See the + warning in ``gateway/status.py::terminate_pid``: "os.kill + with SIGTERM is not equivalent to a tree-killing hard stop" + on Windows.) Headless Chromium has no GUI window, so the + softer ``taskkill /T`` without ``/F`` won't reach it either. + + ``psutil`` is a hard dependency (see ``pyproject.toml``); the + bare-``os.kill`` fallback covers OSError / PermissionError on + POSIX and a missing ``taskkill.exe`` on Windows (effectively + unreachable on real Windows installs, but cheap insurance). + """ if _IS_WINDOWS: - os.kill(pid, signal.SIGTERM) + try: + subprocess.run( + ["taskkill", "/PID", str(pid), "/T", "/F"], + capture_output=True, + text=True, + timeout=10, + creationflags=windows_hide_flags(), + ) + except (FileNotFoundError, subprocess.TimeoutExpired, OSError): + try: + os.kill(pid, signal.SIGTERM) + except (OSError, ProcessLookupError, PermissionError): + pass return import psutil