fix(process_registry): use taskkill /T /F for tree-kill on Windows

The Windows branch of `_terminate_host_pid` early-returned after
`os.kill(pid, SIGTERM)` (which Python maps to `TerminateProcess` for
the target handle only), leaving descendant processes — e.g. Chromium
renderer/GPU/network helpers spawned by an `agent-browser` daemon —
running on Windows even after the preceding commit fixed POSIX.

The right Windows primitive is `taskkill /PID <pid> /T /F`:
`/T` walks the tree, `/F` force-terminates. Same approach
`gateway.status.terminate_pid(force=True)` already uses for the
gateway's own shutdown path; reuse the same shape here.

Why NOT extend the POSIX psutil tree-walk to Windows:

  1. Windows doesn't maintain a Unix-style process tree. `psutil.
     Process.children(recursive=True)` walks PPID links that go stale
     when intermediate processes exit, so enumeration is best-effort
     and silently misses orphaned descendants. The whole bug we're
     fixing is orphaned descendants.

  2. `psutil.Process.terminate()` on Windows is `TerminateProcess()`
     for one handle — same single-PID scope as the existing
     `os.kill`. The existing comment in `gateway/status.py::
     terminate_pid` warns this explicitly: 'os.kill SIGTERM is not
     equivalent to a tree-killing hard stop' on Windows.

  3. Headless Chromium has no GUI window, so the softer
     `taskkill /T` without `/F` (which sends WM_CLOSE) won't reach
     it either. `/F` is required.

POSIX path is unchanged. The taskkill subprocess uses the same
`creationflags=windows_hide_flags()` pattern other Windows shellouts
in this codebase use. `FileNotFoundError` / `TimeoutExpired` /
`OSError` fall back to bare `os.kill(SIGTERM)` as cheap insurance.

Tests cover the Windows branch via the codebase's standard
`monkeypatch _IS_WINDOWS` pattern (`references/windows-native-
support.md`), plus POSIX tree-walk order, NoSuchProcess swallow,
and the OSError fallback path. 7 new tests, all green on Linux CI.
This commit is contained in:
teknium1 2026-05-23 17:55:29 -07:00 committed by Teknium
parent 22f3f5a75a
commit 7ce6b504a2
2 changed files with 203 additions and 2 deletions

View file

@ -1007,3 +1007,163 @@ def test_drain_notifications_empty_queue():
results = process_registry.drain_notifications()
assert results == []
# ---------------------------------------------------------------------------
# _terminate_host_pid — cross-platform process-tree termination
# ---------------------------------------------------------------------------
class TestTerminateHostPidWindows:
"""Windows branch uses ``taskkill /T /F`` — the documented MS tree-kill
primitive. We can't use psutil's ``children(recursive=True)`` /
``.terminate()`` path on Windows because (1) Windows doesn't maintain
a Unix-style process tree so the walk is unreliable, and (2)
``Process.terminate()`` on Windows is ``TerminateProcess()`` for the
target handle only, not the tree.
"""
def test_windows_invokes_taskkill_with_tree_and_force_flags(self, monkeypatch):
"""The Windows branch must shell out to ``taskkill /PID N /T /F``."""
from tools import process_registry as pr
captured = {}
def fake_run(args, **kwargs):
captured["args"] = args
captured["kwargs"] = kwargs
return MagicMock(returncode=0, stderr="", stdout="")
monkeypatch.setattr(pr, "_IS_WINDOWS", True)
monkeypatch.setattr(pr.subprocess, "run", fake_run)
pr.ProcessRegistry._terminate_host_pid(12345)
assert captured["args"][0] == "taskkill"
assert "/PID" in captured["args"]
assert "12345" in captured["args"]
assert "/T" in captured["args"], "Tree flag required to reach descendants"
assert "/F" in captured["args"], "Force flag required for headless Chromium"
def test_windows_falls_back_to_os_kill_when_taskkill_missing(self, monkeypatch):
"""If ``taskkill.exe`` is somehow unavailable, fall back to a bare
``os.kill(pid, SIGTERM)`` so we at least try to kill the parent."""
from tools import process_registry as pr
kill_calls = []
def fake_run(*args, **kwargs):
raise FileNotFoundError("taskkill not found")
def fake_kill(pid, sig):
kill_calls.append((pid, sig))
monkeypatch.setattr(pr, "_IS_WINDOWS", True)
monkeypatch.setattr(pr.subprocess, "run", fake_run)
monkeypatch.setattr(pr.os, "kill", fake_kill)
pr.ProcessRegistry._terminate_host_pid(12345)
assert kill_calls == [(12345, signal.SIGTERM)]
def test_windows_does_not_call_psutil(self, monkeypatch):
"""The Windows branch must NOT exercise the psutil tree-walk
(it's unreliable on Windows — see the function docstring)."""
from tools import process_registry as pr
import psutil
psutil_calls = []
class _BoomProcess:
def __init__(self, pid):
psutil_calls.append(("Process", pid))
def children(self, recursive=False):
psutil_calls.append(("children", recursive))
return []
def terminate(self):
psutil_calls.append(("terminate",))
def fake_run(args, **kwargs):
return MagicMock(returncode=0, stderr="", stdout="")
monkeypatch.setattr(pr, "_IS_WINDOWS", True)
monkeypatch.setattr(pr.subprocess, "run", fake_run)
monkeypatch.setattr(psutil, "Process", _BoomProcess)
pr.ProcessRegistry._terminate_host_pid(12345)
assert psutil_calls == [], (
f"Windows branch must not touch psutil, but saw {psutil_calls!r}"
)
class TestTerminateHostPidPosix:
"""POSIX branch walks the tree via psutil and SIGTERMs children first."""
def test_posix_walks_tree_and_terminates_children_then_parent(self, monkeypatch):
from tools import process_registry as pr
import psutil
terminate_order = []
class _FakeChild:
def __init__(self, pid):
self.pid = pid
def terminate(self):
terminate_order.append(self.pid)
class _FakeParent:
def __init__(self, pid):
self.pid = pid
def children(self, recursive=False):
assert recursive is True
return [_FakeChild(101), _FakeChild(102), _FakeChild(103)]
def terminate(self):
terminate_order.append(self.pid)
monkeypatch.setattr(pr, "_IS_WINDOWS", False)
monkeypatch.setattr(psutil, "Process", _FakeParent)
pr.ProcessRegistry._terminate_host_pid(12345)
assert terminate_order == [101, 102, 103, 12345], (
"Children must be terminated before the parent"
)
def test_posix_no_such_process_swallowed(self, monkeypatch):
from tools import process_registry as pr
import psutil
def boom(pid):
raise psutil.NoSuchProcess(pid)
monkeypatch.setattr(pr, "_IS_WINDOWS", False)
monkeypatch.setattr(psutil, "Process", boom)
# Must not raise.
pr.ProcessRegistry._terminate_host_pid(999999999)
def test_posix_oserror_falls_back_to_os_kill(self, monkeypatch):
from tools import process_registry as pr
import psutil
def boom(pid):
raise PermissionError("can't read /proc")
kill_calls = []
def fake_kill(pid, sig):
kill_calls.append((pid, sig))
monkeypatch.setattr(pr, "_IS_WINDOWS", False)
monkeypatch.setattr(psutil, "Process", boom)
monkeypatch.setattr(pr.os, "kill", fake_kill)
pr.ProcessRegistry._terminate_host_pid(12345)
assert kill_calls == [(12345, signal.SIGTERM)]

View file

@ -434,9 +434,50 @@ class ProcessRegistry:
@staticmethod
def _terminate_host_pid(pid: int) -> None:
"""Terminate a host-visible PID without requiring the original process handle."""
"""Terminate a host-visible PID and its descendants.
POSIX: walks the process tree with ``psutil`` and SIGTERMs
children before the parent so subprocess trees (e.g. Chromium
renderers/GPU helpers spawned by an ``agent-browser`` daemon)
don't get reparented to init and survive cleanup.
Windows: shells out to ``taskkill /PID <pid> /T /F``. This is
the documented Microsoft primitive for tree-kill and matches the
existing convention in ``gateway.status.terminate_pid``. We can't
reuse the POSIX psutil path on Windows because:
1. Windows doesn't maintain a Unix-style process tree —
``psutil.Process.children(recursive=True)`` walks PPID
links that go stale when intermediate processes exit, so
enumeration is best-effort and misses orphaned descendants.
2. ``psutil.Process.terminate()`` on Windows is
``TerminateProcess()`` which kills only the target handle
and is a hard kill there is no Windows equivalent of a
SIGTERM that cascades through a process group. (See the
warning in ``gateway/status.py::terminate_pid``: "os.kill
with SIGTERM is not equivalent to a tree-killing hard stop"
on Windows.) Headless Chromium has no GUI window, so the
softer ``taskkill /T`` without ``/F`` won't reach it either.
``psutil`` is a hard dependency (see ``pyproject.toml``); the
bare-``os.kill`` fallback covers OSError / PermissionError on
POSIX and a missing ``taskkill.exe`` on Windows (effectively
unreachable on real Windows installs, but cheap insurance).
"""
if _IS_WINDOWS:
os.kill(pid, signal.SIGTERM)
try:
subprocess.run(
["taskkill", "/PID", str(pid), "/T", "/F"],
capture_output=True,
text=True,
timeout=10,
creationflags=windows_hide_flags(),
)
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
try:
os.kill(pid, signal.SIGTERM)
except (OSError, ProcessLookupError, PermissionError):
pass
return
import psutil