fix(browser): use process-tree termination for daemon cleanup

os.kill(pid, SIGTERM) only signals the parent, leaving Chromium child
    processes (renderer, GPU, etc.) orphaned.  Reuse the existing
    ProcessRegistry._terminate_host_pid() helper which walks the process
    tree leaf-up via psutil, terminating children before the parent.
This commit is contained in:
Yuan Li 2026-05-20 19:24:57 +08:00 committed by Teknium
parent 72ff3e909c
commit 22f3f5a75a
2 changed files with 40 additions and 37 deletions

View file

@ -72,7 +72,7 @@ class TestReapOrphanedBrowserSessions:
assert not d.exists()
def test_orphaned_alive_daemon_is_killed(self, fake_tmpdir):
"""Alive daemon not tracked by _active_sessions gets SIGTERM (legacy path).
"""Alive daemon not tracked by _active_sessions is terminated (legacy path).
No owner_pid file => falls back to tracked_names check.
"""
@ -82,18 +82,17 @@ class TestReapOrphanedBrowserSessions:
kill_calls = []
def mock_kill(pid, sig):
kill_calls.append((pid, sig))
# Don't actually kill anything
def mock_terminate(pid):
kill_calls.append(pid)
# Post-#21561 the liveness probe goes through
# ``gateway.status._pid_exists`` (which wraps ``psutil.pid_exists``
# so it's safe on Windows — ``os.kill(pid, 0)`` is bpo-14484).
with patch("gateway.status._pid_exists", return_value=True), \
patch("os.kill", side_effect=mock_kill):
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
_reap_orphaned_browser_sessions()
assert (12345, signal.SIGTERM) in kill_calls
assert 12345 in kill_calls
def test_tracked_session_is_not_reaped(self, fake_tmpdir):
"""Sessions tracked in _active_sessions are left alone (legacy path)."""
@ -108,13 +107,13 @@ class TestReapOrphanedBrowserSessions:
kill_calls = []
def mock_kill(pid, sig):
kill_calls.append((pid, sig))
def mock_terminate(pid):
kill_calls.append(pid)
with patch("os.kill", side_effect=mock_kill):
with patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
_reap_orphaned_browser_sessions()
# Should NOT have tried to kill anything
# Should NOT have tried to terminate anything
assert len(kill_calls) == 0
# Dir should still exist
assert d.exists()
@ -126,23 +125,24 @@ class TestReapOrphanedBrowserSessions:
``gateway.status._pid_exists`` (which wraps ``psutil.pid_exists``
because ``os.kill(pid, 0)`` is a footgun on Windows bpo-14484).
With no owner_pid file and no tracked-name entry, the reaper
SIGTERMs the daemon and removes its socket dir regardless of
whether SIGTERM succeeded (best-effort semantics).
terminates the daemon (and its process tree) and removes its socket
dir regardless of whether termination succeeded (best-effort
semantics).
"""
from tools.browser_tool import _reap_orphaned_browser_sessions
d = _make_socket_dir(fake_tmpdir, "h_perm1234567", pid=12345)
sigterm_calls = []
terminate_calls = []
def mock_kill(pid, sig):
sigterm_calls.append((pid, sig))
def mock_terminate(pid):
terminate_calls.append(pid)
with patch("gateway.status._pid_exists", return_value=True), \
patch("os.kill", side_effect=mock_kill):
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
_reap_orphaned_browser_sessions()
assert (12345, signal.SIGTERM) in sigterm_calls
assert 12345 in terminate_calls
assert not d.exists()
def test_cdp_sessions_are_also_reaped(self, fake_tmpdir):
@ -203,15 +203,15 @@ class TestOwnerPidCrossProcess:
kill_calls = []
def mock_kill(pid, sig):
kill_calls.append((pid, sig))
def mock_terminate(pid):
kill_calls.append(pid)
# Owner alive → reaper skips without ever probing the daemon.
with patch("gateway.status._pid_exists", return_value=True), \
patch("os.kill", side_effect=mock_kill):
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
_reap_orphaned_browser_sessions()
assert (12345, signal.SIGTERM) not in kill_calls
assert 12345 not in kill_calls
assert d.exists()
def test_dead_owner_triggers_reap(self, fake_tmpdir):
@ -225,17 +225,17 @@ class TestOwnerPidCrossProcess:
kill_calls = []
def mock_kill(pid, sig):
kill_calls.append((pid, sig))
def mock_terminate(pid):
kill_calls.append(pid)
# Owner 999999999 dead, daemon 12345 alive.
pid_alive = {999999999: False, 12345: True}
with patch("gateway.status._pid_exists",
side_effect=lambda pid: pid_alive.get(int(pid), False)), \
patch("os.kill", side_effect=mock_kill):
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
_reap_orphaned_browser_sessions()
assert (12345, signal.SIGTERM) in kill_calls
assert 12345 in kill_calls
assert not d.exists()
def test_corrupt_owner_pid_falls_back_to_legacy(self, fake_tmpdir):
@ -253,15 +253,15 @@ class TestOwnerPidCrossProcess:
kill_calls = []
def mock_kill(pid, sig):
kill_calls.append((pid, sig))
def mock_terminate(pid):
kill_calls.append(pid)
with patch("gateway.status._pid_exists", return_value=True), \
patch("os.kill", side_effect=mock_kill):
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
_reap_orphaned_browser_sessions()
# Legacy path took over → tracked → not reaped
assert (12345, signal.SIGTERM) not in kill_calls
assert 12345 not in kill_calls
assert d.exists()
def test_owner_pid_permission_error_treated_as_alive(self, fake_tmpdir):
@ -280,16 +280,16 @@ class TestOwnerPidCrossProcess:
kill_calls = []
def mock_kill(pid, sig):
kill_calls.append((pid, sig))
def mock_terminate(pid):
kill_calls.append(pid)
# Owner 22222 reported alive (PermissionError collapses to True
# inside _pid_exists). Daemon never probed, never SIGTERMed.
# inside _pid_exists). Daemon never probed, never terminated.
with patch("gateway.status._pid_exists", return_value=True), \
patch("os.kill", side_effect=mock_kill):
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
_reap_orphaned_browser_sessions()
assert (12345, signal.SIGTERM) not in kill_calls
assert 12345 not in kill_calls
assert d.exists()
def test_write_owner_pid_creates_file_with_current_pid(