mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(browser): use process-tree termination for daemon cleanup
os.kill(pid, SIGTERM) only signals the parent, leaving Chromium child
processes (renderer, GPU, etc.) orphaned. Reuse the existing
ProcessRegistry._terminate_host_pid() helper which walks the process
tree leaf-up via psutil, terminating children before the parent.
This commit is contained in:
parent
72ff3e909c
commit
22f3f5a75a
2 changed files with 40 additions and 37 deletions
|
|
@ -72,7 +72,7 @@ class TestReapOrphanedBrowserSessions:
|
|||
assert not d.exists()
|
||||
|
||||
def test_orphaned_alive_daemon_is_killed(self, fake_tmpdir):
|
||||
"""Alive daemon not tracked by _active_sessions gets SIGTERM (legacy path).
|
||||
"""Alive daemon not tracked by _active_sessions is terminated (legacy path).
|
||||
|
||||
No owner_pid file => falls back to tracked_names check.
|
||||
"""
|
||||
|
|
@ -82,18 +82,17 @@ class TestReapOrphanedBrowserSessions:
|
|||
|
||||
kill_calls = []
|
||||
|
||||
def mock_kill(pid, sig):
|
||||
kill_calls.append((pid, sig))
|
||||
# Don't actually kill anything
|
||||
def mock_terminate(pid):
|
||||
kill_calls.append(pid)
|
||||
|
||||
# Post-#21561 the liveness probe goes through
|
||||
# ``gateway.status._pid_exists`` (which wraps ``psutil.pid_exists``
|
||||
# so it's safe on Windows — ``os.kill(pid, 0)`` is bpo-14484).
|
||||
with patch("gateway.status._pid_exists", return_value=True), \
|
||||
patch("os.kill", side_effect=mock_kill):
|
||||
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
|
||||
_reap_orphaned_browser_sessions()
|
||||
|
||||
assert (12345, signal.SIGTERM) in kill_calls
|
||||
assert 12345 in kill_calls
|
||||
|
||||
def test_tracked_session_is_not_reaped(self, fake_tmpdir):
|
||||
"""Sessions tracked in _active_sessions are left alone (legacy path)."""
|
||||
|
|
@ -108,13 +107,13 @@ class TestReapOrphanedBrowserSessions:
|
|||
|
||||
kill_calls = []
|
||||
|
||||
def mock_kill(pid, sig):
|
||||
kill_calls.append((pid, sig))
|
||||
def mock_terminate(pid):
|
||||
kill_calls.append(pid)
|
||||
|
||||
with patch("os.kill", side_effect=mock_kill):
|
||||
with patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
|
||||
_reap_orphaned_browser_sessions()
|
||||
|
||||
# Should NOT have tried to kill anything
|
||||
# Should NOT have tried to terminate anything
|
||||
assert len(kill_calls) == 0
|
||||
# Dir should still exist
|
||||
assert d.exists()
|
||||
|
|
@ -126,23 +125,24 @@ class TestReapOrphanedBrowserSessions:
|
|||
``gateway.status._pid_exists`` (which wraps ``psutil.pid_exists``
|
||||
because ``os.kill(pid, 0)`` is a footgun on Windows — bpo-14484).
|
||||
With no owner_pid file and no tracked-name entry, the reaper
|
||||
SIGTERMs the daemon and removes its socket dir regardless of
|
||||
whether SIGTERM succeeded (best-effort semantics).
|
||||
terminates the daemon (and its process tree) and removes its socket
|
||||
dir regardless of whether termination succeeded (best-effort
|
||||
semantics).
|
||||
"""
|
||||
from tools.browser_tool import _reap_orphaned_browser_sessions
|
||||
|
||||
d = _make_socket_dir(fake_tmpdir, "h_perm1234567", pid=12345)
|
||||
|
||||
sigterm_calls = []
|
||||
terminate_calls = []
|
||||
|
||||
def mock_kill(pid, sig):
|
||||
sigterm_calls.append((pid, sig))
|
||||
def mock_terminate(pid):
|
||||
terminate_calls.append(pid)
|
||||
|
||||
with patch("gateway.status._pid_exists", return_value=True), \
|
||||
patch("os.kill", side_effect=mock_kill):
|
||||
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
|
||||
_reap_orphaned_browser_sessions()
|
||||
|
||||
assert (12345, signal.SIGTERM) in sigterm_calls
|
||||
assert 12345 in terminate_calls
|
||||
assert not d.exists()
|
||||
|
||||
def test_cdp_sessions_are_also_reaped(self, fake_tmpdir):
|
||||
|
|
@ -203,15 +203,15 @@ class TestOwnerPidCrossProcess:
|
|||
|
||||
kill_calls = []
|
||||
|
||||
def mock_kill(pid, sig):
|
||||
kill_calls.append((pid, sig))
|
||||
def mock_terminate(pid):
|
||||
kill_calls.append(pid)
|
||||
|
||||
# Owner alive → reaper skips without ever probing the daemon.
|
||||
with patch("gateway.status._pid_exists", return_value=True), \
|
||||
patch("os.kill", side_effect=mock_kill):
|
||||
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
|
||||
_reap_orphaned_browser_sessions()
|
||||
|
||||
assert (12345, signal.SIGTERM) not in kill_calls
|
||||
assert 12345 not in kill_calls
|
||||
assert d.exists()
|
||||
|
||||
def test_dead_owner_triggers_reap(self, fake_tmpdir):
|
||||
|
|
@ -225,17 +225,17 @@ class TestOwnerPidCrossProcess:
|
|||
|
||||
kill_calls = []
|
||||
|
||||
def mock_kill(pid, sig):
|
||||
kill_calls.append((pid, sig))
|
||||
def mock_terminate(pid):
|
||||
kill_calls.append(pid)
|
||||
|
||||
# Owner 999999999 dead, daemon 12345 alive.
|
||||
pid_alive = {999999999: False, 12345: True}
|
||||
with patch("gateway.status._pid_exists",
|
||||
side_effect=lambda pid: pid_alive.get(int(pid), False)), \
|
||||
patch("os.kill", side_effect=mock_kill):
|
||||
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
|
||||
_reap_orphaned_browser_sessions()
|
||||
|
||||
assert (12345, signal.SIGTERM) in kill_calls
|
||||
assert 12345 in kill_calls
|
||||
assert not d.exists()
|
||||
|
||||
def test_corrupt_owner_pid_falls_back_to_legacy(self, fake_tmpdir):
|
||||
|
|
@ -253,15 +253,15 @@ class TestOwnerPidCrossProcess:
|
|||
|
||||
kill_calls = []
|
||||
|
||||
def mock_kill(pid, sig):
|
||||
kill_calls.append((pid, sig))
|
||||
def mock_terminate(pid):
|
||||
kill_calls.append(pid)
|
||||
|
||||
with patch("gateway.status._pid_exists", return_value=True), \
|
||||
patch("os.kill", side_effect=mock_kill):
|
||||
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
|
||||
_reap_orphaned_browser_sessions()
|
||||
|
||||
# Legacy path took over → tracked → not reaped
|
||||
assert (12345, signal.SIGTERM) not in kill_calls
|
||||
assert 12345 not in kill_calls
|
||||
assert d.exists()
|
||||
|
||||
def test_owner_pid_permission_error_treated_as_alive(self, fake_tmpdir):
|
||||
|
|
@ -280,16 +280,16 @@ class TestOwnerPidCrossProcess:
|
|||
|
||||
kill_calls = []
|
||||
|
||||
def mock_kill(pid, sig):
|
||||
kill_calls.append((pid, sig))
|
||||
def mock_terminate(pid):
|
||||
kill_calls.append(pid)
|
||||
|
||||
# Owner 22222 reported alive (PermissionError collapses to True
|
||||
# inside _pid_exists). Daemon never probed, never SIGTERMed.
|
||||
# inside _pid_exists). Daemon never probed, never terminated.
|
||||
with patch("gateway.status._pid_exists", return_value=True), \
|
||||
patch("os.kill", side_effect=mock_kill):
|
||||
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
|
||||
_reap_orphaned_browser_sessions()
|
||||
|
||||
assert (12345, signal.SIGTERM) not in kill_calls
|
||||
assert 12345 not in kill_calls
|
||||
assert d.exists()
|
||||
|
||||
def test_write_owner_pid_creates_file_with_current_pid(
|
||||
|
|
|
|||
|
|
@ -102,7 +102,6 @@ from plugins.browser.firecrawl.provider import ( # noqa: F401
|
|||
FirecrawlBrowserProvider as FirecrawlProvider,
|
||||
)
|
||||
from tools.tool_backend_helpers import normalize_browser_cloud_provider
|
||||
|
||||
# Camofox local anti-detection browser backend (optional).
|
||||
# When CAMOFOX_URL is set, all browser operations route through the
|
||||
# camofox REST API instead of the agent-browser CLI.
|
||||
|
|
@ -1386,8 +1385,11 @@ def _reap_orphaned_browser_sessions():
|
|||
continue
|
||||
|
||||
# Daemon is alive and its owner is dead (or legacy + untracked). Reap.
|
||||
# Use the process-tree termination helper so Chromium children
|
||||
# (renderer, GPU, etc.) are cleaned up, not just the daemon parent.
|
||||
try:
|
||||
os.kill(daemon_pid, signal.SIGTERM)
|
||||
from tools.process_registry import ProcessRegistry
|
||||
ProcessRegistry._terminate_host_pid(daemon_pid)
|
||||
logger.info("Reaped orphaned browser daemon PID %d (session %s)",
|
||||
daemon_pid, session_name)
|
||||
reaped += 1
|
||||
|
|
@ -3437,8 +3439,9 @@ def _cleanup_single_browser_session(task_id: str) -> None:
|
|||
pid_file = os.path.join(socket_dir, f"{session_name}.pid")
|
||||
if os.path.isfile(pid_file):
|
||||
try:
|
||||
from tools.process_registry import ProcessRegistry
|
||||
daemon_pid = int(Path(pid_file).read_text(encoding="utf-8").strip())
|
||||
os.kill(daemon_pid, signal.SIGTERM)
|
||||
ProcessRegistry._terminate_host_pid(daemon_pid)
|
||||
logger.debug("Killed daemon pid %s for %s", daemon_pid, session_name)
|
||||
except (ProcessLookupError, ValueError, PermissionError, OSError):
|
||||
logger.debug("Could not kill daemon pid for %s (already dead or inaccessible)", session_name)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue