From 624580e8363f5dcd5903a01d483d6f006f5be9d9 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Sun, 21 Jun 2026 13:39:41 -0700 Subject: [PATCH] fix(browser): verify daemon identity before orphan reaper kills a PID (#14073) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The browser orphan reaper reads a daemon PID from a `.pid` file in a world-writable, predictably-named temp dir (`/tmp/agent-browser-h_*`) it does not write itself, then tree-kills that PID via `_terminate_host_pid` after only a liveness check. A same-user actor could plant a fake socket dir whose `.pid` points at an arbitrary victim process, and OS PID reuse after the real daemon exits could land the recorded PID on an unrelated process — either way an arbitrary same-user process (and its whole tree) gets SIGTERMed. Local DoS. Add `_verify_reapable_browser_daemon()`, gated before the kill: via psutil (a hard dep, fine cross-platform for the same-user processes the reaper can signal) require both (1) identity — `agent-browser` in the process name/cmdline — and (2) binding — the live process references *this* session's socket dir in its cmdline or `AGENT_BROWSER_SOCKET_DIR`. The binding check is the real spoof defense: a planted/recycled PID won't embed our exact socket path. Fail-closed on any ambiguity (unreadable cmdline, no match), leaving the process and its socket dir untouched for a later sweep. Builds on @sgaofen's fix in #14394 (cmdline identity check); rewritten to use psutil instead of `/proc`+`ps` (cross-platform, Windows-covered) and to add the session-socket-dir binding check for recycled-PID / spoof resistance. Co-authored-by: sgaofen <135070653+sgaofen@users.noreply.github.com> --- tests/tools/test_browser_orphan_reaper.py | 132 ++++++++++++++++++++++ tools/browser_tool.py | 97 ++++++++++++++++ 2 files changed, 229 insertions(+) diff --git a/tests/tools/test_browser_orphan_reaper.py b/tests/tools/test_browser_orphan_reaper.py index 3f2be1ace00..beed82e8362 100644 --- a/tests/tools/test_browser_orphan_reaper.py +++ b/tests/tools/test_browser_orphan_reaper.py @@ -85,7 +85,10 @@ class TestReapOrphanedBrowserSessions: # Post-#21561 the liveness probe goes through # ``gateway.status._pid_exists`` (which wraps ``psutil.pid_exists`` # so it's safe on Windows — ``os.kill(pid, 0)`` is bpo-14484). + # The identity guard (#14073) is mocked True here — its own behavior + # is covered by TestReaperIdentityGuard below. with patch("gateway.status._pid_exists", return_value=True), \ + patch("tools.browser_tool._verify_reapable_browser_daemon", return_value=True), \ patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate): _reap_orphaned_browser_sessions() @@ -136,6 +139,7 @@ class TestReapOrphanedBrowserSessions: terminate_calls.append(pid) with patch("gateway.status._pid_exists", return_value=True), \ + patch("tools.browser_tool._verify_reapable_browser_daemon", return_value=True), \ patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate): _reap_orphaned_browser_sessions() @@ -229,6 +233,7 @@ class TestOwnerPidCrossProcess: pid_alive = {999999999: False, 12345: True} with patch("gateway.status._pid_exists", side_effect=lambda pid: pid_alive.get(int(pid), False)), \ + patch("tools.browser_tool._verify_reapable_browser_daemon", return_value=True), \ patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate): _reap_orphaned_browser_sessions() @@ -380,6 +385,133 @@ class TestOwnerPidCrossProcess: assert session_name in socket_dir_arg +class TestReaperIdentityGuard: + """Tests for _verify_reapable_browser_daemon — the #14073 fix. + + The reaper reads daemon PIDs from world-writable, predictably-named temp + dirs. Before tree-killing a live PID it must confirm the process really is + *this* session's agent-browser daemon, defeating planted pid files and + recycled PIDs that would otherwise become an arbitrary same-user DoS. + """ + + class _FakeProc: + def __init__(self, name="agent-browser", cmdline=None, environ=None, + raise_environ=False): + self._name = name + self._cmdline = cmdline if cmdline is not None else [] + self._environ = environ or {} + self._raise_environ = raise_environ + + def name(self): + return self._name + + def cmdline(self): + return self._cmdline + + def environ(self): + if self._raise_environ: + import psutil + raise psutil.AccessDenied() + return self._environ + + def _run(self, fake_proc, socket_dir, session_name="h_sess123456", + daemon_pid=12345, no_such=False, access_denied=False): + import psutil + from tools.browser_tool import _verify_reapable_browser_daemon + + def _factory(pid): + if no_such: + raise psutil.NoSuchProcess(pid) + if access_denied: + raise psutil.AccessDenied(pid) + return fake_proc + + with patch("psutil.Process", side_effect=_factory): + return _verify_reapable_browser_daemon( + daemon_pid, socket_dir, session_name) + + def test_real_daemon_bound_via_cmdline_is_reapable(self): + socket_dir = "/tmp/agent-browser-h_sess123456" + proc = self._FakeProc( + name="agent-browser", + cmdline=["agent-browser", "open", "--session", "h_sess123456", + "--socket-dir", socket_dir], + ) + assert self._run(proc, socket_dir) is True + + def test_daemon_bound_via_environ_is_reapable(self): + socket_dir = "/tmp/agent-browser-h_sess123456" + proc = self._FakeProc( + name="agent-browser-linux-x64", + cmdline=["agent-browser-linux-x64", "daemon"], # no dir in cmd + environ={"AGENT_BROWSER_SOCKET_DIR": socket_dir}, + ) + assert self._run(proc, socket_dir) is True + + def test_planted_pid_for_non_browser_process_is_refused(self): + """A planted .pid pointing at e.g. `sleep 600` must NOT be reaped.""" + socket_dir = "/tmp/agent-browser-h_sess123456" + proc = self._FakeProc(name="sleep", cmdline=["/bin/sleep", "600"]) + assert self._run(proc, socket_dir) is False + + def test_recycled_pid_browser_not_bound_to_our_dir_is_refused(self): + """An agent-browser process for a DIFFERENT session must not be reaped. + + Models PID reuse / a concurrent unrelated daemon: it looks like + agent-browser but is bound to another socket dir. + """ + socket_dir = "/tmp/agent-browser-h_sess123456" + proc = self._FakeProc( + name="agent-browser", + cmdline=["agent-browser", "open", "--session", "h_OTHER999", + "--socket-dir", "/tmp/agent-browser-h_OTHER999"], + environ={"AGENT_BROWSER_SOCKET_DIR": + "/tmp/agent-browser-h_OTHER999"}, + ) + assert self._run(proc, socket_dir) is False + + def test_browser_name_but_environ_denied_and_no_cmdline_bind_refused(self): + """Looks like browser, cmdline doesn't bind, environ() denied -> refuse.""" + socket_dir = "/tmp/agent-browser-h_sess123456" + proc = self._FakeProc( + name="agent-browser", + cmdline=["agent-browser", "daemon"], # no dir + raise_environ=True, + ) + assert self._run(proc, socket_dir) is False + + def test_vanished_process_is_not_reapable(self): + socket_dir = "/tmp/agent-browser-h_sess123456" + assert self._run(None, socket_dir, no_such=True) is False + + def test_access_denied_on_identity_read_refuses(self): + socket_dir = "/tmp/agent-browser-h_sess123456" + assert self._run(None, socket_dir, access_denied=True) is False + + def test_planted_pid_survives_full_reaper_path(self, fake_tmpdir): + """End-to-end through the reaper: a planted non-browser PID is spared. + + No owner_pid (legacy path), not tracked, PID 'alive' — but the live + process is `sleep`, not agent-browser, so it must be left alone and the + socket dir retained. + """ + from tools.browser_tool import _reap_orphaned_browser_sessions + + d = _make_socket_dir(fake_tmpdir, "h_planted9999", pid=12345) + + terminate_calls = [] + proc = self._FakeProc(name="sleep", cmdline=["/bin/sleep", "600"]) + + with patch("gateway.status._pid_exists", return_value=True), \ + patch("psutil.Process", return_value=proc), \ + patch("tools.process_registry.ProcessRegistry._terminate_host_pid", + side_effect=lambda pid: terminate_calls.append(pid)): + _reap_orphaned_browser_sessions() + + assert terminate_calls == [], "planted non-browser PID must not be killed" + assert d.exists(), "socket dir retained for a later sweep" + + class TestEmergencyCleanupRunsReaper: """Verify atexit-registered cleanup sweeps orphans even without an active session.""" diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 90975175786..3332d3a740d 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -1320,6 +1320,92 @@ def _write_owner_pid(socket_dir: str, session_name: str) -> None: session_name, exc) +def _verify_reapable_browser_daemon(daemon_pid: int, socket_dir: str, + session_name: str) -> bool: + """Confirm a live PID is genuinely *this* session's agent-browser daemon. + + The orphan reaper scans world-writable, predictably-named temp paths + (``/tmp/agent-browser-h_*`` etc.) and reads a daemon PID from a ``.pid`` + file we do not write ourselves — the agent-browser daemon writes it. A + same-user actor can therefore plant a fake socket dir whose ``.pid`` points + at an arbitrary victim process, or a recycled PID can land on an unrelated + process after the real daemon exits. Either way, terminating that PID + (a *tree* kill via ``_terminate_host_pid``) is an arbitrary-process DoS. + + Before reaping we require, via ``psutil`` (a hard dependency, cross-platform + for same-user processes — the only processes the reaper can signal): + + 1. **Identity** — the process looks like agent-browser: ``agent-browser`` + appears in its name or command line. + 2. **Binding** — the process is bound to *this* session's socket dir: the + socket dir path (or its basename) appears in the command line, or in + ``AGENT_BROWSER_SOCKET_DIR`` in the process environment. + + Requirement (2) is the real spoof defense: a planted process pointing at a + victim PID will not have the victim's cmdline/environ referencing our + socket dir. An attacker would need a process that genuinely embeds this + exact session path — i.e. a real daemon they already own and could signal + directly. Fail-closed: any ambiguity (unreadable cmdline, no match) means + we refuse to reap and leave the process and its socket dir alone. + + Returns ``True`` only when both checks pass. + """ + try: + import psutil + except ImportError: # psutil is a hard dep; defensive only + logger.warning( + "Refusing to reap browser daemon PID %d (session %s): " + "psutil unavailable for identity verification", + daemon_pid, session_name) + return False + + try: + proc = psutil.Process(daemon_pid) + name = (proc.name() or "").lower() + cmdline = " ".join(proc.cmdline() or []).lower() + except psutil.NoSuchProcess: + # Vanished between the liveness check and now — nothing to reap. + return False + except (psutil.AccessDenied, OSError) as exc: + logger.warning( + "Refusing to reap browser daemon PID %d (session %s): " + "could not read process identity (%s)", + daemon_pid, session_name, exc) + return False + + looks_like_browser = "agent-browser" in name or "agent-browser" in cmdline + if not looks_like_browser: + logger.warning( + "Refusing to reap PID %d (session %s): not an agent-browser " + "process (name=%r)", daemon_pid, session_name, name) + return False + + # Binding check: the live process must reference *this* socket dir. + socket_dir_l = socket_dir.lower() + socket_base_l = os.path.basename(socket_dir).lower() + bound = socket_dir_l in cmdline or ( + socket_base_l and socket_base_l in cmdline) + if not bound: + try: + env_dir = (proc.environ() or {}).get( + "AGENT_BROWSER_SOCKET_DIR", "") + bound = bool(env_dir) and os.path.normpath(env_dir) == \ + os.path.normpath(socket_dir) + except (psutil.AccessDenied, psutil.NoSuchProcess, OSError): + # environ() can be denied even same-user on some platforms. + # cmdline already failed to bind — fail closed. + bound = False + + if not bound: + logger.warning( + "Refusing to reap agent-browser PID %d: not bound to session " + "socket dir %s (possible recycled PID or planted pid file)", + daemon_pid, socket_dir) + return False + + return True + + def _reap_orphaned_browser_sessions(): """Scan for orphaned agent-browser daemon processes from previous runs. @@ -1415,6 +1501,17 @@ def _reap_orphaned_browser_sessions(): shutil.rmtree(socket_dir, ignore_errors=True) continue + # The PID is live — but the .pid file lives in a world-writable, + # predictably-named temp dir we don't write ourselves, and PIDs get + # recycled after the real daemon exits. Verify the process really is + # *this* session's agent-browser daemon before tree-killing it; refuse + # otherwise (don't touch the process, leave the socket dir for a later + # sweep once the imposter PID is gone). Fixes the arbitrary same-user + # process DoS in issue #14073. + if not _verify_reapable_browser_daemon( + daemon_pid, socket_dir, session_name): + continue + # Daemon is alive and its owner is dead (or legacy + untracked). Reap. # Use the process-tree termination helper so Chromium children # (renderer, GPU, etc.) are cleaned up, not just the daemon parent.