mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
fix(browser): verify daemon identity before orphan reaper kills a PID (#14073)
The browser orphan reaper reads a daemon PID from a `.pid` file in a world-writable, predictably-named temp dir (`/tmp/agent-browser-h_*`) it does not write itself, then tree-kills that PID via `_terminate_host_pid` after only a liveness check. A same-user actor could plant a fake socket dir whose `.pid` points at an arbitrary victim process, and OS PID reuse after the real daemon exits could land the recorded PID on an unrelated process — either way an arbitrary same-user process (and its whole tree) gets SIGTERMed. Local DoS. Add `_verify_reapable_browser_daemon()`, gated before the kill: via psutil (a hard dep, fine cross-platform for the same-user processes the reaper can signal) require both (1) identity — `agent-browser` in the process name/cmdline — and (2) binding — the live process references *this* session's socket dir in its cmdline or `AGENT_BROWSER_SOCKET_DIR`. The binding check is the real spoof defense: a planted/recycled PID won't embed our exact socket path. Fail-closed on any ambiguity (unreadable cmdline, no match), leaving the process and its socket dir untouched for a later sweep. Builds on @sgaofen's fix in #14394 (cmdline identity check); rewritten to use psutil instead of `/proc`+`ps` (cross-platform, Windows-covered) and to add the session-socket-dir binding check for recycled-PID / spoof resistance. Co-authored-by: sgaofen <135070653+sgaofen@users.noreply.github.com>
This commit is contained in:
parent
4d4ba0831e
commit
624580e836
2 changed files with 229 additions and 0 deletions
|
|
@ -85,7 +85,10 @@ class TestReapOrphanedBrowserSessions:
|
|||
# Post-#21561 the liveness probe goes through
|
||||
# ``gateway.status._pid_exists`` (which wraps ``psutil.pid_exists``
|
||||
# so it's safe on Windows — ``os.kill(pid, 0)`` is bpo-14484).
|
||||
# The identity guard (#14073) is mocked True here — its own behavior
|
||||
# is covered by TestReaperIdentityGuard below.
|
||||
with patch("gateway.status._pid_exists", return_value=True), \
|
||||
patch("tools.browser_tool._verify_reapable_browser_daemon", return_value=True), \
|
||||
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
|
||||
_reap_orphaned_browser_sessions()
|
||||
|
||||
|
|
@ -136,6 +139,7 @@ class TestReapOrphanedBrowserSessions:
|
|||
terminate_calls.append(pid)
|
||||
|
||||
with patch("gateway.status._pid_exists", return_value=True), \
|
||||
patch("tools.browser_tool._verify_reapable_browser_daemon", return_value=True), \
|
||||
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
|
||||
_reap_orphaned_browser_sessions()
|
||||
|
||||
|
|
@ -229,6 +233,7 @@ class TestOwnerPidCrossProcess:
|
|||
pid_alive = {999999999: False, 12345: True}
|
||||
with patch("gateway.status._pid_exists",
|
||||
side_effect=lambda pid: pid_alive.get(int(pid), False)), \
|
||||
patch("tools.browser_tool._verify_reapable_browser_daemon", return_value=True), \
|
||||
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
|
||||
_reap_orphaned_browser_sessions()
|
||||
|
||||
|
|
@ -380,6 +385,133 @@ class TestOwnerPidCrossProcess:
|
|||
assert session_name in socket_dir_arg
|
||||
|
||||
|
||||
class TestReaperIdentityGuard:
|
||||
"""Tests for _verify_reapable_browser_daemon — the #14073 fix.
|
||||
|
||||
The reaper reads daemon PIDs from world-writable, predictably-named temp
|
||||
dirs. Before tree-killing a live PID it must confirm the process really is
|
||||
*this* session's agent-browser daemon, defeating planted pid files and
|
||||
recycled PIDs that would otherwise become an arbitrary same-user DoS.
|
||||
"""
|
||||
|
||||
class _FakeProc:
|
||||
def __init__(self, name="agent-browser", cmdline=None, environ=None,
|
||||
raise_environ=False):
|
||||
self._name = name
|
||||
self._cmdline = cmdline if cmdline is not None else []
|
||||
self._environ = environ or {}
|
||||
self._raise_environ = raise_environ
|
||||
|
||||
def name(self):
|
||||
return self._name
|
||||
|
||||
def cmdline(self):
|
||||
return self._cmdline
|
||||
|
||||
def environ(self):
|
||||
if self._raise_environ:
|
||||
import psutil
|
||||
raise psutil.AccessDenied()
|
||||
return self._environ
|
||||
|
||||
def _run(self, fake_proc, socket_dir, session_name="h_sess123456",
|
||||
daemon_pid=12345, no_such=False, access_denied=False):
|
||||
import psutil
|
||||
from tools.browser_tool import _verify_reapable_browser_daemon
|
||||
|
||||
def _factory(pid):
|
||||
if no_such:
|
||||
raise psutil.NoSuchProcess(pid)
|
||||
if access_denied:
|
||||
raise psutil.AccessDenied(pid)
|
||||
return fake_proc
|
||||
|
||||
with patch("psutil.Process", side_effect=_factory):
|
||||
return _verify_reapable_browser_daemon(
|
||||
daemon_pid, socket_dir, session_name)
|
||||
|
||||
def test_real_daemon_bound_via_cmdline_is_reapable(self):
|
||||
socket_dir = "/tmp/agent-browser-h_sess123456"
|
||||
proc = self._FakeProc(
|
||||
name="agent-browser",
|
||||
cmdline=["agent-browser", "open", "--session", "h_sess123456",
|
||||
"--socket-dir", socket_dir],
|
||||
)
|
||||
assert self._run(proc, socket_dir) is True
|
||||
|
||||
def test_daemon_bound_via_environ_is_reapable(self):
|
||||
socket_dir = "/tmp/agent-browser-h_sess123456"
|
||||
proc = self._FakeProc(
|
||||
name="agent-browser-linux-x64",
|
||||
cmdline=["agent-browser-linux-x64", "daemon"], # no dir in cmd
|
||||
environ={"AGENT_BROWSER_SOCKET_DIR": socket_dir},
|
||||
)
|
||||
assert self._run(proc, socket_dir) is True
|
||||
|
||||
def test_planted_pid_for_non_browser_process_is_refused(self):
|
||||
"""A planted .pid pointing at e.g. `sleep 600` must NOT be reaped."""
|
||||
socket_dir = "/tmp/agent-browser-h_sess123456"
|
||||
proc = self._FakeProc(name="sleep", cmdline=["/bin/sleep", "600"])
|
||||
assert self._run(proc, socket_dir) is False
|
||||
|
||||
def test_recycled_pid_browser_not_bound_to_our_dir_is_refused(self):
|
||||
"""An agent-browser process for a DIFFERENT session must not be reaped.
|
||||
|
||||
Models PID reuse / a concurrent unrelated daemon: it looks like
|
||||
agent-browser but is bound to another socket dir.
|
||||
"""
|
||||
socket_dir = "/tmp/agent-browser-h_sess123456"
|
||||
proc = self._FakeProc(
|
||||
name="agent-browser",
|
||||
cmdline=["agent-browser", "open", "--session", "h_OTHER999",
|
||||
"--socket-dir", "/tmp/agent-browser-h_OTHER999"],
|
||||
environ={"AGENT_BROWSER_SOCKET_DIR":
|
||||
"/tmp/agent-browser-h_OTHER999"},
|
||||
)
|
||||
assert self._run(proc, socket_dir) is False
|
||||
|
||||
def test_browser_name_but_environ_denied_and_no_cmdline_bind_refused(self):
|
||||
"""Looks like browser, cmdline doesn't bind, environ() denied -> refuse."""
|
||||
socket_dir = "/tmp/agent-browser-h_sess123456"
|
||||
proc = self._FakeProc(
|
||||
name="agent-browser",
|
||||
cmdline=["agent-browser", "daemon"], # no dir
|
||||
raise_environ=True,
|
||||
)
|
||||
assert self._run(proc, socket_dir) is False
|
||||
|
||||
def test_vanished_process_is_not_reapable(self):
|
||||
socket_dir = "/tmp/agent-browser-h_sess123456"
|
||||
assert self._run(None, socket_dir, no_such=True) is False
|
||||
|
||||
def test_access_denied_on_identity_read_refuses(self):
|
||||
socket_dir = "/tmp/agent-browser-h_sess123456"
|
||||
assert self._run(None, socket_dir, access_denied=True) is False
|
||||
|
||||
def test_planted_pid_survives_full_reaper_path(self, fake_tmpdir):
|
||||
"""End-to-end through the reaper: a planted non-browser PID is spared.
|
||||
|
||||
No owner_pid (legacy path), not tracked, PID 'alive' — but the live
|
||||
process is `sleep`, not agent-browser, so it must be left alone and the
|
||||
socket dir retained.
|
||||
"""
|
||||
from tools.browser_tool import _reap_orphaned_browser_sessions
|
||||
|
||||
d = _make_socket_dir(fake_tmpdir, "h_planted9999", pid=12345)
|
||||
|
||||
terminate_calls = []
|
||||
proc = self._FakeProc(name="sleep", cmdline=["/bin/sleep", "600"])
|
||||
|
||||
with patch("gateway.status._pid_exists", return_value=True), \
|
||||
patch("psutil.Process", return_value=proc), \
|
||||
patch("tools.process_registry.ProcessRegistry._terminate_host_pid",
|
||||
side_effect=lambda pid: terminate_calls.append(pid)):
|
||||
_reap_orphaned_browser_sessions()
|
||||
|
||||
assert terminate_calls == [], "planted non-browser PID must not be killed"
|
||||
assert d.exists(), "socket dir retained for a later sweep"
|
||||
|
||||
|
||||
class TestEmergencyCleanupRunsReaper:
|
||||
"""Verify atexit-registered cleanup sweeps orphans even without an active session."""
|
||||
|
||||
|
|
|
|||
|
|
@ -1320,6 +1320,92 @@ def _write_owner_pid(socket_dir: str, session_name: str) -> None:
|
|||
session_name, exc)
|
||||
|
||||
|
||||
def _verify_reapable_browser_daemon(daemon_pid: int, socket_dir: str,
|
||||
session_name: str) -> bool:
|
||||
"""Confirm a live PID is genuinely *this* session's agent-browser daemon.
|
||||
|
||||
The orphan reaper scans world-writable, predictably-named temp paths
|
||||
(``/tmp/agent-browser-h_*`` etc.) and reads a daemon PID from a ``.pid``
|
||||
file we do not write ourselves — the agent-browser daemon writes it. A
|
||||
same-user actor can therefore plant a fake socket dir whose ``.pid`` points
|
||||
at an arbitrary victim process, or a recycled PID can land on an unrelated
|
||||
process after the real daemon exits. Either way, terminating that PID
|
||||
(a *tree* kill via ``_terminate_host_pid``) is an arbitrary-process DoS.
|
||||
|
||||
Before reaping we require, via ``psutil`` (a hard dependency, cross-platform
|
||||
for same-user processes — the only processes the reaper can signal):
|
||||
|
||||
1. **Identity** — the process looks like agent-browser: ``agent-browser``
|
||||
appears in its name or command line.
|
||||
2. **Binding** — the process is bound to *this* session's socket dir: the
|
||||
socket dir path (or its basename) appears in the command line, or in
|
||||
``AGENT_BROWSER_SOCKET_DIR`` in the process environment.
|
||||
|
||||
Requirement (2) is the real spoof defense: a planted process pointing at a
|
||||
victim PID will not have the victim's cmdline/environ referencing our
|
||||
socket dir. An attacker would need a process that genuinely embeds this
|
||||
exact session path — i.e. a real daemon they already own and could signal
|
||||
directly. Fail-closed: any ambiguity (unreadable cmdline, no match) means
|
||||
we refuse to reap and leave the process and its socket dir alone.
|
||||
|
||||
Returns ``True`` only when both checks pass.
|
||||
"""
|
||||
try:
|
||||
import psutil
|
||||
except ImportError: # psutil is a hard dep; defensive only
|
||||
logger.warning(
|
||||
"Refusing to reap browser daemon PID %d (session %s): "
|
||||
"psutil unavailable for identity verification",
|
||||
daemon_pid, session_name)
|
||||
return False
|
||||
|
||||
try:
|
||||
proc = psutil.Process(daemon_pid)
|
||||
name = (proc.name() or "").lower()
|
||||
cmdline = " ".join(proc.cmdline() or []).lower()
|
||||
except psutil.NoSuchProcess:
|
||||
# Vanished between the liveness check and now — nothing to reap.
|
||||
return False
|
||||
except (psutil.AccessDenied, OSError) as exc:
|
||||
logger.warning(
|
||||
"Refusing to reap browser daemon PID %d (session %s): "
|
||||
"could not read process identity (%s)",
|
||||
daemon_pid, session_name, exc)
|
||||
return False
|
||||
|
||||
looks_like_browser = "agent-browser" in name or "agent-browser" in cmdline
|
||||
if not looks_like_browser:
|
||||
logger.warning(
|
||||
"Refusing to reap PID %d (session %s): not an agent-browser "
|
||||
"process (name=%r)", daemon_pid, session_name, name)
|
||||
return False
|
||||
|
||||
# Binding check: the live process must reference *this* socket dir.
|
||||
socket_dir_l = socket_dir.lower()
|
||||
socket_base_l = os.path.basename(socket_dir).lower()
|
||||
bound = socket_dir_l in cmdline or (
|
||||
socket_base_l and socket_base_l in cmdline)
|
||||
if not bound:
|
||||
try:
|
||||
env_dir = (proc.environ() or {}).get(
|
||||
"AGENT_BROWSER_SOCKET_DIR", "")
|
||||
bound = bool(env_dir) and os.path.normpath(env_dir) == \
|
||||
os.path.normpath(socket_dir)
|
||||
except (psutil.AccessDenied, psutil.NoSuchProcess, OSError):
|
||||
# environ() can be denied even same-user on some platforms.
|
||||
# cmdline already failed to bind — fail closed.
|
||||
bound = False
|
||||
|
||||
if not bound:
|
||||
logger.warning(
|
||||
"Refusing to reap agent-browser PID %d: not bound to session "
|
||||
"socket dir %s (possible recycled PID or planted pid file)",
|
||||
daemon_pid, socket_dir)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def _reap_orphaned_browser_sessions():
|
||||
"""Scan for orphaned agent-browser daemon processes from previous runs.
|
||||
|
||||
|
|
@ -1415,6 +1501,17 @@ def _reap_orphaned_browser_sessions():
|
|||
shutil.rmtree(socket_dir, ignore_errors=True)
|
||||
continue
|
||||
|
||||
# The PID is live — but the .pid file lives in a world-writable,
|
||||
# predictably-named temp dir we don't write ourselves, and PIDs get
|
||||
# recycled after the real daemon exits. Verify the process really is
|
||||
# *this* session's agent-browser daemon before tree-killing it; refuse
|
||||
# otherwise (don't touch the process, leave the socket dir for a later
|
||||
# sweep once the imposter PID is gone). Fixes the arbitrary same-user
|
||||
# process DoS in issue #14073.
|
||||
if not _verify_reapable_browser_daemon(
|
||||
daemon_pid, socket_dir, session_name):
|
||||
continue
|
||||
|
||||
# Daemon is alive and its owner is dead (or legacy + untracked). Reap.
|
||||
# Use the process-tree termination helper so Chromium children
|
||||
# (renderer, GPU, etc.) are cleaned up, not just the daemon parent.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue