mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-11 03:31:55 +00:00
Cloud metadata endpoints (169.254.169.254 etc.) are now always blocked
by browser_navigate regardless of hybrid routing, allow_private_urls,
or backend.
Bug: commit 42c076d3 (#16136) added hybrid routing that sets
auto_local_this_nav=True for private URLs, which short-circuits
_is_safe_url(). IMDS endpoints are technically private (169.254/16
link-local), so hybrid routing sent them to the local Chromium sidecar,
and the agent could read IAM credentials via browser_snapshot. On
EC2/GCP/Azure this is a full SSRF-to-credential-theft chain.
Fix: new is_always_blocked_url() in url_safety.py — a narrow floor
that checks _BLOCKED_HOSTNAMES, _ALWAYS_BLOCKED_IPS,
_ALWAYS_BLOCKED_NETWORKS only. Applied as an independent gate in
browser_navigate's pre-nav and post-redirect checks, BEFORE
auto_local_this_nav gets a chance to short-circuit. Ordinary private
URLs (localhost, 192.168.x, 10.x, .local, CGNAT) still route to the
local sidecar as the #16136 feature intends.
Secondary fix (reporter's finding): _url_is_private() now explicitly
checks 172.16.0.0/12. ipaddress.is_private only covers that range on
Python ≥3.11 (bpo-40791), so on 3.10 runtimes those URLs were routed
to cloud instead of the local sidecar. No security impact — just a
correctness fix for the hybrid-routing feature.
Closes #16234.
This commit is contained in:
parent
12289c2630
commit
0214858ef5
4 changed files with 281 additions and 1 deletions
|
|
@ -76,9 +76,13 @@ except Exception:
|
|||
check_website_access = lambda url: None # noqa: E731 — fail-open if policy module unavailable
|
||||
|
||||
try:
|
||||
from tools.url_safety import is_safe_url as _is_safe_url
|
||||
from tools.url_safety import (
|
||||
is_safe_url as _is_safe_url,
|
||||
is_always_blocked_url as _is_always_blocked_url,
|
||||
)
|
||||
except Exception:
|
||||
_is_safe_url = lambda url: False # noqa: E731 — fail-closed: block all if safety module unavailable
|
||||
_is_always_blocked_url = lambda url: True # noqa: E731 — fail-closed on the floor too
|
||||
from tools.browser_providers.base import CloudBrowserProvider
|
||||
from tools.browser_providers.browserbase import BrowserbaseProvider
|
||||
from tools.browser_providers.browser_use import BrowserUseProvider
|
||||
|
|
@ -837,6 +841,10 @@ def _url_is_private(url: str) -> bool:
|
|||
ip.is_private
|
||||
or ip.is_loopback
|
||||
or ip.is_link_local
|
||||
# 172.16.0.0/12: only covered by ip.is_private on Python
|
||||
# ≥3.11 (bpo-40791). Explicit check keeps 3.10 runtimes
|
||||
# routing these to the local sidecar correctly.
|
||||
or ip in ipaddress.ip_network("172.16.0.0/12")
|
||||
or ip in ipaddress.ip_network("100.64.0.0/10")
|
||||
)
|
||||
except ValueError:
|
||||
|
|
@ -2081,6 +2089,18 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
|
|||
nav_session_key = _navigation_session_key(effective_task_id, url)
|
||||
auto_local_this_nav = _is_local_sidecar_key(nav_session_key)
|
||||
|
||||
# Always-blocked floor: cloud metadata / IMDS endpoints are denied
|
||||
# regardless of backend, hybrid routing, or allow_private_urls.
|
||||
# There's no legitimate agent use case for navigating to
|
||||
# 169.254.169.254 / metadata.google.internal / ECS task metadata
|
||||
# via a browser, and routing those to a local Chromium sidecar
|
||||
# on an EC2/GCP/Azure host exfiltrates IAM credentials (#16234).
|
||||
if not _is_local_backend() and _is_always_blocked_url(url):
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": "Blocked: URL targets a cloud metadata endpoint",
|
||||
})
|
||||
|
||||
if (
|
||||
not _is_local_backend()
|
||||
and not auto_local_this_nav
|
||||
|
|
@ -2143,6 +2163,21 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
|
|||
# Skipped for local backends (same rationale as the pre-nav check),
|
||||
# and for the hybrid local sidecar (we're already on a local browser
|
||||
# hitting a private URL by design).
|
||||
# Always-blocked floor (cloud metadata / IMDS) is enforced even
|
||||
# when auto_local_this_nav is true — see pre-nav check for
|
||||
# rationale (#16234).
|
||||
if (
|
||||
not _is_local_backend()
|
||||
and final_url
|
||||
and final_url != url
|
||||
and _is_always_blocked_url(final_url)
|
||||
):
|
||||
_run_browser_command(nav_session_key, "open", ["about:blank"], timeout=10)
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": "Blocked: redirect landed on a cloud metadata endpoint",
|
||||
})
|
||||
|
||||
if (
|
||||
not _is_local_backend()
|
||||
and not auto_local_this_nav
|
||||
|
|
|
|||
|
|
@ -147,6 +147,102 @@ def _is_blocked_ip(ip: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def is_always_blocked_url(url: str) -> bool:
|
||||
"""Return True when the URL targets an always-blocked endpoint.
|
||||
|
||||
This is the security floor — cloud metadata IPs / hostnames
|
||||
(169.254.169.254, metadata.google.internal, ECS task metadata, etc.)
|
||||
that have no legitimate agent use regardless of backend, routing, or
|
||||
the ``allow_private_urls`` toggle. Used by callers that bypass the
|
||||
full ``is_safe_url`` check for their own reasons (e.g. hybrid cloud
|
||||
browser routing to a local Chromium sidecar for private URLs) and
|
||||
still need to enforce the non-negotiable floor before letting the
|
||||
request proceed.
|
||||
|
||||
Returns True (= blocked) on:
|
||||
- Hostnames in ``_BLOCKED_HOSTNAMES``
|
||||
- IPs / networks in ``_ALWAYS_BLOCKED_IPS`` / ``_ALWAYS_BLOCKED_NETWORKS``
|
||||
- URLs whose hostname resolves to any of the above
|
||||
|
||||
Returns False (= not in the always-blocked floor) on:
|
||||
- Benign public / private / loopback URLs (whether or not they'd
|
||||
be blocked by the ordinary SSRF check)
|
||||
- DNS-resolution failures for non-sentinel hostnames (these are
|
||||
someone else's problem — the caller's ordinary fail-closed path
|
||||
will catch them if applicable)
|
||||
- Parse errors (caller decides fail-open vs fail-closed)
|
||||
|
||||
Intentionally narrower than ``is_safe_url``: only blocks the sentinel
|
||||
set, not ordinary private addresses. Callers that want the full
|
||||
SSRF check should still use ``is_safe_url``.
|
||||
"""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
hostname = (parsed.hostname or "").strip().lower().rstrip(".")
|
||||
if not hostname:
|
||||
return False
|
||||
|
||||
# Blocked-hostname check fires regardless of DNS resolution
|
||||
if hostname in _BLOCKED_HOSTNAMES:
|
||||
logger.warning(
|
||||
"Blocked request to internal hostname (always-blocked floor): %s",
|
||||
hostname,
|
||||
)
|
||||
return True
|
||||
|
||||
# Literal IP → check directly against the always-blocked set
|
||||
try:
|
||||
ip = ipaddress.ip_address(hostname)
|
||||
except ValueError:
|
||||
ip = None
|
||||
|
||||
if ip is not None:
|
||||
if ip in _ALWAYS_BLOCKED_IPS or any(
|
||||
ip in net for net in _ALWAYS_BLOCKED_NETWORKS
|
||||
):
|
||||
logger.warning(
|
||||
"Blocked request to cloud metadata address "
|
||||
"(always-blocked floor): %s",
|
||||
hostname,
|
||||
)
|
||||
return True
|
||||
return False
|
||||
|
||||
# Hostname → resolve and check every answer. DNS failure is NOT
|
||||
# always-blocked (caller's ordinary path handles that).
|
||||
try:
|
||||
addr_info = socket.getaddrinfo(
|
||||
hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM
|
||||
)
|
||||
except socket.gaierror:
|
||||
return False
|
||||
|
||||
for _family, _, _, _, sockaddr in addr_info:
|
||||
ip_str = sockaddr[0]
|
||||
try:
|
||||
resolved = ipaddress.ip_address(ip_str)
|
||||
except ValueError:
|
||||
continue
|
||||
if resolved in _ALWAYS_BLOCKED_IPS or any(
|
||||
resolved in net for net in _ALWAYS_BLOCKED_NETWORKS
|
||||
):
|
||||
logger.warning(
|
||||
"Blocked request to cloud metadata address "
|
||||
"(always-blocked floor): %s -> %s",
|
||||
hostname,
|
||||
ip_str,
|
||||
)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
except Exception as exc:
|
||||
# Parse failures or unexpected errors — don't claim the URL is
|
||||
# always-blocked. Caller decides what to do with a malformed URL.
|
||||
logger.debug("is_always_blocked_url error for %s: %s", url, exc)
|
||||
return False
|
||||
|
||||
|
||||
def _allows_private_ip_resolution(hostname: str, scheme: str) -> bool:
|
||||
"""Return True when a trusted HTTPS hostname may bypass IP-class blocking."""
|
||||
return scheme == "https" and hostname in _TRUSTED_PRIVATE_IP_HOSTS
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue