mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-14 04:02:26 +00:00
Cloud metadata endpoints (169.254.169.254 etc.) are now always blocked
by browser_navigate regardless of hybrid routing, allow_private_urls,
or backend.
Bug: commit 42c076d3 (#16136) added hybrid routing that flips
auto_local_this_nav=True for private URLs and short-circuits
_is_safe_url(). IMDS endpoints are technically private (169.254/16
link-local), so the sidecar happily routed them to a local Chromium,
and the agent could read IAM credentials via browser_snapshot. On
EC2/GCP/Azure this is a full SSRF-to-credential-theft chain.
Fix: new is_always_blocked_url() in url_safety.py — a narrow floor
that checks _BLOCKED_HOSTNAMES, _ALWAYS_BLOCKED_IPS,
_ALWAYS_BLOCKED_NETWORKS only. Applied as an independent gate in
browser_navigate's pre-nav and post-redirect checks, BEFORE
auto_local_this_nav gets a chance to short-circuit. Ordinary private
URLs (localhost, 192.168.x, 10.x, .local, CGNAT) still route to the
local sidecar as the #16136 feature intends.
Secondary fix (reporter's finding): _url_is_private() now explicitly
checks 172.16.0.0/12. ipaddress.is_private only covers that range on
Python ≥3.11 (bpo-40791), so on 3.10 runtimes those URLs were routed
to cloud instead of the local sidecar. No security impact — just a
correctness fix for the hybrid-routing feature.
Closes #16234.
This commit is contained in:
parent
12289c2630
commit
0214858ef5
4 changed files with 281 additions and 1 deletions
|
|
@ -147,6 +147,102 @@ def _is_blocked_ip(ip: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def is_always_blocked_url(url: str) -> bool:
|
||||
"""Return True when the URL targets an always-blocked endpoint.
|
||||
|
||||
This is the security floor — cloud metadata IPs / hostnames
|
||||
(169.254.169.254, metadata.google.internal, ECS task metadata, etc.)
|
||||
that have no legitimate agent use regardless of backend, routing, or
|
||||
the ``allow_private_urls`` toggle. Used by callers that bypass the
|
||||
full ``is_safe_url`` check for their own reasons (e.g. hybrid cloud
|
||||
browser routing to a local Chromium sidecar for private URLs) and
|
||||
still need to enforce the non-negotiable floor before letting the
|
||||
request proceed.
|
||||
|
||||
Returns True (= blocked) on:
|
||||
- Hostnames in ``_BLOCKED_HOSTNAMES``
|
||||
- IPs / networks in ``_ALWAYS_BLOCKED_IPS`` / ``_ALWAYS_BLOCKED_NETWORKS``
|
||||
- URLs whose hostname resolves to any of the above
|
||||
|
||||
Returns False (= not in the always-blocked floor) on:
|
||||
- Benign public / private / loopback URLs (whether or not they'd
|
||||
be blocked by the ordinary SSRF check)
|
||||
- DNS-resolution failures for non-sentinel hostnames (these are
|
||||
someone else's problem — the caller's ordinary fail-closed path
|
||||
will catch them if applicable)
|
||||
- Parse errors (caller decides fail-open vs fail-closed)
|
||||
|
||||
Intentionally narrower than ``is_safe_url``: only blocks the sentinel
|
||||
set, not ordinary private addresses. Callers that want the full
|
||||
SSRF check should still use ``is_safe_url``.
|
||||
"""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
hostname = (parsed.hostname or "").strip().lower().rstrip(".")
|
||||
if not hostname:
|
||||
return False
|
||||
|
||||
# Blocked-hostname check fires regardless of DNS resolution
|
||||
if hostname in _BLOCKED_HOSTNAMES:
|
||||
logger.warning(
|
||||
"Blocked request to internal hostname (always-blocked floor): %s",
|
||||
hostname,
|
||||
)
|
||||
return True
|
||||
|
||||
# Literal IP → check directly against the always-blocked set
|
||||
try:
|
||||
ip = ipaddress.ip_address(hostname)
|
||||
except ValueError:
|
||||
ip = None
|
||||
|
||||
if ip is not None:
|
||||
if ip in _ALWAYS_BLOCKED_IPS or any(
|
||||
ip in net for net in _ALWAYS_BLOCKED_NETWORKS
|
||||
):
|
||||
logger.warning(
|
||||
"Blocked request to cloud metadata address "
|
||||
"(always-blocked floor): %s",
|
||||
hostname,
|
||||
)
|
||||
return True
|
||||
return False
|
||||
|
||||
# Hostname → resolve and check every answer. DNS failure is NOT
|
||||
# always-blocked (caller's ordinary path handles that).
|
||||
try:
|
||||
addr_info = socket.getaddrinfo(
|
||||
hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM
|
||||
)
|
||||
except socket.gaierror:
|
||||
return False
|
||||
|
||||
for _family, _, _, _, sockaddr in addr_info:
|
||||
ip_str = sockaddr[0]
|
||||
try:
|
||||
resolved = ipaddress.ip_address(ip_str)
|
||||
except ValueError:
|
||||
continue
|
||||
if resolved in _ALWAYS_BLOCKED_IPS or any(
|
||||
resolved in net for net in _ALWAYS_BLOCKED_NETWORKS
|
||||
):
|
||||
logger.warning(
|
||||
"Blocked request to cloud metadata address "
|
||||
"(always-blocked floor): %s -> %s",
|
||||
hostname,
|
||||
ip_str,
|
||||
)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
except Exception as exc:
|
||||
# Parse failures or unexpected errors — don't claim the URL is
|
||||
# always-blocked. Caller decides what to do with a malformed URL.
|
||||
logger.debug("is_always_blocked_url error for %s: %s", url, exc)
|
||||
return False
|
||||
|
||||
|
||||
def _allows_private_ip_resolution(hostname: str, scheme: str) -> bool:
|
||||
"""Return True when a trusted HTTPS hostname may bypass IP-class blocking."""
|
||||
return scheme == "https" and hostname in _TRUSTED_PRIVATE_IP_HOSTS
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue