mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Adds security.allow_private_urls / HERMES_ALLOW_PRIVATE_URLS toggle so users on OpenWrt routers, TUN-mode proxies (Clash/Mihomo/Sing-box), corporate split-tunnel VPNs, and Tailscale networks — where DNS resolves public domains to 198.18.0.0/15 or 100.64.0.0/10 — can use web_extract, browser, vision URL fetching, and gateway media downloads. Single toggle in tools/url_safety.py; all 23 is_safe_url() call sites inherit automatically. Cached for process lifetime. Cloud metadata endpoints stay ALWAYS blocked regardless of the toggle: 169.254.169.254 (AWS/GCP/Azure/DO/Oracle), 169.254.170.2 (AWS ECS task IAM creds), 169.254.169.253 (Azure IMDS wire server), 100.100.100.200 (Alibaba), fd00:ec2::254 (AWS IPv6), the entire 169.254.0.0/16 link-local range, and the metadata.google.internal / metadata.goog hostnames (checked pre-DNS so they can't be bypassed on networks where those names resolve to local IPs). Supersedes #3779 (narrower HERMES_ALLOW_RFC2544 for the same class of users). Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
225 lines
9.1 KiB
Python
225 lines
9.1 KiB
Python
"""URL safety checks — blocks requests to private/internal network addresses.
|
|
|
|
Prevents SSRF (Server-Side Request Forgery) where a malicious prompt or
|
|
skill could trick the agent into fetching internal resources like cloud
|
|
metadata endpoints (169.254.169.254), localhost services, or private
|
|
network hosts.
|
|
|
|
Private-IP blocking can be globally disabled via ``security.allow_private_urls: true``
in config.yaml (or ``HERMES_ALLOW_PRIVATE_URLS=true``) for environments where
DNS resolves external domains to private/benchmark-range IPs (OpenWrt routers,
corporate proxies, VPNs that use 198.18.0.0/15 or 100.64.0.0/10). Even when it
is disabled, cloud metadata endpoints (the metadata.google.internal hostname,
169.254.169.254 and the rest of 169.254.0.0/16) are **always** blocked — those
are never legitimate agent targets.
|
|
|
|
Limitations (documented, not fixable at pre-flight level):
|
|
- DNS rebinding (TOCTOU): an attacker-controlled DNS server with TTL=0
|
|
can return a public IP for the check, then a private IP for the actual
|
|
connection. Fixing this requires connection-level validation (e.g.
|
|
Python's Champion library or an egress proxy like Stripe's Smokescreen).
|
|
- Redirect-based bypass is mitigated by httpx event hooks that re-validate
|
|
each redirect target in vision_tools, gateway platform adapters, and
|
|
media cache helpers. Web tools use third-party SDKs (Firecrawl/Tavily)
|
|
where redirect handling is on their servers.
|
|
"""
|
|
|
|
import ipaddress
|
|
import logging
|
|
import os
|
|
import socket
|
|
from urllib.parse import urlparse
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Cloud-metadata hostnames. They are rejected by name, whatever they resolve
# to and whatever the allow-private toggle says, because an attacker could
# use them to read instance credentials.
_BLOCKED_HOSTNAMES = frozenset(
    (
        "metadata.google.internal",
        "metadata.goog",
    )
)
|
|
|
|
# Addresses that stay blocked even when allow_private_urls is switched on.
# These are the cloud metadata / credential endpoints (the #1 SSRF target)
# plus the IPv4 link-local range that hosts most of them.
_ALWAYS_BLOCKED_IPS = frozenset(
    ipaddress.ip_address(literal)
    for literal in (
        "169.254.169.254",  # AWS/GCP/Azure/DO/Oracle metadata
        "169.254.170.2",  # AWS ECS task metadata (task IAM creds)
        "169.254.169.253",  # Azure IMDS wire server
        "fd00:ec2::254",  # AWS metadata (IPv6)
        "100.100.100.200",  # Alibaba Cloud metadata
    )
)
_ALWAYS_BLOCKED_NETWORKS = tuple(
    ipaddress.ip_network(cidr)
    for cidr in (
        "169.254.0.0/16",  # Entire link-local range (no legit agent target)
    )
)
|
|
|
|
# Exact-match allowlist of HTTPS hostnames that may resolve to
# private/benchmark-space IPs. Kept deliberately tiny: QQ media downloads can
# legitimately land in 198.18.0.0/15 behind local proxy/benchmark
# infrastructure.
_TRUSTED_PRIVATE_IP_HOSTS = frozenset(("multimedia.nt.qq.com.cn",))
|
|
|
|
# Shared Address Space / CGNAT (100.64.0.0/10, RFC 6598). The ipaddress module
# reports neither is_private nor is_global for it, so it has to be matched
# explicitly. It is used by carrier-grade NAT, Tailscale/WireGuard VPNs and
# some cloud-internal networks.
_CGNAT_NETWORK = ipaddress.IPv4Network("100.64.0.0/10")
|
|
|
|
# ---------------------------------------------------------------------------
# Process-wide switch: permit URLs that resolve to private/internal IPs
# ---------------------------------------------------------------------------
# The toggle is looked up once and memoised, so URL checks do not touch the
# filesystem every time.
_allow_private_resolved: bool = False  # True once the toggle has been looked up
_cached_allow_private: bool = False  # memoised toggle value; False keeps blocking on
|
|
|
|
|
|
def _config_flag_enabled(value: object) -> bool:
    """Interpret a raw config.yaml value as an on/off flag.

    Strings are parsed with the same spellings the env var accepts
    (``true``/``1``/``yes``), so a quoted ``"false"`` or ``"no"`` is not
    treated as enabled just because it is a non-empty string. Values of any
    other type keep plain truthiness.
    """
    if isinstance(value, str):
        return value.strip().lower() in ("true", "1", "yes")
    return bool(value)


def _resolve_allow_private_urls() -> bool:
    """Read the toggle from the environment, then config.yaml (uncached)."""
    # 1. Env var override (highest priority). An explicit false wins over
    #    whatever config.yaml says.
    env_val = os.getenv("HERMES_ALLOW_PRIVATE_URLS", "").strip().lower()
    if env_val in ("true", "1", "yes"):
        return True
    if env_val in ("false", "0", "no"):
        return False

    # 2. Config file: ``security`` (preferred) first, then ``browser`` (legacy).
    try:
        # Imported lazily so this module stays importable without the CLI package.
        from hermes_cli.config import read_raw_config

        cfg = read_raw_config()
        for section_name in ("security", "browser"):
            section = cfg.get(section_name, {})
            if isinstance(section, dict) and _config_flag_enabled(
                section.get("allow_private_urls")
            ):
                return True
    except Exception:
        # Deliberate best-effort: config unavailable (e.g. tests, early
        # import), so keep the safe default of blocking.
        pass

    return False


def _global_allow_private_urls() -> bool:
    """Return True when the user has opted out of private-IP blocking.

    Checks (in priority order):
      1. ``HERMES_ALLOW_PRIVATE_URLS`` env var (``true``/``1``/``yes`` enable,
         ``false``/``0``/``no`` disable without consulting config)
      2. ``security.allow_private_urls`` in config.yaml
      3. ``browser.allow_private_urls`` in config.yaml (legacy / backward compat)

    Any failure while reading the config leaves the toggle off. The result is
    cached in module globals for the process lifetime.
    """
    global _allow_private_resolved, _cached_allow_private
    if _allow_private_resolved:
        return _cached_allow_private

    # Store the value before raising the "resolved" flag, so a caller that
    # sees the flag set never reads a not-yet-computed value.
    _cached_allow_private = _resolve_allow_private_urls()
    _allow_private_resolved = True
    return _cached_allow_private
|
|
|
|
|
|
def _reset_allow_private_cache() -> None:
    """Clear the memoised toggle state — intended for tests only."""
    global _allow_private_resolved, _cached_allow_private
    _cached_allow_private = _allow_private_resolved = False
|
|
|
|
|
|
def _is_blocked_ip(ip: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
|
|
"""Return True if the IP should be blocked for SSRF protection."""
|
|
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
|
|
return True
|
|
if ip.is_multicast or ip.is_unspecified:
|
|
return True
|
|
# CGNAT range not covered by is_private
|
|
if ip in _CGNAT_NETWORK:
|
|
return True
|
|
return False
|
|
|
|
|
|
def _allows_private_ip_resolution(hostname: str, scheme: str) -> bool:
    """Tell whether *hostname* is an allowlisted HTTPS host exempt from IP-class blocking."""
    if scheme != "https":
        # The exemption only ever applies to HTTPS URLs.
        return False
    return hostname in _TRUSTED_PRIVATE_IP_HOSTS
|
|
|
|
|
|
def _unmap_ipv4(ip):
    """Return the embedded IPv4 address of an IPv4-mapped IPv6 address, else *ip*."""
    mapped = getattr(ip, "ipv4_mapped", None)  # None for IPv4 and plain IPv6
    return ip if mapped is None else mapped


def is_safe_url(url: str) -> bool:
    """Return True if the URL target is not a private/internal address.

    Resolves the hostname and checks every returned address. Fails closed:
    a missing hostname, a DNS error, an empty or unparseable resolution
    result, or any unexpected exception blocks the request.

    When ``security.allow_private_urls`` is enabled (or the env var
    ``HERMES_ALLOW_PRIVATE_URLS=true``), private-IP blocking is skipped.
    Cloud metadata endpoints (169.254.169.254, metadata.google.internal)
    remain blocked regardless — they are never legitimate agent targets.

    IPv4-mapped IPv6 results (``::ffff:a.b.c.d``) are checked as the IPv4
    address they embed. Without that, ``http://[::ffff:169.254.169.254]/``
    would slip past the always-blocked checks, because an IPv6 address never
    equals an IPv4 address or falls inside an IPv4 network.

    Args:
        url: The URL about to be fetched.

    Returns:
        True when the request may proceed, False when it must be blocked.
    """
    try:
        parsed = urlparse(url)
        hostname = (parsed.hostname or "").strip().lower().rstrip(".")
        scheme = (parsed.scheme or "").strip().lower()
        if not hostname:
            return False

        # Block known internal hostnames — ALWAYS, even with toggle on
        if hostname in _BLOCKED_HOSTNAMES:
            logger.warning("Blocked request to internal hostname: %s", hostname)
            return False

        # Check the global toggle AFTER blocking metadata hostnames
        allow_all_private = _global_allow_private_urls()
        allow_private_ip = _allows_private_ip_resolution(hostname, scheme)

        # Try to resolve and check IP
        try:
            addr_info = socket.getaddrinfo(hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM)
        except socket.gaierror:
            # DNS resolution failed — fail closed. If DNS can't resolve it,
            # the HTTP client will also fail, so blocking loses nothing.
            logger.warning("Blocked request — DNS resolution failed for: %s", hostname)
            return False

        if not addr_info:
            # Nothing to validate means nothing was proven safe.
            logger.warning("Blocked request — DNS resolution failed for: %s", hostname)
            return False

        for *_, sockaddr in addr_info:
            ip_str = sockaddr[0]
            try:
                # Judge IPv4-mapped IPv6 by the IPv4 address it actually reaches.
                ip = _unmap_ipv4(ipaddress.ip_address(ip_str))
            except ValueError:
                # An address we cannot classify must not be waved through.
                logger.warning(
                    "Blocked request — unparseable resolved address: %s -> %s",
                    hostname, ip_str,
                )
                return False

            # Always block cloud metadata IPs and link-local, even with toggle on
            if ip in _ALWAYS_BLOCKED_IPS or any(ip in net for net in _ALWAYS_BLOCKED_NETWORKS):
                logger.warning(
                    "Blocked request to cloud metadata address: %s -> %s",
                    hostname, ip_str,
                )
                return False

            if not allow_all_private and not allow_private_ip and _is_blocked_ip(ip):
                logger.warning(
                    "Blocked request to private/internal address: %s -> %s",
                    hostname, ip_str,
                )
                return False

        if allow_all_private:
            logger.debug(
                "Allowing private/internal resolution (security.allow_private_urls=true): %s",
                hostname,
            )
        elif allow_private_ip:
            logger.debug(
                "Allowing trusted hostname despite private/internal resolution: %s",
                hostname,
            )

        return True

    except Exception as exc:
        # Fail closed on unexpected errors — don't let parsing edge cases
        # become SSRF bypass vectors
        logger.warning("Blocked request — URL safety check error for %s: %s", url, exc)
        return False
|