mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(security): add SSRF protection to vision_tools and web_tools (hardened)
* fix(security): add SSRF protection to vision_tools and web_tools Both vision_analyze and web_extract/web_crawl accept arbitrary URLs without checking if they target private/internal network addresses. A prompt-injected or malicious skill could use this to access cloud metadata endpoints (169.254.169.254), localhost services, or private network hosts. Adds a shared url_safety.is_safe_url() that resolves hostnames and blocks private, loopback, link-local, and reserved IP ranges. Also blocks known internal hostnames (metadata.google.internal). Integrated at the URL validation layer in vision_tools and before each website_policy check in web_tools (extract, crawl). * test(vision): update localhost test to reflect SSRF protection The existing test_valid_url_with_port asserted localhost URLs pass validation. With SSRF protection, localhost is now correctly blocked. Update the test to verify the block, and add a separate test for valid URLs with ports using a public hostname. * fix(security): harden SSRF protection — fail-closed, CGNAT, multicast, redirect guard Follow-up hardening on top of dieutx's SSRF protection (PR #2630): - Change fail-open to fail-closed: DNS errors and unexpected exceptions now block the request instead of allowing it (OWASP best practice) - Block CGNAT range (100.64.0.0/10): Python's ipaddress.is_private does NOT cover this range (returns False for both is_private and is_global). Used by Tailscale/WireGuard and carrier infrastructure. - Add is_multicast and is_unspecified checks: multicast (224.0.0.0/4) and unspecified (0.0.0.0) addresses were not caught by the original four-check chain - Add redirect guard for vision_tools: httpx event hook re-validates each redirect target against SSRF checks, preventing the classic redirect-based SSRF bypass (302 to internal IP) - Move SSRF filtering before backend dispatch in web_extract: now covers Parallel and Tavily backends, not just Firecrawl - Extract _is_blocked_ip() helper for cleaner IP range checking - Add 24 new tests (CGNAT, multicast, IPv4-mapped IPv6, fail-closed behavior, parametrized blocked/allowed IP lists) - Fix existing tests to mock DNS resolution for test hostnames --------- Co-authored-by: dieutx <dangtc94@gmail.com>
This commit is contained in:
parent
934fbe3c06
commit
0791efe2c3
6 changed files with 472 additions and 124 deletions
|
|
@ -69,7 +69,12 @@ def _validate_image_url(url: str) -> bool:
|
|||
if not parsed.netloc:
|
||||
return False
|
||||
|
||||
return True # Allow all well-formed HTTP/HTTPS URLs for flexibility
|
||||
# Block private/internal addresses to prevent SSRF
|
||||
from tools.url_safety import is_safe_url
|
||||
if not is_safe_url(url):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
async def _download_image(image_url: str, destination: Path, max_retries: int = 3) -> Path:
|
||||
|
|
@ -92,12 +97,31 @@ async def _download_image(image_url: str, destination: Path, max_retries: int =
|
|||
# Create parent directories if they don't exist
|
||||
destination.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def _ssrf_redirect_guard(response):
|
||||
"""Re-validate each redirect target to prevent redirect-based SSRF.
|
||||
|
||||
Without this, an attacker can host a public URL that 302-redirects
|
||||
to http://169.254.169.254/ and bypass the pre-flight is_safe_url check.
|
||||
"""
|
||||
if response.is_redirect and response.next_request:
|
||||
redirect_url = str(response.next_request.url)
|
||||
from tools.url_safety import is_safe_url
|
||||
if not is_safe_url(redirect_url):
|
||||
raise ValueError(
|
||||
f"Blocked redirect to private/internal address: {redirect_url}"
|
||||
)
|
||||
|
||||
last_error = None
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
# Download the image with appropriate headers using async httpx
|
||||
# Enable follow_redirects to handle image CDNs that redirect (e.g., Imgur, Picsum)
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
# SSRF: event_hooks validates each redirect target against private IP ranges
|
||||
async with httpx.AsyncClient(
|
||||
timeout=30.0,
|
||||
follow_redirects=True,
|
||||
event_hooks={"response": [_ssrf_redirect_guard]},
|
||||
) as client:
|
||||
response = await client.get(
|
||||
image_url,
|
||||
headers={
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue