mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-24 05:41:40 +00:00
fix(skills-hub): cover remaining SSRF fetch paths after #10029
This commit is contained in:
parent
af9df46525
commit
0c5c4d1b8d
3 changed files with 135 additions and 25 deletions
|
|
@ -27,7 +27,7 @@ from datetime import datetime, timezone
|
|||
from pathlib import Path, PurePosixPath
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
from urllib.parse import urljoin, urlparse, urlunparse
|
||||
|
||||
import httpx
|
||||
import yaml
|
||||
|
|
@ -35,6 +35,8 @@ import yaml
|
|||
from tools.skills_guard import (
|
||||
ScanResult, content_hash, TRUSTED_REPOS,
|
||||
)
|
||||
from tools.url_safety import is_safe_url
|
||||
from tools.website_policy import check_website_access
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -55,6 +57,9 @@ INDEX_CACHE_DIR = HUB_DIR / "index-cache"
|
|||
# Cache duration for remote index fetches
|
||||
INDEX_CACHE_TTL = 3600 # 1 hour
|
||||
|
||||
_REDIRECT_STATUS_CODES = {301, 302, 303, 307, 308}
|
||||
_MAX_SKILL_FETCH_REDIRECTS = 5
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data models
|
||||
|
|
@ -118,6 +123,43 @@ def _validate_category_name(category: str) -> str:
|
|||
return _normalize_bundle_path(category, field_name="category", allow_nested=False)
|
||||
|
||||
|
||||
def _guarded_http_get(url: str, *, timeout: int = 20) -> Optional[httpx.Response]:
|
||||
"""Fetch a URL with SSRF and redirect-target validation."""
|
||||
current_url = url
|
||||
|
||||
for _ in range(_MAX_SKILL_FETCH_REDIRECTS + 1):
|
||||
if not is_safe_url(current_url):
|
||||
logger.warning("Blocked unsafe Skills Hub URL: %s", current_url)
|
||||
return None
|
||||
|
||||
blocked = check_website_access(current_url)
|
||||
if blocked:
|
||||
logger.info(
|
||||
"Blocked Skills Hub fetch for %s by rule %s",
|
||||
blocked["host"],
|
||||
blocked["rule"],
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
resp = httpx.get(current_url, timeout=timeout, follow_redirects=False)
|
||||
except httpx.HTTPError as exc:
|
||||
logger.debug("Skills Hub fetch failed for %s: %s", current_url, exc)
|
||||
return None
|
||||
|
||||
if resp.status_code in _REDIRECT_STATUS_CODES:
|
||||
location = getattr(resp, "headers", {}).get("location")
|
||||
if not location:
|
||||
return None
|
||||
current_url = urljoin(current_url, location)
|
||||
continue
|
||||
|
||||
return resp
|
||||
|
||||
logger.warning("Skills Hub fetch exceeded redirect limit for %s", url)
|
||||
return None
|
||||
|
||||
|
||||
def _validate_bundle_rel_path(rel_path: str) -> str:
|
||||
return _normalize_bundle_path(rel_path, field_name="bundle file path", allow_nested=True)
|
||||
|
||||
|
|
@ -887,12 +929,12 @@ class WellKnownSkillSource(SkillSource):
|
|||
if isinstance(cached, dict) and isinstance(cached.get("skills"), list):
|
||||
return cached
|
||||
|
||||
resp = _guarded_http_get(index_url, timeout=20)
|
||||
if resp is None or resp.status_code != 200:
|
||||
return None
|
||||
try:
|
||||
resp = httpx.get(index_url, timeout=20, follow_redirects=True)
|
||||
if resp.status_code != 200:
|
||||
return None
|
||||
data = resp.json()
|
||||
except (httpx.HTTPError, json.JSONDecodeError):
|
||||
except json.JSONDecodeError:
|
||||
return None
|
||||
|
||||
skills = data.get("skills", []) if isinstance(data, dict) else []
|
||||
|
|
@ -918,12 +960,9 @@ class WellKnownSkillSource(SkillSource):
|
|||
|
||||
@staticmethod
|
||||
def _fetch_text(url: str) -> Optional[str]:
|
||||
try:
|
||||
resp = httpx.get(url, timeout=20, follow_redirects=True)
|
||||
if resp.status_code == 200:
|
||||
return resp.text
|
||||
except httpx.HTTPError:
|
||||
return None
|
||||
resp = _guarded_http_get(url, timeout=20)
|
||||
if resp is not None and resp.status_code == 200:
|
||||
return resp.text
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -1045,13 +1084,9 @@ class UrlSource(SkillSource):
|
|||
|
||||
@staticmethod
|
||||
def _fetch_text(url: str) -> Optional[str]:
|
||||
try:
|
||||
resp = httpx.get(url, timeout=20, follow_redirects=True)
|
||||
if resp.status_code == 200:
|
||||
return resp.text
|
||||
except httpx.HTTPError as exc:
|
||||
logger.debug("UrlSource fetch failed for %s: %s", url, exc)
|
||||
return None
|
||||
resp = _guarded_http_get(url, timeout=20)
|
||||
if resp is not None and resp.status_code == 200:
|
||||
return resp.text
|
||||
return None
|
||||
|
||||
# Skill names must look like identifiers: lowercase letters/digits with
|
||||
|
|
@ -2051,12 +2086,9 @@ class ClawHubSource(SkillSource):
|
|||
return files
|
||||
|
||||
def _fetch_text(self, url: str) -> Optional[str]:
|
||||
try:
|
||||
resp = httpx.get(url, timeout=20)
|
||||
if resp.status_code == 200:
|
||||
return resp.text
|
||||
except httpx.HTTPError:
|
||||
return None
|
||||
resp = _guarded_http_get(url, timeout=20)
|
||||
if resp is not None and resp.status_code == 200:
|
||||
return resp.text
|
||||
return None
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue