fix(skills-hub): cover remaining SSRF fetch paths after #10029

2026-05-24 05:41:40 +00:00 · 2026-05-09 23:18:49 +03:00 · 2026-05-09 23:18:49 +03:00 · 0c5c4d1b8d
commit 0c5c4d1b8d
parent af9df46525
3 changed files with 135 additions and 25 deletions
--- a/tools/skills_hub.py
+++ b/tools/skills_hub.py
@ -27,7 +27,7 @@ from datetime import datetime, timezone
 from pathlib import Path, PurePosixPath
 from hermes_constants import get_hermes_home
 from typing import Any, Dict, List, Optional, Tuple, Union
-from urllib.parse import urlparse, urlunparse
+from urllib.parse import urljoin, urlparse, urlunparse

 import httpx
 import yaml
@ -35,6 +35,8 @@ import yaml
 from tools.skills_guard import (
    ScanResult, content_hash, TRUSTED_REPOS,
 )
+from tools.url_safety import is_safe_url
+from tools.website_policy import check_website_access

 logger = logging.getLogger(__name__)

@ -55,6 +57,9 @@ INDEX_CACHE_DIR = HUB_DIR / "index-cache"
 # Cache duration for remote index fetches
 INDEX_CACHE_TTL = 3600  # 1 hour

+_REDIRECT_STATUS_CODES = {301, 302, 303, 307, 308}
+_MAX_SKILL_FETCH_REDIRECTS = 5
+

 # ---------------------------------------------------------------------------
 # Data models
@ -118,6 +123,43 @@ def _validate_category_name(category: str) -> str:
    return _normalize_bundle_path(category, field_name="category", allow_nested=False)


+def _guarded_http_get(url: str, *, timeout: int = 20) -> Optional[httpx.Response]:
+    """Fetch a URL with SSRF and redirect-target validation."""
+    current_url = url
+
+    for _ in range(_MAX_SKILL_FETCH_REDIRECTS + 1):
+        if not is_safe_url(current_url):
+            logger.warning("Blocked unsafe Skills Hub URL: %s", current_url)
+            return None
+
+        blocked = check_website_access(current_url)
+        if blocked:
+            logger.info(
+                "Blocked Skills Hub fetch for %s by rule %s",
+                blocked["host"],
+                blocked["rule"],
+            )
+            return None
+
+        try:
+            resp = httpx.get(current_url, timeout=timeout, follow_redirects=False)
+        except httpx.HTTPError as exc:
+            logger.debug("Skills Hub fetch failed for %s: %s", current_url, exc)
+            return None
+
+        if resp.status_code in _REDIRECT_STATUS_CODES:
+            location = getattr(resp, "headers", {}).get("location")
+            if not location:
+                return None
+            current_url = urljoin(current_url, location)
+            continue
+
+        return resp
+
+    logger.warning("Skills Hub fetch exceeded redirect limit for %s", url)
+    return None
+
+
 def _validate_bundle_rel_path(rel_path: str) -> str:
    return _normalize_bundle_path(rel_path, field_name="bundle file path", allow_nested=True)

@ -887,12 +929,12 @@ class WellKnownSkillSource(SkillSource):
        if isinstance(cached, dict) and isinstance(cached.get("skills"), list):
            return cached

+        resp = _guarded_http_get(index_url, timeout=20)
+        if resp is None or resp.status_code != 200:
+            return None
        try:
-            resp = httpx.get(index_url, timeout=20, follow_redirects=True)
-            if resp.status_code != 200:
-                return None
            data = resp.json()
-        except (httpx.HTTPError, json.JSONDecodeError):
+        except json.JSONDecodeError:
            return None

        skills = data.get("skills", []) if isinstance(data, dict) else []
@ -918,12 +960,9 @@ class WellKnownSkillSource(SkillSource):

    @staticmethod
    def _fetch_text(url: str) -> Optional[str]:
-        try:
-            resp = httpx.get(url, timeout=20, follow_redirects=True)
-            if resp.status_code == 200:
-                return resp.text
-        except httpx.HTTPError:
-            return None
+        resp = _guarded_http_get(url, timeout=20)
+        if resp is not None and resp.status_code == 200:
+            return resp.text
        return None

    @staticmethod
@ -1045,13 +1084,9 @@ class UrlSource(SkillSource):

    @staticmethod
    def _fetch_text(url: str) -> Optional[str]:
-        try:
-            resp = httpx.get(url, timeout=20, follow_redirects=True)
-            if resp.status_code == 200:
-                return resp.text
-        except httpx.HTTPError as exc:
-            logger.debug("UrlSource fetch failed for %s: %s", url, exc)
-            return None
+        resp = _guarded_http_get(url, timeout=20)
+        if resp is not None and resp.status_code == 200:
+            return resp.text
        return None

    # Skill names must look like identifiers: lowercase letters/digits with
@ -2051,12 +2086,9 @@ class ClawHubSource(SkillSource):
        return files

    def _fetch_text(self, url: str) -> Optional[str]:
-        try:
-            resp = httpx.get(url, timeout=20)
-            if resp.status_code == 200:
-                return resp.text
-        except httpx.HTTPError:
-            return None
+        resp = _guarded_http_get(url, timeout=20)
+        if resp is not None and resp.status_code == 200:
+            return resp.text
        return None