feat: add BrowseShSource adapter for browse.sh skills catalog

Adds BrowseShSource — a new skill source adapter that integrates Browserbase's browse.sh catalog (169+ site-specific SKILL.md files) into the Hermes Skills Hub. - BrowseShSource class in tools/skills_hub.py implementing SkillSource ABC - Fetches browse.sh catalog API with 1h TTL cache - Full-text search across name, title, description, hostname, category, tags - fetch() downloads SKILL.md via sourceUrl (GitHub HTML -> raw URL conversion) - Registered in create_source_router() after LobeHubSource - Tests in tests/tools/test_skills_hub_browse_sh.py (7 tests, all passing)
2026-06-06 07:51:53 +00:00 · 2026-05-19 06:32:16 +00:00 · 2026-05-19 06:32:16 +00:00 · 57145ca146
commit 57145ca146
parent 2b41f9d893
2 changed files with 288 additions and 0 deletions
--- a/tools/skills_hub.py
+++ b/tools/skills_hub.py
@ -2350,6 +2350,161 @@ class LobeHubSource(SkillSource):
        return "\n".join(fm_lines) + "\n\n" + "\n".join(body_lines) + "\n"


+# ---------------------------------------------------------------------------
+# browse.sh source adapter
+# ---------------------------------------------------------------------------
+
+
+class BrowseShSource(SkillSource):
+    """Discover and install site-specific browser automation skills from browse.sh.
+
+    browse.sh (https://browse.sh) is Browserbase's catalog of 169+ SKILL.md files
+    that describe how to automate specific websites (Airbnb, Amazon, arXiv, etc.).
+    Each skill has a sourceUrl pointing to the raw SKILL.md on GitHub.
+    """
+
+    CATALOG_URL = "https://browse.sh/api/skills"
+    _CACHE_KEY = "browse_sh_catalog"
+
+    def source_id(self) -> str:
+        return "browse-sh"
+
+    def trust_level_for(self, identifier: str) -> str:
+        return "community"
+
+    def _fetch_catalog(self) -> List[Dict]:
+        cached = _read_index_cache(self._CACHE_KEY)
+        if cached is not None:
+            return cached
+        try:
+            resp = httpx.get(self.CATALOG_URL, timeout=20)
+            if resp.status_code != 200:
+                return []
+            data = resp.json()
+        except (httpx.HTTPError, json.JSONDecodeError):
+            return []
+        skills = data.get("skills", []) if isinstance(data, dict) else []
+        if isinstance(skills, list):
+            _write_index_cache(self._CACHE_KEY, skills)
+        return skills if isinstance(skills, list) else []
+
+    def _item_to_meta(self, item: Dict) -> Optional[SkillMeta]:
+        slug = item.get("slug", "")
+        name = item.get("name", "")
+        title = item.get("title", name)
+        description = item.get("description", title)
+        if not slug or not name:
+            return None
+        if len(description) > 1024:
+            description = description[:1021] + "..."
+        return SkillMeta(
+            name=name,
+            description=description,
+            source="browse-sh",
+            identifier=f"browse-sh/{slug}",
+            trust_level="community",
+            tags=item.get("tags", []),
+            extra={
+                "slug": slug,
+                "hostname": item.get("hostname", ""),
+                "category": item.get("category", ""),
+                "source_url": item.get("sourceUrl", ""),
+                "recommended_method": item.get("recommendedMethod", ""),
+                "proxies": item.get("proxies", False),
+                "install_count": item.get("installCount", 0),
+            },
+        )
+
+    def search(self, query: str, limit: int = 10) -> List[SkillMeta]:
+        catalog = self._fetch_catalog()
+        query_lower = query.lower()
+        results = []
+        for item in catalog:
+            text = " ".join([
+                item.get("name", ""),
+                item.get("title", ""),
+                item.get("description", ""),
+                item.get("hostname", ""),
+                item.get("category", ""),
+                " ".join(item.get("tags", [])),
+            ]).lower()
+            if not query_lower or query_lower in text:
+                meta = self._item_to_meta(item)
+                if meta:
+                    results.append(meta)
+            if len(results) >= limit:
+                break
+        return results
+
+    def inspect(self, identifier: str) -> Optional[SkillMeta]:
+        slug = self._slug_from_identifier(identifier)
+        if not slug:
+            return None
+        catalog = self._fetch_catalog()
+        for item in catalog:
+            if item.get("slug") == slug:
+                return self._item_to_meta(item)
+        return None
+
+    def fetch(self, identifier: str) -> Optional[SkillBundle]:
+        slug = self._slug_from_identifier(identifier)
+        if not slug:
+            return None
+        catalog = self._fetch_catalog()
+        item = next((i for i in catalog if i.get("slug") == slug), None)
+        if not item:
+            return None
+        source_url = item.get("sourceUrl", "")
+        if not source_url:
+            return None
+        # Convert GitHub HTML URL to raw URL if needed
+        raw_url = self._to_raw_url(source_url)
+        if not raw_url:
+            return None
+        try:
+            resp = httpx.get(raw_url, timeout=20, follow_redirects=True)
+            if resp.status_code != 200:
+                return None
+            content = resp.text
+        except httpx.HTTPError:
+            return None
+        meta = self._item_to_meta(item)
+        name = meta.name if meta else slug.split("/")[-1]
+        return SkillBundle(
+            name=name,
+            files={"SKILL.md": content},
+            source="browse-sh",
+            identifier=identifier,
+            trust_level="community",
+            metadata={
+                "slug": slug,
+                "hostname": item.get("hostname", ""),
+                "source_url": source_url,
+            },
+        )
+
+    def _slug_from_identifier(self, identifier: str) -> str:
+        """Extract slug from identifier like 'browse-sh/airbnb.com/search-listings-abc'."""
+        if identifier.startswith("browse-sh/"):
+            return identifier[len("browse-sh/"):]
+        return identifier
+
+    def _to_raw_url(self, url: str) -> Optional[str]:
+        """Convert a GitHub HTML URL to a raw.githubusercontent.com URL."""
+        if "raw.githubusercontent.com" in url:
+            return url
+        # https://github.com/owner/repo/blob/branch/path -> raw URL
+        import re
+        m = re.match(
+            r"https://github\.com/([^/]+)/([^/]+)/blob/([^/]+)/(.+)",
+            url,
+        )
+        if m:
+            owner, repo, branch, path = m.groups()
+            return f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{path}"
+        return None
+
+
 # ---------------------------------------------------------------------------
 # Official optional skills source adapter
 # ---------------------------------------------------------------------------
@ -3143,6 +3298,7 @@ def create_source_router(auth: Optional[GitHubAuth] = None) -> List[SkillSource]
        ClawHubSource(),
        ClaudeMarketplaceSource(auth=auth),
        LobeHubSource(),
+        BrowseShSource(),   # browse.sh: 169+ site-specific browser automation skills
    ]

    return sources