feat: add BrowseShSource adapter for browse.sh skills catalog

Adds BrowseShSource — a new skill source adapter that integrates
Browserbase's browse.sh catalog (169+ site-specific SKILL.md files)
into the Hermes Skills Hub.

- BrowseShSource class in tools/skills_hub.py implementing SkillSource ABC
- Fetches browse.sh catalog API with 1h TTL cache
- Case-insensitive substring search across name, title, description, hostname, category, tags
- fetch() downloads SKILL.md via sourceUrl (GitHub HTML -> raw URL conversion)
- Registered in create_source_router() after LobeHubSource
- Tests in tests/tools/test_skills_hub_browse_sh.py (7 tests, all passing)
This commit is contained in:
Kyle Jeong 2026-05-19 06:32:16 +00:00 committed by Teknium
parent 2b41f9d893
commit 57145ca146
2 changed files with 288 additions and 0 deletions

View file

@ -2350,6 +2350,161 @@ class LobeHubSource(SkillSource):
return "\n".join(fm_lines) + "\n\n" + "\n".join(body_lines) + "\n"
# ---------------------------------------------------------------------------
# browse.sh source adapter
# ---------------------------------------------------------------------------
class BrowseShSource(SkillSource):
    """Discover and install site-specific browser automation skills from browse.sh.

    browse.sh (https://browse.sh) is Browserbase's catalog of 169+ SKILL.md files
    that describe how to automate specific websites (Airbnb, Amazon, arXiv, etc.).
    Each catalog entry carries a ``sourceUrl`` for its SKILL.md on GitHub; both
    ``github.com/.../blob/...`` page URLs and ``raw.githubusercontent.com`` URLs
    are handled (see ``_to_raw_url``).

    Catalog entries come from a remote JSON API, so every field is treated as
    untrusted: missing, ``null`` or wrongly-typed values are coerced to safe
    defaults instead of raising.
    """

    CATALOG_URL = "https://browse.sh/api/skills"
    _CACHE_KEY = "browse_sh_catalog"
    _SOURCE_ID = "browse-sh"
    _ID_PREFIX = "browse-sh/"
    _TRUST_LEVEL = "community"
    _HTTP_TIMEOUT = 20  # seconds, applied to both catalog and SKILL.md requests
    _MAX_DESCRIPTION = 1024

    def source_id(self) -> str:
        """Return the stable identifier of this source (``"browse-sh"``)."""
        return self._SOURCE_ID

    def trust_level_for(self, identifier: str) -> str:
        """Return the trust level for *identifier*; always ``"community"``."""
        return self._TRUST_LEVEL

    # -- small coercion helpers for untrusted catalog values ----------------

    @staticmethod
    def _as_text(value) -> str:
        """Return *value* if it is a string, otherwise an empty string."""
        return value if isinstance(value, str) else ""

    @staticmethod
    def _as_tags(value) -> List[str]:
        """Return the string members of *value* if it is a list, else ``[]``."""
        if not isinstance(value, list):
            return []
        return [tag for tag in value if isinstance(tag, str)]

    # -- catalog access ------------------------------------------------------

    def _fetch_catalog(self) -> List[Dict]:
        """Return the catalog entries as a list of dicts.

        A non-empty cached list (``_read_index_cache``) is used when available;
        otherwise the catalog API is queried. Any network error, non-200
        status, undecodable body or unexpected payload shape yields ``[]``.
        Non-dict entries are dropped so callers can rely on ``entry.get``.
        """
        cached = _read_index_cache(self._CACHE_KEY)
        if isinstance(cached, list) and cached:
            return [entry for entry in cached if isinstance(entry, dict)]

        try:
            resp = httpx.get(self.CATALOG_URL, timeout=self._HTTP_TIMEOUT)
            if resp.status_code != 200:
                return []
            data = resp.json()
        except (httpx.HTTPError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            return []

        skills = data.get("skills") if isinstance(data, dict) else None
        if not isinstance(skills, list):
            return []

        entries = [entry for entry in skills if isinstance(entry, dict)]
        # Do not cache an empty result: a transient upstream hiccup would
        # otherwise hide the whole catalog until the cache entry expires.
        if entries:
            _write_index_cache(self._CACHE_KEY, entries)
        return entries

    def _find_item(self, slug: str) -> Optional[Dict]:
        """Return the catalog entry whose ``slug`` equals *slug*, or ``None``."""
        return next(
            (entry for entry in self._fetch_catalog() if entry.get("slug") == slug),
            None,
        )

    def _item_to_meta(self, item: Dict) -> Optional[SkillMeta]:
        """Convert a catalog entry into a ``SkillMeta``.

        Returns ``None`` when the entry lacks a usable ``slug`` or ``name``.
        The description falls back to the title, then to the name, and is
        truncated to 1024 characters (with a trailing ``...``).
        """
        slug = self._as_text(item.get("slug"))
        name = self._as_text(item.get("name"))
        if not slug or not name:
            return None

        title = self._as_text(item.get("title")) or name
        description = self._as_text(item.get("description")) or title
        if len(description) > self._MAX_DESCRIPTION:
            description = description[: self._MAX_DESCRIPTION - 3] + "..."

        return SkillMeta(
            name=name,
            description=description,
            source=self._SOURCE_ID,
            identifier=f"{self._ID_PREFIX}{slug}",
            trust_level=self._TRUST_LEVEL,
            tags=self._as_tags(item.get("tags")),
            extra={
                "slug": slug,
                "hostname": item.get("hostname", ""),
                "category": item.get("category", ""),
                "source_url": item.get("sourceUrl", ""),
                "recommended_method": item.get("recommendedMethod", ""),
                "proxies": item.get("proxies", False),
                "install_count": item.get("installCount", 0),
            },
        )

    def _search_text(self, item: Dict) -> str:
        """Return the lower-cased text of *item* that ``search`` matches against."""
        fields = [
            self._as_text(item.get(key))
            for key in ("name", "title", "description", "hostname", "category")
        ]
        fields.extend(self._as_tags(item.get("tags")))
        return " ".join(fields).lower()

    # -- SkillSource interface ----------------------------------------------

    def search(self, query: str, limit: int = 10) -> List[SkillMeta]:
        """Return up to *limit* skills whose text contains *query*.

        Matching is a case-insensitive substring test over name, title,
        description, hostname, category and tags. An empty (or whitespace-only)
        query matches every entry. A non-positive *limit* returns ``[]``.
        """
        if limit <= 0:
            return []

        needle = (query or "").strip().lower()
        results = []
        for item in self._fetch_catalog():
            if needle and needle not in self._search_text(item):
                continue
            meta = self._item_to_meta(item)
            if meta is None:
                continue
            results.append(meta)
            if len(results) >= limit:
                break
        return results

    def inspect(self, identifier: str) -> Optional[SkillMeta]:
        """Return metadata for *identifier*, or ``None`` if it is not in the catalog."""
        slug = self._slug_from_identifier(identifier)
        if not slug:
            return None
        item = self._find_item(slug)
        return self._item_to_meta(item) if item is not None else None

    def fetch(self, identifier: str) -> Optional[SkillBundle]:
        """Download the SKILL.md for *identifier* and wrap it in a ``SkillBundle``.

        Returns ``None`` when the skill is unknown, has no acceptable
        ``sourceUrl`` (see ``_to_raw_url``), or the download fails / does not
        return HTTP 200.
        """
        slug = self._slug_from_identifier(identifier)
        if not slug:
            return None

        item = self._find_item(slug)
        if item is None:
            return None

        source_url = self._as_text(item.get("sourceUrl"))
        if not source_url:
            return None

        # Convert GitHub HTML URL to raw URL if needed; anything that is not
        # a recognised GitHub location is refused.
        raw_url = self._to_raw_url(source_url)
        if not raw_url:
            return None

        try:
            resp = httpx.get(
                raw_url, timeout=self._HTTP_TIMEOUT, follow_redirects=True
            )
        except httpx.HTTPError:
            return None
        if resp.status_code != 200:
            return None
        content = resp.text

        meta = self._item_to_meta(item)
        name = meta.name if meta else slug.split("/")[-1]
        return SkillBundle(
            name=name,
            files={"SKILL.md": content},
            source=self._SOURCE_ID,
            identifier=identifier,
            trust_level=self._TRUST_LEVEL,
            metadata={
                "slug": slug,
                "hostname": item.get("hostname", ""),
                "source_url": source_url,
            },
        )

    # -- identifier / URL helpers -------------------------------------------

    def _slug_from_identifier(self, identifier: str) -> str:
        """Extract slug from identifier like 'browse-sh/airbnb.com/search-listings-abc'.

        An identifier without the ``browse-sh/`` prefix is returned unchanged;
        a non-string identifier yields ``""``.
        """
        if not isinstance(identifier, str):
            return ""
        if identifier.startswith(self._ID_PREFIX):
            return identifier[len(self._ID_PREFIX):]
        return identifier

    def _to_raw_url(self, url: str) -> Optional[str]:
        """Convert a GitHub HTML URL to a raw.githubusercontent.com URL.

        Accepted inputs (``https`` only):

        * ``https://raw.githubusercontent.com/...`` -> returned unchanged
        * ``https://github.com/<owner>/<repo>/blob/<branch>/<path>``
        * ``https://github.com/<owner>/<repo>/raw/<branch>/<path>``

        The host is compared exactly (not by substring), so a URL on another
        host that merely mentions ``raw.githubusercontent.com`` is rejected.
        Query strings and fragments on github.com URLs (e.g. ``?plain=1``)
        are dropped. Returns ``None`` for anything else.
        """
        # Local import, mirroring the original block's function-scope import.
        from urllib.parse import urlsplit

        if not isinstance(url, str):
            return None
        candidate = url.strip()
        try:
            parts = urlsplit(candidate)
            host = (parts.hostname or "").lower()
        except ValueError:
            # urlsplit rejects malformed netlocs (e.g. an unclosed IPv6 literal).
            return None

        if parts.scheme != "https":
            return None
        if host == "raw.githubusercontent.com":
            return candidate
        if host not in ("github.com", "www.github.com"):
            return None

        # "/owner/repo/blob/branch/some/path" -> ["", owner, repo, kind, branch, path]
        pieces = parts.path.split("/", 5)
        if len(pieces) != 6:
            return None
        _, owner, repo, kind, branch, path = pieces
        if kind not in ("blob", "raw"):
            return None
        if not (owner and repo and branch and path):
            return None
        return f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{path}"
# ---------------------------------------------------------------------------
# Official optional skills source adapter
# ---------------------------------------------------------------------------
@ -3143,6 +3298,7 @@ def create_source_router(auth: Optional[GitHubAuth] = None) -> List[SkillSource]
ClawHubSource(),
ClaudeMarketplaceSource(auth=auth),
LobeHubSource(),
BrowseShSource(), # browse.sh: 169+ site-specific browser automation skills
]
return sources