feat: add BrowseShSource adapter for browse.sh skills catalog

Adds BrowseShSource — a new skill source adapter that integrates
Browserbase's browse.sh catalog (169+ site-specific SKILL.md files)
into the Hermes Skills Hub.

- BrowseShSource class in tools/skills_hub.py implementing SkillSource ABC
- Fetches browse.sh catalog API with 1h TTL cache
- Case-insensitive substring search across name, title, description, hostname, category, tags
- fetch() downloads SKILL.md via sourceUrl (GitHub HTML -> raw URL conversion)
- Registered in create_source_router() after LobeHubSource
- Tests in tests/tools/test_skills_hub_browse_sh.py (7 tests, all passing)
This commit is contained in:
Kyle Jeong 2026-05-19 06:32:16 +00:00 committed by Teknium
parent 2b41f9d893
commit 57145ca146
2 changed files with 288 additions and 0 deletions

View file

@ -0,0 +1,132 @@
#!/usr/bin/env python3
import unittest
from unittest.mock import patch
from tools.skills_hub import BrowseShSource, SkillMeta, SkillBundle
# Two catalog entries shaped like the items the adapter reads from the
# browse.sh catalog. They deliberately differ in how ``sourceUrl`` is spelled
# so both URL forms handled by the adapter are covered.
SAMPLE_CATALOG = [
    # Entry whose sourceUrl is a GitHub HTML "blob" page URL.
    {
        "slug": "airbnb.com/search-listings-ddgioa",
        "name": "airbnb.com",
        "title": "Airbnb Search Listings",
        "description": "Search and browse Airbnb listings by location and dates.",
        "hostname": "airbnb.com",
        "category": "travel",
        "tags": ["travel", "accommodation"],
        "sourceUrl": "https://github.com/browserbase/browse-sh/blob/main/skills/airbnb.com/SKILL.md",
        "recommendedMethod": "stagehand",
        "proxies": False,
        "installCount": 42,
    },
    # Entry whose sourceUrl is already a raw.githubusercontent.com URL.
    {
        "slug": "amazon.com/search-products-xyz",
        "name": "amazon.com",
        "title": "Amazon Product Search",
        "description": "Search for products on Amazon.",
        "hostname": "amazon.com",
        "category": "shopping",
        "tags": ["shopping", "ecommerce"],
        "sourceUrl": "https://raw.githubusercontent.com/browserbase/browse-sh/main/skills/amazon.com/SKILL.md",
        "recommendedMethod": "stagehand",
        "proxies": False,
        "installCount": 99,
    },
]
class _MockResponse:
    """Bare-bones stand-in for an HTTP response object.

    Exposes only ``status_code``, ``text``, ``headers`` and ``json()``.
    """

    def __init__(self, status_code=200, json_data=None, text="", headers=None):
        # A falsy ``headers`` argument (None or empty) becomes a fresh dict so
        # instances never share one mutable mapping.
        self.headers = headers if headers else {}
        self.text = text
        self.status_code = status_code
        self._json_data = json_data

    def json(self):
        """Return the payload supplied as ``json_data`` at construction."""
        payload = self._json_data
        return payload
class TestBrowseShSource(unittest.TestCase):
    """Exercise BrowseShSource against SAMPLE_CATALOG with network access mocked."""

    # Identifier of the first SAMPLE_CATALOG entry: source prefix + slug.
    AIRBNB_ID = "browse-sh/airbnb.com/search-listings-ddgioa"

    def setUp(self):
        self.src = BrowseShSource()

    def test_source_id(self):
        """The adapter reports the fixed source id."""
        self.assertEqual(self.src.source_id(), "browse-sh")

    @patch.object(BrowseShSource, "_fetch_catalog", return_value=SAMPLE_CATALOG)
    def test_search_returns_results(self, _mock_catalog):
        """A matching query yields SkillMeta objects with the expected fields."""
        hits = self.src.search("airbnb", limit=10)
        self.assertGreaterEqual(len(hits), 1)

        first = hits[0]
        self.assertIsInstance(first, SkillMeta)
        expected_fields = (
            ("name", "airbnb.com"),
            ("source", "browse-sh"),
            ("trust_level", "community"),
            ("identifier", self.AIRBNB_ID),
        )
        for attr, wanted in expected_fields:
            self.assertEqual(getattr(first, attr), wanted)
        self.assertIn("travel", first.tags)

    @patch.object(BrowseShSource, "_fetch_catalog", return_value=SAMPLE_CATALOG)
    def test_search_filters_by_query(self, _mock_catalog):
        """A query narrows the results; an empty query returns every entry."""
        amazon_hits = self.src.search("amazon", limit=10)
        self.assertEqual(len(amazon_hits), 1)
        self.assertEqual(amazon_hits[0].name, "amazon.com")

        everything = self.src.search("", limit=10)
        self.assertEqual(len(everything), 2)

    @patch("tools.skills_hub.httpx.get")
    @patch.object(BrowseShSource, "_fetch_catalog", return_value=SAMPLE_CATALOG)
    def test_fetch_returns_bundle(self, _mock_catalog, mock_get):
        """fetch() downloads SKILL.md from a raw URL and wraps it in a SkillBundle."""
        mock_get.return_value = _MockResponse(
            status_code=200,
            text="# Airbnb Skill\n\nSearch and book Airbnb listings.",
        )

        bundle = self.src.fetch(self.AIRBNB_ID)

        self.assertIsNotNone(bundle)
        self.assertIsInstance(bundle, SkillBundle)
        self.assertEqual(bundle.name, "airbnb.com")
        self.assertIn("SKILL.md", bundle.files)
        self.assertIn("Airbnb", bundle.files["SKILL.md"])
        self.assertEqual(bundle.source, "browse-sh")
        self.assertEqual(bundle.trust_level, "community")
        self.assertEqual(bundle.identifier, self.AIRBNB_ID)

        # The catalog entry uses an HTML blob URL; the single HTTP request
        # must go to the raw-content host instead.
        mock_get.assert_called_once()
        requested_url = mock_get.call_args[0][0]
        self.assertIn("raw.githubusercontent.com", requested_url)

    @patch.object(BrowseShSource, "_fetch_catalog", return_value=SAMPLE_CATALOG)
    def test_fetch_missing_slug_returns_none(self, _mock_catalog):
        """An identifier that is not in the catalog yields None."""
        missing = self.src.fetch("browse-sh/nonexistent.com/no-such-skill")
        self.assertIsNone(missing)

    @patch.object(BrowseShSource, "_fetch_catalog", return_value=SAMPLE_CATALOG)
    def test_inspect_returns_meta(self, _mock_catalog):
        """inspect() returns metadata, including the catalog extras."""
        info = self.src.inspect(self.AIRBNB_ID)

        self.assertIsNotNone(info)
        self.assertIsInstance(info, SkillMeta)
        self.assertEqual(info.name, "airbnb.com")
        self.assertEqual(info.identifier, self.AIRBNB_ID)

        extras = info.extra
        self.assertEqual(extras["hostname"], "airbnb.com")
        self.assertEqual(extras["category"], "travel")
        self.assertEqual(extras["install_count"], 42)

    def test_to_raw_url_conversion(self):
        """_to_raw_url converts blob URLs, keeps raw URLs, rejects anything else."""
        convert = self.src._to_raw_url

        # GitHub HTML URL is rewritten to the raw-content host.
        blob_url = "https://github.com/browserbase/browse-sh/blob/main/skills/airbnb.com/SKILL.md"
        self.assertEqual(
            convert(blob_url),
            "https://raw.githubusercontent.com/browserbase/browse-sh/main/skills/airbnb.com/SKILL.md",
        )

        # An already-raw URL comes back unchanged.
        raw_url = "https://raw.githubusercontent.com/browserbase/browse-sh/main/skills/amazon.com/SKILL.md"
        self.assertEqual(convert(raw_url), raw_url)

        # Unrecognised URL yields None.
        self.assertIsNone(convert("https://example.com/something"))
# Allow running this test module directly (python <this file>) in addition to
# discovery by a test runner.
if __name__ == "__main__":
    unittest.main()

View file

@ -2350,6 +2350,161 @@ class LobeHubSource(SkillSource):
return "\n".join(fm_lines) + "\n\n" + "\n".join(body_lines) + "\n"
# ---------------------------------------------------------------------------
# browse.sh source adapter
# ---------------------------------------------------------------------------
class BrowseShSource(SkillSource):
    """Discover and install site-specific browser automation skills from browse.sh.

    browse.sh (https://browse.sh) is Browserbase's catalog of 169+ SKILL.md files
    that describe how to automate specific websites (Airbnb, Amazon, arXiv, etc.).
    Each catalog entry carries a ``sourceUrl`` pointing at its SKILL.md on GitHub,
    either as an HTML "blob" page URL or as a raw.githubusercontent.com URL.

    Catalog entries come from a remote API, so every field is treated as
    untrusted: missing, ``null`` or wrongly-typed values are tolerated, and only
    URLs whose *host* is GitHub are ever downloaded.
    """

    CATALOG_URL = "https://browse.sh/api/skills"
    _CACHE_KEY = "browse_sh_catalog"
    # String-valued catalog fields searched by search(); tags are added separately.
    _SEARCH_FIELDS = ("name", "title", "description", "hostname", "category")

    def source_id(self) -> str:
        """Return the fixed id of this source (also the identifier prefix)."""
        return "browse-sh"

    def trust_level_for(self, identifier: str) -> str:
        """Return the trust level for *identifier*; always ``"community"``."""
        return "community"

    @staticmethod
    def _as_text(value) -> str:
        """Return *value* when it is a string, otherwise an empty string."""
        return value if isinstance(value, str) else ""

    @staticmethod
    def _as_tags(value) -> List[str]:
        """Return the string members of *value* when it is a list, else ``[]``."""
        if not isinstance(value, list):
            return []
        return [tag for tag in value if isinstance(tag, str)]

    def _fetch_catalog(self) -> List[Dict]:
        """Return the catalog entries, served from the index cache when present.

        Returns an empty list when the request fails, the status is not 200,
        the body is not valid JSON, or the payload has no ``skills`` list.
        """
        cached = _read_index_cache(self._CACHE_KEY)
        if cached is not None:
            return cached
        try:
            resp = httpx.get(self.CATALOG_URL, timeout=20)
            if resp.status_code != 200:
                return []
            data = resp.json()
        except (httpx.HTTPError, ValueError):
            # ValueError covers json.JSONDecodeError and body-decoding errors.
            return []
        skills = data.get("skills") if isinstance(data, dict) else None
        if not isinstance(skills, list):
            return []
        # Drop entries that are not JSON objects; callers use dict access.
        skills = [entry for entry in skills if isinstance(entry, dict)]
        # Never cache an empty catalog: a malformed or transiently empty
        # response must not hide every skill until the cache entry expires.
        if skills:
            _write_index_cache(self._CACHE_KEY, skills)
        return skills

    def _item_to_meta(self, item: Dict) -> Optional[SkillMeta]:
        """Build a SkillMeta from one catalog entry.

        Returns None when the entry lacks a usable ``slug`` or ``name``. The
        description falls back to the title, then the name, and is capped at
        1024 characters.
        """
        slug = self._as_text(item.get("slug"))
        name = self._as_text(item.get("name"))
        if not slug or not name:
            return None
        title = self._as_text(item.get("title")) or name
        description = self._as_text(item.get("description")) or title
        if len(description) > 1024:
            description = description[:1021] + "..."
        return SkillMeta(
            name=name,
            description=description,
            source="browse-sh",
            identifier=f"browse-sh/{slug}",
            trust_level="community",
            tags=self._as_tags(item.get("tags")),
            extra={
                "slug": slug,
                "hostname": item.get("hostname", ""),
                "category": item.get("category", ""),
                "source_url": item.get("sourceUrl", ""),
                "recommended_method": item.get("recommendedMethod", ""),
                "proxies": item.get("proxies", False),
                "install_count": item.get("installCount", 0),
            },
        )

    def search(self, query: str, limit: int = 10) -> List[SkillMeta]:
        """Return up to *limit* entries whose text contains *query*.

        Matching is a case-insensitive substring test over name, title,
        description, hostname, category and tags. An empty query matches every
        entry. A non-positive *limit* yields an empty list.
        """
        if limit <= 0:
            return []
        needle = query.lower()
        results: List[SkillMeta] = []
        for item in self._fetch_catalog():
            if not isinstance(item, dict):
                continue
            if needle:
                fields = [self._as_text(item.get(key)) for key in self._SEARCH_FIELDS]
                fields.append(" ".join(self._as_tags(item.get("tags"))))
                if needle not in " ".join(fields).lower():
                    continue
            meta = self._item_to_meta(item)
            if meta is None:
                continue
            results.append(meta)
            if len(results) >= limit:
                break
        return results

    def _find_by_slug(self, slug: str) -> Optional[Dict]:
        """Return the first catalog entry whose ``slug`` equals *slug*, or None."""
        return next(
            (
                entry
                for entry in self._fetch_catalog()
                if isinstance(entry, dict) and entry.get("slug") == slug
            ),
            None,
        )

    def inspect(self, identifier: str) -> Optional[SkillMeta]:
        """Return metadata for *identifier*, or None when it is not in the catalog."""
        slug = self._slug_from_identifier(identifier)
        if not slug:
            return None
        item = self._find_by_slug(slug)
        if item is None:
            return None
        return self._item_to_meta(item)

    def fetch(self, identifier: str) -> Optional[SkillBundle]:
        """Download the SKILL.md for *identifier* and wrap it in a SkillBundle.

        Returns None when the entry is unknown, has no acceptable ``sourceUrl``,
        or the download fails or returns a non-200 status.
        """
        slug = self._slug_from_identifier(identifier)
        if not slug:
            return None
        item = self._find_by_slug(slug)
        if not item:
            return None
        source_url = self._as_text(item.get("sourceUrl"))
        if not source_url:
            return None
        # Convert GitHub HTML URL to raw URL if needed; rejects non-GitHub hosts.
        raw_url = self._to_raw_url(source_url)
        if not raw_url:
            return None
        try:
            resp = httpx.get(raw_url, timeout=20, follow_redirects=True)
            if resp.status_code != 200:
                return None
            content = resp.text
        except httpx.HTTPError:
            return None
        meta = self._item_to_meta(item)
        # Entries without a usable name fall back to the last slug segment.
        name = meta.name if meta else slug.split("/")[-1]
        return SkillBundle(
            name=name,
            files={"SKILL.md": content},
            source="browse-sh",
            identifier=identifier,
            trust_level="community",
            metadata={
                "slug": slug,
                "hostname": item.get("hostname", ""),
                "source_url": source_url,
            },
        )

    def _slug_from_identifier(self, identifier: str) -> str:
        """Extract slug from identifier like 'browse-sh/airbnb.com/search-listings-abc'.

        An identifier without the ``browse-sh/`` prefix is returned unchanged.
        """
        if identifier.startswith("browse-sh/"):
            return identifier[len("browse-sh/"):]
        return identifier

    def _to_raw_url(self, url: str) -> Optional[str]:
        """Convert a GitHub HTML URL to a raw.githubusercontent.com URL.

        * A URL whose host is ``raw.githubusercontent.com`` is accepted; an
          ``https`` one is returned unchanged, an ``http`` one is upgraded.
        * ``https://github.com/<owner>/<repo>/blob/<branch>/<path>`` is rewritten
          to the raw form; any query string or fragment is dropped.
        * Anything else returns None.

        The decision is made on the parsed host, not on a substring of the URL,
        so a foreign URL that merely mentions the raw host in its path, query
        or userinfo is rejected.
        """
        from urllib.parse import urlsplit, urlunsplit

        try:
            parts = urlsplit(url)
        except ValueError:
            # urlsplit raises on malformed netlocs (e.g. a bad IPv6 literal).
            return None
        if parts.scheme not in ("http", "https"):
            return None
        host = (parts.hostname or "").lower()
        if host == "raw.githubusercontent.com":
            if parts.scheme == "https":
                return url
            return urlunsplit(parts._replace(scheme="https"))
        if host != "github.com":
            return None
        # Path layout: /owner/repo/blob/branch/path... -> ['', owner, repo, 'blob', branch, ...]
        segments = parts.path.split("/")
        if len(segments) < 6 or segments[3] != "blob" or not all(segments[1:6]):
            return None
        owner, repo, branch = segments[1], segments[2], segments[4]
        path = "/".join(segments[5:])
        return f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{path}"
# ---------------------------------------------------------------------------
# Official optional skills source adapter
# ---------------------------------------------------------------------------
@ -3143,6 +3298,7 @@ def create_source_router(auth: Optional[GitHubAuth] = None) -> List[SkillSource]
ClawHubSource(),
ClaudeMarketplaceSource(auth=auth),
LobeHubSource(),
BrowseShSource(), # browse.sh: 169+ site-specific browser automation skills
]
return sources