mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
feat: add BrowseShSource adapter for browse.sh skills catalog
Adds BrowseShSource — a new skill source adapter that integrates Browserbase's browse.sh catalog (169+ site-specific SKILL.md files) into the Hermes Skills Hub.
- BrowseShSource class in tools/skills_hub.py implementing the SkillSource ABC
- Fetches the browse.sh catalog API with a 1h TTL cache
- Full-text search across name, title, description, hostname, category, and tags
- fetch() downloads SKILL.md via sourceUrl (GitHub HTML -> raw URL conversion)
- Registered in create_source_router() after LobeHubSource
- Tests in tests/tools/test_skills_hub_browse_sh.py (7 tests, all passing)
This commit is contained in:
parent
2b41f9d893
commit
57145ca146
2 changed files with 288 additions and 0 deletions
132
tests/tools/test_skills_hub_browse_sh.py
Normal file
132
tests/tools/test_skills_hub_browse_sh.py
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
from tools.skills_hub import BrowseShSource, SkillMeta, SkillBundle
|
||||
|
||||
|
||||
def _catalog_entry(slug, hostname, title, description, category, tags,
                   source_url, install_count):
    """Build one catalog record in the shape the tests hand to BrowseShSource.

    ``hostname`` is used for both the ``name`` and ``hostname`` keys, and the
    fields that are the same for every sample record are filled in here.
    """
    return {
        "slug": slug,
        "name": hostname,
        "title": title,
        "description": description,
        "hostname": hostname,
        "category": category,
        "tags": tags,
        "sourceUrl": source_url,
        "recommendedMethod": "stagehand",
        "proxies": False,
        "installCount": install_count,
    }


# Two records on purpose: the first uses a github.com "blob" page URL, the
# second an already-raw raw.githubusercontent.com URL.
SAMPLE_CATALOG = [
    _catalog_entry(
        slug="airbnb.com/search-listings-ddgioa",
        hostname="airbnb.com",
        title="Airbnb Search Listings",
        description="Search and browse Airbnb listings by location and dates.",
        category="travel",
        tags=["travel", "accommodation"],
        source_url="https://github.com/browserbase/browse-sh/blob/main/skills/airbnb.com/SKILL.md",
        install_count=42,
    ),
    _catalog_entry(
        slug="amazon.com/search-products-xyz",
        hostname="amazon.com",
        title="Amazon Product Search",
        description="Search for products on Amazon.",
        category="shopping",
        tags=["shopping", "ecommerce"],
        source_url="https://raw.githubusercontent.com/browserbase/browse-sh/main/skills/amazon.com/SKILL.md",
        install_count=99,
    ),
]
|
||||
|
||||
|
||||
class _MockResponse:
|
||||
def __init__(self, status_code=200, json_data=None, text="", headers=None):
|
||||
self.status_code = status_code
|
||||
self._json_data = json_data
|
||||
self.text = text
|
||||
self.headers = headers or {}
|
||||
|
||||
def json(self):
|
||||
return self._json_data
|
||||
|
||||
|
||||
class TestBrowseShSource(unittest.TestCase):
    """Unit tests for BrowseShSource with catalog and HTTP access mocked out."""

    # Identifier of the first SAMPLE_CATALOG record, shared by several tests.
    AIRBNB_ID = "browse-sh/airbnb.com/search-listings-ddgioa"

    def setUp(self):
        # A fresh adapter per test keeps the cases independent of each other.
        self.src = BrowseShSource()

    @staticmethod
    def _stub_catalog():
        # Patcher that makes _fetch_catalog hand back the in-memory sample data.
        return patch.object(BrowseShSource, "_fetch_catalog", return_value=SAMPLE_CATALOG)

    def test_source_id(self):
        self.assertEqual(self.src.source_id(), "browse-sh")

    def test_search_returns_results(self):
        with self._stub_catalog():
            hits = self.src.search("airbnb", limit=10)

        self.assertGreaterEqual(len(hits), 1)
        first = hits[0]
        self.assertIsInstance(first, SkillMeta)
        self.assertEqual(first.name, "airbnb.com")
        self.assertEqual(first.source, "browse-sh")
        self.assertEqual(first.trust_level, "community")
        self.assertEqual(first.identifier, self.AIRBNB_ID)
        self.assertIn("travel", first.tags)

    def test_search_filters_by_query(self):
        with self._stub_catalog():
            amazon_hits = self.src.search("amazon", limit=10)
            self.assertEqual(len(amazon_hits), 1)
            self.assertEqual(amazon_hits[0].name, "amazon.com")

            # An empty query lists the whole catalog.
            everything = self.src.search("", limit=10)
            self.assertEqual(len(everything), 2)

    def test_fetch_returns_bundle(self):
        skill_md = _MockResponse(
            status_code=200,
            text="# Airbnb Skill\n\nSearch and book Airbnb listings.",
        )
        with patch("tools.skills_hub.httpx.get", return_value=skill_md) as mock_get:
            with self._stub_catalog():
                bundle = self.src.fetch(self.AIRBNB_ID)

        self.assertIsNotNone(bundle)
        self.assertIsInstance(bundle, SkillBundle)
        self.assertEqual(bundle.name, "airbnb.com")
        self.assertIn("SKILL.md", bundle.files)
        self.assertIn("Airbnb", bundle.files["SKILL.md"])
        self.assertEqual(bundle.source, "browse-sh")
        self.assertEqual(bundle.trust_level, "community")
        self.assertEqual(bundle.identifier, self.AIRBNB_ID)

        # The sample sourceUrl is a github.com blob page; the download must go
        # to the raw host instead.
        mock_get.assert_called_once()
        requested_url = mock_get.call_args.args[0]
        self.assertIn("raw.githubusercontent.com", requested_url)

    def test_fetch_missing_slug_returns_none(self):
        with self._stub_catalog():
            outcome = self.src.fetch("browse-sh/nonexistent.com/no-such-skill")
        self.assertIsNone(outcome)

    def test_inspect_returns_meta(self):
        with self._stub_catalog():
            meta = self.src.inspect(self.AIRBNB_ID)

        self.assertIsNotNone(meta)
        self.assertIsInstance(meta, SkillMeta)
        self.assertEqual(meta.name, "airbnb.com")
        self.assertEqual(meta.identifier, self.AIRBNB_ID)
        extra = meta.extra
        self.assertEqual(extra["hostname"], "airbnb.com")
        self.assertEqual(extra["category"], "travel")
        self.assertEqual(extra["install_count"], 42)

    def test_to_raw_url_conversion(self):
        convert = self.src._to_raw_url

        # GitHub HTML URL should be converted
        blob_page = "https://github.com/browserbase/browse-sh/blob/main/skills/airbnb.com/SKILL.md"
        self.assertEqual(
            convert(blob_page),
            "https://raw.githubusercontent.com/browserbase/browse-sh/main/skills/airbnb.com/SKILL.md",
        )

        # Already a raw URL — should be returned unchanged
        raw_already = "https://raw.githubusercontent.com/browserbase/browse-sh/main/skills/amazon.com/SKILL.md"
        self.assertEqual(convert(raw_already), raw_already)

        # Unrecognised URL — should return None
        self.assertIsNone(convert("https://example.com/something"))
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
|
|
@ -2350,6 +2350,161 @@ class LobeHubSource(SkillSource):
|
|||
return "\n".join(fm_lines) + "\n\n" + "\n".join(body_lines) + "\n"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# browse.sh source adapter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class BrowseShSource(SkillSource):
    """Discover and install site-specific browser automation skills from browse.sh.

    browse.sh (https://browse.sh) is Browserbase's catalog of 169+ SKILL.md files
    that describe how to automate specific websites (Airbnb, Amazon, arXiv, etc.).
    Each catalog entry carries a ``sourceUrl`` for its SKILL.md on GitHub, either
    as a ``github.com/<owner>/<repo>/blob/...`` page or as a
    ``raw.githubusercontent.com`` URL; both forms are accepted.

    The catalog is remote data, so any field may be missing, ``null`` or of an
    unexpected type. The private helpers normalise values before they are used.
    """

    CATALOG_URL = "https://browse.sh/api/skills"
    _CACHE_KEY = "browse_sh_catalog"

    _ID_PREFIX = "browse-sh/"
    _MAX_DESCRIPTION = 1024
    _RAW_HOST = "raw.githubusercontent.com"
    _GITHUB_HOSTS = ("github.com", "www.github.com")
    # Catalog fields (besides tags) that search() matches the query against.
    _SEARCH_FIELDS = ("name", "title", "description", "hostname", "category")

    def source_id(self) -> str:
        """Return the identifier of this source (``"browse-sh"``)."""
        return "browse-sh"

    def trust_level_for(self, identifier: str) -> str:
        """Return the trust level, which is ``"community"`` for every skill."""
        return "community"

    # ------------------------------------------------------------------
    # Catalog access and normalisation
    # ------------------------------------------------------------------

    def _fetch_catalog(self) -> List[Dict]:
        """Return the catalog entries, or ``[]`` when they cannot be obtained.

        The index cache is consulted first. On a miss the catalog API is
        queried and the ``skills`` list of the JSON object is returned, with
        entries that are not JSON objects dropped.
        """
        cached = _read_index_cache(self._CACHE_KEY)
        if cached is not None:
            return cached
        try:
            resp = httpx.get(self.CATALOG_URL, timeout=20)
            if resp.status_code != 200:
                return []
            data = resp.json()
        except (httpx.HTTPError, ValueError):
            # ValueError covers json.JSONDecodeError as well as the
            # UnicodeDecodeError raised for a body that is not valid text.
            return []
        skills = data.get("skills") if isinstance(data, dict) else None
        if not isinstance(skills, list):
            return []
        skills = [entry for entry in skills if isinstance(entry, dict)]
        # Never cache an empty result: a malformed or temporarily empty
        # response would otherwise hide the whole catalog until the cache
        # entry is replaced.
        if skills:
            _write_index_cache(self._CACHE_KEY, skills)
        return skills

    @staticmethod
    def _str_field(item: Dict, key: str) -> str:
        """Return ``item[key]`` when it is a string, otherwise ``""``."""
        value = item.get(key)
        return value if isinstance(value, str) else ""

    @staticmethod
    def _tag_list(item: Dict) -> List[str]:
        """Return the string tags of *item*; anything else is discarded."""
        tags = item.get("tags")
        if not isinstance(tags, list):
            return []
        return [tag for tag in tags if isinstance(tag, str)]

    def _search_blob(self, item: Dict) -> str:
        """Return the lower-cased text that a search query is matched against."""
        parts = [self._str_field(item, key) for key in self._SEARCH_FIELDS]
        parts.extend(self._tag_list(item))
        return " ".join(parts).lower()

    def _find_item(self, slug: str) -> Optional[Dict]:
        """Return the first catalog entry whose ``slug`` equals *slug*, if any."""
        return next(
            (entry for entry in self._fetch_catalog() if entry.get("slug") == slug),
            None,
        )

    def _item_to_meta(self, item: Dict) -> Optional[SkillMeta]:
        """Convert a catalog entry to ``SkillMeta``.

        Returns ``None`` when the entry has no usable ``slug`` or ``name``.
        The description falls back to the title, then to the name, and is
        truncated to 1024 characters (ending in ``...``).
        """
        slug = self._str_field(item, "slug")
        name = self._str_field(item, "name")
        if not slug or not name:
            return None
        title = self._str_field(item, "title") or name
        description = self._str_field(item, "description") or title
        if len(description) > self._MAX_DESCRIPTION:
            description = description[: self._MAX_DESCRIPTION - 3] + "..."
        return SkillMeta(
            name=name,
            description=description,
            source="browse-sh",
            identifier=f"{self._ID_PREFIX}{slug}",
            trust_level="community",
            tags=self._tag_list(item),
            extra={
                "slug": slug,
                "hostname": self._str_field(item, "hostname"),
                "category": self._str_field(item, "category"),
                "source_url": self._str_field(item, "sourceUrl"),
                "recommended_method": self._str_field(item, "recommendedMethod"),
                "proxies": item.get("proxies", False),
                "install_count": item.get("installCount", 0),
            },
        )

    # ------------------------------------------------------------------
    # SkillSource operations
    # ------------------------------------------------------------------

    def search(self, query: str, limit: int = 10) -> List[SkillMeta]:
        """Return up to *limit* skills whose text contains *query*.

        Matching is a case-insensitive substring test over name, title,
        description, hostname, category and tags. An empty or whitespace-only
        query matches every entry. A non-positive *limit* yields ``[]``.
        """
        if limit <= 0:
            return []
        needle = (query or "").strip().lower()
        results: List[SkillMeta] = []
        for item in self._fetch_catalog():
            if needle and needle not in self._search_blob(item):
                continue
            meta = self._item_to_meta(item)
            if meta is None:
                continue
            results.append(meta)
            if len(results) >= limit:
                break
        return results

    def inspect(self, identifier: str) -> Optional[SkillMeta]:
        """Return the metadata for *identifier*, or ``None`` if it is unknown."""
        slug = self._slug_from_identifier(identifier)
        if not slug:
            return None
        item = self._find_item(slug)
        return self._item_to_meta(item) if item else None

    def fetch(self, identifier: str) -> Optional[SkillBundle]:
        """Download the SKILL.md for *identifier* and wrap it in a bundle.

        Returns ``None`` when the skill is unknown, its ``sourceUrl`` is
        missing or does not point at GitHub, the download fails, or the
        downloaded file is blank.
        """
        slug = self._slug_from_identifier(identifier)
        if not slug:
            return None
        item = self._find_item(slug)
        if not item:
            return None
        source_url = self._str_field(item, "sourceUrl")
        # Convert GitHub HTML URL to raw URL if needed
        raw_url = self._to_raw_url(source_url) if source_url else None
        if not raw_url:
            return None
        try:
            resp = httpx.get(raw_url, timeout=20, follow_redirects=True)
        except httpx.HTTPError:
            return None
        if resp.status_code != 200:
            return None
        content = resp.text
        if not content.strip():
            # A blank SKILL.md is not an installable skill.
            return None
        meta = self._item_to_meta(item)
        # NOTE(review): the catalog ``name`` looks like a hostname, so several
        # skills for one site may share a bundle name — confirm how the
        # installer handles that.
        name = meta.name if meta else slug.split("/")[-1]
        return SkillBundle(
            name=name,
            files={"SKILL.md": content},
            source="browse-sh",
            # Always emit the prefixed form so the bundle identifier agrees
            # with the one reported by search() and inspect().
            identifier=f"{self._ID_PREFIX}{slug}",
            trust_level="community",
            metadata={
                "slug": slug,
                "hostname": self._str_field(item, "hostname"),
                "source_url": source_url,
            },
        )

    def _slug_from_identifier(self, identifier: str) -> str:
        """Extract slug from identifier like 'browse-sh/airbnb.com/search-listings-abc'.

        An identifier without the ``browse-sh/`` prefix is returned unchanged.
        """
        if identifier.startswith(self._ID_PREFIX):
            return identifier[len(self._ID_PREFIX):]
        return identifier

    def _to_raw_url(self, url: str) -> Optional[str]:
        """Convert a GitHub HTML URL to a raw.githubusercontent.com URL.

        * A URL whose host is ``raw.githubusercontent.com`` is returned as is.
        * ``http(s)://github.com/<owner>/<repo>/blob/<branch>/<path>`` becomes
          ``https://raw.githubusercontent.com/<owner>/<repo>/<branch>/<path>``;
          any query string or fragment is dropped.
        * Everything else yields ``None``.

        The host is parsed rather than substring-matched, so a URL that merely
        mentions the raw host in its path or query is rejected. Branch names
        containing ``/`` cannot be told apart from path segments and are not
        supported.
        """
        from urllib.parse import urlsplit

        if not isinstance(url, str):
            return None
        url = url.strip()
        try:
            parts = urlsplit(url)
        except ValueError:
            return None
        if parts.scheme not in ("http", "https"):
            return None
        host = (parts.hostname or "").lower()
        if host == self._RAW_HOST:
            return url
        if host not in self._GITHUB_HOSTS:
            return None
        # https://github.com/owner/repo/blob/branch/path -> raw URL
        segments = parts.path.split("/")  # ['', owner, repo, 'blob', branch, ...]
        if len(segments) < 6 or segments[3] != "blob" or not all(segments[1:5]):
            return None
        owner, repo, _, branch = segments[1:5]
        path = "/".join(segments[5:])
        if not path:
            return None
        return f"https://{self._RAW_HOST}/{owner}/{repo}/{branch}/{path}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Official optional skills source adapter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -3143,6 +3298,7 @@ def create_source_router(auth: Optional[GitHubAuth] = None) -> List[SkillSource]
|
|||
ClawHubSource(),
|
||||
ClaudeMarketplaceSource(auth=auth),
|
||||
LobeHubSource(),
|
||||
BrowseShSource(), # browse.sh: 169+ site-specific browser automation skills
|
||||
]
|
||||
|
||||
return sources
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue