mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-06 07:51:53 +00:00
feat: add BrowseShSource adapter for browse.sh skills catalog
Adds BrowseShSource — a new skill source adapter that integrates Browserbase's browse.sh catalog (169+ site-specific SKILL.md files) into the Hermes Skills Hub. - BrowseShSource class in tools/skills_hub.py implementing SkillSource ABC - Fetches browse.sh catalog API with 1h TTL cache - Full-text search across name, title, description, hostname, category, tags - fetch() downloads SKILL.md via sourceUrl (GitHub HTML -> raw URL conversion) - Registered in create_source_router() after LobeHubSource - Tests in tests/tools/test_skills_hub_browse_sh.py (7 tests, all passing)
This commit is contained in:
parent
2b41f9d893
commit
57145ca146
2 changed files with 288 additions and 0 deletions
|
|
@ -2350,6 +2350,161 @@ class LobeHubSource(SkillSource):
|
|||
return "\n".join(fm_lines) + "\n\n" + "\n".join(body_lines) + "\n"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# browse.sh source adapter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class BrowseShSource(SkillSource):
|
||||
"""Discover and install site-specific browser automation skills from browse.sh.
|
||||
|
||||
browse.sh (https://browse.sh) is Browserbase's catalog of 169+ SKILL.md files
|
||||
that describe how to automate specific websites (Airbnb, Amazon, arXiv, etc.).
|
||||
Each skill has a sourceUrl pointing to the raw SKILL.md on GitHub.
|
||||
"""
|
||||
|
||||
CATALOG_URL = "https://browse.sh/api/skills"
|
||||
_CACHE_KEY = "browse_sh_catalog"
|
||||
|
||||
def source_id(self) -> str:
|
||||
return "browse-sh"
|
||||
|
||||
def trust_level_for(self, identifier: str) -> str:
|
||||
return "community"
|
||||
|
||||
def _fetch_catalog(self) -> List[Dict]:
|
||||
cached = _read_index_cache(self._CACHE_KEY)
|
||||
if cached is not None:
|
||||
return cached
|
||||
try:
|
||||
resp = httpx.get(self.CATALOG_URL, timeout=20)
|
||||
if resp.status_code != 200:
|
||||
return []
|
||||
data = resp.json()
|
||||
except (httpx.HTTPError, json.JSONDecodeError):
|
||||
return []
|
||||
skills = data.get("skills", []) if isinstance(data, dict) else []
|
||||
if isinstance(skills, list):
|
||||
_write_index_cache(self._CACHE_KEY, skills)
|
||||
return skills if isinstance(skills, list) else []
|
||||
|
||||
def _item_to_meta(self, item: Dict) -> Optional[SkillMeta]:
|
||||
slug = item.get("slug", "")
|
||||
name = item.get("name", "")
|
||||
title = item.get("title", name)
|
||||
description = item.get("description", title)
|
||||
if not slug or not name:
|
||||
return None
|
||||
if len(description) > 1024:
|
||||
description = description[:1021] + "..."
|
||||
return SkillMeta(
|
||||
name=name,
|
||||
description=description,
|
||||
source="browse-sh",
|
||||
identifier=f"browse-sh/{slug}",
|
||||
trust_level="community",
|
||||
tags=item.get("tags", []),
|
||||
extra={
|
||||
"slug": slug,
|
||||
"hostname": item.get("hostname", ""),
|
||||
"category": item.get("category", ""),
|
||||
"source_url": item.get("sourceUrl", ""),
|
||||
"recommended_method": item.get("recommendedMethod", ""),
|
||||
"proxies": item.get("proxies", False),
|
||||
"install_count": item.get("installCount", 0),
|
||||
},
|
||||
)
|
||||
|
||||
def search(self, query: str, limit: int = 10) -> List[SkillMeta]:
|
||||
catalog = self._fetch_catalog()
|
||||
query_lower = query.lower()
|
||||
results = []
|
||||
for item in catalog:
|
||||
text = " ".join([
|
||||
item.get("name", ""),
|
||||
item.get("title", ""),
|
||||
item.get("description", ""),
|
||||
item.get("hostname", ""),
|
||||
item.get("category", ""),
|
||||
" ".join(item.get("tags", [])),
|
||||
]).lower()
|
||||
if not query_lower or query_lower in text:
|
||||
meta = self._item_to_meta(item)
|
||||
if meta:
|
||||
results.append(meta)
|
||||
if len(results) >= limit:
|
||||
break
|
||||
return results
|
||||
|
||||
def inspect(self, identifier: str) -> Optional[SkillMeta]:
|
||||
slug = self._slug_from_identifier(identifier)
|
||||
if not slug:
|
||||
return None
|
||||
catalog = self._fetch_catalog()
|
||||
for item in catalog:
|
||||
if item.get("slug") == slug:
|
||||
return self._item_to_meta(item)
|
||||
return None
|
||||
|
||||
def fetch(self, identifier: str) -> Optional[SkillBundle]:
|
||||
slug = self._slug_from_identifier(identifier)
|
||||
if not slug:
|
||||
return None
|
||||
catalog = self._fetch_catalog()
|
||||
item = next((i for i in catalog if i.get("slug") == slug), None)
|
||||
if not item:
|
||||
return None
|
||||
source_url = item.get("sourceUrl", "")
|
||||
if not source_url:
|
||||
return None
|
||||
# Convert GitHub HTML URL to raw URL if needed
|
||||
raw_url = self._to_raw_url(source_url)
|
||||
if not raw_url:
|
||||
return None
|
||||
try:
|
||||
resp = httpx.get(raw_url, timeout=20, follow_redirects=True)
|
||||
if resp.status_code != 200:
|
||||
return None
|
||||
content = resp.text
|
||||
except httpx.HTTPError:
|
||||
return None
|
||||
meta = self._item_to_meta(item)
|
||||
name = meta.name if meta else slug.split("/")[-1]
|
||||
return SkillBundle(
|
||||
name=name,
|
||||
files={"SKILL.md": content},
|
||||
source="browse-sh",
|
||||
identifier=identifier,
|
||||
trust_level="community",
|
||||
metadata={
|
||||
"slug": slug,
|
||||
"hostname": item.get("hostname", ""),
|
||||
"source_url": source_url,
|
||||
},
|
||||
)
|
||||
|
||||
def _slug_from_identifier(self, identifier: str) -> str:
|
||||
"""Extract slug from identifier like 'browse-sh/airbnb.com/search-listings-abc'."""
|
||||
if identifier.startswith("browse-sh/"):
|
||||
return identifier[len("browse-sh/"):]
|
||||
return identifier
|
||||
|
||||
def _to_raw_url(self, url: str) -> Optional[str]:
|
||||
"""Convert a GitHub HTML URL to a raw.githubusercontent.com URL."""
|
||||
if "raw.githubusercontent.com" in url:
|
||||
return url
|
||||
# https://github.com/owner/repo/blob/branch/path -> raw URL
|
||||
import re
|
||||
m = re.match(
|
||||
r"https://github\.com/([^/]+)/([^/]+)/blob/([^/]+)/(.+)",
|
||||
url,
|
||||
)
|
||||
if m:
|
||||
owner, repo, branch, path = m.groups()
|
||||
return f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{path}"
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Official optional skills source adapter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -3143,6 +3298,7 @@ def create_source_router(auth: Optional[GitHubAuth] = None) -> List[SkillSource]
|
|||
ClawHubSource(),
|
||||
ClaudeMarketplaceSource(auth=auth),
|
||||
LobeHubSource(),
|
||||
BrowseShSource(), # browse.sh: 169+ site-specific browser automation skills
|
||||
]
|
||||
|
||||
return sources
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue