diff --git a/tests/tools/test_skills_hub_browse_sh.py b/tests/tools/test_skills_hub_browse_sh.py new file mode 100644 index 00000000000..f4bfd1c3acb --- /dev/null +++ b/tests/tools/test_skills_hub_browse_sh.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 + +import unittest +from unittest.mock import patch + +from tools.skills_hub import BrowseShSource, SkillMeta, SkillBundle + + +SAMPLE_CATALOG = [ + { + "slug": "airbnb.com/search-listings-ddgioa", + "name": "airbnb.com", + "title": "Airbnb Search Listings", + "description": "Search and browse Airbnb listings by location and dates.", + "hostname": "airbnb.com", + "category": "travel", + "tags": ["travel", "accommodation"], + "sourceUrl": "https://github.com/browserbase/browse-sh/blob/main/skills/airbnb.com/SKILL.md", + "recommendedMethod": "stagehand", + "proxies": False, + "installCount": 42, + }, + { + "slug": "amazon.com/search-products-xyz", + "name": "amazon.com", + "title": "Amazon Product Search", + "description": "Search for products on Amazon.", + "hostname": "amazon.com", + "category": "shopping", + "tags": ["shopping", "ecommerce"], + "sourceUrl": "https://raw.githubusercontent.com/browserbase/browse-sh/main/skills/amazon.com/SKILL.md", + "recommendedMethod": "stagehand", + "proxies": False, + "installCount": 99, + }, +] + + +class _MockResponse: + def __init__(self, status_code=200, json_data=None, text="", headers=None): + self.status_code = status_code + self._json_data = json_data + self.text = text + self.headers = headers or {} + + def json(self): + return self._json_data + + +class TestBrowseShSource(unittest.TestCase): + def setUp(self): + self.src = BrowseShSource() + + def test_source_id(self): + self.assertEqual(self.src.source_id(), "browse-sh") + + @patch.object(BrowseShSource, "_fetch_catalog", return_value=SAMPLE_CATALOG) + def test_search_returns_results(self, _mock_catalog): + results = self.src.search("airbnb", limit=10) + self.assertGreaterEqual(len(results), 1) + meta = results[0] + 
@patch("tools.skills_hub.httpx.get")
@patch.object(BrowseShSource, "_fetch_catalog", return_value=SAMPLE_CATALOG)
def test_fetch_returns_bundle(self, _mock_catalog, mock_get):
    """fetch() downloads SKILL.md from the raw GitHub URL and wraps it in a bundle.

    The Airbnb fixture's ``sourceUrl`` is a ``github.com/.../blob/...`` page,
    so this test also proves the HTML-to-raw conversion is applied before the
    download.  The URL and file contents are pinned exactly: a substring check
    would still pass if the conversion produced a wrong path or the payload
    were altered.
    """
    identifier = "browse-sh/airbnb.com/search-listings-ddgioa"
    skill_md = "# Airbnb Skill\n\nSearch and book Airbnb listings."
    expected_url = (
        "https://raw.githubusercontent.com/browserbase/browse-sh/main/"
        "skills/airbnb.com/SKILL.md"
    )
    mock_get.return_value = _MockResponse(status_code=200, text=skill_md)

    bundle = self.src.fetch(identifier)

    self.assertIsNotNone(bundle)
    self.assertIsInstance(bundle, SkillBundle)
    self.assertEqual(bundle.name, "airbnb.com")
    # Exactly one file, carrying the downloaded text unchanged.
    self.assertEqual(bundle.files, {"SKILL.md": skill_md})
    self.assertEqual(bundle.source, "browse-sh")
    self.assertEqual(bundle.trust_level, "community")
    self.assertEqual(bundle.identifier, identifier)

    mock_get.assert_called_once()
    # Index form of call_args works on every Python 3 release; ``.args``
    # only exists from 3.8 onwards.
    call_url = mock_get.call_args[0][0]
    self.assertEqual(call_url, expected_url)
class BrowseShSource(SkillSource):
    """Discover and install site-specific browser automation skills from browse.sh.

    browse.sh (https://browse.sh) is Browserbase's catalog of 169+ SKILL.md files
    that describe how to automate specific websites (Airbnb, Amazon, arXiv, etc.).
    Each catalog entry has a ``sourceUrl`` pointing to its SKILL.md on GitHub,
    either as a ``github.com/<owner>/<repo>/blob/...`` page or as a
    ``raw.githubusercontent.com`` URL.

    Catalog entries are third-party data.  Every field is therefore type-checked
    before use, and a skill is only downloaded from an HTTPS URL whose *parsed*
    hostname is one of the two GitHub hosts above.
    """

    CATALOG_URL = "https://browse.sh/api/skills"
    _CACHE_KEY = "browse_sh_catalog"
    _ID_PREFIX = "browse-sh/"
    _MAX_DESCRIPTION = 1024
    _RAW_HOST = "raw.githubusercontent.com"
    _HTML_HOST = "github.com"

    def source_id(self) -> str:
        """Return the identifier of this source (``"browse-sh"``)."""
        return "browse-sh"

    def trust_level_for(self, identifier: str) -> str:
        """Return the trust level for *identifier*; always ``"community"``."""
        return "community"

    # ------------------------------------------------------------------
    # Catalog access
    # ------------------------------------------------------------------

    def _fetch_catalog(self) -> List[Dict]:
        """Return the catalog entries, from the index cache when available.

        Returns:
            A list containing only the dict entries of the catalog.  An empty
            list is returned on a non-200 response, a transport error, an
            undecodable body or an unexpected payload shape.
        """
        # NOTE(review): presumably the helper returns None on a miss -- confirm.
        # Anything that is not a list is treated as a miss and refetched.
        cached = _read_index_cache(self._CACHE_KEY)
        if isinstance(cached, list):
            return [entry for entry in cached if isinstance(entry, dict)]
        try:
            resp = httpx.get(self.CATALOG_URL, timeout=20)
            if resp.status_code != 200:
                return []
            data = resp.json()
        except (httpx.HTTPError, ValueError):
            # ValueError is the base class of json.JSONDecodeError and also
            # covers a body that cannot be decoded as text.
            return []
        raw_skills = data.get("skills") if isinstance(data, dict) else None
        if not isinstance(raw_skills, list):
            return []
        skills = [entry for entry in raw_skills if isinstance(entry, dict)]
        if skills:
            # An empty catalog is not cached, so one odd response cannot hide
            # every skill for as long as the cache entry lives.
            _write_index_cache(self._CACHE_KEY, skills)
        return skills

    def _find_item(self, identifier: str) -> Optional[Dict]:
        """Return the catalog entry whose slug matches *identifier*, else ``None``."""
        slug = self._slug_from_identifier(identifier)
        if not slug:
            return None
        for item in self._fetch_catalog():
            if isinstance(item, dict) and item.get("slug") == slug:
                return item
        return None

    # ------------------------------------------------------------------
    # Field helpers (catalog values may be missing, null or mistyped)
    # ------------------------------------------------------------------

    @staticmethod
    def _text_field(item: Dict, key: str) -> str:
        """Return ``item[key]`` when it is a string, otherwise ``""``."""
        value = item.get(key)
        return value if isinstance(value, str) else ""

    @staticmethod
    def _tag_list(item: Dict) -> List[str]:
        """Return the string entries of ``item["tags"]`` as a new list."""
        raw_tags = item.get("tags")
        if not isinstance(raw_tags, list):
            return []
        return [tag for tag in raw_tags if isinstance(tag, str)]

    def _search_text(self, item: Dict) -> str:
        """Return the lower-cased text of *item* that ``search`` matches against."""
        parts = [
            self._text_field(item, key)
            for key in ("name", "title", "description", "hostname", "category")
        ]
        parts.extend(self._tag_list(item))
        return " ".join(parts).lower()

    def _item_to_meta(self, item: Dict) -> Optional[SkillMeta]:
        """Convert one catalog entry into a ``SkillMeta``.

        Returns ``None`` when the entry has no usable ``slug`` or ``name``.
        The description falls back to the title, then to the name, and is
        truncated to ``_MAX_DESCRIPTION`` characters (ending in ``"..."``).
        """
        slug = self._text_field(item, "slug")
        name = self._text_field(item, "name")
        if not slug or not name:
            return None
        title = self._text_field(item, "title") or name
        description = self._text_field(item, "description") or title
        if len(description) > self._MAX_DESCRIPTION:
            description = description[: self._MAX_DESCRIPTION - 3] + "..."
        identifier = f"{self._ID_PREFIX}{slug}"
        return SkillMeta(
            name=name,
            description=description,
            source=self.source_id(),
            identifier=identifier,
            trust_level=self.trust_level_for(identifier),
            tags=self._tag_list(item),
            extra={
                "slug": slug,
                "hostname": self._text_field(item, "hostname"),
                "category": self._text_field(item, "category"),
                "source_url": self._text_field(item, "sourceUrl"),
                "recommended_method": self._text_field(item, "recommendedMethod"),
                # ``or`` maps a JSON null onto the same default as a missing key.
                "proxies": item.get("proxies") or False,
                "install_count": item.get("installCount") or 0,
            },
        )

    # ------------------------------------------------------------------
    # SkillSource interface
    # ------------------------------------------------------------------

    def search(self, query: str, limit: int = 10) -> List[SkillMeta]:
        """Return up to *limit* skills whose text contains *query*.

        Matching is a case-insensitive substring test over name, title,
        description, hostname, category and tags.  An empty (or whitespace)
        query matches every entry.  A non-positive *limit* yields ``[]``.
        """
        if limit <= 0:
            return []
        needle = (query or "").strip().lower()
        results = []
        for item in self._fetch_catalog():
            if not isinstance(item, dict):
                continue
            if needle and needle not in self._search_text(item):
                continue
            meta = self._item_to_meta(item)
            if meta is None:
                continue
            results.append(meta)
            if len(results) >= limit:
                break
        return results

    def inspect(self, identifier: str) -> Optional[SkillMeta]:
        """Return metadata for *identifier*, or ``None`` if it is not in the catalog."""
        item = self._find_item(identifier)
        if item is None:
            return None
        return self._item_to_meta(item)

    def fetch(self, identifier: str) -> Optional[SkillBundle]:
        """Download the SKILL.md for *identifier* and wrap it in a ``SkillBundle``.

        Returns ``None`` when the skill is unknown, its ``sourceUrl`` is missing
        or not an accepted GitHub URL, the download fails or is not a 200, or
        the downloaded body is blank.
        """
        item = self._find_item(identifier)
        if item is None:
            return None
        slug = item["slug"]
        source_url = self._text_field(item, "sourceUrl")
        # Also rejects an empty URL, so no separate emptiness check is needed.
        raw_url = self._to_raw_url(source_url)
        if not raw_url:
            return None
        try:
            resp = httpx.get(raw_url, timeout=20, follow_redirects=True)
            if resp.status_code != 200:
                return None
            content = resp.text
        except httpx.HTTPError:
            return None
        if not content or not content.strip():
            # A blank SKILL.md is not an installable skill.
            return None
        meta = self._item_to_meta(item)
        name = meta.name if meta else slug.split("/")[-1]
        canonical_id = f"{self._ID_PREFIX}{slug}"
        return SkillBundle(
            name=name,
            files={"SKILL.md": content},
            source=self.source_id(),
            # Always the prefixed form, matching what _item_to_meta reports,
            # even when the caller passed a bare slug.
            identifier=canonical_id,
            trust_level=self.trust_level_for(canonical_id),
            metadata={
                "slug": slug,
                "hostname": self._text_field(item, "hostname"),
                "source_url": source_url,
            },
        )

    # ------------------------------------------------------------------
    # Identifier / URL helpers
    # ------------------------------------------------------------------

    def _slug_from_identifier(self, identifier: str) -> str:
        """Extract slug from identifier like 'browse-sh/airbnb.com/search-listings-abc'.

        An identifier without the ``browse-sh/`` prefix is returned as-is
        (stripped of surrounding whitespace); a non-string yields ``""``.
        """
        if not isinstance(identifier, str):
            return ""
        identifier = identifier.strip()
        if identifier.startswith(self._ID_PREFIX):
            return identifier[len(self._ID_PREFIX):]
        return identifier

    def _to_raw_url(self, url: str) -> Optional[str]:
        """Convert a GitHub HTML URL to a raw.githubusercontent.com URL.

        * ``https://raw.githubusercontent.com/...`` is returned unchanged.
        * ``https://github.com/<owner>/<repo>/blob/<branch>/<path>`` becomes
          ``https://raw.githubusercontent.com/<owner>/<repo>/<branch>/<path>``
          (any query string or fragment is dropped).
        * Anything else -- another host, a non-HTTPS scheme, a malformed or
          non-string value -- returns ``None``.

        The host is taken from the parsed URL rather than searched for as a
        substring, so ``https://evil.example/raw.githubusercontent.com/x`` and
        ``https://raw.githubusercontent.com@evil.example/x`` are both rejected.
        """
        # Function-scope import keeps this block independent of the module's
        # import list.
        from urllib.parse import urlsplit

        if not isinstance(url, str):
            return None
        url = url.strip()
        if not url:
            return None
        try:
            parts = urlsplit(url)
            host = (parts.hostname or "").lower()
        except ValueError:
            # urlsplit raises ValueError on e.g. a malformed bracketed host.
            return None
        if parts.scheme != "https":
            return None
        if host == self._RAW_HOST:
            return url
        if host != self._HTML_HOST:
            return None
        # Expected path: /<owner>/<repo>/blob/<branch>/<path...>
        segments = parts.path.strip("/").split("/")
        if len(segments) < 5 or segments[2] != "blob" or not all(segments[:4]):
            return None
        owner, repo, _blob, branch = segments[:4]
        path = "/".join(segments[4:])
        if not path:
            return None
        return f"https://{self._RAW_HOST}/{owner}/{repo}/{branch}/{path}"