From 890b2ebd5b5f042e0ad16196a072a780bef20fda Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Tue, 19 May 2026 14:14:22 -0700 Subject: [PATCH] fix(browse-sh): fetch SKILL.md via /api/skills/{slug}+skillMdUrl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The catalog's sourceUrl points at github.com/browserbase/browse.sh, whose underlying repository is not always public — most raw URLs derived from it 404. Use the per-skill detail endpoint instead, which returns a skillMdUrl CDN blob that reliably resolves to the SKILL.md text. Fall back to a raw.githubusercontent.com sourceUrl if the detail call fails. - tools/skills_hub.py: rewrite BrowseShSource.fetch() to resolve via /api/skills/{slug} -> skillMdUrl; drop the unreachable _to_raw_url helper; expose the resolved URL in bundle.metadata.skill_md_url. - tests/tools/test_skills_hub_browse_sh.py: match the real catalog shape (name = task name, slug = host/task-id), exercise the detail-endpoint -> blob two-call flow, and add a fallback test. - scripts/release.py: map kylejeong21@gmail.com -> Kylejeong2. 
--- scripts/release.py | 1 + tests/tools/test_skills_hub_browse_sh.py | 74 ++++++++++++++---------- tools/skills_hub.py | 70 ++++++++++++++-------- 3 files changed, 90 insertions(+), 55 deletions(-) diff --git a/scripts/release.py b/scripts/release.py index b4afe7d689c..60779899b93 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -104,6 +104,7 @@ AUTHOR_MAP = { "147827411+EloquentBrush@users.noreply.github.com": "AhmetArif0", "97489706+purzbeats@users.noreply.github.com": "purzbeats", "hugosequier@gmail.com": "Hugo-SEQUIER", + "kylejeong21@gmail.com": "Kylejeong2", "128259593+Gutslabs@users.noreply.github.com": "Gutslabs", "50326054+nocturnum91@users.noreply.github.com": "nocturnum91", "52470719+gianfrancopiana@users.noreply.github.com": "gianfrancopiana", diff --git a/tests/tools/test_skills_hub_browse_sh.py b/tests/tools/test_skills_hub_browse_sh.py index f4bfd1c3acb..7058dffe1ed 100644 --- a/tests/tools/test_skills_hub_browse_sh.py +++ b/tests/tools/test_skills_hub_browse_sh.py @@ -6,29 +6,31 @@ from unittest.mock import patch from tools.skills_hub import BrowseShSource, SkillMeta, SkillBundle +# Catalog shape mirrors the real ``GET https://browse.sh/api/skills`` response: +# ``slug`` is ``{hostname}/{task-id}`` and ``name`` is the task name. 
SAMPLE_CATALOG = [ { "slug": "airbnb.com/search-listings-ddgioa", - "name": "airbnb.com", + "name": "search-listings", "title": "Airbnb Search Listings", "description": "Search and browse Airbnb listings by location and dates.", "hostname": "airbnb.com", "category": "travel", "tags": ["travel", "accommodation"], - "sourceUrl": "https://github.com/browserbase/browse-sh/blob/main/skills/airbnb.com/SKILL.md", + "sourceUrl": "https://github.com/browserbase/browse.sh/blob/main/skills/airbnb.com/search-listings-ddgioa/SKILL.md", "recommendedMethod": "stagehand", "proxies": False, "installCount": 42, }, { "slug": "amazon.com/search-products-xyz", - "name": "amazon.com", + "name": "search-products", "title": "Amazon Product Search", "description": "Search for products on Amazon.", "hostname": "amazon.com", "category": "shopping", "tags": ["shopping", "ecommerce"], - "sourceUrl": "https://raw.githubusercontent.com/browserbase/browse-sh/main/skills/amazon.com/SKILL.md", + "sourceUrl": "https://github.com/browserbase/browse.sh/blob/main/skills/amazon.com/search-products-xyz/SKILL.md", "recommendedMethod": "stagehand", "proxies": False, "installCount": 99, @@ -60,7 +62,7 @@ class TestBrowseShSource(unittest.TestCase): self.assertGreaterEqual(len(results), 1) meta = results[0] self.assertIsInstance(meta, SkillMeta) - self.assertEqual(meta.name, "airbnb.com") + self.assertEqual(meta.name, "search-listings") self.assertEqual(meta.source, "browse-sh") self.assertEqual(meta.trust_level, "community") self.assertEqual(meta.identifier, "browse-sh/airbnb.com/search-listings-ddgioa") @@ -70,7 +72,7 @@ class TestBrowseShSource(unittest.TestCase): def test_search_filters_by_query(self, _mock_catalog): results = self.src.search("amazon", limit=10) self.assertEqual(len(results), 1) - self.assertEqual(results[0].name, "amazon.com") + self.assertEqual(results[0].extra["hostname"], "amazon.com") results_all = self.src.search("", limit=10) self.assertEqual(len(results_all), 2) @@ -78,22 +80,50 
@@ class TestBrowseShSource(unittest.TestCase): @patch("tools.skills_hub.httpx.get") @patch.object(BrowseShSource, "_fetch_catalog", return_value=SAMPLE_CATALOG) def test_fetch_returns_bundle(self, _mock_catalog, mock_get): - mock_get.return_value = _MockResponse( - status_code=200, - text="# Airbnb Skill\n\nSearch and book Airbnb listings.", + # First call: GET /api/skills/{slug} returns the detail object with skillMdUrl. + # Second call: GET the CDN blob URL returns the SKILL.md text. + blob_url = ( + "https://gh0lfhlmyzhg6tww.public.blob.vercel-storage.com" + "/skills/airbnb.com/search-listings-ddgioa/SKILL.md" ) + mock_get.side_effect = [ + _MockResponse(status_code=200, json_data={"skillMdUrl": blob_url}), + _MockResponse(status_code=200, text="# Airbnb Skill\n\nSearch and book Airbnb listings."), + ] bundle = self.src.fetch("browse-sh/airbnb.com/search-listings-ddgioa") self.assertIsNotNone(bundle) self.assertIsInstance(bundle, SkillBundle) - self.assertEqual(bundle.name, "airbnb.com") + self.assertEqual(bundle.name, "search-listings") self.assertIn("SKILL.md", bundle.files) self.assertIn("Airbnb", bundle.files["SKILL.md"]) self.assertEqual(bundle.source, "browse-sh") self.assertEqual(bundle.trust_level, "community") self.assertEqual(bundle.identifier, "browse-sh/airbnb.com/search-listings-ddgioa") - mock_get.assert_called_once() - call_url = mock_get.call_args.args[0] - self.assertIn("raw.githubusercontent.com", call_url) + self.assertEqual(bundle.metadata["skill_md_url"], blob_url) + # Two HTTP calls: detail endpoint + blob. 
+ self.assertEqual(mock_get.call_count, 2) + first_url = mock_get.call_args_list[0].args[0] + second_url = mock_get.call_args_list[1].args[0] + self.assertIn("/api/skills/airbnb.com/search-listings-ddgioa", first_url) + self.assertEqual(second_url, blob_url) + + @patch("tools.skills_hub.httpx.get") + @patch.object(BrowseShSource, "_fetch_catalog", return_value=SAMPLE_CATALOG) + def test_fetch_falls_back_to_raw_github_url(self, _mock_catalog, mock_get): + # Detail endpoint fails → fall back to a raw.githubusercontent.com sourceUrl. + raw_catalog = [dict(SAMPLE_CATALOG[0])] + raw_catalog[0]["sourceUrl"] = ( + "https://raw.githubusercontent.com/example/repo/main/skills/" + "airbnb.com/search-listings-ddgioa/SKILL.md" + ) + with patch.object(BrowseShSource, "_fetch_catalog", return_value=raw_catalog): + mock_get.side_effect = [ + _MockResponse(status_code=500, json_data=None), # detail endpoint fails + _MockResponse(status_code=200, text="# Fallback content"), + ] + bundle = self.src.fetch("browse-sh/airbnb.com/search-listings-ddgioa") + self.assertIsNotNone(bundle) + self.assertEqual(bundle.files["SKILL.md"], "# Fallback content") @patch.object(BrowseShSource, "_fetch_catalog", return_value=SAMPLE_CATALOG) def test_fetch_missing_slug_returns_none(self, _mock_catalog): @@ -105,28 +135,12 @@ class TestBrowseShSource(unittest.TestCase): meta = self.src.inspect("browse-sh/airbnb.com/search-listings-ddgioa") self.assertIsNotNone(meta) self.assertIsInstance(meta, SkillMeta) - self.assertEqual(meta.name, "airbnb.com") + self.assertEqual(meta.name, "search-listings") self.assertEqual(meta.identifier, "browse-sh/airbnb.com/search-listings-ddgioa") self.assertEqual(meta.extra["hostname"], "airbnb.com") self.assertEqual(meta.extra["category"], "travel") self.assertEqual(meta.extra["install_count"], 42) - def test_to_raw_url_conversion(self): - # GitHub HTML URL should be converted - html_url = "https://github.com/browserbase/browse-sh/blob/main/skills/airbnb.com/SKILL.md" - 
raw_url = self.src._to_raw_url(html_url) - self.assertEqual( - raw_url, - "https://raw.githubusercontent.com/browserbase/browse-sh/main/skills/airbnb.com/SKILL.md", - ) - - # Already a raw URL — should be returned unchanged - already_raw = "https://raw.githubusercontent.com/browserbase/browse-sh/main/skills/amazon.com/SKILL.md" - self.assertEqual(self.src._to_raw_url(already_raw), already_raw) - - # Unrecognised URL — should return None - self.assertIsNone(self.src._to_raw_url("https://example.com/something")) - if __name__ == "__main__": unittest.main() diff --git a/tools/skills_hub.py b/tools/skills_hub.py index 1734ff20fc5..7725c745de4 100644 --- a/tools/skills_hub.py +++ b/tools/skills_hub.py @@ -2358,12 +2358,17 @@ class LobeHubSource(SkillSource): class BrowseShSource(SkillSource): """Discover and install site-specific browser automation skills from browse.sh. - browse.sh (https://browse.sh) is Browserbase's catalog of 169+ SKILL.md files + browse.sh (https://browse.sh) is Browserbase's catalog of 200+ SKILL.md files that describe how to automate specific websites (Airbnb, Amazon, arXiv, etc.). - Each skill has a sourceUrl pointing to the raw SKILL.md on GitHub. + The catalog lives at ``/api/skills`` and each skill's actual SKILL.md content + is fetched via ``/api/skills/{slug}`` which returns a ``skillMdUrl`` field + pointing at a CDN-hosted blob — the catalog's ``sourceUrl`` field is a GitHub + HTML URL whose underlying repository is not always public, so it cannot be + relied on for content fetch. 
""" CATALOG_URL = "https://browse.sh/api/skills" + SKILL_DETAIL_URL = "https://browse.sh/api/skills/{slug}" _CACHE_KEY = "browse_sh_catalog" def source_id(self) -> str: @@ -2454,20 +2459,22 @@ class BrowseShSource(SkillSource): item = next((i for i in catalog if i.get("slug") == slug), None) if not item: return None - source_url = item.get("sourceUrl", "") - if not source_url: - return None - # Convert GitHub HTML URL to raw URL if needed - raw_url = self._to_raw_url(source_url) - if not raw_url: + + # Resolve the actual SKILL.md content URL via the per-skill detail + # endpoint, which returns a ``skillMdUrl`` (CDN blob). The catalog's + # ``sourceUrl`` is a GitHub HTML link whose underlying repo is not + # reliably public, so we don't use it for content. + md_url = self._resolve_skill_md_url(slug, item) + if not md_url: return None try: - resp = httpx.get(raw_url, timeout=20, follow_redirects=True) + resp = httpx.get(md_url, timeout=20, follow_redirects=True) if resp.status_code != 200: return None content = resp.text except httpx.HTTPError: return None + meta = self._item_to_meta(item) name = meta.name if meta else slug.split("/")[-1] return SkillBundle( @@ -2479,31 +2486,44 @@ class BrowseShSource(SkillSource): metadata={ "slug": slug, "hostname": item.get("hostname", ""), - "source_url": source_url, + "source_url": item.get("sourceUrl", ""), + "skill_md_url": md_url, }, ) + def _resolve_skill_md_url(self, slug: str, item: Dict) -> Optional[str]: + """Resolve the SKILL.md content URL for a slug. + + Primary path: hit ``/api/skills/{slug}`` and read ``skillMdUrl``. + Fallback: if the catalog item already has a ``raw.githubusercontent.com`` + ``sourceUrl`` (some entries may), use it directly. 
+ """ + try: + detail = httpx.get( + self.SKILL_DETAIL_URL.format(slug=slug), + timeout=20, + follow_redirects=True, + ) + if detail.status_code == 200: + data = detail.json() + if isinstance(data, dict): + md_url = data.get("skillMdUrl") + if isinstance(md_url, str) and md_url.startswith("http"): + return md_url + except (httpx.HTTPError, json.JSONDecodeError): + pass + + source_url = item.get("sourceUrl", "") if isinstance(item, dict) else "" + if source_url and "raw.githubusercontent.com" in source_url: + return source_url + return None + def _slug_from_identifier(self, identifier: str) -> str: """Extract slug from identifier like 'browse-sh/airbnb.com/search-listings-abc'.""" if identifier.startswith("browse-sh/"): return identifier[len("browse-sh/"):] return identifier - def _to_raw_url(self, url: str) -> Optional[str]: - """Convert a GitHub HTML URL to a raw.githubusercontent.com URL.""" - if "raw.githubusercontent.com" in url: - return url - # https://github.com/owner/repo/blob/branch/path -> raw URL - import re - m = re.match( - r"https://github\.com/([^/]+)/([^/]+)/blob/([^/]+)/(.+)", - url, - ) - if m: - owner, repo, branch, path = m.groups() - return f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{path}" - return None - # --------------------------------------------------------------------------- # Official optional skills source adapter