diff --git a/tests/tools/test_skills_hub_clawhub.py b/tests/tools/test_skills_hub_clawhub.py index 98611d8d1..3601881ca 100644 --- a/tests/tools/test_skills_hub_clawhub.py +++ b/tests/tools/test_skills_hub_clawhub.py @@ -24,19 +24,26 @@ class TestClawHubSource(unittest.TestCase): @patch("tools.skills_hub._read_index_cache", return_value=None) @patch("tools.skills_hub.httpx.get") def test_search_uses_new_endpoint_and_parses_items(self, mock_get, _mock_read_cache, _mock_write_cache): - mock_get.return_value = _MockResponse( - status_code=200, - json_data={ - "items": [ - { - "slug": "caldav-calendar", - "displayName": "CalDAV Calendar", - "summary": "Calendar integration", - "tags": ["calendar", "productivity"], - } - ] - }, - ) + def side_effect(url, *args, **kwargs): + if url.endswith("/skills"): + return _MockResponse( + status_code=200, + json_data={ + "items": [ + { + "slug": "caldav-calendar", + "displayName": "CalDAV Calendar", + "summary": "Calendar integration", + "tags": ["calendar", "productivity"], + } + ] + }, + ) + if url.endswith("/skills/caldav"): + return _MockResponse(status_code=404, json_data={}) + return _MockResponse(status_code=404, json_data={}) + + mock_get.side_effect = side_effect results = self.src.search("caldav", limit=5) @@ -45,11 +52,93 @@ class TestClawHubSource(unittest.TestCase): self.assertEqual(results[0].name, "CalDAV Calendar") self.assertEqual(results[0].description, "Calendar integration") - mock_get.assert_called_once() - args, kwargs = mock_get.call_args + first_call = mock_get.call_args_list[0] + args, kwargs = first_call self.assertTrue(args[0].endswith("/skills")) self.assertEqual(kwargs["params"], {"search": "caldav", "limit": 5}) + @patch("tools.skills_hub._write_index_cache") + @patch("tools.skills_hub._read_index_cache", return_value=None) + @patch("tools.skills_hub.httpx.get") + def test_search_falls_back_to_exact_slug_when_search_results_are_irrelevant( + self, mock_get, _mock_read_cache, _mock_write_cache + ): + def side_effect(url, *args, **kwargs): + if url.endswith("/skills"): + return _MockResponse( + status_code=200, + json_data={ + "items": [ + { + "slug": "apple-music-dj", + "displayName": "Apple Music DJ", + "summary": "Unrelated result", + } + ] + }, + ) + if url.endswith("/skills/self-improving-agent"): + return _MockResponse( + status_code=200, + json_data={ + "skill": { + "slug": "self-improving-agent", + "displayName": "self-improving-agent", + "summary": "Captures learnings and errors for continuous improvement.", + "tags": {"latest": "3.0.2", "automation": "3.0.2"}, + }, + "latestVersion": {"version": "3.0.2"}, + }, + ) + return _MockResponse(status_code=404, json_data={}) + + mock_get.side_effect = side_effect + + results = self.src.search("self-improving-agent", limit=5) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0].identifier, "self-improving-agent") + self.assertEqual(results[0].name, "self-improving-agent") + self.assertIn("continuous improvement", results[0].description) + + @patch("tools.skills_hub.httpx.get") + @patch( + "tools.skills_hub._read_index_cache", + return_value=[ + { + "name": "Apple Music DJ", + "description": "Unrelated cached result", + "source": "clawhub", + "identifier": "apple-music-dj", + "trust_level": "community", + "repo": None, + "path": None, + "tags": [], + "extra": {}, + } + ], + ) + def test_search_repairs_poisoned_cache_with_exact_slug_lookup(self, _mock_read_cache, mock_get): + mock_get.return_value = _MockResponse( + status_code=200, + json_data={ + "skill": { + "slug": "self-improving-agent", + "displayName": "self-improving-agent", + "summary": "Captures learnings and errors for continuous improvement.", + "tags": {"latest": "3.0.2", "automation": "3.0.2"}, + }, + "latestVersion": {"version": "3.0.2"}, + }, + ) + + results = self.src.search("self-improving-agent", limit=5) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0].identifier, "self-improving-agent") + mock_get.assert_called_once() + self.assertTrue(mock_get.call_args.args[0].endswith("/skills/self-improving-agent")) + @patch("tools.skills_hub.httpx.get") def test_inspect_maps_display_name_and_summary(self, mock_get): mock_get.return_value = _MockResponse( @@ -69,6 +158,29 @@ class TestClawHubSource(unittest.TestCase): self.assertEqual(meta.description, "Calendar integration") self.assertEqual(meta.identifier, "caldav-calendar") + @patch("tools.skills_hub.httpx.get") + def test_inspect_handles_nested_skill_payload(self, mock_get): + mock_get.return_value = _MockResponse( + status_code=200, + json_data={ + "skill": { + "slug": "self-improving-agent", + "displayName": "self-improving-agent", + "summary": "Captures learnings and errors for continuous improvement.", + "tags": {"latest": "3.0.2", "automation": "3.0.2"}, + }, + "latestVersion": {"version": "3.0.2"}, + }, + ) + + meta = self.src.inspect("self-improving-agent") + + self.assertIsNotNone(meta) + self.assertEqual(meta.name, "self-improving-agent") + self.assertIn("continuous improvement", meta.description) + self.assertEqual(meta.identifier, "self-improving-agent") + self.assertEqual(meta.tags, ["automation"]) + @patch("tools.skills_hub.httpx.get") def test_fetch_resolves_latest_version_and_downloads_raw_files(self, mock_get): def side_effect(url, *args, **kwargs): diff --git a/tools/skills_hub.py b/tools/skills_hub.py index 94845fe92..3c055183e 100644 --- a/tools/skills_hub.py +++ b/tools/skills_hub.py @@ -1156,11 +1156,118 @@ class ClawHubSource(SkillSource): def trust_level_for(self, identifier: str) -> str: return "community" + @staticmethod + def _normalize_tags(tags: Any) -> List[str]: + if isinstance(tags, list): + return [str(t) for t in tags] + if isinstance(tags, dict): + return [str(k) for k in tags.keys() if str(k) != "latest"] + return [] + + @staticmethod + def _coerce_skill_payload(data: Any) -> Optional[Dict[str, Any]]: + if not isinstance(data, dict): + return None + nested = data.get("skill") + if isinstance(nested, dict): + merged = dict(nested) + latest_version = data.get("latestVersion") + if latest_version is not None and "latestVersion" not in merged: + merged["latestVersion"] = latest_version + return merged + return data + + @staticmethod + def _query_terms(query: str) -> List[str]: + return [term for term in re.split(r"[^a-z0-9]+", query.lower()) if term] + + @classmethod + def _search_score(cls, query: str, meta: SkillMeta) -> int: + query_norm = query.strip().lower() + if not query_norm: + return 1 + + identifier = (meta.identifier or "").lower() + name = (meta.name or "").lower() + description = (meta.description or "").lower() + score = 0 + + if query_norm == identifier: + score += 100 + if query_norm == name: + score += 95 + if query_norm in identifier: + score += 40 + if query_norm in name: + score += 35 + if query_norm in description: + score += 10 + + for term in cls._query_terms(query_norm): + if term in identifier: + score += 15 + if term in name: + score += 12 + if term in description: + score += 3 + + return score + + @staticmethod + def _dedupe_results(results: List[SkillMeta]) -> List[SkillMeta]: + seen: set[str] = set() + deduped: List[SkillMeta] = [] + for result in results: + key = (result.identifier or result.name).lower() + if key in seen: + continue + seen.add(key) + deduped.append(result) + return deduped + + def _exact_slug_meta(self, query: str) -> Optional[SkillMeta]: + slug = query.strip().split("/")[-1] + if not slug or not re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9._-]*", slug): + return None + return self.inspect(slug) + + def _finalize_search_results(self, query: str, results: List[SkillMeta], limit: int) -> List[SkillMeta]: + query_norm = query.strip() + if not query_norm: + return self._dedupe_results(results)[:limit] + + filtered = [meta for meta in results if self._search_score(query_norm, meta) > 0] + filtered.sort( + key=lambda meta: ( + -self._search_score(query_norm, meta), + meta.name.lower(), + meta.identifier.lower(), + ) + ) + filtered = self._dedupe_results(filtered) + + exact = self._exact_slug_meta(query_norm) + if exact: + filtered = [meta for meta in filtered if self._search_score(query_norm, meta) >= 20] + filtered = self._dedupe_results([exact] + filtered) + + if filtered: + return filtered[:limit] + + if re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9._/-]*", query_norm): + return [] + + return self._dedupe_results(results)[:limit] + def search(self, query: str, limit: int = 10) -> List[SkillMeta]: cache_key = f"clawhub_search_{hashlib.md5(query.encode()).hexdigest()}" cached = _read_index_cache(cache_key) if cached is not None: - return [SkillMeta(**s) for s in cached][:limit] + return self._finalize_search_results( + query, + [SkillMeta(**s) for s in cached], + limit, + ) try: resp = httpx.get( @@ -1185,20 +1292,19 @@ class ClawHubSource(SkillSource): continue display_name = item.get("displayName") or item.get("name") or slug summary = item.get("summary") or item.get("description") or "" - tags = item.get("tags", []) - if not isinstance(tags, list): - tags = [] + tags = self._normalize_tags(item.get("tags", [])) results.append(SkillMeta( name=display_name, description=summary, source="clawhub", identifier=slug, trust_level="community", - tags=[str(t) for t in tags], + tags=tags, )) - _write_index_cache(cache_key, [_skill_meta_to_dict(s) for s in results]) - return results + final_results = self._finalize_search_results(query, results, limit) + _write_index_cache(cache_key, [_skill_meta_to_dict(s) for s in final_results]) + return final_results def fetch(self, identifier: str) -> Optional[SkillBundle]: slug = identifier.split("/")[-1] @@ -1244,13 +1350,11 @@ class ClawHubSource(SkillSource): def inspect(self, identifier: str) -> Optional[SkillMeta]: slug = identifier.split("/")[-1] - data = self._get_json(f"{self.BASE_URL}/skills/{slug}") + data = self._coerce_skill_payload(self._get_json(f"{self.BASE_URL}/skills/{slug}")) if not isinstance(data, dict): return None - tags = data.get("tags", []) - if not isinstance(tags, list): - tags = [] + tags = self._normalize_tags(data.get("tags", [])) return SkillMeta( name=data.get("displayName") or data.get("name") or data.get("slug") or slug, @@ -1258,7 +1362,7 @@ class ClawHubSource(SkillSource): source="clawhub", identifier=data.get("slug") or slug, trust_level="community", - tags=[str(t) for t in tags], + tags=tags, ) def _get_json(self, url: str, timeout: int = 20) -> Optional[Any]: