fix(skills): bound ClawHub catalog walk to requested page on cold start (#43395)

Browse renders one page but the cold-cache fallback walked the entire 50k+ ClawHub catalog, then sliced off the first N — pure waste behind the 12s budget band-aid. _load_catalog_index now takes max_items: browse's empty-query path bounds the walk to its limit and stops early; the offline index builder still passes limit=0 (unbounded) and walks to exhaustion. A bounded walk is partial, so it is not written to the shared full-catalog cache (same poison-guard as the budget-truncated case).
2026-06-14 09:11:54 +00:00 · 2026-06-10 01:01:53 -07:00 · 2026-06-10 01:01:53 -07:00 · eee1da45f0
commit eee1da45f0
parent 6a30cfca82
2 changed files with 140 additions and 10 deletions
--- a/tests/tools/test_skills_hub_clawhub.py
+++ b/tests/tools/test_skills_hub_clawhub.py
@ -423,5 +423,111 @@ class TestClawHubSource(unittest.TestCase):
        mock_write_cache.assert_called_once()


+class TestClawHubCatalogWalkBounded(unittest.TestCase):
+    """max_items bounds the walk so browse's cold-start fallback renders one
+    page without walking the entire 50k+ catalog. The offline index builder
+    keeps max_items=0 (unbounded) and walks to exhaustion."""
+
+    def setUp(self):
+        self.src = ClawHubSource()
+        self._safe_patcher = patch("tools.skills_hub.is_safe_url", return_value=True)
+        self._policy_patcher = patch("tools.skills_hub.check_website_access", return_value=None)
+        self._safe_patcher.start()
+        self._policy_patcher.start()
+
+    def tearDown(self):
+        self._policy_patcher.stop()
+        self._safe_patcher.stop()
+
+    def _infinite_pages(self, page_calls):
+        """A side_effect that always advertises another cursor — the walk would
+        never stop on its own, so only max_items / budget can break it."""
+
+        def side_effect(url, *args, **kwargs):
+            if url.endswith("/skills"):
+                idx = page_calls["n"]
+                page_calls["n"] += 1
+                return _MockResponse(
+                    status_code=200,
+                    json_data={
+                        "items": [
+                            {"slug": f"skill-{idx}", "displayName": f"Skill {idx}"}
+                        ],
+                        "nextCursor": f"cursor-{idx + 1}",
+                    },
+                )
+            return _MockResponse(status_code=404, json_data={})
+
+        return side_effect
+
+    @patch("tools.skills_hub._write_index_cache")
+    @patch("tools.skills_hub._read_index_cache", return_value=None)
+    @patch("tools.skills_hub.httpx.get")
+    def test_max_items_stops_walk_early_and_does_not_cache(
+        self, mock_get, _mock_read_cache, mock_write_cache
+    ):
+        """A bounded walk stops as soon as it has >= max_items skills and must
+        NOT poison the shared full-catalog cache with the partial slice."""
+        page_calls = {"n": 0}
+        mock_get.side_effect = self._infinite_pages(page_calls)
+
+        results = self.src._load_catalog_index(max_items=5)
+
+        # Each mocked page yields exactly 1 item, so ~5 pages cover the bound.
+        self.assertGreaterEqual(len(results), 5)
+        self.assertLess(page_calls["n"], 750, "bounded walk should stop well before the cap")
+        self.assertLess(page_calls["n"], 20, "should stop within a few pages of the bound")
+        # Partial (bounded) walk must not be cached.
+        mock_write_cache.assert_not_called()
+
+    @patch("tools.skills_hub._write_index_cache")
+    @patch("tools.skills_hub._read_index_cache", return_value=None)
+    @patch("tools.skills_hub.httpx.get")
+    def test_max_items_zero_is_unbounded_and_caches(
+        self, mock_get, _mock_read_cache, mock_write_cache
+    ):
+        """max_items=0 (the index builder's path) walks to natural termination
+        and DOES cache the complete catalog."""
+
+        def side_effect(url, *args, **kwargs):
+            if url.endswith("/skills"):
+                return _MockResponse(
+                    status_code=200,
+                    json_data={
+                        "items": [
+                            {"slug": "a", "displayName": "A"},
+                            {"slug": "b", "displayName": "B"},
+                            {"slug": "c", "displayName": "C"},
+                        ],
+                        # No nextCursor -> natural termination.
+                    },
+                )
+            return _MockResponse(status_code=404, json_data={})
+
+        mock_get.side_effect = side_effect
+
+        results = self.src._load_catalog_index(max_items=0)
+
+        self.assertEqual(len(results), 3)
+        mock_write_cache.assert_called_once()
+
+    @patch("tools.skills_hub._write_index_cache")
+    @patch("tools.skills_hub._read_index_cache", return_value=None)
+    @patch("tools.skills_hub.httpx.get")
+    def test_empty_query_browse_bounds_walk_to_limit(
+        self, mock_get, _mock_read_cache, _mock_write_cache
+    ):
+        """search("", limit=N) is the browse cold-start path — it must bound the
+        catalog walk to N rather than walking the whole 50k+ catalog."""
+        page_calls = {"n": 0}
+        mock_get.side_effect = self._infinite_pages(page_calls)
+
+        results = self.src.search("", limit=10)
+
+        self.assertEqual(len(results), 10, "browse page should be exactly `limit` items")
+        # Walk stopped near the bound, not at the 750-page cap.
+        self.assertLess(page_calls["n"], 30)
+
+
 if __name__ == "__main__":
    unittest.main()
--- a/tools/skills_hub.py
+++ b/tools/skills_hub.py
@ -2119,12 +2119,13 @@ class ClawHubSource(SkillSource):
            if results:
                return results
        else:
-            # Empty query: route through the paginating catalog walker so the
-            # full ClawHub catalog (20k+ skills) lands in the index. The
-            # single-request listing path below caps at one page (200 items)
-            # regardless of `limit`, which silently truncates the public
-            # skills index. The catalog walker follows `nextCursor`.
-            catalog = self._load_catalog_index()
+            # Empty query: route through the paginating catalog walker. When
+            # the full catalog is already disk-cached this returns it whole and
+            # the caller paginates client-side. On a cold cache, bound the walk
+            # to `limit` so a browse command renders its first page without
+            # walking the entire 50k+ catalog (max_items=0 → unbounded, used
+            # only by the offline index builder via search("", limit=0)).
+            catalog = self._load_catalog_index(max_items=limit if limit > 0 else 0)
            if catalog:
                return self._dedupe_results(catalog)[:limit] if limit > 0 else self._dedupe_results(catalog)

@ -2249,7 +2250,21 @@ class ClawHubSource(SkillSource):
        _write_index_cache(cache_key, [_skill_meta_to_dict(s) for s in results])
        return results

-    def _load_catalog_index(self) -> List[SkillMeta]:
+    def _load_catalog_index(self, max_items: int = 0) -> List[SkillMeta]:
+        """Walk the ClawHub catalog via cursor pagination.
+
+        ``max_items`` bounds the walk: once at least that many distinct skills
+        have been gathered the walk stops early. This is what browse's
+        cold-start fallback wants — it only renders one page, so walking the
+        entire 50k+ catalog just to slice off the first N is pure waste.
+        ``max_items=0`` (the default, used by the offline index builder) means
+        walk to exhaustion.
+
+        Caching: only a *complete* catalog (cursor exhausted or page cap) is
+        written to the shared ``clawhub_catalog_v1`` cache. A walk truncated by
+        ``max_items`` OR the wall-clock budget is partial, so caching it would
+        poison the full-catalog cache with an incomplete slice.
+        """
        cache_key = "clawhub_catalog_v1"
        cached = _read_index_cache(cache_key)
        if cached is not None:
@ -2266,6 +2281,7 @@ class ClawHubSource(SkillSource):
        max_pages = 750
        deadline = time.monotonic() + self.CATALOG_WALK_BUDGET_SECONDS
        hit_deadline = False
+        hit_max_items = False

        for _ in range(max_pages):
            if time.monotonic() > deadline:
@ -2308,10 +2324,18 @@ class ClawHubSource(SkillSource):
            if not isinstance(cursor, str) or not cursor:
                break

+            # Browse's cold-start fallback only renders one page, so stop as
+            # soon as we have enough to satisfy the caller's bound. The index
+            # builder passes max_items=0 (unbounded) and walks to exhaustion.
+            if max_items > 0 and len(results) >= max_items:
+                hit_max_items = True
+                break
+
        # Only cache a walk that reached a natural stop (cursor exhausted or
-        # page cap). A walk truncated by the wall-clock budget is partial, so
-        # writing it would poison the cache with incomplete catalog data.
-        if not hit_deadline:
+        # page cap). A walk truncated by the wall-clock budget OR by max_items
+        # is partial, so writing it would poison the shared full-catalog cache
+        # with incomplete data.
+        if not hit_deadline and not hit_max_items:
            _write_index_cache(cache_key, [_skill_meta_to_dict(s) for s in results])
        return results