fix(skills): let ClawHub index build walk past the 12s browse budget (#44500)

The deploy-site skills index crawl was capped at ~3k ClawHub entries
because CATALOG_WALK_BUDGET_SECONDS applied to max_items=0 walks too.
Only enforce the wall-clock budget for bounded browse requests and pass
limit=0 from build_skills_index so CI walks the full catalog.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Austin Pickett 2026-06-11 18:03:11 -04:00 committed by GitHub
parent 021ed69141
commit 2ee69d0579
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 33 additions and 6 deletions

View file

@ -272,8 +272,9 @@ def main():
# (well above current catalog size) lets the full catalog land in the
# index instead of being truncated at an arbitrary build-time limit.
SOURCE_LIMITS = {
# ClawHub had 49,698+ skills as of May 2026; 200k leaves headroom.
"clawhub": 200_000,
# 0 = no item limit and no wall-clock budget (max_items=0 in ClawHubSource);
# the walk is still bounded by ClawHubSource's 750-page safety cap. A positive
# limit bounds the walk and also enables the interactive 12s budget.
"clawhub": 0,
"lobehub": 100_000,
"browse-sh": 5_000,
"claude-marketplace": 5_000,

View file

@ -381,9 +381,10 @@ class TestClawHubSource(unittest.TestCase):
mock_get.side_effect = side_effect
# Force the deadline to be in the past immediately.
# Force the deadline to be in the past immediately. Budget only applies
# to bounded browse walks (max_items > 0), not the index builder path.
with patch.object(ClawHubSource, "CATALOG_WALK_BUDGET_SECONDS", -1):
results = self.src._load_catalog_index()
results = self.src._load_catalog_index(max_items=10)
# Walk broke well before the 750-page cap.
self.assertLess(page_calls["n"], 750)
@ -480,6 +481,23 @@ class TestClawHubCatalogWalkBounded(unittest.TestCase):
# Partial (bounded) walk must not be cached.
mock_write_cache.assert_not_called()
# Patch decorators are applied bottom-up, so the mocks are injected in the
# order (httpx.get, _read_index_cache, _write_index_cache).
@patch("tools.skills_hub._write_index_cache")
@patch("tools.skills_hub._read_index_cache", return_value=None)
@patch("tools.skills_hub.httpx.get")
def test_max_items_zero_ignores_wall_clock_budget(
self, mock_get, _mock_read_cache, _mock_write_cache
):
"""Index builder path (max_items=0) must not truncate on the browse budget."""
# Mutable counter shared with the page factory so the test can read back
# how many pages were requested. NOTE(review): presumably incremented by
# _infinite_pages on every mocked GET; the helper is defined elsewhere.
page_calls = {"n": 0}
mock_get.side_effect = self._infinite_pages(page_calls)
# A negative budget would put any computed deadline in the past, so a walk
# that honoured the budget would stop almost immediately.
with patch.object(ClawHubSource, "CATALOG_WALK_BUDGET_SECONDS", -1):
results = self.src._load_catalog_index(max_items=0)
# No budget -> walks until the 750-page safety cap, not ~14 pages in 12s.
self.assertEqual(page_calls["n"], 750)
# NOTE(review): assumes each mocked page contributes exactly one result;
# confirm against _infinite_pages.
self.assertEqual(len(results), 750)
@patch("tools.skills_hub._write_index_cache")
@patch("tools.skills_hub._read_index_cache", return_value=None)
@patch("tools.skills_hub.httpx.get")

View file

@ -2279,12 +2279,20 @@ class ClawHubSource(SkillSource):
# terminates well before this on `nextCursor` going None — the cap is
# a safety rail against an infinite-cursor loop.
max_pages = 750
deadline = time.monotonic() + self.CATALOG_WALK_BUDGET_SECONDS
# Wall-clock budget is for interactive browse (max_items > 0) only.
# The offline index builder passes max_items=0 and must walk the full
# catalog — a 12s cap there ships ~3k skills and trips the deploy
# health floor (20k).
deadline = (
time.monotonic() + self.CATALOG_WALK_BUDGET_SECONDS
if max_items > 0
else None
)
hit_deadline = False
hit_max_items = False
for _ in range(max_pages):
if time.monotonic() > deadline:
if deadline is not None and time.monotonic() > deadline:
hit_deadline = True
break
params: Dict[str, Any] = {"limit": 200}