mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(skills-hub): stop shipping a degenerate index when GitHub taps collapse (#42347)
The Skills Hub lost every api.github.com-backed source — the OpenAI, Anthropic, HuggingFace, NVIDIA, gstack, Claude Marketplace and Well-Known tabs all vanished — while ClawHub/skills.sh/LobeHub/browse.sh survived. A GitHub API rate limit during the docs-deploy crawl zeroed all three api.github.com sources (github / claude-marketplace / well-known) at once. Two compounding bugs let the broken index reach the live site:

1. build_skills_index.py wrote the output file BEFORE the health check, so even when the github floor (30) tripped and the script exited 2, the degenerate file was already on disk. deploy-site.yml then swallowed the exit code with `|| echo non-fatal` and extract-skills.py read the partial index.
   Fix: run the health check first, write the file only when healthy, exit without writing on failure. Removed the non-fatal swallow in deploy-site.yml so a collapse fails the deploy and the last good site stays live (Pages serves the previous build).

2. The build-time GitHub listing path returned [] on a 403 rate-limit without retrying or flagging it, so a rate-limited crawl looked identical to an empty source.
   Fix: a shared _github_get() helper on GitHubSource with retry/backoff (honors Retry-After / X-RateLimit-Reset on 403/429, backs off on 5xx + transport errors) and flags is_rate_limited. Routed _list_skills_in_repo and _fetch_file_content through it; gave ClaudeMarketplaceSource a persistent GitHubSource + is_rate_limited so the builder can name the rate limit as the cause instead of '0 results'.

Added tests/scripts/test_build_skills_index_health.py pinning both contracts: a degenerate crawl exits non-zero and writes no file; a healthy crawl writes the index with github/claude-marketplace/well-known all present.
This commit is contained in:
parent
639c1e3636
commit
5e9d7a7661
4 changed files with 274 additions and 53 deletions
22
.github/workflows/deploy-site.yml
vendored
22
.github/workflows/deploy-site.yml
vendored
|
|
@ -59,12 +59,22 @@ jobs:
|
|||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
# Always rebuild — the file isn't committed (gitignored), so a
|
||||
# fresh checkout starts without it and we want the freshest crawl
|
||||
# in every deploy. Failure is non-fatal: extract-skills.py will
|
||||
# fall back to the legacy snapshot cache and the Skills Hub page
|
||||
# still renders, just without the latest community catalog.
|
||||
python3 scripts/build_skills_index.py || echo "Skills index build failed (non-fatal)"
|
||||
# Rebuild the unified catalog. The file is gitignored, so a fresh
|
||||
# checkout starts without it and we want the freshest crawl in
|
||||
# every deploy.
|
||||
#
|
||||
# This MUST be fatal. build_skills_index.py runs a health check and
|
||||
# exits non-zero WITHOUT writing the output file when a source
|
||||
# collapses (e.g. a GitHub API rate limit zeroes the github /
|
||||
# claude-marketplace / well-known taps all at once). Letting the
|
||||
# deploy continue would either (a) ship a degenerate index missing
|
||||
# whole hubs — the June 2026 regression where OpenAI/Anthropic/
|
||||
# HuggingFace/NVIDIA tabs vanished — or (b) fall through to a
|
||||
# local-only catalog. Failing here keeps the last good deployment
|
||||
# live (GitHub Pages serves the previous build) instead of
|
||||
# publishing a broken catalog. Re-run the workflow once the
|
||||
# transient rate limit clears.
|
||||
python3 scripts/build_skills_index.py
|
||||
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
|
|
|||
|
|
@ -297,6 +297,21 @@ def main():
|
|||
# Batch resolve GitHub paths for skills.sh entries
|
||||
all_skills = batch_resolve_paths(all_skills, auth)
|
||||
|
||||
# Collect which sources hit a GitHub API rate limit during the crawl.
|
||||
# github / claude-marketplace / well-known all read api.github.com, so a
|
||||
# rate-limited token zeroes all three at once — surfaced below so the
|
||||
# failure message names the real cause instead of "source returned 0".
|
||||
rate_limited_sources = {
|
||||
name for name, source in sources.items()
|
||||
if getattr(source, "is_rate_limited", False)
|
||||
}
|
||||
if rate_limited_sources:
|
||||
print(
|
||||
" WARNING: GitHub API rate limit hit for: "
|
||||
+ ", ".join(sorted(rate_limited_sources)),
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
# Deduplicate by identifier
|
||||
seen: dict[str, dict] = {}
|
||||
for skill in all_skills:
|
||||
|
|
@ -311,25 +326,9 @@ def main():
|
|||
"browse-sh": 5, "claude-marketplace": 6, "lobehub": 7}
|
||||
deduped.sort(key=lambda s: (source_order.get(s["source"], 99), s["name"]))
|
||||
|
||||
# Build index
|
||||
index = {
|
||||
"version": INDEX_VERSION,
|
||||
"generated_at": datetime.now(timezone.utc).isoformat(),
|
||||
"skill_count": len(deduped),
|
||||
"skills": deduped,
|
||||
}
|
||||
|
||||
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
|
||||
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
|
||||
json.dump(index, f, separators=(",", ":"), ensure_ascii=False)
|
||||
|
||||
elapsed = time.time() - overall_start
|
||||
file_size = os.path.getsize(OUTPUT_PATH)
|
||||
print(f"\nDone! {len(deduped)} skills indexed in {elapsed:.0f}s")
|
||||
print(f"Output: {OUTPUT_PATH} ({file_size / 1024:.0f} KB)")
|
||||
|
||||
from collections import Counter
|
||||
by_source = Counter(s["source"] for s in deduped)
|
||||
print(f"\nCrawled {len(deduped)} skills in {time.time() - overall_start:.0f}s")
|
||||
for src, count in sorted(by_source.items(), key=lambda x: -x[1]):
|
||||
resolved = sum(1 for s in deduped
|
||||
if s["source"] == src and s.get("resolved_github_id"))
|
||||
|
|
@ -380,14 +379,46 @@ def main():
|
|||
)
|
||||
for line in health_errors:
|
||||
print(line, file=sys.stderr)
|
||||
if rate_limited_sources:
|
||||
print(
|
||||
"\nGitHub API rate limit was hit during this crawl for: "
|
||||
+ ", ".join(sorted(rate_limited_sources))
|
||||
+ ". This is the usual cause of an all-GitHub-tap collapse "
|
||||
"(github / claude-marketplace / well-known dropping to zero "
|
||||
"together). Re-run with a higher-quota GITHUB_TOKEN.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
"\nIf the drop is expected (e.g. a hub is genuinely shutting "
|
||||
"down), lower the floor in scripts/build_skills_index.py "
|
||||
"EXPECTED_FLOORS in the same PR.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
# IMPORTANT: do NOT write OUTPUT_PATH on failure. The index file is
|
||||
# gitignored, so a fresh deploy checkout has no copy on disk — leaving
|
||||
# it absent lets website/scripts/extract-skills.py fall back to the
|
||||
# legacy snapshot cache (or skip the unified index) instead of reading
|
||||
# a degenerate file. Writing-then-exiting-2 was the bug that shipped an
|
||||
# index with every GitHub-API source dropped to zero: deploy-site.yml
|
||||
# swallowed the exit code with `|| echo non-fatal`, and the partial file
|
||||
# was already on disk for extract-skills to pick up.
|
||||
sys.exit(2)
|
||||
|
||||
# Healthy — only now write the index out for the docs build to consume.
|
||||
index = {
|
||||
"version": INDEX_VERSION,
|
||||
"generated_at": datetime.now(timezone.utc).isoformat(),
|
||||
"skill_count": len(deduped),
|
||||
"skills": deduped,
|
||||
}
|
||||
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
|
||||
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
|
||||
json.dump(index, f, separators=(",", ":"), ensure_ascii=False)
|
||||
file_size = os.path.getsize(OUTPUT_PATH)
|
||||
print(f"\nDone! {len(deduped)} skills indexed in "
|
||||
f"{time.time() - overall_start:.0f}s")
|
||||
print(f"Output: {OUTPUT_PATH} ({file_size / 1024:.0f} KB)")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
99
tests/scripts/test_build_skills_index_health.py
Normal file
99
tests/scripts/test_build_skills_index_health.py
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
"""Invariants for scripts/build_skills_index.py's health-check guard.
|
||||
|
||||
Regression context (June 2026): a GitHub API rate limit zeroed every
|
||||
api.github.com-backed source (github / claude-marketplace / well-known) at
|
||||
once during the docs deploy crawl. The build's health check fired and exited
|
||||
non-zero — but it had ALREADY written the degenerate index to disk, and
|
||||
deploy-site.yml swallowed the exit code with ``|| echo non-fatal``. The
|
||||
partial index (missing the OpenAI/Anthropic/HuggingFace/NVIDIA tabs) shipped
|
||||
to the live Skills Hub.
|
||||
|
||||
These tests pin the two contracts that prevent a recurrence:
|
||||
1. A degenerate crawl exits non-zero AND does NOT write the output file
|
||||
(so extract-skills.py falls back instead of reading a broken index).
|
||||
2. A healthy crawl exits zero AND writes the file with every source present.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import types
|
||||
|
||||
import pytest
|
||||
|
||||
import scripts.build_skills_index as build_mod
|
||||
|
||||
|
||||
def _meta(name, src):
|
||||
return build_mod.SkillMeta(
|
||||
name=name, description="d", source=src,
|
||||
identifier=f"{src}/{name}", trust_level="community",
|
||||
)
|
||||
|
||||
|
||||
class _FakeSource:
|
||||
def __init__(self, src, n, rate_limited=False):
|
||||
self._src = src
|
||||
self._n = n
|
||||
self.is_rate_limited = rate_limited
|
||||
|
||||
def search(self, query, limit=10):
|
||||
return [_meta(f"{self._src}-{i}", self._src) for i in range(self._n)]
|
||||
|
||||
|
||||
def _install_fake_sources(monkeypatch, *, github_count, claude_count=40,
|
||||
well_known_count=10, github_rate_limited=False):
|
||||
monkeypatch.setattr(build_mod, "SkillsShSource", lambda auth: _FakeSource("skills.sh", 15000))
|
||||
monkeypatch.setattr(build_mod, "OptionalSkillSource", lambda: _FakeSource("official", 95))
|
||||
monkeypatch.setattr(build_mod, "WellKnownSkillSource", lambda: _FakeSource("well-known", well_known_count))
|
||||
monkeypatch.setattr(
|
||||
build_mod, "GitHubSource",
|
||||
lambda auth: _FakeSource("github", github_count, rate_limited=github_rate_limited),
|
||||
)
|
||||
monkeypatch.setattr(build_mod, "ClawHubSource", lambda: _FakeSource("clawhub", 69000))
|
||||
monkeypatch.setattr(
|
||||
build_mod, "ClaudeMarketplaceSource",
|
||||
lambda auth: _FakeSource("claude-marketplace", claude_count, rate_limited=github_rate_limited),
|
||||
)
|
||||
monkeypatch.setattr(build_mod, "LobeHubSource", lambda: _FakeSource("lobehub", 500))
|
||||
monkeypatch.setattr(build_mod, "BrowseShSource", lambda: _FakeSource("browse-sh", 380))
|
||||
monkeypatch.setattr(
|
||||
build_mod, "crawl_skills_sh",
|
||||
lambda source: [build_mod._meta_to_dict(m) for m in source.search("", 0)],
|
||||
)
|
||||
monkeypatch.setattr(build_mod, "batch_resolve_paths", lambda skills, auth: skills)
|
||||
monkeypatch.setattr(
|
||||
build_mod, "GitHubAuth",
|
||||
lambda: types.SimpleNamespace(auth_method=lambda: "token"),
|
||||
)
|
||||
|
||||
|
||||
def test_degenerate_crawl_exits_nonzero_and_writes_no_file(tmp_path, monkeypatch):
|
||||
"""A collapsed GitHub crawl must fail loud and leave OUTPUT_PATH unwritten."""
|
||||
out = tmp_path / "skills-index.json"
|
||||
monkeypatch.setattr(build_mod, "OUTPUT_PATH", str(out))
|
||||
_install_fake_sources(monkeypatch, github_count=0, claude_count=0,
|
||||
well_known_count=0, github_rate_limited=True)
|
||||
|
||||
with pytest.raises(SystemExit) as exc:
|
||||
build_mod.main()
|
||||
|
||||
assert exc.value.code != 0
|
||||
# The degenerate index must NOT have been written — extract-skills.py
|
||||
# relies on the file's absence to fall back instead of reading garbage.
|
||||
assert not out.exists()
|
||||
|
||||
|
||||
def test_healthy_crawl_writes_index_with_all_sources(tmp_path, monkeypatch):
|
||||
out = tmp_path / "skills-index.json"
|
||||
monkeypatch.setattr(build_mod, "OUTPUT_PATH", str(out))
|
||||
_install_fake_sources(monkeypatch, github_count=200)
|
||||
|
||||
build_mod.main() # exit 0 (no SystemExit)
|
||||
|
||||
assert out.exists()
|
||||
import json
|
||||
data = json.loads(out.read_text())
|
||||
sources = {s["source"] for s in data["skills"]}
|
||||
# Every GitHub-API-backed source that vanished in the regression is present.
|
||||
assert {"github", "claude-marketplace", "well-known"} <= sources
|
||||
assert data["skill_count"] == len(data["skills"])
|
||||
|
|
@ -550,11 +550,8 @@ class GitHubSource(SkillSource):
|
|||
return [SkillMeta(**s) for s in cached]
|
||||
|
||||
url = f"https://api.github.com/repos/{repo}/contents/{path.rstrip('/')}"
|
||||
try:
|
||||
resp = httpx.get(url, headers=self.auth.get_headers(), timeout=15, follow_redirects=True)
|
||||
if resp.status_code != 200:
|
||||
return []
|
||||
except httpx.HTTPError:
|
||||
resp = self._github_get(url)
|
||||
if resp is None or resp.status_code != 200:
|
||||
return []
|
||||
|
||||
entries = resp.json()
|
||||
|
|
@ -639,15 +636,98 @@ class GitHubSource(SkillSource):
|
|||
|
||||
def _check_rate_limit_response(self, resp: "httpx.Response") -> None:
|
||||
"""Flag the instance as rate-limited on a 403 with exhausted quota or any 429."""
|
||||
if resp.status_code == 403:
|
||||
if resp.status_code in (403, 429):
|
||||
remaining = resp.headers.get("X-RateLimit-Remaining", "")
|
||||
if remaining == "0":
|
||||
if remaining == "0" or resp.status_code == 429:
|
||||
self._rate_limited = True
|
||||
logger.warning(
|
||||
"GitHub API rate limit exhausted (unauthenticated: 60 req/hr). "
|
||||
"Set GITHUB_TOKEN or install the gh CLI to raise the limit to 5,000/hr."
|
||||
)
|
||||
|
||||
def _github_get(
|
||||
self,
|
||||
url: str,
|
||||
*,
|
||||
params: Optional[Dict] = None,
|
||||
headers: Optional[Dict] = None,
|
||||
timeout: float = 15.0,
|
||||
max_retries: int = 3,
|
||||
) -> Optional["httpx.Response"]:
|
||||
"""GET against the GitHub API with retry/backoff on transient failures.
|
||||
|
||||
Returns the final ``httpx.Response`` (caller inspects status) or
|
||||
``None`` when the final attempt raised a transport error.
|
||||
|
||||
Retries on:
|
||||
- 403 with ``X-RateLimit-Remaining: 0``, or any 429 — waits for
|
||||
``Retry-After`` (capped at 60s) or a reset at most 60s away, else exponential
|
||||
backoff. This is the all-GitHub-tap-collapse case: a single
|
||||
shared rate limit zeroes github + claude-marketplace + well-known
|
||||
at once during the index build.
|
||||
- 5xx and connection/timeout errors — exponential backoff.
|
||||
|
||||
On terminal rate-limit exhaustion the instance is flagged via
|
||||
``_check_rate_limit_response`` so the build can fail loud instead of
|
||||
silently shipping an index with the GitHub sources dropped to zero.
|
||||
"""
|
||||
hdrs = headers if headers is not None else self.auth.get_headers()
|
||||
backoff = 1.0
|
||||
last_resp: Optional["httpx.Response"] = None
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
resp = httpx.get(
|
||||
url, params=params, headers=hdrs,
|
||||
timeout=timeout, follow_redirects=True,
|
||||
)
|
||||
except httpx.HTTPError as e:
|
||||
logger.debug("GitHub GET %s failed (attempt %d/%d): %s",
|
||||
url, attempt + 1, max_retries, e)
|
||||
if attempt < max_retries - 1:
|
||||
time.sleep(backoff)
|
||||
backoff = min(backoff * 2, 30.0)
|
||||
continue
|
||||
return None
|
||||
|
||||
last_resp = resp
|
||||
if resp.status_code == 200:
|
||||
return resp
|
||||
|
||||
# Rate-limited: honor the reset header when present, else back off.
|
||||
if resp.status_code in (403, 429):
|
||||
remaining = resp.headers.get("X-RateLimit-Remaining", "")
|
||||
is_rl = remaining == "0" or resp.status_code == 429
|
||||
if is_rl and attempt < max_retries - 1:
|
||||
wait = backoff
|
||||
reset = resp.headers.get("X-RateLimit-Reset", "")
|
||||
retry_after = resp.headers.get("Retry-After", "")
|
||||
if retry_after.isdigit():
|
||||
wait = min(float(retry_after), 60.0)
|
||||
elif reset.isdigit():
|
||||
delta = float(reset) - time.time()
|
||||
if 0 < delta <= 60.0:
|
||||
wait = delta
|
||||
logger.debug(
|
||||
"GitHub rate limited on %s, waiting %.1fs (attempt %d/%d)",
|
||||
url, wait, attempt + 1, max_retries,
|
||||
)
|
||||
time.sleep(wait)
|
||||
backoff = min(backoff * 2, 30.0)
|
||||
continue
|
||||
# Out of retries (or not a rate-limit 403) — flag and return.
|
||||
self._check_rate_limit_response(resp)
|
||||
return resp
|
||||
|
||||
# 5xx — retry; 4xx (other than rate limit) — return immediately.
|
||||
if 500 <= resp.status_code < 600 and attempt < max_retries - 1:
|
||||
time.sleep(backoff)
|
||||
backoff = min(backoff * 2, 30.0)
|
||||
continue
|
||||
return resp
|
||||
|
||||
return last_resp
|
||||
|
||||
|
||||
def _download_directory(self, repo: str, path: str) -> Dict[str, str]:
|
||||
"""Recursively download all text files from a GitHub directory.
|
||||
|
||||
|
|
@ -768,17 +848,12 @@ class GitHubSource(SkillSource):
|
|||
def _fetch_file_content(self, repo: str, path: str) -> Optional[str]:
|
||||
"""Fetch a single file's content from GitHub."""
|
||||
url = f"https://api.github.com/repos/{repo}/contents/{path}"
|
||||
try:
|
||||
resp = httpx.get(
|
||||
url,
|
||||
headers={**self.auth.get_headers(), "Accept": "application/vnd.github.v3.raw"},
|
||||
timeout=15, follow_redirects=True,
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
return resp.text
|
||||
self._check_rate_limit_response(resp)
|
||||
except httpx.HTTPError as e:
|
||||
logger.debug("GitHub contents API fetch failed: %s", e)
|
||||
resp = self._github_get(
|
||||
url,
|
||||
headers={**self.auth.get_headers(), "Accept": "application/vnd.github.v3.raw"},
|
||||
)
|
||||
if resp is not None and resp.status_code == 200:
|
||||
return resp.text
|
||||
return None
|
||||
|
||||
def _get_skillsh_groupings(self, repo: str) -> Optional[Dict[str, str]]:
|
||||
|
|
@ -2373,10 +2448,19 @@ class ClaudeMarketplaceSource(SkillSource):
|
|||
|
||||
def __init__(self, auth: GitHubAuth):
|
||||
self.auth = auth
|
||||
# Persistent GitHubSource so rate-limit state survives across the
|
||||
# marketplace-index fetch + per-skill inspect calls and can be
|
||||
# surfaced to the index builder (see is_rate_limited).
|
||||
self.github = GitHubSource(auth=auth)
|
||||
|
||||
def source_id(self) -> str:
|
||||
return "claude-marketplace"
|
||||
|
||||
@property
|
||||
def is_rate_limited(self) -> bool:
|
||||
"""Whether the underlying GitHub API hit a rate limit during the crawl."""
|
||||
return self.github.is_rate_limited
|
||||
|
||||
def trust_level_for(self, identifier: str) -> str:
|
||||
parts = identifier.split("/", 2)
|
||||
if len(parts) >= 2:
|
||||
|
|
@ -2415,15 +2499,13 @@ class ClaudeMarketplaceSource(SkillSource):
|
|||
|
||||
def fetch(self, identifier: str) -> Optional[SkillBundle]:
|
||||
# Delegate to GitHub Contents API since marketplace skills live in GitHub repos
|
||||
gh = GitHubSource(auth=self.auth)
|
||||
bundle = gh.fetch(identifier)
|
||||
bundle = self.github.fetch(identifier)
|
||||
if bundle:
|
||||
bundle.source = "claude-marketplace"
|
||||
return bundle
|
||||
|
||||
def inspect(self, identifier: str) -> Optional[SkillMeta]:
|
||||
gh = GitHubSource(auth=self.auth)
|
||||
meta = gh.inspect(identifier)
|
||||
meta = self.github.inspect(identifier)
|
||||
if meta:
|
||||
meta.source = "claude-marketplace"
|
||||
meta.trust_level = self.trust_level_for(identifier)
|
||||
|
|
@ -2437,16 +2519,15 @@ class ClaudeMarketplaceSource(SkillSource):
|
|||
return cached
|
||||
|
||||
url = f"https://api.github.com/repos/{repo}/contents/.claude-plugin/marketplace.json"
|
||||
resp = self.github._github_get(
|
||||
url,
|
||||
headers={**self.auth.get_headers(), "Accept": "application/vnd.github.v3.raw"},
|
||||
)
|
||||
if resp is None or resp.status_code != 200:
|
||||
return []
|
||||
try:
|
||||
resp = httpx.get(
|
||||
url,
|
||||
headers={**self.auth.get_headers(), "Accept": "application/vnd.github.v3.raw"},
|
||||
timeout=15,
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
return []
|
||||
data = json.loads(resp.text)
|
||||
except (httpx.HTTPError, json.JSONDecodeError):
|
||||
except json.JSONDecodeError:
|
||||
return []
|
||||
|
||||
plugins = data.get("plugins", [])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue