feat(skills-hub): health checks, freshness badge, and a watchdog cron (#32345)

Layered safety so the Skills Hub at /docs/skills stays in sync without
silent rot. Three pieces:

1. build_skills_index.py — refuses to ship a degenerate index.
   EXPECTED_FLOORS per source (skills.sh ≥100, lobehub ≥100, clawhub ≥50,
   official ≥50, github ≥30, browse-sh ≥50) and MIN_TOTAL=1500. Any source
   collapsing to zero (the silent OpenAI breakage that hid for weeks) now
   fails the workflow loudly — a broken index never reaches the live site.

2. extract-skills.py + the React page — visible freshness signal.
   Sidecar website/src/data/skills-meta.json carries the index's
   generated_at timestamp, plus per-source counts. Skills Hub renders a
   'Catalog refreshed N hours ago · auto-rebuilt twice daily' line under
   the hero copy. If the cron stalls, users see the staleness immediately.

3. .github/workflows/skills-index-freshness.yml — watchdog cron.
   Every 4 hours, fetches the live /docs/api/skills-index.json, validates
   shape, checks age (>26h is stale), checks the same per-source floors,
   and opens (or appends to) a GitHub issue when anything is off. The
   issue is title-prefixed [skills-index-watchdog] so subsequent failures
   append a comment instead of spamming new issues.

Net effect:
- A silent regression like 'OpenAI tap moved its skills' now fails the
  build instead of shipping a quietly broken catalog.
- A stuck cron (like the landingpage breakage that ran red for weeks) now
  files an issue within 4 hours.
- Users see how fresh the catalog is on the page itself.

Test plan:
- Local: built skills-meta.json from the live index → 'Catalog refreshed
  N minutes ago' rendered correctly in the static HTML.
- Probe logic dry-run against the live index: total=2456, all 6 sources
  above floor, age 0.1h — issues=NONE.
- Triggered skills-index.yml manually; both jobs green, deploy-site.yml
  dispatch fired.
This commit is contained in:
Teknium 2026-05-25 23:10:45 -07:00 committed by GitHub
parent cea87d9139
commit d8703e27f5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 273 additions and 8 deletions

1
website/.gitignore vendored
View file

@ -8,6 +8,7 @@
.docusaurus
.cache-loader
src/data/skills.json
src/data/skills-meta.json
static/llms.txt
static/llms-full.txt

View file

@ -21,6 +21,7 @@ the unified index existed).
import json
import os
from collections import Counter
from datetime import datetime, timezone
import yaml
@ -32,6 +33,7 @@ LOCAL_SKILL_DIRS = [
UNIFIED_INDEX_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "skills-index.json")
LEGACY_INDEX_CACHE_DIR = os.path.join(REPO_ROOT, "skills", "index-cache")
OUTPUT = os.path.join(REPO_ROOT, "website", "src", "data", "skills.json")
META_OUTPUT = os.path.join(REPO_ROOT, "website", "src", "data", "skills-meta.json")
CATEGORY_LABELS = {
"apple": "Apple",
@ -280,19 +282,32 @@ def _label_for_github_identifier(identifier: str) -> str:
def extract_unified_index_skills():
"""Read website/static/api/skills-index.json — the canonical multi-source index."""
"""Read website/static/api/skills-index.json — the canonical multi-source index.
Returns ``(skills, meta)`` where ``meta`` carries the index's
``generated_at`` timestamp and total count so the Skills Hub page can
show a "Last refreshed …" badge. Returns ``(None, None)`` when the
index file is absent or malformed (caller falls back to the legacy
cache).
"""
if not os.path.isfile(UNIFIED_INDEX_PATH):
return None
return None, None
try:
with open(UNIFIED_INDEX_PATH, encoding="utf-8") as f:
data = json.load(f)
except (json.JSONDecodeError, OSError) as e:
print(f"[extract-skills] Failed to read unified index: {e}")
return None
return None, None
if not isinstance(data, dict) or "skills" not in data:
return None
return None, None
meta = {
"indexGeneratedAt": data.get("generated_at", ""),
"indexSkillCount": data.get("skill_count", 0),
"indexVersion": data.get("version", 0),
}
out = []
for entry in data.get("skills", []):
@ -352,7 +367,7 @@ def extract_unified_index_skills():
"installCmd": install_cmd,
})
return out
return out, meta
def extract_legacy_cache_skills():
@ -490,13 +505,14 @@ def _consolidate_small_categories(skills: list) -> list:
def main():
local = extract_local_skills()
unified = extract_unified_index_skills()
unified, index_meta = extract_unified_index_skills()
if unified is not None:
external = unified
external_source = "unified index"
else:
external = extract_legacy_cache_skills()
external_source = "legacy index-cache"
index_meta = None
print(
f"[extract-skills] WARNING: unified index not found at "
f"{UNIFIED_INDEX_PATH}; falling back to {external_source}. "
@ -517,16 +533,32 @@ def main():
with open(OUTPUT, "w", encoding="utf-8") as f:
json.dump(all_skills, f, indent=2)
# Sidecar meta file so the page can render a "Last refreshed" badge
# without changing the shape of skills.json.
by_source = Counter(s["source"] for s in all_skills)
meta = {
"extractedAt": datetime.now(timezone.utc).isoformat(),
"totalSkills": len(all_skills),
"localSkills": len(local),
"externalSkills": len(external),
"externalSource": external_source,
"bySource": dict(by_source.most_common()),
}
if index_meta:
meta.update(index_meta)
with open(META_OUTPUT, "w", encoding="utf-8") as f:
json.dump(meta, f, indent=2)
print(f"Extracted {len(all_skills)} skills to {OUTPUT}")
print(f" {len(local)} local ({sum(1 for s in local if s['source'] == 'built-in')} built-in, "
f"{sum(1 for s in local if s['source'] == 'optional')} optional)")
print(f" {len(external)} from {external_source}")
# Breakdown by source
by_source = Counter(s["source"] for s in all_skills)
print("By source:")
for src, count in by_source.most_common():
print(f" {src}: {count}")
if index_meta and index_meta.get("indexGeneratedAt"):
print(f"Unified index built at: {index_meta['indexGeneratedAt']}")
if __name__ == "__main__":

View file

@ -1,6 +1,7 @@
import React, { useState, useMemo, useCallback, useRef, useEffect } from "react";
import Layout from "@theme/Layout";
import skills from "../../data/skills.json";
import meta from "../../data/skills-meta.json";
import styles from "./styles.module.css";
interface Skill {
@ -24,6 +25,33 @@ interface Skill {
const allSkills: Skill[] = skills as Skill[];
// Shape of the sidecar data/skills-meta.json imported above as `meta`.
// Every field is optional, so the page must tolerate any of them being absent.
interface IndexMeta {
// ISO timestamp (UTC) recorded by the extract script when it wrote the file.
extractedAt?: string;
// Copied from the unified index's `generated_at` field; not written when the
// extract script fell back to the legacy index-cache.
indexGeneratedAt?: string;
// Number of skills the extract script wrote to skills.json.
totalSkills?: number;
// Which external input was used: "unified index" or "legacy index-cache".
externalSource?: string;
// Skill count keyed by each skill's `source` value.
bySource?: Record<string, number>;
}
const indexMeta: IndexMeta = meta as IndexMeta;
/**
 * Turn an ISO timestamp into a coarse "N units ago" phrase.
 *
 * Returns null when `iso` is missing, empty, or cannot be parsed as a date.
 * Anything less than one full minute old, including timestamps that lie in
 * the future, is reported as "just now". Older values are expressed in the
 * largest fitting unit among minutes, hours, days (24 h) and months (30 days).
 */
function formatRelativeTime(iso?: string): string | null {
  if (!iso) {
    return null;
  }

  const timestamp = new Date(iso).getTime();
  if (!Number.isFinite(timestamp)) {
    return null;
  }

  // A negative elapsed time floors to a value below 1, so future timestamps
  // take the same "just now" path as very recent ones.
  const elapsedMinutes = Math.floor((Date.now() - timestamp) / 60_000);
  if (elapsedMinutes < 1) {
    return "just now";
  }

  const phrase = (count: number, unit: string): string =>
    `${count} ${unit}${count === 1 ? "" : "s"} ago`;

  // Each rung is [unit name, how many of this unit make up the next one].
  const ladder: Array<[string, number]> = [
    ["minute", 60],
    ["hour", 24],
    ["day", 30],
  ];

  let amount = elapsedMinutes;
  for (const [unit, span] of ladder) {
    if (amount < span) {
      return phrase(amount, unit);
    }
    amount = Math.floor(amount / span);
  }
  return phrase(amount, "month");
}
const CATEGORY_ICONS: Record<string, string> = {
apple: "\u{f179}",
"autonomous-ai-agents": "\u{1F916}",
@ -487,6 +515,17 @@ export default function SkillsDashboard() {
<strong className={styles.heroAccent}>{allSkills.length}</strong> skills
across {sources.length - 1} registries
</p>
{(indexMeta?.indexGeneratedAt || indexMeta?.extractedAt) && (
<p className={styles.heroSub} style={{ fontSize: "0.85rem", opacity: 0.75 }}>
Catalog refreshed{" "}
<span title={indexMeta.indexGeneratedAt || indexMeta.extractedAt}>
{formatRelativeTime(
indexMeta.indexGeneratedAt || indexMeta.extractedAt,
) || "recently"}
</span>
{" "}· auto-rebuilt twice daily
</p>
)}
<div className={styles.statsRow}>
<StatCard