diff --git a/.github/workflows/skills-index-freshness.yml b/.github/workflows/skills-index-freshness.yml
new file mode 100644
index 00000000000..856878def5f
--- /dev/null
+++ b/.github/workflows/skills-index-freshness.yml
@@ -0,0 +1,189 @@
+name: Skills Index Freshness Check
+
+# Belt-and-suspenders for the twice-daily build_skills_index pipeline.
+# If the live /docs/api/skills-index.json ever goes more than 26 hours
+# stale OR the file disappears entirely OR a major source has collapsed,
+# this workflow opens a GitHub issue so we hear about it before users do.
+#
+# Triggered every 4 hours so we catch a stuck cron within one tick.
+
+on:
+  schedule:
+    - cron: '0 */4 * * *'
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  issues: write
+
+jobs:
+  check-freshness:
+    if: github.repository == 'NousResearch/hermes-agent'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Probe live index
+        id: probe
+        run: |
+          set -e
+          URL="https://hermes-agent.nousresearch.com/docs/api/skills-index.json"
+          echo "Probing $URL"
+          # -f fails on HTTP errors; -sS hides progress but keeps error text;
+          # -L follows redirects. Retries + a hard timeout keep one network
+          # blip or a hung connection from opening a watchdog issue.
+          if ! curl -fsSL --retry 3 --max-time 60 -o /tmp/skills-index.json "$URL"; then
+            echo "status=fetch-failed" >> "$GITHUB_OUTPUT"
+            echo "detail=Could not download $URL" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+          # Validate + extract generated_at and per-source counts
+          python3 <<'PY' >> "$GITHUB_OUTPUT"
+          import json
+          import sys
+          from collections import Counter
+          from datetime import datetime, timezone
+
+          MAX_AGE_HOURS = 26
+          MIN_TOTAL = 1500
+          # Floors — same as build_skills_index.py EXPECTED_FLOORS.
+          FLOORS = {
+              "skills.sh": 100,
+              "lobehub": 100,
+              "clawhub": 50,
+              "official": 50,
+              "github": 30,
+              "browse-sh": 50,
+          }
+
+
+          def emit(key, value):
+              # GITHUB_OUTPUT is line-oriented (key=value). Collapse all
+              # whitespace so text taken from the remote JSON or from an
+              # exception message can never start a second output line.
+              print(f"{key}={' '.join(str(value).split())}")
+
+
+          def finish(status, detail):
+              # Report a terminal probe result and stop; exit code stays 0 so
+              # the issue-opening step decides what to do with the status.
+              emit("status", status)
+              emit("detail", detail)
+              sys.exit(0)
+
+
+          try:
+              with open("/tmp/skills-index.json", encoding="utf-8") as f:
+                  data = json.load(f)
+          except Exception as e:
+              finish("parse-failed", f"JSON decode error: {e}")
+
+          # A non-object payload would otherwise crash on .get() below and
+          # fail the step before any status is written.
+          if not isinstance(data, dict):
+              finish("invalid-shape", f"top-level JSON is not an object (got {type(data).__name__})")
+
+          generated_at = data.get("generated_at", "")
+          skills = data.get("skills", [])
+          if not isinstance(skills, list):
+              finish("invalid-shape", f"skills field is not a list (got {type(skills).__name__})")
+
+          total = data.get("skill_count", 0)
+          if not isinstance(total, int):
+              # Fall back to counting entries rather than crashing on the comparison.
+              total = len(skills)
+
+          # Per-source counts; skip malformed (non-object) entries.
+          by_src = Counter(
+              str(s.get("source", "")) for s in skills if isinstance(s, dict)
+          )
+
+          issues = []
+
+          # Freshness. A missing or unparseable timestamp is itself reported:
+          # silently skipping the check would defeat the watchdog.
+          try:
+              ts = datetime.fromisoformat(str(generated_at).replace("Z", "+00:00"))
+              if ts.tzinfo is None:
+                  # Assumption: the builder writes UTC; treat naive stamps as UTC.
+                  ts = ts.replace(tzinfo=timezone.utc)
+              age_hours = (datetime.now(timezone.utc) - ts).total_seconds() / 3600
+          except (ValueError, OverflowError):
+              issues.append(f"generated_at missing or unparseable ({generated_at!r})")
+          else:
+              if age_hours > MAX_AGE_HOURS:
+                  issues.append(f"Index is {age_hours:.1f}h old (limit {MAX_AGE_HOURS}h)")
+
+          for src, floor in FLOORS.items():
+              count = by_src.get(src, 0)
+              if src == "skills.sh":
+                  # 'skills-sh' and 'skills.sh' label the same source.
+                  count = by_src.get("skills.sh", 0) + by_src.get("skills-sh", 0)
+              if count < floor:
+                  issues.append(f"{src}: {count} < {floor}")
+          if total < MIN_TOTAL:
+              issues.append(f"total skills: {total} < {MIN_TOTAL}")
+
+          if issues:
+              emit("status", "degraded")
+              emit("detail", "; ".join(issues))
+          else:
+              emit("status", "ok")
+              emit("detail", f"Index OK — {total} skills, generated {generated_at}")
+          emit("summary", ", ".join(f"{k}={v}" for k, v in by_src.most_common(8)))
+          PY
+
+      - name: Report status
+        env:
+          STATUS: ${{ steps.probe.outputs.status }}
+          DETAIL: ${{ steps.probe.outputs.detail }}
+          SUMMARY: ${{ steps.probe.outputs.summary }}
+        run: |
+          # Values are passed through env rather than interpolated into the
+          # script, so probe output is never parsed as shell syntax.
+          echo "Probe status: $STATUS"
+          echo "Detail: $DETAIL"
+          if [ -n "$SUMMARY" ]; then
+            echo "Summary: $SUMMARY"
+          fi
+
+      - name: Open issue on degraded / failed probe
+        # !cancelled() lets this run even when the probe step itself crashed
+        # (no status written), which is also a condition worth an issue.
+        if: ${{ !cancelled() && steps.probe.outputs.status != 'ok' }}
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          STATUS: ${{ steps.probe.outputs.status || 'probe-crashed' }}
+          DETAIL: ${{ steps.probe.outputs.detail || 'Probe step exited before reporting a status; see the run logs.' }}
+        run: |
+          # Find existing open issue by title prefix so we don't spam — we
+          # append a comment instead of opening a new one each tick.
+          TITLE_PREFIX="[skills-index-watchdog]"
+          existing=$(gh issue list \
+            --repo "${{ github.repository }}" \
+            --state open \
+            --search "in:title \"$TITLE_PREFIX\"" \
+            --json number,title \
+            --jq '.[] | select(.title | startswith("'"$TITLE_PREFIX"'")) | .number' \
+            | head -1)
+          BODY="Automated freshness probe failed.
+
+          **Status:** \`$STATUS\`
+          **Detail:** $DETAIL
+
+          The Skills Hub at /docs/skills depends on \`/docs/api/skills-index.json\`.
+          The unified index is rebuilt by \`.github/workflows/skills-index.yml\` (cron 6/18 UTC)
+          and \`.github/workflows/deploy-site.yml\` (on every push affecting website/skills).
+          If this issue keeps reopening, check the latest runs:
+
+          - https://github.com/${{ github.repository }}/actions/workflows/skills-index.yml
+          - https://github.com/${{ github.repository }}/actions/workflows/deploy-site.yml
+
+          This issue was opened by \`.github/workflows/skills-index-freshness.yml\`. Close it once the underlying problem is fixed; the next probe will open a new issue if it's still broken."
+          if [ -n "$existing" ]; then
+            echo "Appending to existing issue #$existing"
+            gh issue comment "$existing" --repo "${{ github.repository }}" --body "Probe still failing at $(date -u +%FT%TZ): \`$STATUS\` — $DETAIL"
+          else
+            echo "Opening new watchdog issue"
+            gh issue create --repo "${{ github.repository }}" \
+              --title "$TITLE_PREFIX Skills index is stale or degraded ($STATUS)" \
+              --body "$BODY"
+          fi
diff --git a/scripts/build_skills_index.py b/scripts/build_skills_index.py
index 844b29733b7..9b9277547f7 100644
--- a/scripts/build_skills_index.py
+++ b/scripts/build_skills_index.py
@@ -322,6 +322,50 @@ def main():
extra = f" ({resolved} resolved)" if resolved else ""
print(f" {src}: {count}{extra}")
+ # Health check: catch silent breakage early. Every source listed below
+ # has historically returned at least `floor` entries; a zero (or near-
+ # zero) result almost certainly means a tap path moved, an API changed,
+ # or rate limiting kicked in. Failing here forces a human look before
+ # the broken index reaches the live docs.
+ EXPECTED_FLOORS = {
+ "skills.sh": 100,
+ "lobehub": 100,
+ "clawhub": 50,
+ "official": 50,
+ "github": 30, # collapsed across all GitHub taps
+ "browse-sh": 50,
+ }
+ health_errors = []
+ for src, floor in EXPECTED_FLOORS.items():
+ # 'skills-sh' and 'skills.sh' are the same source; both labels exist.
+ count = by_source.get(src, 0)
+ if src == "skills.sh":
+ count = by_source.get("skills.sh", 0) + by_source.get("skills-sh", 0)
+ if count < floor:
+ health_errors.append(f" {src}: {count} < expected floor {floor}")
+
+ MIN_TOTAL = 1500
+ if len(deduped) < MIN_TOTAL:
+ health_errors.append(
+ f" total: {len(deduped)} < expected floor {MIN_TOTAL}"
+ )
+
+ if health_errors:
+ print(
+ "\nERROR: skills index health check failed — refusing to ship "
+ "a degenerate index. Investigate the following sources:",
+ file=sys.stderr,
+ )
+ for line in health_errors:
+ print(line, file=sys.stderr)
+ print(
+ "\nIf the drop is expected (e.g. a hub is genuinely shutting "
+ "down), lower the floor in scripts/build_skills_index.py "
+ "EXPECTED_FLOORS in the same PR.",
+ file=sys.stderr,
+ )
+ sys.exit(2)
+
if __name__ == "__main__":
main()
diff --git a/website/.gitignore b/website/.gitignore
index c8dd1071c02..618c20e2b1e 100644
--- a/website/.gitignore
+++ b/website/.gitignore
@@ -8,6 +8,7 @@
.docusaurus
.cache-loader
src/data/skills.json
+src/data/skills-meta.json
static/llms.txt
static/llms-full.txt
diff --git a/website/scripts/extract-skills.py b/website/scripts/extract-skills.py
index 5bdb39d4f9b..dd648589db8 100644
--- a/website/scripts/extract-skills.py
+++ b/website/scripts/extract-skills.py
@@ -21,6 +21,7 @@ the unified index existed).
import json
import os
from collections import Counter
+from datetime import datetime, timezone
import yaml
@@ -32,6 +33,7 @@ LOCAL_SKILL_DIRS = [
UNIFIED_INDEX_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "skills-index.json")
LEGACY_INDEX_CACHE_DIR = os.path.join(REPO_ROOT, "skills", "index-cache")
OUTPUT = os.path.join(REPO_ROOT, "website", "src", "data", "skills.json")
+META_OUTPUT = os.path.join(REPO_ROOT, "website", "src", "data", "skills-meta.json")
CATEGORY_LABELS = {
"apple": "Apple",
@@ -280,19 +282,32 @@ def _label_for_github_identifier(identifier: str) -> str:
def extract_unified_index_skills():
- """Read website/static/api/skills-index.json — the canonical multi-source index."""
+ """Read website/static/api/skills-index.json — the canonical multi-source index.
+
+ Returns ``(skills, meta)`` where ``meta`` carries the index's
+ ``generated_at`` timestamp and total count so the Skills Hub page can
+ show a "Last refreshed …" badge. Returns ``(None, None)`` when the
+ index file is absent or malformed (caller falls back to the legacy
+ cache).
+ """
if not os.path.isfile(UNIFIED_INDEX_PATH):
- return None
+ return None, None
try:
with open(UNIFIED_INDEX_PATH, encoding="utf-8") as f:
data = json.load(f)
except (json.JSONDecodeError, OSError) as e:
print(f"[extract-skills] Failed to read unified index: {e}")
- return None
+ return None, None
if not isinstance(data, dict) or "skills" not in data:
- return None
+ return None, None
+
+ meta = {
+ "indexGeneratedAt": data.get("generated_at", ""),
+ "indexSkillCount": data.get("skill_count", 0),
+ "indexVersion": data.get("version", 0),
+ }
out = []
for entry in data.get("skills", []):
@@ -352,7 +367,7 @@ def extract_unified_index_skills():
"installCmd": install_cmd,
})
- return out
+ return out, meta
def extract_legacy_cache_skills():
@@ -490,13 +505,14 @@ def _consolidate_small_categories(skills: list) -> list:
def main():
local = extract_local_skills()
- unified = extract_unified_index_skills()
+ unified, index_meta = extract_unified_index_skills()
if unified is not None:
external = unified
external_source = "unified index"
else:
external = extract_legacy_cache_skills()
external_source = "legacy index-cache"
+ index_meta = None
print(
f"[extract-skills] WARNING: unified index not found at "
f"{UNIFIED_INDEX_PATH}; falling back to {external_source}. "
@@ -517,16 +533,32 @@ def main():
with open(OUTPUT, "w", encoding="utf-8") as f:
json.dump(all_skills, f, indent=2)
+ # Sidecar meta file so the page can render a "Last refreshed" badge
+ # without changing the shape of skills.json.
+ by_source = Counter(s["source"] for s in all_skills)
+ meta = {
+ "extractedAt": datetime.now(timezone.utc).isoformat(),
+ "totalSkills": len(all_skills),
+ "localSkills": len(local),
+ "externalSkills": len(external),
+ "externalSource": external_source,
+ "bySource": dict(by_source.most_common()),
+ }
+ if index_meta:
+ meta.update(index_meta)
+ with open(META_OUTPUT, "w", encoding="utf-8") as f:
+ json.dump(meta, f, indent=2)
+
print(f"Extracted {len(all_skills)} skills to {OUTPUT}")
print(f" {len(local)} local ({sum(1 for s in local if s['source'] == 'built-in')} built-in, "
f"{sum(1 for s in local if s['source'] == 'optional')} optional)")
print(f" {len(external)} from {external_source}")
- # Breakdown by source
- by_source = Counter(s["source"] for s in all_skills)
print("By source:")
for src, count in by_source.most_common():
print(f" {src}: {count}")
+ if index_meta and index_meta.get("indexGeneratedAt"):
+ print(f"Unified index built at: {index_meta['indexGeneratedAt']}")
if __name__ == "__main__":
diff --git a/website/src/pages/skills/index.tsx b/website/src/pages/skills/index.tsx
index 495fb35ca5d..0ef6f64abc2 100644
--- a/website/src/pages/skills/index.tsx
+++ b/website/src/pages/skills/index.tsx
@@ -1,6 +1,7 @@
import React, { useState, useMemo, useCallback, useRef, useEffect } from "react";
import Layout from "@theme/Layout";
import skills from "../../data/skills.json";
+import meta from "../../data/skills-meta.json";
import styles from "./styles.module.css";
interface Skill {
@@ -24,6 +25,33 @@ interface Skill {
const allSkills: Skill[] = skills as Skill[];
+interface IndexMeta {
+ extractedAt?: string;
+ indexGeneratedAt?: string;
+ totalSkills?: number;
+ externalSource?: string;
+ bySource?: Record
+ Catalog refreshed{" "} + + {formatRelativeTime( + indexMeta.indexGeneratedAt || indexMeta.extractedAt, + ) || "recently"} + + {" "}· auto-rebuilt twice daily +
+ )}