mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
feat(skills-hub): health checks, freshness badge, and a watchdog cron (#32345)
Layered safety so the Skills Hub at /docs/skills stays in sync without silent rot. Three pieces: 1. build_skills_index.py — refuses to ship a degenerate index. EXPECTED_FLOORS per source (skills.sh ≥100, lobehub ≥100, clawhub ≥50, official ≥50, github ≥30, browse-sh ≥50) and MIN_TOTAL=1500. Any source collapsing to zero (the silent OpenAI breakage that hid for weeks) now fails the workflow loud — broken index never reaches the live site. 2. extract-skills.py + the React page — visible freshness signal. Sidecar website/src/data/skills-meta.json carries the index's generated_at timestamp, plus per-source counts. Skills Hub renders a 'Catalog refreshed N hours ago · auto-rebuilt twice daily' line under the hero copy. If the cron stalls, users see the staleness immediately. 3. .github/workflows/skills-index-freshness.yml — watchdog cron. Every 4 hours, fetches the live /docs/api/skills-index.json, validates shape, checks age (>26h is stale), checks the same per-source floors, and opens (or appends to) a GitHub issue when anything is off. The issue is title-prefixed [skills-index-watchdog] so subsequent failures append a comment instead of spamming new issues. Net effect: - A silent regression like 'OpenAI tap moved its skills' now fails the build instead of shipping a quietly broken catalog. - A stuck cron (like the landingpage breakage that ran red for weeks) now files an issue within 4 hours. - Users see how fresh the catalog is on the page itself. Test plan: - Local: built skills-meta.json from the live index → 'Catalog refreshed N minutes ago' rendered correctly in the static HTML. - Probe logic dry-run against the live index: total=2456, all 6 sources above floor, age 0.1h — issues=NONE. - Triggered skills-index.yml manually; both jobs green, deploy-site.yml dispatch fired.
This commit is contained in:
parent
cea87d9139
commit
d8703e27f5
5 changed files with 273 additions and 8 deletions
149
.github/workflows/skills-index-freshness.yml
vendored
Normal file
149
.github/workflows/skills-index-freshness.yml
vendored
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
name: Skills Index Freshness Check
|
||||
|
||||
# Belt-and-suspenders for the twice-daily build_skills_index pipeline.
|
||||
# If the live /docs/api/skills-index.json ever goes more than 26 hours
|
||||
# stale OR the file disappears entirely OR a major source has collapsed,
|
||||
# this workflow opens a GitHub issue so we hear about it before users do.
|
||||
#
|
||||
# Triggered every 4 hours so we catch a stuck cron within one tick.
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 */4 * * *'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
check-freshness:
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Probe live index
  id: probe
  # Downloads the published index and writes three step outputs:
  #   status  - ok | degraded | fetch-failed | parse-failed | invalid-shape
  #   detail  - one-line human-readable explanation
  #   summary - per-source counts (only when the JSON could be inspected)
  # The step itself always exits 0 for a *detected* problem so that the
  # later "open issue" step gets a chance to run.
  run: |
    set -e
    URL="https://hermes-agent.nousresearch.com/docs/api/skills-index.json"
    echo "Probing $URL"
    # -L follows redirects; -f fails on HTTP errors; -sS hides the progress
    # meter but still prints errors. --retry absorbs transient network
    # blips so a single flaky request does not file an issue; --max-time
    # keeps a hung connection from stalling the job.
    if ! curl -fsSL --retry 3 --retry-delay 5 --max-time 60 -o /tmp/skills-index.json "$URL"; then
      echo "status=fetch-failed" >> "$GITHUB_OUTPUT"
      echo "detail=Could not download $URL" >> "$GITHUB_OUTPUT"
      exit 0
    fi
    # Validate + extract generated_at and per-source counts
    python3 <<'PY' >> "$GITHUB_OUTPUT"
    import json
    import sys
    from collections import Counter
    from datetime import datetime, timezone

    MAX_AGE_HOURS = 26
    MIN_TOTAL = 1500
    # Floors — same as build_skills_index.py EXPECTED_FLOORS.
    FLOORS = {
        "skills.sh": 100,
        "lobehub": 100,
        "clawhub": 50,
        "official": 50,
        "github": 30,
        "browse-sh": 50,
    }


    def emit(key, value):
        # GITHUB_OUTPUT treats every line as a separate key=value pair, so
        # collapse all whitespace (including newlines) in remote-derived
        # text to keep one value from smuggling in extra outputs.
        print(f"{key}={' '.join(str(value).split())}")


    def finish(status, detail):
        # Report a terminal status and stop; exit code stays 0 so the
        # workflow proceeds to the issue-filing step.
        emit("status", status)
        emit("detail", detail)
        sys.exit(0)


    try:
        with open("/tmp/skills-index.json", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        finish("parse-failed", f"JSON decode error: {e}")

    if not isinstance(data, dict):
        finish("invalid-shape", f"top-level JSON is not an object (got {type(data).__name__})")

    generated_at = data.get("generated_at", "")
    total = data.get("skill_count", 0)
    skills = data.get("skills", [])
    if not isinstance(skills, list):
        finish("invalid-shape", f"skills field is not a list (got {type(skills).__name__})")

    # Per-source counts. Malformed entries are bucketed under "" instead of
    # crashing the probe.
    by_src = Counter(
        str(s.get("source", "")) if isinstance(s, dict) else "" for s in skills
    )

    issues = []

    # Freshness. A timestamp we cannot read is itself a failure: a watchdog
    # that cannot determine the index's age must not report it as fresh.
    age_hours = None
    try:
        ts = datetime.fromisoformat(str(generated_at).replace("Z", "+00:00"))
        if ts.tzinfo is None:
            # Timezone-naive timestamps are interpreted as UTC.
            ts = ts.replace(tzinfo=timezone.utc)
        age_hours = (datetime.now(timezone.utc) - ts).total_seconds() / 3600
    except (ValueError, OverflowError):
        pass

    if age_hours is None:
        issues.append(
            f"generated_at missing or unparseable ({generated_at!r}); cannot verify freshness"
        )
    elif age_hours > MAX_AGE_HOURS:
        issues.append(f"Index is {age_hours:.1f}h old (limit {MAX_AGE_HOURS}h)")

    for src, floor in FLOORS.items():
        count = by_src.get(src, 0)
        if src == "skills.sh":
            # 'skills-sh' and 'skills.sh' label the same source.
            count = by_src.get("skills.sh", 0) + by_src.get("skills-sh", 0)
        if count < floor:
            issues.append(f"{src}: {count} < {floor}")

    if isinstance(total, bool) or not isinstance(total, int):
        issues.append(f"skill_count is not an integer (got {type(total).__name__})")
    elif total < MIN_TOTAL:
        issues.append(f"total skills: {total} < {MIN_TOTAL}")

    if issues:
        emit("status", "degraded")
        emit("detail", "; ".join(issues))
    else:
        emit("status", "ok")
        emit("detail", f"Index OK — {total} skills, generated {generated_at}")
    emit("summary", ", ".join(f"{k}={v}" for k, v in by_src.most_common(8)))
    PY
|
||||
|
||||
- name: Report status
  # The probe outputs contain text derived from the downloaded JSON
  # (source names, generated_at). Hand them to the shell as environment
  # variables instead of splicing ${{ ... }} into the script text, so a
  # value containing quotes, backticks or $(...) is printed, not executed.
  env:
    STATUS: ${{ steps.probe.outputs.status }}
    DETAIL: ${{ steps.probe.outputs.detail }}
    SUMMARY: ${{ steps.probe.outputs.summary }}
  run: |
    echo "Probe status: $STATUS"
    echo "Detail: $DETAIL"
    if [ -n "$SUMMARY" ]; then
      echo "Summary: $SUMMARY"
    fi
|
||||
|
||||
- name: Open issue on degraded / failed probe
  if: steps.probe.outputs.status != 'ok'
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    GH_REPO: ${{ github.repository }}
    STATUS: ${{ steps.probe.outputs.status }}
    DETAIL: ${{ steps.probe.outputs.detail }}
  run: |
    # Fail the step if the issue lookup itself errors. Without this a
    # failed `gh issue list` would look like "no existing issue" and a
    # duplicate issue would be opened on every tick.
    set -euo pipefail

    # Find existing open issue by title prefix so we don't spam — we
    # append a comment instead of opening a new one each tick.
    TITLE_PREFIX="[skills-index-watchdog]"
    existing=$(gh issue list \
      --repo "$GH_REPO" \
      --state open \
      --search "in:title \"$TITLE_PREFIX\"" \
      --json number,title \
      --jq 'map(select(.title | startswith("'"$TITLE_PREFIX"'")) | .number) | .[0] // empty')

    BODY="Automated freshness probe failed.

    **Status:** \`$STATUS\`
    **Detail:** $DETAIL

    The Skills Hub at /docs/skills depends on \`/docs/api/skills-index.json\`.
    The unified index is rebuilt by \`.github/workflows/skills-index.yml\` (cron 6/18 UTC)
    and \`.github/workflows/deploy-site.yml\` (on every push affecting website/skills).
    If this issue keeps recurring, check the latest runs:

    - https://github.com/$GH_REPO/actions/workflows/skills-index.yml
    - https://github.com/$GH_REPO/actions/workflows/deploy-site.yml

    This issue was opened by \`.github/workflows/skills-index-freshness.yml\`. Close it once the underlying problem is fixed; the next probe will open a new issue if it's still broken."

    if [ -n "$existing" ]; then
      echo "Appending to existing issue #$existing"
      gh issue comment "$existing" --repo "$GH_REPO" --body "Probe still failing at $(date -u +%FT%TZ): \`$STATUS\` — $DETAIL"
    else
      echo "Opening new watchdog issue"
      gh issue create --repo "$GH_REPO" \
        --title "$TITLE_PREFIX Skills index is stale or degraded ($STATUS)" \
        --body "$BODY"
    fi
|
||||
|
|
@ -322,6 +322,50 @@ def main():
|
|||
extra = f" ({resolved} resolved)" if resolved else ""
|
||||
print(f" {src}: {count}{extra}")
|
||||
|
||||
# Health check: catch silent breakage early. Every source listed below
|
||||
# has historically returned at least `floor` entries; a zero (or near-
|
||||
# zero) result almost certainly means a tap path moved, an API changed,
|
||||
# or rate limiting kicked in. Failing here forces a human look before
|
||||
# the broken index reaches the live docs.
|
||||
EXPECTED_FLOORS = {
|
||||
"skills.sh": 100,
|
||||
"lobehub": 100,
|
||||
"clawhub": 50,
|
||||
"official": 50,
|
||||
"github": 30, # collapsed across all GitHub taps
|
||||
"browse-sh": 50,
|
||||
}
|
||||
health_errors = []
|
||||
for src, floor in EXPECTED_FLOORS.items():
|
||||
# 'skills-sh' and 'skills.sh' are the same source; both labels exist.
|
||||
count = by_source.get(src, 0)
|
||||
if src == "skills.sh":
|
||||
count = by_source.get("skills.sh", 0) + by_source.get("skills-sh", 0)
|
||||
if count < floor:
|
||||
health_errors.append(f" {src}: {count} < expected floor {floor}")
|
||||
|
||||
MIN_TOTAL = 1500
|
||||
if len(deduped) < MIN_TOTAL:
|
||||
health_errors.append(
|
||||
f" total: {len(deduped)} < expected floor {MIN_TOTAL}"
|
||||
)
|
||||
|
||||
if health_errors:
|
||||
print(
|
||||
"\nERROR: skills index health check failed — refusing to ship "
|
||||
"a degenerate index. Investigate the following sources:",
|
||||
file=sys.stderr,
|
||||
)
|
||||
for line in health_errors:
|
||||
print(line, file=sys.stderr)
|
||||
print(
|
||||
"\nIf the drop is expected (e.g. a hub is genuinely shutting "
|
||||
"down), lower the floor in scripts/build_skills_index.py "
|
||||
"EXPECTED_FLOORS in the same PR.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(2)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
1
website/.gitignore
vendored
1
website/.gitignore
vendored
|
|
@ -8,6 +8,7 @@
|
|||
.docusaurus
|
||||
.cache-loader
|
||||
src/data/skills.json
|
||||
src/data/skills-meta.json
|
||||
static/llms.txt
|
||||
static/llms-full.txt
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ the unified index existed).
|
|||
import json
|
||||
import os
|
||||
from collections import Counter
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import yaml
|
||||
|
||||
|
|
@ -32,6 +33,7 @@ LOCAL_SKILL_DIRS = [
|
|||
UNIFIED_INDEX_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "skills-index.json")
|
||||
LEGACY_INDEX_CACHE_DIR = os.path.join(REPO_ROOT, "skills", "index-cache")
|
||||
OUTPUT = os.path.join(REPO_ROOT, "website", "src", "data", "skills.json")
|
||||
META_OUTPUT = os.path.join(REPO_ROOT, "website", "src", "data", "skills-meta.json")
|
||||
|
||||
CATEGORY_LABELS = {
|
||||
"apple": "Apple",
|
||||
|
|
@ -280,19 +282,32 @@ def _label_for_github_identifier(identifier: str) -> str:
|
|||
|
||||
|
||||
def extract_unified_index_skills():
|
||||
"""Read website/static/api/skills-index.json — the canonical multi-source index."""
|
||||
"""Read website/static/api/skills-index.json — the canonical multi-source index.
|
||||
|
||||
Returns ``(skills, meta)`` where ``meta`` carries the index's
|
||||
``generated_at`` timestamp and total count so the Skills Hub page can
|
||||
show a "Last refreshed …" badge. Returns ``(None, None)`` when the
|
||||
index file is absent or malformed (caller falls back to the legacy
|
||||
cache).
|
||||
"""
|
||||
if not os.path.isfile(UNIFIED_INDEX_PATH):
|
||||
return None
|
||||
return None, None
|
||||
|
||||
try:
|
||||
with open(UNIFIED_INDEX_PATH, encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
except (json.JSONDecodeError, OSError) as e:
|
||||
print(f"[extract-skills] Failed to read unified index: {e}")
|
||||
return None
|
||||
return None, None
|
||||
|
||||
if not isinstance(data, dict) or "skills" not in data:
|
||||
return None
|
||||
return None, None
|
||||
|
||||
meta = {
|
||||
"indexGeneratedAt": data.get("generated_at", ""),
|
||||
"indexSkillCount": data.get("skill_count", 0),
|
||||
"indexVersion": data.get("version", 0),
|
||||
}
|
||||
|
||||
out = []
|
||||
for entry in data.get("skills", []):
|
||||
|
|
@ -352,7 +367,7 @@ def extract_unified_index_skills():
|
|||
"installCmd": install_cmd,
|
||||
})
|
||||
|
||||
return out
|
||||
return out, meta
|
||||
|
||||
|
||||
def extract_legacy_cache_skills():
|
||||
|
|
@ -490,13 +505,14 @@ def _consolidate_small_categories(skills: list) -> list:
|
|||
def main():
|
||||
local = extract_local_skills()
|
||||
|
||||
unified = extract_unified_index_skills()
|
||||
unified, index_meta = extract_unified_index_skills()
|
||||
if unified is not None:
|
||||
external = unified
|
||||
external_source = "unified index"
|
||||
else:
|
||||
external = extract_legacy_cache_skills()
|
||||
external_source = "legacy index-cache"
|
||||
index_meta = None
|
||||
print(
|
||||
f"[extract-skills] WARNING: unified index not found at "
|
||||
f"{UNIFIED_INDEX_PATH}; falling back to {external_source}. "
|
||||
|
|
@ -517,16 +533,32 @@ def main():
|
|||
with open(OUTPUT, "w", encoding="utf-8") as f:
|
||||
json.dump(all_skills, f, indent=2)
|
||||
|
||||
# Sidecar meta file so the page can render a "Last refreshed" badge
|
||||
# without changing the shape of skills.json.
|
||||
by_source = Counter(s["source"] for s in all_skills)
|
||||
meta = {
|
||||
"extractedAt": datetime.now(timezone.utc).isoformat(),
|
||||
"totalSkills": len(all_skills),
|
||||
"localSkills": len(local),
|
||||
"externalSkills": len(external),
|
||||
"externalSource": external_source,
|
||||
"bySource": dict(by_source.most_common()),
|
||||
}
|
||||
if index_meta:
|
||||
meta.update(index_meta)
|
||||
with open(META_OUTPUT, "w", encoding="utf-8") as f:
|
||||
json.dump(meta, f, indent=2)
|
||||
|
||||
print(f"Extracted {len(all_skills)} skills to {OUTPUT}")
|
||||
print(f" {len(local)} local ({sum(1 for s in local if s['source'] == 'built-in')} built-in, "
|
||||
f"{sum(1 for s in local if s['source'] == 'optional')} optional)")
|
||||
print(f" {len(external)} from {external_source}")
|
||||
|
||||
# Breakdown by source
|
||||
by_source = Counter(s["source"] for s in all_skills)
|
||||
print("By source:")
|
||||
for src, count in by_source.most_common():
|
||||
print(f" {src}: {count}")
|
||||
if index_meta and index_meta.get("indexGeneratedAt"):
|
||||
print(f"Unified index built at: {index_meta['indexGeneratedAt']}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import React, { useState, useMemo, useCallback, useRef, useEffect } from "react";
|
||||
import Layout from "@theme/Layout";
|
||||
import skills from "../../data/skills.json";
|
||||
import meta from "../../data/skills-meta.json";
|
||||
import styles from "./styles.module.css";
|
||||
|
||||
interface Skill {
|
||||
|
|
@ -24,6 +25,33 @@ interface Skill {
|
|||
|
||||
const allSkills: Skill[] = skills as Skill[];
|
||||
|
||||
/**
 * Shape of the `skills-meta.json` sidecar written by the extract-skills
 * script next to `skills.json`.
 *
 * Every field is optional so the page still renders against an older or
 * partially populated sidecar. The `index*` fields are only written when
 * the unified index was read successfully.
 */
interface IndexMeta {
  /** ISO-8601 UTC timestamp of when the extract script ran. */
  extractedAt?: string;
  /** `generated_at` copied from the unified index. */
  indexGeneratedAt?: string;
  /** `skill_count` copied from the unified index. */
  indexSkillCount?: number;
  /** `version` copied from the unified index. */
  indexVersion?: number;
  /** Number of skills written to `skills.json`. */
  totalSkills?: number;
  /** Number of skills read from the local skill directories. */
  localSkills?: number;
  /** Number of skills read from the external index. */
  externalSkills?: number;
  /** Which external input was used: "unified index" or "legacy index-cache". */
  externalSource?: string;
  /** Skill count keyed by source label. */
  bySource?: Record<string, number>;
}
|
||||
const indexMeta: IndexMeta = meta as IndexMeta;
|
||||
|
||||
/**
 * Describe how long ago an ISO-8601 timestamp was, as a coarse phrase such
 * as "5 minutes ago", "3 hours ago" or "2 months ago".
 *
 * Units are floored, a month is counted as 30 days, and anything under one
 * minute (including timestamps in the future, e.g. from clock skew) reads
 * as "just now".
 *
 * @param iso - Timestamp to describe.
 * @param now - Reference instant in epoch milliseconds. Defaults to the
 *   current time; pass an explicit value for deterministic output.
 * @returns The phrase, or `null` when `iso` is missing or unparseable, or
 *   when `now` is not a finite number.
 */
function formatRelativeTime(iso?: string, now: number = Date.now()): string | null {
  if (!iso) return null;
  const then = new Date(iso).getTime();
  if (!Number.isFinite(then) || !Number.isFinite(now)) return null;

  const ago = (count: number, unit: string): string =>
    `${count} ${unit}${count === 1 ? "" : "s"} ago`;

  const mins = Math.floor((now - then) / 60_000);
  // Negative differences floor below 1 as well, so future timestamps land here.
  if (mins < 1) return "just now";
  if (mins < 60) return ago(mins, "minute");

  const hours = Math.floor(mins / 60);
  if (hours < 24) return ago(hours, "hour");

  const days = Math.floor(hours / 24);
  if (days < 30) return ago(days, "day");

  return ago(Math.floor(days / 30), "month");
}
|
||||
|
||||
const CATEGORY_ICONS: Record<string, string> = {
|
||||
apple: "\u{f179}",
|
||||
"autonomous-ai-agents": "\u{1F916}",
|
||||
|
|
@ -487,6 +515,17 @@ export default function SkillsDashboard() {
|
|||
<strong className={styles.heroAccent}>{allSkills.length}</strong> skills
|
||||
across {sources.length - 1} registries
|
||||
</p>
|
||||
{(indexMeta?.indexGeneratedAt || indexMeta?.extractedAt) && (
|
||||
<p className={styles.heroSub} style={{ fontSize: "0.85rem", opacity: 0.75 }}>
|
||||
Catalog refreshed{" "}
|
||||
<span title={indexMeta.indexGeneratedAt || indexMeta.extractedAt}>
|
||||
{formatRelativeTime(
|
||||
indexMeta.indexGeneratedAt || indexMeta.extractedAt,
|
||||
) || "recently"}
|
||||
</span>
|
||||
{" "}· auto-rebuilt twice daily
|
||||
</p>
|
||||
)}
|
||||
|
||||
<div className={styles.statsRow}>
|
||||
<StatCard
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue