mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-15 09:21:36 +00:00
fix(docs): reuse healthy skills index during Pages deploys (#45616)
This commit is contained in:
parent
d206e1f51d
commit
4373e802a1
2 changed files with 84 additions and 17 deletions
99
.github/workflows/deploy-site.yml
vendored
99
.github/workflows/deploy-site.yml
vendored
|
|
@ -11,8 +11,20 @@ on:
|
|||
- 'optional-skills/**'
|
||||
- '.github/workflows/deploy-site.yml'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
skills_index_run_id:
|
||||
description: 'Optional Build Skills Index run ID whose skills-index artifact should be deployed'
|
||||
required: false
|
||||
type: string
|
||||
rebuild_skills_index:
|
||||
description: 'Force a fresh multi-source crawl instead of reusing the latest healthy index'
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
actions: read
|
||||
pages: write
|
||||
id-token: write
|
||||
|
||||
|
|
@ -55,26 +67,81 @@ jobs:
|
|||
- name: Install PyYAML for skill extraction
|
||||
run: pip install pyyaml==6.0.2 httpx==0.28.1
|
||||
|
||||
- name: Build skills index (unified multi-source catalog)
|
||||
- name: Prepare skills index (unified multi-source catalog)
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
SKILLS_INDEX_RUN_ID: ${{ github.event.inputs.skills_index_run_id || '' }}
|
||||
REBUILD_SKILLS_INDEX: ${{ github.event.inputs.rebuild_skills_index || 'false' }}
|
||||
run: |
|
||||
# Rebuild the unified catalog. The file is gitignored, so a fresh
|
||||
# checkout starts without it and we want the freshest crawl in
|
||||
# every deploy.
|
||||
# The unified external catalog is expensive to crawl and can burn
|
||||
# through the repository installation's GitHub API quota when several
|
||||
# docs deploys land close together. Normal docs deploys therefore
|
||||
# reuse the latest healthy catalog: first the artifact from a
|
||||
# scheduled skills-index run, then the currently live index. Only a
|
||||
# manual force rebuild does a fresh crawl here.
|
||||
#
|
||||
# This MUST be fatal. build_skills_index.py runs a health check and
|
||||
# exits non-zero WITHOUT writing the output file when a source
|
||||
# collapses (e.g. a GitHub API rate limit zeroes the github /
|
||||
# claude-marketplace / well-known taps all at once). Letting the
|
||||
# deploy continue would either (a) ship a degenerate index missing
|
||||
# whole hubs — the June 2026 regression where OpenAI/Anthropic/
|
||||
# HuggingFace/NVIDIA tabs vanished — or (b) fall through to a
|
||||
# local-only catalog. Failing here keeps the last good deployment
|
||||
# live (GitHub Pages serves the previous build) instead of
|
||||
# publishing a broken catalog. Re-run the workflow once the
|
||||
# transient rate limit clears.
|
||||
# If we do crawl, the build remains fatal. build_skills_index.py runs
|
||||
# the health check BEFORE writing and exits non-zero on source
|
||||
# collapse, keeping the last good Pages deployment live instead of
|
||||
# publishing a degenerate catalog.
|
||||
set -euo pipefail
|
||||
INDEX_PATH="website/static/api/skills-index.json"
|
||||
mkdir -p "$(dirname "$INDEX_PATH")"
|
||||
|
||||
validate_index() {
|
||||
python3 - "$INDEX_PATH" <<'PY'
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
path = Path(sys.argv[1])
|
||||
try:
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
except Exception as exc:
|
||||
print(f"invalid skills index JSON: {exc}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
skills = data.get("skills")
|
||||
if not isinstance(skills, list) or len(skills) < 1500:
|
||||
count = len(skills) if isinstance(skills, list) else "missing"
|
||||
print(f"skills index too small: {count}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
print(f"skills index ready: {len(skills)} skills")
|
||||
PY
|
||||
}
|
||||
|
||||
if [ "$REBUILD_SKILLS_INDEX" = "true" ]; then
|
||||
python3 scripts/build_skills_index.py
|
||||
validate_index
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ -n "$SKILLS_INDEX_RUN_ID" ]; then
|
||||
tmpdir="$(mktemp -d)"
|
||||
echo "Downloading skills-index artifact from run $SKILLS_INDEX_RUN_ID"
|
||||
if gh run download "$SKILLS_INDEX_RUN_ID" --name skills-index --dir "$tmpdir"; then
|
||||
candidate="$(find "$tmpdir" -name skills-index.json -type f | head -n 1 || true)"
|
||||
if [ -n "$candidate" ]; then
|
||||
cp "$candidate" "$INDEX_PATH"
|
||||
if validate_index; then
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
echo "::warning::Could not use skills-index artifact from run $SKILLS_INDEX_RUN_ID; trying live index"
|
||||
fi
|
||||
|
||||
echo "Downloading currently live skills index"
|
||||
if curl -fsSL --retry 3 --retry-delay 5 \
|
||||
"https://hermes-agent.nousresearch.com/docs/api/skills-index.json" \
|
||||
-o "$INDEX_PATH" && validate_index; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "::warning::Live skills index unavailable or unhealthy; falling back to a fresh crawl"
|
||||
rm -f "$INDEX_PATH"
|
||||
python3 scripts/build_skills_index.py
|
||||
validate_index
|
||||
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
|
|
|||
2
.github/workflows/skills-index.yml
vendored
2
.github/workflows/skills-index.yml
vendored
|
|
@ -53,4 +53,4 @@ jobs:
|
|||
- name: Trigger Deploy Site workflow
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: gh workflow run deploy-site.yml --repo ${{ github.repository }}
|
||||
run: gh workflow run deploy-site.yml --repo ${{ github.repository }} -f skills_index_run_id=${{ github.run_id }}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue