mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(skills): centralized skills index — eliminate GitHub API calls for search/install
Add a CI-built skills index served from the docs site. The index is crawled daily by GitHub Actions, resolves all GitHub paths upfront, and is cached locally by the client. When the index is available: - Search uses the cached index (0 GitHub API calls, was 23+) - Install uses resolved paths from index (6 API calls for file downloads only, was 31-45 for discovery + downloads) Total: 68 → 6 GitHub API calls for a typical search + install flow. Unauthenticated users (60 req/hr) can now search and install without hitting rate limits. Components: - scripts/build_skills_index.py: Crawl all sources (skills.sh, GitHub taps, official, clawhub, lobehub), batch-resolve GitHub paths via tree API, output JSON index - tools/skills_hub.py: HermesIndexSource class — search/fetch/inspect backed by the index, with lazy GitHubSource for file downloads - parallel_search_sources() skips external API sources when index is available (0 GitHub calls for search) - .github/workflows/skills-index.yml: twice-daily CI build + deploy - .github/workflows/deploy-site.yml: also builds index during docs deploy Graceful degradation: when the index is unavailable (first run, network down, stale), all methods return empty/None and downstream sources handle the request via direct API as before.
This commit is contained in:
parent
7e0e5ea03b
commit
76019320fb
5 changed files with 670 additions and 1 deletions
10
.github/workflows/deploy-site.yml
vendored
10
.github/workflows/deploy-site.yml
vendored
|
|
@ -41,11 +41,19 @@ jobs:
|
||||||
python-version: '3.11'
|
python-version: '3.11'
|
||||||
|
|
||||||
- name: Install PyYAML for skill extraction
|
- name: Install PyYAML for skill extraction
|
||||||
run: pip install pyyaml
|
run: pip install pyyaml httpx
|
||||||
|
|
||||||
- name: Extract skill metadata for dashboard
|
- name: Extract skill metadata for dashboard
|
||||||
run: python3 website/scripts/extract-skills.py
|
run: python3 website/scripts/extract-skills.py
|
||||||
|
|
||||||
|
- name: Build skills index (if not already present)
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
run: |
|
||||||
|
if [ ! -f website/static/api/skills-index.json ]; then
|
||||||
|
python3 scripts/build_skills_index.py || echo "Skills index build failed (non-fatal)"
|
||||||
|
fi
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: npm ci
|
run: npm ci
|
||||||
working-directory: website
|
working-directory: website
|
||||||
|
|
|
||||||
101
.github/workflows/skills-index.yml
vendored
Normal file
101
.github/workflows/skills-index.yml
vendored
Normal file
|
|
@ -0,0 +1,101 @@
|
||||||
|
# Builds the centralized Hermes Skills Index (website/static/api/skills-index.json)
# and redeploys the docs site with the fresh index on schedule/manual runs.
name: Build Skills Index

on:
  schedule:
    # Run twice daily: 6 AM and 6 PM UTC
    - cron: '0 6,18 * * *'
  workflow_dispatch: # Manual trigger
  push:
    branches: [main]
    paths:
      - 'scripts/build_skills_index.py'
      - '.github/workflows/skills-index.yml'

permissions:
  contents: read

jobs:
  build-index:
    # Only run on the upstream repository, not on forks
    if: github.repository == 'NousResearch/hermes-agent'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: pip install httpx pyyaml

      - name: Build skills index
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: python scripts/build_skills_index.py

      - name: Upload index artifact
        uses: actions/upload-artifact@v4
        with:
          name: skills-index
          path: website/static/api/skills-index.json
          retention-days: 7

  deploy-with-index:
    needs: build-index
    runs-on: ubuntu-latest
    permissions:
      # FIX: declaring job-level permissions sets every unlisted scope to
      # "none", which would leave actions/checkout without read access to
      # the repository. contents: read must be listed explicitly here.
      contents: read
      pages: write
      id-token: write
    environment:
      name: github-pages
      url: ${{ steps.deploy.outputs.page_url }}
    # Only deploy on schedule or manual trigger (not on every push to the script)
    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
    steps:
      - uses: actions/checkout@v4

      # Place the freshly built index where Docusaurus picks up static files.
      - uses: actions/download-artifact@v4
        with:
          name: skills-index
          path: website/static/api/

      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
          cache-dependency-path: website/package-lock.json

      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install PyYAML for skill extraction
        run: pip install pyyaml

      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py

      - name: Install dependencies
        run: npm ci
        working-directory: website

      - name: Build Docusaurus
        run: npm run build
        working-directory: website

      - name: Stage deployment
        run: |
          mkdir -p _site/docs
          cp -r landingpage/* _site/
          cp -r website/build/* _site/docs/
          echo "hermes-agent.nousresearch.com" > _site/CNAME

      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: _site

      - name: Deploy to GitHub Pages
        id: deploy
        uses: actions/deploy-pages@v4
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -58,3 +58,4 @@ mini-swe-agent/
|
||||||
# Nix
|
# Nix
|
||||||
.direnv/
|
.direnv/
|
||||||
result
|
result
|
||||||
|
website/static/api/skills-index.json
|
||||||
|
|
|
||||||
325
scripts/build_skills_index.py
Normal file
325
scripts/build_skills_index.py
Normal file
|
|
@ -0,0 +1,325 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Build the Hermes Skills Index — a centralized JSON catalog of all skills.
|
||||||
|
|
||||||
|
This script crawls every skill source (skills.sh, GitHub taps, official,
|
||||||
|
clawhub, lobehub, claude-marketplace) and writes a JSON index with resolved
|
||||||
|
GitHub paths. The index is served as a static file on the docs site so that
|
||||||
|
`hermes skills search/install` can use it without hitting the GitHub API.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Local (uses gh CLI or GITHUB_TOKEN for auth)
|
||||||
|
python scripts/build_skills_index.py
|
||||||
|
|
||||||
|
# CI (set GITHUB_TOKEN as secret)
|
||||||
|
GITHUB_TOKEN=ghp_... python scripts/build_skills_index.py
|
||||||
|
|
||||||
|
Output: website/static/api/skills-index.json
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from collections import defaultdict
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
# Allow importing from repo root
|
||||||
|
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
sys.path.insert(0, REPO_ROOT)
|
||||||
|
|
||||||
|
# Ensure HERMES_HOME is set (needed by tools/skills_hub.py imports)
|
||||||
|
os.environ.setdefault("HERMES_HOME", os.path.join(os.path.expanduser("~"), ".hermes"))
|
||||||
|
|
||||||
|
from tools.skills_hub import (
|
||||||
|
GitHubAuth,
|
||||||
|
GitHubSource,
|
||||||
|
SkillsShSource,
|
||||||
|
OptionalSkillSource,
|
||||||
|
WellKnownSkillSource,
|
||||||
|
ClawHubSource,
|
||||||
|
ClaudeMarketplaceSource,
|
||||||
|
LobeHubSource,
|
||||||
|
SkillMeta,
|
||||||
|
)
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
OUTPUT_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "skills-index.json")
|
||||||
|
INDEX_VERSION = 1
|
||||||
|
|
||||||
|
|
||||||
|
def _meta_to_dict(meta: SkillMeta) -> dict:
    """Serialize a SkillMeta into a plain JSON-compatible dict.

    Optional fields (repo/path/tags/extra) are normalized to empty values
    so the emitted index never contains nulls.
    """
    entry = {
        "name": meta.name,
        "description": meta.description,
        "source": meta.source,
        "identifier": meta.identifier,
        "trust_level": meta.trust_level,
    }
    entry["repo"] = meta.repo or ""
    entry["path"] = meta.path or ""
    entry["tags"] = meta.tags or []
    entry["extra"] = meta.extra or {}
    return entry
|
||||||
|
|
||||||
|
|
||||||
|
def crawl_source(source, source_name: str, limit: int) -> list:
    """Crawl one skill source and return its skills as serializable dicts.

    Any crawl failure is reported to stderr and yields an empty list, so a
    single broken source never aborts the whole index build.
    """
    print(f" Crawling {source_name}...", flush=True)
    started = time.time()
    try:
        found = source.search("", limit=limit)
    except Exception as e:
        print(f" Error crawling {source_name}: {e}", file=sys.stderr)
        return []
    entries = [_meta_to_dict(meta) for meta in found]
    print(f" {source_name}: {len(entries)} skills ({time.time() - started:.1f}s)",
          flush=True)
    return entries
|
||||||
|
|
||||||
|
|
||||||
|
def crawl_skills_sh(source: SkillsShSource) -> list:
    """Crawl skills.sh with a spread of popular queries for broad coverage.

    skills.sh has no "list everything" endpoint, so we union the results of
    many common search terms and dedupe by identifier (first hit wins).
    """
    print(" Crawling skills.sh (popular queries)...", flush=True)
    started = time.time()

    queries = (
        "",  # featured
        "react", "python", "web", "api", "database", "docker",
        "testing", "scraping", "design", "typescript", "git",
        "aws", "security", "data", "ml", "ai", "devops",
        "frontend", "backend", "mobile", "cli", "documentation",
        "kubernetes", "terraform", "rust", "go", "java",
    )

    unique: dict[str, dict] = {}
    for query in queries:
        try:
            for meta in source.search(query, limit=50):
                entry = _meta_to_dict(meta)
                # First occurrence wins; later queries can't overwrite it.
                unique.setdefault(entry["identifier"], entry)
        except Exception as e:
            print(f" Warning: skills.sh search '{query}' failed: {e}",
                  file=sys.stderr)

    print(f" skills.sh: {len(unique)} unique skills ({time.time() - started:.1f}s)",
          flush=True)
    return list(unique.values())
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_repo_tree(repo: str, auth: GitHubAuth) -> list:
    """Return the recursive git tree entries for *repo*, or [] on any failure.

    Costs two GitHub API calls: one to discover the default branch, one for
    the recursive tree listing. A truncated tree (very large repo) is treated
    as a miss so callers never match against a partial listing.
    """
    headers = auth.get_headers()
    try:
        repo_resp = httpx.get(
            f"https://api.github.com/repos/{repo}",
            headers=headers, timeout=15, follow_redirects=True,
        )
        if repo_resp.status_code != 200:
            return []
        default_branch = repo_resp.json().get("default_branch", "main")

        tree_resp = httpx.get(
            f"https://api.github.com/repos/{repo}/git/trees/{default_branch}",
            params={"recursive": "1"},
            headers=headers, timeout=30, follow_redirects=True,
        )
        if tree_resp.status_code != 200:
            return []
        payload = tree_resp.json()
        # A truncated listing would cause silent false-negative matches.
        return [] if payload.get("truncated") else payload.get("tree", [])
    except Exception:
        # Best-effort: network/JSON errors just mean "could not resolve".
        return []
|
||||||
|
|
||||||
|
|
||||||
|
def batch_resolve_paths(skills: list, auth: GitHubAuth) -> list:
    """Resolve GitHub paths for skills.sh entries using batch tree lookups.

    Instead of resolving each skill individually (N×M API calls), we:
    1. Group skills by repo
    2. Fetch one tree per repo (2 API calls per repo)
    3. Find all SKILL.md files in the tree
    4. Match skills to their resolved paths

    Entries are mutated in place (a ``resolved_github_id`` key is added on
    match); the same ``skills`` list is returned for chaining.
    """
    # Filter to skills.sh entries that need resolution
    skills_sh = [s for s in skills if s["source"] in ("skills.sh", "skills-sh")]
    if not skills_sh:
        return skills

    print(f" Resolving paths for {len(skills_sh)} skills.sh entries...",
          flush=True)
    start = time.time()

    # Group by repo so each repo's tree is fetched exactly once.
    by_repo: dict[str, list] = defaultdict(list)
    for s in skills_sh:
        repo = s.get("repo", "")
        if repo:
            by_repo[repo].append(s)

    print(f" {len(by_repo)} unique repos to scan", flush=True)

    resolved_count = 0

    # Fetch trees in parallel (up to 6 concurrent)
    def _resolve_repo(repo: str, entries: list) -> int:
        # Returns the number of entries resolved for this repo.
        tree = _fetch_repo_tree(repo, auth)
        if not tree:
            return 0

        # Find all SKILL.md paths in this repo, keyed by lowercase dir name.
        skill_paths = {}  # skill_dir_name -> full_path
        for item in tree:
            if item.get("type") != "blob":
                continue
            path = item.get("path", "")
            if path.endswith("/SKILL.md"):
                skill_dir = path[: -len("/SKILL.md")]
                dir_name = skill_dir.split("/")[-1]
                skill_paths[dir_name.lower()] = f"{repo}/{skill_dir}"

            # Also check SKILL.md frontmatter name if we can match by path
            # For now, just index by directory name
            elif path == "SKILL.md":
                # Root-level SKILL.md: the repo itself is the skill.
                skill_paths["_root_"] = f"{repo}"

        count = 0
        for entry in entries:
            # Try to match the skill's name/path to a tree entry
            skill_name = entry.get("name", "").lower()
            skill_path = entry.get("path", "").lower()
            identifier = entry.get("identifier", "")

            # Extract the skill token from the identifier
            # e.g. "skills-sh/d4vinci/scrapling/scrapling-official" -> "scrapling-official"
            parts = identifier.replace("skills-sh/", "").replace("skills.sh/", "")
            skill_token = parts.split("/")[-1].lower() if "/" in parts else ""

            # Try matching in order of likelihood
            for candidate in [skill_token, skill_name, skill_path]:
                if not candidate:
                    continue
                matched = skill_paths.get(candidate)
                if matched:
                    entry["resolved_github_id"] = matched
                    count += 1
                    break
            else:
                # for/else: no exact candidate matched — fall back to fuzzy
                # matching on skill_token with common transformations.
                for tree_name, tree_path in skill_paths.items():
                    if (skill_token and (
                        tree_name.replace("-", "") == skill_token.replace("-", "")
                        or skill_token in tree_name
                        or tree_name in skill_token
                    )):
                        entry["resolved_github_id"] = tree_path
                        count += 1
                        break

        return count

    with ThreadPoolExecutor(max_workers=6) as pool:
        futures = {
            pool.submit(_resolve_repo, repo, entries): repo
            for repo, entries in by_repo.items()
        }
        for future in as_completed(futures):
            try:
                resolved_count += future.result()
            except Exception as e:
                # A single repo failure is non-fatal for the whole resolve.
                repo = futures[future]
                print(f" Warning: {repo}: {e}", file=sys.stderr)

    elapsed = time.time() - start
    print(f" Resolved {resolved_count}/{len(skills_sh)} paths ({elapsed:.1f}s)",
          flush=True)
    return skills
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Crawl all skill sources, resolve GitHub paths, and write the index.

    Pipeline: crawl skills.sh (query fan-out) → crawl the remaining sources
    in parallel → batch-resolve GitHub paths → dedupe → sort by source
    priority → dump compact JSON to OUTPUT_PATH → print a per-source summary.
    """
    print("Building Hermes Skills Index...", flush=True)
    overall_start = time.time()

    auth = GitHubAuth()
    print(f"GitHub auth: {auth.auth_method()}")
    if auth.auth_method() == "anonymous":
        print("WARNING: No GitHub authentication — rate limit is 60/hr. "
              "Set GITHUB_TOKEN for better results.", file=sys.stderr)

    # skills.sh is crawled separately (query fan-out, see crawl_skills_sh).
    skills_sh_source = SkillsShSource(auth=auth)
    sources = {
        "official": OptionalSkillSource(),
        "well-known": WellKnownSkillSource(),
        "github": GitHubSource(auth=auth),
        "clawhub": ClawHubSource(),
        "claude-marketplace": ClaudeMarketplaceSource(auth=auth),
        "lobehub": LobeHubSource(),
    }

    all_skills: list[dict] = []

    # Crawl skills.sh
    all_skills.extend(crawl_skills_sh(skills_sh_source))

    # Crawl other sources in parallel
    with ThreadPoolExecutor(max_workers=4) as pool:
        futures = {}
        for name, source in sources.items():
            futures[pool.submit(crawl_source, source, name, 500)] = name
        for future in as_completed(futures):
            try:
                all_skills.extend(future.result())
            except Exception as e:
                print(f" Error: {e}", file=sys.stderr)

    # Batch resolve GitHub paths for skills.sh entries
    all_skills = batch_resolve_paths(all_skills, auth)

    # Deduplicate by identifier — first occurrence wins, so skills.sh
    # entries (added first, with resolved paths) take precedence.
    seen: dict[str, dict] = {}
    for skill in all_skills:
        key = skill["identifier"]
        if key not in seen:
            seen[key] = skill
    deduped = list(seen.values())

    # Sort by source priority, then name, so index order doubles as
    # "featured" order for empty-query searches.
    source_order = {"official": 0, "skills-sh": 1, "skills.sh": 1,
                    "github": 2, "well-known": 3, "clawhub": 4,
                    "claude-marketplace": 5, "lobehub": 6}
    deduped.sort(key=lambda s: (source_order.get(s["source"], 99), s["name"]))

    # Build index
    index = {
        "version": INDEX_VERSION,
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "skill_count": len(deduped),
        "skills": deduped,
    }

    # Compact separators keep the served file small.
    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
    with open(OUTPUT_PATH, "w") as f:
        json.dump(index, f, separators=(",", ":"), ensure_ascii=False)

    elapsed = time.time() - overall_start
    file_size = os.path.getsize(OUTPUT_PATH)
    print(f"\nDone! {len(deduped)} skills indexed in {elapsed:.0f}s")
    print(f"Output: {OUTPUT_PATH} ({file_size / 1024:.0f} KB)")

    # Per-source summary with resolved-path counts (local import keeps the
    # report logic self-contained).
    from collections import Counter
    by_source = Counter(s["source"] for s in deduped)
    for src, count in sorted(by_source.items(), key=lambda x: -x[1]):
        resolved = sum(1 for s in deduped
                       if s["source"] == src and s.get("resolved_github_id"))
        extra = f" ({resolved} resolved)" if resolved else ""
        print(f" {src}: {count}{extra}")
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: build and write the skills index.
if __name__ == "__main__":
    main()
|
||||||
|
|
@ -2698,6 +2698,222 @@ def check_for_skill_updates(
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Hermes centralized index source
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
HERMES_INDEX_URL = "https://hermes-agent.nousresearch.com/docs/api/skills-index.json"
|
||||||
|
HERMES_INDEX_CACHE_FILE = INDEX_CACHE_DIR / "hermes-index.json"
|
||||||
|
HERMES_INDEX_TTL = 6 * 3600 # 6 hours
|
||||||
|
|
||||||
|
|
||||||
|
def _load_hermes_index() -> Optional[dict]:
    """Return the centralized skills index, preferring a fresh local cache.

    Resolution order:
      1. Local cache file younger than HERMES_INDEX_TTL.
      2. Fresh download from the docs site (then re-cached).
      3. Stale local cache, however old.
      4. None — callers degrade to direct-API sources.
    """
    # 1. A fresh-enough local cache wins outright.
    try:
        if HERMES_INDEX_CACHE_FILE.exists():
            cache_age = time.time() - HERMES_INDEX_CACHE_FILE.stat().st_mtime
            if cache_age < HERMES_INDEX_TTL:
                return json.loads(HERMES_INDEX_CACHE_FILE.read_text())
    except (OSError, json.JSONDecodeError):
        pass

    # 2. Download from the docs site.
    try:
        resp = httpx.get(HERMES_INDEX_URL, timeout=15, follow_redirects=True)
        if resp.status_code != 200:
            logger.debug("Hermes index fetch returned %d", resp.status_code)
            return _load_stale_index_cache()
        data = resp.json()
    except (httpx.HTTPError, json.JSONDecodeError) as e:
        logger.debug("Hermes index fetch failed: %s", e)
        return _load_stale_index_cache()

    # Reject payloads that don't look like an index.
    if not isinstance(data, dict) or "skills" not in data:
        return _load_stale_index_cache()

    # 3. Persist for the next TTL window — best effort only.
    try:
        HERMES_INDEX_CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
        HERMES_INDEX_CACHE_FILE.write_text(json.dumps(data))
    except OSError:
        pass

    return data
|
||||||
|
|
||||||
|
|
||||||
|
def _load_stale_index_cache() -> Optional[dict]:
    """Best-effort read of the on-disk index cache, ignoring its age.

    Used as a fallback when a fresh fetch fails; returns None when there
    is no readable, parseable cache file.
    """
    if not HERMES_INDEX_CACHE_FILE.exists():
        return None
    try:
        cached_text = HERMES_INDEX_CACHE_FILE.read_text()
        return json.loads(cached_text)
    except (OSError, json.JSONDecodeError):
        return None
|
||||||
|
|
||||||
|
|
||||||
|
class HermesIndexSource(SkillSource):
    """Skill source backed by the centralized Hermes Skills Index.

    The index is a JSON catalog published to the docs site and rebuilt
    daily by CI. It contains metadata + resolved GitHub paths for every
    skill, eliminating the need for users to hit the GitHub API for
    search or path discovery.

    When the index is unavailable, all methods return empty / None so
    downstream sources take over transparently.
    """

    # Source prefixes that may appear on identifiers; stripped for
    # prefix-agnostic matching in _find_entry.
    _SOURCE_PREFIXES = ("skills-sh/", "skills.sh/", "official/", "github/",
                        "clawhub/")

    def __init__(self, auth: GitHubAuth):
        self._index: Optional[dict] = None
        self._loaded = False
        self.auth = auth
        # Lazily create GitHubSource for fetch — only used when actually
        # downloading files, which requires real GitHub API calls.
        self._github: Optional[GitHubSource] = None

    def _ensure_loaded(self) -> dict:
        """Load the index at most once per instance; {} when unavailable."""
        if not self._loaded:
            self._index = _load_hermes_index()
            self._loaded = True
        return self._index or {}

    def _get_github(self) -> GitHubSource:
        """Lazily construct the GitHubSource used for file downloads."""
        if self._github is None:
            self._github = GitHubSource(auth=self.auth)
        return self._github

    def source_id(self) -> str:
        return "hermes-index"

    @property
    def is_available(self) -> bool:
        """Whether the index is loaded and has skills."""
        index = self._ensure_loaded()
        return bool(index.get("skills"))

    def trust_level_for(self, identifier: str) -> str:
        """Trust level recorded in the index; 'community' when unknown."""
        index = self._ensure_loaded()
        for skill in index.get("skills", []):
            if skill.get("identifier") == identifier:
                return skill.get("trust_level", "community")
        return "community"

    def search(self, query: str, limit: int = 10) -> List[SkillMeta]:
        """Search the cached index. Zero API calls.

        An empty/whitespace query returns the first *limit* entries (the
        index is pre-sorted by source priority, so these act as
        "featured"). Otherwise a case-insensitive substring match over
        name, description, and tags.
        """
        index = self._ensure_loaded()
        skills = index.get("skills", [])
        if not skills:
            return []

        if not query.strip():
            # No query — return featured/popular
            return [self._to_meta(s) for s in skills[:limit]]

        query_lower = query.lower()
        results: List[SkillMeta] = []
        for s in skills:
            searchable = f"{s.get('name', '')} {s.get('description', '')} {' '.join(s.get('tags', []))}".lower()
            if query_lower in searchable:
                results.append(self._to_meta(s))
                if len(results) >= limit:
                    break
        return results

    def fetch(self, identifier: str) -> Optional[SkillBundle]:
        """Fetch a skill using the resolved path from the index.

        If the index has a ``resolved_github_id`` for this skill, we skip
        the entire candidate/discovery chain and go directly to GitHub
        with the exact path. This reduces install from ~31 API calls to
        just the file content downloads (~5-22 depending on skill size).
        """
        index = self._ensure_loaded()
        entry = self._find_entry(identifier, index)
        if not entry:
            return None

        # Use resolved path if available
        resolved = entry.get("resolved_github_id")
        if resolved:
            bundle = self._fetch_bundle(resolved, entry, identifier)
            if bundle:
                return bundle

        # Fall back to identifier-based fetch via repo/path
        repo = entry.get("repo", "")
        path = entry.get("path", "")
        if repo and path:
            return self._fetch_bundle(f"{repo}/{path}", entry, identifier)

        return None

    def _fetch_bundle(self, github_id: str, entry: dict,
                      identifier: str) -> Optional[SkillBundle]:
        """Download via GitHubSource and restamp source/identifier.

        Restamping makes the bundle report the skill's original source
        and the identifier the caller asked for, not the GitHub path.
        """
        bundle = self._get_github().fetch(github_id)
        if bundle:
            bundle.source = entry.get("source", "hermes-index")
            bundle.identifier = identifier
        return bundle

    def inspect(self, identifier: str) -> Optional[SkillMeta]:
        """Return metadata from the index. Zero API calls."""
        index = self._ensure_loaded()
        entry = self._find_entry(identifier, index)
        if entry:
            return self._to_meta(entry)
        return None

    @classmethod
    def _strip_source_prefix(cls, identifier: str) -> str:
        """Remove one leading source prefix (e.g. 'skills-sh/') if present."""
        for prefix in cls._SOURCE_PREFIXES:
            if identifier.startswith(prefix):
                return identifier[len(prefix):]
        return identifier

    def _find_entry(self, identifier: str, index: dict) -> Optional[dict]:
        """Look up a skill in the index by identifier or name.

        Tries an exact identifier match first, then compares with source
        prefixes stripped from both the query and stored identifiers.
        """
        skills = index.get("skills", [])

        # Exact identifier match
        for s in skills:
            if s.get("identifier") == identifier:
                return s

        # Match with source prefixes stripped on both sides
        normalized = self._strip_source_prefix(identifier)
        for s in skills:
            if self._strip_source_prefix(s.get("identifier", "")) == normalized:
                return s

        return None

    @staticmethod
    def _to_meta(entry: dict) -> SkillMeta:
        """Hydrate a SkillMeta from an index entry dict."""
        return SkillMeta(
            name=entry.get("name", ""),
            description=entry.get("description", ""),
            source=entry.get("source", "hermes-index"),
            identifier=entry.get("identifier", ""),
            trust_level=entry.get("trust_level", "community"),
            repo=entry.get("repo"),
            path=entry.get("path"),
            tags=entry.get("tags", []),
            extra=entry.get("extra", {}),
        )
|
||||||
|
|
||||||
|
|
||||||
def create_source_router(auth: Optional[GitHubAuth] = None) -> List[SkillSource]:
|
def create_source_router(auth: Optional[GitHubAuth] = None) -> List[SkillSource]:
|
||||||
"""
|
"""
|
||||||
Create all configured source adapters.
|
Create all configured source adapters.
|
||||||
|
|
@ -2711,6 +2927,7 @@ def create_source_router(auth: Optional[GitHubAuth] = None) -> List[SkillSource]
|
||||||
|
|
||||||
sources: List[SkillSource] = [
|
sources: List[SkillSource] = [
|
||||||
OptionalSkillSource(), # Official optional skills (highest priority)
|
OptionalSkillSource(), # Official optional skills (highest priority)
|
||||||
|
HermesIndexSource(auth=auth), # Centralized index (search + resolved install paths)
|
||||||
SkillsShSource(auth=auth),
|
SkillsShSource(auth=auth),
|
||||||
WellKnownSkillSource(),
|
WellKnownSkillSource(),
|
||||||
GitHubSource(auth=auth, extra_taps=extra_taps),
|
GitHubSource(auth=auth, extra_taps=extra_taps),
|
||||||
|
|
@ -2753,10 +2970,27 @@ def parallel_search_sources(
|
||||||
per_source_limits = per_source_limits or {}
|
per_source_limits = per_source_limits or {}
|
||||||
|
|
||||||
active: List[SkillSource] = []
|
active: List[SkillSource] = []
|
||||||
|
# When the centralized index is available and the user hasn't filtered
|
||||||
|
# to a specific source, skip external API sources (github, skills-sh,
|
||||||
|
# clawhub, etc.) — the index already has their data. This avoids
|
||||||
|
# ~70 GitHub API calls per search for unauthenticated users.
|
||||||
|
_index_available = False
|
||||||
|
_api_source_ids = frozenset({"github", "skills-sh", "clawhub",
|
||||||
|
"claude-marketplace", "lobehub", "well-known"})
|
||||||
|
if source_filter == "all":
|
||||||
|
for src in sources:
|
||||||
|
if (src.source_id() == "hermes-index"
|
||||||
|
and getattr(src, "is_available", False)):
|
||||||
|
_index_available = True
|
||||||
|
break
|
||||||
|
|
||||||
for src in sources:
|
for src in sources:
|
||||||
sid = src.source_id()
|
sid = src.source_id()
|
||||||
if source_filter != "all" and sid != source_filter and sid != "official":
|
if source_filter != "all" and sid != source_filter and sid != "official":
|
||||||
continue
|
continue
|
||||||
|
# Skip external API sources when the index covers them
|
||||||
|
if _index_available and sid in _api_source_ids:
|
||||||
|
continue
|
||||||
active.append(src)
|
active.append(src)
|
||||||
|
|
||||||
all_results: List[SkillMeta] = []
|
all_results: List[SkillMeta] = []
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue