mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
The Skills Hub at /skills had cards that, when expanded, showed only the
one-line description, tags, author, version, and an install command. For
the 163 bundled and optional skills shipped with the repo, this was thinner
than the data we already have on disk.
Three changes, all under website/:
1. extract-skills.py now pulls four extra fields per local skill:
- 'overview' — first non-heading body paragraph from SKILL.md (stripped
of admonitions/code fences, capped at ~500 chars at a sentence boundary)
- 'envVars' / 'commands' — from the prerequisites: block in frontmatter
- 'license' — from the top-level frontmatter
- 'docsPath' — slug to the per-skill /docs/user-guide/skills/.../* page,
computed with the same logic as generate-skill-docs.py
162 of 163 local skills get a non-empty overview automatically. The
remaining one (media/heartmula) has only headings/code in its body and
falls through to the description.
2. Skill TS interface + SkillCard expanded-panel render the new fields:
- Overview paragraph at the top of the panel
- Prerequisites box (env vars + required commands) when frontmatter
declares them
- License row alongside author/version
- 'View full documentation →' link to the per-skill docs page
Search now covers the overview text too, so users can find skills by
matching content from inside SKILL.md, not just the one-line description.
3. styles.module.css gains nine new classes (overviewBlock, detailLabel,
overviewText, prereqBlock/Row/Kind/List/Item, docsLink) styled to match
the existing dark panel aesthetic.
External / community skills (Anthropic, LobeHub, Claude Marketplace cached
indexes) keep the old behavior — overview is empty, no prereqs, no docsPath.
Validation: 'npm run build' clean (exit 0); broken-link count unchanged at
155 baseline; all 163 generated docsPath values resolve to existing pages
under website/docs/user-guide/skills/.
352 lines
12 KiB
Python
352 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""Extract skill metadata from SKILL.md files and index caches into JSON."""
|
|
|
|
import json
|
|
import os
|
|
from collections import Counter
|
|
|
|
import yaml
|
|
|
|
# Repository root: two directory levels above the directory holding this script.
REPO_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)
)

# (directory under REPO_ROOT, source label written into each skill record)
LOCAL_SKILL_DIRS = [
    ("skills", "built-in"),
    ("optional-skills", "optional"),
]

# Directory scanned for cached third-party index files (*.json).
INDEX_CACHE_DIR = os.path.join(REPO_ROOT, "skills", "index-cache")

# Destination of the generated JSON consumed by the website.
OUTPUT = os.path.join(REPO_ROOT, "website", "src", "data", "skills.json")
|
|
|
|
# Display label for each category slug. Slugs missing from this table are
# rendered by the extractors as the slug with hyphens replaced by spaces and
# title-cased.
CATEGORY_LABELS = {
    "apple": "Apple",
    "autonomous-ai-agents": "AI Agents",
    "blockchain": "Blockchain",
    "communication": "Communication",
    "creative": "Creative",
    "data-science": "Data Science",
    "devops": "DevOps",
    "dogfood": "Dogfood",
    "domain": "Domain",
    "email": "Email",
    "gaming": "Gaming",
    "gifs": "GIFs",
    "github": "GitHub",
    "health": "Health",
    "inference-sh": "Inference",
    "leisure": "Leisure",
    "mcp": "MCP",
    "media": "Media",
    "migration": "Migration",
    "mlops": "MLOps",
    "note-taking": "Note-Taking",
    "productivity": "Productivity",
    "red-teaming": "Red Teaming",
    "research": "Research",
    "security": "Security",
    "smart-home": "Smart Home",
    "social-media": "Social Media",
    "software-development": "Software Dev",
    "translation": "Translation",
    # Catch-all bucket used when small/uncategorized groups are merged.
    "other": "Other",
}
|
|
|
|
# Substring of an index-cache filename -> source label shown for its skills.
# Insertion order matters: the first key found in the filename wins.
SOURCE_LABELS = {
    "anthropics_skills": "Anthropic",
    "openai_skills": "OpenAI",
    "claude_marketplace": "Claude Marketplace",
    "lobehub": "LobeHub",
}
|
|
|
|
|
|
def _extract_overview(body: str) -> str:
    """Return the first prose paragraph of a SKILL.md body.

    The body is split on blank lines and only the first six paragraphs are
    examined. Heading lines are dropped, and paragraphs that belong to a
    ``:::`` admonition or a fenced code block are skipped, including the
    later paragraphs of a block that itself contains blank lines. No other
    markdown syntax is rewritten.

    Args:
        body: Markdown text following the frontmatter.

    Returns:
        The selected paragraph, capped at roughly 500 characters, or ``""``
        when no prose paragraph is found.
    """
    if not body:
        return ""

    block_markers = (":::", "```", "~~~")

    def _marker_line_count(text: str, marker: str) -> int:
        # Number of lines in *text* that open or close a *marker* block.
        return sum(1 for ln in text.split("\n") if ln.lstrip().startswith(marker))

    paragraphs = [p.strip() for p in body.split("\n\n") if p.strip()]
    # Marker of a fence/admonition opened in an earlier paragraph and not yet
    # closed. Splitting on blank lines cuts such blocks into several
    # "paragraphs", and the tail pieces must not be mistaken for prose.
    open_marker = None
    for p in paragraphs[:6]:
        if open_marker is not None:
            # Still inside a multi-paragraph block; an odd number of marker
            # lines means this paragraph contains the closing marker.
            if _marker_line_count(p, open_marker) % 2 == 1:
                open_marker = None
            continue

        # Skip pure heading paragraphs ("# Foo", "## Foo")
        if p.startswith("#"):
            # If a heading paragraph also has body text on later lines, take those
            lines = [
                ln for ln in p.split("\n")
                if ln.strip() and not ln.lstrip().startswith("#")
            ]
            if not lines:
                continue
            p = "\n".join(lines).strip()

        # Skip admonitions (:::tip / :::info / etc.) and code fences
        marker = next((m for m in block_markers if p.startswith(m)), None)
        if marker is not None:
            # An odd count means the block was opened here but not closed.
            if _marker_line_count(p, marker) % 2 == 1:
                open_marker = marker
            continue

        # Trim to roughly 500 chars at a sentence boundary
        if len(p) > 500:
            cut = p[:500]
            last_period = cut.rfind(". ")
            if last_period > 200:
                p = cut[: last_period + 1]
            else:
                # No usable sentence end; hard-cut and mark the truncation.
                p = cut.rstrip() + "…"
        return p
    return ""
|
|
|
|
|
|
def _docs_page_path(rel_dir: str, source_label: str) -> str:
    """Build the docs-site slug for a skill directory.

    The docstring of the original states this mirrors the slug logic in
    website/scripts/generate-skill-docs.py. Shapes handled:

        <cat>                 -> <src>/<cat>/<cat>-<cat>
        <cat>/<slug>          -> <src>/<cat>/<cat>-<slug>
        <cat>/<sub>/<slug>    -> <src>/<cat>/<cat>-<sub>-<slug>

    where ``<src>`` is ``bundled`` for the ``built-in`` source label and
    ``optional`` for anything else.

    Args:
        rel_dir: Skill directory relative to its base dir, using ``os.sep``.
        source_label: Source label of the skill (e.g. ``"built-in"``).

    Returns:
        The slug, or ``""`` when ``rel_dir`` has no components or more
        than three.
    """
    segments = [seg for seg in rel_dir.split(os.sep) if seg]
    depth = len(segments)
    if depth == 0 or depth > 3:
        return ""

    prefix = "bundled" if source_label == "built-in" else "optional"
    category = segments[0]
    # A lone component doubles as both category and slug; otherwise the
    # remaining components are hyphen-joined after the category.
    tail = category if depth == 1 else "-".join(segments[1:])
    return f"{prefix}/{category}/{category}-{tail}"
|
|
|
|
|
|
def extract_local_skills():
    """Collect metadata for every SKILL.md found under LOCAL_SKILL_DIRS.

    Each base directory is walked recursively. A SKILL.md is used only when
    it starts with ``---``, has a closing ``---`` delimiter, and its
    frontmatter parses (via ``yaml.safe_load``) to a non-empty mapping;
    anything else is silently skipped.

    Returns:
        A list of dicts, one per accepted skill, carrying name, description,
        overview, category, labels, tags, platforms, author, version,
        license, prerequisite env vars/commands and the docs path.
    """

    def _string_list(value):
        # Normalise a prerequisites entry (list, single string, or anything
        # else) into a list of strings; empty/falsy items are dropped.
        if isinstance(value, list):
            return [str(item) for item in value if item]
        if isinstance(value, str) and value.strip():
            return [value.strip()]
        return []

    collected = []

    for base_dir, source_label in LOCAL_SKILL_DIRS:
        base_path = os.path.join(REPO_ROOT, base_dir)
        if not os.path.isdir(base_path):
            continue

        for root, _dirs, files in os.walk(base_path):
            if "SKILL.md" not in files:
                continue

            with open(os.path.join(root, "SKILL.md"), encoding="utf-8") as handle:
                content = handle.read()

            if not content.startswith("---"):
                continue

            # pieces -> ["", <frontmatter>, <body>] when both delimiters exist
            pieces = content.split("---", 2)
            if len(pieces) < 3:
                continue

            try:
                fm = yaml.safe_load(pieces[1])
            except yaml.YAMLError:
                continue

            if not (fm and isinstance(fm, dict)):
                continue

            overview = _extract_overview(pieces[2].strip())

            # The first path component below the base dir is the category.
            rel = os.path.relpath(root, base_path)
            category = rel.split(os.sep)[0]

            # Prefer metadata.hermes.tags, then fall back to top-level tags.
            metadata = fm.get("metadata")
            hermes_meta = metadata.get("hermes", {}) if isinstance(metadata, dict) else None
            tags = hermes_meta.get("tags", []) if isinstance(hermes_meta, dict) else []
            if not tags:
                tags = fm.get("tags", [])
            if isinstance(tags, str):
                tags = [tags]

            # Optional structured prerequisites — surfaced in the SkillCard panel
            prereq = fm.get("prerequisites") or {}
            if isinstance(prereq, dict):
                env_vars = _string_list(prereq.get("env_vars"))
                commands = _string_list(prereq.get("commands"))
            else:
                env_vars, commands = [], []

            category_label = CATEGORY_LABELS.get(
                category, category.replace("-", " ").title()
            )

            collected.append({
                "name": fm.get("name", os.path.basename(root)),
                "description": fm.get("description", ""),
                "overview": overview,
                "category": category,
                "categoryLabel": category_label,
                "source": source_label,
                "tags": tags or [],
                "platforms": fm.get("platforms", []),
                "author": fm.get("author", ""),
                "version": fm.get("version", ""),
                "license": fm.get("license", ""),
                "envVars": env_vars,
                "commands": commands,
                "docsPath": _docs_page_path(rel, source_label),
            })

    return collected
|
|
|
|
|
|
def extract_cached_index_skills():
    """Collect skill records from the JSON files in INDEX_CACHE_DIR.

    Two file shapes are understood:

    * a dict with an ``"agents"`` list — each dict entry becomes a skill
      whose category is guessed from its ``meta.tags``;
    * a list of dict entries — each entry with a truthy ``"name"`` and no
      nested ``"skills"`` list becomes an ``uncategorized`` skill.

    Files that cannot be opened or decoded as JSON are skipped. The source
    label is the SOURCE_LABELS value whose key occurs in the filename, or
    ``"community"`` when none does.

    Returns:
        A list of skill dicts; an empty list when the cache directory does
        not exist.
    """
    skills = []

    if not os.path.isdir(INDEX_CACHE_DIR):
        return skills

    for filename in os.listdir(INDEX_CACHE_DIR):
        if not filename.endswith(".json"):
            continue

        try:
            with open(os.path.join(INDEX_CACHE_DIR, filename), encoding="utf-8") as handle:
                data = json.load(handle)
        except (json.JSONDecodeError, OSError):
            continue

        stem = filename.replace(".json", "")
        # First SOURCE_LABELS key contained in the filename decides the label.
        source_label = next(
            (label for key, label in SOURCE_LABELS.items() if key in stem),
            "community",
        )

        if isinstance(data, dict) and "agents" in data:
            for agent in data["agents"]:
                if not isinstance(agent, dict):
                    continue
                meta = agent.get("meta", {})
                raw_description = meta.get("description", "") or ""
                skills.append({
                    "name": agent.get("identifier", meta.get("title", "unknown")),
                    # Only the first line, capped at 200 characters.
                    "description": raw_description.split("\n")[0][:200],
                    "category": _guess_category(meta.get("tags", [])),
                    "categoryLabel": "",  # filled below
                    "source": source_label,
                    "tags": meta.get("tags", []),
                    "platforms": [],
                    "author": agent.get("author", ""),
                    "version": "",
                })
            continue

        if isinstance(data, list):
            for entry in data:
                if not isinstance(entry, dict) or not entry.get("name"):
                    continue
                # Entries that carry a nested "skills" list are not skills themselves.
                if isinstance(entry.get("skills"), list):
                    continue
                skills.append({
                    "name": entry.get("name", ""),
                    "description": entry.get("description", ""),
                    "category": "uncategorized",
                    "categoryLabel": "",
                    "source": source_label,
                    "tags": entry.get("tags", []),
                    "platforms": [],
                    "author": "",
                    "version": "",
                })

    # Resolve display labels for every record that did not get one above.
    for skill in skills:
        if skill["categoryLabel"]:
            continue
        category = skill["category"]
        fallback = category.replace("-", " ").title() if category else "Uncategorized"
        skill["categoryLabel"] = CATEGORY_LABELS.get(category, fallback)

    return skills
|
|
|
|
|
|
# Reverse lookup: lower-case tag -> category slug it implies.
TAG_TO_CATEGORY = {
    tag: category
    for category, category_tags in {
        "software-development": [
            "programming", "code", "coding", "software-development",
            "frontend-development", "backend-development", "web-development",
            "react", "python", "typescript", "java", "rust",
        ],
        "creative": ["writing", "design", "creative", "art", "image-generation"],
        "research": ["education", "academic", "research"],
        "social-media": ["marketing", "seo", "social-media"],
        "productivity": ["productivity", "business"],
        "data-science": ["data", "data-science"],
        "mlops": ["machine-learning", "deep-learning"],
        "devops": ["devops"],
        "gaming": ["gaming", "game", "game-development"],
        "media": ["music", "media", "video"],
        "health": ["health", "fitness"],
        "translation": ["translation", "language-learning"],
        "security": ["security", "cybersecurity"],
    }.items()
    for tag in category_tags
}


def _guess_category(tags: list) -> str:
    """Pick a category slug for a skill from its tags.

    Args:
        tags: Tag strings; may be empty or ``None``.

    Returns:
        ``"uncategorized"`` when there are no tags; otherwise the category
        of the first tag (case-insensitively) present in TAG_TO_CATEGORY;
        otherwise the first tag lower-cased with spaces turned into hyphens.
    """
    if not tags:
        return "uncategorized"

    # Lazy lookup so tags after the first hit are never touched.
    lookups = (TAG_TO_CATEGORY.get(tag.lower()) for tag in tags)
    matched = next((category for category in lookups if category), None)
    if matched:
        return matched

    return tags[0].lower().replace(" ", "-")
|
|
|
|
|
|
# Categories with fewer skills than this are merged into "other".
MIN_CATEGORY_SIZE = 4


def _consolidate_small_categories(skills: list) -> list:
    """Fold uncategorized and under-populated categories into "other".

    The skill dicts are modified in place. First every skill whose category
    is ``"uncategorized"`` or empty is moved to ``"other"``; then categories
    are counted and any with fewer than MIN_CATEGORY_SIZE members are moved
    to ``"other"`` as well.

    Args:
        skills: Skill dicts with ``"category"`` and ``"categoryLabel"`` keys.

    Returns:
        The same list object that was passed in.
    """

    def _move_to_other(skill):
        skill["category"] = "other"
        skill["categoryLabel"] = "Other"

    for skill in skills:
        if skill["category"] in ("uncategorized", ""):
            _move_to_other(skill)

    # Count only after the first pass so the merged skills count as "other".
    tally = Counter(skill["category"] for skill in skills)
    undersized = {name for name, size in tally.items() if size < MIN_CATEGORY_SIZE}

    for skill in skills:
        if skill["category"] in undersized:
            _move_to_other(skill)

    return skills
|
|
|
|
|
|
def main():
    """Build the combined skills list, write it to OUTPUT and print a summary.

    Local and cached-index skills are merged, small categories are
    consolidated, and the result is sorted by source (built-in, optional,
    then everything else), with the "other" category last within a source,
    then by category and name.
    """
    local = extract_local_skills()
    external = extract_cached_index_skills()

    all_skills = _consolidate_small_categories(local + external)

    source_rank = {"built-in": 0, "optional": 1}

    def _sort_key(skill):
        category = skill["category"]
        return (
            source_rank.get(skill["source"], 2),
            int(category == "other"),  # pushes "other" behind real categories
            category,
            skill["name"],
        )

    all_skills.sort(key=_sort_key)

    os.makedirs(os.path.dirname(OUTPUT), exist_ok=True)
    with open(OUTPUT, "w", encoding="utf-8") as out_file:
        json.dump(all_skills, out_file, indent=2)

    per_source = Counter(skill["source"] for skill in local)

    print(f"Extracted {len(all_skills)} skills to {OUTPUT}")
    print(f" {len(local)} local ({per_source['built-in']} built-in, "
          f"{per_source['optional']} optional)")
    print(f" {len(external)} from external indexes")


if __name__ == "__main__":
    main()
|