diff --git a/hermes_cli/skills_hub.py b/hermes_cli/skills_hub.py index afc41cca4b1..aacee3a99b3 100644 --- a/hermes_cli/skills_hub.py +++ b/hermes_cli/skills_hub.py @@ -335,7 +335,14 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all", # Collect results from all (or filtered) sources in parallel. # Per-source limits are generous — parallelism + 30s timeout cap prevents hangs. _TRUST_RANK = {"builtin": 3, "trusted": 2, "community": 1} + # NOTE: when the centralized index is available, parallel_search_sources + # skips the external API sources and serves everything from "hermes-index". + # That source MUST therefore carry a high limit, or browse silently caps + # the entire hub at the default (50) — it shipped that way and surfaced + # ~136 of 88k skills. The external-source limits below only apply when the + # index is unavailable (offline / first run before the cache populates). _PER_SOURCE_LIMIT = { + "hermes-index": 5000, "official": 200, "skills-sh": 200, "well-known": 50, "github": 200, "clawhub": 500, "claude-marketplace": 100, "lobehub": 500, "browse-sh": 500, @@ -396,18 +403,22 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all", # Build table table = Table(show_header=True, header_style="bold") table.add_column("#", style="dim", width=4, justify="right") - table.add_column("Name", style="bold cyan", max_width=25) - table.add_column("Description", max_width=50) + table.add_column("Name", style="bold cyan", max_width=22) + table.add_column("Description", max_width=44) table.add_column("Source", style="dim", width=12) table.add_column("Trust", width=10) + # The identifier is what you pass to `hermes skills install`. Browse used + # to omit it entirely, so users couldn't act on what they saw without a + # second `search`. overflow="fold" keeps long slugs copy-pasteable. + table.add_column("Identifier", style="dim", overflow="fold", no_wrap=False) for i, r in enumerate(page_items, start=start + 1): trust_style = {"builtin": "bright_cyan", "trusted": "green", "community": "yellow"}.get(r.trust_level, "dim") trust_label = "★ official" if r.source == "official" else r.trust_level - desc = r.description[:50] - if len(r.description) > 50: + desc = r.description[:44] + if len(r.description) > 44: desc += "..." table.add_row( @@ -416,6 +427,7 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all", desc, r.source, f"[{trust_style}]{trust_label}[/]", + r.identifier, ) c.print(table) @@ -439,7 +451,9 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all", c.print(f" [yellow]⚡ Slow sources skipped: {', '.join(timed_out)} " f"— run again for cached results[/]") - c.print("[dim]Tip: 'hermes skills search ' searches deeper across all registries[/]\n") + c.print("[dim]Tip: 'hermes skills inspect ' to preview, " + "'hermes skills install ' to install, " + "'hermes skills search ' to search deeper[/]\n") def do_install(identifier: str, category: str = "", force: bool = False, @@ -725,24 +739,27 @@ def browse_skills(page: int = 1, page_size: int = 20, source: str = "all") -> di Returns ``{"items": [...], "page": int, "total_pages": int, "total": int}``. """ - from tools.skills_hub import GitHubAuth, create_source_router + from tools.skills_hub import ( + GitHubAuth, create_source_router, parallel_search_sources, + ) page_size = max(1, min(page_size, 100)) _TRUST_RANK = {"builtin": 3, "trusted": 2, "community": 1} - _PER_SOURCE_LIMIT = {"official": 100, "skills-sh": 100, "well-known": 25, "github": 100, "clawhub": 50, + # "hermes-index" must carry a high limit: when the index is available the + # router skips external API sources and serves everything from it, so a + # low cap here silently truncates the whole hub (see do_browse note). + _PER_SOURCE_LIMIT = {"hermes-index": 5000, "official": 100, "skills-sh": 100, + "well-known": 25, "github": 100, "clawhub": 50, "claude-marketplace": 50, "lobehub": 50, "browse-sh": 500} auth = GitHubAuth() sources = create_source_router(auth) - all_results: list = [] - for src in sources: - sid = src.source_id() - if source != "all" and sid != source and sid != "official": - continue - try: - limit = _PER_SOURCE_LIMIT.get(sid, 50) - all_results.extend(src.search("", limit=limit)) - except Exception: - continue + # Delegate to the shared parallel walker so this inherits the index-aware + # source-skip logic — querying hermes-index AND the external APIs at once + # would double-count every skill. + all_results, _counts, _timed_out = parallel_search_sources( + sources, query="", per_source_limits=_PER_SOURCE_LIMIT, + source_filter=source, overall_timeout=30, + ) if not all_results: return {"items": [], "page": 1, "total_pages": 1, "total": 0} seen: dict = {} @@ -759,7 +776,7 @@ def browse_skills(page: int = 1, page_size: int = 20, source: str = "all") -> di page_items = deduped[start : min(start + page_size, total)] return { "items": [{"name": r.name, "description": r.description, "source": r.source, - "trust": r.trust_level} for r in page_items], + "trust": r.trust_level, "identifier": r.identifier} for r in page_items], "page": page, "total_pages": total_pages, "total": total, diff --git a/tests/website/test_extract_skills.py b/tests/website/test_extract_skills.py new file mode 100644 index 00000000000..307d1f44459 --- /dev/null +++ b/tests/website/test_extract_skills.py @@ -0,0 +1,116 @@ +"""Tests for website/scripts/extract-skills.py helpers. + +Covers the two behavioral contracts added when the Skills Hub page gained +per-skill source links and a cleaned-up category sidebar: + +1. ``_source_url`` — every community skill must resolve to a clickable + origin URL (explicit ``extra`` URL preferred, else synthesized from the + identifier shape). Built-in/optional skills intentionally return "" — + they have a generated docs page (docsPath) instead. + +2. ``_guess_category`` — tags only map to a curated category bucket; + unknown tags fall to ``uncategorized`` (folded into "Other" later) so the + sidebar doesn't fill with one-off junk like version strings or brand + names. +""" + +from __future__ import annotations + +import importlib.util +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parents[2] +EXTRACT = REPO_ROOT / "website" / "scripts" / "extract-skills.py" + + +@pytest.fixture(scope="module") +def mod(): + spec = importlib.util.spec_from_file_location("extract_skills", EXTRACT) + assert spec is not None and spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +# -------------------------------------------------------------------------- +# _source_url +# -------------------------------------------------------------------------- + +def test_source_url_prefers_explicit_detail_url(mod): + extra = {"detail_url": "https://skills.sh/owner/repo/skill"} + assert ( + mod._source_url("skills.sh", "skills-sh/owner/repo/skill", extra) + == "https://skills.sh/owner/repo/skill" + ) + + +def test_source_url_prefers_browse_sh_source_url(mod): + # browse.sh adapter carries its origin under extra["source_url"]. + extra = {"source_url": "https://airbnb.com/host"} + assert ( + mod._source_url("browse-sh", "browse-sh/airbnb.com/login-abc", extra) + == "https://airbnb.com/host" + ) + + +def test_source_url_synthesizes_github_tree_url(mod): + url = mod._source_url("github", "anthropics/skills/skills/algorithmic-art", {}) + assert url == "https://github.com/anthropics/skills/tree/main/skills/algorithmic-art" + + +def test_source_url_synthesizes_github_root_when_no_subpath(mod): + assert mod._source_url("github", "owner/repo", {}) == "https://github.com/owner/repo" + + +def test_source_url_synthesizes_clawhub(mod): + assert mod._source_url("clawhub", "go-music-skill", {}) == "https://clawhub.ai/skills/go-music-skill" + + +def test_source_url_synthesizes_clawhub_strips_prefix(mod): + # identifier may arrive already prefixed; we must not double-prefix. + assert ( + mod._source_url("clawhub", "clawhub/go-music-skill", {}) + == "https://clawhub.ai/skills/go-music-skill" + ) + + +def test_source_url_synthesizes_lobehub(mod): + assert mod._source_url("lobehub", "lobehub/chinese-paper", {}) == "https://lobehub.com/agent/chinese-paper" + + +def test_source_url_empty_for_unknown_source_without_identifier(mod): + assert mod._source_url("mystery", "", {}) == "" + + +# -------------------------------------------------------------------------- +# _guess_category +# -------------------------------------------------------------------------- + +def test_guess_category_maps_known_tag(mod): + assert mod._guess_category(["security"]) == "security" + assert mod._guess_category(["machine-learning"]) == "mlops" + assert mod._guess_category(["crypto"]) == "blockchain" + + +def test_guess_category_accepts_literal_curated_key(mod): + # A skill tagged literally with a curated category key should route there. + assert mod._guess_category(["devops"]) == "devops" + + +def test_guess_category_rejects_junk_tag(mod): + # This is the whole point: version strings / brand names must NOT become + # their own sidebar category. They land in "uncategorized" → "Other". + assert mod._guess_category(["0.10.7 Dev"]) == "uncategorized" + assert mod._guess_category(["Doramagic Crystal"]) == "uncategorized" + assert mod._guess_category(["Ap2"]) == "uncategorized" + + +def test_guess_category_empty_tags(mod): + assert mod._guess_category([]) == "uncategorized" + + +def test_guess_category_skips_first_junk_tag_for_later_known_tag(mod): + # First tag is junk, second is curated — we should still find the curated one. + assert mod._guess_category(["Some Brand", "security"]) == "security" diff --git a/website/scripts/extract-skills.py b/website/scripts/extract-skills.py index c0aa362f0a8..c8beb9b753e 100644 --- a/website/scripts/extract-skills.py +++ b/website/scripts/extract-skills.py @@ -48,7 +48,7 @@ CATEGORY_LABELS = { "data-science": "Data Science", "devops": "DevOps", "dogfood": "Dogfood", - "domain": "Domain", + "domain": "Business & Finance", "email": "Email", "gaming": "Gaming", "gifs": "GIFs", @@ -193,6 +193,60 @@ def _install_command(source: str, identifier: str, name: str) -> str: return f"hermes skills install {identifier}" +def _source_url(source: str, identifier: str, extra: dict) -> str: + """Best-effort clickable URL to the skill's origin (repo / detail page). + + Community skills have no generated docs page, so without this the + expanded card on the Skills Hub gives users nowhere to go to read the + actual SKILL.md before installing. We prefer an explicit URL the source + adapter already collected (``extra.detail_url`` / ``extra.repo_url``), + then fall back to synthesizing one from the identifier shape. + """ + extra = extra or {} + for key in ("detail_url", "source_url", "repo_url", "url", "index_url"): + val = extra.get(key) + if isinstance(val, str) and val.startswith("http"): + return val + + if not identifier: + return "" + src = (source or "").lower() + + # GitHub-backed taps (openai/anthropic/nvidia/hf/gstack/VoltAgent/...): + # identifier is "owner/repo/" — link to the directory on GitHub. + if src in {"github", "openai", "anthropic", "huggingface", "nvidia", + "gstack", "voltagent", "minimax", "claude marketplace", + "claude-marketplace"}: + parts = [p for p in identifier.split("/") if p] + if len(parts) >= 2: + owner, repo = parts[0], parts[1] + sub = "/".join(parts[2:]) + base = f"https://github.com/{owner}/{repo}" + return f"{base}/tree/main/{sub}" if sub else base + return "" + + if src == "clawhub": + # identifier is a bare slug (the "clawhub/" prefix is added at install time) + slug = identifier[len("clawhub/"):] if identifier.startswith("clawhub/") else identifier + return f"https://clawhub.ai/skills/{slug}" + + if src in {"skills.sh", "skills-sh"}: + # "skills-sh/owner/repo/skill" -> the skills.sh detail page + rest = identifier[len("skills-sh/"):] if identifier.startswith("skills-sh/") else identifier + return f"https://skills.sh/skills/{rest}" + + if src == "lobehub": + slug = identifier[len("lobehub/"):] if identifier.startswith("lobehub/") else identifier + return f"https://lobehub.com/agent/{slug}" + + if src in {"browse.sh", "browse-sh"}: + # "browse-sh//" -> browse.sh task page + rest = identifier[len("browse-sh/"):] if identifier.startswith("browse-sh/") else identifier + return f"https://browse.sh/skills/{rest}" + + return "" + + def extract_local_skills(): skills = [] @@ -361,6 +415,7 @@ def extract_unified_index_skills(): author = repo.split("/")[0] install_cmd = _install_command(source_id, identifier, name) + source_url = _source_url(source_id, identifier, extra) out.append({ "name": name, @@ -380,6 +435,7 @@ def extract_unified_index_skills(): "docsPath": "", "identifier": identifier, "installCmd": install_cmd, + "sourceUrl": source_url, }) return out, meta @@ -460,26 +516,60 @@ for _cat, _tags in { "software-development": [ "programming", "code", "coding", "software-development", "frontend-development", "backend-development", "web-development", - "react", "python", "typescript", "java", "rust", + "react", "python", "typescript", "java", "rust", "cli", + "developer-tools", "development", "api", "database", "debugging", + "documentation", "testing", "test", "architecture", ], - "creative": ["writing", "design", "creative", "art", "image-generation"], - "research": ["education", "academic", "research"], - "social-media": ["marketing", "seo", "social-media"], - "productivity": ["productivity", "business"], - "data-science": ["data", "data-science"], - "mlops": ["machine-learning", "deep-learning"], - "devops": ["devops"], + "autonomous-ai-agents": [ + "ai", "agent", "agents", "ai-agent", "ai-agents", "agentic", + "agentic-ai", "ai-assistant", "assistant", "multi-agent", + "autonomous", "llm", "rag", "prompt", "prompts", "a2a", "acp", + ], + "creative": [ + "writing", "design", "creative", "art", "image-generation", + "image", "content", "video-editing", "content-creation", + ], + "research": ["education", "academic", "academic-writing", "research", "knowledge"], + "social-media": ["marketing", "seo", "social-media", "advertising", "creator"], + "productivity": [ + "productivity", "business", "automation", "calendar", "email", + "document", "documents", "office", "notes", "note-taking", + "collaboration", "workflow", "crm", + ], + "data-science": ["data", "data-science", "analytics", "analysis", "visualization"], + "mlops": ["machine-learning", "deep-learning", "mlops", "training", "fine-tuning"], + "devops": ["devops", "docker", "kubernetes", "infrastructure", "deployment", "monitoring", "ci-cd"], "gaming": ["gaming", "game", "game-development"], - "media": ["music", "media", "video"], - "health": ["health", "fitness"], - "translation": ["translation", "language-learning"], - "security": ["security", "cybersecurity"], + "media": ["music", "media", "video", "audio", "podcast", "youtube"], + "health": ["health", "fitness", "medical", "wellness"], + "translation": ["translation", "language-learning", "i18n", "localization"], + "security": ["security", "cybersecurity", "auth", "compliance", "audit", "privacy"], + "blockchain": [ + "blockchain", "crypto", "cryptocurrency", "defi", "web3", + "bitcoin", "ethereum", "nft", "trading", "arbitrage", + ], + "communication": ["communication", "chat", "messaging", "slack", "discord"], + "domain": [ + "finance", "accounting", "banking", "ecommerce", "e-commerce", + "shopping", "travel", "booking", "real-estate", "legal", + "government", "b2b", "b2b-sales", "entrepreneur", "budget", + ], }.items(): for _t in _tags: TAG_TO_CATEGORY[_t] = _cat def _guess_category(tags: list) -> str: + """Map a skill's tags to a curated category, or 'uncategorized'. + + Previously this fell back to ``tags[0]`` verbatim, which produced + hundreds of junk one-off "categories" in the sidebar (e.g. + "Doramagic Crystal", "0.10.7 Dev", "Ap2") — version strings, brand + names, and tag noise. We now ONLY accept categories that map to a + known curated bucket; everything else becomes "uncategorized", which + _consolidate_small_categories folds into "Other". Sidecar-declared + categories (skills.sh groupings) bypass this entirely via fixedCategory. + """ if not tags: return "uncategorized" for tag in tags: @@ -488,8 +578,12 @@ def _guess_category(tags: list) -> str: cat = TAG_TO_CATEGORY.get(tag.lower()) if cat: return cat - first = tags[0] if isinstance(tags[0], str) else "" - return first.lower().replace(" ", "-") if first else "uncategorized" + # Also accept a tag that's already a known curated category key + # (e.g. a skill tagged literally "security" or "devops"). + normalized = tag.lower().replace(" ", "-") + if normalized in CATEGORY_LABELS and normalized != "other": + return normalized + return "uncategorized" MIN_CATEGORY_SIZE = 4 diff --git a/website/src/pages/skills/index.tsx b/website/src/pages/skills/index.tsx index 9595af67e4f..735ccafb548 100644 --- a/website/src/pages/skills/index.tsx +++ b/website/src/pages/skills/index.tsx @@ -19,6 +19,10 @@ interface Skill { docsPath?: string; identifier?: string; installCmd?: string; + /** Clickable URL to the skill's origin (repo / detail page). Synthesized + * in extract-skills.py for community skills that have no generated docs + * page, so the expanded card always has somewhere to send the user. */ + sourceUrl?: string; /** Lowercase pre-joined haystack used by the search filter. * Built once at load time so per-keystroke filtering is a single * `.includes()` per skill instead of array-join + toLowerCase on @@ -240,6 +244,47 @@ function highlightMatch(text: string, query: string): React.ReactNode { ); } +function CopyButton({ text }: { text: string }) { + const [copied, setCopied] = useState(false); + const onCopy = useCallback( + (e: React.MouseEvent) => { + e.stopPropagation(); + navigator.clipboard?.writeText(text).then( + () => { + setCopied(true); + setTimeout(() => setCopied(false), 1500); + }, + () => {}, + ); + }, + [text], + ); + return ( + + ); +} + function SkillCard({ skill, query, @@ -379,16 +424,31 @@ function SkillCard({ )}
{skill.installCmd || `hermes skills install ${skill.name}`} + +
+ - {skill.docsPath && ( - e.stopPropagation()} - > - View full documentation → - - )} )} @@ -792,7 +852,15 @@ export default function SkillsDashboard() { )} - {visible.length > 0 ? ( + {!data && !loadError ? ( +
+
+

Loading the catalog…

+

+ Fetching 88k+ skills across every registry. One moment. +

+
+ ) : visible.length > 0 ? ( <>
{visible.map((skill, i) => { diff --git a/website/src/pages/skills/styles.module.css b/website/src/pages/skills/styles.module.css index 94dce0a7493..018703c676d 100644 --- a/website/src/pages/skills/styles.module.css +++ b/website/src/pages/skills/styles.module.css @@ -628,6 +628,9 @@ background: rgba(0, 0, 0, 0.25); border: 1px solid rgba(255, 215, 0, 0.06); border-radius: 5px; + display: flex; + align-items: center; + gap: 0.5rem; } .installHint code { @@ -636,6 +639,64 @@ color: rgba(255, 215, 0, 0.7); background: none; padding: 0; + flex: 1; + overflow-x: auto; + white-space: nowrap; + scrollbar-width: none; +} + +.installHint code::-webkit-scrollbar { + display: none; +} + +.copyBtn { + display: inline-flex; + align-items: center; + gap: 0.25rem; + flex-shrink: 0; + padding: 0.2rem 0.45rem; + border: 1px solid rgba(255, 215, 0, 0.18); + border-radius: 4px; + background: rgba(255, 215, 0, 0.06); + color: rgba(255, 215, 0, 0.85); + font-size: 0.68rem; + font-weight: 600; + cursor: pointer; + transition: all 0.15s; +} + +.copyBtn:hover { + background: rgba(255, 215, 0, 0.14); + color: rgba(255, 215, 0, 1); +} + +.copyBtnLabel { + line-height: 1; +} + +.cardLinks { + display: flex; + gap: 0.5rem; +} + +.cardLinks .docsLink { + flex: 1; +} + +.loadingSpinner { + width: 2.25rem; + height: 2.25rem; + margin: 0 auto 1rem; + border: 3px solid rgba(255, 215, 0, 0.15); + border-top-color: rgba(255, 215, 0, 0.7); + border-radius: 50%; + animation: skillsSpin 0.8s linear infinite; +} + +@keyframes skillsSpin { + to { + transform: rotate(360deg); + } } .overviewBlock {