From f040710d04c26ee7cd684bda763862c8e1be9188 Mon Sep 17 00:00:00 2001 From: wysie Date: Wed, 27 May 2026 18:32:03 +0800 Subject: [PATCH] fix: backfill official optional skill provenance --- hermes_cli/main.py | 25 ++++ hermes_cli/skills_hub.py | 45 +++++++ tests/tools/test_skills_sync.py | 115 ++++++++++++++++ tools/skills_sync.py | 231 +++++++++++++++++++++++++++++++- 4 files changed, 414 insertions(+), 2 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 5d7fec8253f..e6cb23b7a83 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -12569,6 +12569,31 @@ Examples: help="Skip confirmation prompt when using --restore", ) + skills_repair_official = skills_subparsers.add_parser( + "repair-official", + help="Backfill or restore official optional skills from repo source", + description=( + "Repair official optional skill provenance. By default, only backfills " + "hub metadata for exact matches. Pass --restore to replace missing or " + "mutated active copies from optional-skills/, moving existing copies to " + "a restore backup first. Use name 'all' to repair every optional skill." + ), + ) + skills_repair_official.add_argument( + "name", help="Official optional skill folder/frontmatter name, or 'all'" + ) + skills_repair_official.add_argument( + "--restore", + action="store_true", + help="Restore from official optional source, backing up existing matching copies", + ) + skills_repair_official.add_argument( + "--yes", + "-y", + action="store_true", + help="Skip confirmation prompt when using --restore", + ) + skills_publish = skills_subparsers.add_parser( "publish", help="Publish a skill to a registry" ) diff --git a/hermes_cli/skills_hub.py b/hermes_cli/skills_hub.py index 8e7276bdc30..b617b69f384 100644 --- a/hermes_cli/skills_hub.py +++ b/hermes_cli/skills_hub.py @@ -1041,6 +1041,48 @@ def do_reset(name: str, restore: bool = False, c.print("[dim]Use /reset to start a new session now, or --now to apply immediately (invalidates prompt cache).[/]\n") +def do_repair_official(name: str, restore: bool = False, + console: Optional[Console] = None, + skip_confirm: bool = False, + invalidate_cache: bool = True) -> None: + """Backfill or restore official optional skills from repo source.""" + from tools.skills_sync import restore_official_optional_skill + + c = console or _console + if restore and not skip_confirm: + c.print(f"\n[bold]Restore official optional skill '{name}' from repo source?[/]") + c.print("[dim]Existing matching active copies will be moved to a restore backup before copying the official source.[/]") + try: + answer = input("Confirm [y/N]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + answer = "n" + if answer not in {"y", "yes"}: + c.print("[dim]Cancelled.[/]\n") + return + + result = restore_official_optional_skill(name, restore=restore) + if not result.get("ok"): + c.print(f"[bold red]Error:[/] {result.get('message', 'Repair failed')}\n") + return + + c.print(f"[bold green]{result['message']}[/]") + if result.get("restored"): + c.print(f"[dim]Restored: {', '.join(result['restored'])}[/]") + if result.get("backfilled"): + c.print(f"[dim]Backfilled provenance: {', '.join(result['backfilled'])}[/]") + if result.get("backed_up"): + c.print(f"[dim]Backed up: {', '.join(result['backed_up'])}[/]") + c.print(f"[dim]Backup dir: {result.get('backup_dir')}[/]") + c.print() + + if invalidate_cache: + try: + from agent.prompt_builder import clear_skills_system_prompt_cache + clear_skills_system_prompt_cache(clear_snapshot=True) + except Exception: + pass + + def do_tap(action: str, repo: str = "", console: Optional[Console] = None) -> None: """Manage taps (custom GitHub repo sources).""" from tools.skills_hub import TapsManager @@ -1372,6 +1414,9 @@ def skills_command(args) -> None: elif action == "reset": do_reset(args.name, restore=getattr(args, "restore", False), skip_confirm=getattr(args, "yes", False)) + elif action == "repair-official": + do_repair_official(args.name, restore=getattr(args, "restore", False), + skip_confirm=getattr(args, "yes", False)) elif action == "publish": do_publish( args.skill_path, diff --git a/tests/tools/test_skills_sync.py b/tests/tools/test_skills_sync.py index 347366e6a6f..d0bee8eb78c 100644 --- a/tests/tools/test_skills_sync.py +++ b/tests/tools/test_skills_sync.py @@ -1,5 +1,6 @@ """Tests for tools/skills_sync.py — manifest-based skill seeding and updating.""" +import json from pathlib import Path from unittest.mock import patch @@ -13,6 +14,7 @@ from tools.skills_sync import ( _dir_hash, sync_skills, reset_bundled_skill, + restore_official_optional_skill, MANIFEST_FILE, SKILLS_DIR, ) @@ -196,6 +198,7 @@ class TestSyncSkills: from contextlib import ExitStack stack = ExitStack() stack.enter_context(patch("tools.skills_sync._get_bundled_dir", return_value=bundled)) + stack.enter_context(patch("tools.skills_sync._get_optional_dir", return_value=bundled.parent / "optional-skills")) stack.enter_context(patch("tools.skills_sync.SKILLS_DIR", skills_dir)) stack.enter_context(patch("tools.skills_sync.MANIFEST_FILE", manifest_file)) return stack @@ -482,12 +485,123 @@ class TestSyncSkills: assert "new-skill" in captured assert "hermes skills reset new-skill" in captured + def test_backfills_official_optional_provenance_for_existing_identical_skill(self, tmp_path): + bundled = self._setup_bundled(tmp_path) + optional = tmp_path / "optional-skills" + optional_skill = optional / "mlops" / "training" / "trl-fine-tuning" + optional_skill.mkdir(parents=True) + (optional_skill / "SKILL.md").write_text( + "---\nname: fine-tuning-with-trl\n---\n# TRL\n" + ) + (optional_skill / "references").mkdir() + (optional_skill / "references" / "api.md").write_text("api\n") + + skills_dir = tmp_path / "user_skills" + manifest_file = skills_dir / ".bundled_manifest" + active = skills_dir / "mlops" / "training" / "trl-fine-tuning" + active.mkdir(parents=True) + (active / "SKILL.md").write_text( + "---\nname: fine-tuning-with-trl\n---\n# TRL\n" + ) + (active / "references").mkdir() + (active / "references" / "api.md").write_text("api\n") + + with self._patches(bundled, skills_dir, manifest_file): + with patch("tools.skills_sync._get_optional_dir", return_value=optional): + result = sync_skills(quiet=True) + + assert result["optional_provenance_backfilled"] == ["trl-fine-tuning"] + lock_path = skills_dir / ".hub" / "lock.json" + data = json.loads(lock_path.read_text()) + entry = data["installed"]["trl-fine-tuning"] + assert entry["source"] == "official" + assert entry["identifier"] == "official/mlops/training/trl-fine-tuning" + assert entry["trust_level"] == "builtin" + assert entry["install_path"] == "mlops/training/trl-fine-tuning" + + def test_does_not_backfill_optional_provenance_for_modified_skill(self, tmp_path): + bundled = self._setup_bundled(tmp_path) + optional = tmp_path / "optional-skills" + optional_skill = optional / "mlops" / "training" / "trl-fine-tuning" + optional_skill.mkdir(parents=True) + (optional_skill / "SKILL.md").write_text("# upstream optional\n") + + skills_dir = tmp_path / "user_skills" + manifest_file = skills_dir / ".bundled_manifest" + active = skills_dir / "mlops" / "training" / "trl-fine-tuning" + active.mkdir(parents=True) + (active / "SKILL.md").write_text("# user modified\n") + + with self._patches(bundled, skills_dir, manifest_file): + with patch("tools.skills_sync._get_optional_dir", return_value=optional): + result = sync_skills(quiet=True) + + assert result["optional_provenance_backfilled"] == [] + assert not (skills_dir / ".hub" / "lock.json").exists() + + def test_repair_official_optional_restores_reorganized_skill_with_backup(self, tmp_path): + bundled = self._setup_bundled(tmp_path) + optional = tmp_path / "optional-skills" + optional_skill = optional / "mlops" / "training" / "trl-fine-tuning" + optional_skill.mkdir(parents=True) + (optional_skill / "SKILL.md").write_text( + "---\nname: fine-tuning-with-trl\n---\n# Official TRL\n" + ) + + skills_dir = tmp_path / "user_skills" + manifest_file = skills_dir / ".bundled_manifest" + wrong = skills_dir / "mlops" / "trl-fine-tuning" + wrong.mkdir(parents=True) + (wrong / "SKILL.md").write_text( + "---\nname: fine-tuning-with-trl\n---\n# Curator mangled\n" + ) + + with self._patches(bundled, skills_dir, manifest_file): + with patch("tools.skills_sync._get_optional_dir", return_value=optional): + result = restore_official_optional_skill("fine-tuning-with-trl", restore=True) + + canonical = skills_dir / "mlops" / "training" / "trl-fine-tuning" + assert result["ok"] is True + assert result["restored"] == ["trl-fine-tuning"] + assert result["backed_up"] == ["mlops/trl-fine-tuning"] + assert "Official TRL" in (canonical / "SKILL.md").read_text() + assert not wrong.exists() + assert (Path(result["backup_dir"]) / "mlops" / "trl-fine-tuning" / "SKILL.md").exists() + + data = json.loads((skills_dir / ".hub" / "lock.json").read_text()) + assert data["installed"]["trl-fine-tuning"]["source"] == "official" + assert data["installed"]["trl-fine-tuning"]["install_path"] == "mlops/training/trl-fine-tuning" + + def test_repair_official_optional_without_restore_does_not_replace_modified_copy(self, tmp_path): + bundled = self._setup_bundled(tmp_path) + optional = tmp_path / "optional-skills" + optional_skill = optional / "mlops" / "training" / "trl-fine-tuning" + optional_skill.mkdir(parents=True) + (optional_skill / "SKILL.md").write_text("# official\n") + + skills_dir = tmp_path / "user_skills" + manifest_file = skills_dir / ".bundled_manifest" + canonical = skills_dir / "mlops" / "training" / "trl-fine-tuning" + canonical.mkdir(parents=True) + (canonical / "SKILL.md").write_text("# modified\n") + + with self._patches(bundled, skills_dir, manifest_file): + with patch("tools.skills_sync._get_optional_dir", return_value=optional): + result = restore_official_optional_skill("trl-fine-tuning", restore=False) + + assert result["ok"] is True + assert result["restored"] == [] + assert result["backfilled"] == [] + assert (canonical / "SKILL.md").read_text() == "# modified\n" + assert not (skills_dir / ".hub" / "lock.json").exists() + def test_nonexistent_bundled_dir(self, tmp_path): with patch("tools.skills_sync._get_bundled_dir", return_value=tmp_path / "nope"): result = sync_skills(quiet=True) assert result == { "copied": [], "updated": [], "skipped": 0, "user_modified": [], "cleaned": [], "total_bundled": 0, + "optional_provenance_backfilled": [], } def test_failed_copy_does_not_poison_manifest(self, tmp_path): @@ -620,6 +734,7 @@ class TestResetBundledSkill: from contextlib import ExitStack stack = ExitStack() stack.enter_context(patch("tools.skills_sync._get_bundled_dir", return_value=bundled)) + stack.enter_context(patch("tools.skills_sync._get_optional_dir", return_value=bundled.parent / "optional-skills")) stack.enter_context(patch("tools.skills_sync.SKILLS_DIR", skills_dir)) stack.enter_context(patch("tools.skills_sync.MANIFEST_FILE", manifest_file)) return stack diff --git a/tools/skills_sync.py b/tools/skills_sync.py index fb95898f84d..91c1408c432 100644 --- a/tools/skills_sync.py +++ b/tools/skills_sync.py @@ -22,11 +22,13 @@ The manifest lives at ~/.hermes/skills/.bundled_manifest. """ import hashlib +import json import logging import os import shutil -from pathlib import Path -from hermes_constants import get_bundled_skills_dir, get_hermes_home +from datetime import datetime, timezone +from pathlib import Path, PurePosixPath +from hermes_constants import get_bundled_skills_dir, get_hermes_home, get_optional_skills_dir from agent.skill_utils import is_excluded_skill_path from typing import Dict, List, Tuple from utils import atomic_replace @@ -49,6 +51,11 @@ def _get_bundled_dir() -> Path: return get_bundled_skills_dir(Path(__file__).parent.parent / "skills") +def _get_optional_dir() -> Path: + """Locate the official optional-skills/ directory.""" + return get_optional_skills_dir(Path(__file__).parent.parent / "optional-skills") + + def _read_manifest() -> Dict[str, str]: """ Read the manifest as a dict of {skill_name: origin_hash}. @@ -172,6 +179,221 @@ def _dir_hash(directory: Path) -> str: return hasher.hexdigest() +def _safe_rel_install_path(path: Path, base: Path) -> str: + """Return a normalized relative POSIX path, rejecting traversal/absolute paths.""" + rel = path.relative_to(base) + posix = rel.as_posix() + pure = PurePosixPath(posix) + parts = [part for part in pure.parts if part not in {"", "."}] + if pure.is_absolute() or not parts or any(part == ".." for part in parts): + raise ValueError(f"Unsafe optional skill path: {posix}") + return "/".join(parts) + + +def _skill_file_list(skill_dir: Path) -> List[str]: + """List files inside a skill directory in lock-file format.""" + files: List[str] = [] + for fpath in sorted(skill_dir.rglob("*")): + if fpath.is_file(): + files.append(fpath.relative_to(skill_dir).as_posix()) + return files + + +def _content_hash(directory: Path) -> str: + """Return the same hash style the skills hub lock uses, falling back locally.""" + try: + from tools.skills_guard import content_hash + + return content_hash(directory) + except Exception: + # Hashing is provenance metadata only; keep sync resilient if guard + # dependencies are unavailable in a packaged/update context. + return _dir_hash(directory) + + +def _optional_skill_index() -> Dict[str, Tuple[str, str, Path]]: + """Return official optional skills keyed by folder name and frontmatter name. + + Values are ``(folder_name, install_path, source_dir)``. Multiple keys may + point to the same skill so callers can accept either the folder slug used + by the hub lock or the user-facing frontmatter name. + """ + optional_dir = _get_optional_dir() + index: Dict[str, Tuple[str, str, Path]] = {} + if not optional_dir.exists(): + return index + for skill_md in sorted(optional_dir.rglob("SKILL.md")): + if is_excluded_skill_path(skill_md): + continue + src = skill_md.parent + try: + install_path = _safe_rel_install_path(src, optional_dir) + except ValueError: + continue + folder_name = src.name + frontmatter_name = _read_skill_name(skill_md, folder_name) + value = (folder_name, install_path, src) + index[folder_name] = value + index[frontmatter_name] = value + return index + + +def _move_to_restore_backup(path: Path, backup_root: Path) -> str: + """Move an existing skill directory into a restore backup, preserving rel path.""" + rel = path.relative_to(SKILLS_DIR) + target = backup_root / rel + target.parent.mkdir(parents=True, exist_ok=True) + if target.exists(): + suffix = 1 + while target.with_name(f"{target.name}-{suffix}").exists(): + suffix += 1 + target = target.with_name(f"{target.name}-{suffix}") + shutil.move(str(path), str(target)) + return rel.as_posix() + + +def restore_official_optional_skill(name: str, *, restore: bool = False) -> dict: + """Restore one or all official optional skills from repo source. + + ``restore=False`` only performs exact-match provenance backfill. ``restore=True`` + repairs already-mutated/reorganized skills by backing up matching active + copies and copying the official optional source into its canonical path. + """ + index = _optional_skill_index() + if not index: + return {"ok": False, "message": "No official optional skills directory found.", "restored": [], "backfilled": [], "backed_up": []} + + targets = sorted(set(index.values()), key=lambda item: item[1]) if name in {"all", "*"} else [] + if not targets: + target = index.get(name) + if target is None: + return {"ok": False, "message": f"Official optional skill not found: {name}", "restored": [], "backfilled": [], "backed_up": []} + targets = [target] + + restored: List[str] = [] + backed_up: List[str] = [] + timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S") + backup_root = SKILLS_DIR / ".restore-backups" / f"official-optional-{timestamp}" + + for folder_name, install_path, src in targets: + dest = SKILLS_DIR / Path(*install_path.split("/")) + src_hash = _dir_hash(src) + canonical_ok = dest.exists() and _dir_hash(dest) == src_hash + + # Find already-active copies of this official skill by frontmatter name + # or folder slug, even if curator moved it into another category. + src_frontmatter = _read_skill_name(src / "SKILL.md", folder_name) + matches: List[Path] = [] + if SKILLS_DIR.exists(): + for skill_md in sorted(SKILLS_DIR.rglob("SKILL.md")): + if is_excluded_skill_path(skill_md): + continue + candidate = skill_md.parent + try: + candidate.relative_to(SKILLS_DIR) + except ValueError: + continue + candidate_name = _read_skill_name(skill_md, candidate.name) + if candidate == dest: + continue + if candidate.name == folder_name or candidate_name in {folder_name, src_frontmatter}: + matches.append(candidate) + + if restore: + for match in matches: + if match.exists(): + backed_up.append(_move_to_restore_backup(match, backup_root)) + if dest.exists() and not canonical_ok: + backed_up.append(_move_to_restore_backup(dest, backup_root)) + if not dest.exists(): + dest.parent.mkdir(parents=True, exist_ok=True) + shutil.copytree(src, dest) + restored.append(folder_name) + elif not canonical_ok: + continue + + backfilled = _backfill_optional_provenance(quiet=True) + return { + "ok": True, + "message": "Official optional skill repair complete.", + "restored": restored, + "backfilled": backfilled, + "backed_up": backed_up, + "backup_dir": str(backup_root) if backed_up else "", + } + + +def _backfill_optional_provenance(quiet: bool = False) -> List[str]: + """Mark already-present official optional skills as hub-installed. + + This covers the migration case where a skill used to be bundled (or was + manually copied into the active skills tree) and later lives under + optional-skills/. If the active copy is byte-identical to the official + optional source, record official hub provenance without copying or + reinstalling anything. Modified/local skills are left alone. + """ + optional_dir = _get_optional_dir() + if not optional_dir.exists(): + return [] + + lock_path = SKILLS_DIR / ".hub" / "lock.json" + try: + data = json.loads(lock_path.read_text()) if lock_path.exists() else {"version": 1, "installed": {}} + except (json.JSONDecodeError, OSError): + data = {"version": 1, "installed": {}} + installed = data.setdefault("installed", {}) + existing_paths = { + entry.get("install_path") + for entry in installed.values() + if isinstance(entry, dict) + } + + backfilled: List[str] = [] + changed = False + for skill_md in sorted(optional_dir.rglob("SKILL.md")): + if is_excluded_skill_path(skill_md): + continue + src = skill_md.parent + try: + install_path = _safe_rel_install_path(src, optional_dir) + except ValueError as e: + logger.debug("Skipping optional skill with unsafe path %s: %s", src, e) + continue + dest = SKILLS_DIR / Path(*install_path.split("/")) + if not dest.exists() or not dest.is_dir(): + continue + if _dir_hash(dest) != _dir_hash(src): + continue + + lock_name = src.name + if lock_name in installed or install_path in existing_paths: + continue + + timestamp = datetime.now(timezone.utc).isoformat() + installed[lock_name] = { + "source": "official", + "identifier": f"official/{install_path}", + "trust_level": "builtin", + "scan_verdict": "backfilled", + "content_hash": _content_hash(dest), + "install_path": install_path, + "files": _skill_file_list(dest), + "metadata": {"backfilled_from": "optional-skills"}, + "installed_at": timestamp, + "updated_at": timestamp, + } + existing_paths.add(install_path) + backfilled.append(lock_name) + changed = True + if not quiet: + print(f" = {lock_name} (official optional provenance backfilled)") + + if changed: + lock_path.parent.mkdir(parents=True, exist_ok=True) + lock_path.write_text(json.dumps(data, indent=2, ensure_ascii=False) + "\n") + return backfilled + + def sync_skills(quiet: bool = False) -> dict: """ Sync bundled skills into ~/.hermes/skills/ using the manifest. @@ -185,6 +407,7 @@ def sync_skills(quiet: bool = False) -> dict: return { "copied": [], "updated": [], "skipped": 0, "user_modified": [], "cleaned": [], "total_bundled": 0, + "optional_provenance_backfilled": [], } SKILLS_DIR.mkdir(parents=True, exist_ok=True) @@ -305,6 +528,7 @@ def sync_skills(quiet: bool = False) -> dict: logger.debug("Could not copy %s: %s", desc_md, e) _write_manifest(manifest) + optional_provenance_backfilled = _backfill_optional_provenance(quiet=quiet) return { "copied": copied, @@ -313,6 +537,7 @@ def sync_skills(quiet: bool = False) -> dict: "user_modified": user_modified, "cleaned": cleaned, "total_bundled": len(bundled_skills), + "optional_provenance_backfilled": optional_provenance_backfilled, } @@ -431,4 +656,6 @@ if __name__ == "__main__": parts.append(f"{len(names)} user-modified (kept): {shown}") if result["cleaned"]: parts.append(f"{len(result['cleaned'])} cleaned from manifest") + if result.get("optional_provenance_backfilled"): + parts.append(f"{len(result['optional_provenance_backfilled'])} official optional backfilled") print(f"\nDone: {', '.join(parts)}. {result['total_bundled']} total bundled.")