From 9137b86a5286e4ea420e9fb894bce34fee546b3d Mon Sep 17 00:00:00 2001 From: Wolfram Ravenwolf Date: Tue, 16 Jun 2026 03:45:56 +0200 Subject: [PATCH] fix(skills): ignore support docs in skill discovery Support files under references/, templates/, assets/, and scripts/ are progressive-disclosure data loaded through skill_view(..., file_path=...). They should not be treated as standalone skills during discovery or collision checks. This prevents archived skill packages or support markdown files inside a real skill from shadowing active skills with the same name while still allowing top-level categories named scripts/templates/assets/references. Tests cover: - pruning nested SKILL.md files inside skill support directories - preserving support-named top-level categories - avoiding skill_view collisions from support markdown - keeping archived package SKILL.md files accessible only through file_path --- agent/skill_utils.py | 62 ++++++++++++++++++++---- tests/agent/test_skill_utils.py | 47 +++++++++++++++++++ tests/tools/test_skills_tool.py | 83 +++++++++++++++++++++++++++++++++ tools/skills_tool.py | 34 +++++++++++--- 4 files changed, 211 insertions(+), 15 deletions(-) diff --git a/agent/skill_utils.py b/agent/skill_utils.py index 6f68d3041b5..9f16534a450 100644 --- a/agent/skill_utils.py +++ b/agent/skill_utils.py @@ -43,14 +43,20 @@ EXCLUDED_SKILL_DIRS = frozenset( ) ) +# Supporting files live inside a skill package and are loaded explicitly via +# skill_view(skill, file_path=...). They are not standalone skills and must not +# be scanned for active SKILL.md/DESCRIPTION.md entries, even if a Curator or +# archive workflow preserves a complete old skill package under references/. +SKILL_SUPPORT_DIRS = frozenset(("references", "templates", "assets", "scripts")) + def is_excluded_skill_path(path) -> bool: - """True if any component of *path* is in EXCLUDED_SKILL_DIRS. + """True if *path* should be skipped by active skill scanners. - Use this on every SKILL.md path produced by ``rglob`` to prune - dependency, virtualenv, VCS, and cache directories. Centralising the - check here keeps every skill-scanning site in sync with the shared - exclusion set. + Use this on every ``SKILL.md`` path produced by direct ``rglob`` scans to + prune dependency, virtualenv, VCS, cache, and progressive-disclosure + support-package paths. Centralising the check here keeps every + skill-scanning site in sync with the shared exclusion set. Accepts a Path or string. """ @@ -59,7 +65,36 @@ def is_excluded_skill_path(path) -> bool: except AttributeError: from pathlib import PurePath parts = PurePath(str(path)).parts - return any(part in EXCLUDED_SKILL_DIRS for part in parts) + return any(part in EXCLUDED_SKILL_DIRS for part in parts) or is_skill_support_path( + path + ) + + +def is_skill_support_path(path) -> bool: + """True if *path* is under a support dir of an actual skill root. + + ``references/``, ``templates/``, ``assets/``, and ``scripts/`` are + progressive-disclosure support areas when they sit directly inside a skill + directory containing ``SKILL.md``. They are not active discovery roots for + standalone skills. A preserved package such as + ``some-skill/references/old-skill-package/SKILL.md`` is documentation data + unless the caller explicitly loads it via ``file_path``. + + Legitimate categories or skill names such as ``skills/scripts/foo`` remain + discoverable because their ``scripts`` component is not directly under a + directory that contains ``SKILL.md``. + """ + path_obj = path if isinstance(path, Path) else Path(str(path)) + parts = path_obj.parts + # Last component may be a file or candidate skill directory name. Only + # components before the leaf can be containing support directories. + for idx, part in enumerate(parts[:-1]): + if part not in SKILL_SUPPORT_DIRS or idx == 0: + continue + skill_root = Path(*parts[:idx]) + if (skill_root / "SKILL.md").exists(): + return True + return False # ── Lazy YAML loader ───────────────────────────────────────────────────── @@ -661,12 +696,21 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str: def iter_skill_index_files(skills_dir: Path, filename: str): """Walk skills_dir yielding sorted paths matching *filename*. - Excludes Hermes metadata, VCS, virtualenv/dependency, and cache - directories so dependencies cannot register nested skills. + Excludes Hermes metadata, VCS, virtualenv/dependency, cache, and skill + support directories. Support directories (references/templates/assets/ + scripts) can contain arbitrary markdown and even archived package + ``SKILL.md`` files, but they are progressive-disclosure data loaded through + ``skill_view(..., file_path=...)`` rather than active skill roots. """ matches = [] for root, dirs, files in os.walk(skills_dir, followlinks=True): - dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS] + has_skill_md = "SKILL.md" in files + dirs[:] = [ + d + for d in dirs + if d not in EXCLUDED_SKILL_DIRS + and not (has_skill_md and d in SKILL_SUPPORT_DIRS) + ] if filename in files: matches.append(Path(root) / filename) for path in sorted(matches, key=lambda p: str(p.relative_to(skills_dir))): diff --git a/tests/agent/test_skill_utils.py b/tests/agent/test_skill_utils.py index db380c75bb8..ef2c0bf7bca 100644 --- a/tests/agent/test_skill_utils.py +++ b/tests/agent/test_skill_utils.py @@ -6,6 +6,8 @@ from agent.skill_utils import ( extract_skill_conditions, get_disabled_skill_names, get_external_skills_dirs, + is_excluded_skill_path, + is_skill_support_path, iter_skill_index_files, resolve_skill_config_values, skill_matches_platform, @@ -166,6 +168,51 @@ def test_skill_config_raw_cache_invalidates_on_config_edit(tmp_path, monkeypatch os.utime(config_path, None) assert get_disabled_skill_names() == {"new-skill"} +def test_iter_skill_index_files_prunes_skill_support_dirs(tmp_path): + """Archived package SKILL.md files under support dirs are not active skills.""" + real = tmp_path / "umbrella" + real.mkdir() + (real / "SKILL.md").write_text("---\nname: umbrella\n---\n", encoding="utf-8") + + package = real / "references" / "old-skill-package" + package.mkdir(parents=True) + (package / "SKILL.md").write_text("---\nname: old-skill\n---\n", encoding="utf-8") + (package / "DESCRIPTION.md").write_text( + "---\ndescription: archived package\n---\n", encoding="utf-8" + ) + + script_package = real / "scripts" / "helper-skill" + script_package.mkdir(parents=True) + (script_package / "SKILL.md").write_text("---\nname: helper\n---\n", encoding="utf-8") + + found = list(iter_skill_index_files(tmp_path, "SKILL.md")) + desc_found = list(iter_skill_index_files(tmp_path, "DESCRIPTION.md")) + + assert found == [real / "SKILL.md"] + assert desc_found == [] + assert is_skill_support_path(package / "SKILL.md") is True + assert is_excluded_skill_path(package / "SKILL.md") is True + + +def test_iter_skill_index_files_keeps_support_named_categories(tmp_path): + """A category named scripts/templates/assets/references is still valid.""" + scripts_skill = tmp_path / "scripts" / "bash-helper" + scripts_skill.mkdir(parents=True) + (scripts_skill / "SKILL.md").write_text( + "---\nname: bash-helper\n---\n", encoding="utf-8" + ) + + templates_skill = tmp_path / "templates" / "deck-template" + templates_skill.mkdir(parents=True) + (templates_skill / "SKILL.md").write_text( + "---\nname: deck-template\n---\n", encoding="utf-8" + ) + + found = list(iter_skill_index_files(tmp_path, "SKILL.md")) + + assert found == [scripts_skill / "SKILL.md", templates_skill / "SKILL.md"] + assert is_skill_support_path(scripts_skill / "SKILL.md") is False + assert is_excluded_skill_path(scripts_skill / "SKILL.md") is False # ── skill_matches_platform on Termux ────────────────────────────────────── diff --git a/tests/tools/test_skills_tool.py b/tests/tools/test_skills_tool.py index a99f5a1f68d..a7445207c71 100644 --- a/tests/tools/test_skills_tool.py +++ b/tests/tools/test_skills_tool.py @@ -1225,6 +1225,89 @@ class TestSkillViewCollisionDetection: assert result["success"] is True assert "LOCAL VERSION" in result["content"] + def test_support_markdown_does_not_collide_with_real_skill(self, tmp_path): + """Supporting reference docs named .md are not skills. + + A real-world regression had creative/sketch/SKILL.md become + unloadable because another skill carried + references/styles/sketch.md. Support files are loaded via + skill_view(skill, file_path=...), not as bare skill names. + """ + local_dir = tmp_path / "local" + external_dir = tmp_path / "external" + local_dir.mkdir() + external_dir.mkdir() + + _make_skill(local_dir, "article-illustrator", category="creative") + support_file = ( + local_dir + / "creative" + / "article-illustrator" + / "references" + / "styles" + / "sketch.md" + ) + support_file.parent.mkdir(parents=True, exist_ok=True) + support_file.write_text("# Sketch style support doc\n") + _make_skill(local_dir, "sketch", category="creative", body="REAL SKETCH SKILL") + + p1, p2 = self._patch_dirs(local_dir, [external_dir]) + with p1, p2: + raw = skill_view("sketch") + + result = json.loads(raw) + assert result["success"] is True + assert result["path"] == "creative/sketch/SKILL.md" + assert "REAL SKETCH SKILL" in result["content"] + + def test_reference_package_skill_md_is_not_active_skill(self, tmp_path): + """Curator-preserved package SKILL.md files under references stay data. + + Umbrella consolidations may preserve an old skill as + references/old-skill-package/SKILL.md. That package must not appear in + skills_list/system prompts and must not resolve as skill_view("old-skill"). + The package can still be opened explicitly through the umbrella's + file_path progressive-disclosure channel. + """ + local_dir = tmp_path / "local" + external_dir = tmp_path / "external" + local_dir.mkdir() + external_dir.mkdir() + + _make_skill(local_dir, "umbrella", category="creative", body="UMBRELLA") + package = ( + local_dir + / "creative" + / "umbrella" + / "references" + / "old-skill-package" + ) + package.mkdir(parents=True, exist_ok=True) + (package / "SKILL.md").write_text( + "---\nname: old-skill\ndescription: Preserved old skill.\n---\n\nOLD BODY\n" + ) + + p1, p2 = self._patch_dirs(local_dir, [external_dir]) + with p1, p2: + names = {skill["name"] for skill in _find_all_skills()} + old_raw = skill_view("old-skill") + direct_package_raw = skill_view("creative/umbrella/references/old-skill-package") + package_raw = skill_view( + "umbrella", file_path="references/old-skill-package/SKILL.md" + ) + + assert "umbrella" in names + assert "old-skill" not in names + old_result = json.loads(old_raw) + assert old_result["success"] is False + assert "not found" in old_result["error"] + direct_package_result = json.loads(direct_package_raw) + assert direct_package_result["success"] is False + assert "not found" in direct_package_result["error"] + package_result = json.loads(package_raw) + assert package_result["success"] is True + assert "OLD BODY" in package_result["content"] + def test_external_skill_resolves_when_no_collision(self, tmp_path): """External-only skills still resolve normally when there's no local skill of the same name.""" diff --git a/tools/skills_tool.py b/tools/skills_tool.py index 2b5e40ac0cf..2ba57adc54d 100644 --- a/tools/skills_tool.py +++ b/tools/skills_tool.py @@ -79,7 +79,10 @@ from typing import Dict, Any, List, Optional, Set, Tuple from tools.registry import registry, tool_error from hermes_cli.config import cfg_get from utils import env_var_enabled -from agent.skill_utils import EXCLUDED_SKILL_DIRS as _EXCLUDED_SKILL_DIRS +from agent.skill_utils import ( + EXCLUDED_SKILL_DIRS as _EXCLUDED_SKILL_DIRS, + is_skill_support_path as _is_skill_support_path, +) logger = logging.getLogger(__name__) @@ -1020,9 +1023,15 @@ def skill_view( # Strategy 1: direct path (e.g., "mlops/axolotl" or bare "axolotl" # at the top of the dir). direct_path = search_dir / name - if direct_path.is_dir() and (direct_path / "SKILL.md").exists(): + if ( + not _is_skill_support_path(direct_path) + and direct_path.is_dir() + and (direct_path / "SKILL.md").exists() + ): _record(direct_path, direct_path / "SKILL.md") - elif direct_path.with_suffix(".md").exists(): + elif direct_path.with_suffix(".md").exists() and not _is_skill_support_path( + direct_path.with_suffix(".md") + ): _record(None, direct_path.with_suffix(".md")) # Strategy 1b: categorized form for plugin namespace fall-through @@ -1030,9 +1039,17 @@ def skill_view( # tries the on-disk path "myplugin/explore"). if local_category_name: categorized_path = search_dir / local_category_name - if categorized_path.is_dir() and (categorized_path / "SKILL.md").exists(): + if ( + not _is_skill_support_path(categorized_path) + and categorized_path.is_dir() + and (categorized_path / "SKILL.md").exists() + ): _record(categorized_path, categorized_path / "SKILL.md") - elif categorized_path.with_suffix(".md").exists(): + elif categorized_path.with_suffix( + ".md" + ).exists() and not _is_skill_support_path( + categorized_path.with_suffix(".md") + ): _record(None, categorized_path.with_suffix(".md")) # Strategy 2: recursive by directory name (catches nested skills @@ -1053,8 +1070,13 @@ def skill_view( _record(found_skill_md.parent, found_skill_md) # Strategy 3: legacy flat .md files anywhere under the dir. + # Exclude skill support docs: references/templates/assets/scripts + # are loaded through skill_view(skill, file_path=...) and must not + # shadow or collide with real skills that share the same basename. for found_md in search_dir.rglob(f"{name}.md"): - if found_md.name != "SKILL.md": + if found_md.name != "SKILL.md" and not _is_skill_support_path( + found_md + ): _record(None, found_md) if len(candidates) > 1: