fix(skills): ignore support docs in skill discovery

Support files under references/, templates/, assets/, and scripts/ are progressive-disclosure data loaded through skill_view(..., file_path=...). They should not be treated as standalone skills during discovery or collision checks.

This prevents archived skill packages or support markdown files inside a real skill from shadowing active skills with the same name while still allowing top-level categories named scripts/templates/assets/references.

Tests cover:
- pruning nested SKILL.md files inside skill support directories
- preserving support-named top-level categories
- avoiding skill_view collisions from support markdown
- keeping archived package SKILL.md files accessible only through file_path
This commit is contained in:
Wolfram Ravenwolf 2026-06-16 03:45:56 +02:00 committed by Teknium
parent 7493de7fc3
commit 9137b86a52
4 changed files with 211 additions and 15 deletions

View file

@ -43,14 +43,20 @@ EXCLUDED_SKILL_DIRS = frozenset(
)
)
# Supporting files live inside a skill package and are loaded explicitly via
# skill_view(skill, file_path=...). They are not standalone skills and must not
# be scanned for active SKILL.md/DESCRIPTION.md entries, even if a Curator or
# archive workflow preserves a complete old skill package under references/.
SKILL_SUPPORT_DIRS = frozenset(("references", "templates", "assets", "scripts"))
def is_excluded_skill_path(path) -> bool:
"""True if any component of *path* is in EXCLUDED_SKILL_DIRS.
"""True if *path* should be skipped by active skill scanners.
Use this on every SKILL.md path produced by ``rglob`` to prune
dependency, virtualenv, VCS, and cache directories. Centralising the
check here keeps every skill-scanning site in sync with the shared
exclusion set.
Use this on every ``SKILL.md`` path produced by direct ``rglob`` scans to
prune dependency, virtualenv, VCS, cache, and progressive-disclosure
support-package paths. Centralising the check here keeps every
skill-scanning site in sync with the shared exclusion set.
Accepts a Path or string.
"""
@ -59,7 +65,36 @@ def is_excluded_skill_path(path) -> bool:
except AttributeError:
from pathlib import PurePath
parts = PurePath(str(path)).parts
return any(part in EXCLUDED_SKILL_DIRS for part in parts)
return any(part in EXCLUDED_SKILL_DIRS for part in parts) or is_skill_support_path(
path
)
def is_skill_support_path(path) -> bool:
"""True if *path* is under a support dir of an actual skill root.
``references/``, ``templates/``, ``assets/``, and ``scripts/`` are
progressive-disclosure support areas when they sit directly inside a skill
directory containing ``SKILL.md``. They are not active discovery roots for
standalone skills. A preserved package such as
``some-skill/references/old-skill-package/SKILL.md`` is documentation data
unless the caller explicitly loads it via ``file_path``.
Legitimate categories or skill names such as ``skills/scripts/foo`` remain
discoverable because their ``scripts`` component is not directly under a
directory that contains ``SKILL.md``.
"""
path_obj = path if isinstance(path, Path) else Path(str(path))
parts = path_obj.parts
# Last component may be a file or candidate skill directory name. Only
# components before the leaf can be containing support directories.
for idx, part in enumerate(parts[:-1]):
if part not in SKILL_SUPPORT_DIRS or idx == 0:
continue
skill_root = Path(*parts[:idx])
if (skill_root / "SKILL.md").exists():
return True
return False
# ── Lazy YAML loader ─────────────────────────────────────────────────────
@ -661,12 +696,21 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
def iter_skill_index_files(skills_dir: Path, filename: str):
"""Walk skills_dir yielding sorted paths matching *filename*.
Excludes Hermes metadata, VCS, virtualenv/dependency, and cache
directories so dependencies cannot register nested skills.
Excludes Hermes metadata, VCS, virtualenv/dependency, cache, and skill
support directories. Support directories (references/templates/assets/
scripts) can contain arbitrary markdown and even archived package
``SKILL.md`` files, but they are progressive-disclosure data loaded through
``skill_view(..., file_path=...)`` rather than active skill roots.
"""
matches = []
for root, dirs, files in os.walk(skills_dir, followlinks=True):
dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
has_skill_md = "SKILL.md" in files
dirs[:] = [
d
for d in dirs
if d not in EXCLUDED_SKILL_DIRS
and not (has_skill_md and d in SKILL_SUPPORT_DIRS)
]
if filename in files:
matches.append(Path(root) / filename)
for path in sorted(matches, key=lambda p: str(p.relative_to(skills_dir))):

View file

@ -6,6 +6,8 @@ from agent.skill_utils import (
extract_skill_conditions,
get_disabled_skill_names,
get_external_skills_dirs,
is_excluded_skill_path,
is_skill_support_path,
iter_skill_index_files,
resolve_skill_config_values,
skill_matches_platform,
@ -166,6 +168,51 @@ def test_skill_config_raw_cache_invalidates_on_config_edit(tmp_path, monkeypatch
os.utime(config_path, None)
assert get_disabled_skill_names() == {"new-skill"}
def test_iter_skill_index_files_prunes_skill_support_dirs(tmp_path):
"""Archived package SKILL.md files under support dirs are not active skills."""
real = tmp_path / "umbrella"
real.mkdir()
(real / "SKILL.md").write_text("---\nname: umbrella\n---\n", encoding="utf-8")
package = real / "references" / "old-skill-package"
package.mkdir(parents=True)
(package / "SKILL.md").write_text("---\nname: old-skill\n---\n", encoding="utf-8")
(package / "DESCRIPTION.md").write_text(
"---\ndescription: archived package\n---\n", encoding="utf-8"
)
script_package = real / "scripts" / "helper-skill"
script_package.mkdir(parents=True)
(script_package / "SKILL.md").write_text("---\nname: helper\n---\n", encoding="utf-8")
found = list(iter_skill_index_files(tmp_path, "SKILL.md"))
desc_found = list(iter_skill_index_files(tmp_path, "DESCRIPTION.md"))
assert found == [real / "SKILL.md"]
assert desc_found == []
assert is_skill_support_path(package / "SKILL.md") is True
assert is_excluded_skill_path(package / "SKILL.md") is True
def test_iter_skill_index_files_keeps_support_named_categories(tmp_path):
"""A category named scripts/templates/assets/references is still valid."""
scripts_skill = tmp_path / "scripts" / "bash-helper"
scripts_skill.mkdir(parents=True)
(scripts_skill / "SKILL.md").write_text(
"---\nname: bash-helper\n---\n", encoding="utf-8"
)
templates_skill = tmp_path / "templates" / "deck-template"
templates_skill.mkdir(parents=True)
(templates_skill / "SKILL.md").write_text(
"---\nname: deck-template\n---\n", encoding="utf-8"
)
found = list(iter_skill_index_files(tmp_path, "SKILL.md"))
assert found == [scripts_skill / "SKILL.md", templates_skill / "SKILL.md"]
assert is_skill_support_path(scripts_skill / "SKILL.md") is False
assert is_excluded_skill_path(scripts_skill / "SKILL.md") is False
# ── skill_matches_platform on Termux ──────────────────────────────────────

View file

@ -1225,6 +1225,89 @@ class TestSkillViewCollisionDetection:
assert result["success"] is True
assert "LOCAL VERSION" in result["content"]
def test_support_markdown_does_not_collide_with_real_skill(self, tmp_path):
"""Supporting reference docs named <skill>.md are not skills.
A real-world regression had creative/sketch/SKILL.md become
unloadable because another skill carried
references/styles/sketch.md. Support files are loaded via
skill_view(skill, file_path=...), not as bare skill names.
"""
local_dir = tmp_path / "local"
external_dir = tmp_path / "external"
local_dir.mkdir()
external_dir.mkdir()
_make_skill(local_dir, "article-illustrator", category="creative")
support_file = (
local_dir
/ "creative"
/ "article-illustrator"
/ "references"
/ "styles"
/ "sketch.md"
)
support_file.parent.mkdir(parents=True, exist_ok=True)
support_file.write_text("# Sketch style support doc\n")
_make_skill(local_dir, "sketch", category="creative", body="REAL SKETCH SKILL")
p1, p2 = self._patch_dirs(local_dir, [external_dir])
with p1, p2:
raw = skill_view("sketch")
result = json.loads(raw)
assert result["success"] is True
assert result["path"] == "creative/sketch/SKILL.md"
assert "REAL SKETCH SKILL" in result["content"]
def test_reference_package_skill_md_is_not_active_skill(self, tmp_path):
"""Curator-preserved package SKILL.md files under references stay data.
Umbrella consolidations may preserve an old skill as
references/old-skill-package/SKILL.md. That package must not appear in
skills_list/system prompts and must not resolve as skill_view("old-skill").
The package can still be opened explicitly through the umbrella's
file_path progressive-disclosure channel.
"""
local_dir = tmp_path / "local"
external_dir = tmp_path / "external"
local_dir.mkdir()
external_dir.mkdir()
_make_skill(local_dir, "umbrella", category="creative", body="UMBRELLA")
package = (
local_dir
/ "creative"
/ "umbrella"
/ "references"
/ "old-skill-package"
)
package.mkdir(parents=True, exist_ok=True)
(package / "SKILL.md").write_text(
"---\nname: old-skill\ndescription: Preserved old skill.\n---\n\nOLD BODY\n"
)
p1, p2 = self._patch_dirs(local_dir, [external_dir])
with p1, p2:
names = {skill["name"] for skill in _find_all_skills()}
old_raw = skill_view("old-skill")
direct_package_raw = skill_view("creative/umbrella/references/old-skill-package")
package_raw = skill_view(
"umbrella", file_path="references/old-skill-package/SKILL.md"
)
assert "umbrella" in names
assert "old-skill" not in names
old_result = json.loads(old_raw)
assert old_result["success"] is False
assert "not found" in old_result["error"]
direct_package_result = json.loads(direct_package_raw)
assert direct_package_result["success"] is False
assert "not found" in direct_package_result["error"]
package_result = json.loads(package_raw)
assert package_result["success"] is True
assert "OLD BODY" in package_result["content"]
def test_external_skill_resolves_when_no_collision(self, tmp_path):
"""External-only skills still resolve normally when there's no
local skill of the same name."""

View file

@ -79,7 +79,10 @@ from typing import Dict, Any, List, Optional, Set, Tuple
from tools.registry import registry, tool_error
from hermes_cli.config import cfg_get
from utils import env_var_enabled
from agent.skill_utils import EXCLUDED_SKILL_DIRS as _EXCLUDED_SKILL_DIRS
from agent.skill_utils import (
EXCLUDED_SKILL_DIRS as _EXCLUDED_SKILL_DIRS,
is_skill_support_path as _is_skill_support_path,
)
logger = logging.getLogger(__name__)
@ -1020,9 +1023,15 @@ def skill_view(
# Strategy 1: direct path (e.g., "mlops/axolotl" or bare "axolotl"
# at the top of the dir).
direct_path = search_dir / name
if direct_path.is_dir() and (direct_path / "SKILL.md").exists():
if (
not _is_skill_support_path(direct_path)
and direct_path.is_dir()
and (direct_path / "SKILL.md").exists()
):
_record(direct_path, direct_path / "SKILL.md")
elif direct_path.with_suffix(".md").exists():
elif direct_path.with_suffix(".md").exists() and not _is_skill_support_path(
direct_path.with_suffix(".md")
):
_record(None, direct_path.with_suffix(".md"))
# Strategy 1b: categorized form for plugin namespace fall-through
@ -1030,9 +1039,17 @@ def skill_view(
# tries the on-disk path "myplugin/explore").
if local_category_name:
categorized_path = search_dir / local_category_name
if categorized_path.is_dir() and (categorized_path / "SKILL.md").exists():
if (
not _is_skill_support_path(categorized_path)
and categorized_path.is_dir()
and (categorized_path / "SKILL.md").exists()
):
_record(categorized_path, categorized_path / "SKILL.md")
elif categorized_path.with_suffix(".md").exists():
elif categorized_path.with_suffix(
".md"
).exists() and not _is_skill_support_path(
categorized_path.with_suffix(".md")
):
_record(None, categorized_path.with_suffix(".md"))
# Strategy 2: recursive by directory name (catches nested skills
@ -1053,8 +1070,13 @@ def skill_view(
_record(found_skill_md.parent, found_skill_md)
# Strategy 3: legacy flat <name>.md files anywhere under the dir.
# Exclude skill support docs: references/templates/assets/scripts
# are loaded through skill_view(skill, file_path=...) and must not
# shadow or collide with real skills that share the same basename.
for found_md in search_dir.rglob(f"{name}.md"):
if found_md.name != "SKILL.md":
if found_md.name != "SKILL.md" and not _is_skill_support_path(
found_md
):
_record(None, found_md)
if len(candidates) > 1: