fix(skills): prune dependency/venv dirs from all skill scanners (#30042)

* fix(skills): skip dependency dirs in skill scan

* fix(skills): widen sibling rglob scanners to use shared exclusion set

Follow-up to PR #29968. The contributor's PR widened EXCLUDED_SKILL_DIRS
in the canonical walker (iter_skill_index_files), which fixes the
user-visible discovery path. This commit sweeps the ~12 other
rglob('SKILL.md') sites that did their own ad-hoc filtering — most only
checked .git/.hub, some had no filter at all — so dependency dirs
(.venv, node_modules, site-packages, etc.) cannot leak ghost skills
through the secondary paths.

Adds agent.skill_utils.is_excluded_skill_path(path) helper. Migrates
all 13 sites to use it. Removes 3 hardcoded duplicate filter sets.

Sites touched:
  agent/curator_backup.py        - skill backup file count
  gateway/run.py                 - disabled-skill response (2 sites)
  hermes_cli/dump.py             - skill count in env dump
  hermes_cli/profile_describer.py- profile description (2 sites)
  hermes_cli/profile_distribution.py - profile install count
  hermes_cli/profiles.py         - profile skill count
  hermes_cli/skills_hub.py       - category detection
  tools/skill_manager_tool.py    - skill name lookup (already used set, now uses helper)
  tools/skill_usage.py           - usage tracking + skill dir lookup (2 sites)
  tools/skills_hub.py            - optional skills find + scan (2 sites)
  tools/skills_sync.py           - bundled skills sync

E2E verified with the exact reported shape
(bring/scripts/.venv/.../typer/.agents/skills/typer/SKILL.md): no
sibling site picks up the ghost skill, all five legit-skill counts
still return 1.

* chore(infographic): retro-pop-grid bento for PR #30042 skill-scanner sweep

---------

Co-authored-by: helix4u <4317663+helix4u@users.noreply.github.com>
This commit is contained in:
Teknium 2026-05-21 14:18:02 -07:00 committed by GitHub
parent 3462b097e2
commit 3fde8c153d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 150 additions and 33 deletions

View file

@ -50,6 +50,7 @@ from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from hermes_constants import get_hermes_home
from agent.skill_utils import is_excluded_skill_path
logger = logging.getLogger(__name__)
@ -176,7 +177,9 @@ def get_keep() -> int:
def _count_skill_files(base: Path) -> int:
try:
return sum(1 for _ in base.rglob("SKILL.md"))
return sum(
1 for p in base.rglob("SKILL.md") if not is_excluded_skill_path(p)
)
except OSError:
return 0

View file

@ -24,7 +24,43 @@ PLATFORM_MAP = {
"windows": "win32",
}
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
EXCLUDED_SKILL_DIRS = frozenset(
(
".git",
".github",
".hub",
".archive",
".venv",
"venv",
"node_modules",
"site-packages",
"__pycache__",
".tox",
".nox",
".pytest_cache",
".mypy_cache",
".ruff_cache",
)
)
def is_excluded_skill_path(path) -> bool:
"""True if any component of *path* is in EXCLUDED_SKILL_DIRS.
Use this on every SKILL.md path produced by ``rglob`` to prune
dependency, virtualenv, VCS, and cache directories. Centralising the
check here keeps every skill-scanning site in sync with the shared
exclusion set.
Accepts a Path or string.
"""
try:
parts = path.parts # Path
except AttributeError:
from pathlib import PurePath
parts = PurePath(str(path)).parts
return any(part in EXCLUDED_SKILL_DIRS for part in parts)
# ── Lazy YAML loader ─────────────────────────────────────────────────────
@ -478,7 +514,8 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
def iter_skill_index_files(skills_dir: Path, filename: str):
"""Walk skills_dir yielding sorted paths matching *filename*.
Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories.
Excludes Hermes metadata, VCS, virtualenv/dependency, and cache
directories so dependencies cannot register nested skills.
"""
matches = []
for root, dirs, files in os.walk(skills_dir, followlinks=True):

View file

@ -1109,7 +1109,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
normalized = command_name.lower().replace("_", "-")
try:
from tools.skills_tool import _get_disabled_skill_names
from agent.skill_utils import get_all_skills_dirs
from agent.skill_utils import get_all_skills_dirs, is_excluded_skill_path
disabled = _get_disabled_skill_names()
# Check disabled skills across all dirs (local + external)
@ -1117,7 +1117,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
if not skills_dir.exists():
continue
for skill_md in skills_dir.rglob("SKILL.md"):
if any(part in {'.git', '.github', '.hub', '.archive'} for part in skill_md.parts):
if is_excluded_skill_path(skill_md):
continue
slug, declared_name = _skill_slug_from_frontmatter(skill_md)
if not slug or not declared_name:
@ -1136,6 +1136,8 @@ def _check_unavailable_skill(command_name: str) -> str | None:
optional_dir = get_optional_skills_dir(repo_root / "optional-skills")
if optional_dir.exists():
for skill_md in optional_dir.rglob("SKILL.md"):
if is_excluded_skill_path(skill_md):
continue
slug, _declared = _skill_slug_from_frontmatter(skill_md)
if not slug:
continue

View file

@ -16,6 +16,7 @@ from pathlib import Path
from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config
from hermes_cli.env_loader import load_hermes_dotenv
from hermes_constants import display_hermes_home
from agent.skill_utils import is_excluded_skill_path
def _get_git_commit(project_root: Path) -> str:
@ -69,6 +70,8 @@ def _count_skills(hermes_home: Path) -> int:
return 0
count = 0
for item in skills_dir.rglob("SKILL.md"):
if is_excluded_skill_path(item):
continue
count += 1
return count

View file

@ -35,6 +35,7 @@ from pathlib import Path
from typing import Optional
from hermes_cli import profiles as profiles_mod
from agent.skill_utils import is_excluded_skill_path
logger = logging.getLogger(__name__)
@ -109,8 +110,7 @@ def _collect_skills(profile_dir: Path) -> list[str]:
return []
names: list[str] = []
for md in skills_dir.rglob("SKILL.md"):
path_str = str(md)
if "/.hub/" in path_str or "/.git/" in path_str:
if is_excluded_skill_path(md):
continue
try:
rel = md.relative_to(skills_dir)
@ -201,7 +201,7 @@ def describe_profile(
skill_list = "\n".join(f" - {n}" for n in skill_names) or " (no skills installed)"
skill_count = sum(
1 for _ in (profile_dir / "skills").rglob("SKILL.md")
if "/.hub/" not in str(_) and "/.git/" not in str(_)
if not is_excluded_skill_path(_)
) if (profile_dir / "skills").is_dir() else 0
# Read model + provider from the profile's config.

View file

@ -70,6 +70,8 @@ from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from agent.skill_utils import is_excluded_skill_path
# ---------------------------------------------------------------------------
# Constants
@ -463,7 +465,9 @@ def _count_skills(staged: Path) -> int:
skills_dir = staged / "skills"
if not skills_dir.is_dir():
return 0
return sum(1 for _ in skills_dir.rglob("SKILL.md"))
return sum(
1 for p in skills_dir.rglob("SKILL.md") if not is_excluded_skill_path(p)
)
def plan_install(

View file

@ -30,6 +30,8 @@ from dataclasses import dataclass
from pathlib import Path, PurePosixPath, PureWindowsPath
from typing import List, Optional
from agent.skill_utils import is_excluded_skill_path
_PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")
# Directories bootstrapped inside every new profile
@ -485,8 +487,9 @@ def _count_skills(profile_dir: Path) -> int:
return 0
count = 0
for md in skills_dir.rglob("SKILL.md"):
if "/.hub/" not in str(md) and "/.git/" not in str(md):
count += 1
if is_excluded_skill_path(md):
continue
count += 1
return count

View file

@ -23,6 +23,7 @@ from rich.table import Table
# Lazy imports to avoid circular dependencies and slow startup.
# tools.skills_hub and tools.skills_guard are imported inside functions.
from hermes_constants import display_hermes_home
from agent.skill_utils import is_excluded_skill_path
_console = Console()
@ -178,9 +179,12 @@ def _existing_categories() -> List[str]:
# top level (no category); otherwise treat as a category bucket.
if (entry / "SKILL.md").exists():
continue
# Has at least one nested SKILL.md?
# Has at least one nested SKILL.md (excluding dependency/cache dirs)?
try:
if any(entry.rglob("SKILL.md")):
if any(
not is_excluded_skill_path(p)
for p in entry.rglob("SKILL.md")
):
out.append(entry.name)
except OSError:
continue

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 MiB

View file

@ -1,6 +1,6 @@
"""Tests for agent/skill_utils.py — extract_skill_conditions metadata handling."""
"""Tests for agent/skill_utils.py."""
from agent.skill_utils import extract_skill_conditions
from agent.skill_utils import extract_skill_conditions, iter_skill_index_files
def test_metadata_as_dict_with_hermes():
@ -56,3 +56,41 @@ def test_metadata_missing_entirely():
"fallback_for_tools": [],
"requires_tools": [],
}
def test_iter_skill_index_files_prunes_dependency_dirs(tmp_path):
real = tmp_path / "real-skill"
real.mkdir()
(real / "SKILL.md").write_text("---\nname: real-skill\n---\n", encoding="utf-8")
nested = (
tmp_path
/ "bring"
/ "scripts"
/ ".venv"
/ "lib"
/ "python3.13"
/ "site-packages"
/ "typer"
/ ".agents"
/ "skills"
/ "typer"
)
nested.mkdir(parents=True)
(nested / "SKILL.md").write_text("---\nname: typer\n---\n", encoding="utf-8")
node_module = (
tmp_path
/ "web-skill"
/ "node_modules"
/ "dep"
/ ".agents"
/ "skills"
/ "dep"
)
node_module.mkdir(parents=True)
(node_module / "SKILL.md").write_text("---\nname: dep\n---\n", encoding="utf-8")
found = list(iter_skill_index_files(tmp_path, "SKILL.md"))
assert found == [real / "SKILL.md"]

View file

@ -267,6 +267,32 @@ class TestFindAllSkills:
assert len(skills) == 1
assert skills[0]["name"] == "real-skill"
def test_skips_nested_virtualenv_dependency_skills(self, tmp_path):
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
_make_skill(tmp_path, "real-skill")
typer_skill = (
tmp_path
/ "bring"
/ "scripts"
/ ".venv"
/ "lib"
/ "python3.13"
/ "site-packages"
/ "typer"
/ ".agents"
/ "skills"
/ "typer"
)
typer_skill.mkdir(parents=True)
(typer_skill / "SKILL.md").write_text(
"---\nname: typer\ndescription: Should not be discovered.\n---\n",
encoding="utf-8",
)
skills = _find_all_skills()
assert [skill["name"] for skill in skills] == ["real-skill"]
def test_finds_skills_in_symlinked_category_dir(self, tmp_path):
external_root = tmp_path / "repo"
skills_root = tmp_path / "skills"

View file

@ -283,12 +283,12 @@ def _find_skill(name: str) -> Optional[Dict[str, Any]]:
external dirs configured via skills.external_dirs. Returns
{"path": Path} or None.
"""
from agent.skill_utils import EXCLUDED_SKILL_DIRS, get_all_skills_dirs
from agent.skill_utils import get_all_skills_dirs, is_excluded_skill_path
for skills_dir in get_all_skills_dirs():
if not skills_dir.exists():
continue
for skill_md in skills_dir.rglob("SKILL.md"):
if any(part in EXCLUDED_SKILL_DIRS for part in skill_md.parts):
if is_excluded_skill_path(skill_md):
continue
if skill_md.parent.name == name:
return {"path": skill_md.parent}

View file

@ -34,6 +34,7 @@ from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
from hermes_constants import get_hermes_home
from agent.skill_utils import is_excluded_skill_path
logger = logging.getLogger(__name__)
@ -236,14 +237,13 @@ def list_agent_created_skill_names() -> List[str]:
names: List[str] = []
# Top-level SKILL.md files (flat layout) AND nested category/skill/SKILL.md
for skill_md in base.rglob("SKILL.md"):
# Skip anything under .archive or .hub
# Skip Hermes metadata, VCS, virtualenv/dependency, and cache dirs
if is_excluded_skill_path(skill_md):
continue
try:
rel = skill_md.relative_to(base)
except ValueError:
continue
parts = rel.parts
if parts and (parts[0].startswith(".") or parts[0] == "node_modules"):
continue
name = _read_skill_name(skill_md, fallback=skill_md.parent.name)
if name in off_limits:
continue
@ -577,11 +577,7 @@ def _find_skill_dir(skill_name: str) -> Optional[Path]:
if not base.exists():
return None
for skill_md in base.rglob("SKILL.md"):
try:
rel = skill_md.relative_to(base)
except ValueError:
continue
if rel.parts and rel.parts[0].startswith("."):
if is_excluded_skill_path(skill_md):
continue
if _read_skill_name(skill_md, fallback=skill_md.parent.name) == skill_name:
return skill_md.parent

View file

@ -26,6 +26,7 @@ from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path, PurePosixPath
from hermes_constants import get_hermes_home
from agent.skill_utils import is_excluded_skill_path
from typing import Any, Dict, List, Optional, Tuple, Union
from urllib.parse import urljoin, urlparse, urlunparse
@ -2639,6 +2640,8 @@ class OptionalSkillSource(SkillSource):
if not self._optional_dir.is_dir():
return None
for skill_md in self._optional_dir.rglob("SKILL.md"):
if is_excluded_skill_path(skill_md):
continue
if skill_md.parent.name == name:
return skill_md.parent
return None
@ -2650,10 +2653,9 @@ class OptionalSkillSource(SkillSource):
results: List[SkillMeta] = []
for skill_md in sorted(self._optional_dir.rglob("SKILL.md")):
parent = skill_md.parent
rel_parts = parent.relative_to(self._optional_dir).parts
if any(part.startswith(".") for part in rel_parts):
if is_excluded_skill_path(skill_md):
continue
parent = skill_md.parent
try:
content = skill_md.read_text(encoding="utf-8")

View file

@ -27,6 +27,7 @@ import os
import shutil
from pathlib import Path
from hermes_constants import get_bundled_skills_dir, get_hermes_home
from agent.skill_utils import is_excluded_skill_path
from typing import Dict, List, Tuple
from utils import atomic_replace
@ -139,8 +140,7 @@ def _discover_bundled_skills(bundled_dir: Path) -> List[Tuple[str, Path]]:
return skills
for skill_md in bundled_dir.rglob("SKILL.md"):
path_str = str(skill_md)
if "/.git/" in path_str or "/.github/" in path_str or "/.hub/" in path_str:
if is_excluded_skill_path(skill_md):
continue
skill_dir = skill_md.parent
skill_name = _read_skill_name(skill_md, skill_dir.name)

View file

@ -79,6 +79,7 @@ from typing import Dict, Any, List, Optional, Set, Tuple
from tools.registry import registry, tool_error
from hermes_cli.config import cfg_get
from utils import env_var_enabled
from agent.skill_utils import EXCLUDED_SKILL_DIRS as _EXCLUDED_SKILL_DIRS
logger = logging.getLogger(__name__)
@ -101,7 +102,6 @@ _PLATFORM_MAP = {
"windows": "win32",
}
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
_EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
_REMOTE_ENV_BACKENDS = frozenset(
{"docker", "singularity", "modal", "ssh", "daytona", "vercel_sandbox"}
)
@ -1565,4 +1565,3 @@ registry.register(
check_fn=check_skills_requirements,
emoji="📚",
)