diff --git a/agent/curator_backup.py b/agent/curator_backup.py index fe74920521c..5e39443bae0 100644 --- a/agent/curator_backup.py +++ b/agent/curator_backup.py @@ -50,6 +50,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Tuple from hermes_constants import get_hermes_home +from agent.skill_utils import is_excluded_skill_path logger = logging.getLogger(__name__) @@ -176,7 +177,9 @@ def get_keep() -> int: def _count_skill_files(base: Path) -> int: try: - return sum(1 for _ in base.rglob("SKILL.md")) + return sum( + 1 for p in base.rglob("SKILL.md") if not is_excluded_skill_path(p) + ) except OSError: return 0 diff --git a/agent/skill_utils.py b/agent/skill_utils.py index 28424d7ed62..959a109a6cb 100644 --- a/agent/skill_utils.py +++ b/agent/skill_utils.py @@ -24,7 +24,43 @@ PLATFORM_MAP = { "windows": "win32", } -EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive")) +EXCLUDED_SKILL_DIRS = frozenset( + ( + ".git", + ".github", + ".hub", + ".archive", + ".venv", + "venv", + "node_modules", + "site-packages", + "__pycache__", + ".tox", + ".nox", + ".pytest_cache", + ".mypy_cache", + ".ruff_cache", + ) +) + + +def is_excluded_skill_path(path) -> bool: + """True if any component of *path* is in EXCLUDED_SKILL_DIRS. + + Use this on every SKILL.md path produced by ``rglob`` to prune + dependency, virtualenv, VCS, and cache directories. Centralising the + check here keeps every skill-scanning site in sync with the shared + exclusion set. + + Accepts a Path or string. + """ + try: + parts = path.parts # Path + except AttributeError: + from pathlib import PurePath + parts = PurePath(str(path)).parts + return any(part in EXCLUDED_SKILL_DIRS for part in parts) + # ── Lazy YAML loader ───────────────────────────────────────────────────── @@ -478,7 +514,8 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str: def iter_skill_index_files(skills_dir: Path, filename: str): """Walk skills_dir yielding sorted paths matching *filename*. - Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories. + Excludes Hermes metadata, VCS, virtualenv/dependency, and cache + directories so dependencies cannot register nested skills. """ matches = [] for root, dirs, files in os.walk(skills_dir, followlinks=True): diff --git a/gateway/run.py b/gateway/run.py index cca9901cb42..0f56ad61c39 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1109,7 +1109,7 @@ def _check_unavailable_skill(command_name: str) -> str | None: normalized = command_name.lower().replace("_", "-") try: from tools.skills_tool import _get_disabled_skill_names - from agent.skill_utils import get_all_skills_dirs + from agent.skill_utils import get_all_skills_dirs, is_excluded_skill_path disabled = _get_disabled_skill_names() # Check disabled skills across all dirs (local + external) @@ -1117,7 +1117,7 @@ def _check_unavailable_skill(command_name: str) -> str | None: if not skills_dir.exists(): continue for skill_md in skills_dir.rglob("SKILL.md"): - if any(part in {'.git', '.github', '.hub', '.archive'} for part in skill_md.parts): + if is_excluded_skill_path(skill_md): continue slug, declared_name = _skill_slug_from_frontmatter(skill_md) if not slug or not declared_name: @@ -1136,6 +1136,8 @@ def _check_unavailable_skill(command_name: str) -> str | None: optional_dir = get_optional_skills_dir(repo_root / "optional-skills") if optional_dir.exists(): for skill_md in optional_dir.rglob("SKILL.md"): + if is_excluded_skill_path(skill_md): + continue slug, _declared = _skill_slug_from_frontmatter(skill_md) if not slug: continue diff --git a/hermes_cli/dump.py b/hermes_cli/dump.py index 859f8f62468..c29ef19775c 100644 --- a/hermes_cli/dump.py +++ b/hermes_cli/dump.py @@ -16,6 +16,7 @@ from pathlib import Path from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config from hermes_cli.env_loader import load_hermes_dotenv from hermes_constants import display_hermes_home +from agent.skill_utils import is_excluded_skill_path def _get_git_commit(project_root: Path) -> str: @@ -69,6 +70,8 @@ def _count_skills(hermes_home: Path) -> int: return 0 count = 0 for item in skills_dir.rglob("SKILL.md"): + if is_excluded_skill_path(item): + continue count += 1 return count diff --git a/hermes_cli/profile_describer.py b/hermes_cli/profile_describer.py index 55d646d92cd..0da67e8a3d3 100644 --- a/hermes_cli/profile_describer.py +++ b/hermes_cli/profile_describer.py @@ -35,6 +35,7 @@ from pathlib import Path from typing import Optional from hermes_cli import profiles as profiles_mod +from agent.skill_utils import is_excluded_skill_path logger = logging.getLogger(__name__) @@ -109,8 +110,7 @@ def _collect_skills(profile_dir: Path) -> list[str]: return [] names: list[str] = [] for md in skills_dir.rglob("SKILL.md"): - path_str = str(md) - if "/.hub/" in path_str or "/.git/" in path_str: + if is_excluded_skill_path(md): continue try: rel = md.relative_to(skills_dir) @@ -201,7 +201,7 @@ def describe_profile( skill_list = "\n".join(f" - {n}" for n in skill_names) or " (no skills installed)" skill_count = sum( 1 for _ in (profile_dir / "skills").rglob("SKILL.md") - if "/.hub/" not in str(_) and "/.git/" not in str(_) + if not is_excluded_skill_path(_) ) if (profile_dir / "skills").is_dir() else 0 # Read model + provider from the profile's config. diff --git a/hermes_cli/profile_distribution.py b/hermes_cli/profile_distribution.py index 5e6be8c609e..45b0302f35c 100644 --- a/hermes_cli/profile_distribution.py +++ b/hermes_cli/profile_distribution.py @@ -70,6 +70,8 @@ from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, List, Optional, Tuple +from agent.skill_utils import is_excluded_skill_path + # --------------------------------------------------------------------------- # Constants @@ -463,7 +465,9 @@ def _count_skills(staged: Path) -> int: skills_dir = staged / "skills" if not skills_dir.is_dir(): return 0 - return sum(1 for _ in skills_dir.rglob("SKILL.md")) + return sum( + 1 for p in skills_dir.rglob("SKILL.md") if not is_excluded_skill_path(p) + ) def plan_install( diff --git a/hermes_cli/profiles.py b/hermes_cli/profiles.py index 9d596795fb1..aa33d9182b8 100644 --- a/hermes_cli/profiles.py +++ b/hermes_cli/profiles.py @@ -30,6 +30,8 @@ from dataclasses import dataclass from pathlib import Path, PurePosixPath, PureWindowsPath from typing import List, Optional +from agent.skill_utils import is_excluded_skill_path + _PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$") # Directories bootstrapped inside every new profile @@ -485,8 +487,9 @@ def _count_skills(profile_dir: Path) -> int: return 0 count = 0 for md in skills_dir.rglob("SKILL.md"): - if "/.hub/" not in str(md) and "/.git/" not in str(md): - count += 1 + if is_excluded_skill_path(md): + continue + count += 1 return count diff --git a/hermes_cli/skills_hub.py b/hermes_cli/skills_hub.py index 256624e53c9..b0540705165 100644 --- a/hermes_cli/skills_hub.py +++ b/hermes_cli/skills_hub.py @@ -23,6 +23,7 @@ from rich.table import Table # Lazy imports to avoid circular dependencies and slow startup. # tools.skills_hub and tools.skills_guard are imported inside functions. from hermes_constants import display_hermes_home +from agent.skill_utils import is_excluded_skill_path _console = Console() @@ -178,9 +179,12 @@ def _existing_categories() -> List[str]: # top level (no category); otherwise treat as a category bucket. if (entry / "SKILL.md").exists(): continue - # Has at least one nested SKILL.md? + # Has at least one nested SKILL.md (excluding dependency/cache dirs)? try: - if any(entry.rglob("SKILL.md")): + if any( + not is_excluded_skill_path(p) + for p in entry.rglob("SKILL.md") + ): out.append(entry.name) except OSError: continue diff --git a/infographic/skill-scanner-no-ghost-skills/infographic.png b/infographic/skill-scanner-no-ghost-skills/infographic.png new file mode 100644 index 00000000000..72e207a5fab Binary files /dev/null and b/infographic/skill-scanner-no-ghost-skills/infographic.png differ diff --git a/tests/agent/test_skill_utils.py b/tests/agent/test_skill_utils.py index 206cc5f4b11..ae22dc569be 100644 --- a/tests/agent/test_skill_utils.py +++ b/tests/agent/test_skill_utils.py @@ -1,6 +1,6 @@ -"""Tests for agent/skill_utils.py — extract_skill_conditions metadata handling.""" +"""Tests for agent/skill_utils.py.""" -from agent.skill_utils import extract_skill_conditions +from agent.skill_utils import extract_skill_conditions, iter_skill_index_files def test_metadata_as_dict_with_hermes(): @@ -56,3 +56,41 @@ def test_metadata_missing_entirely(): "fallback_for_tools": [], "requires_tools": [], } + + +def test_iter_skill_index_files_prunes_dependency_dirs(tmp_path): + real = tmp_path / "real-skill" + real.mkdir() + (real / "SKILL.md").write_text("---\nname: real-skill\n---\n", encoding="utf-8") + + nested = ( + tmp_path + / "bring" + / "scripts" + / ".venv" + / "lib" + / "python3.13" + / "site-packages" + / "typer" + / ".agents" + / "skills" + / "typer" + ) + nested.mkdir(parents=True) + (nested / "SKILL.md").write_text("---\nname: typer\n---\n", encoding="utf-8") + + node_module = ( + tmp_path + / "web-skill" + / "node_modules" + / "dep" + / ".agents" + / "skills" + / "dep" + ) + node_module.mkdir(parents=True) + (node_module / "SKILL.md").write_text("---\nname: dep\n---\n", encoding="utf-8") + + found = list(iter_skill_index_files(tmp_path, "SKILL.md")) + + assert found == [real / "SKILL.md"] diff --git a/tests/tools/test_skills_tool.py b/tests/tools/test_skills_tool.py index 9502467546e..03e9c206eb8 100644 --- a/tests/tools/test_skills_tool.py +++ b/tests/tools/test_skills_tool.py @@ -267,6 +267,32 @@ class TestFindAllSkills: assert len(skills) == 1 assert skills[0]["name"] == "real-skill" + def test_skips_nested_virtualenv_dependency_skills(self, tmp_path): + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + _make_skill(tmp_path, "real-skill") + typer_skill = ( + tmp_path + / "bring" + / "scripts" + / ".venv" + / "lib" + / "python3.13" + / "site-packages" + / "typer" + / ".agents" + / "skills" + / "typer" + ) + typer_skill.mkdir(parents=True) + (typer_skill / "SKILL.md").write_text( + "---\nname: typer\ndescription: Should not be discovered.\n---\n", + encoding="utf-8", + ) + + skills = _find_all_skills() + + assert [skill["name"] for skill in skills] == ["real-skill"] + def test_finds_skills_in_symlinked_category_dir(self, tmp_path): external_root = tmp_path / "repo" skills_root = tmp_path / "skills" diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index caa30f321c6..547167a6623 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -283,12 +283,12 @@ def _find_skill(name: str) -> Optional[Dict[str, Any]]: external dirs configured via skills.external_dirs. Returns {"path": Path} or None. """ - from agent.skill_utils import EXCLUDED_SKILL_DIRS, get_all_skills_dirs + from agent.skill_utils import get_all_skills_dirs, is_excluded_skill_path for skills_dir in get_all_skills_dirs(): if not skills_dir.exists(): continue for skill_md in skills_dir.rglob("SKILL.md"): - if any(part in EXCLUDED_SKILL_DIRS for part in skill_md.parts): + if is_excluded_skill_path(skill_md): continue if skill_md.parent.name == name: return {"path": skill_md.parent} diff --git a/tools/skill_usage.py b/tools/skill_usage.py index 6bffb86d1d6..52a6d74dbac 100644 --- a/tools/skill_usage.py +++ b/tools/skill_usage.py @@ -34,6 +34,7 @@ from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Set, Tuple from hermes_constants import get_hermes_home +from agent.skill_utils import is_excluded_skill_path logger = logging.getLogger(__name__) @@ -236,14 +237,13 @@ def list_agent_created_skill_names() -> List[str]: names: List[str] = [] # Top-level SKILL.md files (flat layout) AND nested category/skill/SKILL.md for skill_md in base.rglob("SKILL.md"): - # Skip anything under .archive or .hub + # Skip Hermes metadata, VCS, virtualenv/dependency, and cache dirs + if is_excluded_skill_path(skill_md): + continue try: rel = skill_md.relative_to(base) except ValueError: continue - parts = rel.parts - if parts and (parts[0].startswith(".") or parts[0] == "node_modules"): - continue name = _read_skill_name(skill_md, fallback=skill_md.parent.name) if name in off_limits: continue @@ -577,11 +577,7 @@ def _find_skill_dir(skill_name: str) -> Optional[Path]: if not base.exists(): return None for skill_md in base.rglob("SKILL.md"): - try: - rel = skill_md.relative_to(base) - except ValueError: - continue - if rel.parts and rel.parts[0].startswith("."): + if is_excluded_skill_path(skill_md): continue if _read_skill_name(skill_md, fallback=skill_md.parent.name) == skill_name: return skill_md.parent diff --git a/tools/skills_hub.py b/tools/skills_hub.py index 12372e34ce6..79be8dc34fc 100644 --- a/tools/skills_hub.py +++ b/tools/skills_hub.py @@ -26,6 +26,7 @@ from dataclasses import dataclass, field from datetime import datetime, timezone from pathlib import Path, PurePosixPath from hermes_constants import get_hermes_home +from agent.skill_utils import is_excluded_skill_path from typing import Any, Dict, List, Optional, Tuple, Union from urllib.parse import urljoin, urlparse, urlunparse @@ -2639,6 +2640,8 @@ class OptionalSkillSource(SkillSource): if not self._optional_dir.is_dir(): return None for skill_md in self._optional_dir.rglob("SKILL.md"): + if is_excluded_skill_path(skill_md): + continue if skill_md.parent.name == name: return skill_md.parent return None @@ -2650,10 +2653,9 @@ class OptionalSkillSource(SkillSource): results: List[SkillMeta] = [] for skill_md in sorted(self._optional_dir.rglob("SKILL.md")): - parent = skill_md.parent - rel_parts = parent.relative_to(self._optional_dir).parts - if any(part.startswith(".") for part in rel_parts): + if is_excluded_skill_path(skill_md): continue + parent = skill_md.parent try: content = skill_md.read_text(encoding="utf-8") diff --git a/tools/skills_sync.py b/tools/skills_sync.py index 24374d51791..fb95898f84d 100644 --- a/tools/skills_sync.py +++ b/tools/skills_sync.py @@ -27,6 +27,7 @@ import os import shutil from pathlib import Path from hermes_constants import get_bundled_skills_dir, get_hermes_home +from agent.skill_utils import is_excluded_skill_path from typing import Dict, List, Tuple from utils import atomic_replace @@ -139,8 +140,7 @@ def _discover_bundled_skills(bundled_dir: Path) -> List[Tuple[str, Path]]: return skills for skill_md in bundled_dir.rglob("SKILL.md"): - path_str = str(skill_md) - if "/.git/" in path_str or "/.github/" in path_str or "/.hub/" in path_str: + if is_excluded_skill_path(skill_md): continue skill_dir = skill_md.parent skill_name = _read_skill_name(skill_md, skill_dir.name) diff --git a/tools/skills_tool.py b/tools/skills_tool.py index df6361ba59a..0cd61cc751f 100644 --- a/tools/skills_tool.py +++ b/tools/skills_tool.py @@ -79,6 +79,7 @@ from typing import Dict, Any, List, Optional, Set, Tuple from tools.registry import registry, tool_error from hermes_cli.config import cfg_get from utils import env_var_enabled +from agent.skill_utils import EXCLUDED_SKILL_DIRS as _EXCLUDED_SKILL_DIRS logger = logging.getLogger(__name__) @@ -101,7 +102,6 @@ _PLATFORM_MAP = { "windows": "win32", } _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") -_EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive")) _REMOTE_ENV_BACKENDS = frozenset( {"docker", "singularity", "modal", "ssh", "daytona", "vercel_sandbox"} ) @@ -1565,4 +1565,3 @@ registry.register( check_fn=check_skills_requirements, emoji="📚", ) -