mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-05 07:41:39 +00:00
fix(skills): prune dependency/venv dirs from all skill scanners (#30042)
* fix(skills): skip dependency dirs in skill scan * fix(skills): widen sibling rglob scanners to use shared exclusion set Follow-up to PR #29968. The contributor's PR widened EXCLUDED_SKILL_DIRS in the canonical walker (iter_skill_index_files), which fixes the user-visible discovery path. This commit sweeps the ~12 other rglob('SKILL.md') sites that did their own ad-hoc filtering — most only checked .git/.hub, some had no filter at all — so dependency dirs (.venv, node_modules, site-packages, etc.) cannot leak ghost skills through the secondary paths. Adds agent.skill_utils.is_excluded_skill_path(path) helper. Migrates all 13 sites to use it. Removes 3 hardcoded duplicate filter sets. Sites touched: agent/curator_backup.py - skill backup file count gateway/run.py - disabled-skill response (2 sites) hermes_cli/dump.py - skill count in env dump hermes_cli/profile_describer.py- profile description (2 sites) hermes_cli/profile_distribution.py - profile install count hermes_cli/profiles.py - profile skill count hermes_cli/skills_hub.py - category detection tools/skill_manager_tool.py - skill name lookup (already used set, now uses helper) tools/skill_usage.py - usage tracking + skill dir lookup (2 sites) tools/skills_hub.py - optional skills find + scan (2 sites) tools/skills_sync.py - bundled skills sync E2E verified with the exact reported shape (bring/scripts/.venv/.../typer/.agents/skills/typer/SKILL.md): no sibling site picks up the ghost skill, all five legit-skill counts still return 1. * chore(infographic): retro-pop-grid bento for PR #30042 skill-scanner sweep --------- Co-authored-by: helix4u <4317663+helix4u@users.noreply.github.com>
This commit is contained in:
parent
3462b097e2
commit
3fde8c153d
16 changed files with 150 additions and 33 deletions
|
|
@ -50,6 +50,7 @@ from pathlib import Path
|
|||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -176,7 +177,9 @@ def get_keep() -> int:
|
|||
|
||||
def _count_skill_files(base: Path) -> int:
|
||||
try:
|
||||
return sum(1 for _ in base.rglob("SKILL.md"))
|
||||
return sum(
|
||||
1 for p in base.rglob("SKILL.md") if not is_excluded_skill_path(p)
|
||||
)
|
||||
except OSError:
|
||||
return 0
|
||||
|
||||
|
|
|
|||
|
|
@ -24,7 +24,43 @@ PLATFORM_MAP = {
|
|||
"windows": "win32",
|
||||
}
|
||||
|
||||
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
|
||||
EXCLUDED_SKILL_DIRS = frozenset(
|
||||
(
|
||||
".git",
|
||||
".github",
|
||||
".hub",
|
||||
".archive",
|
||||
".venv",
|
||||
"venv",
|
||||
"node_modules",
|
||||
"site-packages",
|
||||
"__pycache__",
|
||||
".tox",
|
||||
".nox",
|
||||
".pytest_cache",
|
||||
".mypy_cache",
|
||||
".ruff_cache",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def is_excluded_skill_path(path) -> bool:
|
||||
"""True if any component of *path* is in EXCLUDED_SKILL_DIRS.
|
||||
|
||||
Use this on every SKILL.md path produced by ``rglob`` to prune
|
||||
dependency, virtualenv, VCS, and cache directories. Centralising the
|
||||
check here keeps every skill-scanning site in sync with the shared
|
||||
exclusion set.
|
||||
|
||||
Accepts a Path or string.
|
||||
"""
|
||||
try:
|
||||
parts = path.parts # Path
|
||||
except AttributeError:
|
||||
from pathlib import PurePath
|
||||
parts = PurePath(str(path)).parts
|
||||
return any(part in EXCLUDED_SKILL_DIRS for part in parts)
|
||||
|
||||
|
||||
# ── Lazy YAML loader ─────────────────────────────────────────────────────
|
||||
|
||||
|
|
@ -478,7 +514,8 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
|
|||
def iter_skill_index_files(skills_dir: Path, filename: str):
|
||||
"""Walk skills_dir yielding sorted paths matching *filename*.
|
||||
|
||||
Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories.
|
||||
Excludes Hermes metadata, VCS, virtualenv/dependency, and cache
|
||||
directories so dependencies cannot register nested skills.
|
||||
"""
|
||||
matches = []
|
||||
for root, dirs, files in os.walk(skills_dir, followlinks=True):
|
||||
|
|
|
|||
|
|
@ -1109,7 +1109,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
|
|||
normalized = command_name.lower().replace("_", "-")
|
||||
try:
|
||||
from tools.skills_tool import _get_disabled_skill_names
|
||||
from agent.skill_utils import get_all_skills_dirs
|
||||
from agent.skill_utils import get_all_skills_dirs, is_excluded_skill_path
|
||||
disabled = _get_disabled_skill_names()
|
||||
|
||||
# Check disabled skills across all dirs (local + external)
|
||||
|
|
@ -1117,7 +1117,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
|
|||
if not skills_dir.exists():
|
||||
continue
|
||||
for skill_md in skills_dir.rglob("SKILL.md"):
|
||||
if any(part in {'.git', '.github', '.hub', '.archive'} for part in skill_md.parts):
|
||||
if is_excluded_skill_path(skill_md):
|
||||
continue
|
||||
slug, declared_name = _skill_slug_from_frontmatter(skill_md)
|
||||
if not slug or not declared_name:
|
||||
|
|
@ -1136,6 +1136,8 @@ def _check_unavailable_skill(command_name: str) -> str | None:
|
|||
optional_dir = get_optional_skills_dir(repo_root / "optional-skills")
|
||||
if optional_dir.exists():
|
||||
for skill_md in optional_dir.rglob("SKILL.md"):
|
||||
if is_excluded_skill_path(skill_md):
|
||||
continue
|
||||
slug, _declared = _skill_slug_from_frontmatter(skill_md)
|
||||
if not slug:
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from pathlib import Path
|
|||
from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config
|
||||
from hermes_cli.env_loader import load_hermes_dotenv
|
||||
from hermes_constants import display_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
|
||||
def _get_git_commit(project_root: Path) -> str:
|
||||
|
|
@ -69,6 +70,8 @@ def _count_skills(hermes_home: Path) -> int:
|
|||
return 0
|
||||
count = 0
|
||||
for item in skills_dir.rglob("SKILL.md"):
|
||||
if is_excluded_skill_path(item):
|
||||
continue
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ from pathlib import Path
|
|||
from typing import Optional
|
||||
|
||||
from hermes_cli import profiles as profiles_mod
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -109,8 +110,7 @@ def _collect_skills(profile_dir: Path) -> list[str]:
|
|||
return []
|
||||
names: list[str] = []
|
||||
for md in skills_dir.rglob("SKILL.md"):
|
||||
path_str = str(md)
|
||||
if "/.hub/" in path_str or "/.git/" in path_str:
|
||||
if is_excluded_skill_path(md):
|
||||
continue
|
||||
try:
|
||||
rel = md.relative_to(skills_dir)
|
||||
|
|
@ -201,7 +201,7 @@ def describe_profile(
|
|||
skill_list = "\n".join(f" - {n}" for n in skill_names) or " (no skills installed)"
|
||||
skill_count = sum(
|
||||
1 for _ in (profile_dir / "skills").rglob("SKILL.md")
|
||||
if "/.hub/" not in str(_) and "/.git/" not in str(_)
|
||||
if not is_excluded_skill_path(_)
|
||||
) if (profile_dir / "skills").is_dir() else 0
|
||||
|
||||
# Read model + provider from the profile's config.
|
||||
|
|
|
|||
|
|
@ -70,6 +70,8 @@ from datetime import datetime, timezone
|
|||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
|
|
@ -463,7 +465,9 @@ def _count_skills(staged: Path) -> int:
|
|||
skills_dir = staged / "skills"
|
||||
if not skills_dir.is_dir():
|
||||
return 0
|
||||
return sum(1 for _ in skills_dir.rglob("SKILL.md"))
|
||||
return sum(
|
||||
1 for p in skills_dir.rglob("SKILL.md") if not is_excluded_skill_path(p)
|
||||
)
|
||||
|
||||
|
||||
def plan_install(
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@ from dataclasses import dataclass
|
|||
from pathlib import Path, PurePosixPath, PureWindowsPath
|
||||
from typing import List, Optional
|
||||
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
_PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")
|
||||
|
||||
# Directories bootstrapped inside every new profile
|
||||
|
|
@ -485,8 +487,9 @@ def _count_skills(profile_dir: Path) -> int:
|
|||
return 0
|
||||
count = 0
|
||||
for md in skills_dir.rglob("SKILL.md"):
|
||||
if "/.hub/" not in str(md) and "/.git/" not in str(md):
|
||||
count += 1
|
||||
if is_excluded_skill_path(md):
|
||||
continue
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ from rich.table import Table
|
|||
# Lazy imports to avoid circular dependencies and slow startup.
|
||||
# tools.skills_hub and tools.skills_guard are imported inside functions.
|
||||
from hermes_constants import display_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
_console = Console()
|
||||
|
||||
|
|
@ -178,9 +179,12 @@ def _existing_categories() -> List[str]:
|
|||
# top level (no category); otherwise treat as a category bucket.
|
||||
if (entry / "SKILL.md").exists():
|
||||
continue
|
||||
# Has at least one nested SKILL.md?
|
||||
# Has at least one nested SKILL.md (excluding dependency/cache dirs)?
|
||||
try:
|
||||
if any(entry.rglob("SKILL.md")):
|
||||
if any(
|
||||
not is_excluded_skill_path(p)
|
||||
for p in entry.rglob("SKILL.md")
|
||||
):
|
||||
out.append(entry.name)
|
||||
except OSError:
|
||||
continue
|
||||
|
|
|
|||
BIN
infographic/skill-scanner-no-ghost-skills/infographic.png
Normal file
BIN
infographic/skill-scanner-no-ghost-skills/infographic.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.9 MiB |
|
|
@ -1,6 +1,6 @@
|
|||
"""Tests for agent/skill_utils.py — extract_skill_conditions metadata handling."""
|
||||
"""Tests for agent/skill_utils.py."""
|
||||
|
||||
from agent.skill_utils import extract_skill_conditions
|
||||
from agent.skill_utils import extract_skill_conditions, iter_skill_index_files
|
||||
|
||||
|
||||
def test_metadata_as_dict_with_hermes():
|
||||
|
|
@ -56,3 +56,41 @@ def test_metadata_missing_entirely():
|
|||
"fallback_for_tools": [],
|
||||
"requires_tools": [],
|
||||
}
|
||||
|
||||
|
||||
def test_iter_skill_index_files_prunes_dependency_dirs(tmp_path):
|
||||
real = tmp_path / "real-skill"
|
||||
real.mkdir()
|
||||
(real / "SKILL.md").write_text("---\nname: real-skill\n---\n", encoding="utf-8")
|
||||
|
||||
nested = (
|
||||
tmp_path
|
||||
/ "bring"
|
||||
/ "scripts"
|
||||
/ ".venv"
|
||||
/ "lib"
|
||||
/ "python3.13"
|
||||
/ "site-packages"
|
||||
/ "typer"
|
||||
/ ".agents"
|
||||
/ "skills"
|
||||
/ "typer"
|
||||
)
|
||||
nested.mkdir(parents=True)
|
||||
(nested / "SKILL.md").write_text("---\nname: typer\n---\n", encoding="utf-8")
|
||||
|
||||
node_module = (
|
||||
tmp_path
|
||||
/ "web-skill"
|
||||
/ "node_modules"
|
||||
/ "dep"
|
||||
/ ".agents"
|
||||
/ "skills"
|
||||
/ "dep"
|
||||
)
|
||||
node_module.mkdir(parents=True)
|
||||
(node_module / "SKILL.md").write_text("---\nname: dep\n---\n", encoding="utf-8")
|
||||
|
||||
found = list(iter_skill_index_files(tmp_path, "SKILL.md"))
|
||||
|
||||
assert found == [real / "SKILL.md"]
|
||||
|
|
|
|||
|
|
@ -267,6 +267,32 @@ class TestFindAllSkills:
|
|||
assert len(skills) == 1
|
||||
assert skills[0]["name"] == "real-skill"
|
||||
|
||||
def test_skips_nested_virtualenv_dependency_skills(self, tmp_path):
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
_make_skill(tmp_path, "real-skill")
|
||||
typer_skill = (
|
||||
tmp_path
|
||||
/ "bring"
|
||||
/ "scripts"
|
||||
/ ".venv"
|
||||
/ "lib"
|
||||
/ "python3.13"
|
||||
/ "site-packages"
|
||||
/ "typer"
|
||||
/ ".agents"
|
||||
/ "skills"
|
||||
/ "typer"
|
||||
)
|
||||
typer_skill.mkdir(parents=True)
|
||||
(typer_skill / "SKILL.md").write_text(
|
||||
"---\nname: typer\ndescription: Should not be discovered.\n---\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
skills = _find_all_skills()
|
||||
|
||||
assert [skill["name"] for skill in skills] == ["real-skill"]
|
||||
|
||||
def test_finds_skills_in_symlinked_category_dir(self, tmp_path):
|
||||
external_root = tmp_path / "repo"
|
||||
skills_root = tmp_path / "skills"
|
||||
|
|
|
|||
|
|
@ -283,12 +283,12 @@ def _find_skill(name: str) -> Optional[Dict[str, Any]]:
|
|||
external dirs configured via skills.external_dirs. Returns
|
||||
{"path": Path} or None.
|
||||
"""
|
||||
from agent.skill_utils import EXCLUDED_SKILL_DIRS, get_all_skills_dirs
|
||||
from agent.skill_utils import get_all_skills_dirs, is_excluded_skill_path
|
||||
for skills_dir in get_all_skills_dirs():
|
||||
if not skills_dir.exists():
|
||||
continue
|
||||
for skill_md in skills_dir.rglob("SKILL.md"):
|
||||
if any(part in EXCLUDED_SKILL_DIRS for part in skill_md.parts):
|
||||
if is_excluded_skill_path(skill_md):
|
||||
continue
|
||||
if skill_md.parent.name == name:
|
||||
return {"path": skill_md.parent}
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ from pathlib import Path
|
|||
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -236,14 +237,13 @@ def list_agent_created_skill_names() -> List[str]:
|
|||
names: List[str] = []
|
||||
# Top-level SKILL.md files (flat layout) AND nested category/skill/SKILL.md
|
||||
for skill_md in base.rglob("SKILL.md"):
|
||||
# Skip anything under .archive or .hub
|
||||
# Skip Hermes metadata, VCS, virtualenv/dependency, and cache dirs
|
||||
if is_excluded_skill_path(skill_md):
|
||||
continue
|
||||
try:
|
||||
rel = skill_md.relative_to(base)
|
||||
except ValueError:
|
||||
continue
|
||||
parts = rel.parts
|
||||
if parts and (parts[0].startswith(".") or parts[0] == "node_modules"):
|
||||
continue
|
||||
name = _read_skill_name(skill_md, fallback=skill_md.parent.name)
|
||||
if name in off_limits:
|
||||
continue
|
||||
|
|
@ -577,11 +577,7 @@ def _find_skill_dir(skill_name: str) -> Optional[Path]:
|
|||
if not base.exists():
|
||||
return None
|
||||
for skill_md in base.rglob("SKILL.md"):
|
||||
try:
|
||||
rel = skill_md.relative_to(base)
|
||||
except ValueError:
|
||||
continue
|
||||
if rel.parts and rel.parts[0].startswith("."):
|
||||
if is_excluded_skill_path(skill_md):
|
||||
continue
|
||||
if _read_skill_name(skill_md, fallback=skill_md.parent.name) == skill_name:
|
||||
return skill_md.parent
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ from dataclasses import dataclass, field
|
|||
from datetime import datetime, timezone
|
||||
from pathlib import Path, PurePosixPath
|
||||
from hermes_constants import get_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
from urllib.parse import urljoin, urlparse, urlunparse
|
||||
|
||||
|
|
@ -2639,6 +2640,8 @@ class OptionalSkillSource(SkillSource):
|
|||
if not self._optional_dir.is_dir():
|
||||
return None
|
||||
for skill_md in self._optional_dir.rglob("SKILL.md"):
|
||||
if is_excluded_skill_path(skill_md):
|
||||
continue
|
||||
if skill_md.parent.name == name:
|
||||
return skill_md.parent
|
||||
return None
|
||||
|
|
@ -2650,10 +2653,9 @@ class OptionalSkillSource(SkillSource):
|
|||
|
||||
results: List[SkillMeta] = []
|
||||
for skill_md in sorted(self._optional_dir.rglob("SKILL.md")):
|
||||
parent = skill_md.parent
|
||||
rel_parts = parent.relative_to(self._optional_dir).parts
|
||||
if any(part.startswith(".") for part in rel_parts):
|
||||
if is_excluded_skill_path(skill_md):
|
||||
continue
|
||||
parent = skill_md.parent
|
||||
|
||||
try:
|
||||
content = skill_md.read_text(encoding="utf-8")
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ import os
|
|||
import shutil
|
||||
from pathlib import Path
|
||||
from hermes_constants import get_bundled_skills_dir, get_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
from typing import Dict, List, Tuple
|
||||
from utils import atomic_replace
|
||||
|
||||
|
|
@ -139,8 +140,7 @@ def _discover_bundled_skills(bundled_dir: Path) -> List[Tuple[str, Path]]:
|
|||
return skills
|
||||
|
||||
for skill_md in bundled_dir.rglob("SKILL.md"):
|
||||
path_str = str(skill_md)
|
||||
if "/.git/" in path_str or "/.github/" in path_str or "/.hub/" in path_str:
|
||||
if is_excluded_skill_path(skill_md):
|
||||
continue
|
||||
skill_dir = skill_md.parent
|
||||
skill_name = _read_skill_name(skill_md, skill_dir.name)
|
||||
|
|
|
|||
|
|
@ -79,6 +79,7 @@ from typing import Dict, Any, List, Optional, Set, Tuple
|
|||
from tools.registry import registry, tool_error
|
||||
from hermes_cli.config import cfg_get
|
||||
from utils import env_var_enabled
|
||||
from agent.skill_utils import EXCLUDED_SKILL_DIRS as _EXCLUDED_SKILL_DIRS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -101,7 +102,6 @@ _PLATFORM_MAP = {
|
|||
"windows": "win32",
|
||||
}
|
||||
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
||||
_EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
|
||||
_REMOTE_ENV_BACKENDS = frozenset(
|
||||
{"docker", "singularity", "modal", "ssh", "daytona", "vercel_sandbox"}
|
||||
)
|
||||
|
|
@ -1565,4 +1565,3 @@ registry.register(
|
|||
check_fn=check_skills_requirements,
|
||||
emoji="📚",
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue