fix(curator): protect external skills from background curation

This commit is contained in:
yu-xin-c 2026-06-24 20:28:34 +08:00 committed by Teknium
parent eed9bbeb0a
commit 96bc524a71
7 changed files with 309 additions and 13 deletions

View file

@ -377,8 +377,10 @@ CURATOR_REVIEW_PROMPT = (
"bodies + `references/`, `templates/`, and `scripts/` subfiles for "
"session-specific detail — not one-session-one-skill micro-entries.\n\n"
"Hard rules — do not violate:\n"
"1. DO NOT touch bundled or hub-installed skills. The candidate list "
"below is already filtered to agent-created skills only.\n"
"1. DO NOT touch bundled, hub-installed, or external-dir skills "
"(`skills.external_dirs`). The candidate list below is already filtered "
"to local curator-managed skills only; external skills are externally "
"owned and read-only to this background curator.\n"
"2. DO NOT delete any skill. Archiving (moving the skill's directory "
"into ~/.hermes/skills/.archive/) is the maximum destructive action. "
"Archives are recoverable; deletion is not.\n"
@ -469,8 +471,9 @@ CURATOR_REVIEW_PROMPT = (
"skill, or `absorbed_into=\"\"` when you're truly pruning with no "
"forwarding target. This drives cron-job skill-reference migration — "
"guessing from your YAML summary after the fact is fragile.\n"
" - terminal — mv a sibling into the archive "
"OR move its content into a support subfile\n\n"
" - terminal — move LOCAL candidate content into "
"a support subfile when package integrity requires it; never mv, cp, rm, "
"patch, or rewrite bundled, hub-installed, or external-dir skills\n\n"
"'keep' is a legitimate decision ONLY when the skill is already a "
"class-level umbrella and none of the proposed merges would improve "
"discoverability. 'This is narrow but distinct from its siblings' "

View file

@ -507,6 +507,34 @@ def get_all_skills_dirs() -> List[Path]:
return dirs
def _resolve_for_skill_ownership(path) -> Path:
path_obj = path if isinstance(path, Path) else Path(str(path))
try:
return path_obj.expanduser().resolve()
except (OSError, RuntimeError):
return path_obj.expanduser().absolute()
def is_external_skill_path(path) -> bool:
"""Return True when ``path`` lives under a configured external skills dir.
``skills.external_dirs`` are externally owned: Hermes can discover and view
their skills, and foreground user-directed tool calls may still edit them,
but autonomous lifecycle maintenance must treat them as read-only. This
helper centralizes the ownership boundary so curator/reporting/tool paths do
not each need to re-interpret the config.
"""
candidate = _resolve_for_skill_ownership(path)
for root in get_external_skills_dirs():
resolved_root = _resolve_for_skill_ownership(root)
try:
candidate.relative_to(resolved_root)
return True
except ValueError:
continue
return False
# ── Condition extraction ──────────────────────────────────────────────────

View file

@ -7,6 +7,7 @@ from agent.skill_utils import (
get_disabled_skill_names,
get_external_skills_dirs,
is_excluded_skill_path,
is_external_skill_path,
is_skill_support_path,
iter_skill_index_files,
resolve_skill_config_values,
@ -168,6 +169,28 @@ def test_skill_config_raw_cache_invalidates_on_config_edit(tmp_path, monkeypatch
os.utime(config_path, None)
assert get_disabled_skill_names() == {"new-skill"}
def test_is_external_skill_path_matches_configured_external_dir(tmp_path, monkeypatch):
from agent import skill_utils
hermes_home = tmp_path / ".hermes"
local_skills = hermes_home / "skills"
external = tmp_path / "external-skills"
local_skills.mkdir(parents=True)
external.mkdir()
(hermes_home / "config.yaml").write_text(
f"skills:\n external_dirs:\n - {external}\n",
encoding="utf-8",
)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
skill_utils._external_dirs_cache_clear()
assert is_external_skill_path(external / "team-skill" / "SKILL.md") is True
assert is_external_skill_path(local_skills / "local-skill" / "SKILL.md") is False
def test_iter_skill_index_files_prunes_skill_support_dirs(tmp_path):
"""Archived package SKILL.md files under support dirs are not active skills."""
real = tmp_path / "umbrella"

View file

@ -600,6 +600,35 @@ class TestSkillManageDispatcher:
assert result["success"] is False
assert "does not exist" in result["error"]
def test_background_review_delete_refuses_bundled_even_with_absorbed_into(self, tmp_path):
from tools.skill_provenance import (
BACKGROUND_REVIEW,
reset_current_write_origin,
set_current_write_origin,
)
token = set_current_write_origin(BACKGROUND_REVIEW)
try:
with _skill_dir(tmp_path), \
patch("tools.skill_usage.is_protected_builtin", return_value=False), \
patch("tools.skill_usage.is_hub_installed", return_value=False), \
patch("tools.skill_usage.is_bundled",
side_effect=lambda skill_name: skill_name == "bundled"):
skill_manage(action="create", name="umbrella", content=VALID_SKILL_CONTENT)
skill_manage(action="create", name="bundled", content=VALID_SKILL_CONTENT)
raw = skill_manage(
action="delete",
name="bundled",
absorbed_into="umbrella",
)
finally:
reset_current_write_origin(token)
result = json.loads(raw)
assert result["success"] is False
assert "bundled" in result["error"].lower()
assert (tmp_path / "bundled" / "SKILL.md").exists()
class TestSecurityScanGate:
"""_security_scan_skill is gated by skills.guard_agent_created config flag."""
@ -849,6 +878,40 @@ class TestExternalSkillMutations:
assert (local / "fresh-skill" / "SKILL.md").exists()
assert not (external / "fresh-skill").exists()
def test_background_review_refuses_to_patch_external_skill(self, tmp_path):
"""Autonomous curator runs treat skills.external_dirs as read-only."""
from tools.skill_provenance import (
BACKGROUND_REVIEW,
reset_current_write_origin,
set_current_write_origin,
)
local = tmp_path / "local"
external = tmp_path / "vault"
local.mkdir(); external.mkdir()
skill_dir = _write_external_skill(external)
token = set_current_write_origin(BACKGROUND_REVIEW)
try:
with _two_roots(local, external), patch(
"agent.skill_utils.get_external_skills_dirs",
return_value=[external.resolve()],
):
raw = skill_manage(
action="patch",
name="ext-skill",
old_string="OLD_MARKER",
new_string="NEW_MARKER",
)
finally:
reset_current_write_origin(token)
result = json.loads(raw)
assert result["success"] is False
assert "external" in result["error"].lower()
assert "OLD_MARKER" in (skill_dir / "SKILL.md").read_text()
assert "NEW_MARKER" not in (skill_dir / "SKILL.md").read_text()
# ---------------------------------------------------------------------------

View file

@ -341,6 +341,29 @@ def test_agent_created_skips_archive_and_hub_dirs(skills_home):
assert "old-skill" not in names
def test_agent_created_excludes_external_dir_even_with_stale_agent_record(skills_home, monkeypatch):
from tools.skill_usage import (
agent_created_report,
is_agent_created,
list_agent_created_skill_names,
save_usage,
)
skills_dir = skills_home / "skills"
external = skills_dir / "shared-vault"
_write_skill(external, "external-skill")
save_usage({"external-skill": {"created_by": "agent"}})
monkeypatch.setattr(
"agent.skill_utils.get_external_skills_dirs",
lambda: [external.resolve()],
)
assert "external-skill" not in list_agent_created_skill_names()
assert "external-skill" not in {r["name"] for r in agent_created_report()}
assert is_agent_created("external-skill") is False
# ---------------------------------------------------------------------------
# Archive / restore
# ---------------------------------------------------------------------------
@ -384,6 +407,23 @@ def test_archive_refuses_hub_skill(skills_home):
assert not ok
def test_archive_refuses_external_skill(skills_home, monkeypatch):
from tools.skill_usage import archive_skill
skills_dir = skills_home / "skills"
external = skills_dir / "shared-vault"
skill_dir = _write_skill(external, "external-skill")
monkeypatch.setattr(
"agent.skill_utils.get_external_skills_dirs",
lambda: [external.resolve()],
)
ok, msg = archive_skill("external-skill")
assert not ok
assert "external" in msg.lower()
assert skill_dir.exists()
def test_archive_missing_skill_returns_error(skills_home):
from tools.skill_usage import archive_skill
ok, msg = archive_skill("nonexistent")

View file

@ -235,6 +235,79 @@ def _pinned_guard(name: str) -> Optional[str]:
return None
def _background_review_write_guard(
name: str,
skill_dir: Path,
action: str,
) -> Optional[Dict[str, Any]]:
"""Refuse autonomous curator writes to externally owned skills.
Foreground agents may still perform user-directed edits to external,
bundled, or hub-installed skills. The background review fork is different:
it is autonomous lifecycle maintenance, so its write surface is restricted
to local curator-owned sediment.
"""
try:
from tools.skill_provenance import is_background_review
if not is_background_review():
return None
except Exception:
return None
try:
from agent.skill_utils import is_external_skill_path
if is_external_skill_path(skill_dir):
return {
"success": False,
"error": (
f"Refusing background curator {action} for skill '{name}': "
"the skill lives in skills.external_dirs, which are "
"externally owned and read-only to autonomous curation."
),
}
except Exception:
logger.debug("external skill guard lookup failed for %s", name, exc_info=True)
try:
from tools import skill_usage
if skill_usage.is_protected_builtin(name):
return {
"success": False,
"error": (
f"Refusing background curator {action} for protected "
f"built-in skill '{name}'."
),
}
if skill_usage.is_hub_installed(name):
return {
"success": False,
"error": (
f"Refusing background curator {action} for hub-installed "
f"skill '{name}'."
),
}
if skill_usage.is_bundled(name):
return {
"success": False,
"error": (
f"Refusing background curator {action} for bundled "
f"skill '{name}'."
),
}
except Exception:
logger.debug("owned skill guard lookup failed for %s", name, exc_info=True)
return None
def _background_review_preflight(action: str, name: str) -> Optional[Dict[str, Any]]:
if action not in {"edit", "patch", "delete", "write_file", "remove_file"}:
return None
existing = _find_skill(name)
if not existing:
return None
return _background_review_write_guard(name, existing["path"], action)
MAX_SKILL_CONTENT_CHARS = 100_000 # ~36k tokens at 2.75 chars/token
MAX_SKILL_FILE_BYTES = 1_048_576 # 1 MiB per supporting file
@ -637,6 +710,9 @@ def _edit_skill(name: str, content: str) -> Dict[str, Any]:
existing = _find_skill(name)
if not existing:
return {"success": False, "error": _skill_not_found_error(name)}
guard = _background_review_write_guard(name, existing["path"], "edit")
if guard:
return guard
skill_md = existing["path"] / "SKILL.md"
# Back up original content for rollback
@ -690,6 +766,9 @@ def _patch_skill(
return {"success": False, "error": _skill_not_found_error(name)}
skill_dir = existing["path"]
guard = _background_review_write_guard(name, skill_dir, "patch")
if guard:
return guard
if file_path:
# Patching a supporting file
@ -783,6 +862,9 @@ def _delete_skill(name: str, absorbed_into: Optional[str] = None) -> Dict[str, A
existing = _find_skill(name)
if not existing:
return {"success": False, "error": _skill_not_found_error(name)}
guard = _background_review_write_guard(name, existing["path"], "delete")
if guard:
return guard
pinned_err = _pinned_guard(name)
if pinned_err:
@ -858,6 +940,9 @@ def _write_file(name: str, file_path: str, file_content: str) -> Dict[str, Any]:
existing = _find_skill(name)
if not existing:
return {"success": False, "error": _skill_not_found_error(name, " Create it first with action='create'.")}
guard = _background_review_write_guard(name, existing["path"], "write_file")
if guard:
return guard
target, err = _resolve_skill_target(existing["path"], file_path)
if err:
@ -894,6 +979,9 @@ def _remove_file(name: str, file_path: str) -> Dict[str, Any]:
return {"success": False, "error": _skill_not_found_error(name)}
skill_dir = existing["path"]
guard = _background_review_write_guard(name, skill_dir, "remove_file")
if guard:
return guard
target, err = _resolve_skill_target(skill_dir, file_path)
if err:
@ -1016,6 +1104,10 @@ def skill_manage(
Returns JSON string with results.
"""
preflight = _background_review_preflight(action, name)
if preflight is not None:
return json.dumps(preflight, ensure_ascii=False)
# Approval gate: when on, stages the write for review (skills are too large
# to review inline, so they always stage regardless of origin); when off
# (default) passes straight through. The gate is bypassed when this call is

View file

@ -34,7 +34,7 @@ from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
from hermes_constants import get_hermes_home
from agent.skill_utils import is_excluded_skill_path
from agent.skill_utils import is_excluded_skill_path, is_external_skill_path
logger = logging.getLogger(__name__)
@ -352,6 +352,10 @@ def list_agent_created_skill_names() -> List[str]:
# Skip Hermes metadata, VCS, virtualenv/dependency, and cache dirs
if is_excluded_skill_path(skill_md):
continue
# External skill dirs can be mounted below the local skills tree.
# Discovery may see them, but autonomous lifecycle curation must not.
if is_external_skill_path(skill_md):
continue
try:
skill_md.relative_to(base)
except ValueError:
@ -415,7 +419,12 @@ def _read_skill_name(skill_md: Path, fallback: str) -> str:
def is_agent_created(skill_name: str) -> bool:
"""Whether *skill_name* is neither bundled nor hub-installed."""
off_limits = _read_bundled_manifest_names() | _read_hub_installed_names()
return skill_name not in off_limits
if skill_name in off_limits:
return False
return not (
_find_skill_dir(skill_name) is None
and _find_external_skill_dir(skill_name) is not None
)
def is_hub_installed(skill_name: str) -> bool:
@ -428,21 +437,36 @@ def is_bundled(skill_name: str) -> bool:
return skill_name in _read_bundled_manifest_names()
def is_curation_eligible(skill_name: str) -> bool:
def _external_read_only_message(skill_name: str) -> str:
return (
f"skill '{skill_name}' lives in skills.external_dirs; "
"external skills are read-only to the curator"
)
def is_curation_eligible(skill_name: str, skill_path: Optional[Path] = None) -> bool:
"""Whether the curator may track/archive *skill_name*.
Agent-created skills are always eligible. Bundled built-ins become eligible
only when ``curator.prune_builtins`` is enabled. Hub-installed skills are
NEVER eligible they have an external upstream owner. Protected built-ins
(``PROTECTED_BUILTIN_SKILLS``) are NEVER eligible regardless of any flag
they back load-bearing UX and must never be archived or consolidated.
only when ``curator.prune_builtins`` is enabled. Hub-installed and external
skill-dir skills are NEVER eligible they have an external upstream owner.
Protected built-ins (``PROTECTED_BUILTIN_SKILLS``) are NEVER eligible
regardless of any flag they back load-bearing UX and must never be
archived or consolidated.
"""
if skill_path is not None and is_external_skill_path(skill_path):
return False
if is_protected_builtin(skill_name):
return False
if is_hub_installed(skill_name):
return False
if is_bundled(skill_name):
return _prune_builtins_enabled()
local_dir = _find_skill_dir(skill_name)
if local_dir is not None:
return not is_external_skill_path(local_dir)
if _find_external_skill_dir(skill_name) is not None:
return False
return True
@ -677,7 +701,11 @@ def archive_skill(skill_name: str) -> Tuple[bool, str]:
when one is archived, its name is added to the suppression list so the
update-time re-seeder leaves it archived instead of restoring it.
"""
if not is_curation_eligible(skill_name):
local_skill_dir = _find_skill_dir(skill_name)
if local_skill_dir is None and _find_external_skill_dir(skill_name) is not None:
return False, _external_read_only_message(skill_name)
if not is_curation_eligible(skill_name, local_skill_dir):
if is_protected_builtin(skill_name):
return False, (
f"skill '{skill_name}' is a protected built-in; it backs "
@ -690,9 +718,11 @@ def archive_skill(skill_name: str) -> Tuple[bool, str]:
"curator.prune_builtins to allow pruning it"
)
skill_dir = _find_skill_dir(skill_name)
skill_dir = local_skill_dir
if skill_dir is None:
return False, f"skill '{skill_name}' not found"
if is_external_skill_path(skill_dir):
return False, _external_read_only_message(skill_name)
archive_root = _archive_dir()
try:
@ -811,11 +841,28 @@ def _find_skill_dir(skill_name: str) -> Optional[Path]:
for skill_md in base.rglob("SKILL.md"):
if is_excluded_skill_path(skill_md):
continue
if is_external_skill_path(skill_md):
continue
if _read_skill_name(skill_md, fallback=skill_md.parent.name) == skill_name:
return skill_md.parent
return None
def _find_external_skill_dir(skill_name: str) -> Optional[Path]:
"""Locate a skill under configured external dirs by frontmatter name."""
from agent.skill_utils import get_all_skills_dirs
for base in get_all_skills_dirs()[1:]:
if not base.exists():
continue
for skill_md in base.rglob("SKILL.md"):
if is_excluded_skill_path(skill_md):
continue
if _read_skill_name(skill_md, fallback=skill_md.parent.name) == skill_name:
return skill_md.parent
return None
# ---------------------------------------------------------------------------
# Reporting — for the curator CLI / slash command
# ---------------------------------------------------------------------------