diff --git a/tests/tools/test_skill_manager_tool.py b/tests/tools/test_skill_manager_tool.py index 5797049433d..05b6f7d2d75 100644 --- a/tests/tools/test_skill_manager_tool.py +++ b/tests/tools/test_skill_manager_tool.py @@ -1152,3 +1152,132 @@ class TestDeleteSkillRmtreeGuard: assert result["success"] is False assert "skills root" in result["error"].lower() assert outside.exists() + + +# --------------------------------------------------------------------------- +# Curator consolidation-pass fail-closed delete guard (#29912) +# --------------------------------------------------------------------------- + + +@contextmanager +def _curator_pass(tmp_path, *, monkeypatch): + """Run the body as the curator/background-review fork. + + Points HERMES_HOME at ``tmp_path/.hermes`` so skill_usage's archive path + (``get_hermes_home()``) resolves into the same tree the skill manager + searches, and flips ``is_background_review()`` → True so the consolidation + guard fires. + """ + hermes_home = tmp_path / ".hermes" + skills_root = hermes_home / "skills" + skills_root.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + with patch("tools.skill_manager_tool.SKILLS_DIR", skills_root), \ + patch("agent.skill_utils.get_all_skills_dirs", return_value=[skills_root]), \ + patch("tools.skill_provenance.is_background_review", return_value=True): + yield skills_root + + +def _skill_content(name: str) -> str: + """SKILL.md whose frontmatter ``name:`` matches the directory name. + + ``skill_usage._find_skill_dir`` (used by ``archive_skill``) resolves a + skill by its frontmatter ``name:`` field, so archive-path tests must keep + the two in sync. + """ + return ( + "---\n" + f"name: {name}\n" + "description: A test skill for unit testing.\n" + "---\n\n" + f"# {name}\n\n" + "Step 1: Do the thing.\n" + ) + + +class TestCuratorConsolidationDeleteGuard: + """The curator's LLM consolidation pass must fail CLOSED on unverified + deletes — it may only archive a skill it absorbed into an umbrella. + + Reproduces #29912: the pass archived clusters of active skills with zero + verified consolidations (``consolidated_this_run == 0``) because a bare + prune from the LLM pass was accepted. With the guard, a delete without a + valid ``absorbed_into`` is refused and the skill stays active; a verified + consolidation is archived RECOVERABLY (not rmtree'd). + """ + + def test_bare_prune_during_curator_pass_refused(self, tmp_path, monkeypatch): + with _curator_pass(tmp_path, monkeypatch=monkeypatch) as skills_root: + _create_skill("active-skill", VALID_SKILL_CONTENT) + result = _delete_skill("active-skill", absorbed_into="") + assert result["success"] is False + assert result.get("_fail_closed") is True + # Skill must remain active on disk — fail closed, no archive. + assert (skills_root / "active-skill").exists() + + def test_omitted_absorbed_into_during_curator_pass_refused(self, tmp_path, monkeypatch): + with _curator_pass(tmp_path, monkeypatch=monkeypatch) as skills_root: + _create_skill("active-skill", VALID_SKILL_CONTENT) + result = _delete_skill("active-skill") # absorbed_into omitted + assert result["success"] is False + assert result.get("_fail_closed") is True + assert (skills_root / "active-skill").exists() + + def test_whitespace_absorbed_into_during_curator_pass_refused(self, tmp_path, monkeypatch): + with _curator_pass(tmp_path, monkeypatch=monkeypatch) as skills_root: + _create_skill("active-skill", VALID_SKILL_CONTENT) + result = _delete_skill("active-skill", absorbed_into=" ") + assert result["success"] is False + assert result.get("_fail_closed") is True + assert (skills_root / "active-skill").exists() + + def test_verified_consolidation_archives_recoverably(self, tmp_path, monkeypatch): + with _curator_pass(tmp_path, monkeypatch=monkeypatch) as skills_root: + _create_skill("umbrella", _skill_content("umbrella")) + _create_skill("narrow", _skill_content("narrow")) + result = _delete_skill("narrow", absorbed_into="umbrella") + assert result["success"] is True, result + assert result.get("_archived") is True + assert "absorbed into 'umbrella'" in result["message"] + # Recoverable: moved to .archive/, NOT permanently rmtree'd. + assert not (skills_root / "narrow").exists() + assert (skills_root / ".archive" / "narrow").exists() + # Umbrella untouched. + assert (skills_root / "umbrella").exists() + + def test_consolidation_into_missing_umbrella_still_rejected(self, tmp_path, monkeypatch): + # The pre-existing target-existence check fires before the recoverable + # archive — a hallucinated umbrella is refused and the skill stays put. + with _curator_pass(tmp_path, monkeypatch=monkeypatch) as skills_root: + _create_skill("narrow", VALID_SKILL_CONTENT) + result = _delete_skill("narrow", absorbed_into="ghost-umbrella") + assert result["success"] is False + assert "does not exist" in result["error"] + assert (skills_root / "narrow").exists() + + def test_foreground_bare_prune_unaffected(self, tmp_path): + # Outside the curator pass (default foreground origin), a bare prune + # still hard-deletes — the guard is curator-scoped only. + with _skill_dir(tmp_path): + _create_skill("user-skill", VALID_SKILL_CONTENT) + result = _delete_skill("user-skill", absorbed_into="") + assert result["success"] is True + assert result.get("_fail_closed") is None + assert result.get("_archived") is None + assert not (tmp_path / "user-skill").exists() + + def test_dispatcher_preserves_usage_record_on_curator_archive(self, tmp_path, monkeypatch): + # skill_manage(delete) post-action telemetry must NOT forget a + # recoverable curator archive — the record persists as archived so + # `hermes curator restore` can bring it back. + from tools import skill_usage + with _curator_pass(tmp_path, monkeypatch=monkeypatch): + _create_skill("umbrella", _skill_content("umbrella")) + _create_skill("narrow", _skill_content("narrow")) + skill_usage.mark_agent_created("narrow") + raw = skill_manage("delete", "narrow", absorbed_into="umbrella") + result = json.loads(raw) + assert result["success"] is True, result + rec = skill_usage.get_record("narrow") + # Record kept (not forgotten) and marked archived. + assert rec.get("state") == skill_usage.STATE_ARCHIVED diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index 5d1e8fd117a..faef98bb1a0 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -329,6 +329,56 @@ def _background_review_preflight(action: str, name: str) -> Optional[Dict[str, A return _background_review_write_guard(name, existing["path"], action) +def _curator_consolidation_delete_guard( + name: str, absorbed_into: Optional[str] +) -> Optional[Dict[str, Any]]: + """Fail closed on unverified deletes during the curator consolidation pass. + + The curator's forked review agent (``is_background_review()``) runs the + LLM umbrella-building pass. Its only legitimate ``skill_manage(delete)`` is + a *verified consolidation*: the skill's content was absorbed into an + umbrella, declared via ``absorbed_into=`` where the umbrella + exists on disk (validated separately in ``_delete_skill``). + + A delete with no forwarding target — ``absorbed_into`` omitted (``None``) + or empty (``""``) — is the fail-open behavior reported in #29912: the + consolidation pass archived whole clusters of active skills with zero + verified consolidations (``consolidated_this_run == 0``), leaving active + automations pointing at names that no longer resolve. The deterministic + inactivity prune is the only legitimate prune path, and it archives via + ``skill_usage.archive_skill()`` directly without ever calling + ``skill_manage`` — so a bare prune reaching here can only be the LLM pass + pruning without consolidation evidence. Refuse it; keep the skill active. + + Returns an error dict to abort the delete, or ``None`` when the delete is + allowed to proceed (not the curator pass, or a declared consolidation). + """ + try: + from tools.skill_provenance import is_background_review + if not is_background_review(): + return None + except Exception: + return None + + declared = isinstance(absorbed_into, str) and absorbed_into.strip() + if declared: + return None + + return { + "success": False, + "error": ( + f"Refusing background curator delete of skill '{name}': the " + "consolidation pass may only archive a skill it has absorbed into " + "an umbrella. Pass absorbed_into= (the umbrella must " + "already exist) to record a verified consolidation. Pruning a " + "skill with no forwarding target is not permitted here — the " + "deterministic inactivity prune handles staleness archival " + "separately. Keeping '{name}' active.".format(name=name) + ), + "_fail_closed": True, + } + + MAX_SKILL_CONTENT_CHARS = 100_000 # ~36k tokens at 2.75 chars/token MAX_SKILL_FILE_BYTES = 1_048_576 # 1 MiB per supporting file @@ -887,13 +937,26 @@ def _delete_skill(name: str, absorbed_into: Optional[str] = None) -> Dict[str, A if guard: return guard + # Fail closed on unverified deletes during the curator consolidation pass. + # A bare prune (no absorbed_into) from the LLM umbrella pass is the + # fail-open behavior reported in #29912 — refuse it; keep the skill active. + fail_closed = _curator_consolidation_delete_guard(name, absorbed_into) + if fail_closed: + return fail_closed + pinned_err = _pinned_guard(name) if pinned_err: return {"success": False, "error": pinned_err} # Validate absorbed_into target when declared non-empty - if absorbed_into is not None and isinstance(absorbed_into, str) and absorbed_into.strip(): - target_name = absorbed_into.strip() + absorbed_target = ( + absorbed_into.strip() + if absorbed_into is not None and isinstance(absorbed_into, str) + else "" + ) + is_consolidation = bool(absorbed_target) + if is_consolidation: + target_name = absorbed_target if target_name == name: return { "success": False, @@ -917,6 +980,32 @@ def _delete_skill(name: str, absorbed_into: Optional[str] = None) -> Dict[str, A if unsafe: return {"success": False, "error": unsafe} + # During the curator consolidation pass, a verified consolidation must be + # RECOVERABLE: archival into ~/.hermes/skills/.archive/ is documented as + # the maximum destructive action the curator may take, and + # `hermes curator restore` promises the skill can be brought back. Route + # through the recoverable archive primitive instead of permanent rmtree so + # a misjudged consolidation can be undone (#29912). Foreground, + # user-directed deletes keep their existing hard-delete semantics. + try: + from tools.skill_provenance import is_background_review + curator_pass = is_background_review() + except Exception: + curator_pass = False + + if curator_pass: + try: + from tools.skill_usage import archive_skill + ok, archive_msg = archive_skill(name) + except Exception as e: + return {"success": False, "error": f"failed to archive '{name}': {e}"} + if not ok: + return {"success": False, "error": archive_msg} + message = f"Skill '{name}' archived ({archive_msg})." + if is_consolidation: + message += f" Content absorbed into '{absorbed_target}'." + return {"success": True, "message": message, "_archived": True} + shutil.rmtree(skill_dir) # Clean up empty category directories (don't remove the skills root itself) @@ -925,8 +1014,8 @@ def _delete_skill(name: str, absorbed_into: Optional[str] = None) -> Dict[str, A parent.rmdir() message = f"Skill '{name}' deleted." - if absorbed_into is not None and isinstance(absorbed_into, str) and absorbed_into.strip(): - message += f" Content absorbed into '{absorbed_into.strip()}'." + if is_consolidation: + message += f" Content absorbed into '{absorbed_target}'." return { "success": True, @@ -1198,7 +1287,11 @@ def skill_manage( elif action in {"patch", "edit", "write_file", "remove_file"}: bump_patch(name) elif action == "delete": - forget(name) + # A recoverable curator archive (routed through archive_skill) + # keeps its usage record as STATE_ARCHIVED so `hermes curator + # status`/`restore` still see it. Only a hard delete forgets. + if not result.get("_archived"): + forget(name) except Exception: pass