fix(curator): fail closed on unverified skill deletes during consolidation (#53935)

The curator's LLM consolidation pass could archive whole clusters of
active skills with zero verified consolidations (#29912): a bare prune
(skill_manage delete with absorbed_into empty/omitted) from the forked
review agent was accepted, removing the skill's name from lookup even
though counts.consolidated_this_run was 0.

- _delete_skill now fails closed during the curator/background-review
  pass: a delete is only allowed when it declares a verified
  consolidation (absorbed_into=<umbrella>, umbrella must exist). A prune
  with no forwarding target is refused; the skill stays active. The
  deterministic inactivity prune (archive_skill) is unaffected.
- A verified consolidation delete during the curator pass now routes
  through the recoverable archive primitive instead of shutil.rmtree, so
  a misjudged consolidation can be undone with hermes curator restore.
  The usage record is kept (state=archived) rather than forgotten.
- Foreground, user-directed deletes keep their existing hard-delete
  semantics.
This commit is contained in:
Teknium 2026-06-27 20:45:57 -07:00 committed by GitHub
parent 11b0be8d15
commit 56abbaeac3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 227 additions and 5 deletions

View file

@ -1152,3 +1152,132 @@ class TestDeleteSkillRmtreeGuard:
assert result["success"] is False
assert "skills root" in result["error"].lower()
assert outside.exists()
# ---------------------------------------------------------------------------
# Curator consolidation-pass fail-closed delete guard (#29912)
# ---------------------------------------------------------------------------
@contextmanager
def _curator_pass(tmp_path, *, monkeypatch):
"""Run the body as the curator/background-review fork.
Points HERMES_HOME at ``tmp_path/.hermes`` so skill_usage's archive path
(``get_hermes_home()``) resolves into the same tree the skill manager
searches, and flips ``is_background_review()`` True so the consolidation
guard fires.
"""
hermes_home = tmp_path / ".hermes"
skills_root = hermes_home / "skills"
skills_root.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
with patch("tools.skill_manager_tool.SKILLS_DIR", skills_root), \
patch("agent.skill_utils.get_all_skills_dirs", return_value=[skills_root]), \
patch("tools.skill_provenance.is_background_review", return_value=True):
yield skills_root
def _skill_content(name: str) -> str:
"""SKILL.md whose frontmatter ``name:`` matches the directory name.
``skill_usage._find_skill_dir`` (used by ``archive_skill``) resolves a
skill by its frontmatter ``name:`` field, so archive-path tests must keep
the two in sync.
"""
return (
"---\n"
f"name: {name}\n"
"description: A test skill for unit testing.\n"
"---\n\n"
f"# {name}\n\n"
"Step 1: Do the thing.\n"
)
class TestCuratorConsolidationDeleteGuard:
"""The curator's LLM consolidation pass must fail CLOSED on unverified
deletes it may only archive a skill it absorbed into an umbrella.
Reproduces #29912: the pass archived clusters of active skills with zero
verified consolidations (``consolidated_this_run == 0``) because a bare
prune from the LLM pass was accepted. With the guard, a delete without a
valid ``absorbed_into`` is refused and the skill stays active; a verified
consolidation is archived RECOVERABLY (not rmtree'd).
"""
def test_bare_prune_during_curator_pass_refused(self, tmp_path, monkeypatch):
with _curator_pass(tmp_path, monkeypatch=monkeypatch) as skills_root:
_create_skill("active-skill", VALID_SKILL_CONTENT)
result = _delete_skill("active-skill", absorbed_into="")
assert result["success"] is False
assert result.get("_fail_closed") is True
# Skill must remain active on disk — fail closed, no archive.
assert (skills_root / "active-skill").exists()
def test_omitted_absorbed_into_during_curator_pass_refused(self, tmp_path, monkeypatch):
with _curator_pass(tmp_path, monkeypatch=monkeypatch) as skills_root:
_create_skill("active-skill", VALID_SKILL_CONTENT)
result = _delete_skill("active-skill") # absorbed_into omitted
assert result["success"] is False
assert result.get("_fail_closed") is True
assert (skills_root / "active-skill").exists()
def test_whitespace_absorbed_into_during_curator_pass_refused(self, tmp_path, monkeypatch):
with _curator_pass(tmp_path, monkeypatch=monkeypatch) as skills_root:
_create_skill("active-skill", VALID_SKILL_CONTENT)
result = _delete_skill("active-skill", absorbed_into=" ")
assert result["success"] is False
assert result.get("_fail_closed") is True
assert (skills_root / "active-skill").exists()
def test_verified_consolidation_archives_recoverably(self, tmp_path, monkeypatch):
with _curator_pass(tmp_path, monkeypatch=monkeypatch) as skills_root:
_create_skill("umbrella", _skill_content("umbrella"))
_create_skill("narrow", _skill_content("narrow"))
result = _delete_skill("narrow", absorbed_into="umbrella")
assert result["success"] is True, result
assert result.get("_archived") is True
assert "absorbed into 'umbrella'" in result["message"]
# Recoverable: moved to .archive/, NOT permanently rmtree'd.
assert not (skills_root / "narrow").exists()
assert (skills_root / ".archive" / "narrow").exists()
# Umbrella untouched.
assert (skills_root / "umbrella").exists()
def test_consolidation_into_missing_umbrella_still_rejected(self, tmp_path, monkeypatch):
# The pre-existing target-existence check fires before the recoverable
# archive — a hallucinated umbrella is refused and the skill stays put.
with _curator_pass(tmp_path, monkeypatch=monkeypatch) as skills_root:
_create_skill("narrow", VALID_SKILL_CONTENT)
result = _delete_skill("narrow", absorbed_into="ghost-umbrella")
assert result["success"] is False
assert "does not exist" in result["error"]
assert (skills_root / "narrow").exists()
def test_foreground_bare_prune_unaffected(self, tmp_path):
# Outside the curator pass (default foreground origin), a bare prune
# still hard-deletes — the guard is curator-scoped only.
with _skill_dir(tmp_path):
_create_skill("user-skill", VALID_SKILL_CONTENT)
result = _delete_skill("user-skill", absorbed_into="")
assert result["success"] is True
assert result.get("_fail_closed") is None
assert result.get("_archived") is None
assert not (tmp_path / "user-skill").exists()
def test_dispatcher_preserves_usage_record_on_curator_archive(self, tmp_path, monkeypatch):
# skill_manage(delete) post-action telemetry must NOT forget a
# recoverable curator archive — the record persists as archived so
# `hermes curator restore` can bring it back.
from tools import skill_usage
with _curator_pass(tmp_path, monkeypatch=monkeypatch):
_create_skill("umbrella", _skill_content("umbrella"))
_create_skill("narrow", _skill_content("narrow"))
skill_usage.mark_agent_created("narrow")
raw = skill_manage("delete", "narrow", absorbed_into="umbrella")
result = json.loads(raw)
assert result["success"] is True, result
rec = skill_usage.get_record("narrow")
# Record kept (not forgotten) and marked archived.
assert rec.get("state") == skill_usage.STATE_ARCHIVED

View file

@ -329,6 +329,56 @@ def _background_review_preflight(action: str, name: str) -> Optional[Dict[str, A
return _background_review_write_guard(name, existing["path"], action)
def _curator_consolidation_delete_guard(
name: str, absorbed_into: Optional[str]
) -> Optional[Dict[str, Any]]:
"""Fail closed on unverified deletes during the curator consolidation pass.
The curator's forked review agent (``is_background_review()``) runs the
LLM umbrella-building pass. Its only legitimate ``skill_manage(delete)`` is
a *verified consolidation*: the skill's content was absorbed into an
umbrella, declared via ``absorbed_into=<umbrella>`` where the umbrella
exists on disk (validated separately in ``_delete_skill``).
A delete with no forwarding target ``absorbed_into`` omitted (``None``)
or empty (``""``) is the fail-open behavior reported in #29912: the
consolidation pass archived whole clusters of active skills with zero
verified consolidations (``consolidated_this_run == 0``), leaving active
automations pointing at names that no longer resolve. The deterministic
inactivity prune is the only legitimate prune path, and it archives via
``skill_usage.archive_skill()`` directly without ever calling
``skill_manage`` so a bare prune reaching here can only be the LLM pass
pruning without consolidation evidence. Refuse it; keep the skill active.
Returns an error dict to abort the delete, or ``None`` when the delete is
allowed to proceed (not the curator pass, or a declared consolidation).
"""
try:
from tools.skill_provenance import is_background_review
if not is_background_review():
return None
except Exception:
return None
declared = isinstance(absorbed_into, str) and absorbed_into.strip()
if declared:
return None
return {
"success": False,
"error": (
f"Refusing background curator delete of skill '{name}': the "
"consolidation pass may only archive a skill it has absorbed into "
"an umbrella. Pass absorbed_into=<umbrella> (the umbrella must "
"already exist) to record a verified consolidation. Pruning a "
"skill with no forwarding target is not permitted here — the "
"deterministic inactivity prune handles staleness archival "
"separately. Keeping '{name}' active.".format(name=name)
),
"_fail_closed": True,
}
MAX_SKILL_CONTENT_CHARS = 100_000 # ~36k tokens at 2.75 chars/token
MAX_SKILL_FILE_BYTES = 1_048_576 # 1 MiB per supporting file
@ -887,13 +937,26 @@ def _delete_skill(name: str, absorbed_into: Optional[str] = None) -> Dict[str, A
if guard:
return guard
# Fail closed on unverified deletes during the curator consolidation pass.
# A bare prune (no absorbed_into) from the LLM umbrella pass is the
# fail-open behavior reported in #29912 — refuse it; keep the skill active.
fail_closed = _curator_consolidation_delete_guard(name, absorbed_into)
if fail_closed:
return fail_closed
pinned_err = _pinned_guard(name)
if pinned_err:
return {"success": False, "error": pinned_err}
# Validate absorbed_into target when declared non-empty
if absorbed_into is not None and isinstance(absorbed_into, str) and absorbed_into.strip():
target_name = absorbed_into.strip()
absorbed_target = (
absorbed_into.strip()
if absorbed_into is not None and isinstance(absorbed_into, str)
else ""
)
is_consolidation = bool(absorbed_target)
if is_consolidation:
target_name = absorbed_target
if target_name == name:
return {
"success": False,
@ -917,6 +980,32 @@ def _delete_skill(name: str, absorbed_into: Optional[str] = None) -> Dict[str, A
if unsafe:
return {"success": False, "error": unsafe}
# During the curator consolidation pass, a verified consolidation must be
# RECOVERABLE: archival into ~/.hermes/skills/.archive/ is documented as
# the maximum destructive action the curator may take, and
# `hermes curator restore` promises the skill can be brought back. Route
# through the recoverable archive primitive instead of permanent rmtree so
# a misjudged consolidation can be undone (#29912). Foreground,
# user-directed deletes keep their existing hard-delete semantics.
try:
from tools.skill_provenance import is_background_review
curator_pass = is_background_review()
except Exception:
curator_pass = False
if curator_pass:
try:
from tools.skill_usage import archive_skill
ok, archive_msg = archive_skill(name)
except Exception as e:
return {"success": False, "error": f"failed to archive '{name}': {e}"}
if not ok:
return {"success": False, "error": archive_msg}
message = f"Skill '{name}' archived ({archive_msg})."
if is_consolidation:
message += f" Content absorbed into '{absorbed_target}'."
return {"success": True, "message": message, "_archived": True}
shutil.rmtree(skill_dir)
# Clean up empty category directories (don't remove the skills root itself)
@ -925,8 +1014,8 @@ def _delete_skill(name: str, absorbed_into: Optional[str] = None) -> Dict[str, A
parent.rmdir()
message = f"Skill '{name}' deleted."
if absorbed_into is not None and isinstance(absorbed_into, str) and absorbed_into.strip():
message += f" Content absorbed into '{absorbed_into.strip()}'."
if is_consolidation:
message += f" Content absorbed into '{absorbed_target}'."
return {
"success": True,
@ -1198,7 +1287,11 @@ def skill_manage(
elif action in {"patch", "edit", "write_file", "remove_file"}:
bump_patch(name)
elif action == "delete":
forget(name)
# A recoverable curator archive (routed through archive_skill)
# keeps its usage record as STATE_ARCHIVED so `hermes curator
# status`/`restore` still see it. Only a hard delete forgets.
if not result.get("_archived"):
forget(name)
except Exception:
pass