mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-05 02:31:47 +00:00
_classify_removed_skills used naive 'in' substring matching to detect whether a removed skill's name appeared in skill_manage arguments. Short/common skill names (api, git, test, foo, etc.) matched incorrectly when they appeared as substrings of longer words in file paths (references/api-design.md) or content (latest, testing). Replace with field-aware matching: - file_path: needle must match a complete filename stem or directory name, with -/_ normalised for variant tolerance - content fields: word-boundary regex (\b) prevents embedding in longer words Also add 3 regression tests covering the false-positive scenarios.
888 lines
32 KiB
Python
888 lines
32 KiB
Python
"""Tests for the curator consolidated-vs-pruned classifier.
|
|
|
|
The classifier splits skills that disappeared between the before/after
|
|
snapshots into two buckets:
|
|
|
|
- "consolidated" — absorbed into an umbrella; content still lives
|
|
under another skill's files
|
|
- "pruned" — archived for staleness; content not preserved elsewhere
|
|
|
|
Without the split the report lumped everything under "Skills archived",
|
|
which misled users into thinking consolidated skills had been pruned.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture
def curator_env(tmp_path, monkeypatch):
    """Yield a freshly reloaded `agent.curator` wired to an isolated HERMES_HOME."""
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    for subdir in ("skills", "logs"):
        (hermes_home / subdir).mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    # Reload so the modules pick up the patched HERMES_HOME / Path.home().
    import importlib

    import hermes_constants
    importlib.reload(hermes_constants)
    from agent import curator
    importlib.reload(curator)
    yield curator
|
|
|
|
|
|
def test_classify_consolidated_via_write_file_evidence(curator_env):
    """skill_manage write_file on umbrella references/<removed>.md = consolidated."""
    write_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "write_file",
            "name": "training-platforms",
            "file_path": "references/axolotl-training.md",
            "file_content": "# Axolotl\n...",
        }),
    }
    result = curator_env._classify_removed_skills(
        removed=["axolotl-training"],
        added=[],
        after_names={"training-platforms", "keeper"},
        tool_calls=[write_call],
    )
    assert result["pruned"] == []
    consolidated = result["consolidated"]
    assert len(consolidated) == 1
    assert consolidated[0]["name"] == "axolotl-training"
    assert consolidated[0]["into"] == "training-platforms"
|
|
|
|
|
|
def test_classify_pruned_when_no_destination_reference(curator_env):
    """Removed skill with no referencing tool call = pruned."""
    patch_args = json.dumps({
        "action": "patch", "name": "keeper",
        "old_string": "foo", "new_string": "bar",
    })
    result = curator_env._classify_removed_skills(
        removed=["old-stale-thing"],
        added=[],
        after_names={"keeper"},
        tool_calls=[
            {"name": "skills_list", "arguments": "{}"},
            {"name": "skill_manage", "arguments": patch_args},
        ],
    )
    assert result["consolidated"] == []
    pruned = result["pruned"]
    assert len(pruned) == 1
    assert pruned[0]["name"] == "old-stale-thing"
|
|
|
|
|
|
def test_classify_consolidated_into_newly_created_umbrella(curator_env):
    """Removed skill absorbed into a skill that was created THIS run."""
    create_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "create",
            "name": "llm-providers",
            "content": "# LLM Providers\n\n## anthropic-api\nMerged from the old anthropic-api skill.\n",
        }),
    }
    result = curator_env._classify_removed_skills(
        removed=["anthropic-api"],
        added=["llm-providers"],  # new umbrella created this run
        after_names={"llm-providers"},
        tool_calls=[create_call],
    )
    consolidated = result["consolidated"]
    assert len(consolidated) == 1
    assert consolidated[0]["name"] == "anthropic-api"
    assert consolidated[0]["into"] == "llm-providers"
|
|
|
|
|
|
def test_classify_handles_underscore_hyphen_variants(curator_env):
    """Names with hyphens match underscore forms in paths/content and vice versa."""
    write_args = json.dumps({
        "action": "write_file",
        "name": "webui",
        "file_path": "references/open_webui_setup.md",
        "file_content": "...",
    })
    result = curator_env._classify_removed_skills(
        removed=["open-webui-setup"],
        added=[],
        after_names={"webui"},
        tool_calls=[{"name": "skill_manage", "arguments": write_args}],
    )
    assert len(result["consolidated"]) == 1
    assert result["consolidated"][0]["into"] == "webui"
|
|
|
|
|
|
def test_classify_self_reference_does_not_count(curator_env):
    """A tool call that targets the removed skill itself is NOT consolidation."""
    # Scenario: the curator patched the skill once and later archived it.
    self_patch = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "patch",
            "name": "doomed",  # identical to the removed skill
            "old_string": "x",
            "new_string": "y",
        }),
    }
    result = curator_env._classify_removed_skills(
        removed=["doomed"],
        added=[],
        after_names={"keeper"},
        tool_calls=[self_patch],
    )
    assert result["consolidated"] == []
    assert result["pruned"][0]["name"] == "doomed"
|
|
|
|
|
|
def test_classify_destination_must_exist_after_run(curator_env):
    """A reference to a skill that doesn't exist after the run can't be the umbrella."""
    ghost_write = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "write_file",
            "name": "ghost",  # absent from after_names
            "file_path": "references/thing.md",
            "file_content": "...",
        }),
    }
    result = curator_env._classify_removed_skills(
        removed=["thing"],
        added=[],
        after_names={"keeper"},  # "ghost" deliberately missing
        tool_calls=[ghost_write],
    )
    assert result["consolidated"] == []
    assert result["pruned"][0]["name"] == "thing"
|
|
|
|
|
|
def test_classify_mixed_run_produces_both_buckets(curator_env):
    """A realistic run: one skill consolidated, one skill pruned."""
    umbrella_write = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "write_file",
            "name": "umbrella",
            "file_path": "references/absorbed-skill.md",
            "file_content": "...",
        }),
    }
    result = curator_env._classify_removed_skills(
        removed=["absorbed-skill", "dead-skill"],
        added=["umbrella"],
        after_names={"umbrella", "keeper"},
        tool_calls=[umbrella_write],
    )
    consolidated = result["consolidated"]
    pruned = result["pruned"]
    assert len(consolidated) == 1
    assert consolidated[0]["name"] == "absorbed-skill"
    assert consolidated[0]["into"] == "umbrella"
    assert len(pruned) == 1
    assert pruned[0]["name"] == "dead-skill"
|
|
|
|
|
|
def test_classify_handles_malformed_arguments_string(curator_env):
    """Truncated/malformed JSON in arguments falls back to substring match."""
    # Arguments truncated to 400 chars may not parse as JSON.
    truncated_raw = (
        '{"action":"write_file","name":"umbrella","file_path":"references/'
        'absorbed-skill.md","file_content":"long content that was cut off mid'
    )
    result = curator_env._classify_removed_skills(
        removed=["absorbed-skill"],
        added=[],
        after_names={"umbrella"},
        tool_calls=[{"name": "skill_manage", "arguments": truncated_raw}],
    )
    # Fallback substring match finds "absorbed-skill" in the raw truncated string
    # even though json.loads fails — but it can't identify target="umbrella"
    # because _raw is the only haystack and there's no dict access. The
    # classifier only promotes to "consolidated" if it can identify a target
    # skill from args.get("name"). Ensure we fail safe: no false positive.
    # (This is a correctness floor — better to prune-label than hallucinate
    # an umbrella that wasn't really used.)
    assert result["consolidated"] == []
    assert len(result["pruned"]) == 1
|
|
|
|
|
|
def test_classify_no_false_positive_short_name_in_file_path(curator_env):
    """Short skill name that is a substring of another filename = pruned, not consolidated.

    Regression guard: naive substring matching let "api" match
    "references/api-design.md"; field-aware matching requires a full
    filename stem or directory component.
    """
    result = curator_env._classify_removed_skills(
        removed=["api"],
        added=[],
        after_names={"conventions"},
        tool_calls=[
            {
                "name": "skill_manage",
                "arguments": json.dumps({
                    "action": "write_file",
                    "name": "conventions",
                    "file_path": "references/api-design.md",
                    "file_content": "# API Design\n...",
                }),
            },
        ],
    )
    # Plain string message: the original f-prefix had no placeholders (ruff F541).
    assert result["consolidated"] == [], (
        "Short name 'api' should NOT match file_path 'references/api-design.md'"
    )
    assert len(result["pruned"]) == 1
    assert result["pruned"][0]["name"] == "api"
|
|
|
|
|
|
def test_classify_no_false_positive_short_name_in_content(curator_env):
    """Short skill name embedded in a longer word in content = pruned, not consolidated.

    Regression guard: "test" must not match inside "latest"/"pytest" —
    content fields use word-boundary matching.
    """
    result = curator_env._classify_removed_skills(
        removed=["test"],
        added=[],
        after_names={"umbrella"},
        tool_calls=[
            {
                "name": "skill_manage",
                "arguments": json.dumps({
                    "action": "patch",
                    "name": "umbrella",
                    "old_string": "old",
                    "new_string": "running latest tests with pytest",
                }),
            },
        ],
    )
    # Plain string message: the original f-prefix had no placeholders (ruff F541).
    assert result["consolidated"] == [], (
        "Short name 'test' should NOT match 'latest' via word boundary"
    )
    assert len(result["pruned"]) == 1
|
|
|
|
|
|
def test_classify_still_matches_exact_word_in_content(curator_env):
    """Word-boundary match still works for exact word occurrences.

    Counterpart to the false-positive tests: "api" SHOULD match the
    standalone word in "use the api gateway".
    """
    result = curator_env._classify_removed_skills(
        removed=["api"],
        added=[],
        after_names={"gateway"},
        tool_calls=[
            {
                "name": "skill_manage",
                "arguments": json.dumps({
                    "action": "edit",
                    "name": "gateway",
                    "content": "# Gateway\n\nUse the api gateway for all requests.\n",
                }),
            },
        ],
    )
    # Plain string message: the original f-prefix had no placeholders (ruff F541).
    assert len(result["consolidated"]) == 1, (
        "'api' should match as a standalone word in content"
    )
    assert result["consolidated"][0]["into"] == "gateway"
|
|
|
|
|
|
def test_report_md_splits_consolidated_and_pruned_sections(curator_env):
    """End-to-end: REPORT.md shows both sections distinctly."""
    started = datetime.now(timezone.utc)

    before_report = [
        {"name": "absorbed-skill", "state": "active", "pinned": False},
        {"name": "dead-skill", "state": "stale", "pinned": False},
        {"name": "keeper", "state": "active", "pinned": False},
    ]
    after_report = [
        {"name": "keeper", "state": "active", "pinned": False},
        {"name": "umbrella", "state": "active", "pinned": False},
    ]
    create_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "create",
            "name": "umbrella",
            "content": "# umbrella\n\nAbsorbed absorbed-skill.",
        }),
    }

    run_dir = curator_env._write_run_report(
        started_at=started,
        elapsed_seconds=60.0,
        auto_counts={"checked": 3, "marked_stale": 0, "archived": 0, "reactivated": 0},
        auto_summary="no auto changes",
        before_report=before_report,
        before_names={entry["name"] for entry in before_report},
        after_report=after_report,
        llm_meta={
            "final": "Consolidated absorbed-skill into umbrella. Pruned dead-skill.",
            "summary": "1 consolidated, 1 pruned",
            "model": "m",
            "provider": "p",
            "error": None,
            "tool_calls": [create_call],
        },
    )

    payload = json.loads((run_dir / "run.json").read_text())
    # Both lists exist and are disjoint.
    consolidated_names = {e["name"] for e in payload["consolidated"]}
    assert consolidated_names == {"absorbed-skill"}
    # `pruned` holds full dicts {name, source, reason}; `pruned_names` is the
    # flat list for quick scans / legacy compat.
    pruned_names = payload["pruned_names"]
    assert pruned_names == ["dead-skill"]
    assert all(isinstance(e, dict) and "name" in e for e in payload["pruned"])
    # The union still matches the legacy "archived" field for backward compat.
    assert set(payload["archived"]) == consolidated_names | set(pruned_names)
    # Counts are exposed.
    assert payload["counts"]["consolidated_this_run"] == 1
    assert payload["counts"]["pruned_this_run"] == 1

    md = (run_dir / "REPORT.md").read_text()
    # Two separate sections, not a single "Skills archived" lump.
    assert "Consolidated into umbrella skills" in md
    assert "Pruned — archived for staleness" in md
    assert "`absorbed-skill` → merged into `umbrella`" in md
    assert "`dead-skill`" in md
    # The old single-lump section should not appear.
    assert "### Skills archived" not in md
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _parse_structured_summary — extracting the model's required YAML block
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_parse_structured_summary_happy_path(curator_env):
    """A well-formed fenced YAML block yields both consolidations and prunings."""
    text = (
        "Long human summary here. I processed clusters X, Y, Z.\n\n"
        "## Structured summary (required)\n"
        "```yaml\n"
        "consolidations:\n"
        "  - from: anthropic-api\n"
        "    into: llm-providers\n"
        "    reason: duplicate of the generic llm-providers skill\n"
        "  - from: openai-api\n"
        "    into: llm-providers\n"
        "    reason: same — merged with sibling\n"
        "prunings:\n"
        "  - name: random-old-notes\n"
        "    reason: pre-curator garbage, no overlap\n"
        "```\n"
    )
    out = curator_env._parse_structured_summary(text)
    assert len(out["consolidations"]) == 2
    expected_first = {
        "from": "anthropic-api",
        "into": "llm-providers",
        "reason": "duplicate of the generic llm-providers skill",
    }
    assert out["consolidations"][0] == expected_first
    assert len(out["prunings"]) == 1
    assert out["prunings"][0]["reason"] == "pre-curator garbage, no overlap"
|
|
|
|
|
|
def test_parse_structured_summary_missing_block(curator_env):
    """Text without a YAML block parses to two empty lists."""
    empty = {"consolidations": [], "prunings": []}
    assert curator_env._parse_structured_summary("No block in this text.") == empty
|
|
|
|
|
|
def test_parse_structured_summary_malformed_yaml(curator_env):
    """Broken YAML inside the fence degrades to empty lists, not an exception."""
    broken = "```yaml\nthis: is\n not: [valid yaml\n```"
    out = curator_env._parse_structured_summary(broken)
    assert out == {"consolidations": [], "prunings": []}
|
|
|
|
|
|
def test_parse_structured_summary_empty_lists(curator_env):
    """Explicit empty YAML lists round-trip to empty Python lists."""
    out = curator_env._parse_structured_summary(
        "```yaml\nconsolidations: []\nprunings: []\n```"
    )
    assert out == {"consolidations": [], "prunings": []}
|
|
|
|
|
|
def test_parse_structured_summary_ignores_bare_strings(curator_env):
    """Entries that aren't dicts (e.g. a model wrote bare names) are skipped."""
    text = (
        "```yaml\n"
        "consolidations:\n"
        "  - just-a-bare-string\n"
        "  - from: real-entry\n"
        "    into: umbrella\n"
        "    reason: valid\n"
        "prunings: []\n"
        "```"
    )
    out = curator_env._parse_structured_summary(text)
    consolidations = out["consolidations"]
    assert len(consolidations) == 1
    assert consolidations[0]["from"] == "real-entry"
|
|
|
|
|
|
def test_parse_structured_summary_missing_required_fields(curator_env):
    """Consolidation entries without from+into are skipped."""
    text = (
        "```yaml\n"
        "consolidations:\n"
        "  - from: only-from\n"
        "    reason: no into\n"
        "  - into: only-into\n"
        "  - from: good\n"
        "    into: umbrella\n"
        "prunings: []\n"
        "```"
    )
    out = curator_env._parse_structured_summary(text)
    consolidations = out["consolidations"]
    assert len(consolidations) == 1
    assert consolidations[0]["from"] == "good"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _reconcile_classification — merging model block with heuristic
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_reconcile_model_wins_when_umbrella_exists(curator_env):
    """Model claim + umbrella in destinations → model authority (with reason)."""
    model_block = {
        "consolidations": [{
            "from": "anthropic-api",
            "into": "llm-providers",
            "reason": "duplicate",
        }],
        "prunings": [],
    }
    out = curator_env._reconcile_classification(
        removed=["anthropic-api"],
        heuristic={"consolidated": [], "pruned": [{"name": "anthropic-api"}]},
        model_block=model_block,
        destinations={"llm-providers"},
    )
    assert out["pruned"] == []
    assert len(out["consolidated"]) == 1
    entry = out["consolidated"][0]
    assert entry["name"] == "anthropic-api"
    assert entry["into"] == "llm-providers"
    assert entry["reason"] == "duplicate"
    assert entry["source"] == "model"
|
|
|
|
|
|
def test_reconcile_model_hallucinates_umbrella(curator_env):
    """Model names a non-existent umbrella — downgrade, prefer heuristic if any."""
    heuristic = {
        "consolidated": [{"name": "thing", "into": "real-umbrella", "evidence": "..."}],
        "pruned": [],
    }
    model_block = {
        "consolidations": [{
            "from": "thing",
            "into": "nonexistent-umbrella",
            "reason": "confused",
        }],
        "prunings": [],
    }
    out = curator_env._reconcile_classification(
        removed=["thing"],
        heuristic=heuristic,
        model_block=model_block,
        destinations={"real-umbrella"},
    )
    assert len(out["consolidated"]) == 1
    entry = out["consolidated"][0]
    assert entry["into"] == "real-umbrella"
    assert "tool-call audit" in entry["source"]
    assert entry["model_claimed_into"] == "nonexistent-umbrella"
|
|
|
|
|
|
def test_reconcile_model_hallucinates_with_no_heuristic_evidence(curator_env):
    """Model names a non-existent umbrella AND no tool-call evidence → prune."""
    model_block = {
        "consolidations": [{
            "from": "ghost",
            "into": "nonexistent",
            "reason": "wrong",
        }],
        "prunings": [],
    }
    out = curator_env._reconcile_classification(
        removed=["ghost"],
        heuristic={"consolidated": [], "pruned": [{"name": "ghost"}]},
        model_block=model_block,
        destinations={"real-umbrella"},
    )
    assert out["consolidated"] == []
    pruned = out["pruned"]
    assert len(pruned) == 1
    assert "fallback" in pruned[0]["source"]
|
|
|
|
|
|
def test_reconcile_heuristic_catches_model_omission(curator_env):
    """Model forgot to list a consolidation, heuristic found it."""
    heuristic = {
        "consolidated": [{
            "name": "forgotten",
            "into": "umbrella",
            "evidence": "write_file on umbrella referenced forgotten.md",
        }],
        "pruned": [],
    }
    out = curator_env._reconcile_classification(
        removed=["forgotten"],
        heuristic=heuristic,
        model_block={"consolidations": [], "prunings": []},
        destinations={"umbrella"},
    )
    assert len(out["consolidated"]) == 1
    entry = out["consolidated"][0]
    assert entry["into"] == "umbrella"
    assert "model omitted" in entry["source"]
|
|
|
|
|
|
def test_reconcile_model_prunes_with_reason(curator_env):
    """Model says pruned, heuristic agrees, we surface the reason."""
    out = curator_env._reconcile_classification(
        removed=["stale-skill"],
        heuristic={"consolidated": [], "pruned": [{"name": "stale-skill"}]},
        model_block={
            "consolidations": [],
            "prunings": [{"name": "stale-skill", "reason": "superseded by bundled skill"}],
        },
        destinations=set(),
    )
    pruned = out["pruned"]
    assert len(pruned) == 1
    assert pruned[0]["reason"] == "superseded by bundled skill"
    assert pruned[0]["source"] == "model"
|
|
|
|
|
|
def test_reconcile_model_block_visible_in_full_report(curator_env):
    """End-to-end: LLM final response with the YAML block → reasons in REPORT.md."""
    # Module-level json/datetime/timezone are already imported; no local aliases needed.
    started = datetime.now(timezone.utc)
    before = [
        {"name": "anthropic-api", "state": "active", "pinned": False},
        {"name": "stale-thing", "state": "stale", "pinned": False},
    ]
    after = [{"name": "llm-providers", "state": "active", "pinned": False}]

    llm_final_text = (
        "Processed 3 clusters. Absorbed anthropic-api into llm-providers.\n\n"
        "## Structured summary (required)\n"
        "```yaml\n"
        "consolidations:\n"
        "  - from: anthropic-api\n"
        "    into: llm-providers\n"
        "    reason: duplicate content, now a subsection\n"
        "prunings:\n"
        "  - name: stale-thing\n"
        "    reason: pre-curator junk, no overlap with anything\n"
        "```\n"
    )

    run_dir = curator_env._write_run_report(
        started_at=started,
        elapsed_seconds=30.0,
        auto_counts={"checked": 2, "marked_stale": 0, "archived": 0, "reactivated": 0},
        auto_summary="none",
        before_report=before,
        before_names={r["name"] for r in before},
        after_report=after,
        llm_meta={
            "final": llm_final_text,
            "summary": "1 consolidated, 1 pruned",
            "model": "m",
            "provider": "p",
            "error": None,
            "tool_calls": [
                {"name": "skill_manage", "arguments": json.dumps({
                    "action": "create",
                    "name": "llm-providers",
                    "content": "# llm-providers\nIncludes anthropic-api",
                })},
            ],
        },
    )

    payload = json.loads((run_dir / "run.json").read_text())
    cons = payload["consolidated"][0]
    assert cons["name"] == "anthropic-api"
    assert cons["into"] == "llm-providers"
    assert cons["reason"] == "duplicate content, now a subsection"
    assert cons["source"] == "model+audit"  # model AND heuristic both had it

    pruned = payload["pruned"][0]
    assert pruned["name"] == "stale-thing"
    assert pruned["reason"] == "pre-curator junk, no overlap with anything"

    md = (run_dir / "REPORT.md").read_text()
    assert "duplicate content, now a subsection" in md
    assert "pre-curator junk" in md
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _extract_absorbed_into_declarations — authoritative signal from delete calls
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_extract_absorbed_into_picks_up_consolidation(curator_env):
    """Delete call with absorbed_into=<umbrella> yields a declaration."""
    delete_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "delete",
            "name": "narrow-skill",
            "absorbed_into": "umbrella",
        }),
    }
    declarations = curator_env._extract_absorbed_into_declarations([delete_call])
    assert declarations == {
        "narrow-skill": {"into": "umbrella", "declared": True},
    }
|
|
|
|
|
|
def test_extract_absorbed_into_empty_string_is_explicit_prune(curator_env):
    """absorbed_into='' is recorded as an explicit prune declaration."""
    delete_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "delete",
            "name": "stale",
            "absorbed_into": "",
        }),
    }
    declarations = curator_env._extract_absorbed_into_declarations([delete_call])
    assert declarations == {"stale": {"into": "", "declared": True}}
|
|
|
|
|
|
def test_extract_absorbed_into_missing_arg_ignored(curator_env):
    """Delete call without absorbed_into is skipped — fallback to heuristic."""
    plain_delete = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "delete",
            "name": "legacy-skill",
        }),
    }
    assert curator_env._extract_absorbed_into_declarations([plain_delete]) == {}
|
|
|
|
|
|
def test_extract_absorbed_into_ignores_non_delete_actions(curator_env):
    """Patch, create, write_file etc. must not leak into declarations."""
    patch_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "patch",
            "name": "umbrella",
            "old_string": "...",
            "new_string": "...",
            "absorbed_into": "something",  # bogus on non-delete, must be ignored
        }),
    }
    assert curator_env._extract_absorbed_into_declarations([patch_call]) == {}
|
|
|
|
|
|
def test_extract_absorbed_into_accepts_dict_arguments(curator_env):
    """arguments can arrive as a dict (defensive path) — still works."""
    dict_args_call = {
        "name": "skill_manage",
        "arguments": {
            "action": "delete",
            "name": "narrow",
            "absorbed_into": "umbrella",
        },
    }
    declarations = curator_env._extract_absorbed_into_declarations([dict_args_call])
    assert declarations == {"narrow": {"into": "umbrella", "declared": True}}
|
|
|
|
|
|
def test_extract_absorbed_into_strips_whitespace(curator_env):
    """Padded name/absorbed_into values are trimmed before recording."""
    padded_delete = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "delete",
            "name": " narrow ",
            "absorbed_into": " umbrella ",
        }),
    }
    declarations = curator_env._extract_absorbed_into_declarations([padded_delete])
    assert declarations == {"narrow": {"into": "umbrella", "declared": True}}
|
|
|
|
|
|
def test_extract_absorbed_into_ignores_non_skill_manage_calls(curator_env):
    """Only skill_manage calls can carry absorbed_into declarations."""
    unrelated_calls = [
        {"name": "terminal", "arguments": json.dumps({"command": "ls"})},
        {"name": "read_file", "arguments": json.dumps({"path": "/tmp/x"})},
    ]
    assert curator_env._extract_absorbed_into_declarations(unrelated_calls) == {}
|
|
|
|
|
|
def test_extract_absorbed_into_handles_malformed_arguments(curator_env):
    """Garbage JSON in arguments must not crash the extractor."""
    garbage_calls = [
        {"name": "skill_manage", "arguments": "{not json"},
        {"name": "skill_manage", "arguments": None},
        {"name": "skill_manage"},  # no arguments key at all
    ]
    assert curator_env._extract_absorbed_into_declarations(garbage_calls) == {}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _reconcile_classification with absorbed_into declarations (authoritative)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_reconcile_absorbed_into_beats_everything_else(curator_env):
    """Model declared absorbed_into at delete; YAML/heuristic disagree — declaration wins.

    This is the exact #18671 regression: the model forgets to emit the YAML
    summary block, the heuristic's substring match misses because the
    umbrella's patch content doesn't literally contain the old skill's
    slug. Previously this fell through to 'no-evidence fallback' prune,
    which dropped the cron ref instead of rewriting. With absorbed_into
    declared, the model tells us directly.
    """
    declarations = {
        "pr-review-format": {"into": "hermes-agent-dev", "declared": True},
    }
    out = curator_env._reconcile_classification(
        removed=["pr-review-format"],
        heuristic={"consolidated": [], "pruned": [{"name": "pr-review-format"}]},
        model_block={"consolidations": [], "prunings": []},  # model forgot YAML block
        destinations={"hermes-agent-dev"},
        absorbed_declarations=declarations,
    )
    assert out["pruned"] == []
    assert len(out["consolidated"]) == 1
    entry = out["consolidated"][0]
    assert entry["name"] == "pr-review-format"
    assert entry["into"] == "hermes-agent-dev"
    assert "absorbed_into" in entry["source"]
|
|
|
|
|
|
def test_reconcile_absorbed_into_empty_is_explicit_prune(curator_env):
    """absorbed_into='' takes precedence and routes to pruned, not fallback."""
    out = curator_env._reconcile_classification(
        removed=["stale"],
        heuristic={"consolidated": [], "pruned": [{"name": "stale"}]},
        model_block={"consolidations": [], "prunings": []},
        destinations=set(),
        absorbed_declarations={"stale": {"into": "", "declared": True}},
    )
    assert out["consolidated"] == []
    pruned = out["pruned"]
    assert len(pruned) == 1
    assert "model-declared prune" in pruned[0]["source"]
|
|
|
|
|
|
def test_reconcile_absorbed_into_nonexistent_target_falls_through(curator_env):
    """If the declared umbrella doesn't exist in destinations, fall through to
    heuristic/YAML logic. Shouldn't happen in practice (the tool validates at
    delete time) but the reconciler is defensive."""
    heuristic = {
        "consolidated": [{"name": "thing", "into": "real-umbrella", "evidence": "..."}],
        "pruned": [],
    }
    out = curator_env._reconcile_classification(
        removed=["thing"],
        heuristic=heuristic,
        model_block={"consolidations": [], "prunings": []},
        destinations={"real-umbrella"},
        absorbed_declarations={"thing": {"into": "ghost-umbrella", "declared": True}},
    )
    assert len(out["consolidated"]) == 1
    entry = out["consolidated"][0]
    assert entry["into"] == "real-umbrella"
    assert "tool-call audit" in entry["source"]
|
|
|
|
|
|
def test_reconcile_declaration_preserves_yaml_reason(curator_env):
    """When the model both declared absorbed_into AND emitted YAML with reason,
    the reason carries through so REPORT.md still has it."""
    model_block = {
        "consolidations": [{
            "from": "narrow",
            "into": "umbrella",
            "reason": "duplicate of umbrella's main content",
        }],
        "prunings": [],
    }
    out = curator_env._reconcile_classification(
        removed=["narrow"],
        heuristic={"consolidated": [], "pruned": []},
        model_block=model_block,
        destinations={"umbrella"},
        absorbed_declarations={"narrow": {"into": "umbrella", "declared": True}},
    )
    assert len(out["consolidated"]) == 1
    entry = out["consolidated"][0]
    assert entry["into"] == "umbrella"
    assert "absorbed_into" in entry["source"]
    assert entry["reason"] == "duplicate of umbrella's main content"
|
|
|
|
|
|
def test_reconcile_without_declarations_preserves_legacy_behavior(curator_env):
    """Backward compat: no absorbed_declarations arg → all existing logic intact."""
    heuristic = {
        "consolidated": [{"name": "thing", "into": "umbrella", "evidence": "..."}],
        "pruned": [],
    }
    # absorbed_declarations deliberately omitted — defaults to None and must
    # behave identically to the pre-change signature.
    out = curator_env._reconcile_classification(
        removed=["thing"],
        heuristic=heuristic,
        model_block={"consolidations": [], "prunings": []},
        destinations={"umbrella"},
    )
    assert len(out["consolidated"]) == 1
    assert out["consolidated"][0]["into"] == "umbrella"
|
|
|
|
|
|
def test_reconcile_mixed_declarations_and_legacy_calls(curator_env):
    """Real-world run: some deletes declared absorbed_into, some didn't.
    Declared ones use the authoritative path; others fall through to YAML/heuristic.
    """
    heuristic = {
        "consolidated": [
            {"name": "legacy-cons", "into": "umbrella-a", "evidence": "..."},
        ],
        "pruned": [{"name": "legacy-prune"}],
    }
    declarations = {
        "declared-cons": {"into": "umbrella-b", "declared": True},
        "declared-prune": {"into": "", "declared": True},
    }
    out = curator_env._reconcile_classification(
        removed=["declared-cons", "declared-prune", "legacy-cons", "legacy-prune"],
        heuristic=heuristic,
        model_block={"consolidations": [], "prunings": []},
        destinations={"umbrella-a", "umbrella-b"},
        absorbed_declarations=declarations,
    )
    cons_by_name = {e["name"]: e for e in out["consolidated"]}
    pruned_by_name = {e["name"]: e for e in out["pruned"]}

    # Declared consolidation → authoritative absorbed_into path.
    assert "declared-cons" in cons_by_name
    assert cons_by_name["declared-cons"]["into"] == "umbrella-b"
    assert "absorbed_into" in cons_by_name["declared-cons"]["source"]

    # Undeclared consolidation → tool-call audit heuristic.
    assert "legacy-cons" in cons_by_name
    assert cons_by_name["legacy-cons"]["into"] == "umbrella-a"
    assert "tool-call audit" in cons_by_name["legacy-cons"]["source"]

    # Declared prune vs. no-evidence fallback prune.
    assert "declared-prune" in pruned_by_name
    assert "model-declared prune" in pruned_by_name["declared-prune"]["source"]

    assert "legacy-prune" in pruned_by_name
    assert "no-evidence fallback" in pruned_by_name["legacy-prune"]["source"]
|