mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-05 02:31:47 +00:00
Adds the Curator — an auxiliary-model background task that periodically
reviews AGENT-CREATED skills and keeps the collection tidy: tracks usage,
transitions unused skills through active → stale → archived, and spawns
a forked AIAgent to consolidate overlaps and patch drift.
Default: enabled, inactivity-triggered (no cron daemon). Runs on CLI
startup and gateway boot when the last run is older than interval_hours
(default 24) AND the agent has been idle for min_idle_hours (default 2).
Invariants (all load-bearing):
- Never touches bundled or hub-installed skills (.bundled_manifest +
.hub/lock.json double-filter)
- Never auto-deletes — archive only. Archives are recoverable
via `hermes curator restore <skill>`
- Pinned skills bypass all auto-transitions
- Uses the aux client; never touches the main session's prompt cache
New files:
- tools/skill_usage.py — sidecar .usage.json telemetry, atomic writes,
provenance filter
- agent/curator.py — orchestrator: config, idle gating, state-machine
transitions (pure, no LLM), forked-agent review prompt
- hermes_cli/curator.py — `hermes curator {status,run,pause,resume,
pin,unpin,restore}` subcommand
- tests/tools/test_skill_usage.py — 29 tests
- tests/agent/test_curator.py — 25 tests
Modified files (surgical patches):
- tools/skills_tool.py — bump view_count on successful skill_view
- tools/skill_manager_tool.py — bump patch_count on skill_manage
patch/edit/write_file/remove_file; forget record on delete
- hermes_cli/config.py — add curator: section to DEFAULT_CONFIG
- hermes_cli/commands.py — add /curator CommandDef with subcommands
- hermes_cli/main.py — register `hermes curator` subparser via
register_cli() from hermes_cli.curator
- cli.py — /curator slash-command dispatch + startup hook
- gateway/run.py — gateway-boot hook (mirrors CLI)
Validation:
- 54 new tests across skill_usage + curator, all passing in 3s
- 346 tests across all touched files' neighbors green
- 2783 tests across hermes_cli/ + gateway/test_run_progress_topics.py green
- CLI smoke: `hermes curator status/pause/resume` work end-to-end
Companion to PR #16026 (class-first skill review prompt) — together
they form a loop: the review prompt stops near-duplicate skill creation
at the source, and the curator prunes/consolidates what still accumulates.
Refs #7816.
363 lines
12 KiB
Python
363 lines
12 KiB
Python
"""Tests for agent/curator.py — orchestrator, idle gating, state transitions.
|
|
|
|
LLM spawning is never exercised here — `_run_llm_review` is monkeypatched so
|
|
tests run fully offline and the curator module doesn't need real credentials.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import importlib
|
|
import json
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture
def curator_env(tmp_path, monkeypatch):
    """Build a sandboxed HERMES_HOME and reload the curator/usage modules.

    Returns a dict with three keys: ``home`` (the temporary ``.hermes``
    directory), ``curator`` (the reloaded ``agent.curator`` module) and
    ``usage`` (the reloaded ``tools.skill_usage`` module).
    """
    hermes_home = tmp_path / ".hermes"
    (hermes_home / "skills").mkdir(parents=True)

    # Redirect both the home-directory lookup and the env var to the sandbox.
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    # Reload after patching the environment — presumably so anything the
    # modules resolve at import time points at the sandbox (verify in modules).
    import tools.skill_usage as usage
    importlib.reload(usage)
    import agent.curator as curator
    importlib.reload(curator)

    # The real LLM pass is stubbed out by default; tests opt in per-case.
    monkeypatch.setattr(curator, "_run_llm_review", lambda prompt: "llm-stub")

    # No config file by default, so curator defaults apply. Tests may override.
    monkeypatch.setattr(curator, "_load_config", lambda: {})

    return {"home": hermes_home, "curator": curator, "usage": usage}
|
|
|
|
|
|
def _write_skill(skills_dir: Path, name: str):
|
|
d = skills_dir / name
|
|
d.mkdir(parents=True, exist_ok=True)
|
|
(d / "SKILL.md").write_text(
|
|
f"---\nname: {name}\ndescription: x\n---\n", encoding="utf-8",
|
|
)
|
|
return d
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Config gates
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_curator_enabled_default_true(curator_env):
    """With the fixture's empty config, the curator reports itself enabled."""
    curator = curator_env["curator"]
    assert curator.is_enabled() is True
|
|
|
|
|
|
def test_curator_disabled_via_config(curator_env, monkeypatch):
    """``enabled: False`` disables the curator and makes it ineligible to run."""
    curator = curator_env["curator"]
    monkeypatch.setattr(curator, "_load_config", lambda: {"enabled": False})

    assert curator.is_enabled() is False
    assert curator.should_run_now() is False
|
|
|
|
|
|
def test_curator_defaults(curator_env):
    """An empty config yields the expected interval/idle/stale/archive defaults."""
    curator = curator_env["curator"]

    assert curator.get_interval_hours() == 24
    assert curator.get_min_idle_hours() == 2
    assert curator.get_stale_after_days() == 30
    assert curator.get_archive_after_days() == 90
|
|
|
|
|
|
def test_curator_config_overrides(curator_env, monkeypatch):
    """Values supplied by the config take precedence over the defaults."""
    curator = curator_env["curator"]
    overrides = {
        "interval_hours": 12,
        "min_idle_hours": 0.5,
        "stale_after_days": 7,
        "archive_after_days": 60,
    }
    # Hand out a fresh dict on every call, as the original inline literal did.
    monkeypatch.setattr(curator, "_load_config", lambda: dict(overrides))

    assert curator.get_interval_hours() == 12
    assert curator.get_min_idle_hours() == 0.5
    assert curator.get_stale_after_days() == 7
    assert curator.get_archive_after_days() == 60
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# should_run_now
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_first_run_always_eligible(curator_env):
    """With no saved state at all, the curator is eligible to run."""
    curator = curator_env["curator"]
    assert curator.should_run_now() is True
|
|
|
|
|
|
def test_recent_run_blocks(curator_env):
    """A ``last_run_at`` of "now" makes the curator ineligible."""
    curator = curator_env["curator"]
    just_now = datetime.now(timezone.utc).isoformat()
    curator.save_state({"last_run_at": just_now, "paused": False})

    assert curator.should_run_now() is False
|
|
|
|
|
|
def test_old_run_eligible(curator_env):
    """A last run 48 hours ago (past the 24h default) is eligible again."""
    curator = curator_env["curator"]
    two_days_back = datetime.now(timezone.utc) - timedelta(hours=48)
    curator.save_state(
        {"last_run_at": two_days_back.isoformat(), "paused": False}
    )

    assert curator.should_run_now() is True
|
|
|
|
|
|
def test_paused_blocks_even_if_stale(curator_env):
    """The paused flag blocks a run even when the last run is five days old."""
    curator = curator_env["curator"]
    five_days_back = datetime.now(timezone.utc) - timedelta(days=5)
    curator.save_state(
        {"last_run_at": five_days_back.isoformat(), "paused": True}
    )

    assert curator.should_run_now() is False
|
|
|
|
|
|
def test_set_paused_roundtrip(curator_env):
    """``set_paused`` toggles the value that ``is_paused`` reports."""
    curator = curator_env["curator"]

    curator.set_paused(True)
    assert curator.is_paused() is True

    curator.set_paused(False)
    assert curator.is_paused() is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Automatic state transitions
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_unused_skill_transitions_to_stale(curator_env):
    """A skill last used 45 days ago is marked stale by the automatic pass."""
    curator = curator_env["curator"]
    usage = curator_env["usage"]
    _write_skill(curator_env["home"] / "skills", "old-skill")

    # Last use well past stale_after_days (30 by default).
    stamp = (datetime.now(timezone.utc) - timedelta(days=45)).isoformat()
    usage_data = usage.load_usage()
    record = usage._empty_record()
    record["last_used_at"] = stamp
    record["created_at"] = stamp
    usage_data["old-skill"] = record
    usage.save_usage(usage_data)

    counts = curator.apply_automatic_transitions()

    assert counts["marked_stale"] == 1
    assert usage.get_record("old-skill")["state"] == "stale"
|
|
|
|
|
|
def test_very_old_skill_gets_archived(curator_env):
    """A skill unused for 120 days is moved under ``.archive`` and marked archived."""
    curator = curator_env["curator"]
    usage = curator_env["usage"]
    skills_root = curator_env["home"] / "skills"
    ancient_dir = _write_skill(skills_root, "ancient")

    stamp = (datetime.now(timezone.utc) - timedelta(days=120)).isoformat()
    usage_data = usage.load_usage()
    record = usage._empty_record()
    record["last_used_at"] = stamp
    record["created_at"] = stamp
    usage_data["ancient"] = record
    usage.save_usage(usage_data)

    counts = curator.apply_automatic_transitions()

    assert counts["archived"] == 1
    # The skill directory is relocated, not deleted.
    assert not ancient_dir.exists()
    assert (skills_root / ".archive" / "ancient" / "SKILL.md").exists()
    assert usage.get_record("ancient")["state"] == "archived"
|
|
|
|
|
|
def test_pinned_skill_is_never_touched(curator_env):
    """A pinned skill stays active even after a year without use."""
    curator = curator_env["curator"]
    usage = curator_env["usage"]
    _write_skill(curator_env["home"] / "skills", "precious")

    stamp = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat()
    usage_data = usage.load_usage()
    record = usage._empty_record()
    record["last_used_at"] = stamp
    record["created_at"] = stamp
    record["pinned"] = True
    usage_data["precious"] = record
    usage.save_usage(usage_data)

    counts = curator.apply_automatic_transitions()

    assert counts["archived"] == 0
    assert counts["marked_stale"] == 0
    stored = usage.get_record("precious")
    assert stored["state"] == "active"  # untouched
    assert stored["pinned"] is True
|
|
|
|
|
|
def test_stale_skill_reactivates_on_recent_use(curator_env):
    """A skill marked stale but used just now is flipped back to active."""
    curator = curator_env["curator"]
    usage = curator_env["usage"]
    _write_skill(curator_env["home"] / "skills", "revived")

    stamp = datetime.now(timezone.utc).isoformat()
    usage_data = usage.load_usage()
    record = usage._empty_record()
    record["state"] = "stale"
    record["last_used_at"] = stamp
    record["created_at"] = stamp
    usage_data["revived"] = record
    usage.save_usage(usage_data)

    counts = curator.apply_automatic_transitions()

    assert counts["reactivated"] == 1
    assert usage.get_record("revived")["state"] == "active"
|
|
|
|
|
|
def test_new_skill_without_last_used_not_immediately_archived(curator_env):
    """A freshly-created skill with no use history should not get archived
    just because last_used_at is None."""
    curator = curator_env["curator"]
    skills_root = curator_env["home"] / "skills"
    _write_skill(skills_root, "fresh")

    # Bump nothing — record doesn't exist yet. Curator should create it
    # and fall back to created_at which is ~now.
    counts = curator.apply_automatic_transitions()

    assert counts["archived"] == 0
    assert counts["marked_stale"] == 0
    assert (skills_root / "fresh").exists()
|
|
|
|
|
|
def test_bundled_skill_not_touched_by_transitions(curator_env):
    """A skill listed in ``.bundled_manifest`` is skipped by the automatic pass."""
    curator = curator_env["curator"]
    usage = curator_env["usage"]
    skills_root = curator_env["home"] / "skills"
    _write_skill(skills_root, "bundled")
    (skills_root / ".bundled_manifest").write_text(
        "bundled:abc\n",
        encoding="utf-8",
    )

    stamp = (datetime.now(timezone.utc) - timedelta(days=500)).isoformat()
    usage_data = usage.load_usage()
    record = usage._empty_record()
    record["last_used_at"] = stamp
    usage_data["bundled"] = record
    usage.save_usage(usage_data)

    counts = curator.apply_automatic_transitions()

    # bundled skills are excluded from the agent-created list entirely
    assert counts["checked"] == 0
    assert (skills_root / "bundled").exists()  # never moved
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# run_curator_review orchestration
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_run_review_records_state(curator_env):
    """A synchronous review returns a result and persists run bookkeeping."""
    curator = curator_env["curator"]
    _write_skill(curator_env["home"] / "skills", "a")

    outcome = curator.run_curator_review(synchronous=True)
    assert "started_at" in outcome

    persisted = curator.load_state()
    assert persisted["last_run_at"] is not None
    assert persisted["run_count"] >= 1
    assert persisted["last_run_summary"] is not None
|
|
|
|
|
|
def test_run_review_synchronous_invokes_llm_stub(curator_env, monkeypatch):
    """A synchronous review calls the LLM hook once and forwards its summary."""
    curator = curator_env["curator"]
    _write_skill(curator_env["home"] / "skills", "a")

    prompts = []

    def fake_llm_review(prompt):
        # Record the prompt so the test can inspect what the curator sent.
        prompts.append(prompt)
        return "stubbed-summary"

    monkeypatch.setattr(curator, "_run_llm_review", fake_llm_review)

    summaries = []
    curator.run_curator_review(
        on_summary=lambda s: summaries.append(s),
        synchronous=True,
    )

    assert len(prompts) == 1
    # "skill CURATOR" contains "CURATOR", so one membership check covers both.
    assert "CURATOR" in prompts[0]
    assert summaries  # on_summary was called
    assert any("stubbed-summary" in s for s in summaries)
|
|
|
|
|
|
def test_run_review_skips_llm_when_no_candidates(curator_env, monkeypatch):
    """With an empty skills directory the LLM hook is never invoked."""
    curator = curator_env["curator"]

    # No skills in the dir → no candidates
    prompts = []

    def fake_llm_review(prompt):
        prompts.append(prompt)
        return "never-called"

    monkeypatch.setattr(curator, "_run_llm_review", fake_llm_review)

    summaries = []
    curator.run_curator_review(
        on_summary=lambda s: summaries.append(s),
        synchronous=True,
    )

    assert prompts == []  # LLM not invoked
    assert any("skipped" in s for s in summaries)
|
|
|
|
|
|
def test_maybe_run_curator_respects_disabled(curator_env, monkeypatch):
    """``maybe_run_curator`` returns None when the config disables the curator."""
    curator = curator_env["curator"]
    monkeypatch.setattr(curator, "_load_config", lambda: {"enabled": False})

    assert curator.maybe_run_curator() is None
|
|
|
|
|
|
def test_maybe_run_curator_enforces_idle_gate(curator_env, monkeypatch):
    """Sixty seconds of idle time is below a 2-hour minimum, so nothing runs."""
    curator = curator_env["curator"]
    monkeypatch.setattr(curator, "_load_config", lambda: {"min_idle_hours": 2})

    # idle less than the threshold
    outcome = curator.maybe_run_curator(idle_for_seconds=60.0)

    assert outcome is None
|
|
|
|
|
|
def test_maybe_run_curator_runs_when_eligible(curator_env, monkeypatch):
    """With ample idle time and no prior run, a review result is returned."""
    curator = curator_env["curator"]
    _write_skill(curator_env["home"] / "skills", "a")

    # Force idle over threshold
    outcome = curator.maybe_run_curator(idle_for_seconds=99999.0)

    assert outcome is not None
    assert "started_at" in outcome
|
|
|
|
|
|
def test_maybe_run_curator_swallows_exceptions(curator_env, monkeypatch):
    """An error raised by the eligibility check yields None instead of propagating."""
    curator = curator_env["curator"]

    def explode():
        raise RuntimeError("boom")

    monkeypatch.setattr(curator, "should_run_now", explode)

    # Must not raise
    outcome = curator.maybe_run_curator()
    assert outcome is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Persistence
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_state_file_survives_corrupt_read(curator_env):
    """A state file holding invalid JSON loads as the default state."""
    curator = curator_env["curator"]
    state_path = curator._state_file()
    state_path.write_text("not json", encoding="utf-8")

    # Must fall back to default, not raise
    loaded = curator.load_state()
    assert loaded == curator._default_state()
|
|
|
|
|
|
def test_state_atomic_write_no_tmp_leftovers(curator_env):
    """Saving state leaves no ``.curator_state_*`` files beside the state file."""
    curator = curator_env["curator"]
    curator.save_state({"paused": True})

    state_dir = curator._state_file().parent
    for entry in state_dir.iterdir():
        leftover = entry.name.startswith(".curator_state_")
        assert not leftover, f"tmp leftover: {entry.name}"
|
|
|
|
|
|
def test_curator_review_prompt_has_invariants():
    """Core invariants must be in the review prompt text."""
    from agent.curator import CURATOR_REVIEW_PROMPT

    # The upper-case marker is checked against the raw text; every other
    # keyword is matched case-insensitively.
    assert "MUST NOT" in CURATOR_REVIEW_PROMPT

    lowered = CURATOR_REVIEW_PROMPT.lower()
    assert "bundled" in lowered
    assert "delete" in lowered
    assert "pinned" in lowered

    # Must mention the decisions the reviewer can make
    for verb in ("keep", "patch", "archive", "pin"):
        assert verb in lowered
|