mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-03 02:11:48 +00:00
* fix(curator): defer first run and add --dry-run preview (#18373) Curator was meant to run 7 days after install, not on the very first gateway tick. On a fresh install (no .curator_state), should_run_now() returned True immediately because last_run_at was None — so the gateway cron ticker fired Curator against a fresh skill library moments after 'hermes update'. Combined with the binary 'agent-created' provenance model (anything not bundled and not hub-installed), this consolidated hand-authored user workflow skills without consent. Changes: - should_run_now(): first observation seeds last_run_at='now' and returns False. The next real pass fires one full interval_hours later (7 days by default), matching the original design intent. - hermes curator run --dry-run: produces the same review report without applying automatic transitions OR permitting the LLM to call skill_manage / terminal mv. A DRY-RUN banner is prepended to the prompt and the caller skips apply_automatic_transitions. State is NOT advanced so a preview doesn't defer the next scheduled real pass. - hermes update: prints a one-liner on fresh installs pointing at --dry-run, pause, and the docs. Silent on steady state. - Docs: curator.md and cli-commands.md explain the deferred first-run behavior and warn that hand-written SKILL.md files share the 'agent-created' bucket, with guidance to pin or preview before the first pass. Tests: - test_first_run_defers replaces the old 'first run always eligible' assertion — same fixture, inverted expectation. - test_maybe_run_curator_defers_on_fresh_install covers the gateway tick path end-to-end. - Three new dry-run tests cover state-advance suppression, prompt banner injection, and apply_automatic_transitions skipping. Fixes #18373. * feat(curator): pre-run backup + rollback (#18373) Every real curator pass now snapshots ~/.hermes/skills/ into ~/.hermes/skills/.curator_backups/<utc-iso>/skills.tar.gz before calling apply_automatic_transitions or the LLM review. If a run consolidates or archives something the user didn't want touched, 'hermes curator rollback' restores the tree in one command. Dry-run is skipped — no mutation means no snapshot needed. Changes: - agent/curator_backup.py (new): tar.gz snapshot + safe rollback. The snapshot excludes .curator_backups/ (would recurse) and .hub/ (managed by the skills hub). Extract refuses absolute paths and .. components, and uses tarfile's filter='data' on Python 3.12+. Rollback takes a pre-rollback safety snapshot FIRST, stages the current tree into .rollback-staging-<ts>/ so the extract lands in an empty dir, and cleans the staging dir on success. A failed extract restores the staged contents. - agent/curator.py: run_curator_review() calls curator_backup. snapshot_skills(reason='pre-curator-run') before apply_automatic_ transitions. Best-effort — a failed snapshot logs at debug and the run continues (a transient disk issue shouldn't silently disable curator forever). - hermes_cli/curator.py: new 'hermes curator backup' and 'hermes curator rollback' subcommands. rollback supports --list, --id <ts>, -y. - hermes_cli/config.py: curator.backup.{enabled, keep} config block with sane defaults (enabled=true, keep=5). - Docs: curator.md gets a 'Backups and rollback' section; cli-commands .md table gets the new rows. Tests (new file tests/agent/test_curator_backup.py, 16 cases): - snapshot creates tarball + manifest with correct counts - snapshot excludes .curator_backups/ (recursion guard) and .hub/ - snapshot disabled via config returns None without creating anything - snapshot uniquifies ids within the same second (-01 suffix) - prune honors keep count, newest-first - list_backups + _resolve_backup cover newest-default and unknown-id - rollback restores a deleted skill with content intact - rollback is itself undoable — safety snapshot shows up in list_backups - rollback with no snapshots returns an error - rollback refuses tarballs with absolute paths or .. components - real curator runs take a 'pre-curator-run' snapshot; dry-runs do not All curator tests: 210 passing locally.
This commit is contained in:
parent
c5b4c48165
commit
77c0bc6b13
9 changed files with 1226 additions and 20 deletions
|
|
@ -86,9 +86,22 @@ def test_curator_config_overrides(curator_env, monkeypatch):
|
|||
# should_run_now
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_first_run_always_eligible(curator_env):
|
||||
def test_first_run_defers(curator_env):
|
||||
"""The FIRST observation of the curator (fresh install, no state file)
|
||||
must NOT trigger an immediate run. The curator is designed to run after
|
||||
a full ``interval_hours`` of skill activity, not on the first background
|
||||
tick after installation. Fixes #18373.
|
||||
"""
|
||||
c = curator_env["curator"]
|
||||
assert c.should_run_now() is True
|
||||
# No state file — should defer and seed last_run_at.
|
||||
assert c.should_run_now() is False
|
||||
state = c.load_state()
|
||||
assert state.get("last_run_at") is not None, (
|
||||
"first observation should seed last_run_at so the interval clock "
|
||||
"starts ticking instead of firing immediately next tick"
|
||||
)
|
||||
# A second immediate call still returns False (seeded, not yet stale).
|
||||
assert c.should_run_now() is False
|
||||
|
||||
|
||||
def test_recent_run_blocks(curator_env):
|
||||
|
|
@ -265,6 +278,77 @@ def test_run_review_records_state(curator_env):
|
|||
assert state["last_run_summary"] is not None
|
||||
|
||||
|
||||
def test_dry_run_does_not_advance_state(curator_env, monkeypatch):
|
||||
"""Dry-run previews must not bump last_run_at or run_count. A preview
|
||||
shouldn't defer the next scheduled real pass or look like a real run in
|
||||
`hermes curator status`. Fixes #18373.
|
||||
"""
|
||||
c = curator_env["curator"]
|
||||
skills_dir = curator_env["home"] / "skills"
|
||||
_write_skill(skills_dir, "a")
|
||||
|
||||
# Stub the LLM so the test doesn't need a provider.
|
||||
monkeypatch.setattr(
|
||||
c, "_run_llm_review",
|
||||
lambda prompt: {
|
||||
"final": "", "summary": "dry preview", "model": "", "provider": "",
|
||||
"tool_calls": [], "error": None,
|
||||
},
|
||||
)
|
||||
|
||||
c.run_curator_review(synchronous=True, dry_run=True)
|
||||
state = c.load_state()
|
||||
assert state.get("last_run_at") is None, "dry-run must not seed last_run_at"
|
||||
assert state.get("run_count", 0) == 0, "dry-run must not bump run_count"
|
||||
assert "dry-run" in (state.get("last_run_summary") or ""), (
|
||||
"dry-run summary should be labeled so status output is unambiguous"
|
||||
)
|
||||
|
||||
|
||||
def test_dry_run_injects_report_only_banner(curator_env, monkeypatch):
|
||||
"""The dry-run prompt must carry a banner instructing the LLM not to
|
||||
call any mutating tool. This is defense in depth — the caller also
|
||||
skips automatic transitions — but the LLM prompt is the only guard
|
||||
against the model calling skill_manage directly."""
|
||||
c = curator_env["curator"]
|
||||
skills_dir = curator_env["home"] / "skills"
|
||||
_write_skill(skills_dir, "a")
|
||||
|
||||
captured = {}
|
||||
def _stub(prompt):
|
||||
captured["prompt"] = prompt
|
||||
return {"final": "", "summary": "s", "model": "", "provider": "",
|
||||
"tool_calls": [], "error": None}
|
||||
monkeypatch.setattr(c, "_run_llm_review", _stub)
|
||||
|
||||
c.run_curator_review(synchronous=True, dry_run=True)
|
||||
assert "DRY-RUN" in captured["prompt"]
|
||||
assert "DO NOT" in captured["prompt"]
|
||||
|
||||
|
||||
def test_dry_run_skips_automatic_transitions(curator_env, monkeypatch):
|
||||
"""Dry-run must not call apply_automatic_transitions — the auto pass
|
||||
archives skills deterministically, and a preview must not touch the
|
||||
filesystem."""
|
||||
c = curator_env["curator"]
|
||||
skills_dir = curator_env["home"] / "skills"
|
||||
_write_skill(skills_dir, "a")
|
||||
|
||||
called = {"n": 0}
|
||||
def _explode(*_a, **_kw):
|
||||
called["n"] += 1
|
||||
return {"checked": 0, "marked_stale": 0, "archived": 0, "reactivated": 0}
|
||||
monkeypatch.setattr(c, "apply_automatic_transitions", _explode)
|
||||
monkeypatch.setattr(
|
||||
c, "_run_llm_review",
|
||||
lambda p: {"final": "", "summary": "s", "model": "", "provider": "",
|
||||
"tool_calls": [], "error": None},
|
||||
)
|
||||
|
||||
c.run_curator_review(synchronous=True, dry_run=True)
|
||||
assert called["n"] == 0, "dry-run must skip apply_automatic_transitions"
|
||||
|
||||
|
||||
def test_run_review_synchronous_invokes_llm_stub(curator_env, monkeypatch):
|
||||
c = curator_env["curator"]
|
||||
skills_dir = curator_env["home"] / "skills"
|
||||
|
|
@ -327,12 +411,32 @@ def test_maybe_run_curator_runs_when_eligible(curator_env, monkeypatch):
|
|||
c = curator_env["curator"]
|
||||
skills_dir = curator_env["home"] / "skills"
|
||||
_write_skill(skills_dir, "a")
|
||||
# Seed last_run_at far in the past so the interval gate opens — the
|
||||
# "no state" path intentionally defers the first run now (#18373).
|
||||
long_ago = datetime.now(timezone.utc) - timedelta(hours=c.get_interval_hours() * 2)
|
||||
c.save_state({"last_run_at": long_ago.isoformat(), "paused": False})
|
||||
# Force idle over threshold
|
||||
result = c.maybe_run_curator(idle_for_seconds=99999.0)
|
||||
assert result is not None
|
||||
assert "started_at" in result
|
||||
|
||||
|
||||
def test_maybe_run_curator_defers_on_fresh_install(curator_env):
|
||||
"""Fresh install (no curator state file) must NOT fire the curator on
|
||||
the first gateway tick. The first observation seeds last_run_at and
|
||||
returns None. Fixes #18373."""
|
||||
c = curator_env["curator"]
|
||||
skills_dir = curator_env["home"] / "skills"
|
||||
_write_skill(skills_dir, "a")
|
||||
# Infinite idle — the only thing that should block the run is the new
|
||||
# deferred-first-run gate.
|
||||
result = c.maybe_run_curator(idle_for_seconds=99999.0)
|
||||
assert result is None
|
||||
# And the next tick still defers (we seeded last_run_at to "now").
|
||||
result2 = c.maybe_run_curator(idle_for_seconds=99999.0)
|
||||
assert result2 is None
|
||||
|
||||
|
||||
def test_maybe_run_curator_swallows_exceptions(curator_env, monkeypatch):
|
||||
c = curator_env["curator"]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue