mirror of https://github.com/NousResearch/hermes-agent.git (synced 2026-05-04 02:21:47 +00:00)
feat(curator): per-run reports — run.json + REPORT.md under logs/curator/ (#17307)
Every curator pass now emits a dated report directory under
`~/.hermes/logs/curator/{YYYYMMDD-HHMMSS}/` with two files:
- `run.json` — machine-readable full record (before/after snapshot,
state transitions, all tool calls, model/provider, timing, the full
untruncated LLM final response, and any error)
- `REPORT.md` — human-readable markdown: model + duration header,
auto-transition counts, LLM consolidation stats, archived-this-run
list, new-skills-this-run list, state transitions, the full LLM
final summary, and a recovery footer pointing at the archive + the
`hermes curator restore` command
Reports live under `logs/curator/`, not inside `skills/` — they're
operational telemetry, not user-authored skill data, and belong
alongside `agent.log` / `gateway.log`.
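For illustration (the timestamp here is made up), a single run lands on disk as:

    ~/.hermes/logs/curator/20260429-053334/
        run.json     # machine-readable record of the pass
        REPORT.md    # human-readable summary

A rerun in the same second gets a numeric suffix (e.g. `20260429-053334-2/`) instead of overwriting the first report.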
Internals:
- `_run_llm_review()` now returns a dict (final, summary, model,
provider, tool_calls, error) instead of a bare truncated string so
the reporter has full fidelity
- Report writer is fully best-effort — any failure logs at DEBUG and
never breaks the curator itself. Same-second rerun gets a numeric
suffix so reports can't clobber each other
- Report path stamped into `.curator_state` as `last_report_path`
- `hermes curator status` surfaces a "last report:" line so users
can immediately open the latest run
Tests (all green):
- 7 new tests in tests/agent/test_curator_reports.py covering: report
location (logs not skills), both files written, run.json shape and
diff accuracy, markdown structure, error path still writes, state
transitions captured, same-second runs get unique dirs
- Existing test_run_review_synchronous_invokes_llm_stub updated to
stub the new dict-returning _run_llm_review signature
Live E2E: ran a synchronous pass against a 1-skill test collection
with a stubbed LLM; report written correctly, state stamped with
last_report_path, markdown human-readable, run.json machine-parseable.
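As a minimal sketch of the machine-parseable side: the key names below are the ones run.json carries in this change, but the hard-coded `~/.hermes` root and the "latest = last sorted dir" shortcut are illustrative only (the curator itself resolves the root via `get_hermes_home()`):

    import json
    from pathlib import Path

    # Illustrative default root; a custom HERMES_HOME/profile changes this.
    reports_root = Path.home() / ".hermes" / "logs" / "curator"

    # Run dirs are named {YYYYMMDD-HHMMSS}[-N], so a lexicographic sort
    # puts the most recent run last.
    latest = sorted(p for p in reports_root.iterdir() if p.is_dir())[-1]
    run = json.loads((latest / "run.json").read_text(encoding="utf-8"))

    print(run["model"], "via", run["provider"])
    print("skills:", run["counts"]["before"], "->", run["counts"]["after"])
    print("archived this run:", ", ".join(run["archived"]) or "none")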
parent 2d137074a3
commit bc0d8a941e
4 changed files with 592 additions and 16 deletions
agent/curator.py (332 changed lines)
@@ -28,7 +28,7 @@ import tempfile
 import threading
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
-from typing import Any, Callable, Dict, Optional
+from typing import Any, Callable, Dict, List, Optional, Set
 
 from hermes_constants import get_hermes_home
 from tools import skill_usage
@@ -354,6 +354,218 @@ CURATOR_REVIEW_PROMPT = (
 )
 
 
+# ---------------------------------------------------------------------------
+# Per-run reports — {YYYYMMDD-HHMMSS}/run.json + REPORT.md under logs/curator/
+# ---------------------------------------------------------------------------
+
+def _reports_root() -> Path:
+    """Directory where curator run reports are written.
+
+    Lives under the profile-aware logs dir (``~/.hermes/logs/curator/``)
+    alongside ``agent.log`` and ``gateway.log`` so it's found by anyone
+    looking for operational telemetry, not mixed in with the user's
+    authored skill data in ``~/.hermes/skills/``.
+    """
+    return get_hermes_home() / "logs" / "curator"
+
+
+def _write_run_report(
+    *,
+    started_at: datetime,
+    elapsed_seconds: float,
+    auto_counts: Dict[str, int],
+    auto_summary: str,
+    before_report: List[Dict[str, Any]],
+    before_names: Set[str],
+    after_report: List[Dict[str, Any]],
+    llm_meta: Dict[str, Any],
+) -> Optional[Path]:
+    """Write run.json + REPORT.md under logs/curator/{YYYYMMDD-HHMMSS}/.
+
+    Returns the report directory path on success, None if the write
+    couldn't happen (caller logs and continues — reporting is best-effort).
+    """
+    root = _reports_root()
+    try:
+        root.mkdir(parents=True, exist_ok=True)
+    except Exception as e:
+        logger.debug("Curator report dir create failed: %s", e)
+        return None
+
+    stamp = started_at.strftime("%Y%m%d-%H%M%S")
+    run_dir = root / stamp
+    # If we crash-reran within the same second, append a disambiguator
+    suffix = 1
+    while run_dir.exists():
+        suffix += 1
+        run_dir = root / f"{stamp}-{suffix}"
+    try:
+        run_dir.mkdir(parents=True, exist_ok=False)
+    except Exception as e:
+        logger.debug("Curator run dir create failed: %s", e)
+        return None
+
+    # Diff before/after
+    after_by_name = {r.get("name"): r for r in after_report if isinstance(r, dict)}
+    after_names = set(after_by_name.keys())
+    removed = sorted(before_names - after_names)  # archived during this run
+    added = sorted(after_names - before_names)  # new skills this run
+    before_by_name = {r.get("name"): r for r in before_report if isinstance(r, dict)}
+
+    # State transitions between the two snapshots (e.g. active -> stale)
+    transitions: List[Dict[str, str]] = []
+    for name in sorted(after_names & before_names):
+        s_before = (before_by_name.get(name) or {}).get("state")
+        s_after = (after_by_name.get(name) or {}).get("state")
+        if s_before and s_after and s_before != s_after:
+            transitions.append({"name": name, "from": s_before, "to": s_after})
+
+    # Classify LLM tool calls
+    tc_counts: Dict[str, int] = {}
+    for tc in llm_meta.get("tool_calls", []) or []:
+        name = tc.get("name", "unknown")
+        tc_counts[name] = tc_counts.get(name, 0) + 1
+
+    payload = {
+        "started_at": started_at.isoformat(),
+        "duration_seconds": round(elapsed_seconds, 2),
+        "model": llm_meta.get("model", ""),
+        "provider": llm_meta.get("provider", ""),
+        "auto_transitions": auto_counts,
+        "counts": {
+            "before": len(before_names),
+            "after": len(after_names),
+            "delta": len(after_names) - len(before_names),
+            "archived_this_run": len(removed),
+            "added_this_run": len(added),
+            "state_transitions": len(transitions),
+            "tool_calls_total": sum(tc_counts.values()),
+        },
+        "tool_call_counts": tc_counts,
+        "archived": removed,
+        "added": added,
+        "state_transitions": transitions,
+        "llm_final": llm_meta.get("final", ""),
+        "llm_summary": llm_meta.get("summary", ""),
+        "llm_error": llm_meta.get("error"),
+        "tool_calls": llm_meta.get("tool_calls", []),
+    }
+
+    # run.json — machine-readable, full fidelity
+    try:
+        (run_dir / "run.json").write_text(
+            json.dumps(payload, indent=2, ensure_ascii=False) + "\n",
+            encoding="utf-8",
+        )
+    except Exception as e:
+        logger.debug("Curator run.json write failed: %s", e)
+
+    # REPORT.md — human-readable
+    try:
+        md = _render_report_markdown(payload)
+        (run_dir / "REPORT.md").write_text(md, encoding="utf-8")
+    except Exception as e:
+        logger.debug("Curator REPORT.md write failed: %s", e)
+
+    return run_dir
+
+
+def _render_report_markdown(p: Dict[str, Any]) -> str:
+    """Render the human-readable report."""
+    lines: List[str] = []
+    started = p.get("started_at", "")
+    duration = p.get("duration_seconds", 0) or 0
+    mins, secs = divmod(int(duration), 60)
+    dur_label = f"{mins}m {secs}s" if mins else f"{secs}s"
+
+    lines.append(f"# Curator run — {started}\n")
+    model = p.get("model") or "(not resolved)"
+    prov = p.get("provider") or "(not resolved)"
+    counts = p.get("counts") or {}
+    lines.append(
+        f"Model: `{model}` via `{prov}` · Duration: {dur_label} · "
+        f"Agent-created skills: {counts.get('before', 0)} → {counts.get('after', 0)} "
+        f"({counts.get('delta', 0):+d})\n"
+    )
+
+    error = p.get("llm_error")
+    if error:
+        lines.append(f"> ⚠ LLM pass error: `{error}`\n")
+
+    # Auto-transitions (pure, no LLM)
+    auto = p.get("auto_transitions") or {}
+    lines.append("## Auto-transitions (pure, no LLM)\n")
+    lines.append(f"- checked: {auto.get('checked', 0)}")
+    lines.append(f"- marked stale: {auto.get('marked_stale', 0)}")
+    lines.append(f"- archived: {auto.get('archived', 0)}")
+    lines.append(f"- reactivated: {auto.get('reactivated', 0)}")
+    lines.append("")
+
+    # LLM pass numbers
+    tc_counts = p.get("tool_call_counts") or {}
+    lines.append("## LLM consolidation pass\n")
+    lines.append(f"- tool calls: **{counts.get('tool_calls_total', 0)}** "
+                 f"(by name: {', '.join(f'{k}={v}' for k, v in sorted(tc_counts.items())) or 'none'})")
+    lines.append(f"- archived this run: **{counts.get('archived_this_run', 0)}**")
+    lines.append(f"- new skills this run: **{counts.get('added_this_run', 0)}**")
+    lines.append(f"- state transitions (active ↔ stale ↔ archived): "
+                 f"**{counts.get('state_transitions', 0)}**")
+    lines.append("")
+
+    # Archived list
+    archived = p.get("archived") or []
+    if archived:
+        lines.append(f"### Skills archived ({len(archived)})\n")
+        lines.append("_Archived skills are at `~/.hermes/skills/.archive/`. "
+                     "Restore any via `hermes curator restore <name>`._\n")
+        # Show first 50 inline, note truncation after that
+        SHOW = 50
+        for n in archived[:SHOW]:
+            lines.append(f"- `{n}`")
+        if len(archived) > SHOW:
+            lines.append(f"- … and {len(archived) - SHOW} more (see `run.json` for the full list)")
+        lines.append("")
+
+    # Added list
+    added = p.get("added") or []
+    if added:
+        lines.append(f"### New skills this run ({len(added)})\n")
+        lines.append("_Usually these are new class-level umbrellas created via `skill_manage action=create`._\n")
+        for n in added:
+            lines.append(f"- `{n}`")
+        lines.append("")
+
+    # State transitions
+    trans = p.get("state_transitions") or []
+    if trans:
+        lines.append(f"### State transitions ({len(trans)})\n")
+        for t in trans:
+            lines.append(f"- `{t.get('name')}`: {t.get('from')} → {t.get('to')}")
+        lines.append("")
+
+    # Full LLM final response
+    final = (p.get("llm_final") or "").strip()
+    if final:
+        lines.append("## LLM final summary\n")
+        lines.append(final)
+        lines.append("")
+    elif not error:
+        llm_sum = p.get("llm_summary") or ""
+        if llm_sum:
+            lines.append("## LLM summary\n")
+            lines.append(llm_sum)
+            lines.append("")
+
+    # Recovery footer
+    lines.append("## Recovery\n")
+    lines.append("- Restore an archived skill: `hermes curator restore <name>`")
+    lines.append("- All archives live under `~/.hermes/skills/.archive/` and are recoverable by `mv`")
+    lines.append("- See `run.json` in this directory for the full machine-readable record.")
+    lines.append("")
+
+    return "\n".join(lines)
+
+
 # ---------------------------------------------------------------------------
 # Orchestrator — spawn a forked AIAgent for the LLM review pass
 # ---------------------------------------------------------------------------
@@ -415,22 +627,72 @@ def run_curator_review(
 
     def _llm_pass():
         nonlocal auto_summary
+        # Snapshot skill state BEFORE the LLM pass so the report can diff.
+        try:
+            before_report = skill_usage.agent_created_report()
+        except Exception:
+            before_report = []
+        before_names = {r.get("name") for r in before_report if isinstance(r, dict)}
+
+        llm_meta: Dict[str, Any] = {}
         try:
             candidate_list = _render_candidate_list()
             if "No agent-created skills" in candidate_list:
                 final_summary = f"auto: {auto_summary}; llm: skipped (no candidates)"
+                llm_meta = {
+                    "final": "",
+                    "summary": "skipped (no candidates)",
+                    "model": "",
+                    "provider": "",
+                    "tool_calls": [],
+                    "error": None,
+                }
             else:
                 prompt = f"{CURATOR_REVIEW_PROMPT}\n\n{candidate_list}"
-                llm_summary = _run_llm_review(prompt)
-                final_summary = f"auto: {auto_summary}; llm: {llm_summary}"
+                llm_meta = _run_llm_review(prompt)
+                final_summary = (
+                    f"auto: {auto_summary}; llm: {llm_meta.get('summary', 'no change')}"
+                )
         except Exception as e:
             logger.debug("Curator LLM pass failed: %s", e, exc_info=True)
             final_summary = f"auto: {auto_summary}; llm: error ({e})"
+            llm_meta = {
+                "final": "",
+                "summary": f"error ({e})",
+                "model": "",
+                "provider": "",
+                "tool_calls": [],
+                "error": str(e),
+            }
 
         elapsed = (datetime.now(timezone.utc) - start).total_seconds()
         state2 = load_state()
         state2["last_run_duration_seconds"] = elapsed
         state2["last_run_summary"] = final_summary
+
+        # Write the per-run report. Runs in a best-effort try so a
+        # reporting bug never breaks the curator itself. Report path is
+        # recorded in state so `hermes curator status` can point at it.
+        try:
+            after_report = skill_usage.agent_created_report()
+        except Exception:
+            after_report = []
+        try:
+            report_path = _write_run_report(
+                started_at=start,
+                elapsed_seconds=elapsed,
+                auto_counts=counts,
+                auto_summary=auto_summary,
+                before_report=before_report,
+                before_names=before_names,
+                after_report=after_report,
+                llm_meta=llm_meta,
+            )
+            if report_path is not None:
+                state2["last_report_path"] = str(report_path)
+        except Exception as e:
+            logger.debug("Curator report write failed: %s", e, exc_info=True)
+
         save_state(state2)
 
         if on_summary:
@@ -452,14 +714,34 @@ def run_curator_review(
     }
 
 
-def _run_llm_review(prompt: str) -> str:
-    """Spawn an AIAgent fork to run the curator review prompt. Returns a short
-    summary of what the model said in its final response."""
+def _run_llm_review(prompt: str) -> Dict[str, Any]:
+    """Spawn an AIAgent fork to run the curator review prompt.
+
+    Returns a dict with:
+      - final: full (untruncated) final response from the reviewer
+      - summary: short summary suitable for state file (240-char cap)
+      - model, provider: what the fork actually ran on
+      - tool_calls: list of {name, arguments} for every tool call made during
+        the pass (arguments may be truncated for readability)
+      - error: set if the pass failed mid-run; final/summary may still be empty
+
+    Never raises; callers get a structured failure instead.
+    """
     import contextlib
+    result_meta: Dict[str, Any] = {
+        "final": "",
+        "summary": "",
+        "model": "",
+        "provider": "",
+        "tool_calls": [],
+        "error": None,
+    }
     try:
         from run_agent import AIAgent
     except Exception as e:
-        return f"AIAgent import failed: {e}"
+        result_meta["error"] = f"AIAgent import failed: {e}"
+        result_meta["summary"] = result_meta["error"]
+        return result_meta
 
     # Resolve provider + model the same way the CLI does, so the curator
     # fork inherits the user's active main config rather than falling
@@ -489,6 +771,9 @@ def _run_llm_review(prompt: str) -> str:
     except Exception as e:
         logger.debug("Curator provider resolution failed: %s", e, exc_info=True)
 
+    result_meta["model"] = _model_name
+    result_meta["provider"] = _resolved_provider or ""
+
     review_agent = None
     try:
         review_agent = AIAgent(
@@ -520,20 +805,43 @@ def _run_llm_review(prompt: str) -> str:
         with open(os.devnull, "w") as _devnull, \
                 contextlib.redirect_stdout(_devnull), \
                 contextlib.redirect_stderr(_devnull):
-            result = review_agent.run_conversation(user_message=prompt)
+            conv_result = review_agent.run_conversation(user_message=prompt)
 
         final = ""
-        if isinstance(result, dict):
-            final = str(result.get("final_response") or "").strip()
-        return (final[:240] + "…") if len(final) > 240 else (final or "no change")
+        if isinstance(conv_result, dict):
+            final = str(conv_result.get("final_response") or "").strip()
+        result_meta["final"] = final
+        result_meta["summary"] = (final[:240] + "…") if len(final) > 240 else (final or "no change")
+
+        # Collect tool calls for the report. Walk the forked agent's
+        # session messages and extract every tool_call made during the
+        # pass. Truncate argument payloads so a giant skill_manage create
+        # doesn't blow up the report.
+        _calls: List[Dict[str, Any]] = []
+        for msg in getattr(review_agent, "_session_messages", []) or []:
+            if not isinstance(msg, dict):
+                continue
+            tcs = msg.get("tool_calls") or []
+            for tc in tcs:
+                if not isinstance(tc, dict):
+                    continue
+                fn = tc.get("function") or {}
+                name = fn.get("name") or ""
+                args_raw = fn.get("arguments") or ""
+                if isinstance(args_raw, str) and len(args_raw) > 400:
+                    args_raw = args_raw[:400] + "…"
+                _calls.append({"name": name, "arguments": args_raw})
+        result_meta["tool_calls"] = _calls
     except Exception as e:
-        return f"error: {e}"
+        result_meta["error"] = f"error: {e}"
+        result_meta["summary"] = result_meta["error"]
     finally:
         if review_agent is not None:
            try:
                review_agent.close()
            except Exception:
                pass
+    return result_meta
 
 
 # ---------------------------------------------------------------------------
@@ -55,6 +55,9 @@ def _cmd_status(args) -> int:
     print(f" runs: {runs}")
     print(f" last run: {_fmt_ts(last_run)}")
     print(f" last summary: {summary}")
+    _report = state.get("last_report_path")
+    if _report:
+        print(f" last report: {_report}")
     _ih = curator.get_interval_hours()
     _interval_label = (
         f"{_ih // 24}d" if _ih % 24 == 0 and _ih >= 24
@@ -271,10 +271,17 @@ def test_run_review_synchronous_invokes_llm_stub(curator_env, monkeypatch):
     _write_skill(skills_dir, "a")
 
     calls = []
-    monkeypatch.setattr(
-        c, "_run_llm_review",
-        lambda prompt: (calls.append(prompt), "stubbed-summary")[1],
-    )
+    def _stub(prompt):
+        calls.append(prompt)
+        return {
+            "final": "stubbed-summary",
+            "summary": "stubbed-summary",
+            "model": "stub-model",
+            "provider": "stub-provider",
+            "tool_calls": [],
+            "error": None,
+        }
+    monkeypatch.setattr(c, "_run_llm_review", _stub)
 
     captured = []
     c.run_curator_review(on_summary=lambda s: captured.append(s), synchronous=True)
tests/agent/test_curator_reports.py (new file, 258 lines)
@@ -0,0 +1,258 @@
+"""Tests for the curator per-run report writer (run.json + REPORT.md).
+
+Reports live under ``~/.hermes/logs/curator/{YYYYMMDD-HHMMSS}/`` alongside
+the standard log dir, not inside the user's ``skills/`` data directory.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from datetime import datetime, timezone, timedelta
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture
+def curator_env(tmp_path, monkeypatch):
+    """Isolated HERMES_HOME with a skills/ dir + reset curator module state."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    (home / "skills").mkdir()
+    (home / "logs").mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+    import importlib
+    import hermes_constants
+    importlib.reload(hermes_constants)
+    from agent import curator
+    importlib.reload(curator)
+    from tools import skill_usage
+    importlib.reload(skill_usage)
+    yield {"home": home, "curator": curator, "skill_usage": skill_usage}
+
+
+def _make_llm_meta(**overrides):
+    base = {
+        "final": "short summary of the pass",
+        "summary": "short summary",
+        "model": "test-model",
+        "provider": "test-provider",
+        "tool_calls": [],
+        "error": None,
+    }
+    base.update(overrides)
+    return base
+
+
+def test_reports_root_is_under_logs_not_skills(curator_env):
+    """Reports live in logs/curator/, not skills/ — operational telemetry
+    belongs with the logs, not with user-authored skill data."""
+    curator = curator_env["curator"]
+    root = curator._reports_root()
+    home = curator_env["home"]
+    # Must be under logs/
+    assert root == home / "logs" / "curator"
+    # Must NOT be under skills/
+    assert "skills" not in root.parts
+
+
+def test_write_run_report_creates_both_files(curator_env):
+    """Each run writes both a run.json (machine) and a REPORT.md (human)."""
+    curator = curator_env["curator"]
+    start = datetime.now(timezone.utc)
+
+    run_dir = curator._write_run_report(
+        started_at=start,
+        elapsed_seconds=12.345,
+        auto_counts={"checked": 5, "marked_stale": 1, "archived": 0, "reactivated": 0},
+        auto_summary="1 marked stale",
+        before_report=[],
+        before_names=set(),
+        after_report=[],
+        llm_meta=_make_llm_meta(),
+    )
+    assert run_dir is not None
+    assert run_dir.is_dir()
+    assert (run_dir / "run.json").exists()
+    assert (run_dir / "REPORT.md").exists()
+
+    # The directory name is a timestamp under logs/curator/
+    assert run_dir.parent == curator._reports_root()
+
+
+def test_run_json_has_expected_shape(curator_env):
+    """run.json must carry the machine-readable fields downstream tooling needs."""
+    curator = curator_env["curator"]
+    start = datetime.now(timezone.utc)
+
+    before_report = [
+        {"name": "old-thing", "state": "active", "pinned": False},
+        {"name": "keeper", "state": "active", "pinned": True},
+    ]
+    after_report = [
+        {"name": "keeper", "state": "active", "pinned": True},
+        {"name": "new-umbrella", "state": "active", "pinned": False},
+    ]
+
+    run_dir = curator._write_run_report(
+        started_at=start,
+        elapsed_seconds=42.0,
+        auto_counts={"checked": 2, "marked_stale": 0, "archived": 0, "reactivated": 0},
+        auto_summary="no changes",
+        before_report=before_report,
+        before_names={r["name"] for r in before_report},
+        after_report=after_report,
+        llm_meta=_make_llm_meta(
+            final="I consolidated the whole universe.",
+            tool_calls=[
+                {"name": "skills_list", "arguments": "{}"},
+                {"name": "skill_manage", "arguments": '{"action":"create"}'},
+                {"name": "terminal", "arguments": "mv ..."},
+            ],
+        ),
+    )
+    payload = json.loads((run_dir / "run.json").read_text())
+
+    # top-level shape
+    for k in (
+        "started_at", "duration_seconds", "model", "provider",
+        "auto_transitions", "counts", "tool_call_counts",
+        "archived", "added", "state_transitions",
+        "llm_final", "llm_summary", "llm_error", "tool_calls",
+    ):
+        assert k in payload, f"missing key: {k}"
+
+    # Diff logic
+    assert payload["archived"] == ["old-thing"]
+    assert payload["added"] == ["new-umbrella"]
+    # Counts reflect the diff
+    assert payload["counts"]["before"] == 2
+    assert payload["counts"]["after"] == 2
+    assert payload["counts"]["archived_this_run"] == 1
+    assert payload["counts"]["added_this_run"] == 1
+    # Tool call counts are aggregated
+    assert payload["tool_call_counts"]["skills_list"] == 1
+    assert payload["tool_call_counts"]["skill_manage"] == 1
+    assert payload["tool_call_counts"]["terminal"] == 1
+    assert payload["counts"]["tool_calls_total"] == 3
+
+
+def test_report_md_is_human_readable(curator_env):
+    """REPORT.md should be a valid markdown doc with the key sections visible."""
+    curator = curator_env["curator"]
+    start = datetime.now(timezone.utc)
+
+    run_dir = curator._write_run_report(
+        started_at=start,
+        elapsed_seconds=75.0,
+        auto_counts={"checked": 10, "marked_stale": 2, "archived": 1, "reactivated": 0},
+        auto_summary="2 marked stale, 1 archived",
+        before_report=[{"name": "foo", "state": "active", "pinned": False}],
+        before_names={"foo"},
+        after_report=[{"name": "foo-umbrella", "state": "active", "pinned": False}],
+        llm_meta=_make_llm_meta(
+            final="Consolidated foo-like skills into foo-umbrella.",
+            model="claude-opus-4.7",
+            provider="openrouter",
+        ),
+    )
+    md = (run_dir / "REPORT.md").read_text()
+
+    # Structural checks
+    assert "# Curator run" in md
+    assert "Auto-transitions" in md
+    assert "LLM consolidation pass" in md
+    assert "Recovery" in md
+
+    # The model / provider we passed in show up
+    assert "claude-opus-4.7" in md
+    assert "openrouter" in md
+
+    # The added/archived lists are present
+    assert "Skills archived" in md
+    assert "`foo`" in md
+    assert "New skills this run" in md
+    assert "`foo-umbrella`" in md
+
+    # The full LLM final response is included verbatim (no 240-char truncation)
+    assert "Consolidated foo-like skills into foo-umbrella." in md
+
+
+def test_same_second_reruns_get_unique_dirs(curator_env):
+    """If the curator somehow runs twice in the same second, the second
+    report still gets its own directory rather than overwriting the first."""
+    curator = curator_env["curator"]
+    start = datetime(2026, 4, 29, 5, 33, 34, tzinfo=timezone.utc)
+
+    kwargs = dict(
+        started_at=start,
+        elapsed_seconds=1.0,
+        auto_counts={"checked": 0, "marked_stale": 0, "archived": 0, "reactivated": 0},
+        auto_summary="no changes",
+        before_report=[],
+        before_names=set(),
+        after_report=[],
+        llm_meta=_make_llm_meta(),
+    )
+    a = curator._write_run_report(**kwargs)
+    b = curator._write_run_report(**kwargs)
+    assert a != b
+    assert a is not None and b is not None
+    # Second dir has a numeric disambiguator suffix
+    assert b.name.startswith(a.name)
+
+
+def test_report_captures_llm_error_and_continues(curator_env):
+    """If the LLM pass recorded an error, the report still writes and
+    surfaces the error prominently."""
+    curator = curator_env["curator"]
+    run_dir = curator._write_run_report(
+        started_at=datetime.now(timezone.utc),
+        elapsed_seconds=2.0,
+        auto_counts={"checked": 0, "marked_stale": 0, "archived": 0, "reactivated": 0},
+        auto_summary="no changes",
+        before_report=[],
+        before_names=set(),
+        after_report=[],
+        llm_meta=_make_llm_meta(
+            error="HTTP 400: No models provided",
+            final="",
+            summary="error",
+        ),
+    )
+    md = (run_dir / "REPORT.md").read_text()
+    assert "HTTP 400" in md
+    payload = json.loads((run_dir / "run.json").read_text())
+    assert payload["llm_error"] == "HTTP 400: No models provided"
+
+
+def test_state_transitions_captured_in_report(curator_env):
+    """When a skill moves active → stale or stale → archived between
+    before/after snapshots, the report records it."""
+    curator = curator_env["curator"]
+    start = datetime.now(timezone.utc)
+
+    before = [{"name": "getting-old", "state": "active", "pinned": False}]
+    after = [{"name": "getting-old", "state": "stale", "pinned": False}]
+
+    run_dir = curator._write_run_report(
+        started_at=start,
+        elapsed_seconds=1.0,
+        auto_counts={"checked": 1, "marked_stale": 1, "archived": 0, "reactivated": 0},
+        auto_summary="1 marked stale",
+        before_report=before,
+        before_names={r["name"] for r in before},
+        after_report=after,
+        llm_meta=_make_llm_meta(),
+    )
+    payload = json.loads((run_dir / "run.json").read_text())
+    assert payload["state_transitions"] == [
+        {"name": "getting-old", "from": "active", "to": "stale"}
+    ]
+    md = (run_dir / "REPORT.md").read_text()
+    assert "State transitions" in md
+    assert "getting-old" in md
+    assert "active → stale" in md