From 0da968e521f3870cabfba3c2d9fceb1aec9b1e69 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 30 Apr 2026 02:46:01 -0700 Subject: [PATCH] fix(curator): unify under auxiliary.curator (hermes model, dashboard) (#17868) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Voscko reported curator.auxiliary.provider/model was advertised in the docs but ignored — the review fork read only model.provider/default. The narrow fix would wire the one-off key through, but that leaves curator as a parallel system: not in `hermes model` → auxiliary picker, not in the dashboard Models tab, missing per-task base_url/api_key/timeout/ extra_body. Unify curator with the rest of the aux task system so `hermes model` and the dashboard configure it like every other aux task. Four sources of truth updated: - hermes_cli/config.py — add 'curator' slot to DEFAULT_CONFIG.auxiliary (timeout=600 since reviews run long), drop the one-off curator.auxiliary block from DEFAULT_CONFIG.curator. - hermes_cli/main.py — add ('curator', 'Curator', 'skill-usage review pass') to _AUX_TASKS so the CLI picker offers it. - hermes_cli/web_server.py — add 'curator' to _AUX_TASK_SLOTS so the dashboard REST endpoint accepts it. - web/src/pages/ModelsPage.tsx — add Curator entry so the dashboard Models tab renders the task. agent/curator.py _resolve_review_model() now reads auxiliary.curator first (canonical), falls back to legacy curator.auxiliary (with an info log asking users to migrate), then falls back to the main chat model. Pre-unification users keep working. Docs updated: docs/user-guide/features/curator.md now points at `hermes model` → auxiliary → Curator and the dashboard Models tab. Tests: 6 unit tests on _resolve_review_model (auto default, canonical slot honored, partial override fallback, legacy fallback with deprecation log assertion, new-wins-over-legacy, empty-config safety) plus a cross-registry test that curator is wired into all four sources of truth. test_aux_tasks_keys_all_exist_in_default_config already covers the DEFAULT_CONFIG ↔ _AUX_TASKS invariant. Reported by Voscko on Discord. --- agent/curator.py | 52 ++++++- hermes_cli/config.py | 19 ++- hermes_cli/main.py | 1 + hermes_cli/web_server.py | 1 + tests/agent/test_curator.py | 150 ++++++++++++++++++++ web/src/pages/ModelsPage.tsx | 1 + website/docs/user-guide/features/curator.md | 32 ++++- 7 files changed, 243 insertions(+), 13 deletions(-) diff --git a/agent/curator.py b/agent/curator.py index c93a9c1853..044f9904c1 100644 --- a/agent/curator.py +++ b/agent/curator.py @@ -714,6 +714,49 @@ def run_curator_review( } +def _resolve_review_model(cfg: Dict[str, Any]) -> tuple[str, str]: + """Pick (provider, model) for the curator review fork. + + Curator is a regular auxiliary task slot — ``auxiliary.curator.{provider,model}`` + — so it participates in the canonical aux-model plumbing (``hermes model`` → + auxiliary picker, the dashboard Models tab, ``auxiliary.curator.{timeout, + base_url,api_key,extra_body}``). ``provider: "auto"`` with an empty model + means "use the main chat model" — same default as every other aux task. + + Legacy fallback: users who configured ``curator.auxiliary.{provider,model}`` + under the previous one-off schema still work. Precedence: + 1. ``auxiliary.curator.{provider,model}`` when both are set non-auto + 2. Legacy ``curator.auxiliary.{provider,model}`` when both are set + 3. Main ``model.{provider,default/model}`` pair + """ + _main = cfg.get("model", {}) if isinstance(cfg.get("model"), dict) else {} + _main_provider = _main.get("provider") or "auto" + _main_model = _main.get("default") or _main.get("model") or "" + + # 1. Canonical aux task slot + _aux = cfg.get("auxiliary", {}) if isinstance(cfg.get("auxiliary"), dict) else {} + _cur_task = _aux.get("curator", {}) if isinstance(_aux.get("curator"), dict) else {} + _task_provider = (_cur_task.get("provider") or "").strip() or None + _task_model = (_cur_task.get("model") or "").strip() or None + if _task_provider and _task_provider != "auto" and _task_model: + return _task_provider, _task_model + + # 2. Legacy curator.auxiliary.{provider,model} (deprecated, pre-unification) + _cur = cfg.get("curator", {}) if isinstance(cfg.get("curator"), dict) else {} + _legacy = _cur.get("auxiliary", {}) if isinstance(_cur.get("auxiliary"), dict) else {} + _legacy_provider = _legacy.get("provider") or None + _legacy_model = _legacy.get("model") or None + if _legacy_provider and _legacy_model: + logger.info( + "curator: using deprecated curator.auxiliary.{provider,model} " + "config — please migrate to auxiliary.curator.{provider,model}" + ) + return _legacy_provider, _legacy_model + + # 3. Fall through to the main chat model + return _main_provider, _main_model + + def _run_llm_review(prompt: str) -> Dict[str, Any]: """Spawn an AIAgent fork to run the curator review prompt. @@ -749,6 +792,11 @@ def _run_llm_review(prompt: str) -> Dict[str, Any]: # "No models provided"). AIAgent() without explicit provider/model # arguments hits an auto-resolution path that fails for OAuth-only # providers and for pool-backed credentials. + # + # `_resolve_review_model()` honors `auxiliary.curator.{provider,model}` + # (canonical aux-task slot, wired through `hermes model` → auxiliary + # picker and the dashboard Models tab), with a legacy fallback to + # `curator.auxiliary.{provider,model}`. See docs/user-guide/features/curator.md. _api_key = None _base_url = None _api_mode = None @@ -758,9 +806,7 @@ def _run_llm_review(prompt: str) -> Dict[str, Any]: from hermes_cli.config import load_config from hermes_cli.runtime_provider import resolve_runtime_provider _cfg = load_config() - _m = _cfg.get("model", {}) if isinstance(_cfg.get("model"), dict) else {} - _provider = _m.get("provider") or "auto" - _model_name = _m.get("default") or _m.get("model") or "" + _provider, _model_name = _resolve_review_model(_cfg) _rp = resolve_runtime_provider( requested=_provider, target_model=_model_name ) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 283ecd8a31..7907af2253 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -709,6 +709,19 @@ DEFAULT_CONFIG = { "timeout": 30, "extra_body": {}, }, + # Curator — skill-usage review fork. Timeout is generous because the + # review pass can take several minutes on reasoning models (umbrella + # building over hundreds of candidate skills). "auto" = use main chat + # model; override via `hermes model` → auxiliary → Curator to route + # to a cheaper aux model (e.g. openrouter google/gemini-3-flash-preview). + "curator": { + "provider": "auto", + "model": "", + "base_url": "", + "api_key": "", + "timeout": 600, + "extra_body": {}, + }, }, "display": { @@ -949,12 +962,6 @@ DEFAULT_CONFIG = { # Archive a skill (move to skills/.archive/) after this many days # without use. Archived skills are recoverable — no auto-deletion. "archive_after_days": 90, - # Optional per-task override for the curator's aux model. Leave null - # to use Hermes' main auxiliary client resolution. - "auxiliary": { - "provider": None, - "model": None, - }, }, # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth. diff --git a/hermes_cli/main.py b/hermes_cli/main.py index f1de563566..28af40b6e3 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1927,6 +1927,7 @@ _AUX_TASKS: list[tuple[str, str, str]] = [ ("mcp", "MCP", "MCP tool reasoning"), ("title_generation", "Title generation", "session titles"), ("skills_hub", "Skills hub", "skills search/install"), + ("curator", "Curator", "skill-usage review pass"), ] diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 1cbd466ec6..8b63e13fb5 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -952,6 +952,7 @@ _AUX_TASK_SLOTS: Tuple[str, ...] = ( "approval", "mcp", "title_generation", + "curator", ) diff --git a/tests/agent/test_curator.py b/tests/agent/test_curator.py index c9118d1ad5..70040ec01d 100644 --- a/tests/agent/test_curator.py +++ b/tests/agent/test_curator.py @@ -485,3 +485,153 @@ def test_cli_pin_refuses_bundled_skill(curator_env, capsys): captured = capsys.readouterr() assert rc == 1 assert "bundled" in captured.out.lower() or "hub" in captured.out.lower() + + +# --------------------------------------------------------------------------- +# curator review-model resolution (canonical auxiliary.curator slot) +# +# Curator was unified with the rest of the aux task system in Apr 2026 so +# `hermes model` → auxiliary picker, the dashboard Models tab, and the full +# per-task config (timeout, base_url, api_key, extra_body) all work for it. +# Voscko report: curator.auxiliary.{provider,model} was advertised but never +# read. Fix wires curator through auxiliary.curator with a legacy fallback. +# --------------------------------------------------------------------------- + + +def test_review_model_defaults_to_main_when_slot_is_auto(curator_env): + """auxiliary.curator absent (or auto/empty) → use main model.provider/model.""" + curator = curator_env["curator"] + cfg = { + "model": {"provider": "openrouter", "default": "openai/gpt-5.5"}, + } + assert curator._resolve_review_model(cfg) == ("openrouter", "openai/gpt-5.5") + + # Explicit auto/empty slot — still main model. + cfg["auxiliary"] = {"curator": {"provider": "auto", "model": ""}} + assert curator._resolve_review_model(cfg) == ("openrouter", "openai/gpt-5.5") + + +def test_review_model_honors_auxiliary_curator_slot(curator_env): + """auxiliary.curator.{provider,model} fully set → that pair wins.""" + curator = curator_env["curator"] + cfg = { + "model": {"provider": "openrouter", "default": "openai/gpt-5.5"}, + "auxiliary": { + "curator": { + "provider": "openrouter", + "model": "openai/gpt-5.4-mini", + }, + }, + } + assert curator._resolve_review_model(cfg) == ( + "openrouter", "openai/gpt-5.4-mini", + ) + + +def test_review_model_auxiliary_curator_partial_override_falls_back(curator_env): + """Only one of slot provider/model set → fall back to the main pair. + + Prevents half-configured overrides from sending an empty side to + resolve_runtime_provider. + """ + curator = curator_env["curator"] + base_main = {"provider": "openrouter", "default": "openai/gpt-5.5"} + + cfg_provider_only = { + "model": dict(base_main), + "auxiliary": {"curator": {"provider": "openrouter", "model": ""}}, + } + assert curator._resolve_review_model(cfg_provider_only) == ( + "openrouter", "openai/gpt-5.5", + ) + + cfg_model_only = { + "model": dict(base_main), + "auxiliary": {"curator": {"provider": "auto", "model": "gpt-5.4-mini"}}, + } + assert curator._resolve_review_model(cfg_model_only) == ( + "openrouter", "openai/gpt-5.5", + ) + + +def test_review_model_legacy_curator_auxiliary_still_works(curator_env, caplog): + """Pre-unification users set curator.auxiliary.{provider,model} — honor it. + + Emits a deprecation log line but keeps their config working. + """ + curator = curator_env["curator"] + cfg = { + "model": {"provider": "openrouter", "default": "openai/gpt-5.5"}, + "curator": { + "auxiliary": { + "provider": "openrouter", + "model": "openai/gpt-5.4-mini", + }, + }, + } + import logging + with caplog.at_level(logging.INFO, logger="agent.curator"): + result = curator._resolve_review_model(cfg) + assert result == ("openrouter", "openai/gpt-5.4-mini") + assert any( + "deprecated curator.auxiliary" in rec.message for rec in caplog.records + ), "expected deprecation warning when legacy curator.auxiliary is used" + + +def test_review_model_new_slot_wins_over_legacy(curator_env): + """When BOTH new and legacy are set, the canonical slot wins.""" + curator = curator_env["curator"] + cfg = { + "model": {"provider": "openrouter", "default": "openai/gpt-5.5"}, + "auxiliary": { + "curator": {"provider": "nous", "model": "new-winner"}, + }, + "curator": { + "auxiliary": {"provider": "openrouter", "model": "legacy-loser"}, + }, + } + assert curator._resolve_review_model(cfg) == ("nous", "new-winner") + + +def test_review_model_handles_missing_sections(curator_env): + """Missing auxiliary/curator sections never raise — fall back cleanly.""" + curator = curator_env["curator"] + cfg = {"model": {"provider": "anthropic", "model": "claude-sonnet-4-6"}} + assert curator._resolve_review_model(cfg) == ( + "anthropic", "claude-sonnet-4-6", + ) + + # Completely empty config → ("auto", "") — resolve_runtime_provider + # handles the auto-detection chain from there. + assert curator._resolve_review_model({}) == ("auto", "") + + +def test_curator_slot_is_canonical_aux_task(): + """Curator must be a first-class slot in every aux-task registry. + + Four sources of truth, all checked by the shared registry test + (test_aux_config.py) for the main tasks — this test pins `curator` + specifically so the unification doesn't silently regress. + """ + from hermes_cli.config import DEFAULT_CONFIG + from hermes_cli.main import _AUX_TASKS + from hermes_cli.web_server import _AUX_TASK_SLOTS + + # 1. DEFAULT_CONFIG.auxiliary — schema source + assert "curator" in DEFAULT_CONFIG["auxiliary"], \ + "curator missing from DEFAULT_CONFIG['auxiliary']" + slot = DEFAULT_CONFIG["auxiliary"]["curator"] + assert slot["provider"] == "auto" + assert slot["model"] == "" + assert slot["timeout"] > 0, "curator timeout should be set (reviews run long)" + + # 2. hermes_cli/main.py _AUX_TASKS — CLI picker + aux_keys = {k for k, _name, _desc in _AUX_TASKS} + assert "curator" in aux_keys, "curator missing from _AUX_TASKS (CLI picker)" + + # 3. hermes_cli/web_server.py _AUX_TASK_SLOTS — REST API allowlist + assert "curator" in _AUX_TASK_SLOTS, \ + "curator missing from _AUX_TASK_SLOTS (dashboard REST API)" + + # 4. web/src/pages/ModelsPage.tsx is checked at build time; the tsx + # array and this tuple share a ``Must match _AUX_TASK_SLOTS`` comment. diff --git a/web/src/pages/ModelsPage.tsx b/web/src/pages/ModelsPage.tsx index 97ff1d0695..955ed0da21 100644 --- a/web/src/pages/ModelsPage.tsx +++ b/web/src/pages/ModelsPage.tsx @@ -44,6 +44,7 @@ const AUX_TASKS: readonly { key: string; label: string; hint: string }[] = [ { key: "approval", label: "Approval", hint: "Smart auto-approve" }, { key: "mcp", label: "MCP", hint: "MCP tool routing" }, { key: "title_generation", label: "Title Gen", hint: "Session titles" }, + { key: "curator", label: "Curator", hint: "Skill-usage review" }, ] as const; function formatTokens(n: number): string { diff --git a/website/docs/user-guide/features/curator.md b/website/docs/user-guide/features/curator.md index c1620f2b83..d9ba73dc7d 100644 --- a/website/docs/user-guide/features/curator.md +++ b/website/docs/user-guide/features/curator.md @@ -41,14 +41,38 @@ curator: min_idle_hours: 2 stale_after_days: 30 archive_after_days: 90 - auxiliary: - provider: null # null = use main auxiliary client resolution - model: null ``` To disable entirely, set `curator.enabled: false`. -To use a cheaper aux model for the LLM review pass instead of your main model, set `curator.auxiliary.provider` and `curator.auxiliary.model` to something specific (e.g. `openrouter` + `google/gemini-3-flash-preview`). +### Running the review on a cheaper aux model + +The curator's LLM review pass is a regular auxiliary task slot — `auxiliary.curator` — alongside Vision, Compression, Session Search, etc. "Auto" means "use my main chat model"; override the slot to pin a specific provider + model for the review pass instead. + +**Easiest — `hermes model`:** + +```bash +hermes model # → "Auxiliary models — side-task routing" + # → pick "Curator" → pick provider → pick model +``` + +The same picker is available in the web dashboard under the **Models** tab. + +**Direct config.yaml (equivalent):** + +```yaml +auxiliary: + curator: + provider: openrouter + model: google/gemini-3-flash-preview + timeout: 600 # generous — reviews can take several minutes +``` + +Leaving `provider: auto` (the default) routes the review pass through whatever your main chat model is, matching the behavior of every other auxiliary task. + +:::note Legacy config +Earlier releases used a one-off `curator.auxiliary.{provider,model}` block. That path still works but emits a deprecation log line — please migrate to `auxiliary.curator` above so the curator shares the same plumbing (`hermes model`, dashboard Models tab, `base_url`, `api_key`, `timeout`, `extra_body`) as every other aux task. +::: ## CLI