hermes-agent/tests/run_agent/test_background_review_cost_controls.py
Teknium 87c4a5ebb8
feat(background-review): aux-model selector for the self-improvement review (#49252)
Adds auxiliary.background_review.{provider,model} (default auto = main chat
model — unchanged). Set it to a different, cheaper model and the post-turn
self-improvement review runs there for ~3-5x lower cost.

Cache-aware by design: the main chat is warm in the prompt cache, so the
default full-history replay on the main model is cheap cache reads — left
exactly as-is. A different model can't reuse that cache (different key), so
when (and only when) routed to a different model the fork replays a compact
digest instead of the full transcript, minimising what it cold-writes on the
aux model. Same model -> full replay; different model -> digest.

Quality holds in benchmarks: memory capture identical, skill near-identical.
Nothing changes unless you opt in by naming a different model.

Co-authored-by: Hermes Agent <noreply@nousresearch.com>
2026-06-22 14:54:53 -07:00

138 lines
5.4 KiB
Python

"""Unit coverage for the background-review aux-model selector + routed digest.
Covers the two behaviors this change adds:

  • _resolve_review_runtime — auto/same-model → not routed (main model, warm
    cache); a configured different model → routed with resolved credentials.
  • _digest_history — compact replay used ONLY on the routed path (recent tail
    verbatim + a digest of older turns), preserving role alternation.

Pure-function / config-driven; no live model calls.
"""
from unittest.mock import patch
from agent import background_review as br
def _msg(role, content, tool_calls=None):
m = {"role": role, "content": content}
if tool_calls:
m["tool_calls"] = tool_calls
return m
# ---------------------------------------------------------------------------
# _resolve_review_runtime — the aux-model selector
# ---------------------------------------------------------------------------
class _FakeAgent:
def __init__(self, provider="openai-codex", model="gpt-5.5"):
self.provider = provider
self.model = model
def _current_main_runtime(self):
return {
"api_key": "parent-key",
"base_url": "https://chatgpt.com/backend-api/codex",
"api_mode": "codex_app_server",
}
def test_routing_auto_inherits_parent_and_downgrades_codex_app_server():
    """``provider: auto`` keeps the review on the parent's provider and model."""
    review_cfg = {"provider": "auto", "model": ""}
    config = {"auxiliary": {"background_review": review_cfg}}
    parent = _FakeAgent()

    with patch("hermes_cli.config.load_config", return_value=config):
        runtime = br._resolve_review_runtime(parent)

    assert runtime["routed"] is False
    assert runtime["provider"] == "openai-codex"
    assert runtime["model"] == "gpt-5.5"
    # downgraded so agent-loop tools dispatch
    assert runtime["api_mode"] == "codex_responses"
def test_routing_to_different_model_marks_routed_and_resolves_credentials():
    """A configured different provider/model is routed with resolved credentials."""
    parent = _FakeAgent()
    review_cfg = {
        "provider": "openrouter",
        "model": "google/gemini-3-flash-preview",
    }
    config = {"auxiliary": {"background_review": review_cfg}}
    resolved = {
        "provider": "openrouter",
        "api_key": "or-key",
        "base_url": "https://openrouter.ai/api/v1",
        "api_mode": "chat_completions",
    }

    # Both the config loader and the provider resolver are stubbed out.
    with patch("hermes_cli.config.load_config", return_value=config):
        with patch(
            "hermes_cli.runtime_provider.resolve_runtime_provider",
            return_value=resolved,
        ):
            runtime = br._resolve_review_runtime(parent)

    assert runtime["routed"] is True
    assert runtime["provider"] == "openrouter"
    assert runtime["model"] == "google/gemini-3-flash-preview"
    assert runtime["api_key"] == "or-key"
def test_routing_same_model_as_parent_is_not_routed():
    """Naming the parent's own provider/model must not count as routing."""
    shared = {"provider": "openrouter", "model": "anthropic/claude-opus-4.8"}
    parent = _FakeAgent(provider=shared["provider"], model=shared["model"])
    config = {"auxiliary": {"background_review": dict(shared)}}

    with patch("hermes_cli.config.load_config", return_value=config):
        runtime = br._resolve_review_runtime(parent)

    # same model/provider → keep full-replay path
    assert runtime["routed"] is False
def test_routing_resolution_failure_falls_back_to_parent():
    """If resolving the aux provider raises, the parent provider is used unrouted."""
    parent = _FakeAgent()
    review_cfg = {
        "provider": "openrouter",
        "model": "google/gemini-3-flash-preview",
    }
    config = {"auxiliary": {"background_review": review_cfg}}

    with patch("hermes_cli.config.load_config", return_value=config):
        # The resolver blows up; the selector is expected to absorb that.
        with patch(
            "hermes_cli.runtime_provider.resolve_runtime_provider",
            side_effect=RuntimeError("boom"),
        ):
            runtime = br._resolve_review_runtime(parent)

    assert runtime["routed"] is False
    assert runtime["provider"] == "openai-codex"
# ---------------------------------------------------------------------------
# _digest_history — routed-path compact replay
# ---------------------------------------------------------------------------
def test_digest_under_tail_returns_full():
    """A history shorter than ``tail`` comes back equal to the input."""
    short_history = [_msg("user", "hi"), _msg("assistant", "hello")]
    digested = br._digest_history(short_history, tail=24)
    assert digested == short_history
def test_digest_collapses_old_keeps_tail_verbatim():
    """Older turns collapse into one digest message; the last ``tail`` stay as-is."""
    # 60 user/assistant pairs, interleaved: u0, a0, u1, a1, ...
    speakers = (("user", "u", "x"), ("assistant", "a", "y"))
    history = [
        _msg(role, f"{tag}{i} " + filler * 50)
        for i in range(60)
        for role, tag, filler in speakers
    ]

    result = br._digest_history(history, tail=10)
    digest_msg = result[0]

    # First message is the synthetic digest (user role → alternation preserved).
    assert digest_msg["role"] == "user"
    assert digest_msg["content"].startswith("[Earlier conversation digest")
    # Recent tail preserved verbatim.
    assert result[-1] == history[-1]
    assert len(result) == 11  # 1 digest + 10 tail
def test_digest_does_not_open_tail_on_a_tool_message():
    """The message right after the digest must not be a ``tool`` message."""
    history = []
    for _ in range(40):
        # One user → assistant(tool call) → tool-result triple per round.
        history += [
            _msg("user", "u" + "x" * 50),
            _msg(
                "assistant",
                "",
                tool_calls=[{"function": {"name": "terminal", "arguments": "{}"}}],
            ),
            {"role": "tool", "content": "result " + "w" * 50},
        ]

    result = br._digest_history(history, tail=2)

    # The verbatim tail (after the digest) must not begin on a bare tool message.
    first_tail_msg = result[1]
    assert first_tail_msg["role"] != "tool"
def test_digest_records_tool_names_in_arc():
    """The digest text carries the old user turn and the assistant's tool names."""
    tool_calls = [
        {"function": {"name": tool_name, "arguments": "{}"}}
        for tool_name in ("skill_view", "patch")
    ]
    history = [
        _msg("user", "do the thing"),
        _msg("assistant", "", tool_calls=tool_calls),
    ]
    # 30 trailing user turns push the two messages above out of the tail.
    history.extend(_msg("user", f"tail{i}") for i in range(30))

    digest_text = br._digest_history(history, tail=10)[0]["content"]

    assert "USER: do the thing" in digest_text
    assert "tools: skill_view, patch" in digest_text