mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-30 11:52:04 +00:00
Adds auxiliary.background_review.{provider,model} (default auto = main chat
model — unchanged). Set it to a different, cheaper model and the post-turn
self-improvement review runs there for ~3-5x lower cost.
Cache-aware by design: the main chat is warm in the prompt cache, so the
default full-history replay on the main model is cheap cache reads — left
exactly as-is. A different model can't reuse that cache (different key), so
when (and only when) routed to a different model the fork replays a compact
digest instead of the full transcript, minimising what it cold-writes on the
aux model. Same model -> full replay; different model -> digest.
Quality holds in benchmarks: memory capture identical, skill near-identical.
Nothing changes unless you opt in by naming a different model.
Co-authored-by: Hermes Agent <noreply@nousresearch.com>
138 lines
5.4 KiB
Python
138 lines
5.4 KiB
Python
"""Unit coverage for the background-review aux-model selector + routed digest.
|
|
|
|
Covers the two behaviors this change adds:
|
|
• _resolve_review_runtime — auto/same-model → not routed (main model, warm
|
|
cache); a configured different model → routed with resolved credentials.
|
|
• _digest_history — compact replay used ONLY on the routed path (recent tail
|
|
verbatim + a digest of older turns), preserving role alternation.
|
|
|
|
Pure-function / config-driven; no live model calls.
|
|
"""
|
|
from unittest.mock import patch
|
|
|
|
from agent import background_review as br
|
|
|
|
|
|
def _msg(role, content, tool_calls=None):
|
|
m = {"role": role, "content": content}
|
|
if tool_calls:
|
|
m["tool_calls"] = tool_calls
|
|
return m
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _resolve_review_runtime — the aux-model selector
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class _FakeAgent:
|
|
def __init__(self, provider="openai-codex", model="gpt-5.5"):
|
|
self.provider = provider
|
|
self.model = model
|
|
|
|
def _current_main_runtime(self):
|
|
return {
|
|
"api_key": "parent-key",
|
|
"base_url": "https://chatgpt.com/backend-api/codex",
|
|
"api_mode": "codex_app_server",
|
|
}
|
|
|
|
|
|
def test_routing_auto_inherits_parent_and_downgrades_codex_app_server():
    """provider="auto" with an empty model resolves to the parent's provider/model, unrouted."""
    review_cfg = {"provider": "auto", "model": ""}
    config = {"auxiliary": {"background_review": review_cfg}}
    parent = _FakeAgent()

    with patch("hermes_cli.config.load_config", return_value=config):
        runtime = br._resolve_review_runtime(parent)

    assert runtime["routed"] is False
    assert runtime["provider"] == "openai-codex"
    assert runtime["model"] == "gpt-5.5"
    # The fake parent reports codex_app_server; the review runtime must be
    # downgraded so agent-loop tools dispatch.
    assert runtime["api_mode"] == "codex_responses"
|
|
|
|
|
|
def test_routing_to_different_model_marks_routed_and_resolves_credentials():
    """A configured provider/model differing from the parent's is routed with resolved credentials."""
    review_cfg = {
        "provider": "openrouter",
        "model": "google/gemini-3-flash-preview",
    }
    config = {"auxiliary": {"background_review": review_cfg}}
    resolved_provider = {
        "provider": "openrouter",
        "api_key": "or-key",
        "base_url": "https://openrouter.ai/api/v1",
        "api_mode": "chat_completions",
    }
    parent = _FakeAgent()

    with patch("hermes_cli.config.load_config", return_value=config):
        with patch(
            "hermes_cli.runtime_provider.resolve_runtime_provider",
            return_value=resolved_provider,
        ):
            runtime = br._resolve_review_runtime(parent)

    assert runtime["routed"] is True
    assert runtime["provider"] == "openrouter"
    assert runtime["model"] == "google/gemini-3-flash-preview"
    assert runtime["api_key"] == "or-key"
|
|
|
|
|
|
def test_routing_same_model_as_parent_is_not_routed():
    """Naming the parent's own provider/model in config must not count as routing."""
    review_cfg = {
        "provider": "openrouter",
        "model": "anthropic/claude-opus-4.8",
    }
    config = {"auxiliary": {"background_review": review_cfg}}
    parent = _FakeAgent(provider="openrouter", model="anthropic/claude-opus-4.8")

    with patch("hermes_cli.config.load_config", return_value=config):
        runtime = br._resolve_review_runtime(parent)

    # Same model/provider → keep the full-replay path.
    assert runtime["routed"] is False
|
|
|
|
|
|
def test_routing_resolution_failure_falls_back_to_parent():
    """If resolving the configured provider raises, the parent's provider is used, unrouted."""
    review_cfg = {
        "provider": "openrouter",
        "model": "google/gemini-3-flash-preview",
    }
    config = {"auxiliary": {"background_review": review_cfg}}
    parent = _FakeAgent()

    with patch("hermes_cli.config.load_config", return_value=config):
        with patch(
            "hermes_cli.runtime_provider.resolve_runtime_provider",
            side_effect=RuntimeError("boom"),
        ):
            runtime = br._resolve_review_runtime(parent)

    assert runtime["routed"] is False
    assert runtime["provider"] == "openai-codex"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _digest_history — routed-path compact replay
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_digest_under_tail_returns_full():
    """A history shorter than ``tail`` comes back equal to the input."""
    short_history = [_msg("user", "hi"), _msg("assistant", "hello")]
    result = br._digest_history(short_history, tail=24)
    assert result == short_history
|
|
|
|
|
|
def test_digest_collapses_old_keeps_tail_verbatim():
    """A long history becomes one digest message followed by the recent tail."""
    # 60 user/assistant pairs, in order: u0, a0, u1, a1, ...
    history = [
        _msg(role, f"{tag}{i} " + filler * 50)
        for i in range(60)
        for role, tag, filler in (("user", "u", "x"), ("assistant", "a", "y"))
    ]

    out = br._digest_history(history, tail=10)
    digest_msg = out[0]

    # First message is the synthetic digest (user role → alternation preserved).
    assert digest_msg["role"] == "user"
    assert digest_msg["content"].startswith("[Earlier conversation digest")
    # Recent tail preserved verbatim.
    assert out[-1] == history[-1]
    assert len(out) == 11  # 1 digest + 10 tail
|
|
|
|
|
|
def test_digest_does_not_open_tail_on_a_tool_message():
    """The message right after the digest must not be a bare tool result."""
    history = []
    for _ in range(40):
        # One full turn: user prompt, assistant tool call, tool result.
        history.extend([
            _msg("user", "u" + "x" * 50),
            _msg("assistant", "", tool_calls=[
                {"function": {"name": "terminal", "arguments": "{}"}},
            ]),
            {"role": "tool", "content": "result " + "w" * 50},
        ])

    out = br._digest_history(history, tail=2)
    first_tail_msg = out[1]

    # The verbatim tail (after the digest) must not begin on a bare tool message.
    assert first_tail_msg["role"] != "tool"
|
|
|
|
|
|
def test_digest_records_tool_names_in_arc():
    """The digest text mentions the old user turn and the names of the tools called."""
    tool_calls = [
        {"function": {"name": tool_name, "arguments": "{}"}}
        for tool_name in ("skill_view", "patch")
    ]
    history = [
        _msg("user", "do the thing"),
        _msg("assistant", "", tool_calls=tool_calls),
    ]
    # 30 trailing user messages push the two turns above out of the tail.
    history.extend(_msg("user", f"tail{i}") for i in range(30))

    out = br._digest_history(history, tail=10)
    digest_text = out[0]["content"]

    assert "USER: do the thing" in digest_text
    assert "tools: skill_view, patch" in digest_text
|