mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-31 06:51:29 +00:00
Snapshot review_agent._session_messages before teardown so close() can
clean per-session state without dropping the user-visible
self-improvement summary. Adds two regressions:
- bg-review summarizer receives captured review-agent tool messages
after review_agent.close() runs
- context-compressor protected-head handoff rehydration populates
_previous_summary and keeps the old handoff out of newly summarized
turns
Salvaged from PR #26039 onto current main after agent/background_review.py
extraction. Original commit 63eaf6055; bg-review test updated to patch
the module-level summarize_background_review_actions in
agent.background_review instead of the now-forwarder
AIAgent._summarize_background_review_actions.
315 lines
10 KiB
Python
315 lines
10 KiB
Python
"""Regression tests for background review agent cleanup."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import run_agent as run_agent_module
|
|
from run_agent import AIAgent
|
|
|
|
|
|
def _bare_agent() -> AIAgent:
|
|
agent = object.__new__(AIAgent)
|
|
agent.model = "fake-model"
|
|
agent.platform = "telegram"
|
|
agent.provider = "openai"
|
|
agent.base_url = ""
|
|
agent.api_key = ""
|
|
agent.api_mode = ""
|
|
agent.session_id = "test-session"
|
|
agent._parent_session_id = ""
|
|
agent._credential_pool = None
|
|
agent._memory_store = object()
|
|
agent._memory_enabled = True
|
|
agent._user_profile_enabled = False
|
|
agent._cached_system_prompt = "test-cached-system-prompt"
|
|
import datetime as _dt
|
|
agent.session_start = _dt.datetime(2026, 1, 1, 12, 0, 0)
|
|
agent._MEMORY_REVIEW_PROMPT = "review memory"
|
|
agent._SKILL_REVIEW_PROMPT = "review skills"
|
|
agent._COMBINED_REVIEW_PROMPT = "review both"
|
|
agent.background_review_callback = None
|
|
agent.status_callback = None
|
|
agent._safe_print = lambda *_args, **_kwargs: None
|
|
return agent
|
|
|
|
|
|
class ImmediateThread:
|
|
def __init__(self, *, target, daemon=None, name=None):
|
|
self._target = target
|
|
|
|
def start(self):
|
|
self._target()
|
|
|
|
|
|
def test_background_review_shuts_down_memory_provider_before_close(monkeypatch):
|
|
events = []
|
|
|
|
class FakeReviewAgent:
|
|
def __init__(self, **kwargs):
|
|
events.append(("init", kwargs))
|
|
self._session_messages = []
|
|
|
|
def run_conversation(self, **kwargs):
|
|
events.append(("run_conversation", kwargs))
|
|
|
|
def shutdown_memory_provider(self):
|
|
events.append(("shutdown_memory_provider", None))
|
|
|
|
def close(self):
|
|
events.append(("close", None))
|
|
|
|
monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
|
|
monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
|
|
|
|
agent = _bare_agent()
|
|
|
|
AIAgent._spawn_background_review(
|
|
agent,
|
|
messages_snapshot=[{"role": "user", "content": "hello"}],
|
|
review_memory=True,
|
|
)
|
|
|
|
assert [name for name, _payload in events] == [
|
|
"init",
|
|
"run_conversation",
|
|
"shutdown_memory_provider",
|
|
"close",
|
|
]
|
|
|
|
|
|
def test_background_review_summarizer_receives_captured_messages_after_close(monkeypatch):
|
|
"""The action summarizer must see review messages even after close cleanup.
|
|
|
|
Regression for the bug where ``review_messages`` was snapshot AFTER
|
|
``review_agent.close()``. close() is allowed to clean per-session state
|
|
(including ``_session_messages``), so the summarizer would receive an
|
|
empty list and the user-visible self-improvement summary would silently
|
|
disappear. The fix snapshots ``_session_messages`` before teardown.
|
|
"""
|
|
import json
|
|
import agent.background_review as bg_review
|
|
|
|
review_tool_message = {
|
|
"role": "tool",
|
|
"tool_call_id": "call_bg",
|
|
"content": json.dumps(
|
|
{"success": True, "message": "Entry added", "target": "memory"}
|
|
),
|
|
}
|
|
captured: dict = {}
|
|
events: list[str] = []
|
|
|
|
class FakeReviewAgent:
|
|
def __init__(self, **kwargs):
|
|
self._session_messages = []
|
|
|
|
def run_conversation(self, **kwargs):
|
|
events.append("run_conversation")
|
|
self._session_messages = [review_tool_message]
|
|
|
|
def shutdown_memory_provider(self):
|
|
events.append("shutdown_memory_provider")
|
|
|
|
def close(self):
|
|
events.append("close")
|
|
# close() is allowed to clean _session_messages — the fix
|
|
# must have snapshot them before this runs.
|
|
self._session_messages = []
|
|
|
|
def fake_summarize(review_messages, prior_snapshot):
|
|
events.append("summarize")
|
|
captured["review_messages"] = list(review_messages)
|
|
captured["prior_snapshot"] = list(prior_snapshot)
|
|
return []
|
|
|
|
monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
|
|
monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
|
|
monkeypatch.setattr(
|
|
bg_review,
|
|
"summarize_background_review_actions",
|
|
fake_summarize,
|
|
)
|
|
|
|
messages_snapshot = [{"role": "user", "content": "hi"}]
|
|
agent = _bare_agent()
|
|
|
|
AIAgent._spawn_background_review(
|
|
agent,
|
|
messages_snapshot=messages_snapshot,
|
|
review_memory=True,
|
|
)
|
|
|
|
assert events == [
|
|
"run_conversation",
|
|
"shutdown_memory_provider",
|
|
"close",
|
|
"summarize",
|
|
]
|
|
assert captured["review_messages"] == [review_tool_message]
|
|
assert captured["prior_snapshot"] == messages_snapshot
|
|
|
|
|
|
def test_background_review_installs_auto_deny_approval_callback(monkeypatch):
|
|
"""Regression guard for #15216.
|
|
|
|
The background review thread must install a non-interactive approval
|
|
callback. If it doesn't, any dangerous-command guard the review agent
|
|
trips falls back to input() on a daemon thread, which deadlocks against
|
|
the parent's prompt_toolkit TUI.
|
|
"""
|
|
import tools.terminal_tool as tt
|
|
|
|
observed: dict = {"during_run": "<unread>", "after_finally": "<unread>"}
|
|
|
|
class FakeReviewAgent:
|
|
def __init__(self, **kwargs):
|
|
self._session_messages = []
|
|
|
|
def run_conversation(self, **kwargs):
|
|
# Capture what the callback looks like mid-run. It must be
|
|
# a callable (the auto-deny) -- not None.
|
|
observed["during_run"] = tt._get_approval_callback()
|
|
|
|
def shutdown_memory_provider(self):
|
|
pass
|
|
|
|
def close(self):
|
|
pass
|
|
|
|
monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
|
|
monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
|
|
|
|
# Start from a clean slot.
|
|
tt.set_approval_callback(None)
|
|
agent = _bare_agent()
|
|
|
|
AIAgent._spawn_background_review(
|
|
agent,
|
|
messages_snapshot=[{"role": "user", "content": "hello"}],
|
|
review_memory=True,
|
|
)
|
|
|
|
observed["after_finally"] = tt._get_approval_callback()
|
|
|
|
assert callable(observed["during_run"]), (
|
|
"Background review did not install an approval callback on its "
|
|
"worker thread; dangerous-command prompts will deadlock against "
|
|
"the parent TUI (#15216)."
|
|
)
|
|
# The installed callback must deny (it's a safety gate, not a prompt).
|
|
assert observed["during_run"]("rm -rf /", "test") == "deny"
|
|
|
|
assert observed["after_finally"] is None, (
|
|
"Background review leaked its approval callback into the worker "
|
|
"thread's TLS slot; a recycled thread-id could reuse it."
|
|
)
|
|
|
|
|
|
def test_background_review_summary_is_attributed_to_self_improvement_loop(monkeypatch):
|
|
"""The CLI/gateway emission must identify the self-improvement loop.
|
|
|
|
Users who miss the line in their terminal have no way to tell that the
|
|
background review was what modified their skill/memory stores. The
|
|
summary prefix ``💾 Self-improvement review: …`` makes the origin
|
|
explicit so both the CLI and gateway deliveries are unambiguous.
|
|
"""
|
|
import json
|
|
|
|
captured_prints: list = []
|
|
captured_bg_callback: list = []
|
|
|
|
class FakeReviewAgent:
|
|
def __init__(self, **kwargs):
|
|
# Simulate a review that successfully updated memory so
|
|
# _summarize_background_review_actions returns a real action.
|
|
self._session_messages = [
|
|
{
|
|
"role": "tool",
|
|
"tool_call_id": "call_bg",
|
|
"content": json.dumps(
|
|
{"success": True, "message": "Entry added", "target": "memory"}
|
|
),
|
|
}
|
|
]
|
|
|
|
def run_conversation(self, **kwargs):
|
|
pass
|
|
|
|
def shutdown_memory_provider(self):
|
|
pass
|
|
|
|
def close(self):
|
|
pass
|
|
|
|
monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
|
|
monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
|
|
|
|
agent = _bare_agent()
|
|
agent._safe_print = lambda *a, **kw: captured_prints.append(" ".join(str(x) for x in a))
|
|
agent.background_review_callback = lambda msg: captured_bg_callback.append(msg)
|
|
|
|
AIAgent._spawn_background_review(
|
|
agent,
|
|
messages_snapshot=[{"role": "user", "content": "hi"}],
|
|
review_memory=True,
|
|
)
|
|
|
|
# Exactly one summary should have been emitted, and it must identify
|
|
# the self-improvement review explicitly.
|
|
assert len(captured_prints) == 1, captured_prints
|
|
printed = captured_prints[0]
|
|
assert "Self-improvement review" in printed, printed
|
|
assert "Memory updated" in printed, printed
|
|
|
|
# Gateway path gets the same prefix.
|
|
assert len(captured_bg_callback) == 1
|
|
assert captured_bg_callback[0].startswith("💾 Self-improvement review:"), (
|
|
captured_bg_callback[0]
|
|
)
|
|
|
|
|
|
def test_background_review_fork_skips_external_memory_plugins(monkeypatch):
|
|
"""The background review fork must NOT touch external memory plugins.
|
|
|
|
Without skip_memory=True on the fork constructor, AIAgent.__init__
|
|
rebuilds its own _memory_manager from config, scoped to the parent's
|
|
session_id. The review fork's run_conversation() then leaks the
|
|
harness prompt into the user's real memory namespace via three
|
|
ingestion sites: on_turn_start (cadence + turn message),
|
|
prefetch_all (recall query), and sync_all (harness prompt + review
|
|
output recorded as a (user, assistant) turn pair). The fix is a
|
|
single kwarg on the fork constructor — this test guards it.
|
|
"""
|
|
captured_kwargs: dict = {}
|
|
|
|
class FakeReviewAgent:
|
|
def __init__(self, **kwargs):
|
|
captured_kwargs.update(kwargs)
|
|
self._session_messages = []
|
|
|
|
def run_conversation(self, **kwargs):
|
|
pass
|
|
|
|
def shutdown_memory_provider(self):
|
|
pass
|
|
|
|
def close(self):
|
|
pass
|
|
|
|
monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
|
|
monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
|
|
|
|
agent = _bare_agent()
|
|
|
|
AIAgent._spawn_background_review(
|
|
agent,
|
|
messages_snapshot=[{"role": "user", "content": "hello"}],
|
|
review_memory=True,
|
|
)
|
|
|
|
assert captured_kwargs.get("skip_memory") is True, (
|
|
"Background review fork must be constructed with skip_memory=True "
|
|
"so AIAgent.__init__ does not rebuild a _memory_manager wired to "
|
|
"external plugins (honcho, mem0, supermemory, ...). Without this "
|
|
"the fork leaks harness prompts into the user's real memory "
|
|
"namespace via on_turn_start / prefetch_all / sync_all."
|
|
)
|