test: cover ci-unblocker production regressions

Snapshot review_agent._session_messages before teardown so close() can
clean per-session state without dropping the user-visible
self-improvement summary. Adds two regressions:

- bg-review summarizer receives captured review-agent tool messages
  after review_agent.close() runs
- context-compressor protected-head handoff rehydration populates
  _previous_summary and keeps the old handoff out of newly summarized
  turns

Salvaged from PR #26039 onto current main after agent/background_review.py
extraction. Original commit 63eaf6055; bg-review test updated to patch
the module-level summarize_background_review_actions in
agent.background_review instead of the now-forwarder
AIAgent._summarize_background_review_actions.
This commit is contained in:
stephenschoettler 2026-05-27 21:41:48 -07:00 committed by Teknium
parent 66489f38c7
commit 4a6f1863ac
3 changed files with 95 additions and 1 deletions

View file

@ -483,6 +483,11 @@ def _run_review_in_thread(
finally:
clear_thread_tool_whitelist()
# Snapshot review actions before teardown. close() is allowed to
# clean per-session state, but the user-visible self-improvement
# summary still needs the completed review agent's tool results.
review_messages = list(getattr(review_agent, "_session_messages", []))
# Tear down memory providers while stdout is still
# redirected so background thread teardown (Honcho flush,
# Hindsight sync, etc.) stays silent. The finally block
@ -495,7 +500,6 @@ def _run_review_in_thread(
review_agent.close()
except Exception:
pass
review_messages = list(getattr(review_agent, "_session_messages", []))
review_agent = None
# Scan the review agent's messages for successful tool actions

View file

@ -67,3 +67,21 @@ def test_resume_rehydrates_previous_summary_from_handoff_message():
assert "TURNS TO SUMMARIZE:" not in prompt
assert prompt.count(old_summary) == 1
assert f"[USER]: {SUMMARY_PREFIX}" not in prompt
def test_handoff_in_protected_head_populates_previous_summary_before_update():
"""A resumed protected-head handoff should restore iterative-summary state."""
compressor = _compressor()
old_summary = "PROTECTED-HEAD-SUMMARY durable facts from before restart"
seen_turns = []
def fake_generate_summary(turns_to_summarize, focus_topic=None):
seen_turns.extend(turns_to_summarize)
return "new summary from resumed turns"
with patch.object(compressor, "_generate_summary", side_effect=fake_generate_summary):
compressor.compress(_messages_with_handoff(old_summary))
assert compressor._previous_summary == old_summary
assert seen_turns
assert all(old_summary not in str(msg.get("content", "")) for msg in seen_turns)

View file

@ -76,6 +76,78 @@ def test_background_review_shuts_down_memory_provider_before_close(monkeypatch):
]
def test_background_review_summarizer_receives_captured_messages_after_close(monkeypatch):
"""The action summarizer must see review messages even after close cleanup.
Regression for the bug where ``review_messages`` was snapshot AFTER
``review_agent.close()``. close() is allowed to clean per-session state
(including ``_session_messages``), so the summarizer would receive an
empty list and the user-visible self-improvement summary would silently
disappear. The fix snapshots ``_session_messages`` before teardown.
"""
import json
import agent.background_review as bg_review
review_tool_message = {
"role": "tool",
"tool_call_id": "call_bg",
"content": json.dumps(
{"success": True, "message": "Entry added", "target": "memory"}
),
}
captured: dict = {}
events: list[str] = []
class FakeReviewAgent:
def __init__(self, **kwargs):
self._session_messages = []
def run_conversation(self, **kwargs):
events.append("run_conversation")
self._session_messages = [review_tool_message]
def shutdown_memory_provider(self):
events.append("shutdown_memory_provider")
def close(self):
events.append("close")
# close() is allowed to clean _session_messages — the fix
# must have snapshot them before this runs.
self._session_messages = []
def fake_summarize(review_messages, prior_snapshot):
events.append("summarize")
captured["review_messages"] = list(review_messages)
captured["prior_snapshot"] = list(prior_snapshot)
return []
monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
monkeypatch.setattr(
bg_review,
"summarize_background_review_actions",
fake_summarize,
)
messages_snapshot = [{"role": "user", "content": "hi"}]
agent = _bare_agent()
AIAgent._spawn_background_review(
agent,
messages_snapshot=messages_snapshot,
review_memory=True,
)
assert events == [
"run_conversation",
"shutdown_memory_provider",
"close",
"summarize",
]
assert captured["review_messages"] == [review_tool_message]
assert captured["prior_snapshot"] == messages_snapshot
def test_background_review_installs_auto_deny_approval_callback(monkeypatch):
"""Regression guard for #15216.