diff --git a/agent/background_review.py b/agent/background_review.py index 35d3d5191a0..bf99ee52845 100644 --- a/agent/background_review.py +++ b/agent/background_review.py @@ -483,6 +483,11 @@ def _run_review_in_thread( finally: clear_thread_tool_whitelist() + # Snapshot review actions before teardown. close() is allowed to + # clean per-session state, but the user-visible self-improvement + # summary still needs the completed review agent's tool results. + review_messages = list(getattr(review_agent, "_session_messages", [])) + # Tear down memory providers while stdout is still # redirected so background thread teardown (Honcho flush, # Hindsight sync, etc.) stays silent. The finally block @@ -495,7 +500,6 @@ def _run_review_in_thread( review_agent.close() except Exception: pass - review_messages = list(getattr(review_agent, "_session_messages", [])) review_agent = None # Scan the review agent's messages for successful tool actions diff --git a/tests/agent/test_context_compressor_summary_continuity.py b/tests/agent/test_context_compressor_summary_continuity.py index d797b661f01..f3101913ceb 100644 --- a/tests/agent/test_context_compressor_summary_continuity.py +++ b/tests/agent/test_context_compressor_summary_continuity.py @@ -67,3 +67,21 @@ def test_resume_rehydrates_previous_summary_from_handoff_message(): assert "TURNS TO SUMMARIZE:" not in prompt assert prompt.count(old_summary) == 1 assert f"[USER]: {SUMMARY_PREFIX}" not in prompt + + +def test_handoff_in_protected_head_populates_previous_summary_before_update(): + """A resumed protected-head handoff should restore iterative-summary state.""" + compressor = _compressor() + old_summary = "PROTECTED-HEAD-SUMMARY durable facts from before restart" + seen_turns = [] + + def fake_generate_summary(turns_to_summarize, focus_topic=None): + seen_turns.extend(turns_to_summarize) + return "new summary from resumed turns" + + with patch.object(compressor, "_generate_summary", side_effect=fake_generate_summary): + compressor.compress(_messages_with_handoff(old_summary)) + + assert compressor._previous_summary == old_summary + assert seen_turns + assert all(old_summary not in str(msg.get("content", "")) for msg in seen_turns) diff --git a/tests/run_agent/test_background_review.py b/tests/run_agent/test_background_review.py index 89626f857d5..f4b0faff7f5 100644 --- a/tests/run_agent/test_background_review.py +++ b/tests/run_agent/test_background_review.py @@ -76,6 +76,78 @@ def test_background_review_shuts_down_memory_provider_before_close(monkeypatch): ] +def test_background_review_summarizer_receives_captured_messages_after_close(monkeypatch): + """The action summarizer must see review messages even after close cleanup. + + Regression for the bug where ``review_messages`` was snapshot AFTER + ``review_agent.close()``. close() is allowed to clean per-session state + (including ``_session_messages``), so the summarizer would receive an + empty list and the user-visible self-improvement summary would silently + disappear. The fix snapshots ``_session_messages`` before teardown. + """ + import json + import agent.background_review as bg_review + + review_tool_message = { + "role": "tool", + "tool_call_id": "call_bg", + "content": json.dumps( + {"success": True, "message": "Entry added", "target": "memory"} + ), + } + captured: dict = {} + events: list[str] = [] + + class FakeReviewAgent: + def __init__(self, **kwargs): + self._session_messages = [] + + def run_conversation(self, **kwargs): + events.append("run_conversation") + self._session_messages = [review_tool_message] + + def shutdown_memory_provider(self): + events.append("shutdown_memory_provider") + + def close(self): + events.append("close") + # close() is allowed to clean _session_messages — the fix + # must have snapshot them before this runs. + self._session_messages = [] + + def fake_summarize(review_messages, prior_snapshot): + events.append("summarize") + captured["review_messages"] = list(review_messages) + captured["prior_snapshot"] = list(prior_snapshot) + return [] + + monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent) + monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread) + monkeypatch.setattr( + bg_review, + "summarize_background_review_actions", + fake_summarize, + ) + + messages_snapshot = [{"role": "user", "content": "hi"}] + agent = _bare_agent() + + AIAgent._spawn_background_review( + agent, + messages_snapshot=messages_snapshot, + review_memory=True, + ) + + assert events == [ + "run_conversation", + "shutdown_memory_provider", + "close", + "summarize", + ] + assert captured["review_messages"] == [review_tool_message] + assert captured["prior_snapshot"] == messages_snapshot + + def test_background_review_installs_auto_deny_approval_callback(monkeypatch): """Regression guard for #15216.