test: cover ci-unblocker production regressions

Snapshot review_agent._session_messages before teardown so close() can clean per-session state without dropping the user-visible self-improvement summary. Adds two regressions: - bg-review summarizer receives captured review-agent tool messages after review_agent.close() runs - context-compressor protected-head handoff rehydration populates _previous_summary and keeps the old handoff out of newly summarized turns Salvaged from PR #26039 onto current main after agent/background_review.py extraction. Original commit 63eaf6055; bg-review test updated to patch the module-level summarize_background_review_actions in agent.background_review instead of the now-forwarder AIAgent._summarize_background_review_actions.
2026-07-14 14:12:44 +00:00 · 2026-05-27 21:41:48 -07:00 · 2026-05-27 21:41:48 -07:00 · 4a6f1863ac
commit 4a6f1863ac
parent 66489f38c7
3 changed files with 95 additions and 1 deletions
--- a/agent/background_review.py
+++ b/agent/background_review.py
@ -483,6 +483,11 @@ def _run_review_in_thread(
            finally:
                clear_thread_tool_whitelist()

+            # Snapshot review actions before teardown. close() is allowed to
+            # clean per-session state, but the user-visible self-improvement
+            # summary still needs the completed review agent's tool results.
+            review_messages = list(getattr(review_agent, "_session_messages", []))
+
            # Tear down memory providers while stdout is still
            # redirected so background thread teardown (Honcho flush,
            # Hindsight sync, etc.) stays silent.  The finally block
@ -495,7 +500,6 @@ def _run_review_in_thread(
                review_agent.close()
            except Exception:
                pass
-            review_messages = list(getattr(review_agent, "_session_messages", []))
            review_agent = None

        # Scan the review agent's messages for successful tool actions
--- a/tests/agent/test_context_compressor_summary_continuity.py
+++ b/tests/agent/test_context_compressor_summary_continuity.py
@ -67,3 +67,21 @@ def test_resume_rehydrates_previous_summary_from_handoff_message():
    assert "TURNS TO SUMMARIZE:" not in prompt
    assert prompt.count(old_summary) == 1
    assert f"[USER]: {SUMMARY_PREFIX}" not in prompt
+
+
+def test_handoff_in_protected_head_populates_previous_summary_before_update():
+    """A resumed protected-head handoff should restore iterative-summary state."""
+    compressor = _compressor()
+    old_summary = "PROTECTED-HEAD-SUMMARY durable facts from before restart"
+    seen_turns = []
+
+    def fake_generate_summary(turns_to_summarize, focus_topic=None):
+        seen_turns.extend(turns_to_summarize)
+        return "new summary from resumed turns"
+
+    with patch.object(compressor, "_generate_summary", side_effect=fake_generate_summary):
+        compressor.compress(_messages_with_handoff(old_summary))
+
+    assert compressor._previous_summary == old_summary
+    assert seen_turns
+    assert all(old_summary not in str(msg.get("content", "")) for msg in seen_turns)
--- a/tests/run_agent/test_background_review.py
+++ b/tests/run_agent/test_background_review.py
@ -76,6 +76,78 @@ def test_background_review_shuts_down_memory_provider_before_close(monkeypatch):
    ]


+def test_background_review_summarizer_receives_captured_messages_after_close(monkeypatch):
+    """The action summarizer must see review messages even after close cleanup.
+
+    Regression for the bug where ``review_messages`` was snapshot AFTER
+    ``review_agent.close()``. close() is allowed to clean per-session state
+    (including ``_session_messages``), so the summarizer would receive an
+    empty list and the user-visible self-improvement summary would silently
+    disappear. The fix snapshots ``_session_messages`` before teardown.
+    """
+    import json
+    import agent.background_review as bg_review
+
+    review_tool_message = {
+        "role": "tool",
+        "tool_call_id": "call_bg",
+        "content": json.dumps(
+            {"success": True, "message": "Entry added", "target": "memory"}
+        ),
+    }
+    captured: dict = {}
+    events: list[str] = []
+
+    class FakeReviewAgent:
+        def __init__(self, **kwargs):
+            self._session_messages = []
+
+        def run_conversation(self, **kwargs):
+            events.append("run_conversation")
+            self._session_messages = [review_tool_message]
+
+        def shutdown_memory_provider(self):
+            events.append("shutdown_memory_provider")
+
+        def close(self):
+            events.append("close")
+            # close() is allowed to clean _session_messages — the fix
+            # must have snapshot them before this runs.
+            self._session_messages = []
+
+    def fake_summarize(review_messages, prior_snapshot):
+        events.append("summarize")
+        captured["review_messages"] = list(review_messages)
+        captured["prior_snapshot"] = list(prior_snapshot)
+        return []
+
+    monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
+    monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
+    monkeypatch.setattr(
+        bg_review,
+        "summarize_background_review_actions",
+        fake_summarize,
+    )
+
+    messages_snapshot = [{"role": "user", "content": "hi"}]
+    agent = _bare_agent()
+
+    AIAgent._spawn_background_review(
+        agent,
+        messages_snapshot=messages_snapshot,
+        review_memory=True,
+    )
+
+    assert events == [
+        "run_conversation",
+        "shutdown_memory_provider",
+        "close",
+        "summarize",
+    ]
+    assert captured["review_messages"] == [review_tool_message]
+    assert captured["prior_snapshot"] == messages_snapshot
+
+
 def test_background_review_installs_auto_deny_approval_callback(monkeypatch):
    """Regression guard for #15216.