fix: clear conversation_history after mid-loop compression to prevent empty sessions (#7001)

After mid-loop compression (triggered by 413, context_overflow, or Anthropic long-context tier errors), _compress_context() creates a new session in SQLite and resets _last_flushed_db_idx=0. However, conversation_history was not cleared, so _flush_messages_to_session_db() computed:

    flush_from = max(len(conversation_history)=200, _last_flushed_db_idx=0) = 200
    messages[200:] → empty (compressed messages < 200)

This resulted in zero messages being written to the new session's SQLite store. On resume, the user would see 'Session found but has no messages.'

The preflight compression path (line 7311) already had the fix:

    conversation_history = None

This commit adds the same clearing to the three mid-loop compression sites:

- Anthropic long-context tier overflow
- HTTP 413 payload too large
- Generic context_overflow error

Reported by Aaryan (Nous community).
2026-04-25 00:51:20 +00:00 · 2026-04-10 00:14:59 -07:00 · 2026-04-10 00:14:59 -07:00 · 871313ae2d
commit 871313ae2d
parent 13d7ff3420
2 changed files with 93 additions and 0 deletions
--- a/tests/run_agent/test_413_compression.py
+++ b/tests/run_agent/test_413_compression.py
@@ -172,6 +172,87 @@ class TestHTTP413Compression:
        mock_compress.assert_called_once()
        assert result["completed"] is True

+    def test_413_clears_conversation_history_on_persist(self, agent):
+        """After 413-triggered compression, _persist_session must receive None history.
+
+        Bug: _compress_context() creates a new session and resets _last_flushed_db_idx=0,
+        but if conversation_history still holds the original (pre-compression) list,
+        _flush_messages_to_session_db computes flush_from = max(len(history), 0) which
+        exceeds len(compressed_messages), so messages[flush_from:] is empty and nothing
+        is written to the new session → "Session found but has no messages" on resume.
+        """
+        err_413 = _make_413_error()
+        ok_resp = _mock_response(content="OK", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [err_413, ok_resp]
+
+        big_history = [
+            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
+            for i in range(200)
+        ]
+
+        persist_calls = []
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(
+                agent, "_persist_session",
+                side_effect=lambda msgs, hist: persist_calls.append(hist),
+            ),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            mock_compress.return_value = (
+                [{"role": "user", "content": "summary"}],
+                "compressed prompt",
+            )
+            agent.run_conversation("hello", conversation_history=big_history)
+
+        assert len(persist_calls) >= 1, "Expected at least one _persist_session call"
+        for hist in persist_calls:
+            assert hist is None, (
+                f"conversation_history should be None after mid-loop compression, "
+                f"got list with {len(hist)} items"
+            )
+
+    def test_context_overflow_clears_conversation_history_on_persist(self, agent):
+        """After context-overflow compression, _persist_session must receive None history."""
+        err_400 = Exception(
+            "Error code: 400 - This endpoint's maximum context length is 128000 tokens. "
+            "However, you requested about 270460 tokens."
+        )
+        err_400.status_code = 400
+        ok_resp = _mock_response(content="OK", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
+
+        big_history = [
+            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
+            for i in range(200)
+        ]
+
+        persist_calls = []
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(
+                agent, "_persist_session",
+                side_effect=lambda msgs, hist: persist_calls.append(hist),
+            ),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            mock_compress.return_value = (
+                [{"role": "user", "content": "summary"}],
+                "compressed prompt",
+            )
+            agent.run_conversation("hello", conversation_history=big_history)
+
+        assert len(persist_calls) >= 1
+        for hist in persist_calls:
+            assert hist is None, (
+                f"conversation_history should be None after context-overflow compression, "
+                f"got list with {len(hist)} items"
+            )
+
    def test_400_context_length_triggers_compression(self, agent):
        """A 400 with 'maximum context length' should trigger compression, not abort as generic 4xx.