From 871313ae2dc55c2d6e2490fd97902bdf9ec2b70c Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 10 Apr 2026 00:14:59 -0700
Subject: [PATCH] fix: clear conversation_history after mid-loop compression to
 prevent empty sessions (#7001)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After mid-loop compression (triggered by 413, context_overflow, or Anthropic
long-context tier errors), _compress_context() creates a new session in SQLite
and resets _last_flushed_db_idx=0. However, conversation_history was not cleared,
so _flush_messages_to_session_db() computed:

    flush_from = max(len(conversation_history), _last_flushed_db_idx) = max(200, 0) = 200
    messages[200:] → empty (the compressed list has fewer than 200 messages)

This resulted in zero messages being written to the new session's SQLite store.
On resume, the user would see 'Session found but has no messages.'

The preflight compression path (line 7311) already had the fix:
    conversation_history = None

This commit adds the same clearing to the three mid-loop compression sites:
- Anthropic long-context tier overflow
- HTTP 413 payload too large
- Generic context_overflow error

Reported by Aaryan (Nous community).
---
 run_agent.py                            | 12 ++++
 tests/run_agent/test_413_compression.py | 81 +++++++++++++++++++++++++
 2 files changed, 93 insertions(+)

diff --git a/run_agent.py b/run_agent.py
index 3e7ddc687..64c8cbadb 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -8344,6 +8344,10 @@ class AIAgent:
                                 approx_tokens=approx_tokens,
                                 task_id=effective_task_id,
                             )
+                            # Compression created a new session — clear history
+                            # so _flush_messages_to_session_db writes compressed
+                            # messages to the new session, not skipping them.
+                            conversation_history = None
                             if len(messages) < original_len or old_ctx > _reduced_ctx:
                                 self._emit_status(
                                     f"🗜️ Context reduced to {_reduced_ctx:,} tokens "
@@ -8401,6 +8405,10 @@ class AIAgent:
                             messages, system_message, approx_tokens=approx_tokens,
                             task_id=effective_task_id,
                         )
+                        # Compression created a new session — clear history
+                        # so _flush_messages_to_session_db writes compressed
+                        # messages to the new session, not skipping them.
+                        conversation_history = None
 
                         if len(messages) < original_len:
                             self._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
@@ -8519,6 +8527,10 @@ class AIAgent:
                             messages, system_message, approx_tokens=approx_tokens,
                             task_id=effective_task_id,
                         )
+                        # Compression created a new session — clear history
+                        # so _flush_messages_to_session_db writes compressed
+                        # messages to the new session, not skipping them.
+                        conversation_history = None
 
                         if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
                             if len(messages) < original_len:
diff --git a/tests/run_agent/test_413_compression.py b/tests/run_agent/test_413_compression.py
index 230434429..b30f9f6bb 100644
--- a/tests/run_agent/test_413_compression.py
+++ b/tests/run_agent/test_413_compression.py
@@ -172,6 +172,87 @@ class TestHTTP413Compression:
         mock_compress.assert_called_once()
         assert result["completed"] is True
 
+    def test_413_clears_conversation_history_on_persist(self, agent):
+        """After 413-triggered compression, _persist_session must receive None history.
+
+        Bug: _compress_context() creates a new session and resets _last_flushed_db_idx=0,
+        but if conversation_history still holds the original (pre-compression) list,
+        _flush_messages_to_session_db computes flush_from = max(len(history), 0) which
+        exceeds len(compressed_messages), so messages[flush_from:] is empty and nothing
+        is written to the new session → "Session found but has no messages" on resume.
+        """
+        err_413 = _make_413_error()  # presumably an HTTP 413 exception instance — confirm helper
+        ok_resp = _mock_response(content="OK", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [err_413, ok_resp]  # 1st call -> err_413, 2nd -> ok_resp
+
+        big_history = [  # 200 alternating user/assistant messages, far longer than the compressed list
+            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
+            for i in range(200)
+        ]
+
+        persist_calls = []  # collects the history argument of every _persist_session call
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,  # stub: no real compression runs
+            patch.object(
+                agent, "_persist_session",
+                side_effect=lambda msgs, hist: persist_calls.append(hist),
+            ),
+            patch.object(agent, "_save_trajectory"),  # replaced by a mock for this test
+            patch.object(agent, "_cleanup_task_resources"),  # replaced by a mock for this test
+        ):
+            mock_compress.return_value = (  # presumably (compressed messages, new system prompt) — confirm
+                [{"role": "user", "content": "summary"}],
+                "compressed prompt",
+            )
+            agent.run_conversation("hello", conversation_history=big_history)
+
+        assert len(persist_calls) >= 1, "Expected at least one _persist_session call"
+        for hist in persist_calls:  # every recorded call must have been given None, not the old list
+            assert hist is None, (
+                f"conversation_history should be None after mid-loop compression, "
+                f"got list with {len(hist)} items"
+            )
+
+    def test_context_overflow_clears_conversation_history_on_persist(self, agent):
+        """After context-overflow compression, _persist_session must receive None history."""
+        err_400 = Exception(  # plain Exception whose message reports a context-length overflow
+            "Error code: 400 - This endpoint's maximum context length is 128000 tokens. "
+            "However, you requested about 270460 tokens."
+        )
+        err_400.status_code = 400  # attached by hand; a bare Exception has no status_code attribute
+        ok_resp = _mock_response(content="OK", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [err_400, ok_resp]  # 1st call raises, 2nd returns ok_resp
+
+        big_history = [  # 200 alternating user/assistant messages, far longer than the compressed list
+            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
+            for i in range(200)
+        ]
+
+        persist_calls = []  # collects the history argument of every _persist_session call
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,  # stub: no real compression runs
+            patch.object(
+                agent, "_persist_session",
+                side_effect=lambda msgs, hist: persist_calls.append(hist),
+            ),
+            patch.object(agent, "_save_trajectory"),  # replaced by a mock for this test
+            patch.object(agent, "_cleanup_task_resources"),  # replaced by a mock for this test
+        ):
+            mock_compress.return_value = (  # presumably (compressed messages, new system prompt) — confirm
+                [{"role": "user", "content": "summary"}],
+                "compressed prompt",
+            )
+            agent.run_conversation("hello", conversation_history=big_history)
+
+        assert len(persist_calls) >= 1
+        for hist in persist_calls:  # every recorded call must have been given None, not the old list
+            assert hist is None, (
+                f"conversation_history should be None after context-overflow compression, "
+                f"got list with {len(hist)} items"
+            )
+
     def test_400_context_length_triggers_compression(self, agent):
         """A 400 with 'maximum context length' should trigger compression, not abort as generic 4xx.