diff --git a/run_agent.py b/run_agent.py
index 3e7ddc6870..64c8cbadb3 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -8344,6 +8344,10 @@ class AIAgent:
                                 approx_tokens=approx_tokens,
                                 task_id=effective_task_id,
                             )
+                            # Compression created a new session — clear history
+                            # so _flush_messages_to_session_db writes compressed
+                            # messages to the new session, not skipping them.
+                            conversation_history = None
                             if len(messages) < original_len or old_ctx > _reduced_ctx:
                                 self._emit_status(
                                     f"🗜️ Context reduced to {_reduced_ctx:,} tokens "
@@ -8401,6 +8405,10 @@ class AIAgent:
                             messages, system_message, approx_tokens=approx_tokens,
                             task_id=effective_task_id,
                         )
+                        # Compression created a new session — clear history
+                        # so _flush_messages_to_session_db writes compressed
+                        # messages to the new session, not skipping them.
+                        conversation_history = None
 
                         if len(messages) < original_len:
                             self._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
@@ -8519,6 +8527,10 @@ class AIAgent:
                             messages, system_message, approx_tokens=approx_tokens,
                             task_id=effective_task_id,
                         )
+                        # Compression created a new session — clear history
+                        # so _flush_messages_to_session_db writes compressed
+                        # messages to the new session, not skipping them.
+                        conversation_history = None
 
                         if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
                             if len(messages) < original_len:
diff --git a/tests/run_agent/test_413_compression.py b/tests/run_agent/test_413_compression.py
index 230434429b..b30f9f6bb3 100644
--- a/tests/run_agent/test_413_compression.py
+++ b/tests/run_agent/test_413_compression.py
@@ -172,6 +172,87 @@ class TestHTTP413Compression:
         mock_compress.assert_called_once()
         assert result["completed"] is True
 
+    def test_413_clears_conversation_history_on_persist(self, agent):
+        """After 413-triggered compression, _persist_session must receive None history.
+
+        Bug: _compress_context() creates a new session and resets _last_flushed_db_idx=0,
+        but if conversation_history still holds the original (pre-compression) list,
+        _flush_messages_to_session_db computes flush_from = max(len(history), 0) which
+        exceeds len(compressed_messages), so messages[flush_from:] is empty and nothing
+        is written to the new session → "Session found but has no messages" on resume.
+        """
+        err_413 = _make_413_error()
+        ok_resp = _mock_response(content="OK", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [err_413, ok_resp]
+
+        big_history = [
+            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
+            for i in range(200)
+        ]
+
+        persist_calls = []
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(
+                agent, "_persist_session",
+                side_effect=lambda msgs, hist: persist_calls.append(hist),
+            ),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            mock_compress.return_value = (
+                [{"role": "user", "content": "summary"}],
+                "compressed prompt",
+            )
+            agent.run_conversation("hello", conversation_history=big_history)
+
+        assert len(persist_calls) >= 1, "Expected at least one _persist_session call"
+        for hist in persist_calls:
+            assert hist is None, (
+                f"conversation_history should be None after mid-loop compression, "
+                f"got list with {len(hist)} items"
+            )
+
+    def test_context_overflow_clears_conversation_history_on_persist(self, agent):
+        """After context-overflow compression, _persist_session must receive None history."""
+        err_400 = Exception(
+            "Error code: 400 - This endpoint's maximum context length is 128000 tokens. "
+            "However, you requested about 270460 tokens."
+        )
+        err_400.status_code = 400
+        ok_resp = _mock_response(content="OK", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
+
+        big_history = [
+            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
+            for i in range(200)
+        ]
+
+        persist_calls = []
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(
+                agent, "_persist_session",
+                side_effect=lambda msgs, hist: persist_calls.append(hist),
+            ),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            mock_compress.return_value = (
+                [{"role": "user", "content": "summary"}],
+                "compressed prompt",
+            )
+            agent.run_conversation("hello", conversation_history=big_history)
+
+        assert len(persist_calls) >= 1
+        for hist in persist_calls:
+            assert hist is None, (
+                f"conversation_history should be None after context-overflow compression, "
+                f"got list with {len(hist)} items"
+            )
+
     def test_400_context_length_triggers_compression(self, agent):
         """A 400 with 'maximum context length' should trigger compression, not abort as generic 4xx.