diff --git a/run_agent.py b/run_agent.py index 3e7ddc6870..64c8cbadb3 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8344,6 +8344,10 @@ class AIAgent: approx_tokens=approx_tokens, task_id=effective_task_id, ) + # Compression started a new session — drop the stale + # history; _flush_messages_to_session_db would use its + # length as the flush offset and write nothing. + conversation_history = None if len(messages) < original_len or old_ctx > _reduced_ctx: self._emit_status( f"🗜️ Context reduced to {_reduced_ctx:,} tokens " @@ -8401,6 +8405,10 @@ class AIAgent: messages, system_message, approx_tokens=approx_tokens, task_id=effective_task_id, ) + # Compression started a new session — drop the stale + # history; _flush_messages_to_session_db would use its + # length as the flush offset and write nothing. + conversation_history = None if len(messages) < original_len: self._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...") @@ -8519,6 +8527,10 @@ class AIAgent: messages, system_message, approx_tokens=approx_tokens, task_id=effective_task_id, ) + # Compression started a new session — drop the stale + # history; _flush_messages_to_session_db would use its + # length as the flush offset and write nothing. + conversation_history = None if len(messages) < original_len or new_ctx and new_ctx < old_ctx: if len(messages) < original_len: diff --git a/tests/run_agent/test_413_compression.py b/tests/run_agent/test_413_compression.py index 230434429b..b30f9f6bb3 100644 --- a/tests/run_agent/test_413_compression.py +++ b/tests/run_agent/test_413_compression.py @@ -172,6 +172,87 @@ class TestHTTP413Compression: mock_compress.assert_called_once() assert result["completed"] is True + def test_413_clears_conversation_history_on_persist(self, agent): + """After 413-triggered compression, _persist_session must receive None history. 
+ + Bug: _compress_context() creates a new session and resets _last_flushed_db_idx=0, + but if conversation_history still holds the original (pre-compression) list, + _flush_messages_to_session_db computes flush_from = max(len(history), 0) which + exceeds len(compressed_messages), so messages[flush_from:] is empty and nothing + is written to the new session → "Session found but has no messages" on resume. + """ + err_413 = _make_413_error() + ok_resp = _mock_response(content="OK", finish_reason="stop") + agent.client.chat.completions.create.side_effect = [err_413, ok_resp] + + big_history = [ + {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} + for i in range(200) + ] + + persist_calls = [] + + with ( + patch.object(agent, "_compress_context") as mock_compress, + patch.object( + agent, "_persist_session", + side_effect=lambda msgs, hist: persist_calls.append(hist), + ), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + mock_compress.return_value = ( + [{"role": "user", "content": "summary"}], + "compressed prompt", + ) + agent.run_conversation("hello", conversation_history=big_history) + + assert len(persist_calls) >= 1, "Expected at least one _persist_session call" + for hist in persist_calls: + assert hist is None, ( + f"conversation_history should be None after mid-loop compression, " + f"got list with {len(hist)} items" + ) + + def test_context_overflow_clears_conversation_history_on_persist(self, agent): + """After context-overflow compression, _persist_session must receive None history.""" + err_400 = Exception( + "Error code: 400 - This endpoint's maximum context length is 128000 tokens. " + "However, you requested about 270460 tokens." 
+ ) + err_400.status_code = 400 + ok_resp = _mock_response(content="OK", finish_reason="stop") + agent.client.chat.completions.create.side_effect = [err_400, ok_resp] + + big_history = [ + {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} + for i in range(200) + ] + + persist_calls = [] + + with ( + patch.object(agent, "_compress_context") as mock_compress, + patch.object( + agent, "_persist_session", + side_effect=lambda msgs, hist: persist_calls.append(hist), + ), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + mock_compress.return_value = ( + [{"role": "user", "content": "summary"}], + "compressed prompt", + ) + agent.run_conversation("hello", conversation_history=big_history) + + assert len(persist_calls) >= 1 + for hist in persist_calls: + assert hist is None, ( + f"conversation_history should be None after context-overflow compression, " + f"got list with {len(hist)} items" + ) + def test_400_context_length_triggers_compression(self, agent): """A 400 with 'maximum context length' should trigger compression, not abort as generic 4xx.