From e020f46beccad23f80796d5f7d3a3dc7d4bbdb6f Mon Sep 17 00:00:00 2001 From: maelrx Date: Mon, 13 Apr 2026 15:12:55 -0300 Subject: [PATCH] fix(agent): preserve MiniMax context length on delta-only overflow --- run_agent.py | 23 ++++++++- tests/agent/test_model_metadata.py | 4 ++ tests/run_agent/test_run_agent.py | 83 ++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 1 deletion(-) diff --git a/run_agent.py b/run_agent.py index 3a26fdead..affcbbd72 100644 --- a/run_agent.py +++ b/run_agent.py @@ -10575,9 +10575,30 @@ class AIAgent: # Error is about the INPUT being too large — reduce context_length. # Try to parse the actual limit from the error message parsed_limit = parse_context_limit_from_error(error_msg) + _provider_lower = (getattr(self, "provider", "") or "").lower() + _base_lower = (getattr(self, "base_url", "") or "").rstrip("/").lower() + is_minimax_provider = ( + _provider_lower in {"minimax", "minimax-cn"} + or _base_lower.startswith(( + "https://api.minimax.io/anthropic", + "https://api.minimaxi.com/anthropic", + )) + ) + minimax_delta_only_overflow = ( + is_minimax_provider + and parsed_limit is None + and "context window exceeds limit (" in error_msg + ) if parsed_limit and parsed_limit < old_ctx: new_ctx = parsed_limit - self._vprint(f"{self.log_prefix}⚠️ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True) + self._vprint(f"{self.log_prefix}Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True) + elif minimax_delta_only_overflow: + new_ctx = old_ctx + self._vprint( + f"{self.log_prefix}Provider reported overflow amount only; " + f"keeping context_length at {old_ctx:,} tokens and compressing.", + force=True, + ) else: # Step down to the next probe tier new_ctx = get_next_probe_tier(old_ctx) diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py index 45e716022..8c5261f48 100644 --- a/tests/agent/test_model_metadata.py +++ b/tests/agent/test_model_metadata.py @@ -621,6 +621,10 @@ class TestParseContextLimitFromError: msg = "Error: context window of 4096 tokens exceeded" assert parse_context_limit_from_error(msg) == 4096 + def test_minimax_delta_only_message_returns_none(self): + msg = "invalid params, context window exceeds limit (2013)" + assert parse_context_limit_from_error(msg) is None + def test_completely_unrelated_error(self): assert parse_context_limit_from_error("Invalid API key") is None diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 8d5e21f11..d8f33f67c 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -2575,6 +2575,89 @@ class TestRunConversation: assert result["final_response"] == "Recovered after compression" assert result["completed"] is True + def test_minimax_delta_overflow_keeps_known_context_length(self, agent): + """MiniMax reports overflow deltas like 'limit (2013)' without the real window. + + Keep the known 204,800-token window and compress instead of probing down + to the generic 128K fallback tier. + """ + self._setup_agent(agent) + agent.provider = "minimax" + agent.model = "MiniMax-M2.7-highspeed" + agent.base_url = "https://api.minimax.io/anthropic" + agent.context_compressor.context_length = 204_800 + agent.context_compressor.threshold_tokens = int( + agent.context_compressor.context_length * agent.context_compressor.threshold_percent + ) + + err_400 = Exception( + "HTTP 400: invalid params, context window exceeds limit (2013)" + ) + err_400.status_code = 400 + ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop") + agent.client.chat.completions.create.side_effect = [err_400, ok_resp] + prefill = [ + {"role": "user", "content": "previous question"}, + {"role": "assistant", "content": "previous answer"}, + ] + + with ( + patch.object(agent, "_compress_context") as mock_compress, + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + mock_compress.return_value = ( + [{"role": "user", "content": "hello"}], + "compressed system prompt", + ) + result = agent.run_conversation("hello", conversation_history=prefill) + + mock_compress.assert_called_once() + assert agent.context_compressor.context_length == 204_800 + assert agent.context_compressor._context_probed is False + assert result["final_response"] == "Recovered after compression" + assert result["completed"] is True + + def test_non_minimax_delta_overflow_still_probes_down(self, agent): + """Non-MiniMax providers should keep the generic probe-down behavior.""" + self._setup_agent(agent) + agent.provider = "openrouter" + agent.model = "some/unknown-model" + agent.base_url = "https://openrouter.ai/api/v1" + agent.context_compressor.context_length = 200_000 + agent.context_compressor.threshold_tokens = int( + agent.context_compressor.context_length * agent.context_compressor.threshold_percent + ) + + err_400 = Exception( + "HTTP 400: invalid params, context window exceeds limit (2013)" + ) + err_400.status_code = 400 + ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop") + agent.client.chat.completions.create.side_effect = [err_400, ok_resp] + prefill = [ + {"role": "user", "content": "previous question"}, + {"role": "assistant", "content": "previous answer"}, + ] + + with ( + patch.object(agent, "_compress_context") as mock_compress, + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + mock_compress.return_value = ( + [{"role": "user", "content": "hello"}], + "compressed system prompt", + ) + result = agent.run_conversation("hello", conversation_history=prefill) + + mock_compress.assert_called_once() + assert agent.context_compressor.context_length == 128_000 + assert result["final_response"] == "Recovered after compression" + assert result["completed"] is True + def test_length_finish_reason_requests_continuation(self, agent): """Normal truncation (partial real content) triggers continuation.""" self._setup_agent(agent)