diff --git a/run_agent.py b/run_agent.py index 360ef0517..4c0d3be4b 100644 --- a/run_agent.py +++ b/run_agent.py @@ -9736,12 +9736,25 @@ class AIAgent: # Pop thinking-only prefill message(s) before appending # (tool-call path — same rationale as the final-response path). + _had_prefill = False while ( messages and isinstance(messages[-1], dict) and messages[-1].get("_thinking_prefill") ): messages.pop() + _had_prefill = True + + # Reset both retry counters when tool calls follow a prefill + # recovery. Without this, the prefill counter accumulates + # across the whole conversation — a model that intermittently + # empties (empty → prefill → tools → empty → prefill → + # tools) burns both prefill attempts and the third empty + # gets zero recovery. Resetting here treats each tool- + # call success as a fresh start. + if _had_prefill: + self._thinking_prefill_retries = 0 + self._empty_content_retries = 0 messages.append(assistant_msg) self._emit_interim_assistant_message(assistant_msg) @@ -9917,16 +9930,23 @@ class AIAgent: self._save_session_log(messages) continue - # ── Empty response retry (no reasoning) ────── - # Model returned nothing — no content, no - # structured reasoning, no tool calls. Common - # with open models (transient provider issues, - # rate limits, sampling flukes). Retry up to 3 - # times before attempting fallback. Skip when - # content has inline <think> tags (model chose - # to reason, just no visible text). - _truly_empty = not final_response.strip() - if _truly_empty and not _has_structured and self._empty_content_retries < 3: + # ── Empty response retry ────────────────────── + # Model returned nothing usable. Retry up to 3 + # times before attempting fallback.
This covers + # both truly empty responses (no content, no + # reasoning) AND reasoning-only responses after + # prefill exhaustion — models like mimo-v2-pro + # always populate reasoning fields via OpenRouter, + # so the old `not _has_structured` guard blocked + # retries for every reasoning model after prefill. + _truly_empty = not self._strip_think_blocks( + final_response + ).strip() + _prefill_exhausted = ( + _has_structured + and self._thinking_prefill_retries >= 2 + ) + if _truly_empty and (not _has_structured or _prefill_exhausted) and self._empty_content_retries < 3: self._empty_content_retries += 1 logger.warning( "Empty response (no content or reasoning) — " diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index e4ae10f20..2112ddc3f 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -1741,9 +1741,9 @@ class TestRunConversation: {"role": "assistant", "content": "old answer"}, ] - # 3 responses: original + 2 prefill continuations (structured reasoning triggers prefill) + # 6 responses: original + 2 prefill + 3 retries after prefill exhaustion with ( - patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp, empty_resp, empty_resp]), + patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp] * 6), patch.object(agent, "_compress_context") as mock_compress, patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), @@ -1754,18 +1754,18 @@ class TestRunConversation: mock_compress.assert_not_called() # no compression triggered assert result["completed"] is True assert result["final_response"] == "(empty)" - assert result["api_calls"] == 3 # 1 original + 2 prefill continuations + assert result["api_calls"] == 6 # 1 original + 2 prefill + 3 retries def test_reasoning_only_response_prefill_then_empty(self, agent): - """Structured reasoning-only triggers prefill continuation (up to 2), then falls through to (empty).""" + """Structured 
reasoning-only triggers prefill (2), then retries (3), then (empty).""" self._setup_agent(agent) empty_resp = _mock_response( content=None, finish_reason="stop", reasoning_content="structured reasoning answer", ) - # 3 responses: original + 2 prefill continuations, all reasoning-only - agent.client.chat.completions.create.side_effect = [empty_resp, empty_resp, empty_resp] + # 6 responses: 1 original + 2 prefill + 3 retries after prefill exhaustion + agent.client.chat.completions.create.side_effect = [empty_resp] * 6 with ( patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), @@ -1774,7 +1774,7 @@ class TestRunConversation: result = agent.run_conversation("answer me") assert result["completed"] is True assert result["final_response"] == "(empty)" - assert result["api_calls"] == 3 # 1 original + 2 prefill continuations + assert result["api_calls"] == 6 # 1 original + 2 prefill + 3 retries def test_reasoning_only_prefill_succeeds_on_continuation(self, agent): """When prefill continuation produces content, it becomes the final response."""