diff --git a/run_agent.py b/run_agent.py index cf418a576..7ac077d78 100644 --- a/run_agent.py +++ b/run_agent.py @@ -9459,7 +9459,8 @@ class AIAgent: fallback = getattr(self, '_last_content_with_tools', None) if fallback: _turn_exit_reason = "fallback_prior_turn_content" - logger.debug("Empty follow-up after tool calls — using prior turn content as final response") + logger.info("Empty follow-up after tool calls — using prior turn content as final response") + self._emit_status("↻ Empty response after tool calls — using earlier content as final answer") self._last_content_with_tools = None self._empty_content_retries = 0 for i in range(len(messages) - 1, -1, -1): @@ -9490,9 +9491,13 @@ class AIAgent: ) if _has_structured and self._thinking_prefill_retries < 2: self._thinking_prefill_retries += 1 - self._vprint( - f"{self.log_prefix}↻ Thinking-only response — " - f"prefilling to continue " + logger.info( + "Thinking-only response (no visible content) — " + "prefilling to continue (%d/2)", + self._thinking_prefill_retries, + ) + self._emit_status( + f"↻ Thinking-only response — prefilling to continue " f"({self._thinking_prefill_retries}/2)" ) interim_msg = self._build_assistant_message( @@ -9508,23 +9513,57 @@ class AIAgent: # Model returned nothing — no content, no # structured reasoning, no tool calls. Common # with open models (transient provider issues, - # rate limits, sampling flukes). Silently retry - # up to 3 times before giving up. Skip when + # rate limits, sampling flukes). Retry up to 3 + # times before attempting fallback. Skip when # content has inline tags (model chose # to reason, just no visible text). _truly_empty = not final_response.strip() if _truly_empty and not _has_structured and self._empty_content_retries < 3: self._empty_content_retries += 1 - self._vprint( - f"{self.log_prefix}↻ Empty response (no content or reasoning) " - f"— retrying ({self._empty_content_retries}/3)", - force=True, + logger.warning( + "Empty response (no content or reasoning) — " + "retry %d/3 (model=%s)", + self._empty_content_retries, self.model, + ) + self._emit_status( + f"⚠️ Empty response from model — retrying " + f"({self._empty_content_retries}/3)" ) continue - # Exhausted prefill attempts, empty retries, or - # structured reasoning with no content — - # fall through to "(empty)" terminal. + # ── Exhausted retries — try fallback provider ── + # Before giving up with "(empty)", attempt to + # switch to the next provider in the fallback + # chain. This covers the case where a model + # (e.g. GLM-4.5-Air) consistently returns empty + # due to context degradation or provider issues. + if _truly_empty and self._fallback_chain: + logger.warning( + "Empty response after %d retries — " + "attempting fallback (model=%s, provider=%s)", + self._empty_content_retries, self.model, + self.provider, + ) + self._emit_status( + "⚠️ Model returning empty responses — " + "switching to fallback provider..." + ) + if self._try_activate_fallback(): + self._empty_content_retries = 0 + self._emit_status( + f"↻ Switched to fallback: {self.model} " + f"({self.provider})" + ) + logger.info( + "Fallback activated after empty responses: " + "now using %s on %s", + self.model, self.provider, + ) + continue + + # Exhausted retries and fallback chain (or no + # fallback configured). Fall through to the + # "(empty)" terminal. _turn_exit_reason = "empty_response_exhausted" reasoning_text = self._extract_reasoning(assistant_message) assistant_msg = self._build_assistant_message(assistant_message, finish_reason) @@ -9533,9 +9572,28 @@ class AIAgent: if reasoning_text: reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text - self._vprint(f"{self.log_prefix}ℹ️ Reasoning-only response (no visible content). Reasoning: {reasoning_preview}") + logger.warning( + "Reasoning-only response (no visible content) " + "after exhausting retries and fallback. " + "Reasoning: %s", reasoning_preview, + ) + self._emit_status( + "⚠️ Model produced reasoning but no visible " + "response after all retries. Returning empty." + ) else: - self._vprint(f"{self.log_prefix}ℹ️ Empty response (no content or reasoning) after 3 retries.") + logger.warning( + "Empty response (no content or reasoning) " + "after %d retries. No fallback available. " + "model=%s provider=%s", + self._empty_content_retries, self.model, + self.provider, + ) + self._emit_status( + "❌ Model returned no content after all retries" + + (" and fallback attempts." if self._fallback_chain else + ". No fallback providers configured.") + ) final_response = "(empty)" break diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index d88409a7a..58e67070c 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -1823,6 +1823,111 @@ class TestRunConversation: assert result["final_response"] == "Here is the actual answer." assert result["api_calls"] == 2 # 1 original + 1 nudge retry + def test_empty_response_triggers_fallback_provider(self, agent): + """After 3 empty retries, fallback provider is activated and produces content.""" + self._setup_agent(agent) + agent.base_url = "http://127.0.0.1:1234/v1" + # Configure a fallback chain + agent._fallback_chain = [{"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}] + agent._fallback_index = 0 + agent._fallback_activated = False + + empty_resp = _mock_response(content=None, finish_reason="stop") + content_resp = _mock_response(content="Fallback answer.", finish_reason="stop") + # 4 empty (1 orig + 3 retries), then fallback model answers + agent.client.chat.completions.create.side_effect = [ + empty_resp, empty_resp, empty_resp, empty_resp, content_resp, + ] + + fallback_called = {"called": False} + + def _mock_fallback(): + fallback_called["called"] = True + # Simulate what _try_activate_fallback does: just advance the + # index and set the flag (the client is already mocked). + agent._fallback_index = 1 + agent._fallback_activated = True + agent.model = "anthropic/claude-sonnet-4" + agent.provider = "openrouter" + return True + + with ( + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + patch.object(agent, "_try_activate_fallback", side_effect=_mock_fallback), + ): + result = agent.run_conversation("answer me") + assert fallback_called["called"], "Fallback should have been triggered" + assert result["completed"] is True + assert result["final_response"] == "Fallback answer." + + def test_empty_response_fallback_also_empty_returns_empty(self, agent): + """If fallback also returns empty, final response is (empty).""" + self._setup_agent(agent) + agent.base_url = "http://127.0.0.1:1234/v1" + agent._fallback_chain = [{"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}] + agent._fallback_index = 0 + agent._fallback_activated = False + + empty_resp = _mock_response(content=None, finish_reason="stop") + # 4 empty from primary (1 + 3 retries), fallback activated, + # then 4 more empty from fallback (1 + 3 retries), no more fallbacks + agent.client.chat.completions.create.side_effect = [ + empty_resp, empty_resp, empty_resp, empty_resp, # primary exhausted + empty_resp, empty_resp, empty_resp, empty_resp, # fallback exhausted + ] + + def _mock_fallback(): + if agent._fallback_index >= len(agent._fallback_chain): + return False + agent._fallback_index += 1 + agent._fallback_activated = True + agent.model = "anthropic/claude-sonnet-4" + agent.provider = "openrouter" + return True + + with ( + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + patch.object(agent, "_try_activate_fallback", side_effect=_mock_fallback), + ): + result = agent.run_conversation("answer me") + assert result["completed"] is True + assert result["final_response"] == "(empty)" + + def test_empty_response_emits_status_for_gateway(self, agent): + """_emit_status is called during empty retries so gateway users see feedback.""" + self._setup_agent(agent) + agent.base_url = "http://127.0.0.1:1234/v1" + + empty_resp = _mock_response(content=None, finish_reason="stop") + # 4 empty: 1 original + 3 retries, all empty, no fallback + agent.client.chat.completions.create.side_effect = [ + empty_resp, empty_resp, empty_resp, empty_resp, + ] + + status_messages = [] + + def _capture_status(msg): + status_messages.append(msg) + + with ( + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + patch.object(agent, "_emit_status", side_effect=_capture_status), + ): + result = agent.run_conversation("answer me") + + assert result["final_response"] == "(empty)" + # Should have emitted retry statuses (3 retries) + final failure + retry_msgs = [m for m in status_messages if "retrying" in m.lower()] + assert len(retry_msgs) == 3, f"Expected 3 retry status messages, got {len(retry_msgs)}: {status_messages}" + failure_msgs = [m for m in status_messages if "no content" in m.lower() or "no fallback" in m.lower()] + assert len(failure_msgs) >= 1, f"Expected at least 1 failure status, got: {status_messages}" + def test_nous_401_refreshes_after_remint_and_retries(self, agent): self._setup_agent(agent) agent.provider = "nous"