From 4d27bd4a3bb0c6715363dcfe4f6863eebc3df71f Mon Sep 17 00:00:00 2001 From: Edder Talmor Date: Fri, 17 Apr 2026 07:20:08 -0400 Subject: [PATCH] fix: replace empty sentinel with user-friendly fallback --- gateway/run.py | 66 +++++++++++++------ run_agent.py | 20 +++++- tests/gateway/test_empty_response_handling.py | 47 +++++++++++++ tests/run_agent/test_run_agent.py | 27 ++++++-- 4 files changed, 133 insertions(+), 27 deletions(-) create mode 100644 tests/gateway/test_empty_response_handling.py diff --git a/gateway/run.py b/gateway/run.py index ba7ea43ad..82abdb501 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3932,18 +3932,11 @@ class GatewayRunner: pass response = agent_result.get("final_response") or "" + if (not response or response == "(empty)") and ( + agent_result.get("response_is_empty_fallback") or not agent_result.get("failed") + ): + response = self._build_empty_response_message(agent_result) - # Convert the agent's internal "(empty)" sentinel into a - # user-friendly message. "(empty)" means the model failed to - # produce visible content after exhausting all retries (nudge, - # prefill, empty-retry, fallback). Sending the raw sentinel - # looks like a bug; a short explanation is more helpful. - if response == "(empty)": - response = ( - "⚠️ The model returned no response after processing tool " - "results. This can happen with some models — try again or " - "rephrase your question." - ) agent_messages = agent_result.get("messages", []) _response_time = time.time() - _msg_start_time _api_calls = agent_result.get("api_calls", 0) @@ -4248,6 +4241,40 @@ class GatewayRunner: # Restore session context variables to their pre-handler state self._clear_session_env(_session_env_tokens) + @staticmethod + def _build_empty_response_message(agent_result: dict) -> str: + """Return a user-facing fallback when the model produced no visible content.""" + reasoning_text = "" + if isinstance(agent_result, dict): + reasoning_text = ( + agent_result.get("empty_response_reasoning") + or agent_result.get("last_reasoning") + or "" + ) + if reasoning_text: + return ( + "⚠️ The model produced internal reasoning but no visible response " + "after all retries. Try again or rephrase your question." + ) + return ( + "⚠️ The model returned no content after all retries. " + "Try again or rephrase your question." + ) + + @staticmethod + def _is_empty_response_fallback(agent_result: dict, response_text: str = "") -> bool: + """Detect legacy and current empty-response fallbacks. + + Returns True when the response should still be delivered even if the + stream consumer or interim preview already sent other text. This covers + the historical "(empty)" sentinel as well as the newer descriptive + fallback message returned by run_agent. + """ + if isinstance(agent_result, dict) and agent_result.get("response_is_empty_fallback"): + return True + response_text = (response_text or "").strip() + return not response_text or response_text == "(empty)" + def _format_session_info(self) -> str: """Resolve current model config and return a formatted info block. @@ -9430,7 +9457,8 @@ class GatewayRunner: or _previewed ) first_response = result.get("final_response", "") - if first_response and not _already_streamed: + _force_delivery = self._is_empty_response_fallback(result, first_response) + if first_response and (not _already_streamed or _force_delivery): try: logger.info( "Queued follow-up for session %s: final stream delivery not confirmed; sending first response before continuing.", @@ -9545,17 +9573,15 @@ class GatewayRunner: # message is new content the user hasn't seen, and it must reach # them even if streaming had sent earlier partial output. # - # Also never suppress when the final response is "(empty)" — this - # means the model failed to produce content after tool calls (common - # with mimo-v2-pro, GLM-5, etc.). The stream consumer may have - # sent intermediate text ("Let me search for that…") alongside the - # tool call, setting already_sent=True, but that text is NOT the - # final answer. Suppressing delivery here leaves the user staring - # at silence. (#10xxx — "agent stops after web search") + # Also never suppress when the final response is a fallback for an + # empty model reply — this means the model failed to produce visible + # content after tool calls, and the user still needs to receive the + # fallback text even if the stream consumer already sent partial + # narration. _sc = stream_consumer_holder[0] if isinstance(response, dict) and not response.get("failed"): _final = response.get("final_response") or "" - _is_empty_sentinel = not _final or _final == "(empty)" + _is_empty_sentinel = self._is_empty_response_fallback(response, _final) _streamed = bool( _sc and getattr(_sc, "final_response_sent", False) ) diff --git a/run_agent.py b/run_agent.py index 325df9beb..697cecc8d 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2304,6 +2304,18 @@ class AIAgent: return None + def _build_empty_response_message(self, reasoning_text: Optional[str]) -> str: + """Return a user-facing fallback when the model produced no visible content.""" + if reasoning_text: + return ( + "⚠️ The model produced internal reasoning but no visible response " + "after all retries. Try again or rephrase your question." + ) + return ( + "⚠️ The model returned no content after all retries. " + "Try again or rephrase your question." + ) + def _cleanup_task_resources(self, task_id: str) -> None: """Clean up VM and browser resources for a given task. @@ -8314,6 +8326,7 @@ class AIAgent: self._last_content_tools_all_housekeeping = False self._mute_post_response = False self._unicode_sanitization_passes = 0 + self._response_is_empty_fallback = False # Pre-turn connection health check: detect and clean up dead TCP # connections left over from provider outages or dropped streams. @@ -11036,6 +11049,7 @@ class AIAgent: assistant_msg = self._build_assistant_message(assistant_message, finish_reason) assistant_msg["content"] = "(empty)" messages.append(assistant_msg) + self._response_is_empty_fallback = True if reasoning_text: reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text @@ -11046,7 +11060,7 @@ class AIAgent: ) self._emit_status( "⚠️ Model produced reasoning but no visible " - "response after all retries. Returning empty." + "response after all retries. Returning fallback message." ) else: logger.warning( @@ -11062,7 +11076,7 @@ class AIAgent: ". No fallback providers configured.") ) - final_response = "(empty)" + final_response = self._build_empty_response_message(reasoning_text) break # Reset retry counter/signature on successful content @@ -11281,6 +11295,8 @@ class AIAgent: result = { "final_response": final_response, "last_reasoning": last_reasoning, + "empty_response_reasoning": last_reasoning if self._response_is_empty_fallback else None, + "response_is_empty_fallback": self._response_is_empty_fallback, "messages": messages, "api_calls": api_call_count, "completed": completed, diff --git a/tests/gateway/test_empty_response_handling.py b/tests/gateway/test_empty_response_handling.py new file mode 100644 index 000000000..2e683d62e --- /dev/null +++ b/tests/gateway/test_empty_response_handling.py @@ -0,0 +1,47 @@ +"""Tests for empty-response fallback handling in GatewayRunner.""" + +import sys +import types + +import pytest + + +@pytest.fixture(autouse=True) +def _mock_dotenv(monkeypatch): + """gateway.run imports dotenv at module level; stub it for tests.""" + fake = types.ModuleType("dotenv") + fake.load_dotenv = lambda *a, **kw: None + monkeypatch.setitem(sys.modules, "dotenv", fake) + + +@pytest.fixture() +def runner(): + from gateway.run import GatewayRunner + + return GatewayRunner.__new__(GatewayRunner) + + +class TestEmptyResponseFallback: + def test_reasoning_only_message(self, runner): + message = runner._build_empty_response_message({ + "empty_response_reasoning": "structured reasoning answer", + }) + assert message == ( + "⚠️ The model produced internal reasoning but no visible response after all retries. " + "Try again or rephrase your question." + ) + + def test_truly_empty_message(self, runner): + message = runner._build_empty_response_message({}) + assert message == ( + "⚠️ The model returned no content after all retries. " + "Try again or rephrase your question." + ) + + def test_response_fallback_detection_handles_new_and_legacy_forms(self, runner): + assert runner._is_empty_response_fallback( + {"response_is_empty_fallback": True}, + "⚠️ The model returned no content after all retries. Try again or rephrase your question.", + ) + assert runner._is_empty_response_fallback({}, "(empty)") + assert not runner._is_empty_response_fallback({}, "hello world") diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 46eec2cf7..8a42c9c2a 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -237,6 +237,15 @@ def _mock_response( resp.usage = None return resp +EMPTY_REASONING_RESPONSE = ( + "⚠️ The model produced internal reasoning but no visible response after all retries. " + "Try again or rephrase your question." +) +EMPTY_TRULY_EMPTY_RESPONSE = ( + "⚠️ The model returned no content after all retries. " + "Try again or rephrase your question." +) + # =================================================================== # Group 1: Pure Functions @@ -1842,7 +1851,9 @@ class TestRunConversation: mock_compress.assert_not_called() # no compression triggered assert result["completed"] is True - assert result["final_response"] == "(empty)" + assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE + assert result["response_is_empty_fallback"] is True + assert result["empty_response_reasoning"] is None assert result["api_calls"] == 6 # 1 original + 2 prefill + 3 retries def test_reasoning_only_response_prefill_then_empty(self, agent): @@ -1862,7 +1873,9 @@ class TestRunConversation: ): result = agent.run_conversation("answer me") assert result["completed"] is True - assert result["final_response"] == "(empty)" + assert result["final_response"] == EMPTY_REASONING_RESPONSE + assert result["response_is_empty_fallback"] is True + assert result["empty_response_reasoning"] == "structured reasoning answer" assert result["api_calls"] == 6 # 1 original + 2 prefill + 3 retries def test_reasoning_only_prefill_succeeds_on_continuation(self, agent): @@ -1909,7 +1922,9 @@ class TestRunConversation: ): result = agent.run_conversation("answer me") assert result["completed"] is True - assert result["final_response"] == "(empty)" + assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE + assert result["response_is_empty_fallback"] is True + assert result["empty_response_reasoning"] is None assert result["api_calls"] == 4 # 1 original + 3 retries def test_truly_empty_response_succeeds_on_nudge(self, agent): @@ -2005,7 +2020,8 @@ class TestRunConversation: ): result = agent.run_conversation("answer me") assert result["completed"] is True - assert result["final_response"] == "(empty)" + assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE + assert result["response_is_empty_fallback"] is True def test_empty_response_emits_status_for_gateway(self, agent): """_emit_status is called during empty retries so gateway users see feedback.""" @@ -2031,7 +2047,8 @@ class TestRunConversation: ): result = agent.run_conversation("answer me") - assert result["final_response"] == "(empty)" + assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE + assert result["response_is_empty_fallback"] is True # Should have emitted retry statuses (3 retries) + final failure retry_msgs = [m for m in status_messages if "retrying" in m.lower()] assert len(retry_msgs) == 3, f"Expected 3 retry status messages, got {len(retry_msgs)}: {status_messages}"