From 4d27bd4a3bb0c6715363dcfe4f6863eebc3df71f Mon Sep 17 00:00:00 2001
From: Edder Talmor <talmoredder@gmail.com>
Date: Fri, 17 Apr 2026 07:20:08 -0400
Subject: [PATCH] fix: replace empty sentinel with user-friendly fallback

---
 gateway/run.py                                | 66 +++++++++++++------
 run_agent.py                                  | 20 +++++-
 tests/gateway/test_empty_response_handling.py | 47 +++++++++++++
 tests/run_agent/test_run_agent.py             | 27 ++++++--
 4 files changed, 133 insertions(+), 27 deletions(-)
 create mode 100644 tests/gateway/test_empty_response_handling.py

diff --git a/gateway/run.py b/gateway/run.py
index ba7ea43ad..82abdb501 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -3932,18 +3932,11 @@ class GatewayRunner:
                 pass
 
             response = agent_result.get("final_response") or ""
+            if (not response or response == "(empty)") and (
+                agent_result.get("response_is_empty_fallback") or not agent_result.get("failed")
+            ):
+                response = self._build_empty_response_message(agent_result)
 
-            # Convert the agent's internal "(empty)" sentinel into a
-            # user-friendly message.  "(empty)" means the model failed to
-            # produce visible content after exhausting all retries (nudge,
-            # prefill, empty-retry, fallback).  Sending the raw sentinel
-            # looks like a bug; a short explanation is more helpful.
-            if response == "(empty)":
-                response = (
-                    "⚠️ The model returned no response after processing tool "
-                    "results. This can happen with some models — try again or "
-                    "rephrase your question."
-                )
             agent_messages = agent_result.get("messages", [])
             _response_time = time.time() - _msg_start_time
             _api_calls = agent_result.get("api_calls", 0)
@@ -4248,6 +4241,40 @@ class GatewayRunner:
             # Restore session context variables to their pre-handler state
             self._clear_session_env(_session_env_tokens)
     
+    @staticmethod
+    def _build_empty_response_message(agent_result: dict) -> str:
+        """Build the user-facing notice for a reply that had no visible text.
+
+        The wording depends on whether ``agent_result`` holds reasoning text
+        under ``empty_response_reasoning`` or, failing that, ``last_reasoning``.
+        A non-dict ``agent_result`` is treated as holding neither.
+        """
+        payload = agent_result if isinstance(agent_result, dict) else {}
+        if not (payload.get("empty_response_reasoning") or payload.get("last_reasoning")):
+            return (
+                "⚠️ The model returned no content after all retries. "
+                "Try again or rephrase your question."
+            )
+        # Reasoning was present, so say so rather than claiming total silence.
+        return (
+            "⚠️ The model produced internal reasoning but no visible response "
+            "after all retries. Try again or rephrase your question."
+        )
+
+    @staticmethod
+    def _is_empty_response_fallback(agent_result: dict, response_text: str = "") -> bool:
+        """Report whether a reply counts as an empty-response fallback.
+
+        True if ``agent_result`` is a dict with a truthy
+        ``response_is_empty_fallback`` entry; otherwise True only when
+        ``response_text``, stripped of whitespace, is blank or "(empty)".
+        """
+        result = agent_result if isinstance(agent_result, dict) else {}
+        if not result.get("response_is_empty_fallback"):
+            # No explicit flag: decide from the text (legacy sentinel or blank).
+            return (response_text or "").strip() in ("", "(empty)")
+        return True
+
     def _format_session_info(self) -> str:
         """Resolve current model config and return a formatted info block.
 
@@ -9430,7 +9457,8 @@ class GatewayRunner:
                         or _previewed
                     )
                     first_response = result.get("final_response", "")
-                    if first_response and not _already_streamed:
+                    _force_delivery = self._is_empty_response_fallback(result, first_response)
+                    if first_response and (not _already_streamed or _force_delivery):
                         try:
                             logger.info(
                                 "Queued follow-up for session %s: final stream delivery not confirmed; sending first response before continuing.",
@@ -9545,17 +9573,15 @@ class GatewayRunner:
         # message is new content the user hasn't seen, and it must reach
         # them even if streaming had sent earlier partial output.
         #
-        # Also never suppress when the final response is "(empty)" — this
-        # means the model failed to produce content after tool calls (common
-        # with mimo-v2-pro, GLM-5, etc.).  The stream consumer may have
-        # sent intermediate text ("Let me search for that…") alongside the
-        # tool call, setting already_sent=True, but that text is NOT the
-        # final answer.  Suppressing delivery here leaves the user staring
-        # at silence.  (#10xxx — "agent stops after web search")
+        # Also never suppress when the final response is a fallback for an
+        # empty model reply — this means the model failed to produce visible
+        # content after tool calls, and the user still needs to receive the
+        # fallback text even if the stream consumer already sent partial
+        # narration.
         _sc = stream_consumer_holder[0]
         if isinstance(response, dict) and not response.get("failed"):
             _final = response.get("final_response") or ""
-            _is_empty_sentinel = not _final or _final == "(empty)"
+            _is_empty_sentinel = self._is_empty_response_fallback(response, _final)
             _streamed = bool(
                 _sc and getattr(_sc, "final_response_sent", False)
             )
diff --git a/run_agent.py b/run_agent.py
index 325df9beb..697cecc8d 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2304,6 +2304,18 @@ class AIAgent:
         
         return None
 
+    def _build_empty_response_message(self, reasoning_text: Optional[str]) -> str:
+        """Pick the user-facing notice for a turn that yielded no visible text."""
+        if not reasoning_text:
+            return (
+                "⚠️ The model returned no content after all retries. "
+                "Try again or rephrase your question."
+            )
+        return (
+            "⚠️ The model produced internal reasoning but no visible response "
+            "after all retries. Try again or rephrase your question."
+        )
+
     def _cleanup_task_resources(self, task_id: str) -> None:
         """Clean up VM and browser resources for a given task.
 
@@ -8314,6 +8326,7 @@ class AIAgent:
         self._last_content_tools_all_housekeeping = False
         self._mute_post_response = False
         self._unicode_sanitization_passes = 0
+        self._response_is_empty_fallback = False
 
         # Pre-turn connection health check: detect and clean up dead TCP
         # connections left over from provider outages or dropped streams.
@@ -11036,6 +11049,7 @@ class AIAgent:
                         assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
                         assistant_msg["content"] = "(empty)"
                         messages.append(assistant_msg)
+                        self._response_is_empty_fallback = True
 
                         if reasoning_text:
                             reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
@@ -11046,7 +11060,7 @@ class AIAgent:
                             )
                             self._emit_status(
                                 "⚠️ Model produced reasoning but no visible "
-                                "response after all retries. Returning empty."
+                                "response after all retries. Returning fallback message."
                             )
                         else:
                             logger.warning(
@@ -11062,7 +11076,7 @@ class AIAgent:
                                    ". No fallback providers configured.")
                             )
 
-                        final_response = "(empty)"
+                        final_response = self._build_empty_response_message(reasoning_text)
                         break
                     
                     # Reset retry counter/signature on successful content
@@ -11281,6 +11295,8 @@ class AIAgent:
         result = {
             "final_response": final_response,
             "last_reasoning": last_reasoning,
+            "empty_response_reasoning": last_reasoning if self._response_is_empty_fallback else None,
+            "response_is_empty_fallback": self._response_is_empty_fallback,
             "messages": messages,
             "api_calls": api_call_count,
             "completed": completed,
diff --git a/tests/gateway/test_empty_response_handling.py b/tests/gateway/test_empty_response_handling.py
new file mode 100644
index 000000000..2e683d62e
--- /dev/null
+++ b/tests/gateway/test_empty_response_handling.py
@@ -0,0 +1,47 @@
+"""Tests for empty-response fallback handling in GatewayRunner."""
+
+import sys
+import types
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _mock_dotenv(monkeypatch):
+    """Stub out ``dotenv``, which gateway.run imports at module level."""
+    stub_module = types.ModuleType("dotenv")
+    stub_module.load_dotenv = lambda *_args, **_kwargs: None
+    monkeypatch.setitem(sys.modules, "dotenv", stub_module)
+
+
+@pytest.fixture()
+def runner():
+    """Provide a GatewayRunner allocated with ``__new__`` so ``__init__`` is skipped."""
+    from gateway.run import GatewayRunner as Runner
+    return Runner.__new__(Runner)
+
+
+class TestEmptyResponseFallback:
+    """Checks GatewayRunner's empty-response message builder and detector."""
+
+    def test_reasoning_only_message(self, runner):
+        agent_result = {"empty_response_reasoning": "structured reasoning answer"}
+        assert runner._build_empty_response_message(agent_result) == (
+            "⚠️ The model produced internal reasoning but no visible response after all retries. "
+            "Try again or rephrase your question."
+        )
+
+    def test_truly_empty_message(self, runner):
+        assert runner._build_empty_response_message({}) == (
+            "⚠️ The model returned no content after all retries. "
+            "Try again or rephrase your question."
+        )
+
+    def test_response_fallback_detection_handles_new_and_legacy_forms(self, runner):
+        detect = runner._is_empty_response_fallback
+        assert detect(
+            {"response_is_empty_fallback": True},
+            "⚠️ The model returned no content after all retries. Try again or rephrase your question.",
+        )
+        assert detect({}, "(empty)")
+        assert not detect({}, "hello world")
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 46eec2cf7..8a42c9c2a 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -237,6 +237,15 @@ def _mock_response(
         resp.usage = None
     return resp
 
+EMPTY_REASONING_RESPONSE = (
+    "⚠️ The model produced internal reasoning but no visible response after all retries. "
+    "Try again or rephrase your question."
+)
+EMPTY_TRULY_EMPTY_RESPONSE = (
+    "⚠️ The model returned no content after all retries. "
+    "Try again or rephrase your question."
+)
+
 
 # ===================================================================
 # Group 1: Pure Functions
@@ -1842,7 +1851,9 @@ class TestRunConversation:
 
         mock_compress.assert_not_called()  # no compression triggered
         assert result["completed"] is True
-        assert result["final_response"] == "(empty)"
+        assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE
+        assert result["response_is_empty_fallback"] is True
+        assert result["empty_response_reasoning"] is None
         assert result["api_calls"] == 6  # 1 original + 2 prefill + 3 retries
 
     def test_reasoning_only_response_prefill_then_empty(self, agent):
@@ -1862,7 +1873,9 @@ class TestRunConversation:
         ):
             result = agent.run_conversation("answer me")
         assert result["completed"] is True
-        assert result["final_response"] == "(empty)"
+        assert result["final_response"] == EMPTY_REASONING_RESPONSE
+        assert result["response_is_empty_fallback"] is True
+        assert result["empty_response_reasoning"] == "structured reasoning answer"
         assert result["api_calls"] == 6  # 1 original + 2 prefill + 3 retries
 
     def test_reasoning_only_prefill_succeeds_on_continuation(self, agent):
@@ -1909,7 +1922,9 @@ class TestRunConversation:
         ):
             result = agent.run_conversation("answer me")
         assert result["completed"] is True
-        assert result["final_response"] == "(empty)"
+        assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE
+        assert result["response_is_empty_fallback"] is True
+        assert result["empty_response_reasoning"] is None
         assert result["api_calls"] == 4  # 1 original + 3 retries
 
     def test_truly_empty_response_succeeds_on_nudge(self, agent):
@@ -2005,7 +2020,8 @@ class TestRunConversation:
         ):
             result = agent.run_conversation("answer me")
         assert result["completed"] is True
-        assert result["final_response"] == "(empty)"
+        assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE
+        assert result["response_is_empty_fallback"] is True
 
     def test_empty_response_emits_status_for_gateway(self, agent):
         """_emit_status is called during empty retries so gateway users see feedback."""
@@ -2031,7 +2047,8 @@ class TestRunConversation:
         ):
             result = agent.run_conversation("answer me")
 
-        assert result["final_response"] == "(empty)"
+        assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE
+        assert result["response_is_empty_fallback"] is True
         # Should have emitted retry statuses (3 retries) + final failure
         retry_msgs = [m for m in status_messages if "retrying" in m.lower()]
         assert len(retry_msgs) == 3, f"Expected 3 retry status messages, got {len(retry_msgs)}: {status_messages}"