Merge 4d27bd4a3b into 05d8f11085

2026-04-25 00:51:20 +00:00 · 2026-04-24 19:26:50 -05:00 · 2026-04-24 19:26:50 -05:00 · ec99f88a3d
commit ec99f88a3d
parent 05d8f11085 4d27bd4a3b
4 changed files with 133 additions and 27 deletions
--- a/gateway/run.py
+++ b/gateway/run.py
@ -4607,18 +4607,11 @@ class GatewayRunner:
                return None

            response = agent_result.get("final_response") or ""
+            if (not response or response == "(empty)") and (
+                agent_result.get("response_is_empty_fallback") or not agent_result.get("failed")
+            ):
+                response = self._build_empty_response_message(agent_result)

-            # Convert the agent's internal "(empty)" sentinel into a
-            # user-friendly message.  "(empty)" means the model failed to
-            # produce visible content after exhausting all retries (nudge,
-            # prefill, empty-retry, fallback).  Sending the raw sentinel
-            # looks like a bug; a short explanation is more helpful.
-            if response == "(empty)":
-                response = (
-                    "⚠️ The model returned no response after processing tool "
-                    "results. This can happen with some models — try again or "
-                    "rephrase your question."
-                )
            agent_messages = agent_result.get("messages", [])
            _response_time = time.time() - _msg_start_time
            _api_calls = agent_result.get("api_calls", 0)
@ -4935,6 +4928,40 @@ class GatewayRunner:
            # Restore session context variables to their pre-handler state
            self._clear_session_env(_session_env_tokens)
    
+    @staticmethod
+    def _build_empty_response_message(agent_result: dict) -> str:
+        """Pick the user-facing notice shown when a turn ends with no visible text.
+
+        The reasoning-specific wording is used when *agent_result* is a dict with
+        a truthy ``empty_response_reasoning`` or ``last_reasoning`` entry; any
+        other input (including a non-dict) gets the generic "no content" wording.
+        """
+        result = agent_result if isinstance(agent_result, dict) else {}
+        if result.get("empty_response_reasoning") or result.get("last_reasoning"):
+            return (
+                "⚠️ The model produced internal reasoning but no visible response "
+                "after all retries. Try again or rephrase your question."
+            )
+        # No reasoning text was recorded under either key.
+        return (
+            "⚠️ The model returned no content after all retries. "
+            "Try again or rephrase your question."
+        )
+
+    @staticmethod
+    def _is_empty_response_fallback(agent_result: dict, response_text: str = "") -> bool:
+        """Tell whether a reply counts as an empty-response fallback.
+
+        True if *agent_result* is a dict with a truthy ``response_is_empty_fallback``
+        entry, or if *response_text* is missing, whitespace-only, or the legacy
+        "(empty)" sentinel. Callers use this to deliver the text despite streaming.
+        """
+        flagged = isinstance(agent_result, dict) and agent_result.get("response_is_empty_fallback")
+        if not flagged:
+            visible_text = (response_text or "").strip()
+            return not visible_text or visible_text == "(empty)"
+        return True
+
    def _format_session_info(self) -> str:
        """Resolve current model config and return a formatted info block.

@ -10728,7 +10755,8 @@ class GatewayRunner:
                        or _previewed
                    )
                    first_response = result.get("final_response", "")
-                    if first_response and not _already_streamed:
+                    _force_delivery = self._is_empty_response_fallback(result, first_response)
+                    if first_response and (not _already_streamed or _force_delivery):
                        try:
                            logger.info(
                                "Queued follow-up for session %s: final stream delivery not confirmed; sending first response before continuing.",
@ -10859,17 +10887,15 @@ class GatewayRunner:
        # message is new content the user hasn't seen, and it must reach
        # them even if streaming had sent earlier partial output.
        #
-        # Also never suppress when the final response is "(empty)" — this
-        # means the model failed to produce content after tool calls (common
-        # with mimo-v2-pro, GLM-5, etc.).  The stream consumer may have
-        # sent intermediate text ("Let me search for that…") alongside the
-        # tool call, setting already_sent=True, but that text is NOT the
-        # final answer.  Suppressing delivery here leaves the user staring
-        # at silence.  (#10xxx — "agent stops after web search")
+        # Also never suppress when the final response is a fallback for an
+        # empty model reply — this means the model failed to produce visible
+        # content after tool calls, and the user still needs to receive the
+        # fallback text even if the stream consumer already sent partial
+        # narration.
        _sc = stream_consumer_holder[0]
        if isinstance(response, dict) and not response.get("failed"):
            _final = response.get("final_response") or ""
-            _is_empty_sentinel = not _final or _final == "(empty)"
+            _is_empty_sentinel = self._is_empty_response_fallback(response, _final)
            _streamed = bool(
                _sc and getattr(_sc, "final_response_sent", False)
            )
--- a/run_agent.py
+++ b/run_agent.py
@ -2989,6 +2989,18 @@ class AIAgent:
        
        return None

+    def _build_empty_response_message(self, reasoning_text: Optional[str]) -> str:
+        """Word the user-facing notice for a turn that yielded no visible content.
+
+        A truthy *reasoning_text* selects the reasoning-specific wording; ``None``
+        or an empty string selects the generic "no content" wording.
+        """
+        if not reasoning_text:
+            headline = "⚠️ The model returned no content after all retries. "
+        else:
+            headline = "⚠️ The model produced internal reasoning but no visible response after all retries. "
+        return headline + "Try again or rephrase your question."
+
    def _cleanup_task_resources(self, task_id: str) -> None:
        """Clean up VM and browser resources for a given task.

@ -9362,6 +9374,7 @@ class AIAgent:
        self._last_content_tools_all_housekeeping = False
        self._mute_post_response = False
        self._unicode_sanitization_passes = 0
+        self._response_is_empty_fallback = False

        # Pre-turn connection health check: detect and clean up dead TCP
        # connections left over from provider outages or dropped streams.
@ -12309,6 +12322,7 @@ class AIAgent:
                        assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
                        assistant_msg["content"] = "(empty)"
                        messages.append(assistant_msg)
+                        self._response_is_empty_fallback = True

                        if reasoning_text:
                            reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
@ -12319,7 +12333,7 @@ class AIAgent:
                            )
                            self._emit_status(
                                "⚠️ Model produced reasoning but no visible "
-                                "response after all retries. Returning empty."
+                                "response after all retries. Returning fallback message."
                            )
                        else:
                            logger.warning(
@ -12335,7 +12349,7 @@ class AIAgent:
                                   ". No fallback providers configured.")
                            )

-                        final_response = "(empty)"
+                        final_response = self._build_empty_response_message(reasoning_text)
                        break
                    
                    # Reset retry counter/signature on successful content
@ -12555,6 +12569,8 @@ class AIAgent:
        result = {
            "final_response": final_response,
            "last_reasoning": last_reasoning,
+            "empty_response_reasoning": last_reasoning if self._response_is_empty_fallback else None,
+            "response_is_empty_fallback": self._response_is_empty_fallback,
            "messages": messages,
            "api_calls": api_call_count,
            "completed": completed,
--- a/tests/gateway/test_empty_response_handling.py
+++ b/tests/gateway/test_empty_response_handling.py
@ -0,0 +1,47 @@
+"""Tests for empty-response fallback handling in GatewayRunner."""
+
+import sys
+import types
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _mock_dotenv(monkeypatch):
+    """Stub ``dotenv`` in ``sys.modules``; gateway.run imports it at module level."""
+    stub = types.ModuleType("dotenv")
+    setattr(stub, "load_dotenv", lambda *args, **kwargs: None)
+    monkeypatch.setitem(sys.modules, "dotenv", stub)
+
+
+@pytest.fixture()
+def runner():
+    """Bare GatewayRunner built via ``__new__`` so ``__init__`` never runs."""
+    from gateway.run import GatewayRunner as runner_cls
+    return runner_cls.__new__(runner_cls)
+
+
+class TestEmptyResponseFallback:
+    """Exercise GatewayRunner's static empty-response helpers."""
+
+    def test_reasoning_only_message(self, runner):
+        result = {"empty_response_reasoning": "structured reasoning answer"}
+        expected = (
+            "⚠️ The model produced internal reasoning but no visible response after all retries. "
+            "Try again or rephrase your question."
+        )
+        assert runner._build_empty_response_message(result) == expected
+
+    def test_truly_empty_message(self, runner):
+        expected = (
+            "⚠️ The model returned no content after all retries. "
+            "Try again or rephrase your question."
+        )
+        assert runner._build_empty_response_message({}) == expected
+
+    def test_response_fallback_detection_handles_new_and_legacy_forms(self, runner):
+        is_fallback = runner._is_empty_response_fallback
+        flagged_text = "⚠️ The model returned no content after all retries. Try again or rephrase your question."
+        assert is_fallback({"response_is_empty_fallback": True}, flagged_text)  # explicit flag
+        assert is_fallback({}, "(empty)")  # legacy sentinel
+        assert not is_fallback({}, "hello world")
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@ -250,6 +250,15 @@ def _mock_response(
        resp.usage = None
    return resp

+# Fallback texts the run_conversation tests expect in result["final_response"].
+_EMPTY_RETRY_HINT = "Try again or rephrase your question."
+EMPTY_REASONING_RESPONSE = (
+    "⚠️ The model produced internal reasoning but no visible response after all retries. "
+    + _EMPTY_RETRY_HINT
+)
+# Counterpart expected when no reasoning text was produced either.
+EMPTY_TRULY_EMPTY_RESPONSE = "⚠️ The model returned no content after all retries. " + _EMPTY_RETRY_HINT
+

 # ===================================================================
 # Group 1: Pure Functions
@ -2256,7 +2265,9 @@ class TestRunConversation:

        mock_compress.assert_not_called()  # no compression triggered
        assert result["completed"] is True
-        assert result["final_response"] == "(empty)"
+        assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE
+        assert result["response_is_empty_fallback"] is True
+        assert result["empty_response_reasoning"] is None
        assert result["api_calls"] == 6  # 1 original + 2 prefill + 3 retries

    def test_reasoning_only_response_prefill_then_empty(self, agent):
@ -2276,7 +2287,9 @@ class TestRunConversation:
        ):
            result = agent.run_conversation("answer me")
        assert result["completed"] is True
-        assert result["final_response"] == "(empty)"
+        assert result["final_response"] == EMPTY_REASONING_RESPONSE
+        assert result["response_is_empty_fallback"] is True
+        assert result["empty_response_reasoning"] == "structured reasoning answer"
        assert result["api_calls"] == 6  # 1 original + 2 prefill + 3 retries

    def test_reasoning_only_prefill_succeeds_on_continuation(self, agent):
@ -2323,7 +2336,9 @@ class TestRunConversation:
        ):
            result = agent.run_conversation("answer me")
        assert result["completed"] is True
-        assert result["final_response"] == "(empty)"
+        assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE
+        assert result["response_is_empty_fallback"] is True
+        assert result["empty_response_reasoning"] is None
        assert result["api_calls"] == 4  # 1 original + 3 retries

    def test_truly_empty_response_succeeds_on_nudge(self, agent):
@ -2419,7 +2434,8 @@ class TestRunConversation:
        ):
            result = agent.run_conversation("answer me")
        assert result["completed"] is True
-        assert result["final_response"] == "(empty)"
+        assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE
+        assert result["response_is_empty_fallback"] is True

    def test_empty_response_emits_status_for_gateway(self, agent):
        """_emit_status is called during empty retries so gateway users see feedback."""
@ -2445,7 +2461,8 @@ class TestRunConversation:
        ):
            result = agent.run_conversation("answer me")

-        assert result["final_response"] == "(empty)"
+        assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE
+        assert result["response_is_empty_fallback"] is True
        # Should have emitted retry statuses (3 retries) + final failure
        retry_msgs = [m for m in status_messages if "retrying" in m.lower()]
        assert len(retry_msgs) == 3, f"Expected 3 retry status messages, got {len(retry_msgs)}: {status_messages}"