This commit is contained in:
Edder Talmor 2026-04-24 19:26:50 -05:00 committed by GitHub
commit ec99f88a3d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 133 additions and 27 deletions

View file

@ -4607,18 +4607,11 @@ class GatewayRunner:
return None
response = agent_result.get("final_response") or ""
if (not response or response == "(empty)") and (
agent_result.get("response_is_empty_fallback") or not agent_result.get("failed")
):
response = self._build_empty_response_message(agent_result)
# Convert the agent's internal "(empty)" sentinel into a
# user-friendly message. "(empty)" means the model failed to
# produce visible content after exhausting all retries (nudge,
# prefill, empty-retry, fallback). Sending the raw sentinel
# looks like a bug; a short explanation is more helpful.
if response == "(empty)":
response = (
"⚠️ The model returned no response after processing tool "
"results. This can happen with some models — try again or "
"rephrase your question."
)
agent_messages = agent_result.get("messages", [])
_response_time = time.time() - _msg_start_time
_api_calls = agent_result.get("api_calls", 0)
@ -4935,6 +4928,40 @@ class GatewayRunner:
# Restore session context variables to their pre-handler state
self._clear_session_env(_session_env_tokens)
@staticmethod
def _build_empty_response_message(agent_result: dict) -> str:
    """Compose the user-facing notice for a turn that produced no visible text.

    Two wordings exist. When ``agent_result`` is a dict carrying a truthy
    ``"empty_response_reasoning"`` or ``"last_reasoning"`` value, the notice
    says the model reasoned internally but showed nothing. In every other
    case (including a non-dict ``agent_result``) the notice says the model
    returned no content at all.

    Args:
        agent_result: Result mapping from the agent run. Only the two
            reasoning keys named above are consulted.

    Returns:
        The warning text to deliver to the user.
    """
    captured_reasoning = None
    if isinstance(agent_result, dict):
        # Prefer the reasoning recorded for the empty response itself and
        # fall back to the most recent reasoning otherwise.
        captured_reasoning = (
            agent_result.get("empty_response_reasoning")
            or agent_result.get("last_reasoning")
        )

    if not captured_reasoning:
        return (
            "⚠️ The model returned no content after all retries. "
            "Try again or rephrase your question."
        )

    return (
        "⚠️ The model produced internal reasoning but no visible response "
        "after all retries. Try again or rephrase your question."
    )
@staticmethod
def _is_empty_response_fallback(agent_result: dict, response_text: str = "") -> bool:
    """Report whether a response is an empty-reply fallback, legacy or current.

    A True result means the response should still be delivered even if the
    stream consumer or an interim preview already sent other text. Both the
    historical "(empty)" sentinel and the newer descriptive fallback message
    returned by run_agent are recognised.

    Args:
        agent_result: Result mapping from the agent run. A truthy
            ``"response_is_empty_fallback"`` entry marks the response as a
            fallback regardless of its text.
        response_text: Final response text. It is stripped of surrounding
            whitespace before being compared; a falsy value counts as empty.

    Returns:
        True when the result is flagged as a fallback, or when the stripped
        text is empty or equals the "(empty)" sentinel; False otherwise.
    """
    explicitly_flagged = isinstance(agent_result, dict) and agent_result.get(
        "response_is_empty_fallback"
    )
    if explicitly_flagged:
        return True

    # No flag present: fall back to inspecting the text itself so the
    # legacy sentinel and blank responses are still detected.
    visible_text = (response_text or "").strip()
    return visible_text in ("", "(empty)")
def _format_session_info(self) -> str:
"""Resolve current model config and return a formatted info block.
@ -10728,7 +10755,8 @@ class GatewayRunner:
or _previewed
)
first_response = result.get("final_response", "")
if first_response and not _already_streamed:
_force_delivery = self._is_empty_response_fallback(result, first_response)
if first_response and (not _already_streamed or _force_delivery):
try:
logger.info(
"Queued follow-up for session %s: final stream delivery not confirmed; sending first response before continuing.",
@ -10859,17 +10887,15 @@ class GatewayRunner:
# message is new content the user hasn't seen, and it must reach
# them even if streaming had sent earlier partial output.
#
# Also never suppress when the final response is "(empty)" — this
# means the model failed to produce content after tool calls (common
# with mimo-v2-pro, GLM-5, etc.). The stream consumer may have
# sent intermediate text ("Let me search for that…") alongside the
# tool call, setting already_sent=True, but that text is NOT the
# final answer. Suppressing delivery here leaves the user staring
# at silence. (#10xxx — "agent stops after web search")
# Also never suppress when the final response is a fallback for an
# empty model reply — this means the model failed to produce visible
# content after tool calls, and the user still needs to receive the
# fallback text even if the stream consumer already sent partial
# narration.
_sc = stream_consumer_holder[0]
if isinstance(response, dict) and not response.get("failed"):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
_is_empty_sentinel = self._is_empty_response_fallback(response, _final)
_streamed = bool(
_sc and getattr(_sc, "final_response_sent", False)
)

View file

@ -2989,6 +2989,18 @@ class AIAgent:
return None
def _build_empty_response_message(self, reasoning_text: Optional[str]) -> str:
    """Pick the user-facing fallback text for a reply with no visible content.

    Args:
        reasoning_text: Reasoning captured for the empty reply, if any.
            Any falsy value (``None`` or an empty string) is treated as
            "no reasoning".

    Returns:
        A warning that the model reasoned internally but showed nothing
        when ``reasoning_text`` is truthy; otherwise a warning that the
        model returned no content.
    """
    if not reasoning_text:
        return (
            "⚠️ The model returned no content after all retries. "
            "Try again or rephrase your question."
        )

    return (
        "⚠️ The model produced internal reasoning but no visible response "
        "after all retries. Try again or rephrase your question."
    )
def _cleanup_task_resources(self, task_id: str) -> None:
"""Clean up VM and browser resources for a given task.
@ -9362,6 +9374,7 @@ class AIAgent:
self._last_content_tools_all_housekeeping = False
self._mute_post_response = False
self._unicode_sanitization_passes = 0
self._response_is_empty_fallback = False
# Pre-turn connection health check: detect and clean up dead TCP
# connections left over from provider outages or dropped streams.
@ -12309,6 +12322,7 @@ class AIAgent:
assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
assistant_msg["content"] = "(empty)"
messages.append(assistant_msg)
self._response_is_empty_fallback = True
if reasoning_text:
reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
@ -12319,7 +12333,7 @@ class AIAgent:
)
self._emit_status(
"⚠️ Model produced reasoning but no visible "
"response after all retries. Returning empty."
"response after all retries. Returning fallback message."
)
else:
logger.warning(
@ -12335,7 +12349,7 @@ class AIAgent:
". No fallback providers configured.")
)
final_response = "(empty)"
final_response = self._build_empty_response_message(reasoning_text)
break
# Reset retry counter/signature on successful content
@ -12555,6 +12569,8 @@ class AIAgent:
result = {
"final_response": final_response,
"last_reasoning": last_reasoning,
"empty_response_reasoning": last_reasoning if self._response_is_empty_fallback else None,
"response_is_empty_fallback": self._response_is_empty_fallback,
"messages": messages,
"api_calls": api_call_count,
"completed": completed,

View file

@ -0,0 +1,47 @@
"""Tests for empty-response fallback handling in GatewayRunner."""
import sys
import types
import pytest
@pytest.fixture(autouse=True)
def _mock_dotenv(monkeypatch):
    """Install a stand-in ``dotenv`` module for every test in this file.

    gateway.run imports dotenv at module level, so a stub exposing a no-op
    ``load_dotenv`` is registered in ``sys.modules`` through ``monkeypatch``.
    """
    def _noop_load_dotenv(*args, **kwargs):
        # Accept any call signature and do nothing.
        return None

    stub_module = types.ModuleType("dotenv")
    stub_module.load_dotenv = _noop_load_dotenv
    monkeypatch.setitem(sys.modules, "dotenv", stub_module)
@pytest.fixture()
def runner():
    """Provide a ``GatewayRunner`` instance created without running ``__init__``.

    The import happens inside the fixture body rather than at module level,
    and the instance is allocated with ``__new__`` so no constructor logic
    executes.
    """
    from gateway.run import GatewayRunner

    bare_runner = GatewayRunner.__new__(GatewayRunner)
    return bare_runner
class TestEmptyResponseFallback:
    """Checks for the runner's empty-response message builder and detector."""

    def test_reasoning_only_message(self, runner):
        """A result carrying empty-response reasoning gets the reasoning wording."""
        expected = (
            "⚠️ The model produced internal reasoning but no visible response after all retries. "
            "Try again or rephrase your question."
        )
        agent_result = {"empty_response_reasoning": "structured reasoning answer"}

        assert runner._build_empty_response_message(agent_result) == expected

    def test_truly_empty_message(self, runner):
        """A result with no reasoning keys gets the no-content wording."""
        expected = (
            "⚠️ The model returned no content after all retries. "
            "Try again or rephrase your question."
        )

        assert runner._build_empty_response_message({}) == expected

    def test_response_fallback_detection_handles_new_and_legacy_forms(self, runner):
        """The flag, the "(empty)" sentinel, and ordinary text are each classified."""
        descriptive_fallback = (
            "⚠️ The model returned no content after all retries. Try again or rephrase your question."
        )
        is_fallback = runner._is_empty_response_fallback

        # Current form: the result dict is explicitly flagged.
        assert is_fallback({"response_is_empty_fallback": True}, descriptive_fallback)
        # Legacy form: the bare sentinel with no flag.
        assert is_fallback({}, "(empty)")
        # Ordinary content must not be treated as a fallback.
        assert not is_fallback({}, "hello world")

View file

@ -250,6 +250,15 @@ def _mock_response(
resp.usage = None
return resp
# Expected ``final_response`` text when every retry yields reasoning but no
# visible content.
EMPTY_REASONING_RESPONSE = (
    "⚠️ The model produced internal reasoning "
    "but no visible response after all retries. "
    "Try again or rephrase your question."
)

# Expected ``final_response`` text when every retry yields nothing at all.
EMPTY_TRULY_EMPTY_RESPONSE = (
    "⚠️ The model returned no content "
    "after all retries. "
    "Try again or rephrase your question."
)
# ===================================================================
# Group 1: Pure Functions
@ -2256,7 +2265,9 @@ class TestRunConversation:
mock_compress.assert_not_called() # no compression triggered
assert result["completed"] is True
assert result["final_response"] == "(empty)"
assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE
assert result["response_is_empty_fallback"] is True
assert result["empty_response_reasoning"] is None
assert result["api_calls"] == 6 # 1 original + 2 prefill + 3 retries
def test_reasoning_only_response_prefill_then_empty(self, agent):
@ -2276,7 +2287,9 @@ class TestRunConversation:
):
result = agent.run_conversation("answer me")
assert result["completed"] is True
assert result["final_response"] == "(empty)"
assert result["final_response"] == EMPTY_REASONING_RESPONSE
assert result["response_is_empty_fallback"] is True
assert result["empty_response_reasoning"] == "structured reasoning answer"
assert result["api_calls"] == 6 # 1 original + 2 prefill + 3 retries
def test_reasoning_only_prefill_succeeds_on_continuation(self, agent):
@ -2323,7 +2336,9 @@ class TestRunConversation:
):
result = agent.run_conversation("answer me")
assert result["completed"] is True
assert result["final_response"] == "(empty)"
assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE
assert result["response_is_empty_fallback"] is True
assert result["empty_response_reasoning"] is None
assert result["api_calls"] == 4 # 1 original + 3 retries
def test_truly_empty_response_succeeds_on_nudge(self, agent):
@ -2419,7 +2434,8 @@ class TestRunConversation:
):
result = agent.run_conversation("answer me")
assert result["completed"] is True
assert result["final_response"] == "(empty)"
assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE
assert result["response_is_empty_fallback"] is True
def test_empty_response_emits_status_for_gateway(self, agent):
"""_emit_status is called during empty retries so gateway users see feedback."""
@ -2445,7 +2461,8 @@ class TestRunConversation:
):
result = agent.run_conversation("answer me")
assert result["final_response"] == "(empty)"
assert result["final_response"] == EMPTY_TRULY_EMPTY_RESPONSE
assert result["response_is_empty_fallback"] is True
# Should have emitted retry statuses (3 retries) + final failure
retry_msgs = [m for m in status_messages if "retrying" in m.lower()]
assert len(retry_msgs) == 3, f"Expected 3 retry status messages, got {len(retry_msgs)}: {status_messages}"