fix: activate fallback provider on repeated empty responses + user-visible status (#7505)

When models return empty responses (no content, no tool calls, no
reasoning), Hermes previously retried 3 times silently then fell through
to '(empty)' — without ever trying the fallback provider chain. Users on
GLM-4.5-Air and similar models experienced what appeared to be a
complete hang, especially in gateway (Telegram/Discord) contexts where
the silent retries produced zero feedback.

Changes:
- After exhausting 3 empty retries, attempt _try_activate_fallback()
  before giving up with '(empty)'. If fallback succeeds, reset retry
  counter and continue the conversation loop with the new provider.
- Replace all _vprint() calls in recovery paths with _emit_status(),
  which surfaces messages through both CLI (_vprint with force=True)
  and gateway (status_callback -> adapter.send). Users now see:
  * '⚠️ Empty response from model — retrying (N/3)' during retries
  * '⚠️ Model returning empty responses — switching to fallback...'
  * '↻ Switched to fallback: <model> (<provider>)' on success
  * '❌ Model returned no content after all retries [and fallback]'
- Add logger.warning() throughout empty response paths for log file
  visibility (model name, provider, retry counts).
- Upgrade _last_content_with_tools fallback from logger.debug to
  logger.info + _emit_status so recovery is visible.
- Upgrade thinking-only prefill continuation to use _emit_status.

Tests:
- test_empty_response_triggers_fallback_provider: verifies fallback
  activation after 3 empty retries produces content from fallback model
- test_empty_response_fallback_also_empty_returns_empty: verifies
  graceful degradation when fallback also returns empty
- test_empty_response_emits_status_for_gateway: verifies _emit_status
  is called during retries so gateway users see feedback

Addresses #7180.
This commit is contained in:
Teknium 2026-04-10 19:15:41 -07:00 committed by GitHub
parent 992422910c
commit 842e669a13
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 178 additions and 15 deletions

View file

@ -9459,7 +9459,8 @@ class AIAgent:
fallback = getattr(self, '_last_content_with_tools', None)
if fallback:
_turn_exit_reason = "fallback_prior_turn_content"
logger.debug("Empty follow-up after tool calls — using prior turn content as final response")
logger.info("Empty follow-up after tool calls — using prior turn content as final response")
self._emit_status("↻ Empty response after tool calls — using earlier content as final answer")
self._last_content_with_tools = None
self._empty_content_retries = 0
for i in range(len(messages) - 1, -1, -1):
@ -9490,9 +9491,13 @@ class AIAgent:
)
if _has_structured and self._thinking_prefill_retries < 2:
self._thinking_prefill_retries += 1
self._vprint(
f"{self.log_prefix}↻ Thinking-only response — "
f"prefilling to continue "
logger.info(
"Thinking-only response (no visible content) — "
"prefilling to continue (%d/2)",
self._thinking_prefill_retries,
)
self._emit_status(
f"↻ Thinking-only response — prefilling to continue "
f"({self._thinking_prefill_retries}/2)"
)
interim_msg = self._build_assistant_message(
@ -9508,23 +9513,57 @@ class AIAgent:
# Model returned nothing — no content, no
# structured reasoning, no tool calls. Common
# with open models (transient provider issues,
# rate limits, sampling flukes). Silently retry
# up to 3 times before giving up. Skip when
# rate limits, sampling flukes). Retry up to 3
# times before attempting fallback. Skip when
# content has inline <think> tags (model chose
# to reason, just no visible text).
_truly_empty = not final_response.strip()
if _truly_empty and not _has_structured and self._empty_content_retries < 3:
self._empty_content_retries += 1
self._vprint(
f"{self.log_prefix}↻ Empty response (no content or reasoning) "
f"— retrying ({self._empty_content_retries}/3)",
force=True,
logger.warning(
"Empty response (no content or reasoning) — "
"retry %d/3 (model=%s)",
self._empty_content_retries, self.model,
)
self._emit_status(
f"⚠️ Empty response from model — retrying "
f"({self._empty_content_retries}/3)"
)
continue
# Exhausted prefill attempts, empty retries, or
# structured reasoning with no content —
# fall through to "(empty)" terminal.
# ── Exhausted retries — try fallback provider ──
# Before giving up with "(empty)", attempt to
# switch to the next provider in the fallback
# chain. This covers the case where a model
# (e.g. GLM-4.5-Air) consistently returns empty
# due to context degradation or provider issues.
if _truly_empty and self._fallback_chain:
logger.warning(
"Empty response after %d retries — "
"attempting fallback (model=%s, provider=%s)",
self._empty_content_retries, self.model,
self.provider,
)
self._emit_status(
"⚠️ Model returning empty responses — "
"switching to fallback provider..."
)
if self._try_activate_fallback():
self._empty_content_retries = 0
self._emit_status(
f"↻ Switched to fallback: {self.model} "
f"({self.provider})"
)
logger.info(
"Fallback activated after empty responses: "
"now using %s on %s",
self.model, self.provider,
)
continue
# Exhausted retries and fallback chain (or no
# fallback configured). Fall through to the
# "(empty)" terminal.
_turn_exit_reason = "empty_response_exhausted"
reasoning_text = self._extract_reasoning(assistant_message)
assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
@ -9533,9 +9572,28 @@ class AIAgent:
if reasoning_text:
reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
self._vprint(f"{self.log_prefix} Reasoning-only response (no visible content). Reasoning: {reasoning_preview}")
logger.warning(
"Reasoning-only response (no visible content) "
"after exhausting retries and fallback. "
"Reasoning: %s", reasoning_preview,
)
self._emit_status(
"⚠️ Model produced reasoning but no visible "
"response after all retries. Returning empty."
)
else:
self._vprint(f"{self.log_prefix} Empty response (no content or reasoning) after 3 retries.")
logger.warning(
"Empty response (no content or reasoning) "
"after %d retries. No fallback available. "
"model=%s provider=%s",
self._empty_content_retries, self.model,
self.provider,
)
self._emit_status(
"❌ Model returned no content after all retries"
+ (" and fallback attempts." if self._fallback_chain else
". No fallback providers configured.")
)
final_response = "(empty)"
break

View file

@ -1823,6 +1823,111 @@ class TestRunConversation:
assert result["final_response"] == "Here is the actual answer."
assert result["api_calls"] == 2 # 1 original + 1 nudge retry
def test_empty_response_triggers_fallback_provider(self, agent):
    """Exhausting the empty-response retries hands the turn to the fallback provider.

    The primary model answers with nothing four times in a row (the first
    call plus three retries); the patched ``_try_activate_fallback`` then
    reports success and the fifth call yields real content.
    """
    self._setup_agent(agent)
    agent.base_url = "http://127.0.0.1:1234/v1"

    # One-entry fallback chain, not yet activated.
    agent._fallback_chain = [{"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}]
    agent._fallback_index = 0
    agent._fallback_activated = False

    blank = _mock_response(content=None, finish_reason="stop")
    answer = _mock_response(content="Fallback answer.", finish_reason="stop")
    # Original call + 3 retries come back empty, then the fallback model replies.
    agent.client.chat.completions.create.side_effect = [blank] * 4 + [answer]

    activations = []

    def _fake_activate():
        # Stand-in for _try_activate_fallback: record the call, bump the
        # chain index and flip the flag. The client is already a mock, so
        # no real provider switch is needed.
        activations.append(True)
        agent._fallback_index = 1
        agent._fallback_activated = True
        agent.model = "anthropic/claude-sonnet-4"
        agent.provider = "openrouter"
        return True

    with (
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
        patch.object(agent, "_try_activate_fallback", side_effect=_fake_activate),
    ):
        outcome = agent.run_conversation("answer me")

    assert activations, "Fallback should have been triggered"
    assert outcome["completed"] is True
    assert outcome["final_response"] == "Fallback answer."
def test_empty_response_fallback_also_empty_returns_empty(self, agent):
    """When the fallback model is just as silent, the turn ends with "(empty)"."""
    self._setup_agent(agent)
    agent.base_url = "http://127.0.0.1:1234/v1"
    agent._fallback_chain = [{"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}]
    agent._fallback_index = 0
    agent._fallback_activated = False

    blank = _mock_response(content=None, finish_reason="stop")
    # Eight empty replies in total:
    #   - 4 from the primary (first call + 3 retries) -> fallback activated
    #   - 4 from the fallback (first call + 3 retries) -> chain exhausted
    primary_replies = [blank] * 4
    fallback_replies = [blank] * 4
    agent.client.chat.completions.create.side_effect = primary_replies + fallback_replies

    def _fake_activate():
        # Succeed only while an unused entry remains in the chain.
        if agent._fallback_index < len(agent._fallback_chain):
            agent._fallback_index += 1
            agent._fallback_activated = True
            agent.model = "anthropic/claude-sonnet-4"
            agent.provider = "openrouter"
            return True
        return False

    with (
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
        patch.object(agent, "_try_activate_fallback", side_effect=_fake_activate),
    ):
        outcome = agent.run_conversation("answer me")

    assert outcome["completed"] is True
    assert outcome["final_response"] == "(empty)"
def test_empty_response_emits_status_for_gateway(self, agent):
    """Each empty-response retry and the final failure go through ``_emit_status``."""
    self._setup_agent(agent)
    agent.base_url = "http://127.0.0.1:1234/v1"

    blank = _mock_response(content=None, finish_reason="stop")
    # First call + 3 retries, every one empty; this test sets up no fallback.
    agent.client.chat.completions.create.side_effect = [blank] * 4

    emitted = []

    def _record(msg):
        emitted.append(msg)

    with (
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
        patch.object(agent, "_emit_status", side_effect=_record),
    ):
        outcome = agent.run_conversation("answer me")

    assert outcome["final_response"] == "(empty)"

    # Expect one status line per retry (3 in total) plus a terminal failure line.
    retry_lines = []
    failure_lines = []
    for text in emitted:
        lowered = text.lower()
        if "retrying" in lowered:
            retry_lines.append(text)
        if "no content" in lowered or "no fallback" in lowered:
            failure_lines.append(text)

    assert len(retry_lines) == 3, f"Expected 3 retry status messages, got {len(retry_lines)}: {emitted}"
    assert failure_lines, f"Expected at least 1 failure status, got: {emitted}"
def test_nous_401_refreshes_after_remint_and_retries(self, agent):
self._setup_agent(agent)
agent.provider = "nous"