mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix: empty response recovery for reasoning models (mimo, qwen, GLM) (#8609)
Three fixes for the (empty) response bug affecting open reasoning models: 1. Allow retries after prefill exhaustion — models like mimo-v2-pro always populate reasoning fields via OpenRouter, so the old 'not _has_structured' guard on the retry path blocked retries for EVERY reasoning model after the 2 prefill attempts. Now: 1 original + 2 prefills + 3 retries = 6 total attempts before (empty). 2. Reset prefill/retry counters on tool-call recovery — the counters accumulated across the entire conversation, never resetting during tool-calling turns. A model cycling empty→prefill→tools→empty burned both prefill attempts, and the third empty response got zero recovery. Now counters reset when prefill succeeds with tool calls. 3. Strip think blocks before _truly_empty check — inline <think> content made the string non-empty, skipping both retry paths. Reported by users on Telegram with xiaomi/mimo-v2-pro and qwen3.5 models. Reproduced: qwen3.5-9b emits tool calls as XML in reasoning field instead of proper function calls, causing content=None + tool_calls=None + reasoning with embedded <tool_call> XML. Prefill recovery works but counter accumulation caused permanent (empty) in long sessions.
This commit is contained in:
parent
a4593f8b21
commit
d6785dc4d4
2 changed files with 37 additions and 17 deletions
|
|
@@ -1741,9 +1741,9 @@ class TestRunConversation:
|
|||
{"role": "assistant", "content": "old answer"},
|
||||
]
|
||||
|
||||
# 3 responses: original + 2 prefill continuations (structured reasoning triggers prefill)
|
||||
# 6 responses: original + 2 prefill + 3 retries after prefill exhaustion
|
||||
with (
|
||||
patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp, empty_resp, empty_resp]),
|
||||
patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp] * 6),
|
||||
patch.object(agent, "_compress_context") as mock_compress,
|
||||
patch.object(agent, "_persist_session"),
|
||||
patch.object(agent, "_save_trajectory"),
|
||||
|
|
@@ -1754,18 +1754,18 @@ class TestRunConversation:
|
|||
mock_compress.assert_not_called() # no compression triggered
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "(empty)"
|
||||
assert result["api_calls"] == 3 # 1 original + 2 prefill continuations
|
||||
assert result["api_calls"] == 6 # 1 original + 2 prefill + 3 retries
|
||||
|
||||
def test_reasoning_only_response_prefill_then_empty(self, agent):
|
||||
"""Structured reasoning-only triggers prefill continuation (up to 2), then falls through to (empty)."""
|
||||
"""Structured reasoning-only triggers prefill (2), then retries (3), then (empty)."""
|
||||
self._setup_agent(agent)
|
||||
empty_resp = _mock_response(
|
||||
content=None,
|
||||
finish_reason="stop",
|
||||
reasoning_content="structured reasoning answer",
|
||||
)
|
||||
# 3 responses: original + 2 prefill continuations, all reasoning-only
|
||||
agent.client.chat.completions.create.side_effect = [empty_resp, empty_resp, empty_resp]
|
||||
# 6 responses: 1 original + 2 prefill + 3 retries after prefill exhaustion
|
||||
agent.client.chat.completions.create.side_effect = [empty_resp] * 6
|
||||
with (
|
||||
patch.object(agent, "_persist_session"),
|
||||
patch.object(agent, "_save_trajectory"),
|
||||
|
|
@@ -1774,7 +1774,7 @@ class TestRunConversation:
|
|||
result = agent.run_conversation("answer me")
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "(empty)"
|
||||
assert result["api_calls"] == 3 # 1 original + 2 prefill continuations
|
||||
assert result["api_calls"] == 6 # 1 original + 2 prefill + 3 retries
|
||||
|
||||
def test_reasoning_only_prefill_succeeds_on_continuation(self, agent):
|
||||
"""When prefill continuation produces content, it becomes the final response."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue