mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
fix(agent): strip stale reasoning_content when falling back to a strict provider (#50480)
* fix(agent): strip stale reasoning_content when falling back to a strict provider
A reasoning primary (DeepSeek/Kimi/MiMo thinking mode) pins reasoning_content
on every assistant tool-call turn (a single space " " pad). api_messages is
built once under the primary; on a mid-session fallback to a strict
OpenAI-compatible provider (Mistral, Cerebras, Groq, SambaNova), those stale
pads were replayed verbatim and rejected with HTTP 400/422:
body.messages.2.assistant.reasoning_content: Extra inputs are not
permitted (input: ' ')
reapply_reasoning_echo_for_provider() only ever ADDED pads, so it never
reconciled history built under a reasoning primary against a strict fallback.
copy_reasoning_content_for_api() also leaked empty-string and 'reasoning'-only
shapes to non-pad providers.
Fix both sites: when the active provider does not enforce echo-back, strip
reasoning_content (empty, space-pad, or non-empty) entirely. Re-padding when
switching TO a reasoning provider is preserved. Covers the Cerebras 400 from
#45655 and the DeepSeek->Mistral 422 fallback report.
Refs #45655.
* test: update reasoning-replay tests for strict-provider stripping
test_explicit_reasoning_content_beats_normalized_reasoning_on_replay was
implicitly running on the OpenRouter fixture (non-pad); pin it to a reasoning
provider so the precedence it checks is observable. Add a positive
strict-provider test asserting reasoning_content is stripped on replay.
This commit is contained in:
parent
73340d8be6
commit
2b3a4f0af8
3 changed files with 206 additions and 42 deletions
|
|
@ -2202,25 +2202,36 @@ def copy_reasoning_content_for_api(agent, source_msg: dict, api_msg: dict) -> No
|
|||
if source_msg.get("role") != "assistant":
|
||||
return
|
||||
|
||||
# 1. Explicit reasoning_content already set — preserve it verbatim
|
||||
# (includes DeepSeek/Kimi's own space-placeholder written at creation
|
||||
# time, and any valid reasoning content from the same provider).
|
||||
needs_thinking_pad = agent._needs_thinking_reasoning_pad()
|
||||
|
||||
# 1. Explicit reasoning_content already set.
|
||||
#
|
||||
# Exception: sessions persisted BEFORE #17341 have empty-string
|
||||
# placeholders pinned at creation time. DeepSeek V4 Pro rejects
|
||||
# those with HTTP 400. When the active provider enforces the
|
||||
# thinking-mode echo, upgrade "" → " " on replay so stale history
|
||||
# doesn't 400 the user on the next turn.
|
||||
# When the active provider enforces the thinking-mode echo-back
|
||||
# (DeepSeek / Kimi / MiMo), preserve it verbatim — that includes their
|
||||
# own space-placeholder written at creation time and any valid reasoning
|
||||
# from the same provider. Sessions persisted BEFORE #17341 have
|
||||
# empty-string placeholders pinned at creation time; DeepSeek V4 Pro
|
||||
# rejects those with HTTP 400, so upgrade "" → " " on replay.
|
||||
#
|
||||
# When the active provider does NOT enforce echo-back, strip the field
|
||||
# entirely. Strict OpenAI-compatible providers (Mistral, Cerebras, Groq,
|
||||
# SambaNova, …) reject ANY reasoning_content key in input messages with
|
||||
# HTTP 400/422 ("Extra inputs are not permitted"), even an empty string
|
||||
# or a single-space pad. This is the cross-provider fallback case: a
|
||||
# reasoning primary (DeepSeek/Kimi/MiMo) pads history with " ", then a
|
||||
# fallback to a strict provider replays that pad and 422s. Stripping
|
||||
# here covers the rebuild path; reapply_reasoning_echo_for_provider()
|
||||
# covers the already-built api_messages path. Refs #45655.
|
||||
existing = source_msg.get("reasoning_content")
|
||||
if isinstance(existing, str):
|
||||
if existing == "" and agent._needs_thinking_reasoning_pad():
|
||||
if not needs_thinking_pad:
|
||||
api_msg.pop("reasoning_content", None)
|
||||
elif existing == "":
|
||||
api_msg["reasoning_content"] = " "
|
||||
else:
|
||||
api_msg["reasoning_content"] = existing
|
||||
return
|
||||
|
||||
needs_thinking_pad = agent._needs_thinking_reasoning_pad()
|
||||
|
||||
# 2. Cross-provider poisoned history (#15748): on DeepSeek/Kimi,
|
||||
# if the source turn has tool_calls AND a 'reasoning' field but no
|
||||
# 'reasoning_content' key, the 'reasoning' text was written by a
|
||||
|
|
@ -2246,9 +2257,13 @@ def copy_reasoning_content_for_api(agent, source_msg: dict, api_msg: dict) -> No
|
|||
# for providers that use the internal 'reasoning' key.
|
||||
# This must happen before the unconditional empty-string fallback so
|
||||
# genuine reasoning content is not overwritten (#15812 regression in
|
||||
# PR #15478).
|
||||
# PR #15478). Only promote for providers that enforce echo-back —
|
||||
# strict providers reject the field (refs #45655).
|
||||
if isinstance(normalized_reasoning, str) and normalized_reasoning:
|
||||
api_msg["reasoning_content"] = normalized_reasoning
|
||||
if needs_thinking_pad:
|
||||
api_msg["reasoning_content"] = normalized_reasoning
|
||||
else:
|
||||
api_msg.pop("reasoning_content", None)
|
||||
return
|
||||
|
||||
# 4. DeepSeek / Kimi thinking mode: all assistant messages need
|
||||
|
|
@ -2269,34 +2284,53 @@ def copy_reasoning_content_for_api(agent, source_msg: dict, api_msg: dict) -> No
|
|||
|
||||
|
||||
def reapply_reasoning_echo_for_provider(agent, api_messages: list) -> int:
|
||||
"""Re-pad assistant turns with reasoning_content for the active provider.
|
||||
"""Re-pad (or strip) assistant turns' reasoning_content for the active provider.
|
||||
|
||||
``api_messages`` is built once, before the retry loop, while the *primary*
|
||||
provider is active. If a mid-conversation fallback then switches to a
|
||||
require-side provider (DeepSeek / Kimi / MiMo thinking mode), assistant
|
||||
turns that were built when the prior provider did NOT need the echo-back go
|
||||
out without ``reasoning_content`` and the new provider rejects them with
|
||||
HTTP 400 ("The reasoning_content in the thinking mode must be passed back").
|
||||
provider is active. A mid-conversation fallback can then switch providers,
|
||||
so the reasoning fields baked into ``api_messages`` are shaped for the
|
||||
*prior* provider and must be reconciled against the *current* one:
|
||||
|
||||
Calling this immediately before building the request kwargs re-applies the
|
||||
pad against the *current* provider. It is idempotent and a no-op unless
|
||||
``_needs_thinking_reasoning_pad()`` is True for the active provider, so it
|
||||
is safe to call every iteration and covers every fallback path.
|
||||
* Switching TO a require-side provider (DeepSeek / Kimi / MiMo thinking
|
||||
mode): assistant turns built when the prior provider did NOT need the
|
||||
echo-back go out without ``reasoning_content`` and the new provider
|
||||
rejects them with HTTP 400 ("The reasoning_content in the thinking mode
|
||||
must be passed back"). Re-apply the pad.
|
||||
|
||||
Returns the number of assistant turns that gained reasoning_content.
|
||||
* Switching TO a strict provider that rejects the field (Mistral,
|
||||
Cerebras, Groq, SambaNova, …): assistant turns built under a reasoning
|
||||
primary carry a ``reasoning_content`` pad (often a single space ``" "``),
|
||||
and the strict provider rejects it with HTTP 400/422 ("Extra inputs are
|
||||
not permitted"). Strip the field. This is the exact cross-provider
|
||||
fallback bug from #45655 — a DeepSeek primary pads history with ``" "``,
|
||||
the request falls back to Mistral, and Mistral 422s on the stale pad.
|
||||
|
||||
Calling this immediately before building the request kwargs reconciles the
|
||||
fields against the *current* provider. It is idempotent and safe to call
|
||||
every iteration; it covers every fallback path.
|
||||
|
||||
Returns the number of assistant turns whose reasoning_content was added or
|
||||
removed.
|
||||
"""
|
||||
if not agent._needs_thinking_reasoning_pad():
|
||||
return 0
|
||||
padded = 0
|
||||
needs_pad = agent._needs_thinking_reasoning_pad()
|
||||
changed = 0
|
||||
for api_msg in api_messages:
|
||||
if api_msg.get("role") != "assistant":
|
||||
continue
|
||||
if api_msg.get("reasoning_content"):
|
||||
continue
|
||||
copy_reasoning_content_for_api(agent, api_msg, api_msg)
|
||||
if api_msg.get("reasoning_content"):
|
||||
padded += 1
|
||||
return padded
|
||||
if needs_pad:
|
||||
if api_msg.get("reasoning_content"):
|
||||
continue
|
||||
copy_reasoning_content_for_api(agent, api_msg, api_msg)
|
||||
if api_msg.get("reasoning_content"):
|
||||
changed += 1
|
||||
else:
|
||||
# Strict provider — strip any stale reasoning_content pad left
|
||||
# over from a reasoning primary so the fallback request doesn't
|
||||
# 400/422 on it.
|
||||
if "reasoning_content" in api_msg:
|
||||
api_msg.pop("reasoning_content", None)
|
||||
changed += 1
|
||||
return changed
|
||||
|
||||
|
||||
def _iter_pool_sockets(client: Any):
|
||||
|
|
|
|||
|
|
@ -160,10 +160,11 @@ class TestCopyReasoningContentForApi:
|
|||
agent._copy_reasoning_content_for_api(source, api_msg)
|
||||
assert api_msg["reasoning_content"] == " "
|
||||
|
||||
def test_non_thinking_provider_preserves_empty_reasoning_content_verbatim(self) -> None:
|
||||
"""The stale-placeholder upgrade ONLY fires when the active provider
|
||||
enforces thinking-mode echo. On non-thinking providers, an empty
|
||||
reasoning_content must still round-trip verbatim.
|
||||
def test_non_thinking_provider_strips_empty_reasoning_content(self) -> None:
|
||||
"""Strict OpenAI-compatible providers (Mistral, Cerebras, …) reject ANY
|
||||
reasoning_content key in input messages — even an empty string — with
|
||||
HTTP 400/422. On a non-thinking provider the field must be stripped,
|
||||
not round-tripped. Refs #45655.
|
||||
"""
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
|
|
@ -177,7 +178,7 @@ class TestCopyReasoningContentForApi:
|
|||
}
|
||||
api_msg: dict = {}
|
||||
agent._copy_reasoning_content_for_api(source, api_msg)
|
||||
assert api_msg["reasoning_content"] == ""
|
||||
assert "reasoning_content" not in api_msg
|
||||
|
||||
def test_deepseek_reasoning_field_promoted(self) -> None:
|
||||
"""When only 'reasoning' is set, it gets promoted to reasoning_content."""
|
||||
|
|
@ -532,7 +533,12 @@ class TestReapplyReasoningEchoForProviderSwitch:
|
|||
assert msgs[2]["reasoning_content"] == "summary from codex"
|
||||
assert msgs[4]["reasoning_content"] == " "
|
||||
|
||||
def test_noop_under_non_require_provider(self) -> None:
|
||||
def test_strips_stale_pad_under_strict_provider(self) -> None:
|
||||
"""Switching TO a strict provider (Codex/Mistral/Cerebras) must STRIP
|
||||
stale reasoning_content baked in under a reasoning primary, otherwise
|
||||
the fallback request 400/422s ("Extra inputs are not permitted").
|
||||
Refs #45655 — DeepSeek primary → Mistral fallback 422 on the " " pad.
|
||||
"""
|
||||
from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider
|
||||
|
||||
agent = _make_agent(
|
||||
|
|
@ -541,9 +547,11 @@ class TestReapplyReasoningEchoForProviderSwitch:
|
|||
base_url="https://chatgpt.com/backend-api/codex",
|
||||
)
|
||||
msgs = self._codex_built_history()
|
||||
padded = reapply_reasoning_echo_for_provider(agent, msgs)
|
||||
assert padded == 0
|
||||
# the bare turn stays bare — Codex doesn't want reasoning_content
|
||||
changed = reapply_reasoning_echo_for_provider(agent, msgs)
|
||||
# msgs[2] carried "summary from codex" — must be stripped for the
|
||||
# strict provider; the bare turn (msgs[4]) stays bare.
|
||||
assert changed == 1
|
||||
assert "reasoning_content" not in msgs[2]
|
||||
assert "reasoning_content" not in msgs[4]
|
||||
|
||||
def test_idempotent(self) -> None:
|
||||
|
|
@ -563,3 +571,79 @@ class TestReapplyReasoningEchoForProviderSwitch:
|
|||
assert "reasoning_content" not in msgs[0] # system
|
||||
assert "reasoning_content" not in msgs[1] # user
|
||||
assert "reasoning_content" not in msgs[3] # tool
|
||||
|
||||
|
||||
class TestReasoningPrimaryToStrictFallback:
|
||||
"""Regression: reasoning primary → strict fallback must not 422.
|
||||
|
||||
User report (HTTP 422): a DeepSeek V4 Pro primary pads tool-call turns
|
||||
with ``reasoning_content=" "``; a mid-session fallback to Mistral
|
||||
(mistral-small) replays those pads and Mistral rejects them with::
|
||||
|
||||
body.messages.2.assistant.reasoning_content: Extra inputs are not
|
||||
permitted (input: ' ')
|
||||
|
||||
api_messages is built once under the primary, so the stale pad survives
|
||||
into the fallback request. reapply_reasoning_echo_for_provider() must
|
||||
strip it when the active provider doesn't enforce echo-back. Refs #45655.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _deepseek_built_history() -> list[dict]:
|
||||
"""Multi-turn history as built under a DeepSeek primary — tool-call
|
||||
turns padded with " " at indices 2 and 6 (matching the report)."""
|
||||
return [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "u1"},
|
||||
{"role": "assistant", "reasoning_content": " ",
|
||||
"tool_calls": [{"id": "a", "function": {"name": "terminal"}}]},
|
||||
{"role": "tool", "tool_call_id": "a", "content": "ok"},
|
||||
{"role": "assistant", "content": "done"},
|
||||
{"role": "user", "content": "u2"},
|
||||
{"role": "assistant", "reasoning_content": " ",
|
||||
"tool_calls": [{"id": "b", "function": {"name": "terminal"}}]},
|
||||
{"role": "tool", "tool_call_id": "b", "content": "ok"},
|
||||
]
|
||||
|
||||
def test_mistral_fallback_strips_space_pad(self) -> None:
|
||||
from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider
|
||||
|
||||
mistral = _make_agent(
|
||||
provider="mistral",
|
||||
model="mistral-small-latest",
|
||||
base_url="https://api.mistral.ai/v1",
|
||||
)
|
||||
msgs = self._deepseek_built_history()
|
||||
changed = reapply_reasoning_echo_for_provider(mistral, msgs)
|
||||
assert changed == 2 # both padded tool-call turns
|
||||
leaks = [i for i, m in enumerate(msgs) if "reasoning_content" in m]
|
||||
assert leaks == []
|
||||
|
||||
def test_roundtrip_back_to_deepseek_repads(self) -> None:
|
||||
"""Strict fallback strips, then switching back to DeepSeek re-pads —
|
||||
no regression on the #15748 echo-back requirement."""
|
||||
from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider
|
||||
|
||||
msgs = self._deepseek_built_history()
|
||||
mistral = _make_agent(
|
||||
provider="mistral", model="mistral-small-latest",
|
||||
base_url="https://api.mistral.ai/v1",
|
||||
)
|
||||
reapply_reasoning_echo_for_provider(mistral, msgs)
|
||||
deepseek = _make_agent(provider="deepseek", model="deepseek-v4-pro")
|
||||
reapply_reasoning_echo_for_provider(deepseek, msgs)
|
||||
assert msgs[2]["reasoning_content"] == " "
|
||||
assert msgs[6]["reasoning_content"] == " "
|
||||
|
||||
def test_copy_strips_space_pad_for_mistral(self) -> None:
|
||||
"""copy_reasoning_content_for_api strips the " " pad on the rebuild
|
||||
path too (covers fresh api_messages built under the strict provider)."""
|
||||
mistral = _make_agent(
|
||||
provider="mistral", model="mistral-small-latest",
|
||||
base_url="https://api.mistral.ai/v1",
|
||||
)
|
||||
source = {"role": "assistant", "reasoning_content": " ",
|
||||
"tool_calls": [{"id": "a"}]}
|
||||
api_msg: dict = {"role": "assistant", "tool_calls": [{"id": "a"}]}
|
||||
mistral._copy_reasoning_content_for_api(source, api_msg)
|
||||
assert "reasoning_content" not in api_msg
|
||||
|
|
|
|||
|
|
@ -6413,6 +6413,13 @@ class TestReasoningReplayForStrictProviders:
|
|||
|
||||
def test_explicit_reasoning_content_beats_normalized_reasoning_on_replay(self, agent):
|
||||
self._setup_agent(agent)
|
||||
# Precedence (explicit reasoning_content wins over the 'reasoning'
|
||||
# field) only matters on a provider that echoes reasoning_content
|
||||
# back — strict providers strip the field entirely. Pin a
|
||||
# reasoning provider so the precedence is observable.
|
||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.provider = "kimi-coding"
|
||||
prior_assistant = {
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
|
|
@ -6445,6 +6452,45 @@ class TestReasoningReplayForStrictProviders:
|
|||
replayed_assistant = next(msg for msg in sent_messages if msg.get("role") == "assistant")
|
||||
assert replayed_assistant["reasoning_content"] == "provider-native scratchpad"
|
||||
|
||||
def test_strict_provider_strips_reasoning_content_on_replay(self, agent):
|
||||
"""On a strict provider (Mistral et al.) reasoning_content from a
|
||||
prior reasoning primary must be stripped on replay — otherwise the
|
||||
request 400/422s ('Extra inputs are not permitted'). Refs #45655."""
|
||||
self._setup_agent(agent)
|
||||
agent.base_url = "https://api.mistral.ai/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.provider = "mistral"
|
||||
prior_assistant = {
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "c1",
|
||||
"type": "function",
|
||||
"function": {"name": "web_search", "arguments": "{\"q\":\"test\"}"},
|
||||
}
|
||||
],
|
||||
"reasoning_content": " ", # space-pad from a reasoning primary
|
||||
}
|
||||
tool_result = {"role": "tool", "tool_call_id": "c1", "content": "ok"}
|
||||
final_resp = _mock_response(content="done", finish_reason="stop")
|
||||
agent.client.chat.completions.create.return_value = final_resp
|
||||
|
||||
with (
|
||||
patch.object(agent, "_persist_session"),
|
||||
patch.object(agent, "_save_trajectory"),
|
||||
patch.object(agent, "_cleanup_task_resources"),
|
||||
):
|
||||
result = agent.run_conversation(
|
||||
"next step",
|
||||
conversation_history=[prior_assistant, tool_result],
|
||||
)
|
||||
|
||||
assert result["completed"] is True
|
||||
sent_messages = agent.client.chat.completions.create.call_args.kwargs["messages"]
|
||||
replayed_assistant = next(msg for msg in sent_messages if msg.get("role") == "assistant")
|
||||
assert "reasoning_content" not in replayed_assistant
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bugfix: _vprint force=True on error messages during TTS
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue