diff --git a/agent/agent_runtime_helpers.py b/agent/agent_runtime_helpers.py index ca45d79af64..40e5dbf2a41 100644 --- a/agent/agent_runtime_helpers.py +++ b/agent/agent_runtime_helpers.py @@ -2202,25 +2202,36 @@ def copy_reasoning_content_for_api(agent, source_msg: dict, api_msg: dict) -> No if source_msg.get("role") != "assistant": return - # 1. Explicit reasoning_content already set — preserve it verbatim - # (includes DeepSeek/Kimi's own space-placeholder written at creation - # time, and any valid reasoning content from the same provider). + needs_thinking_pad = agent._needs_thinking_reasoning_pad() + + # 1. Explicit reasoning_content already set. # - # Exception: sessions persisted BEFORE #17341 have empty-string - # placeholders pinned at creation time. DeepSeek V4 Pro rejects - # those with HTTP 400. When the active provider enforces the - # thinking-mode echo, upgrade "" → " " on replay so stale history - # doesn't 400 the user on the next turn. + # When the active provider enforces the thinking-mode echo-back + # (DeepSeek / Kimi / MiMo), preserve it verbatim — that includes their + # own space-placeholder written at creation time and any valid reasoning + # from the same provider. Sessions persisted BEFORE #17341 have + # empty-string placeholders pinned at creation time; DeepSeek V4 Pro + # rejects those with HTTP 400, so upgrade "" → " " on replay. + # + # When the active provider does NOT enforce echo-back, strip the field + # entirely. Strict OpenAI-compatible providers (Mistral, Cerebras, Groq, + # SambaNova, …) reject ANY reasoning_content key in input messages with + # HTTP 400/422 ("Extra inputs are not permitted"), even an empty string + # or a single-space pad. This is the cross-provider fallback case: a + # reasoning primary (DeepSeek/Kimi/MiMo) pads history with " ", then a + # fallback to a strict provider replays that pad and 422s. Stripping + # here covers the rebuild path; reapply_reasoning_echo_for_provider() + # covers the already-built api_messages path. Refs #45655. existing = source_msg.get("reasoning_content") if isinstance(existing, str): - if existing == "" and agent._needs_thinking_reasoning_pad(): + if not needs_thinking_pad: + api_msg.pop("reasoning_content", None) + elif existing == "": api_msg["reasoning_content"] = " " else: api_msg["reasoning_content"] = existing return - needs_thinking_pad = agent._needs_thinking_reasoning_pad() - # 2. Cross-provider poisoned history (#15748): on DeepSeek/Kimi, # if the source turn has tool_calls AND a 'reasoning' field but no # 'reasoning_content' key, the 'reasoning' text was written by a @@ -2246,9 +2257,13 @@ def copy_reasoning_content_for_api(agent, source_msg: dict, api_msg: dict) -> No # for providers that use the internal 'reasoning' key. # This must happen before the unconditional empty-string fallback so # genuine reasoning content is not overwritten (#15812 regression in - # PR #15478). + # PR #15478). Only promote for providers that enforce echo-back — + # strict providers reject the field (refs #45655). if isinstance(normalized_reasoning, str) and normalized_reasoning: - api_msg["reasoning_content"] = normalized_reasoning + if needs_thinking_pad: + api_msg["reasoning_content"] = normalized_reasoning + else: + api_msg.pop("reasoning_content", None) return # 4. DeepSeek / Kimi thinking mode: all assistant messages need @@ -2269,34 +2284,53 @@ def copy_reasoning_content_for_api(agent, source_msg: dict, api_msg: dict) -> No def reapply_reasoning_echo_for_provider(agent, api_messages: list) -> int: - """Re-pad assistant turns with reasoning_content for the active provider. + """Re-pad (or strip) assistant turns' reasoning_content for the active provider. ``api_messages`` is built once, before the retry loop, while the *primary* - provider is active. If a mid-conversation fallback then switches to a - require-side provider (DeepSeek / Kimi / MiMo thinking mode), assistant - turns that were built when the prior provider did NOT need the echo-back go - out without ``reasoning_content`` and the new provider rejects them with - HTTP 400 ("The reasoning_content in the thinking mode must be passed back"). + provider is active. A mid-conversation fallback can then switch providers, + so the reasoning fields baked into ``api_messages`` are shaped for the + *prior* provider and must be reconciled against the *current* one: - Calling this immediately before building the request kwargs re-applies the - pad against the *current* provider. It is idempotent and a no-op unless - ``_needs_thinking_reasoning_pad()`` is True for the active provider, so it - is safe to call every iteration and covers every fallback path. + * Switching TO a require-side provider (DeepSeek / Kimi / MiMo thinking + mode): assistant turns built when the prior provider did NOT need the + echo-back go out without ``reasoning_content`` and the new provider + rejects them with HTTP 400 ("The reasoning_content in the thinking mode + must be passed back"). Re-apply the pad. - Returns the number of assistant turns that gained reasoning_content. + * Switching TO a strict provider that rejects the field (Mistral, + Cerebras, Groq, SambaNova, …): assistant turns built under a reasoning + primary carry a ``reasoning_content`` pad (often a single space ``" "``), + and the strict provider rejects it with HTTP 400/422 ("Extra inputs are + not permitted"). Strip the field. This is the exact cross-provider + fallback bug from #45655 — a DeepSeek primary pads history with ``" "``, + the request falls back to Mistral, and Mistral 422s on the stale pad. + + Calling this immediately before building the request kwargs reconciles the + fields against the *current* provider. It is idempotent and safe to call + every iteration; it covers every fallback path. + + Returns the number of assistant turns whose reasoning_content was added or + removed. """ - if not agent._needs_thinking_reasoning_pad(): - return 0 - padded = 0 + needs_pad = agent._needs_thinking_reasoning_pad() + changed = 0 for api_msg in api_messages: if api_msg.get("role") != "assistant": continue - if api_msg.get("reasoning_content"): - continue - copy_reasoning_content_for_api(agent, api_msg, api_msg) - if api_msg.get("reasoning_content"): - padded += 1 - return padded + if needs_pad: + if api_msg.get("reasoning_content"): + continue + copy_reasoning_content_for_api(agent, api_msg, api_msg) + if api_msg.get("reasoning_content"): + changed += 1 + else: + # Strict provider — strip any stale reasoning_content pad left + # over from a reasoning primary so the fallback request doesn't + # 400/422 on it. + if "reasoning_content" in api_msg: + api_msg.pop("reasoning_content", None) + changed += 1 + return changed def _iter_pool_sockets(client: Any): diff --git a/tests/run_agent/test_deepseek_reasoning_content_echo.py b/tests/run_agent/test_deepseek_reasoning_content_echo.py index c8c322191ff..8ac321b65ba 100644 --- a/tests/run_agent/test_deepseek_reasoning_content_echo.py +++ b/tests/run_agent/test_deepseek_reasoning_content_echo.py @@ -160,10 +160,11 @@ class TestCopyReasoningContentForApi: agent._copy_reasoning_content_for_api(source, api_msg) assert api_msg["reasoning_content"] == " " - def test_non_thinking_provider_preserves_empty_reasoning_content_verbatim(self) -> None: - """The stale-placeholder upgrade ONLY fires when the active provider - enforces thinking-mode echo. On non-thinking providers, an empty - reasoning_content must still round-trip verbatim. + def test_non_thinking_provider_strips_empty_reasoning_content(self) -> None: + """Strict OpenAI-compatible providers (Mistral, Cerebras, …) reject ANY + reasoning_content key in input messages — even an empty string — with + HTTP 400/422. On a non-thinking provider the field must be stripped, + not round-tripped. Refs #45655. """ agent = _make_agent( provider="openrouter", @@ -177,7 +178,7 @@ class TestCopyReasoningContentForApi: } api_msg: dict = {} agent._copy_reasoning_content_for_api(source, api_msg) - assert api_msg["reasoning_content"] == "" + assert "reasoning_content" not in api_msg def test_deepseek_reasoning_field_promoted(self) -> None: """When only 'reasoning' is set, it gets promoted to reasoning_content.""" @@ -532,7 +533,12 @@ class TestReapplyReasoningEchoForProviderSwitch: assert msgs[2]["reasoning_content"] == "summary from codex" assert msgs[4]["reasoning_content"] == " " - def test_noop_under_non_require_provider(self) -> None: + def test_strips_stale_pad_under_strict_provider(self) -> None: + """Switching TO a strict provider (Codex/Mistral/Cerebras) must STRIP + stale reasoning_content baked in under a reasoning primary, otherwise + the fallback request 400/422s ("Extra inputs are not permitted"). + Refs #45655 — DeepSeek primary → Mistral fallback 422 on the " " pad. + """ from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider agent = _make_agent( @@ -541,9 +547,11 @@ class TestReapplyReasoningEchoForProviderSwitch: base_url="https://chatgpt.com/backend-api/codex", ) msgs = self._codex_built_history() - padded = reapply_reasoning_echo_for_provider(agent, msgs) - assert padded == 0 - # the bare turn stays bare — Codex doesn't want reasoning_content + changed = reapply_reasoning_echo_for_provider(agent, msgs) + # msgs[2] carried "summary from codex" — must be stripped for the + # strict provider; the bare turn (msgs[4]) stays bare. + assert changed == 1 + assert "reasoning_content" not in msgs[2] assert "reasoning_content" not in msgs[4] def test_idempotent(self) -> None: @@ -563,3 +571,79 @@ class TestReapplyReasoningEchoForProviderSwitch: assert "reasoning_content" not in msgs[0] # system assert "reasoning_content" not in msgs[1] # user assert "reasoning_content" not in msgs[3] # tool + + +class TestReasoningPrimaryToStrictFallback: + """Regression: reasoning primary → strict fallback must not 422. + + User report (HTTP 422): a DeepSeek V4 Pro primary pads tool-call turns + with ``reasoning_content=" "``; a mid-session fallback to Mistral + (mistral-small) replays those pads and Mistral rejects them with:: + + body.messages.2.assistant.reasoning_content: Extra inputs are not + permitted (input: ' ') + + api_messages is built once under the primary, so the stale pad survives + into the fallback request. reapply_reasoning_echo_for_provider() must + strip it when the active provider doesn't enforce echo-back. Refs #45655. + """ + + @staticmethod + def _deepseek_built_history() -> list[dict]: + """Multi-turn history as built under a DeepSeek primary — tool-call + turns padded with " " at indices 2 and 6 (matching the report).""" + return [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "u1"}, + {"role": "assistant", "reasoning_content": " ", + "tool_calls": [{"id": "a", "function": {"name": "terminal"}}]}, + {"role": "tool", "tool_call_id": "a", "content": "ok"}, + {"role": "assistant", "content": "done"}, + {"role": "user", "content": "u2"}, + {"role": "assistant", "reasoning_content": " ", + "tool_calls": [{"id": "b", "function": {"name": "terminal"}}]}, + {"role": "tool", "tool_call_id": "b", "content": "ok"}, + ] + + def test_mistral_fallback_strips_space_pad(self) -> None: + from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider + + mistral = _make_agent( + provider="mistral", + model="mistral-small-latest", + base_url="https://api.mistral.ai/v1", + ) + msgs = self._deepseek_built_history() + changed = reapply_reasoning_echo_for_provider(mistral, msgs) + assert changed == 2 # both padded tool-call turns + leaks = [i for i, m in enumerate(msgs) if "reasoning_content" in m] + assert leaks == [] + + def test_roundtrip_back_to_deepseek_repads(self) -> None: + """Strict fallback strips, then switching back to DeepSeek re-pads — + no regression on the #15748 echo-back requirement.""" + from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider + + msgs = self._deepseek_built_history() + mistral = _make_agent( + provider="mistral", model="mistral-small-latest", + base_url="https://api.mistral.ai/v1", + ) + reapply_reasoning_echo_for_provider(mistral, msgs) + deepseek = _make_agent(provider="deepseek", model="deepseek-v4-pro") + reapply_reasoning_echo_for_provider(deepseek, msgs) + assert msgs[2]["reasoning_content"] == " " + assert msgs[6]["reasoning_content"] == " " + + def test_copy_strips_space_pad_for_mistral(self) -> None: + """copy_reasoning_content_for_api strips the " " pad on the rebuild + path too (covers fresh api_messages built under the strict provider).""" + mistral = _make_agent( + provider="mistral", model="mistral-small-latest", + base_url="https://api.mistral.ai/v1", + ) + source = {"role": "assistant", "reasoning_content": " ", + "tool_calls": [{"id": "a"}]} + api_msg: dict = {"role": "assistant", "tool_calls": [{"id": "a"}]} + mistral._copy_reasoning_content_for_api(source, api_msg) + assert "reasoning_content" not in api_msg diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 385a296f889..2b45654aac2 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -6413,6 +6413,13 @@ class TestReasoningReplayForStrictProviders: def test_explicit_reasoning_content_beats_normalized_reasoning_on_replay(self, agent): self._setup_agent(agent) + # Precedence (explicit reasoning_content wins over the 'reasoning' + # field) only matters on a provider that echoes reasoning_content + # back — strict providers strip the field entirely. Pin a + # reasoning provider so the precedence is observable. + agent.base_url = "https://api.kimi.com/coding/v1" + agent._base_url_lower = agent.base_url.lower() + agent.provider = "kimi-coding" prior_assistant = { "role": "assistant", "content": "", @@ -6445,6 +6452,45 @@ class TestReasoningReplayForStrictProviders: replayed_assistant = next(msg for msg in sent_messages if msg.get("role") == "assistant") assert replayed_assistant["reasoning_content"] == "provider-native scratchpad" + def test_strict_provider_strips_reasoning_content_on_replay(self, agent): + """On a strict provider (Mistral et al.) reasoning_content from a + prior reasoning primary must be stripped on replay — otherwise the + request 400/422s ('Extra inputs are not permitted'). Refs #45655.""" + self._setup_agent(agent) + agent.base_url = "https://api.mistral.ai/v1" + agent._base_url_lower = agent.base_url.lower() + agent.provider = "mistral" + prior_assistant = { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "c1", + "type": "function", + "function": {"name": "web_search", "arguments": "{\"q\":\"test\"}"}, + } + ], + "reasoning_content": " ", # space-pad from a reasoning primary + } + tool_result = {"role": "tool", "tool_call_id": "c1", "content": "ok"} + final_resp = _mock_response(content="done", finish_reason="stop") + agent.client.chat.completions.create.return_value = final_resp + + with ( + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation( + "next step", + conversation_history=[prior_assistant, tool_result], + ) + + assert result["completed"] is True + sent_messages = agent.client.chat.completions.create.call_args.kwargs["messages"] + replayed_assistant = next(msg for msg in sent_messages if msg.get("role") == "assistant") + assert "reasoning_content" not in replayed_assistant + # --------------------------------------------------------------------------- # Bugfix: _vprint force=True on error messages during TTS