mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
fix(agent): re-pad reasoning_content on cross-provider fallback to require-side providers
api_messages is built once before the retry loop while the primary provider
is active. When a mid-conversation fallback switches to a require-side thinking
provider (DeepSeek/Kimi/MiMo), assistant turns built under a non-require primary
(e.g. Codex) go out without reasoning_content and the new provider rejects the
request with HTTP 400 ("reasoning_content must be passed back").
Re-apply the echo-back pad against the current provider immediately before
building the request kwargs. Idempotent and a no-op unless the active provider
enforces echo-back, so it covers all fallback paths without affecting normal or
reject-side operation.
Drafted by Claude (Opus 4.7) under human review while fixing a personal deployment.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
9179396cb7
commit
b5495db701
4 changed files with 125 additions and 0 deletions
|
|
@ -1994,6 +1994,36 @@ def copy_reasoning_content_for_api(agent, source_msg: dict, api_msg: dict) -> No
|
|||
api_msg.pop("reasoning_content", None)
|
||||
|
||||
|
||||
def reapply_reasoning_echo_for_provider(agent, api_messages: list) -> int:
    """Backfill ``reasoning_content`` on assistant turns for the active provider.

    The outgoing message list is assembled once, ahead of the retry loop,
    under whichever provider was primary at that point. Should a fallback
    later hand the conversation to a provider that insists on having its
    reasoning echoed back (DeepSeek / Kimi / MiMo thinking mode), assistant
    turns assembled under a provider without that requirement lack
    ``reasoning_content`` and the request is refused with HTTP 400
    ("The reasoning_content in the thinking mode must be passed back").

    Invoke this right before the request kwargs are built. Nothing happens
    unless ``agent._needs_thinking_reasoning_pad()`` is true, and assistant
    turns that already hold a non-empty ``reasoning_content`` are skipped, so
    turns padded by an earlier call are not touched again.

    Args:
        agent: Agent whose ``_needs_thinking_reasoning_pad()`` is consulted
            to decide whether any padding is required.
        api_messages: Outgoing message dicts; assistant entries are updated
            in place.

    Returns:
        How many assistant turns ended up with ``reasoning_content`` as a
        result of this call.
    """
    # Providers that do not enforce echo-back leave the payload untouched.
    if not agent._needs_thinking_reasoning_pad():
        return 0

    gained = 0
    for message in api_messages:
        is_assistant = message.get("role") == "assistant"
        if not is_assistant or message.get("reasoning_content"):
            continue
        # The message serves as both source and destination: whatever the
        # copy helper derives is written back onto the same dict.
        copy_reasoning_content_for_api(agent, message, message)
        if message.get("reasoning_content"):
            gained += 1
    return gained
|
||||
|
||||
|
||||
def _iter_pool_sockets(client: Any):
|
||||
"""Yield raw sockets reachable from an OpenAI/httpx client pool.
|
||||
|
|
|
|||
|
|
@ -1183,6 +1183,14 @@ def run_conversation(
|
|||
|
||||
try:
|
||||
agent._reset_stream_delivery_tracking()
|
||||
# api_messages is built once, before this retry loop, while the
|
||||
# primary provider is active. A mid-conversation fallback can
|
||||
# switch to a require-side provider (DeepSeek / Kimi / MiMo) that
|
||||
# rejects assistant turns lacking reasoning_content. Re-apply the
|
||||
# echo-back pad for the *current* provider here (idempotent no-op
|
||||
# unless the active provider needs it) so the fallback request
|
||||
# isn't sent with stale, primary-shaped reasoning fields.
|
||||
agent._reapply_reasoning_echo_for_provider(api_messages)
|
||||
api_kwargs = agent._build_api_kwargs(api_messages)
|
||||
if agent._force_ascii_payload:
|
||||
_sanitize_structure_non_ascii(api_kwargs)
|
||||
|
|
|
|||
|
|
@ -4076,6 +4076,11 @@ class AIAgent:
|
|||
from agent.agent_runtime_helpers import copy_reasoning_content_for_api
|
||||
return copy_reasoning_content_for_api(self, source_msg, api_msg)
|
||||
|
||||
def _reapply_reasoning_echo_for_provider(self, api_messages: list) -> int:
    """Thin delegate for ``agent.agent_runtime_helpers.reapply_reasoning_echo_for_provider``.

    Passes this agent and ``api_messages`` straight through and hands back
    the helper's return value unchanged.
    """
    # Resolved at call time, exactly as the direct import did.
    from agent.agent_runtime_helpers import (
        reapply_reasoning_echo_for_provider as _reapply_echo,
    )

    gained = _reapply_echo(self, api_messages)
    return gained
|
||||
|
||||
@staticmethod
|
||||
def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
|
||||
"""Strip Codex Responses API fields from tool_calls for strict providers.
|
||||
|
|
|
|||
|
|
@ -481,3 +481,85 @@ class TestNeedsKimiToolReasoning:
|
|||
)
|
||||
# model name contains 'moonshot' but host is openrouter — should be False
|
||||
assert agent._needs_kimi_tool_reasoning() is False
|
||||
|
||||
|
||||
class TestReapplyReasoningEchoForProviderSwitch:
    """Mid-conversation fallback to a require-side provider must re-pad.

    ``api_messages`` is built once, before the retry loop, while the *primary*
    provider is active. When a fallback then switches to DeepSeek/Kimi/MiMo,
    assistant turns that were built under a non-require primary (e.g. Codex,
    which uses encrypted reasoning, not ``reasoning_content``) go out bare and
    the new provider 400s with "reasoning_content must be passed back".

    ``reapply_reasoning_echo_for_provider`` re-applies the pad against the
    *current* provider right before the request is built. It is idempotent and
    a no-op unless the active provider enforces echo-back.
    """

    @staticmethod
    def _codex_built_history() -> list[dict]:
        """Return a fresh history as built under a Codex primary.

        One assistant turn carries a reasoning summary (stored as
        ``reasoning_content``), the other is a bare tool-call turn. A new
        list of new dicts is returned on every call, so it can also serve as
        the pristine reference in equality checks.
        """
        return [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "do the thing"},
            {  # turn that emitted a reasoning summary
                "role": "assistant",
                "content": "",
                "reasoning_content": "summary from codex",
                "tool_calls": [{"id": "c1", "function": {"name": "terminal"}}],
            },
            {"role": "tool", "tool_call_id": "c1", "content": "ok"},
            {  # bare tool-call turn (Codex emitted no summary)
                "role": "assistant",
                "content": "",
                "tool_calls": [{"id": "c2", "function": {"name": "terminal"}}],
            },
            {"role": "tool", "tool_call_id": "c2", "content": "ok"},
        ]

    def test_switch_to_deepseek_pads_bare_turns(self) -> None:
        from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider

        agent = _make_agent(provider="deepseek", model="deepseek-v4-pro")
        msgs = self._codex_built_history()
        padded = reapply_reasoning_echo_for_provider(agent, msgs)
        assert padded == 1
        bare = [
            m
            for m in msgs
            if m.get("role") == "assistant" and not m.get("reasoning_content")
        ]
        assert bare == []
        # existing summary preserved verbatim, not clobbered with the pad
        assert msgs[2]["reasoning_content"] == "summary from codex"
        assert msgs[4]["reasoning_content"] == " "

    def test_noop_under_non_require_provider(self) -> None:
        from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider

        agent = _make_agent(
            provider="openai-codex",
            model="gpt-5.5",
            base_url="https://chatgpt.com/backend-api/codex",
        )
        msgs = self._codex_built_history()
        padded = reapply_reasoning_echo_for_provider(agent, msgs)
        assert padded == 0
        # the bare turn stays bare — Codex doesn't want reasoning_content
        assert "reasoning_content" not in msgs[4]
        # a no-op must leave the whole history as built, not just one key
        assert msgs == self._codex_built_history()

    def test_idempotent(self) -> None:
        import copy

        from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider

        agent = _make_agent(provider="deepseek", model="deepseek-v4-pro")
        msgs = self._codex_built_history()
        assert reapply_reasoning_echo_for_provider(agent, msgs) == 1
        # Snapshot after the first pass: the second pass must neither report
        # new pads nor alter any message content.
        after_first_pass = copy.deepcopy(msgs)
        assert reapply_reasoning_echo_for_provider(agent, msgs) == 0
        assert msgs == after_first_pass

    def test_non_assistant_messages_untouched(self) -> None:
        from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider

        agent = _make_agent(provider="deepseek", model="deepseek-v4-pro")
        msgs = self._codex_built_history()
        pristine = self._codex_built_history()
        reapply_reasoning_echo_for_provider(agent, msgs)
        # system, user, and both tool messages
        for idx in (0, 1, 3, 5):
            assert "reasoning_content" not in msgs[idx]
            assert msgs[idx] == pristine[idx]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue