fix(agent): extract thinking from content-list blocks for DeepSeek V4 Pro

DeepSeek V4 Pro returns thinking content as typed blocks inside the
content array rather than as a top-level reasoning_content field:

  [{"type": "thinking", "thinking": "..."}, {"type": "output", ...}]

_extract_reasoning only handled content as a plain string, so the
thinking text was silently dropped.  On the next turn the session was
replayed without the thinking block, causing:

  HTTP 400: The content[].thinking in the thinking mode must be
  passed back to the API.

Fix: when content is a list and no structured reasoning field was
found, scan for items with type=='thinking' and accumulate their
'thinking' (or 'text') value into reasoning_parts.  Structured fields
(reasoning, reasoning_content, reasoning_details) still take priority
so existing provider behaviour is unchanged.

Closes #21944
This commit is contained in:
Wesley Simplicio 2026-05-09 12:22:46 -03:00 committed by Teknium
parent 98e94beb1b
commit 68854cdcdb
2 changed files with 49 additions and 0 deletions

View file

@ -3529,6 +3529,19 @@ class AIAgent:
# instead of returning structured reasoning fields. Only fall back
# to inline extraction when no structured reasoning was found.
content = getattr(assistant_message, "content", None)
if not reasoning_parts and isinstance(content, list):
# DeepSeek V4 Pro (and compatible providers) return content as a
# list of typed blocks, e.g.:
# [{"type": "thinking", "thinking": "..."}, {"type": "output", ...}]
# Without this branch the thinking text is silently dropped and the
# next turn fails with HTTP 400 ("thinking must be passed back").
# Refs #21944.
for block in content:
if isinstance(block, dict) and block.get("type") == "thinking":
thinking_text = block.get("thinking") or block.get("text") or ""
thinking_text = thinking_text.strip()
if thinking_text and thinking_text not in reasoning_parts:
reasoning_parts.append(thinking_text)
if not reasoning_parts and isinstance(content, str) and content:
inline_patterns = (
r"<think>(.*?)</think>",

View file

@ -517,6 +517,42 @@ class TestExtractReasoning:
msg = _mock_assistant_msg(content=content)
assert agent._extract_reasoning(msg) == expected
def test_content_list_thinking_blocks_extracted(self, agent):
    """Reasoning is recovered from ``thinking`` blocks in a list-typed content.

    Regression test for issue #21944: DeepSeek V4 Pro returns ``content``
    as a list of typed blocks. If the thinking text is dropped, the next
    turn fails with HTTP 400 ("thinking must be passed back to the API").
    """
    typed_blocks = [
        {"type": "thinking", "thinking": "deep analysis here"},
        {"type": "output", "text": "final answer"},
    ]
    msg = _mock_assistant_msg(content=typed_blocks)
    # Only the thinking block's text should come back; the output block
    # is not reasoning.
    assert agent._extract_reasoning(msg) == "deep analysis here"
def test_content_list_non_thinking_blocks_ignored(self, agent):
    """A content list without any ``thinking`` block yields no reasoning."""
    text_only_blocks = [{"type": "text", "text": "just a regular response"}]
    msg = _mock_assistant_msg(content=text_only_blocks)
    extracted = agent._extract_reasoning(msg)
    # A plain text block must not be mistaken for reasoning.
    assert extracted is None
def test_content_list_thinking_prefers_structured_field(self, agent):
    """The structured ``reasoning`` field takes priority over content-list blocks."""
    list_blocks = [{"type": "thinking", "thinking": "from content list"}]
    msg = _mock_assistant_msg(
        reasoning="from structured field",
        content=list_blocks,
    )
    # The structured field is found first, so the content-list thinking
    # text must not appear in the result.
    assert agent._extract_reasoning(msg) == "from structured field"
class TestCleanSessionContent:
def test_none_passthrough(self):