mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-02 02:01:47 +00:00
fix(deepseek): preserve v4 reasoning_content on replay
This commit is contained in:
parent
8fbc9d7d78
commit
b9b9ee3e6c
5 changed files with 398 additions and 33 deletions
|
|
@ -477,9 +477,13 @@ class ChatCompletionsTransport(ProviderTransport):
|
|||
# so keep them apart in provider_data rather than merging.
|
||||
reasoning = getattr(msg, "reasoning", None)
|
||||
reasoning_content = getattr(msg, "reasoning_content", None)
|
||||
if reasoning_content is None and hasattr(msg, "model_extra"):
|
||||
model_extra = getattr(msg, "model_extra", None) or {}
|
||||
if isinstance(model_extra, dict) and "reasoning_content" in model_extra:
|
||||
reasoning_content = model_extra["reasoning_content"]
|
||||
|
||||
provider_data: Dict[str, Any] = {}
|
||||
if reasoning_content:
|
||||
if reasoning_content is not None:
|
||||
provider_data["reasoning_content"] = reasoning_content
|
||||
rd = getattr(msg, "reasoning_details", None)
|
||||
if rd:
|
||||
|
|
|
|||
44
run_agent.py
44
run_agent.py
|
|
@ -8501,6 +8501,7 @@ class AIAgent:
|
|||
Handles reasoning extraction, reasoning_details, and optional tool_calls
|
||||
so both the tool-call path and the final-response path share one builder.
|
||||
"""
|
||||
assistant_tool_calls = getattr(assistant_message, "tool_calls", None)
|
||||
reasoning_text = self._extract_reasoning(assistant_message)
|
||||
_from_structured = bool(reasoning_text)
|
||||
|
||||
|
|
@ -8560,16 +8561,19 @@ class AIAgent:
|
|||
"finish_reason": finish_reason,
|
||||
}
|
||||
|
||||
if hasattr(assistant_message, "reasoning_content"):
|
||||
raw_reasoning_content = getattr(assistant_message, "reasoning_content", None)
|
||||
if raw_reasoning_content is not None:
|
||||
msg["reasoning_content"] = _sanitize_surrogates(raw_reasoning_content)
|
||||
elif msg.get("tool_calls") and self._needs_deepseek_tool_reasoning():
|
||||
# DeepSeek thinking mode requires reasoning_content on every
|
||||
# assistant tool-call message. Without it, replaying the
|
||||
# persisted message causes HTTP 400. Include empty string
|
||||
# as a defensive compatibility fallback (refs #15250).
|
||||
msg["reasoning_content"] = ""
|
||||
raw_reasoning_content = getattr(assistant_message, "reasoning_content", None)
|
||||
if raw_reasoning_content is None and hasattr(assistant_message, "model_extra"):
|
||||
model_extra = getattr(assistant_message, "model_extra", None) or {}
|
||||
if isinstance(model_extra, dict) and "reasoning_content" in model_extra:
|
||||
raw_reasoning_content = model_extra["reasoning_content"]
|
||||
if raw_reasoning_content is not None:
|
||||
msg["reasoning_content"] = _sanitize_surrogates(raw_reasoning_content)
|
||||
elif assistant_tool_calls and self._needs_deepseek_tool_reasoning():
|
||||
# DeepSeek thinking mode requires reasoning_content on every
|
||||
# assistant tool-call message. Without it, replaying the
|
||||
# persisted message causes HTTP 400. Include empty string
|
||||
# only when no structured reasoning text was captured.
|
||||
msg["reasoning_content"] = reasoning_text or ""
|
||||
|
||||
# Additive fallback (refs #16844, #16884). Streaming-only providers
|
||||
# (glm, MiniMax, gpt-5.x via aigw, Anthropic via openai-compat shims)
|
||||
|
|
@ -8626,9 +8630,9 @@ class AIAgent:
|
|||
if codex_message_items:
|
||||
msg["codex_message_items"] = codex_message_items
|
||||
|
||||
if assistant_message.tool_calls:
|
||||
if assistant_tool_calls:
|
||||
tool_calls = []
|
||||
for tool_call in assistant_message.tool_calls:
|
||||
for tool_call in assistant_tool_calls:
|
||||
raw_id = getattr(tool_call, "id", None)
|
||||
call_id = getattr(tool_call, "call_id", None)
|
||||
if not isinstance(call_id, str) or not call_id.strip():
|
||||
|
|
@ -8728,11 +8732,11 @@ class AIAgent:
|
|||
# if the source turn has tool_calls AND a 'reasoning' field but no
|
||||
# 'reasoning_content' key, the 'reasoning' text was written by a
|
||||
# prior provider (e.g. MiniMax) — DeepSeek's own _build_assistant_message
|
||||
# always pins reasoning_content="" at creation time for tool-call turns,
|
||||
# so the shape (reasoning set, reasoning_content absent, tool_calls
|
||||
# present) is unreachable from same-provider DeepSeek history. Inject
|
||||
# "" to satisfy the API without leaking another provider's chain of
|
||||
# thought to DeepSeek/Kimi.
|
||||
# pins reasoning_content at creation time for tool-call turns, so the
|
||||
# shape (reasoning set, reasoning_content absent, tool_calls present)
|
||||
# is unreachable from same-provider DeepSeek history after this fix.
|
||||
# Inject "" to satisfy the API without leaking another provider's
|
||||
# chain of thought to DeepSeek/Kimi.
|
||||
normalized_reasoning = source_msg.get("reasoning")
|
||||
if (
|
||||
needs_thinking_pad
|
||||
|
|
@ -8745,9 +8749,9 @@ class AIAgent:
|
|||
|
||||
# 3. Healthy session: promote 'reasoning' field to 'reasoning_content'
|
||||
# for providers that use the internal 'reasoning' key.
|
||||
# This must happen BEFORE the DeepSeek/Kimi tool-call check so that
|
||||
# genuine reasoning content is not overwritten by the empty-string
|
||||
# fallback (#15812 regression in PR #15478).
|
||||
# This must happen before the unconditional empty-string fallback so
|
||||
# genuine reasoning content is not overwritten (#15812 regression in
|
||||
# PR #15478).
|
||||
if isinstance(normalized_reasoning, str) and normalized_reasoning:
|
||||
api_msg["reasoning_content"] = normalized_reasoning
|
||||
return
|
||||
|
|
|
|||
|
|
@ -620,6 +620,41 @@ class TestChatCompletionsNormalize:
|
|||
assert nr.reasoning == "summary text"
|
||||
assert nr.provider_data == {"reasoning_content": "detailed scratchpad"}
|
||||
|
||||
def test_empty_reasoning_content_preserved(self, transport):
|
||||
"""DeepSeek can require an explicit empty reasoning_content replay field."""
|
||||
r = SimpleNamespace(
|
||||
choices=[SimpleNamespace(
|
||||
message=SimpleNamespace(
|
||||
content=None,
|
||||
tool_calls=None,
|
||||
reasoning=None,
|
||||
reasoning_content="",
|
||||
),
|
||||
finish_reason="stop",
|
||||
)],
|
||||
usage=None,
|
||||
)
|
||||
nr = transport.normalize_response(r)
|
||||
assert nr.provider_data == {"reasoning_content": ""}
|
||||
assert nr.reasoning_content == ""
|
||||
|
||||
def test_reasoning_content_preserved_from_model_extra(self, transport):
|
||||
"""OpenAI SDK can expose provider-specific DeepSeek fields via model_extra."""
|
||||
r = SimpleNamespace(
|
||||
choices=[SimpleNamespace(
|
||||
message=SimpleNamespace(
|
||||
content=None,
|
||||
tool_calls=None,
|
||||
reasoning=None,
|
||||
model_extra={"reasoning_content": "model-extra scratchpad"},
|
||||
),
|
||||
finish_reason="stop",
|
||||
)],
|
||||
usage=None,
|
||||
)
|
||||
nr = transport.normalize_response(r)
|
||||
assert nr.provider_data == {"reasoning_content": "model-extra scratchpad"}
|
||||
|
||||
|
||||
class TestChatCompletionsCacheStats:
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,8 @@ Refs #15250 / #15353.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
|
@ -33,6 +35,10 @@ def _make_agent(provider: str = "", model: str = "", base_url: str = "") -> AIAg
|
|||
agent.provider = provider
|
||||
agent.model = model
|
||||
agent.base_url = base_url
|
||||
agent.verbose_logging = False
|
||||
agent.reasoning_callback = None
|
||||
agent.stream_delta_callback = None
|
||||
agent._stream_callback = None
|
||||
return agent
|
||||
|
||||
|
||||
|
|
@ -109,16 +115,7 @@ class TestCopyReasoningContentForApi:
|
|||
assert api_msg["reasoning_content"] == "<think>real chain of thought</think>"
|
||||
|
||||
def test_deepseek_reasoning_field_promoted(self) -> None:
|
||||
"""When only 'reasoning' is set (no tool_calls), it gets promoted to reasoning_content.
|
||||
|
||||
On DeepSeek/Kimi, tool-call turns with 'reasoning' but no
|
||||
'reasoning_content' are treated as cross-provider poisoned history
|
||||
(#15748) and padded with "" instead of promoted. Same-provider
|
||||
DeepSeek tool-call turns always have reasoning_content pinned at
|
||||
creation time by _build_assistant_message, so the (reasoning-set,
|
||||
reasoning_content-absent, tool_calls-present) shape is unreachable
|
||||
from same-provider history.
|
||||
"""
|
||||
"""When only 'reasoning' is set, it gets promoted to reasoning_content."""
|
||||
agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
|
||||
source = {
|
||||
"role": "assistant",
|
||||
|
|
@ -135,8 +132,8 @@ class TestCopyReasoningContentForApi:
|
|||
|
||||
If the source turn has tool_calls AND a 'reasoning' field but NO
|
||||
'reasoning_content' key, it's from a prior provider (the DeepSeek
|
||||
build path always pins reasoning_content="" at creation). Inject
|
||||
"" instead of forwarding the prior provider's chain of thought.
|
||||
build path pins reasoning_content at creation). Inject "" instead
|
||||
of forwarding the prior provider's chain of thought.
|
||||
"""
|
||||
agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
|
||||
source = {
|
||||
|
|
@ -228,6 +225,86 @@ class TestCopyReasoningContentForApi:
|
|||
assert "reasoning_content" not in api_msg
|
||||
|
||||
|
||||
class TestBuildAssistantMessageDeepSeekReasoningContent:
|
||||
"""_build_assistant_message pins replay-safe DeepSeek tool-call state."""
|
||||
|
||||
def test_deepseek_tool_call_reasoning_is_backfilled_into_reasoning_content(self) -> None:
|
||||
agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
|
||||
assistant_message = SimpleNamespace(
|
||||
content=None,
|
||||
reasoning="DeepSeek tool-call reasoning",
|
||||
reasoning_content=None,
|
||||
reasoning_details=None,
|
||||
codex_reasoning_items=None,
|
||||
codex_message_items=None,
|
||||
tool_calls=[
|
||||
SimpleNamespace(
|
||||
id="call_1",
|
||||
call_id=None,
|
||||
response_item_id=None,
|
||||
type="function",
|
||||
function=SimpleNamespace(name="terminal", arguments="{}"),
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
msg = agent._build_assistant_message(assistant_message, "tool_calls")
|
||||
|
||||
assert msg["reasoning_content"] == "DeepSeek tool-call reasoning"
|
||||
assert msg["tool_calls"][0]["id"] == "call_1"
|
||||
|
||||
def test_deepseek_model_extra_reasoning_content_is_preserved(self) -> None:
|
||||
"""OpenAI SDK stores unknown provider fields in model_extra."""
|
||||
agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
|
||||
assistant_message = SimpleNamespace(
|
||||
content=None,
|
||||
reasoning=None,
|
||||
reasoning_content=None,
|
||||
model_extra={"reasoning_content": "DeepSeek model_extra reasoning"},
|
||||
reasoning_details=None,
|
||||
codex_reasoning_items=None,
|
||||
codex_message_items=None,
|
||||
tool_calls=[
|
||||
SimpleNamespace(
|
||||
id="call_1",
|
||||
call_id=None,
|
||||
response_item_id=None,
|
||||
type="function",
|
||||
function=SimpleNamespace(name="terminal", arguments="{}"),
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
msg = agent._build_assistant_message(assistant_message, "tool_calls")
|
||||
|
||||
assert msg["reasoning_content"] == "DeepSeek model_extra reasoning"
|
||||
|
||||
def test_deepseek_tool_call_without_raw_reasoning_content_gets_empty_string(self) -> None:
|
||||
agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
|
||||
assistant_message = SimpleNamespace(
|
||||
content=None,
|
||||
reasoning=None,
|
||||
reasoning_content=None,
|
||||
reasoning_details=None,
|
||||
codex_reasoning_items=None,
|
||||
codex_message_items=None,
|
||||
tool_calls=[
|
||||
SimpleNamespace(
|
||||
id="call_1",
|
||||
call_id=None,
|
||||
response_item_id=None,
|
||||
type="function",
|
||||
function=SimpleNamespace(name="terminal", arguments="{}"),
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
msg = agent._build_assistant_message(assistant_message, "tool_calls")
|
||||
|
||||
assert msg["reasoning_content"] == ""
|
||||
assert msg["tool_calls"][0]["id"] == "call_1"
|
||||
|
||||
|
||||
class TestNeedsKimiToolReasoning:
|
||||
"""The extracted _needs_kimi_tool_reasoning() helper keeps Kimi behavior intact."""
|
||||
|
||||
|
|
|
|||
245
tests/run_agent/test_deepseek_v4_thinking_live.py
Normal file
245
tests/run_agent/test_deepseek_v4_thinking_live.py
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
"""Live DeepSeek V4 thinking-mode tool-call replay smoke test.

Opt-in only:
HERMES_LIVE_TESTS=1 pytest tests/run_agent/test_deepseek_v4_thinking_live.py -q

Requires DEEPSEEK_API_KEY in the process environment. The key is captured at
module import time because tests/conftest.py intentionally removes credential
environment variables before each test body runs.
"""

from __future__ import annotations

import json
import os
import sys
from typing import Any

import pytest


# Captured at import time — see the module docstring for why this cannot be
# read lazily inside the test body.
LIVE = os.environ.get("HERMES_LIVE_TESTS") == "1"
DEEPSEEK_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
LIVE_MODELS = ("deepseek-v4-flash", "deepseek-v4-pro")
LIVE_BASE_URL = "https://api.deepseek.com"

# Both skip conditions apply to every test in this module.
pytestmark = [
    pytest.mark.skipif(not LIVE, reason="live-only: set HERMES_LIVE_TESTS=1"),
    pytest.mark.skipif(not DEEPSEEK_KEY, reason="DEEPSEEK_API_KEY not configured"),
]

# Single-tool schema used to force DeepSeek into a deterministic tool call.
TOOL_NAME = "lookup_ticket_status"
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": TOOL_NAME,
            "description": "Return the status for a test ticket id.",
            "parameters": {
                "type": "object",
                "properties": {
                    "ticket_id": {
                        "type": "string",
                        "description": "The ticket id to look up.",
                    },
                },
                "required": ["ticket_id"],
                "additionalProperties": False,
            },
        },
    }
]
||||
|
||||
def _thinking_kwargs() -> dict:
|
||||
return {
|
||||
"reasoning_effort": "high",
|
||||
"extra_body": {"thinking": {"type": "enabled"}},
|
||||
}
|
||||
|
||||
|
||||
def _jsonable(value: Any) -> Any:
|
||||
if hasattr(value, "model_dump"):
|
||||
return value.model_dump(mode="json")
|
||||
if isinstance(value, dict):
|
||||
return {k: _jsonable(v) for k, v in value.items()}
|
||||
if isinstance(value, list):
|
||||
return [_jsonable(v) for v in value]
|
||||
return value
|
||||
|
||||
|
||||
def _print_trace(label: str, value: Any) -> None:
    """Write *value* as pretty-printed JSON to the real stdout.

    Uses ``sys.__stdout__`` so the trace survives pytest's output capture.
    """
    stream = sys.__stdout__
    stream.write(f"\n--- {label} ---\n")
    payload = json.dumps(_jsonable(value), ensure_ascii=False, indent=2, sort_keys=True)
    stream.write(payload)
    stream.write("\n")
    stream.flush()
|
||||
|
||||
|
||||
def _message_snapshot(message) -> dict:
    """Summarize the replay-relevant fields of an assistant *message* for tracing."""
    tool_calls = _jsonable(getattr(message, "tool_calls", None))
    snapshot = {
        "content": getattr(message, "content", None),
        "reasoning": getattr(message, "reasoning", None),
        "reasoning_content": _raw_reasoning_content(message),
        "model_extra": getattr(message, "model_extra", None),
        "tool_calls": tool_calls,
    }
    return snapshot
|
||||
|
||||
|
||||
def _make_live_client():
    """Build an OpenAI SDK client aimed at the live DeepSeek endpoint."""
    # Imported lazily so the module stays importable when the test is skipped.
    from openai import OpenAI

    client = OpenAI(api_key=DEEPSEEK_KEY, base_url=LIVE_BASE_URL)
    return client
|
||||
|
||||
|
||||
def _make_agent_for_message_building(model: str):
    """Create a minimal AIAgent suitable only for building/replaying messages.

    ``object.__new__`` bypasses ``AIAgent.__init__`` so no real setup runs;
    only the attributes the message-building path reads are populated.
    """
    from run_agent import AIAgent

    agent = object.__new__(AIAgent)
    attrs = {
        "provider": "deepseek",
        "model": model,
        "base_url": LIVE_BASE_URL,
        "verbose_logging": False,
        "reasoning_callback": None,
        "stream_delta_callback": None,
        "_stream_callback": None,
    }
    for name, value in attrs.items():
        setattr(agent, name, value)
    return agent
|
||||
|
||||
|
||||
def _raw_reasoning_content(message):
|
||||
direct = getattr(message, "reasoning_content", None)
|
||||
if direct is not None:
|
||||
return direct
|
||||
model_extra = getattr(message, "model_extra", None) or {}
|
||||
if isinstance(model_extra, dict) and "reasoning_content" in model_extra:
|
||||
return model_extra["reasoning_content"]
|
||||
return None
|
||||
|
||||
|
||||
@pytest.mark.parametrize("live_model", LIVE_MODELS)
def test_deepseek_v4_thinking_tool_call_replay_round_trip(live_model: str):
    """Hit DeepSeek twice and replay the assistant tool-call turn.

    The first request forces a tool call with thinking enabled. The second
    request replays that assistant message with content, reasoning_content,
    and tool_calls, then appends the tool result. DeepSeek accepting the
    second request is the live guardrail for the V4 thinking replay contract.
    """

    client = _make_live_client()
    agent = _make_agent_for_message_building(live_model)

    # Round 1: force exactly one tool call with thinking mode enabled.
    first_request = {
        "model": live_model,
        "messages": [
            {
                "role": "user",
                "content": (
                    "You must use the provided lookup_ticket_status tool "
                    "exactly once with ticket_id 'DS-4242'. Do not answer "
                    "directly."
                ),
            }
        ],
        "tools": TOOLS,
        "max_tokens": 1024,
        "timeout": 90,
        **_thinking_kwargs(),
    }
    _print_trace(f"{live_model} first request", first_request)
    first = client.chat.completions.create(**first_request)
    _print_trace(f"{live_model} first raw response", first)

    first_choice = first.choices[0]
    first_message = first_choice.message
    _print_trace(
        f"{live_model} first assistant message",
        {
            "finish_reason": first_choice.finish_reason,
            **_message_snapshot(first_message),
        },
    )
    assert first_message.tool_calls, "DeepSeek did not return a tool call"
    first_tool_call = first_message.tool_calls[0]
    assert first_tool_call.function.name == TOOL_NAME
    # Arguments must at least be parseable JSON of object shape.
    assert isinstance(json.loads(first_tool_call.function.arguments or "{}"), dict)

    # The thinking payload must have produced reasoning_content (directly or
    # via model_extra) for the replay contract to be exercised at all.
    raw_reasoning_content = _raw_reasoning_content(first_message)
    assert raw_reasoning_content is not None, (
        "DeepSeek did not return reasoning_content; the thinking payload may "
        "not have been honored"
    )

    # Persist the assistant turn the same way the agent would mid-session.
    stored_assistant = agent._build_assistant_message(
        first_message,
        first_choice.finish_reason or "tool_calls",
    )
    _print_trace(f"{live_model} stored assistant message", stored_assistant)
    assert stored_assistant["reasoning_content"] == raw_reasoning_content

    # Rebuild the replay message from the stored turn, then let the agent
    # copy reasoning_content across exactly as the replay path does.
    replay_assistant = {
        "role": "assistant",
        "content": stored_assistant.get("content") or "",
        "tool_calls": stored_assistant["tool_calls"],
    }
    agent._copy_reasoning_content_for_api(stored_assistant, replay_assistant)
    _print_trace(f"{live_model} replay assistant message", replay_assistant)

    tool_call_id = stored_assistant["tool_calls"][0]["id"]
    messages = [
        {
            "role": "user",
            "content": (
                "You must use the provided lookup_ticket_status tool "
                "exactly once with ticket_id 'DS-4242'. Do not answer "
                "directly."
            ),
        },
        replay_assistant,
        {
            "role": "tool",
            "tool_call_id": tool_call_id,
            "content": json.dumps(
                {"ticket_id": "DS-4242", "status": "green", "source": "live-test"},
                separators=(",", ":"),
            ),
        },
    ]

    # Run the history through the real transport conversion so the second
    # request matches what production replay would actually send.
    from agent.transports.chat_completions import ChatCompletionsTransport

    api_messages = ChatCompletionsTransport().convert_messages(messages)
    _print_trace(
        f"{live_model} second request messages after transport conversion",
        api_messages,
    )
    assert api_messages[1]["reasoning_content"] == raw_reasoning_content
    # Internal tool-call bookkeeping keys must not leak into the API payload.
    assert "call_id" not in api_messages[1]["tool_calls"][0]
    assert "response_item_id" not in api_messages[1]["tool_calls"][0]

    # Round 2: replay the tool-call turn plus the tool result. DeepSeek
    # accepting this request (no HTTP 400) is the live guardrail.
    second_request = {
        "model": live_model,
        "messages": api_messages,
        "max_tokens": 1024,
        "timeout": 90,
        **_thinking_kwargs(),
    }
    _print_trace(f"{live_model} second request", second_request)
    second = client.chat.completions.create(**second_request)
    _print_trace(f"{live_model} second raw response", second)
    _print_trace(
        f"{live_model} second assistant message",
        {
            "finish_reason": second.choices[0].finish_reason,
            **_message_snapshot(second.choices[0].message),
        },
    )

    second_message = second.choices[0].message
    final_content = second_message.content or ""
    final_reasoning = _raw_reasoning_content(second_message) or ""
    assert second.choices[0].finish_reason == "stop"
    assert final_content.strip() or final_reasoning.strip(), (
        "DeepSeek returned neither visible content nor reasoning_content"
    )
|
||||
Loading…
Add table
Add a link
Reference in a new issue