fix(deepseek): wire thinking-mode via DeepSeekProfile, not legacy fallback

The cherry-picked PR #15251 from @tw2818 correctly identified the DeepSeek 400 root cause but placed the fix in the legacy fallback path of `build_kwargs`, which DeepSeek never reaches — DeepSeek has a registered ProviderProfile and goes through `_build_kwargs_from_profile` instead. The legacy-path block was therefore dead code. This commit pivots the fix to where it actually fires: - New `DeepSeekProfile` in `plugins/model-providers/deepseek/__init__.py` overrides `build_api_kwargs_extras` to emit DeepSeek's expected wire format (mirrors `KimiProfile`): {"reasoning_effort": "<low|medium|high|max>", "extra_body": {"thinking": {"type": "enabled" | "disabled"}}} - Model gating: only `deepseek-v4-*` and `deepseek-reasoner` emit thinking control. `deepseek-chat` (V3) is untouched — current behavior. - Effort mapping: low/medium/high passthrough, xhigh/max → max, unset → omitted (DeepSeek server applies its own default). - Revert the legacy-path additions from PR #15251 — they were dead code, and the `_copy_reasoning_content_for_api` strip block specifically would have nullified the existing reasoning_content padding machinery (`_needs_deepseek_tool_reasoning` → space-pad on replay) that the active provider already relies on for replay correctness. - Unit tests pin the wire-shape contract and the model gating rules (26 tests, all passing). Existing transport + provider profile suites (321 tests) continue to pass. - AUTHOR_MAP: map twebefy@gmail.com → tw2818 for release notes credit. Closes #15700, #17212, #17825. Co-authored-by: tw2818 <twebefy@gmail.com>
2026-07-05 12:42:30 +00:00 · 2026-05-15 16:39:18 -07:00 · 2026-05-15 16:39:18 -07:00 · cd9470f416
commit cd9470f416
parent 068c24f8a4
5 changed files with 266 additions and 29 deletions
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@ -189,7 +189,6 @@ class ChatCompletionsTransport(ProviderTransport):
            is_kimi: bool
            is_tokenhub: bool
            is_lmstudio: bool
-            is_deepseek: bool
            is_custom_provider: bool
            ollama_num_ctx: int | None
            # Provider routing
@ -349,25 +348,6 @@ class ChatCompletionsTransport(ProviderTransport):
                "type": "enabled" if _kimi_thinking_enabled else "disabled",
            }

-        # DeepSeek extra_body.thinking + top-level reasoning_effort
-        is_deepseek = params.get("is_deepseek", False)
-        if is_deepseek:
-            _ds_thinking_enabled = True
-            if reasoning_config and isinstance(reasoning_config, dict):
-                if reasoning_config.get("enabled") is False:
-                    _ds_thinking_enabled = False
-            extra_body["thinking"] = {
-                "type": "enabled" if _ds_thinking_enabled else "disabled",
-            }
-            # DeepSeek effort: low/medium→high, high→high, xhigh/max→max
-            if _ds_thinking_enabled and reasoning_config:
-                _e = (reasoning_config.get("effort") or "").strip().lower()
-                if _e in ("xhigh", "max"):
-                    api_kwargs["reasoning_effort"] = "max"
-                elif _e in ("low", "medium", "high"):
-                    api_kwargs["reasoning_effort"] = _e
-            # If no effort configured, don't set it → DeepSeek defaults to high
-
        # Reasoning. LM Studio is handled above via top-level reasoning_effort,
        # so skip emitting extra_body.reasoning for it.
        if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
--- a/plugins/model-providers/deepseek/init.py
+++ b/plugins/model-providers/deepseek/init.py
@ -1,9 +1,88 @@
-"""DeepSeek provider profile."""
+"""DeepSeek provider profile.
+
+DeepSeek's V4 family (and the legacy ``deepseek-reasoner``) defaults to
+thinking-mode ON when ``extra_body.thinking`` is unset.  The API then returns
+``reasoning_content`` and starts enforcing the contract that subsequent turns
+echo it back; combined with how Hermes replays history this lands on the
+notorious HTTP 400 ``reasoning_content must be passed back`` error after the
+first tool call (#15700, #17212, #17825).
+
+This profile overrides :meth:`build_api_kwargs_extras` to mirror the Kimi /
+Moonshot wire shape that DeepSeek's OpenAI-compat endpoint expects:
+
+    {"reasoning_effort": "<low|medium|high|max>",
+     "extra_body": {"thinking": {"type": "enabled" | "disabled"}}}
+
+Non-thinking models (only ``deepseek-chat`` today, which is V3) are left as
+no-ops so we don't perturb the V3 wire format.
+"""
+
+from __future__ import annotations
+
+from typing import Any

 from providers import register_provider
 from providers.base import ProviderProfile

-deepseek = ProviderProfile(
+
+def _model_supports_thinking(model: str | None) -> bool:
+    """DeepSeek thinking-capable model families.
+
+    Currently covers the V4 family (``deepseek-v4-pro``, ``deepseek-v4-flash``,
+    and any future ``deepseek-v4-*`` variants) and the legacy
+    ``deepseek-reasoner`` (R1).  ``deepseek-chat`` is V3 with no thinking mode.
+    """
+    m = (model or "").strip().lower()
+    if not m:
+        return False
+    if m.startswith("deepseek-v") and not m.startswith("deepseek-v3"):
+        # deepseek-v4-*, deepseek-v5-*, etc. — every V4+ generation has
+        # thinking. v3 explicitly excluded.
+        return True
+    if m == "deepseek-reasoner":
+        return True
+    return False
+
+
+class DeepSeekProfile(ProviderProfile):
+    """DeepSeek — extra_body.thinking + top-level reasoning_effort."""
+
+    def build_api_kwargs_extras(
+        self, *, reasoning_config: dict | None = None, model: str | None = None, **context
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        extra_body: dict[str, Any] = {}
+        top_level: dict[str, Any] = {}
+
+        if not _model_supports_thinking(model):
+            # V3 / unknown — leave wire format untouched, current behavior.
+            return extra_body, top_level
+
+        # Determine enabled/disabled.  Default is enabled to match DeepSeek's
+        # API default; the API requires this to be set explicitly to avoid the
+        # reasoning_content echo trap on subsequent turns.
+        enabled = True
+        if isinstance(reasoning_config, dict) and reasoning_config.get("enabled") is False:
+            enabled = False
+
+        extra_body["thinking"] = {"type": "enabled" if enabled else "disabled"}
+
+        if not enabled:
+            return extra_body, top_level
+
+        # Effort mapping.  Pass low/medium/high through; xhigh/max → max.
+        # When no effort is set we omit reasoning_effort so DeepSeek applies
+        # its server default (currently high).
+        if isinstance(reasoning_config, dict):
+            effort = (reasoning_config.get("effort") or "").strip().lower()
+            if effort in ("xhigh", "max"):
+                top_level["reasoning_effort"] = "max"
+            elif effort in ("low", "medium", "high"):
+                top_level["reasoning_effort"] = effort
+
+        return extra_body, top_level
+
+
+deepseek = DeepSeekProfile(
    name="deepseek",
    aliases=("deepseek-chat",),
    env_vars=("DEEPSEEK_API_KEY",),
--- a/run_agent.py
+++ b/run_agent.py
@ -9798,7 +9798,6 @@ class AIAgent:
        )
        _is_tokenhub = base_url_host_matches(self._base_url_lower, "tokenhub.tencentmaas.com")
        _is_lmstudio = (self.provider or "").strip().lower() == "lmstudio"
-        _is_deepseek = base_url_host_matches(self.base_url, "api.deepseek.com")

        # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE
        # sentinel (temperature omitted entirely), a numeric override, or None.
@ -9910,7 +9909,6 @@ class AIAgent:
            is_kimi=_is_kimi,
            is_tokenhub=_is_tokenhub,
            is_lmstudio=_is_lmstudio,
-            is_deepseek=_is_deepseek,
            is_custom_provider=self.provider == "custom",
            ollama_num_ctx=self._ollama_num_ctx,
            provider_preferences=_prefs or None,
@ -10370,11 +10368,6 @@ class AIAgent:
        # context compaction).  Don't pass null to the API.
        api_msg.pop("reasoning_content", None)

-        # DeepSeek: strip reasoning_content on all assistant messages so the API
-        # doesn't return 400 when the model was invoked with thinking enabled.
-        if base_url_host_matches(self.base_url, "api.deepseek.com"):
-            api_msg.pop("reasoning_content", None)
-
    @staticmethod
    def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
        """Strip Codex Responses API fields from tool_calls for strict providers.
--- a/scripts/release.py
+++ b/scripts/release.py
@ -73,6 +73,7 @@ AUTHOR_MAP = {
    "teknium@nousresearch.com": "teknium1",
    "piyushvp1@gmail.com": "thelumiereguy",
    "421774554@qq.com": "wuli666",
+    "twebefy@gmail.com": "tw2818",
    "harish.kukreja@gmail.com": "counterposition",
    "korkyzer@gmail.com": "Korkyzer",
    "1046611633@qq.com": "zhengyn0001",
--- a/tests/plugins/model_providers/test_deepseek_profile.py
+++ b/tests/plugins/model_providers/test_deepseek_profile.py
@ -0,0 +1,184 @@
+"""Unit tests for the DeepSeek provider profile's thinking-mode wiring.
+
+DeepSeek V4 (and the legacy ``deepseek-reasoner``) expects every request to
+carry an explicit ``extra_body.thinking`` parameter.  Omitting it makes the
+server default to thinking-mode ON, which then enforces the
+``reasoning_content``-must-be-echoed-back contract on subsequent turns and
+breaks the conversation with HTTP 400 (#15700, #17212, #17825).
+
+These tests pin the profile's wire-shape contract so DeepSeek requests stay
+correctly shaped without going live.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+
+@pytest.fixture
+def deepseek_profile():
+    """Resolve the registered DeepSeek profile.
+
+    Going through ``providers.get_provider_profile`` keeps the test honest —
+    if someone later replaces the registered class with a plain
+    ``ProviderProfile``, every assertion below collapses.
+    """
+    # ``model_tools`` triggers plugin discovery on import, which is what
+    # registers the DeepSeek profile in the global provider registry.
+    import model_tools  # noqa: F401
+    import providers
+
+    profile = providers.get_provider_profile("deepseek")
+    assert profile is not None, "deepseek provider profile must be registered"
+    return profile
+
+
+class TestDeepSeekThinkingWireShape:
+    """``build_api_kwargs_extras`` produces DeepSeek's exact wire format."""
+
+    def test_v4_pro_default_enables_thinking_without_effort(self, deepseek_profile):
+        """No reasoning_config → thinking enabled, server picks default effort."""
+        extra_body, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config=None, model="deepseek-v4-pro"
+        )
+        assert extra_body == {"thinking": {"type": "enabled"}}
+        assert top_level == {}
+
+    def test_v4_pro_enabled_with_high_effort(self, deepseek_profile):
+        extra_body, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": "high"},
+            model="deepseek-v4-pro",
+        )
+        assert extra_body == {"thinking": {"type": "enabled"}}
+        assert top_level == {"reasoning_effort": "high"}
+
+    @pytest.mark.parametrize("effort", ["low", "medium", "high"])
+    def test_standard_efforts_pass_through(self, deepseek_profile, effort):
+        _, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": effort},
+            model="deepseek-v4-pro",
+        )
+        assert top_level == {"reasoning_effort": effort}
+
+    @pytest.mark.parametrize("effort", ["xhigh", "max", "MAX", "  Max  "])
+    def test_xhigh_and_max_normalize_to_max(self, deepseek_profile, effort):
+        _, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": effort},
+            model="deepseek-v4-pro",
+        )
+        assert top_level == {"reasoning_effort": "max"}
+
+    def test_explicitly_disabled_sends_disabled_marker(self, deepseek_profile):
+        """``reasoning_config.enabled=False`` → ``thinking.type=disabled``.
+
+        The crucial bit is that the parameter is *sent* at all — DeepSeek
+        defaults to thinking-on when ``thinking`` is absent.
+        """
+        extra_body, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": False}, model="deepseek-v4-pro"
+        )
+        assert extra_body == {"thinking": {"type": "disabled"}}
+        # No effort when disabled — DeepSeek rejects it.
+        assert top_level == {}
+
+    def test_disabled_ignores_effort_field(self, deepseek_profile):
+        """Effort silently dropped when thinking is off."""
+        _, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": False, "effort": "high"},
+            model="deepseek-v4-pro",
+        )
+        assert top_level == {}
+
+    def test_unknown_effort_omits_top_level(self, deepseek_profile):
+        """Garbage effort → omit reasoning_effort so DeepSeek applies its default."""
+        _, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": "garbage"},
+            model="deepseek-v4-pro",
+        )
+        assert top_level == {}
+
+    def test_empty_effort_omits_top_level(self, deepseek_profile):
+        _, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": ""},
+            model="deepseek-v4-pro",
+        )
+        assert top_level == {}
+
+
+class TestDeepSeekModelGating:
+    """V4 family + ``deepseek-reasoner`` get thinking; V3 stays untouched."""
+
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "deepseek-v4-pro",
+            "deepseek-v4-flash",
+            "deepseek-v4-future-variant",
+            "deepseek-reasoner",
+            "DEEPSEEK-V4-PRO",  # case-insensitive
+        ],
+    )
+    def test_thinking_capable_models_emit_thinking(self, deepseek_profile, model):
+        extra_body, _ = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config=None, model=model
+        )
+        assert extra_body == {"thinking": {"type": "enabled"}}
+
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "deepseek-chat",         # V3 alias
+            "deepseek-v3-0324",      # explicit V3
+            "deepseek-v3.1",         # V3 minor revisions
+            "",                       # bare/unknown
+            None,                     # missing
+            "deepseek-unknown",      # unrecognized
+        ],
+    )
+    def test_non_thinking_models_emit_nothing(self, deepseek_profile, model):
+        extra_body, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": "high"}, model=model
+        )
+        assert extra_body == {}
+        assert top_level == {}
+
+
+class TestDeepSeekFullKwargsIntegration:
+    """End-to-end: the transport's full kwargs match DeepSeek's live wire format.
+
+    The live test harness in ``tests/run_agent/test_deepseek_v4_thinking_live.py``
+    sends ``{"reasoning_effort": "high", "extra_body": {"thinking": {"type":
+    "enabled"}}}``.  Confirm the transport produces that exact shape when wired
+    through the registered DeepSeek profile.
+    """
+
+    def test_full_kwargs_match_live_wire_shape(self, deepseek_profile):
+        from agent.transports.chat_completions import ChatCompletionsTransport
+
+        kwargs = ChatCompletionsTransport().build_kwargs(
+            model="deepseek-v4-pro",
+            messages=[{"role": "user", "content": "ping"}],
+            tools=None,
+            provider_profile=deepseek_profile,
+            reasoning_config={"enabled": True, "effort": "high"},
+            base_url="https://api.deepseek.com/v1",
+            provider_name="deepseek",
+        )
+        assert kwargs["model"] == "deepseek-v4-pro"
+        assert kwargs["reasoning_effort"] == "high"
+        assert kwargs["extra_body"] == {"thinking": {"type": "enabled"}}
+
+    def test_v3_chat_full_kwargs_omit_thinking(self, deepseek_profile):
+        from agent.transports.chat_completions import ChatCompletionsTransport
+
+        kwargs = ChatCompletionsTransport().build_kwargs(
+            model="deepseek-chat",
+            messages=[{"role": "user", "content": "ping"}],
+            tools=None,
+            provider_profile=deepseek_profile,
+            reasoning_config={"enabled": True, "effort": "high"},
+            base_url="https://api.deepseek.com/v1",
+            provider_name="deepseek",
+        )
+        assert "reasoning_effort" not in kwargs
+        assert "extra_body" not in kwargs or "thinking" not in kwargs.get("extra_body", {})