fix(deepseek): wire thinking-mode via DeepSeekProfile, not legacy fallback

The cherry-picked PR #15251 from @tw2818 correctly identified the DeepSeek 400 root cause but placed the fix in the legacy fallback path of `build_kwargs`, which DeepSeek never reaches — DeepSeek has a registered ProviderProfile and goes through `_build_kwargs_from_profile` instead. The legacy-path block was therefore dead code.

This commit pivots the fix to where it actually fires:

- New `DeepSeekProfile` in `plugins/model-providers/deepseek/__init__.py` overrides `build_api_kwargs_extras` to emit DeepSeek's expected wire format (mirrors `KimiProfile`):

      {"reasoning_effort": "<low|medium|high|max>",
       "extra_body": {"thinking": {"type": "enabled" | "disabled"}}}

- Model gating: only `deepseek-v4-*` and `deepseek-reasoner` emit thinking control. `deepseek-chat` (V3) is untouched — current behavior.
- Effort mapping: low/medium/high passthrough, xhigh/max → max, unset → omitted (DeepSeek server applies its own default).
- Revert the legacy-path additions from PR #15251 — they were dead code, and the `_copy_reasoning_content_for_api` strip block specifically would have nullified the existing reasoning_content padding machinery (`_needs_deepseek_tool_reasoning` → space-pad on replay) that the active provider already relies on for replay correctness.
- Unit tests pin the wire-shape contract and the model gating rules (26 tests, all passing). Existing transport + provider profile suites (321 tests) continue to pass.
- AUTHOR_MAP: map twebefy@gmail.com → tw2818 for release notes credit.

Closes #15700, #17212, #17825.

Co-authored-by: tw2818 <twebefy@gmail.com>
2026-05-18 04:41:56 +00:00 · 2026-05-15 16:39:18 -07:00 · 2026-05-15 16:39:18 -07:00 · cd9470f416
commit cd9470f416
parent 068c24f8a4
5 changed files with 266 additions and 29 deletions
--- a/plugins/model-providers/deepseek/__init__.py
+++ b/plugins/model-providers/deepseek/__init__.py
@@ -1,9 +1,88 @@
-"""DeepSeek provider profile."""
+"""DeepSeek provider profile.
+
+DeepSeek's V4 family (and the legacy ``deepseek-reasoner``) defaults to
+thinking-mode ON when ``extra_body.thinking`` is unset.  The API then returns
+``reasoning_content`` and starts enforcing the contract that subsequent turns
+echo it back; combined with how Hermes replays history this lands on the
+notorious HTTP 400 ``reasoning_content must be passed back`` error after the
+first tool call (#15700, #17212, #17825).
+
+This profile overrides :meth:`build_api_kwargs_extras` to mirror the Kimi /
+Moonshot wire shape that DeepSeek's OpenAI-compat endpoint expects:
+
+    {"reasoning_effort": "<low|medium|high|max>",
+     "extra_body": {"thinking": {"type": "enabled" | "disabled"}}}
+
+Non-thinking models (only ``deepseek-chat`` today, which is V3) are left as
+no-ops so we don't perturb the V3 wire format.
+"""
+
+from __future__ import annotations
+
+from typing import Any

 from providers import register_provider
 from providers.base import ProviderProfile

-deepseek = ProviderProfile(
+
+def _model_supports_thinking(model: str | None) -> bool:
+    """DeepSeek thinking-capable model families.
+
+    Currently covers the V4 family (``deepseek-v4-pro``, ``deepseek-v4-flash``,
+    and any future ``deepseek-v4-*`` variants) and the legacy
+    ``deepseek-reasoner`` (R1).  ``deepseek-chat`` is V3 with no thinking mode.
+    """
+    m = (model or "").strip().lower()
+    if not m:
+        return False
+    if m.startswith("deepseek-v") and not m.startswith("deepseek-v3"):
+        # deepseek-v4-*, deepseek-v5-*, etc. — every V4+ generation has
+        # thinking. v3 explicitly excluded.
+        return True
+    if m == "deepseek-reasoner":
+        return True
+    return False
+
+
+class DeepSeekProfile(ProviderProfile):
+    """DeepSeek — extra_body.thinking + top-level reasoning_effort."""
+
+    def build_api_kwargs_extras(
+        self, *, reasoning_config: dict | None = None, model: str | None = None, **context
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        extra_body: dict[str, Any] = {}
+        top_level: dict[str, Any] = {}
+
+        if not _model_supports_thinking(model):
+            # V3 / unknown — leave wire format untouched, current behavior.
+            return extra_body, top_level
+
+        # Determine enabled/disabled.  Default is enabled to match DeepSeek's
+        # API default; the API requires this to be set explicitly to avoid the
+        # reasoning_content echo trap on subsequent turns.
+        enabled = True
+        if isinstance(reasoning_config, dict) and reasoning_config.get("enabled") is False:
+            enabled = False
+
+        extra_body["thinking"] = {"type": "enabled" if enabled else "disabled"}
+
+        if not enabled:
+            return extra_body, top_level
+
+        # Effort mapping.  Pass low/medium/high through; xhigh/max → max.
+        # When no effort is set we omit reasoning_effort so DeepSeek applies
+        # its server default (currently high).
+        if isinstance(reasoning_config, dict):
+            effort = (reasoning_config.get("effort") or "").strip().lower()
+            if effort in ("xhigh", "max"):
+                top_level["reasoning_effort"] = "max"
+            elif effort in ("low", "medium", "high"):
+                top_level["reasoning_effort"] = effort
+
+        return extra_body, top_level
+
+
+deepseek = DeepSeekProfile(
    name="deepseek",
    aliases=("deepseek-chat",),
    env_vars=("DEEPSEEK_API_KEY",),