feat(deepseek): add thinking.type + reasoning_effort mapping for DeepSeek API

DeepSeek's thinking mode requires both:
- extra_body.thinking.type: "enabled" to activate thinking mode
- top-level reasoning_effort: "max" or "high" to control depth

Previously, the ChatCompletionsTransport only handled Kimi's thinking mode — DeepSeek was left unmapped, so the reasoning_effort config was silently dropped.

This patch:
1. Adds is_deepseek: bool to the Params dataclass, detected by matching the base_url host against api.deepseek.com
2. Maps Hermes effort levels (xhigh/max → "max"; low/medium/high → themselves) to the top-level reasoning_effort parameter
3. Sets extra_body.thinking.type alongside the effort
4. Strips reasoning_content from assistant messages sent back to DeepSeek, preventing 400 errors when thinking was enabled
2026-07-02 12:13:05 +00:00 · 2026-04-25 00:46:10 +08:00 · 2026-04-25 00:46:10 +08:00 · 068c24f8a4
commit 068c24f8a4
parent 31ba2b0cbc
2 changed files with 27 additions and 0 deletions
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@ -189,6 +189,7 @@ class ChatCompletionsTransport(ProviderTransport):
            is_kimi: bool
            is_tokenhub: bool
            is_lmstudio: bool
+            is_deepseek: bool
            is_custom_provider: bool
            ollama_num_ctx: int | None
            # Provider routing
@ -348,6 +349,25 @@ class ChatCompletionsTransport(ProviderTransport):
                "type": "enabled" if _kimi_thinking_enabled else "disabled",
            }

+        # DeepSeek extra_body.thinking + top-level reasoning_effort
+        is_deepseek = params.get("is_deepseek", False)
+        if is_deepseek:
+            _ds_thinking_enabled = True
+            if reasoning_config and isinstance(reasoning_config, dict):
+                if reasoning_config.get("enabled") is False:
+                    _ds_thinking_enabled = False
+            extra_body["thinking"] = {
+                "type": "enabled" if _ds_thinking_enabled else "disabled",
+            }
+            # DeepSeek effort: xhigh/max → "max"; low/medium/high are sent as-is
+            if _ds_thinking_enabled and reasoning_config:
+                _e = (reasoning_config.get("effort") or "").strip().lower()
+                if _e in ("xhigh", "max"):
+                    api_kwargs["reasoning_effort"] = "max"
+                elif _e in ("low", "medium", "high"):
+                    api_kwargs["reasoning_effort"] = _e
+            # If no effort configured, don't set it → DeepSeek defaults to high
+
        # Reasoning. LM Studio is handled above via top-level reasoning_effort,
        # so skip emitting extra_body.reasoning for it.
        if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
--- a/run_agent.py
+++ b/run_agent.py
@ -9798,6 +9798,7 @@ class AIAgent:
        )
        _is_tokenhub = base_url_host_matches(self._base_url_lower, "tokenhub.tencentmaas.com")
        _is_lmstudio = (self.provider or "").strip().lower() == "lmstudio"
+        _is_deepseek = base_url_host_matches(self.base_url, "api.deepseek.com")

        # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE
        # sentinel (temperature omitted entirely), a numeric override, or None.
@ -9909,6 +9910,7 @@ class AIAgent:
            is_kimi=_is_kimi,
            is_tokenhub=_is_tokenhub,
            is_lmstudio=_is_lmstudio,
+            is_deepseek=_is_deepseek,
            is_custom_provider=self.provider == "custom",
            ollama_num_ctx=self._ollama_num_ctx,
            provider_preferences=_prefs or None,
@ -10368,6 +10370,11 @@ class AIAgent:
        # context compaction).  Don't pass null to the API.
        api_msg.pop("reasoning_content", None)

+        # DeepSeek: strip reasoning_content on all assistant messages so the API
+        # doesn't return 400 when the model was invoked with thinking enabled.
+        if base_url_host_matches(self.base_url, "api.deepseek.com"):
+            api_msg.pop("reasoning_content", None)
+
    @staticmethod
    def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
        """Strip Codex Responses API fields from tool_calls for strict providers.