diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py index 1ae584e9159..7edb69e42c7 100644 --- a/agent/transports/chat_completions.py +++ b/agent/transports/chat_completions.py @@ -189,7 +189,6 @@ class ChatCompletionsTransport(ProviderTransport): is_kimi: bool is_tokenhub: bool is_lmstudio: bool - is_deepseek: bool is_custom_provider: bool ollama_num_ctx: int | None # Provider routing @@ -349,25 +348,6 @@ class ChatCompletionsTransport(ProviderTransport): "type": "enabled" if _kimi_thinking_enabled else "disabled", } - # DeepSeek extra_body.thinking + top-level reasoning_effort - is_deepseek = params.get("is_deepseek", False) - if is_deepseek: - _ds_thinking_enabled = True - if reasoning_config and isinstance(reasoning_config, dict): - if reasoning_config.get("enabled") is False: - _ds_thinking_enabled = False - extra_body["thinking"] = { - "type": "enabled" if _ds_thinking_enabled else "disabled", - } - # DeepSeek effort: low/medium→high, high→high, xhigh/max→max - if _ds_thinking_enabled and reasoning_config: - _e = (reasoning_config.get("effort") or "").strip().lower() - if _e in ("xhigh", "max"): - api_kwargs["reasoning_effort"] = "max" - elif _e in ("low", "medium", "high"): - api_kwargs["reasoning_effort"] = _e - # If no effort configured, don't set it → DeepSeek defaults to high - # Reasoning. LM Studio is handled above via top-level reasoning_effort, # so skip emitting extra_body.reasoning for it. if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False): diff --git a/plugins/model-providers/deepseek/__init__.py b/plugins/model-providers/deepseek/__init__.py index 59d738f50fb..f67146df113 100644 --- a/plugins/model-providers/deepseek/__init__.py +++ b/plugins/model-providers/deepseek/__init__.py @@ -1,9 +1,88 @@ -"""DeepSeek provider profile.""" +"""DeepSeek provider profile. + +DeepSeek's V4 family (and the legacy ``deepseek-reasoner``) defaults to +thinking-mode ON when ``extra_body.thinking`` is unset. The API then returns +``reasoning_content`` and starts enforcing the contract that subsequent turns +echo it back; combined with how Hermes replays history this lands on the +notorious HTTP 400 ``reasoning_content must be passed back`` error after the +first tool call (#15700, #17212, #17825). + +This profile overrides :meth:`build_api_kwargs_extras` to mirror the Kimi / +Moonshot wire shape that DeepSeek's OpenAI-compat endpoint expects: + + {"reasoning_effort": "", + "extra_body": {"thinking": {"type": "enabled" | "disabled"}}} + +Non-thinking models (only ``deepseek-chat`` today, which is V3) are left as +no-ops so we don't perturb the V3 wire format. +""" + +from __future__ import annotations + +from typing import Any from providers import register_provider from providers.base import ProviderProfile -deepseek = ProviderProfile( + +def _model_supports_thinking(model: str | None) -> bool: + """DeepSeek thinking-capable model families. + + Currently covers the V4 family (``deepseek-v4-pro``, ``deepseek-v4-flash``, + and any future ``deepseek-v4-*`` variants) and the legacy + ``deepseek-reasoner`` (R1). ``deepseek-chat`` is V3 with no thinking mode. + """ + m = (model or "").strip().lower() + if not m: + return False + if m.startswith("deepseek-v") and not m.startswith("deepseek-v3"): + # deepseek-v4-*, deepseek-v5-*, etc. — every V4+ generation has + # thinking. v3 explicitly excluded. + return True + if m == "deepseek-reasoner": + return True + return False + + +class DeepSeekProfile(ProviderProfile): + """DeepSeek — extra_body.thinking + top-level reasoning_effort.""" + + def build_api_kwargs_extras( + self, *, reasoning_config: dict | None = None, model: str | None = None, **context + ) -> tuple[dict[str, Any], dict[str, Any]]: + extra_body: dict[str, Any] = {} + top_level: dict[str, Any] = {} + + if not _model_supports_thinking(model): + # V3 / unknown — leave wire format untouched, current behavior. + return extra_body, top_level + + # Determine enabled/disabled. Default is enabled to match DeepSeek's + # API default; the API requires this to be set explicitly to avoid the + # reasoning_content echo trap on subsequent turns. + enabled = True + if isinstance(reasoning_config, dict) and reasoning_config.get("enabled") is False: + enabled = False + + extra_body["thinking"] = {"type": "enabled" if enabled else "disabled"} + + if not enabled: + return extra_body, top_level + + # Effort mapping. Pass low/medium/high through; xhigh/max → max. + # When no effort is set we omit reasoning_effort so DeepSeek applies + # its server default (currently high). + if isinstance(reasoning_config, dict): + effort = (reasoning_config.get("effort") or "").strip().lower() + if effort in ("xhigh", "max"): + top_level["reasoning_effort"] = "max" + elif effort in ("low", "medium", "high"): + top_level["reasoning_effort"] = effort + + return extra_body, top_level + + +deepseek = DeepSeekProfile( name="deepseek", aliases=("deepseek-chat",), env_vars=("DEEPSEEK_API_KEY",), diff --git a/run_agent.py b/run_agent.py index c9aa3157170..2b20d48ede2 100644 --- a/run_agent.py +++ b/run_agent.py @@ -9798,7 +9798,6 @@ class AIAgent: ) _is_tokenhub = base_url_host_matches(self._base_url_lower, "tokenhub.tencentmaas.com") _is_lmstudio = (self.provider or "").strip().lower() == "lmstudio" - _is_deepseek = base_url_host_matches(self.base_url, "api.deepseek.com") # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE # sentinel (temperature omitted entirely), a numeric override, or None. @@ -9910,7 +9909,6 @@ class AIAgent: is_kimi=_is_kimi, is_tokenhub=_is_tokenhub, is_lmstudio=_is_lmstudio, - is_deepseek=_is_deepseek, is_custom_provider=self.provider == "custom", ollama_num_ctx=self._ollama_num_ctx, provider_preferences=_prefs or None, @@ -10370,11 +10368,6 @@ class AIAgent: # context compaction). Don't pass null to the API. api_msg.pop("reasoning_content", None) - # DeepSeek: strip reasoning_content on all assistant messages so the API - # doesn't return 400 when the model was invoked with thinking enabled. - if base_url_host_matches(self.base_url, "api.deepseek.com"): - api_msg.pop("reasoning_content", None) - @staticmethod def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict: """Strip Codex Responses API fields from tool_calls for strict providers. diff --git a/scripts/release.py b/scripts/release.py index aafa626329e..6084e0754c0 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -73,6 +73,7 @@ AUTHOR_MAP = { "teknium@nousresearch.com": "teknium1", "piyushvp1@gmail.com": "thelumiereguy", "421774554@qq.com": "wuli666", + "twebefy@gmail.com": "tw2818", "harish.kukreja@gmail.com": "counterposition", "korkyzer@gmail.com": "Korkyzer", "1046611633@qq.com": "zhengyn0001", diff --git a/tests/plugins/model_providers/test_deepseek_profile.py b/tests/plugins/model_providers/test_deepseek_profile.py new file mode 100644 index 00000000000..c53e70070a8 --- /dev/null +++ b/tests/plugins/model_providers/test_deepseek_profile.py @@ -0,0 +1,184 @@ +"""Unit tests for the DeepSeek provider profile's thinking-mode wiring. + +DeepSeek V4 (and the legacy ``deepseek-reasoner``) expects every request to +carry an explicit ``extra_body.thinking`` parameter. Omitting it makes the +server default to thinking-mode ON, which then enforces the +``reasoning_content``-must-be-echoed-back contract on subsequent turns and +breaks the conversation with HTTP 400 (#15700, #17212, #17825). + +These tests pin the profile's wire-shape contract so DeepSeek requests stay +correctly shaped without going live. +""" + +from __future__ import annotations + +import pytest + + +@pytest.fixture +def deepseek_profile(): + """Resolve the registered DeepSeek profile. + + Going through ``providers.get_provider_profile`` keeps the test honest — + if someone later replaces the registered class with a plain + ``ProviderProfile``, every assertion below collapses. + """ + # ``model_tools`` triggers plugin discovery on import, which is what + # registers the DeepSeek profile in the global provider registry. + import model_tools # noqa: F401 + import providers + + profile = providers.get_provider_profile("deepseek") + assert profile is not None, "deepseek provider profile must be registered" + return profile + + +class TestDeepSeekThinkingWireShape: + """``build_api_kwargs_extras`` produces DeepSeek's exact wire format.""" + + def test_v4_pro_default_enables_thinking_without_effort(self, deepseek_profile): + """No reasoning_config → thinking enabled, server picks default effort.""" + extra_body, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config=None, model="deepseek-v4-pro" + ) + assert extra_body == {"thinking": {"type": "enabled"}} + assert top_level == {} + + def test_v4_pro_enabled_with_high_effort(self, deepseek_profile): + extra_body, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "high"}, + model="deepseek-v4-pro", + ) + assert extra_body == {"thinking": {"type": "enabled"}} + assert top_level == {"reasoning_effort": "high"} + + @pytest.mark.parametrize("effort", ["low", "medium", "high"]) + def test_standard_efforts_pass_through(self, deepseek_profile, effort): + _, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": effort}, + model="deepseek-v4-pro", + ) + assert top_level == {"reasoning_effort": effort} + + @pytest.mark.parametrize("effort", ["xhigh", "max", "MAX", " Max "]) + def test_xhigh_and_max_normalize_to_max(self, deepseek_profile, effort): + _, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": effort}, + model="deepseek-v4-pro", + ) + assert top_level == {"reasoning_effort": "max"} + + def test_explicitly_disabled_sends_disabled_marker(self, deepseek_profile): + """``reasoning_config.enabled=False`` → ``thinking.type=disabled``. + + The crucial bit is that the parameter is *sent* at all — DeepSeek + defaults to thinking-on when ``thinking`` is absent. + """ + extra_body, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": False}, model="deepseek-v4-pro" + ) + assert extra_body == {"thinking": {"type": "disabled"}} + # No effort when disabled — DeepSeek rejects it. + assert top_level == {} + + def test_disabled_ignores_effort_field(self, deepseek_profile): + """Effort silently dropped when thinking is off.""" + _, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": False, "effort": "high"}, + model="deepseek-v4-pro", + ) + assert top_level == {} + + def test_unknown_effort_omits_top_level(self, deepseek_profile): + """Garbage effort → omit reasoning_effort so DeepSeek applies its default.""" + _, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "garbage"}, + model="deepseek-v4-pro", + ) + assert top_level == {} + + def test_empty_effort_omits_top_level(self, deepseek_profile): + _, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": ""}, + model="deepseek-v4-pro", + ) + assert top_level == {} + + +class TestDeepSeekModelGating: + """V4 family + ``deepseek-reasoner`` get thinking; V3 stays untouched.""" + + @pytest.mark.parametrize( + "model", + [ + "deepseek-v4-pro", + "deepseek-v4-flash", + "deepseek-v4-future-variant", + "deepseek-reasoner", + "DEEPSEEK-V4-PRO", # case-insensitive + ], + ) + def test_thinking_capable_models_emit_thinking(self, deepseek_profile, model): + extra_body, _ = deepseek_profile.build_api_kwargs_extras( + reasoning_config=None, model=model + ) + assert extra_body == {"thinking": {"type": "enabled"}} + + @pytest.mark.parametrize( + "model", + [ + "deepseek-chat", # V3 alias + "deepseek-v3-0324", # explicit V3 + "deepseek-v3.1", # V3 minor revisions + "", # bare/unknown + None, # missing + "deepseek-unknown", # unrecognized + ], + ) + def test_non_thinking_models_emit_nothing(self, deepseek_profile, model): + extra_body, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "high"}, model=model + ) + assert extra_body == {} + assert top_level == {} + + +class TestDeepSeekFullKwargsIntegration: + """End-to-end: the transport's full kwargs match DeepSeek's live wire format. + + The live test harness in ``tests/run_agent/test_deepseek_v4_thinking_live.py`` + sends ``{"reasoning_effort": "high", "extra_body": {"thinking": {"type": + "enabled"}}}``. Confirm the transport produces that exact shape when wired + through the registered DeepSeek profile. + """ + + def test_full_kwargs_match_live_wire_shape(self, deepseek_profile): + from agent.transports.chat_completions import ChatCompletionsTransport + + kwargs = ChatCompletionsTransport().build_kwargs( + model="deepseek-v4-pro", + messages=[{"role": "user", "content": "ping"}], + tools=None, + provider_profile=deepseek_profile, + reasoning_config={"enabled": True, "effort": "high"}, + base_url="https://api.deepseek.com/v1", + provider_name="deepseek", + ) + assert kwargs["model"] == "deepseek-v4-pro" + assert kwargs["reasoning_effort"] == "high" + assert kwargs["extra_body"] == {"thinking": {"type": "enabled"}} + + def test_v3_chat_full_kwargs_omit_thinking(self, deepseek_profile): + from agent.transports.chat_completions import ChatCompletionsTransport + + kwargs = ChatCompletionsTransport().build_kwargs( + model="deepseek-chat", + messages=[{"role": "user", "content": "ping"}], + tools=None, + provider_profile=deepseek_profile, + reasoning_config={"enabled": True, "effort": "high"}, + base_url="https://api.deepseek.com/v1", + provider_name="deepseek", + ) + assert "reasoning_effort" not in kwargs + assert "extra_body" not in kwargs or "thinking" not in kwargs.get("extra_body", {})