# NOTE(review): line breaks and indentation were reconstructed from a flattened
# copy of this patch; context-line whitespace is inferred from Python syntax —
# run `git apply --check` against the target tree before use.
# NOTE(review): the run_agent.py hunk at +9164 appears to add a second
# `_copy_reasoning_content_for_api` call right after an existing one — confirm
# that this is intended and not a merge leftover.
# NOTE(review): the DeepSeek thinking payload is now `{"type": "enabled"}` only;
# the Anthropic-style `budget_tokens` field was dropped (not checked against
# DeepSeek's thinking-mode docs here — confirm before merging).
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 850e16662..0bed60723 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -162,8 +162,12 @@ DEFAULT_CONTEXT_LENGTHS = {
     "gemma-4-31b": 256000,
     "gemma-3": 131072,
     "gemma": 8192,  # fallback for older gemma models
-    # DeepSeek
-    "deepseek": 128000,
+    # DeepSeek — V4 family supports 1M context (api.deepseek.com docs)
+    "deepseek-v4-pro": 1000000,
+    "deepseek-v4-flash": 1000000,
+    "deepseek-chat": 1000000,
+    "deepseek-reasoner": 1000000,
+    "deepseek": 128000,  # fallback for older/unrecognised DeepSeek models
     # Meta
     "llama": 131072,
     # Qwen — specific model families before the catch-all.
diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py
index 1cccf7e92..84b082a31 100644
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -239,6 +239,38 @@ class ChatCompletionsTransport(ProviderTransport):
                 "type": "enabled" if _kimi_thinking_enabled else "disabled",
             }
 
+        # DeepSeek: thinking mode toggle and effort mapping
+        is_deepseek = params.get("is_deepseek", False)
+        if is_deepseek:
+            # Legacy ``deepseek-chat`` is the non-thinking alias; the V4
+            # family and ``deepseek-reasoner`` default to thinking mode.
+            _ds_default_thinking = model_lower != "deepseek-chat"
+            _ds_thinking_enabled = _ds_default_thinking
+            _ds_has_explicit_toggle = False
+            if reasoning_config and isinstance(reasoning_config, dict):
+                if reasoning_config.get("enabled") is False:
+                    _ds_thinking_enabled = False
+                    _ds_has_explicit_toggle = True
+                elif reasoning_config.get("enabled") is True or reasoning_config.get("effort"):
+                    _ds_thinking_enabled = True
+                    _ds_has_explicit_toggle = True
+            if _ds_thinking_enabled:
+                # DeepSeek only supports "high" and "max" effort values.
+                # Map low/medium/high → "high", xhigh/max → "max".
+                _ds_effort = "high"
+                if reasoning_config and isinstance(reasoning_config, dict):
+                    _e = (reasoning_config.get("effort") or "").strip().lower()
+                    if _e in ("xhigh", "max"):
+                        _ds_effort = "max"
+                extra_body["thinking"] = {"type": "enabled"}
+                api_kwargs["reasoning_effort"] = _ds_effort
+                # DeepSeek rejects temperature/top_p/presence_penalty/
+                # frequency_penalty when thinking is enabled.
+                for _k in ("temperature", "top_p", "presence_penalty", "frequency_penalty"):
+                    api_kwargs.pop(_k, None)
+            elif _ds_default_thinking or _ds_has_explicit_toggle:
+                extra_body["thinking"] = {"type": "disabled"}
+
         # Reasoning
         if params.get("supports_reasoning", False):
             if is_github_models:
@@ -347,7 +379,7 @@ class ChatCompletionsTransport(ProviderTransport):
         reasoning_content = getattr(msg, "reasoning_content", None)
 
         provider_data: Dict[str, Any] = {}
-        if reasoning_content:
+        if reasoning_content is not None:
             provider_data["reasoning_content"] = reasoning_content
         rd = getattr(msg, "reasoning_details", None)
         if rd:
diff --git a/run_agent.py b/run_agent.py
index f7a929118..1ac2df2a7 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2939,15 +2939,18 @@ class AIAgent:
         """
         reasoning_parts = []
 
-        # Check direct reasoning field
-        if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
-            reasoning_parts.append(assistant_message.reasoning)
-
+        # Check direct reasoning field (isinstance guard: some providers
+        # return non-string values that are truthy but not valid reasoning)
+        reasoning_val = getattr(assistant_message, 'reasoning', None)
+        if isinstance(reasoning_val, str) and reasoning_val:
+            reasoning_parts.append(reasoning_val)
+
         # Check reasoning_content field (alternative name used by some providers)
-        if hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content:
+        rc_val = getattr(assistant_message, 'reasoning_content', None)
+        if isinstance(rc_val, str) and rc_val:
             # Don't duplicate if same as reasoning
-            if assistant_message.reasoning_content not in reasoning_parts:
-                reasoning_parts.append(assistant_message.reasoning_content)
+            if rc_val not in reasoning_parts:
+                reasoning_parts.append(rc_val)
 
         # Check reasoning_details array (OpenRouter unified format)
         # Format: [{"type": "reasoning.summary", "summary": "...", ...}, ...]
@@ -7406,6 +7409,7 @@ class AIAgent:
             or base_url_host_matches(self.base_url, "moonshot.ai")
             or base_url_host_matches(self.base_url, "moonshot.cn")
         )
+        _is_deepseek = base_url_host_matches(self._base_url_lower, "api.deepseek.com")
 
         # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE
         # sentinel (temperature omitted entirely), a numeric override, or None.
@@ -7474,6 +7478,7 @@ class AIAgent:
             is_github_models=_is_gh,
             is_nvidia_nim=_is_nvidia,
             is_kimi=_is_kimi,
+            is_deepseek=_is_deepseek,
             is_custom_provider=self.provider == "custom",
             ollama_num_ctx=self._ollama_num_ctx,
             provider_preferences=_prefs or None,
@@ -7759,6 +7764,31 @@ class AIAgent:
             or self._needs_deepseek_tool_reasoning()
         ):
             api_msg["reasoning_content"] = ""
+            return
+
+        # DeepSeek thinking mode requires reasoning_content on ALL assistant
+        # messages — not just tool_calls turns. Empty string is valid.
+        #
+        # Native DeepSeek keeps ``deepseek-chat`` as the legacy non-thinking
+        # alias, while V4 models and ``deepseek-reasoner`` default to
+        # thinking. Preserve that distinction so enabling native DeepSeek
+        # support does not silently change ``deepseek-chat`` semantics.
+        _model_lower = (self.model or "").lower()
+        _deepseek_native = base_url_host_matches(self.base_url, "api.deepseek.com")
+        _deepseek_openrouter = self._is_openrouter_url() and _model_lower.startswith("deepseek/")
+        if _deepseek_native or _deepseek_openrouter:
+            rc = self.reasoning_config if isinstance(self.reasoning_config, dict) else {}
+            if rc.get("enabled") is False:
+                return
+            _deepseek_requires_reasoning = _deepseek_openrouter
+            if _deepseek_native:
+                _deepseek_requires_reasoning = (
+                    _model_lower != "deepseek-chat"
+                    or rc.get("enabled") is True
+                    or bool(rc.get("effort"))
+                )
+            if _deepseek_requires_reasoning:
+                api_msg["reasoning_content"] = ""
 
     @staticmethod
     def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
@@ -9134,6 +9164,7 @@ class AIAgent:
             self._copy_reasoning_content_for_api(msg, api_msg)
             for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"):
                 api_msg.pop(internal_field, None)
+            self._copy_reasoning_content_for_api(msg, api_msg)
             if _needs_sanitize:
                 self._sanitize_tool_calls_for_strict_api(api_msg)
             api_messages.append(api_msg)
diff --git a/tests/agent/test_deepseek_v4.py b/tests/agent/test_deepseek_v4.py
new file mode 100644
index 000000000..e20496f91
--- /dev/null
+++ b/tests/agent/test_deepseek_v4.py
@@ -0,0 +1,322 @@
+"""Comprehensive tests for DeepSeek V4 support.
+
+Covers context windows, thinking mode toggle, effort mapping,
+reasoning_content replay, and _extract_reasoning isinstance guards.
+
+Unifies test coverage from PRs #14952, #14958, #15325, #15228, #15354.
+"""
+import unittest
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
+from agent.transports.chat_completions import ChatCompletionsTransport
+
+
+class TestDeepSeekV4ContextWindows(unittest.TestCase):
+    """V4 models should have 1M context entries in DEFAULT_CONTEXT_LENGTHS."""
+
+    def _lookup(self, model: str) -> int:
+        """Simulate the hardcoded default lookup (step 8 in get_model_context_length).
+
+        Sorted by key length descending, finds first substring match.
+        """
+        model_lower = model.lower()
+        for key, length in sorted(
+            DEFAULT_CONTEXT_LENGTHS.items(), key=lambda x: len(x[0]), reverse=True
+        ):
+            if key in model_lower:
+                return length
+        return 131072  # fallback
+
+    def test_v4_pro_context(self):
+        self.assertEqual(self._lookup("deepseek-v4-pro"), 1000000)
+
+    def test_v4_flash_context(self):
+        self.assertEqual(self._lookup("deepseek-v4-flash"), 1000000)
+
+    def test_deepseek_chat_context(self):
+        self.assertEqual(self._lookup("deepseek-chat"), 1000000)
+
+    def test_deepseek_reasoner_context(self):
+        self.assertEqual(self._lookup("deepseek-reasoner"), 1000000)
+
+    def test_plain_deepseek_fallback(self):
+        """Unrecognised DeepSeek models should fall back to 128K."""
+        self.assertEqual(self._lookup("deepseek-old-model"), 128000)
+
+    def test_v4_with_vendor_prefix(self):
+        """Vendor-prefixed V4 model names should still match."""
+        self.assertEqual(self._lookup("deepseek/deepseek-chat"), 1000000)
+
+    def test_entries_present(self):
+        """All V4 entries must exist in the hardcoded defaults."""
+        for key in ("deepseek-v4-pro", "deepseek-v4-flash", "deepseek-chat", "deepseek-reasoner"):
+            self.assertIn(key, DEFAULT_CONTEXT_LENGTHS, f"{key} missing from defaults")
+            self.assertEqual(DEFAULT_CONTEXT_LENGTHS[key], 1000000)
+
+
+class TestDeepSeekThinkingMode(unittest.TestCase):
+    """Verify build_kwargs handles DeepSeek thinking mode correctly."""
+
+    def _build(
+        self,
+        reasoning_config=None,
+        is_deepseek=True,
+        model="deepseek-v4-pro",
+        fixed_temperature=0.7,
+    ):
+        transport = ChatCompletionsTransport.__new__(ChatCompletionsTransport)
+        kwargs = transport.build_kwargs(
+            model=model,
+            messages=[{"role": "user", "content": "Hello"}],
+            tools=None,
+            is_deepseek=is_deepseek,
+            reasoning_config=reasoning_config,
+            model_lower=model.lower(),
+            fixed_temperature=fixed_temperature,
+        )
+        return kwargs
+
+    def test_thinking_enabled_by_default(self):
+        """When no reasoning_config, thinking should be enabled."""
+        kwargs = self._build()
+        extra = kwargs.get("extra_body", {})
+        self.assertEqual(extra.get("thinking", {}).get("type"), "enabled")
+
+    def test_thinking_disabled(self):
+        """When reasoning_config.enabled=False, thinking should be disabled."""
+        kwargs = self._build(reasoning_config={"enabled": False})
+        extra = kwargs.get("extra_body", {})
+        self.assertEqual(extra.get("thinking", {}).get("type"), "disabled")
+
+    def test_effort_low_maps_to_high(self):
+        kwargs = self._build(reasoning_config={"effort": "low"})
+        self.assertEqual(kwargs.get("reasoning_effort"), "high")
+
+    def test_effort_medium_maps_to_high(self):
+        kwargs = self._build(reasoning_config={"effort": "medium"})
+        self.assertEqual(kwargs.get("reasoning_effort"), "high")
+
+    def test_effort_high_maps_to_high(self):
+        kwargs = self._build(reasoning_config={"effort": "high"})
+        self.assertEqual(kwargs.get("reasoning_effort"), "high")
+
+    def test_effort_xhigh_maps_to_max(self):
+        kwargs = self._build(reasoning_config={"effort": "xhigh"})
+        self.assertEqual(kwargs.get("reasoning_effort"), "max")
+
+    def test_effort_max_maps_to_max(self):
+        kwargs = self._build(reasoning_config={"effort": "max"})
+        self.assertEqual(kwargs.get("reasoning_effort"), "max")
+
+    def test_temperature_stripped_when_thinking_enabled(self):
+        """DeepSeek rejects temperature when thinking is enabled."""
+        kwargs = self._build(fixed_temperature=0.7)
+        self.assertNotIn("temperature", kwargs)
+
+    def test_non_deepseek_not_affected(self):
+        """Non-DeepSeek models should not get thinking toggle."""
+        kwargs = self._build(is_deepseek=False)
+        extra = kwargs.get("extra_body", {})
+        self.assertNotIn("thinking", extra)
+
+    def test_disabled_does_not_strip_temperature(self):
+        """When thinking is disabled, temperature should be preserved."""
+        kwargs = self._build(
+            reasoning_config={"enabled": False},
+            fixed_temperature=0.7,
+        )
+        self.assertEqual(kwargs.get("temperature"), 0.7)
+
+    def test_deepseek_chat_does_not_force_thinking(self):
+        """Legacy deepseek-chat should stay on its non-thinking default."""
+        kwargs = self._build(model="deepseek-chat")
+        extra = kwargs.get("extra_body", {})
+        self.assertNotIn("thinking", extra)
+        self.assertNotIn("reasoning_effort", kwargs)
+        self.assertEqual(kwargs.get("temperature"), 0.7)
+
+    def test_deepseek_chat_can_opt_in_to_thinking(self):
+        """Explicit reasoning config should enable thinking for deepseek-chat."""
+        kwargs = self._build(
+            model="deepseek-chat",
+            reasoning_config={"enabled": True, "effort": "xhigh"},
+            fixed_temperature=0.7,
+        )
+        extra = kwargs.get("extra_body", {})
+        self.assertEqual(extra.get("thinking", {}).get("type"), "enabled")
+        self.assertEqual(kwargs.get("reasoning_effort"), "max")
+        self.assertNotIn("temperature", kwargs)
+
+
+class TestDeepSeekReasoningContentReplay(unittest.TestCase):
+    """Verify _copy_reasoning_content_for_api handles DeepSeek correctly."""
+
+    def _make_agent(self, base_url="https://api.deepseek.com/v1", model="deepseek-v4-pro", reasoning_config=None):
+        agent = MagicMock()
+        agent.base_url = base_url
+        agent._base_url_lower = base_url.lower()
+        agent.model = model
+        agent.provider = "deepseek"
+        agent.reasoning_config = reasoning_config
+        agent._is_openrouter_url = MagicMock(return_value="openrouter" in base_url.lower())
+        from run_agent import AIAgent
+        agent._copy_reasoning_content_for_api = AIAgent._copy_reasoning_content_for_api.__get__(agent)
+        return agent
+
+    def test_deepseek_injects_empty_reasoning_content(self):
+        """DeepSeek should inject reasoning_content='' on all assistant messages."""
+        agent = self._make_agent()
+        api_msg = {}
+        agent._copy_reasoning_content_for_api(
+            {"role": "assistant", "content": "Hello"},
+            api_msg,
+        )
+        self.assertEqual(api_msg.get("reasoning_content"), "")
+
+    def test_deepseek_openrouter_injects(self):
+        """OpenRouter-routed DeepSeek should also inject."""
+        agent = self._make_agent(
+            base_url="https://openrouter.ai/api/v1",
+            model="deepseek/deepseek-chat",
+        )
+        api_msg = {}
+        agent._copy_reasoning_content_for_api(
+            {"role": "assistant", "content": "Hi"},
+            api_msg,
+        )
+        self.assertEqual(api_msg.get("reasoning_content"), "")
+
+    def test_non_deepseek_no_injection(self):
+        """Non-DeepSeek provider should not inject reasoning_content."""
+        agent = self._make_agent(
+            base_url="https://api.openai.com/v1",
+            model="gpt-4o",
+        )
+        api_msg = {}
+        agent._copy_reasoning_content_for_api(
+            {"role": "assistant", "content": "Hi"},
+            api_msg,
+        )
+        self.assertNotIn("reasoning_content", api_msg)
+
+    def test_explicit_reasoning_preserved(self):
+        """When source message has explicit reasoning_content, it should be preserved."""
+        agent = self._make_agent()
+        api_msg = {}
+        agent._copy_reasoning_content_for_api(
+            {"role": "assistant", "content": "Hi", "reasoning_content": "I thought about it"},
+            api_msg,
+        )
+        self.assertEqual(api_msg["reasoning_content"], "I thought about it")
+
+    def test_thinking_disabled_skips_injection(self):
+        """When thinking is explicitly disabled, don't inject."""
+        agent = self._make_agent(reasoning_config={"enabled": False})
+        api_msg = {}
+        agent._copy_reasoning_content_for_api(
+            {"role": "assistant", "content": "Hi"},
+            api_msg,
+        )
+        self.assertNotIn("reasoning_content", api_msg)
+
+    def test_native_deepseek_chat_does_not_inject_by_default(self):
+        """Legacy non-thinking deepseek-chat should not replay reasoning_content."""
+        agent = self._make_agent(model="deepseek-chat")
+        api_msg = {}
+        agent._copy_reasoning_content_for_api(
+            {"role": "assistant", "content": "Hi"},
+            api_msg,
+        )
+        self.assertNotIn("reasoning_content", api_msg)
+
+    def test_native_deepseek_chat_injects_when_enabled(self):
+        """deepseek-chat should replay reasoning_content once thinking is enabled."""
+        agent = self._make_agent(
+            model="deepseek-chat",
+            reasoning_config={"enabled": True, "effort": "high"},
+        )
+        api_msg = {}
+        agent._copy_reasoning_content_for_api(
+            {"role": "assistant", "content": "Hi"},
+            api_msg,
+        )
+        self.assertEqual(api_msg.get("reasoning_content"), "")
+
+    def test_non_assistant_skipped(self):
+        """Non-assistant messages should be skipped entirely."""
+        agent = self._make_agent()
+        api_msg = {}
+        agent._copy_reasoning_content_for_api(
+            {"role": "user", "content": "Hi"},
+            api_msg,
+        )
+        self.assertNotIn("reasoning_content", api_msg)
+
+
+class TestExtractReasoningIsinstance(unittest.TestCase):
+    """Verify _extract_reasoning uses isinstance checks."""
+
+    def _extract(self, **attrs):
+        from run_agent import AIAgent
+        agent = MagicMock(spec=AIAgent)
+        agent._extract_reasoning = AIAgent._extract_reasoning.__get__(agent)
+        msg = SimpleNamespace(**attrs)
+        return agent._extract_reasoning(msg)
+
+    def test_valid_string_reasoning(self):
+        result = self._extract(reasoning="I think therefore I am")
+        self.assertIn("I think therefore I am", result)
+
+    def test_empty_string_reasoning_skipped(self):
+        """Empty string reasoning should not be extracted."""
+        result = self._extract(reasoning="")
+        self.assertIsNone(result)
+
+    def test_non_string_reasoning_skipped(self):
+        """Non-string reasoning (e.g. int, list) should not crash or extract."""
+        result = self._extract(reasoning=42)
+        self.assertIsNone(result)
+
+    def test_valid_reasoning_content(self):
+        result = self._extract(reasoning_content="Deep thought")
+        self.assertIn("Deep thought", result)
+
+    def test_empty_reasoning_content_skipped(self):
+        result = self._extract(reasoning_content="")
+        self.assertIsNone(result)
+
+    def test_non_string_reasoning_content_skipped(self):
+        result = self._extract(reasoning_content=["not", "a", "string"])
+        self.assertIsNone(result)
+
+
+class TestReasoningContentNormalization(unittest.TestCase):
+    """Verify normalize_response preserves empty-string reasoning_content."""
+
+    def test_empty_string_reasoning_content_preserved(self):
+        """Empty string reasoning_content should be preserved in provider_data."""
+        transport = ChatCompletionsTransport.__new__(ChatCompletionsTransport)
+
+        msg = SimpleNamespace(
+            role="assistant",
+            content="Hello",
+            tool_calls=None,
+            refusal=None,
+            reasoning=None,
+            reasoning_content="",
+            reasoning_details=None,
+        )
+        choice = SimpleNamespace(index=0, message=msg, finish_reason="stop")
+        response = SimpleNamespace(
+            id="resp_1",
+            choices=[choice],
+            usage=SimpleNamespace(prompt_tokens=10, completion_tokens=5, total_tokens=15),
+            model="deepseek-v4-pro",
+        )
+
+        result = transport.normalize_response(response)
+        # Empty string should be preserved (not dropped by truthy check)
+        self.assertIn("reasoning_content", result.provider_data)
+        self.assertEqual(result.provider_data["reasoning_content"], "")