This commit is contained in:
Evi Nova 2026-04-25 10:34:05 +10:00 committed by GitHub
commit d9144d3c52
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 399 additions and 10 deletions

View file

@ -162,8 +162,12 @@ DEFAULT_CONTEXT_LENGTHS = {
"gemma-4-31b": 256000,
"gemma-3": 131072,
"gemma": 8192, # fallback for older gemma models
# DeepSeek
"deepseek": 128000,
# DeepSeek — V4 family supports 1M context (api.deepseek.com docs)
"deepseek-v4-pro": 1000000,
"deepseek-v4-flash": 1000000,
"deepseek-chat": 1000000,
"deepseek-reasoner": 1000000,
"deepseek": 128000, # fallback for older/unrecognised DeepSeek models
# Meta
"llama": 131072,
# Qwen — specific model families before the catch-all.

View file

@ -239,6 +239,38 @@ class ChatCompletionsTransport(ProviderTransport):
"type": "enabled" if _kimi_thinking_enabled else "disabled",
}
# DeepSeek: thinking mode toggle and effort mapping
is_deepseek = params.get("is_deepseek", False)
if is_deepseek:
# Legacy ``deepseek-chat`` is the non-thinking alias; the V4
# family and ``deepseek-reasoner`` default to thinking mode.
_ds_default_thinking = model_lower != "deepseek-chat"
_ds_thinking_enabled = _ds_default_thinking
_ds_has_explicit_toggle = False
if reasoning_config and isinstance(reasoning_config, dict):
if reasoning_config.get("enabled") is False:
_ds_thinking_enabled = False
_ds_has_explicit_toggle = True
elif reasoning_config.get("enabled") is True or reasoning_config.get("effort"):
_ds_thinking_enabled = True
_ds_has_explicit_toggle = True
if _ds_thinking_enabled:
# DeepSeek only supports "high" and "max" effort values.
# Map low/medium/high → "high", xhigh/max → "max".
_ds_effort = "high"
if reasoning_config and isinstance(reasoning_config, dict):
_e = (reasoning_config.get("effort") or "").strip().lower()
if _e in ("xhigh", "max"):
_ds_effort = "max"
extra_body["thinking"] = {"type": "enabled", "budget_tokens": 8192}
api_kwargs["reasoning_effort"] = _ds_effort
# DeepSeek rejects temperature/top_p/presence_penalty/
# frequency_penalty when thinking is enabled.
for _k in ("temperature", "top_p", "presence_penalty", "frequency_penalty"):
api_kwargs.pop(_k, None)
elif _ds_default_thinking or _ds_has_explicit_toggle:
extra_body["thinking"] = {"type": "disabled"}
# Reasoning
if params.get("supports_reasoning", False):
if is_github_models:
@ -347,7 +379,7 @@ class ChatCompletionsTransport(ProviderTransport):
reasoning_content = getattr(msg, "reasoning_content", None)
provider_data: Dict[str, Any] = {}
if reasoning_content:
if reasoning_content is not None:
provider_data["reasoning_content"] = reasoning_content
rd = getattr(msg, "reasoning_details", None)
if rd:

View file

@ -2939,15 +2939,18 @@ class AIAgent:
"""
reasoning_parts = []
# Check direct reasoning field
if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
reasoning_parts.append(assistant_message.reasoning)
# Check direct reasoning field (isinstance guard: some providers
# return non-string values that are truthy but not valid reasoning)
reasoning_val = getattr(assistant_message, 'reasoning', None)
if isinstance(reasoning_val, str) and reasoning_val:
reasoning_parts.append(reasoning_val)
# Check reasoning_content field (alternative name used by some providers)
if hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content:
rc_val = getattr(assistant_message, 'reasoning_content', None)
if isinstance(rc_val, str) and rc_val:
# Don't duplicate if same as reasoning
if assistant_message.reasoning_content not in reasoning_parts:
reasoning_parts.append(assistant_message.reasoning_content)
if rc_val not in reasoning_parts:
reasoning_parts.append(rc_val)
# Check reasoning_details array (OpenRouter unified format)
# Format: [{"type": "reasoning.summary", "summary": "...", ...}, ...]
@ -7406,6 +7409,7 @@ class AIAgent:
or base_url_host_matches(self.base_url, "moonshot.ai")
or base_url_host_matches(self.base_url, "moonshot.cn")
)
_is_deepseek = base_url_host_matches(self._base_url_lower, "api.deepseek.com")
# Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE
# sentinel (temperature omitted entirely), a numeric override, or None.
@ -7474,6 +7478,7 @@ class AIAgent:
is_github_models=_is_gh,
is_nvidia_nim=_is_nvidia,
is_kimi=_is_kimi,
is_deepseek=_is_deepseek,
is_custom_provider=self.provider == "custom",
ollama_num_ctx=self._ollama_num_ctx,
provider_preferences=_prefs or None,
@ -7759,6 +7764,31 @@ class AIAgent:
or self._needs_deepseek_tool_reasoning()
):
api_msg["reasoning_content"] = ""
return
# DeepSeek thinking mode requires reasoning_content on ALL assistant
# messages — not just tool_calls turns. Empty string is valid.
#
# Native DeepSeek keeps ``deepseek-chat`` as the legacy non-thinking
# alias, while V4 models and ``deepseek-reasoner`` default to
# thinking. Preserve that distinction so enabling native DeepSeek
# support does not silently change ``deepseek-chat`` semantics.
_model_lower = (self.model or "").lower()
_deepseek_native = base_url_host_matches(self.base_url, "api.deepseek.com")
_deepseek_openrouter = self._is_openrouter_url() and _model_lower.startswith("deepseek/")
if _deepseek_native or _deepseek_openrouter:
rc = self.reasoning_config if isinstance(self.reasoning_config, dict) else {}
if rc.get("enabled") is False:
return
_deepseek_requires_reasoning = _deepseek_openrouter
if _deepseek_native:
_deepseek_requires_reasoning = (
_model_lower != "deepseek-chat"
or rc.get("enabled") is True
or bool(rc.get("effort"))
)
if _deepseek_requires_reasoning:
api_msg["reasoning_content"] = ""
@staticmethod
def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
@ -9134,6 +9164,7 @@ class AIAgent:
self._copy_reasoning_content_for_api(msg, api_msg)
for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"):
api_msg.pop(internal_field, None)
self._copy_reasoning_content_for_api(msg, api_msg)
if _needs_sanitize:
self._sanitize_tool_calls_for_strict_api(api_msg)
api_messages.append(api_msg)

View file

@ -0,0 +1,322 @@
"""Comprehensive tests for DeepSeek V4 support.
Covers context windows, thinking mode toggle, effort mapping,
reasoning_content replay, and _extract_reasoning isinstance guards.
Unifies test coverage from PRs #14952, #14958, #15325, #15228, #15354.
"""
import unittest
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
from agent.transports.chat_completions import ChatCompletionsTransport
class TestDeepSeekV4ContextWindows(unittest.TestCase):
    """V4 models should have 1M context entries in DEFAULT_CONTEXT_LENGTHS."""

    def _lookup(self, model: str) -> int:
        """Replicate the hardcoded default lookup (step 8 in get_model_context_length).

        Keys are tried longest-first; the first substring hit wins.
        """
        needle = model.lower()
        ordered = sorted(
            DEFAULT_CONTEXT_LENGTHS.items(),
            key=lambda entry: len(entry[0]),
            reverse=True,
        )
        # 131072 mirrors the production fallback when nothing matches.
        return next((ctx for key, ctx in ordered if key in needle), 131072)

    def test_v4_pro_context(self):
        self.assertEqual(self._lookup("deepseek-v4-pro"), 1000000)

    def test_v4_flash_context(self):
        self.assertEqual(self._lookup("deepseek-v4-flash"), 1000000)

    def test_deepseek_chat_context(self):
        self.assertEqual(self._lookup("deepseek-chat"), 1000000)

    def test_deepseek_reasoner_context(self):
        self.assertEqual(self._lookup("deepseek-reasoner"), 1000000)

    def test_plain_deepseek_fallback(self):
        """Unrecognised DeepSeek models should fall back to 128K."""
        self.assertEqual(self._lookup("deepseek-old-model"), 128000)

    def test_v4_with_vendor_prefix(self):
        """Vendor-prefixed V4 model names should still match."""
        self.assertEqual(self._lookup("deepseek/deepseek-chat"), 1000000)

    def test_entries_present(self):
        """All V4 entries must exist in the hardcoded defaults."""
        expected = (
            "deepseek-v4-pro",
            "deepseek-v4-flash",
            "deepseek-chat",
            "deepseek-reasoner",
        )
        for name in expected:
            self.assertIn(name, DEFAULT_CONTEXT_LENGTHS, f"{name} missing from defaults")
            self.assertEqual(DEFAULT_CONTEXT_LENGTHS[name], 1000000)
class TestDeepSeekThinkingMode(unittest.TestCase):
    """Verify build_kwargs handles DeepSeek thinking mode correctly."""

    def _build(
        self,
        reasoning_config=None,
        is_deepseek=True,
        model="deepseek-v4-pro",
        fixed_temperature=0.7,
    ):
        # __new__ skips __init__ so no client/credentials are required.
        transport = ChatCompletionsTransport.__new__(ChatCompletionsTransport)
        return transport.build_kwargs(
            model=model,
            messages=[{"role": "user", "content": "Hello"}],
            tools=None,
            is_deepseek=is_deepseek,
            reasoning_config=reasoning_config,
            model_lower=model.lower(),
            fixed_temperature=fixed_temperature,
        )

    def test_thinking_enabled_by_default(self):
        """When no reasoning_config, thinking should be enabled."""
        body = self._build().get("extra_body", {})
        self.assertEqual(body.get("thinking", {}).get("type"), "enabled")

    def test_thinking_disabled(self):
        """When reasoning_config.enabled=False, thinking should be disabled."""
        body = self._build(reasoning_config={"enabled": False}).get("extra_body", {})
        self.assertEqual(body.get("thinking", {}).get("type"), "disabled")

    def test_effort_low_maps_to_high(self):
        result = self._build(reasoning_config={"effort": "low"})
        self.assertEqual(result.get("reasoning_effort"), "high")

    def test_effort_medium_maps_to_high(self):
        result = self._build(reasoning_config={"effort": "medium"})
        self.assertEqual(result.get("reasoning_effort"), "high")

    def test_effort_high_maps_to_high(self):
        result = self._build(reasoning_config={"effort": "high"})
        self.assertEqual(result.get("reasoning_effort"), "high")

    def test_effort_xhigh_maps_to_max(self):
        result = self._build(reasoning_config={"effort": "xhigh"})
        self.assertEqual(result.get("reasoning_effort"), "max")

    def test_effort_max_maps_to_max(self):
        result = self._build(reasoning_config={"effort": "max"})
        self.assertEqual(result.get("reasoning_effort"), "max")

    def test_temperature_stripped_when_thinking_enabled(self):
        """DeepSeek rejects temperature when thinking is enabled."""
        self.assertNotIn("temperature", self._build(fixed_temperature=0.7))

    def test_non_deepseek_not_affected(self):
        """Non-DeepSeek models should not get thinking toggle."""
        body = self._build(is_deepseek=False).get("extra_body", {})
        self.assertNotIn("thinking", body)

    def test_disabled_does_not_strip_temperature(self):
        """When thinking is disabled, temperature should be preserved."""
        result = self._build(
            reasoning_config={"enabled": False},
            fixed_temperature=0.7,
        )
        self.assertEqual(result.get("temperature"), 0.7)

    def test_deepseek_chat_does_not_force_thinking(self):
        """Legacy deepseek-chat should stay on its non-thinking default."""
        result = self._build(model="deepseek-chat")
        self.assertNotIn("thinking", result.get("extra_body", {}))
        self.assertNotIn("reasoning_effort", result)
        self.assertEqual(result.get("temperature"), 0.7)

    def test_deepseek_chat_can_opt_in_to_thinking(self):
        """Explicit reasoning config should enable thinking for deepseek-chat."""
        result = self._build(
            model="deepseek-chat",
            reasoning_config={"enabled": True, "effort": "xhigh"},
            fixed_temperature=0.7,
        )
        body = result.get("extra_body", {})
        self.assertEqual(body.get("thinking", {}).get("type"), "enabled")
        self.assertEqual(result.get("reasoning_effort"), "max")
        self.assertNotIn("temperature", result)
class TestDeepSeekReasoningContentReplay(unittest.TestCase):
    """Verify _copy_reasoning_content_for_api handles DeepSeek correctly."""

    def _make_agent(
        self,
        base_url="https://api.deepseek.com/v1",
        model="deepseek-v4-pro",
        reasoning_config=None,
    ):
        # A MagicMock stand-in carrying only the attributes the method reads,
        # with the real (unbound) method grafted on.
        stub = MagicMock()
        stub.base_url = base_url
        stub._base_url_lower = base_url.lower()
        stub.model = model
        stub.provider = "deepseek"
        stub.reasoning_config = reasoning_config
        stub._is_openrouter_url = MagicMock(
            return_value="openrouter" in base_url.lower()
        )
        from run_agent import AIAgent

        stub._copy_reasoning_content_for_api = (
            AIAgent._copy_reasoning_content_for_api.__get__(stub)
        )
        return stub

    def test_deepseek_injects_empty_reasoning_content(self):
        """DeepSeek should inject reasoning_content='' on all assistant messages."""
        stub = self._make_agent()
        out = {}
        stub._copy_reasoning_content_for_api(
            {"role": "assistant", "content": "Hello"}, out
        )
        self.assertEqual(out.get("reasoning_content"), "")

    def test_deepseek_openrouter_injects(self):
        """OpenRouter-routed DeepSeek should also inject."""
        stub = self._make_agent(
            base_url="https://openrouter.ai/api/v1",
            model="deepseek/deepseek-chat",
        )
        out = {}
        stub._copy_reasoning_content_for_api(
            {"role": "assistant", "content": "Hi"}, out
        )
        self.assertEqual(out.get("reasoning_content"), "")

    def test_non_deepseek_no_injection(self):
        """Non-DeepSeek provider should not inject reasoning_content."""
        stub = self._make_agent(
            base_url="https://api.openai.com/v1",
            model="gpt-4o",
        )
        out = {}
        stub._copy_reasoning_content_for_api(
            {"role": "assistant", "content": "Hi"}, out
        )
        self.assertNotIn("reasoning_content", out)

    def test_explicit_reasoning_preserved(self):
        """When source message has explicit reasoning_content, it should be preserved."""
        stub = self._make_agent()
        out = {}
        stub._copy_reasoning_content_for_api(
            {
                "role": "assistant",
                "content": "Hi",
                "reasoning_content": "I thought about it",
            },
            out,
        )
        self.assertEqual(out["reasoning_content"], "I thought about it")

    def test_thinking_disabled_skips_injection(self):
        """When thinking is explicitly disabled, don't inject."""
        stub = self._make_agent(reasoning_config={"enabled": False})
        out = {}
        stub._copy_reasoning_content_for_api(
            {"role": "assistant", "content": "Hi"}, out
        )
        self.assertNotIn("reasoning_content", out)

    def test_native_deepseek_chat_does_not_inject_by_default(self):
        """Legacy non-thinking deepseek-chat should not replay reasoning_content."""
        stub = self._make_agent(model="deepseek-chat")
        out = {}
        stub._copy_reasoning_content_for_api(
            {"role": "assistant", "content": "Hi"}, out
        )
        self.assertNotIn("reasoning_content", out)

    def test_native_deepseek_chat_injects_when_enabled(self):
        """deepseek-chat should replay reasoning_content once thinking is enabled."""
        stub = self._make_agent(
            model="deepseek-chat",
            reasoning_config={"enabled": True, "effort": "high"},
        )
        out = {}
        stub._copy_reasoning_content_for_api(
            {"role": "assistant", "content": "Hi"}, out
        )
        self.assertEqual(out.get("reasoning_content"), "")

    def test_non_assistant_skipped(self):
        """Non-assistant messages should be skipped entirely."""
        stub = self._make_agent()
        out = {}
        stub._copy_reasoning_content_for_api(
            {"role": "user", "content": "Hi"}, out
        )
        self.assertNotIn("reasoning_content", out)
class TestExtractReasoningIsinstance(unittest.TestCase):
    """Verify _extract_reasoning uses isinstance checks."""

    def _extract(self, **attrs):
        """Run the real _extract_reasoning against a bare message namespace."""
        from run_agent import AIAgent

        # spec= keeps the mock honest about AIAgent's attribute surface.
        stub = MagicMock(spec=AIAgent)
        stub._extract_reasoning = AIAgent._extract_reasoning.__get__(stub)
        return stub._extract_reasoning(SimpleNamespace(**attrs))

    def test_valid_string_reasoning(self):
        extracted = self._extract(reasoning="I think therefore I am")
        self.assertIn("I think therefore I am", extracted)

    def test_empty_string_reasoning_skipped(self):
        """Empty string reasoning should not be extracted."""
        self.assertIsNone(self._extract(reasoning=""))

    def test_non_string_reasoning_skipped(self):
        """Non-string reasoning (e.g. int, list) should not crash or extract."""
        self.assertIsNone(self._extract(reasoning=42))

    def test_valid_reasoning_content(self):
        extracted = self._extract(reasoning_content="Deep thought")
        self.assertIn("Deep thought", extracted)

    def test_empty_reasoning_content_skipped(self):
        self.assertIsNone(self._extract(reasoning_content=""))

    def test_non_string_reasoning_content_skipped(self):
        self.assertIsNone(self._extract(reasoning_content=["not", "a", "string"]))
class TestReasoningContentNormalization(unittest.TestCase):
    """Verify normalize_response preserves empty-string reasoning_content."""

    def test_empty_string_reasoning_content_preserved(self):
        """Empty string reasoning_content should be preserved in provider_data."""
        transport = ChatCompletionsTransport.__new__(ChatCompletionsTransport)
        message = SimpleNamespace(
            role="assistant",
            content="Hello",
            tool_calls=None,
            refusal=None,
            reasoning=None,
            reasoning_content="",
            reasoning_details=None,
        )
        usage = SimpleNamespace(
            prompt_tokens=10, completion_tokens=5, total_tokens=15
        )
        response = SimpleNamespace(
            id="resp_1",
            choices=[SimpleNamespace(index=0, message=message, finish_reason="stop")],
            usage=usage,
            model="deepseek-v4-pro",
        )
        normalized = transport.normalize_response(response)
        # "" is falsy, so a bare truthiness check would drop it — the
        # `is not None` guard must keep it.
        self.assertIn("reasoning_content", normalized.provider_data)
        self.assertEqual(normalized.provider_data["reasoning_content"], "")