From ce4e74b35025c108a228700b53bd479eaafaa660 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 6 Jun 2026 19:59:34 -0700
Subject: [PATCH] fix(kimi): send thinking xor reasoning_effort, never both

The standalone Kimi/Moonshot profile (api.moonshot.ai/v1) sent both
extra_body.thinking AND a top-level reasoning_effort. With no reasoning
config it even defaulted to thinking:enabled + reasoning_effort:medium,
pairing them on every default call. Moonshot treats these as mutually
exclusive (error: "cannot specify both 'thinking' and 'reasoning_effort'").

Align with the kimi-k2 handling already shipped for the opencode-go relay:
send reasoning_effort when a recognized effort (low|medium|high) is
requested; otherwise fall back to the extra_body.thinking toggle. A disabled
config sends thinking:disabled only. The two are never sent together.

Reported by Cars29 (NOUS Discord). DeepSeek was deliberately left untouched:
its native endpoint accepts both (verified by the live guardrail in
test_deepseek_v4_thinking_live.py), so the report's DeepSeek claim does not
hold there.

Tests: tests/plugins/model_providers/test_kimi_profile.py pins the xor
contract across all config shapes.
---
 .../model-providers/kimi-coding/__init__.py   |  23 +++-
 .../model_providers/test_kimi_profile.py      | 130 ++++++++++++++++++
 2 files changed, 146 insertions(+), 7 deletions(-)
 create mode 100644 tests/plugins/model_providers/test_kimi_profile.py

diff --git a/plugins/model-providers/kimi-coding/__init__.py b/plugins/model-providers/kimi-coding/__init__.py
index ed96ec514ef..c425520308a 100644
--- a/plugins/model-providers/kimi-coding/__init__.py
+++ b/plugins/model-providers/kimi-coding/__init__.py
@@ -14,19 +14,28 @@ from providers.base import OMIT_TEMPERATURE, ProviderProfile
 
 
 class KimiProfile(ProviderProfile):
-    """Kimi/Moonshot — temperature omitted, thinking + reasoning_effort."""
+    """Kimi/Moonshot — temperature omitted, thinking xor reasoning_effort."""
 
     def build_api_kwargs_extras(
         self, *, reasoning_config: dict | None = None, **context
     ) -> tuple[dict[str, Any], dict[str, Any]]:
-        """Kimi uses extra_body.thinking + top-level reasoning_effort."""
+        """Kimi reasoning controls.
+
+        Moonshot's wire shape treats ``extra_body.thinking`` (a binary toggle)
+        and a top-level ``reasoning_effort`` as mutually exclusive — sending
+        both is at best redundant and risks "cannot specify both 'thinking' and
+        'reasoning_effort'" (HTTP 400). This mirrors the kimi-k2 handling on the
+        opencode-go relay: send effort when a recognized low|medium|high is
+        requested, otherwise fall back to ``extra_body.thinking`` — never both.
+        """
         extra_body = {}
         top_level = {}
 
         if not reasoning_config or not isinstance(reasoning_config, dict):
-            # No config → thinking enabled, default effort
+            # No config → thinking enabled, let the server pick the depth.
+            # (Previously also sent reasoning_effort="medium", which paired
+            # thinking + effort on every default call.)
             extra_body["thinking"] = {"type": "enabled"}
-            top_level["reasoning_effort"] = "medium"
             return extra_body, top_level
 
         enabled = reasoning_config.get("enabled", True)
@@ -34,13 +43,13 @@ class KimiProfile(ProviderProfile):
             extra_body["thinking"] = {"type": "disabled"}
             return extra_body, top_level
 
-        # Enabled
-        extra_body["thinking"] = {"type": "enabled"}
+        # Enabled: prefer an explicit effort; only fall back to extra_body
+        # thinking when no recognized effort is requested.
         effort = (reasoning_config.get("effort") or "").strip().lower()
         if effort in {"low", "medium", "high"}:
             top_level["reasoning_effort"] = effort
         else:
-            top_level["reasoning_effort"] = "medium"
+            extra_body["thinking"] = {"type": "enabled"}
 
         return extra_body, top_level
 
diff --git a/tests/plugins/model_providers/test_kimi_profile.py b/tests/plugins/model_providers/test_kimi_profile.py
new file mode 100644
index 00000000000..5600b23865b
--- /dev/null
+++ b/tests/plugins/model_providers/test_kimi_profile.py
@@ -0,0 +1,130 @@
+"""Unit tests for the Kimi/Moonshot provider profile's reasoning wiring.
+
+Moonshot's OpenAI-compat endpoint (``api.moonshot.ai/v1``) treats
+``extra_body.thinking`` and a top-level ``reasoning_effort`` as mutually
+exclusive. The profile must send at most one of them — never both — so a
+request can't trip "cannot specify both 'thinking' and 'reasoning_effort'".
+
+This mirrors the kimi-k2 handling already shipped for the opencode-go relay
+(see ``tests/plugins/model_providers/test_opencode_go_profile.py``).
+"""
+
+from __future__ import annotations
+
+import pytest
+
+
+@pytest.fixture
+def kimi_profile():
+    """Resolve the registered Kimi profile via the provider registry.
+
+    Importing ``model_tools`` triggers plugin discovery, which registers the
+    Kimi profile. Going through ``get_provider_profile`` keeps the test honest:
+    if the registered class is ever swapped for a plain ``ProviderProfile`` the
+    assertions below collapse.
+    """
+    import model_tools  # noqa: F401
+    import providers
+
+    profile = providers.get_provider_profile("kimi-coding")
+    assert profile is not None, "kimi-coding provider profile must be registered"
+    return profile
+
+
+class TestKimiReasoningWireShape:
+    """``build_api_kwargs_extras`` never emits thinking + reasoning_effort together."""
+
+    def test_no_config_enables_thinking_without_effort(self, kimi_profile):
+        """No reasoning_config → thinking on, server picks the depth.
+
+        Regression guard: this path previously also sent
+        ``reasoning_effort="medium"``, pairing thinking + effort on every
+        default call.
+        """
+        extra_body, top_level = kimi_profile.build_api_kwargs_extras(reasoning_config=None)
+        assert extra_body == {"thinking": {"type": "enabled"}}
+        assert top_level == {}
+
+    @pytest.mark.parametrize("effort", ["low", "medium", "high"])
+    def test_explicit_effort_sends_effort_only(self, kimi_profile, effort):
+        extra_body, top_level = kimi_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": effort}
+        )
+        assert top_level == {"reasoning_effort": effort}
+        assert "thinking" not in extra_body
+
+    def test_enabled_without_effort_falls_back_to_thinking(self, kimi_profile):
+        extra_body, top_level = kimi_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True}
+        )
+        assert extra_body == {"thinking": {"type": "enabled"}}
+        assert top_level == {}
+
+    @pytest.mark.parametrize("effort", ["", "garbage", "xhigh", "max"])
+    def test_unrecognized_effort_falls_back_to_thinking(self, kimi_profile, effort):
+        """Unknown/strong efforts aren't in Moonshot's low|medium|high set, so
+        we drop to the thinking toggle rather than sending an invalid effort."""
+        extra_body, top_level = kimi_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": effort}
+        )
+        assert extra_body == {"thinking": {"type": "enabled"}}
+        assert top_level == {}
+
+    def test_disabled_sends_thinking_disabled_only(self, kimi_profile):
+        extra_body, top_level = kimi_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": False}
+        )
+        assert extra_body == {"thinking": {"type": "disabled"}}
+        assert top_level == {}
+
+    def test_disabled_ignores_effort(self, kimi_profile):
+        extra_body, top_level = kimi_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": False, "effort": "high"}
+        )
+        assert extra_body == {"thinking": {"type": "disabled"}}
+        assert top_level == {}
+
+    @pytest.mark.parametrize(
+        "reasoning_config",
+        [
+            None,
+            {"enabled": True},
+            {"enabled": True, "effort": "high"},
+            {"enabled": True, "effort": "garbage"},
+            {"enabled": False},
+            {"enabled": False, "effort": "low"},
+        ],
+    )
+    def test_never_emits_both(self, kimi_profile, reasoning_config):
+        """The core invariant: thinking and reasoning_effort are never both set."""
+        extra_body, top_level = kimi_profile.build_api_kwargs_extras(
+            reasoning_config=reasoning_config
+        )
+        assert not ("thinking" in extra_body and "reasoning_effort" in top_level)
+
+
+class TestKimiFullKwargsIntegration:
+    """The transport's full kwargs carry at most one reasoning knob."""
+
+    def _build(self, kimi_profile, reasoning_config):
+        from agent.transports.chat_completions import ChatCompletionsTransport
+
+        return ChatCompletionsTransport().build_kwargs(
+            model="kimi-k2-turbo-preview",
+            messages=[{"role": "user", "content": "ping"}],
+            tools=None,
+            provider_profile=kimi_profile,
+            reasoning_config=reasoning_config,
+            base_url="https://api.moonshot.ai/v1",
+            provider_name="kimi-coding",
+        )
+
+    def test_explicit_effort_omits_thinking(self, kimi_profile):
+        kwargs = self._build(kimi_profile, {"enabled": True, "effort": "high"})
+        assert kwargs["reasoning_effort"] == "high"
+        assert "thinking" not in kwargs.get("extra_body", {})
+
+    def test_no_config_omits_effort(self, kimi_profile):
+        kwargs = self._build(kimi_profile, None)
+        assert "reasoning_effort" not in kwargs
+        assert kwargs["extra_body"] == {"thinking": {"type": "enabled"}}