From ce4e74b35025c108a228700b53bd479eaafaa660 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 6 Jun 2026 19:59:34 -0700
Subject: [PATCH] fix(kimi): send thinking xor reasoning_effort, never both

The standalone Kimi/Moonshot profile (api.moonshot.ai/v1) sent both
extra_body.thinking AND a top-level reasoning_effort. With no reasoning
config it even defaulted to thinking:enabled + reasoning_effort:medium,
pairing them on every default call. Moonshot treats these as mutually
exclusive (cannot specify both 'thinking' and 'reasoning_effort').

Align with the kimi-k2 handling already shipped for the opencode-go
relay: send effort when a recognized low|medium|high is requested,
otherwise fall back to the extra_body.thinking toggle. Disabled sends
thinking:disabled only. Never both.

Reported by Cars29 (NOUS Discord). DeepSeek was deliberately left
untouched: its native endpoint accepts both (verified by the live
guardrail in test_deepseek_v4_thinking_live.py), so the report's
DeepSeek claim does not hold there.

Tests: tests/plugins/model_providers/test_kimi_profile.py pins the xor
contract across all config shapes.
---
 .../model-providers/kimi-coding/__init__.py   |  23 +++-
 .../model_providers/test_kimi_profile.py      | 130 ++++++++++++++++++
 2 files changed, 146 insertions(+), 7 deletions(-)
 create mode 100644 tests/plugins/model_providers/test_kimi_profile.py

diff --git a/plugins/model-providers/kimi-coding/__init__.py b/plugins/model-providers/kimi-coding/__init__.py
index ed96ec514ef..c425520308a 100644
--- a/plugins/model-providers/kimi-coding/__init__.py
+++ b/plugins/model-providers/kimi-coding/__init__.py
@@ -14,19 +14,28 @@ from providers.base import OMIT_TEMPERATURE, ProviderProfile
 
 
 class KimiProfile(ProviderProfile):
-    """Kimi/Moonshot — temperature omitted, thinking + reasoning_effort."""
+    """Kimi/Moonshot — temperature omitted, thinking xor reasoning_effort."""
 
     def build_api_kwargs_extras(
         self, *, reasoning_config: dict | None = None, **context
     ) -> tuple[dict[str, Any], dict[str, Any]]:
-        """Kimi uses extra_body.thinking + top-level reasoning_effort."""
+        """Kimi reasoning controls.
+
+        Moonshot's wire shape treats ``extra_body.thinking`` (a binary toggle)
+        and a top-level ``reasoning_effort`` as mutually exclusive — sending
+        both is at best redundant and risks "cannot specify both 'thinking' and
+        'reasoning_effort'" (HTTP 400). This mirrors the kimi-k2 handling on the
+        opencode-go relay: send effort when one is requested, otherwise fall
+        back to ``extra_body.thinking`` — never both.
+        """
         extra_body = {}
         top_level = {}
 
         if not reasoning_config or not isinstance(reasoning_config, dict):
-            # No config → thinking enabled, default effort
+            # No config → thinking enabled, let the server pick the depth.
+            # (Previously also sent reasoning_effort="medium", which paired
+            # thinking + effort on every default call.)
             extra_body["thinking"] = {"type": "enabled"}
-            top_level["reasoning_effort"] = "medium"
             return extra_body, top_level
 
         enabled = reasoning_config.get("enabled", True)
@@ -34,13 +43,13 @@ class KimiProfile(ProviderProfile):
             extra_body["thinking"] = {"type": "disabled"}
             return extra_body, top_level
 
-        # Enabled
-        extra_body["thinking"] = {"type": "enabled"}
+        # Enabled: prefer an explicit effort; only fall back to extra_body
+        # thinking when no recognized effort is requested.
         effort = (reasoning_config.get("effort") or "").strip().lower()
         if effort in {"low", "medium", "high"}:
             top_level["reasoning_effort"] = effort
         else:
-            top_level["reasoning_effort"] = "medium"
+            extra_body["thinking"] = {"type": "enabled"}
 
         return extra_body, top_level
 
diff --git a/tests/plugins/model_providers/test_kimi_profile.py b/tests/plugins/model_providers/test_kimi_profile.py
new file mode 100644
index 00000000000..5600b23865b
--- /dev/null
+++ b/tests/plugins/model_providers/test_kimi_profile.py
@@ -0,0 +1,130 @@
+"""Unit tests for the Kimi/Moonshot provider profile's reasoning wiring.
+
+Moonshot's OpenAI-compat endpoint (``api.moonshot.ai/v1``) treats
+``extra_body.thinking`` and a top-level ``reasoning_effort`` as mutually
+exclusive. The profile must send at most one of them — never both — so a
+request can't trip "cannot specify both 'thinking' and 'reasoning_effort'".
+
+This mirrors the kimi-k2 handling already shipped for the opencode-go relay
+(see ``tests/plugins/model_providers/test_opencode_go_profile.py``).
+"""
+
+from __future__ import annotations
+
+import pytest
+
+
+@pytest.fixture
+def kimi_profile():
+    """Resolve the registered Kimi profile via the provider registry.
+
+    Importing ``model_tools`` triggers plugin discovery, which registers the
+    Kimi profile. Going through ``get_provider_profile`` keeps the test honest:
+    if the registered class is ever swapped for a plain ``ProviderProfile`` the
+    assertions below collapse.
+    """
+    import model_tools  # noqa: F401
+    import providers
+
+    profile = providers.get_provider_profile("kimi-coding")
+    assert profile is not None, "kimi-coding provider profile must be registered"
+    return profile
+
+
+class TestKimiReasoningWireShape:
+    """``build_api_kwargs_extras`` never emits thinking + reasoning_effort together."""
+
+    def test_no_config_enables_thinking_without_effort(self, kimi_profile):
+        """No reasoning_config → thinking on, server picks the depth.
+
+        Regression guard: this path previously also sent
+        ``reasoning_effort="medium"``, pairing thinking + effort on every
+        default call.
+        """
+        extra_body, top_level = kimi_profile.build_api_kwargs_extras(reasoning_config=None)
+        assert extra_body == {"thinking": {"type": "enabled"}}
+        assert top_level == {}
+
+    @pytest.mark.parametrize("effort", ["low", "medium", "high"])
+    def test_explicit_effort_sends_effort_only(self, kimi_profile, effort):
+        extra_body, top_level = kimi_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": effort}
+        )
+        assert top_level == {"reasoning_effort": effort}
+        assert "thinking" not in extra_body
+
+    def test_enabled_without_effort_falls_back_to_thinking(self, kimi_profile):
+        extra_body, top_level = kimi_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True}
+        )
+        assert extra_body == {"thinking": {"type": "enabled"}}
+        assert top_level == {}
+
+    @pytest.mark.parametrize("effort", ["", "garbage", "xhigh", "max"])
+    def test_unrecognized_effort_falls_back_to_thinking(self, kimi_profile, effort):
+        """Unknown/strong efforts aren't in Moonshot's low|medium|high set, so
+        we drop to the thinking toggle rather than sending an invalid effort."""
+        extra_body, top_level = kimi_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": effort}
+        )
+        assert extra_body == {"thinking": {"type": "enabled"}}
+        assert top_level == {}
+
+    def test_disabled_sends_thinking_disabled_only(self, kimi_profile):
+        extra_body, top_level = kimi_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": False}
+        )
+        assert extra_body == {"thinking": {"type": "disabled"}}
+        assert top_level == {}
+
+    def test_disabled_ignores_effort(self, kimi_profile):
+        extra_body, top_level = kimi_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": False, "effort": "high"}
+        )
+        assert extra_body == {"thinking": {"type": "disabled"}}
+        assert top_level == {}
+
+    @pytest.mark.parametrize(
+        "reasoning_config",
+        [
+            None,
+            {"enabled": True},
+            {"enabled": True, "effort": "high"},
+            {"enabled": True, "effort": "garbage"},
+            {"enabled": False},
+            {"enabled": False, "effort": "low"},
+        ],
+    )
+    def test_never_emits_both(self, kimi_profile, reasoning_config):
+        """The core invariant: thinking and reasoning_effort are never both set."""
+        extra_body, top_level = kimi_profile.build_api_kwargs_extras(
+            reasoning_config=reasoning_config
+        )
+        assert not ("thinking" in extra_body and "reasoning_effort" in top_level)
+
+
+class TestKimiFullKwargsIntegration:
+    """The transport's full kwargs carry at most one reasoning knob."""
+
+    def _build(self, kimi_profile, reasoning_config):
+        from agent.transports.chat_completions import ChatCompletionsTransport
+
+        return ChatCompletionsTransport().build_kwargs(
+            model="kimi-k2-turbo-preview",
+            messages=[{"role": "user", "content": "ping"}],
+            tools=None,
+            provider_profile=kimi_profile,
+            reasoning_config=reasoning_config,
+            base_url="https://api.moonshot.ai/v1",
+            provider_name="kimi-coding",
+        )
+
+    def test_explicit_effort_omits_thinking(self, kimi_profile):
+        kwargs = self._build(kimi_profile, {"enabled": True, "effort": "high"})
+        assert kwargs["reasoning_effort"] == "high"
+        assert "thinking" not in kwargs.get("extra_body", {})
+
+    def test_no_config_omits_effort(self, kimi_profile):
+        kwargs = self._build(kimi_profile, None)
+        assert "reasoning_effort" not in kwargs
+        assert kwargs["extra_body"] == {"thinking": {"type": "enabled"}}