mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(kimi): send thinking xor reasoning_effort, never both
The standalone Kimi/Moonshot profile (api.moonshot.ai/v1) sent both extra_body.thinking AND a top-level reasoning_effort. With no reasoning config it even defaulted to thinking:enabled + reasoning_effort:medium, pairing them on every default call. Moonshot treats these as mutually exclusive (cannot specify both 'thinking' and 'reasoning_effort').

Align with the kimi-k2 handling already shipped for the opencode-go relay: send effort when a recognized low|medium|high is requested, otherwise fall back to the extra_body.thinking toggle. Disabled sends thinking:disabled only. Never both.

Reported by Cars29 (NOUS Discord).

DeepSeek was deliberately left untouched: its native endpoint accepts both (verified by the live guardrail in test_deepseek_v4_thinking_live.py), so the report's DeepSeek claim does not hold there.

Tests: tests/plugins/model_providers/test_kimi_profile.py pins the xor contract across all config shapes.
This commit is contained in:
parent
03392b67d6
commit
ce4e74b350
2 changed files with 146 additions and 7 deletions
|
|
@@ -14,19 +14,28 @@ from providers.base import OMIT_TEMPERATURE, ProviderProfile
|
||||||
|
|
||||||
|
|
||||||
class KimiProfile(ProviderProfile):
|
class KimiProfile(ProviderProfile):
|
||||||
"""Kimi/Moonshot — temperature omitted, thinking + reasoning_effort."""
|
"""Kimi/Moonshot — temperature omitted, thinking xor reasoning_effort."""
|
||||||
|
|
||||||
def build_api_kwargs_extras(
|
def build_api_kwargs_extras(
|
||||||
self, *, reasoning_config: dict | None = None, **context
|
self, *, reasoning_config: dict | None = None, **context
|
||||||
) -> tuple[dict[str, Any], dict[str, Any]]:
|
) -> tuple[dict[str, Any], dict[str, Any]]:
|
||||||
"""Kimi uses extra_body.thinking + top-level reasoning_effort."""
|
"""Kimi reasoning controls.
|
||||||
|
|
||||||
|
Moonshot's wire shape treats ``extra_body.thinking`` (a binary toggle)
|
||||||
|
and a top-level ``reasoning_effort`` as mutually exclusive — sending
|
||||||
|
both is at best redundant and risks "cannot specify both 'thinking' and
|
||||||
|
'reasoning_effort'" (HTTP 400). This mirrors the kimi-k2 handling on the
|
||||||
|
opencode-go relay: send effort when one is requested, otherwise fall
|
||||||
|
back to ``extra_body.thinking`` — never both.
|
||||||
|
"""
|
||||||
extra_body = {}
|
extra_body = {}
|
||||||
top_level = {}
|
top_level = {}
|
||||||
|
|
||||||
if not reasoning_config or not isinstance(reasoning_config, dict):
|
if not reasoning_config or not isinstance(reasoning_config, dict):
|
||||||
# No config → thinking enabled, default effort
|
# No config → thinking enabled, let the server pick the depth.
|
||||||
|
# (Previously also sent reasoning_effort="medium", which paired
|
||||||
|
# thinking + effort on every default call.)
|
||||||
extra_body["thinking"] = {"type": "enabled"}
|
extra_body["thinking"] = {"type": "enabled"}
|
||||||
top_level["reasoning_effort"] = "medium"
|
|
||||||
return extra_body, top_level
|
return extra_body, top_level
|
||||||
|
|
||||||
enabled = reasoning_config.get("enabled", True)
|
enabled = reasoning_config.get("enabled", True)
|
||||||
|
|
@@ -34,13 +43,13 @@ class KimiProfile(ProviderProfile):
|
||||||
extra_body["thinking"] = {"type": "disabled"}
|
extra_body["thinking"] = {"type": "disabled"}
|
||||||
return extra_body, top_level
|
return extra_body, top_level
|
||||||
|
|
||||||
# Enabled
|
# Enabled: prefer an explicit effort; only fall back to extra_body
|
||||||
extra_body["thinking"] = {"type": "enabled"}
|
# thinking when no recognized effort is requested.
|
||||||
effort = (reasoning_config.get("effort") or "").strip().lower()
|
effort = (reasoning_config.get("effort") or "").strip().lower()
|
||||||
if effort in {"low", "medium", "high"}:
|
if effort in {"low", "medium", "high"}:
|
||||||
top_level["reasoning_effort"] = effort
|
top_level["reasoning_effort"] = effort
|
||||||
else:
|
else:
|
||||||
top_level["reasoning_effort"] = "medium"
|
extra_body["thinking"] = {"type": "enabled"}
|
||||||
|
|
||||||
return extra_body, top_level
|
return extra_body, top_level
|
||||||
|
|
||||||
|
|
|
||||||
130
tests/plugins/model_providers/test_kimi_profile.py
Normal file
130
tests/plugins/model_providers/test_kimi_profile.py
Normal file
|
|
@@ -0,0 +1,130 @@
|
||||||
|
"""Unit tests for the Kimi/Moonshot provider profile's reasoning wiring.
|
||||||
|
|
||||||
|
Moonshot's OpenAI-compat endpoint (``api.moonshot.ai/v1``) treats
|
||||||
|
``extra_body.thinking`` and a top-level ``reasoning_effort`` as mutually
|
||||||
|
exclusive. The profile must send at most one of them — never both — so a
|
||||||
|
request can't trip "cannot specify both 'thinking' and 'reasoning_effort'".
|
||||||
|
|
||||||
|
This mirrors the kimi-k2 handling already shipped for the opencode-go relay
|
||||||
|
(see ``tests/plugins/model_providers/test_opencode_go_profile.py``).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def kimi_profile():
|
||||||
|
"""Resolve the registered Kimi profile via the provider registry.
|
||||||
|
|
||||||
|
Importing ``model_tools`` triggers plugin discovery, which registers the
|
||||||
|
Kimi profile. Going through ``get_provider_profile`` keeps the test honest:
|
||||||
|
if the registered class is ever swapped for a plain ``ProviderProfile`` the
|
||||||
|
assertions below collapse.
|
||||||
|
"""
|
||||||
|
import model_tools # noqa: F401
|
||||||
|
import providers
|
||||||
|
|
||||||
|
profile = providers.get_provider_profile("kimi-coding")
|
||||||
|
assert profile is not None, "kimi-coding provider profile must be registered"
|
||||||
|
return profile
|
||||||
|
|
||||||
|
|
||||||
|
class TestKimiReasoningWireShape:
|
||||||
|
"""``build_api_kwargs_extras`` never emits thinking + reasoning_effort together."""
|
||||||
|
|
||||||
|
def test_no_config_enables_thinking_without_effort(self, kimi_profile):
|
||||||
|
"""No reasoning_config → thinking on, server picks the depth.
|
||||||
|
|
||||||
|
Regression guard: this path previously also sent
|
||||||
|
``reasoning_effort="medium"``, pairing thinking + effort on every
|
||||||
|
default call.
|
||||||
|
"""
|
||||||
|
extra_body, top_level = kimi_profile.build_api_kwargs_extras(reasoning_config=None)
|
||||||
|
assert extra_body == {"thinking": {"type": "enabled"}}
|
||||||
|
assert top_level == {}
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("effort", ["low", "medium", "high"])
|
||||||
|
def test_explicit_effort_sends_effort_only(self, kimi_profile, effort):
|
||||||
|
extra_body, top_level = kimi_profile.build_api_kwargs_extras(
|
||||||
|
reasoning_config={"enabled": True, "effort": effort}
|
||||||
|
)
|
||||||
|
assert top_level == {"reasoning_effort": effort}
|
||||||
|
assert "thinking" not in extra_body
|
||||||
|
|
||||||
|
def test_enabled_without_effort_falls_back_to_thinking(self, kimi_profile):
|
||||||
|
extra_body, top_level = kimi_profile.build_api_kwargs_extras(
|
||||||
|
reasoning_config={"enabled": True}
|
||||||
|
)
|
||||||
|
assert extra_body == {"thinking": {"type": "enabled"}}
|
||||||
|
assert top_level == {}
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("effort", ["", "garbage", "xhigh", "max"])
|
||||||
|
def test_unrecognized_effort_falls_back_to_thinking(self, kimi_profile, effort):
|
||||||
|
"""Unknown/strong efforts aren't in Moonshot's low|medium|high set, so
|
||||||
|
we drop to the thinking toggle rather than sending an invalid effort."""
|
||||||
|
extra_body, top_level = kimi_profile.build_api_kwargs_extras(
|
||||||
|
reasoning_config={"enabled": True, "effort": effort}
|
||||||
|
)
|
||||||
|
assert extra_body == {"thinking": {"type": "enabled"}}
|
||||||
|
assert top_level == {}
|
||||||
|
|
||||||
|
def test_disabled_sends_thinking_disabled_only(self, kimi_profile):
|
||||||
|
extra_body, top_level = kimi_profile.build_api_kwargs_extras(
|
||||||
|
reasoning_config={"enabled": False}
|
||||||
|
)
|
||||||
|
assert extra_body == {"thinking": {"type": "disabled"}}
|
||||||
|
assert top_level == {}
|
||||||
|
|
||||||
|
def test_disabled_ignores_effort(self, kimi_profile):
|
||||||
|
extra_body, top_level = kimi_profile.build_api_kwargs_extras(
|
||||||
|
reasoning_config={"enabled": False, "effort": "high"}
|
||||||
|
)
|
||||||
|
assert extra_body == {"thinking": {"type": "disabled"}}
|
||||||
|
assert top_level == {}
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"reasoning_config",
|
||||||
|
[
|
||||||
|
None,
|
||||||
|
{"enabled": True},
|
||||||
|
{"enabled": True, "effort": "high"},
|
||||||
|
{"enabled": True, "effort": "garbage"},
|
||||||
|
{"enabled": False},
|
||||||
|
{"enabled": False, "effort": "low"},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_never_emits_both(self, kimi_profile, reasoning_config):
|
||||||
|
"""The core invariant: thinking and reasoning_effort are never both set."""
|
||||||
|
extra_body, top_level = kimi_profile.build_api_kwargs_extras(
|
||||||
|
reasoning_config=reasoning_config
|
||||||
|
)
|
||||||
|
assert not ("thinking" in extra_body and "reasoning_effort" in top_level)
|
||||||
|
|
||||||
|
|
||||||
|
class TestKimiFullKwargsIntegration:
|
||||||
|
"""The transport's full kwargs carry at most one reasoning knob."""
|
||||||
|
|
||||||
|
def _build(self, kimi_profile, reasoning_config):
|
||||||
|
from agent.transports.chat_completions import ChatCompletionsTransport
|
||||||
|
|
||||||
|
return ChatCompletionsTransport().build_kwargs(
|
||||||
|
model="kimi-k2-turbo-preview",
|
||||||
|
messages=[{"role": "user", "content": "ping"}],
|
||||||
|
tools=None,
|
||||||
|
provider_profile=kimi_profile,
|
||||||
|
reasoning_config=reasoning_config,
|
||||||
|
base_url="https://api.moonshot.ai/v1",
|
||||||
|
provider_name="kimi-coding",
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_explicit_effort_omits_thinking(self, kimi_profile):
|
||||||
|
kwargs = self._build(kimi_profile, {"enabled": True, "effort": "high"})
|
||||||
|
assert kwargs["reasoning_effort"] == "high"
|
||||||
|
assert "thinking" not in kwargs.get("extra_body", {})
|
||||||
|
|
||||||
|
def test_no_config_omits_effort(self, kimi_profile):
|
||||||
|
kwargs = self._build(kimi_profile, None)
|
||||||
|
assert "reasoning_effort" not in kwargs
|
||||||
|
assert kwargs["extra_body"] == {"thinking": {"type": "enabled"}}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue