fix(kimi): force fixed temperature on kimi-k2.* models (k2.5, thinking, turbo) (#12144)

* fix(kimi): force fixed temperature on kimi-k2.* models (k2.5, thinking, turbo) The prior override only matched the literal model name "kimi-for-coding", but Moonshot's coding endpoint is hit with real model IDs such as `kimi-k2.5`, `kimi-k2-turbo-preview`, `kimi-k2-thinking`, etc. Those requests bypassed the override and kept the caller's temperature, so Moonshot returns HTTP 400 "invalid temperature: only 0.6 is allowed for this model" (or 1.0 for thinking variants). Match the whole kimi-k2.* family: * kimi-k2-thinking / kimi-k2-thinking-turbo -> 1.0 (thinking mode) * all other kimi-k2.* -> 0.6 (non-thinking / instant mode) Also accept an optional vendor prefix (e.g. `moonshotai/kimi-k2.5`) so aggregator routings are covered. * refactor(kimi): whitelist-match kimi coding models instead of prefix Addresses review feedback on PR #12144. - Replace `startswith("kimi-k2")` with explicit frozensets sourced from Moonshot's kimi-for-coding model list. The prefix match would have also clamped `kimi-k2-instruct` / `kimi-k2-instruct-0905`, which are the separate non-coding K2 family with variable temperature (recommended 0.6 but not enforced — see huggingface.co/moonshotai/Kimi-K2-Instruct). - Confirmed via platform.kimi.ai docs that all five coding models (k2.5, k2-turbo-preview, k2-0905-preview, k2-thinking, k2-thinking-turbo) share the fixed-temperature lock, so the preview-model mapping is no longer an assumption. - Drop the fragile `"thinking" in bare` substring test for a set lookup. - Log a debug line on each override so operators can see when Hermes silently rewrites temperature. - Update class docstring. Extend the negative test to parametrize over kimi-k2-instruct, Kimi-K2-Instruct-0905, and a hypothetical future kimi-k2-experimental name — all must keep the caller's temperature.
2026-04-25 00:51:20 +00:00 · 2026-04-18 09:35:51 -07:00 · 2026-04-18 09:35:51 -07:00 · c14b3b5880
commit c14b3b5880
parent 656c375855
2 changed files with 90 additions and 5 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -99,11 +99,48 @@ _FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
    "kimi-for-coding": 0.6,
 }

+# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
+# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
+# value 0.6.  Any other value will result in an error."  The same lock applies
+# to the other k2.* models served on that endpoint.  Enumerated explicitly so
+# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
+# the standard chat API and third parties) are NOT clamped.
+# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
+_KIMI_INSTANT_MODELS: frozenset = frozenset({
+    "kimi-k2.5",
+    "kimi-k2-turbo-preview",
+    "kimi-k2-0905-preview",
+})
+_KIMI_THINKING_MODELS: frozenset = frozenset({
+    "kimi-k2-thinking",
+    "kimi-k2-thinking-turbo",
+})
+

 def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
-    """Return a required temperature override for models with strict contracts."""
+    """Return a required temperature override for models with strict contracts.
+
+    Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
+    the k2.5 family.  Non-thinking variants require exactly 0.6; thinking
+    variants require 1.0.  An optional ``vendor/`` prefix (e.g.
+    ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.
+
+    Returns ``None`` for every other model, including ``kimi-k2-instruct*``
+    which is the separate non-coding K2 family with variable temperature.
+    """
    normalized = (model or "").strip().lower()
-    return _FIXED_TEMPERATURE_MODELS.get(normalized)
+    fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
+    if fixed is not None:
+        logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
+        return fixed
+    bare = normalized.rsplit("/", 1)[-1]
+    if bare in _KIMI_THINKING_MODELS:
+        logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
+        return 1.0
+    if bare in _KIMI_INSTANT_MODELS:
+        logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
+        return 0.6
+    return None

 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
 _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@ -697,7 +697,12 @@ class TestIsConnectionError:


 class TestKimiForCodingTemperature:
-    """kimi-for-coding now requires temperature=0.6 exactly."""
+    """Moonshot kimi-for-coding models require fixed temperatures.
+
+    k2.5 / k2-turbo-preview / k2-0905-preview → 0.6 (non-thinking lock).
+    k2-thinking / k2-thinking-turbo → 1.0 (thinking lock).
+    kimi-k2-instruct* and every other model preserve the caller's temperature.
+    """

    def test_build_call_kwargs_forces_fixed_temperature(self):
        from agent.auxiliary_client import _build_call_kwargs
@ -772,12 +777,55 @@ class TestKimiForCodingTemperature:
        assert kwargs["model"] == "kimi-for-coding"
        assert kwargs["temperature"] == 0.6

-    def test_non_kimi_model_still_preserves_temperature(self):
+    @pytest.mark.parametrize(
+        "model,expected",
+        [
+            ("kimi-k2.5", 0.6),
+            ("kimi-k2-turbo-preview", 0.6),
+            ("kimi-k2-0905-preview", 0.6),
+            ("kimi-k2-thinking", 1.0),
+            ("kimi-k2-thinking-turbo", 1.0),
+            ("moonshotai/kimi-k2.5", 0.6),
+            ("moonshotai/Kimi-K2-Thinking", 1.0),
+        ],
+    )
+    def test_kimi_k2_family_temperature_override(self, model, expected):
+        """Moonshot kimi-k2.* models only accept fixed temperatures.
+
+        Non-thinking models → 0.6, thinking-mode models → 1.0.
+        """
        from agent.auxiliary_client import _build_call_kwargs

        kwargs = _build_call_kwargs(
            provider="kimi-coding",
-            model="kimi-k2.5",
+            model=model,
+            messages=[{"role": "user", "content": "hello"}],
+            temperature=0.3,
+        )
+
+        assert kwargs["temperature"] == expected
+
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "anthropic/claude-sonnet-4-6",
+            "gpt-5.4",
+            # kimi-k2-instruct is the non-coding K2 family — temperature is
+            # variable (recommended 0.6 but not enforced).  Must not clamp.
+            "kimi-k2-instruct",
+            "moonshotai/Kimi-K2-Instruct",
+            "moonshotai/Kimi-K2-Instruct-0905",
+            "kimi-k2-instruct-0905",
+            # Hypothetical future kimi name not in the whitelist.
+            "kimi-k2-experimental",
+        ],
+    )
+    def test_non_restricted_model_preserves_temperature(self, model):
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider="openrouter",
+            model=model,
            messages=[{"role": "user", "content": "hello"}],
            temperature=0.3,
        )