From c14b3b58806e7abd01d9ee01e4ff218c01590cd0 Mon Sep 17 00:00:00 2001 From: kshitij <82637225+kshitijk4poor@users.noreply.github.com> Date: Sat, 18 Apr 2026 09:35:51 -0700 Subject: [PATCH] fix(kimi): force fixed temperature on kimi-k2.* models (k2.5, thinking, turbo) (#12144) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(kimi): force fixed temperature on kimi-k2.* models (k2.5, thinking, turbo) The prior override only matched the literal model name "kimi-for-coding", but Moonshot's coding endpoint is hit with real model IDs such as `kimi-k2.5`, `kimi-k2-turbo-preview`, `kimi-k2-thinking`, etc. Those requests bypassed the override and kept the caller's temperature, so Moonshot returns HTTP 400 "invalid temperature: only 0.6 is allowed for this model" (or 1.0 for thinking variants). Match the whole kimi-k2.* family: * kimi-k2-thinking / kimi-k2-thinking-turbo -> 1.0 (thinking mode) * all other kimi-k2.* -> 0.6 (non-thinking / instant mode) Also accept an optional vendor prefix (e.g. `moonshotai/kimi-k2.5`) so aggregator routings are covered. * refactor(kimi): whitelist-match kimi coding models instead of prefix Addresses review feedback on PR #12144. - Replace `startswith("kimi-k2")` with explicit frozensets sourced from Moonshot's kimi-for-coding model list. The prefix match would have also clamped `kimi-k2-instruct` / `kimi-k2-instruct-0905`, which are the separate non-coding K2 family with variable temperature (recommended 0.6 but not enforced — see huggingface.co/moonshotai/Kimi-K2-Instruct). - Confirmed via platform.kimi.ai docs that all five coding models (k2.5, k2-turbo-preview, k2-0905-preview, k2-thinking, k2-thinking-turbo) share the fixed-temperature lock, so the preview-model mapping is no longer an assumption. - Drop the fragile `"thinking" in bare` substring test for a set lookup. - Log a debug line on each override so operators can see when Hermes silently rewrites temperature. 
- Update class docstring. Extend the negative test to parametrize over kimi-k2-instruct, Kimi-K2-Instruct-0905, and a hypothetical future kimi-k2-experimental name — all must keep the caller's temperature. --- agent/auxiliary_client.py | 41 +++++++++++++++++++-- tests/agent/test_auxiliary_client.py | 54 ++++++++++++++++++++++++++-- 2 files changed, 90 insertions(+), 5 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 568d610922..126f4615dd 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -99,11 +99,48 @@ _FIXED_TEMPERATURE_MODELS: Dict[str, float] = { "kimi-for-coding": 0.6, } +# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents: +# "thinking mode will use a fixed value 1.0, non-thinking mode will use a fixed +# value 0.6. Any other value will result in an error." The same lock applies +# to the other k2.* models served on that endpoint. Enumerated explicitly so +# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on +# the standard chat API and third parties) are NOT clamped. +# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart +_KIMI_INSTANT_MODELS: frozenset = frozenset({ + "kimi-k2.5", + "kimi-k2-turbo-preview", + "kimi-k2-0905-preview", +}) +_KIMI_THINKING_MODELS: frozenset = frozenset({ + "kimi-k2-thinking", + "kimi-k2-thinking-turbo", +}) + def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]: - """Return a required temperature override for models with strict contracts.""" + """Return a required temperature override for models with strict contracts. + + Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on + the k2.5 family. Non-thinking variants require exactly 0.6; thinking + variants require 1.0. An optional ``vendor/`` prefix (e.g. + ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings. 
+ + Returns ``None`` for every other model, including ``kimi-k2-instruct*`` + which is the separate non-coding K2 family with variable temperature. + """ normalized = (model or "").strip().lower() - return _FIXED_TEMPERATURE_MODELS.get(normalized) + fixed = _FIXED_TEMPERATURE_MODELS.get(normalized) + if fixed is not None: + logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model) + return fixed + bare = normalized.rsplit("/", 1)[-1] + if bare in _KIMI_THINKING_MODELS: + logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model) + return 1.0 + if bare in _KIMI_INSTANT_MODELS: + logger.debug("Forcing temperature=0.6 for kimi instant model %r", model) + return 0.6 + return None # Default auxiliary models for direct API-key providers (cheap/fast for side tasks) _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 1778855ddd..aea8152a53 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -697,7 +697,12 @@ class TestIsConnectionError: class TestKimiForCodingTemperature: - """kimi-for-coding now requires temperature=0.6 exactly.""" + """Moonshot kimi-for-coding models require fixed temperatures. + + k2.5 / k2-turbo-preview / k2-0905-preview → 0.6 (non-thinking lock). + k2-thinking / k2-thinking-turbo → 1.0 (thinking lock). + kimi-k2-instruct* and every other model preserve the caller's temperature. 
+ """ def test_build_call_kwargs_forces_fixed_temperature(self): from agent.auxiliary_client import _build_call_kwargs @@ -772,12 +777,55 @@ class TestKimiForCodingTemperature: assert kwargs["model"] == "kimi-for-coding" assert kwargs["temperature"] == 0.6 - def test_non_kimi_model_still_preserves_temperature(self): + @pytest.mark.parametrize( + "model,expected", + [ + ("kimi-k2.5", 0.6), + ("kimi-k2-turbo-preview", 0.6), + ("kimi-k2-0905-preview", 0.6), + ("kimi-k2-thinking", 1.0), + ("kimi-k2-thinking-turbo", 1.0), + ("moonshotai/kimi-k2.5", 0.6), + ("moonshotai/Kimi-K2-Thinking", 1.0), + ], + ) + def test_kimi_k2_family_temperature_override(self, model, expected): + """Moonshot kimi-k2.* models only accept fixed temperatures. + + Non-thinking models → 0.6, thinking-mode models → 1.0. + """ from agent.auxiliary_client import _build_call_kwargs kwargs = _build_call_kwargs( provider="kimi-coding", - model="kimi-k2.5", + model=model, + messages=[{"role": "user", "content": "hello"}], + temperature=0.3, + ) + + assert kwargs["temperature"] == expected + + @pytest.mark.parametrize( + "model", + [ + "anthropic/claude-sonnet-4-6", + "gpt-5.4", + # kimi-k2-instruct is the non-coding K2 family — temperature is + # variable (recommended 0.6 but not enforced). Must not clamp. + "kimi-k2-instruct", + "moonshotai/Kimi-K2-Instruct", + "moonshotai/Kimi-K2-Instruct-0905", + "kimi-k2-instruct-0905", + # Hypothetical future kimi name not in the whitelist. + "kimi-k2-experimental", + ], + ) + def test_non_restricted_model_preserves_temperature(self, model): + from agent.auxiliary_client import _build_call_kwargs + + kwargs = _build_call_kwargs( + provider="openrouter", + model=model, messages=[{"role": "user", "content": "hello"}], temperature=0.3, )