From c14b3b58806e7abd01d9ee01e4ff218c01590cd0 Mon Sep 17 00:00:00 2001 From: kshitij <82637225+kshitijk4poor@users.noreply.github.com> Date: Sat, 18 Apr 2026 09:35:51 -0700 Subject: [PATCH] fix(kimi): force fixed temperature on kimi-k2.* models (k2.5, thinking, turbo) (#12144) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(kimi): force fixed temperature on kimi-k2.* models (k2.5, thinking, turbo) The prior override only matched the literal model name "kimi-for-coding", but Moonshot's coding endpoint is hit with real model IDs such as `kimi-k2.5`, `kimi-k2-turbo-preview`, `kimi-k2-thinking`, etc. Those requests bypassed the override and kept the caller's temperature, so Moonshot returns HTTP 400 "invalid temperature: only 0.6 is allowed for this model" (or 1.0 for thinking variants). Match the whole kimi-k2.* family: * kimi-k2-thinking / kimi-k2-thinking-turbo -> 1.0 (thinking mode) * all other kimi-k2.* -> 0.6 (non-thinking / instant mode) Also accept an optional vendor prefix (e.g. `moonshotai/kimi-k2.5`) so aggregator routings are covered. * refactor(kimi): whitelist-match kimi coding models instead of prefix Addresses review feedback on PR #12144. - Replace `startswith("kimi-k2")` with explicit frozensets sourced from Moonshot's kimi-for-coding model list. The prefix match would have also clamped `kimi-k2-instruct` / `kimi-k2-instruct-0905`, which are the separate non-coding K2 family with variable temperature (recommended 0.6 but not enforced — see huggingface.co/moonshotai/Kimi-K2-Instruct). - Confirmed via platform.kimi.ai docs that all five coding models (k2.5, k2-turbo-preview, k2-0905-preview, k2-thinking, k2-thinking-turbo) share the fixed-temperature lock, so the preview-model mapping is no longer an assumption. - Drop the fragile `"thinking" in bare` substring test for a set lookup. - Log a debug line on each override so operators can see when Hermes silently rewrites temperature. 
- Update class docstring. Extend the negative test to parametrize over kimi-k2-instruct, Kimi-K2-Instruct-0905, and a hypothetical future kimi-k2-experimental name — all must keep the caller's temperature. --- agent/auxiliary_client.py | 41 +++++++++++++++++++-- tests/agent/test_auxiliary_client.py | 54 ++++++++++++++++++++++++++-- 2 files changed, 90 insertions(+), 5 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 568d610922..126f4615dd 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -99,11 +99,48 @@ _FIXED_TEMPERATURE_MODELS: Dict[str, float] = { "kimi-for-coding": 0.6, } +# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents: +# "thinking mode will use a fixed value 1.0, non-thinking mode will use a fixed +# value 0.6. Any other value will result in an error." The same lock applies +# to the other k2.* models served on that endpoint. Enumerated explicitly so +# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on +# the standard chat API and third parties) are NOT clamped. +# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart +_KIMI_INSTANT_MODELS: frozenset = frozenset({ + "kimi-k2.5", + "kimi-k2-turbo-preview", + "kimi-k2-0905-preview", +}) +_KIMI_THINKING_MODELS: frozenset = frozenset({ + "kimi-k2-thinking", + "kimi-k2-thinking-turbo", +}) + def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]: - """Return a required temperature override for models with strict contracts.""" + """Return a required temperature override for models with strict contracts. + + Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on + the k2.5 family. Non-thinking variants require exactly 0.6; thinking + variants require 1.0. An optional ``vendor/`` prefix (e.g. + ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings. 
+ + Returns ``None`` for every other model, including ``kimi-k2-instruct*`` + which is the separate non-coding K2 family with variable temperature. + """ normalized = (model or "").strip().lower() - return _FIXED_TEMPERATURE_MODELS.get(normalized) + fixed = _FIXED_TEMPERATURE_MODELS.get(normalized) + if fixed is not None: + logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model) + return fixed + bare = normalized.rsplit("/", 1)[-1] + if bare in _KIMI_THINKING_MODELS: + logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model) + return 1.0 + if bare in _KIMI_INSTANT_MODELS: + logger.debug("Forcing temperature=0.6 for kimi instant model %r", model) + return 0.6 + return None # Default auxiliary models for direct API-key providers (cheap/fast for side tasks) _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 1778855ddd..aea8152a53 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -697,7 +697,12 @@ class TestIsConnectionError: class TestKimiForCodingTemperature: - """kimi-for-coding now requires temperature=0.6 exactly.""" + """Moonshot kimi-for-coding models require fixed temperatures. + + k2.5 / k2-turbo-preview / k2-0905-preview → 0.6 (non-thinking lock). + k2-thinking / k2-thinking-turbo → 1.0 (thinking lock). + kimi-k2-instruct* and every other model preserve the caller's temperature. 
+ """ def test_build_call_kwargs_forces_fixed_temperature(self): from agent.auxiliary_client import _build_call_kwargs @@ -772,12 +777,55 @@ class TestKimiForCodingTemperature: assert kwargs["model"] == "kimi-for-coding" assert kwargs["temperature"] == 0.6 - def test_non_kimi_model_still_preserves_temperature(self): + @pytest.mark.parametrize( + "model,expected", + [ + ("kimi-k2.5", 0.6), + ("kimi-k2-turbo-preview", 0.6), + ("kimi-k2-0905-preview", 0.6), + ("kimi-k2-thinking", 1.0), + ("kimi-k2-thinking-turbo", 1.0), + ("moonshotai/kimi-k2.5", 0.6), + ("moonshotai/Kimi-K2-Thinking", 1.0), + ], + ) + def test_kimi_k2_family_temperature_override(self, model, expected): + """Moonshot kimi-k2.* models only accept fixed temperatures. + + Non-thinking models → 0.6, thinking-mode models → 1.0. + """ from agent.auxiliary_client import _build_call_kwargs kwargs = _build_call_kwargs( provider="kimi-coding", - model="kimi-k2.5", + model=model, + messages=[{"role": "user", "content": "hello"}], + temperature=0.3, + ) + + assert kwargs["temperature"] == expected + + @pytest.mark.parametrize( + "model", + [ + "anthropic/claude-sonnet-4-6", + "gpt-5.4", + # kimi-k2-instruct is the non-coding K2 family — temperature is + # variable (recommended 0.6 but not enforced). Must not clamp. + "kimi-k2-instruct", + "moonshotai/Kimi-K2-Instruct", + "moonshotai/Kimi-K2-Instruct-0905", + "kimi-k2-instruct-0905", + # Hypothetical future kimi name not in the whitelist. + "kimi-k2-experimental", + ], + ) + def test_non_restricted_model_preserves_temperature(self, model): + from agent.auxiliary_client import _build_call_kwargs + + kwargs = _build_call_kwargs( + provider="openrouter", + model=model, messages=[{"role": "user", "content": "hello"}], temperature=0.3, )