# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
#   "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
#   value 0.6. Any other value will result in an error."
# The same lock applies to the other k2.* models served on that endpoint.
# Enumerated explicitly so non-coding siblings like `kimi-k2-instruct`
# (variable temperature, served on the standard chat API and third parties)
# are NOT clamped.
# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
#
# NOTE(review): the quoted doc gives 1.0 for k2.5 in thinking mode, while this
# table pins `kimi-k2.5` to the non-thinking value 0.6 -- presumably because
# these calls run with thinking disabled; confirm against the callers.
_KIMI_INSTANT_MODELS: frozenset = frozenset({
    "kimi-k2.5",
    "kimi-k2-turbo-preview",
    "kimi-k2-0905-preview",
})
_KIMI_THINKING_MODELS: frozenset = frozenset({
    "kimi-k2-thinking",
    "kimi-k2-thinking-turbo",
})


def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
    """Return a required temperature override for models with strict contracts.

    The model id is stripped and lower-cased, then resolved in this order:

    1. ``_FIXED_TEMPERATURE_MODELS`` -- looked up by the full id first and, if
       that misses, by the id with any ``vendor/`` prefix removed.
    2. ``_KIMI_THINKING_MODELS`` (prefix removed) -> ``1.0``.
    3. ``_KIMI_INSTANT_MODELS`` (prefix removed) -> ``0.6``.

    An optional ``vendor/`` prefix (e.g. ``moonshotai/kimi-k2.5``) is therefore
    tolerated uniformly for every table, not only for the kimi-k2 sets.

    Args:
        model: Model identifier as passed by the caller; ``None`` or an empty
            string is treated as "no model".

    Returns:
        The temperature that must be sent for ``model``, or ``None`` when the
        model is in none of the tables (including ``kimi-k2-instruct*``, which
        is deliberately not enumerated) and the caller's value should be kept.
    """
    normalized = (model or "").strip().lower()
    # Only the last path segment names the model; anything before the final
    # "/" is an aggregator/vendor routing prefix.
    bare = normalized.rsplit("/", 1)[-1]

    # Prefer an exact hit so a map entry that itself contains "/" still wins,
    # then fall back to the vendor-stripped id so prefixed routings of a
    # fixed-map model are clamped the same way as the kimi-k2 sets below.
    fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
    if fixed is None:
        fixed = _FIXED_TEMPERATURE_MODELS.get(bare)
    if fixed is not None:
        logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
        return fixed

    if bare in _KIMI_THINKING_MODELS:
        logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
        return 1.0
    if bare in _KIMI_INSTANT_MODELS:
        logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
        return 0.6
    return None
@pytest.mark.parametrize(
    "model,expected",
    [
        # Non-thinking ids.
        ("kimi-k2.5", 0.6),
        ("kimi-k2-turbo-preview", 0.6),
        ("kimi-k2-0905-preview", 0.6),
        # Thinking ids.
        ("kimi-k2-thinking", 1.0),
        ("kimi-k2-thinking-turbo", 1.0),
        # Vendor-prefixed / mixed-case spellings.
        ("moonshotai/kimi-k2.5", 0.6),
        ("moonshotai/Kimi-K2-Thinking", 1.0),
    ],
)
def test_kimi_k2_family_temperature_override(self, model, expected):
    """The caller's temperature is replaced by the fixed value for kimi-k2 ids.

    Each case requests ``temperature=0.3`` and checks that the built kwargs
    carry the expected fixed value instead (0.6 or 1.0).
    """
    from agent.auxiliary_client import _build_call_kwargs

    user_messages = [{"role": "user", "content": "hello"}]
    call_kwargs = _build_call_kwargs(
        provider="kimi-coding",
        model=model,
        messages=user_messages,
        temperature=0.3,
    )

    assert call_kwargs["temperature"] == expected