mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(kimi): force fixed temperature on kimi-k2.* models (k2.5, thinking, turbo) (#12144)
* fix(kimi): force fixed temperature on kimi-k2.* models (k2.5, thinking, turbo)

  The prior override only matched the literal model name "kimi-for-coding", but Moonshot's coding endpoint is hit with real model IDs such as `kimi-k2.5`, `kimi-k2-turbo-preview`, `kimi-k2-thinking`, etc. Those requests bypassed the override and kept the caller's temperature, so Moonshot returns HTTP 400 "invalid temperature: only 0.6 is allowed for this model" (or 1.0 for thinking variants).

  Match the whole kimi-k2.* family:
    * kimi-k2-thinking / kimi-k2-thinking-turbo -> 1.0 (thinking mode)
    * all other kimi-k2.* -> 0.6 (non-thinking / instant mode)

  Also accept an optional vendor prefix (e.g. `moonshotai/kimi-k2.5`) so aggregator routings are covered.

* refactor(kimi): whitelist-match kimi coding models instead of prefix

  Addresses review feedback on PR #12144.

  - Replace `startswith("kimi-k2")` with explicit frozensets sourced from Moonshot's kimi-for-coding model list. The prefix match would have also clamped `kimi-k2-instruct` / `kimi-k2-instruct-0905`, which are the separate non-coding K2 family with variable temperature (recommended 0.6 but not enforced — see huggingface.co/moonshotai/Kimi-K2-Instruct).
  - Confirmed via platform.kimi.ai docs that all five coding models (k2.5, k2-turbo-preview, k2-0905-preview, k2-thinking, k2-thinking-turbo) share the fixed-temperature lock, so the preview-model mapping is no longer an assumption.
  - Drop the fragile `"thinking" in bare` substring test in favor of a set lookup.
  - Log a debug line on each override so operators can see when Hermes silently rewrites temperature.
  - Update class docstring.

  Extend the negative test to parametrize over kimi-k2-instruct, Kimi-K2-Instruct-0905, and a hypothetical future kimi-k2-experimental name — all must keep the caller's temperature.
This commit is contained in:
parent
656c375855
commit
c14b3b5880
2 changed files with 90 additions and 5 deletions
|
|
@ -99,11 +99,48 @@ _FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
|
|||
"kimi-for-coding": 0.6,
|
||||
}
|
||||
|
||||
# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
|
||||
# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
|
||||
# value 0.6. Any other value will result in an error." The same lock applies
|
||||
# to the other k2.* models served on that endpoint. Enumerated explicitly so
|
||||
# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
|
||||
# the standard chat API and third parties) are NOT clamped.
|
||||
# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
|
||||
_KIMI_INSTANT_MODELS: frozenset = frozenset({
|
||||
"kimi-k2.5",
|
||||
"kimi-k2-turbo-preview",
|
||||
"kimi-k2-0905-preview",
|
||||
})
|
||||
_KIMI_THINKING_MODELS: frozenset = frozenset({
|
||||
"kimi-k2-thinking",
|
||||
"kimi-k2-thinking-turbo",
|
||||
})
|
||||
|
||||
|
||||
def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
|
||||
"""Return a required temperature override for models with strict contracts."""
|
||||
"""Return a required temperature override for models with strict contracts.
|
||||
|
||||
Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
|
||||
the k2.5 family. Non-thinking variants require exactly 0.6; thinking
|
||||
variants require 1.0. An optional ``vendor/`` prefix (e.g.
|
||||
``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.
|
||||
|
||||
Returns ``None`` for every other model, including ``kimi-k2-instruct*``
|
||||
which is the separate non-coding K2 family with variable temperature.
|
||||
"""
|
||||
normalized = (model or "").strip().lower()
|
||||
return _FIXED_TEMPERATURE_MODELS.get(normalized)
|
||||
fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
|
||||
if fixed is not None:
|
||||
logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
|
||||
return fixed
|
||||
bare = normalized.rsplit("/", 1)[-1]
|
||||
if bare in _KIMI_THINKING_MODELS:
|
||||
logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
|
||||
return 1.0
|
||||
if bare in _KIMI_INSTANT_MODELS:
|
||||
logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
|
||||
return 0.6
|
||||
return None
|
||||
|
||||
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
|
||||
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
|
|
|
|||
|
|
@ -697,7 +697,12 @@ class TestIsConnectionError:
|
|||
|
||||
|
||||
class TestKimiForCodingTemperature:
|
||||
"""kimi-for-coding now requires temperature=0.6 exactly."""
|
||||
"""Moonshot kimi-for-coding models require fixed temperatures.
|
||||
|
||||
k2.5 / k2-turbo-preview / k2-0905-preview → 0.6 (non-thinking lock).
|
||||
k2-thinking / k2-thinking-turbo → 1.0 (thinking lock).
|
||||
kimi-k2-instruct* and every other model preserve the caller's temperature.
|
||||
"""
|
||||
|
||||
def test_build_call_kwargs_forces_fixed_temperature(self):
|
||||
from agent.auxiliary_client import _build_call_kwargs
|
||||
|
|
@ -772,12 +777,55 @@ class TestKimiForCodingTemperature:
|
|||
assert kwargs["model"] == "kimi-for-coding"
|
||||
assert kwargs["temperature"] == 0.6
|
||||
|
||||
def test_non_kimi_model_still_preserves_temperature(self):
|
||||
@pytest.mark.parametrize(
|
||||
"model,expected",
|
||||
[
|
||||
("kimi-k2.5", 0.6),
|
||||
("kimi-k2-turbo-preview", 0.6),
|
||||
("kimi-k2-0905-preview", 0.6),
|
||||
("kimi-k2-thinking", 1.0),
|
||||
("kimi-k2-thinking-turbo", 1.0),
|
||||
("moonshotai/kimi-k2.5", 0.6),
|
||||
("moonshotai/Kimi-K2-Thinking", 1.0),
|
||||
],
|
||||
)
|
||||
def test_kimi_k2_family_temperature_override(self, model, expected):
|
||||
"""Moonshot kimi-k2.* models only accept fixed temperatures.
|
||||
|
||||
Non-thinking models → 0.6, thinking-mode models → 1.0.
|
||||
"""
|
||||
from agent.auxiliary_client import _build_call_kwargs
|
||||
|
||||
kwargs = _build_call_kwargs(
|
||||
provider="kimi-coding",
|
||||
model="kimi-k2.5",
|
||||
model=model,
|
||||
messages=[{"role": "user", "content": "hello"}],
|
||||
temperature=0.3,
|
||||
)
|
||||
|
||||
assert kwargs["temperature"] == expected
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
[
|
||||
"anthropic/claude-sonnet-4-6",
|
||||
"gpt-5.4",
|
||||
# kimi-k2-instruct is the non-coding K2 family — temperature is
|
||||
# variable (recommended 0.6 but not enforced). Must not clamp.
|
||||
"kimi-k2-instruct",
|
||||
"moonshotai/Kimi-K2-Instruct",
|
||||
"moonshotai/Kimi-K2-Instruct-0905",
|
||||
"kimi-k2-instruct-0905",
|
||||
# Hypothetical future kimi name not in the whitelist.
|
||||
"kimi-k2-experimental",
|
||||
],
|
||||
)
|
||||
def test_non_restricted_model_preserves_temperature(self, model):
|
||||
from agent.auxiliary_client import _build_call_kwargs
|
||||
|
||||
kwargs = _build_call_kwargs(
|
||||
provider="openrouter",
|
||||
model=model,
|
||||
messages=[{"role": "user", "content": "hello"}],
|
||||
temperature=0.3,
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue