fix(kimi): omit temperature entirely for Kimi/Moonshot models (#13157)

Kimi's gateway selects the correct temperature server-side based on the
active mode (thinking -> 1.0, non-thinking -> 0.6).  Sending any
temperature value — even the previously "correct" one — conflicts with
gateway-managed defaults.

Replaces the old approach of forcing specific temperature values (0.6
for non-thinking, 1.0 for thinking) with an OMIT_TEMPERATURE sentinel
that tells all call sites to strip the temperature key from API kwargs
entirely.
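
In condensed form (helper names are real and taken from the
agent/auxiliary_client.py diff below; bodies trimmed to the essentials):

    # Identity-checked sentinel: distinct from None and from any float, so
    # "no override" and "strip the key entirely" stay distinguishable.
    OMIT_TEMPERATURE: object = object()

    def _is_kimi_model(model):
        # Tolerates vendor prefixes like "moonshotai/kimi-k2.5".
        bare = (model or "").strip().lower().rsplit("/", 1)[-1]
        return bare.startswith("kimi-") or bare == "kimi"

    def _fixed_temperature_for_model(model, base_url=None):
        if _is_kimi_model(model):
            return OMIT_TEMPERATURE  # caller must drop the temperature key
        return None  # no override; caller keeps its own default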

Changes:
- agent/auxiliary_client.py: OMIT_TEMPERATURE sentinel, _is_kimi_model()
  prefix check (covers all kimi-* models), _fixed_temperature_for_model()
  returns sentinel for kimi models.  _build_call_kwargs() strips temp.
- run_agent.py: _build_api_kwargs, flush_memories, and summary generation
  paths all handle the sentinel by popping/omitting temperature (see the
  sketch after this list).
- trajectory_compressor.py: _effective_temperature_for_model returns None
  for kimi (sentinel mapped), direct client calls use kwargs dict to
  conditionally include temperature.
- mini_swe_runner.py: same sentinel handling via wrapper function.
- 6 test files updated: all 'forces temperature X' assertions replaced
  with 'temperature not in kwargs' assertions.
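
The call sites come in two shapes, mutate-in-place and build-from-scratch.
The helpers below are hypothetical names that condense both patterns from
the diff:

    from agent.auxiliary_client import OMIT_TEMPERATURE, _fixed_temperature_for_model

    def apply_temperature_directive(api_kwargs, model, base_url=None):
        """Hypothetical helper: the in-place shape (run_agent._build_api_kwargs)."""
        fixed = _fixed_temperature_for_model(model, base_url)
        if fixed is OMIT_TEMPERATURE:
            api_kwargs.pop("temperature", None)  # let the gateway choose
        elif fixed is not None:
            api_kwargs["temperature"] = fixed
        return api_kwargs

    def build_create_kwargs(model, messages, temperature):
        """Hypothetical helper: the build-from-scratch shape (trajectory_compressor)."""
        kwargs = {"model": model, "messages": messages}
        if temperature is not None:  # None here means "omit entirely"
            kwargs["temperature"] = temperature
        return kwargs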

Net: -76 lines (171 added, 247 removed).
Inspired by PR #13137 (@kshitijk4poor).
Teknium 2026-04-20 12:23:05 -07:00 committed by GitHub
parent c1977146ce
commit 3cba81ebed
10 changed files with 170 additions and 246 deletions


@@ -95,85 +95,37 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
     return _PROVIDER_ALIASES.get(normalized, normalized)
 
-_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
-    "kimi-for-coding": 0.6,
-}
+# Sentinel: when returned by _fixed_temperature_for_model(), callers must
+# strip the ``temperature`` key from API kwargs entirely so the provider's
+# server-side default applies. Kimi/Moonshot models manage temperature
+# internally — sending *any* value (even the "correct" one) can conflict
+# with gateway-side mode selection (thinking → 1.0, non-thinking → 0.6).
+OMIT_TEMPERATURE: object = object()
 
-# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
-# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
-# value 0.6. Any other value will result in an error." The same lock applies
-# to the other k2.* models served on that endpoint. Enumerated explicitly so
-# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
-# the standard chat API and third parties) are NOT clamped.
-# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
-_KIMI_INSTANT_MODELS: frozenset = frozenset({
-    "kimi-k2.6",
-    "kimi-k2.5",
-    "kimi-k2-turbo-preview",
-    "kimi-k2-0905-preview",
-})
-_KIMI_THINKING_MODELS: frozenset = frozenset({
-    "kimi-k2-thinking",
-    "kimi-k2-thinking-turbo",
-})
 
-# Moonshot's public chat endpoint (api.moonshot.ai/v1) enforces a different
-# temperature contract than the Coding Plan endpoint above. Empirically,
-# `kimi-k2.5` on the public API rejects 0.6 with HTTP 400
-# "invalid temperature: only 1 is allowed for this model" — the Coding Plan
-# lock (0.6 for non-thinking) does not apply. `kimi-k2-turbo-preview` and the
-# thinking variants already match the Coding Plan contract on the public
-# endpoint, so we only override the models that diverge.
-# Users hit this endpoint when `KIMI_API_KEY` is a legacy `sk-*` key (the
-# `sk-kimi-*` prefix routes to api.kimi.com/coding/v1 instead — see
-# hermes_cli/auth.py:_kimi_base_url_for_key).
-_KIMI_PUBLIC_API_OVERRIDES: Dict[str, float] = {
-    "kimi-k2.5": 1.0,
-}
+def _is_kimi_model(model: Optional[str]) -> bool:
+    """True for any Kimi / Moonshot model that manages temperature server-side."""
+    bare = (model or "").strip().lower().rsplit("/", 1)[-1]
+    return bare.startswith("kimi-") or bare == "kimi"
 
 
 def _fixed_temperature_for_model(
     model: Optional[str],
     base_url: Optional[str] = None,
-) -> Optional[float]:
-    """Return a required temperature override for models with strict contracts.
-
-    Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
-    the k2.5 family. Non-thinking variants require exactly 0.6; thinking
-    variants require 1.0. An optional ``vendor/`` prefix (e.g.
-    ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.
-
-    When ``base_url`` points to Moonshot's public chat endpoint
-    (``api.moonshot.ai``), the contract changes for ``kimi-k2.5``: the public
-    API only accepts ``temperature=1``, not 0.6. That override takes precedence
-    over the Coding Plan defaults above.
-
-    Returns ``None`` for every other model, including ``kimi-k2-instruct*``
-    which is the separate non-coding K2 family with variable temperature.
-    """
-    normalized = (model or "").strip().lower()
-    bare = normalized.rsplit("/", 1)[-1]
-
-    # Public Moonshot API has a stricter contract for some models than the
-    # Coding Plan endpoint — check it first so it wins on conflict.
-    if base_url and ("api.moonshot.ai" in base_url.lower() or "api.moonshot.cn" in base_url.lower()):
-        public = _KIMI_PUBLIC_API_OVERRIDES.get(bare)
-        if public is not None:
-            logger.debug(
-                "Forcing temperature=%s for %r on public Moonshot API", public, model
-            )
-            return public
-    fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
-    if fixed is not None:
-        logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
-        return fixed
-    if bare in _KIMI_THINKING_MODELS:
-        logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
-        return 1.0
-    if bare in _KIMI_INSTANT_MODELS:
-        logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
-        return 0.6
+) -> "Optional[float] | object":
+    """Return a temperature directive for models with strict contracts.
+
+    Returns:
+        ``OMIT_TEMPERATURE``: caller must remove the ``temperature`` key so the
+            provider chooses its own default. Used for all Kimi / Moonshot
+            models whose gateway selects temperature server-side.
+        ``float``: a specific value the caller must use (reserved for future
+            models with fixed-temperature contracts).
+        ``None``: no override; caller should use its own default.
+    """
+    if _is_kimi_model(model):
+        logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
+        return OMIT_TEMPERATURE
     return None
 
 
 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
@@ -2476,7 +2428,9 @@ def _build_call_kwargs(
     }
 
     fixed_temperature = _fixed_temperature_for_model(model, base_url)
-    if fixed_temperature is not None:
+    if fixed_temperature is OMIT_TEMPERATURE:
+        temperature = None  # strip — let server choose
+    elif fixed_temperature is not None:
         temperature = fixed_temperature
 
     # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently


@@ -47,12 +47,19 @@ def _effective_temperature_for_model(
     model: str,
     base_url: Optional[str] = None,
 ) -> Optional[float]:
-    """Return a fixed temperature for models with strict sampling contracts."""
+    """Return a fixed temperature for models with strict sampling contracts.
+
+    Returns ``None`` when the model manages temperature server-side (Kimi);
+    callers must omit the ``temperature`` kwarg entirely in that case.
+    """
     try:
-        from agent.auxiliary_client import _fixed_temperature_for_model
+        from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE
     except Exception:
         return None
-    return _fixed_temperature_for_model(model, base_url)
+    result = _fixed_temperature_for_model(model, base_url)
+    if result is OMIT_TEMPERATURE:
+        return None  # caller must omit temperature
+    return result


@@ -6855,12 +6855,15 @@ class AIAgent:
             "timeout": self._resolved_api_call_timeout(),
         }
         try:
-            from agent.auxiliary_client import _fixed_temperature_for_model
+            from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE
         except Exception:
             _fixed_temperature_for_model = None
+            OMIT_TEMPERATURE = None
         if _fixed_temperature_for_model is not None:
             fixed_temperature = _fixed_temperature_for_model(self.model, self.base_url)
-            if fixed_temperature is not None:
+            if fixed_temperature is OMIT_TEMPERATURE:
+                api_kwargs.pop("temperature", None)
+            elif fixed_temperature is not None:
                 api_kwargs["temperature"] = fixed_temperature
         if self._is_qwen_portal():
             api_kwargs["metadata"] = {
@@ -7301,12 +7304,19 @@ class AIAgent:
                 from agent.auxiliary_client import (
                     call_llm as _call_llm,
                     _fixed_temperature_for_model,
+                    OMIT_TEMPERATURE,
                 )
                 _aux_available = True
-                # Use the fixed-temperature override (e.g. kimi-for-coding → 0.6) if
-                # the model has a strict contract; otherwise the historical 0.3 default.
-                _flush_temperature = _fixed_temperature_for_model(self.model, self.base_url)
-                if _flush_temperature is None:
+                # Kimi models manage temperature server-side — omit it entirely.
+                # Other models with a fixed contract get that value; everyone else
+                # gets the historical 0.3 default.
+                _fixed_temp = _fixed_temperature_for_model(self.model, self.base_url)
+                _omit_temperature = _fixed_temp is OMIT_TEMPERATURE
+                if _omit_temperature:
+                    _flush_temperature = None
+                elif _fixed_temp is not None:
+                    _flush_temperature = _fixed_temp
+                else:
                     _flush_temperature = 0.3
                 try:
                     response = _call_llm(
@@ -7325,7 +7335,10 @@ class AIAgent:
                 # No auxiliary client -- use the Codex Responses path directly
                 codex_kwargs = self._build_api_kwargs(api_messages)
                 codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
-                codex_kwargs["temperature"] = _flush_temperature
+                if _flush_temperature is not None:
+                    codex_kwargs["temperature"] = _flush_temperature
+                else:
+                    codex_kwargs.pop("temperature", None)
                 if "max_output_tokens" in codex_kwargs:
                     codex_kwargs["max_output_tokens"] = 5120
                 response = self._run_codex_stream(codex_kwargs)
@@ -7344,9 +7357,10 @@ class AIAgent:
                     "model": self.model,
                     "messages": api_messages,
                     "tools": [memory_tool_def],
-                    "temperature": _flush_temperature,
                     **self._max_tokens_param(5120),
                 }
+                if _flush_temperature is not None:
+                    api_kwargs["temperature"] = _flush_temperature
                 from agent.auxiliary_client import _get_task_timeout
                 response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create(
                     **api_kwargs, timeout=_get_task_timeout("flush_memories")
@@ -8368,14 +8382,17 @@ class AIAgent:
         summary_extra_body = {}
         try:
-            from agent.auxiliary_client import _fixed_temperature_for_model
+            from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE as _OMIT_TEMP
         except Exception:
             _fixed_temperature_for_model = None
-        _summary_temperature = (
+            _OMIT_TEMP = None
+        _raw_summary_temp = (
             _fixed_temperature_for_model(self.model, self.base_url)
             if _fixed_temperature_for_model is not None
             else None
         )
+        _omit_summary_temperature = _raw_summary_temp is _OMIT_TEMP
+        _summary_temperature = None if _omit_summary_temperature else _raw_summary_temp
         _is_nous = "nousresearch" in self._base_url_lower
         if self._supports_reasoning_extra_body():
             if self.reasoning_config is not None:


@@ -696,27 +696,46 @@ class TestIsConnectionError:
         assert _is_connection_error(err) is False
 
 
-class TestKimiForCodingTemperature:
-    """Moonshot kimi-for-coding models require fixed temperatures.
-
-    k2.5 / k2-turbo-preview / k2-0905-preview → 0.6 (non-thinking lock).
-    k2-thinking / k2-thinking-turbo → 1.0 (thinking lock).
-    kimi-k2-instruct* and every other model preserve the caller's temperature.
+class TestKimiTemperatureOmitted:
+    """Kimi/Moonshot models should have temperature OMITTED from API kwargs.
+
+    The Kimi gateway selects the correct temperature server-side based on the
+    active mode (thinking → 1.0, non-thinking → 0.6). Sending any temperature
+    value conflicts with gateway-managed defaults.
     """
 
-    def test_build_call_kwargs_forces_fixed_temperature(self):
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "kimi-for-coding",
+            "kimi-k2.5",
+            "kimi-k2.6",
+            "kimi-k2-turbo-preview",
+            "kimi-k2-0905-preview",
+            "kimi-k2-thinking",
+            "kimi-k2-thinking-turbo",
+            "kimi-k2-instruct",
+            "kimi-k2-instruct-0905",
+            "moonshotai/kimi-k2.5",
+            "moonshotai/Kimi-K2-Thinking",
+            "moonshotai/Kimi-K2-Instruct",
+        ],
+    )
+    def test_kimi_models_omit_temperature(self, model):
+        """No kimi model should have a temperature key in kwargs."""
         from agent.auxiliary_client import _build_call_kwargs
 
         kwargs = _build_call_kwargs(
             provider="kimi-coding",
-            model="kimi-for-coding",
+            model=model,
             messages=[{"role": "user", "content": "hello"}],
             temperature=0.3,
         )
-        assert kwargs["temperature"] == 0.6
+        assert "temperature" not in kwargs
 
-    def test_build_call_kwargs_injects_temperature_when_missing(self):
+    def test_kimi_for_coding_no_temperature_when_none(self):
+        """When caller passes temperature=None, still no temperature key."""
         from agent.auxiliary_client import _build_call_kwargs
 
         kwargs = _build_call_kwargs(
@@ -726,9 +745,9 @@ class TestKimiForCodingTemperature:
             temperature=None,
         )
-        assert kwargs["temperature"] == 0.6
+        assert "temperature" not in kwargs
 
-    def test_auto_routed_kimi_for_coding_sync_call_uses_fixed_temperature(self):
+    def test_sync_call_omits_temperature(self):
         client = MagicMock()
         client.base_url = "https://api.kimi.com/coding/v1"
         response = MagicMock()
@@ -750,10 +769,10 @@ class TestKimiForCodingTemperature:
         assert result is response
         kwargs = client.chat.completions.create.call_args.kwargs
         assert kwargs["model"] == "kimi-for-coding"
-        assert kwargs["temperature"] == 0.6
+        assert "temperature" not in kwargs
 
     @pytest.mark.asyncio
-    async def test_auto_routed_kimi_for_coding_async_call_uses_fixed_temperature(self):
+    async def test_async_call_omits_temperature(self):
         client = MagicMock()
         client.base_url = "https://api.kimi.com/coding/v1"
         response = MagicMock()
@@ -775,52 +794,17 @@ class TestKimiForCodingTemperature:
         assert result is response
         kwargs = client.chat.completions.create.call_args.kwargs
         assert kwargs["model"] == "kimi-for-coding"
-        assert kwargs["temperature"] == 0.6
+        assert "temperature" not in kwargs
 
-    @pytest.mark.parametrize(
-        "model,expected",
-        [
-            ("kimi-k2.5", 0.6),
-            ("kimi-k2-turbo-preview", 0.6),
-            ("kimi-k2-0905-preview", 0.6),
-            ("kimi-k2-thinking", 1.0),
-            ("kimi-k2-thinking-turbo", 1.0),
-            ("moonshotai/kimi-k2.5", 0.6),
-            ("moonshotai/Kimi-K2-Thinking", 1.0),
-        ],
-    )
-    def test_kimi_k2_family_temperature_override(self, model, expected):
-        """Moonshot kimi-k2.* models only accept fixed temperatures.
-
-        Non-thinking models → 0.6, thinking-mode models → 1.0.
-        """
-        from agent.auxiliary_client import _build_call_kwargs
-
-        kwargs = _build_call_kwargs(
-            provider="kimi-coding",
-            model=model,
-            messages=[{"role": "user", "content": "hello"}],
-            temperature=0.3,
-        )
-        assert kwargs["temperature"] == expected
-
     @pytest.mark.parametrize(
         "model",
         [
             "anthropic/claude-sonnet-4-6",
             "gpt-5.4",
-            # kimi-k2-instruct is the non-coding K2 family — temperature is
-            # variable (recommended 0.6 but not enforced). Must not clamp.
-            "kimi-k2-instruct",
-            "moonshotai/Kimi-K2-Instruct",
-            "moonshotai/Kimi-K2-Instruct-0905",
-            "kimi-k2-instruct-0905",
-            # Hypothetical future kimi name not in the whitelist.
-            "kimi-k2-experimental",
+            "deepseek-chat",
         ],
     )
-    def test_non_restricted_model_preserves_temperature(self, model):
+    def test_non_kimi_models_preserve_temperature(self, model):
         from agent.auxiliary_client import _build_call_kwargs
 
         kwargs = _build_call_kwargs(
@@ -832,25 +816,16 @@ class TestKimiForCodingTemperature:
         assert kwargs["temperature"] == 0.3
 
-    # ── Endpoint-aware overrides: api.moonshot.ai vs api.kimi.com/coding ──
-    # The public Moonshot chat endpoint and the Coding Plan endpoint enforce
-    # different temperature contracts for the same model name. `kimi-k2.5` on
-    # api.moonshot.ai rejects 0.6 with HTTP 400 "only 1 is allowed for this
-    # model", while the Coding Plan docs mandate 0.6. Override must pick the
-    # right value per base_url.
     @pytest.mark.parametrize(
         "base_url",
        [
             "https://api.moonshot.ai/v1",
-            "https://api.moonshot.ai/v1/",
-            "https://API.MOONSHOT.AI/v1",
             "https://api.moonshot.cn/v1",
-            "https://api.moonshot.cn/v1/",
+            "https://api.kimi.com/coding/v1",
         ],
     )
-    def test_kimi_k2_5_public_api_forces_temperature_1(self, base_url):
-        """kimi-k2.5 on the public Moonshot API only accepts temperature=1."""
+    def test_kimi_k2_5_omits_temperature_regardless_of_endpoint(self, base_url):
+        """Temperature is omitted regardless of which Kimi endpoint is used."""
         from agent.auxiliary_client import _build_call_kwargs
 
         kwargs = _build_call_kwargs(
@@ -861,64 +836,7 @@ class TestKimiForCodingTemperature:
             base_url=base_url,
         )
-        assert kwargs["temperature"] == 1.0
-
-    def test_kimi_k2_5_coding_plan_keeps_temperature_0_6(self):
-        """kimi-k2.5 on api.kimi.com/coding keeps the Coding Plan's 0.6 lock."""
-        from agent.auxiliary_client import _build_call_kwargs
-
-        kwargs = _build_call_kwargs(
-            provider="kimi-coding",
-            model="kimi-k2.5",
-            messages=[{"role": "user", "content": "hello"}],
-            temperature=0.1,
-            base_url="https://api.kimi.com/coding/v1",
-        )
-        assert kwargs["temperature"] == 0.6
-
-    def test_kimi_k2_5_no_base_url_falls_back_to_coding_plan_lock(self):
-        """Without a base_url hint, the Coding Plan default (0.6) applies.
-
-        Preserves PR #12144 backward compatibility for callers that don't thread
-        the client's base_url through.
-        """
-        from agent.auxiliary_client import _build_call_kwargs
-
-        kwargs = _build_call_kwargs(
-            provider="kimi-coding",
-            model="kimi-k2.5",
-            messages=[{"role": "user", "content": "hello"}],
-            temperature=0.1,
-        )
-        assert kwargs["temperature"] == 0.6
-
-    @pytest.mark.parametrize(
-        "model,expected",
-        [
-            # Only kimi-k2.5 diverges on api.moonshot.ai; the rest keep the
-            # Coding Plan lock (empirically verified against Moonshot in April
-            # 2026: turbo-preview accepts 0.6, thinking-turbo accepts 1.0).
-            ("kimi-k2-turbo-preview", 0.6),
-            ("kimi-k2-0905-preview", 0.6),
-            ("kimi-k2-thinking", 1.0),
-            ("kimi-k2-thinking-turbo", 1.0),
-            ("moonshotai/kimi-k2-thinking-turbo", 1.0),
-        ],
-    )
-    def test_other_kimi_k2_family_unchanged_on_public_api(self, model, expected):
-        from agent.auxiliary_client import _build_call_kwargs
-
-        kwargs = _build_call_kwargs(
-            provider="kimi-coding",
-            model=model,
-            messages=[{"role": "user", "content": "hello"}],
-            temperature=0.1,
-            base_url="https://api.moonshot.ai/v1",
-        )
-        assert kwargs["temperature"] == expected
+        assert "temperature" not in kwargs
 
 
 # ---------------------------------------------------------------------------


@@ -251,8 +251,12 @@ class TestBuildApiKwargsChatCompletionsServiceTier:
         assert "service_tier" not in kwargs
 
 
-class TestBuildApiKwargsKimiFixedTemperature:
-    def test_kimi_for_coding_forces_temperature_on_main_chat_path(self, monkeypatch):
+class TestBuildApiKwargsKimiNoTemperatureOverride:
+    def test_kimi_for_coding_omits_temperature(self, monkeypatch):
+        """Temperature should NOT be set client-side for Kimi models.
+
+        The Kimi gateway selects the correct temperature server-side.
+        """
         agent = _make_agent(
             monkeypatch,
             "kimi-coding",
@@ -261,7 +265,7 @@ class TestBuildApiKwargsKimiFixedTemperature:
         )
         messages = [{"role": "user", "content": "hi"}]
         kwargs = agent._build_api_kwargs(messages)
-        assert kwargs["temperature"] == 0.6
+        assert "temperature" not in kwargs
 
 
 class TestBuildApiKwargsAIGateway:


@@ -918,7 +918,11 @@ class TestBuildApiKwargs:
         assert kwargs["messages"] is messages
         assert kwargs["timeout"] == 1800.0
 
-    def test_public_moonshot_kimi_k2_5_forces_temperature_1(self, agent):
+    def test_public_moonshot_kimi_k2_5_omits_temperature(self, agent):
+        """Kimi models should NOT have client-side temperature overrides.
+
+        The Kimi gateway selects the correct temperature server-side.
+        """
         agent.base_url = "https://api.moonshot.ai/v1"
         agent._base_url_lower = agent.base_url.lower()
         agent.model = "kimi-k2.5"
@@ -926,9 +930,9 @@ class TestBuildApiKwargs:
         kwargs = agent._build_api_kwargs(messages)
-        assert kwargs["temperature"] == 1.0
+        assert "temperature" not in kwargs
 
-    def test_public_moonshot_cn_kimi_k2_5_forces_temperature_1(self, agent):
+    def test_public_moonshot_cn_kimi_k2_5_omits_temperature(self, agent):
         agent.base_url = "https://api.moonshot.cn/v1"
         agent._base_url_lower = agent.base_url.lower()
         agent.model = "kimi-k2.5"
@@ -936,9 +940,9 @@ class TestBuildApiKwargs:
         kwargs = agent._build_api_kwargs(messages)
-        assert kwargs["temperature"] == 1.0
+        assert "temperature" not in kwargs
 
-    def test_kimi_coding_endpoint_keeps_kimi_k2_5_at_0_6(self, agent):
+    def test_kimi_coding_endpoint_omits_temperature(self, agent):
         agent.base_url = "https://api.kimi.com/coding/v1"
         agent._base_url_lower = agent.base_url.lower()
         agent.model = "kimi-k2.5"
@@ -946,7 +950,7 @@ class TestBuildApiKwargs:
         kwargs = agent._build_api_kwargs(messages)
-        assert kwargs["temperature"] == 0.6
+        assert "temperature" not in kwargs
 
     def test_provider_preferences_injected(self, agent):
         agent.base_url = "https://openrouter.ai/api/v1"


@@ -2,7 +2,11 @@ from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
 
-def test_run_task_forces_kimi_fixed_temperature():
+def test_run_task_kimi_omits_temperature():
+    """Kimi models should NOT have client-side temperature overrides.
+
+    The Kimi gateway selects the correct temperature server-side.
+    """
     with patch("openai.OpenAI") as mock_openai:
         client = MagicMock()
         client.chat.completions.create.return_value = SimpleNamespace(
@@ -25,10 +29,11 @@ def test_run_task_forces_kimi_fixed_temperature():
     result = runner.run_task("2+2")
     assert result["completed"] is True
-    assert client.chat.completions.create.call_args.kwargs["temperature"] == 0.6
+    assert "temperature" not in client.chat.completions.create.call_args.kwargs
 
 
-def test_run_task_public_moonshot_kimi_k2_5_forces_temperature_1():
+def test_run_task_public_moonshot_kimi_k2_5_omits_temperature():
+    """kimi-k2.5 on the public Moonshot API should not get a forced temperature."""
     with patch("openai.OpenAI") as mock_openai:
         client = MagicMock()
         client.base_url = "https://api.moonshot.ai/v1"
@@ -52,4 +57,4 @@ def test_run_task_public_moonshot_kimi_k2_5_forces_temperature_1():
     result = runner.run_task("2+2")
     assert result["completed"] is True
-    assert client.chat.completions.create.call_args.kwargs["temperature"] == 1.0
+    assert "temperature" not in client.chat.completions.create.call_args.kwargs


@@ -31,7 +31,8 @@ def test_import_loads_env_from_hermes_home(tmp_path, monkeypatch):
     assert os.getenv("OPENROUTER_API_KEY") == "from-hermes-home"
 
 
-def test_generate_summary_custom_client_forces_kimi_temperature():
+def test_generate_summary_kimi_omits_temperature():
+    """Kimi models should have temperature omitted — server manages it."""
     config = CompressionConfig(
         summarization_model="kimi-for-coding",
         temperature=0.3,
@@ -51,10 +52,11 @@ def test_generate_summary_custom_client_forces_kimi_temperature():
     result = compressor._generate_summary("tool output", metrics)
     assert result.startswith("[CONTEXT SUMMARY]:")
-    assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 0.6
+    assert "temperature" not in compressor.client.chat.completions.create.call_args.kwargs
 
 
-def test_generate_summary_public_moonshot_kimi_k2_5_forces_temperature_1():
+def test_generate_summary_public_moonshot_kimi_k2_5_omits_temperature():
+    """kimi-k2.5 on the public Moonshot API should not get a forced temperature."""
     config = CompressionConfig(
         summarization_model="kimi-k2.5",
         base_url="https://api.moonshot.ai/v1",
@@ -75,10 +77,11 @@ def test_generate_summary_public_moonshot_kimi_k2_5_forces_temperature_1():
     result = compressor._generate_summary("tool output", metrics)
     assert result.startswith("[CONTEXT SUMMARY]:")
-    assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 1.0
+    assert "temperature" not in compressor.client.chat.completions.create.call_args.kwargs
 
 
-def test_generate_summary_public_moonshot_cn_kimi_k2_5_forces_temperature_1():
+def test_generate_summary_public_moonshot_cn_kimi_k2_5_omits_temperature():
+    """kimi-k2.5 on api.moonshot.cn should not get a forced temperature."""
     config = CompressionConfig(
         summarization_model="kimi-k2.5",
         base_url="https://api.moonshot.cn/v1",
@@ -99,7 +102,7 @@ def test_generate_summary_public_moonshot_cn_kimi_k2_5_forces_temperature_1():
     result = compressor._generate_summary("tool output", metrics)
     assert result.startswith("[CONTEXT SUMMARY]:")
-    assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 1.0
+    assert "temperature" not in compressor.client.chat.completions.create.call_args.kwargs
 
 
 # ---------------------------------------------------------------------------


@@ -117,7 +117,8 @@ class TestSourceLineVerification:
 
 
 @pytest.mark.asyncio
-async def test_generate_summary_async_custom_client_forces_kimi_temperature():
+async def test_generate_summary_async_kimi_omits_temperature():
+    """Kimi models should have temperature omitted — server manages it."""
     from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics
 
     config = CompressionConfig(
@@ -140,11 +141,12 @@ async def test_generate_summary_async_custom_client_forces_kimi_temperature():
     result = await compressor._generate_summary_async("tool output", metrics)
     assert result.startswith("[CONTEXT SUMMARY]:")
-    assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 0.6
+    assert "temperature" not in async_client.chat.completions.create.call_args.kwargs
 
 
 @pytest.mark.asyncio
-async def test_generate_summary_async_public_moonshot_kimi_k2_5_forces_temperature_1():
+async def test_generate_summary_async_public_moonshot_kimi_k2_5_omits_temperature():
+    """kimi-k2.5 on the public Moonshot API should not get a forced temperature."""
     from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics
 
     config = CompressionConfig(
@@ -168,12 +170,12 @@ async def test_generate_summary_async_public_moonshot_kimi_k2_5_forces_temperature_1():
     result = await compressor._generate_summary_async("tool output", metrics)
     assert result.startswith("[CONTEXT SUMMARY]:")
-    assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 1.0
+    assert "temperature" not in async_client.chat.completions.create.call_args.kwargs
 
 
 @pytest.mark.asyncio
-async def test_generate_summary_async_public_moonshot_cn_kimi_k2_5_forces_temperature_1():
+async def test_generate_summary_async_public_moonshot_cn_kimi_k2_5_omits_temperature():
+    """kimi-k2.5 on api.moonshot.cn should not get a forced temperature."""
     from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics
 
     config = CompressionConfig(
@@ -197,4 +199,4 @@ async def test_generate_summary_async_public_moonshot_cn_kimi_k2_5_forces_temperature_1():
     result = await compressor._generate_summary_async("tool output", metrics)
     assert result.startswith("[CONTEXT SUMMARY]:")
-    assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 1.0
+    assert "temperature" not in async_client.chat.completions.create.call_args.kwargs


@@ -58,14 +58,20 @@ def _effective_temperature_for_model(
     model: str,
     requested_temperature: float,
     base_url: Optional[str] = None,
-) -> float:
-    """Apply fixed model temperature contracts to direct client calls."""
+) -> Optional[float]:
+    """Apply fixed model temperature contracts to direct client calls.
+
+    Returns ``None`` when the model manages temperature server-side (Kimi);
+    callers must omit the ``temperature`` kwarg entirely in that case.
+    """
     try:
-        from agent.auxiliary_client import _fixed_temperature_for_model
+        from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE
     except Exception:
         return requested_temperature
     fixed_temperature = _fixed_temperature_for_model(model, base_url)
+    if fixed_temperature is OMIT_TEMPERATURE:
+        return None  # caller must omit temperature
     if fixed_temperature is not None:
         return fixed_temperature
     return requested_temperature
@@ -600,12 +606,14 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                 max_tokens=self.config.summary_target_tokens * 2,
             )
         else:
-            response = self.client.chat.completions.create(
-                model=self.config.summarization_model,
-                messages=[{"role": "user", "content": prompt}],
-                temperature=summary_temperature,
-                max_tokens=self.config.summary_target_tokens * 2,
-            )
+            _create_kwargs = {
+                "model": self.config.summarization_model,
+                "messages": [{"role": "user", "content": prompt}],
+                "max_tokens": self.config.summary_target_tokens * 2,
+            }
+            if summary_temperature is not None:
+                _create_kwargs["temperature"] = summary_temperature
+            response = self.client.chat.completions.create(**_create_kwargs)
 
         summary = self._coerce_summary_content(response.choices[0].message.content)
         return self._ensure_summary_prefix(summary)
@@ -667,12 +675,14 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                 max_tokens=self.config.summary_target_tokens * 2,
             )
         else:
-            response = await self._get_async_client().chat.completions.create(
-                model=self.config.summarization_model,
-                messages=[{"role": "user", "content": prompt}],
-                temperature=summary_temperature,
-                max_tokens=self.config.summary_target_tokens * 2,
-            )
+            _create_kwargs = {
+                "model": self.config.summarization_model,
+                "messages": [{"role": "user", "content": prompt}],
+                "max_tokens": self.config.summary_target_tokens * 2,
+            }
+            if summary_temperature is not None:
+                _create_kwargs["temperature"] = summary_temperature
+            response = await self._get_async_client().chat.completions.create(**_create_kwargs)
 
         summary = self._coerce_summary_content(response.choices[0].message.content)
         return self._ensure_summary_prefix(summary)