diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 13a357f9e..718b778f0 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -95,85 +95,37 @@ def _normalize_aux_provider(provider: Optional[str]) -> str: return _PROVIDER_ALIASES.get(normalized, normalized) -_FIXED_TEMPERATURE_MODELS: Dict[str, float] = { - "kimi-for-coding": 0.6, -} +# Sentinel: when returned by _fixed_temperature_for_model(), callers must +# strip the ``temperature`` key from API kwargs entirely so the provider's +# server-side default applies. Kimi/Moonshot models manage temperature +# internally — sending *any* value (even the "correct" one) can conflict +# with gateway-side mode selection (thinking → 1.0, non-thinking → 0.6). +OMIT_TEMPERATURE: object = object() -# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents: -# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed -# value 0.6. Any other value will result in an error." The same lock applies -# to the other k2.* models served on that endpoint. Enumerated explicitly so -# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on -# the standard chat API and third parties) are NOT clamped. -# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart -_KIMI_INSTANT_MODELS: frozenset = frozenset({ - "kimi-k2.6", - "kimi-k2.5", - "kimi-k2-turbo-preview", - "kimi-k2-0905-preview", -}) -_KIMI_THINKING_MODELS: frozenset = frozenset({ - "kimi-k2-thinking", - "kimi-k2-thinking-turbo", -}) -# Moonshot's public chat endpoint (api.moonshot.ai/v1) enforces a different -# temperature contract than the Coding Plan endpoint above. Empirically, -# `kimi-k2.5` on the public API rejects 0.6 with HTTP 400 -# "invalid temperature: only 1 is allowed for this model" — the Coding Plan -# lock (0.6 for non-thinking) does not apply. 
`kimi-k2-turbo-preview` and the -# thinking variants already match the Coding Plan contract on the public -# endpoint, so we only override the models that diverge. -# Users hit this endpoint when `KIMI_API_KEY` is a legacy `sk-*` key (the -# `sk-kimi-*` prefix routes to api.kimi.com/coding/v1 instead — see -# hermes_cli/auth.py:_kimi_base_url_for_key). -_KIMI_PUBLIC_API_OVERRIDES: Dict[str, float] = { - "kimi-k2.5": 1.0, -} +def _is_kimi_model(model: Optional[str]) -> bool: + """True for any Kimi / Moonshot model that manages temperature server-side.""" + bare = (model or "").strip().lower().rsplit("/", 1)[-1] + return bare.startswith("kimi-") or bare == "kimi" def _fixed_temperature_for_model( model: Optional[str], base_url: Optional[str] = None, -) -> Optional[float]: - """Return a required temperature override for models with strict contracts. +) -> "Optional[float] | object": + """Return a temperature directive for models with strict contracts. - Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on - the k2.5 family. Non-thinking variants require exactly 0.6; thinking - variants require 1.0. An optional ``vendor/`` prefix (e.g. - ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings. - - When ``base_url`` points to Moonshot's public chat endpoint - (``api.moonshot.ai``), the contract changes for ``kimi-k2.5``: the public - API only accepts ``temperature=1``, not 0.6. That override takes precedence - over the Coding Plan defaults above. - - Returns ``None`` for every other model, including ``kimi-k2-instruct*`` - which is the separate non-coding K2 family with variable temperature. + Returns: + ``OMIT_TEMPERATURE`` — caller must remove the ``temperature`` key so the + provider chooses its own default. Used for all Kimi / Moonshot + models whose gateway selects temperature server-side. + ``float`` — a specific value the caller must use (reserved for future + models with fixed-temperature contracts). 
+ ``None`` — no override; caller should use its own default. """ - normalized = (model or "").strip().lower() - bare = normalized.rsplit("/", 1)[-1] - - # Public Moonshot API has a stricter contract for some models than the - # Coding Plan endpoint — check it first so it wins on conflict. - if base_url and ("api.moonshot.ai" in base_url.lower() or "api.moonshot.cn" in base_url.lower()): - public = _KIMI_PUBLIC_API_OVERRIDES.get(bare) - if public is not None: - logger.debug( - "Forcing temperature=%s for %r on public Moonshot API", public, model - ) - return public - - fixed = _FIXED_TEMPERATURE_MODELS.get(normalized) - if fixed is not None: - logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model) - return fixed - if bare in _KIMI_THINKING_MODELS: - logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model) - return 1.0 - if bare in _KIMI_INSTANT_MODELS: - logger.debug("Forcing temperature=0.6 for kimi instant model %r", model) - return 0.6 + if _is_kimi_model(model): + logger.debug("Omitting temperature for Kimi model %r (server-managed)", model) + return OMIT_TEMPERATURE return None # Default auxiliary models for direct API-key providers (cheap/fast for side tasks) @@ -2476,7 +2428,9 @@ def _build_call_kwargs( } fixed_temperature = _fixed_temperature_for_model(model, base_url) - if fixed_temperature is not None: + if fixed_temperature is OMIT_TEMPERATURE: + temperature = None # strip — let server choose + elif fixed_temperature is not None: temperature = fixed_temperature # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently diff --git a/mini_swe_runner.py b/mini_swe_runner.py index a642e2411..c43451504 100644 --- a/mini_swe_runner.py +++ b/mini_swe_runner.py @@ -47,12 +47,19 @@ def _effective_temperature_for_model( model: str, base_url: Optional[str] = None, ) -> Optional[float]: - """Return a fixed temperature for models with strict sampling contracts.""" + """Return a fixed temperature for models with 
strict sampling contracts. + + Returns ``None`` when the model manages temperature server-side (Kimi); + callers must omit the ``temperature`` kwarg entirely in that case. + """ try: - from agent.auxiliary_client import _fixed_temperature_for_model + from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE except Exception: return None - return _fixed_temperature_for_model(model, base_url) + result = _fixed_temperature_for_model(model, base_url) + if result is OMIT_TEMPERATURE: + return None # caller must omit temperature + return result diff --git a/run_agent.py b/run_agent.py index fc57d9051..bf00f86c7 100644 --- a/run_agent.py +++ b/run_agent.py @@ -6855,12 +6855,15 @@ class AIAgent: "timeout": self._resolved_api_call_timeout(), } try: - from agent.auxiliary_client import _fixed_temperature_for_model + from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE except Exception: _fixed_temperature_for_model = None + OMIT_TEMPERATURE = None if _fixed_temperature_for_model is not None: fixed_temperature = _fixed_temperature_for_model(self.model, self.base_url) - if fixed_temperature is not None: + if fixed_temperature is OMIT_TEMPERATURE: + api_kwargs.pop("temperature", None) + elif fixed_temperature is not None: api_kwargs["temperature"] = fixed_temperature if self._is_qwen_portal(): api_kwargs["metadata"] = { @@ -7301,12 +7304,19 @@ class AIAgent: from agent.auxiliary_client import ( call_llm as _call_llm, _fixed_temperature_for_model, + OMIT_TEMPERATURE, ) _aux_available = True - # Use the fixed-temperature override (e.g. kimi-for-coding → 0.6) if - # the model has a strict contract; otherwise the historical 0.3 default. - _flush_temperature = _fixed_temperature_for_model(self.model, self.base_url) - if _flush_temperature is None: + # Kimi models manage temperature server-side — omit it entirely. + # Other models with a fixed contract get that value; everyone else + # gets the historical 0.3 default. 
+ _fixed_temp = _fixed_temperature_for_model(self.model, self.base_url) + _omit_temperature = _fixed_temp is OMIT_TEMPERATURE + if _omit_temperature: + _flush_temperature = None + elif _fixed_temp is not None: + _flush_temperature = _fixed_temp + else: _flush_temperature = 0.3 try: response = _call_llm( @@ -7325,7 +7335,10 @@ class AIAgent: # No auxiliary client -- use the Codex Responses path directly codex_kwargs = self._build_api_kwargs(api_messages) codex_kwargs["tools"] = self._responses_tools([memory_tool_def]) - codex_kwargs["temperature"] = _flush_temperature + if _flush_temperature is not None: + codex_kwargs["temperature"] = _flush_temperature + else: + codex_kwargs.pop("temperature", None) if "max_output_tokens" in codex_kwargs: codex_kwargs["max_output_tokens"] = 5120 response = self._run_codex_stream(codex_kwargs) @@ -7344,9 +7357,10 @@ class AIAgent: "model": self.model, "messages": api_messages, "tools": [memory_tool_def], - "temperature": _flush_temperature, **self._max_tokens_param(5120), } + if _flush_temperature is not None: + api_kwargs["temperature"] = _flush_temperature from agent.auxiliary_client import _get_task_timeout response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create( **api_kwargs, timeout=_get_task_timeout("flush_memories") @@ -8368,14 +8382,17 @@ class AIAgent: summary_extra_body = {} try: - from agent.auxiliary_client import _fixed_temperature_for_model + from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE as _OMIT_TEMP except Exception: _fixed_temperature_for_model = None - _summary_temperature = ( + _OMIT_TEMP = object() + _raw_summary_temp = ( _fixed_temperature_for_model(self.model, self.base_url) if _fixed_temperature_for_model is not None else None ) + _omit_summary_temperature = _raw_summary_temp is _OMIT_TEMP + _summary_temperature = None if _omit_summary_temperature else _raw_summary_temp _is_nous = "nousresearch" in self._base_url_lower if
self._supports_reasoning_extra_body(): if self.reasoning_config is not None: diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 06fc51821..2285a58f4 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -696,27 +696,46 @@ class TestIsConnectionError: assert _is_connection_error(err) is False -class TestKimiForCodingTemperature: - """Moonshot kimi-for-coding models require fixed temperatures. +class TestKimiTemperatureOmitted: + """Kimi/Moonshot models should have temperature OMITTED from API kwargs. - k2.5 / k2-turbo-preview / k2-0905-preview → 0.6 (non-thinking lock). - k2-thinking / k2-thinking-turbo → 1.0 (thinking lock). - kimi-k2-instruct* and every other model preserve the caller's temperature. + The Kimi gateway selects the correct temperature server-side based on the + active mode (thinking → 1.0, non-thinking → 0.6). Sending any temperature + value conflicts with gateway-managed defaults. """ - def test_build_call_kwargs_forces_fixed_temperature(self): + @pytest.mark.parametrize( + "model", + [ + "kimi-for-coding", + "kimi-k2.5", + "kimi-k2.6", + "kimi-k2-turbo-preview", + "kimi-k2-0905-preview", + "kimi-k2-thinking", + "kimi-k2-thinking-turbo", + "kimi-k2-instruct", + "kimi-k2-instruct-0905", + "moonshotai/kimi-k2.5", + "moonshotai/Kimi-K2-Thinking", + "moonshotai/Kimi-K2-Instruct", + ], + ) + def test_kimi_models_omit_temperature(self, model): + """No kimi model should have a temperature key in kwargs.""" from agent.auxiliary_client import _build_call_kwargs kwargs = _build_call_kwargs( provider="kimi-coding", - model="kimi-for-coding", + model=model, messages=[{"role": "user", "content": "hello"}], temperature=0.3, ) - assert kwargs["temperature"] == 0.6 + assert "temperature" not in kwargs - def test_build_call_kwargs_injects_temperature_when_missing(self): + def test_kimi_for_coding_no_temperature_when_none(self): + """When caller passes temperature=None, still no 
temperature key.""" from agent.auxiliary_client import _build_call_kwargs kwargs = _build_call_kwargs( @@ -726,9 +745,9 @@ class TestKimiForCodingTemperature: temperature=None, ) - assert kwargs["temperature"] == 0.6 + assert "temperature" not in kwargs - def test_auto_routed_kimi_for_coding_sync_call_uses_fixed_temperature(self): + def test_sync_call_omits_temperature(self): client = MagicMock() client.base_url = "https://api.kimi.com/coding/v1" response = MagicMock() @@ -750,10 +769,10 @@ class TestKimiForCodingTemperature: assert result is response kwargs = client.chat.completions.create.call_args.kwargs assert kwargs["model"] == "kimi-for-coding" - assert kwargs["temperature"] == 0.6 + assert "temperature" not in kwargs @pytest.mark.asyncio - async def test_auto_routed_kimi_for_coding_async_call_uses_fixed_temperature(self): + async def test_async_call_omits_temperature(self): client = MagicMock() client.base_url = "https://api.kimi.com/coding/v1" response = MagicMock() @@ -775,52 +794,17 @@ class TestKimiForCodingTemperature: assert result is response kwargs = client.chat.completions.create.call_args.kwargs assert kwargs["model"] == "kimi-for-coding" - assert kwargs["temperature"] == 0.6 - - @pytest.mark.parametrize( - "model,expected", - [ - ("kimi-k2.5", 0.6), - ("kimi-k2-turbo-preview", 0.6), - ("kimi-k2-0905-preview", 0.6), - ("kimi-k2-thinking", 1.0), - ("kimi-k2-thinking-turbo", 1.0), - ("moonshotai/kimi-k2.5", 0.6), - ("moonshotai/Kimi-K2-Thinking", 1.0), - ], - ) - def test_kimi_k2_family_temperature_override(self, model, expected): - """Moonshot kimi-k2.* models only accept fixed temperatures. - - Non-thinking models → 0.6, thinking-mode models → 1.0. 
- """ - from agent.auxiliary_client import _build_call_kwargs - - kwargs = _build_call_kwargs( - provider="kimi-coding", - model=model, - messages=[{"role": "user", "content": "hello"}], - temperature=0.3, - ) - - assert kwargs["temperature"] == expected + assert "temperature" not in kwargs @pytest.mark.parametrize( "model", [ "anthropic/claude-sonnet-4-6", "gpt-5.4", - # kimi-k2-instruct is the non-coding K2 family — temperature is - # variable (recommended 0.6 but not enforced). Must not clamp. - "kimi-k2-instruct", - "moonshotai/Kimi-K2-Instruct", - "moonshotai/Kimi-K2-Instruct-0905", - "kimi-k2-instruct-0905", - # Hypothetical future kimi name not in the whitelist. - "kimi-k2-experimental", + "deepseek-chat", ], ) - def test_non_restricted_model_preserves_temperature(self, model): + def test_non_kimi_models_preserve_temperature(self, model): from agent.auxiliary_client import _build_call_kwargs kwargs = _build_call_kwargs( @@ -832,25 +816,16 @@ class TestKimiForCodingTemperature: assert kwargs["temperature"] == 0.3 - # ── Endpoint-aware overrides: api.moonshot.ai vs api.kimi.com/coding ── - # The public Moonshot chat endpoint and the Coding Plan endpoint enforce - # different temperature contracts for the same model name. `kimi-k2.5` on - # api.moonshot.ai rejects 0.6 with HTTP 400 "only 1 is allowed for this - # model", while the Coding Plan docs mandate 0.6. Override must pick the - # right value per base_url. 
- @pytest.mark.parametrize( "base_url", [ "https://api.moonshot.ai/v1", - "https://api.moonshot.ai/v1/", - "https://API.MOONSHOT.AI/v1", "https://api.moonshot.cn/v1", - "https://api.moonshot.cn/v1/", + "https://api.kimi.com/coding/v1", ], ) - def test_kimi_k2_5_public_api_forces_temperature_1(self, base_url): - """kimi-k2.5 on the public Moonshot API only accepts temperature=1.""" + def test_kimi_k2_5_omits_temperature_regardless_of_endpoint(self, base_url): + """Temperature is omitted regardless of which Kimi endpoint is used.""" from agent.auxiliary_client import _build_call_kwargs kwargs = _build_call_kwargs( @@ -861,64 +836,7 @@ class TestKimiForCodingTemperature: base_url=base_url, ) - assert kwargs["temperature"] == 1.0 - - def test_kimi_k2_5_coding_plan_keeps_temperature_0_6(self): - """kimi-k2.5 on api.kimi.com/coding keeps the Coding Plan's 0.6 lock.""" - from agent.auxiliary_client import _build_call_kwargs - - kwargs = _build_call_kwargs( - provider="kimi-coding", - model="kimi-k2.5", - messages=[{"role": "user", "content": "hello"}], - temperature=0.1, - base_url="https://api.kimi.com/coding/v1", - ) - - assert kwargs["temperature"] == 0.6 - - def test_kimi_k2_5_no_base_url_falls_back_to_coding_plan_lock(self): - """Without a base_url hint, the Coding Plan default (0.6) applies. - - Preserves PR #12144 backward compatibility for callers that don't thread - the client's base_url through. - """ - from agent.auxiliary_client import _build_call_kwargs - - kwargs = _build_call_kwargs( - provider="kimi-coding", - model="kimi-k2.5", - messages=[{"role": "user", "content": "hello"}], - temperature=0.1, - ) - - assert kwargs["temperature"] == 0.6 - - @pytest.mark.parametrize( - "model,expected", - [ - # Only kimi-k2.5 diverges on api.moonshot.ai; the rest keep the - # Coding Plan lock (empirically verified against Moonshot in April - # 2026: turbo-preview accepts 0.6, thinking-turbo accepts 1.0). 
- ("kimi-k2-turbo-preview", 0.6), - ("kimi-k2-0905-preview", 0.6), - ("kimi-k2-thinking", 1.0), - ("kimi-k2-thinking-turbo", 1.0), - ("moonshotai/kimi-k2-thinking-turbo", 1.0), - ], - ) - def test_other_kimi_k2_family_unchanged_on_public_api(self, model, expected): - from agent.auxiliary_client import _build_call_kwargs - - kwargs = _build_call_kwargs( - provider="kimi-coding", - model=model, - messages=[{"role": "user", "content": "hello"}], - temperature=0.1, - base_url="https://api.moonshot.ai/v1", - ) - - assert kwargs["temperature"] == expected + assert "temperature" not in kwargs # --------------------------------------------------------------------------- diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py index c415951e2..3df51b853 100644 --- a/tests/run_agent/test_provider_parity.py +++ b/tests/run_agent/test_provider_parity.py @@ -251,8 +251,12 @@ class TestBuildApiKwargsChatCompletionsServiceTier: assert "service_tier" not in kwargs -class TestBuildApiKwargsKimiFixedTemperature: - def test_kimi_for_coding_forces_temperature_on_main_chat_path(self, monkeypatch): +class TestBuildApiKwargsKimiNoTemperatureOverride: + def test_kimi_for_coding_omits_temperature(self, monkeypatch): + """Temperature should NOT be set client-side for Kimi models. + + The Kimi gateway selects the correct temperature server-side. 
+ """ agent = _make_agent( monkeypatch, "kimi-coding", @@ -261,7 +265,7 @@ class TestBuildApiKwargsKimiFixedTemperature: ) messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) - assert kwargs["temperature"] == 0.6 + assert "temperature" not in kwargs class TestBuildApiKwargsAIGateway: diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 13ecb0c4d..9f3341101 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -918,7 +918,11 @@ class TestBuildApiKwargs: assert kwargs["messages"] is messages assert kwargs["timeout"] == 1800.0 - def test_public_moonshot_kimi_k2_5_forces_temperature_1(self, agent): + def test_public_moonshot_kimi_k2_5_omits_temperature(self, agent): + """Kimi models should NOT have client-side temperature overrides. + + The Kimi gateway selects the correct temperature server-side. + """ agent.base_url = "https://api.moonshot.ai/v1" agent._base_url_lower = agent.base_url.lower() agent.model = "kimi-k2.5" @@ -926,9 +930,9 @@ class TestBuildApiKwargs: kwargs = agent._build_api_kwargs(messages) - assert kwargs["temperature"] == 1.0 + assert "temperature" not in kwargs - def test_public_moonshot_cn_kimi_k2_5_forces_temperature_1(self, agent): + def test_public_moonshot_cn_kimi_k2_5_omits_temperature(self, agent): agent.base_url = "https://api.moonshot.cn/v1" agent._base_url_lower = agent.base_url.lower() agent.model = "kimi-k2.5" @@ -936,9 +940,9 @@ class TestBuildApiKwargs: kwargs = agent._build_api_kwargs(messages) - assert kwargs["temperature"] == 1.0 + assert "temperature" not in kwargs - def test_kimi_coding_endpoint_keeps_kimi_k2_5_at_0_6(self, agent): + def test_kimi_coding_endpoint_omits_temperature(self, agent): agent.base_url = "https://api.kimi.com/coding/v1" agent._base_url_lower = agent.base_url.lower() agent.model = "kimi-k2.5" @@ -946,7 +950,7 @@ class TestBuildApiKwargs: kwargs = agent._build_api_kwargs(messages) - assert 
kwargs["temperature"] == 0.6 + assert "temperature" not in kwargs def test_provider_preferences_injected(self, agent): agent.base_url = "https://openrouter.ai/api/v1" diff --git a/tests/test_mini_swe_runner.py b/tests/test_mini_swe_runner.py index b814f7738..16ef26286 100644 --- a/tests/test_mini_swe_runner.py +++ b/tests/test_mini_swe_runner.py @@ -2,7 +2,11 @@ from types import SimpleNamespace from unittest.mock import MagicMock, patch -def test_run_task_forces_kimi_fixed_temperature(): +def test_run_task_kimi_omits_temperature(): + """Kimi models should NOT have client-side temperature overrides. + + The Kimi gateway selects the correct temperature server-side. + """ with patch("openai.OpenAI") as mock_openai: client = MagicMock() client.chat.completions.create.return_value = SimpleNamespace( @@ -25,10 +29,11 @@ def test_run_task_forces_kimi_fixed_temperature(): result = runner.run_task("2+2") assert result["completed"] is True - assert client.chat.completions.create.call_args.kwargs["temperature"] == 0.6 + assert "temperature" not in client.chat.completions.create.call_args.kwargs -def test_run_task_public_moonshot_kimi_k2_5_forces_temperature_1(): +def test_run_task_public_moonshot_kimi_k2_5_omits_temperature(): + """kimi-k2.5 on the public Moonshot API should not get a forced temperature.""" with patch("openai.OpenAI") as mock_openai: client = MagicMock() client.base_url = "https://api.moonshot.ai/v1" @@ -52,4 +57,4 @@ def test_run_task_public_moonshot_kimi_k2_5_forces_temperature_1(): result = runner.run_task("2+2") assert result["completed"] is True - assert client.chat.completions.create.call_args.kwargs["temperature"] == 1.0 + assert "temperature" not in client.chat.completions.create.call_args.kwargs diff --git a/tests/test_trajectory_compressor.py b/tests/test_trajectory_compressor.py index b42ca1254..7978aab4c 100644 --- a/tests/test_trajectory_compressor.py +++ b/tests/test_trajectory_compressor.py @@ -31,7 +31,8 @@ def 
test_import_loads_env_from_hermes_home(tmp_path, monkeypatch): assert os.getenv("OPENROUTER_API_KEY") == "from-hermes-home" -def test_generate_summary_custom_client_forces_kimi_temperature(): +def test_generate_summary_kimi_omits_temperature(): + """Kimi models should have temperature omitted — server manages it.""" config = CompressionConfig( summarization_model="kimi-for-coding", temperature=0.3, @@ -51,10 +52,11 @@ def test_generate_summary_custom_client_forces_kimi_temperature(): result = compressor._generate_summary("tool output", metrics) assert result.startswith("[CONTEXT SUMMARY]:") - assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 0.6 + assert "temperature" not in compressor.client.chat.completions.create.call_args.kwargs -def test_generate_summary_public_moonshot_kimi_k2_5_forces_temperature_1(): +def test_generate_summary_public_moonshot_kimi_k2_5_omits_temperature(): + """kimi-k2.5 on the public Moonshot API should not get a forced temperature.""" config = CompressionConfig( summarization_model="kimi-k2.5", base_url="https://api.moonshot.ai/v1", @@ -75,10 +77,11 @@ def test_generate_summary_public_moonshot_kimi_k2_5_forces_temperature_1(): result = compressor._generate_summary("tool output", metrics) assert result.startswith("[CONTEXT SUMMARY]:") - assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 1.0 + assert "temperature" not in compressor.client.chat.completions.create.call_args.kwargs -def test_generate_summary_public_moonshot_cn_kimi_k2_5_forces_temperature_1(): +def test_generate_summary_public_moonshot_cn_kimi_k2_5_omits_temperature(): + """kimi-k2.5 on api.moonshot.cn should not get a forced temperature.""" config = CompressionConfig( summarization_model="kimi-k2.5", base_url="https://api.moonshot.cn/v1", @@ -99,7 +102,7 @@ def test_generate_summary_public_moonshot_cn_kimi_k2_5_forces_temperature_1(): result = compressor._generate_summary("tool output", metrics) assert 
result.startswith("[CONTEXT SUMMARY]:") - assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 1.0 + assert "temperature" not in compressor.client.chat.completions.create.call_args.kwargs # --------------------------------------------------------------------------- diff --git a/tests/test_trajectory_compressor_async.py b/tests/test_trajectory_compressor_async.py index 028f43eff..369b980b8 100644 --- a/tests/test_trajectory_compressor_async.py +++ b/tests/test_trajectory_compressor_async.py @@ -117,7 +117,8 @@ class TestSourceLineVerification: @pytest.mark.asyncio -async def test_generate_summary_async_custom_client_forces_kimi_temperature(): +async def test_generate_summary_async_kimi_omits_temperature(): + """Kimi models should have temperature omitted — server manages it.""" from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics config = CompressionConfig( @@ -140,11 +141,12 @@ async def test_generate_summary_async_custom_client_forces_kimi_temperature(): result = await compressor._generate_summary_async("tool output", metrics) assert result.startswith("[CONTEXT SUMMARY]:") - assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 0.6 + assert "temperature" not in async_client.chat.completions.create.call_args.kwargs @pytest.mark.asyncio -async def test_generate_summary_async_public_moonshot_kimi_k2_5_forces_temperature_1(): +async def test_generate_summary_async_public_moonshot_kimi_k2_5_omits_temperature(): + """kimi-k2.5 on the public Moonshot API should not get a forced temperature.""" from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics config = CompressionConfig( @@ -168,12 +170,12 @@ async def test_generate_summary_async_public_moonshot_kimi_k2_5_forces_temperatu result = await compressor._generate_summary_async("tool output", metrics) assert result.startswith("[CONTEXT SUMMARY]:") - assert 
async_client.chat.completions.create.call_args.kwargs["temperature"] == 1.0 - + assert "temperature" not in async_client.chat.completions.create.call_args.kwargs @pytest.mark.asyncio -async def test_generate_summary_async_public_moonshot_cn_kimi_k2_5_forces_temperature_1(): +async def test_generate_summary_async_public_moonshot_cn_kimi_k2_5_omits_temperature(): + """kimi-k2.5 on api.moonshot.cn should not get a forced temperature.""" from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics config = CompressionConfig( @@ -197,4 +199,4 @@ async def test_generate_summary_async_public_moonshot_cn_kimi_k2_5_forces_temper result = await compressor._generate_summary_async("tool output", metrics) assert result.startswith("[CONTEXT SUMMARY]:") - assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 1.0 + assert "temperature" not in async_client.chat.completions.create.call_args.kwargs diff --git a/trajectory_compressor.py b/trajectory_compressor.py index e835da034..b0fec6041 100644 --- a/trajectory_compressor.py +++ b/trajectory_compressor.py @@ -58,14 +58,20 @@ def _effective_temperature_for_model( model: str, requested_temperature: float, base_url: Optional[str] = None, -) -> float: - """Apply fixed model temperature contracts to direct client calls.""" +) -> Optional[float]: + """Apply fixed model temperature contracts to direct client calls. + + Returns ``None`` when the model manages temperature server-side (Kimi); + callers must omit the ``temperature`` kwarg entirely in that case. 
+ """ try: - from agent.auxiliary_client import _fixed_temperature_for_model + from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE except Exception: return requested_temperature fixed_temperature = _fixed_temperature_for_model(model, base_url) + if fixed_temperature is OMIT_TEMPERATURE: + return None # caller must omit temperature if fixed_temperature is not None: return fixed_temperature return requested_temperature @@ -600,12 +606,14 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" max_tokens=self.config.summary_target_tokens * 2, ) else: - response = self.client.chat.completions.create( - model=self.config.summarization_model, - messages=[{"role": "user", "content": prompt}], - temperature=summary_temperature, - max_tokens=self.config.summary_target_tokens * 2, - ) + _create_kwargs = { + "model": self.config.summarization_model, + "messages": [{"role": "user", "content": prompt}], + "max_tokens": self.config.summary_target_tokens * 2, + } + if summary_temperature is not None: + _create_kwargs["temperature"] = summary_temperature + response = self.client.chat.completions.create(**_create_kwargs) summary = self._coerce_summary_content(response.choices[0].message.content) return self._ensure_summary_prefix(summary) @@ -667,12 +675,14 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" max_tokens=self.config.summary_target_tokens * 2, ) else: - response = await self._get_async_client().chat.completions.create( - model=self.config.summarization_model, - messages=[{"role": "user", "content": prompt}], - temperature=summary_temperature, - max_tokens=self.config.summary_target_tokens * 2, - ) + _create_kwargs = { + "model": self.config.summarization_model, + "messages": [{"role": "user", "content": prompt}], + "max_tokens": self.config.summary_target_tokens * 2, + } + if summary_temperature is not None: + _create_kwargs["temperature"] = summary_temperature + response = await 
self._get_async_client().chat.completions.create(**_create_kwargs) summary = self._coerce_summary_content(response.choices[0].message.content) return self._ensure_summary_prefix(summary)