From ed201cce9cd98fe83433e38ea93fbf13bb069862 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Mon, 20 Apr 2026 23:53:24 +0530 Subject: [PATCH] fix(kimi): drop client-side temperature overrides for Kimi/Moonshot models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Kimi gateway selects the correct temperature server-side based on the active mode (thinking on → 1.0, thinking off → 0.6). Client-side clamping is no longer needed and would conflict if the gateway changes its defaults. Removed: - _FIXED_TEMPERATURE_MODELS, _KIMI_INSTANT_MODELS, _KIMI_THINKING_MODELS, _KIMI_PUBLIC_API_OVERRIDES maps from auxiliary_client.py - All Kimi-specific branches in _fixed_temperature_for_model() — the function now always returns None (kept for future non-Kimi contracts) Callers already guard with 'if fixed_temperature is not None:' so the change is transparent — no override is injected: a caller-supplied temperature is passed through unchanged, and when the caller supplies none the parameter is omitted from the API call, letting the Kimi gateway use its own defaults. Updated tests across 6 files to verify temperature is NOT forced. 
--- agent/auxiliary_client.py | 77 +---------- run_agent.py | 4 +- tests/agent/test_auxiliary_client.py | 153 +++++----------------- tests/run_agent/test_provider_parity.py | 10 +- tests/run_agent/test_run_agent.py | 16 ++- tests/test_mini_swe_runner.py | 14 +- tests/test_trajectory_compressor.py | 18 ++- tests/test_trajectory_compressor_async.py | 18 ++- 8 files changed, 94 insertions(+), 216 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 24260126f..95ffa6fe7 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -95,84 +95,21 @@ def _normalize_aux_provider(provider: Optional[str]) -> str: return _PROVIDER_ALIASES.get(normalized, normalized) -_FIXED_TEMPERATURE_MODELS: Dict[str, float] = { - "kimi-for-coding": 0.6, -} - -# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents: -# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed -# value 0.6. Any other value will result in an error." The same lock applies -# to the other k2.* models served on that endpoint. Enumerated explicitly so -# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on -# the standard chat API and third parties) are NOT clamped. -# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart -_KIMI_INSTANT_MODELS: frozenset = frozenset({ - "kimi-k2.5", - "kimi-k2-turbo-preview", - "kimi-k2-0905-preview", -}) -_KIMI_THINKING_MODELS: frozenset = frozenset({ - "kimi-k2-thinking", - "kimi-k2-thinking-turbo", -}) - -# Moonshot's public chat endpoint (api.moonshot.ai/v1) enforces a different -# temperature contract than the Coding Plan endpoint above. Empirically, -# `kimi-k2.5` on the public API rejects 0.6 with HTTP 400 -# "invalid temperature: only 1 is allowed for this model" — the Coding Plan -# lock (0.6 for non-thinking) does not apply. 
`kimi-k2-turbo-preview` and the -# thinking variants already match the Coding Plan contract on the public -# endpoint, so we only override the models that diverge. -# Users hit this endpoint when `KIMI_API_KEY` is a legacy `sk-*` key (the -# `sk-kimi-*` prefix routes to api.kimi.com/coding/v1 instead — see -# hermes_cli/auth.py:_kimi_base_url_for_key). -_KIMI_PUBLIC_API_OVERRIDES: Dict[str, float] = { - "kimi-k2.5": 1.0, -} - - def _fixed_temperature_for_model( model: Optional[str], base_url: Optional[str] = None, ) -> Optional[float]: """Return a required temperature override for models with strict contracts. - Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on - the k2.5 family. Non-thinking variants require exactly 0.6; thinking - variants require 1.0. An optional ``vendor/`` prefix (e.g. - ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings. + Returns ``None`` for all models — callers should omit the ``temperature`` + parameter so the provider's server-side defaults apply. - When ``base_url`` points to Moonshot's public chat endpoint - (``api.moonshot.ai``), the contract changes for ``kimi-k2.5``: the public - API only accepts ``temperature=1``, not 0.6. That override takes precedence - over the Coding Plan defaults above. - - Returns ``None`` for every other model, including ``kimi-k2-instruct*`` - which is the separate non-coding K2 family with variable temperature. + Kimi / Moonshot models previously had hardcoded temperature overrides here + (0.6 for non-thinking, 1.0 for thinking). As of April 2026 the Kimi gateway + selects the correct temperature server-side based on the active mode, so + client-side clamping is no longer needed (and would conflict if the gateway + changes its defaults). """ - normalized = (model or "").strip().lower() - bare = normalized.rsplit("/", 1)[-1] - - # Public Moonshot API has a stricter contract for some models than the - # Coding Plan endpoint — check it first so it wins on conflict. 
- if base_url and ("api.moonshot.ai" in base_url.lower() or "api.moonshot.cn" in base_url.lower()): - public = _KIMI_PUBLIC_API_OVERRIDES.get(bare) - if public is not None: - logger.debug( - "Forcing temperature=%s for %r on public Moonshot API", public, model - ) - return public - - fixed = _FIXED_TEMPERATURE_MODELS.get(normalized) - if fixed is not None: - logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model) - return fixed - if bare in _KIMI_THINKING_MODELS: - logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model) - return 1.0 - if bare in _KIMI_INSTANT_MODELS: - logger.debug("Forcing temperature=0.6 for kimi instant model %r", model) - return 0.6 return None # Default auxiliary models for direct API-key providers (cheap/fast for side tasks) diff --git a/run_agent.py b/run_agent.py index a1e3e3038..3cac0c251 100644 --- a/run_agent.py +++ b/run_agent.py @@ -7988,8 +7988,8 @@ class AIAgent: _fixed_temperature_for_model, ) _aux_available = True - # Use the fixed-temperature override (e.g. kimi-for-coding → 0.6) if - # the model has a strict contract; otherwise the historical 0.3 default. + # Use the fixed-temperature override if the model has a strict + # contract; otherwise the historical 0.3 default. _flush_temperature = _fixed_temperature_for_model(self.model, self.base_url) if _flush_temperature is None: _flush_temperature = 0.3 diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 06fc51821..db8452b5a 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -696,27 +696,42 @@ class TestIsConnectionError: assert _is_connection_error(err) is False -class TestKimiForCodingTemperature: - """Moonshot kimi-for-coding models require fixed temperatures. +class TestKimiTemperatureNotForced: + """Kimi/Moonshot models should NOT have client-side temperature overrides. - k2.5 / k2-turbo-preview / k2-0905-preview → 0.6 (non-thinking lock). 
- k2-thinking / k2-thinking-turbo → 1.0 (thinking lock). - kimi-k2-instruct* and every other model preserve the caller's temperature. + The Kimi gateway selects the correct temperature server-side based on the + active mode (thinking on → 1.0, thinking off → 0.6). Client-side clamping + was removed so we don't conflict with gateway-managed defaults. """ - def test_build_call_kwargs_forces_fixed_temperature(self): + @pytest.mark.parametrize( + "model", + [ + "kimi-for-coding", + "kimi-k2.5", + "kimi-k2-turbo-preview", + "kimi-k2-0905-preview", + "kimi-k2-thinking", + "kimi-k2-thinking-turbo", + "moonshotai/kimi-k2.5", + "moonshotai/Kimi-K2-Thinking", + ], + ) + def test_kimi_models_preserve_caller_temperature(self, model): + """No kimi model should have its temperature overridden client-side.""" from agent.auxiliary_client import _build_call_kwargs kwargs = _build_call_kwargs( provider="kimi-coding", - model="kimi-for-coding", + model=model, messages=[{"role": "user", "content": "hello"}], temperature=0.3, ) - assert kwargs["temperature"] == 0.6 + assert kwargs["temperature"] == 0.3 - def test_build_call_kwargs_injects_temperature_when_missing(self): + def test_kimi_for_coding_no_temperature_when_none(self): + """When caller passes temperature=None, no temperature key is emitted.""" from agent.auxiliary_client import _build_call_kwargs kwargs = _build_call_kwargs( @@ -726,9 +741,9 @@ class TestKimiForCodingTemperature: temperature=None, ) - assert kwargs["temperature"] == 0.6 + assert "temperature" not in kwargs - def test_auto_routed_kimi_for_coding_sync_call_uses_fixed_temperature(self): + def test_sync_call_preserves_caller_temperature(self): client = MagicMock() client.base_url = "https://api.kimi.com/coding/v1" response = MagicMock() @@ -750,10 +765,10 @@ class TestKimiForCodingTemperature: assert result is response kwargs = client.chat.completions.create.call_args.kwargs assert kwargs["model"] == "kimi-for-coding" - assert kwargs["temperature"] == 0.6 + assert 
kwargs["temperature"] == 0.1 @pytest.mark.asyncio - async def test_auto_routed_kimi_for_coding_async_call_uses_fixed_temperature(self): + async def test_async_call_preserves_caller_temperature(self): client = MagicMock() client.base_url = "https://api.kimi.com/coding/v1" response = MagicMock() @@ -775,52 +790,18 @@ class TestKimiForCodingTemperature: assert result is response kwargs = client.chat.completions.create.call_args.kwargs assert kwargs["model"] == "kimi-for-coding" - assert kwargs["temperature"] == 0.6 - - @pytest.mark.parametrize( - "model,expected", - [ - ("kimi-k2.5", 0.6), - ("kimi-k2-turbo-preview", 0.6), - ("kimi-k2-0905-preview", 0.6), - ("kimi-k2-thinking", 1.0), - ("kimi-k2-thinking-turbo", 1.0), - ("moonshotai/kimi-k2.5", 0.6), - ("moonshotai/Kimi-K2-Thinking", 1.0), - ], - ) - def test_kimi_k2_family_temperature_override(self, model, expected): - """Moonshot kimi-k2.* models only accept fixed temperatures. - - Non-thinking models → 0.6, thinking-mode models → 1.0. - """ - from agent.auxiliary_client import _build_call_kwargs - - kwargs = _build_call_kwargs( - provider="kimi-coding", - model=model, - messages=[{"role": "user", "content": "hello"}], - temperature=0.3, - ) - - assert kwargs["temperature"] == expected + assert kwargs["temperature"] == 0.1 @pytest.mark.parametrize( "model", [ "anthropic/claude-sonnet-4-6", "gpt-5.4", - # kimi-k2-instruct is the non-coding K2 family — temperature is - # variable (recommended 0.6 but not enforced). Must not clamp. "kimi-k2-instruct", "moonshotai/Kimi-K2-Instruct", - "moonshotai/Kimi-K2-Instruct-0905", - "kimi-k2-instruct-0905", - # Hypothetical future kimi name not in the whitelist. 
- "kimi-k2-experimental", ], ) - def test_non_restricted_model_preserves_temperature(self, model): + def test_non_kimi_models_still_preserve_temperature(self, model): from agent.auxiliary_client import _build_call_kwargs kwargs = _build_call_kwargs( @@ -832,25 +813,16 @@ class TestKimiForCodingTemperature: assert kwargs["temperature"] == 0.3 - # ── Endpoint-aware overrides: api.moonshot.ai vs api.kimi.com/coding ── - # The public Moonshot chat endpoint and the Coding Plan endpoint enforce - # different temperature contracts for the same model name. `kimi-k2.5` on - # api.moonshot.ai rejects 0.6 with HTTP 400 "only 1 is allowed for this - # model", while the Coding Plan docs mandate 0.6. Override must pick the - # right value per base_url. - @pytest.mark.parametrize( "base_url", [ "https://api.moonshot.ai/v1", - "https://api.moonshot.ai/v1/", - "https://API.MOONSHOT.AI/v1", "https://api.moonshot.cn/v1", - "https://api.moonshot.cn/v1/", + "https://api.kimi.com/coding/v1", ], ) - def test_kimi_k2_5_public_api_forces_temperature_1(self, base_url): - """kimi-k2.5 on the public Moonshot API only accepts temperature=1.""" + def test_kimi_k2_5_no_override_regardless_of_endpoint(self, base_url): + """Temperature is preserved regardless of which Kimi endpoint is used.""" from agent.auxiliary_client import _build_call_kwargs kwargs = _build_call_kwargs( @@ -861,64 +833,7 @@ class TestKimiForCodingTemperature: base_url=base_url, ) - assert kwargs["temperature"] == 1.0 - - def test_kimi_k2_5_coding_plan_keeps_temperature_0_6(self): - """kimi-k2.5 on api.kimi.com/coding keeps the Coding Plan's 0.6 lock.""" - from agent.auxiliary_client import _build_call_kwargs - - kwargs = _build_call_kwargs( - provider="kimi-coding", - model="kimi-k2.5", - messages=[{"role": "user", "content": "hello"}], - temperature=0.1, - base_url="https://api.kimi.com/coding/v1", - ) - - assert kwargs["temperature"] == 0.6 - - def test_kimi_k2_5_no_base_url_falls_back_to_coding_plan_lock(self): - 
"""Without a base_url hint, the Coding Plan default (0.6) applies. - - Preserves PR #12144 backward compatibility for callers that don't thread - the client's base_url through. - """ - from agent.auxiliary_client import _build_call_kwargs - - kwargs = _build_call_kwargs( - provider="kimi-coding", - model="kimi-k2.5", - messages=[{"role": "user", "content": "hello"}], - temperature=0.1, - ) - - assert kwargs["temperature"] == 0.6 - - @pytest.mark.parametrize( - "model,expected", - [ - # Only kimi-k2.5 diverges on api.moonshot.ai; the rest keep the - # Coding Plan lock (empirically verified against Moonshot in April - # 2026: turbo-preview accepts 0.6, thinking-turbo accepts 1.0). - ("kimi-k2-turbo-preview", 0.6), - ("kimi-k2-0905-preview", 0.6), - ("kimi-k2-thinking", 1.0), - ("kimi-k2-thinking-turbo", 1.0), - ("moonshotai/kimi-k2-thinking-turbo", 1.0), - ], - ) - def test_other_kimi_k2_family_unchanged_on_public_api(self, model, expected): - from agent.auxiliary_client import _build_call_kwargs - - kwargs = _build_call_kwargs( - provider="kimi-coding", - model=model, - messages=[{"role": "user", "content": "hello"}], - temperature=0.1, - base_url="https://api.moonshot.ai/v1", - ) - - assert kwargs["temperature"] == expected + assert kwargs["temperature"] == 0.1 # --------------------------------------------------------------------------- diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py index c415951e2..6c2890f74 100644 --- a/tests/run_agent/test_provider_parity.py +++ b/tests/run_agent/test_provider_parity.py @@ -251,8 +251,12 @@ class TestBuildApiKwargsChatCompletionsServiceTier: assert "service_tier" not in kwargs -class TestBuildApiKwargsKimiFixedTemperature: - def test_kimi_for_coding_forces_temperature_on_main_chat_path(self, monkeypatch): +class TestBuildApiKwargsKimiNoTemperatureOverride: + def test_kimi_for_coding_does_not_force_temperature(self, monkeypatch): + """Temperature should NOT be set client-side for 
Kimi models. + + The Kimi gateway selects the correct temperature server-side. + """ agent = _make_agent( monkeypatch, "kimi-coding", @@ -261,7 +265,7 @@ class TestBuildApiKwargsKimiFixedTemperature: ) messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) - assert kwargs["temperature"] == 0.6 + assert "temperature" not in kwargs class TestBuildApiKwargsAIGateway: diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 13ecb0c4d..68413c26c 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -918,7 +918,11 @@ class TestBuildApiKwargs: assert kwargs["messages"] is messages assert kwargs["timeout"] == 1800.0 - def test_public_moonshot_kimi_k2_5_forces_temperature_1(self, agent): + def test_public_moonshot_kimi_k2_5_no_temperature_override(self, agent): + """Kimi models should NOT have client-side temperature overrides. + + The Kimi gateway selects the correct temperature server-side. + """ agent.base_url = "https://api.moonshot.ai/v1" agent._base_url_lower = agent.base_url.lower() agent.model = "kimi-k2.5" @@ -926,9 +930,9 @@ class TestBuildApiKwargs: kwargs = agent._build_api_kwargs(messages) - assert kwargs["temperature"] == 1.0 + assert "temperature" not in kwargs - def test_public_moonshot_cn_kimi_k2_5_forces_temperature_1(self, agent): + def test_public_moonshot_cn_kimi_k2_5_no_temperature_override(self, agent): agent.base_url = "https://api.moonshot.cn/v1" agent._base_url_lower = agent.base_url.lower() agent.model = "kimi-k2.5" @@ -936,9 +940,9 @@ class TestBuildApiKwargs: kwargs = agent._build_api_kwargs(messages) - assert kwargs["temperature"] == 1.0 + assert "temperature" not in kwargs - def test_kimi_coding_endpoint_keeps_kimi_k2_5_at_0_6(self, agent): + def test_kimi_coding_endpoint_no_temperature_override(self, agent): agent.base_url = "https://api.kimi.com/coding/v1" agent._base_url_lower = agent.base_url.lower() agent.model = "kimi-k2.5" @@ -946,7 
+950,7 @@ class TestBuildApiKwargs: kwargs = agent._build_api_kwargs(messages) - assert kwargs["temperature"] == 0.6 + assert "temperature" not in kwargs def test_provider_preferences_injected(self, agent): agent.base_url = "https://openrouter.ai/api/v1" diff --git a/tests/test_mini_swe_runner.py b/tests/test_mini_swe_runner.py index b814f7738..75ae28204 100644 --- a/tests/test_mini_swe_runner.py +++ b/tests/test_mini_swe_runner.py @@ -2,7 +2,12 @@ from types import SimpleNamespace from unittest.mock import MagicMock, patch -def test_run_task_forces_kimi_fixed_temperature(): +def test_run_task_kimi_preserves_default_temperature(): + """Kimi models should NOT have client-side temperature overrides. + + The Kimi gateway selects the correct temperature server-side, so + mini_swe_runner should not inject a temperature key at all. + """ with patch("openai.OpenAI") as mock_openai: client = MagicMock() client.chat.completions.create.return_value = SimpleNamespace( @@ -25,10 +30,11 @@ def test_run_task_forces_kimi_fixed_temperature(): result = runner.run_task("2+2") assert result["completed"] is True - assert client.chat.completions.create.call_args.kwargs["temperature"] == 0.6 + assert "temperature" not in client.chat.completions.create.call_args.kwargs -def test_run_task_public_moonshot_kimi_k2_5_forces_temperature_1(): +def test_run_task_public_moonshot_kimi_k2_5_preserves_default_temperature(): + """kimi-k2.5 on the public Moonshot API should not get a forced temperature.""" with patch("openai.OpenAI") as mock_openai: client = MagicMock() client.base_url = "https://api.moonshot.ai/v1" @@ -52,4 +58,4 @@ def test_run_task_public_moonshot_kimi_k2_5_forces_temperature_1(): result = runner.run_task("2+2") assert result["completed"] is True - assert client.chat.completions.create.call_args.kwargs["temperature"] == 1.0 + assert "temperature" not in client.chat.completions.create.call_args.kwargs diff --git a/tests/test_trajectory_compressor.py 
b/tests/test_trajectory_compressor.py index b42ca1254..f16e40b4c 100644 --- a/tests/test_trajectory_compressor.py +++ b/tests/test_trajectory_compressor.py @@ -31,7 +31,11 @@ def test_import_loads_env_from_hermes_home(tmp_path, monkeypatch): assert os.getenv("OPENROUTER_API_KEY") == "from-hermes-home" -def test_generate_summary_custom_client_forces_kimi_temperature(): +def test_generate_summary_kimi_preserves_caller_temperature(): + """Kimi models should use the caller's temperature, not a forced override. + + The Kimi gateway selects the correct temperature server-side. + """ config = CompressionConfig( summarization_model="kimi-for-coding", temperature=0.3, @@ -51,10 +55,11 @@ def test_generate_summary_custom_client_forces_kimi_temperature(): result = compressor._generate_summary("tool output", metrics) assert result.startswith("[CONTEXT SUMMARY]:") - assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 0.6 + assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 0.3 -def test_generate_summary_public_moonshot_kimi_k2_5_forces_temperature_1(): +def test_generate_summary_public_moonshot_kimi_k2_5_preserves_temperature(): + """kimi-k2.5 on the public Moonshot API should use caller's temperature.""" config = CompressionConfig( summarization_model="kimi-k2.5", base_url="https://api.moonshot.ai/v1", @@ -75,10 +80,11 @@ def test_generate_summary_public_moonshot_kimi_k2_5_forces_temperature_1(): result = compressor._generate_summary("tool output", metrics) assert result.startswith("[CONTEXT SUMMARY]:") - assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 1.0 + assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 0.3 -def test_generate_summary_public_moonshot_cn_kimi_k2_5_forces_temperature_1(): +def test_generate_summary_public_moonshot_cn_kimi_k2_5_preserves_temperature(): + """kimi-k2.5 on api.moonshot.cn should use caller's temperature.""" config 
= CompressionConfig( summarization_model="kimi-k2.5", base_url="https://api.moonshot.cn/v1", @@ -99,7 +105,7 @@ def test_generate_summary_public_moonshot_cn_kimi_k2_5_forces_temperature_1(): result = compressor._generate_summary("tool output", metrics) assert result.startswith("[CONTEXT SUMMARY]:") - assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 1.0 + assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 0.3 # --------------------------------------------------------------------------- diff --git a/tests/test_trajectory_compressor_async.py b/tests/test_trajectory_compressor_async.py index 028f43eff..641bd0e35 100644 --- a/tests/test_trajectory_compressor_async.py +++ b/tests/test_trajectory_compressor_async.py @@ -117,7 +117,11 @@ class TestSourceLineVerification: @pytest.mark.asyncio -async def test_generate_summary_async_custom_client_forces_kimi_temperature(): +async def test_generate_summary_async_kimi_preserves_caller_temperature(): + """Kimi models should use the caller's temperature, not a forced override. + + The Kimi gateway selects the correct temperature server-side. 
+ """ from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics config = CompressionConfig( @@ -140,11 +144,12 @@ async def test_generate_summary_async_custom_client_forces_kimi_temperature(): result = await compressor._generate_summary_async("tool output", metrics) assert result.startswith("[CONTEXT SUMMARY]:") - assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 0.6 + assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 0.3 @pytest.mark.asyncio -async def test_generate_summary_async_public_moonshot_kimi_k2_5_forces_temperature_1(): +async def test_generate_summary_async_public_moonshot_kimi_k2_5_preserves_temperature(): + """kimi-k2.5 on the public Moonshot API should use caller's temperature.""" from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics config = CompressionConfig( @@ -168,12 +173,13 @@ async def test_generate_summary_async_public_moonshot_kimi_k2_5_forces_temperatu result = await compressor._generate_summary_async("tool output", metrics) assert result.startswith("[CONTEXT SUMMARY]:") - assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 1.0 + assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 0.3 @pytest.mark.asyncio -async def test_generate_summary_async_public_moonshot_cn_kimi_k2_5_forces_temperature_1(): +async def test_generate_summary_async_public_moonshot_cn_kimi_k2_5_preserves_temperature(): + """kimi-k2.5 on api.moonshot.cn should use caller's temperature.""" from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics config = CompressionConfig( @@ -197,4 +203,4 @@ async def test_generate_summary_async_public_moonshot_cn_kimi_k2_5_forces_temper result = await compressor._generate_summary_async("tool output", metrics) assert result.startswith("[CONTEXT SUMMARY]:") - assert 
async_client.chat.completions.create.call_args.kwargs["temperature"] == 1.0 + assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 0.3