From ed201cce9cd98fe83433e38ea93fbf13bb069862 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Mon, 20 Apr 2026 23:53:24 +0530
Subject: [PATCH] fix(kimi): drop client-side temperature overrides for
 Kimi/Moonshot models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Kimi gateway selects the correct temperature server-side based on the
active mode (thinking on → 1.0, thinking off → 0.6).  Client-side clamping
is no longer needed and would conflict if the gateway changes its defaults.

Removed:
- _FIXED_TEMPERATURE_MODELS, _KIMI_INSTANT_MODELS, _KIMI_THINKING_MODELS,
  _KIMI_PUBLIC_API_OVERRIDES maps from auxiliary_client.py
- All Kimi-specific branches in _fixed_temperature_for_model() — the
  function now always returns None (kept for future non-Kimi contracts)

Callers already guard with 'if fixed_temperature is not None:' so the
change is transparent — no override is applied: a caller-supplied
temperature is passed through unchanged, and when the caller supplies
none the parameter is omitted, letting the Kimi gateway use its own
defaults.

Updated tests across 6 files to verify temperature is NOT forced.
---
 agent/auxiliary_client.py                 |  77 +----------
 run_agent.py                              |   4 +-
 tests/agent/test_auxiliary_client.py      | 153 +++++-----------------
 tests/run_agent/test_provider_parity.py   |  10 +-
 tests/run_agent/test_run_agent.py         |  16 ++-
 tests/test_mini_swe_runner.py             |  14 +-
 tests/test_trajectory_compressor.py       |  18 ++-
 tests/test_trajectory_compressor_async.py |  18 ++-
 8 files changed, 94 insertions(+), 216 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 24260126f..95ffa6fe7 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -95,84 +95,21 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
     return _PROVIDER_ALIASES.get(normalized, normalized)
 
 
-_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
-    "kimi-for-coding": 0.6,
-}
-
-# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
-# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
-# value 0.6.  Any other value will result in an error."  The same lock applies
-# to the other k2.* models served on that endpoint.  Enumerated explicitly so
-# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
-# the standard chat API and third parties) are NOT clamped.
-# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
-_KIMI_INSTANT_MODELS: frozenset = frozenset({
-    "kimi-k2.5",
-    "kimi-k2-turbo-preview",
-    "kimi-k2-0905-preview",
-})
-_KIMI_THINKING_MODELS: frozenset = frozenset({
-    "kimi-k2-thinking",
-    "kimi-k2-thinking-turbo",
-})
-
-# Moonshot's public chat endpoint (api.moonshot.ai/v1) enforces a different
-# temperature contract than the Coding Plan endpoint above.  Empirically,
-# `kimi-k2.5` on the public API rejects 0.6 with HTTP 400
-# "invalid temperature: only 1 is allowed for this model" — the Coding Plan
-# lock (0.6 for non-thinking) does not apply.  `kimi-k2-turbo-preview` and the
-# thinking variants already match the Coding Plan contract on the public
-# endpoint, so we only override the models that diverge.
-# Users hit this endpoint when `KIMI_API_KEY` is a legacy `sk-*` key (the
-# `sk-kimi-*` prefix routes to api.kimi.com/coding/v1 instead — see
-# hermes_cli/auth.py:_kimi_base_url_for_key).
-_KIMI_PUBLIC_API_OVERRIDES: Dict[str, float] = {
-    "kimi-k2.5": 1.0,
-}
-
-
 def _fixed_temperature_for_model(
     model: Optional[str],
     base_url: Optional[str] = None,
 ) -> Optional[float]:
     """Return a required temperature override for models with strict contracts.
 
-    Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
-    the k2.5 family.  Non-thinking variants require exactly 0.6; thinking
-    variants require 1.0.  An optional ``vendor/`` prefix (e.g.
-    ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.
+    Returns ``None`` for all models — callers should omit the ``temperature``
+    parameter so the provider's server-side defaults apply.
 
-    When ``base_url`` points to Moonshot's public chat endpoint
-    (``api.moonshot.ai``), the contract changes for ``kimi-k2.5``: the public
-    API only accepts ``temperature=1``, not 0.6.  That override takes precedence
-    over the Coding Plan defaults above.
-
-    Returns ``None`` for every other model, including ``kimi-k2-instruct*``
-    which is the separate non-coding K2 family with variable temperature.
+    Kimi / Moonshot models previously had hardcoded temperature overrides here
+    (0.6 for non-thinking, 1.0 for thinking).  As of April 2026 the Kimi gateway
+    selects the correct temperature server-side based on the active mode, so
+    client-side clamping is no longer needed (and would conflict if the gateway
+    changes its defaults).
     """
-    normalized = (model or "").strip().lower()
-    bare = normalized.rsplit("/", 1)[-1]
-
-    # Public Moonshot API has a stricter contract for some models than the
-    # Coding Plan endpoint — check it first so it wins on conflict.
-    if base_url and ("api.moonshot.ai" in base_url.lower() or "api.moonshot.cn" in base_url.lower()):
-        public = _KIMI_PUBLIC_API_OVERRIDES.get(bare)
-        if public is not None:
-            logger.debug(
-                "Forcing temperature=%s for %r on public Moonshot API", public, model
-            )
-            return public
-
-    fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
-    if fixed is not None:
-        logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
-        return fixed
-    if bare in _KIMI_THINKING_MODELS:
-        logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
-        return 1.0
-    if bare in _KIMI_INSTANT_MODELS:
-        logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
-        return 0.6
     return None
 
 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
diff --git a/run_agent.py b/run_agent.py
index a1e3e3038..3cac0c251 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -7988,8 +7988,8 @@ class AIAgent:
                 _fixed_temperature_for_model,
             )
             _aux_available = True
-            # Use the fixed-temperature override (e.g. kimi-for-coding → 0.6) if
-            # the model has a strict contract; otherwise the historical 0.3 default.
+            # Use the fixed-temperature override if the model has a strict
+            # contract; otherwise the historical 0.3 default.
             _flush_temperature = _fixed_temperature_for_model(self.model, self.base_url)
             if _flush_temperature is None:
                 _flush_temperature = 0.3
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index 06fc51821..db8452b5a 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -696,27 +696,42 @@ class TestIsConnectionError:
         assert _is_connection_error(err) is False
 
 
-class TestKimiForCodingTemperature:
-    """Moonshot kimi-for-coding models require fixed temperatures.
+class TestKimiTemperatureNotForced:
+    """Kimi/Moonshot models should NOT have client-side temperature overrides.
 
-    k2.5 / k2-turbo-preview / k2-0905-preview → 0.6 (non-thinking lock).
-    k2-thinking / k2-thinking-turbo → 1.0 (thinking lock).
-    kimi-k2-instruct* and every other model preserve the caller's temperature.
+    The Kimi gateway selects the correct temperature server-side based on the
+    active mode (thinking on → 1.0, thinking off → 0.6).  Client-side clamping
+    was removed so we don't conflict with gateway-managed defaults.
     """
 
-    def test_build_call_kwargs_forces_fixed_temperature(self):
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "kimi-for-coding",
+            "kimi-k2.5",
+            "kimi-k2-turbo-preview",
+            "kimi-k2-0905-preview",
+            "kimi-k2-thinking",
+            "kimi-k2-thinking-turbo",
+            "moonshotai/kimi-k2.5",
+            "moonshotai/Kimi-K2-Thinking",
+        ],
+    )
+    def test_kimi_models_preserve_caller_temperature(self, model):
+        """No kimi model should have its temperature overridden client-side."""
         from agent.auxiliary_client import _build_call_kwargs
 
         kwargs = _build_call_kwargs(
             provider="kimi-coding",
-            model="kimi-for-coding",
+            model=model,
             messages=[{"role": "user", "content": "hello"}],
             temperature=0.3,
         )
 
-        assert kwargs["temperature"] == 0.6
+        assert kwargs["temperature"] == 0.3
 
-    def test_build_call_kwargs_injects_temperature_when_missing(self):
+    def test_kimi_for_coding_no_temperature_when_none(self):
+        """When caller passes temperature=None, no temperature key is emitted."""
         from agent.auxiliary_client import _build_call_kwargs
 
         kwargs = _build_call_kwargs(
@@ -726,9 +741,9 @@ class TestKimiForCodingTemperature:
             temperature=None,
         )
 
-        assert kwargs["temperature"] == 0.6
+        assert "temperature" not in kwargs
 
-    def test_auto_routed_kimi_for_coding_sync_call_uses_fixed_temperature(self):
+    def test_sync_call_preserves_caller_temperature(self):
         client = MagicMock()
         client.base_url = "https://api.kimi.com/coding/v1"
         response = MagicMock()
@@ -750,10 +765,10 @@ class TestKimiForCodingTemperature:
         assert result is response
         kwargs = client.chat.completions.create.call_args.kwargs
         assert kwargs["model"] == "kimi-for-coding"
-        assert kwargs["temperature"] == 0.6
+        assert kwargs["temperature"] == 0.1
 
     @pytest.mark.asyncio
-    async def test_auto_routed_kimi_for_coding_async_call_uses_fixed_temperature(self):
+    async def test_async_call_preserves_caller_temperature(self):
         client = MagicMock()
         client.base_url = "https://api.kimi.com/coding/v1"
         response = MagicMock()
@@ -775,52 +790,18 @@ class TestKimiForCodingTemperature:
         assert result is response
         kwargs = client.chat.completions.create.call_args.kwargs
         assert kwargs["model"] == "kimi-for-coding"
-        assert kwargs["temperature"] == 0.6
-
-    @pytest.mark.parametrize(
-        "model,expected",
-        [
-            ("kimi-k2.5", 0.6),
-            ("kimi-k2-turbo-preview", 0.6),
-            ("kimi-k2-0905-preview", 0.6),
-            ("kimi-k2-thinking", 1.0),
-            ("kimi-k2-thinking-turbo", 1.0),
-            ("moonshotai/kimi-k2.5", 0.6),
-            ("moonshotai/Kimi-K2-Thinking", 1.0),
-        ],
-    )
-    def test_kimi_k2_family_temperature_override(self, model, expected):
-        """Moonshot kimi-k2.* models only accept fixed temperatures.
-
-        Non-thinking models → 0.6, thinking-mode models → 1.0.
-        """
-        from agent.auxiliary_client import _build_call_kwargs
-
-        kwargs = _build_call_kwargs(
-            provider="kimi-coding",
-            model=model,
-            messages=[{"role": "user", "content": "hello"}],
-            temperature=0.3,
-        )
-
-        assert kwargs["temperature"] == expected
+        assert kwargs["temperature"] == 0.1
 
     @pytest.mark.parametrize(
         "model",
         [
             "anthropic/claude-sonnet-4-6",
             "gpt-5.4",
-            # kimi-k2-instruct is the non-coding K2 family — temperature is
-            # variable (recommended 0.6 but not enforced).  Must not clamp.
             "kimi-k2-instruct",
             "moonshotai/Kimi-K2-Instruct",
-            "moonshotai/Kimi-K2-Instruct-0905",
-            "kimi-k2-instruct-0905",
-            # Hypothetical future kimi name not in the whitelist.
-            "kimi-k2-experimental",
         ],
     )
-    def test_non_restricted_model_preserves_temperature(self, model):
+    def test_non_kimi_models_still_preserve_temperature(self, model):
         from agent.auxiliary_client import _build_call_kwargs
 
         kwargs = _build_call_kwargs(
@@ -832,25 +813,16 @@ class TestKimiForCodingTemperature:
 
         assert kwargs["temperature"] == 0.3
 
-    # ── Endpoint-aware overrides: api.moonshot.ai vs api.kimi.com/coding ──
-    # The public Moonshot chat endpoint and the Coding Plan endpoint enforce
-    # different temperature contracts for the same model name.  `kimi-k2.5` on
-    # api.moonshot.ai rejects 0.6 with HTTP 400 "only 1 is allowed for this
-    # model", while the Coding Plan docs mandate 0.6.  Override must pick the
-    # right value per base_url.
-
     @pytest.mark.parametrize(
         "base_url",
         [
             "https://api.moonshot.ai/v1",
-            "https://api.moonshot.ai/v1/",
-            "https://API.MOONSHOT.AI/v1",
             "https://api.moonshot.cn/v1",
-            "https://api.moonshot.cn/v1/",
+            "https://api.kimi.com/coding/v1",
         ],
     )
-    def test_kimi_k2_5_public_api_forces_temperature_1(self, base_url):
-        """kimi-k2.5 on the public Moonshot API only accepts temperature=1."""
+    def test_kimi_k2_5_no_override_regardless_of_endpoint(self, base_url):
+        """Temperature is preserved regardless of which Kimi endpoint is used."""
         from agent.auxiliary_client import _build_call_kwargs
 
         kwargs = _build_call_kwargs(
@@ -861,64 +833,7 @@ class TestKimiForCodingTemperature:
             base_url=base_url,
         )
 
-        assert kwargs["temperature"] == 1.0
-
-    def test_kimi_k2_5_coding_plan_keeps_temperature_0_6(self):
-        """kimi-k2.5 on api.kimi.com/coding keeps the Coding Plan's 0.6 lock."""
-        from agent.auxiliary_client import _build_call_kwargs
-
-        kwargs = _build_call_kwargs(
-            provider="kimi-coding",
-            model="kimi-k2.5",
-            messages=[{"role": "user", "content": "hello"}],
-            temperature=0.1,
-            base_url="https://api.kimi.com/coding/v1",
-        )
-
-        assert kwargs["temperature"] == 0.6
-
-    def test_kimi_k2_5_no_base_url_falls_back_to_coding_plan_lock(self):
-        """Without a base_url hint, the Coding Plan default (0.6) applies.
-
-        Preserves PR #12144 backward compatibility for callers that don't thread
-        the client's base_url through.
-        """
-        from agent.auxiliary_client import _build_call_kwargs
-
-        kwargs = _build_call_kwargs(
-            provider="kimi-coding",
-            model="kimi-k2.5",
-            messages=[{"role": "user", "content": "hello"}],
-            temperature=0.1,
-        )
-
-        assert kwargs["temperature"] == 0.6
-
-    @pytest.mark.parametrize(
-        "model,expected",
-        [
-            # Only kimi-k2.5 diverges on api.moonshot.ai; the rest keep the
-            # Coding Plan lock (empirically verified against Moonshot in April
-            # 2026: turbo-preview accepts 0.6, thinking-turbo accepts 1.0).
-            ("kimi-k2-turbo-preview", 0.6),
-            ("kimi-k2-0905-preview", 0.6),
-            ("kimi-k2-thinking", 1.0),
-            ("kimi-k2-thinking-turbo", 1.0),
-            ("moonshotai/kimi-k2-thinking-turbo", 1.0),
-        ],
-    )
-    def test_other_kimi_k2_family_unchanged_on_public_api(self, model, expected):
-        from agent.auxiliary_client import _build_call_kwargs
-
-        kwargs = _build_call_kwargs(
-            provider="kimi-coding",
-            model=model,
-            messages=[{"role": "user", "content": "hello"}],
-            temperature=0.1,
-            base_url="https://api.moonshot.ai/v1",
-        )
-
-        assert kwargs["temperature"] == expected
+        assert kwargs["temperature"] == 0.1
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py
index c415951e2..6c2890f74 100644
--- a/tests/run_agent/test_provider_parity.py
+++ b/tests/run_agent/test_provider_parity.py
@@ -251,8 +251,12 @@ class TestBuildApiKwargsChatCompletionsServiceTier:
         assert "service_tier" not in kwargs
 
 
-class TestBuildApiKwargsKimiFixedTemperature:
-    def test_kimi_for_coding_forces_temperature_on_main_chat_path(self, monkeypatch):
+class TestBuildApiKwargsKimiNoTemperatureOverride:
+    def test_kimi_for_coding_does_not_force_temperature(self, monkeypatch):
+        """Temperature should NOT be set client-side for Kimi models.
+
+        The Kimi gateway selects the correct temperature server-side.
+        """
         agent = _make_agent(
             monkeypatch,
             "kimi-coding",
@@ -261,7 +265,7 @@ class TestBuildApiKwargsKimiFixedTemperature:
         )
         messages = [{"role": "user", "content": "hi"}]
         kwargs = agent._build_api_kwargs(messages)
-        assert kwargs["temperature"] == 0.6
+        assert "temperature" not in kwargs
 
 
 class TestBuildApiKwargsAIGateway:
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 13ecb0c4d..68413c26c 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -918,7 +918,11 @@ class TestBuildApiKwargs:
         assert kwargs["messages"] is messages
         assert kwargs["timeout"] == 1800.0
 
-    def test_public_moonshot_kimi_k2_5_forces_temperature_1(self, agent):
+    def test_public_moonshot_kimi_k2_5_no_temperature_override(self, agent):
+        """Kimi models should NOT have client-side temperature overrides.
+
+        The Kimi gateway selects the correct temperature server-side.
+        """
         agent.base_url = "https://api.moonshot.ai/v1"
         agent._base_url_lower = agent.base_url.lower()
         agent.model = "kimi-k2.5"
@@ -926,9 +930,9 @@ class TestBuildApiKwargs:
 
         kwargs = agent._build_api_kwargs(messages)
 
-        assert kwargs["temperature"] == 1.0
+        assert "temperature" not in kwargs
 
-    def test_public_moonshot_cn_kimi_k2_5_forces_temperature_1(self, agent):
+    def test_public_moonshot_cn_kimi_k2_5_no_temperature_override(self, agent):
         agent.base_url = "https://api.moonshot.cn/v1"
         agent._base_url_lower = agent.base_url.lower()
         agent.model = "kimi-k2.5"
@@ -936,9 +940,9 @@ class TestBuildApiKwargs:
 
         kwargs = agent._build_api_kwargs(messages)
 
-        assert kwargs["temperature"] == 1.0
+        assert "temperature" not in kwargs
 
-    def test_kimi_coding_endpoint_keeps_kimi_k2_5_at_0_6(self, agent):
+    def test_kimi_coding_endpoint_no_temperature_override(self, agent):
         agent.base_url = "https://api.kimi.com/coding/v1"
         agent._base_url_lower = agent.base_url.lower()
         agent.model = "kimi-k2.5"
@@ -946,7 +950,7 @@ class TestBuildApiKwargs:
 
         kwargs = agent._build_api_kwargs(messages)
 
-        assert kwargs["temperature"] == 0.6
+        assert "temperature" not in kwargs
 
     def test_provider_preferences_injected(self, agent):
         agent.base_url = "https://openrouter.ai/api/v1"
diff --git a/tests/test_mini_swe_runner.py b/tests/test_mini_swe_runner.py
index b814f7738..75ae28204 100644
--- a/tests/test_mini_swe_runner.py
+++ b/tests/test_mini_swe_runner.py
@@ -2,7 +2,12 @@ from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
 
-def test_run_task_forces_kimi_fixed_temperature():
+def test_run_task_kimi_preserves_default_temperature():
+    """Kimi models should NOT have client-side temperature overrides.
+
+    The Kimi gateway selects the correct temperature server-side, so
+    mini_swe_runner should not inject a temperature key at all.
+    """
     with patch("openai.OpenAI") as mock_openai:
         client = MagicMock()
         client.chat.completions.create.return_value = SimpleNamespace(
@@ -25,10 +30,11 @@ def test_run_task_forces_kimi_fixed_temperature():
         result = runner.run_task("2+2")
 
     assert result["completed"] is True
-    assert client.chat.completions.create.call_args.kwargs["temperature"] == 0.6
+    assert "temperature" not in client.chat.completions.create.call_args.kwargs
 
 
-def test_run_task_public_moonshot_kimi_k2_5_forces_temperature_1():
+def test_run_task_public_moonshot_kimi_k2_5_preserves_default_temperature():
+    """kimi-k2.5 on the public Moonshot API should not get a forced temperature."""
     with patch("openai.OpenAI") as mock_openai:
         client = MagicMock()
         client.base_url = "https://api.moonshot.ai/v1"
@@ -52,4 +58,4 @@ def test_run_task_public_moonshot_kimi_k2_5_forces_temperature_1():
         result = runner.run_task("2+2")
 
     assert result["completed"] is True
-    assert client.chat.completions.create.call_args.kwargs["temperature"] == 1.0
+    assert "temperature" not in client.chat.completions.create.call_args.kwargs
diff --git a/tests/test_trajectory_compressor.py b/tests/test_trajectory_compressor.py
index b42ca1254..f16e40b4c 100644
--- a/tests/test_trajectory_compressor.py
+++ b/tests/test_trajectory_compressor.py
@@ -31,7 +31,11 @@ def test_import_loads_env_from_hermes_home(tmp_path, monkeypatch):
     assert os.getenv("OPENROUTER_API_KEY") == "from-hermes-home"
 
 
-def test_generate_summary_custom_client_forces_kimi_temperature():
+def test_generate_summary_kimi_preserves_caller_temperature():
+    """Kimi models should use the caller's temperature, not a forced override.
+
+    The Kimi gateway selects the correct temperature server-side.
+    """
     config = CompressionConfig(
         summarization_model="kimi-for-coding",
         temperature=0.3,
@@ -51,10 +55,11 @@ def test_generate_summary_custom_client_forces_kimi_temperature():
     result = compressor._generate_summary("tool output", metrics)
 
     assert result.startswith("[CONTEXT SUMMARY]:")
-    assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 0.6
+    assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 0.3
 
 
-def test_generate_summary_public_moonshot_kimi_k2_5_forces_temperature_1():
+def test_generate_summary_public_moonshot_kimi_k2_5_preserves_temperature():
+    """kimi-k2.5 on the public Moonshot API should use caller's temperature."""
     config = CompressionConfig(
         summarization_model="kimi-k2.5",
         base_url="https://api.moonshot.ai/v1",
@@ -75,10 +80,11 @@ def test_generate_summary_public_moonshot_kimi_k2_5_forces_temperature_1():
     result = compressor._generate_summary("tool output", metrics)
 
     assert result.startswith("[CONTEXT SUMMARY]:")
-    assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 1.0
+    assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 0.3
 
 
-def test_generate_summary_public_moonshot_cn_kimi_k2_5_forces_temperature_1():
+def test_generate_summary_public_moonshot_cn_kimi_k2_5_preserves_temperature():
+    """kimi-k2.5 on api.moonshot.cn should use caller's temperature."""
     config = CompressionConfig(
         summarization_model="kimi-k2.5",
         base_url="https://api.moonshot.cn/v1",
@@ -99,7 +105,7 @@ def test_generate_summary_public_moonshot_cn_kimi_k2_5_forces_temperature_1():
     result = compressor._generate_summary("tool output", metrics)
 
     assert result.startswith("[CONTEXT SUMMARY]:")
-    assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 1.0
+    assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 0.3
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/test_trajectory_compressor_async.py b/tests/test_trajectory_compressor_async.py
index 028f43eff..641bd0e35 100644
--- a/tests/test_trajectory_compressor_async.py
+++ b/tests/test_trajectory_compressor_async.py
@@ -117,7 +117,11 @@ class TestSourceLineVerification:
 
 
 @pytest.mark.asyncio
-async def test_generate_summary_async_custom_client_forces_kimi_temperature():
+async def test_generate_summary_async_kimi_preserves_caller_temperature():
+    """Kimi models should use the caller's temperature, not a forced override.
+
+    The Kimi gateway selects the correct temperature server-side.
+    """
     from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics
 
     config = CompressionConfig(
@@ -140,11 +144,12 @@ async def test_generate_summary_async_custom_client_forces_kimi_temperature():
     result = await compressor._generate_summary_async("tool output", metrics)
 
     assert result.startswith("[CONTEXT SUMMARY]:")
-    assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 0.6
+    assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 0.3
 
 
 @pytest.mark.asyncio
-async def test_generate_summary_async_public_moonshot_kimi_k2_5_forces_temperature_1():
+async def test_generate_summary_async_public_moonshot_kimi_k2_5_preserves_temperature():
+    """kimi-k2.5 on the public Moonshot API should use caller's temperature."""
     from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics
 
     config = CompressionConfig(
@@ -168,12 +173,13 @@ async def test_generate_summary_async_public_moonshot_kimi_k2_5_forces_temperatu
     result = await compressor._generate_summary_async("tool output", metrics)
 
     assert result.startswith("[CONTEXT SUMMARY]:")
-    assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 1.0
+    assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 0.3
 
 
 
 @pytest.mark.asyncio
-async def test_generate_summary_async_public_moonshot_cn_kimi_k2_5_forces_temperature_1():
+async def test_generate_summary_async_public_moonshot_cn_kimi_k2_5_preserves_temperature():
+    """kimi-k2.5 on api.moonshot.cn should use caller's temperature."""
     from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics
 
     config = CompressionConfig(
@@ -197,4 +203,4 @@ async def test_generate_summary_async_public_moonshot_cn_kimi_k2_5_forces_temper
     result = await compressor._generate_summary_async("tool output", metrics)
 
     assert result.startswith("[CONTEXT SUMMARY]:")
-    assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 1.0
+    assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 0.3