fix(kimi): route temperature override by base_url — kimi-k2.5 needs 1.0 on api.moonshot.ai

Follow-up to #12144. That PR standardized the kimi-k2.* temperature lock against the Coding Plan endpoint (api.kimi.com/coding/v1) docs, where non-thinking models require 0.6.

Verified empirically against Moonshot (April 2026) that the public chat endpoint (api.moonshot.ai/v1) has a different contract for kimi-k2.5: it only accepts temperature=1, and rejects 0.6 with:

    HTTP 400 "invalid temperature: only 1 is allowed for this model"

Users hit the public endpoint when KIMI_API_KEY is a legacy sk-* key (the sk-kimi-* prefix routes to Coding Plan — see hermes_cli/auth.py). So for Coding Plan subscribers the fix from #12144 is correct, but for public-API users it reintroduces the exact 400 reported in #9125.

Reproduction on api.moonshot.ai/v1 + kimi-k2.5:

    temperature=1.0  → 200 OK
    temperature=0.6  → 400 "only 1 is allowed"   ← #12144 default
    temperature=None → 200 OK

Other kimi-k2.* models are unaffected empirically — turbo-preview accepts 0.6 and thinking-turbo accepts 1.0 on both endpoints — so only kimi-k2.5 diverges.

Fix: thread the client's actual base_url through _build_call_kwargs (the parameter already existed but callers passed config-level resolved_base_url; for auto-detected routes that was often empty). _fixed_temperature_for_model now checks api.moonshot.ai first via an explicit _KIMI_PUBLIC_API_OVERRIDES map, then falls back to the Coding Plan defaults.

Tests parametrize over endpoint + model to lock both contracts.

Closes #9125.
2026-06-09 08:21:50 +00:00 · 2026-04-20 04:18:49 +09:00 · 2026-04-20 04:18:49 +09:00 · 6f79b8f01d
commit 6f79b8f01d
parent 0d353ca6a8
2 changed files with 135 additions and 8 deletions
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -832,6 +832,92 @@ class TestKimiForCodingTemperature:

        assert kwargs["temperature"] == 0.3

+    # ── Endpoint-aware overrides: api.moonshot.ai vs api.kimi.com/coding ──
+    # The public Moonshot chat endpoint and the Coding Plan endpoint enforce
+    # different temperature contracts for the same model name.  `kimi-k2.5` on
+    # api.moonshot.ai rejects 0.6 with HTTP 400 "only 1 is allowed for this
+    # model", while the Coding Plan docs mandate 0.6.  Override must pick the
+    # right value per base_url.
+
+    @pytest.mark.parametrize(
+        "base_url",
+        [
+            "https://api.moonshot.ai/v1",
+            "https://api.moonshot.ai/v1/",
+            "https://API.MOONSHOT.AI/v1",
+        ],
+    )
+    def test_kimi_k2_5_public_api_forces_temperature_1(self, base_url):
+        """kimi-k2.5 on the public Moonshot API only accepts temperature=1."""
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider="kimi-coding",
+            model="kimi-k2.5",
+            messages=[{"role": "user", "content": "hello"}],
+            temperature=0.1,
+            base_url=base_url,
+        )
+
+        assert kwargs["temperature"] == 1.0
+
+    def test_kimi_k2_5_coding_plan_keeps_temperature_0_6(self):
+        """kimi-k2.5 on api.kimi.com/coding keeps the Coding Plan's 0.6 lock."""
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider="kimi-coding",
+            model="kimi-k2.5",
+            messages=[{"role": "user", "content": "hello"}],
+            temperature=0.1,
+            base_url="https://api.kimi.com/coding/v1",
+        )
+
+        assert kwargs["temperature"] == 0.6
+
+    def test_kimi_k2_5_no_base_url_falls_back_to_coding_plan_lock(self):
+        """Without a base_url hint, the Coding Plan default (0.6) applies.
+
+        Preserves PR #12144 backward compatibility for callers that don't thread
+        the client's base_url through.
+        """
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider="kimi-coding",
+            model="kimi-k2.5",
+            messages=[{"role": "user", "content": "hello"}],
+            temperature=0.1,
+        )
+
+        assert kwargs["temperature"] == 0.6
+
+    @pytest.mark.parametrize(
+        "model,expected",
+        [
+            # Only kimi-k2.5 diverges on api.moonshot.ai; the rest keep the
+            # Coding Plan lock (empirically verified against Moonshot in April
+            # 2026: turbo-preview accepts 0.6, thinking-turbo accepts 1.0).
+            ("kimi-k2-turbo-preview", 0.6),
+            ("kimi-k2-0905-preview", 0.6),
+            ("kimi-k2-thinking", 1.0),
+            ("kimi-k2-thinking-turbo", 1.0),
+            ("moonshotai/kimi-k2-thinking-turbo", 1.0),
+        ],
+    )
+    def test_other_kimi_k2_family_unchanged_on_public_api(self, model, expected):
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider="kimi-coding",
+            model=model,
+            messages=[{"role": "user", "content": "hello"}],
+            temperature=0.1,
+            base_url="https://api.moonshot.ai/v1",
+        )
+
+        assert kwargs["temperature"] == expected
+

 # ---------------------------------------------------------------------------
 # async_call_llm payment / connection fallback (#7512 bug 2)