mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(kimi): route temperature override by base_url — kimi-k2.5 needs 1.0 on api.moonshot.ai
Follow-up to #12144. That PR standardized the kimi-k2.* temperature lock against the docs for the Coding Plan endpoint (api.kimi.com/coding/v1), where non-thinking models require 0.6.

Empirical verification against Moonshot (April 2026) showed that the public chat endpoint (api.moonshot.ai/v1) has a different contract for kimi-k2.5: it only accepts temperature=1 and rejects 0.6 with:

    HTTP 400 "invalid temperature: only 1 is allowed for this model"

Users hit the public endpoint when KIMI_API_KEY is a legacy sk-* key (the sk-kimi-* prefix routes to Coding Plan — see hermes_cli/auth.py). So for Coding Plan subscribers the fix from #12144 is correct, but for public-API users it reintroduces the exact 400 reported in #9125.

Reproduction on api.moonshot.ai/v1 + kimi-k2.5:

    temperature=1.0  → 200 OK
    temperature=0.6  → 400 "only 1 is allowed"   ← #12144 default
    temperature=None → 200 OK

Other kimi-k2.* models are empirically unaffected — turbo-preview accepts 0.6 and thinking-turbo accepts 1.0 on both endpoints — so only kimi-k2.5 diverges.

Fix: thread the client's actual base_url through _build_call_kwargs (the parameter already existed, but callers passed the config-level resolved_base_url, which was often empty for auto-detected routes). _fixed_temperature_for_model now checks api.moonshot.ai first via an explicit _KIMI_PUBLIC_API_OVERRIDES map, then falls back to the Coding Plan defaults.

Tests parametrize over endpoint + model to lock both contracts.

Closes #9125.
This commit is contained in:
parent
0d353ca6a8
commit
6f79b8f01d
2 changed files with 135 additions and 8 deletions
|
|
@ -832,6 +832,92 @@ class TestKimiForCodingTemperature:
|
|||
|
||||
assert kwargs["temperature"] == 0.3
|
||||
|
||||
# ── Endpoint-aware overrides: api.moonshot.ai vs api.kimi.com/coding ──
|
||||
# The public Moonshot chat endpoint and the Coding Plan endpoint enforce
|
||||
# different temperature contracts for the same model name. `kimi-k2.5` on
|
||||
# api.moonshot.ai rejects 0.6 with HTTP 400 "only 1 is allowed for this
|
||||
# model", while the Coding Plan docs mandate 0.6. Override must pick the
|
||||
# right value per base_url.
|
||||
|
||||
@pytest.mark.parametrize(
    "base_url",
    [
        "https://api.moonshot.ai/v1",
        "https://api.moonshot.ai/v1/",
        "https://API.MOONSHOT.AI/v1",
    ],
)
def test_kimi_k2_5_public_api_forces_temperature_1(self, base_url):
    """The public Moonshot endpoint pins kimi-k2.5 to temperature=1.

    Exercised with the plain URL, a trailing-slash variant, and an
    upper-cased host; the caller-supplied 0.1 must come back as 1.0 in
    every case.
    """
    from agent.auxiliary_client import _build_call_kwargs

    user_messages = [{"role": "user", "content": "hello"}]
    call_kwargs = _build_call_kwargs(
        provider="kimi-coding",
        model="kimi-k2.5",
        messages=user_messages,
        temperature=0.1,
        base_url=base_url,
    )

    resolved_temperature = call_kwargs["temperature"]
    assert resolved_temperature == 1.0
|
||||
|
||||
def test_kimi_k2_5_coding_plan_keeps_temperature_0_6(self):
    """On api.kimi.com/coding, kimi-k2.5 stays on the Coding Plan's 0.6 lock.

    The requested 0.1 must be overridden with 0.6 when the Coding Plan
    base_url is supplied.
    """
    from agent.auxiliary_client import _build_call_kwargs

    coding_plan_url = "https://api.kimi.com/coding/v1"
    user_messages = [{"role": "user", "content": "hello"}]

    call_kwargs = _build_call_kwargs(
        provider="kimi-coding",
        model="kimi-k2.5",
        messages=user_messages,
        temperature=0.1,
        base_url=coding_plan_url,
    )

    resolved_temperature = call_kwargs["temperature"]
    assert resolved_temperature == 0.6
|
||||
|
||||
def test_kimi_k2_5_no_base_url_falls_back_to_coding_plan_lock(self):
    """With no base_url supplied, kimi-k2.5 gets the Coding Plan default of 0.6.

    Keeps the behavior introduced by PR #12144 intact for callers that do
    not pass the client's base_url along.
    """
    from agent.auxiliary_client import _build_call_kwargs

    user_messages = [{"role": "user", "content": "hello"}]

    # base_url is deliberately omitted to exercise the fallback path.
    call_kwargs = _build_call_kwargs(
        provider="kimi-coding",
        model="kimi-k2.5",
        messages=user_messages,
        temperature=0.1,
    )

    resolved_temperature = call_kwargs["temperature"]
    assert resolved_temperature == 0.6
|
||||
|
||||
@pytest.mark.parametrize(
    "model,expected",
    [
        # kimi-k2.5 is the only model that diverges on api.moonshot.ai; every
        # other family member keeps the Coding Plan lock (empirically
        # verified against Moonshot in April 2026: turbo-preview accepts 0.6,
        # thinking-turbo accepts 1.0).
        ("kimi-k2-turbo-preview", 0.6),
        ("kimi-k2-0905-preview", 0.6),
        ("kimi-k2-thinking", 1.0),
        ("kimi-k2-thinking-turbo", 1.0),
        ("moonshotai/kimi-k2-thinking-turbo", 1.0),
    ],
)
def test_other_kimi_k2_family_unchanged_on_public_api(self, model, expected):
    """Non-k2.5 kimi-k2 models keep their usual lock on the public endpoint.

    Each parametrized model is sent with temperature=0.1 and the
    api.moonshot.ai base_url; the resulting temperature must equal the
    paired expected value.
    """
    from agent.auxiliary_client import _build_call_kwargs

    public_api_url = "https://api.moonshot.ai/v1"
    user_messages = [{"role": "user", "content": "hello"}]

    call_kwargs = _build_call_kwargs(
        provider="kimi-coding",
        model=model,
        messages=user_messages,
        temperature=0.1,
        base_url=public_api_url,
    )

    resolved_temperature = call_kwargs["temperature"]
    assert resolved_temperature == expected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# async_call_llm payment / connection fallback (#7512 bug 2)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue