fix(kimi): force kimi-for-coding temperature to 0.6

helix4u 2026-04-17 16:17:15 -06:00 committed by Teknium
parent c6fd2619f7
commit 2b60478fc2
3 changed files with 108 additions and 3 deletions

View file

@@ -94,6 +94,17 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
         return "custom"
     return _PROVIDER_ALIASES.get(normalized, normalized)
 
+_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
+    "kimi-for-coding": 0.6,
+}
+
+
+def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
+    """Return a required temperature override for models with strict contracts."""
+    normalized = (model or "").strip().lower()
+    return _FIXED_TEMPERATURE_MODELS.get(normalized)
+
+
 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
 _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
     "gemini": "gemini-3-flash-preview",
@@ -2293,6 +2304,10 @@ def _build_call_kwargs(
         "timeout": timeout,
     }
 
+    fixed_temperature = _fixed_temperature_for_model(model)
+    if fixed_temperature is not None:
+        temperature = fixed_temperature
+
     # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
     # drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
     # flush_memories, 0 on structured-JSON extraction) don't 400 the moment
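Because the override runs after the base kwargs are assembled, it wins over whatever the caller passed, including a missing temperature. A minimal sketch of that precedence, using a simplified stand-in (build_call_kwargs_sketch is a hypothetical name, not the real function):

    from typing import Any, Dict, List, Optional

    def build_call_kwargs_sketch(
        model: str,
        messages: List[Dict[str, Any]],
        temperature: Optional[float] = None,
    ) -> Dict[str, Any]:
        kwargs: Dict[str, Any] = {"model": model, "messages": messages}
        if temperature is not None:
            kwargs["temperature"] = temperature
        # Fixed-contract models override whatever the caller asked for.
        fixed = {"kimi-for-coding": 0.6}.get((model or "").strip().lower())
        if fixed is not None:
            kwargs["temperature"] = fixed
        return kwargs

    msgs = [{"role": "user", "content": "hello"}]
    assert build_call_kwargs_sketch("kimi-for-coding", msgs, 0.3)["temperature"] == 0.6
    assert build_call_kwargs_sketch("kimi-for-coding", msgs)["temperature"] == 0.6
    assert build_call_kwargs_sketch("kimi-k2.5", msgs, 0.3)["temperature"] == 0.3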

View file

@@ -7209,12 +7209,13 @@ class AIAgent:
        # it's cheaper and avoids Codex Responses API incompatibility.
        from agent.auxiliary_client import call_llm as _call_llm
        _aux_available = True
+       _flush_temperature = 0.6 if str(self.model or "").strip().lower() == "kimi-for-coding" else 0.3
        try:
            response = _call_llm(
                task="flush_memories",
                messages=api_messages,
                tools=[memory_tool_def],
-               temperature=0.3,
+               temperature=_flush_temperature,
                max_tokens=5120,
                # timeout resolved from auxiliary.flush_memories.timeout config
            )
@@ -7226,7 +7227,7 @@
            # No auxiliary client -- use the Codex Responses path directly
            codex_kwargs = self._build_api_kwargs(api_messages)
            codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
-           codex_kwargs["temperature"] = 0.3
+           codex_kwargs["temperature"] = _flush_temperature
            if "max_output_tokens" in codex_kwargs:
                codex_kwargs["max_output_tokens"] = 5120
            response = self._run_codex_stream(codex_kwargs)
@@ -7245,7 +7246,7 @@
                "model": self.model,
                "messages": api_messages,
                "tools": [memory_tool_def],
-               "temperature": 0.3,
+               "temperature": _flush_temperature,
                **self._max_tokens_param(5120),
            }
            from agent.auxiliary_client import _get_task_timeout
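The flush_memories flow has three call paths (auxiliary client, Codex Responses fallback, raw payload), and not all of them route through _build_call_kwargs, so the agent computes one _flush_temperature up front and threads it through every branch. Its semantics, as a standalone sketch (flush_temperature_for is a hypothetical name for the inline conditional above):

    def flush_temperature_for(model) -> float:
        # Mirrors the diff's one-liner: kimi-for-coding is pinned to 0.6,
        # everything else keeps the previous flush_memories default of 0.3.
        return 0.6 if str(model or "").strip().lower() == "kimi-for-coding" else 0.3

    assert flush_temperature_for("kimi-for-coding") == 0.6
    assert flush_temperature_for("Kimi-For-Coding") == 0.6
    assert flush_temperature_for("gemini-3-flash-preview") == 0.3
    assert flush_temperature_for(None) == 0.3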

View file

@@ -696,6 +696,95 @@ class TestIsConnectionError:
         assert _is_connection_error(err) is False
 
 
+class TestKimiForCodingTemperature:
+    """kimi-for-coding now requires temperature=0.6 exactly."""
+
+    def test_build_call_kwargs_forces_fixed_temperature(self):
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider="kimi-coding",
+            model="kimi-for-coding",
+            messages=[{"role": "user", "content": "hello"}],
+            temperature=0.3,
+        )
+
+        assert kwargs["temperature"] == 0.6
+
+    def test_build_call_kwargs_injects_temperature_when_missing(self):
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider="kimi-coding",
+            model="kimi-for-coding",
+            messages=[{"role": "user", "content": "hello"}],
+            temperature=None,
+        )
+
+        assert kwargs["temperature"] == 0.6
+
+    def test_auto_routed_kimi_for_coding_sync_call_uses_fixed_temperature(self):
+        client = MagicMock()
+        client.base_url = "https://api.kimi.com/coding/v1"
+        response = MagicMock()
+        client.chat.completions.create.return_value = response
+
+        with patch(
+            "agent.auxiliary_client._get_cached_client",
+            return_value=(client, "kimi-for-coding"),
+        ), patch(
+            "agent.auxiliary_client._resolve_task_provider_model",
+            return_value=("auto", "kimi-for-coding", None, None, None),
+        ):
+            result = call_llm(
+                task="session_search",
+                messages=[{"role": "user", "content": "hello"}],
+                temperature=0.1,
+            )
+
+        assert result is response
+        kwargs = client.chat.completions.create.call_args.kwargs
+        assert kwargs["model"] == "kimi-for-coding"
+        assert kwargs["temperature"] == 0.6
+
+    @pytest.mark.asyncio
+    async def test_auto_routed_kimi_for_coding_async_call_uses_fixed_temperature(self):
+        client = MagicMock()
+        client.base_url = "https://api.kimi.com/coding/v1"
+        response = MagicMock()
+        client.chat.completions.create = AsyncMock(return_value=response)
+
+        with patch(
+            "agent.auxiliary_client._get_cached_client",
+            return_value=(client, "kimi-for-coding"),
+        ), patch(
+            "agent.auxiliary_client._resolve_task_provider_model",
+            return_value=("auto", "kimi-for-coding", None, None, None),
+        ):
+            result = await async_call_llm(
+                task="session_search",
+                messages=[{"role": "user", "content": "hello"}],
+                temperature=0.1,
+            )
+
+        assert result is response
+        kwargs = client.chat.completions.create.call_args.kwargs
+        assert kwargs["model"] == "kimi-for-coding"
+        assert kwargs["temperature"] == 0.6
+
+    def test_non_kimi_model_still_preserves_temperature(self):
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider="kimi-coding",
+            model="kimi-k2.5",
+            messages=[{"role": "user", "content": "hello"}],
+            temperature=0.3,
+        )
+
+        assert kwargs["temperature"] == 0.3
+
+
 # ---------------------------------------------------------------------------
 # async_call_llm payment / connection fallback (#7512 bug 2)
 # ---------------------------------------------------------------------------
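The suite above covers the _build_call_kwargs seam plus the auto-routed sync and async call sites. Given the helper's strip()/lower() normalization, one natural extension (hypothetical, not part of this commit) would be a method alongside the others in TestKimiForCodingTemperature that pins the case/whitespace tolerance as well:

    def test_build_call_kwargs_normalizes_model_name_before_override(self):
        from agent.auxiliary_client import _build_call_kwargs

        kwargs = _build_call_kwargs(
            provider="kimi-coding",
            model="  Kimi-For-Coding  ",  # helper strips and lowercases before lookup
            messages=[{"role": "user", "content": "hello"}],
            temperature=0.3,
        )

        assert kwargs["temperature"] == 0.6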