diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 4f1746166..4860b16ac 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -94,6 +94,17 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
         return "custom"
     return _PROVIDER_ALIASES.get(normalized, normalized)
 
+
+_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
+    "kimi-for-coding": 0.6,
+}
+
+
+def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
+    """Return a required temperature override for models with strict contracts."""
+    normalized = (model or "").strip().lower()
+    return _FIXED_TEMPERATURE_MODELS.get(normalized)
+
 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
 _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
     "gemini": "gemini-3-flash-preview",
@@ -2293,6 +2304,10 @@ def _build_call_kwargs(
         "timeout": timeout,
     }
 
+    fixed_temperature = _fixed_temperature_for_model(model)
+    if fixed_temperature is not None:
+        temperature = fixed_temperature
+
     # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
     # drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
     # flush_memories, 0 on structured-JSON extraction) don't 400 the moment
diff --git a/run_agent.py b/run_agent.py
index 03dead730..18729709f 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -7209,12 +7209,14 @@ class AIAgent:
             # it's cheaper and avoids Codex Responses API incompatibility.
+            # Bound before the auxiliary import so the no-aux fallback paths below can use it too.
+            _flush_temperature = 0.6 if str(self.model or "").strip().lower() == "kimi-for-coding" else 0.3
             from agent.auxiliary_client import call_llm as _call_llm
             _aux_available = True
             try:
                 response = _call_llm(
                     task="flush_memories",
                     messages=api_messages,
                     tools=[memory_tool_def],
-                    temperature=0.3,
+                    temperature=_flush_temperature,
                     max_tokens=5120,
                     # timeout resolved from auxiliary.flush_memories.timeout config
                 )
@@ -7226,7 +7227,7 @@ class AIAgent:
             # No auxiliary client -- use the Codex Responses path directly
             codex_kwargs = self._build_api_kwargs(api_messages)
             codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
-            codex_kwargs["temperature"] = 0.3
+            codex_kwargs["temperature"] = _flush_temperature
             if "max_output_tokens" in codex_kwargs:
                 codex_kwargs["max_output_tokens"] = 5120
             response = self._run_codex_stream(codex_kwargs)
@@ -7245,7 +7246,7 @@ class AIAgent:
                 "model": self.model,
                 "messages": api_messages,
                 "tools": [memory_tool_def],
-                "temperature": 0.3,
+                "temperature": _flush_temperature,
                 **self._max_tokens_param(5120),
             }
             from agent.auxiliary_client import _get_task_timeout
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index 5d79f96de..1778855dd 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -696,6 +696,95 @@ class TestIsConnectionError:
         assert _is_connection_error(err) is False
+
+class TestKimiForCodingTemperature:
+    """kimi-for-coding now requires temperature=0.6 exactly."""
+
+    def test_build_call_kwargs_forces_fixed_temperature(self):
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider="kimi-coding",
+            model="kimi-for-coding",
+            messages=[{"role": "user", "content": "hello"}],
+            temperature=0.3,
+        )
+
+        assert kwargs["temperature"] == 0.6
+
+    def test_build_call_kwargs_injects_temperature_when_missing(self):
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider="kimi-coding",
+            model="kimi-for-coding",
+            messages=[{"role": "user", "content": "hello"}],
+            temperature=None,
+        )
+
+        assert kwargs["temperature"] == 0.6
+
+    def test_auto_routed_kimi_for_coding_sync_call_uses_fixed_temperature(self):
+        client = MagicMock()
+        client.base_url = "https://api.kimi.com/coding/v1"
+        response = MagicMock()
+        client.chat.completions.create.return_value = response
+
+        with patch(
+            "agent.auxiliary_client._get_cached_client",
+            return_value=(client, "kimi-for-coding"),
+        ), patch(
+            "agent.auxiliary_client._resolve_task_provider_model",
+            return_value=("auto", "kimi-for-coding", None, None, None),
+        ):
+            result = call_llm(
+                task="session_search",
+                messages=[{"role": "user", "content": "hello"}],
+                temperature=0.1,
+            )
+
+        assert result is response
+        kwargs = client.chat.completions.create.call_args.kwargs
+        assert kwargs["model"] == "kimi-for-coding"
+        assert kwargs["temperature"] == 0.6
+
+    @pytest.mark.asyncio
+    async def test_auto_routed_kimi_for_coding_async_call_uses_fixed_temperature(self):
+        client = MagicMock()
+        client.base_url = "https://api.kimi.com/coding/v1"
+        response = MagicMock()
+        client.chat.completions.create = AsyncMock(return_value=response)
+
+        with patch(
+            "agent.auxiliary_client._get_cached_client",
+            return_value=(client, "kimi-for-coding"),
+        ), patch(
+            "agent.auxiliary_client._resolve_task_provider_model",
+            return_value=("auto", "kimi-for-coding", None, None, None),
+        ):
+            result = await async_call_llm(
+                task="session_search",
+                messages=[{"role": "user", "content": "hello"}],
+                temperature=0.1,
+            )
+
+        assert result is response
+        kwargs = client.chat.completions.create.call_args.kwargs
+        assert kwargs["model"] == "kimi-for-coding"
+        assert kwargs["temperature"] == 0.6
+
+    def test_non_kimi_model_still_preserves_temperature(self):
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider="kimi-coding",
+            model="kimi-k2.5",
+            messages=[{"role": "user", "content": "hello"}],
+            temperature=0.3,
+        )
+
+        assert kwargs["temperature"] == 0.3
+
 
 
 # ---------------------------------------------------------------------------
 # async_call_llm payment / connection fallback (#7512 bug 2)
 # ---------------------------------------------------------------------------