diff --git a/mini_swe_runner.py b/mini_swe_runner.py index 739074402..a642e2411 100644 --- a/mini_swe_runner.py +++ b/mini_swe_runner.py @@ -43,13 +43,16 @@ from dotenv import load_dotenv load_dotenv() -def _effective_temperature_for_model(model: str) -> Optional[float]: +def _effective_temperature_for_model( + model: str, + base_url: Optional[str] = None, +) -> Optional[float]: """Return a fixed temperature for models with strict sampling contracts.""" try: from agent.auxiliary_client import _fixed_temperature_for_model except Exception: return None - return _fixed_temperature_for_model(model) + return _fixed_temperature_for_model(model, base_url) @@ -457,7 +460,10 @@ Complete the user's task step by step.""" "tools": self.tools, "timeout": 300.0, } - fixed_temperature = _effective_temperature_for_model(self.model) + fixed_temperature = _effective_temperature_for_model( + self.model, + str(getattr(self.client, "base_url", "") or ""), + ) if fixed_temperature is not None: api_kwargs["temperature"] = fixed_temperature diff --git a/run_agent.py b/run_agent.py index 85eaad1b3..fc795c83e 100644 --- a/run_agent.py +++ b/run_agent.py @@ -7173,7 +7173,7 @@ class AIAgent: except Exception: _fixed_temperature_for_model = None if _fixed_temperature_for_model is not None: - fixed_temperature = _fixed_temperature_for_model(self.model) + fixed_temperature = _fixed_temperature_for_model(self.model, self.base_url) if fixed_temperature is not None: api_kwargs["temperature"] = fixed_temperature if self._is_qwen_portal(): @@ -7619,7 +7619,7 @@ class AIAgent: _aux_available = True # Use the fixed-temperature override (e.g. kimi-for-coding → 0.6) if # the model has a strict contract; otherwise the historical 0.3 default. - _flush_temperature = _fixed_temperature_for_model(self.model) + _flush_temperature = _fixed_temperature_for_model(self.model, self.base_url) if _flush_temperature is None: _flush_temperature = 0.3 try: @@ -8675,7 +8675,7 @@ class AIAgent: except Exception: _fixed_temperature_for_model = None _summary_temperature = ( - _fixed_temperature_for_model(self.model) + _fixed_temperature_for_model(self.model, self.base_url) if _fixed_temperature_for_model is not None else None ) diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 9bc637135..6498bd0dd 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -918,6 +918,26 @@ class TestBuildApiKwargs: assert kwargs["messages"] is messages assert kwargs["timeout"] == 1800.0 + def test_public_moonshot_kimi_k2_5_forces_temperature_1(self, agent): + agent.base_url = "https://api.moonshot.ai/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "kimi-k2.5" + messages = [{"role": "user", "content": "hi"}] + + kwargs = agent._build_api_kwargs(messages) + + assert kwargs["temperature"] == 1.0 + + def test_kimi_coding_endpoint_keeps_kimi_k2_5_at_0_6(self, agent): + agent.base_url = "https://api.kimi.com/coding/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "kimi-k2.5" + messages = [{"role": "user", "content": "hi"}] + + kwargs = agent._build_api_kwargs(messages) + + assert kwargs["temperature"] == 0.6 + def test_provider_preferences_injected(self, agent): agent.base_url = "https://openrouter.ai/api/v1" agent.providers_allowed = ["Anthropic"] diff --git a/tests/test_mini_swe_runner.py b/tests/test_mini_swe_runner.py index adecb5582..b814f7738 100644 --- a/tests/test_mini_swe_runner.py +++ b/tests/test_mini_swe_runner.py @@ -26,3 +26,30 @@ def test_run_task_forces_kimi_fixed_temperature(): assert result["completed"] is True assert client.chat.completions.create.call_args.kwargs["temperature"] == 0.6 + + +def test_run_task_public_moonshot_kimi_k2_5_forces_temperature_1(): + with patch("openai.OpenAI") as mock_openai: + client = MagicMock() + client.base_url = "https://api.moonshot.ai/v1" + client.chat.completions.create.return_value = SimpleNamespace( + choices=[SimpleNamespace(message=SimpleNamespace(content="done", tool_calls=[]))] + ) + mock_openai.return_value = client + + from mini_swe_runner import MiniSWERunner + + runner = MiniSWERunner( + model="kimi-k2.5", + base_url="https://api.moonshot.ai/v1", + api_key="test-key", + env_type="local", + max_iterations=1, + ) + runner._create_env = MagicMock() + runner._cleanup_env = MagicMock() + + result = runner.run_task("2+2") + + assert result["completed"] is True + assert client.chat.completions.create.call_args.kwargs["temperature"] == 1.0 diff --git a/tests/test_trajectory_compressor.py b/tests/test_trajectory_compressor.py index 682097173..1332674bf 100644 --- a/tests/test_trajectory_compressor.py +++ b/tests/test_trajectory_compressor.py @@ -54,6 +54,30 @@ def test_generate_summary_custom_client_forces_kimi_temperature(): assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 0.6 +def test_generate_summary_public_moonshot_kimi_k2_5_forces_temperature_1(): + config = CompressionConfig( + summarization_model="kimi-k2.5", + base_url="https://api.moonshot.ai/v1", + temperature=0.3, + summary_target_tokens=100, + max_retries=1, + ) + compressor = TrajectoryCompressor.__new__(TrajectoryCompressor) + compressor.config = config + compressor.logger = MagicMock() + compressor._use_call_llm = False + compressor.client = MagicMock() + compressor.client.chat.completions.create.return_value = SimpleNamespace( + choices=[SimpleNamespace(message=SimpleNamespace(content="[CONTEXT SUMMARY]: summary"))] + ) + + metrics = TrajectoryMetrics() + result = compressor._generate_summary("tool output", metrics) + + assert result.startswith("[CONTEXT SUMMARY]:") + assert compressor.client.chat.completions.create.call_args.kwargs["temperature"] == 1.0 + + # --------------------------------------------------------------------------- # CompressionConfig # --------------------------------------------------------------------------- diff --git a/tests/test_trajectory_compressor_async.py b/tests/test_trajectory_compressor_async.py index 7bf519162..977e16ae9 100644 --- a/tests/test_trajectory_compressor_async.py +++ b/tests/test_trajectory_compressor_async.py @@ -141,3 +141,31 @@ async def test_generate_summary_async_custom_client_forces_kimi_temperature(): assert result.startswith("[CONTEXT SUMMARY]:") assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 0.6 + + +@pytest.mark.asyncio +async def test_generate_summary_async_public_moonshot_kimi_k2_5_forces_temperature_1(): + from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics + + config = CompressionConfig( + summarization_model="kimi-k2.5", + base_url="https://api.moonshot.ai/v1", + temperature=0.3, + summary_target_tokens=100, + max_retries=1, + ) + compressor = TrajectoryCompressor.__new__(TrajectoryCompressor) + compressor.config = config + compressor.logger = MagicMock() + compressor._use_call_llm = False + async_client = MagicMock() + async_client.chat.completions.create = MagicMock(return_value=SimpleNamespace( + choices=[SimpleNamespace(message=SimpleNamespace(content="[CONTEXT SUMMARY]: summary"))] + )) + compressor._get_async_client = MagicMock(return_value=async_client) + + metrics = TrajectoryMetrics() + result = await compressor._generate_summary_async("tool output", metrics) + + assert result.startswith("[CONTEXT SUMMARY]:") + assert async_client.chat.completions.create.call_args.kwargs["temperature"] == 1.0 diff --git a/trajectory_compressor.py b/trajectory_compressor.py index dff15b227..e835da034 100644 --- a/trajectory_compressor.py +++ b/trajectory_compressor.py @@ -54,14 +54,18 @@ _project_env = Path(__file__).parent / ".env" load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env) -def _effective_temperature_for_model(model: str, requested_temperature: float) -> float: +def _effective_temperature_for_model( + model: str, + requested_temperature: float, + base_url: Optional[str] = None, +) -> float: """Apply fixed model temperature contracts to direct client calls.""" try: from agent.auxiliary_client import _fixed_temperature_for_model except Exception: return requested_temperature - fixed_temperature = _fixed_temperature_for_model(model) + fixed_temperature = _fixed_temperature_for_model(model, base_url) if fixed_temperature is not None: return fixed_temperature return requested_temperature @@ -583,6 +587,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" summary_temperature = _effective_temperature_for_model( self.config.summarization_model, self.config.temperature, + self.config.base_url, ) if getattr(self, '_use_call_llm', False): @@ -649,6 +654,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" summary_temperature = _effective_temperature_for_model( self.config.summarization_model, self.config.temperature, + self.config.base_url, ) if getattr(self, '_use_call_llm', False):