diff --git a/run_agent.py b/run_agent.py
index 722f7cea4b..8ead378665 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -6909,6 +6909,34 @@ class AIAgent:
             # (the documented max output for qwen3-coder models) so the
             # model has adequate output budget for tool calls.
             api_kwargs.update(self._max_tokens_param(65536))
+        elif (
+            base_url_host_matches(self.base_url, "api.kimi.com")
+            or base_url_host_matches(self.base_url, "moonshot.ai")
+            or base_url_host_matches(self.base_url, "moonshot.cn")
+        ):
+            # Kimi/Moonshot defaults to a low max_tokens when omitted.
+            # Reasoning tokens share the output budget — without an explicit
+            # value the model can exhaust it on thinking alone, causing
+            # "Response truncated due to output length limit". 32000 matches
+            # Kimi CLI's default (see MoonshotAI/kimi-cli kimi.py generate()).
+            api_kwargs.update(self._max_tokens_param(32000))
+            # Kimi takes reasoning_effort as a top-level chat completions
+            # parameter (not inside extra_body). Mirroring Kimi CLI's
+            # with_generation_kwargs()/with_thinking(): omit it entirely when
+            # reasoning_config["enabled"] is False; otherwise send the
+            # configured "low"/"medium"/"high", falling back to "medium".
+            _kimi_thinking_off = bool(
+                self.reasoning_config
+                and isinstance(self.reasoning_config, dict)
+                and self.reasoning_config.get("enabled") is False
+            )
+            if not _kimi_thinking_off:
+                _kimi_effort = "medium"
+                if self.reasoning_config and isinstance(self.reasoning_config, dict):
+                    _e = (self.reasoning_config.get("effort") or "").strip().lower()
+                    if _e in ("low", "medium", "high"):
+                        _kimi_effort = _e
+                api_kwargs["reasoning_effort"] = _kimi_effort
         elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower():
             # OpenRouter and Nous Portal translate requests to Anthropic's
             # Messages API, which requires max_tokens as a mandatory field.
@@ -6940,6 +6968,24 @@ class AIAgent:
             extra_body["provider"] = provider_preferences
         _is_nous = "nousresearch" in self._base_url_lower
 
+        # Kimi/Moonshot toggles reasoning mode via extra_body.thinking, which
+        # is separate from the top-level reasoning_effort. Thinking stays on
+        # unless reasoning_config["enabled"] is False (mirrors with_thinking()
+        # in MoonshotAI/kimi-cli packages/kosong/src/kosong/chat_provider/kimi.py).
+        _is_kimi = (
+            base_url_host_matches(self.base_url, "api.kimi.com")
+            or base_url_host_matches(self.base_url, "moonshot.ai")
+            or base_url_host_matches(self.base_url, "moonshot.cn")
+        )
+        if _is_kimi:
+            _kimi_thinking_enabled = True
+            if self.reasoning_config and isinstance(self.reasoning_config, dict):
+                if self.reasoning_config.get("enabled") is False:
+                    _kimi_thinking_enabled = False
+            extra_body["thinking"] = {
+                "type": "enabled" if _kimi_thinking_enabled else "disabled",
+            }
+
         if self._supports_reasoning_extra_body():
             if _is_github_models:
                 github_reasoning = self._github_models_reasoning_extra_body()
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 9f3341101a..e7a96e5dee 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -952,6 +952,84 @@ class TestBuildApiKwargs:
 
         assert "temperature" not in kwargs
 
+    def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent):
+        """Kimi endpoint should send max_tokens=32000 and reasoning_effort as
+        top-level params, matching Kimi CLI's default behavior."""
+        agent.base_url = "https://api.kimi.com/coding/v1"
+        agent._base_url_lower = agent.base_url.lower()
+        agent.model = "kimi-for-coding"
+        messages = [{"role": "user", "content": "hi"}]
+
+        kwargs = agent._build_api_kwargs(messages)
+
+        assert kwargs["max_tokens"] == 32000
+        assert kwargs["reasoning_effort"] == "medium"
+
+    def test_kimi_coding_endpoint_respects_custom_effort(self, agent):
+        """reasoning_effort should reflect reasoning_config.effort when set."""
+        agent.base_url = "https://api.kimi.com/coding/v1"
+        agent._base_url_lower = agent.base_url.lower()
+        agent.model = "kimi-for-coding"
+        agent.reasoning_config = {"enabled": True, "effort": "high"}
+        messages = [{"role": "user", "content": "hi"}]
+
+        kwargs = agent._build_api_kwargs(messages)
+
+        assert kwargs["reasoning_effort"] == "high"
+
+    def test_kimi_coding_endpoint_sends_thinking_extra_body(self, agent):
+        """Kimi endpoint should send extra_body.thinking={"type":"enabled"}
+        to activate reasoning mode, mirroring Kimi CLI's with_thinking()."""
+        agent.base_url = "https://api.kimi.com/coding/v1"
+        agent._base_url_lower = agent.base_url.lower()
+        agent.model = "kimi-for-coding"
+        messages = [{"role": "user", "content": "hi"}]
+
+        kwargs = agent._build_api_kwargs(messages)
+
+        assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
+
+    def test_kimi_coding_endpoint_disables_thinking(self, agent):
+        """When reasoning_config.enabled=False, thinking should be disabled
+        and reasoning_effort should be omitted entirely — mirroring Kimi
+        CLI's with_thinking("off") which maps to reasoning_effort=None."""
+        agent.base_url = "https://api.kimi.com/coding/v1"
+        agent._base_url_lower = agent.base_url.lower()
+        agent.model = "kimi-for-coding"
+        agent.reasoning_config = {"enabled": False}
+        messages = [{"role": "user", "content": "hi"}]
+
+        kwargs = agent._build_api_kwargs(messages)
+
+        assert kwargs["extra_body"]["thinking"] == {"type": "disabled"}
+        assert "reasoning_effort" not in kwargs
+
+    def test_moonshot_endpoint_sends_max_tokens_and_reasoning(self, agent):
+        """api.moonshot.ai should get the same defaults as api.kimi.com."""
+        agent.base_url = "https://api.moonshot.ai/v1"
+        agent._base_url_lower = agent.base_url.lower()
+        agent.model = "kimi-k2.5"
+        messages = [{"role": "user", "content": "hi"}]
+
+        kwargs = agent._build_api_kwargs(messages)
+
+        assert kwargs["max_tokens"] == 32000
+        assert kwargs["reasoning_effort"] == "medium"
+        assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
+
+    def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent):
+        """api.moonshot.cn (China endpoint) should get the same defaults too."""
+        agent.base_url = "https://api.moonshot.cn/v1"
+        agent._base_url_lower = agent.base_url.lower()
+        agent.model = "kimi-k2.5"
+        messages = [{"role": "user", "content": "hi"}]
+
+        kwargs = agent._build_api_kwargs(messages)
+
+        assert kwargs["max_tokens"] == 32000
+        assert kwargs["reasoning_effort"] == "medium"
+        assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
+
     def test_provider_preferences_injected(self, agent):
         agent.base_url = "https://openrouter.ai/api/v1"
         agent.providers_allowed = ["Anthropic"]