fix(kimi): don't send Anthropic thinking to api.kimi.com/coding (#13826)

Kimi's /coding endpoint speaks the Anthropic Messages protocol but has its own thinking semantics: when thinking.enabled is sent, Kimi validates the history and requires every prior assistant tool-call message to carry OpenAI-style reasoning_content. The Anthropic path never populates that field, and convert_messages_to_anthropic strips Anthropic thinking blocks on third-party endpoints — so after one tool-calling turn the next request fails with: "HTTP 400: thinking is enabled but reasoning_content is missing in assistant tool call message at index N". Kimi on chat_completions handles thinking via extra_body in ChatCompletionsTransport (#13503). On the Anthropic route, drop the parameter entirely and let Kimi drive reasoning server-side. build_anthropic_kwargs now gates the reasoning_config -> thinking block on not _is_kimi_coding_endpoint(base_url). Tests: 8 new parametric tests cover /coding, /coding/v1, /coding/anthropic, /coding/ (trailing slash), explicit disabled, other third-party endpoints still getting thinking (MiniMax), native Anthropic unaffected, and the non-/coding Kimi root route.
2026-04-26 01:01:40 +00:00 · 2026-04-21 21:19:14 -07:00 · 2026-04-21 21:19:14 -07:00 · 410f33a728
commit 410f33a728
parent 7b79e0f4c9
2 changed files with 130 additions and 1 deletions
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@ -1426,11 +1426,25 @@ def build_anthropic_kwargs(
    # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
    # not adaptive).  Haiku does NOT support extended thinking — skip entirely.
    #
    # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
    # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
    # validates the message history and requires every prior assistant
    # tool-call message to carry OpenAI-style ``reasoning_content``.  The
    # Anthropic path never populates that field, and
    # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
    # on third-party endpoints — so the request fails with HTTP 400
    # "thinking is enabled but reasoning_content is missing in assistant
    # tool call message at index N".  Kimi's reasoning is driven server-side
    # on the /coding route, so skip Anthropic's thinking parameter entirely
    # for that host.  (Kimi on chat_completions enables thinking via
    # extra_body in the ChatCompletionsTransport — see #13503.)
    #
    # On 4.7+ the `thinking.display` field defaults to "omitted", which
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    if reasoning_config and isinstance(reasoning_config, dict):
+    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
            budget = THINKING_BUDGET.get(effort, 8000)
--- a/tests/agent/test_kimi_coding_anthropic_thinking.py
+++ b/tests/agent/test_kimi_coding_anthropic_thinking.py
@ -0,0 +1,115 @@
 """Regression guard: don't send Anthropic ``thinking`` to Kimi's /coding endpoint.
 Kimi's ``api.kimi.com/coding`` endpoint speaks the Anthropic Messages protocol
 but has its own thinking semantics.  When ``thinking.enabled`` is present in
 the request, Kimi validates the message history and requires every prior
 assistant tool-call message to carry OpenAI-style ``reasoning_content``.
 The Anthropic path never populates that field, and
 ``convert_messages_to_anthropic`` strips Anthropic thinking blocks on
 third-party endpoints — so after one turn with tool calls the next request
 fails with HTTP 400::
    thinking is enabled but reasoning_content is missing in assistant
    tool call message at index N
 Kimi on the chat_completions route handles ``thinking`` via ``extra_body`` in
 ``ChatCompletionsTransport`` (#13503).  On the Anthropic route the right
 thing to do is drop the parameter entirely and let Kimi drive reasoning
 server-side.
 """
 from __future__ import annotations
 import pytest
 class TestKimiCodingSkipsAnthropicThinking:
    """build_anthropic_kwargs must not inject ``thinking`` for Kimi /coding.
    The negative cases pass ``https://api.kimi.com/coding`` URL variants and
    assert that the returned kwargs carry no ``thinking`` key.  The positive
    cases (a MiniMax Anthropic-compatible URL, ``base_url=None``, and a Kimi
    URL without ``/coding``) assert that ``thinking`` is still present, so the
    suppression stays scoped to the /coding route.
    """
    @pytest.mark.parametrize(
        "base_url",
        [
            "https://api.kimi.com/coding",
            "https://api.kimi.com/coding/v1",
            "https://api.kimi.com/coding/anthropic",
            "https://api.kimi.com/coding/",
        ],
    )
    def test_kimi_coding_endpoint_omits_thinking(self, base_url: str) -> None:
        """Every /coding URL variant yields kwargs without ``thinking``.
        Reasoning is explicitly enabled in ``reasoning_config`` so the test
        fails if the endpoint check is bypassed for any of the parametrized
        URLs (bare path, ``/v1``, ``/anthropic``, trailing slash).
        """
        from agent.anthropic_adapter import build_anthropic_kwargs
        kwargs = build_anthropic_kwargs(
            model="kimi-k2.5",
            messages=[{"role": "user", "content": "hello"}],
            tools=None,
            max_tokens=4096,
            reasoning_config={"enabled": True, "effort": "medium"},
            base_url=base_url,
        )
        assert "thinking" not in kwargs, (
            "Anthropic thinking must not be sent to Kimi /coding — "
            "endpoint requires reasoning_content on history we don't preserve."
        )
        # NOTE(review): presumably ``output_config`` is only emitted alongside
        # a thinking configuration — confirm in build_anthropic_kwargs.
        assert "output_config" not in kwargs
    def test_kimi_coding_with_explicit_disabled_also_omits(self) -> None:
        """``enabled: False`` on a /coding URL also yields no ``thinking`` key."""
        from agent.anthropic_adapter import build_anthropic_kwargs
        kwargs = build_anthropic_kwargs(
            model="kimi-k2.5",
            messages=[{"role": "user", "content": "hello"}],
            tools=None,
            max_tokens=4096,
            reasoning_config={"enabled": False},
            base_url="https://api.kimi.com/coding",
        )
        assert "thinking" not in kwargs
    def test_non_kimi_third_party_still_gets_thinking(self) -> None:
        """MiniMax and other third-party Anthropic endpoints must retain thinking."""
        from agent.anthropic_adapter import build_anthropic_kwargs
        kwargs = build_anthropic_kwargs(
            model="MiniMax-M2.7",
            messages=[{"role": "user", "content": "hello"}],
            tools=None,
            max_tokens=4096,
            reasoning_config={"enabled": True, "effort": "medium"},
            base_url="https://api.minimax.io/anthropic",
        )
        assert "thinking" in kwargs
        # The block must be the explicit "enabled" form, not merely present.
        assert kwargs["thinking"]["type"] == "enabled"
    def test_native_anthropic_still_gets_thinking(self) -> None:
        """With ``base_url=None`` and a Claude Sonnet model, ``thinking`` is kept."""
        from agent.anthropic_adapter import build_anthropic_kwargs
        kwargs = build_anthropic_kwargs(
            model="claude-sonnet-4-20250514",
            messages=[{"role": "user", "content": "hello"}],
            tools=None,
            max_tokens=4096,
            reasoning_config={"enabled": True, "effort": "medium"},
            base_url=None,
        )
        assert "thinking" in kwargs
    def test_kimi_root_endpoint_unaffected(self) -> None:
        """Only the /coding route is special-cased — plain api.kimi.com is not.
        ``api.kimi.com`` without ``/coding`` uses the chat_completions transport
        (see runtime_provider._detect_api_mode_for_url); build_anthropic_kwargs
        should never see it, but if it somehow does we should not suppress
        thinking there — that path has different semantics.
        """
        from agent.anthropic_adapter import build_anthropic_kwargs
        kwargs = build_anthropic_kwargs(
            model="kimi-k2.5",
            messages=[{"role": "user", "content": "hello"}],
            tools=None,
            max_tokens=4096,
            reasoning_config={"enabled": True, "effort": "medium"},
            base_url="https://api.kimi.com/v1",
        )
        assert "thinking" in kwargs