mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-26 01:01:40 +00:00
fix(kimi): don't send Anthropic thinking to api.kimi.com/coding (#13826)
Kimi's /coding endpoint speaks the Anthropic Messages protocol but has its own thinking semantics: when thinking.enabled is sent, Kimi validates the history and requires every prior assistant tool-call message to carry OpenAI-style reasoning_content. The Anthropic path never populates that field, and convert_messages_to_anthropic strips Anthropic thinking blocks on third-party endpoints — so after one tool-calling turn the next request fails with: "HTTP 400: thinking is enabled but reasoning_content is missing in assistant tool call message at index N". Kimi on chat_completions handles thinking via extra_body in ChatCompletionsTransport (#13503). On the Anthropic route, drop the parameter entirely and let Kimi drive reasoning server-side. build_anthropic_kwargs now gates the reasoning_config -> thinking block on not _is_kimi_coding_endpoint(base_url). Tests: 8 new test cases cover /coding, /coding/v1, /coding/anthropic, /coding/ (trailing slash), explicit disabled, other third-party endpoints still getting thinking (MiniMax), native Anthropic unaffected, and the non-/coding Kimi root route.
This commit is contained in:
parent
7b79e0f4c9
commit
410f33a728
2 changed files with 130 additions and 1 deletions
|
|
@ -1426,11 +1426,25 @@ def build_anthropic_kwargs(
|
||||||
# MiniMax Anthropic-compat endpoints support thinking (manual mode only,
|
# MiniMax Anthropic-compat endpoints support thinking (manual mode only,
|
||||||
# not adaptive). Haiku does NOT support extended thinking — skip entirely.
|
# not adaptive). Haiku does NOT support extended thinking — skip entirely.
|
||||||
#
|
#
|
||||||
|
# Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
|
||||||
|
# its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
|
||||||
|
# validates the message history and requires every prior assistant
|
||||||
|
# tool-call message to carry OpenAI-style ``reasoning_content``. The
|
||||||
|
# Anthropic path never populates that field, and
|
||||||
|
# ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
|
||||||
|
# on third-party endpoints — so the request fails with HTTP 400
|
||||||
|
# "thinking is enabled but reasoning_content is missing in assistant
|
||||||
|
# tool call message at index N". Kimi's reasoning is driven server-side
|
||||||
|
# on the /coding route, so skip Anthropic's thinking parameter entirely
|
||||||
|
# for that host. (Kimi on chat_completions enables thinking via
|
||||||
|
# extra_body in the ChatCompletionsTransport — see #13503.)
|
||||||
|
#
|
||||||
# On 4.7+ the `thinking.display` field defaults to "omitted", which
|
# On 4.7+ the `thinking.display` field defaults to "omitted", which
|
||||||
# silently hides reasoning text that Hermes surfaces in its CLI. We
|
# silently hides reasoning text that Hermes surfaces in its CLI. We
|
||||||
# request "summarized" so the reasoning blocks stay populated — matching
|
# request "summarized" so the reasoning blocks stay populated — matching
|
||||||
# 4.6 behavior and preserving the activity-feed UX during long tool runs.
|
# 4.6 behavior and preserving the activity-feed UX during long tool runs.
|
||||||
if reasoning_config and isinstance(reasoning_config, dict):
|
_is_kimi_coding = _is_kimi_coding_endpoint(base_url)
|
||||||
|
if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
|
||||||
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
|
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
|
||||||
effort = str(reasoning_config.get("effort", "medium")).lower()
|
effort = str(reasoning_config.get("effort", "medium")).lower()
|
||||||
budget = THINKING_BUDGET.get(effort, 8000)
|
budget = THINKING_BUDGET.get(effort, 8000)
|
||||||
|
|
|
||||||
115
tests/agent/test_kimi_coding_anthropic_thinking.py
Normal file
115
tests/agent/test_kimi_coding_anthropic_thinking.py
Normal file
|
|
@ -0,0 +1,115 @@
|
||||||
|
"""Regression guard: don't send Anthropic ``thinking`` to Kimi's /coding endpoint.
|
||||||
|
|
||||||
|
Kimi's ``api.kimi.com/coding`` endpoint speaks the Anthropic Messages protocol
|
||||||
|
but has its own thinking semantics. When ``thinking.enabled`` is present in
|
||||||
|
the request, Kimi validates the message history and requires every prior
|
||||||
|
assistant tool-call message to carry OpenAI-style ``reasoning_content``.
|
||||||
|
|
||||||
|
The Anthropic path never populates that field, and
|
||||||
|
``convert_messages_to_anthropic`` strips Anthropic thinking blocks on
|
||||||
|
third-party endpoints — so after one turn with tool calls the next request
|
||||||
|
fails with HTTP 400::
|
||||||
|
|
||||||
|
thinking is enabled but reasoning_content is missing in assistant
|
||||||
|
tool call message at index N
|
||||||
|
|
||||||
|
Kimi on the chat_completions route handles ``thinking`` via ``extra_body`` in
|
||||||
|
``ChatCompletionsTransport`` (#13503). On the Anthropic route the right
|
||||||
|
thing to do is drop the parameter entirely and let Kimi drive reasoning
|
||||||
|
server-side.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
class TestKimiCodingSkipsAnthropicThinking:
|
||||||
|
"""build_anthropic_kwargs must not inject ``thinking`` for Kimi /coding."""
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"base_url",
|
||||||
|
[
|
||||||
|
"https://api.kimi.com/coding",
|
||||||
|
"https://api.kimi.com/coding/v1",
|
||||||
|
"https://api.kimi.com/coding/anthropic",
|
||||||
|
"https://api.kimi.com/coding/",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_kimi_coding_endpoint_omits_thinking(self, base_url: str) -> None:
|
||||||
|
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||||
|
|
||||||
|
kwargs = build_anthropic_kwargs(
|
||||||
|
model="kimi-k2.5",
|
||||||
|
messages=[{"role": "user", "content": "hello"}],
|
||||||
|
tools=None,
|
||||||
|
max_tokens=4096,
|
||||||
|
reasoning_config={"enabled": True, "effort": "medium"},
|
||||||
|
base_url=base_url,
|
||||||
|
)
|
||||||
|
assert "thinking" not in kwargs, (
|
||||||
|
"Anthropic thinking must not be sent to Kimi /coding — "
|
||||||
|
"endpoint requires reasoning_content on history we don't preserve."
|
||||||
|
)
|
||||||
|
assert "output_config" not in kwargs
|
||||||
|
|
||||||
|
def test_kimi_coding_with_explicit_disabled_also_omits(self) -> None:
|
||||||
|
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||||
|
|
||||||
|
kwargs = build_anthropic_kwargs(
|
||||||
|
model="kimi-k2.5",
|
||||||
|
messages=[{"role": "user", "content": "hello"}],
|
||||||
|
tools=None,
|
||||||
|
max_tokens=4096,
|
||||||
|
reasoning_config={"enabled": False},
|
||||||
|
base_url="https://api.kimi.com/coding",
|
||||||
|
)
|
||||||
|
assert "thinking" not in kwargs
|
||||||
|
|
||||||
|
def test_non_kimi_third_party_still_gets_thinking(self) -> None:
|
||||||
|
"""MiniMax and other third-party Anthropic endpoints must retain thinking."""
|
||||||
|
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||||
|
|
||||||
|
kwargs = build_anthropic_kwargs(
|
||||||
|
model="MiniMax-M2.7",
|
||||||
|
messages=[{"role": "user", "content": "hello"}],
|
||||||
|
tools=None,
|
||||||
|
max_tokens=4096,
|
||||||
|
reasoning_config={"enabled": True, "effort": "medium"},
|
||||||
|
base_url="https://api.minimax.io/anthropic",
|
||||||
|
)
|
||||||
|
assert "thinking" in kwargs
|
||||||
|
assert kwargs["thinking"]["type"] == "enabled"
|
||||||
|
|
||||||
|
def test_native_anthropic_still_gets_thinking(self) -> None:
|
||||||
|
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||||
|
|
||||||
|
kwargs = build_anthropic_kwargs(
|
||||||
|
model="claude-sonnet-4-20250514",
|
||||||
|
messages=[{"role": "user", "content": "hello"}],
|
||||||
|
tools=None,
|
||||||
|
max_tokens=4096,
|
||||||
|
reasoning_config={"enabled": True, "effort": "medium"},
|
||||||
|
base_url=None,
|
||||||
|
)
|
||||||
|
assert "thinking" in kwargs
|
||||||
|
|
||||||
|
def test_kimi_root_endpoint_unaffected(self) -> None:
|
||||||
|
"""Only the /coding route is special-cased — plain api.kimi.com is not.
|
||||||
|
|
||||||
|
``api.kimi.com`` without ``/coding`` uses the chat_completions transport
|
||||||
|
(see runtime_provider._detect_api_mode_for_url); build_anthropic_kwargs
|
||||||
|
should never see it, but if it somehow does we should not suppress
|
||||||
|
thinking there — that path has different semantics.
|
||||||
|
"""
|
||||||
|
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||||
|
|
||||||
|
kwargs = build_anthropic_kwargs(
|
||||||
|
model="kimi-k2.5",
|
||||||
|
messages=[{"role": "user", "content": "hello"}],
|
||||||
|
tools=None,
|
||||||
|
max_tokens=4096,
|
||||||
|
reasoning_config={"enabled": True, "effort": "medium"},
|
||||||
|
base_url="https://api.kimi.com/v1",
|
||||||
|
)
|
||||||
|
assert "thinking" in kwargs
|
||||||
Loading…
Add table
Add a link
Reference in a new issue