hermes-agent/tests/agent/test_kimi_coding_anthropic_thinking.py
Teknium 83c288da01
fix(anthropic): broaden Kimi thinking-suppression to custom endpoints (#17455)
The guard that drops Anthropic's `thinking` kwarg for Kimi endpoints
matched only the literal `https://api.kimi.com/coding` base URL.  Users configuring a
custom Kimi-compatible gateway (or an official Moonshot host) with
`api_mode: anthropic_messages` fall through to the generic third-party
path, which strips thinking blocks AND still sends
`thinking={enabled,...}` → upstream rejects with HTTP 400
"reasoning_content is missing in assistant tool call message at index N"
on the next request after a tool call.

Replace `_is_kimi_coding_endpoint` callers (history replay + thinking
kwarg gate) with `_is_kimi_family_endpoint(base_url, model)` that also
matches the `api.kimi.com` / `moonshot.ai` / `moonshot.cn` hosts and
Kimi/Moonshot family model names (`kimi-`, `moonshot-`, `k1.`, `k2.`,
…) for custom / proxied endpoints.  Keeps the UA-header check in
`build_anthropic_client` URL-only — the `claude-code/0.1.0` header is
an official-Kimi contract.

Plumbs optional `model` through `convert_messages_to_anthropic` so
the unsigned reasoning_content→thinking block synthesised for Kimi's
history validation survives the third-party signature-stripping pass
on custom hosts too.

Closes #17057.
2026-04-29 06:35:42 -07:00

212 lines
8.7 KiB
Python

"""Regression guard: don't send Anthropic ``thinking`` to Kimi's /coding endpoint.
Kimi's ``api.kimi.com/coding`` endpoint speaks the Anthropic Messages protocol
but has its own thinking semantics. When ``thinking.enabled`` is present in
the request, Kimi validates the message history and requires every prior
assistant tool-call message to carry OpenAI-style ``reasoning_content``.
The Anthropic path never populates that field, and
``convert_messages_to_anthropic`` strips Anthropic thinking blocks on
third-party endpoints — so after one turn with tool calls the next request
fails with HTTP 400::
thinking is enabled but reasoning_content is missing in assistant
tool call message at index N
Kimi on the chat_completions route handles ``thinking`` via ``extra_body`` in
``ChatCompletionsTransport`` (#13503). On the Anthropic route the right
thing to do is drop the parameter entirely and let Kimi drive reasoning
server-side.
"""
from __future__ import annotations
import pytest
class TestKimiCodingSkipsAnthropicThinking:
    """build_anthropic_kwargs must not inject ``thinking`` for Kimi /coding."""

    @staticmethod
    def _request_kwargs(model: str, base_url: str | None, reasoning: dict) -> dict:
        # Shared driver: one user turn, no tools, fixed budget — only the
        # model / endpoint / reasoning knobs vary across the cases below.
        from agent.anthropic_adapter import build_anthropic_kwargs

        return build_anthropic_kwargs(
            model=model,
            messages=[{"role": "user", "content": "hello"}],
            tools=None,
            max_tokens=4096,
            reasoning_config=reasoning,
            base_url=base_url,
        )

    @pytest.mark.parametrize(
        "base_url",
        [
            "https://api.kimi.com/coding",
            "https://api.kimi.com/coding/v1",
            "https://api.kimi.com/coding/anthropic",
            "https://api.kimi.com/coding/",
        ],
    )
    def test_kimi_coding_endpoint_omits_thinking(self, base_url: str) -> None:
        """Every /coding URL variant (suffix, trailing slash) drops thinking."""
        kwargs = self._request_kwargs(
            "kimi-k2.5", base_url, {"enabled": True, "effort": "medium"}
        )
        assert "thinking" not in kwargs, (
            "Anthropic thinking must not be sent to Kimi /coding — "
            "endpoint requires reasoning_content on history we don't preserve."
        )
        assert "output_config" not in kwargs

    def test_kimi_coding_with_explicit_disabled_also_omits(self) -> None:
        """Explicitly-disabled reasoning still produces no ``thinking`` key."""
        kwargs = self._request_kwargs(
            "kimi-k2.5", "https://api.kimi.com/coding", {"enabled": False}
        )
        assert "thinking" not in kwargs

    def test_non_kimi_third_party_still_gets_thinking(self) -> None:
        """MiniMax and other third-party Anthropic endpoints must retain thinking."""
        kwargs = self._request_kwargs(
            "MiniMax-M2.7",
            "https://api.minimax.io/anthropic",
            {"enabled": True, "effort": "medium"},
        )
        assert "thinking" in kwargs
        assert kwargs["thinking"]["type"] == "enabled"

    def test_native_anthropic_still_gets_thinking(self) -> None:
        """No base_url → first-party Anthropic → thinking passes through."""
        kwargs = self._request_kwargs(
            "claude-sonnet-4-20250514", None, {"enabled": True, "effort": "medium"}
        )
        assert "thinking" in kwargs

    def test_kimi_root_endpoint_via_anthropic_transport_omits_thinking(self) -> None:
        """Plain ``api.kimi.com`` hit via the Anthropic transport also omits thinking.

        Auto-detection routes ``api.kimi.com/v1`` to ``chat_completions`` by
        default, but users can explicitly configure
        ``api_mode: anthropic_messages`` against any Kimi host. The upstream
        validation (reasoning_content required on replayed tool-call
        messages) is the same regardless of URL path, so the thinking
        suppression must apply to every Kimi host, not just ``/coding``.
        See #17057.
        """
        kwargs = self._request_kwargs(
            "kimi-k2.5", "https://api.kimi.com/v1", {"enabled": True, "effort": "medium"}
        )
        assert "thinking" not in kwargs

    # ── #17057: custom / proxied Kimi-compatible endpoints ──────────
    @pytest.mark.parametrize(
        "base_url,model",
        [
            # Custom host with Kimi-family model — the reporter's case
            ("http://my-kimi-proxy.internal", "kimi-2.6"),
            ("https://llm.example.com/anthropic", "kimi-k2.5"),
            ("https://llm.example.com/anthropic", "moonshot-v1-8k"),
            ("https://llm.example.com/anthropic", "kimi_thinking"),
            ("https://llm.example.com/anthropic", "moonshotai/kimi-k2.5"),
            # Official Moonshot host (previously uncovered)
            ("https://api.moonshot.ai/anthropic", "moonshot-v1-32k"),
            ("https://api.moonshot.cn/anthropic", "moonshot-v1-32k"),
        ],
    )
    def test_kimi_family_custom_endpoint_omits_thinking(
        self, base_url: str, model: str
    ) -> None:
        """Custom / proxied Kimi endpoints must also strip Anthropic thinking."""
        kwargs = self._request_kwargs(
            model, base_url, {"enabled": True, "effort": "medium"}
        )
        assert "thinking" not in kwargs, (
            f"Kimi-family endpoint ({base_url}, {model}) must not receive "
            f"Anthropic thinking — upstream validates reasoning_content on "
            f"replayed tool-call history we don't preserve."
        )
        assert "output_config" not in kwargs

    def test_custom_endpoint_non_kimi_model_keeps_thinking(self) -> None:
        """Custom endpoint with a non-Kimi model must keep thinking intact.

        Guards against over-broad model-family matching — only model names
        starting with a Kimi/Moonshot prefix should trigger suppression.
        """
        kwargs = self._request_kwargs(
            "MiniMax-M2.7",
            "https://my-llm-proxy.example.com/anthropic",
            {"enabled": True, "effort": "medium"},
        )
        assert "thinking" in kwargs
        assert kwargs["thinking"]["type"] == "enabled"

    def test_kimi_family_replay_preserves_unsigned_thinking(self) -> None:
        """On a custom Kimi endpoint, unsigned reasoning_content thinking
        blocks must survive the third-party signature-stripping pass so
        the upstream's message-history validation passes.
        """
        from agent.anthropic_adapter import convert_messages_to_anthropic

        history = [
            {"role": "user", "content": "hi"},
            {
                "role": "assistant",
                "reasoning_content": "planning the tool call",
                "tool_calls": [
                    {
                        "id": "call_1",
                        "type": "function",
                        "function": {"name": "skill_view", "arguments": "{}"},
                    }
                ],
            },
            {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
        ]
        _, converted = convert_messages_to_anthropic(
            history,
            base_url="http://my-kimi-proxy.internal",
            model="kimi-2.6",
        )
        # The assistant message still carries the unsigned thinking block
        # synthesised from reasoning_content (required by Kimi's history
        # validation). A plain third-party endpoint would have stripped it.
        replayed_assistant = next(m for m in converted if m["role"] == "assistant")
        thinking_blocks = [
            block
            for block in replayed_assistant["content"]
            if isinstance(block, dict) and block.get("type") == "thinking"
        ]
        assert len(thinking_blocks) == 1
        assert thinking_blocks[0]["thinking"] == "planning the tool call"