mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-01 01:51:44 +00:00
fix(anthropic): broaden Kimi thinking-suppression to custom endpoints (#17455)
The guard that drops Anthropic's `thinking` kwarg for Kimi endpoints matched only `https://api.kimi.com/coding`. Users who configure a custom Kimi-compatible gateway (or an official Moonshot host) with `api_mode: anthropic_messages` fall through to the generic third-party path, which strips thinking blocks AND still sends `thinking={enabled,...}` → the upstream rejects the next request after a tool call with HTTP 400 "reasoning_content is missing in assistant tool call message at index N".

Replace the `_is_kimi_coding_endpoint` callers (history replay + thinking kwarg gate) with `_is_kimi_family_endpoint(base_url, model)`, which also matches the `api.kimi.com` / `moonshot.ai` / `moonshot.cn` hosts and, for custom / proxied endpoints, Kimi/Moonshot family model names (`kimi-`, `moonshot-`, `k1.`, `k2.`, …).

The UA-header check in `build_anthropic_client` stays URL-only — the `claude-code/0.1.0` header is an official-Kimi contract.

An optional `model` argument is plumbed through `convert_messages_to_anthropic` so that the unsigned reasoning_content→thinking block synthesised for Kimi's history validation also survives the third-party signature-stripping pass on custom hosts.

Closes #17057.
This commit is contained in:
parent
398945e7b1
commit
83c288da01
2 changed files with 171 additions and 10 deletions
|
|
@ -94,13 +94,16 @@ class TestKimiCodingSkipsAnthropicThinking:
|
|||
)
|
||||
assert "thinking" in kwargs
|
||||
|
||||
def test_kimi_root_endpoint_unaffected(self) -> None:
|
||||
"""Only the /coding route is special-cased — plain api.kimi.com is not.
|
||||
def test_kimi_root_endpoint_via_anthropic_transport_omits_thinking(self) -> None:
|
||||
"""Plain ``api.kimi.com`` hit via the Anthropic transport also omits thinking.
|
||||
|
||||
``api.kimi.com`` without ``/coding`` uses the chat_completions transport
|
||||
(see runtime_provider._detect_api_mode_for_url); build_anthropic_kwargs
|
||||
should never see it, but if it somehow does we should not suppress
|
||||
thinking there — that path has different semantics.
|
||||
Auto-detection routes ``api.kimi.com/v1`` to ``chat_completions`` by
|
||||
default, but users can explicitly configure
|
||||
``api_mode: anthropic_messages`` against any Kimi host. The upstream
|
||||
validation (reasoning_content required on replayed tool-call
|
||||
messages) is the same regardless of URL path, so the thinking
|
||||
suppression must apply to every Kimi host, not just ``/coding``.
|
||||
See #17057.
|
||||
"""
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
|
||||
|
|
@ -112,4 +115,98 @@ class TestKimiCodingSkipsAnthropicThinking:
|
|||
reasoning_config={"enabled": True, "effort": "medium"},
|
||||
base_url="https://api.kimi.com/v1",
|
||||
)
|
||||
assert "thinking" not in kwargs
|
||||
|
||||
# ── #17057: custom / proxied Kimi-compatible endpoints ──────────
|
||||
@pytest.mark.parametrize(
    "base_url,model",
    [
        # Arbitrary (non-official) hosts serving a Kimi/Moonshot-family
        # model name — the scenario reported in #17057.
        ("http://my-kimi-proxy.internal", "kimi-2.6"),
        ("https://llm.example.com/anthropic", "kimi-k2.5"),
        ("https://llm.example.com/anthropic", "moonshot-v1-8k"),
        ("https://llm.example.com/anthropic", "kimi_thinking"),
        ("https://llm.example.com/anthropic", "moonshotai/kimi-k2.5"),
        # Official Moonshot hosts, which were not covered before.
        ("https://api.moonshot.ai/anthropic", "moonshot-v1-32k"),
        ("https://api.moonshot.cn/anthropic", "moonshot-v1-32k"),
    ],
)
def test_kimi_family_custom_endpoint_omits_thinking(
    self, base_url: str, model: str
) -> None:
    """Kimi-family requests via custom or Moonshot hosts carry no thinking.

    For every ``(base_url, model)`` pair above, the kwargs produced by
    ``build_anthropic_kwargs`` with reasoning enabled must contain neither
    a ``thinking`` entry nor an ``output_config`` entry.
    """
    from agent.anthropic_adapter import build_anthropic_kwargs

    request_kwargs = build_anthropic_kwargs(
        model=model,
        messages=[{"role": "user", "content": "hello"}],
        tools=None,
        max_tokens=4096,
        reasoning_config={"enabled": True, "effort": "medium"},
        base_url=base_url,
    )

    # Only the first fragment interpolates values; the rest are plain
    # string literals concatenated onto it.
    failure_note = (
        f"Kimi-family endpoint ({base_url}, {model}) must not receive "
        "Anthropic thinking — upstream validates reasoning_content on "
        "replayed tool-call history we don't preserve."
    )
    assert "thinking" not in request_kwargs, failure_note
    assert "output_config" not in request_kwargs
|
||||
|
||||
def test_custom_endpoint_non_kimi_model_keeps_thinking(self) -> None:
    """A non-Kimi model behind a custom endpoint keeps its thinking kwarg.

    Regression guard against over-broad model-family matching: a model
    name outside the Kimi/Moonshot family (here ``MiniMax-M2.7``) must
    still produce an enabled ``thinking`` entry.
    """
    from agent.anthropic_adapter import build_anthropic_kwargs

    call_args = {
        "model": "MiniMax-M2.7",
        "messages": [{"role": "user", "content": "hello"}],
        "tools": None,
        "max_tokens": 4096,
        "reasoning_config": {"enabled": True, "effort": "medium"},
        "base_url": "https://my-llm-proxy.example.com/anthropic",
    }
    built = build_anthropic_kwargs(**call_args)

    assert "thinking" in built
    thinking_cfg = built["thinking"]
    assert thinking_cfg["type"] == "enabled"
|
||||
|
||||
def test_kimi_family_replay_preserves_unsigned_thinking(self) -> None:
    """Replayed history on a custom Kimi endpoint keeps its thinking block.

    An assistant tool-call message carrying ``reasoning_content`` is
    converted with a custom ``base_url`` and a Kimi-family ``model``; the
    converted assistant message must contain exactly one ``thinking``
    block whose text equals the original reasoning content.
    """
    from agent.anthropic_adapter import convert_messages_to_anthropic

    tool_call = {
        "id": "call_1",
        "type": "function",
        "function": {"name": "skill_view", "arguments": "{}"},
    }
    history = [
        {"role": "user", "content": "hi"},
        {
            "role": "assistant",
            "reasoning_content": "planning the tool call",
            "tool_calls": [tool_call],
        },
        {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
    ]

    _, converted = convert_messages_to_anthropic(
        history,
        base_url="http://my-kimi-proxy.internal",
        model="kimi-2.6",
    )

    # The unsigned thinking block synthesised from reasoning_content is
    # what Kimi's history validation needs; a plain third-party endpoint
    # would have had it stripped.
    assistant_msg = next(
        filter(lambda msg: msg["role"] == "assistant", converted)
    )
    thinking_blocks = []
    for block in assistant_msg["content"]:
        if isinstance(block, dict) and block.get("type") == "thinking":
            thinking_blocks.append(block)

    assert len(thinking_blocks) == 1
    assert thinking_blocks[0]["thinking"] == "planning the tool call"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue