mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
test(kimi): align stale parity/profile tests with thinking-xor-effort contract (#41095)
* test(kimi): align stale parity/profile tests with thinking-xor-effort contract
ce4e74b3 (fix(kimi): send thinking xor reasoning_effort, never both)
changed the Kimi profile to emit at most one of extra_body.thinking or a
top-level reasoning_effort, and added tests/plugins/model_providers/test_kimi_profile.py
to pin it — but left two older test files still asserting the removed
'send both' behavior, turning main red for every PR branched after it.
Update the stale assertions to the xor contract:
- explicit recognized effort (low|medium|high) -> reasoning_effort only,
  no thinking
- enabled w/o effort, or no reasoning_config -> thinking:enabled only,
  no reasoning_effort
- disabled -> thinking:disabled only, no reasoning_effort
No production change.
* test(kimi): cover remaining xor stale assertions (profile_wiring, run_agent)
Two more test files asserted the pre-ce4e74b3 'thinking + reasoning_effort
together' behavior. They landed in a different CI shard, so they surfaced
only after the first batch went green:
- tests/providers/test_profile_wiring.py::TestKimiProfileParity (2 tests)
- tests/run_agent/test_run_agent.py::TestBuildApiKwargs (3 tests: kimi-coding,
  moonshot, moonshot-cn)
Same realignment to the xor contract: default/enabled-without-effort emits
thinking:enabled and no reasoning_effort; explicit effort emits
reasoning_effort only. Verified by running the full provider +
TestBuildApiKwargs Kimi surface (202 passed) plus a codebase-wide grep for
any remaining paired thinking+effort assertion (none).
This commit is contained in:
parent
0524c9b34e
commit
e18f14d928
4 changed files with 44 additions and 12 deletions
|
|
@ -59,6 +59,7 @@ class TestKimiProfileParity:
|
|||
assert profile["max_completion_tokens"] == legacy["max_completion_tokens"] == 32000
|
||||
|
||||
def test_thinking_enabled(self, transport):
|
||||
# xor contract: explicit effort → reasoning_effort only, no thinking.
|
||||
rc = {"enabled": True, "effort": "high"}
|
||||
legacy = transport.build_kwargs(
|
||||
model="kimi-k2", messages=_msgs(), tools=None,
|
||||
|
|
@ -69,8 +70,9 @@ class TestKimiProfileParity:
|
|||
provider_profile=get_provider_profile("kimi"),
|
||||
reasoning_config=rc,
|
||||
)
|
||||
assert profile["extra_body"]["thinking"] == legacy["extra_body"]["thinking"]
|
||||
assert profile["reasoning_effort"] == legacy["reasoning_effort"] == "high"
|
||||
assert "thinking" not in profile.get("extra_body", {})
|
||||
assert "thinking" not in legacy.get("extra_body", {})
|
||||
|
||||
def test_thinking_disabled(self, transport):
|
||||
rc = {"enabled": False}
|
||||
|
|
@ -89,6 +91,7 @@ class TestKimiProfileParity:
|
|||
assert "reasoning_effort" not in legacy
|
||||
|
||||
def test_reasoning_effort_default(self, transport):
|
||||
# xor contract: enabled w/o effort → thinking-enabled only, no effort.
|
||||
rc = {"enabled": True}
|
||||
legacy = transport.build_kwargs(
|
||||
model="kimi-k2", messages=_msgs(), tools=None,
|
||||
|
|
@ -99,7 +102,9 @@ class TestKimiProfileParity:
|
|||
provider_profile=get_provider_profile("kimi"),
|
||||
reasoning_config=rc,
|
||||
)
|
||||
assert profile["reasoning_effort"] == legacy["reasoning_effort"] == "medium"
|
||||
assert profile["extra_body"]["thinking"] == legacy["extra_body"]["thinking"] == {"type": "enabled"}
|
||||
assert "reasoning_effort" not in profile
|
||||
assert "reasoning_effort" not in legacy
|
||||
|
||||
|
||||
class TestOpenRouterProfileParity:
|
||||
|
|
|
|||
|
|
@ -68,10 +68,12 @@ class TestKimiProfile:
|
|||
assert kimi.base_url != cn.base_url
|
||||
|
||||
def test_thinking_enabled(self):
|
||||
# xor contract (fix ce4e74b3): an explicit recognized effort sends
|
||||
# reasoning_effort ONLY — never paired with extra_body.thinking.
|
||||
p = get_provider_profile("kimi")
|
||||
eb, tl = p.build_api_kwargs_extras(reasoning_config={"enabled": True, "effort": "high"})
|
||||
assert eb["thinking"] == {"type": "enabled"}
|
||||
assert tl["reasoning_effort"] == "high"
|
||||
assert "thinking" not in eb
|
||||
|
||||
def test_thinking_disabled(self):
|
||||
p = get_provider_profile("kimi")
|
||||
|
|
@ -80,15 +82,18 @@ class TestKimiProfile:
|
|||
assert "reasoning_effort" not in tl
|
||||
|
||||
def test_reasoning_effort_default(self):
|
||||
# enabled with no effort → thinking toggle only, no top-level effort.
|
||||
p = get_provider_profile("kimi")
|
||||
eb, tl = p.build_api_kwargs_extras(reasoning_config={"enabled": True})
|
||||
assert tl["reasoning_effort"] == "medium"
|
||||
assert eb["thinking"] == {"type": "enabled"}
|
||||
assert "reasoning_effort" not in tl
|
||||
|
||||
def test_no_config_defaults(self):
|
||||
# No reasoning_config → thinking on, server picks depth; no effort.
|
||||
p = get_provider_profile("kimi")
|
||||
eb, tl = p.build_api_kwargs_extras(reasoning_config=None)
|
||||
assert eb["thinking"] == {"type": "enabled"}
|
||||
assert tl["reasoning_effort"] == "medium"
|
||||
assert "reasoning_effort" not in tl
|
||||
|
||||
|
||||
class TestOpenRouterProfile:
|
||||
|
|
|
|||
|
|
@ -80,6 +80,8 @@ class TestKimiParity:
|
|||
assert kw["max_completion_tokens"] == 32000
|
||||
|
||||
def test_thinking_enabled(self, transport):
|
||||
# xor contract (fix ce4e74b3): an explicit recognized effort sends
|
||||
# reasoning_effort ONLY — never paired with extra_body.thinking.
|
||||
kw = transport.build_kwargs(
|
||||
model="kimi-k2",
|
||||
messages=_simple_messages(),
|
||||
|
|
@ -87,7 +89,20 @@ class TestKimiParity:
|
|||
provider_profile=get_provider_profile("kimi-coding"),
|
||||
reasoning_config={"enabled": True, "effort": "high"},
|
||||
)
|
||||
assert kw.get("reasoning_effort") == "high"
|
||||
assert "thinking" not in kw.get("extra_body", {})
|
||||
|
||||
def test_thinking_enabled_without_effort(self, transport):
|
||||
# enabled but no effort → fall back to the thinking toggle, no effort.
|
||||
kw = transport.build_kwargs(
|
||||
model="kimi-k2",
|
||||
messages=_simple_messages(),
|
||||
tools=None,
|
||||
provider_profile=get_provider_profile("kimi-coding"),
|
||||
reasoning_config={"enabled": True},
|
||||
)
|
||||
assert kw["extra_body"]["thinking"] == {"type": "enabled"}
|
||||
assert "reasoning_effort" not in kw
|
||||
|
||||
def test_thinking_disabled(self, transport):
|
||||
kw = transport.build_kwargs(
|
||||
|
|
@ -98,6 +113,7 @@ class TestKimiParity:
|
|||
reasoning_config={"enabled": False},
|
||||
)
|
||||
assert kw["extra_body"]["thinking"] == {"type": "disabled"}
|
||||
assert "reasoning_effort" not in kw
|
||||
|
||||
def test_reasoning_effort_top_level(self, transport):
|
||||
"""Kimi reasoning_effort is a TOP-LEVEL api_kwargs key, NOT in extra_body."""
|
||||
|
|
@ -111,7 +127,10 @@ class TestKimiParity:
|
|||
assert kw.get("reasoning_effort") == "high"
|
||||
assert "reasoning_effort" not in kw.get("extra_body", {})
|
||||
|
||||
def test_reasoning_effort_default_medium(self, transport):
|
||||
def test_reasoning_effort_default_no_effort(self, transport):
|
||||
# xor contract: enabled with no effort falls back to thinking-enabled
|
||||
# and emits NO top-level reasoning_effort (previously defaulted to
|
||||
# "medium" alongside thinking — the pairing this fix removes).
|
||||
kw = transport.build_kwargs(
|
||||
model="kimi-k2",
|
||||
messages=_simple_messages(),
|
||||
|
|
@ -119,7 +138,8 @@ class TestKimiParity:
|
|||
provider_profile=get_provider_profile("kimi-coding"),
|
||||
reasoning_config={"enabled": True},
|
||||
)
|
||||
assert kw.get("reasoning_effort") == "medium"
|
||||
assert "reasoning_effort" not in kw
|
||||
assert kw["extra_body"]["thinking"] == {"type": "enabled"}
|
||||
|
||||
|
||||
class TestOpenRouterParity:
|
||||
|
|
|
|||
|
|
@ -1555,8 +1555,9 @@ class TestBuildApiKwargs:
|
|||
assert "temperature" not in kwargs
|
||||
|
||||
def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent):
|
||||
"""Kimi endpoint should send max_tokens=32000 and reasoning_effort as
|
||||
top-level params, matching Kimi CLI's default behavior."""
|
||||
"""Kimi endpoint sends max_tokens=32000. With no reasoning_config it
|
||||
defaults to the thinking toggle (xor contract: never paired with a
|
||||
top-level reasoning_effort)."""
|
||||
agent.provider = "kimi-coding"
|
||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
|
|
@ -1566,7 +1567,8 @@ class TestBuildApiKwargs:
|
|||
kwargs = agent._build_api_kwargs(messages)
|
||||
|
||||
assert kwargs["max_tokens"] == 32000
|
||||
assert kwargs["reasoning_effort"] == "medium"
|
||||
assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
|
||||
assert "reasoning_effort" not in kwargs
|
||||
|
||||
def test_kimi_coding_endpoint_respects_custom_effort(self, agent):
|
||||
"""reasoning_effort should reflect reasoning_config.effort when set."""
|
||||
|
|
@ -1621,8 +1623,8 @@ class TestBuildApiKwargs:
|
|||
kwargs = agent._build_api_kwargs(messages)
|
||||
|
||||
assert kwargs["max_tokens"] == 32000
|
||||
assert kwargs["reasoning_effort"] == "medium"
|
||||
assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
|
||||
assert "reasoning_effort" not in kwargs
|
||||
|
||||
def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent):
|
||||
"""api.moonshot.cn (China endpoint) should get the same params."""
|
||||
|
|
@ -1635,8 +1637,8 @@ class TestBuildApiKwargs:
|
|||
kwargs = agent._build_api_kwargs(messages)
|
||||
|
||||
assert kwargs["max_tokens"] == 32000
|
||||
assert kwargs["reasoning_effort"] == "medium"
|
||||
assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
|
||||
assert "reasoning_effort" not in kwargs
|
||||
|
||||
def test_provider_preferences_injected(self, agent):
|
||||
agent.provider = "openrouter"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue