From e18f14d928553d9c97dbacc120601b90ba9c070e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 7 Jun 2026 01:52:49 -0700 Subject: [PATCH] test(kimi): align stale parity/profile tests with thinking-xor-effort contract (#41095) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * test(kimi): align stale parity/profile tests with thinking-xor-effort contract ce4e74b3 (fix(kimi): send thinking xor reasoning_effort, never both) changed the Kimi profile to emit at most one of extra_body.thinking or a top-level reasoning_effort, and added tests/plugins/model_providers/test_kimi_profile.py to pin it — but left two older test files still asserting the removed 'send both' behavior, turning main red for every PR branched after it. Update the stale assertions to the xor contract: - explicit recognized effort (low|medium|high) -> reasoning_effort only, no thinking - enabled w/o effort, or no reasoning_config -> thinking:enabled only, no reasoning_effort - disabled -> thinking:disabled only No production change. * test(kimi): cover remaining xor stale assertions (profile_wiring, run_agent) Two more test files asserted the pre-ce4e74b3 'thinking + reasoning_effort together' behavior — landed in a different CI shard so they surfaced only after the first batch went green: - tests/providers/test_profile_wiring.py::TestKimiProfileParity (2) - tests/run_agent/test_run_agent.py::TestBuildApiKwargs (3: kimi-coding, moonshot, moonshot-cn) Same realignment to the xor contract: default/enabled-without-effort emits thinking:enabled and no reasoning_effort; explicit effort emits reasoning_effort only. Verified by running the full provider + TestBuildApiKwargs Kimi surface (202 passed) plus a codebase-wide grep for any remaining paired thinking+effort assertion (none). --- tests/providers/test_profile_wiring.py | 9 +++++++-- tests/providers/test_provider_profiles.py | 11 ++++++++--- tests/providers/test_transport_parity.py | 24 +++++++++++++++++++++-- tests/run_agent/test_run_agent.py | 12 +++++++----- 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/tests/providers/test_profile_wiring.py b/tests/providers/test_profile_wiring.py index 258ff531806..047b3eb9bd2 100644 --- a/tests/providers/test_profile_wiring.py +++ b/tests/providers/test_profile_wiring.py @@ -59,6 +59,7 @@ class TestKimiProfileParity: assert profile["max_completion_tokens"] == legacy["max_completion_tokens"] == 32000 def test_thinking_enabled(self, transport): + # xor contract: explicit effort → reasoning_effort only, no thinking. rc = {"enabled": True, "effort": "high"} legacy = transport.build_kwargs( model="kimi-k2", messages=_msgs(), tools=None, @@ -69,8 +70,9 @@ class TestKimiProfileParity: provider_profile=get_provider_profile("kimi"), reasoning_config=rc, ) - assert profile["extra_body"]["thinking"] == legacy["extra_body"]["thinking"] assert profile["reasoning_effort"] == legacy["reasoning_effort"] == "high" + assert "thinking" not in profile.get("extra_body", {}) + assert "thinking" not in legacy.get("extra_body", {}) def test_thinking_disabled(self, transport): rc = {"enabled": False} @@ -89,6 +91,7 @@ class TestKimiProfileParity: assert "reasoning_effort" not in legacy def test_reasoning_effort_default(self, transport): + # xor contract: enabled w/o effort → thinking-enabled only, no effort. rc = {"enabled": True} legacy = transport.build_kwargs( model="kimi-k2", messages=_msgs(), tools=None, @@ -99,7 +102,9 @@ class TestKimiProfileParity: provider_profile=get_provider_profile("kimi"), reasoning_config=rc, ) - assert profile["reasoning_effort"] == legacy["reasoning_effort"] == "medium" + assert profile["extra_body"]["thinking"] == legacy["extra_body"]["thinking"] == {"type": "enabled"} + assert "reasoning_effort" not in profile + assert "reasoning_effort" not in legacy class TestOpenRouterProfileParity: diff --git a/tests/providers/test_provider_profiles.py b/tests/providers/test_provider_profiles.py index c9e9daa623d..438eddddf99 100644 --- a/tests/providers/test_provider_profiles.py +++ b/tests/providers/test_provider_profiles.py @@ -68,10 +68,12 @@ class TestKimiProfile: assert kimi.base_url != cn.base_url def test_thinking_enabled(self): + # xor contract (fix ce4e74b3): an explicit recognized effort sends + # reasoning_effort ONLY — never paired with extra_body.thinking. p = get_provider_profile("kimi") eb, tl = p.build_api_kwargs_extras(reasoning_config={"enabled": True, "effort": "high"}) - assert eb["thinking"] == {"type": "enabled"} assert tl["reasoning_effort"] == "high" + assert "thinking" not in eb def test_thinking_disabled(self): p = get_provider_profile("kimi") @@ -80,15 +82,18 @@ class TestKimiProfile: assert "reasoning_effort" not in tl def test_reasoning_effort_default(self): + # enabled with no effort → thinking toggle only, no top-level effort. p = get_provider_profile("kimi") eb, tl = p.build_api_kwargs_extras(reasoning_config={"enabled": True}) - assert tl["reasoning_effort"] == "medium" + assert eb["thinking"] == {"type": "enabled"} + assert "reasoning_effort" not in tl def test_no_config_defaults(self): + # No reasoning_config → thinking on, server picks depth; no effort. p = get_provider_profile("kimi") eb, tl = p.build_api_kwargs_extras(reasoning_config=None) assert eb["thinking"] == {"type": "enabled"} - assert tl["reasoning_effort"] == "medium" + assert "reasoning_effort" not in tl class TestOpenRouterProfile: diff --git a/tests/providers/test_transport_parity.py b/tests/providers/test_transport_parity.py index 5d1856cd84b..f42972547af 100644 --- a/tests/providers/test_transport_parity.py +++ b/tests/providers/test_transport_parity.py @@ -80,6 +80,8 @@ class TestKimiParity: assert kw["max_completion_tokens"] == 32000 def test_thinking_enabled(self, transport): + # xor contract (fix ce4e74b3): an explicit recognized effort sends + # reasoning_effort ONLY — never paired with extra_body.thinking. kw = transport.build_kwargs( model="kimi-k2", messages=_simple_messages(), @@ -87,7 +89,20 @@ class TestKimiParity: provider_profile=get_provider_profile("kimi-coding"), reasoning_config={"enabled": True, "effort": "high"}, ) + assert kw.get("reasoning_effort") == "high" + assert "thinking" not in kw.get("extra_body", {}) + + def test_thinking_enabled_without_effort(self, transport): + # enabled but no effort → fall back to the thinking toggle, no effort. + kw = transport.build_kwargs( + model="kimi-k2", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("kimi-coding"), + reasoning_config={"enabled": True}, + ) assert kw["extra_body"]["thinking"] == {"type": "enabled"} + assert "reasoning_effort" not in kw def test_thinking_disabled(self, transport): kw = transport.build_kwargs( @@ -98,6 +113,7 @@ class TestKimiParity: reasoning_config={"enabled": False}, ) assert kw["extra_body"]["thinking"] == {"type": "disabled"} + assert "reasoning_effort" not in kw def test_reasoning_effort_top_level(self, transport): """Kimi reasoning_effort is a TOP-LEVEL api_kwargs key, NOT in extra_body.""" @@ -111,7 +127,10 @@ class TestKimiParity: assert kw.get("reasoning_effort") == "high" assert "reasoning_effort" not in kw.get("extra_body", {}) - def test_reasoning_effort_default_medium(self, transport): + def test_reasoning_effort_default_no_effort(self, transport): + # xor contract: enabled with no effort falls back to thinking-enabled + # and emits NO top-level reasoning_effort (previously defaulted to + # "medium" alongside thinking — the pairing this fix removes). kw = transport.build_kwargs( model="kimi-k2", messages=_simple_messages(), @@ -119,7 +138,8 @@ class TestKimiParity: provider_profile=get_provider_profile("kimi-coding"), reasoning_config={"enabled": True}, ) - assert kw.get("reasoning_effort") == "medium" + assert "reasoning_effort" not in kw + assert kw["extra_body"]["thinking"] == {"type": "enabled"} class TestOpenRouterParity: diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 126f52aacd1..8580f7c37d7 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -1555,8 +1555,9 @@ class TestBuildApiKwargs: assert "temperature" not in kwargs def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent): - """Kimi endpoint should send max_tokens=32000 and reasoning_effort as - top-level params, matching Kimi CLI's default behavior.""" + """Kimi endpoint sends max_tokens=32000. With no reasoning_config it + defaults to the thinking toggle (xor contract: never paired with a + top-level reasoning_effort).""" agent.provider = "kimi-coding" agent.base_url = "https://api.kimi.com/coding/v1" agent._base_url_lower = agent.base_url.lower() @@ -1566,7 +1567,8 @@ class TestBuildApiKwargs: kwargs = agent._build_api_kwargs(messages) assert kwargs["max_tokens"] == 32000 - assert kwargs["reasoning_effort"] == "medium" + assert kwargs["extra_body"]["thinking"] == {"type": "enabled"} + assert "reasoning_effort" not in kwargs def test_kimi_coding_endpoint_respects_custom_effort(self, agent): """reasoning_effort should reflect reasoning_config.effort when set.""" @@ -1621,8 +1623,8 @@ class TestBuildApiKwargs: kwargs = agent._build_api_kwargs(messages) assert kwargs["max_tokens"] == 32000 - assert kwargs["reasoning_effort"] == "medium" assert kwargs["extra_body"]["thinking"] == {"type": "enabled"} + assert "reasoning_effort" not in kwargs def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent): """api.moonshot.cn (China endpoint) should get the same params.""" @@ -1635,8 +1637,8 @@ class TestBuildApiKwargs: kwargs = agent._build_api_kwargs(messages) assert kwargs["max_tokens"] == 32000 - assert kwargs["reasoning_effort"] == "medium" assert kwargs["extra_body"]["thinking"] == {"type": "enabled"} + assert "reasoning_effort" not in kwargs def test_provider_preferences_injected(self, agent): agent.provider = "openrouter"