From d6e1fadbf59085214e3f97e231d3c7f0d1643941 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 10 May 2026 15:21:30 -0700 Subject: [PATCH] fix(xai): omit reasoning.effort for grok models that reject it (#23435) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xAI's Responses API returns HTTP 400 ("Model X does not support parameter reasoningEffort") for grok-4, grok-4-0709, grok-4-fast-*, grok-4-1-fast-*, grok-3, grok-4.20-0309-*, and grok-code-fast-1 — even though those models reason natively. Hermes was unconditionally sending `reasoning: {effort: 'medium'}` to xAI for every Grok model, breaking direct `--provider xai` for the entire grok-4 line. Add a prefix allowlist predicate (matched against the model name after any aggregator prefix such as `x-ai/` is stripped; verified live against api.x.ai 2026-05-10) covering the only Grok families that accept the effort dial: grok-3-mini*, grok-4.20-multi-agent*, grok-4.3*. The Responses transport omits the `reasoning` key entirely for everything else while still including `reasoning.encrypted_content` so we capture native reasoning tokens. Verified end-to-end: `hermes chat -q hi --provider xai --model grok-4-0709` went from HTTP 400 to a successful reply. --- agent/model_metadata.py | 38 ++++++ agent/transports/codex.py | 10 +- .../agent/transports/test_codex_transport.py | 113 ++++++++++++++++++ 3 files changed, 160 insertions(+), 1 deletion(-) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 956d6b93095..cdca9ae5b2f 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -244,6 +244,44 @@ DEFAULT_CONTEXT_LENGTHS = { "zai-org/GLM-5": 202752, } +# xAI Grok models that ACCEPT the `reasoning.effort` parameter on +# api.x.ai. 
Verified live against /v1/responses 2026-05-10: +# +# ACCEPTS effort: grok-3-mini, grok-3-mini-fast, grok-4.20-multi-agent-0309, +# grok-4.3 +# REJECTS effort: grok-3, grok-4, grok-4-0709, grok-4-fast-(non-)reasoning, +# grok-4-1-fast-(non-)reasoning, grok-4.20-0309-(non-)reasoning, +# grok-code-fast-1 +# +# REJECTS-side models still reason natively — they just don't expose an +# effort dial — so callers should send no `reasoning` key at all rather +# than a default `medium` (which 400s with "Model X does not support +# parameter reasoningEffort"). +_GROK_EFFORT_CAPABLE_PREFIXES = ( + "grok-3-mini", + "grok-4.20-multi-agent", + "grok-4.3", +) + + +def grok_supports_reasoning_effort(model: str) -> bool: + """Return True when an xAI Grok model accepts ``reasoning.effort``. + + Allowlist by substring (matches both bare ``grok-3-mini`` and + aggregator-prefixed ``x-ai/grok-3-mini``). Conservative by design: + if a future Grok model isn't listed, we send no effort dial rather + than 400. + """ + name = (model or "").strip().lower() + if not name: + return False + # Strip common aggregator prefixes (x-ai/, openrouter/x-ai/, xai/, ...) 
+ for sep in ("/",): + if sep in name: + name = name.rsplit(sep, 1)[-1] + return any(name.startswith(prefix) for prefix in _GROK_EFFORT_CAPABLE_PREFIXES) + + _CONTEXT_LENGTH_KEYS = ( "context_length", "context_window", diff --git a/agent/transports/codex.py b/agent/transports/codex.py index f011034dae8..6738ed3220c 100644 --- a/agent/transports/codex.py +++ b/agent/transports/codex.py @@ -104,8 +104,16 @@ class ResponsesApiTransport(ProviderTransport): kwargs["prompt_cache_key"] = session_id if reasoning_enabled and is_xai_responses: + from agent.model_metadata import grok_supports_reasoning_effort + kwargs["include"] = ["reasoning.encrypted_content"] - kwargs["reasoning"] = {"effort": reasoning_effort} + # xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3 + # / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though + # those models reason natively. Only send the effort dial when + # the target model is on the allowlist; otherwise send no + # `reasoning` key at all and let the model reason on its own. + if grok_supports_reasoning_effort(model): + kwargs["reasoning"] = {"effort": reasoning_effort} elif reasoning_enabled: if is_github_responses: github_reasoning = params.get("github_reasoning_extra") diff --git a/tests/agent/transports/test_codex_transport.py b/tests/agent/transports/test_codex_transport.py index 7217f2e9e6a..6a4cda173ad 100644 --- a/tests/agent/transports/test_codex_transport.py +++ b/tests/agent/transports/test_codex_transport.py @@ -180,6 +180,119 @@ class TestCodexBuildKwargs: # "minimal" should be clamped to "low" for xAI as well assert kw.get("reasoning", {}).get("effort") == "low" + # --- Grok reasoning-effort capability allowlist --- + # api.x.ai 400s with "Model X does not support parameter reasoningEffort" + # on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*. + # Those models reason natively but don't expose the dial. 
The transport + # must omit the `reasoning` key for them while keeping the encrypted + # reasoning content include so we can capture native reasoning tokens. + + def test_xai_grok_4_omits_reasoning_effort(self, transport): + """grok-4 / grok-4-0709 reject reasoning.effort with HTTP 400.""" + messages = [{"role": "user", "content": "Hi"}] + for model in ("grok-4", "grok-4-0709"): + kw = transport.build_kwargs( + model=model, messages=messages, tools=[], + is_xai_responses=True, + reasoning_config={"effort": "high"}, + ) + assert "reasoning" not in kw, ( + f"{model} must not receive a reasoning key (xAI rejects it)" + ) + # Still capture native reasoning tokens + assert "reasoning.encrypted_content" in kw.get("include", []) + + def test_xai_grok_4_fast_omits_reasoning_effort(self, transport): + """grok-4-fast and grok-4-1-fast variants reject reasoning.effort.""" + messages = [{"role": "user", "content": "Hi"}] + for model in ( + "grok-4-fast-reasoning", + "grok-4-fast-non-reasoning", + "grok-4-1-fast-reasoning", + "grok-4-1-fast-non-reasoning", + ): + kw = transport.build_kwargs( + model=model, messages=messages, tools=[], + is_xai_responses=True, + reasoning_config={"effort": "low"}, + ) + assert "reasoning" not in kw, ( + f"{model} must not receive a reasoning key (xAI rejects it)" + ) + + def test_xai_grok_3_non_mini_omits_reasoning_effort(self, transport): + """Plain grok-3 rejects reasoning.effort — only grok-3-mini accepts it.""" + messages = [{"role": "user", "content": "Hi"}] + kw = transport.build_kwargs( + model="grok-3", messages=messages, tools=[], + is_xai_responses=True, + reasoning_config={"effort": "medium"}, + ) + assert "reasoning" not in kw + + def test_xai_grok_3_mini_keeps_reasoning_effort(self, transport): + """grok-3-mini and -fast variants do accept the effort dial.""" + messages = [{"role": "user", "content": "Hi"}] + for model in ("grok-3-mini", "grok-3-mini-fast"): + kw = transport.build_kwargs( + model=model, messages=messages, tools=[], 
+ is_xai_responses=True, + reasoning_config={"effort": "high"}, + ) + assert kw.get("reasoning") == {"effort": "high"} + + def test_xai_grok_4_20_0309_variants_omit_reasoning_effort(self, transport): + """grok-4.20-0309-(non-)reasoning reject the effort dial. + + Counterintuitively, only grok-4.20-multi-agent-0309 accepts it. + """ + messages = [{"role": "user", "content": "Hi"}] + for model in ("grok-4.20-0309-reasoning", "grok-4.20-0309-non-reasoning"): + kw = transport.build_kwargs( + model=model, messages=messages, tools=[], + is_xai_responses=True, + reasoning_config={"effort": "high"}, + ) + assert "reasoning" not in kw, f"{model} must not receive reasoning" + + def test_xai_grok_4_20_multi_agent_keeps_reasoning_effort(self, transport): + """grok-4.20-multi-agent-0309 is the one grok-4.20 variant that accepts effort.""" + messages = [{"role": "user", "content": "Hi"}] + kw = transport.build_kwargs( + model="grok-4.20-multi-agent-0309", messages=messages, tools=[], + is_xai_responses=True, + reasoning_config={"effort": "low"}, + ) + assert kw.get("reasoning") == {"effort": "low"} + + def test_xai_grok_code_fast_omits_reasoning_effort(self, transport): + """grok-code-fast-1 rejects reasoning.effort.""" + messages = [{"role": "user", "content": "Hi"}] + kw = transport.build_kwargs( + model="grok-code-fast-1", messages=messages, tools=[], + is_xai_responses=True, + reasoning_config={"effort": "high"}, + ) + assert "reasoning" not in kw + + def test_xai_aggregator_prefix_stripped(self, transport): + """`x-ai/grok-3-mini` (OpenRouter-style slug) still resolves correctly.""" + messages = [{"role": "user", "content": "Hi"}] + # Effort-capable + kw = transport.build_kwargs( + model="x-ai/grok-3-mini", messages=messages, tools=[], + is_xai_responses=True, + reasoning_config={"effort": "high"}, + ) + assert kw.get("reasoning") == {"effort": "high"} + # Effort-incapable + kw = transport.build_kwargs( + model="x-ai/grok-4-0709", messages=messages, tools=[], + 
is_xai_responses=True, + reasoning_config={"effort": "high"}, + ) + assert "reasoning" not in kw + class TestCodexValidateResponse: