diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 956d6b93095..cdca9ae5b2f 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -244,6 +244,44 @@ DEFAULT_CONTEXT_LENGTHS = {
     "zai-org/GLM-5": 202752,
 }
 
+# xAI Grok models that ACCEPT the `reasoning.effort` parameter on
+# api.x.ai. Verified live against /v1/responses 2026-05-10:
+#
+#   ACCEPTS effort: grok-3-mini, grok-3-mini-fast, grok-4.20-multi-agent-0309,
+#                    grok-4.3
+#   REJECTS effort: grok-3, grok-4, grok-4-0709, grok-4-fast-(non-)reasoning,
+#                    grok-4-1-fast-(non-)reasoning, grok-4.20-0309-(non-)reasoning,
+#                    grok-code-fast-1
+#
+# REJECTS-side models still reason natively — they just don't expose an
+# effort dial — so callers should send no `reasoning` key at all rather
+# than a default `medium` (which 400s with "Model X does not support
+# parameter reasoningEffort").
+_GROK_EFFORT_CAPABLE_PREFIXES = (
+    "grok-3-mini",
+    "grok-4.20-multi-agent",
+    "grok-4.3",
+)
+
+
+def grok_supports_reasoning_effort(model: str) -> bool:
+    """Return True when an xAI Grok model accepts ``reasoning.effort``.
+
+    Matching is by prefix on the final ``/``-separated segment of the
+    lowercased name, so bare ``grok-3-mini`` and aggregator-prefixed
+    ``x-ai/grok-3-mini`` resolve identically. Conservative by design:
+    if a future Grok model isn't listed, we send no effort dial rather
+    than 400. Empty, ``None`` and non-string values return False.
+    """
+    if not isinstance(model, str):
+        return False
+    # Keep only the last path segment to drop aggregator prefixes
+    # (x-ai/, openrouter/x-ai/, xai/, ...).
+    name = model.strip().lower().rsplit("/", 1)[-1]
+    # str.startswith accepts a tuple of prefixes; "" matches none of them.
+    return name.startswith(_GROK_EFFORT_CAPABLE_PREFIXES)
+
+
 _CONTEXT_LENGTH_KEYS = (
     "context_length",
     "context_window",
diff --git a/agent/transports/codex.py b/agent/transports/codex.py
index f011034dae8..6738ed3220c 100644
--- a/agent/transports/codex.py
+++ b/agent/transports/codex.py
@@ -104,8 +104,16 @@ class ResponsesApiTransport(ProviderTransport):
             kwargs["prompt_cache_key"] = session_id
 
         if reasoning_enabled and is_xai_responses:
+            from agent.model_metadata import grok_supports_reasoning_effort
+
             kwargs["include"] = ["reasoning.encrypted_content"]
-            kwargs["reasoning"] = {"effort": reasoning_effort}
+            # xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
+            # / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
+            # those models reason natively. Only send the effort dial when
+            # the target model is on the allowlist; otherwise send no
+            # `reasoning` key at all and let the model reason on its own.
+            if grok_supports_reasoning_effort(model):
+                kwargs["reasoning"] = {"effort": reasoning_effort}
         elif reasoning_enabled:
             if is_github_responses:
                 github_reasoning = params.get("github_reasoning_extra")
diff --git a/tests/agent/transports/test_codex_transport.py b/tests/agent/transports/test_codex_transport.py
index 7217f2e9e6a..6a4cda173ad 100644
--- a/tests/agent/transports/test_codex_transport.py
+++ b/tests/agent/transports/test_codex_transport.py
@@ -180,6 +180,119 @@ class TestCodexBuildKwargs:
         # "minimal" should be clamped to "low" for xAI as well
         assert kw.get("reasoning", {}).get("effort") == "low"
 
+    # --- Grok reasoning-effort capability allowlist ---
+    # api.x.ai 400s with "Model X does not support parameter reasoningEffort"
+    # on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*.
+    # Those models reason natively but don't expose the dial. The transport
+    # must omit the `reasoning` key for them while keeping the encrypted
+    # reasoning content include so we can capture native reasoning tokens.
+
+    def test_xai_grok_4_omits_reasoning_effort(self, transport):
+        """grok-4 / grok-4-0709 reject reasoning.effort with HTTP 400."""
+        messages = [{"role": "user", "content": "Hi"}]
+        for model in ("grok-4", "grok-4-0709"):
+            kw = transport.build_kwargs(
+                model=model, messages=messages, tools=[],
+                is_xai_responses=True,
+                reasoning_config={"effort": "high"},
+            )
+            assert "reasoning" not in kw, (
+                f"{model} must not receive a reasoning key (xAI rejects it)"
+            )
+            # Still capture native reasoning tokens
+            assert "reasoning.encrypted_content" in kw.get("include", [])
+
+    def test_xai_grok_4_fast_omits_reasoning_effort(self, transport):
+        """grok-4-fast and grok-4-1-fast variants reject reasoning.effort."""
+        messages = [{"role": "user", "content": "Hi"}]
+        for model in (
+            "grok-4-fast-reasoning",
+            "grok-4-fast-non-reasoning",
+            "grok-4-1-fast-reasoning",
+            "grok-4-1-fast-non-reasoning",
+        ):
+            kw = transport.build_kwargs(
+                model=model, messages=messages, tools=[],
+                is_xai_responses=True,
+                reasoning_config={"effort": "low"},
+            )
+            assert "reasoning" not in kw, (
+                f"{model} must not receive a reasoning key (xAI rejects it)"
+            )
+
+    def test_xai_grok_3_non_mini_omits_reasoning_effort(self, transport):
+        """Plain grok-3 rejects reasoning.effort — only grok-3-mini accepts it."""
+        messages = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="grok-3", messages=messages, tools=[],
+            is_xai_responses=True,
+            reasoning_config={"effort": "medium"},
+        )
+        assert "reasoning" not in kw
+
+    def test_xai_grok_3_mini_keeps_reasoning_effort(self, transport):
+        """grok-3-mini and -fast variants do accept the effort dial."""
+        messages = [{"role": "user", "content": "Hi"}]
+        for model in ("grok-3-mini", "grok-3-mini-fast"):
+            kw = transport.build_kwargs(
+                model=model, messages=messages, tools=[],
+                is_xai_responses=True,
+                reasoning_config={"effort": "high"},
+            )
+            assert kw.get("reasoning") == {"effort": "high"}
+
+    def test_xai_grok_4_20_0309_variants_omit_reasoning_effort(self, transport):
+        """grok-4.20-0309-(non-)reasoning reject the effort dial.
+
+        Counterintuitively, only grok-4.20-multi-agent-0309 accepts it.
+        """
+        messages = [{"role": "user", "content": "Hi"}]
+        for model in ("grok-4.20-0309-reasoning", "grok-4.20-0309-non-reasoning"):
+            kw = transport.build_kwargs(
+                model=model, messages=messages, tools=[],
+                is_xai_responses=True,
+                reasoning_config={"effort": "high"},
+            )
+            assert "reasoning" not in kw, f"{model} must not receive reasoning"
+
+    def test_xai_grok_4_20_multi_agent_keeps_reasoning_effort(self, transport):
+        """grok-4.20-multi-agent-0309 is the one grok-4.20 variant that accepts effort."""
+        messages = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="grok-4.20-multi-agent-0309", messages=messages, tools=[],
+            is_xai_responses=True,
+            reasoning_config={"effort": "low"},
+        )
+        assert kw.get("reasoning") == {"effort": "low"}
+
+    def test_xai_grok_code_fast_omits_reasoning_effort(self, transport):
+        """grok-code-fast-1 rejects reasoning.effort."""
+        messages = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="grok-code-fast-1", messages=messages, tools=[],
+            is_xai_responses=True,
+            reasoning_config={"effort": "high"},
+        )
+        assert "reasoning" not in kw
+
+    def test_xai_aggregator_prefix_stripped(self, transport):
+        """`x-ai/grok-3-mini` (OpenRouter-style slug) still resolves correctly."""
+        messages = [{"role": "user", "content": "Hi"}]
+        # Effort-capable
+        kw = transport.build_kwargs(
+            model="x-ai/grok-3-mini", messages=messages, tools=[],
+            is_xai_responses=True,
+            reasoning_config={"effort": "high"},
+        )
+        assert kw.get("reasoning") == {"effort": "high"}
+        # Effort-incapable
+        kw = transport.build_kwargs(
+            model="x-ai/grok-4-0709", messages=messages, tools=[],
+            is_xai_responses=True,
+            reasoning_config={"effort": "high"},
+        )
+        assert "reasoning" not in kw
+
 
 class TestCodexValidateResponse:
 