mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-25 05:52:34 +00:00
fix(xai): omit reasoning.effort for grok models that reject it (#23435)
xAI's Responses API returns HTTP 400 ("Model X does not support
parameter reasoningEffort") for grok-4, grok-4-0709, grok-4-fast-*,
grok-4-1-fast-*, grok-3, grok-4.20-0309-*, and grok-code-fast-1 — even
though those models reason natively. Hermes was unconditionally sending
`reasoning: {effort: 'medium'}` to xAI for every Grok model, breaking
direct `--provider xai` for the entire grok-4 line.
Add a prefix allowlist predicate (verified live against api.x.ai
2026-05-10; matched against the model name after any aggregator path such
as `x-ai/` is stripped) covering the only Grok families that accept the
effort dial: grok-3-mini*, grok-4.20-multi-agent*, grok-4.3*. The Responses transport
omits the `reasoning` key entirely for everything else while still
including `reasoning.encrypted_content` so we capture native reasoning
tokens.
Verified end-to-end: `hermes chat -q hi --provider xai --model grok-4-0709`
went from HTTP 400 to a successful reply.
This commit is contained in:
parent
cc2a0c674a
commit
d6e1fadbf5
3 changed files with 160 additions and 1 deletions
|
|
@ -244,6 +244,44 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||||
"zai-org/GLM-5": 202752,
|
"zai-org/GLM-5": 202752,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# xAI Grok models that ACCEPT the `reasoning.effort` parameter on
|
||||||
|
# api.x.ai. Verified live against /v1/responses 2026-05-10:
|
||||||
|
#
|
||||||
|
# ACCEPTS effort: grok-3-mini, grok-3-mini-fast, grok-4.20-multi-agent-0309,
|
||||||
|
# grok-4.3
|
||||||
|
# REJECTS effort: grok-3, grok-4, grok-4-0709, grok-4-fast-(non-)reasoning,
|
||||||
|
# grok-4-1-fast-(non-)reasoning, grok-4.20-0309-(non-)reasoning,
|
||||||
|
# grok-code-fast-1
|
||||||
|
#
|
||||||
|
# REJECTS-side models still reason natively — they just don't expose an
|
||||||
|
# effort dial — so callers should send no `reasoning` key at all rather
|
||||||
|
# than a default `medium` (which 400s with "Model X does not support
|
||||||
|
# parameter reasoningEffort").
|
||||||
|
_GROK_EFFORT_CAPABLE_PREFIXES = (
    "grok-3-mini",
    "grok-4.20-multi-agent",
    "grok-4.3",
)


def grok_supports_reasoning_effort(model: str) -> bool:
    """Return True when an xAI Grok model accepts ``reasoning.effort``.

    The name is trimmed and lower-cased, everything up to and including the
    last ``/`` is dropped (so aggregator-style ``x-ai/grok-3-mini`` is treated
    like bare ``grok-3-mini``), and the remainder is tested with a *prefix*
    match against ``_GROK_EFFORT_CAPABLE_PREFIXES``. It is not a substring
    match: ``my-grok-3-mini`` does not qualify.

    Conservative by design: if a future Grok model isn't listed, we send no
    effort dial rather than 400.

    Args:
        model: Model identifier, optionally carrying an aggregator path
            prefix. ``None`` or an empty/blank string is tolerated.

    Returns:
        True if the final path segment starts with one of the allowlisted
        prefixes, False otherwise (including for empty input).
    """
    name = (model or "").strip().lower()
    # Keep only the final path segment (x-ai/, openrouter/x-ai/, xai/, ...).
    # rpartition returns the whole string as the tail when there is no "/".
    name = name.rpartition("/")[2]
    # str.startswith accepts a tuple of candidates; an empty name matches none.
    return name.startswith(_GROK_EFFORT_CAPABLE_PREFIXES)
|
||||||
|
|
||||||
|
|
||||||
_CONTEXT_LENGTH_KEYS = (
|
_CONTEXT_LENGTH_KEYS = (
|
||||||
"context_length",
|
"context_length",
|
||||||
"context_window",
|
"context_window",
|
||||||
|
|
|
||||||
|
|
@ -104,8 +104,16 @@ class ResponsesApiTransport(ProviderTransport):
|
||||||
kwargs["prompt_cache_key"] = session_id
|
kwargs["prompt_cache_key"] = session_id
|
||||||
|
|
||||||
if reasoning_enabled and is_xai_responses:
|
if reasoning_enabled and is_xai_responses:
|
||||||
|
from agent.model_metadata import grok_supports_reasoning_effort
|
||||||
|
|
||||||
kwargs["include"] = ["reasoning.encrypted_content"]
|
kwargs["include"] = ["reasoning.encrypted_content"]
|
||||||
kwargs["reasoning"] = {"effort": reasoning_effort}
|
# xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
|
||||||
|
# / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
|
||||||
|
# those models reason natively. Only send the effort dial when
|
||||||
|
# the target model is on the allowlist; otherwise send no
|
||||||
|
# `reasoning` key at all and let the model reason on its own.
|
||||||
|
if grok_supports_reasoning_effort(model):
|
||||||
|
kwargs["reasoning"] = {"effort": reasoning_effort}
|
||||||
elif reasoning_enabled:
|
elif reasoning_enabled:
|
||||||
if is_github_responses:
|
if is_github_responses:
|
||||||
github_reasoning = params.get("github_reasoning_extra")
|
github_reasoning = params.get("github_reasoning_extra")
|
||||||
|
|
|
||||||
|
|
@ -180,6 +180,119 @@ class TestCodexBuildKwargs:
|
||||||
# "minimal" should be clamped to "low" for xAI as well
|
# "minimal" should be clamped to "low" for xAI as well
|
||||||
assert kw.get("reasoning", {}).get("effort") == "low"
|
assert kw.get("reasoning", {}).get("effort") == "low"
|
||||||
|
|
||||||
|
# --- Grok reasoning-effort capability allowlist ---
|
||||||
|
# api.x.ai 400s with "Model X does not support parameter reasoningEffort"
|
||||||
|
# on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*.
|
||||||
|
# Those models reason natively but don't expose the dial. The transport
|
||||||
|
# must omit the `reasoning` key for them while keeping the encrypted
|
||||||
|
# reasoning content include so we can capture native reasoning tokens.
|
||||||
|
|
||||||
|
def test_xai_grok_4_omits_reasoning_effort(self, transport):
    """Neither grok-4 nor grok-4-0709 may be sent a ``reasoning`` key.

    xAI answers HTTP 400 when these models receive ``reasoning.effort``.
    """
    convo = [{"role": "user", "content": "Hi"}]
    for model in ("grok-4", "grok-4-0709"):
        built = transport.build_kwargs(
            model=model,
            messages=convo,
            tools=[],
            is_xai_responses=True,
            reasoning_config={"effort": "high"},
        )
        assert "reasoning" not in built, (
            f"{model} must not receive a reasoning key (xAI rejects it)"
        )
        # The encrypted-content include must survive without the effort dial
        # so native reasoning tokens are still captured.
        requested_includes = built.get("include", [])
        assert "reasoning.encrypted_content" in requested_includes
|
||||||
|
|
||||||
|
def test_xai_grok_4_fast_omits_reasoning_effort(self, transport):
    """Every grok-4-fast / grok-4-1-fast variant must get no ``reasoning`` key."""
    convo = [{"role": "user", "content": "Hi"}]
    fast_variants = (
        "grok-4-fast-reasoning",
        "grok-4-fast-non-reasoning",
        "grok-4-1-fast-reasoning",
        "grok-4-1-fast-non-reasoning",
    )
    for model in fast_variants:
        built = transport.build_kwargs(
            model=model,
            messages=convo,
            tools=[],
            is_xai_responses=True,
            reasoning_config={"effort": "low"},
        )
        assert "reasoning" not in built, (
            f"{model} must not receive a reasoning key (xAI rejects it)"
        )
|
||||||
|
|
||||||
|
def test_xai_grok_3_non_mini_omits_reasoning_effort(self, transport):
    """Bare grok-3 gets no ``reasoning`` key; only the -mini family keeps it."""
    convo = [{"role": "user", "content": "Hi"}]
    built = transport.build_kwargs(
        model="grok-3",
        messages=convo,
        tools=[],
        is_xai_responses=True,
        reasoning_config={"effort": "medium"},
    )
    assert "reasoning" not in built
|
||||||
|
|
||||||
|
def test_xai_grok_3_mini_keeps_reasoning_effort(self, transport):
    """grok-3-mini and grok-3-mini-fast are sent the requested effort as-is."""
    convo = [{"role": "user", "content": "Hi"}]
    for model in ("grok-3-mini", "grok-3-mini-fast"):
        built = transport.build_kwargs(
            model=model,
            messages=convo,
            tools=[],
            is_xai_responses=True,
            reasoning_config={"effort": "high"},
        )
        sent_reasoning = built.get("reasoning")
        assert sent_reasoning == {"effort": "high"}
|
||||||
|
|
||||||
|
def test_xai_grok_4_20_0309_variants_omit_reasoning_effort(self, transport):
    """The grok-4.20-0309 (non-)reasoning models must get no ``reasoning`` key.

    Within the grok-4.20 family only the multi-agent variant accepts the
    effort dial, which is the counterintuitive part this test pins down.
    """
    convo = [{"role": "user", "content": "Hi"}]
    rejecting_models = ("grok-4.20-0309-reasoning", "grok-4.20-0309-non-reasoning")
    for model in rejecting_models:
        built = transport.build_kwargs(
            model=model,
            messages=convo,
            tools=[],
            is_xai_responses=True,
            reasoning_config={"effort": "high"},
        )
        assert "reasoning" not in built, f"{model} must not receive reasoning"
|
||||||
|
|
||||||
|
def test_xai_grok_4_20_multi_agent_keeps_reasoning_effort(self, transport):
    """grok-4.20-multi-agent-0309 is sent the requested effort unchanged."""
    convo = [{"role": "user", "content": "Hi"}]
    built = transport.build_kwargs(
        model="grok-4.20-multi-agent-0309",
        messages=convo,
        tools=[],
        is_xai_responses=True,
        reasoning_config={"effort": "low"},
    )
    sent_reasoning = built.get("reasoning")
    assert sent_reasoning == {"effort": "low"}
|
||||||
|
|
||||||
|
def test_xai_grok_code_fast_omits_reasoning_effort(self, transport):
    """grok-code-fast-1 must be sent no ``reasoning`` key."""
    convo = [{"role": "user", "content": "Hi"}]
    built = transport.build_kwargs(
        model="grok-code-fast-1",
        messages=convo,
        tools=[],
        is_xai_responses=True,
        reasoning_config={"effort": "high"},
    )
    assert "reasoning" not in built
|
||||||
|
|
||||||
|
def test_xai_aggregator_prefix_stripped(self, transport):
    """An ``x-ai/``-prefixed slug is classified the same as the bare model name."""
    convo = [{"role": "user", "content": "Hi"}]

    # Prefixed slug of a model that accepts the effort dial.
    capable = transport.build_kwargs(
        model="x-ai/grok-3-mini",
        messages=convo,
        tools=[],
        is_xai_responses=True,
        reasoning_config={"effort": "high"},
    )
    assert capable.get("reasoning") == {"effort": "high"}

    # Prefixed slug of a model that rejects the effort dial.
    incapable = transport.build_kwargs(
        model="x-ai/grok-4-0709",
        messages=convo,
        tools=[],
        is_xai_responses=True,
        reasoning_config={"effort": "high"},
    )
    assert "reasoning" not in incapable
|
||||||
|
|
||||||
|
|
||||||
class TestCodexValidateResponse:
|
class TestCodexValidateResponse:
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue