mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(xai): omit reasoning.effort for grok models that reject it (#23435)
xAI's Responses API returns HTTP 400 ("Model X does not support
parameter reasoningEffort") for grok-4, grok-4-0709, grok-4-fast-*,
grok-4-1-fast-*, grok-3, grok-4.20-0309-*, and grok-code-fast-1 — even
though those models reason natively. Hermes was unconditionally sending
`reasoning: {effort: 'medium'}` to xAI for every Grok model, breaking
direct `--provider xai` for the entire grok-4 line.
Add a substring allowlist predicate (verified live against api.x.ai
2026-05-10) covering the only Grok families that accept the effort dial:
grok-3-mini*, grok-4.20-multi-agent*, grok-4.3*. The Responses transport
omits the `reasoning` key entirely for everything else while still
including `reasoning.encrypted_content` so we capture native reasoning
tokens.
Verified end-to-end: `hermes chat -q hi --provider xai --model grok-4-0709`
went from HTTP 400 to a successful reply.
This commit is contained in:
parent
cc2a0c674a
commit
d6e1fadbf5
3 changed files with 160 additions and 1 deletions
|
|
@ -244,6 +244,44 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
"zai-org/GLM-5": 202752,
|
||||
}
|
||||
|
||||
# xAI Grok models that ACCEPT the `reasoning.effort` parameter on
|
||||
# api.x.ai. Verified live against /v1/responses 2026-05-10:
|
||||
#
|
||||
# ACCEPTS effort: grok-3-mini, grok-3-mini-fast, grok-4.20-multi-agent-0309,
|
||||
# grok-4.3
|
||||
# REJECTS effort: grok-3, grok-4, grok-4-0709, grok-4-fast-(non-)reasoning,
|
||||
# grok-4-1-fast-(non-)reasoning, grok-4.20-0309-(non-)reasoning,
|
||||
# grok-code-fast-1
|
||||
#
|
||||
# REJECTS-side models still reason natively — they just don't expose an
|
||||
# effort dial — so callers should send no `reasoning` key at all rather
|
||||
# than a default `medium` (which 400s with "Model X does not support
|
||||
# parameter reasoningEffort").
|
||||
_GROK_EFFORT_CAPABLE_PREFIXES = (
|
||||
"grok-3-mini",
|
||||
"grok-4.20-multi-agent",
|
||||
"grok-4.3",
|
||||
)
|
||||
|
||||
|
||||
def grok_supports_reasoning_effort(model: str) -> bool:
|
||||
"""Return True when an xAI Grok model accepts ``reasoning.effort``.
|
||||
|
||||
Allowlist by substring (matches both bare ``grok-3-mini`` and
|
||||
aggregator-prefixed ``x-ai/grok-3-mini``). Conservative by design:
|
||||
if a future Grok model isn't listed, we send no effort dial rather
|
||||
than 400.
|
||||
"""
|
||||
name = (model or "").strip().lower()
|
||||
if not name:
|
||||
return False
|
||||
# Strip common aggregator prefixes (x-ai/, openrouter/x-ai/, xai/, ...)
|
||||
for sep in ("/",):
|
||||
if sep in name:
|
||||
name = name.rsplit(sep, 1)[-1]
|
||||
return any(name.startswith(prefix) for prefix in _GROK_EFFORT_CAPABLE_PREFIXES)
|
||||
|
||||
|
||||
_CONTEXT_LENGTH_KEYS = (
|
||||
"context_length",
|
||||
"context_window",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue