mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(xai-oauth): recover from prelude SSE errors, gate reasoning replay, surface entitlement 403s (#26644)
Three fixes for the May 2026 xAI OAuth (SuperGrok / X Premium) rollout
failures:
- _run_codex_stream: when openai SDK raises RuntimeError("Expected to
have received `response.created` before `<type>`"), retry once then
fall back to responses.create(stream=True) — same path used for
missing-response.completed postlude. Fallback surfaces the real
provider error with body+status_code intact. Also fixes #8133
(response.in_progress prelude on custom relays) and #14634
(codex.rate_limits prelude on codex-lb).
- _summarize_api_error: when error body matches xAI's entitlement
shape, append a one-line hint pointing to https://grok.com and
/model. Once-only, applies to both auxiliary warnings and
main-loop error surfacing.
- _chat_messages_to_responses_input: new is_xai_responses kwarg
drops replayed codex_reasoning_items (encrypted_content) before
they reach xAI. Also drops reasoning.encrypted_content from the
xAI include array. Native Codex behavior unchanged. Grok still
reasons natively each turn; coherence rides on visible message
text alone.
Closes #8133, #14634.
This commit is contained in:
parent
4aec25bc44
commit
31ba2b0cbc
5 changed files with 481 additions and 18 deletions
|
|
@ -194,9 +194,16 @@ class TestCodexBuildKwargs:
|
|||
is_xai_responses=True,
|
||||
reasoning_config={"effort": "high"},
|
||||
)
|
||||
# xAI Responses must receive both encrypted reasoning content and the effort
|
||||
# xAI Responses receives reasoning.effort on the allowlisted models.
|
||||
assert kw.get("reasoning") == {"effort": "high"}
|
||||
assert "reasoning.encrypted_content" in kw.get("include", [])
|
||||
# As of May 2026 we deliberately do NOT request
|
||||
# reasoning.encrypted_content back from xAI — the OAuth/SuperGrok
|
||||
# surface rejects replayed encrypted reasoning items on turn 2+
|
||||
# (the multi-turn "Expected to have received response.created
|
||||
# before error" failure). Grok still reasons natively each turn;
|
||||
# we just don't try to thread the prior turn's encrypted blob back
|
||||
# in. See tests/run_agent/test_codex_xai_oauth_recovery.py.
|
||||
assert "reasoning.encrypted_content" not in kw.get("include", [])
|
||||
|
||||
def test_xai_reasoning_disabled_no_reasoning_key(self, transport):
|
||||
messages = [{"role": "user", "content": "Hi"}]
|
||||
|
|
@ -222,8 +229,9 @@ class TestCodexBuildKwargs:
|
|||
# api.x.ai 400s with "Model X does not support parameter reasoningEffort"
|
||||
# on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*.
|
||||
# Those models reason natively but don't expose the dial. The transport
|
||||
# must omit the `reasoning` key for them while keeping the encrypted
|
||||
# reasoning content include so we can capture native reasoning tokens.
|
||||
# must omit the `reasoning` key for them. As of May 2026 we also no
|
||||
# longer request ``reasoning.encrypted_content`` back from xAI on ANY
|
||||
# model — see test_xai_reasoning_effort_passed for the rationale.
|
||||
|
||||
def test_xai_grok_4_omits_reasoning_effort(self, transport):
|
||||
"""grok-4 / grok-4-0709 reject reasoning.effort with HTTP 400."""
|
||||
|
|
@ -237,8 +245,9 @@ class TestCodexBuildKwargs:
|
|||
assert "reasoning" not in kw, (
|
||||
f"{model} must not receive a reasoning key (xAI rejects it)"
|
||||
)
|
||||
# Still capture native reasoning tokens
|
||||
assert "reasoning.encrypted_content" in kw.get("include", [])
|
||||
# We no longer ask xAI for encrypted_content back (see comment
|
||||
# above) — verify it is absent from the include list.
|
||||
assert "reasoning.encrypted_content" not in kw.get("include", [])
|
||||
|
||||
def test_xai_grok_4_fast_omits_reasoning_effort(self, transport):
|
||||
"""grok-4-fast and grok-4-1-fast variants reject reasoning.effort."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue