fix(xai-oauth): recover from prelude SSE errors, gate reasoning replay, surface entitlement 403s (#26644)

Three fixes for the May 2026 xAI OAuth (SuperGrok / X Premium) rollout
failures:

- _run_codex_stream: when openai SDK raises RuntimeError("Expected to
  have received `response.created` before `<type>`"), retry once then
  fall back to responses.create(stream=True) — same path used for
  missing-response.completed postlude.  Fallback surfaces the real
  provider error with body+status_code intact.  Also fixes #8133
  (response.in_progress prelude on custom relays) and #14634
  (codex.rate_limits prelude on codex-lb).

- _summarize_api_error: when error body matches xAI's entitlement
  shape, append a one-line hint pointing to https://grok.com and
  /model.  Once-only, applies to both auxiliary warnings and
  main-loop error surfacing.

- _chat_messages_to_responses_input: new is_xai_responses kwarg
  drops replayed codex_reasoning_items (encrypted_content) before
  they reach xAI.  Also drops reasoning.encrypted_content from the
  xAI include array.  Native Codex behavior unchanged.  Grok still
  reasons natively each turn; coherence rides on visible message
  text alone.

Closes #8133, #14634.
This commit is contained in:
Teknium 2026-05-15 16:35:12 -07:00 committed by GitHub
parent 4aec25bc44
commit 31ba2b0cbc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 481 additions and 18 deletions

View file

@ -194,9 +194,16 @@ class TestCodexBuildKwargs:
is_xai_responses=True,
reasoning_config={"effort": "high"},
)
# xAI Responses must receive both encrypted reasoning content and the effort
# xAI Responses receives reasoning.effort on the allowlisted models.
assert kw.get("reasoning") == {"effort": "high"}
assert "reasoning.encrypted_content" in kw.get("include", [])
# As of May 2026 we deliberately do NOT request
# reasoning.encrypted_content back from xAI — the OAuth/SuperGrok
# surface rejects replayed encrypted reasoning items on turn 2+
# (the multi-turn "Expected to have received response.created
# before error" failure). Grok still reasons natively each turn;
# we just don't try to thread the prior turn's encrypted blob back
# in. See tests/run_agent/test_codex_xai_oauth_recovery.py.
assert "reasoning.encrypted_content" not in kw.get("include", [])
def test_xai_reasoning_disabled_no_reasoning_key(self, transport):
messages = [{"role": "user", "content": "Hi"}]
@ -222,8 +229,9 @@ class TestCodexBuildKwargs:
# api.x.ai 400s with "Model X does not support parameter reasoningEffort"
# on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*.
# Those models reason natively but don't expose the dial. The transport
# must omit the `reasoning` key for them while keeping the encrypted
# reasoning content include so we can capture native reasoning tokens.
# must omit the `reasoning` key for them. As of May 2026 we also no
# longer request ``reasoning.encrypted_content`` back from xAI on ANY
# model — see test_xai_reasoning_effort_passed for the rationale.
def test_xai_grok_4_omits_reasoning_effort(self, transport):
"""grok-4 / grok-4-0709 reject reasoning.effort with HTTP 400."""
@ -237,8 +245,9 @@ class TestCodexBuildKwargs:
assert "reasoning" not in kw, (
f"{model} must not receive a reasoning key (xAI rejects it)"
)
# Still capture native reasoning tokens
assert "reasoning.encrypted_content" in kw.get("include", [])
# We no longer ask xAI for encrypted_content back (see comment
# above) — verify it is absent from the include list.
assert "reasoning.encrypted_content" not in kw.get("include", [])
def test_xai_grok_4_fast_omits_reasoning_effort(self, transport):
"""grok-4-fast and grok-4-1-fast variants reject reasoning.effort."""

View file

@ -0,0 +1,351 @@
"""Regression tests for the May 2026 xAI OAuth (SuperGrok / X Premium) bugs.
Three distinct failure modes the user community hit during rollout:
1. ``RuntimeError("Expected to have received `response.created` before
`error`")`` on multi-turn xAI OAuth conversations. The OpenAI SDK's
Responses streaming state machine collapses an upstream ``error`` SSE
frame into a generic stream-ordering error. ``_run_codex_stream``
now treats this the same way it already treats the missing
``response.completed`` postlude: it falls back to a non-stream
``responses.create(stream=True)`` call, which surfaces the real provider
error. Also closes #8133 (``response.in_progress`` prelude on custom
relays) and #14634 (``codex.rate_limits`` prelude on codex-lb).
2. The HTTP 403 entitlement error xAI returns when an OAuth token lacks
SuperGrok / X Premium ("You have either run out of available
resources or do not have an active Grok subscription") used to read
as a confusing wall of JSON. ``_summarize_api_error`` now appends a
one-line hint pointing the user at https://grok.com and ``/model``.
3. Multi-turn replay of ``codex_reasoning_items`` (with
``encrypted_content``) is now suppressed for ``is_xai_responses=True``
in ``_chat_messages_to_responses_input``. xAI's OAuth/SuperGrok
surface rejects replayed encrypted reasoning items; Grok still
reasons natively each turn, so coherence rides on visible message
text.
"""
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
# ---------------------------------------------------------------------------
# Fix A: prelude error fallback
# ---------------------------------------------------------------------------
def _make_codex_agent():
    """Return a quiet ``AIAgent`` set up for codex_responses streaming tests.

    The agent is constructed against ``https://api.x.ai/v1`` with a dummy
    key, then switched to the ``codex_responses`` API mode and the
    ``xai-oauth`` provider, with the interrupt flag cleared.
    """
    # Imported lazily so the project module is only loaded when a test
    # actually builds an agent.
    from run_agent import AIAgent

    init_kwargs = {
        "api_key": "test-key",
        "base_url": "https://api.x.ai/v1",
        "model": "grok-4.3",
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
    }
    codex_agent = AIAgent(**init_kwargs)

    # Post-construction overrides, applied in the same order as before.
    codex_agent.api_mode = "codex_responses"
    codex_agent.provider = "xai-oauth"
    codex_agent._interrupt_requested = False
    return codex_agent
@pytest.mark.parametrize(
    "prelude_event_type",
    [
        "error",  # xAI OAuth multi-turn
        "codex.rate_limits",  # codex-lb relays (#14634)
        "response.in_progress",  # custom Responses relays (#8133)
    ],
)
def test_codex_stream_prelude_error_falls_back_to_create_stream(prelude_event_type):
    """A prelude-ordering RuntimeError must route to the create-stream fallback.

    ``responses.stream`` is stubbed to raise the "Expected to have received
    `response.created` before `<type>`" RuntimeError for each parametrized
    event type.  ``_run_codex_stream`` is expected to hand off to
    ``_run_codex_create_stream_fallback`` and return whatever it produces.
    """
    agent = _make_codex_agent()

    client = MagicMock()
    client.responses.stream.side_effect = RuntimeError(
        f"Expected to have received `response.created` before `{prelude_event_type}`"
    )

    # Canned response the patched fallback hands back.
    text_part = SimpleNamespace(type="output_text", text="fallback ok")
    message_item = SimpleNamespace(type="message", content=[text_part])
    canned = SimpleNamespace(output=[message_item], status="completed")

    fallback_patch = patch.object(
        agent, "_run_codex_create_stream_fallback", return_value=canned
    )
    with fallback_patch as fallback_spy:
        outcome = agent._run_codex_stream({}, client=client)

    assert outcome is canned
    # Compared against a fresh ``{}`` so any mutation of the request kwargs
    # before the hand-off would be caught.
    fallback_spy.assert_called_once_with({}, client=client)
def test_codex_stream_prelude_error_retries_once_before_fallback():
    """The stream call is attempted twice before the fallback takes over."""
    agent = _make_codex_agent()
    attempts = 0

    def always_prelude_error(**kwargs):
        """Count the attempt, then fail the way the SDK prelude check does."""
        nonlocal attempts
        attempts += 1
        raise RuntimeError(
            "Expected to have received `response.created` before `error`"
        )

    client = MagicMock()
    client.responses.stream.side_effect = always_prelude_error
    canned = SimpleNamespace(output=[], status="completed")

    fallback_patch = patch.object(
        agent, "_run_codex_create_stream_fallback", return_value=canned
    )
    with fallback_patch as fallback_spy:
        agent._run_codex_stream({}, client=client)

    # max_stream_retries=1 → one retry + final attempt → 2 stream calls,
    # and only then does the fallback path run.
    assert attempts == 2
    fallback_spy.assert_called_once()
def test_codex_stream_unrelated_runtimeerror_still_raises():
    """A RuntimeError without the prelude/postlude wording must propagate."""
    agent = _make_codex_agent()
    client = MagicMock()
    client.responses.stream.side_effect = RuntimeError("something else broke")

    fallback_patch = patch.object(agent, "_run_codex_create_stream_fallback")
    with fallback_patch as fallback_spy:
        with pytest.raises(RuntimeError, match="something else broke"):
            agent._run_codex_stream({}, client=client)
        # Checked outside the ``raises`` block so the assertion actually runs.
        fallback_spy.assert_not_called()
def test_codex_stream_postlude_error_still_falls_back():
    """The missing-``response.completed`` fallback must keep working."""
    agent = _make_codex_agent()

    client = MagicMock()
    postlude_error = RuntimeError("Didn't receive a `response.completed` event.")
    client.responses.stream.side_effect = postlude_error

    canned = SimpleNamespace(output=[], status="completed")
    fallback_patch = patch.object(
        agent, "_run_codex_create_stream_fallback", return_value=canned
    )
    with fallback_patch as fallback_spy:
        outcome = agent._run_codex_stream({}, client=client)

    assert outcome is canned
    fallback_spy.assert_called_once()
# ---------------------------------------------------------------------------
# Fix B: friendly entitlement message
# ---------------------------------------------------------------------------
def test_summarize_api_error_decorates_xai_entitlement_403():
    """A stringified xAI 403 entitlement error gains the subscribe/switch hint."""
    from run_agent import AIAgent

    entitlement_text = (
        "HTTP 403: Error code: 403 - {'code': 'The caller does not have permission "
        "to execute the specified operation', 'error': 'You have either run out of "
        "available resources or do not have an active Grok subscription. Manage "
        "subscriptions at https://grok.com'}"
    )
    summary = AIAgent._summarize_api_error(RuntimeError(entitlement_text))

    # The original provider wording survives, and the hint mentions the
    # subscription tier, the /model command, and the grok.com URL.
    expected_fragments = (
        "do not have an active Grok subscription",
        "SuperGrok",
        "/model",
        "https://grok.com",
    )
    for fragment in expected_fragments:
        assert fragment in summary
def test_summarize_api_error_decorates_xai_body_message():
    """An exception carrying ``status_code`` and a structured ``body`` gets the hint."""
    from run_agent import AIAgent

    entitlement_text = (
        "You have either run out of available resources or do "
        "not have an active Grok subscription. Manage at "
        "https://grok.com"
    )

    class _XaiErr(Exception):
        # Class-level attributes mimicking an SDK error with a parsed body.
        status_code = 403
        body = {"error": {"message": entitlement_text}}

    summary = AIAgent._summarize_api_error(_XaiErr("403"))

    assert "HTTP 403" in summary
    assert "SuperGrok / X Premium" in summary
def test_summarize_api_error_idempotent_for_entitlement_hint():
    """Applying the entitlement decorator a second time changes nothing."""
    from run_agent import AIAgent

    decorate = AIAgent._decorate_xai_entitlement_error
    first_pass = decorate("HTTP 403: do not have an active Grok subscription")
    second_pass = decorate(first_pass)

    assert first_pass == second_pass
def test_summarize_api_error_passes_through_unrelated_errors():
    """Errors without the entitlement wording come back without the hint."""
    from run_agent import AIAgent

    summary = AIAgent._summarize_api_error(RuntimeError("HTTP 500: upstream is sad"))

    # No trace of the entitlement hint may appear.
    for hint_marker in ("SuperGrok", "grok.com"):
        assert hint_marker not in summary
    # The original detail is still reported.
    assert "upstream is sad" in summary
# ---------------------------------------------------------------------------
# Fix C: reasoning replay gating for xai-oauth
# ---------------------------------------------------------------------------
def _assistant_msg_with_encrypted_reasoning(text="hi from grok", encrypted="enc_blob"):
return {
"role": "assistant",
"content": text,
"codex_reasoning_items": [
{
"type": "reasoning",
"id": "rs_xai_001",
"encrypted_content": encrypted,
"summary": [],
}
],
}
def test_codex_reasoning_replay_default_includes_encrypted_content():
    """Without the xAI flag, the stored reasoning item is replayed intact."""
    from agent.codex_responses_adapter import _chat_messages_to_responses_input

    conversation = [
        {"role": "user", "content": "hi"},
        _assistant_msg_with_encrypted_reasoning(),
        {"role": "user", "content": "what's your name?"},
    ]

    converted = _chat_messages_to_responses_input(conversation)
    replayed = list(filter(lambda item: item.get("type") == "reasoning", converted))

    assert len(replayed) == 1
    assert replayed[0]["encrypted_content"] == "enc_blob"
def test_codex_reasoning_replay_stripped_for_xai_oauth():
    """With ``is_xai_responses=True`` no reasoning item reaches the input list."""
    from agent.codex_responses_adapter import _chat_messages_to_responses_input

    conversation = [
        {"role": "user", "content": "hi"},
        _assistant_msg_with_encrypted_reasoning(),
        {"role": "user", "content": "what's your name?"},
    ]

    converted = _chat_messages_to_responses_input(conversation, is_xai_responses=True)

    replayed = list(filter(lambda item: item.get("type") == "reasoning", converted))
    assert replayed == []

    # The assistant's visible text must still survive — coherence across
    # turns rides on the message text alone.
    has_assistant_item = any(
        item.get("role") == "assistant" or item.get("type") == "message"
        for item in converted
    )
    assert has_assistant_item, "assistant message must still be present"
def test_codex_transport_xai_request_omits_encrypted_content_include():
    """For xAI requests, ``build_kwargs`` must produce an empty ``include`` list."""
    from agent.transports.codex import ResponsesApiTransport

    system_prompt = "you are a helpful assistant"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "hi"},
    ]

    request = ResponsesApiTransport().build_kwargs(
        model="grok-4.3",
        messages=conversation,
        tools=None,
        instructions=system_prompt,
        reasoning_config={"enabled": True, "effort": "medium"},
        is_xai_responses=True,
    )

    # Without this gate, xAI would echo back encrypted_content blobs we'd
    # then store in codex_reasoning_items and replay next turn — which is
    # exactly the multi-turn failure mode being closed.
    assert request["include"] == []
def test_codex_transport_xai_strips_replayed_reasoning_in_input():
    """``build_kwargs`` with the xAI flag emits no reasoning items in ``input``."""
    from agent.transports.codex import ResponsesApiTransport

    conversation = [
        {"role": "system", "content": "sys"},
        {"role": "user", "content": "hi"},
        _assistant_msg_with_encrypted_reasoning(text="hi from grok"),
        {"role": "user", "content": "what's your name?"},
    ]

    request = ResponsesApiTransport().build_kwargs(
        model="grok-4.3",
        messages=conversation,
        tools=None,
        instructions="sys",
        reasoning_config={"enabled": True, "effort": "medium"},
        is_xai_responses=True,
    )

    replayed = list(
        filter(lambda item: item.get("type") == "reasoning", request["input"])
    )
    assert replayed == []
def test_codex_transport_native_codex_still_replays_reasoning_in_input():
    """With ``is_xai_responses=False`` the reasoning replay path is unchanged."""
    from agent.transports.codex import ResponsesApiTransport

    conversation = [
        {"role": "system", "content": "sys"},
        {"role": "user", "content": "hi"},
        _assistant_msg_with_encrypted_reasoning(text="hi from codex"),
        {"role": "user", "content": "next"},
    ]

    request = ResponsesApiTransport().build_kwargs(
        model="gpt-5-codex",
        messages=conversation,
        tools=None,
        instructions="sys",
        reasoning_config={"enabled": True, "effort": "medium"},
        is_xai_responses=False,
    )

    replayed = list(
        filter(lambda item: item.get("type") == "reasoning", request["input"])
    )
    assert len(replayed) == 1
    assert replayed[0]["encrypted_content"] == "enc_blob"

    # Native Codex still asks for encrypted_content back.
    assert "reasoning.encrypted_content" in request.get("include", [])