fix(xai-oauth): recover from prelude SSE errors, gate reasoning replay, surface entitlement 403s (#26644)

Three fixes for the May 2026 xAI OAuth (SuperGrok / X Premium) rollout failures:

- _run_codex_stream: when the openai SDK raises RuntimeError("Expected to have received `response.created` before `<type>`"), retry once, then fall back to responses.create(stream=True) — the same path used for the missing-response.completed postlude. The fallback surfaces the real provider error with body+status_code intact. Also fixes #8133 (response.in_progress prelude on custom relays) and #14634 (codex.rate_limits prelude on codex-lb).
- _summarize_api_error: when the error body matches xAI's entitlement shape, append a one-line hint pointing to https://grok.com and /model. Once-only, applies to both auxiliary warnings and main-loop error surfacing.
- _chat_messages_to_responses_input: new is_xai_responses kwarg drops replayed codex_reasoning_items (encrypted_content) before they reach xAI. Also drops reasoning.encrypted_content from the xAI include array. Native Codex behavior unchanged. Grok still reasons natively each turn; coherence rides on visible message text alone.

Closes #8133, #14634.
2026-05-19 04:52:06 +00:00 · 2026-05-15 16:35:12 -07:00 · 2026-05-15 16:35:12 -07:00 · 31ba2b0cbc
commit 31ba2b0cbc
parent 4aec25bc44
5 changed files with 481 additions and 18 deletions
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@ -244,8 +244,21 @@ def _normalize_responses_message_status(value: Any, *, default: str = "completed
    return default


-def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """Convert internal chat-style messages to Responses input items."""
+def _chat_messages_to_responses_input(
+    messages: List[Dict[str, Any]],
+    *,
+    is_xai_responses: bool = False,
+) -> List[Dict[str, Any]]:
+    """Convert internal chat-style messages to Responses input items.
+
+    ``is_xai_responses=True`` drops replayed ``codex_reasoning_items``
+    entirely.  xAI's OAuth/SuperGrok ``/v1/responses`` surface
+    rejects encrypted reasoning blobs minted by prior turns: the request
+    streams an ``error`` SSE frame before ``response.created`` and the
+    OpenAI SDK collapses it into a generic stream-ordering error.  Native
+    Codex (chatgpt.com backend-api) DOES accept replayed encrypted_content
+    — keep the default off.
+    """
    items: List[Dict[str, Any]] = []
    seen_item_ids: set = set()

@ -271,9 +284,17 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
            if role == "assistant":
                # Replay encrypted reasoning items from previous turns
                # so the API can maintain coherent reasoning chains.
+                #
+                # xAI OAuth (SuperGrok/Premium) rejects replayed
+                # ``encrypted_content`` reasoning items minted by prior
+                # turns — see _chat_messages_to_responses_input docstring.
+                # When ``is_xai_responses`` is set we drop the replay
+                # entirely; Grok still reasons on each turn server-side,
+                # we just don't try to thread the prior turn's encrypted
+                # blob back in.
                codex_reasoning = msg.get("codex_reasoning_items")
                has_codex_reasoning = False
-                if isinstance(codex_reasoning, list):
+                if isinstance(codex_reasoning, list) and not is_xai_responses:
                    for ri in codex_reasoning:
                        if isinstance(ri, dict) and ri.get("encrypted_content"):
                            item_id = ri.get("id")
--- a/agent/transports/codex.py
+++ b/agent/transports/codex.py
@ -24,7 +24,10 @@ class ResponsesApiTransport(ProviderTransport):
    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
        """Convert OpenAI chat messages to Responses API input items."""
        from agent.codex_responses_adapter import _chat_messages_to_responses_input
-        return _chat_messages_to_responses_input(messages)
+        return _chat_messages_to_responses_input(
+            messages,
+            is_xai_responses=bool(kwargs.get("is_xai_responses")),
+        )

    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
        """Convert OpenAI tool schemas to Responses API function definitions."""
@ -93,7 +96,10 @@ class ResponsesApiTransport(ProviderTransport):
        kwargs = {
            "model": model,
            "instructions": instructions,
-            "input": _chat_messages_to_responses_input(payload_messages),
+            "input": _chat_messages_to_responses_input(
+                payload_messages,
+                is_xai_responses=is_xai_responses,
+            ),
            "tools": response_tools,
            "store": False,
        }
@ -110,7 +116,14 @@ class ResponsesApiTransport(ProviderTransport):
        if reasoning_enabled and is_xai_responses:
            from agent.model_metadata import grok_supports_reasoning_effort

-            kwargs["include"] = ["reasoning.encrypted_content"]
+            # NOTE: Hermes does NOT ask xAI to return ``reasoning.encrypted_content``
+            # any more.  xAI's OAuth/SuperGrok ``/v1/responses`` surface rejects
+            # replayed encrypted reasoning items on turn 2+ — see
+            # _chat_messages_to_responses_input docstring.  Requesting the field
+            # back would just have us cache something we then must strip.  Grok
+            # still reasons natively each turn; coherence across turns rides on
+            # the visible message text alone.
+            kwargs["include"] = []
            # xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
            # / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
            # those models reason natively. Only send the effort dial when