From 6362e71973c18b407651157f818e279122ce41f6 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Sat, 16 May 2026 23:28:05 -0700 Subject: [PATCH] fix(xai-oauth): recover from prelude SSE errors, gate reasoning replay, surface entitlement 403s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original commit 31ba2b0cb by Teknium targeted run_codex_stream() at its pre-refactor location in run_agent.py. Re-applied: - Prelude error retry/fallback → agent/codex_runtime.py (in run_codex_stream where the body now lives) - _decorate_xai_entitlement_error helper + _summarize_api_error wrapping → run_agent.py (these methods remained on AIAgent as @staticmethod's; cherry-pick applied them cleanly) The xai-oauth provider gate, encrypted_content drop on replay, etc. landed in agent/codex_responses_adapter.py via the prior merge from main. Closes #8133, #14634 Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com> --- agent/codex_runtime.py | 38 +++++++++++++++++++++++++++++++++---- run_agent.py | 43 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 75 insertions(+), 6 deletions(-) diff --git a/agent/codex_runtime.py b/agent/codex_runtime.py index b2e9b714586..547fbb9ce07 100644 --- a/agent/codex_runtime.py +++ b/agent/codex_runtime.py @@ -284,18 +284,48 @@ def run_codex_stream(agent, api_kwargs: dict, client: Any = None, on_first_delta except RuntimeError as exc: err_text = str(exc) missing_completed = "response.completed" in err_text - if missing_completed and attempt < max_stream_retries: + # The OpenAI SDK's Responses streaming state machine raises + # ``RuntimeError("Expected to have received `response.created` + # before `{event.type}`")`` when the first SSE event from the + # server is anything other than ``response.created`` — and it + # discards the event's payload before we can read it. 
Three + # real-world backends emit a different first frame: + # + # * xAI on grok-4.x OAuth — sends ``error`` (issues + # reported around the May 2026 SuperGrok rollout when + # multi-turn conversations replay encrypted reasoning + # content the OAuth tier rejects) + # * codex-lb relays — send ``codex.rate_limits`` (#14634) + # * custom Responses relays — send ``response.in_progress`` + # (#8133) + # + # In all three cases the underlying byte stream is still + # readable: a raw ``responses.create(stream=True)`` + # fallback (which bypasses the SDK's streaming state machine) + # succeeds and surfaces the real provider error as + # a normal exception with body+status_code attached, which + # ``_summarize_api_error`` can then translate into a useful + # user-facing line. Treat ``response.created`` prelude + # errors the same way we already treat ``response.completed`` + # postlude errors. + prelude_error = ( + "Expected to have received `response.created`" in err_text + or "Expected to have received \"response.created\"" in err_text + ) + if (missing_completed or prelude_error) and attempt < max_stream_retries: logger.debug( - "Responses stream closed before completion (attempt %s/%s); retrying. %s", + "Responses stream %s (attempt %s/%s); retrying. %s", + "prelude rejected" if prelude_error else "closed before completion", attempt + 1, max_stream_retries + 1, agent._client_log_context(), ) continue - if missing_completed: + if missing_completed or prelude_error: logger.debug( - "Responses stream did not emit response.completed; falling back to create(stream=True). 
%s err=%s", + "rejected before response.created" if prelude_error else "did not emit response.completed", agent._client_log_context(), + err_text, ) return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client) raise diff --git a/run_agent.py b/run_agent.py index f4157807e04..80577a19be3 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1287,6 +1287,45 @@ class AIAgent: trajectory = self._convert_to_trajectory_format(messages, user_query, completed) _save_trajectory_to_file(trajectory, self.model, completed) + @staticmethod + def _decorate_xai_entitlement_error(detail: str) -> str: + """Append a friendly hint when xAI's OAuth surface returns an + entitlement-shaped error. + + xAI's ``/v1/responses`` endpoint replies to OAuth tokens that lack a + SuperGrok / X Premium subscription with HTTP 403 carrying a body like:: + + {"code": "The caller does not have permission to execute the + specified operation", "error": "You have either run out of + available resources or do not have an active Grok subscription. + Manage subscriptions at https://grok.com/..."} + + The raw text is useful but the action the user needs to take (subscribe + on grok.com, or switch providers with ``/model``) isn't obvious from + the wire format. Detect the entitlement shape and append a hint. + + Matched once per detail string — won't double-decorate if the upstream + already concatenated the same text. + """ + if not detail: + return detail + lower = detail.lower() + is_entitlement = ( + "do not have an active grok subscription" in lower + or ("out of available resources" in lower and "grok" in lower) + or ("does not have permission" in lower and "grok" in lower) + ) + if not is_entitlement: + return detail + hint = ( + " — xAI OAuth account lacks SuperGrok / X Premium entitlement for " + "this model. Subscribe at https://grok.com or run `/model` to " + "switch providers." 
+ ) + if hint.strip() in detail: + return detail + return f"{detail}{hint}" + @staticmethod def _summarize_api_error(error: Exception) -> str: """Extract a human-readable one-liner from an API error. @@ -1320,12 +1359,12 @@ class AIAgent: if msg: status_code = getattr(error, "status_code", None) prefix = f"HTTP {status_code}: " if status_code else "" - return f"{prefix}{msg[:300]}" + return AIAgent._decorate_xai_entitlement_error(f"{prefix}{msg[:300]}") # Fallback: truncate the raw string but give more room than 200 chars status_code = getattr(error, "status_code", None) prefix = f"HTTP {status_code}: " if status_code else "" - return f"{prefix}{raw[:500]}" + return AIAgent._decorate_xai_entitlement_error(f"{prefix}{raw[:500]}") def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]: if not key: