From aa05ffba530fde599b6515120578364cce682ac7 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Sat, 16 May 2026 23:41:09 -0700 Subject: [PATCH] fix(xai): surface provider 'error' SSE frame in Codex fallback stream (#27184) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original commit 2b193907d by Teknium added a new module-level _StreamErrorEvent class and threaded its raise into _run_codex_create_stream_fallback in pre-refactor run_agent.py. - _StreamErrorEvent class → run_agent.py (module-level, next to _qwen_portal_headers; class needs to be top-level for the codex runtime to import it) - The fallback event-loop's 'type=error' handler → agent/codex_runtime.py where run_codex_create_stream_fallback now lives. Imports _StreamErrorEvent lazily from run_agent to avoid circular import. Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com> --- agent/codex_runtime.py | 29 +++++++++++++++++++++++++++++ run_agent.py | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/agent/codex_runtime.py b/agent/codex_runtime.py index 547fbb9ce07..02b788f5777 100644 --- a/agent/codex_runtime.py +++ b/agent/codex_runtime.py @@ -356,6 +356,35 @@ def run_codex_create_stream_fallback(agent, api_kwargs: dict, client: Any = None if not event_type and isinstance(event, dict): event_type = event.get("type") + # ``error`` SSE frames carry the provider's real failure + # reason (subscription / quota / model-not-available / + # rejected-reasoning-replay) but never appear in the + # ``{completed, incomplete, failed}`` terminal set, so the + # raw loop below would silently consume them and end with + # "did not emit a terminal response". xAI in particular + # emits ``type=error`` as the FIRST frame for OAuth + # accounts whose Grok subscription is missing/exhausted — + # the SDK's stream helper raises ``RuntimeError(Expected + # to have received response.created before error)`` which + # the caller catches and routes here, expecting this + # fallback to surface the message. Synthesize an + # APIError-shaped exception so ``_summarize_api_error`` + # and the credential-pool entitlement detector see the + # real text instead of a generic RuntimeError. + if event_type == "error": + err_message = getattr(event, "message", None) + if not err_message and isinstance(event, dict): + err_message = event.get("message") + err_code = getattr(event, "code", None) + if not err_code and isinstance(event, dict): + err_code = event.get("code") + err_param = getattr(event, "param", None) + if not err_param and isinstance(event, dict): + err_param = event.get("param") + err_message = (err_message or "stream emitted error event").strip() + from run_agent import _StreamErrorEvent + raise _StreamErrorEvent(err_message, code=err_code, param=err_param) + # Collect output items and text deltas for backfill if event_type == "response.output_item.done": done_item = getattr(event, "item", None) diff --git a/run_agent.py b/run_agent.py index f843603a1e5..8471afccddf 100644 --- a/run_agent.py +++ b/run_agent.py @@ -284,6 +284,45 @@ def _qwen_portal_headers() -> dict: } +class _StreamErrorEvent(Exception): + """Synthesized provider error surfaced from a Responses ``error`` SSE frame. + + Some Codex-style Responses backends (xAI for subscription/quota + failures, custom relays under malformed-tool-call conditions) emit a + standalone ``type=error`` frame instead of routing the failure + through ``response.failed`` or returning an HTTP 4xx. The fallback + streaming path raises this exception so ``_summarize_api_error`` and + ``_extract_api_error_context`` see a familiar ``.body`` / + ``.status_code`` shape and the entitlement detector can match the + underlying provider message ("do not have an active Grok + subscription", etc.). + """ + + def __init__( + self, + message: str, + *, + code: Optional[str] = None, + param: Optional[str] = None, + status_code: Optional[int] = None, + ) -> None: + super().__init__(message) + self.message = message + self.code = code + self.param = param + self.status_code = status_code + # OpenAI SDK-shaped body so _extract_api_error_context / + # _summarize_api_error / classify_api_error all pick it up. + self.body: Dict[str, Any] = { + "error": { + "message": message, + "code": code, + "param": param, + "type": "error", + } + } + + class AIAgent: """ AI Agent with tool calling capabilities.