fix(xai-oauth): recover from prelude SSE errors, gate reasoning replay, surface entitlement 403s

Original commit 31ba2b0cb by Teknium targeted run_codex_stream() at
its pre-refactor location in run_agent.py. Re-applied:

  - Prelude error retry/fallback → agent/codex_runtime.py (in
    run_codex_stream where the body now lives)
  - _decorate_xai_entitlement_error helper + _summarize_api_error
    wrapping → run_agent.py (these methods remained on AIAgent
    as @staticmethod's; cherry-pick applied them cleanly)

The xai-oauth provider gate, encrypted_content drop on replay, etc.
landed in agent/codex_responses_adapter.py via the prior merge from main.

Closes #8133, #14634

Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com>
This commit is contained in:
teknium1 2026-05-16 23:28:05 -07:00
parent 27df249564
commit 6362e71973
No known key found for this signature in database
2 changed files with 75 additions and 6 deletions

View file

@ -284,18 +284,48 @@ def run_codex_stream(agent, api_kwargs: dict, client: Any = None, on_first_delta
except RuntimeError as exc:
err_text = str(exc)
missing_completed = "response.completed" in err_text
if missing_completed and attempt < max_stream_retries:
# The OpenAI SDK's Responses streaming state machine raises
# ``RuntimeError("Expected to have received `response.created`
# before `<event-type>`")`` when the first SSE event from the
# server is anything other than ``response.created`` — and it
# discards the event's payload before we can read it. Three
# real-world backends emit a different first frame:
#
# * xAI on grok-4.x OAuth — sends ``error`` (issues
# reported around the May 2026 SuperGrok rollout when
# multi-turn conversations replay encrypted reasoning
# content the OAuth tier rejects)
# * codex-lb relays — send ``codex.rate_limits`` (#14634)
# * custom Responses relays — send ``response.in_progress``
# (#8133)
#
# In all three cases the underlying byte stream is still
# readable: a non-stream ``responses.create(stream=True)``
# fallback succeeds and surfaces the real provider error as
# a normal exception with body+status_code attached, which
# ``_summarize_api_error`` can then translate into a useful
# user-facing line. Treat ``response.created`` prelude
# errors the same way we already treat ``response.completed``
# postlude errors.
prelude_error = (
"Expected to have received `response.created`" in err_text
or "Expected to have received \"response.created\"" in err_text
)
if (missing_completed or prelude_error) and attempt < max_stream_retries:
logger.debug(
"Responses stream closed before completion (attempt %s/%s); retrying. %s",
"Responses stream %s (attempt %s/%s); retrying. %s",
"prelude rejected" if prelude_error else "closed before completion",
attempt + 1,
max_stream_retries + 1,
agent._client_log_context(),
)
continue
if missing_completed:
if missing_completed or prelude_error:
logger.debug(
"Responses stream did not emit response.completed; falling back to create(stream=True). %s",
"Responses stream %s; falling back to create(stream=True). %s err=%s",
"rejected before response.created" if prelude_error else "did not emit response.completed",
agent._client_log_context(),
err_text,
)
return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
raise

View file

@ -1287,6 +1287,45 @@ class AIAgent:
trajectory = self._convert_to_trajectory_format(messages, user_query, completed)
_save_trajectory_to_file(trajectory, self.model, completed)
@staticmethod
def _decorate_xai_entitlement_error(detail: str) -> str:
"""Append a friendly hint when xAI's OAuth surface returns an
entitlement-shaped error.
xAI's ``/v1/responses`` endpoint replies to OAuth tokens that lack a
SuperGrok / X Premium subscription with HTTP 403 carrying a body like::
{"code": "The caller does not have permission to execute the
specified operation", "error": "You have either run out of
available resources or do not have an active Grok subscription.
Manage subscriptions at https://grok.com/..."}
The raw text is useful but the action the user needs to take (subscribe
on grok.com, or switch providers with ``/model``) isn't obvious from
the wire format. Detect the entitlement shape and append a hint.
Matched once per detail string won't double-decorate if the upstream
already concatenated the same text.
"""
if not detail:
return detail
lower = detail.lower()
is_entitlement = (
"do not have an active grok subscription" in lower
or ("out of available resources" in lower and "grok" in lower)
or ("does not have permission" in lower and "grok" in lower)
)
if not is_entitlement:
return detail
hint = (
" — xAI OAuth account lacks SuperGrok / X Premium entitlement for "
"this model. Subscribe at https://grok.com or run `/model` to "
"switch providers."
)
if hint.strip() in detail:
return detail
return f"{detail}{hint}"
@staticmethod
def _summarize_api_error(error: Exception) -> str:
"""Extract a human-readable one-liner from an API error.
@ -1320,12 +1359,12 @@ class AIAgent:
if msg:
status_code = getattr(error, "status_code", None)
prefix = f"HTTP {status_code}: " if status_code else ""
return f"{prefix}{msg[:300]}"
return AIAgent._decorate_xai_entitlement_error(f"{prefix}{msg[:300]}")
# Fallback: truncate the raw string but give more room than 200 chars
status_code = getattr(error, "status_code", None)
prefix = f"HTTP {status_code}: " if status_code else ""
return f"{prefix}{raw[:500]}"
return AIAgent._decorate_xai_entitlement_error(f"{prefix}{raw[:500]}")
def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]:
if not key: