diff --git a/agent/error_classifier.py b/agent/error_classifier.py
index d29a2e34ac6..42eb42d6803 100644
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -510,6 +510,35 @@ def classify_api_error(
             should_compress=False,
         )
 
+    # xAI Grok subscription entitlement errors.
+    #
+    # xAI returns "You have either run out of available resources or do not
+    # have an active Grok subscription" through two distinct code paths:
+    #
+    #   • HTTP 403 — status_code is set; _classify_by_status (step 2) routes
+    #     it to FailoverReason.auth correctly, and _is_entitlement_failure
+    #     then prevents the credential-refresh loop.
+    #
+    #   • SSE ``type=error`` frame — surfaced as _StreamErrorEvent with
+    #     status_code=None. _classify_by_status is skipped entirely, and
+    #     "grok subscription" / "out of available resources" appear in none
+    #     of the message-pattern lists below. Without this guard the error
+    #     falls through to FailoverReason.unknown (retryable=True), burning
+    #     max_retries before the agent stops — and _is_entitlement_failure
+    #     is never called because it only runs under FailoverReason.auth.
+    #
+    # Both X Premium+ and SuperGrok subscribers hit this path when their
+    # subscription tier does not cover the requested model or feature.
+    if (
+        "do not have an active grok subscription" in error_msg
+        or ("out of available resources" in error_msg and "grok" in error_msg)
+    ):
+        return _result(
+            FailoverReason.auth,
+            retryable=False,
+            should_fallback=True,
+        )
+
     # ── 2. HTTP status code classification ──────────────────────────
 
     if status_code is not None:
diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py
index 9eb641cc895..5cb48efc6c6 100644
--- a/tests/run_agent/test_codex_xai_oauth_recovery.py
+++ b/tests/run_agent/test_codex_xai_oauth_recovery.py
@@ -224,6 +224,63 @@ def test_summarize_api_error_passes_through_unrelated_errors():
     assert "upstream is sad" in summary
 
 
+# ---------------------------------------------------------------------------
+# Fix D: _StreamErrorEvent xAI entitlement classified as auth, not retryable
+#
+# run_codex_create_stream_fallback raises _StreamErrorEvent (status_code=None)
+# when the Responses stream emits a ``type=error`` SSE frame. Before this
+# fix, classify_api_error had no match for "grok subscription" in its pattern
+# lists, so it returned FailoverReason.unknown (retryable=True) — burning
+# max_retries before the agent stopped. _is_entitlement_failure was never
+# called because it only runs when FailoverReason.auth is returned.
+# ---------------------------------------------------------------------------
+
+
+def test_classify_api_error_stream_event_grok_subscription_is_auth():
+    """_StreamErrorEvent with xAI subscription message classifies as auth/non-retryable.
+
+    The SSE error path has status_code=None, so _classify_by_status is
+    skipped. The explicit pattern added at step 1 must fire first and
+    return auth/non-retryable so _is_entitlement_failure can stop the loop.
+    """
+    from run_agent import _StreamErrorEvent
+    from agent.error_classifier import classify_api_error, FailoverReason
+
+    err = _StreamErrorEvent(
+        "You have either run out of available resources or do not have an "
+        "active Grok subscription. Manage subscriptions at https://grok.com",
+        code="The caller does not have permission to execute the specified operation",
+    )
+    result = classify_api_error(err, provider="xai-oauth", model="grok-4.3")
+    assert result.reason == FailoverReason.auth
+    assert result.retryable is False
+    assert result.should_fallback is True
+
+
+def test_classify_api_error_stream_event_resources_exhausted_grok_is_auth():
+    """'out of available resources' + 'grok' variant also classifies as auth."""
+    from run_agent import _StreamErrorEvent
+    from agent.error_classifier import classify_api_error, FailoverReason
+
+    err = _StreamErrorEvent(
+        "You have run out of available resources for Grok.",
+    )
+    result = classify_api_error(err, provider="xai-oauth", model="grok-4.3")
+    assert result.reason == FailoverReason.auth
+    assert result.retryable is False
+    assert result.should_fallback is True
+
+
+def test_classify_api_error_stream_event_unrelated_not_reclassified():
+    """An unrelated _StreamErrorEvent must not be caught by the xAI guard."""
+    from run_agent import _StreamErrorEvent
+    from agent.error_classifier import classify_api_error, FailoverReason
+
+    err = _StreamErrorEvent("Internal server error — try again later")
+    result = classify_api_error(err, provider="xai-oauth", model="grok-4.3")
+    assert result.reason != FailoverReason.auth
+
+
 # ---------------------------------------------------------------------------
 # Fix C: reasoning replay gating for xai-oauth
 # ---------------------------------------------------------------------------