mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(error_classifier): classify xAI Grok entitlement SSE errors as auth
When xAI returns a subscription/entitlement error through an SSE
``type=error`` frame, ``_StreamErrorEvent`` is raised with
``status_code=None``. This caused ``_classify_by_status`` (step 2 of
``classify_api_error``) to be skipped entirely, and the Grok-specific
phrases ("do not have an active Grok subscription", "out of available
resources") appeared in none of the message-pattern lists. The error
fell through to ``FailoverReason.unknown (retryable=True)``, burning
``max_retries`` on every affected X Premium+ / SuperGrok user before
the agent stopped — and ``_is_entitlement_failure`` was never called
because it only fires under ``FailoverReason.auth``.
The HTTP 403 path already handled this correctly (``_classify_by_status``
returns ``auth/non-retryable`` for 403). Add an explicit pattern block
at step 1 (highest priority, before the ``status_code`` guard) so both
code paths route to ``FailoverReason.auth, retryable=False,
should_fallback=True`` — matching the 403 path exactly.
Add three regression tests in the ``Fix D`` section of
``test_codex_xai_oauth_recovery.py``:
- primary "do not have an active Grok subscription" phrase
- "out of available resources" + "grok" variant
- unrelated ``_StreamErrorEvent`` must not be reclassified
This commit is contained in:
parent
bc77f79798
commit
1fabd6e100
2 changed files with 85 additions and 0 deletions
|
|
@ -510,6 +510,35 @@ def classify_api_error(
|
|||
should_compress=False,
|
||||
)
|
||||
|
||||
# xAI Grok subscription entitlement errors.
|
||||
#
|
||||
# xAI returns "You have either run out of available resources or do not
|
||||
# have an active Grok subscription" through two distinct code paths:
|
||||
#
|
||||
# • HTTP 403 — status_code is set; _classify_by_status (step 2) routes
|
||||
# it to FailoverReason.auth correctly, and _is_entitlement_failure
|
||||
# then prevents the credential-refresh loop.
|
||||
#
|
||||
# • SSE ``type=error`` frame — surfaced as _StreamErrorEvent with
|
||||
# status_code=None. _classify_by_status is skipped entirely, and
|
||||
# "grok subscription" / "out of available resources" appear in none
|
||||
# of the message-pattern lists below. Without this guard the error
|
||||
# falls through to FailoverReason.unknown (retryable=True), burning
|
||||
# max_retries before the agent stops — and _is_entitlement_failure
|
||||
# is never called because it only runs under FailoverReason.auth.
|
||||
#
|
||||
# Both X Premium+ and SuperGrok subscribers hit this path when their
|
||||
# subscription tier does not cover the requested model or feature.
|
||||
if (
|
||||
"do not have an active grok subscription" in error_msg
|
||||
or ("out of available resources" in error_msg and "grok" in error_msg)
|
||||
):
|
||||
return _result(
|
||||
FailoverReason.auth,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# ── 2. HTTP status code classification ──────────────────────────
|
||||
|
||||
if status_code is not None:
|
||||
|
|
|
|||
|
|
@ -224,6 +224,62 @@ def test_summarize_api_error_passes_through_unrelated_errors():
|
|||
assert "upstream is sad" in summary
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fix D: _StreamErrorEvent xAI entitlement classified as auth, not retryable
|
||||
#
|
||||
# run_codex_create_stream_fallback raises _StreamErrorEvent (status_code=None)
|
||||
# when the Responses stream emits a ``type=error`` SSE frame. Before this
|
||||
# fix, classify_api_error had no match for "grok subscription" in its pattern
|
||||
# lists, so it returned FailoverReason.unknown (retryable=True) — burning
|
||||
# max_retries before the agent stopped. _is_entitlement_failure was never
|
||||
# called because it only runs when FailoverReason.auth is returned.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_classify_api_error_stream_event_grok_subscription_is_auth():
    """Primary xAI subscription phrase on a _StreamErrorEvent maps to auth.

    The error is built without a status code, mirroring the SSE
    ``type=error`` path where status-based classification does not apply.
    The message-level xAI guard must therefore produce the result on its
    own: auth reason, not retryable, fallback allowed.
    """
    from run_agent import _StreamErrorEvent
    from agent.error_classifier import classify_api_error, FailoverReason

    # Message text and code value as used for the xAI entitlement case.
    entitlement_message = (
        "You have either run out of available resources or do not have an "
        "active Grok subscription. Manage subscriptions at https://grok.com"
    )
    permission_code = (
        "The caller does not have permission to execute the specified operation"
    )
    stream_error = _StreamErrorEvent(entitlement_message, code=permission_code)

    verdict = classify_api_error(
        stream_error,
        provider="xai-oauth",
        model="grok-4.3",
    )

    assert verdict.reason == FailoverReason.auth
    assert verdict.retryable is False
    assert verdict.should_fallback is True
|
||||
|
||||
|
||||
def test_classify_api_error_stream_event_resources_exhausted_grok_is_auth():
    """'out of available resources' + 'grok' variant also classifies as auth.

    The message deliberately omits the "do not have an active Grok
    subscription" phrase, so only the combined "out of available
    resources" and "grok" check of the xAI entitlement guard can match.
    The guard returns one shared result for both branches, so this test
    pins all three fields of that result, including ``should_fallback``.
    """
    from run_agent import _StreamErrorEvent
    from agent.error_classifier import classify_api_error, FailoverReason

    err = _StreamErrorEvent(
        "You have run out of available resources for Grok.",
    )
    result = classify_api_error(err, provider="xai-oauth", model="grok-4.3")

    assert result.reason == FailoverReason.auth
    assert result.retryable is False
    # The guard sets should_fallback=True for this branch as well; without
    # this assertion a regression in the variant path would go unnoticed.
    assert result.should_fallback is True
|
||||
|
||||
|
||||
def test_classify_api_error_stream_event_unrelated_not_reclassified():
    """A _StreamErrorEvent without xAI entitlement wording is not auth.

    Negative control for the xAI guard: the message contains none of the
    guarded phrases, so the classifier must not report the auth reason.
    """
    from run_agent import _StreamErrorEvent
    from agent.error_classifier import classify_api_error, FailoverReason

    outcome = classify_api_error(
        _StreamErrorEvent("Internal server error — try again later"),
        provider="xai-oauth",
        model="grok-4.3",
    )

    assert outcome.reason != FailoverReason.auth
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fix C: reasoning replay gating for xai-oauth
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue