mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-08 08:11:38 +00:00
fix(error_classifier): classify xAI Grok entitlement SSE errors as auth
When xAI returns a subscription/entitlement error through an SSE
``type=error`` frame, ``_StreamErrorEvent`` is raised with
``status_code=None``. This caused ``_classify_by_status`` (step 2 of
``classify_api_error``) to be skipped entirely, and the Grok-specific
phrases ("do not have an active Grok subscription", "out of available
resources") appeared in none of the message-pattern lists. The error
fell through to ``FailoverReason.unknown (retryable=True)``, burning
``max_retries`` on every affected X Premium+ / SuperGrok user before
the agent stopped — and ``_is_entitlement_failure`` was never called
because it only fires under ``FailoverReason.auth``.
The HTTP 403 path already handled this correctly (``_classify_by_status``
returns ``auth/non-retryable`` for 403). Add an explicit pattern block
at step 1 (highest priority, before the ``status_code`` guard) so both
code paths route to ``FailoverReason.auth, retryable=False,
should_fallback=True`` — matching the 403 path exactly.
Add three regression tests in the ``Fix D`` section of
``test_codex_xai_oauth_recovery.py``:
- primary "do not have an active Grok subscription" phrase
- "out of available resources" + "grok" variant
- unrelated ``_StreamErrorEvent`` must not be reclassified
This commit is contained in:
parent
bc77f79798
commit
1fabd6e100
2 changed files with 85 additions and 0 deletions
|
|
@@ -224,6 +224,62 @@ def test_summarize_api_error_passes_through_unrelated_errors():
|
|||
assert "upstream is sad" in summary
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fix D: _StreamErrorEvent xAI entitlement classified as auth, not retryable
|
||||
#
|
||||
# run_codex_create_stream_fallback raises _StreamErrorEvent (status_code=None)
|
||||
# when the Responses stream emits a ``type=error`` SSE frame. Before this
|
||||
# fix, classify_api_error had no match for "grok subscription" in its pattern
|
||||
# lists, so it returned FailoverReason.unknown (retryable=True) — burning
|
||||
# max_retries before the agent stopped. _is_entitlement_failure was never
|
||||
# called because it only runs when FailoverReason.auth is returned.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_classify_api_error_stream_event_grok_subscription_is_auth():
    """An xAI subscription error raised as _StreamErrorEvent is auth/non-retryable.

    Errors arriving through the SSE path carry status_code=None, which means
    _classify_by_status does not run for them. The step-1 message pattern
    therefore has to match first and yield auth/non-retryable, which is what
    lets _is_entitlement_failure stop the retry loop.
    """
    from run_agent import _StreamErrorEvent
    from agent.error_classifier import classify_api_error, FailoverReason

    # Full entitlement message as emitted by xAI; it contains both the
    # "out of available resources" and "active Grok subscription" phrases.
    entitlement_message = (
        "You have either run out of available resources or do not have an "
        "active Grok subscription. Manage subscriptions at https://grok.com"
    )
    stream_error = _StreamErrorEvent(
        entitlement_message,
        code="The caller does not have permission to execute the specified operation",
    )

    classified = classify_api_error(
        stream_error,
        provider="xai-oauth",
        model="grok-4.3",
    )

    assert classified.reason == FailoverReason.auth
    assert classified.retryable is False
    assert classified.should_fallback is True
|
||||
|
||||
|
||||
def test_classify_api_error_stream_event_resources_exhausted_grok_is_auth():
    """'out of available resources' + 'grok' variant also classifies as auth.

    Covers the message shape that lacks the "active Grok subscription"
    phrase. The expected outcome is the same as for the primary phrase:
    auth, non-retryable, and eligible for fallback.
    """
    from run_agent import _StreamErrorEvent
    from agent.error_classifier import classify_api_error, FailoverReason

    err = _StreamErrorEvent(
        "You have run out of available resources for Grok.",
    )
    result = classify_api_error(err, provider="xai-oauth", model="grok-4.3")

    assert result.reason == FailoverReason.auth
    assert result.retryable is False
    # Keep parity with the primary-phrase test: the entitlement path is
    # expected to allow fallback, so pin that flag for this variant too.
    assert result.should_fallback is True
|
||||
|
||||
|
||||
def test_classify_api_error_stream_event_unrelated_not_reclassified():
    """A _StreamErrorEvent without xAI entitlement wording stays out of auth.

    Guards against the xAI pattern being too broad: a generic server-side
    failure message must not come back as FailoverReason.auth.
    """
    from run_agent import _StreamErrorEvent
    from agent.error_classifier import classify_api_error, FailoverReason

    generic_failure = _StreamErrorEvent("Internal server error — try again later")

    classified = classify_api_error(
        generic_failure,
        provider="xai-oauth",
        model="grok-4.3",
    )

    assert classified.reason != FailoverReason.auth
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fix C: reasoning replay gating for xai-oauth
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue