"""Regression tests for the May 2026 xAI OAuth (SuperGrok / X Premium) bugs. Three distinct failure modes the user community hit during rollout: 1. ``RuntimeError("Expected to have received `response.created` before `error`")`` on multi-turn xAI OAuth conversations. The OpenAI SDK's Responses streaming state machine collapses an upstream ``error`` SSE frame into a generic stream-ordering error. ``_run_codex_stream`` now treats this the same way it already treats the missing ``response.completed`` postlude — fall back to a non-stream ``responses.create(stream=True)`` which surfaces the real provider error. Also closes #8133 (``response.in_progress`` prelude on custom relays) and #14634 (``codex.rate_limits`` prelude on codex-lb). 2. The HTTP 403 entitlement error xAI returns when an OAuth token lacks SuperGrok / X Premium ("You have either run out of available resources or do not have an active Grok subscription") used to read as a confusing wall of JSON. ``_summarize_api_error`` now appends a one-line hint pointing the user at https://grok.com and ``/model``. 3. Multi-turn replay of ``codex_reasoning_items`` (with ``encrypted_content``) was briefly suppressed for ``is_xai_responses`` in PR #26644 on the theory that xAI's OAuth/SuperGrok surface rejected replayed encrypted reasoning items. That suppression was reverted shortly after: xAI confirmed they explicitly want Hermes to thread encrypted reasoning back across turns, and the original multi-turn failure mode was actually the prelude-SSE issue closed by Fix A above. The remaining tests here lock in that xAI receives replayed reasoning AND that we ask xAI to echo it back in the ``include`` array. """ from types import SimpleNamespace from unittest.mock import MagicMock, patch import pytest # --------------------------------------------------------------------------- # Fix A: prelude error fallback # --------------------------------------------------------------------------- def _make_codex_agent(): """Build a minimal AIAgent wired for codex_responses streaming tests.""" from run_agent import AIAgent agent = AIAgent( api_key="test-key", base_url="https://api.x.ai/v1", model="grok-4.3", quiet_mode=True, skip_context_files=True, skip_memory=True, ) agent.api_mode = "codex_responses" agent.provider = "xai-oauth" agent._interrupt_requested = False return agent @pytest.mark.parametrize( "prelude_event_type", [ "error", # xAI OAuth multi-turn "codex.rate_limits", # codex-lb relays (#14634) "response.in_progress", # custom Responses relays (#8133) ], ) def test_codex_stream_prelude_error_falls_back_to_create_stream(prelude_event_type): """The SDK's prelude RuntimeError must trigger the non-stream fallback. When the first SSE event isn't ``response.created``, openai-python raises RuntimeError before our event loop sees anything. We must detect that, retry once, then fall back to ``create(stream=True)`` which surfaces the real provider error or a real response. """ agent = _make_codex_agent() prelude_error = RuntimeError( f"Expected to have received `response.created` before `{prelude_event_type}`" ) mock_client = MagicMock() mock_client.responses.stream.side_effect = prelude_error fallback_response = SimpleNamespace( output=[SimpleNamespace( type="message", content=[SimpleNamespace(type="output_text", text="fallback ok")], )], status="completed", ) with patch.object( agent, "_run_codex_create_stream_fallback", return_value=fallback_response ) as mock_fallback: result = agent._run_codex_stream({}, client=mock_client) assert result is fallback_response mock_fallback.assert_called_once_with({}, client=mock_client) def test_codex_stream_prelude_error_retries_once_before_fallback(): """The retry path must fire one extra stream attempt before falling back.""" agent = _make_codex_agent() call_count = {"n": 0} def stream_side_effect(**kwargs): call_count["n"] += 1 raise RuntimeError( "Expected to have received `response.created` before `error`" ) mock_client = MagicMock() mock_client.responses.stream.side_effect = stream_side_effect fallback_response = SimpleNamespace(output=[], status="completed") with patch.object( agent, "_run_codex_create_stream_fallback", return_value=fallback_response ) as mock_fallback: agent._run_codex_stream({}, client=mock_client) # max_stream_retries=1 → one retry + final attempt → 2 stream calls, # THEN the fallback path runs. assert call_count["n"] == 2 mock_fallback.assert_called_once() def test_codex_stream_unrelated_runtimeerror_still_raises(): """RuntimeErrors that aren't prelude/postlude shape must propagate.""" agent = _make_codex_agent() mock_client = MagicMock() mock_client.responses.stream.side_effect = RuntimeError("something else broke") with patch.object(agent, "_run_codex_create_stream_fallback") as mock_fallback: with pytest.raises(RuntimeError, match="something else broke"): agent._run_codex_stream({}, client=mock_client) mock_fallback.assert_not_called() def test_codex_stream_postlude_error_still_falls_back(): """Existing ``response.completed`` fallback must not regress.""" agent = _make_codex_agent() mock_client = MagicMock() mock_client.responses.stream.side_effect = RuntimeError( "Didn't receive a `response.completed` event." ) fallback_response = SimpleNamespace(output=[], status="completed") with patch.object( agent, "_run_codex_create_stream_fallback", return_value=fallback_response ) as mock_fallback: result = agent._run_codex_stream({}, client=mock_client) assert result is fallback_response mock_fallback.assert_called_once() # --------------------------------------------------------------------------- # Fix B: surface xAI's entitlement body verbatim (no editorializing) # # The original PR #26644 appended a hint that led with "X Premium+ does NOT # include xAI API access — only standalone SuperGrok subscribers can use this # provider." xAI announced on 2026-05-16 that X Premium subs now work in # Hermes (https://x.ai/news/grok-hermes), making that hint actively wrong: # a Premium+ user hitting a real entitlement issue (no Grok sub, wrong tier, # exhausted quota) would be misdirected to switch subscriptions when their # Premium sub is in fact valid. We now surface xAI's own body text verbatim # (which already says "Manage subscriptions at https://grok.com/?_s=usage") # and leave the diagnosis to xAI's wording. # --------------------------------------------------------------------------- def test_summarize_api_error_surfaces_xai_entitlement_body_verbatim(): """xAI's OAuth 403 body must surface as-is, with no Hermes-side hint.""" from run_agent import AIAgent error = RuntimeError( "HTTP 403: Error code: 403 - {'code': 'The caller does not have permission " "to execute the specified operation', 'error': 'You have either run out of " "available resources or do not have an active Grok subscription. Manage " "subscriptions at https://grok.com'}" ) summary = AIAgent._summarize_api_error(error) # xAI's own body text must reach the user — they need it to diagnose. assert "do not have an active Grok subscription" in summary # No stale claim that X Premium is incompatible with Hermes. assert "X Premium+ does NOT include" not in summary assert "standalone SuperGrok subscribers" not in summary def test_summarize_api_error_xai_body_message_unwrapped(): """SDK-style error with structured body surfaces the message cleanly.""" from run_agent import AIAgent class _XaiErr(Exception): status_code = 403 body = { "error": { "message": ( "You have either run out of available resources or do " "not have an active Grok subscription. Manage at " "https://grok.com" ) } } summary = AIAgent._summarize_api_error(_XaiErr("403")) assert "HTTP 403" in summary assert "do not have an active Grok subscription" in summary # No editorializing on top of xAI's own wording. assert "X Premium+ does NOT include" not in summary def test_summarize_api_error_passes_through_unrelated_errors(): """Non-xAI / non-entitlement errors must not be touched.""" from run_agent import AIAgent error = RuntimeError("HTTP 500: upstream is sad") summary = AIAgent._summarize_api_error(error) assert "SuperGrok" not in summary assert "grok.com" not in summary assert "upstream is sad" in summary # --------------------------------------------------------------------------- # Fix D: _StreamErrorEvent xAI entitlement classified as auth, not retryable # # run_codex_create_stream_fallback raises _StreamErrorEvent (status_code=None) # when the Responses stream emits a ``type=error`` SSE frame. Before this # fix, classify_api_error had no match for "grok subscription" in its pattern # lists, so it returned FailoverReason.unknown (retryable=True) — burning # max_retries before the agent stopped. _is_entitlement_failure was never # called because it only runs when FailoverReason.auth is returned. # --------------------------------------------------------------------------- def test_classify_api_error_stream_event_grok_subscription_is_auth(): """_StreamErrorEvent with xAI subscription message classifies as auth/non-retryable. The SSE error path has status_code=None, so _classify_by_status is skipped. The explicit pattern added at step 1 must fire first and return auth/non-retryable so _is_entitlement_failure can stop the loop. """ from run_agent import _StreamErrorEvent from agent.error_classifier import classify_api_error, FailoverReason err = _StreamErrorEvent( "You have either run out of available resources or do not have an " "active Grok subscription. Manage subscriptions at https://grok.com", code="The caller does not have permission to execute the specified operation", ) result = classify_api_error(err, provider="xai-oauth", model="grok-4.3") assert result.reason == FailoverReason.auth assert result.retryable is False assert result.should_fallback is True def test_classify_api_error_stream_event_resources_exhausted_grok_is_auth(): """'out of available resources' + 'grok' variant also classifies as auth.""" from run_agent import _StreamErrorEvent from agent.error_classifier import classify_api_error, FailoverReason err = _StreamErrorEvent( "You have run out of available resources for Grok.", ) result = classify_api_error(err, provider="xai-oauth", model="grok-4.3") assert result.reason == FailoverReason.auth assert result.retryable is False def test_classify_api_error_stream_event_unrelated_not_reclassified(): """An unrelated _StreamErrorEvent must not be caught by the xAI guard.""" from run_agent import _StreamErrorEvent from agent.error_classifier import classify_api_error, FailoverReason err = _StreamErrorEvent("Internal server error — try again later") result = classify_api_error(err, provider="xai-oauth", model="grok-4.3") assert result.reason != FailoverReason.auth # --------------------------------------------------------------------------- # Fix C: reasoning replay gating for xai-oauth # --------------------------------------------------------------------------- def _assistant_msg_with_encrypted_reasoning(text="hi from grok", encrypted="enc_blob"): return { "role": "assistant", "content": text, "codex_reasoning_items": [ { "type": "reasoning", "id": "rs_xai_001", "encrypted_content": encrypted, "summary": [], } ], } def test_codex_reasoning_replay_default_includes_encrypted_content(): """Native Codex backend (default) must still replay encrypted reasoning.""" from agent.codex_responses_adapter import _chat_messages_to_responses_input msgs = [ {"role": "user", "content": "hi"}, _assistant_msg_with_encrypted_reasoning(), {"role": "user", "content": "what's your name?"}, ] items = _chat_messages_to_responses_input(msgs) reasoning = [it for it in items if it.get("type") == "reasoning"] assert len(reasoning) == 1 assert reasoning[0]["encrypted_content"] == "enc_blob" def test_codex_reasoning_replay_includes_encrypted_content_for_xai(): """xAI must receive replayed encrypted reasoning items (May 2026 reversal). Earlier we stripped these on the theory that the OAuth/SuperGrok surface rejected them. xAI subsequently confirmed they explicitly want Hermes to thread encrypted reasoning back across turns for cross-turn coherence — that's the whole point of the partnership integration. """ from agent.codex_responses_adapter import _chat_messages_to_responses_input msgs = [ {"role": "user", "content": "hi"}, _assistant_msg_with_encrypted_reasoning(), {"role": "user", "content": "what's your name?"}, ] items = _chat_messages_to_responses_input(msgs, is_xai_responses=True) reasoning = [it for it in items if it.get("type") == "reasoning"] assert len(reasoning) == 1, ( "xAI must receive replayed reasoning items — see docstring for the " "May 2026 reversal of the earlier suppression gate." ) assert reasoning[0]["encrypted_content"] == "enc_blob" # And the assistant's visible text must still be present alongside it. assistant_items = [ it for it in items if it.get("role") == "assistant" or it.get("type") == "message" ] assert assistant_items, "assistant message must still be present" def test_codex_transport_xai_request_includes_encrypted_content(): """xAI ``include`` array must request ``reasoning.encrypted_content``. This is the request-side half of the May 2026 reversal: we ask xAI to echo back encrypted reasoning so the next turn can replay it. """ from agent.transports.codex import ResponsesApiTransport transport = ResponsesApiTransport() kwargs = transport.build_kwargs( model="grok-4.3", messages=[ {"role": "system", "content": "you are a helpful assistant"}, {"role": "user", "content": "hi"}, ], tools=None, instructions="you are a helpful assistant", reasoning_config={"enabled": True, "effort": "medium"}, is_xai_responses=True, ) assert kwargs["include"] == ["reasoning.encrypted_content"] def test_codex_transport_xai_replays_reasoning_in_input(): """End-to-end: build_kwargs on xAI must replay prior encrypted reasoning.""" from agent.transports.codex import ResponsesApiTransport transport = ResponsesApiTransport() kwargs = transport.build_kwargs( model="grok-4.3", messages=[ {"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}, _assistant_msg_with_encrypted_reasoning(text="hi from grok"), {"role": "user", "content": "what's your name?"}, ], tools=None, instructions="sys", reasoning_config={"enabled": True, "effort": "medium"}, is_xai_responses=True, ) input_items = kwargs["input"] reasoning_items = [it for it in input_items if it.get("type") == "reasoning"] assert len(reasoning_items) == 1 assert reasoning_items[0]["encrypted_content"] == "enc_blob" def test_codex_transport_native_codex_still_replays_reasoning_in_input(): """Regression guard: openai-codex must keep the existing replay path.""" from agent.transports.codex import ResponsesApiTransport transport = ResponsesApiTransport() kwargs = transport.build_kwargs( model="gpt-5-codex", messages=[ {"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}, _assistant_msg_with_encrypted_reasoning(text="hi from codex"), {"role": "user", "content": "next"}, ], tools=None, instructions="sys", reasoning_config={"enabled": True, "effort": "medium"}, is_xai_responses=False, ) input_items = kwargs["input"] reasoning_items = [it for it in input_items if it.get("type") == "reasoning"] assert len(reasoning_items) == 1 assert reasoning_items[0]["encrypted_content"] == "enc_blob" # Native Codex still asks for encrypted_content back. assert "reasoning.encrypted_content" in kwargs.get("include", []) # --------------------------------------------------------------------------- # Fix D: entitlement 403 must NOT trigger credential-pool refresh loop # --------------------------------------------------------------------------- @pytest.mark.parametrize( "message", [ # The exact wire text RaidenTyler and Don Piedro captured. "You have either run out of available resources or do not have an " "active Grok subscription. Manage at https://grok.com", # Permission-style variant from the same 403 body. "The caller does not have permission to execute the specified " "operation for grok-4.3", ], ) def test_is_entitlement_failure_matches_real_xai_bodies(message): from run_agent import AIAgent assert AIAgent._is_entitlement_failure( {"message": message, "reason": "permission_denied"}, 403, ) def test_is_entitlement_failure_false_for_status_other_than_401_403(): """200/429/500 must never be classified as entitlement, even if body matches.""" from run_agent import AIAgent body = { "message": "do not have an active Grok subscription", } assert not AIAgent._is_entitlement_failure(body, 500) assert not AIAgent._is_entitlement_failure(body, 429) assert not AIAgent._is_entitlement_failure(body, 200) def test_is_entitlement_failure_false_for_unrelated_auth_errors(): """A real auth failure (expired token, wrong key) must keep refreshing.""" from run_agent import AIAgent # Generic Anthropic-style auth failure assert not AIAgent._is_entitlement_failure( {"message": "Invalid API key", "reason": "authentication_error"}, 401, ) # OAuth token expired assert not AIAgent._is_entitlement_failure( {"message": "Token has expired", "reason": "unauthorized"}, 401, ) # Empty context assert not AIAgent._is_entitlement_failure({}, 401) assert not AIAgent._is_entitlement_failure(None, 401) def test_recover_with_credential_pool_skips_refresh_on_entitlement_403(): """The recovery path must NOT call pool.try_refresh_current() on entitlement 403. Before the fix, an unsubscribed xAI OAuth account would burn the agent loop indefinitely: refresh → 403 → refresh → 403, infinitely. With the entitlement guard, recovery returns False so the error surfaces normally with the friendly hint from _summarize_api_error. """ from run_agent import AIAgent from agent.error_classifier import FailoverReason agent = _make_codex_agent() # Wire a fake credential pool that records refresh attempts. refresh_calls = {"n": 0} class _FakePool: def try_refresh_current(self): refresh_calls["n"] += 1 return MagicMock(id="should_not_be_called") def mark_exhausted_and_rotate(self, **_kwargs): return None def has_available(self): return False agent._credential_pool = _FakePool() error_context = { "reason": "The caller does not have permission to execute the specified operation", "message": "You have either run out of available resources or do not have an " "active Grok subscription. Manage at https://grok.com", } recovered, _retried_429 = agent._recover_with_credential_pool( status_code=403, has_retried_429=False, classified_reason=FailoverReason.auth, error_context=error_context, ) assert recovered is False, "Entitlement 403 must surface, not silently recover" assert refresh_calls["n"] == 0, "try_refresh_current must NOT be called on entitlement 403" def test_recover_with_credential_pool_skips_refresh_on_bare_403_for_xai_oauth(): """A bare HTTP 403 from ``xai-oauth`` (no keyword match) must NOT loop refresh. Regression for #26847 — xAI's backend has been seen to 403 standard SuperGrok subscribers with a terser body that doesn't contain any of the existing entitlement keywords ("do not have an active Grok subscription", etc.). Before the defense-in-depth guard, the recovery path would happily mint a fresh token, get a fresh 403, and spin. """ from run_agent import AIAgent from agent.error_classifier import FailoverReason agent = _make_codex_agent() assert agent.provider == "xai-oauth" refresh_calls = {"n": 0} class _FakePool: def try_refresh_current(self): refresh_calls["n"] += 1 return MagicMock(id="should_not_be_called") def mark_exhausted_and_rotate(self, **_kwargs): return None def has_available(self): return False agent._credential_pool = _FakePool() error_context = { "reason": "forbidden", "message": "Forbidden", } assert not AIAgent._is_entitlement_failure(error_context, 403), ( "Pre-condition: bare 'Forbidden' body must NOT match the keyword " "heuristic — otherwise this test isn't covering the defense-in-depth path." ) recovered, _retried_429 = agent._recover_with_credential_pool( status_code=403, has_retried_429=False, classified_reason=FailoverReason.auth, error_context=error_context, ) assert recovered is False, "Bare 403 on xai-oauth must surface, not refresh-loop" assert refresh_calls["n"] == 0, "try_refresh_current must NOT be called on xai-oauth 403" def test_recover_with_credential_pool_still_refreshes_genuine_auth_failure(): """Regression guard: legitimate auth errors must still trigger refresh.""" from run_agent import AIAgent from agent.error_classifier import FailoverReason agent = _make_codex_agent() refresh_calls = {"n": 0} class _FakePool: def try_refresh_current(self): refresh_calls["n"] += 1 # Return a fake refreshed entry — semantically "refresh worked" entry = MagicMock() entry.id = "entry_refreshed" return entry def mark_exhausted_and_rotate(self, **_kwargs): return None def has_available(self): return False agent._credential_pool = _FakePool() # _swap_credential is called by the recovery path — stub it out agent._swap_credential = MagicMock() error_context = { "reason": "authentication_error", "message": "Invalid API key", } recovered, _retried_429 = agent._recover_with_credential_pool( status_code=401, has_retried_429=False, classified_reason=FailoverReason.auth, error_context=error_context, ) assert recovered is True, "Genuine auth failure must still recover via refresh" assert refresh_calls["n"] == 1 # --------------------------------------------------------------------------- # Fix D-bis: bad-credentials 403 must NOT be classified as entitlement (#29344) # # xAI returns the same permission-denied ``code`` text for two distinct # conditions: unsubscribed account vs. stale OAuth access token. The # ``error`` field's ``[WKE=unauthenticated:...]`` suffix (and the # accompanying "OAuth2 access token could not be validated" phrasing) is # xAI's authoritative disambiguator — when present, the body is an auth # failure, not entitlement, and the credential-pool refresh path must # run. Pre-fix, long-running TUI sessions stuck on a stale token # surfaced as a non-retryable client error; the workaround was to exit # and reopen the TUI so the startup-resolve path refreshed. # --------------------------------------------------------------------------- def test_is_entitlement_failure_false_for_bad_credentials_wke_suffix(): """403 with ``[WKE=unauthenticated:bad-credentials]`` is auth, not entitlement. Verbatim shape from the #29344 reporter — the ``code`` text matches the entitlement permission-denied heuristic, but the ``error`` field carries xAI's explicit "this is a credential validation failure" signal. Classifier must honor it. """ from run_agent import AIAgent assert not AIAgent._is_entitlement_failure( { "code": "The caller does not have permission to execute the specified operation", "error": "The OAuth2 access token could not be validated. [WKE=unauthenticated:bad-credentials]", }, 403, ) def test_is_entitlement_failure_false_for_wke_suffix_in_normalized_shape(): """The same body after ``_extract_api_error_context`` normalisation. Real runtime paths feed the classifier through ``_extract_api_error_context``, which converts the raw body to ``{message, reason, reset_at}``. The disambiguator must fire in BOTH the raw-body shape (test above) and the normalised shape so the fix actually reaches the production call site at ``_recover_with_credential_pool``. """ from run_agent import AIAgent assert not AIAgent._is_entitlement_failure( { "reason": "The caller does not have permission to execute the specified operation", "message": "The OAuth2 access token could not be validated. [WKE=unauthenticated:bad-credentials]", }, 403, ) @pytest.mark.parametrize("wke_variant", [ # The headline variant — what xAI returns today. "[WKE=unauthenticated:bad-credentials]", # Forward-compat: xAI documents the WKE prefix as a stable shape, # the suffix after the colon is the "reason code" and could grow # new values. Anything under ``unauthenticated:`` must route to # the refresh path. "[WKE=unauthenticated:expired-token]", "[WKE=unauthenticated:revoked]", "[WKE=unauthenticated:some-future-reason]", ]) def test_is_entitlement_failure_false_for_any_wke_unauthenticated_variant(wke_variant): from run_agent import AIAgent assert not AIAgent._is_entitlement_failure( { "code": "The caller does not have permission to execute the specified operation", "error": f"Token rejected. {wke_variant}", }, 403, ) def test_is_entitlement_failure_false_via_oauth2_validation_phrase_alone(): """Second disambiguator: the "OAuth2 access token could not be validated" phrase by itself (no WKE suffix) must also route to refresh. This is a belt-and-braces guard against xAI dropping or reformatting the WKE suffix in a future API revision without changing the human-readable error text.""" from run_agent import AIAgent assert not AIAgent._is_entitlement_failure( { "code": "The caller does not have permission to execute the specified operation", "error": "The OAuth2 access token could not be validated.", }, 403, ) def test_is_entitlement_failure_wke_signal_overrides_entitlement_keywords(): """Defensive: if a future xAI body somehow carries BOTH the WKE suffix AND entitlement language, the WKE signal wins. Auth is recoverable; entitlement isn't. If the refreshed token still can't access the resource, the next 403 (without WKE) lands on the entitlement path correctly.""" from run_agent import AIAgent assert not AIAgent._is_entitlement_failure( { "code": "The caller does not have permission to execute the specified operation", "error": ( "do not have an active Grok subscription. " "[WKE=unauthenticated:bad-credentials]" ), }, 403, ) def test_is_entitlement_failure_case_insensitive_wke_match(): """Substring match is case-insensitive — the classifier lowercases everything before matching, so a future xAI build that uppercases the prefix wouldn't reintroduce the misclassification.""" from run_agent import AIAgent assert not AIAgent._is_entitlement_failure( { "code": "The caller does not have permission to execute the specified operation", "error": "[wke=Unauthenticated:Bad-Credentials]", }, 403, ) def test_recover_with_credential_pool_refreshes_on_xai_bad_credentials_403(): """End-to-end #29344: a bad-credentials 403 from xai-oauth MUST call ``try_refresh_current()`` so the long-running TUI session recovers without an exit/reopen cycle. Mirrors the scaffolding of ``test_recover_with_credential_pool_still_refreshes_genuine_auth_failure`` but with the exact 403 body shape xAI ships for stale tokens — the very body that pre-fix tripped the entitlement classifier and short-circuited the refresh path. """ from run_agent import AIAgent from agent.error_classifier import FailoverReason agent = _make_codex_agent() refresh_calls = {"n": 0} class _FakePool: def try_refresh_current(self): refresh_calls["n"] += 1 entry = MagicMock() entry.id = "entry_refreshed_after_stale" return entry def mark_exhausted_and_rotate(self, **_kwargs): return None def has_available(self): return False agent._credential_pool = _FakePool() agent._swap_credential = MagicMock() # Normalised shape that ``_extract_api_error_context`` would # produce for the reporter's wire-level body. error_context = { "reason": ( "The caller does not have permission to execute the specified operation" ), "message": ( "The OAuth2 access token could not be validated. " "[WKE=unauthenticated:bad-credentials]" ), } recovered, _retried_429 = agent._recover_with_credential_pool( status_code=403, has_retried_429=False, classified_reason=FailoverReason.auth, error_context=error_context, ) assert recovered is True, ( "Stale OAuth token (bad-credentials 403) must trigger refresh — " "pre-fix this returned False because the entitlement classifier " "over-matched on the permission-denied code text" ) assert refresh_calls["n"] == 1, "try_refresh_current must run exactly once" agent._swap_credential.assert_called_once() def test_recover_with_credential_pool_still_blocks_real_entitlement(): """Companion regression guard for the #29344 fix: the original #26847 protection — entitlement 403 must NOT refresh — must survive the new disambiguator. A real unsubscribed-account body has no WKE suffix and no OAuth2-validation phrase, so the classifier still classifies it as entitlement and short-circuits.""" from run_agent import AIAgent from agent.error_classifier import FailoverReason agent = _make_codex_agent() refresh_calls = {"n": 0} class _FakePool: def try_refresh_current(self): refresh_calls["n"] += 1 return MagicMock(id="should_not_be_called") def mark_exhausted_and_rotate(self, **_kwargs): return None def has_available(self): return False agent._credential_pool = _FakePool() # Pure entitlement body — no WKE suffix, no OAuth2 phrase. error_context = { "reason": ( "The caller does not have permission to execute the specified operation" ), "message": ( "You have either run out of available resources or do not have an " "active Grok subscription. Manage at https://grok.com" ), } recovered, _retried_429 = agent._recover_with_credential_pool( status_code=403, has_retried_429=False, classified_reason=FailoverReason.auth, error_context=error_context, ) assert recovered is False, "Entitlement 403 must surface, not refresh" assert refresh_calls["n"] == 0 # --------------------------------------------------------------------------- # Fix E: grok-4.3 context length must be 1M, not 256K # --------------------------------------------------------------------------- def test_grok_4_3_context_length_is_1m(): """grok-4.3 ships with 1M context per docs.x.ai/developers/models/grok-4.3. Hermes' substring-match fallback used to return 256k (from the "grok-4" catch-all) which under-reported the model's real capacity. """ from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS # The entry exists with the expected value. assert DEFAULT_CONTEXT_LENGTHS["grok-4.3"] == 1_000_000 # And longest-first substring matching resolves grok-4.3 and # grok-4.3-latest to the new value, NOT the grok-4 catch-all. for slug in ("grok-4.3", "grok-4.3-latest"): matched_key = max( (k for k in DEFAULT_CONTEXT_LENGTHS if k in slug.lower()), key=len, ) assert matched_key == "grok-4.3", ( f"Expected longest-first match to land on grok-4.3 for {slug}, " f"got {matched_key}" ) assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 1_000_000 def test_grok_4_still_resolves_to_256k(): """Regression guard: grok-4 (non-.3) must still resolve to 256k.""" from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS for slug in ("grok-4", "grok-4-0709"): matched_key = max( (k for k in DEFAULT_CONTEXT_LENGTHS if k in slug.lower()), key=len, ) # grok-4-0709 contains "grok-4" but not "grok-4.3"; matched key # must be "grok-4" (or a more specific variant family if one is # ever added). The 256k contract must hold. assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 256_000