diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py index 00345f054e8..6fe9dc5bc64 100644 --- a/agent/codex_responses_adapter.py +++ b/agent/codex_responses_adapter.py @@ -244,8 +244,21 @@ def _normalize_responses_message_status(value: Any, *, default: str = "completed return default -def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Convert internal chat-style messages to Responses input items.""" +def _chat_messages_to_responses_input( + messages: List[Dict[str, Any]], + *, + is_xai_responses: bool = False, +) -> List[Dict[str, Any]]: + """Convert internal chat-style messages to Responses input items. + + ``is_xai_responses=True`` omits replayed reasoning items (and their + ``encrypted_content``) entirely. xAI's OAuth/SuperGrok ``/v1/responses`` surface + rejects encrypted reasoning blobs minted by prior turns: the request + streams an ``error`` SSE frame before ``response.created`` and the + OpenAI SDK collapses it into a generic stream-ordering error. Native + Codex (chatgpt.com backend-api) DOES accept replayed encrypted_content + — keep the default off. + """ items: List[Dict[str, Any]] = [] seen_item_ids: set = set() @@ -271,9 +284,17 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di if role == "assistant": # Replay encrypted reasoning items from previous turns # so the API can maintain coherent reasoning chains. + # + # xAI OAuth (SuperGrok/Premium) rejects replayed + # ``encrypted_content`` reasoning items minted by prior + # turns — see _chat_messages_to_responses_input docstring. + # When ``is_xai_responses`` is set we drop the replay + # entirely; Grok still reasons on each turn server-side, + # we just don't try to thread the prior turn's encrypted + # blob back in. 
codex_reasoning = msg.get("codex_reasoning_items") has_codex_reasoning = False - if isinstance(codex_reasoning, list): + if isinstance(codex_reasoning, list) and not is_xai_responses: for ri in codex_reasoning: if isinstance(ri, dict) and ri.get("encrypted_content"): item_id = ri.get("id") diff --git a/agent/transports/codex.py b/agent/transports/codex.py index cfd9f128778..3661ea17a3e 100644 --- a/agent/transports/codex.py +++ b/agent/transports/codex.py @@ -24,7 +24,10 @@ class ResponsesApiTransport(ProviderTransport): def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: """Convert OpenAI chat messages to Responses API input items.""" from agent.codex_responses_adapter import _chat_messages_to_responses_input - return _chat_messages_to_responses_input(messages) + return _chat_messages_to_responses_input( + messages, + is_xai_responses=bool(kwargs.get("is_xai_responses")), + ) def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: """Convert OpenAI tool schemas to Responses API function definitions.""" @@ -93,7 +96,10 @@ class ResponsesApiTransport(ProviderTransport): kwargs = { "model": model, "instructions": instructions, - "input": _chat_messages_to_responses_input(payload_messages), + "input": _chat_messages_to_responses_input( + payload_messages, + is_xai_responses=is_xai_responses, + ), "tools": response_tools, "store": False, } @@ -110,7 +116,14 @@ class ResponsesApiTransport(ProviderTransport): if reasoning_enabled and is_xai_responses: from agent.model_metadata import grok_supports_reasoning_effort - kwargs["include"] = ["reasoning.encrypted_content"] + # NOTE: Hermes does NOT ask xAI to return ``reasoning.encrypted_content`` + # any more. xAI's OAuth/SuperGrok ``/v1/responses`` surface rejects + # replayed encrypted reasoning items on turn 2+ — see + # _chat_messages_to_responses_input docstring. Requesting the field + # back would just have us cache something we then must strip. 
Grok + # still reasons natively each turn; coherence across turns rides on + # the visible message text alone. + kwargs["include"] = [] # xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3 # / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though # those models reason natively. Only send the effort dial when diff --git a/run_agent.py b/run_agent.py index 7e42beb3eba..2b20d48ede2 100644 --- a/run_agent.py +++ b/run_agent.py @@ -4966,6 +4966,45 @@ class AIAgent: trajectory = self._convert_to_trajectory_format(messages, user_query, completed) _save_trajectory_to_file(trajectory, self.model, completed) + @staticmethod + def _decorate_xai_entitlement_error(detail: str) -> str: + """Append a friendly hint when xAI's OAuth surface returns an + entitlement-shaped error. + + xAI's ``/v1/responses`` endpoint replies to OAuth tokens that lack a + SuperGrok / X Premium subscription with HTTP 403 carrying a body like:: + + {"code": "The caller does not have permission to execute the + specified operation", "error": "You have either run out of + available resources or do not have an active Grok subscription. + Manage subscriptions at https://grok.com/..."} + + The raw text is useful but the action the user needs to take (subscribe + on grok.com, or switch providers with ``/model``) isn't obvious from + the wire format. Detect the entitlement shape and append a hint. + + Matched once per detail string — won't double-decorate if the upstream + already concatenated the same text. + """ + if not detail: + return detail + lower = detail.lower() + is_entitlement = ( + "do not have an active grok subscription" in lower + or ("out of available resources" in lower and "grok" in lower) + or ("does not have permission" in lower and "grok" in lower) + ) + if not is_entitlement: + return detail + hint = ( + " — xAI OAuth account lacks SuperGrok / X Premium entitlement for " + "this model. Subscribe at https://grok.com or run `/model` to " + "switch providers." 
+ ) + if hint.strip() in detail: + return detail + return f"{detail}{hint}" + @staticmethod def _summarize_api_error(error: Exception) -> str: """Extract a human-readable one-liner from an API error. @@ -4999,12 +5038,12 @@ class AIAgent: if msg: status_code = getattr(error, "status_code", None) prefix = f"HTTP {status_code}: " if status_code else "" - return f"{prefix}{msg[:300]}" + return AIAgent._decorate_xai_entitlement_error(f"{prefix}{msg[:300]}") # Fallback: truncate the raw string but give more room than 200 chars status_code = getattr(error, "status_code", None) prefix = f"HTTP {status_code}: " if status_code else "" - return f"{prefix}{raw[:500]}" + return AIAgent._decorate_xai_entitlement_error(f"{prefix}{raw[:500]}") def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]: if not key: @@ -7056,18 +7095,48 @@ class AIAgent: except RuntimeError as exc: err_text = str(exc) missing_completed = "response.completed" in err_text - if missing_completed and attempt < max_stream_retries: + # The OpenAI SDK's Responses streaming state machine raises + # ``RuntimeError("Expected to have received `response.created` + # before `<event type>`")`` when the first SSE event from the + # server is anything other than ``response.created`` — and it + # discards the event's payload before we can read it. 
Three + # real-world backends emit a different first frame: + # + # * xAI on grok-4.x OAuth — sends ``error`` (issues + # reported around the May 2026 SuperGrok rollout when + # multi-turn conversations replay encrypted reasoning + # content the OAuth tier rejects) + # * codex-lb relays — send ``codex.rate_limits`` (#14634) + # * custom Responses relays — send ``response.in_progress`` + # (#8133) + # + # In all three cases the underlying byte stream is still + # readable: a plain ``responses.create(stream=True)`` + # fallback (instead of the SDK ``responses.stream()`` helper) succeeds and surfaces the real provider error as + # a normal exception with body+status_code attached, which + # ``_summarize_api_error`` can then translate into a useful + # user-facing line. Treat ``response.created`` prelude + # errors the same way we already treat ``response.completed`` + # postlude errors. + prelude_error = ( + "Expected to have received `response.created`" in err_text + or "Expected to have received \"response.created\"" in err_text + ) + if (missing_completed or prelude_error) and attempt < max_stream_retries: logger.debug( - "Responses stream closed before completion (attempt %s/%s); retrying. %s", + "Responses stream %s (attempt %s/%s); retrying. %s", + "prelude rejected" if prelude_error else "closed before completion", attempt + 1, max_stream_retries + 1, self._client_log_context(), ) continue - if missing_completed: + if missing_completed or prelude_error: logger.debug( - "Responses stream did not emit response.completed; falling back to create(stream=True). %s", + "Responses stream %s; falling back to create(stream=True). 
%s err=%s", + "rejected before response.created" if prelude_error else "did not emit response.completed", self._client_log_context(), + err_text, ) return self._run_codex_create_stream_fallback(api_kwargs, client=active_client) raise diff --git a/tests/agent/transports/test_codex_transport.py b/tests/agent/transports/test_codex_transport.py index ad70167b09f..82251823790 100644 --- a/tests/agent/transports/test_codex_transport.py +++ b/tests/agent/transports/test_codex_transport.py @@ -194,9 +194,16 @@ class TestCodexBuildKwargs: is_xai_responses=True, reasoning_config={"effort": "high"}, ) - # xAI Responses must receive both encrypted reasoning content and the effort + # xAI Responses receives reasoning.effort on the allowlisted models. assert kw.get("reasoning") == {"effort": "high"} - assert "reasoning.encrypted_content" in kw.get("include", []) + # As of May 2026 we deliberately do NOT request + # reasoning.encrypted_content back from xAI — the OAuth/SuperGrok + # surface rejects replayed encrypted reasoning items on turn 2+ + # (the multi-turn "Expected to have received response.created + # before error" failure). Grok still reasons natively each turn; + # we just don't try to thread the prior turn's encrypted blob back + # in. See tests/run_agent/test_codex_xai_oauth_recovery.py. + assert "reasoning.encrypted_content" not in kw.get("include", []) def test_xai_reasoning_disabled_no_reasoning_key(self, transport): messages = [{"role": "user", "content": "Hi"}] @@ -222,8 +229,9 @@ class TestCodexBuildKwargs: # api.x.ai 400s with "Model X does not support parameter reasoningEffort" # on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*. # Those models reason natively but don't expose the dial. The transport - # must omit the `reasoning` key for them while keeping the encrypted - # reasoning content include so we can capture native reasoning tokens. + # must omit the `reasoning` key for them. 
As of May 2026 we also no + # longer request ``reasoning.encrypted_content`` back from xAI on ANY + # model — see test_xai_reasoning_effort_passed for the rationale. def test_xai_grok_4_omits_reasoning_effort(self, transport): """grok-4 / grok-4-0709 reject reasoning.effort with HTTP 400.""" @@ -237,8 +245,9 @@ class TestCodexBuildKwargs: assert "reasoning" not in kw, ( f"{model} must not receive a reasoning key (xAI rejects it)" ) - # Still capture native reasoning tokens - assert "reasoning.encrypted_content" in kw.get("include", []) + # We no longer ask xAI for encrypted_content back (see comment + # above) — verify it is absent from the include list. + assert "reasoning.encrypted_content" not in kw.get("include", []) def test_xai_grok_4_fast_omits_reasoning_effort(self, transport): """grok-4-fast and grok-4-1-fast variants reject reasoning.effort.""" diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py new file mode 100644 index 00000000000..0f3603d2ca7 --- /dev/null +++ b/tests/run_agent/test_codex_xai_oauth_recovery.py @@ -0,0 +1,351 @@ +"""Regression tests for the May 2026 xAI OAuth (SuperGrok / X Premium) bugs. + +Three distinct failure modes the user community hit during rollout: + +1. ``RuntimeError("Expected to have received `response.created` before + `error`")`` on multi-turn xAI OAuth conversations. The OpenAI SDK's + Responses streaming state machine collapses an upstream ``error`` SSE + frame into a generic stream-ordering error. ``_run_codex_stream`` + now treats this the same way it already treats the missing + ``response.completed`` postlude — fall back to a plain + ``responses.create(stream=True)`` call which surfaces the real provider + error. Also closes #8133 (``response.in_progress`` prelude on custom + relays) and #14634 (``codex.rate_limits`` prelude on codex-lb). + +2. 
The HTTP 403 entitlement error xAI returns when an OAuth token lacks + SuperGrok / X Premium ("You have either run out of available + resources or do not have an active Grok subscription") used to read + as a confusing wall of JSON. ``_summarize_api_error`` now appends a + one-line hint pointing the user at https://grok.com and ``/model``. + +3. Multi-turn replay of ``codex_reasoning_items`` (with + ``encrypted_content``) is now suppressed for ``is_xai_responses=True`` + in ``_chat_messages_to_responses_input``. xAI's OAuth/SuperGrok + surface rejects replayed encrypted reasoning items; Grok still + reasons natively each turn, so coherence rides on visible message + text. +""" + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + + +# --------------------------------------------------------------------------- +# Fix A: prelude error fallback +# --------------------------------------------------------------------------- + + +def _make_codex_agent(): + """Build a minimal AIAgent wired for codex_responses streaming tests.""" + from run_agent import AIAgent + + agent = AIAgent( + api_key="test-key", + base_url="https://api.x.ai/v1", + model="grok-4.3", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + agent.api_mode = "codex_responses" + agent.provider = "xai-oauth" + agent._interrupt_requested = False + return agent + + +@pytest.mark.parametrize( + "prelude_event_type", + [ + "error", # xAI OAuth multi-turn + "codex.rate_limits", # codex-lb relays (#14634) + "response.in_progress", # custom Responses relays (#8133) + ], +) +def test_codex_stream_prelude_error_falls_back_to_create_stream(prelude_event_type): + """The SDK's prelude RuntimeError must trigger the non-stream fallback. + + When the first SSE event isn't ``response.created``, openai-python + raises RuntimeError before our event loop sees anything. 
We must + detect that, retry once, then fall back to ``create(stream=True)`` + which surfaces the real provider error or a real response. + """ + agent = _make_codex_agent() + + prelude_error = RuntimeError( + f"Expected to have received `response.created` before `{prelude_event_type}`" + ) + + mock_client = MagicMock() + mock_client.responses.stream.side_effect = prelude_error + + fallback_response = SimpleNamespace( + output=[SimpleNamespace( + type="message", + content=[SimpleNamespace(type="output_text", text="fallback ok")], + )], + status="completed", + ) + + with patch.object( + agent, "_run_codex_create_stream_fallback", return_value=fallback_response + ) as mock_fallback: + result = agent._run_codex_stream({}, client=mock_client) + + assert result is fallback_response + mock_fallback.assert_called_once_with({}, client=mock_client) + + +def test_codex_stream_prelude_error_retries_once_before_fallback(): + """The retry path must fire one extra stream attempt before falling back.""" + agent = _make_codex_agent() + + call_count = {"n": 0} + + def stream_side_effect(**kwargs): + call_count["n"] += 1 + raise RuntimeError( + "Expected to have received `response.created` before `error`" + ) + + mock_client = MagicMock() + mock_client.responses.stream.side_effect = stream_side_effect + + fallback_response = SimpleNamespace(output=[], status="completed") + with patch.object( + agent, "_run_codex_create_stream_fallback", return_value=fallback_response + ) as mock_fallback: + agent._run_codex_stream({}, client=mock_client) + + # max_stream_retries=1 → one retry + final attempt → 2 stream calls, + # THEN the fallback path runs. 
+ assert call_count["n"] == 2 + mock_fallback.assert_called_once() + + +def test_codex_stream_unrelated_runtimeerror_still_raises(): + """RuntimeErrors that aren't prelude/postlude shape must propagate.""" + agent = _make_codex_agent() + + mock_client = MagicMock() + mock_client.responses.stream.side_effect = RuntimeError("something else broke") + + with patch.object(agent, "_run_codex_create_stream_fallback") as mock_fallback: + with pytest.raises(RuntimeError, match="something else broke"): + agent._run_codex_stream({}, client=mock_client) + + mock_fallback.assert_not_called() + + +def test_codex_stream_postlude_error_still_falls_back(): + """Existing ``response.completed`` fallback must not regress.""" + agent = _make_codex_agent() + + mock_client = MagicMock() + mock_client.responses.stream.side_effect = RuntimeError( + "Didn't receive a `response.completed` event." + ) + + fallback_response = SimpleNamespace(output=[], status="completed") + with patch.object( + agent, "_run_codex_create_stream_fallback", return_value=fallback_response + ) as mock_fallback: + result = agent._run_codex_stream({}, client=mock_client) + + assert result is fallback_response + mock_fallback.assert_called_once() + + +# --------------------------------------------------------------------------- +# Fix B: friendly entitlement message +# --------------------------------------------------------------------------- + + +def test_summarize_api_error_decorates_xai_entitlement_403(): + """xAI's OAuth 403 must end with the subscribe-or-switch hint.""" + from run_agent import AIAgent + + error = RuntimeError( + "HTTP 403: Error code: 403 - {'code': 'The caller does not have permission " + "to execute the specified operation', 'error': 'You have either run out of " + "available resources or do not have an active Grok subscription. 
Manage " + "subscriptions at https://grok.com'}" + ) + summary = AIAgent._summarize_api_error(error) + assert "do not have an active Grok subscription" in summary + assert "SuperGrok" in summary + assert "/model" in summary + assert "https://grok.com" in summary + + +def test_summarize_api_error_decorates_xai_body_message(): + """SDK-style error with structured body must also get the hint.""" + from run_agent import AIAgent + + class _XaiErr(Exception): + status_code = 403 + body = { + "error": { + "message": ( + "You have either run out of available resources or do " + "not have an active Grok subscription. Manage at " + "https://grok.com" + ) + } + } + + summary = AIAgent._summarize_api_error(_XaiErr("403")) + assert "HTTP 403" in summary + assert "SuperGrok / X Premium" in summary + + +def test_summarize_api_error_idempotent_for_entitlement_hint(): + """Decorating twice must not double up the hint.""" + from run_agent import AIAgent + + raw = "HTTP 403: do not have an active Grok subscription" + once = AIAgent._decorate_xai_entitlement_error(raw) + twice = AIAgent._decorate_xai_entitlement_error(once) + assert once == twice + + +def test_summarize_api_error_passes_through_unrelated_errors(): + """Non-xAI / non-entitlement errors must not be touched.""" + from run_agent import AIAgent + + error = RuntimeError("HTTP 500: upstream is sad") + summary = AIAgent._summarize_api_error(error) + assert "SuperGrok" not in summary + assert "grok.com" not in summary + assert "upstream is sad" in summary + + +# --------------------------------------------------------------------------- +# Fix C: reasoning replay gating for xai-oauth +# --------------------------------------------------------------------------- + + +def _assistant_msg_with_encrypted_reasoning(text="hi from grok", encrypted="enc_blob"): + return { + "role": "assistant", + "content": text, + "codex_reasoning_items": [ + { + "type": "reasoning", + "id": "rs_xai_001", + "encrypted_content": encrypted, + "summary": 
[], + } + ], + } + + +def test_codex_reasoning_replay_default_includes_encrypted_content(): + """Native Codex backend (default) must still replay encrypted reasoning.""" + from agent.codex_responses_adapter import _chat_messages_to_responses_input + + msgs = [ + {"role": "user", "content": "hi"}, + _assistant_msg_with_encrypted_reasoning(), + {"role": "user", "content": "what's your name?"}, + ] + + items = _chat_messages_to_responses_input(msgs) + reasoning = [it for it in items if it.get("type") == "reasoning"] + assert len(reasoning) == 1 + assert reasoning[0]["encrypted_content"] == "enc_blob" + + +def test_codex_reasoning_replay_stripped_for_xai_oauth(): + """xAI OAuth surface must NOT receive replayed encrypted reasoning.""" + from agent.codex_responses_adapter import _chat_messages_to_responses_input + + msgs = [ + {"role": "user", "content": "hi"}, + _assistant_msg_with_encrypted_reasoning(), + {"role": "user", "content": "what's your name?"}, + ] + + items = _chat_messages_to_responses_input(msgs, is_xai_responses=True) + reasoning = [it for it in items if it.get("type") == "reasoning"] + assert reasoning == [] + + # The assistant's visible text must still survive — coherence across + # turns rides on the message text alone. 
+ assistant_items = [ + it for it in items + if it.get("role") == "assistant" or it.get("type") == "message" + ] + assert assistant_items, "assistant message must still be present" + + +def test_codex_transport_xai_request_omits_encrypted_content_include(): + """Verify the xAI ``include`` array no longer requests encrypted reasoning.""" + from agent.transports.codex import ResponsesApiTransport + + transport = ResponsesApiTransport() + kwargs = transport.build_kwargs( + model="grok-4.3", + messages=[ + {"role": "system", "content": "you are a helpful assistant"}, + {"role": "user", "content": "hi"}, + ], + tools=None, + instructions="you are a helpful assistant", + reasoning_config={"enabled": True, "effort": "medium"}, + is_xai_responses=True, + ) + # Without this gate, xAI would echo back encrypted_content blobs we'd + # then store in codex_reasoning_items and replay next turn — which is + # exactly the multi-turn failure mode we're closing. + assert kwargs["include"] == [] + + +def test_codex_transport_xai_strips_replayed_reasoning_in_input(): + """End-to-end: build_kwargs on xai-oauth must strip prior reasoning.""" + from agent.transports.codex import ResponsesApiTransport + + transport = ResponsesApiTransport() + kwargs = transport.build_kwargs( + model="grok-4.3", + messages=[ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "hi"}, + _assistant_msg_with_encrypted_reasoning(text="hi from grok"), + {"role": "user", "content": "what's your name?"}, + ], + tools=None, + instructions="sys", + reasoning_config={"enabled": True, "effort": "medium"}, + is_xai_responses=True, + ) + input_items = kwargs["input"] + reasoning_items = [it for it in input_items if it.get("type") == "reasoning"] + assert reasoning_items == [] + + +def test_codex_transport_native_codex_still_replays_reasoning_in_input(): + """Regression guard: openai-codex must keep the existing replay path.""" + from agent.transports.codex import ResponsesApiTransport + + transport = 
ResponsesApiTransport() + kwargs = transport.build_kwargs( + model="gpt-5-codex", + messages=[ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "hi"}, + _assistant_msg_with_encrypted_reasoning(text="hi from codex"), + {"role": "user", "content": "next"}, + ], + tools=None, + instructions="sys", + reasoning_config={"enabled": True, "effort": "medium"}, + is_xai_responses=False, + ) + input_items = kwargs["input"] + reasoning_items = [it for it in input_items if it.get("type") == "reasoning"] + assert len(reasoning_items) == 1 + assert reasoning_items[0]["encrypted_content"] == "enc_blob" + # Native Codex still asks for encrypted_content back. + assert "reasoning.encrypted_content" in kwargs.get("include", [])