fix(xai-oauth): recover from prelude SSE errors, gate reasoning replay, surface entitlement 403s (#26644)

Three fixes for the May 2026 xAI OAuth (SuperGrok / X Premium) rollout failures: - _run_codex_stream: when openai SDK raises RuntimeError("Expected to have received `response.created` before `<type>`"), retry once then fall back to responses.create(stream=True) — same path used for missing-response.completed postlude. Fallback surfaces the real provider error with body+status_code intact. Also fixes #8133 (response.in_progress prelude on custom relays) and #14634 (codex.rate_limits prelude on codex-lb). - _summarize_api_error: when error body matches xAI's entitlement shape, append a one-line hint pointing to https://grok.com and /model. Once-only, applies to both auxiliary warnings and main-loop error surfacing. - _chat_messages_to_responses_input: new is_xai_responses kwarg drops replayed codex_reasoning_items (encrypted_content) before they reach xAI. Also drops reasoning.encrypted_content from the xAI include array. Native Codex behavior unchanged. Grok still reasons natively each turn; coherence rides on visible message text alone. Closes #8133, #14634.
2026-05-18 04:41:56 +00:00 · 2026-05-15 16:35:12 -07:00 · 2026-05-15 16:35:12 -07:00 · 31ba2b0cbc
commit 31ba2b0cbc
parent 4aec25bc44
5 changed files with 481 additions and 18 deletions
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@ -244,8 +244,21 @@ def _normalize_responses_message_status(value: Any, *, default: str = "completed
    return default


-def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """Convert internal chat-style messages to Responses input items."""
+def _chat_messages_to_responses_input(
+    messages: List[Dict[str, Any]],
+    *,
+    is_xai_responses: bool = False,
+) -> List[Dict[str, Any]]:
+    """Convert internal chat-style messages to Responses input items.
+
+    ``is_xai_responses=True`` drops replayed ``codex_reasoning_items``
+    entirely.  xAI's OAuth/SuperGrok ``/v1/responses`` surface
+    rejects encrypted reasoning blobs minted by prior turns: the request
+    streams an ``error`` SSE frame before ``response.created`` and the
+    OpenAI SDK collapses it into a generic stream-ordering error.  Native
+    Codex (chatgpt.com backend-api) DOES accept replayed encrypted_content
+    — keep the default off.
+    """
    items: List[Dict[str, Any]] = []
    seen_item_ids: set = set()

@ -271,9 +284,17 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
            if role == "assistant":
                # Replay encrypted reasoning items from previous turns
                # so the API can maintain coherent reasoning chains.
+                #
+                # xAI OAuth (SuperGrok/Premium) rejects replayed
+                # ``encrypted_content`` reasoning items minted by prior
+                # turns — see _chat_messages_to_responses_input docstring.
+                # When ``is_xai_responses`` is set we drop the replay
+                # entirely; Grok still reasons on each turn server-side,
+                # we just don't try to thread the prior turn's encrypted
+                # blob back in.
                codex_reasoning = msg.get("codex_reasoning_items")
                has_codex_reasoning = False
-                if isinstance(codex_reasoning, list):
+                if isinstance(codex_reasoning, list) and not is_xai_responses:
                    for ri in codex_reasoning:
                        if isinstance(ri, dict) and ri.get("encrypted_content"):
                            item_id = ri.get("id")
--- a/agent/transports/codex.py
+++ b/agent/transports/codex.py
@ -24,7 +24,10 @@ class ResponsesApiTransport(ProviderTransport):
    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
        """Convert OpenAI chat messages to Responses API input items."""
        from agent.codex_responses_adapter import _chat_messages_to_responses_input
-        return _chat_messages_to_responses_input(messages)
+        return _chat_messages_to_responses_input(
+            messages,
+            is_xai_responses=bool(kwargs.get("is_xai_responses")),
+        )

    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
        """Convert OpenAI tool schemas to Responses API function definitions."""
@ -93,7 +96,10 @@ class ResponsesApiTransport(ProviderTransport):
        kwargs = {
            "model": model,
            "instructions": instructions,
-            "input": _chat_messages_to_responses_input(payload_messages),
+            "input": _chat_messages_to_responses_input(
+                payload_messages,
+                is_xai_responses=is_xai_responses,
+            ),
            "tools": response_tools,
            "store": False,
        }
@ -110,7 +116,14 @@ class ResponsesApiTransport(ProviderTransport):
        if reasoning_enabled and is_xai_responses:
            from agent.model_metadata import grok_supports_reasoning_effort

-            kwargs["include"] = ["reasoning.encrypted_content"]
+            # NOTE: Hermes does NOT ask xAI to return ``reasoning.encrypted_content``
+            # any more.  xAI's OAuth/SuperGrok ``/v1/responses`` surface rejects
+            # replayed encrypted reasoning items on turn 2+ — see
+            # _chat_messages_to_responses_input docstring.  Requesting the field
+            # back would just have us cache something we then must strip.  Grok
+            # still reasons natively each turn; coherence across turns rides on
+            # the visible message text alone.
+            kwargs["include"] = []
            # xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
            # / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
            # those models reason natively. Only send the effort dial when
--- a/run_agent.py
+++ b/run_agent.py
@ -4966,6 +4966,45 @@ class AIAgent:
        trajectory = self._convert_to_trajectory_format(messages, user_query, completed)
        _save_trajectory_to_file(trajectory, self.model, completed)

+    @staticmethod
+    def _decorate_xai_entitlement_error(detail: str) -> str:
+        """Append a friendly hint when xAI's OAuth surface returns an
+        entitlement-shaped error.
+
+        xAI's ``/v1/responses`` endpoint replies to OAuth tokens that lack a
+        SuperGrok / X Premium subscription with HTTP 403 carrying a body like::
+
+            {"code": "The caller does not have permission to execute the
+             specified operation", "error": "You have either run out of
+             available resources or do not have an active Grok subscription.
+             Manage subscriptions at https://grok.com/..."}
+
+        The raw text is useful but the action the user needs to take (subscribe
+        on grok.com, or switch providers with ``/model``) isn't obvious from
+        the wire format.  Detect the entitlement shape and append a hint.
+
+        Matched once per detail string — won't double-decorate if the upstream
+        already concatenated the same text.
+        """
+        if not detail:
+            return detail
+        lower = detail.lower()
+        is_entitlement = (
+            "do not have an active grok subscription" in lower
+            or ("out of available resources" in lower and "grok" in lower)
+            or ("does not have permission" in lower and "grok" in lower)
+        )
+        if not is_entitlement:
+            return detail
+        hint = (
+            " — xAI OAuth account lacks SuperGrok / X Premium entitlement for "
+            "this model. Subscribe at https://grok.com or run `/model` to "
+            "switch providers."
+        )
+        if hint.strip() in detail:
+            return detail
+        return f"{detail}{hint}"
+
    @staticmethod
    def _summarize_api_error(error: Exception) -> str:
        """Extract a human-readable one-liner from an API error.
@ -4999,12 +5038,12 @@ class AIAgent:
            if msg:
                status_code = getattr(error, "status_code", None)
                prefix = f"HTTP {status_code}: " if status_code else ""
-                return f"{prefix}{msg[:300]}"
+                return AIAgent._decorate_xai_entitlement_error(f"{prefix}{msg[:300]}")

        # Fallback: truncate the raw string but give more room than 200 chars
        status_code = getattr(error, "status_code", None)
        prefix = f"HTTP {status_code}: " if status_code else ""
-        return f"{prefix}{raw[:500]}"
+        return AIAgent._decorate_xai_entitlement_error(f"{prefix}{raw[:500]}")

    def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]:
        if not key:
@ -7056,18 +7095,48 @@ class AIAgent:
            except RuntimeError as exc:
                err_text = str(exc)
                missing_completed = "response.completed" in err_text
-                if missing_completed and attempt < max_stream_retries:
+                # The OpenAI SDK's Responses streaming state machine raises
+                # ``RuntimeError("Expected to have received `response.created`
+                # before `<event-type>`")`` when the first SSE event from the
+                # server is anything other than ``response.created`` — and it
+                # discards the event's payload before we can read it.  Three
+                # real-world backends emit a different first frame:
+                #
+                #   * xAI on grok-4.x OAuth — sends ``error`` (issues
+                #     reported around the May 2026 SuperGrok rollout when
+                #     multi-turn conversations replay encrypted reasoning
+                #     content the OAuth tier rejects)
+                #   * codex-lb relays — send ``codex.rate_limits`` (#14634)
+                #   * custom Responses relays — send ``response.in_progress``
+                #     (#8133)
+                #
+                # In all three cases the underlying byte stream is still
+                # readable: a plain ``responses.create(stream=True)``
+                # fallback succeeds and surfaces the real provider error as
+                # a normal exception with body+status_code attached, which
+                # ``_summarize_api_error`` can then translate into a useful
+                # user-facing line.  Treat ``response.created`` prelude
+                # errors the same way we already treat ``response.completed``
+                # postlude errors.
+                prelude_error = (
+                    "Expected to have received `response.created`" in err_text
+                    or "Expected to have received \"response.created\"" in err_text
+                )
+                if (missing_completed or prelude_error) and attempt < max_stream_retries:
                    logger.debug(
-                        "Responses stream closed before completion (attempt %s/%s); retrying. %s",
+                        "Responses stream %s (attempt %s/%s); retrying. %s",
+                        "prelude rejected" if prelude_error else "closed before completion",
                        attempt + 1,
                        max_stream_retries + 1,
                        self._client_log_context(),
                    )
                    continue
-                if missing_completed:
+                if missing_completed or prelude_error:
                    logger.debug(
-                        "Responses stream did not emit response.completed; falling back to create(stream=True). %s",
+                        "Responses stream %s; falling back to create(stream=True). %s err=%s",
+                        "rejected before response.created" if prelude_error else "did not emit response.completed",
                        self._client_log_context(),
+                        err_text,
                    )
                    return self._run_codex_create_stream_fallback(api_kwargs, client=active_client)
                raise
--- a/tests/agent/transports/test_codex_transport.py
+++ b/tests/agent/transports/test_codex_transport.py
@ -194,9 +194,16 @@ class TestCodexBuildKwargs:
            is_xai_responses=True,
            reasoning_config={"effort": "high"},
        )
-        # xAI Responses must receive both encrypted reasoning content and the effort
+        # xAI Responses receives reasoning.effort on the allowlisted models.
        assert kw.get("reasoning") == {"effort": "high"}
-        assert "reasoning.encrypted_content" in kw.get("include", [])
+        # As of May 2026 we deliberately do NOT request
+        # reasoning.encrypted_content back from xAI — the OAuth/SuperGrok
+        # surface rejects replayed encrypted reasoning items on turn 2+
+        # (the multi-turn "Expected to have received response.created
+        # before error" failure).  Grok still reasons natively each turn;
+        # we just don't try to thread the prior turn's encrypted blob back
+        # in.  See tests/run_agent/test_codex_xai_oauth_recovery.py.
+        assert "reasoning.encrypted_content" not in kw.get("include", [])

    def test_xai_reasoning_disabled_no_reasoning_key(self, transport):
        messages = [{"role": "user", "content": "Hi"}]
@ -222,8 +229,9 @@ class TestCodexBuildKwargs:
    # api.x.ai 400s with "Model X does not support parameter reasoningEffort"
    # on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*.
    # Those models reason natively but don't expose the dial. The transport
-    # must omit the `reasoning` key for them while keeping the encrypted
-    # reasoning content include so we can capture native reasoning tokens.
+    # must omit the `reasoning` key for them.  As of May 2026 we also no
+    # longer request ``reasoning.encrypted_content`` back from xAI on ANY
+    # model — see test_xai_reasoning_effort_passed for the rationale.

    def test_xai_grok_4_omits_reasoning_effort(self, transport):
        """grok-4 / grok-4-0709 reject reasoning.effort with HTTP 400."""
@ -237,8 +245,9 @@ class TestCodexBuildKwargs:
            assert "reasoning" not in kw, (
                f"{model} must not receive a reasoning key (xAI rejects it)"
            )
-            # Still capture native reasoning tokens
-            assert "reasoning.encrypted_content" in kw.get("include", [])
+            # We no longer ask xAI for encrypted_content back (see comment
+            # above) — verify it is absent from the include list.
+            assert "reasoning.encrypted_content" not in kw.get("include", [])

    def test_xai_grok_4_fast_omits_reasoning_effort(self, transport):
        """grok-4-fast and grok-4-1-fast variants reject reasoning.effort."""
--- a/tests/run_agent/test_codex_xai_oauth_recovery.py
+++ b/tests/run_agent/test_codex_xai_oauth_recovery.py
@ -0,0 +1,351 @@
+"""Regression tests for the May 2026 xAI OAuth (SuperGrok / X Premium) bugs.
+
+Three distinct failure modes the user community hit during rollout:
+
+1. ``RuntimeError("Expected to have received `response.created` before
+   `error`")`` on multi-turn xAI OAuth conversations.  The OpenAI SDK's
+   Responses streaming state machine collapses an upstream ``error`` SSE
+   frame into a generic stream-ordering error.  ``_run_codex_stream``
+   now treats this the same way it already treats the missing
+   ``response.completed`` postlude — fall back to a plain
+   ``responses.create(stream=True)`` which surfaces the real provider
+   error.  Also closes #8133 (``response.in_progress`` prelude on custom
+   relays) and #14634 (``codex.rate_limits`` prelude on codex-lb).
+
+2. The HTTP 403 entitlement error xAI returns when an OAuth token lacks
+   SuperGrok / X Premium ("You have either run out of available
+   resources or do not have an active Grok subscription") used to read
+   as a confusing wall of JSON.  ``_summarize_api_error`` now appends a
+   one-line hint pointing the user at https://grok.com and ``/model``.
+
+3. Multi-turn replay of ``codex_reasoning_items`` (with
+   ``encrypted_content``) is now suppressed for ``is_xai_responses=True``
+   in ``_chat_messages_to_responses_input``.  xAI's OAuth/SuperGrok
+   surface rejects replayed encrypted reasoning items; Grok still
+   reasons natively each turn, so coherence rides on visible message
+   text.
+"""
+
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Fix A: prelude error fallback
+# ---------------------------------------------------------------------------
+
+
+def _make_codex_agent():
+    """Build a minimal AIAgent wired for codex_responses streaming tests."""
+    from run_agent import AIAgent
+
+    agent = AIAgent(
+        api_key="test-key",
+        base_url="https://api.x.ai/v1",
+        model="grok-4.3",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+    agent.api_mode = "codex_responses"
+    agent.provider = "xai-oauth"
+    agent._interrupt_requested = False
+    return agent
+
+
+@pytest.mark.parametrize(
+    "prelude_event_type",
+    [
+        "error",                  # xAI OAuth multi-turn
+        "codex.rate_limits",      # codex-lb relays (#14634)
+        "response.in_progress",   # custom Responses relays (#8133)
+    ],
+)
+def test_codex_stream_prelude_error_falls_back_to_create_stream(prelude_event_type):
+    """The SDK's prelude RuntimeError must trigger the create(stream=True) fallback.
+
+    When the first SSE event isn't ``response.created``, openai-python
+    raises RuntimeError before our event loop sees anything.  We must
+    detect that, retry once, then fall back to ``create(stream=True)``
+    which surfaces the real provider error or a real response.
+    """
+    agent = _make_codex_agent()
+
+    prelude_error = RuntimeError(
+        f"Expected to have received `response.created` before `{prelude_event_type}`"
+    )
+
+    mock_client = MagicMock()
+    mock_client.responses.stream.side_effect = prelude_error
+
+    fallback_response = SimpleNamespace(
+        output=[SimpleNamespace(
+            type="message",
+            content=[SimpleNamespace(type="output_text", text="fallback ok")],
+        )],
+        status="completed",
+    )
+
+    with patch.object(
+        agent, "_run_codex_create_stream_fallback", return_value=fallback_response
+    ) as mock_fallback:
+        result = agent._run_codex_stream({}, client=mock_client)
+
+    assert result is fallback_response
+    mock_fallback.assert_called_once_with({}, client=mock_client)
+
+
+def test_codex_stream_prelude_error_retries_once_before_fallback():
+    """The retry path must fire one extra stream attempt before falling back."""
+    agent = _make_codex_agent()
+
+    call_count = {"n": 0}
+
+    def stream_side_effect(**kwargs):
+        call_count["n"] += 1
+        raise RuntimeError(
+            "Expected to have received `response.created` before `error`"
+        )
+
+    mock_client = MagicMock()
+    mock_client.responses.stream.side_effect = stream_side_effect
+
+    fallback_response = SimpleNamespace(output=[], status="completed")
+    with patch.object(
+        agent, "_run_codex_create_stream_fallback", return_value=fallback_response
+    ) as mock_fallback:
+        agent._run_codex_stream({}, client=mock_client)
+
+    # max_stream_retries=1 → initial attempt + one retry → 2 stream calls,
+    # THEN the fallback path runs.
+    assert call_count["n"] == 2
+    mock_fallback.assert_called_once()
+
+
+def test_codex_stream_unrelated_runtimeerror_still_raises():
+    """RuntimeErrors that aren't prelude/postlude shape must propagate."""
+    agent = _make_codex_agent()
+
+    mock_client = MagicMock()
+    mock_client.responses.stream.side_effect = RuntimeError("something else broke")
+
+    with patch.object(agent, "_run_codex_create_stream_fallback") as mock_fallback:
+        with pytest.raises(RuntimeError, match="something else broke"):
+            agent._run_codex_stream({}, client=mock_client)
+
+    mock_fallback.assert_not_called()
+
+
+def test_codex_stream_postlude_error_still_falls_back():
+    """Existing ``response.completed`` fallback must not regress."""
+    agent = _make_codex_agent()
+
+    mock_client = MagicMock()
+    mock_client.responses.stream.side_effect = RuntimeError(
+        "Didn't receive a `response.completed` event."
+    )
+
+    fallback_response = SimpleNamespace(output=[], status="completed")
+    with patch.object(
+        agent, "_run_codex_create_stream_fallback", return_value=fallback_response
+    ) as mock_fallback:
+        result = agent._run_codex_stream({}, client=mock_client)
+
+    assert result is fallback_response
+    mock_fallback.assert_called_once()
+
+
+# ---------------------------------------------------------------------------
+# Fix B: friendly entitlement message
+# ---------------------------------------------------------------------------
+
+
+def test_summarize_api_error_decorates_xai_entitlement_403():
+    """xAI's OAuth 403 must end with the subscribe-or-switch hint."""
+    from run_agent import AIAgent
+
+    error = RuntimeError(
+        "HTTP 403: Error code: 403 - {'code': 'The caller does not have permission "
+        "to execute the specified operation', 'error': 'You have either run out of "
+        "available resources or do not have an active Grok subscription. Manage "
+        "subscriptions at https://grok.com'}"
+    )
+    summary = AIAgent._summarize_api_error(error)
+    assert "do not have an active Grok subscription" in summary
+    assert "SuperGrok" in summary
+    assert "/model" in summary
+    assert "https://grok.com" in summary
+
+
+def test_summarize_api_error_decorates_xai_body_message():
+    """SDK-style error with structured body must also get the hint."""
+    from run_agent import AIAgent
+
+    class _XaiErr(Exception):
+        status_code = 403
+        body = {
+            "error": {
+                "message": (
+                    "You have either run out of available resources or do "
+                    "not have an active Grok subscription. Manage at "
+                    "https://grok.com"
+                )
+            }
+        }
+
+    summary = AIAgent._summarize_api_error(_XaiErr("403"))
+    assert "HTTP 403" in summary
+    assert "SuperGrok / X Premium" in summary
+
+
+def test_summarize_api_error_idempotent_for_entitlement_hint():
+    """Decorating twice must not double up the hint."""
+    from run_agent import AIAgent
+
+    raw = "HTTP 403: do not have an active Grok subscription"
+    once = AIAgent._decorate_xai_entitlement_error(raw)
+    twice = AIAgent._decorate_xai_entitlement_error(once)
+    assert once == twice
+
+
+def test_summarize_api_error_passes_through_unrelated_errors():
+    """Non-xAI / non-entitlement errors must not be touched."""
+    from run_agent import AIAgent
+
+    error = RuntimeError("HTTP 500: upstream is sad")
+    summary = AIAgent._summarize_api_error(error)
+    assert "SuperGrok" not in summary
+    assert "grok.com" not in summary
+    assert "upstream is sad" in summary
+
+
+# ---------------------------------------------------------------------------
+# Fix C: reasoning replay gating for xai-oauth
+# ---------------------------------------------------------------------------
+
+
+def _assistant_msg_with_encrypted_reasoning(text="hi from grok", encrypted="enc_blob"):
+    return {
+        "role": "assistant",
+        "content": text,
+        "codex_reasoning_items": [
+            {
+                "type": "reasoning",
+                "id": "rs_xai_001",
+                "encrypted_content": encrypted,
+                "summary": [],
+            }
+        ],
+    }
+
+
+def test_codex_reasoning_replay_default_includes_encrypted_content():
+    """Native Codex backend (default) must still replay encrypted reasoning."""
+    from agent.codex_responses_adapter import _chat_messages_to_responses_input
+
+    msgs = [
+        {"role": "user", "content": "hi"},
+        _assistant_msg_with_encrypted_reasoning(),
+        {"role": "user", "content": "what's your name?"},
+    ]
+
+    items = _chat_messages_to_responses_input(msgs)
+    reasoning = [it for it in items if it.get("type") == "reasoning"]
+    assert len(reasoning) == 1
+    assert reasoning[0]["encrypted_content"] == "enc_blob"
+
+
+def test_codex_reasoning_replay_stripped_for_xai_oauth():
+    """xAI OAuth surface must NOT receive replayed encrypted reasoning."""
+    from agent.codex_responses_adapter import _chat_messages_to_responses_input
+
+    msgs = [
+        {"role": "user", "content": "hi"},
+        _assistant_msg_with_encrypted_reasoning(),
+        {"role": "user", "content": "what's your name?"},
+    ]
+
+    items = _chat_messages_to_responses_input(msgs, is_xai_responses=True)
+    reasoning = [it for it in items if it.get("type") == "reasoning"]
+    assert reasoning == []
+
+    # The assistant's visible text must still survive — coherence across
+    # turns rides on the message text alone.
+    assistant_items = [
+        it for it in items
+        if it.get("role") == "assistant" or it.get("type") == "message"
+    ]
+    assert assistant_items, "assistant message must still be present"
+
+
+def test_codex_transport_xai_request_omits_encrypted_content_include():
+    """Verify the xAI ``include`` array no longer requests encrypted reasoning."""
+    from agent.transports.codex import ResponsesApiTransport
+
+    transport = ResponsesApiTransport()
+    kwargs = transport.build_kwargs(
+        model="grok-4.3",
+        messages=[
+            {"role": "system", "content": "you are a helpful assistant"},
+            {"role": "user", "content": "hi"},
+        ],
+        tools=None,
+        instructions="you are a helpful assistant",
+        reasoning_config={"enabled": True, "effort": "medium"},
+        is_xai_responses=True,
+    )
+    # Without this gate, xAI would echo back encrypted_content blobs we'd
+    # then store in codex_reasoning_items and replay next turn — which is
+    # exactly the multi-turn failure mode we're closing.
+    assert kwargs["include"] == []
+
+
+def test_codex_transport_xai_strips_replayed_reasoning_in_input():
+    """End-to-end: build_kwargs on xai-oauth must strip prior reasoning."""
+    from agent.transports.codex import ResponsesApiTransport
+
+    transport = ResponsesApiTransport()
+    kwargs = transport.build_kwargs(
+        model="grok-4.3",
+        messages=[
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "hi"},
+            _assistant_msg_with_encrypted_reasoning(text="hi from grok"),
+            {"role": "user", "content": "what's your name?"},
+        ],
+        tools=None,
+        instructions="sys",
+        reasoning_config={"enabled": True, "effort": "medium"},
+        is_xai_responses=True,
+    )
+    input_items = kwargs["input"]
+    reasoning_items = [it for it in input_items if it.get("type") == "reasoning"]
+    assert reasoning_items == []
+
+
+def test_codex_transport_native_codex_still_replays_reasoning_in_input():
+    """Regression guard: openai-codex must keep the existing replay path."""
+    from agent.transports.codex import ResponsesApiTransport
+
+    transport = ResponsesApiTransport()
+    kwargs = transport.build_kwargs(
+        model="gpt-5-codex",
+        messages=[
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "hi"},
+            _assistant_msg_with_encrypted_reasoning(text="hi from codex"),
+            {"role": "user", "content": "next"},
+        ],
+        tools=None,
+        instructions="sys",
+        reasoning_config={"enabled": True, "effort": "medium"},
+        is_xai_responses=False,
+    )
+    input_items = kwargs["input"]
+    reasoning_items = [it for it in input_items if it.get("type") == "reasoning"]
+    assert len(reasoning_items) == 1
+    assert reasoning_items[0]["encrypted_content"] == "enc_blob"
+    # Native Codex still asks for encrypted_content back.
+    assert "reasoning.encrypted_content" in kwargs.get("include", [])