diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py index 6fe9dc5bc64..adea34d094c 100644 --- a/agent/codex_responses_adapter.py +++ b/agent/codex_responses_adapter.py @@ -251,13 +251,16 @@ def _chat_messages_to_responses_input( ) -> List[Dict[str, Any]]: """Convert internal chat-style messages to Responses input items. - ``is_xai_responses=True`` strips ``encrypted_content`` from replayed - reasoning items. xAI's OAuth/SuperGrok ``/v1/responses`` surface - rejects encrypted reasoning blobs minted by prior turns: the request - streams an ``error`` SSE frame before ``response.created`` and the - OpenAI SDK collapses it into a generic stream-ordering error. Native - Codex (chatgpt.com backend-api) DOES accept replayed encrypted_content - — keep the default off. + ``is_xai_responses`` is kept for transport signature compatibility but + no longer suppresses encrypted reasoning replay. Earlier (PR #26644, + May 2026) we believed xAI's OAuth/SuperGrok ``/v1/responses`` surface + rejected replayed ``encrypted_content`` reasoning items minted by + prior turns, and we stripped them. That decision was wrong — xAI + explicitly relies on Hermes threading encrypted reasoning back across + turns for cross-turn coherence (the whole point of their partnership + integration). We now replay encrypted reasoning on every Responses + transport (xAI, native Codex, custom relays) and let xAI tell us + explicitly if a specific surface ever rejects a payload. """ items: List[Dict[str, Any]] = [] seen_item_ids: set = set() @@ -284,17 +287,12 @@ def _chat_messages_to_responses_input( if role == "assistant": # Replay encrypted reasoning items from previous turns # so the API can maintain coherent reasoning chains. - # - # xAI OAuth (SuperGrok/Premium) rejects replayed - # ``encrypted_content`` reasoning items minted by prior - # turns — see _chat_messages_to_responses_input docstring. - # When ``is_xai_responses`` is set we drop the replay - # entirely; Grok still reasons on each turn server-side, - # we just don't try to thread the prior turn's encrypted - # blob back in. + # This applies to every Responses transport including + # xAI — see _chat_messages_to_responses_input docstring + # for the May 2026 reversal of the earlier xAI gate. codex_reasoning = msg.get("codex_reasoning_items") has_codex_reasoning = False - if isinstance(codex_reasoning, list) and not is_xai_responses: + if isinstance(codex_reasoning, list): for ri in codex_reasoning: if isinstance(ri, dict) and ri.get("encrypted_content"): item_id = ri.get("id") diff --git a/agent/transports/codex.py b/agent/transports/codex.py index 3661ea17a3e..27264f2f38f 100644 --- a/agent/transports/codex.py +++ b/agent/transports/codex.py @@ -116,14 +116,11 @@ class ResponsesApiTransport(ProviderTransport): if reasoning_enabled and is_xai_responses: from agent.model_metadata import grok_supports_reasoning_effort - # NOTE: Hermes does NOT ask xAI to return ``reasoning.encrypted_content`` - # any more. xAI's OAuth/SuperGrok ``/v1/responses`` surface rejects - # replayed encrypted reasoning items on turn 2+ — see - # _chat_messages_to_responses_input docstring. Requesting the field - # back would just have us cache something we then must strip. Grok - # still reasons natively each turn; coherence across turns rides on - # the visible message text alone. - kwargs["include"] = [] + # Ask xAI to echo back encrypted reasoning items so we can + # replay them on subsequent turns for cross-turn coherence. + # See agent/codex_responses_adapter._chat_messages_to_responses_input + # for the May 2026 reversal of the earlier suppression gate. + kwargs["include"] = ["reasoning.encrypted_content"] # xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3 # / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though # those models reason natively. Only send the effort dial when diff --git a/tests/agent/transports/test_codex_transport.py b/tests/agent/transports/test_codex_transport.py index 82251823790..a0470fa8de8 100644 --- a/tests/agent/transports/test_codex_transport.py +++ b/tests/agent/transports/test_codex_transport.py @@ -196,14 +196,13 @@ class TestCodexBuildKwargs: ) # xAI Responses receives reasoning.effort on the allowlisted models. assert kw.get("reasoning") == {"effort": "high"} - # As of May 2026 we deliberately do NOT request - # reasoning.encrypted_content back from xAI — the OAuth/SuperGrok - # surface rejects replayed encrypted reasoning items on turn 2+ - # (the multi-turn "Expected to have received response.created - # before error" failure). Grok still reasons natively each turn; - # we just don't try to thread the prior turn's encrypted blob back - # in. See tests/run_agent/test_codex_xai_oauth_recovery.py. - assert "reasoning.encrypted_content" not in kw.get("include", []) + # As of May 2026 (post-revert of PR #26644) we DO request + # reasoning.encrypted_content back from xAI so we can replay it + # across turns for cross-turn coherence — xAI explicitly relies + # on this for their partnership integration. See + # tests/run_agent/test_codex_xai_oauth_recovery.py for the + # full history. + assert "reasoning.encrypted_content" in kw.get("include", []) def test_xai_reasoning_disabled_no_reasoning_key(self, transport): messages = [{"role": "user", "content": "Hi"}] @@ -229,9 +228,9 @@ class TestCodexBuildKwargs: # api.x.ai 400s with "Model X does not support parameter reasoningEffort" # on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*. # Those models reason natively but don't expose the dial. The transport - # must omit the `reasoning` key for them. As of May 2026 we also no - # longer request ``reasoning.encrypted_content`` back from xAI on ANY - # model — see test_xai_reasoning_effort_passed for the rationale. + # must omit the `reasoning` key for them. As of May 2026 we DO request + # ``reasoning.encrypted_content`` back from xAI on every model — + # see test_xai_reasoning_effort_passed for the rationale. def test_xai_grok_4_omits_reasoning_effort(self, transport): """grok-4 / grok-4-0709 reject reasoning.effort with HTTP 400.""" @@ -245,9 +244,9 @@ class TestCodexBuildKwargs: assert "reasoning" not in kw, ( f"{model} must not receive a reasoning key (xAI rejects it)" ) - # We no longer ask xAI for encrypted_content back (see comment - # above) — verify the include list is empty. - assert "reasoning.encrypted_content" not in kw.get("include", []) + # Even without the effort dial we still ask xAI to echo back + # encrypted reasoning content so it can be replayed next turn. + assert "reasoning.encrypted_content" in kw.get("include", []) def test_xai_grok_4_fast_omits_reasoning_effort(self, transport): """grok-4-fast and grok-4-1-fast variants reject reasoning.effort.""" diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py index ea26783f10f..585be09ab4d 100644 --- a/tests/run_agent/test_codex_xai_oauth_recovery.py +++ b/tests/run_agent/test_codex_xai_oauth_recovery.py @@ -19,11 +19,15 @@ Three distinct failure modes the user community hit during rollout: one-line hint pointing the user at https://grok.com and ``/model``. 3. Multi-turn replay of ``codex_reasoning_items`` (with - ``encrypted_content``) is now suppressed for ``is_xai_responses=True`` - in ``_chat_messages_to_responses_input``. xAI's OAuth/SuperGrok - surface rejects replayed encrypted reasoning items; Grok still - reasons natively each turn, so coherence rides on visible message - text. + ``encrypted_content``) was briefly suppressed for ``is_xai_responses`` + in PR #26644 on the theory that xAI's OAuth/SuperGrok surface + rejected replayed encrypted reasoning items. That suppression was + reverted shortly after: xAI confirmed they explicitly want Hermes to + thread encrypted reasoning back across turns, and the original + multi-turn failure mode was actually the prelude-SSE issue closed by + Fix A above. The remaining tests here lock in that xAI receives + replayed reasoning AND that we ask xAI to echo it back in the + ``include`` array. """ from types import SimpleNamespace @@ -316,8 +320,15 @@ def test_codex_reasoning_replay_default_includes_encrypted_content(): assert reasoning[0]["encrypted_content"] == "enc_blob" -def test_codex_reasoning_replay_stripped_for_xai_oauth(): - """xAI OAuth surface must NOT receive replayed encrypted reasoning.""" +def test_codex_reasoning_replay_includes_encrypted_content_for_xai(): + """xAI must receive replayed encrypted reasoning items (May 2026 reversal). + + Earlier we stripped these on the theory that the OAuth/SuperGrok + surface rejected them. xAI subsequently confirmed they explicitly + want Hermes to thread encrypted reasoning back across turns for + cross-turn coherence — that's the whole point of the partnership + integration. + """ from agent.codex_responses_adapter import _chat_messages_to_responses_input msgs = [ @@ -328,10 +339,13 @@ def test_codex_reasoning_replay_stripped_for_xai_oauth(): items = _chat_messages_to_responses_input(msgs, is_xai_responses=True) reasoning = [it for it in items if it.get("type") == "reasoning"] - assert reasoning == [] + assert len(reasoning) == 1, ( + "xAI must receive replayed reasoning items — see docstring for the " + "May 2026 reversal of the earlier suppression gate." + ) + assert reasoning[0]["encrypted_content"] == "enc_blob" - # The assistant's visible text must still survive — coherence across - # turns rides on the message text alone. + # And the assistant's visible text must still be present alongside it. assistant_items = [ it for it in items if it.get("role") == "assistant" or it.get("type") == "message" @@ -339,8 +353,12 @@ def test_codex_reasoning_replay_stripped_for_xai_oauth(): assert assistant_items, "assistant message must still be present" -def test_codex_transport_xai_request_omits_encrypted_content_include(): - """Verify the xAI ``include`` array no longer requests encrypted reasoning.""" +def test_codex_transport_xai_request_includes_encrypted_content(): + """xAI ``include`` array must request ``reasoning.encrypted_content``. + + This is the request-side half of the May 2026 reversal: we ask xAI + to echo back encrypted reasoning so the next turn can replay it. + """ from agent.transports.codex import ResponsesApiTransport transport = ResponsesApiTransport() @@ -355,14 +373,11 @@ def test_codex_transport_xai_request_omits_encrypted_content_include(): reasoning_config={"enabled": True, "effort": "medium"}, is_xai_responses=True, ) - # Without this gate, xAI would echo back encrypted_content blobs we'd - # then store in codex_reasoning_items and replay next turn — which is - # exactly the multi-turn failure mode we're closing. - assert kwargs["include"] == [] + assert kwargs["include"] == ["reasoning.encrypted_content"] -def test_codex_transport_xai_strips_replayed_reasoning_in_input(): - """End-to-end: build_kwargs on xai-oauth must strip prior reasoning.""" +def test_codex_transport_xai_replays_reasoning_in_input(): + """End-to-end: build_kwargs on xAI must replay prior encrypted reasoning.""" from agent.transports.codex import ResponsesApiTransport transport = ResponsesApiTransport() @@ -381,7 +396,8 @@ def test_codex_transport_xai_strips_replayed_reasoning_in_input(): ) input_items = kwargs["input"] reasoning_items = [it for it in input_items if it.get("type") == "reasoning"] - assert reasoning_items == [] + assert len(reasoning_items) == 1 + assert reasoning_items[0]["encrypted_content"] == "enc_blob" def test_codex_transport_native_codex_still_replays_reasoning_in_input():