fix(xai): restore encrypted reasoning replay across turns

xAI partner integration requires Hermes to thread `encrypted_content`
reasoning items back to the Responses API on every turn so Grok can
maintain cross-turn reasoning coherence. PR #26644 (May 15) gated this
off for `is_xai_responses` on the theory that the OAuth/SuperGrok
surface rejected replayed encrypted blobs and produced the multi-turn
"Expected to have received `response.created` before `error`"
failure. That diagnosis was wrong — the prelude-SSE fallback added in
the same PR is what actually fixed that failure mode. Suppressing the
replay was an unnecessary side-effect that broke the whole point of
xAI's partnership integration.

Changes:
- agent/codex_responses_adapter.py — drop the `is_xai_responses` gate
  in `_chat_messages_to_responses_input`. Keep the kwarg in the
  signature for transport compatibility; update the docstring to
  document the May 2026 reversal.
- agent/transports/codex.py — restore
  `kwargs["include"] = ["reasoning.encrypted_content"]` on the xAI
  Responses path so xAI echoes encrypted reasoning back to us.
- tests/run_agent/test_codex_xai_oauth_recovery.py — flip the three
  xAI assertions (now: xAI MUST receive replayed reasoning AND we MUST
  include encrypted_content in the request).
- tests/agent/transports/test_codex_transport.py — flip the
  `include` assertions on `test_xai_reasoning_effort_passed` and
  `test_xai_grok_4_omits_reasoning_effort`; update the allowlist
  block comment.

The prelude-SSE fallback and the entitlement-403 surfacing fixes from
#26644 are untouched — they were independent fixes that happened to
ride along with the reasoning-replay gate.

Validation:
- Targeted: tests/run_agent/test_codex_xai_oauth_recovery.py +
  tests/agent/transports/test_codex_transport.py → 65/65 pass
- Broader: tests/agent/transports/ + tests/run_agent/ →
  1674 passed, 3 skipped, 0 failures
- E2E (real imports, isolated HERMES_HOME, ResponsesApiTransport
  build_kwargs): turn-1 request carries
  `include: ["reasoning.encrypted_content"]`; turn-2 input replays
  the encrypted_content blob from turn-1's
  `codex_reasoning_items`; native Codex unchanged.
This commit is contained in:
Teknium 2026-05-20 22:14:18 -07:00
parent 127b56a61a
commit b4afc6546e
4 changed files with 67 additions and 57 deletions

View file

@ -251,13 +251,16 @@ def _chat_messages_to_responses_input(
) -> List[Dict[str, Any]]:
"""Convert internal chat-style messages to Responses input items.
``is_xai_responses=True`` strips ``encrypted_content`` from replayed
reasoning items. xAI's OAuth/SuperGrok ``/v1/responses`` surface
rejects encrypted reasoning blobs minted by prior turns: the request
streams an ``error`` SSE frame before ``response.created`` and the
OpenAI SDK collapses it into a generic stream-ordering error. Native
Codex (chatgpt.com backend-api) DOES accept replayed encrypted_content —
keep the default off.
``is_xai_responses`` is kept for transport signature compatibility but
no longer suppresses encrypted reasoning replay. Earlier (PR #26644,
May 2026) we believed xAI's OAuth/SuperGrok ``/v1/responses`` surface
rejected replayed ``encrypted_content`` reasoning items minted by
prior turns, and we stripped them. That decision was wrong — xAI
explicitly relies on Hermes threading encrypted reasoning back across
turns for cross-turn coherence (the whole point of their partnership
integration). We now replay encrypted reasoning on every Responses
transport (xAI, native Codex, custom relays) and let xAI tell us
explicitly if a specific surface ever rejects a payload.
"""
items: List[Dict[str, Any]] = []
seen_item_ids: set = set()
@ -284,17 +287,12 @@ def _chat_messages_to_responses_input(
if role == "assistant":
# Replay encrypted reasoning items from previous turns
# so the API can maintain coherent reasoning chains.
#
# xAI OAuth (SuperGrok/Premium) rejects replayed
# ``encrypted_content`` reasoning items minted by prior
# turns — see _chat_messages_to_responses_input docstring.
# When ``is_xai_responses`` is set we drop the replay
# entirely; Grok still reasons on each turn server-side,
# we just don't try to thread the prior turn's encrypted
# blob back in.
# This applies to every Responses transport including
# xAI — see _chat_messages_to_responses_input docstring
# for the May 2026 reversal of the earlier xAI gate.
codex_reasoning = msg.get("codex_reasoning_items")
has_codex_reasoning = False
if isinstance(codex_reasoning, list) and not is_xai_responses:
if isinstance(codex_reasoning, list):
for ri in codex_reasoning:
if isinstance(ri, dict) and ri.get("encrypted_content"):
item_id = ri.get("id")

View file

@ -116,14 +116,11 @@ class ResponsesApiTransport(ProviderTransport):
if reasoning_enabled and is_xai_responses:
from agent.model_metadata import grok_supports_reasoning_effort
# NOTE: Hermes does NOT ask xAI to return ``reasoning.encrypted_content``
# any more. xAI's OAuth/SuperGrok ``/v1/responses`` surface rejects
# replayed encrypted reasoning items on turn 2+ — see
# _chat_messages_to_responses_input docstring. Requesting the field
# back would just have us cache something we then must strip. Grok
# still reasons natively each turn; coherence across turns rides on
# the visible message text alone.
kwargs["include"] = []
# Ask xAI to echo back encrypted reasoning items so we can
# replay them on subsequent turns for cross-turn coherence.
# See agent/codex_responses_adapter._chat_messages_to_responses_input
# for the May 2026 reversal of the earlier suppression gate.
kwargs["include"] = ["reasoning.encrypted_content"]
# xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
# / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
# those models reason natively. Only send the effort dial when

View file

@ -196,14 +196,13 @@ class TestCodexBuildKwargs:
)
# xAI Responses receives reasoning.effort on the allowlisted models.
assert kw.get("reasoning") == {"effort": "high"}
# As of May 2026 we deliberately do NOT request
# reasoning.encrypted_content back from xAI — the OAuth/SuperGrok
# surface rejects replayed encrypted reasoning items on turn 2+
# (the multi-turn "Expected to have received response.created
# before error" failure). Grok still reasons natively each turn;
# we just don't try to thread the prior turn's encrypted blob back
# in. See tests/run_agent/test_codex_xai_oauth_recovery.py.
assert "reasoning.encrypted_content" not in kw.get("include", [])
# As of May 2026 (post-revert of PR #26644) we DO request
# reasoning.encrypted_content back from xAI so we can replay it
# across turns for cross-turn coherence — xAI explicitly relies
# on this for their partnership integration. See
# tests/run_agent/test_codex_xai_oauth_recovery.py for the
# full history.
assert "reasoning.encrypted_content" in kw.get("include", [])
def test_xai_reasoning_disabled_no_reasoning_key(self, transport):
messages = [{"role": "user", "content": "Hi"}]
@ -229,9 +228,9 @@ class TestCodexBuildKwargs:
# api.x.ai 400s with "Model X does not support parameter reasoningEffort"
# on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*.
# Those models reason natively but don't expose the dial. The transport
# must omit the `reasoning` key for them. As of May 2026 we also no
# longer request ``reasoning.encrypted_content`` back from xAI on ANY
# model — see test_xai_reasoning_effort_passed for the rationale.
# must omit the `reasoning` key for them. As of May 2026 we DO request
# ``reasoning.encrypted_content`` back from xAI on every model —
# see test_xai_reasoning_effort_passed for the rationale.
def test_xai_grok_4_omits_reasoning_effort(self, transport):
"""grok-4 / grok-4-0709 reject reasoning.effort with HTTP 400."""
@ -245,9 +244,9 @@ class TestCodexBuildKwargs:
assert "reasoning" not in kw, (
f"{model} must not receive a reasoning key (xAI rejects it)"
)
# We no longer ask xAI for encrypted_content back (see comment
# above) — verify the include list is empty.
assert "reasoning.encrypted_content" not in kw.get("include", [])
# Even without the effort dial we still ask xAI to echo back
# encrypted reasoning content so it can be replayed next turn.
assert "reasoning.encrypted_content" in kw.get("include", [])
def test_xai_grok_4_fast_omits_reasoning_effort(self, transport):
"""grok-4-fast and grok-4-1-fast variants reject reasoning.effort."""

View file

@ -19,11 +19,15 @@ Three distinct failure modes the user community hit during rollout:
one-line hint pointing the user at https://grok.com and ``/model``.
3. Multi-turn replay of ``codex_reasoning_items`` (with
``encrypted_content``) is now suppressed for ``is_xai_responses=True``
in ``_chat_messages_to_responses_input``. xAI's OAuth/SuperGrok
surface rejects replayed encrypted reasoning items; Grok still
reasons natively each turn, so coherence rides on visible message
text.
``encrypted_content``) was briefly suppressed for ``is_xai_responses``
in PR #26644 on the theory that xAI's OAuth/SuperGrok surface
rejected replayed encrypted reasoning items. That suppression was
reverted shortly after: xAI confirmed they explicitly want Hermes to
thread encrypted reasoning back across turns, and the original
multi-turn failure mode was actually the prelude-SSE issue closed by
Fix A above. The remaining tests here lock in that xAI receives
replayed reasoning AND that we ask xAI to echo it back in the
``include`` array.
"""
from types import SimpleNamespace
@ -316,8 +320,15 @@ def test_codex_reasoning_replay_default_includes_encrypted_content():
assert reasoning[0]["encrypted_content"] == "enc_blob"
def test_codex_reasoning_replay_stripped_for_xai_oauth():
"""xAI OAuth surface must NOT receive replayed encrypted reasoning."""
def test_codex_reasoning_replay_includes_encrypted_content_for_xai():
"""xAI must receive replayed encrypted reasoning items (May 2026 reversal).
Earlier we stripped these on the theory that the OAuth/SuperGrok
surface rejected them. xAI subsequently confirmed they explicitly
want Hermes to thread encrypted reasoning back across turns for
cross-turn coherence — that's the whole point of the partnership
integration.
"""
from agent.codex_responses_adapter import _chat_messages_to_responses_input
msgs = [
@ -328,10 +339,13 @@ def test_codex_reasoning_replay_stripped_for_xai_oauth():
items = _chat_messages_to_responses_input(msgs, is_xai_responses=True)
reasoning = [it for it in items if it.get("type") == "reasoning"]
assert reasoning == []
assert len(reasoning) == 1, (
"xAI must receive replayed reasoning items — see docstring for the "
"May 2026 reversal of the earlier suppression gate."
)
assert reasoning[0]["encrypted_content"] == "enc_blob"
# The assistant's visible text must still survive — coherence across
# turns rides on the message text alone.
# And the assistant's visible text must still be present alongside it.
assistant_items = [
it for it in items
if it.get("role") == "assistant" or it.get("type") == "message"
@ -339,8 +353,12 @@ def test_codex_reasoning_replay_stripped_for_xai_oauth():
assert assistant_items, "assistant message must still be present"
def test_codex_transport_xai_request_omits_encrypted_content_include():
"""Verify the xAI ``include`` array no longer requests encrypted reasoning."""
def test_codex_transport_xai_request_includes_encrypted_content():
"""xAI ``include`` array must request ``reasoning.encrypted_content``.
This is the request-side half of the May 2026 reversal: we ask xAI
to echo back encrypted reasoning so the next turn can replay it.
"""
from agent.transports.codex import ResponsesApiTransport
transport = ResponsesApiTransport()
@ -355,14 +373,11 @@ def test_codex_transport_xai_request_omits_encrypted_content_include():
reasoning_config={"enabled": True, "effort": "medium"},
is_xai_responses=True,
)
# Without this gate, xAI would echo back encrypted_content blobs we'd
# then store in codex_reasoning_items and replay next turn — which is
# exactly the multi-turn failure mode we're closing.
assert kwargs["include"] == []
assert kwargs["include"] == ["reasoning.encrypted_content"]
def test_codex_transport_xai_strips_replayed_reasoning_in_input():
"""End-to-end: build_kwargs on xai-oauth must strip prior reasoning."""
def test_codex_transport_xai_replays_reasoning_in_input():
"""End-to-end: build_kwargs on xAI must replay prior encrypted reasoning."""
from agent.transports.codex import ResponsesApiTransport
transport = ResponsesApiTransport()
@ -381,7 +396,8 @@ def test_codex_transport_xai_strips_replayed_reasoning_in_input():
)
input_items = kwargs["input"]
reasoning_items = [it for it in input_items if it.get("type") == "reasoning"]
assert reasoning_items == []
assert len(reasoning_items) == 1
assert reasoning_items[0]["encrypted_content"] == "enc_blob"
def test_codex_transport_native_codex_still_replays_reasoning_in_input():