mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-08 08:11:38 +00:00
fix(xai): restore encrypted reasoning replay across turns
xAI partner integration requires Hermes to thread `encrypted_content` reasoning items back to the Responses API on every turn so Grok can maintain cross-turn reasoning coherence. PR #26644 (May 15) gated this off for `is_xai_responses` on the theory that the OAuth/SuperGrok surface rejected replayed encrypted blobs and produced the multi-turn "Expected to have received `response.created` before `error`" failure. That diagnosis was wrong — the prelude-SSE fallback added in the same PR is what actually fixed that failure mode. Suppressing the replay was an unnecessary side-effect that broke the whole point of xAI's partnership integration.

Changes:
- agent/codex_responses_adapter.py — drop the `is_xai_responses` gate in `_chat_messages_to_responses_input`. Keep the kwarg in the signature for transport compatibility; update the docstring to document the May 2026 reversal.
- agent/transports/codex.py — restore `kwargs["include"] = ["reasoning.encrypted_content"]` on the xAI Responses path so xAI echoes encrypted reasoning back to us.
- tests/run_agent/test_codex_xai_oauth_recovery.py — flip the three xAI assertions (now: xAI MUST receive replayed reasoning AND we MUST include encrypted_content in the request).
- tests/agent/transports/test_codex_transport.py — flip the `include` assertions on `test_xai_reasoning_effort_passed` and `test_xai_grok_4_omits_reasoning_effort`; update the allowlist block comment.

The prelude-SSE fallback and the entitlement-403 surfacing fixes from #26644 are untouched — they were independent fixes that happened to ride along with the reasoning-replay gate.
Validation:
- Targeted: tests/run_agent/test_codex_xai_oauth_recovery.py + tests/agent/transports/test_codex_transport.py → 65/65 pass
- Broader: tests/agent/transports/ + tests/run_agent/ → 1674 passed, 3 skipped, 0 failures
- E2E (real imports, isolated HERMES_HOME, ResponsesApiTransport build_kwargs): turn-1 request carries `include: ["reasoning.encrypted_content"]`; turn-2 input replays the encrypted_content blob from turn-1's `codex_reasoning_items`; native Codex unchanged.
This commit is contained in:
parent
127b56a61a
commit
b4afc6546e
4 changed files with 67 additions and 57 deletions
|
|
@ -19,11 +19,15 @@ Three distinct failure modes the user community hit during rollout:
|
|||
one-line hint pointing the user at https://grok.com and ``/model``.
|
||||
|
||||
3. Multi-turn replay of ``codex_reasoning_items`` (with
|
||||
``encrypted_content``) is now suppressed for ``is_xai_responses=True``
|
||||
in ``_chat_messages_to_responses_input``. xAI's OAuth/SuperGrok
|
||||
surface rejects replayed encrypted reasoning items; Grok still
|
||||
reasons natively each turn, so coherence rides on visible message
|
||||
text.
|
||||
``encrypted_content``) was briefly suppressed for ``is_xai_responses``
|
||||
in PR #26644 on the theory that xAI's OAuth/SuperGrok surface
|
||||
rejected replayed encrypted reasoning items. That suppression was
|
||||
reverted shortly after: xAI confirmed they explicitly want Hermes to
|
||||
thread encrypted reasoning back across turns, and the original
|
||||
multi-turn failure mode was actually the prelude-SSE issue closed by
|
||||
Fix A above. The remaining tests here lock in that xAI receives
|
||||
replayed reasoning AND that we ask xAI to echo it back in the
|
||||
``include`` array.
|
||||
"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
|
|
@ -316,8 +320,15 @@ def test_codex_reasoning_replay_default_includes_encrypted_content():
|
|||
assert reasoning[0]["encrypted_content"] == "enc_blob"
|
||||
|
||||
|
||||
def test_codex_reasoning_replay_stripped_for_xai_oauth():
|
||||
"""xAI OAuth surface must NOT receive replayed encrypted reasoning."""
|
||||
def test_codex_reasoning_replay_includes_encrypted_content_for_xai():
|
||||
"""xAI must receive replayed encrypted reasoning items (May 2026 reversal).
|
||||
|
||||
Earlier we stripped these on the theory that the OAuth/SuperGrok
|
||||
surface rejected them. xAI subsequently confirmed they explicitly
|
||||
want Hermes to thread encrypted reasoning back across turns for
|
||||
cross-turn coherence — that's the whole point of the partnership
|
||||
integration.
|
||||
"""
|
||||
from agent.codex_responses_adapter import _chat_messages_to_responses_input
|
||||
|
||||
msgs = [
|
||||
|
|
@ -328,10 +339,13 @@ def test_codex_reasoning_replay_stripped_for_xai_oauth():
|
|||
|
||||
items = _chat_messages_to_responses_input(msgs, is_xai_responses=True)
|
||||
reasoning = [it for it in items if it.get("type") == "reasoning"]
|
||||
assert reasoning == []
|
||||
assert len(reasoning) == 1, (
|
||||
"xAI must receive replayed reasoning items — see docstring for the "
|
||||
"May 2026 reversal of the earlier suppression gate."
|
||||
)
|
||||
assert reasoning[0]["encrypted_content"] == "enc_blob"
|
||||
|
||||
# The assistant's visible text must still survive — coherence across
|
||||
# turns rides on the message text alone.
|
||||
# And the assistant's visible text must still be present alongside it.
|
||||
assistant_items = [
|
||||
it for it in items
|
||||
if it.get("role") == "assistant" or it.get("type") == "message"
|
||||
|
|
@ -339,8 +353,12 @@ def test_codex_reasoning_replay_stripped_for_xai_oauth():
|
|||
assert assistant_items, "assistant message must still be present"
|
||||
|
||||
|
||||
def test_codex_transport_xai_request_omits_encrypted_content_include():
|
||||
"""Verify the xAI ``include`` array no longer requests encrypted reasoning."""
|
||||
def test_codex_transport_xai_request_includes_encrypted_content():
|
||||
"""xAI ``include`` array must request ``reasoning.encrypted_content``.
|
||||
|
||||
This is the request-side half of the May 2026 reversal: we ask xAI
|
||||
to echo back encrypted reasoning so the next turn can replay it.
|
||||
"""
|
||||
from agent.transports.codex import ResponsesApiTransport
|
||||
|
||||
transport = ResponsesApiTransport()
|
||||
|
|
@ -355,14 +373,11 @@ def test_codex_transport_xai_request_omits_encrypted_content_include():
|
|||
reasoning_config={"enabled": True, "effort": "medium"},
|
||||
is_xai_responses=True,
|
||||
)
|
||||
# Without this gate, xAI would echo back encrypted_content blobs we'd
|
||||
# then store in codex_reasoning_items and replay next turn — which is
|
||||
# exactly the multi-turn failure mode we're closing.
|
||||
assert kwargs["include"] == []
|
||||
assert kwargs["include"] == ["reasoning.encrypted_content"]
|
||||
|
||||
|
||||
def test_codex_transport_xai_strips_replayed_reasoning_in_input():
|
||||
"""End-to-end: build_kwargs on xai-oauth must strip prior reasoning."""
|
||||
def test_codex_transport_xai_replays_reasoning_in_input():
|
||||
"""End-to-end: build_kwargs on xAI must replay prior encrypted reasoning."""
|
||||
from agent.transports.codex import ResponsesApiTransport
|
||||
|
||||
transport = ResponsesApiTransport()
|
||||
|
|
@ -381,7 +396,8 @@ def test_codex_transport_xai_strips_replayed_reasoning_in_input():
|
|||
)
|
||||
input_items = kwargs["input"]
|
||||
reasoning_items = [it for it in input_items if it.get("type") == "reasoning"]
|
||||
assert reasoning_items == []
|
||||
assert len(reasoning_items) == 1
|
||||
assert reasoning_items[0]["encrypted_content"] == "enc_blob"
|
||||
|
||||
|
||||
def test_codex_transport_native_codex_still_replays_reasoning_in_input():
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue