mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(xai-oauth): recover from prelude SSE errors, gate reasoning replay, surface entitlement 403s (#26644)
Three fixes for the May 2026 xAI OAuth (SuperGrok / X Premium) rollout
failures:
- _run_codex_stream: when openai SDK raises RuntimeError("Expected to
have received `response.created` before `<type>`"), retry once then
fall back to responses.create(stream=True) — same path used for
missing-response.completed postlude. Fallback surfaces the real
provider error with body+status_code intact. Also fixes #8133
(response.in_progress prelude on custom relays) and #14634
(codex.rate_limits prelude on codex-lb).
- _summarize_api_error: when error body matches xAI's entitlement
shape, append a one-line hint pointing to https://grok.com and
/model. Once-only, applies to both auxiliary warnings and
main-loop error surfacing.
- _chat_messages_to_responses_input: new is_xai_responses kwarg
drops replayed codex_reasoning_items (encrypted_content) before
they reach xAI. Also drops reasoning.encrypted_content from the
xAI include array. Native Codex behavior unchanged. Grok still
reasons natively each turn; coherence rides on visible message
text alone.
Closes #8133, #14634.
This commit is contained in:
parent
4aec25bc44
commit
31ba2b0cbc
5 changed files with 481 additions and 18 deletions
|
|
@ -244,8 +244,21 @@ def _normalize_responses_message_status(value: Any, *, default: str = "completed
|
|||
return default
|
||||
|
||||
|
||||
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Convert internal chat-style messages to Responses input items."""
|
||||
def _chat_messages_to_responses_input(
|
||||
messages: List[Dict[str, Any]],
|
||||
*,
|
||||
is_xai_responses: bool = False,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Convert internal chat-style messages to Responses input items.
|
||||
|
||||
``is_xai_responses=True`` drops replayed ``encrypted_content``
|
||||
reasoning items. xAI's OAuth/SuperGrok ``/v1/responses`` surface
|
||||
rejects encrypted reasoning blobs minted by prior turns: the request
|
||||
streams an ``error`` SSE frame before ``response.created`` and the
|
||||
OpenAI SDK collapses it into a generic stream-ordering error. Native
|
||||
Codex (chatgpt.com backend-api) DOES accept replayed encrypted_content
|
||||
— keep the default off.
|
||||
"""
|
||||
items: List[Dict[str, Any]] = []
|
||||
seen_item_ids: set = set()
|
||||
|
||||
|
|
@ -271,9 +284,17 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
|||
if role == "assistant":
|
||||
# Replay encrypted reasoning items from previous turns
|
||||
# so the API can maintain coherent reasoning chains.
|
||||
#
|
||||
# xAI OAuth (SuperGrok/Premium) rejects replayed
|
||||
# ``encrypted_content`` reasoning items minted by prior
|
||||
# turns — see _chat_messages_to_responses_input docstring.
|
||||
# When ``is_xai_responses`` is set we drop the replay
|
||||
# entirely; Grok still reasons on each turn server-side,
|
||||
# we just don't try to thread the prior turn's encrypted
|
||||
# blob back in.
|
||||
codex_reasoning = msg.get("codex_reasoning_items")
|
||||
has_codex_reasoning = False
|
||||
if isinstance(codex_reasoning, list):
|
||||
if isinstance(codex_reasoning, list) and not is_xai_responses:
|
||||
for ri in codex_reasoning:
|
||||
if isinstance(ri, dict) and ri.get("encrypted_content"):
|
||||
item_id = ri.get("id")
|
||||
|
|
|
|||
|
|
@ -24,7 +24,10 @@ class ResponsesApiTransport(ProviderTransport):
|
|||
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
|
||||
"""Convert OpenAI chat messages to Responses API input items."""
|
||||
from agent.codex_responses_adapter import _chat_messages_to_responses_input
|
||||
return _chat_messages_to_responses_input(messages)
|
||||
return _chat_messages_to_responses_input(
|
||||
messages,
|
||||
is_xai_responses=bool(kwargs.get("is_xai_responses")),
|
||||
)
|
||||
|
||||
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
|
||||
"""Convert OpenAI tool schemas to Responses API function definitions."""
|
||||
|
|
@ -93,7 +96,10 @@ class ResponsesApiTransport(ProviderTransport):
|
|||
kwargs = {
|
||||
"model": model,
|
||||
"instructions": instructions,
|
||||
"input": _chat_messages_to_responses_input(payload_messages),
|
||||
"input": _chat_messages_to_responses_input(
|
||||
payload_messages,
|
||||
is_xai_responses=is_xai_responses,
|
||||
),
|
||||
"tools": response_tools,
|
||||
"store": False,
|
||||
}
|
||||
|
|
@ -110,7 +116,14 @@ class ResponsesApiTransport(ProviderTransport):
|
|||
if reasoning_enabled and is_xai_responses:
|
||||
from agent.model_metadata import grok_supports_reasoning_effort
|
||||
|
||||
kwargs["include"] = ["reasoning.encrypted_content"]
|
||||
# NOTE: Hermes does NOT ask xAI to return ``reasoning.encrypted_content``
|
||||
# any more. xAI's OAuth/SuperGrok ``/v1/responses`` surface rejects
|
||||
# replayed encrypted reasoning items on turn 2+ — see
|
||||
# _chat_messages_to_responses_input docstring. Requesting the field
|
||||
# back would just have us cache something we then must strip. Grok
|
||||
# still reasons natively each turn; coherence across turns rides on
|
||||
# the visible message text alone.
|
||||
kwargs["include"] = []
|
||||
# xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
|
||||
# / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
|
||||
# those models reason natively. Only send the effort dial when
|
||||
|
|
|
|||
81
run_agent.py
81
run_agent.py
|
|
@ -4966,6 +4966,45 @@ class AIAgent:
|
|||
trajectory = self._convert_to_trajectory_format(messages, user_query, completed)
|
||||
_save_trajectory_to_file(trajectory, self.model, completed)
|
||||
|
||||
@staticmethod
|
||||
def _decorate_xai_entitlement_error(detail: str) -> str:
|
||||
"""Append a friendly hint when xAI's OAuth surface returns an
|
||||
entitlement-shaped error.
|
||||
|
||||
xAI's ``/v1/responses`` endpoint replies to OAuth tokens that lack a
|
||||
SuperGrok / X Premium subscription with HTTP 403 carrying a body like::
|
||||
|
||||
{"code": "The caller does not have permission to execute the
|
||||
specified operation", "error": "You have either run out of
|
||||
available resources or do not have an active Grok subscription.
|
||||
Manage subscriptions at https://grok.com/..."}
|
||||
|
||||
The raw text is useful but the action the user needs to take (subscribe
|
||||
on grok.com, or switch providers with ``/model``) isn't obvious from
|
||||
the wire format. Detect the entitlement shape and append a hint.
|
||||
|
||||
Matched once per detail string — won't double-decorate if the upstream
|
||||
already concatenated the same text.
|
||||
"""
|
||||
if not detail:
|
||||
return detail
|
||||
lower = detail.lower()
|
||||
is_entitlement = (
|
||||
"do not have an active grok subscription" in lower
|
||||
or ("out of available resources" in lower and "grok" in lower)
|
||||
or ("does not have permission" in lower and "grok" in lower)
|
||||
)
|
||||
if not is_entitlement:
|
||||
return detail
|
||||
hint = (
|
||||
" — xAI OAuth account lacks SuperGrok / X Premium entitlement for "
|
||||
"this model. Subscribe at https://grok.com or run `/model` to "
|
||||
"switch providers."
|
||||
)
|
||||
if hint.strip() in detail:
|
||||
return detail
|
||||
return f"{detail}{hint}"
|
||||
|
||||
@staticmethod
|
||||
def _summarize_api_error(error: Exception) -> str:
|
||||
"""Extract a human-readable one-liner from an API error.
|
||||
|
|
@ -4999,12 +5038,12 @@ class AIAgent:
|
|||
if msg:
|
||||
status_code = getattr(error, "status_code", None)
|
||||
prefix = f"HTTP {status_code}: " if status_code else ""
|
||||
return f"{prefix}{msg[:300]}"
|
||||
return AIAgent._decorate_xai_entitlement_error(f"{prefix}{msg[:300]}")
|
||||
|
||||
# Fallback: truncate the raw string but give more room than 200 chars
|
||||
status_code = getattr(error, "status_code", None)
|
||||
prefix = f"HTTP {status_code}: " if status_code else ""
|
||||
return f"{prefix}{raw[:500]}"
|
||||
return AIAgent._decorate_xai_entitlement_error(f"{prefix}{raw[:500]}")
|
||||
|
||||
def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]:
|
||||
if not key:
|
||||
|
|
@ -7056,18 +7095,48 @@ class AIAgent:
|
|||
except RuntimeError as exc:
|
||||
err_text = str(exc)
|
||||
missing_completed = "response.completed" in err_text
|
||||
if missing_completed and attempt < max_stream_retries:
|
||||
# The OpenAI SDK's Responses streaming state machine raises
|
||||
# ``RuntimeError("Expected to have received `response.created`
|
||||
# before `<event-type>`")`` when the first SSE event from the
|
||||
# server is anything other than ``response.created`` — and it
|
||||
# discards the event's payload before we can read it. Three
|
||||
# real-world backends emit a different first frame:
|
||||
#
|
||||
# * xAI on grok-4.x OAuth — sends ``error`` (issues
|
||||
# reported around the May 2026 SuperGrok rollout when
|
||||
# multi-turn conversations replay encrypted reasoning
|
||||
# content the OAuth tier rejects)
|
||||
# * codex-lb relays — send ``codex.rate_limits`` (#14634)
|
||||
# * custom Responses relays — send ``response.in_progress``
|
||||
# (#8133)
|
||||
#
|
||||
# In all three cases the underlying byte stream is still
|
||||
# readable: a raw ``responses.create(stream=True)``
|
||||
# fallback succeeds and surfaces the real provider error as
|
||||
# a normal exception with body+status_code attached, which
|
||||
# ``_summarize_api_error`` can then translate into a useful
|
||||
# user-facing line. Treat ``response.created`` prelude
|
||||
# errors the same way we already treat ``response.completed``
|
||||
# postlude errors.
|
||||
prelude_error = (
|
||||
"Expected to have received `response.created`" in err_text
|
||||
or "Expected to have received \"response.created\"" in err_text
|
||||
)
|
||||
if (missing_completed or prelude_error) and attempt < max_stream_retries:
|
||||
logger.debug(
|
||||
"Responses stream closed before completion (attempt %s/%s); retrying. %s",
|
||||
"Responses stream %s (attempt %s/%s); retrying. %s",
|
||||
"prelude rejected" if prelude_error else "closed before completion",
|
||||
attempt + 1,
|
||||
max_stream_retries + 1,
|
||||
self._client_log_context(),
|
||||
)
|
||||
continue
|
||||
if missing_completed:
|
||||
if missing_completed or prelude_error:
|
||||
logger.debug(
|
||||
"Responses stream did not emit response.completed; falling back to create(stream=True). %s",
|
||||
"Responses stream %s; falling back to create(stream=True). %s err=%s",
|
||||
"rejected before response.created" if prelude_error else "did not emit response.completed",
|
||||
self._client_log_context(),
|
||||
err_text,
|
||||
)
|
||||
return self._run_codex_create_stream_fallback(api_kwargs, client=active_client)
|
||||
raise
|
||||
|
|
|
|||
|
|
@ -194,9 +194,16 @@ class TestCodexBuildKwargs:
|
|||
is_xai_responses=True,
|
||||
reasoning_config={"effort": "high"},
|
||||
)
|
||||
# xAI Responses must receive both encrypted reasoning content and the effort
|
||||
# xAI Responses receives reasoning.effort on the allowlisted models.
|
||||
assert kw.get("reasoning") == {"effort": "high"}
|
||||
assert "reasoning.encrypted_content" in kw.get("include", [])
|
||||
# As of May 2026 we deliberately do NOT request
|
||||
# reasoning.encrypted_content back from xAI — the OAuth/SuperGrok
|
||||
# surface rejects replayed encrypted reasoning items on turn 2+
|
||||
# (the multi-turn "Expected to have received response.created
|
||||
# before error" failure). Grok still reasons natively each turn;
|
||||
# we just don't try to thread the prior turn's encrypted blob back
|
||||
# in. See tests/run_agent/test_codex_xai_oauth_recovery.py.
|
||||
assert "reasoning.encrypted_content" not in kw.get("include", [])
|
||||
|
||||
def test_xai_reasoning_disabled_no_reasoning_key(self, transport):
|
||||
messages = [{"role": "user", "content": "Hi"}]
|
||||
|
|
@ -222,8 +229,9 @@ class TestCodexBuildKwargs:
|
|||
# api.x.ai 400s with "Model X does not support parameter reasoningEffort"
|
||||
# on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*.
|
||||
# Those models reason natively but don't expose the dial. The transport
|
||||
# must omit the `reasoning` key for them while keeping the encrypted
|
||||
# reasoning content include so we can capture native reasoning tokens.
|
||||
# must omit the `reasoning` key for them. As of May 2026 we also no
|
||||
# longer request ``reasoning.encrypted_content`` back from xAI on ANY
|
||||
# model — see test_xai_reasoning_effort_passed for the rationale.
|
||||
|
||||
def test_xai_grok_4_omits_reasoning_effort(self, transport):
|
||||
"""grok-4 / grok-4-0709 reject reasoning.effort with HTTP 400."""
|
||||
|
|
@ -237,8 +245,9 @@ class TestCodexBuildKwargs:
|
|||
assert "reasoning" not in kw, (
|
||||
f"{model} must not receive a reasoning key (xAI rejects it)"
|
||||
)
|
||||
# Still capture native reasoning tokens
|
||||
assert "reasoning.encrypted_content" in kw.get("include", [])
|
||||
# We no longer ask xAI for encrypted_content back (see comment
|
||||
# above) — verify it is absent from the include list.
|
||||
assert "reasoning.encrypted_content" not in kw.get("include", [])
|
||||
|
||||
def test_xai_grok_4_fast_omits_reasoning_effort(self, transport):
|
||||
"""grok-4-fast and grok-4-1-fast variants reject reasoning.effort."""
|
||||
|
|
|
|||
351
tests/run_agent/test_codex_xai_oauth_recovery.py
Normal file
351
tests/run_agent/test_codex_xai_oauth_recovery.py
Normal file
|
|
@ -0,0 +1,351 @@
|
|||
"""Regression tests for the May 2026 xAI OAuth (SuperGrok / X Premium) bugs.
|
||||
|
||||
Three distinct failure modes the user community hit during rollout:
|
||||
|
||||
1. ``RuntimeError("Expected to have received `response.created` before
|
||||
`error`")`` on multi-turn xAI OAuth conversations. The OpenAI SDK's
|
||||
Responses streaming state machine collapses an upstream ``error`` SSE
|
||||
frame into a generic stream-ordering error. ``_run_codex_stream``
|
||||
now treats this the same way it already treats the missing
|
||||
``response.completed`` postlude — fall back to a raw
|
||||
``responses.create(stream=True)`` which surfaces the real provider
|
||||
error. Also closes #8133 (``response.in_progress`` prelude on custom
|
||||
relays) and #14634 (``codex.rate_limits`` prelude on codex-lb).
|
||||
|
||||
2. The HTTP 403 entitlement error xAI returns when an OAuth token lacks
|
||||
SuperGrok / X Premium ("You have either run out of available
|
||||
resources or do not have an active Grok subscription") used to read
|
||||
as a confusing wall of JSON. ``_summarize_api_error`` now appends a
|
||||
one-line hint pointing the user at https://grok.com and ``/model``.
|
||||
|
||||
3. Multi-turn replay of ``codex_reasoning_items`` (with
|
||||
``encrypted_content``) is now suppressed for ``is_xai_responses=True``
|
||||
in ``_chat_messages_to_responses_input``. xAI's OAuth/SuperGrok
|
||||
surface rejects replayed encrypted reasoning items; Grok still
|
||||
reasons natively each turn, so coherence rides on visible message
|
||||
text.
|
||||
"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fix A: prelude error fallback
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_codex_agent():
|
||||
"""Build a minimal AIAgent wired for codex_responses streaming tests."""
|
||||
from run_agent import AIAgent
|
||||
|
||||
agent = AIAgent(
|
||||
api_key="test-key",
|
||||
base_url="https://api.x.ai/v1",
|
||||
model="grok-4.3",
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
)
|
||||
agent.api_mode = "codex_responses"
|
||||
agent.provider = "xai-oauth"
|
||||
agent._interrupt_requested = False
|
||||
return agent
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"prelude_event_type",
|
||||
[
|
||||
"error", # xAI OAuth multi-turn
|
||||
"codex.rate_limits", # codex-lb relays (#14634)
|
||||
"response.in_progress", # custom Responses relays (#8133)
|
||||
],
|
||||
)
|
||||
def test_codex_stream_prelude_error_falls_back_to_create_stream(prelude_event_type):
|
||||
"""The SDK's prelude RuntimeError must trigger the non-stream fallback.
|
||||
|
||||
When the first SSE event isn't ``response.created``, openai-python
|
||||
raises RuntimeError before our event loop sees anything. We must
|
||||
detect that, retry once, then fall back to ``create(stream=True)``
|
||||
which surfaces the real provider error or a real response.
|
||||
"""
|
||||
agent = _make_codex_agent()
|
||||
|
||||
prelude_error = RuntimeError(
|
||||
f"Expected to have received `response.created` before `{prelude_event_type}`"
|
||||
)
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.responses.stream.side_effect = prelude_error
|
||||
|
||||
fallback_response = SimpleNamespace(
|
||||
output=[SimpleNamespace(
|
||||
type="message",
|
||||
content=[SimpleNamespace(type="output_text", text="fallback ok")],
|
||||
)],
|
||||
status="completed",
|
||||
)
|
||||
|
||||
with patch.object(
|
||||
agent, "_run_codex_create_stream_fallback", return_value=fallback_response
|
||||
) as mock_fallback:
|
||||
result = agent._run_codex_stream({}, client=mock_client)
|
||||
|
||||
assert result is fallback_response
|
||||
mock_fallback.assert_called_once_with({}, client=mock_client)
|
||||
|
||||
|
||||
def test_codex_stream_prelude_error_retries_once_before_fallback():
|
||||
"""The retry path must fire one extra stream attempt before falling back."""
|
||||
agent = _make_codex_agent()
|
||||
|
||||
call_count = {"n": 0}
|
||||
|
||||
def stream_side_effect(**kwargs):
|
||||
call_count["n"] += 1
|
||||
raise RuntimeError(
|
||||
"Expected to have received `response.created` before `error`"
|
||||
)
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.responses.stream.side_effect = stream_side_effect
|
||||
|
||||
fallback_response = SimpleNamespace(output=[], status="completed")
|
||||
with patch.object(
|
||||
agent, "_run_codex_create_stream_fallback", return_value=fallback_response
|
||||
) as mock_fallback:
|
||||
agent._run_codex_stream({}, client=mock_client)
|
||||
|
||||
# max_stream_retries=1 → one retry + final attempt → 2 stream calls,
|
||||
# THEN the fallback path runs.
|
||||
assert call_count["n"] == 2
|
||||
mock_fallback.assert_called_once()
|
||||
|
||||
|
||||
def test_codex_stream_unrelated_runtimeerror_still_raises():
|
||||
"""RuntimeErrors that aren't prelude/postlude shape must propagate."""
|
||||
agent = _make_codex_agent()
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.responses.stream.side_effect = RuntimeError("something else broke")
|
||||
|
||||
with patch.object(agent, "_run_codex_create_stream_fallback") as mock_fallback:
|
||||
with pytest.raises(RuntimeError, match="something else broke"):
|
||||
agent._run_codex_stream({}, client=mock_client)
|
||||
|
||||
mock_fallback.assert_not_called()
|
||||
|
||||
|
||||
def test_codex_stream_postlude_error_still_falls_back():
|
||||
"""Existing ``response.completed`` fallback must not regress."""
|
||||
agent = _make_codex_agent()
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.responses.stream.side_effect = RuntimeError(
|
||||
"Didn't receive a `response.completed` event."
|
||||
)
|
||||
|
||||
fallback_response = SimpleNamespace(output=[], status="completed")
|
||||
with patch.object(
|
||||
agent, "_run_codex_create_stream_fallback", return_value=fallback_response
|
||||
) as mock_fallback:
|
||||
result = agent._run_codex_stream({}, client=mock_client)
|
||||
|
||||
assert result is fallback_response
|
||||
mock_fallback.assert_called_once()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fix B: friendly entitlement message
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_summarize_api_error_decorates_xai_entitlement_403():
|
||||
"""xAI's OAuth 403 must end with the subscribe-or-switch hint."""
|
||||
from run_agent import AIAgent
|
||||
|
||||
error = RuntimeError(
|
||||
"HTTP 403: Error code: 403 - {'code': 'The caller does not have permission "
|
||||
"to execute the specified operation', 'error': 'You have either run out of "
|
||||
"available resources or do not have an active Grok subscription. Manage "
|
||||
"subscriptions at https://grok.com'}"
|
||||
)
|
||||
summary = AIAgent._summarize_api_error(error)
|
||||
assert "do not have an active Grok subscription" in summary
|
||||
assert "SuperGrok" in summary
|
||||
assert "/model" in summary
|
||||
assert "https://grok.com" in summary
|
||||
|
||||
|
||||
def test_summarize_api_error_decorates_xai_body_message():
|
||||
"""SDK-style error with structured body must also get the hint."""
|
||||
from run_agent import AIAgent
|
||||
|
||||
class _XaiErr(Exception):
|
||||
status_code = 403
|
||||
body = {
|
||||
"error": {
|
||||
"message": (
|
||||
"You have either run out of available resources or do "
|
||||
"not have an active Grok subscription. Manage at "
|
||||
"https://grok.com"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
summary = AIAgent._summarize_api_error(_XaiErr("403"))
|
||||
assert "HTTP 403" in summary
|
||||
assert "SuperGrok / X Premium" in summary
|
||||
|
||||
|
||||
def test_summarize_api_error_idempotent_for_entitlement_hint():
|
||||
"""Decorating twice must not double up the hint."""
|
||||
from run_agent import AIAgent
|
||||
|
||||
raw = "HTTP 403: do not have an active Grok subscription"
|
||||
once = AIAgent._decorate_xai_entitlement_error(raw)
|
||||
twice = AIAgent._decorate_xai_entitlement_error(once)
|
||||
assert once == twice
|
||||
|
||||
|
||||
def test_summarize_api_error_passes_through_unrelated_errors():
|
||||
"""Non-xAI / non-entitlement errors must not be touched."""
|
||||
from run_agent import AIAgent
|
||||
|
||||
error = RuntimeError("HTTP 500: upstream is sad")
|
||||
summary = AIAgent._summarize_api_error(error)
|
||||
assert "SuperGrok" not in summary
|
||||
assert "grok.com" not in summary
|
||||
assert "upstream is sad" in summary
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fix C: reasoning replay gating for xai-oauth
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _assistant_msg_with_encrypted_reasoning(text="hi from grok", encrypted="enc_blob"):
|
||||
return {
|
||||
"role": "assistant",
|
||||
"content": text,
|
||||
"codex_reasoning_items": [
|
||||
{
|
||||
"type": "reasoning",
|
||||
"id": "rs_xai_001",
|
||||
"encrypted_content": encrypted,
|
||||
"summary": [],
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def test_codex_reasoning_replay_default_includes_encrypted_content():
|
||||
"""Native Codex backend (default) must still replay encrypted reasoning."""
|
||||
from agent.codex_responses_adapter import _chat_messages_to_responses_input
|
||||
|
||||
msgs = [
|
||||
{"role": "user", "content": "hi"},
|
||||
_assistant_msg_with_encrypted_reasoning(),
|
||||
{"role": "user", "content": "what's your name?"},
|
||||
]
|
||||
|
||||
items = _chat_messages_to_responses_input(msgs)
|
||||
reasoning = [it for it in items if it.get("type") == "reasoning"]
|
||||
assert len(reasoning) == 1
|
||||
assert reasoning[0]["encrypted_content"] == "enc_blob"
|
||||
|
||||
|
||||
def test_codex_reasoning_replay_stripped_for_xai_oauth():
|
||||
"""xAI OAuth surface must NOT receive replayed encrypted reasoning."""
|
||||
from agent.codex_responses_adapter import _chat_messages_to_responses_input
|
||||
|
||||
msgs = [
|
||||
{"role": "user", "content": "hi"},
|
||||
_assistant_msg_with_encrypted_reasoning(),
|
||||
{"role": "user", "content": "what's your name?"},
|
||||
]
|
||||
|
||||
items = _chat_messages_to_responses_input(msgs, is_xai_responses=True)
|
||||
reasoning = [it for it in items if it.get("type") == "reasoning"]
|
||||
assert reasoning == []
|
||||
|
||||
# The assistant's visible text must still survive — coherence across
|
||||
# turns rides on the message text alone.
|
||||
assistant_items = [
|
||||
it for it in items
|
||||
if it.get("role") == "assistant" or it.get("type") == "message"
|
||||
]
|
||||
assert assistant_items, "assistant message must still be present"
|
||||
|
||||
|
||||
def test_codex_transport_xai_request_omits_encrypted_content_include():
|
||||
"""Verify the xAI ``include`` array no longer requests encrypted reasoning."""
|
||||
from agent.transports.codex import ResponsesApiTransport
|
||||
|
||||
transport = ResponsesApiTransport()
|
||||
kwargs = transport.build_kwargs(
|
||||
model="grok-4.3",
|
||||
messages=[
|
||||
{"role": "system", "content": "you are a helpful assistant"},
|
||||
{"role": "user", "content": "hi"},
|
||||
],
|
||||
tools=None,
|
||||
instructions="you are a helpful assistant",
|
||||
reasoning_config={"enabled": True, "effort": "medium"},
|
||||
is_xai_responses=True,
|
||||
)
|
||||
# Without this gate, xAI would echo back encrypted_content blobs we'd
|
||||
# then store in codex_reasoning_items and replay next turn — which is
|
||||
# exactly the multi-turn failure mode we're closing.
|
||||
assert kwargs["include"] == []
|
||||
|
||||
|
||||
def test_codex_transport_xai_strips_replayed_reasoning_in_input():
|
||||
"""End-to-end: build_kwargs on xai-oauth must strip prior reasoning."""
|
||||
from agent.transports.codex import ResponsesApiTransport
|
||||
|
||||
transport = ResponsesApiTransport()
|
||||
kwargs = transport.build_kwargs(
|
||||
model="grok-4.3",
|
||||
messages=[
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "hi"},
|
||||
_assistant_msg_with_encrypted_reasoning(text="hi from grok"),
|
||||
{"role": "user", "content": "what's your name?"},
|
||||
],
|
||||
tools=None,
|
||||
instructions="sys",
|
||||
reasoning_config={"enabled": True, "effort": "medium"},
|
||||
is_xai_responses=True,
|
||||
)
|
||||
input_items = kwargs["input"]
|
||||
reasoning_items = [it for it in input_items if it.get("type") == "reasoning"]
|
||||
assert reasoning_items == []
|
||||
|
||||
|
||||
def test_codex_transport_native_codex_still_replays_reasoning_in_input():
|
||||
"""Regression guard: openai-codex must keep the existing replay path."""
|
||||
from agent.transports.codex import ResponsesApiTransport
|
||||
|
||||
transport = ResponsesApiTransport()
|
||||
kwargs = transport.build_kwargs(
|
||||
model="gpt-5-codex",
|
||||
messages=[
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "hi"},
|
||||
_assistant_msg_with_encrypted_reasoning(text="hi from codex"),
|
||||
{"role": "user", "content": "next"},
|
||||
],
|
||||
tools=None,
|
||||
instructions="sys",
|
||||
reasoning_config={"enabled": True, "effort": "medium"},
|
||||
is_xai_responses=False,
|
||||
)
|
||||
input_items = kwargs["input"]
|
||||
reasoning_items = [it for it in input_items if it.get("type") == "reasoning"]
|
||||
assert len(reasoning_items) == 1
|
||||
assert reasoning_items[0]["encrypted_content"] == "enc_blob"
|
||||
# Native Codex still asks for encrypted_content back.
|
||||
assert "reasoning.encrypted_content" in kwargs.get("include", [])
|
||||
Loading…
Add table
Add a link
Reference in a new issue