fix(codex-responses): gracefully recover from invalid_encrypted_content (salvage #10144) (#33035)

* fix(codex-responses): gracefully recover from invalid_encrypted_content (salvage #10144)

When an OpenAI-compatible Responses API surface accepts an initial
request but later rejects the replayed `codex_reasoning_items`
encrypted blob with HTTP 400 `invalid_encrypted_content`, the
session previously got stuck retrying the same poisoned payload.

Recovery: classify the error as a dedicated FailoverReason, and on the
first hit disable encrypted reasoning replay for the rest of the
session, strip cached items from message history, and retry once.

Changes:
* error_classifier: add FailoverReason.invalid_encrypted_content
  branch in _classify_400 (before context_overflow so the messages
  that mention 'encrypted content … could not be verified' don't trip
  context heuristics), in _classify_by_error_code, and extend
  _extract_error_code to peek inside wrapped JSON in error.message and
  ignore the bare '400' as a code.
* agent_init: initialize `_codex_reasoning_replay_enabled = True` on
  every agent.
* run_agent: add AIAgent._disable_codex_reasoning_replay() helper
  that flips the flag and pops cached items.
* codex_responses_adapter: thread a `replay_encrypted_reasoning`
  kwarg through _chat_messages_to_responses_input so that when the
  flag is False we don't replay codex_reasoning_items.
* transports/codex.py: read `replay_encrypted_reasoning` from params,
  thread it into the adapter, and gate the
  `include=['reasoning.encrypted_content']` request hint on it.
* chat_completion_helpers: pass the agent's replay flag through to
  the transport.
* conversation_loop: in the retry loop, add an
  invalid_encrypted_content recovery branch that fires once per
  session, only when api_mode == codex_responses, only when replay is
  still enabled, and only when at least one assistant message in
  history actually carries cached reasoning items (otherwise the 400
  has nothing to do with our cache and the normal retry path handles
  it).

Tests:
* test_error_classifier: new wrapped-JSON _extract_error_code case;
  new TestClassifyApiError cases proving the 400 is retryable with
  no fallback, that the broad message match doesn't catch a generic
  'parsed' message, and that the error code match is
  case-insensitive.
* test_run_agent_codex_responses: end-to-end test of the recovery
  branch firing once and disabling replay, plus a sibling test that
  proves the branch does *not* fire (and the flag stays True) when
  history has no cached reasoning items.

Salvages PR #10144 onto the post-refactor module layout
(error_classifier / codex_responses_adapter / transports/codex /
conversation_loop / agent_init) since the original diff was written
against the pre-refactor monolithic run_agent.py.

* chore(release): map victorGPT in AUTHOR_MAP for #10144 salvage

---------

Co-authored-by: victorGPT <wuxuebin1993@gmail.com>
This commit is contained in:
Teknium 2026-05-26 22:01:17 -07:00 committed by GitHub
parent 9d3e9316f4
commit b6ca56f651
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 342 additions and 5 deletions

View file

@ -1005,6 +1005,13 @@ def init_agent(
# Track conversation messages for session logging
agent._session_messages: List[Dict[str, Any]] = []
# Responses encrypted reasoning replay state. Some OpenAI-compatible
# routes accept GPT-5 Responses requests but later reject replayed
# encrypted reasoning blobs (HTTP 400 ``invalid_encrypted_content``).
# When that happens we disable replay for the rest of the session and
# fall back to stateless continuity. See
# agent/conversation_loop.py's invalid_encrypted_content retry branch.
agent._codex_reasoning_replay_enabled = True
agent._memory_write_origin = "assistant_tool"
agent._memory_write_context = "foreground"

View file

@ -507,6 +507,9 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
is_codex_backend=is_codex_backend,
is_xai_responses=is_xai_responses,
github_reasoning_extra=agent._github_models_reasoning_extra_body() if is_github_responses else None,
replay_encrypted_reasoning=bool(
getattr(agent, "_codex_reasoning_replay_enabled", True)
),
)
# ── chat_completions (default) ─────────────────────────────────────

View file

@ -248,6 +248,7 @@ def _chat_messages_to_responses_input(
messages: List[Dict[str, Any]],
*,
is_xai_responses: bool = False,
replay_encrypted_reasoning: bool = True,
) -> List[Dict[str, Any]]:
"""Convert internal chat-style messages to Responses input items.
@ -261,6 +262,14 @@ def _chat_messages_to_responses_input(
integration). We now replay encrypted reasoning on every Responses
transport (xAI, native Codex, custom relays) and let xAI tell us
explicitly if a specific surface ever rejects a payload.
``replay_encrypted_reasoning`` is the per-session kill switch. Some
OpenAI-compatible relays accept the request but later reject the
replayed encrypted blob with HTTP 400 ``invalid_encrypted_content``;
when that happens the retry loop calls
``AIAgent._disable_codex_reasoning_replay`` which both strips cached
items from the conversation history and threads ``replay_enabled=False``
through this converter so subsequent turns send no reasoning items.
"""
items: List[Dict[str, Any]] = []
seen_item_ids: set = set()
@ -290,7 +299,11 @@ def _chat_messages_to_responses_input(
# This applies to every Responses transport including
# xAI — see _chat_messages_to_responses_input docstring
# for the May 2026 reversal of the earlier xAI gate.
codex_reasoning = msg.get("codex_reasoning_items")
codex_reasoning = (
msg.get("codex_reasoning_items")
if replay_encrypted_reasoning
else None
)
has_codex_reasoning = False
if isinstance(codex_reasoning, list):
for ri in codex_reasoning:

View file

@ -1019,6 +1019,7 @@ def run_conversation(
nous_auth_retry_attempted=False
copilot_auth_retry_attempted=False
thinking_sig_retry_attempted = False
invalid_encrypted_content_retry_attempted = False
image_shrink_retry_attempted = False
multimodal_tool_content_retry_attempted = False
oauth_1m_beta_retry_attempted = False
@ -2296,6 +2297,49 @@ def run_conversation(
)
continue
# ── Invalid encrypted reasoning replay recovery ───────
# OpenAI Responses API surfaces (and some compatible relays)
# return HTTP 400 ``invalid_encrypted_content`` when a
# replayed ``codex_reasoning_items`` blob from a previous
# turn fails verification (provider rotated the encryption
# key, the route doesn't actually persist reasoning state,
# etc.). Recovery: disable replay for the rest of the
# session, strip cached items from history, retry once.
# One-shot — if a second 400 fires we fall through to the
# normal retry/backoff path. Only fires for codex_responses
# mode with at least one assistant message that has cached
# ``codex_reasoning_items``; without replay state, the
# error is unrelated to our cache so the normal retry path
# handles it (the provider is rejecting something else).
if (
classified.reason == FailoverReason.invalid_encrypted_content
and not invalid_encrypted_content_retry_attempted
and agent.api_mode == "codex_responses"
and bool(getattr(agent, "_codex_reasoning_replay_enabled", True))
and any(
isinstance(_m, dict)
and _m.get("role") == "assistant"
and isinstance(_m.get("codex_reasoning_items"), list)
and _m.get("codex_reasoning_items")
for _m in messages
)
):
invalid_encrypted_content_retry_attempted = True
replay_stats = agent._disable_codex_reasoning_replay(messages)
agent._vprint(
f"{agent.log_prefix}⚠️ Encrypted reasoning replay was rejected by the provider — "
f"disabled replay and stripped {replay_stats['items']} item(s) from "
f"{replay_stats['messages']} message(s), retrying...",
force=True,
)
logger.warning(
"%sInvalid encrypted reasoning recovery: disabled replay and stripped %d items from %d messages",
agent.log_prefix,
replay_stats["items"],
replay_stats["messages"],
)
continue
# ── llama.cpp grammar-parse recovery ──────────────────
# llama.cpp's ``json-schema-to-grammar`` converter rejects
# regex escape classes (``\d``, ``\w``, ``\s``) and most

View file

@ -50,6 +50,7 @@ class FailoverReason(enum.Enum):
# Request format
format_error = "format_error" # 400 bad request — abort or strip + retry
invalid_encrypted_content = "invalid_encrypted_content" # Responses replay blob rejected — strip replay state and retry
multimodal_tool_content_unsupported = "multimodal_tool_content_unsupported" # Provider rejected list-type content in tool messages (e.g. Xiaomi MiMo) — downgrade to text and retry
# Provider-specific
@ -865,6 +866,26 @@ def _classify_400(
retryable=True,
)
# Invalid encrypted reasoning replay blob (OpenAI Responses API). Must be
# checked BEFORE context_overflow because some surfaces emit messages that
# contain context-like phrasing ("encrypted content … could not be
# verified") which could otherwise trip the context_overflow heuristics.
# ``error_msg`` is lowercased upstream — match accordingly.
error_code_lower = (error_code or "").lower()
if (
error_code_lower == "invalid_encrypted_content"
or "invalid_encrypted_content" in error_msg
or (
"encrypted content for item" in error_msg
and "could not be verified" in error_msg
)
):
return result_fn(
FailoverReason.invalid_encrypted_content,
retryable=True,
should_fallback=False,
)
# Context overflow from 400
if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
return result_fn(
@ -974,6 +995,13 @@ def _classify_by_error_code(
should_compress=True,
)
if code_lower == "invalid_encrypted_content":
return result_fn(
FailoverReason.invalid_encrypted_content,
retryable=True,
should_fallback=False,
)
return None
@ -1141,15 +1169,49 @@ def _extract_error_code(body: dict) -> str:
"""Extract an error code string from the response body."""
if not body:
return ""
def _code_from_payload(payload) -> str:
"""Extract a code/type from a nested error payload dict (defensive)."""
if not isinstance(payload, dict):
return ""
payload_error = payload.get("error", {})
if isinstance(payload_error, dict):
nested = payload_error.get("code") or payload_error.get("type") or ""
if isinstance(nested, str) and nested.strip() and nested.strip() != "400":
return nested.strip()
code = payload.get("code") or payload.get("error_code") or ""
if isinstance(code, (str, int)):
text = str(code).strip()
if text and text != "400":
return text
return ""
error_obj = body.get("error", {})
if isinstance(error_obj, dict):
code = error_obj.get("code") or error_obj.get("type") or ""
if isinstance(code, str) and code.strip():
if isinstance(code, str) and code.strip() and code.strip() != "400":
return code.strip()
# Some providers wrap the real JSON error body as a string inside
# error.message — peek into it for a nested code (e.g. Responses API
# surfaces ``invalid_encrypted_content`` this way).
message = error_obj.get("message")
if isinstance(message, str) and message.strip().startswith("{"):
import json
try:
inner = json.loads(message)
except (json.JSONDecodeError, TypeError):
inner = None
nested_code = _code_from_payload(inner)
if nested_code:
return nested_code
# Top-level code
code = body.get("code") or body.get("error_code") or ""
if isinstance(code, (str, int)):
return str(code).strip()
text = str(code).strip()
if text and text != "400":
return text
return ""

View file

@ -27,6 +27,9 @@ class ResponsesApiTransport(ProviderTransport):
return _chat_messages_to_responses_input(
messages,
is_xai_responses=bool(kwargs.get("is_xai_responses")),
replay_encrypted_reasoning=bool(
kwargs.get("replay_encrypted_reasoning", True)
),
)
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
@ -79,6 +82,9 @@ class ResponsesApiTransport(ProviderTransport):
is_github_responses = params.get("is_github_responses", False)
is_codex_backend = params.get("is_codex_backend", False)
is_xai_responses = params.get("is_xai_responses", False)
replay_encrypted_reasoning = bool(
params.get("replay_encrypted_reasoning", True)
)
# Resolve reasoning effort
reasoning_effort = "medium"
@ -100,6 +106,7 @@ class ResponsesApiTransport(ProviderTransport):
"input": _chat_messages_to_responses_input(
payload_messages,
is_xai_responses=is_xai_responses,
replay_encrypted_reasoning=replay_encrypted_reasoning,
),
"tools": response_tools,
"store": False,
@ -121,7 +128,9 @@ class ResponsesApiTransport(ProviderTransport):
# replay them on subsequent turns for cross-turn coherence.
# See agent/codex_responses_adapter._chat_messages_to_responses_input
# for the May 2026 reversal of the earlier suppression gate.
kwargs["include"] = ["reasoning.encrypted_content"]
kwargs["include"] = (
["reasoning.encrypted_content"] if replay_encrypted_reasoning else []
)
# xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
# / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
# those models reason natively. Only send the effort dial when
@ -136,7 +145,9 @@ class ResponsesApiTransport(ProviderTransport):
kwargs["reasoning"] = github_reasoning
else:
kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
kwargs["include"] = ["reasoning.encrypted_content"]
kwargs["include"] = (
["reasoning.encrypted_content"] if replay_encrypted_reasoning else []
)
elif not is_github_responses and not is_xai_responses:
kwargs["include"] = []