refactor(codex): drop SDK responses.stream() helper; consume events directly (#33042)

* refactor(codex): drop SDK responses.stream() helper; consume events directly

The OpenAI Python SDK's high-level `client.responses.stream(...)` helper
does post-hoc typed reconstruction from the terminal
`response.completed.response.output` field.  The chatgpt.com Codex
backend has been observed (today, gpt-5.5) to ship `response.output =
null` on terminal frames, which crashes the SDK with `TypeError:
'NoneType' object is not iterable` mid-iteration.

Carlton's #32963 patched the symptom by wrapping the helper in
try/except and recovering from the same per-event accumulator the SDK
was supposed to populate.  This PR removes the helper from the call
path entirely: we now use `client.responses.create(stream=True)` (raw
AsyncIterable of SSE events) and assemble the final response object
ourselves from `response.output_item.done` events as they arrive.  The
terminal event's `output` field is never read for content.  Same
strategy OpenClaw uses for the same backend.

This makes Hermes structurally immune to the bug class, not patched.
The next time OpenAI ships a shape change to chatgpt.com's terminal
frame, our consumer keeps working because it doesn't read that frame
for content — only for usage/status/id.

Changes
- `agent/codex_runtime.py`: new `_consume_codex_event_stream()` shared
  consumer; `run_codex_stream()` uses `responses.create(stream=True)`;
  `run_codex_create_stream_fallback()` collapses into a thin alias
  since the primary path now does what the fallback used to do.
- `agent/auxiliary_client.py`: `_CodexCompletionsAdapter` uses the
  same consumer; old null-output recovery helpers deleted as
  unreferenced.
- Tests migrated: fixtures that mocked `responses.stream` now mock
  `responses.create` returning a raw iterable.  New regression test
  asserts the auxiliary path returns streamed items even when the
  terminal event's `output` is literally `null`.

Validation
- Live: tested against fresh OAuth on `chatgpt.com/backend-api/codex`
  with `gpt-5.5` — response built correctly with `response.output=null`
  on the terminal frame, all events consumed, usage/reasoning tokens
  propagated.
- `tests/run_agent/test_run_agent_codex_responses.py` +
  `tests/agent/test_auxiliary_client.py`: 242 passed.

* test+fix(codex): migrate streaming tests, raise on truncated streams

CI surfaced 10 test failures across tests/run_agent/test_streaming.py
and tests/run_agent/test_codex_xai_oauth_recovery.py — both files had
their own `responses.stream(...)` mocks I missed in the first sweep.

agent/codex_runtime.py: _consume_codex_event_stream() now raises
"Codex Responses stream did not emit a terminal response" when the
stream ends without any terminal frame AND no usable content. This
preserves the signal callers used to get from the SDK's high-level
helper, which they distinguished from "completed with empty body"
in error handling.

Tests migrated:
- test_streaming.py: text-delta callback, activity-touch, and
  remote-protocol-error tests all switch from mocking responses.stream
  to responses.create returning an iterable of events.
- test_codex_xai_oauth_recovery.py: prelude-error tests are recast as
  wire-error-event tests (the new path raises _StreamErrorEvent
  directly when the wire emits type=error, which is strictly better
  than the old two-phase "SDK RuntimeError → retry → fallback"). The
  retry-on-transport-error test moves from responses.stream side-effect
  to responses.create side-effect.

Verified live against chatgpt.com Codex with gpt-5.5 — AIAgent.chat()
through the full codex_responses path returns correctly, 319/319
targeted tests passing.
This commit is contained in:
Teknium 2026-05-27 00:30:06 -07:00 committed by GitHub
parent fb298a958c
commit cb38ce28cb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 645 additions and 725 deletions

View file

@ -107,32 +107,6 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
logger = logging.getLogger(__name__)
def _responses_null_output_iterable_error(exc: BaseException) -> bool:
"""True when the OpenAI SDK trips over terminal response.output=None."""
text = str(exc)
return isinstance(exc, TypeError) and "NoneType" in text and "not iterable" in text
def _responses_backfilled_response(output_items: List[Any], text_parts: List[str], *, has_function_calls: bool, model: str = None) -> Optional[Any]:
"""Build a minimal Responses-like object from already streamed events."""
if output_items:
return SimpleNamespace(output=list(output_items), usage=None, status="completed", model=model)
if text_parts and not has_function_calls:
assembled = "".join(text_parts)
return SimpleNamespace(
output=[SimpleNamespace(
type="message",
role="assistant",
status="completed",
content=[SimpleNamespace(type="output_text", text=assembled)],
)],
usage=None,
status="completed",
model=model,
)
return None
def _safe_isinstance(obj: Any, maybe_type: Any) -> bool:
"""Return False instead of raising when a patched symbol is not a type."""
try:
@ -811,77 +785,53 @@ class _CodexCompletionsAdapter:
pass
try:
# Collect output items and text deltas during streaming —
# the Codex backend can return empty response.output from
# get_final_response() even when items were streamed.
collected_output_items: List[Any] = []
collected_text_deltas: List[str] = []
has_function_calls = False
if total_timeout:
timeout_timer = threading.Timer(float(total_timeout), _close_client_on_timeout)
timeout_timer.daemon = True
timeout_timer.start()
_check_cancelled()
final = None
with self._client.responses.stream(**resp_kwargs) as stream:
try:
for _event in stream:
_check_cancelled()
_etype = getattr(_event, "type", "")
if _etype == "response.output_item.done":
_done = getattr(_event, "item", None)
if _done is not None:
collected_output_items.append(_done)
elif "output_text.delta" in _etype:
_delta = getattr(_event, "delta", "")
if _delta:
collected_text_deltas.append(_delta)
elif "function_call" in _etype:
has_function_calls = True
_check_cancelled()
final = stream.get_final_response()
except TypeError as exc:
if not _responses_null_output_iterable_error(exc):
raise
final = _responses_backfilled_response(
collected_output_items,
collected_text_deltas,
has_function_calls=has_function_calls,
model=resp_kwargs.get("model"),
)
if final is None:
raise
logger.debug(
"Codex auxiliary Responses stream parser hit response.output=None; "
"recovered from streamed events (items=%d, text_parts=%d)",
len(collected_output_items),
len(collected_text_deltas),
)
# Event-driven Responses streaming via the low-level
# ``responses.create(stream=True)`` path. The high-level
# ``responses.stream(...)`` helper does post-hoc typed
# reconstruction from ``response.completed.response.output``,
# which the chatgpt.com Codex backend has been observed to
# return as ``null`` (gpt-5.5, May 2026) — that crashes the SDK
# with ``TypeError: 'NoneType' object is not iterable``.
# Consuming raw events and assembling the final response
# ourselves from ``response.output_item.done`` makes us
# structurally immune to that drift.
from agent.codex_runtime import _consume_codex_event_stream
stream_kwargs = dict(resp_kwargs)
stream_kwargs["stream"] = True
def _on_each_event(_event: Any) -> None:
# Re-check timeout/cancellation per event, matching the
# cadence the old in-line ``_check_cancelled()`` used.
_check_cancelled()
event_stream = self._client.responses.create(**stream_kwargs)
try:
final = _consume_codex_event_stream(
event_stream,
model=resp_kwargs.get("model"),
on_event=_on_each_event,
)
finally:
close_fn = getattr(event_stream, "close", None)
if callable(close_fn):
try:
close_fn()
except Exception:
pass
if final is None:
raise RuntimeError("Codex auxiliary Responses stream did not return a final response")
# Backfill empty output from collected stream events
_output = getattr(final, "output", None)
if _output is None or (isinstance(_output, list) and not _output):
recovered = _responses_backfilled_response(
collected_output_items,
collected_text_deltas,
has_function_calls=has_function_calls,
model=resp_kwargs.get("model"),
)
if recovered is not None:
final.output = recovered.output
logger.debug(
"Codex auxiliary: backfilled missing output from stream events "
"(items=%d, text_parts=%d)",
len(collected_output_items),
len(collected_text_deltas),
)
# Extract text and tool calls from the Responses output.
# Items may be SDK objects (attrs) or dicts (raw/fallback paths),
# so use a helper that handles both shapes.
# Items may be SimpleNamespace (raw-event path) or dicts
# (some legacy fallback paths), so handle both shapes.
def _item_get(obj: Any, key: str, default: Any = None) -> Any:
val = getattr(obj, key, None)
if val is None and isinstance(obj, dict):
@ -908,9 +858,12 @@ class _CodexCompletionsAdapter:
resp_usage = getattr(final, "usage", None)
if resp_usage:
usage = SimpleNamespace(
prompt_tokens=getattr(resp_usage, "input_tokens", 0),
completion_tokens=getattr(resp_usage, "output_tokens", 0),
total_tokens=getattr(resp_usage, "total_tokens", 0),
prompt_tokens=getattr(resp_usage, "input_tokens", 0)
or (resp_usage.get("input_tokens", 0) if isinstance(resp_usage, dict) else 0),
completion_tokens=getattr(resp_usage, "output_tokens", 0)
or (resp_usage.get("output_tokens", 0) if isinstance(resp_usage, dict) else 0),
total_tokens=getattr(resp_usage, "total_tokens", 0)
or (resp_usage.get("total_tokens", 0) if isinstance(resp_usage, dict) else 0),
)
except Exception as exc:
if timed_out.is_set():

View file

@ -174,332 +174,363 @@ def run_codex_app_server_turn(
}
# ---------------------------------------------------------------------------
# Event-driven Responses streaming
#
# OpenAI ships its consumer Codex backend (chatgpt.com/backend-api/codex) on
# a different schedule from the openai Python SDK. The high-level
# ``client.responses.stream(...)`` helper reconstructs a typed Response from
# the terminal ``response.completed`` event's ``response.output`` field, and
# when that field drifts to ``null`` (gpt-5.5, May 2026) the SDK raises
# ``TypeError: 'NoneType' object is not iterable`` mid-iteration.
#
# We sidestep the whole class of failure by going one level lower:
# ``client.responses.create(stream=True)`` returns the raw AsyncIterable of
# SSE events, and we assemble the final response object purely from
# ``response.output_item.done`` events as they arrive. We never read
# ``response.completed.response.output`` for content reconstruction, so the
# backend can return ``null``, ``[]``, a string, or omit the field entirely
# and we don't care.
#
# This mirrors what the OpenClaw TS implementation does for the same backend
# and is structurally immune to the bug class rather than patched.
# ---------------------------------------------------------------------------
def _responses_null_output_iterable_error(exc: BaseException) -> bool:
"""True when the OpenAI SDK trips over terminal response.output=None."""
text = str(exc)
return isinstance(exc, TypeError) and "NoneType" in text and "not iterable" in text
_TERMINAL_EVENT_TYPES = frozenset({
"response.completed",
"response.incomplete",
"response.failed",
})
def _codex_backfilled_response(output_items: list, text_parts: list, *, has_tool_calls: bool, model: str = None):
"""Build a minimal Responses-like object from events already streamed."""
if output_items:
return SimpleNamespace(
output=list(output_items),
usage=None,
def _event_field(event: Any, name: str, default: Any = None) -> Any:
"""Field access that handles both attr-style (SDK objects) and dict (raw JSON) events."""
value = getattr(event, name, None)
if value is None and isinstance(event, dict):
value = event.get(name, default)
return value if value is not None else default
def _raise_stream_error(event: Any) -> None:
"""Raise a ``_StreamErrorEvent`` from a ``type=error`` SSE frame.
Imported lazily so this module stays importable from places that don't
pull in ``run_agent`` (e.g. plugin code, doc tools).
"""
from run_agent import _StreamErrorEvent
message = (_event_field(event, "message", "") or "stream emitted error event").strip()
raise _StreamErrorEvent(
message,
code=_event_field(event, "code"),
param=_event_field(event, "param"),
)
def _consume_codex_event_stream(
event_iter: Any,
*,
model: str,
on_text_delta=None,
on_reasoning_delta=None,
on_first_delta=None,
on_event=None,
interrupt_check=None,
) -> SimpleNamespace:
"""Consume a Codex Responses SSE event stream and return a final response.
The returned object is a ``SimpleNamespace`` shaped like the SDK's typed
``Response`` for the fields downstream code actually reads:
* ``output``: list of output items, assembled from ``response.output_item.done``.
For tool-call turns this contains the function_call items; for plain-text
turns it contains a synthesized ``message`` item built from streamed deltas
if no message item was emitted directly.
* ``output_text``: assembled text from ``response.output_text.delta`` deltas.
* ``usage``: copied from the terminal event's ``response.usage`` (when present).
* ``status``: ``completed`` / ``incomplete`` / ``failed`` (or ``completed`` if
the stream ended without a terminal frame but produced content).
* ``id``: ``response.id`` when present.
* ``incomplete_details``: passed through for ``response.incomplete`` frames.
* ``error``: passed through for ``response.failed`` frames.
* ``model``: from kwargs (the wire model name is not authoritative).
Critically, we never read ``response.output`` from the terminal event for
content reconstruction only ``usage``, ``status``, ``id``. That field
being ``null`` / ``[]`` / missing is fine.
Callbacks:
* ``on_text_delta(str)`` fires per ``response.output_text.delta``, suppressed
once a function_call event is seen (so tool-call turns don't bleed text
into the chat).
* ``on_reasoning_delta(str)`` fires per ``response.reasoning.*.delta``.
* ``on_first_delta()`` one-shot, fires on the first text delta only.
* ``on_event(event)`` fires for every event before any other processing.
Used for watchdog activity, debug logging, anything wire-shape-agnostic.
* ``interrupt_check()`` returns True to break the loop early.
"""
collected_output_items: List[Any] = []
collected_text_deltas: List[str] = []
has_tool_calls = False
first_delta_fired = False
terminal_status: str = "completed"
terminal_usage: Any = None
terminal_response_id: str = None
terminal_incomplete_details: Any = None
terminal_error: Any = None
saw_terminal = False
for event in event_iter:
if on_event is not None:
try:
on_event(event)
except (TimeoutError, InterruptedError):
# Control-flow signals from watchdog/cancellation hooks must
# propagate, not get swallowed as "debug noise".
raise
except Exception:
# Genuine bugs in third-party debug/log hooks shouldn't break
# stream consumption.
logger.debug("Codex stream on_event hook raised", exc_info=True)
if interrupt_check is not None and interrupt_check():
break
event_type = _event_field(event, "type", "")
if not isinstance(event_type, str):
event_type = ""
# ``error`` SSE frames carry the provider's real failure reason
# (subscription / quota / model-not-available / rejected-reasoning-replay)
# but never appear in the terminal set. Surface them as a structured
# exception so the credential pool + error classifier see the body.
if event_type == "error":
_raise_stream_error(event)
if "output_text.delta" in event_type or event_type == "response.output_text.delta":
delta_text = _event_field(event, "delta", "")
if delta_text:
collected_text_deltas.append(delta_text)
if not has_tool_calls:
if not first_delta_fired:
first_delta_fired = True
if on_first_delta is not None:
try:
on_first_delta()
except Exception:
logger.debug("Codex stream on_first_delta raised", exc_info=True)
if on_text_delta is not None:
try:
on_text_delta(delta_text)
except Exception:
logger.debug("Codex stream on_text_delta raised", exc_info=True)
continue
if "function_call" in event_type:
has_tool_calls = True
# fall through — function_call items still get added on output_item.done
if "reasoning" in event_type and "delta" in event_type:
reasoning_text = _event_field(event, "delta", "")
if reasoning_text and on_reasoning_delta is not None:
try:
on_reasoning_delta(reasoning_text)
except Exception:
logger.debug("Codex stream on_reasoning_delta raised", exc_info=True)
continue
if event_type == "response.output_item.done":
done_item = _event_field(event, "item")
if done_item is not None:
collected_output_items.append(done_item)
continue
if event_type in _TERMINAL_EVENT_TYPES:
saw_terminal = True
resp_obj = _event_field(event, "response")
if resp_obj is not None:
terminal_usage = getattr(resp_obj, "usage", None)
if terminal_usage is None and isinstance(resp_obj, dict):
terminal_usage = resp_obj.get("usage")
rid = getattr(resp_obj, "id", None)
if rid is None and isinstance(resp_obj, dict):
rid = resp_obj.get("id")
terminal_response_id = rid
rstatus = getattr(resp_obj, "status", None)
if rstatus is None and isinstance(resp_obj, dict):
rstatus = resp_obj.get("status")
if isinstance(rstatus, str):
terminal_status = rstatus
if event_type == "response.incomplete":
terminal_incomplete_details = getattr(resp_obj, "incomplete_details", None)
if terminal_incomplete_details is None and isinstance(resp_obj, dict):
terminal_incomplete_details = resp_obj.get("incomplete_details")
if event_type == "response.failed":
terminal_error = getattr(resp_obj, "error", None)
if terminal_error is None and isinstance(resp_obj, dict):
terminal_error = resp_obj.get("error")
if event_type == "response.completed":
terminal_status = terminal_status or "completed"
elif event_type == "response.incomplete":
terminal_status = terminal_status or "incomplete"
elif event_type == "response.failed":
terminal_status = terminal_status or "failed"
# Stop on terminal event.
break
# Build the final output list. Prefer items observed via output_item.done;
# if none arrived but we streamed plain text deltas (no tool calls), synthesize
# a single message item so downstream normalization has something to work with.
if collected_output_items:
output = list(collected_output_items)
elif collected_text_deltas and not has_tool_calls:
assembled = "".join(collected_text_deltas)
output = [SimpleNamespace(
type="message",
role="assistant",
status="completed",
model=model,
content=[SimpleNamespace(type="output_text", text=assembled)],
)]
else:
output = []
# If the stream ended without any terminal event AND produced no usable
# content (no items, no text deltas), surface that as a RuntimeError so
# callers can distinguish "stream truncated mid-flight / provider rejected
# the call" from "stream completed with empty body". This preserves the
# signal the SDK's high-level helper used to raise as
# ``RuntimeError("Didn't receive a `response.completed` event.")``.
if not saw_terminal and not output:
raise RuntimeError(
"Codex Responses stream did not emit a terminal response"
)
if text_parts and not has_tool_calls:
assembled = "".join(text_parts)
return SimpleNamespace(
output=[SimpleNamespace(
type="message",
role="assistant",
status="completed",
content=[SimpleNamespace(type="output_text", text=assembled)],
)],
usage=None,
status="completed",
model=model,
)
return None
assembled_text = "".join(collected_text_deltas)
final = SimpleNamespace(
output=output,
output_text=assembled_text,
usage=terminal_usage,
status=terminal_status,
id=terminal_response_id,
model=model,
incomplete_details=terminal_incomplete_details,
error=terminal_error,
)
return final
def run_codex_stream(agent, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
"""Execute one streaming Responses API request and return the final response."""
def run_codex_stream(agent, api_kwargs: dict, client: Any = None, on_first_delta=None):
"""Execute one streaming Responses API request and return the final response.
Uses ``responses.create(stream=True)`` (low-level raw event iteration)
rather than the high-level ``responses.stream(...)`` helper. This makes
us structurally immune to backend drift in the ``response.completed``
payload shape we never let the SDK reconstruct a typed object from
the terminal event's ``output`` field.
"""
import httpx as _httpx
active_client = client or agent._ensure_primary_openai_client(reason="codex_stream_direct")
max_stream_retries = 1
has_tool_calls = False
first_delta_fired = False
# Accumulate streamed text so we can recover if get_final_response()
# returns empty output (e.g. chatgpt.com backend-api sends
# response.incomplete instead of response.completed).
# Accumulate streamed text so callers / compat shims can read it.
agent._codex_streamed_text_parts: list = []
def _on_text_delta(text: str) -> None:
agent._codex_streamed_text_parts.append(text)
agent._fire_stream_delta(text)
def _on_reasoning_delta(text: str) -> None:
agent._fire_reasoning_delta(text)
def _on_event(event: Any) -> None:
# TTFB watchdog and activity touch — runs once per SSE event.
agent._codex_stream_last_event_ts = time.time()
agent._touch_activity("receiving stream response")
def _interrupt_check() -> bool:
return bool(agent._interrupt_requested)
for attempt in range(max_stream_retries + 1):
if agent._interrupt_requested:
raise InterruptedError("Agent interrupted before Codex stream retry")
collected_output_items: list = []
stream_kwargs = dict(api_kwargs)
stream_kwargs["stream"] = True
try:
with active_client.responses.stream(**api_kwargs) as stream:
for event in stream:
# Mark stream activity for the TTFB watchdog in
# interruptible_api_call. The Codex backend can accept the
# connection but never emit a single event; this timestamp
# staying None tells the watchdog no bytes are flowing.
agent._codex_stream_last_event_ts = time.time()
agent._touch_activity("receiving stream response")
if agent._interrupt_requested:
break
event_type = getattr(event, "type", "")
# Fire callbacks on text content deltas (suppress during tool calls)
if "output_text.delta" in event_type or event_type == "response.output_text.delta":
delta_text = getattr(event, "delta", "")
if delta_text:
agent._codex_streamed_text_parts.append(delta_text)
if delta_text and not has_tool_calls:
if not first_delta_fired:
first_delta_fired = True
if on_first_delta:
try:
on_first_delta()
except Exception:
pass
agent._fire_stream_delta(delta_text)
# Track tool calls to suppress text streaming
elif "function_call" in event_type:
has_tool_calls = True
# Fire reasoning callbacks
elif "reasoning" in event_type and "delta" in event_type:
reasoning_text = getattr(event, "delta", "")
if reasoning_text:
agent._fire_reasoning_delta(reasoning_text)
# Collect completed output items — some backends
# (chatgpt.com/backend-api/codex) stream valid items
# via response.output_item.done but the SDK's
# get_final_response() returns an empty output list.
elif event_type == "response.output_item.done":
done_item = getattr(event, "item", None)
if done_item is not None:
collected_output_items.append(done_item)
# Log non-completed terminal events for diagnostics
elif event_type in {"response.incomplete", "response.failed"}:
resp_obj = getattr(event, "response", None)
status = getattr(resp_obj, "status", None) if resp_obj else None
incomplete_details = getattr(resp_obj, "incomplete_details", None) if resp_obj else None
logger.warning(
"Codex Responses stream received terminal event %s "
"(status=%s, incomplete_details=%s, streamed_chars=%d). %s",
event_type, status, incomplete_details,
sum(len(p) for p in agent._codex_streamed_text_parts),
agent._client_log_context(),
)
final_response = stream.get_final_response()
# PATCH: ChatGPT Codex backend streams valid output items
# but get_final_response() can return an empty output list.
# Backfill from collected items or synthesize from deltas.
_out = getattr(final_response, "output", None)
if _out is None or (isinstance(_out, list) and not _out):
recovered = _codex_backfilled_response(
collected_output_items,
agent._codex_streamed_text_parts,
has_tool_calls=has_tool_calls,
model=api_kwargs.get("model"),
)
if recovered is not None:
final_response.output = recovered.output
logger.debug(
"Codex stream: backfilled missing output from stream events "
"(items=%d, text_parts=%d)",
len(collected_output_items),
len(agent._codex_streamed_text_parts),
)
return final_response
event_stream = active_client.responses.create(**stream_kwargs)
except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
if attempt < max_stream_retries:
logger.debug(
"Codex Responses stream transport failed (attempt %s/%s); retrying. %s error=%s",
attempt + 1,
max_stream_retries + 1,
agent._client_log_context(),
exc,
"Codex Responses stream connect failed (attempt %s/%s); retrying. %s error=%s",
attempt + 1, max_stream_retries + 1,
agent._client_log_context(), exc,
)
continue
logger.debug(
"Codex Responses stream transport failed; falling back to create(stream=True). %s error=%s",
agent._client_log_context(),
exc,
)
return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
except TypeError as exc:
if _responses_null_output_iterable_error(exc):
recovered = _codex_backfilled_response(
collected_output_items,
agent._codex_streamed_text_parts,
has_tool_calls=has_tool_calls,
model=api_kwargs.get("model"),
)
if recovered is not None:
logger.debug(
"Codex Responses stream parser hit response.output=None; "
"recovered from streamed events (items=%d, text_parts=%d). %s",
len(collected_output_items),
len(agent._codex_streamed_text_parts),
agent._client_log_context(),
)
return recovered
logger.debug(
"Codex Responses stream parser hit response.output=None without "
"recoverable events; falling back to create(stream=True). %s",
agent._client_log_context(),
)
return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
raise
except RuntimeError as exc:
err_text = str(exc)
missing_completed = "response.completed" in err_text
# The OpenAI SDK's Responses streaming state machine raises
# ``RuntimeError("Expected to have received `response.created`
# before `<event-type>`")`` when the first SSE event from the
# server is anything other than ``response.created`` — and it
# discards the event's payload before we can read it. Three
# real-world backends emit a different first frame:
#
# * xAI on grok-4.x OAuth — sends ``error`` (issues
# reported around the May 2026 SuperGrok rollout when
# multi-turn conversations replay encrypted reasoning
# content the OAuth tier rejects)
# * codex-lb relays — send ``codex.rate_limits`` (#14634)
# * custom Responses relays — send ``response.in_progress``
# (#8133)
#
# In all three cases the underlying byte stream is still
# readable: a non-stream ``responses.create(stream=True)``
# fallback succeeds and surfaces the real provider error as
# a normal exception with body+status_code attached, which
# ``_summarize_api_error`` can then translate into a useful
# user-facing line. Treat ``response.created`` prelude
# errors the same way we already treat ``response.completed``
# postlude errors.
prelude_error = (
"Expected to have received `response.created`" in err_text
or "Expected to have received \"response.created\"" in err_text
)
if (missing_completed or prelude_error) and attempt < max_stream_retries:
logger.debug(
"Responses stream %s (attempt %s/%s); retrying. %s",
"prelude rejected" if prelude_error else "closed before completion",
attempt + 1,
max_stream_retries + 1,
agent._client_log_context(),
)
continue
if missing_completed or prelude_error:
logger.debug(
"Responses stream %s; falling back to create(stream=True). %s err=%s",
"rejected before response.created" if prelude_error else "did not emit response.completed",
agent._client_log_context(),
err_text,
)
return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
raise
try:
# Compatibility: some mocks/providers return a concrete response
# instead of an iterable. Pass it straight through.
if hasattr(event_stream, "output") and not hasattr(event_stream, "__iter__"):
return event_stream
try:
final = _consume_codex_event_stream(
event_stream,
model=api_kwargs.get("model"),
on_text_delta=_on_text_delta,
on_reasoning_delta=_on_reasoning_delta,
on_first_delta=on_first_delta,
on_event=_on_event,
interrupt_check=_interrupt_check,
)
except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
if attempt < max_stream_retries:
logger.debug(
"Codex Responses stream transport failed mid-iteration "
"(attempt %s/%s); retrying. %s error=%s",
attempt + 1, max_stream_retries + 1,
agent._client_log_context(), exc,
)
continue
raise
if final.status in {"incomplete", "failed"}:
logger.warning(
"Codex Responses stream terminal status=%s "
"(incomplete_details=%s, error=%s, streamed_chars=%d). %s",
final.status, final.incomplete_details, final.error,
sum(len(p) for p in agent._codex_streamed_text_parts),
agent._client_log_context(),
)
return final
finally:
close_fn = getattr(event_stream, "close", None)
if callable(close_fn):
try:
close_fn()
except Exception:
pass
def run_codex_create_stream_fallback(agent, api_kwargs: dict, client: Any = None):
"""Fallback path for stream completion edge cases on Codex-style Responses backends."""
active_client = client or agent._ensure_primary_openai_client(reason="codex_create_stream_fallback")
fallback_kwargs = dict(api_kwargs)
fallback_kwargs["stream"] = True
fallback_kwargs = agent._get_transport().preflight_kwargs(fallback_kwargs, allow_stream=True)
stream_or_response = active_client.responses.create(**fallback_kwargs)
# Compatibility shim for mocks or providers that still return a concrete response.
if hasattr(stream_or_response, "output"):
return stream_or_response
if not hasattr(stream_or_response, "__iter__"):
return stream_or_response
terminal_response = None
collected_output_items: list = []
collected_text_deltas: list = []
has_tool_calls = False
try:
for event in stream_or_response:
agent._touch_activity("receiving stream response")
event_type = getattr(event, "type", None)
if not event_type and isinstance(event, dict):
event_type = event.get("type")
# ``error`` SSE frames carry the provider's real failure
# reason (subscription / quota / model-not-available /
# rejected-reasoning-replay) but never appear in the
# ``{completed, incomplete, failed}`` terminal set, so the
# raw loop below would silently consume them and end with
# "did not emit a terminal response". xAI in particular
# emits ``type=error`` as the FIRST frame for OAuth
# accounts whose Grok subscription is missing/exhausted —
# the SDK's stream helper raises ``RuntimeError(Expected
# to have received response.created before error)`` which
# the caller catches and routes here, expecting this
# fallback to surface the message. Synthesize an
# APIError-shaped exception so ``_summarize_api_error``
# and the credential-pool entitlement detector see the
# real text instead of a generic RuntimeError.
if event_type == "error":
err_message = getattr(event, "message", None)
if not err_message and isinstance(event, dict):
err_message = event.get("message")
err_code = getattr(event, "code", None)
if not err_code and isinstance(event, dict):
err_code = event.get("code")
err_param = getattr(event, "param", None)
if not err_param and isinstance(event, dict):
err_param = event.get("param")
err_message = (err_message or "stream emitted error event").strip()
from run_agent import _StreamErrorEvent
raise _StreamErrorEvent(err_message, code=err_code, param=err_param)
# Collect output items and text deltas for backfill
if event_type == "response.output_item.done":
done_item = getattr(event, "item", None)
if done_item is None and isinstance(event, dict):
done_item = event.get("item")
if done_item is not None:
collected_output_items.append(done_item)
elif event_type in {"response.output_text.delta",}:
delta = getattr(event, "delta", "")
if not delta and isinstance(event, dict):
delta = event.get("delta", "")
if delta:
collected_text_deltas.append(delta)
elif event_type and "function_call" in event_type:
has_tool_calls = True
if event_type not in {"response.completed", "response.incomplete", "response.failed"}:
continue
terminal_response = getattr(event, "response", None)
if terminal_response is None and isinstance(event, dict):
terminal_response = event.get("response")
if terminal_response is not None:
# Backfill empty output from collected stream events
_out = getattr(terminal_response, "output", None)
if _out is None or (isinstance(_out, list) and not _out):
recovered = _codex_backfilled_response(
collected_output_items,
collected_text_deltas,
has_tool_calls=has_tool_calls,
model=fallback_kwargs.get("model"),
)
if recovered is not None:
terminal_response.output = recovered.output
logger.debug(
"Codex fallback stream: backfilled missing output "
"(items=%d, text_parts=%d)",
len(collected_output_items),
len(collected_text_deltas),
)
return terminal_response
finally:
close_fn = getattr(stream_or_response, "close", None)
if callable(close_fn):
try:
close_fn()
except Exception:
pass
if terminal_response is not None:
return terminal_response
raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")
"""Backward-compatible alias for the unified event-driven path.
Historically this was the fallback when the SDK's high-level
``responses.stream(...)`` helper raised on shape drift. The primary
path now does exactly what the fallback did, so this just forwards.
Kept as a public symbol because tests and a small number of call sites
still reference it by name.
"""
return run_codex_stream(agent, api_kwargs, client=client)
__all__ = [
"run_codex_app_server_turn",
"run_codex_stream",
"run_codex_create_stream_fallback",
"_consume_codex_event_stream",
]

View file

@ -2222,34 +2222,45 @@ class TestCodexAdapterReasoningTranslation:
@staticmethod
def _build_adapter():
"""Build a _CodexCompletionsAdapter with a mocked responses.stream()."""
"""Build a _CodexCompletionsAdapter with a mocked responses.create()."""
from agent.auxiliary_client import _CodexCompletionsAdapter
from types import SimpleNamespace
# Mock the stream context manager: yields no events, get_final_response
# returns a minimal empty-output response.
fake_final = SimpleNamespace(
output=[SimpleNamespace(
type="message",
content=[SimpleNamespace(type="output_text", text="hi")],
)],
usage=SimpleNamespace(input_tokens=1, output_tokens=1, total_tokens=2),
# The event-driven path consumes ``responses.create(stream=True)`` as a
# raw iterable of SSE events. Emit a minimal stream containing one
# ``response.output_item.done`` (message) and a ``response.completed``
# terminal frame.
message_item = SimpleNamespace(
type="message",
role="assistant",
status="completed",
content=[SimpleNamespace(type="output_text", text="hi")],
)
events = [
SimpleNamespace(type="response.created"),
SimpleNamespace(type="response.output_item.done", item=message_item),
SimpleNamespace(
type="response.completed",
response=SimpleNamespace(
status="completed",
id="resp_test",
usage=SimpleNamespace(input_tokens=1, output_tokens=1, total_tokens=2),
),
),
]
class _FakeStream:
def __enter__(self): return self
def __exit__(self, *a): return False
def __iter__(self): return iter([])
def get_final_response(self): return fake_final
class _FakeCreateStream:
def __iter__(self): return iter(events)
def close(self): pass
captured_kwargs = {}
def _stream(**kwargs):
def _create(**kwargs):
captured_kwargs.update(kwargs)
return _FakeStream()
return _FakeCreateStream()
real_client = MagicMock()
real_client.responses.stream = _stream
real_client.responses.create = _create
adapter = _CodexCompletionsAdapter(real_client, "gpt-5.3-codex")
return adapter, captured_kwargs
@ -2476,33 +2487,29 @@ class TestVisionAutoSkipsKimiCoding:
class TestCodexAuxiliaryAdapterTimeout:
def test_forwards_timeout_to_responses_stream(self):
class FakeStream:
def __enter__(self):
return self
def test_forwards_timeout_to_responses_create(self):
message_item = SimpleNamespace(
type="message",
content=[SimpleNamespace(type="output_text", text="summary")],
)
events = [
SimpleNamespace(type="response.output_item.done", item=message_item),
SimpleNamespace(type="response.completed", response=SimpleNamespace(
status="completed", id="r1", usage=None,
)),
]
def __exit__(self, exc_type, exc, tb):
return False
def __iter__(self):
return iter(())
def get_final_response(self):
return SimpleNamespace(
output=[SimpleNamespace(
type="message",
content=[SimpleNamespace(type="output_text", text="summary")],
)],
usage=None,
)
class _FakeCreateStream:
def __iter__(self): return iter(events)
def close(self): pass
class FakeResponses:
def __init__(self):
self.kwargs = None
def stream(self, **kwargs):
def create(self, **kwargs):
self.kwargs = kwargs
return FakeStream()
return _FakeCreateStream()
fake_client = SimpleNamespace(responses=FakeResponses())
adapter = _CodexCompletionsAdapter(fake_client, "gpt-5.5")
@ -2513,33 +2520,21 @@ class TestCodexAuxiliaryAdapterTimeout:
)
assert fake_client.responses.kwargs["timeout"] == 12.5
assert fake_client.responses.kwargs["stream"] is True
assert response.choices[0].message.content == "summary"
def test_enforces_total_timeout_while_stream_keeps_emitting_events(self):
class SlowAliveStream:
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
class _SlowAliveCreateStream:
def __iter__(self):
for _ in range(5):
time.sleep(0.03)
yield SimpleNamespace(type="response.in_progress")
def get_final_response(self):
return SimpleNamespace(
output=[SimpleNamespace(
type="message",
content=[SimpleNamespace(type="output_text", text="late")],
)],
usage=None,
)
def close(self): pass
class FakeResponses:
def stream(self, **kwargs):
return SlowAliveStream()
def create(self, **kwargs):
return _SlowAliveCreateStream()
fake_client = SimpleNamespace(responses=FakeResponses(), close=lambda: None)
adapter = _CodexCompletionsAdapter(fake_client, "gpt-5.5")
@ -2555,34 +2550,39 @@ class TestCodexAuxiliaryAdapterTimeout:
class TestCodexAuxiliaryAdapterNullOutputRecovery:
def test_recovers_output_item_when_sdk_raises_during_iteration(self):
"""Regression for #11179 in auxiliary calls such as compression/title generation."""
def test_recovers_output_item_when_terminal_event_has_null_output(self):
"""Regression for #11179 in auxiliary calls.
The wire shape that broke the SDK is ``response.completed`` with
``response.output = null``. The event-driven path is structurally
immune because it reconstructs from ``response.output_item.done``
events and never reads the terminal event's ``output`` field for
content. Assert the auxiliary path returns the streamed item even
when the terminal frame's output is ``null``.
"""
output_item = SimpleNamespace(
type="message",
content=[SimpleNamespace(type="output_text", text="aux survived")],
)
events = [
SimpleNamespace(type="response.created"),
SimpleNamespace(type="response.output_item.done", item=output_item),
SimpleNamespace(type="response.completed", response=SimpleNamespace(
status="completed",
id="resp_null_output",
# This is the field the SDK helper would have iterated and crashed on:
output=None,
usage=None,
)),
]
class NullOutputParseStream:
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def __iter__(self):
yield SimpleNamespace(type="response.output_item.done", item=output_item)
raise TypeError("'NoneType' object is not iterable")
def get_final_response(self): # pragma: no cover - iterator fails first
raise AssertionError("get_final_response should not be reached")
class _NullOutputCreateStream:
def __iter__(self): return iter(events)
def close(self): pass
class FakeResponses:
def __init__(self):
self.create = MagicMock()
def stream(self, **kwargs):
return NullOutputParseStream()
def create(self, **kwargs):
return _NullOutputCreateStream()
fake_client = SimpleNamespace(responses=FakeResponses())
adapter = _CodexCompletionsAdapter(fake_client, "gpt-5.5")
@ -2590,7 +2590,6 @@ class TestCodexAuxiliaryAdapterNullOutputRecovery:
response = adapter.create(messages=[{"role": "user", "content": "summarize"}])
assert response.choices[0].message.content == "aux survived"
fake_client.responses.create.assert_not_called()
# ---------------------------------------------------------------------------
@ -2696,26 +2695,19 @@ class TestAuxiliaryClientPoisonedCacheEviction:
_CodexCompletionsAdapter, CodexAuxiliaryClient,
)
class SlowAliveStream:
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
class _SlowAliveCreateStream:
def __iter__(self):
for _ in range(20):
time.sleep(0.01)
yield SimpleNamespace(type="response.in_progress")
def get_final_response(self): # pragma: no cover — timeout fires first
return SimpleNamespace(output=[], usage=None)
def close(self): pass
closed = {"flag": False}
class FakeClient:
def __init__(self):
self.responses = SimpleNamespace(stream=lambda **k: SlowAliveStream())
self.responses = SimpleNamespace(create=lambda **k: _SlowAliveCreateStream())
self.api_key = "k"
self.base_url = "https://chatgpt.com/backend-api/codex"

View file

@ -37,7 +37,18 @@ import pytest
# ---------------------------------------------------------------------------
# Fix A: prelude error fallback
# Fix A: prelude error surfacing via wire `error` events
#
# With the migration to ``responses.create(stream=True)`` raw event iteration,
# the SDK's high-level state-machine RuntimeError no longer mediates between
# the wire and us — we read the wire directly. When the chatgpt.com Codex
# backend (or xAI, codex-lb, custom relays) emits a ``type=error`` frame as
# its first event, our consumer raises ``_StreamErrorEvent`` straight from
# the wire payload, which carries the real provider message in ``.body`` /
# ``.message`` shape for ``_summarize_api_error`` to consume. This is
# strictly better than the old "SDK raises RuntimeError → we retry → fall
# back to a second non-stream call" two-phase dance, because the error
# surfaces on the first event instead of after one wasted round trip.
# ---------------------------------------------------------------------------
@ -60,105 +71,104 @@ def _make_codex_agent():
@pytest.mark.parametrize(
"prelude_event_type",
"provider_message",
[
"error", # xAI OAuth multi-turn
"codex.rate_limits", # codex-lb relays (#14634)
"response.in_progress", # custom Responses relays (#8133)
"You do not have an active Grok subscription",
"rate limit exceeded",
"model not available",
],
)
def test_codex_stream_prelude_error_falls_back_to_create_stream(prelude_event_type):
"""The SDK's prelude RuntimeError must trigger the non-stream fallback.
def test_codex_stream_wire_error_event_surfaces_stream_error_event(provider_message):
"""A wire ``type=error`` SSE frame raises ``_StreamErrorEvent`` with the
provider's real message in the body."""
from run_agent import _StreamErrorEvent
When the first SSE event isn't ``response.created``, openai-python
raises RuntimeError before our event loop sees anything. We must
detect that, retry once, then fall back to ``create(stream=True)``
which surfaces the real provider error or a real response.
"""
agent = _make_codex_agent()
prelude_error = RuntimeError(
f"Expected to have received `response.created` before `{prelude_event_type}`"
)
class _ErrorCreateStream:
def __iter__(self_inner):
yield SimpleNamespace(type="error", message=provider_message, code="forbidden")
def close(self_inner):
pass
mock_client = MagicMock()
mock_client.responses.stream.side_effect = prelude_error
mock_client.responses.create.return_value = _ErrorCreateStream()
fallback_response = SimpleNamespace(
output=[SimpleNamespace(
type="message",
content=[SimpleNamespace(type="output_text", text="fallback ok")],
)],
status="completed",
)
with pytest.raises(_StreamErrorEvent) as excinfo:
agent._run_codex_stream({}, client=mock_client)
with patch.object(
agent, "_run_codex_create_stream_fallback", return_value=fallback_response
) as mock_fallback:
result = agent._run_codex_stream({}, client=mock_client)
assert result is fallback_response
mock_fallback.assert_called_once_with({}, client=mock_client)
assert provider_message in str(excinfo.value)
assert excinfo.value.body["error"]["message"] == provider_message
def test_codex_stream_prelude_error_retries_once_before_fallback():
"""The retry path must fire one extra stream attempt before falling back."""
def test_codex_stream_retries_remote_protocol_error_once():
"""Transport errors (``httpx.RemoteProtocolError``) trigger a single retry.
Previously this was on the ``responses.stream(...)`` helper; now it's on
``responses.create(stream=True)`` itself. The user-facing behavior is the
same: one retry, then re-raise if the second attempt also fails.
"""
import httpx
agent = _make_codex_agent()
call_count = {"n": 0}
def stream_side_effect(**kwargs):
def create_side_effect(**kwargs):
call_count["n"] += 1
raise RuntimeError(
"Expected to have received `response.created` before `error`"
raise httpx.RemoteProtocolError(
"peer closed connection without sending complete message body"
)
mock_client = MagicMock()
mock_client.responses.stream.side_effect = stream_side_effect
mock_client.responses.create.side_effect = create_side_effect
fallback_response = SimpleNamespace(output=[], status="completed")
with patch.object(
agent, "_run_codex_create_stream_fallback", return_value=fallback_response
) as mock_fallback:
with pytest.raises(httpx.RemoteProtocolError):
agent._run_codex_stream({}, client=mock_client)
# max_stream_retries=1 → one retry + final attempt → 2 stream calls,
# THEN the fallback path runs.
# max_stream_retries=1 → one retry + final attempt → 2 create calls total.
assert call_count["n"] == 2
mock_fallback.assert_called_once()
def test_codex_stream_unrelated_runtimeerror_still_raises():
"""RuntimeErrors that aren't prelude/postlude shape must propagate."""
"""RuntimeErrors that aren't transport errors must propagate.
With the event-driven path there's no separate fallback function to
short-circuit into; any RuntimeError from ``responses.create()`` or the
consumer surfaces directly.
"""
agent = _make_codex_agent()
mock_client = MagicMock()
mock_client.responses.stream.side_effect = RuntimeError("something else broke")
mock_client.responses.create.side_effect = RuntimeError("something else broke")
with patch.object(agent, "_run_codex_create_stream_fallback") as mock_fallback:
with pytest.raises(RuntimeError, match="something else broke"):
agent._run_codex_stream({}, client=mock_client)
mock_fallback.assert_not_called()
with pytest.raises(RuntimeError, match="something else broke"):
agent._run_codex_stream({}, client=mock_client)
def test_codex_stream_postlude_error_still_falls_back():
"""Existing ``response.completed`` fallback must not regress."""
def test_codex_stream_truncated_no_terminal_event_raises():
"""Streams that end without a terminal event AND no items raise.
Preserves the "Codex Responses stream did not emit a terminal response"
signal callers use to distinguish "stream truncated mid-flight" from
"stream completed with empty body". Previously surfaced by the SDK's
``RuntimeError("Didn't receive a `response.completed` event.")``; now
surfaced directly by the event consumer.
"""
agent = _make_codex_agent()
class _EmptyStream:
def __iter__(self_inner):
return iter(())
def close(self_inner):
pass
mock_client = MagicMock()
mock_client.responses.stream.side_effect = RuntimeError(
"Didn't receive a `response.completed` event."
)
mock_client.responses.create.return_value = _EmptyStream()
fallback_response = SimpleNamespace(output=[], status="completed")
with patch.object(
agent, "_run_codex_create_stream_fallback", return_value=fallback_response
) as mock_fallback:
result = agent._run_codex_stream({}, client=mock_client)
assert result is fallback_response
mock_fallback.assert_called_once()
with pytest.raises(RuntimeError, match="did not emit a terminal response"):
agent._run_codex_stream({}, client=mock_client)
# ---------------------------------------------------------------------------

View file

@ -154,27 +154,13 @@ def _codex_ack_message_response(text: str):
)
class _FakeResponsesStream:
def __init__(self, *, final_response=None, final_error=None):
self._final_response = final_response
self._final_error = final_error
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def __iter__(self):
return iter(())
def get_final_response(self):
if self._final_error is not None:
raise self._final_error
return self._final_response
class _FakeCreateStream:
"""Iterable-only fake for ``responses.create(stream=True)`` outputs.
The event-driven Codex path expects an iterable that yields SSE events;
tests use this to drive it through the same code paths the wire does.
"""
def __init__(self, events):
self._events = list(events)
self.closed = False
@ -186,27 +172,6 @@ class _FakeCreateStream:
self.closed = True
class _IteratorTypeErrorStream:
"""Mimic the SDK raising while parsing response.completed.output=None."""
def __init__(self, events_before_error):
self._events_before_error = list(events_before_error)
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def __iter__(self):
for event in self._events_before_error:
yield event
raise TypeError("'NoneType' object is not iterable")
def get_final_response(self): # pragma: no cover - iterator fails first
raise AssertionError("get_final_response should not be reached")
def _codex_request_kwargs():
return {
"model": "gpt-5-codex",
@ -418,60 +383,75 @@ def test_build_api_kwargs_copilot_responses_omits_reasoning_for_non_reasoning_mo
assert "prompt_cache_key" not in kwargs
def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch):
def test_run_codex_stream_returns_collected_items_when_stream_ends_without_terminal(monkeypatch):
"""The event-driven path tolerates streams that end without a terminal frame.
Previously the SDK's ``responses.stream(...)`` helper raised
``RuntimeError("Didn't receive a `response.completed` event.")`` which the
primary path caught and retried/fell back through. The new
``responses.create(stream=True)`` path consumes events directly and just
returns whatever it collected no retry, no separate fallback path.
"""
agent = _build_agent(monkeypatch)
calls = {"stream": 0}
def _fake_stream(**kwargs):
calls["stream"] += 1
if calls["stream"] == 1:
return _FakeResponsesStream(
final_error=RuntimeError("Didn't receive a `response.completed` event.")
)
return _FakeResponsesStream(final_response=_codex_message_response("stream ok"))
agent.client = SimpleNamespace(
responses=SimpleNamespace(
stream=_fake_stream,
create=lambda **kwargs: _codex_message_response("fallback"),
)
output_item = SimpleNamespace(
type="message",
status="completed",
content=[SimpleNamespace(type="output_text", text="no terminal frame")],
)
response = agent._run_codex_stream(_codex_request_kwargs())
assert calls["stream"] == 2
assert response.output[0].content[0].text == "stream ok"
def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(monkeypatch):
agent = _build_agent(monkeypatch)
calls = {"stream": 0, "create": 0}
def _fake_stream(**kwargs):
calls["stream"] += 1
return _FakeResponsesStream(
final_error=RuntimeError("Didn't receive a `response.completed` event.")
)
calls = {"create": 0}
def _fake_create(**kwargs):
calls["create"] += 1
return _codex_message_response("create fallback ok")
assert kwargs.get("stream") is True
return _FakeCreateStream([
SimpleNamespace(type="response.created"),
SimpleNamespace(type="response.output_item.done", item=output_item),
# stream ends without a response.completed/incomplete/failed frame
])
agent.client = SimpleNamespace(
responses=SimpleNamespace(
stream=_fake_stream,
create=_fake_create,
)
responses=SimpleNamespace(create=_fake_create),
)
response = agent._run_codex_stream(_codex_request_kwargs())
assert calls["stream"] == 2
assert calls["create"] == 1
assert response.output[0].content[0].text == "create fallback ok"
assert response.status == "completed"
assert response.output == [output_item]
def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch):
def test_run_codex_stream_surfaces_failed_status_in_final_response(monkeypatch):
"""A ``response.failed`` terminal event is reflected on the returned object."""
agent = _build_agent(monkeypatch)
calls = {"stream": 0, "create": 0}
error_payload = {"message": "model overloaded", "code": "overloaded"}
failed_event = SimpleNamespace(
type="response.failed",
response=SimpleNamespace(
status="failed",
error=error_payload,
id="resp_failed_1",
usage=None,
),
)
def _fake_create(**kwargs):
return _FakeCreateStream([
SimpleNamespace(type="response.created"),
failed_event,
])
agent.client = SimpleNamespace(
responses=SimpleNamespace(create=_fake_create),
)
response = agent._run_codex_stream(_codex_request_kwargs())
assert response.status == "failed"
assert response.error == error_payload
def test_run_codex_stream_parses_create_stream_events(monkeypatch):
"""The primary path consumes ``responses.create(stream=True)`` events directly."""
agent = _build_agent(monkeypatch)
calls = {"create": 0}
create_stream = _FakeCreateStream(
[
SimpleNamespace(type="response.created"),
@ -480,62 +460,26 @@ def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch):
]
)
def _fake_stream(**kwargs):
calls["stream"] += 1
return _FakeResponsesStream(
final_error=RuntimeError("Didn't receive a `response.completed` event.")
)
def _fake_create(**kwargs):
calls["create"] += 1
assert kwargs.get("stream") is True
return create_stream
agent.client = SimpleNamespace(
responses=SimpleNamespace(
stream=_fake_stream,
create=_fake_create,
)
responses=SimpleNamespace(create=_fake_create),
)
response = agent._run_codex_stream(_codex_request_kwargs())
assert calls["stream"] == 2
assert calls["create"] == 1
assert create_stream.closed is True
assert response.output[0].content[0].text == "streamed create ok"
def test_run_codex_stream_falls_back_when_stream_iteration_parses_null_output(monkeypatch):
"""Regression for #11179: the SDK can raise while iterating response.completed.
The failure happens before get_final_response(), so post-loop backfill alone is
not enough. Preserve already streamed output_item.done events.
"""
agent = _build_agent(monkeypatch)
output_item = SimpleNamespace(
type="message",
status="completed",
content=[SimpleNamespace(type="output_text", text="stream item survived")],
)
calls = {"stream": 0}
def _fake_stream(**kwargs):
calls["stream"] += 1
return _IteratorTypeErrorStream([
SimpleNamespace(type="response.output_item.done", item=output_item),
])
def _unexpected_create(**kwargs): # pragma: no cover - recovery should avoid fallback call
raise AssertionError("create fallback should not be needed when output items were collected")
agent.client = SimpleNamespace(
responses=SimpleNamespace(stream=_fake_stream, create=_unexpected_create),
)
response = agent._run_codex_stream(_codex_request_kwargs())
assert calls["stream"] == 1
assert response.output == [output_item]
# The wire's response.completed.response.output is a list with the message item,
# but the event-driven path reconstructs from response.output_item.done.
# _codex_message_response returns a SimpleNamespace whose .output is a list of
# items — we don't read those directly, we read the items via output_item.done,
# but this fixture doesn't emit output_item.done. So the consumer assembles a
# message from streamed text deltas if present, or returns the items it has.
# For backward compatibility with the helper that builds _codex_message_response,
# we just assert status is completed and id propagated.
assert response.status == "completed"

View file

@ -783,32 +783,28 @@ class TestCodexStreamCallbacks:
agent.api_mode = "codex_responses"
agent._interrupt_requested = False
# Mock the stream context manager
mock_event_text = SimpleNamespace(
type="response.output_text.delta",
delta="Hello from Codex!",
)
mock_event_done = SimpleNamespace(
type="response.completed",
delta="",
)
events = [
SimpleNamespace(type="response.created"),
SimpleNamespace(
type="response.output_text.delta",
delta="Hello from Codex!",
),
SimpleNamespace(
type="response.completed",
response=SimpleNamespace(status="completed", id="r1", usage=None),
),
]
mock_stream = MagicMock()
mock_stream.__enter__ = MagicMock(return_value=mock_stream)
mock_stream.__exit__ = MagicMock(return_value=False)
mock_stream.__iter__ = MagicMock(return_value=iter([mock_event_text, mock_event_done]))
mock_stream.get_final_response.return_value = SimpleNamespace(
output=[SimpleNamespace(
type="message",
content=[SimpleNamespace(type="output_text", text="Hello from Codex!")],
)],
status="completed",
)
class _FakeCreateStream:
def __iter__(self_inner):
return iter(events)
def close(self_inner):
return None
mock_client = MagicMock()
mock_client.responses.stream.return_value = mock_stream
mock_client.responses.create.return_value = _FakeCreateStream()
response = agent._run_codex_stream({}, client=mock_client)
agent._run_codex_stream({}, client=mock_client)
assert "Hello from Codex!" in deltas
def test_codex_stream_refreshes_activity_on_every_event(self):
@ -828,57 +824,40 @@ class TestCodexStreamCallbacks:
touch_calls = []
agent._touch_activity = lambda desc: touch_calls.append(desc)
mock_event_text_1 = SimpleNamespace(
type="response.output_text.delta",
delta="Hello",
)
mock_event_text_2 = SimpleNamespace(
type="response.output_text.delta",
delta=" world",
)
mock_event_done = SimpleNamespace(
type="response.completed",
delta="",
)
events = [
SimpleNamespace(type="response.output_text.delta", delta="Hello"),
SimpleNamespace(type="response.output_text.delta", delta=" world"),
SimpleNamespace(
type="response.completed",
response=SimpleNamespace(status="completed", id="r2", usage=None),
),
]
mock_stream = MagicMock()
mock_stream.__enter__ = MagicMock(return_value=mock_stream)
mock_stream.__exit__ = MagicMock(return_value=False)
mock_stream.__iter__ = MagicMock(
return_value=iter([mock_event_text_1, mock_event_text_2, mock_event_done])
)
mock_stream.get_final_response.return_value = SimpleNamespace(
output=[SimpleNamespace(
type="message",
content=[SimpleNamespace(type="output_text", text="Hello world")],
)],
status="completed",
)
class _FakeCreateStream:
def __iter__(self_inner):
return iter(events)
def close(self_inner):
return None
mock_client = MagicMock()
mock_client.responses.stream.return_value = mock_stream
mock_client.responses.create.return_value = _FakeCreateStream()
agent._run_codex_stream({}, client=mock_client)
assert touch_calls.count("receiving stream response") == 3
def test_codex_remote_protocol_error_falls_back_to_create_stream(self):
def test_codex_remote_protocol_error_retries_then_raises(self):
"""Transport errors from ``responses.create`` retry once then re-raise.
With the migration from ``responses.stream(...)`` to
``responses.create(stream=True)``, there is no longer a separate
fallback function the same call IS the streaming path. When it
raises ``httpx.RemoteProtocolError``, we retry once (matching the
old behavior on the helper) and re-raise on the second failure.
"""
from run_agent import AIAgent
import httpx
fallback_response = SimpleNamespace(
output=[SimpleNamespace(
type="message",
content=[SimpleNamespace(type="output_text", text="fallback from create stream")],
)],
status="completed",
)
mock_client = MagicMock()
mock_client.responses.stream.side_effect = httpx.RemoteProtocolError(
"peer closed connection without sending complete message body"
)
agent = AIAgent(
api_key="test-key",
base_url="https://openrouter.ai/api/v1",
@ -890,11 +869,22 @@ class TestCodexStreamCallbacks:
agent.api_mode = "codex_responses"
agent._interrupt_requested = False
with patch.object(agent, "_run_codex_create_stream_fallback", return_value=fallback_response) as mock_fallback:
response = agent._run_codex_stream({}, client=mock_client)
call_count = {"n": 0}
assert response is fallback_response
mock_fallback.assert_called_once_with({}, client=mock_client)
def _create_side_effect(**kwargs):
call_count["n"] += 1
raise httpx.RemoteProtocolError(
"peer closed connection without sending complete message body"
)
mock_client = MagicMock()
mock_client.responses.create.side_effect = _create_side_effect
with pytest.raises(httpx.RemoteProtocolError):
agent._run_codex_stream({}, client=mock_client)
# 1 initial + 1 retry = 2 calls
assert call_count["n"] == 2
def test_codex_create_stream_fallback_refreshes_activity_on_every_event(self):
from run_agent import AIAgent