mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
refactor(codex): drop SDK responses.stream() helper; consume events directly (#33042)
* refactor(codex): drop SDK responses.stream() helper; consume events directly The OpenAI Python SDK's high-level `client.responses.stream(...)` helper does post-hoc typed reconstruction from the terminal `response.completed.response.output` field. The chatgpt.com Codex backend has been observed (today, gpt-5.5) to ship `response.output = null` on terminal frames, which crashes the SDK with `TypeError: 'NoneType' object is not iterable` mid-iteration. Carlton's #32963 patched the symptom by wrapping the helper in try/except and recovering from the same per-event accumulator the SDK was supposed to populate. This PR removes the helper from the call path entirely: we now use `client.responses.create(stream=True)` (raw AsyncIterable of SSE events) and assemble the final response object ourselves from `response.output_item.done` events as they arrive. The terminal event's `output` field is never read for content. Same strategy OpenClaw uses for the same backend. This makes Hermes structurally immune to the bug class, not patched. The next time OpenAI ships a shape change to chatgpt.com's terminal frame, our consumer keeps working because it doesn't read that frame for content — only for usage/status/id. Changes - `agent/codex_runtime.py`: new `_consume_codex_event_stream()` shared consumer; `run_codex_stream()` uses `responses.create(stream=True)`; `run_codex_create_stream_fallback()` collapses into a thin alias since the primary path now does what the fallback used to do. - `agent/auxiliary_client.py`: `_CodexCompletionsAdapter` uses the same consumer; old null-output recovery helpers deleted as unreferenced. - Tests migrated: fixtures that mocked `responses.stream` now mock `responses.create` returning a raw iterable. New regression test asserts the auxiliary path returns streamed items even when the terminal event's `output` is literally `null`. Validation - Live: tested against fresh OAuth on `chatgpt.com/backend-api/codex` with `gpt-5.5` — response built correctly with `response.output=null` on the terminal frame, all events consumed, usage/reasoning tokens propagated. - `tests/run_agent/test_run_agent_codex_responses.py` + `tests/agent/test_auxiliary_client.py`: 242 passed. * test+fix(codex): migrate streaming tests, raise on truncated streams CI surfaced 10 test failures across tests/run_agent/test_streaming.py and tests/run_agent/test_codex_xai_oauth_recovery.py — both files had their own `responses.stream(...)` mocks I missed in the first sweep. agent/codex_runtime.py: _consume_codex_event_stream() now raises "Codex Responses stream did not emit a terminal response" when the stream ends without any terminal frame AND no usable content. This preserves the signal callers used to get from the SDK's high-level helper, which they distinguished from "completed with empty body" in error handling. Tests migrated: - test_streaming.py: text-delta callback, activity-touch, and remote-protocol-error tests all switch from mocking responses.stream to responses.create returning an iterable of events. - test_codex_xai_oauth_recovery.py: prelude-error tests are recast as wire-error-event tests (the new path raises _StreamErrorEvent directly when the wire emits type=error, which is strictly better than the old two-phase "SDK RuntimeError → retry → fallback"). The retry-on-transport-error test moves from responses.stream side-effect to responses.create side-effect. Verified live against chatgpt.com Codex with gpt-5.5 — AIAgent.chat() through the full codex_responses path returns correctly, 319/319 targeted tests passing.
This commit is contained in:
parent
fb298a958c
commit
cb38ce28cb
6 changed files with 645 additions and 725 deletions
|
|
@ -107,32 +107,6 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _responses_null_output_iterable_error(exc: BaseException) -> bool:
|
||||
"""True when the OpenAI SDK trips over terminal response.output=None."""
|
||||
text = str(exc)
|
||||
return isinstance(exc, TypeError) and "NoneType" in text and "not iterable" in text
|
||||
|
||||
|
||||
def _responses_backfilled_response(output_items: List[Any], text_parts: List[str], *, has_function_calls: bool, model: str = None) -> Optional[Any]:
|
||||
"""Build a minimal Responses-like object from already streamed events."""
|
||||
if output_items:
|
||||
return SimpleNamespace(output=list(output_items), usage=None, status="completed", model=model)
|
||||
if text_parts and not has_function_calls:
|
||||
assembled = "".join(text_parts)
|
||||
return SimpleNamespace(
|
||||
output=[SimpleNamespace(
|
||||
type="message",
|
||||
role="assistant",
|
||||
status="completed",
|
||||
content=[SimpleNamespace(type="output_text", text=assembled)],
|
||||
)],
|
||||
usage=None,
|
||||
status="completed",
|
||||
model=model,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def _safe_isinstance(obj: Any, maybe_type: Any) -> bool:
|
||||
"""Return False instead of raising when a patched symbol is not a type."""
|
||||
try:
|
||||
|
|
@ -811,77 +785,53 @@ class _CodexCompletionsAdapter:
|
|||
pass
|
||||
|
||||
try:
|
||||
# Collect output items and text deltas during streaming —
|
||||
# the Codex backend can return empty response.output from
|
||||
# get_final_response() even when items were streamed.
|
||||
collected_output_items: List[Any] = []
|
||||
collected_text_deltas: List[str] = []
|
||||
has_function_calls = False
|
||||
if total_timeout:
|
||||
timeout_timer = threading.Timer(float(total_timeout), _close_client_on_timeout)
|
||||
timeout_timer.daemon = True
|
||||
timeout_timer.start()
|
||||
_check_cancelled()
|
||||
final = None
|
||||
with self._client.responses.stream(**resp_kwargs) as stream:
|
||||
try:
|
||||
for _event in stream:
|
||||
_check_cancelled()
|
||||
_etype = getattr(_event, "type", "")
|
||||
if _etype == "response.output_item.done":
|
||||
_done = getattr(_event, "item", None)
|
||||
if _done is not None:
|
||||
collected_output_items.append(_done)
|
||||
elif "output_text.delta" in _etype:
|
||||
_delta = getattr(_event, "delta", "")
|
||||
if _delta:
|
||||
collected_text_deltas.append(_delta)
|
||||
elif "function_call" in _etype:
|
||||
has_function_calls = True
|
||||
_check_cancelled()
|
||||
final = stream.get_final_response()
|
||||
except TypeError as exc:
|
||||
if not _responses_null_output_iterable_error(exc):
|
||||
raise
|
||||
final = _responses_backfilled_response(
|
||||
collected_output_items,
|
||||
collected_text_deltas,
|
||||
has_function_calls=has_function_calls,
|
||||
model=resp_kwargs.get("model"),
|
||||
)
|
||||
if final is None:
|
||||
raise
|
||||
logger.debug(
|
||||
"Codex auxiliary Responses stream parser hit response.output=None; "
|
||||
"recovered from streamed events (items=%d, text_parts=%d)",
|
||||
len(collected_output_items),
|
||||
len(collected_text_deltas),
|
||||
)
|
||||
|
||||
# Event-driven Responses streaming via the low-level
|
||||
# ``responses.create(stream=True)`` path. The high-level
|
||||
# ``responses.stream(...)`` helper does post-hoc typed
|
||||
# reconstruction from ``response.completed.response.output``,
|
||||
# which the chatgpt.com Codex backend has been observed to
|
||||
# return as ``null`` (gpt-5.5, May 2026) — that crashes the SDK
|
||||
# with ``TypeError: 'NoneType' object is not iterable``.
|
||||
# Consuming raw events and assembling the final response
|
||||
# ourselves from ``response.output_item.done`` makes us
|
||||
# structurally immune to that drift.
|
||||
from agent.codex_runtime import _consume_codex_event_stream
|
||||
|
||||
stream_kwargs = dict(resp_kwargs)
|
||||
stream_kwargs["stream"] = True
|
||||
|
||||
def _on_each_event(_event: Any) -> None:
|
||||
# Re-check timeout/cancellation per event, matching the
|
||||
# cadence the old in-line ``_check_cancelled()`` used.
|
||||
_check_cancelled()
|
||||
|
||||
event_stream = self._client.responses.create(**stream_kwargs)
|
||||
try:
|
||||
final = _consume_codex_event_stream(
|
||||
event_stream,
|
||||
model=resp_kwargs.get("model"),
|
||||
on_event=_on_each_event,
|
||||
)
|
||||
finally:
|
||||
close_fn = getattr(event_stream, "close", None)
|
||||
if callable(close_fn):
|
||||
try:
|
||||
close_fn()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if final is None:
|
||||
raise RuntimeError("Codex auxiliary Responses stream did not return a final response")
|
||||
|
||||
# Backfill empty output from collected stream events
|
||||
_output = getattr(final, "output", None)
|
||||
if _output is None or (isinstance(_output, list) and not _output):
|
||||
recovered = _responses_backfilled_response(
|
||||
collected_output_items,
|
||||
collected_text_deltas,
|
||||
has_function_calls=has_function_calls,
|
||||
model=resp_kwargs.get("model"),
|
||||
)
|
||||
if recovered is not None:
|
||||
final.output = recovered.output
|
||||
logger.debug(
|
||||
"Codex auxiliary: backfilled missing output from stream events "
|
||||
"(items=%d, text_parts=%d)",
|
||||
len(collected_output_items),
|
||||
len(collected_text_deltas),
|
||||
)
|
||||
|
||||
# Extract text and tool calls from the Responses output.
|
||||
# Items may be SDK objects (attrs) or dicts (raw/fallback paths),
|
||||
# so use a helper that handles both shapes.
|
||||
# Items may be SimpleNamespace (raw-event path) or dicts
|
||||
# (some legacy fallback paths), so handle both shapes.
|
||||
def _item_get(obj: Any, key: str, default: Any = None) -> Any:
|
||||
val = getattr(obj, key, None)
|
||||
if val is None and isinstance(obj, dict):
|
||||
|
|
@ -908,9 +858,12 @@ class _CodexCompletionsAdapter:
|
|||
resp_usage = getattr(final, "usage", None)
|
||||
if resp_usage:
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=getattr(resp_usage, "input_tokens", 0),
|
||||
completion_tokens=getattr(resp_usage, "output_tokens", 0),
|
||||
total_tokens=getattr(resp_usage, "total_tokens", 0),
|
||||
prompt_tokens=getattr(resp_usage, "input_tokens", 0)
|
||||
or (resp_usage.get("input_tokens", 0) if isinstance(resp_usage, dict) else 0),
|
||||
completion_tokens=getattr(resp_usage, "output_tokens", 0)
|
||||
or (resp_usage.get("output_tokens", 0) if isinstance(resp_usage, dict) else 0),
|
||||
total_tokens=getattr(resp_usage, "total_tokens", 0)
|
||||
or (resp_usage.get("total_tokens", 0) if isinstance(resp_usage, dict) else 0),
|
||||
)
|
||||
except Exception as exc:
|
||||
if timed_out.is_set():
|
||||
|
|
|
|||
|
|
@ -174,332 +174,363 @@ def run_codex_app_server_turn(
|
|||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Event-driven Responses streaming
|
||||
#
|
||||
# OpenAI ships its consumer Codex backend (chatgpt.com/backend-api/codex) on
|
||||
# a different schedule from the openai Python SDK. The high-level
|
||||
# ``client.responses.stream(...)`` helper reconstructs a typed Response from
|
||||
# the terminal ``response.completed`` event's ``response.output`` field, and
|
||||
# when that field drifts to ``null`` (gpt-5.5, May 2026) the SDK raises
|
||||
# ``TypeError: 'NoneType' object is not iterable`` mid-iteration.
|
||||
#
|
||||
# We sidestep the whole class of failure by going one level lower:
|
||||
# ``client.responses.create(stream=True)`` returns the raw AsyncIterable of
|
||||
# SSE events, and we assemble the final response object purely from
|
||||
# ``response.output_item.done`` events as they arrive. We never read
|
||||
# ``response.completed.response.output`` for content reconstruction, so the
|
||||
# backend can return ``null``, ``[]``, a string, or omit the field entirely
|
||||
# and we don't care.
|
||||
#
|
||||
# This mirrors what the OpenClaw TS implementation does for the same backend
|
||||
# and is structurally immune to the bug class rather than patched.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _responses_null_output_iterable_error(exc: BaseException) -> bool:
|
||||
"""True when the OpenAI SDK trips over terminal response.output=None."""
|
||||
text = str(exc)
|
||||
return isinstance(exc, TypeError) and "NoneType" in text and "not iterable" in text
|
||||
_TERMINAL_EVENT_TYPES = frozenset({
|
||||
"response.completed",
|
||||
"response.incomplete",
|
||||
"response.failed",
|
||||
})
|
||||
|
||||
|
||||
def _codex_backfilled_response(output_items: list, text_parts: list, *, has_tool_calls: bool, model: str = None):
|
||||
"""Build a minimal Responses-like object from events already streamed."""
|
||||
if output_items:
|
||||
return SimpleNamespace(
|
||||
output=list(output_items),
|
||||
usage=None,
|
||||
def _event_field(event: Any, name: str, default: Any = None) -> Any:
|
||||
"""Field access that handles both attr-style (SDK objects) and dict (raw JSON) events."""
|
||||
value = getattr(event, name, None)
|
||||
if value is None and isinstance(event, dict):
|
||||
value = event.get(name, default)
|
||||
return value if value is not None else default
|
||||
|
||||
|
||||
def _raise_stream_error(event: Any) -> None:
|
||||
"""Raise a ``_StreamErrorEvent`` from a ``type=error`` SSE frame.
|
||||
|
||||
Imported lazily so this module stays importable from places that don't
|
||||
pull in ``run_agent`` (e.g. plugin code, doc tools).
|
||||
"""
|
||||
from run_agent import _StreamErrorEvent
|
||||
message = (_event_field(event, "message", "") or "stream emitted error event").strip()
|
||||
raise _StreamErrorEvent(
|
||||
message,
|
||||
code=_event_field(event, "code"),
|
||||
param=_event_field(event, "param"),
|
||||
)
|
||||
|
||||
|
||||
def _consume_codex_event_stream(
|
||||
event_iter: Any,
|
||||
*,
|
||||
model: str,
|
||||
on_text_delta=None,
|
||||
on_reasoning_delta=None,
|
||||
on_first_delta=None,
|
||||
on_event=None,
|
||||
interrupt_check=None,
|
||||
) -> SimpleNamespace:
|
||||
"""Consume a Codex Responses SSE event stream and return a final response.
|
||||
|
||||
The returned object is a ``SimpleNamespace`` shaped like the SDK's typed
|
||||
``Response`` for the fields downstream code actually reads:
|
||||
|
||||
* ``output``: list of output items, assembled from ``response.output_item.done``.
|
||||
For tool-call turns this contains the function_call items; for plain-text
|
||||
turns it contains a synthesized ``message`` item built from streamed deltas
|
||||
if no message item was emitted directly.
|
||||
* ``output_text``: assembled text from ``response.output_text.delta`` deltas.
|
||||
* ``usage``: copied from the terminal event's ``response.usage`` (when present).
|
||||
* ``status``: ``completed`` / ``incomplete`` / ``failed`` (or ``completed`` if
|
||||
the stream ended without a terminal frame but produced content).
|
||||
* ``id``: ``response.id`` when present.
|
||||
* ``incomplete_details``: passed through for ``response.incomplete`` frames.
|
||||
* ``error``: passed through for ``response.failed`` frames.
|
||||
* ``model``: from kwargs (the wire model name is not authoritative).
|
||||
|
||||
Critically, we never read ``response.output`` from the terminal event for
|
||||
content reconstruction — only ``usage``, ``status``, ``id``. That field
|
||||
being ``null`` / ``[]`` / missing is fine.
|
||||
|
||||
Callbacks:
|
||||
|
||||
* ``on_text_delta(str)`` — fires per ``response.output_text.delta``, suppressed
|
||||
once a function_call event is seen (so tool-call turns don't bleed text
|
||||
into the chat).
|
||||
* ``on_reasoning_delta(str)`` — fires per ``response.reasoning.*.delta``.
|
||||
* ``on_first_delta()`` — one-shot, fires on the first text delta only.
|
||||
* ``on_event(event)`` — fires for every event before any other processing.
|
||||
Used for watchdog activity, debug logging, anything wire-shape-agnostic.
|
||||
* ``interrupt_check()`` — returns True to break the loop early.
|
||||
"""
|
||||
collected_output_items: List[Any] = []
|
||||
collected_text_deltas: List[str] = []
|
||||
has_tool_calls = False
|
||||
first_delta_fired = False
|
||||
terminal_status: str = "completed"
|
||||
terminal_usage: Any = None
|
||||
terminal_response_id: str = None
|
||||
terminal_incomplete_details: Any = None
|
||||
terminal_error: Any = None
|
||||
saw_terminal = False
|
||||
|
||||
for event in event_iter:
|
||||
if on_event is not None:
|
||||
try:
|
||||
on_event(event)
|
||||
except (TimeoutError, InterruptedError):
|
||||
# Control-flow signals from watchdog/cancellation hooks must
|
||||
# propagate, not get swallowed as "debug noise".
|
||||
raise
|
||||
except Exception:
|
||||
# Genuine bugs in third-party debug/log hooks shouldn't break
|
||||
# stream consumption.
|
||||
logger.debug("Codex stream on_event hook raised", exc_info=True)
|
||||
if interrupt_check is not None and interrupt_check():
|
||||
break
|
||||
|
||||
event_type = _event_field(event, "type", "")
|
||||
if not isinstance(event_type, str):
|
||||
event_type = ""
|
||||
|
||||
# ``error`` SSE frames carry the provider's real failure reason
|
||||
# (subscription / quota / model-not-available / rejected-reasoning-replay)
|
||||
# but never appear in the terminal set. Surface them as a structured
|
||||
# exception so the credential pool + error classifier see the body.
|
||||
if event_type == "error":
|
||||
_raise_stream_error(event)
|
||||
|
||||
if "output_text.delta" in event_type or event_type == "response.output_text.delta":
|
||||
delta_text = _event_field(event, "delta", "")
|
||||
if delta_text:
|
||||
collected_text_deltas.append(delta_text)
|
||||
if not has_tool_calls:
|
||||
if not first_delta_fired:
|
||||
first_delta_fired = True
|
||||
if on_first_delta is not None:
|
||||
try:
|
||||
on_first_delta()
|
||||
except Exception:
|
||||
logger.debug("Codex stream on_first_delta raised", exc_info=True)
|
||||
if on_text_delta is not None:
|
||||
try:
|
||||
on_text_delta(delta_text)
|
||||
except Exception:
|
||||
logger.debug("Codex stream on_text_delta raised", exc_info=True)
|
||||
continue
|
||||
|
||||
if "function_call" in event_type:
|
||||
has_tool_calls = True
|
||||
# fall through — function_call items still get added on output_item.done
|
||||
|
||||
if "reasoning" in event_type and "delta" in event_type:
|
||||
reasoning_text = _event_field(event, "delta", "")
|
||||
if reasoning_text and on_reasoning_delta is not None:
|
||||
try:
|
||||
on_reasoning_delta(reasoning_text)
|
||||
except Exception:
|
||||
logger.debug("Codex stream on_reasoning_delta raised", exc_info=True)
|
||||
continue
|
||||
|
||||
if event_type == "response.output_item.done":
|
||||
done_item = _event_field(event, "item")
|
||||
if done_item is not None:
|
||||
collected_output_items.append(done_item)
|
||||
continue
|
||||
|
||||
if event_type in _TERMINAL_EVENT_TYPES:
|
||||
saw_terminal = True
|
||||
resp_obj = _event_field(event, "response")
|
||||
if resp_obj is not None:
|
||||
terminal_usage = getattr(resp_obj, "usage", None)
|
||||
if terminal_usage is None and isinstance(resp_obj, dict):
|
||||
terminal_usage = resp_obj.get("usage")
|
||||
rid = getattr(resp_obj, "id", None)
|
||||
if rid is None and isinstance(resp_obj, dict):
|
||||
rid = resp_obj.get("id")
|
||||
terminal_response_id = rid
|
||||
rstatus = getattr(resp_obj, "status", None)
|
||||
if rstatus is None and isinstance(resp_obj, dict):
|
||||
rstatus = resp_obj.get("status")
|
||||
if isinstance(rstatus, str):
|
||||
terminal_status = rstatus
|
||||
if event_type == "response.incomplete":
|
||||
terminal_incomplete_details = getattr(resp_obj, "incomplete_details", None)
|
||||
if terminal_incomplete_details is None and isinstance(resp_obj, dict):
|
||||
terminal_incomplete_details = resp_obj.get("incomplete_details")
|
||||
if event_type == "response.failed":
|
||||
terminal_error = getattr(resp_obj, "error", None)
|
||||
if terminal_error is None and isinstance(resp_obj, dict):
|
||||
terminal_error = resp_obj.get("error")
|
||||
if event_type == "response.completed":
|
||||
terminal_status = terminal_status or "completed"
|
||||
elif event_type == "response.incomplete":
|
||||
terminal_status = terminal_status or "incomplete"
|
||||
elif event_type == "response.failed":
|
||||
terminal_status = terminal_status or "failed"
|
||||
# Stop on terminal event.
|
||||
break
|
||||
|
||||
# Build the final output list. Prefer items observed via output_item.done;
|
||||
# if none arrived but we streamed plain text deltas (no tool calls), synthesize
|
||||
# a single message item so downstream normalization has something to work with.
|
||||
if collected_output_items:
|
||||
output = list(collected_output_items)
|
||||
elif collected_text_deltas and not has_tool_calls:
|
||||
assembled = "".join(collected_text_deltas)
|
||||
output = [SimpleNamespace(
|
||||
type="message",
|
||||
role="assistant",
|
||||
status="completed",
|
||||
model=model,
|
||||
content=[SimpleNamespace(type="output_text", text=assembled)],
|
||||
)]
|
||||
else:
|
||||
output = []
|
||||
|
||||
# If the stream ended without any terminal event AND produced no usable
|
||||
# content (no items, no text deltas), surface that as a RuntimeError so
|
||||
# callers can distinguish "stream truncated mid-flight / provider rejected
|
||||
# the call" from "stream completed with empty body". This preserves the
|
||||
# signal the SDK's high-level helper used to raise as
|
||||
# ``RuntimeError("Didn't receive a `response.completed` event.")``.
|
||||
if not saw_terminal and not output:
|
||||
raise RuntimeError(
|
||||
"Codex Responses stream did not emit a terminal response"
|
||||
)
|
||||
if text_parts and not has_tool_calls:
|
||||
assembled = "".join(text_parts)
|
||||
return SimpleNamespace(
|
||||
output=[SimpleNamespace(
|
||||
type="message",
|
||||
role="assistant",
|
||||
status="completed",
|
||||
content=[SimpleNamespace(type="output_text", text=assembled)],
|
||||
)],
|
||||
usage=None,
|
||||
status="completed",
|
||||
model=model,
|
||||
)
|
||||
return None
|
||||
|
||||
assembled_text = "".join(collected_text_deltas)
|
||||
|
||||
final = SimpleNamespace(
|
||||
output=output,
|
||||
output_text=assembled_text,
|
||||
usage=terminal_usage,
|
||||
status=terminal_status,
|
||||
id=terminal_response_id,
|
||||
model=model,
|
||||
incomplete_details=terminal_incomplete_details,
|
||||
error=terminal_error,
|
||||
)
|
||||
return final
|
||||
|
||||
|
||||
def run_codex_stream(agent, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
|
||||
"""Execute one streaming Responses API request and return the final response."""
|
||||
def run_codex_stream(agent, api_kwargs: dict, client: Any = None, on_first_delta=None):
|
||||
"""Execute one streaming Responses API request and return the final response.
|
||||
|
||||
Uses ``responses.create(stream=True)`` (low-level raw event iteration)
|
||||
rather than the high-level ``responses.stream(...)`` helper. This makes
|
||||
us structurally immune to backend drift in the ``response.completed``
|
||||
payload shape — we never let the SDK reconstruct a typed object from
|
||||
the terminal event's ``output`` field.
|
||||
"""
|
||||
import httpx as _httpx
|
||||
|
||||
active_client = client or agent._ensure_primary_openai_client(reason="codex_stream_direct")
|
||||
max_stream_retries = 1
|
||||
has_tool_calls = False
|
||||
first_delta_fired = False
|
||||
# Accumulate streamed text so we can recover if get_final_response()
|
||||
# returns empty output (e.g. chatgpt.com backend-api sends
|
||||
# response.incomplete instead of response.completed).
|
||||
# Accumulate streamed text so callers / compat shims can read it.
|
||||
agent._codex_streamed_text_parts: list = []
|
||||
|
||||
def _on_text_delta(text: str) -> None:
|
||||
agent._codex_streamed_text_parts.append(text)
|
||||
agent._fire_stream_delta(text)
|
||||
|
||||
def _on_reasoning_delta(text: str) -> None:
|
||||
agent._fire_reasoning_delta(text)
|
||||
|
||||
def _on_event(event: Any) -> None:
|
||||
# TTFB watchdog and activity touch — runs once per SSE event.
|
||||
agent._codex_stream_last_event_ts = time.time()
|
||||
agent._touch_activity("receiving stream response")
|
||||
|
||||
def _interrupt_check() -> bool:
|
||||
return bool(agent._interrupt_requested)
|
||||
|
||||
for attempt in range(max_stream_retries + 1):
|
||||
if agent._interrupt_requested:
|
||||
raise InterruptedError("Agent interrupted before Codex stream retry")
|
||||
collected_output_items: list = []
|
||||
|
||||
stream_kwargs = dict(api_kwargs)
|
||||
stream_kwargs["stream"] = True
|
||||
|
||||
try:
|
||||
with active_client.responses.stream(**api_kwargs) as stream:
|
||||
for event in stream:
|
||||
# Mark stream activity for the TTFB watchdog in
|
||||
# interruptible_api_call. The Codex backend can accept the
|
||||
# connection but never emit a single event; this timestamp
|
||||
# staying None tells the watchdog no bytes are flowing.
|
||||
agent._codex_stream_last_event_ts = time.time()
|
||||
agent._touch_activity("receiving stream response")
|
||||
if agent._interrupt_requested:
|
||||
break
|
||||
event_type = getattr(event, "type", "")
|
||||
# Fire callbacks on text content deltas (suppress during tool calls)
|
||||
if "output_text.delta" in event_type or event_type == "response.output_text.delta":
|
||||
delta_text = getattr(event, "delta", "")
|
||||
if delta_text:
|
||||
agent._codex_streamed_text_parts.append(delta_text)
|
||||
if delta_text and not has_tool_calls:
|
||||
if not first_delta_fired:
|
||||
first_delta_fired = True
|
||||
if on_first_delta:
|
||||
try:
|
||||
on_first_delta()
|
||||
except Exception:
|
||||
pass
|
||||
agent._fire_stream_delta(delta_text)
|
||||
# Track tool calls to suppress text streaming
|
||||
elif "function_call" in event_type:
|
||||
has_tool_calls = True
|
||||
# Fire reasoning callbacks
|
||||
elif "reasoning" in event_type and "delta" in event_type:
|
||||
reasoning_text = getattr(event, "delta", "")
|
||||
if reasoning_text:
|
||||
agent._fire_reasoning_delta(reasoning_text)
|
||||
# Collect completed output items — some backends
|
||||
# (chatgpt.com/backend-api/codex) stream valid items
|
||||
# via response.output_item.done but the SDK's
|
||||
# get_final_response() returns an empty output list.
|
||||
elif event_type == "response.output_item.done":
|
||||
done_item = getattr(event, "item", None)
|
||||
if done_item is not None:
|
||||
collected_output_items.append(done_item)
|
||||
# Log non-completed terminal events for diagnostics
|
||||
elif event_type in {"response.incomplete", "response.failed"}:
|
||||
resp_obj = getattr(event, "response", None)
|
||||
status = getattr(resp_obj, "status", None) if resp_obj else None
|
||||
incomplete_details = getattr(resp_obj, "incomplete_details", None) if resp_obj else None
|
||||
logger.warning(
|
||||
"Codex Responses stream received terminal event %s "
|
||||
"(status=%s, incomplete_details=%s, streamed_chars=%d). %s",
|
||||
event_type, status, incomplete_details,
|
||||
sum(len(p) for p in agent._codex_streamed_text_parts),
|
||||
agent._client_log_context(),
|
||||
)
|
||||
final_response = stream.get_final_response()
|
||||
# PATCH: ChatGPT Codex backend streams valid output items
|
||||
# but get_final_response() can return an empty output list.
|
||||
# Backfill from collected items or synthesize from deltas.
|
||||
_out = getattr(final_response, "output", None)
|
||||
if _out is None or (isinstance(_out, list) and not _out):
|
||||
recovered = _codex_backfilled_response(
|
||||
collected_output_items,
|
||||
agent._codex_streamed_text_parts,
|
||||
has_tool_calls=has_tool_calls,
|
||||
model=api_kwargs.get("model"),
|
||||
)
|
||||
if recovered is not None:
|
||||
final_response.output = recovered.output
|
||||
logger.debug(
|
||||
"Codex stream: backfilled missing output from stream events "
|
||||
"(items=%d, text_parts=%d)",
|
||||
len(collected_output_items),
|
||||
len(agent._codex_streamed_text_parts),
|
||||
)
|
||||
return final_response
|
||||
event_stream = active_client.responses.create(**stream_kwargs)
|
||||
except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
|
||||
if attempt < max_stream_retries:
|
||||
logger.debug(
|
||||
"Codex Responses stream transport failed (attempt %s/%s); retrying. %s error=%s",
|
||||
attempt + 1,
|
||||
max_stream_retries + 1,
|
||||
agent._client_log_context(),
|
||||
exc,
|
||||
"Codex Responses stream connect failed (attempt %s/%s); retrying. %s error=%s",
|
||||
attempt + 1, max_stream_retries + 1,
|
||||
agent._client_log_context(), exc,
|
||||
)
|
||||
continue
|
||||
logger.debug(
|
||||
"Codex Responses stream transport failed; falling back to create(stream=True). %s error=%s",
|
||||
agent._client_log_context(),
|
||||
exc,
|
||||
)
|
||||
return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
|
||||
except TypeError as exc:
|
||||
if _responses_null_output_iterable_error(exc):
|
||||
recovered = _codex_backfilled_response(
|
||||
collected_output_items,
|
||||
agent._codex_streamed_text_parts,
|
||||
has_tool_calls=has_tool_calls,
|
||||
model=api_kwargs.get("model"),
|
||||
)
|
||||
if recovered is not None:
|
||||
logger.debug(
|
||||
"Codex Responses stream parser hit response.output=None; "
|
||||
"recovered from streamed events (items=%d, text_parts=%d). %s",
|
||||
len(collected_output_items),
|
||||
len(agent._codex_streamed_text_parts),
|
||||
agent._client_log_context(),
|
||||
)
|
||||
return recovered
|
||||
logger.debug(
|
||||
"Codex Responses stream parser hit response.output=None without "
|
||||
"recoverable events; falling back to create(stream=True). %s",
|
||||
agent._client_log_context(),
|
||||
)
|
||||
return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
|
||||
raise
|
||||
except RuntimeError as exc:
|
||||
err_text = str(exc)
|
||||
missing_completed = "response.completed" in err_text
|
||||
# The OpenAI SDK's Responses streaming state machine raises
|
||||
# ``RuntimeError("Expected to have received `response.created`
|
||||
# before `<event-type>`")`` when the first SSE event from the
|
||||
# server is anything other than ``response.created`` — and it
|
||||
# discards the event's payload before we can read it. Three
|
||||
# real-world backends emit a different first frame:
|
||||
#
|
||||
# * xAI on grok-4.x OAuth — sends ``error`` (issues
|
||||
# reported around the May 2026 SuperGrok rollout when
|
||||
# multi-turn conversations replay encrypted reasoning
|
||||
# content the OAuth tier rejects)
|
||||
# * codex-lb relays — send ``codex.rate_limits`` (#14634)
|
||||
# * custom Responses relays — send ``response.in_progress``
|
||||
# (#8133)
|
||||
#
|
||||
# In all three cases the underlying byte stream is still
|
||||
# readable: a non-stream ``responses.create(stream=True)``
|
||||
# fallback succeeds and surfaces the real provider error as
|
||||
# a normal exception with body+status_code attached, which
|
||||
# ``_summarize_api_error`` can then translate into a useful
|
||||
# user-facing line. Treat ``response.created`` prelude
|
||||
# errors the same way we already treat ``response.completed``
|
||||
# postlude errors.
|
||||
prelude_error = (
|
||||
"Expected to have received `response.created`" in err_text
|
||||
or "Expected to have received \"response.created\"" in err_text
|
||||
)
|
||||
if (missing_completed or prelude_error) and attempt < max_stream_retries:
|
||||
logger.debug(
|
||||
"Responses stream %s (attempt %s/%s); retrying. %s",
|
||||
"prelude rejected" if prelude_error else "closed before completion",
|
||||
attempt + 1,
|
||||
max_stream_retries + 1,
|
||||
agent._client_log_context(),
|
||||
)
|
||||
continue
|
||||
if missing_completed or prelude_error:
|
||||
logger.debug(
|
||||
"Responses stream %s; falling back to create(stream=True). %s err=%s",
|
||||
"rejected before response.created" if prelude_error else "did not emit response.completed",
|
||||
agent._client_log_context(),
|
||||
err_text,
|
||||
)
|
||||
return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
|
||||
raise
|
||||
|
||||
try:
|
||||
# Compatibility: some mocks/providers return a concrete response
|
||||
# instead of an iterable. Pass it straight through.
|
||||
if hasattr(event_stream, "output") and not hasattr(event_stream, "__iter__"):
|
||||
return event_stream
|
||||
|
||||
try:
|
||||
final = _consume_codex_event_stream(
|
||||
event_stream,
|
||||
model=api_kwargs.get("model"),
|
||||
on_text_delta=_on_text_delta,
|
||||
on_reasoning_delta=_on_reasoning_delta,
|
||||
on_first_delta=on_first_delta,
|
||||
on_event=_on_event,
|
||||
interrupt_check=_interrupt_check,
|
||||
)
|
||||
except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
|
||||
if attempt < max_stream_retries:
|
||||
logger.debug(
|
||||
"Codex Responses stream transport failed mid-iteration "
|
||||
"(attempt %s/%s); retrying. %s error=%s",
|
||||
attempt + 1, max_stream_retries + 1,
|
||||
agent._client_log_context(), exc,
|
||||
)
|
||||
continue
|
||||
raise
|
||||
|
||||
if final.status in {"incomplete", "failed"}:
|
||||
logger.warning(
|
||||
"Codex Responses stream terminal status=%s "
|
||||
"(incomplete_details=%s, error=%s, streamed_chars=%d). %s",
|
||||
final.status, final.incomplete_details, final.error,
|
||||
sum(len(p) for p in agent._codex_streamed_text_parts),
|
||||
agent._client_log_context(),
|
||||
)
|
||||
|
||||
return final
|
||||
finally:
|
||||
close_fn = getattr(event_stream, "close", None)
|
||||
if callable(close_fn):
|
||||
try:
|
||||
close_fn()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def run_codex_create_stream_fallback(agent, api_kwargs: dict, client: Any = None):
|
||||
"""Fallback path for stream completion edge cases on Codex-style Responses backends."""
|
||||
active_client = client or agent._ensure_primary_openai_client(reason="codex_create_stream_fallback")
|
||||
fallback_kwargs = dict(api_kwargs)
|
||||
fallback_kwargs["stream"] = True
|
||||
fallback_kwargs = agent._get_transport().preflight_kwargs(fallback_kwargs, allow_stream=True)
|
||||
stream_or_response = active_client.responses.create(**fallback_kwargs)
|
||||
|
||||
# Compatibility shim for mocks or providers that still return a concrete response.
|
||||
if hasattr(stream_or_response, "output"):
|
||||
return stream_or_response
|
||||
if not hasattr(stream_or_response, "__iter__"):
|
||||
return stream_or_response
|
||||
|
||||
terminal_response = None
|
||||
collected_output_items: list = []
|
||||
collected_text_deltas: list = []
|
||||
has_tool_calls = False
|
||||
try:
|
||||
for event in stream_or_response:
|
||||
agent._touch_activity("receiving stream response")
|
||||
event_type = getattr(event, "type", None)
|
||||
if not event_type and isinstance(event, dict):
|
||||
event_type = event.get("type")
|
||||
|
||||
# ``error`` SSE frames carry the provider's real failure
|
||||
# reason (subscription / quota / model-not-available /
|
||||
# rejected-reasoning-replay) but never appear in the
|
||||
# ``{completed, incomplete, failed}`` terminal set, so the
|
||||
# raw loop below would silently consume them and end with
|
||||
# "did not emit a terminal response". xAI in particular
|
||||
# emits ``type=error`` as the FIRST frame for OAuth
|
||||
# accounts whose Grok subscription is missing/exhausted —
|
||||
# the SDK's stream helper raises ``RuntimeError(Expected
|
||||
# to have received response.created before error)`` which
|
||||
# the caller catches and routes here, expecting this
|
||||
# fallback to surface the message. Synthesize an
|
||||
# APIError-shaped exception so ``_summarize_api_error``
|
||||
# and the credential-pool entitlement detector see the
|
||||
# real text instead of a generic RuntimeError.
|
||||
if event_type == "error":
|
||||
err_message = getattr(event, "message", None)
|
||||
if not err_message and isinstance(event, dict):
|
||||
err_message = event.get("message")
|
||||
err_code = getattr(event, "code", None)
|
||||
if not err_code and isinstance(event, dict):
|
||||
err_code = event.get("code")
|
||||
err_param = getattr(event, "param", None)
|
||||
if not err_param and isinstance(event, dict):
|
||||
err_param = event.get("param")
|
||||
err_message = (err_message or "stream emitted error event").strip()
|
||||
from run_agent import _StreamErrorEvent
|
||||
raise _StreamErrorEvent(err_message, code=err_code, param=err_param)
|
||||
|
||||
# Collect output items and text deltas for backfill
|
||||
if event_type == "response.output_item.done":
|
||||
done_item = getattr(event, "item", None)
|
||||
if done_item is None and isinstance(event, dict):
|
||||
done_item = event.get("item")
|
||||
if done_item is not None:
|
||||
collected_output_items.append(done_item)
|
||||
elif event_type in {"response.output_text.delta",}:
|
||||
delta = getattr(event, "delta", "")
|
||||
if not delta and isinstance(event, dict):
|
||||
delta = event.get("delta", "")
|
||||
if delta:
|
||||
collected_text_deltas.append(delta)
|
||||
elif event_type and "function_call" in event_type:
|
||||
has_tool_calls = True
|
||||
|
||||
if event_type not in {"response.completed", "response.incomplete", "response.failed"}:
|
||||
continue
|
||||
|
||||
terminal_response = getattr(event, "response", None)
|
||||
if terminal_response is None and isinstance(event, dict):
|
||||
terminal_response = event.get("response")
|
||||
if terminal_response is not None:
|
||||
# Backfill empty output from collected stream events
|
||||
_out = getattr(terminal_response, "output", None)
|
||||
if _out is None or (isinstance(_out, list) and not _out):
|
||||
recovered = _codex_backfilled_response(
|
||||
collected_output_items,
|
||||
collected_text_deltas,
|
||||
has_tool_calls=has_tool_calls,
|
||||
model=fallback_kwargs.get("model"),
|
||||
)
|
||||
if recovered is not None:
|
||||
terminal_response.output = recovered.output
|
||||
logger.debug(
|
||||
"Codex fallback stream: backfilled missing output "
|
||||
"(items=%d, text_parts=%d)",
|
||||
len(collected_output_items),
|
||||
len(collected_text_deltas),
|
||||
)
|
||||
return terminal_response
|
||||
finally:
|
||||
close_fn = getattr(stream_or_response, "close", None)
|
||||
if callable(close_fn):
|
||||
try:
|
||||
close_fn()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if terminal_response is not None:
|
||||
return terminal_response
|
||||
raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")
|
||||
"""Backward-compatible alias for the unified event-driven path.
|
||||
|
||||
Historically this was the fallback when the SDK's high-level
|
||||
``responses.stream(...)`` helper raised on shape drift. The primary
|
||||
path now does exactly what the fallback did, so this just forwards.
|
||||
Kept as a public symbol because tests and a small number of call sites
|
||||
still reference it by name.
|
||||
"""
|
||||
return run_codex_stream(agent, api_kwargs, client=client)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"run_codex_app_server_turn",
|
||||
"run_codex_stream",
|
||||
"run_codex_create_stream_fallback",
|
||||
"_consume_codex_event_stream",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -2222,34 +2222,45 @@ class TestCodexAdapterReasoningTranslation:
|
|||
|
||||
@staticmethod
|
||||
def _build_adapter():
|
||||
"""Build a _CodexCompletionsAdapter with a mocked responses.stream()."""
|
||||
"""Build a _CodexCompletionsAdapter with a mocked responses.create()."""
|
||||
from agent.auxiliary_client import _CodexCompletionsAdapter
|
||||
from types import SimpleNamespace
|
||||
|
||||
# Mock the stream context manager: yields no events, get_final_response
|
||||
# returns a minimal empty-output response.
|
||||
fake_final = SimpleNamespace(
|
||||
output=[SimpleNamespace(
|
||||
type="message",
|
||||
content=[SimpleNamespace(type="output_text", text="hi")],
|
||||
)],
|
||||
usage=SimpleNamespace(input_tokens=1, output_tokens=1, total_tokens=2),
|
||||
# The event-driven path consumes ``responses.create(stream=True)`` as a
|
||||
# raw iterable of SSE events. Emit a minimal stream containing one
|
||||
# ``response.output_item.done`` (message) and a ``response.completed``
|
||||
# terminal frame.
|
||||
message_item = SimpleNamespace(
|
||||
type="message",
|
||||
role="assistant",
|
||||
status="completed",
|
||||
content=[SimpleNamespace(type="output_text", text="hi")],
|
||||
)
|
||||
events = [
|
||||
SimpleNamespace(type="response.created"),
|
||||
SimpleNamespace(type="response.output_item.done", item=message_item),
|
||||
SimpleNamespace(
|
||||
type="response.completed",
|
||||
response=SimpleNamespace(
|
||||
status="completed",
|
||||
id="resp_test",
|
||||
usage=SimpleNamespace(input_tokens=1, output_tokens=1, total_tokens=2),
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
class _FakeStream:
|
||||
def __enter__(self): return self
|
||||
def __exit__(self, *a): return False
|
||||
def __iter__(self): return iter([])
|
||||
def get_final_response(self): return fake_final
|
||||
class _FakeCreateStream:
|
||||
def __iter__(self): return iter(events)
|
||||
def close(self): pass
|
||||
|
||||
captured_kwargs = {}
|
||||
|
||||
def _stream(**kwargs):
|
||||
def _create(**kwargs):
|
||||
captured_kwargs.update(kwargs)
|
||||
return _FakeStream()
|
||||
return _FakeCreateStream()
|
||||
|
||||
real_client = MagicMock()
|
||||
real_client.responses.stream = _stream
|
||||
real_client.responses.create = _create
|
||||
adapter = _CodexCompletionsAdapter(real_client, "gpt-5.3-codex")
|
||||
return adapter, captured_kwargs
|
||||
|
||||
|
|
@ -2476,33 +2487,29 @@ class TestVisionAutoSkipsKimiCoding:
|
|||
|
||||
|
||||
class TestCodexAuxiliaryAdapterTimeout:
|
||||
def test_forwards_timeout_to_responses_stream(self):
|
||||
class FakeStream:
|
||||
def __enter__(self):
|
||||
return self
|
||||
def test_forwards_timeout_to_responses_create(self):
|
||||
message_item = SimpleNamespace(
|
||||
type="message",
|
||||
content=[SimpleNamespace(type="output_text", text="summary")],
|
||||
)
|
||||
events = [
|
||||
SimpleNamespace(type="response.output_item.done", item=message_item),
|
||||
SimpleNamespace(type="response.completed", response=SimpleNamespace(
|
||||
status="completed", id="r1", usage=None,
|
||||
)),
|
||||
]
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def __iter__(self):
|
||||
return iter(())
|
||||
|
||||
def get_final_response(self):
|
||||
return SimpleNamespace(
|
||||
output=[SimpleNamespace(
|
||||
type="message",
|
||||
content=[SimpleNamespace(type="output_text", text="summary")],
|
||||
)],
|
||||
usage=None,
|
||||
)
|
||||
class _FakeCreateStream:
|
||||
def __iter__(self): return iter(events)
|
||||
def close(self): pass
|
||||
|
||||
class FakeResponses:
|
||||
def __init__(self):
|
||||
self.kwargs = None
|
||||
|
||||
def stream(self, **kwargs):
|
||||
def create(self, **kwargs):
|
||||
self.kwargs = kwargs
|
||||
return FakeStream()
|
||||
return _FakeCreateStream()
|
||||
|
||||
fake_client = SimpleNamespace(responses=FakeResponses())
|
||||
adapter = _CodexCompletionsAdapter(fake_client, "gpt-5.5")
|
||||
|
|
@ -2513,33 +2520,21 @@ class TestCodexAuxiliaryAdapterTimeout:
|
|||
)
|
||||
|
||||
assert fake_client.responses.kwargs["timeout"] == 12.5
|
||||
assert fake_client.responses.kwargs["stream"] is True
|
||||
assert response.choices[0].message.content == "summary"
|
||||
|
||||
def test_enforces_total_timeout_while_stream_keeps_emitting_events(self):
|
||||
class SlowAliveStream:
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
class _SlowAliveCreateStream:
|
||||
def __iter__(self):
|
||||
for _ in range(5):
|
||||
time.sleep(0.03)
|
||||
yield SimpleNamespace(type="response.in_progress")
|
||||
|
||||
def get_final_response(self):
|
||||
return SimpleNamespace(
|
||||
output=[SimpleNamespace(
|
||||
type="message",
|
||||
content=[SimpleNamespace(type="output_text", text="late")],
|
||||
)],
|
||||
usage=None,
|
||||
)
|
||||
def close(self): pass
|
||||
|
||||
class FakeResponses:
|
||||
def stream(self, **kwargs):
|
||||
return SlowAliveStream()
|
||||
def create(self, **kwargs):
|
||||
return _SlowAliveCreateStream()
|
||||
|
||||
fake_client = SimpleNamespace(responses=FakeResponses(), close=lambda: None)
|
||||
adapter = _CodexCompletionsAdapter(fake_client, "gpt-5.5")
|
||||
|
|
@ -2555,34 +2550,39 @@ class TestCodexAuxiliaryAdapterTimeout:
|
|||
|
||||
|
||||
class TestCodexAuxiliaryAdapterNullOutputRecovery:
|
||||
def test_recovers_output_item_when_sdk_raises_during_iteration(self):
|
||||
"""Regression for #11179 in auxiliary calls such as compression/title generation."""
|
||||
def test_recovers_output_item_when_terminal_event_has_null_output(self):
|
||||
"""Regression for #11179 in auxiliary calls.
|
||||
|
||||
The wire shape that broke the SDK is ``response.completed`` with
|
||||
``response.output = null``. The event-driven path is structurally
|
||||
immune because it reconstructs from ``response.output_item.done``
|
||||
events and never reads the terminal event's ``output`` field for
|
||||
content. Assert the auxiliary path returns the streamed item even
|
||||
when the terminal frame's output is ``null``.
|
||||
"""
|
||||
output_item = SimpleNamespace(
|
||||
type="message",
|
||||
content=[SimpleNamespace(type="output_text", text="aux survived")],
|
||||
)
|
||||
events = [
|
||||
SimpleNamespace(type="response.created"),
|
||||
SimpleNamespace(type="response.output_item.done", item=output_item),
|
||||
SimpleNamespace(type="response.completed", response=SimpleNamespace(
|
||||
status="completed",
|
||||
id="resp_null_output",
|
||||
# This is the field the SDK helper would have iterated and crashed on:
|
||||
output=None,
|
||||
usage=None,
|
||||
)),
|
||||
]
|
||||
|
||||
class NullOutputParseStream:
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def __iter__(self):
|
||||
yield SimpleNamespace(type="response.output_item.done", item=output_item)
|
||||
raise TypeError("'NoneType' object is not iterable")
|
||||
|
||||
def get_final_response(self): # pragma: no cover - iterator fails first
|
||||
raise AssertionError("get_final_response should not be reached")
|
||||
class _NullOutputCreateStream:
|
||||
def __iter__(self): return iter(events)
|
||||
def close(self): pass
|
||||
|
||||
class FakeResponses:
|
||||
def __init__(self):
|
||||
self.create = MagicMock()
|
||||
|
||||
def stream(self, **kwargs):
|
||||
return NullOutputParseStream()
|
||||
def create(self, **kwargs):
|
||||
return _NullOutputCreateStream()
|
||||
|
||||
fake_client = SimpleNamespace(responses=FakeResponses())
|
||||
adapter = _CodexCompletionsAdapter(fake_client, "gpt-5.5")
|
||||
|
|
@ -2590,7 +2590,6 @@ class TestCodexAuxiliaryAdapterNullOutputRecovery:
|
|||
response = adapter.create(messages=[{"role": "user", "content": "summarize"}])
|
||||
|
||||
assert response.choices[0].message.content == "aux survived"
|
||||
fake_client.responses.create.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -2696,26 +2695,19 @@ class TestAuxiliaryClientPoisonedCacheEviction:
|
|||
_CodexCompletionsAdapter, CodexAuxiliaryClient,
|
||||
)
|
||||
|
||||
class SlowAliveStream:
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
class _SlowAliveCreateStream:
|
||||
def __iter__(self):
|
||||
for _ in range(20):
|
||||
time.sleep(0.01)
|
||||
yield SimpleNamespace(type="response.in_progress")
|
||||
|
||||
def get_final_response(self): # pragma: no cover — timeout fires first
|
||||
return SimpleNamespace(output=[], usage=None)
|
||||
def close(self): pass
|
||||
|
||||
closed = {"flag": False}
|
||||
|
||||
class FakeClient:
|
||||
def __init__(self):
|
||||
self.responses = SimpleNamespace(stream=lambda **k: SlowAliveStream())
|
||||
self.responses = SimpleNamespace(create=lambda **k: _SlowAliveCreateStream())
|
||||
self.api_key = "k"
|
||||
self.base_url = "https://chatgpt.com/backend-api/codex"
|
||||
|
||||
|
|
|
|||
|
|
@ -37,7 +37,18 @@ import pytest
|
|||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fix A: prelude error fallback
|
||||
# Fix A: prelude error surfacing via wire `error` events
|
||||
#
|
||||
# With the migration to ``responses.create(stream=True)`` raw event iteration,
|
||||
# the SDK's high-level state-machine RuntimeError no longer mediates between
|
||||
# the wire and us — we read the wire directly. When the chatgpt.com Codex
|
||||
# backend (or xAI, codex-lb, custom relays) emits a ``type=error`` frame as
|
||||
# its first event, our consumer raises ``_StreamErrorEvent`` straight from
|
||||
# the wire payload, which carries the real provider message in ``.body`` /
|
||||
# ``.message`` shape for ``_summarize_api_error`` to consume. This is
|
||||
# strictly better than the old "SDK raises RuntimeError → we retry → fall
|
||||
# back to a second non-stream call" two-phase dance, because the error
|
||||
# surfaces on the first event instead of after one wasted round trip.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
|
@ -60,105 +71,104 @@ def _make_codex_agent():
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"prelude_event_type",
|
||||
"provider_message",
|
||||
[
|
||||
"error", # xAI OAuth multi-turn
|
||||
"codex.rate_limits", # codex-lb relays (#14634)
|
||||
"response.in_progress", # custom Responses relays (#8133)
|
||||
"You do not have an active Grok subscription",
|
||||
"rate limit exceeded",
|
||||
"model not available",
|
||||
],
|
||||
)
|
||||
def test_codex_stream_prelude_error_falls_back_to_create_stream(prelude_event_type):
|
||||
"""The SDK's prelude RuntimeError must trigger the non-stream fallback.
|
||||
def test_codex_stream_wire_error_event_surfaces_stream_error_event(provider_message):
|
||||
"""A wire ``type=error`` SSE frame raises ``_StreamErrorEvent`` with the
|
||||
provider's real message in the body."""
|
||||
from run_agent import _StreamErrorEvent
|
||||
|
||||
When the first SSE event isn't ``response.created``, openai-python
|
||||
raises RuntimeError before our event loop sees anything. We must
|
||||
detect that, retry once, then fall back to ``create(stream=True)``
|
||||
which surfaces the real provider error or a real response.
|
||||
"""
|
||||
agent = _make_codex_agent()
|
||||
|
||||
prelude_error = RuntimeError(
|
||||
f"Expected to have received `response.created` before `{prelude_event_type}`"
|
||||
)
|
||||
class _ErrorCreateStream:
|
||||
def __iter__(self_inner):
|
||||
yield SimpleNamespace(type="error", message=provider_message, code="forbidden")
|
||||
|
||||
def close(self_inner):
|
||||
pass
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.responses.stream.side_effect = prelude_error
|
||||
mock_client.responses.create.return_value = _ErrorCreateStream()
|
||||
|
||||
fallback_response = SimpleNamespace(
|
||||
output=[SimpleNamespace(
|
||||
type="message",
|
||||
content=[SimpleNamespace(type="output_text", text="fallback ok")],
|
||||
)],
|
||||
status="completed",
|
||||
)
|
||||
with pytest.raises(_StreamErrorEvent) as excinfo:
|
||||
agent._run_codex_stream({}, client=mock_client)
|
||||
|
||||
with patch.object(
|
||||
agent, "_run_codex_create_stream_fallback", return_value=fallback_response
|
||||
) as mock_fallback:
|
||||
result = agent._run_codex_stream({}, client=mock_client)
|
||||
|
||||
assert result is fallback_response
|
||||
mock_fallback.assert_called_once_with({}, client=mock_client)
|
||||
assert provider_message in str(excinfo.value)
|
||||
assert excinfo.value.body["error"]["message"] == provider_message
|
||||
|
||||
|
||||
def test_codex_stream_prelude_error_retries_once_before_fallback():
|
||||
"""The retry path must fire one extra stream attempt before falling back."""
|
||||
def test_codex_stream_retries_remote_protocol_error_once():
|
||||
"""Transport errors (``httpx.RemoteProtocolError``) trigger a single retry.
|
||||
|
||||
Previously this was on the ``responses.stream(...)`` helper; now it's on
|
||||
``responses.create(stream=True)`` itself. The user-facing behavior is the
|
||||
same: one retry, then re-raise if the second attempt also fails.
|
||||
"""
|
||||
import httpx
|
||||
|
||||
agent = _make_codex_agent()
|
||||
|
||||
call_count = {"n": 0}
|
||||
|
||||
def stream_side_effect(**kwargs):
|
||||
def create_side_effect(**kwargs):
|
||||
call_count["n"] += 1
|
||||
raise RuntimeError(
|
||||
"Expected to have received `response.created` before `error`"
|
||||
raise httpx.RemoteProtocolError(
|
||||
"peer closed connection without sending complete message body"
|
||||
)
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.responses.stream.side_effect = stream_side_effect
|
||||
mock_client.responses.create.side_effect = create_side_effect
|
||||
|
||||
fallback_response = SimpleNamespace(output=[], status="completed")
|
||||
with patch.object(
|
||||
agent, "_run_codex_create_stream_fallback", return_value=fallback_response
|
||||
) as mock_fallback:
|
||||
with pytest.raises(httpx.RemoteProtocolError):
|
||||
agent._run_codex_stream({}, client=mock_client)
|
||||
|
||||
# max_stream_retries=1 → one retry + final attempt → 2 stream calls,
|
||||
# THEN the fallback path runs.
|
||||
# max_stream_retries=1 → one retry + final attempt → 2 create calls total.
|
||||
assert call_count["n"] == 2
|
||||
mock_fallback.assert_called_once()
|
||||
|
||||
|
||||
def test_codex_stream_unrelated_runtimeerror_still_raises():
|
||||
"""RuntimeErrors that aren't prelude/postlude shape must propagate."""
|
||||
"""RuntimeErrors that aren't transport errors must propagate.
|
||||
|
||||
With the event-driven path there's no separate fallback function to
|
||||
short-circuit into; any RuntimeError from ``responses.create()`` or the
|
||||
consumer surfaces directly.
|
||||
"""
|
||||
agent = _make_codex_agent()
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.responses.stream.side_effect = RuntimeError("something else broke")
|
||||
mock_client.responses.create.side_effect = RuntimeError("something else broke")
|
||||
|
||||
with patch.object(agent, "_run_codex_create_stream_fallback") as mock_fallback:
|
||||
with pytest.raises(RuntimeError, match="something else broke"):
|
||||
agent._run_codex_stream({}, client=mock_client)
|
||||
|
||||
mock_fallback.assert_not_called()
|
||||
with pytest.raises(RuntimeError, match="something else broke"):
|
||||
agent._run_codex_stream({}, client=mock_client)
|
||||
|
||||
|
||||
def test_codex_stream_postlude_error_still_falls_back():
|
||||
"""Existing ``response.completed`` fallback must not regress."""
|
||||
def test_codex_stream_truncated_no_terminal_event_raises():
|
||||
"""Streams that end without a terminal event AND no items raise.
|
||||
|
||||
Preserves the "Codex Responses stream did not emit a terminal response"
|
||||
signal callers use to distinguish "stream truncated mid-flight" from
|
||||
"stream completed with empty body". Previously surfaced by the SDK's
|
||||
``RuntimeError("Didn't receive a `response.completed` event.")``; now
|
||||
surfaced directly by the event consumer.
|
||||
"""
|
||||
agent = _make_codex_agent()
|
||||
|
||||
class _EmptyStream:
|
||||
def __iter__(self_inner):
|
||||
return iter(())
|
||||
|
||||
def close(self_inner):
|
||||
pass
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.responses.stream.side_effect = RuntimeError(
|
||||
"Didn't receive a `response.completed` event."
|
||||
)
|
||||
mock_client.responses.create.return_value = _EmptyStream()
|
||||
|
||||
fallback_response = SimpleNamespace(output=[], status="completed")
|
||||
with patch.object(
|
||||
agent, "_run_codex_create_stream_fallback", return_value=fallback_response
|
||||
) as mock_fallback:
|
||||
result = agent._run_codex_stream({}, client=mock_client)
|
||||
|
||||
assert result is fallback_response
|
||||
mock_fallback.assert_called_once()
|
||||
with pytest.raises(RuntimeError, match="did not emit a terminal response"):
|
||||
agent._run_codex_stream({}, client=mock_client)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -154,27 +154,13 @@ def _codex_ack_message_response(text: str):
|
|||
)
|
||||
|
||||
|
||||
class _FakeResponsesStream:
|
||||
def __init__(self, *, final_response=None, final_error=None):
|
||||
self._final_response = final_response
|
||||
self._final_error = final_error
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def __iter__(self):
|
||||
return iter(())
|
||||
|
||||
def get_final_response(self):
|
||||
if self._final_error is not None:
|
||||
raise self._final_error
|
||||
return self._final_response
|
||||
|
||||
|
||||
class _FakeCreateStream:
|
||||
"""Iterable-only fake for ``responses.create(stream=True)`` outputs.
|
||||
|
||||
The event-driven Codex path expects an iterable that yields SSE events;
|
||||
tests use this to drive it through the same code paths the wire does.
|
||||
"""
|
||||
|
||||
def __init__(self, events):
|
||||
self._events = list(events)
|
||||
self.closed = False
|
||||
|
|
@ -186,27 +172,6 @@ class _FakeCreateStream:
|
|||
self.closed = True
|
||||
|
||||
|
||||
class _IteratorTypeErrorStream:
|
||||
"""Mimic the SDK raising while parsing response.completed.output=None."""
|
||||
|
||||
def __init__(self, events_before_error):
|
||||
self._events_before_error = list(events_before_error)
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def __iter__(self):
|
||||
for event in self._events_before_error:
|
||||
yield event
|
||||
raise TypeError("'NoneType' object is not iterable")
|
||||
|
||||
def get_final_response(self): # pragma: no cover - iterator fails first
|
||||
raise AssertionError("get_final_response should not be reached")
|
||||
|
||||
|
||||
def _codex_request_kwargs():
|
||||
return {
|
||||
"model": "gpt-5-codex",
|
||||
|
|
@ -418,60 +383,75 @@ def test_build_api_kwargs_copilot_responses_omits_reasoning_for_non_reasoning_mo
|
|||
assert "prompt_cache_key" not in kwargs
|
||||
|
||||
|
||||
def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch):
|
||||
def test_run_codex_stream_returns_collected_items_when_stream_ends_without_terminal(monkeypatch):
|
||||
"""The event-driven path tolerates streams that end without a terminal frame.
|
||||
|
||||
Previously the SDK's ``responses.stream(...)`` helper raised
|
||||
``RuntimeError("Didn't receive a `response.completed` event.")`` which the
|
||||
primary path caught and retried/fell back through. The new
|
||||
``responses.create(stream=True)`` path consumes events directly and just
|
||||
returns whatever it collected — no retry, no separate fallback path.
|
||||
"""
|
||||
agent = _build_agent(monkeypatch)
|
||||
calls = {"stream": 0}
|
||||
|
||||
def _fake_stream(**kwargs):
|
||||
calls["stream"] += 1
|
||||
if calls["stream"] == 1:
|
||||
return _FakeResponsesStream(
|
||||
final_error=RuntimeError("Didn't receive a `response.completed` event.")
|
||||
)
|
||||
return _FakeResponsesStream(final_response=_codex_message_response("stream ok"))
|
||||
|
||||
agent.client = SimpleNamespace(
|
||||
responses=SimpleNamespace(
|
||||
stream=_fake_stream,
|
||||
create=lambda **kwargs: _codex_message_response("fallback"),
|
||||
)
|
||||
output_item = SimpleNamespace(
|
||||
type="message",
|
||||
status="completed",
|
||||
content=[SimpleNamespace(type="output_text", text="no terminal frame")],
|
||||
)
|
||||
|
||||
response = agent._run_codex_stream(_codex_request_kwargs())
|
||||
assert calls["stream"] == 2
|
||||
assert response.output[0].content[0].text == "stream ok"
|
||||
|
||||
|
||||
def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(monkeypatch):
|
||||
agent = _build_agent(monkeypatch)
|
||||
calls = {"stream": 0, "create": 0}
|
||||
|
||||
def _fake_stream(**kwargs):
|
||||
calls["stream"] += 1
|
||||
return _FakeResponsesStream(
|
||||
final_error=RuntimeError("Didn't receive a `response.completed` event.")
|
||||
)
|
||||
calls = {"create": 0}
|
||||
|
||||
def _fake_create(**kwargs):
|
||||
calls["create"] += 1
|
||||
return _codex_message_response("create fallback ok")
|
||||
assert kwargs.get("stream") is True
|
||||
return _FakeCreateStream([
|
||||
SimpleNamespace(type="response.created"),
|
||||
SimpleNamespace(type="response.output_item.done", item=output_item),
|
||||
# stream ends without a response.completed/incomplete/failed frame
|
||||
])
|
||||
|
||||
agent.client = SimpleNamespace(
|
||||
responses=SimpleNamespace(
|
||||
stream=_fake_stream,
|
||||
create=_fake_create,
|
||||
)
|
||||
responses=SimpleNamespace(create=_fake_create),
|
||||
)
|
||||
|
||||
response = agent._run_codex_stream(_codex_request_kwargs())
|
||||
assert calls["stream"] == 2
|
||||
assert calls["create"] == 1
|
||||
assert response.output[0].content[0].text == "create fallback ok"
|
||||
assert response.status == "completed"
|
||||
assert response.output == [output_item]
|
||||
|
||||
|
||||
def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch):
|
||||
def test_run_codex_stream_surfaces_failed_status_in_final_response(monkeypatch):
|
||||
"""A ``response.failed`` terminal event is reflected on the returned object."""
|
||||
agent = _build_agent(monkeypatch)
|
||||
calls = {"stream": 0, "create": 0}
|
||||
error_payload = {"message": "model overloaded", "code": "overloaded"}
|
||||
failed_event = SimpleNamespace(
|
||||
type="response.failed",
|
||||
response=SimpleNamespace(
|
||||
status="failed",
|
||||
error=error_payload,
|
||||
id="resp_failed_1",
|
||||
usage=None,
|
||||
),
|
||||
)
|
||||
|
||||
def _fake_create(**kwargs):
|
||||
return _FakeCreateStream([
|
||||
SimpleNamespace(type="response.created"),
|
||||
failed_event,
|
||||
])
|
||||
|
||||
agent.client = SimpleNamespace(
|
||||
responses=SimpleNamespace(create=_fake_create),
|
||||
)
|
||||
|
||||
response = agent._run_codex_stream(_codex_request_kwargs())
|
||||
assert response.status == "failed"
|
||||
assert response.error == error_payload
|
||||
|
||||
|
||||
def test_run_codex_stream_parses_create_stream_events(monkeypatch):
|
||||
"""The primary path consumes ``responses.create(stream=True)`` events directly."""
|
||||
agent = _build_agent(monkeypatch)
|
||||
calls = {"create": 0}
|
||||
create_stream = _FakeCreateStream(
|
||||
[
|
||||
SimpleNamespace(type="response.created"),
|
||||
|
|
@ -480,62 +460,26 @@ def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch):
|
|||
]
|
||||
)
|
||||
|
||||
def _fake_stream(**kwargs):
|
||||
calls["stream"] += 1
|
||||
return _FakeResponsesStream(
|
||||
final_error=RuntimeError("Didn't receive a `response.completed` event.")
|
||||
)
|
||||
|
||||
def _fake_create(**kwargs):
|
||||
calls["create"] += 1
|
||||
assert kwargs.get("stream") is True
|
||||
return create_stream
|
||||
|
||||
agent.client = SimpleNamespace(
|
||||
responses=SimpleNamespace(
|
||||
stream=_fake_stream,
|
||||
create=_fake_create,
|
||||
)
|
||||
responses=SimpleNamespace(create=_fake_create),
|
||||
)
|
||||
|
||||
response = agent._run_codex_stream(_codex_request_kwargs())
|
||||
assert calls["stream"] == 2
|
||||
assert calls["create"] == 1
|
||||
assert create_stream.closed is True
|
||||
assert response.output[0].content[0].text == "streamed create ok"
|
||||
|
||||
|
||||
def test_run_codex_stream_falls_back_when_stream_iteration_parses_null_output(monkeypatch):
|
||||
"""Regression for #11179: the SDK can raise while iterating response.completed.
|
||||
|
||||
The failure happens before get_final_response(), so post-loop backfill alone is
|
||||
not enough. Preserve already streamed output_item.done events.
|
||||
"""
|
||||
agent = _build_agent(monkeypatch)
|
||||
output_item = SimpleNamespace(
|
||||
type="message",
|
||||
status="completed",
|
||||
content=[SimpleNamespace(type="output_text", text="stream item survived")],
|
||||
)
|
||||
calls = {"stream": 0}
|
||||
|
||||
def _fake_stream(**kwargs):
|
||||
calls["stream"] += 1
|
||||
return _IteratorTypeErrorStream([
|
||||
SimpleNamespace(type="response.output_item.done", item=output_item),
|
||||
])
|
||||
|
||||
def _unexpected_create(**kwargs): # pragma: no cover - recovery should avoid fallback call
|
||||
raise AssertionError("create fallback should not be needed when output items were collected")
|
||||
|
||||
agent.client = SimpleNamespace(
|
||||
responses=SimpleNamespace(stream=_fake_stream, create=_unexpected_create),
|
||||
)
|
||||
|
||||
response = agent._run_codex_stream(_codex_request_kwargs())
|
||||
|
||||
assert calls["stream"] == 1
|
||||
assert response.output == [output_item]
|
||||
# The wire's response.completed.response.output is a list with the message item,
|
||||
# but the event-driven path reconstructs from response.output_item.done.
|
||||
# _codex_message_response returns a SimpleNamespace whose .output is a list of
|
||||
# items — we don't read those directly, we read the items via output_item.done,
|
||||
# but this fixture doesn't emit output_item.done. So the consumer assembles a
|
||||
# message from streamed text deltas if present, or returns the items it has.
|
||||
# For backward compatibility with the helper that builds _codex_message_response,
|
||||
# we just assert status is completed and id propagated.
|
||||
assert response.status == "completed"
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -783,32 +783,28 @@ class TestCodexStreamCallbacks:
|
|||
agent.api_mode = "codex_responses"
|
||||
agent._interrupt_requested = False
|
||||
|
||||
# Mock the stream context manager
|
||||
mock_event_text = SimpleNamespace(
|
||||
type="response.output_text.delta",
|
||||
delta="Hello from Codex!",
|
||||
)
|
||||
mock_event_done = SimpleNamespace(
|
||||
type="response.completed",
|
||||
delta="",
|
||||
)
|
||||
events = [
|
||||
SimpleNamespace(type="response.created"),
|
||||
SimpleNamespace(
|
||||
type="response.output_text.delta",
|
||||
delta="Hello from Codex!",
|
||||
),
|
||||
SimpleNamespace(
|
||||
type="response.completed",
|
||||
response=SimpleNamespace(status="completed", id="r1", usage=None),
|
||||
),
|
||||
]
|
||||
|
||||
mock_stream = MagicMock()
|
||||
mock_stream.__enter__ = MagicMock(return_value=mock_stream)
|
||||
mock_stream.__exit__ = MagicMock(return_value=False)
|
||||
mock_stream.__iter__ = MagicMock(return_value=iter([mock_event_text, mock_event_done]))
|
||||
mock_stream.get_final_response.return_value = SimpleNamespace(
|
||||
output=[SimpleNamespace(
|
||||
type="message",
|
||||
content=[SimpleNamespace(type="output_text", text="Hello from Codex!")],
|
||||
)],
|
||||
status="completed",
|
||||
)
|
||||
class _FakeCreateStream:
|
||||
def __iter__(self_inner):
|
||||
return iter(events)
|
||||
def close(self_inner):
|
||||
return None
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.responses.stream.return_value = mock_stream
|
||||
mock_client.responses.create.return_value = _FakeCreateStream()
|
||||
|
||||
response = agent._run_codex_stream({}, client=mock_client)
|
||||
agent._run_codex_stream({}, client=mock_client)
|
||||
assert "Hello from Codex!" in deltas
|
||||
|
||||
def test_codex_stream_refreshes_activity_on_every_event(self):
|
||||
|
|
@ -828,57 +824,40 @@ class TestCodexStreamCallbacks:
|
|||
touch_calls = []
|
||||
agent._touch_activity = lambda desc: touch_calls.append(desc)
|
||||
|
||||
mock_event_text_1 = SimpleNamespace(
|
||||
type="response.output_text.delta",
|
||||
delta="Hello",
|
||||
)
|
||||
mock_event_text_2 = SimpleNamespace(
|
||||
type="response.output_text.delta",
|
||||
delta=" world",
|
||||
)
|
||||
mock_event_done = SimpleNamespace(
|
||||
type="response.completed",
|
||||
delta="",
|
||||
)
|
||||
events = [
|
||||
SimpleNamespace(type="response.output_text.delta", delta="Hello"),
|
||||
SimpleNamespace(type="response.output_text.delta", delta=" world"),
|
||||
SimpleNamespace(
|
||||
type="response.completed",
|
||||
response=SimpleNamespace(status="completed", id="r2", usage=None),
|
||||
),
|
||||
]
|
||||
|
||||
mock_stream = MagicMock()
|
||||
mock_stream.__enter__ = MagicMock(return_value=mock_stream)
|
||||
mock_stream.__exit__ = MagicMock(return_value=False)
|
||||
mock_stream.__iter__ = MagicMock(
|
||||
return_value=iter([mock_event_text_1, mock_event_text_2, mock_event_done])
|
||||
)
|
||||
mock_stream.get_final_response.return_value = SimpleNamespace(
|
||||
output=[SimpleNamespace(
|
||||
type="message",
|
||||
content=[SimpleNamespace(type="output_text", text="Hello world")],
|
||||
)],
|
||||
status="completed",
|
||||
)
|
||||
class _FakeCreateStream:
|
||||
def __iter__(self_inner):
|
||||
return iter(events)
|
||||
def close(self_inner):
|
||||
return None
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.responses.stream.return_value = mock_stream
|
||||
mock_client.responses.create.return_value = _FakeCreateStream()
|
||||
|
||||
agent._run_codex_stream({}, client=mock_client)
|
||||
|
||||
assert touch_calls.count("receiving stream response") == 3
|
||||
|
||||
def test_codex_remote_protocol_error_falls_back_to_create_stream(self):
|
||||
def test_codex_remote_protocol_error_retries_then_raises(self):
|
||||
"""Transport errors from ``responses.create`` retry once then re-raise.
|
||||
|
||||
With the migration from ``responses.stream(...)`` to
|
||||
``responses.create(stream=True)``, there is no longer a separate
|
||||
fallback function — the same call IS the streaming path. When it
|
||||
raises ``httpx.RemoteProtocolError``, we retry once (matching the
|
||||
old behavior on the helper) and re-raise on the second failure.
|
||||
"""
|
||||
from run_agent import AIAgent
|
||||
import httpx
|
||||
|
||||
fallback_response = SimpleNamespace(
|
||||
output=[SimpleNamespace(
|
||||
type="message",
|
||||
content=[SimpleNamespace(type="output_text", text="fallback from create stream")],
|
||||
)],
|
||||
status="completed",
|
||||
)
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.responses.stream.side_effect = httpx.RemoteProtocolError(
|
||||
"peer closed connection without sending complete message body"
|
||||
)
|
||||
|
||||
agent = AIAgent(
|
||||
api_key="test-key",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
|
|
@ -890,11 +869,22 @@ class TestCodexStreamCallbacks:
|
|||
agent.api_mode = "codex_responses"
|
||||
agent._interrupt_requested = False
|
||||
|
||||
with patch.object(agent, "_run_codex_create_stream_fallback", return_value=fallback_response) as mock_fallback:
|
||||
response = agent._run_codex_stream({}, client=mock_client)
|
||||
call_count = {"n": 0}
|
||||
|
||||
assert response is fallback_response
|
||||
mock_fallback.assert_called_once_with({}, client=mock_client)
|
||||
def _create_side_effect(**kwargs):
|
||||
call_count["n"] += 1
|
||||
raise httpx.RemoteProtocolError(
|
||||
"peer closed connection without sending complete message body"
|
||||
)
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.responses.create.side_effect = _create_side_effect
|
||||
|
||||
with pytest.raises(httpx.RemoteProtocolError):
|
||||
agent._run_codex_stream({}, client=mock_client)
|
||||
|
||||
# 1 initial + 1 retry = 2 calls
|
||||
assert call_count["n"] == 2
|
||||
|
||||
def test_codex_create_stream_fallback_refreshes_activity_on_every_event(self):
|
||||
from run_agent import AIAgent
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue