fix: parse Codex image generation SSE directly

This commit is contained in:
Will Falcon 2026-05-27 09:35:01 +08:00 committed by Teknium
parent 16e86ce6a7
commit bba50977bc
2 changed files with 167 additions and 166 deletions

View file

@ -19,6 +19,7 @@ Output is saved as PNG under ``$HERMES_HOME/cache/images/``.
from __future__ import annotations
import json
import logging
from typing import Any, Dict, List, Optional, Tuple
@ -142,39 +143,18 @@ def _read_codex_access_token() -> Optional[str]:
return None
def _build_codex_client():
"""Return an OpenAI client pointed at the ChatGPT/Codex backend, or None."""
token = _read_codex_access_token()
if not token:
return None
try:
import openai
from agent.auxiliary_client import _codex_cloudflare_headers
return openai.OpenAI(
api_key=token,
base_url=_CODEX_BASE_URL,
default_headers=_codex_cloudflare_headers(token),
)
except Exception as exc:
logger.debug("Could not build Codex image client: %s", exc)
return None
def _collect_image_b64(client: Any, *, prompt: str, size: str, quality: str) -> Optional[str]:
"""Stream a Codex Responses image_generation call and return the b64 image."""
image_b64: Optional[str] = None
with client.responses.stream(
model=_CODEX_CHAT_MODEL,
store=False,
instructions=_CODEX_INSTRUCTIONS,
input=[{
def _build_responses_payload(*, prompt: str, size: str, quality: str) -> Dict[str, Any]:
"""Build the Codex Responses request body for an image_generation call."""
return {
"model": _CODEX_CHAT_MODEL,
"store": False,
"instructions": _CODEX_INSTRUCTIONS,
"input": [{
"type": "message",
"role": "user",
"content": [{"type": "input_text", "text": prompt}],
}],
tools=[{
"tools": [{
"type": "image_generation",
"model": API_MODEL,
"size": size,
@ -183,33 +163,114 @@ def _collect_image_b64(client: Any, *, prompt: str, size: str, quality: str) ->
"background": "opaque",
"partial_images": 1,
}],
tool_choice={
"tool_choice": {
"type": "allowed_tools",
"mode": "required",
"tools": [{"type": "image_generation"}],
},
) as stream:
for event in stream:
event_type = getattr(event, "type", "")
if event_type == "response.output_item.done":
item = getattr(event, "item", None)
if getattr(item, "type", None) == "image_generation_call":
result = getattr(item, "result", None)
if isinstance(result, str) and result:
image_b64 = result
elif event_type == "response.image_generation_call.partial_image":
partial = getattr(event, "partial_image_b64", None)
if isinstance(partial, str) and partial:
image_b64 = partial
final = stream.get_final_response()
"stream": True,
}
# Final-response sweep covers the case where the stream finished before
# we observed the ``output_item.done`` event for the image call.
for item in getattr(final, "output", None) or []:
if getattr(item, "type", None) == "image_generation_call":
result = getattr(item, "result", None)
def _extract_image_b64(value: Any) -> Optional[str]:
"""Return the newest image b64 embedded in a Responses event payload."""
found: Optional[str] = None
if isinstance(value, dict):
if value.get("type") == "image_generation_call":
result = value.get("result")
if isinstance(result, str) and result:
image_b64 = result
found = result
partial = value.get("partial_image_b64")
if isinstance(partial, str) and partial:
found = partial
for child in value.values():
nested = _extract_image_b64(child)
if nested:
found = nested
elif isinstance(value, list):
for child in value:
nested = _extract_image_b64(child)
if nested:
found = nested
return found
def _iter_sse_json(response: Any):
"""Yield JSON payloads from an SSE response without OpenAI SDK parsing.
The ChatGPT/Codex backend can emit image-generation events newer than the
pinned Python SDK understands. Parsing raw SSE keeps this provider tolerant
of those event-shape changes.
"""
event_name: Optional[str] = None
data_lines: List[str] = []
def flush():
nonlocal event_name, data_lines
if not data_lines:
event_name = None
return None
raw = "\n".join(data_lines).strip()
event = event_name
event_name = None
data_lines = []
if not raw or raw == "[DONE]":
return None
payload = json.loads(raw)
if isinstance(payload, dict) and event and "type" not in payload:
payload["type"] = event
return payload
for line in response.iter_lines():
if isinstance(line, bytes):
line = line.decode("utf-8", errors="replace")
line = str(line)
if line == "":
payload = flush()
if payload is not None:
yield payload
continue
if line.startswith(":"):
continue
if line.startswith("event:"):
event_name = line[len("event:"):].strip()
elif line.startswith("data:"):
data_lines.append(line[len("data:"):].lstrip())
payload = flush()
if payload is not None:
yield payload
def _collect_image_b64(token: str, *, prompt: str, size: str, quality: str) -> Optional[str]:
"""Stream a Codex Responses image_generation call and return the b64 image."""
import httpx
from agent.auxiliary_client import _codex_cloudflare_headers
headers = _codex_cloudflare_headers(token)
headers.update({
"Accept": "text/event-stream",
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
})
payload = _build_responses_payload(prompt=prompt, size=size, quality=quality)
timeout = httpx.Timeout(300.0, connect=30.0, read=300.0, write=30.0, pool=30.0)
image_b64: Optional[str] = None
with httpx.Client(timeout=timeout, headers=headers) as http:
with http.stream("POST", f"{_CODEX_BASE_URL}/responses", json=payload) as response:
try:
response.raise_for_status()
except httpx.HTTPStatusError as exc:
exc.response.read()
body = exc.response.text[:500]
raise RuntimeError(
f"Codex Responses API returned HTTP {exc.response.status_code}: {body}"
) from exc
for event in _iter_sse_json(response):
found = _extract_image_b64(event)
if found:
image_b64 = found
return image_b64
@ -234,7 +295,7 @@ class OpenAICodexImageGenProvider(ImageGenProvider):
if not _read_codex_access_token():
return False
try:
import openai # noqa: F401
import httpx # noqa: F401
except ImportError:
return False
return True
@ -295,10 +356,10 @@ class OpenAICodexImageGenProvider(ImageGenProvider):
)
try:
import openai # noqa: F401
import httpx # noqa: F401
except ImportError:
return error_response(
error="openai Python package not installed (pip install openai)",
error="httpx Python package not installed (pip install httpx)",
error_type="missing_dependency",
provider="openai-codex",
aspect_ratio=aspect,
@ -307,10 +368,13 @@ class OpenAICodexImageGenProvider(ImageGenProvider):
tier_id, meta = _resolve_model()
size = _SIZES.get(aspect, _SIZES["square"])
client = _build_codex_client()
if client is None:
token = _read_codex_access_token()
if not token:
return error_response(
error="Could not initialize Codex image client",
error=(
"No Codex/ChatGPT OAuth credentials available. Run "
"`hermes auth codex` (or `hermes setup` → Codex) to sign in."
),
error_type="auth_required",
provider="openai-codex",
model=tier_id,
@ -320,7 +384,7 @@ class OpenAICodexImageGenProvider(ImageGenProvider):
try:
b64 = _collect_image_b64(
client,
token,
prompt=prompt,
size=size,
quality=meta["quality"],