mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix: parse Codex image generation SSE directly
This commit is contained in:
parent
16e86ce6a7
commit
bba50977bc
2 changed files with 167 additions and 166 deletions
|
|
@ -19,6 +19,7 @@ Output is saved as PNG under ``$HERMES_HOME/cache/images/``.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
|
|
@ -142,39 +143,18 @@ def _read_codex_access_token() -> Optional[str]:
|
|||
return None
|
||||
|
||||
|
||||
def _build_codex_client():
|
||||
"""Return an OpenAI client pointed at the ChatGPT/Codex backend, or None."""
|
||||
token = _read_codex_access_token()
|
||||
if not token:
|
||||
return None
|
||||
try:
|
||||
import openai
|
||||
from agent.auxiliary_client import _codex_cloudflare_headers
|
||||
|
||||
return openai.OpenAI(
|
||||
api_key=token,
|
||||
base_url=_CODEX_BASE_URL,
|
||||
default_headers=_codex_cloudflare_headers(token),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Could not build Codex image client: %s", exc)
|
||||
return None
|
||||
|
||||
|
||||
def _collect_image_b64(client: Any, *, prompt: str, size: str, quality: str) -> Optional[str]:
|
||||
"""Stream a Codex Responses image_generation call and return the b64 image."""
|
||||
image_b64: Optional[str] = None
|
||||
|
||||
with client.responses.stream(
|
||||
model=_CODEX_CHAT_MODEL,
|
||||
store=False,
|
||||
instructions=_CODEX_INSTRUCTIONS,
|
||||
input=[{
|
||||
def _build_responses_payload(*, prompt: str, size: str, quality: str) -> Dict[str, Any]:
|
||||
"""Build the Codex Responses request body for an image_generation call."""
|
||||
return {
|
||||
"model": _CODEX_CHAT_MODEL,
|
||||
"store": False,
|
||||
"instructions": _CODEX_INSTRUCTIONS,
|
||||
"input": [{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [{"type": "input_text", "text": prompt}],
|
||||
}],
|
||||
tools=[{
|
||||
"tools": [{
|
||||
"type": "image_generation",
|
||||
"model": API_MODEL,
|
||||
"size": size,
|
||||
|
|
@ -183,33 +163,114 @@ def _collect_image_b64(client: Any, *, prompt: str, size: str, quality: str) ->
|
|||
"background": "opaque",
|
||||
"partial_images": 1,
|
||||
}],
|
||||
tool_choice={
|
||||
"tool_choice": {
|
||||
"type": "allowed_tools",
|
||||
"mode": "required",
|
||||
"tools": [{"type": "image_generation"}],
|
||||
},
|
||||
) as stream:
|
||||
for event in stream:
|
||||
event_type = getattr(event, "type", "")
|
||||
if event_type == "response.output_item.done":
|
||||
item = getattr(event, "item", None)
|
||||
if getattr(item, "type", None) == "image_generation_call":
|
||||
result = getattr(item, "result", None)
|
||||
if isinstance(result, str) and result:
|
||||
image_b64 = result
|
||||
elif event_type == "response.image_generation_call.partial_image":
|
||||
partial = getattr(event, "partial_image_b64", None)
|
||||
if isinstance(partial, str) and partial:
|
||||
image_b64 = partial
|
||||
final = stream.get_final_response()
|
||||
"stream": True,
|
||||
}
|
||||
|
||||
# Final-response sweep covers the case where the stream finished before
|
||||
# we observed the ``output_item.done`` event for the image call.
|
||||
for item in getattr(final, "output", None) or []:
|
||||
if getattr(item, "type", None) == "image_generation_call":
|
||||
result = getattr(item, "result", None)
|
||||
|
||||
def _extract_image_b64(value: Any) -> Optional[str]:
|
||||
"""Return the newest image b64 embedded in a Responses event payload."""
|
||||
found: Optional[str] = None
|
||||
if isinstance(value, dict):
|
||||
if value.get("type") == "image_generation_call":
|
||||
result = value.get("result")
|
||||
if isinstance(result, str) and result:
|
||||
image_b64 = result
|
||||
found = result
|
||||
partial = value.get("partial_image_b64")
|
||||
if isinstance(partial, str) and partial:
|
||||
found = partial
|
||||
for child in value.values():
|
||||
nested = _extract_image_b64(child)
|
||||
if nested:
|
||||
found = nested
|
||||
elif isinstance(value, list):
|
||||
for child in value:
|
||||
nested = _extract_image_b64(child)
|
||||
if nested:
|
||||
found = nested
|
||||
return found
|
||||
|
||||
|
||||
def _iter_sse_json(response: Any):
|
||||
"""Yield JSON payloads from an SSE response without OpenAI SDK parsing.
|
||||
|
||||
The ChatGPT/Codex backend can emit image-generation events newer than the
|
||||
pinned Python SDK understands. Parsing raw SSE keeps this provider tolerant
|
||||
of those event-shape changes.
|
||||
"""
|
||||
event_name: Optional[str] = None
|
||||
data_lines: List[str] = []
|
||||
|
||||
def flush():
|
||||
nonlocal event_name, data_lines
|
||||
if not data_lines:
|
||||
event_name = None
|
||||
return None
|
||||
raw = "\n".join(data_lines).strip()
|
||||
event = event_name
|
||||
event_name = None
|
||||
data_lines = []
|
||||
if not raw or raw == "[DONE]":
|
||||
return None
|
||||
payload = json.loads(raw)
|
||||
if isinstance(payload, dict) and event and "type" not in payload:
|
||||
payload["type"] = event
|
||||
return payload
|
||||
|
||||
for line in response.iter_lines():
|
||||
if isinstance(line, bytes):
|
||||
line = line.decode("utf-8", errors="replace")
|
||||
line = str(line)
|
||||
if line == "":
|
||||
payload = flush()
|
||||
if payload is not None:
|
||||
yield payload
|
||||
continue
|
||||
if line.startswith(":"):
|
||||
continue
|
||||
if line.startswith("event:"):
|
||||
event_name = line[len("event:"):].strip()
|
||||
elif line.startswith("data:"):
|
||||
data_lines.append(line[len("data:"):].lstrip())
|
||||
|
||||
payload = flush()
|
||||
if payload is not None:
|
||||
yield payload
|
||||
|
||||
|
||||
def _collect_image_b64(token: str, *, prompt: str, size: str, quality: str) -> Optional[str]:
|
||||
"""Stream a Codex Responses image_generation call and return the b64 image."""
|
||||
import httpx
|
||||
from agent.auxiliary_client import _codex_cloudflare_headers
|
||||
|
||||
headers = _codex_cloudflare_headers(token)
|
||||
headers.update({
|
||||
"Accept": "text/event-stream",
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Content-Type": "application/json",
|
||||
})
|
||||
payload = _build_responses_payload(prompt=prompt, size=size, quality=quality)
|
||||
timeout = httpx.Timeout(300.0, connect=30.0, read=300.0, write=30.0, pool=30.0)
|
||||
|
||||
image_b64: Optional[str] = None
|
||||
with httpx.Client(timeout=timeout, headers=headers) as http:
|
||||
with http.stream("POST", f"{_CODEX_BASE_URL}/responses", json=payload) as response:
|
||||
try:
|
||||
response.raise_for_status()
|
||||
except httpx.HTTPStatusError as exc:
|
||||
exc.response.read()
|
||||
body = exc.response.text[:500]
|
||||
raise RuntimeError(
|
||||
f"Codex Responses API returned HTTP {exc.response.status_code}: {body}"
|
||||
) from exc
|
||||
for event in _iter_sse_json(response):
|
||||
found = _extract_image_b64(event)
|
||||
if found:
|
||||
image_b64 = found
|
||||
|
||||
return image_b64
|
||||
|
||||
|
|
@ -234,7 +295,7 @@ class OpenAICodexImageGenProvider(ImageGenProvider):
|
|||
if not _read_codex_access_token():
|
||||
return False
|
||||
try:
|
||||
import openai # noqa: F401
|
||||
import httpx # noqa: F401
|
||||
except ImportError:
|
||||
return False
|
||||
return True
|
||||
|
|
@ -295,10 +356,10 @@ class OpenAICodexImageGenProvider(ImageGenProvider):
|
|||
)
|
||||
|
||||
try:
|
||||
import openai # noqa: F401
|
||||
import httpx # noqa: F401
|
||||
except ImportError:
|
||||
return error_response(
|
||||
error="openai Python package not installed (pip install openai)",
|
||||
error="httpx Python package not installed (pip install httpx)",
|
||||
error_type="missing_dependency",
|
||||
provider="openai-codex",
|
||||
aspect_ratio=aspect,
|
||||
|
|
@ -307,10 +368,13 @@ class OpenAICodexImageGenProvider(ImageGenProvider):
|
|||
tier_id, meta = _resolve_model()
|
||||
size = _SIZES.get(aspect, _SIZES["square"])
|
||||
|
||||
client = _build_codex_client()
|
||||
if client is None:
|
||||
token = _read_codex_access_token()
|
||||
if not token:
|
||||
return error_response(
|
||||
error="Could not initialize Codex image client",
|
||||
error=(
|
||||
"No Codex/ChatGPT OAuth credentials available. Run "
|
||||
"`hermes auth codex` (or `hermes setup` → Codex) to sign in."
|
||||
),
|
||||
error_type="auth_required",
|
||||
provider="openai-codex",
|
||||
model=tier_id,
|
||||
|
|
@ -320,7 +384,7 @@ class OpenAICodexImageGenProvider(ImageGenProvider):
|
|||
|
||||
try:
|
||||
b64 = _collect_image_b64(
|
||||
client,
|
||||
token,
|
||||
prompt=prompt,
|
||||
size=size,
|
||||
quality=meta["quality"],
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ from __future__ import annotations
|
|||
|
||||
import importlib
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
@ -33,24 +32,6 @@ def _b64_png() -> str:
|
|||
return base64.b64encode(bytes.fromhex(_PNG_HEX)).decode()
|
||||
|
||||
|
||||
class _FakeStream:
|
||||
def __init__(self, events, final_response):
|
||||
self._events = list(events)
|
||||
self._final = final_response
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._events)
|
||||
|
||||
def get_final_response(self):
|
||||
return self._final
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _tmp_hermes_home(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
|
|
@ -127,22 +108,7 @@ class TestGenerate:
|
|||
|
||||
def test_generate_uses_codex_stream_path(self, provider, monkeypatch, tmp_path):
|
||||
monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
|
||||
|
||||
output_item = SimpleNamespace(
|
||||
type="image_generation_call",
|
||||
status="generating",
|
||||
id="ig_test",
|
||||
result=_b64_png(),
|
||||
)
|
||||
done_event = SimpleNamespace(type="response.output_item.done", item=output_item)
|
||||
final_response = SimpleNamespace(output=[], status="completed", output_text="")
|
||||
|
||||
fake_client = SimpleNamespace(
|
||||
responses=SimpleNamespace(
|
||||
stream=lambda **kwargs: _FakeStream([done_event], final_response)
|
||||
)
|
||||
)
|
||||
monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
|
||||
monkeypatch.setattr(codex_plugin, "_collect_image_b64", lambda *a, **kw: _b64_png())
|
||||
|
||||
result = provider.generate("a cat", aspect_ratio="landscape")
|
||||
|
||||
|
|
@ -163,20 +129,15 @@ class TestGenerate:
|
|||
|
||||
captured = {}
|
||||
|
||||
def _stream(**kwargs):
|
||||
captured.update(kwargs)
|
||||
output_item = SimpleNamespace(
|
||||
type="image_generation_call",
|
||||
status="generating",
|
||||
id="ig_test",
|
||||
result=_b64_png(),
|
||||
)
|
||||
done_event = SimpleNamespace(type="response.output_item.done", item=output_item)
|
||||
final_response = SimpleNamespace(output=[], status="completed", output_text="")
|
||||
return _FakeStream([done_event], final_response)
|
||||
def _collect(token, *, prompt, size, quality):
|
||||
captured.update(codex_plugin._build_responses_payload(
|
||||
prompt=prompt,
|
||||
size=size,
|
||||
quality=quality,
|
||||
))
|
||||
return _b64_png()
|
||||
|
||||
fake_client = SimpleNamespace(responses=SimpleNamespace(stream=_stream))
|
||||
monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
|
||||
monkeypatch.setattr(codex_plugin, "_collect_image_b64", _collect)
|
||||
|
||||
result = provider.generate("a cat", aspect_ratio="portrait")
|
||||
assert result["success"] is True
|
||||
|
|
@ -199,83 +160,59 @@ class TestGenerate:
|
|||
assert tool["background"] == "opaque"
|
||||
assert tool["partial_images"] == 1
|
||||
|
||||
def test_partial_image_event_used_when_done_missing(self, provider, monkeypatch):
|
||||
"""If the stream never emits output_item.done, fall back to the
|
||||
partial_image event so users at least get the latest preview frame."""
|
||||
monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
|
||||
def test_partial_image_event_used_when_done_missing(self):
|
||||
"""If output_item.done is missing, partial_image_b64 is accepted."""
|
||||
payload = {
|
||||
"type": "response.image_generation_call.partial_image",
|
||||
"partial_image_b64": _b64_png(),
|
||||
}
|
||||
assert codex_plugin._extract_image_b64(payload) == _b64_png()
|
||||
|
||||
partial_event = SimpleNamespace(
|
||||
type="response.image_generation_call.partial_image",
|
||||
partial_image_b64=_b64_png(),
|
||||
)
|
||||
final_response = SimpleNamespace(output=[], status="completed", output_text="")
|
||||
def test_sse_parser_handles_event_and_data_lines(self):
|
||||
class _Response:
|
||||
def iter_lines(self):
|
||||
return iter([
|
||||
"event: response.output_item.done",
|
||||
'data: {"item": {"type": "image_generation_call", "result": "abc"}}',
|
||||
"",
|
||||
])
|
||||
|
||||
fake_client = SimpleNamespace(
|
||||
responses=SimpleNamespace(
|
||||
stream=lambda **kwargs: _FakeStream([partial_event], final_response)
|
||||
)
|
||||
)
|
||||
monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
|
||||
events = list(codex_plugin._iter_sse_json(_Response()))
|
||||
assert events == [{
|
||||
"type": "response.output_item.done",
|
||||
"item": {"type": "image_generation_call", "result": "abc"},
|
||||
}]
|
||||
|
||||
result = provider.generate("a cat")
|
||||
assert result["success"] is True
|
||||
assert Path(result["image"]).exists()
|
||||
|
||||
def test_final_response_sweep_recovers_image(self, provider, monkeypatch):
|
||||
"""If no image_generation_call event arrives mid-stream, the
|
||||
post-stream final-response sweep should still find the image."""
|
||||
monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
|
||||
|
||||
final_item = SimpleNamespace(
|
||||
type="image_generation_call",
|
||||
status="completed",
|
||||
id="ig_final",
|
||||
result=_b64_png(),
|
||||
)
|
||||
final_response = SimpleNamespace(output=[final_item], status="completed", output_text="")
|
||||
|
||||
fake_client = SimpleNamespace(
|
||||
responses=SimpleNamespace(
|
||||
stream=lambda **kwargs: _FakeStream([], final_response)
|
||||
)
|
||||
)
|
||||
monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
|
||||
|
||||
result = provider.generate("a cat")
|
||||
assert result["success"] is True
|
||||
assert Path(result["image"]).exists()
|
||||
def test_final_response_sweep_recovers_image(self):
|
||||
"""Completed response output is found by recursive payload scanning."""
|
||||
payload = {
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"output": [{
|
||||
"type": "image_generation_call",
|
||||
"status": "completed",
|
||||
"id": "ig_final",
|
||||
"result": _b64_png(),
|
||||
}],
|
||||
},
|
||||
}
|
||||
assert codex_plugin._extract_image_b64(payload) == _b64_png()
|
||||
|
||||
def test_empty_response_returns_error(self, provider, monkeypatch):
|
||||
monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
|
||||
|
||||
final_response = SimpleNamespace(output=[], status="completed", output_text="")
|
||||
fake_client = SimpleNamespace(
|
||||
responses=SimpleNamespace(
|
||||
stream=lambda **kwargs: _FakeStream([], final_response)
|
||||
)
|
||||
)
|
||||
monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
|
||||
monkeypatch.setattr(codex_plugin, "_collect_image_b64", lambda *a, **kw: None)
|
||||
|
||||
result = provider.generate("a cat")
|
||||
assert result["success"] is False
|
||||
assert result["error_type"] == "empty_response"
|
||||
|
||||
def test_client_init_failure_returns_auth_error(self, provider, monkeypatch):
|
||||
monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
|
||||
monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: None)
|
||||
|
||||
result = provider.generate("a cat")
|
||||
assert result["success"] is False
|
||||
assert result["error_type"] == "auth_required"
|
||||
|
||||
def test_stream_exception_returns_api_error(self, provider, monkeypatch):
|
||||
monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
|
||||
|
||||
def _boom(**kwargs):
|
||||
def _boom(*args, **kwargs):
|
||||
raise RuntimeError("cloudflare 403")
|
||||
|
||||
fake_client = SimpleNamespace(responses=SimpleNamespace(stream=_boom))
|
||||
monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
|
||||
monkeypatch.setattr(codex_plugin, "_collect_image_b64", _boom)
|
||||
|
||||
result = provider.generate("a cat")
|
||||
assert result["success"] is False
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue