feat(image_gen): add openai-codex plugin (gpt-image-2 via Codex OAuth) (#14317)

New built-in image_gen backend at plugins/image_gen/openai-codex/ that
exposes the same gpt-image-2 low/medium/high tier catalog as the
existing 'openai' plugin, but routes generation through the ChatGPT/
Codex Responses image_generation tool path. Available whenever the user
has Codex OAuth signed in; no OPENAI_API_KEY required.

The two plugins are independent — users select between them via
'hermes tools' → Image Generation, and image_gen.provider in
config.yaml. The existing 'openai' (API-key) plugin is unchanged.

Reuses _read_codex_access_token() and _codex_cloudflare_headers() from
agent.auxiliary_client so token expiry / cred-pool / Cloudflare
originator handling stays in one place.

Inspired by #14047 by @Hygaard, but re-implemented as a separate
plugin instead of an in-place fork of the openai plugin.

Closes #11195
This commit is contained in:
Teknium 2026-04-22 20:43:21 -07:00 committed by GitHub
parent 563ed0e61f
commit eda5ae5a5e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 682 additions and 0 deletions

View file

@ -0,0 +1,378 @@
"""OpenAI image generation backend — ChatGPT/Codex OAuth variant.
Identical model catalog and tier semantics to the ``openai`` image-gen plugin
(``gpt-image-2`` at low/medium/high quality), but routes the request through
the Codex Responses API ``image_generation`` tool instead of the
``images.generate`` REST endpoint. This lets users who are already
authenticated with Codex/ChatGPT generate images without configuring a
separate ``OPENAI_API_KEY``.
Selection precedence for the tier (first hit wins):
1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests)
2. ``image_gen.openai-codex.model`` in ``config.yaml``
3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs)
4. :data:`DEFAULT_MODEL` ``gpt-image-2-medium``
Output is saved as PNG under ``$HERMES_HOME/cache/images/``.
"""
from __future__ import annotations
import logging
from typing import Any, Dict, List, Optional, Tuple
from agent.image_gen_provider import (
DEFAULT_ASPECT_RATIO,
ImageGenProvider,
error_response,
resolve_aspect_ratio,
save_b64_image,
success_response,
)
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Model catalog — mirrors the ``openai`` plugin so the picker UX is identical.
# ---------------------------------------------------------------------------
# The underlying image model invoked by the image_generation tool.
API_MODEL = "gpt-image-2"

# Picker catalog: tier ID -> display metadata. The "quality" value is passed
# straight through to the image_generation tool request.
_MODELS: Dict[str, Dict[str, Any]] = {
    "gpt-image-2-low": {
        "display": "GPT Image 2 (Low)",
        "speed": "~15s",
        "strengths": "Fast iteration, lowest cost",
        "quality": "low",
    },
    "gpt-image-2-medium": {
        "display": "GPT Image 2 (Medium)",
        "speed": "~40s",
        "strengths": "Balanced — default",
        "quality": "medium",
    },
    "gpt-image-2-high": {
        "display": "GPT Image 2 (High)",
        "speed": "~2min",
        "strengths": "Highest fidelity, strongest prompt adherence",
        "quality": "high",
    },
}

# Tier used when neither env var nor config selects one (see _resolve_model).
DEFAULT_MODEL = "gpt-image-2-medium"

# Aspect-ratio keyword -> pixel size string sent to the image_generation tool.
_SIZES = {
    "landscape": "1536x1024",
    "square": "1024x1024",
    "portrait": "1024x1536",
}

# Codex Responses surface used for the request. The chat model itself is only
# the host that calls the ``image_generation`` tool; the actual image work is
# done by ``API_MODEL``.
_CODEX_CHAT_MODEL = "gpt-5.4"
_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
_CODEX_INSTRUCTIONS = (
    "You are an assistant that must fulfill image generation requests by "
    "using the image_generation tool when provided."
)
# ---------------------------------------------------------------------------
# Config + auth helpers
# ---------------------------------------------------------------------------
def _load_image_gen_config() -> Dict[str, Any]:
    """Return the ``image_gen`` section of config.yaml, or ``{}`` on any failure.

    Never raises: a missing/unreadable config, or a section that is not a
    mapping, simply yields an empty dict so callers fall back to defaults.
    """
    try:
        from hermes_cli.config import load_config

        raw = load_config()
        if not isinstance(raw, dict):
            return {}
        section = raw.get("image_gen")
        if not isinstance(section, dict):
            return {}
        return section
    except Exception as exc:
        logger.debug("Could not load image_gen config: %s", exc)
        return {}
def _resolve_model() -> Tuple[str, Dict[str, Any]]:
    """Decide which gpt-image-2 tier to use and return ``(model_id, meta)``.

    Precedence (first valid hit wins):
      1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests)
      2. ``image_gen.openai-codex.model`` in config.yaml
      3. ``image_gen.model`` in config.yaml
      4. :data:`DEFAULT_MODEL`

    Unknown values at any level are skipped rather than erroring, so a stale
    config entry degrades gracefully to the default tier.
    """
    import os

    env_override = os.environ.get("OPENAI_IMAGE_MODEL")
    if env_override and env_override in _MODELS:
        return env_override, _MODELS[env_override]

    cfg = _load_image_gen_config()
    # Single lookup of the plugin sub-section (the original fetched it twice
    # and re-checked isinstance redundantly).
    sub = cfg.get("openai-codex")
    candidates = (
        sub.get("model") if isinstance(sub, dict) else None,  # plugin-specific key
        cfg.get("model"),  # top-level image_gen.model (may belong to another provider)
    )
    for candidate in candidates:
        if isinstance(candidate, str) and candidate in _MODELS:
            return candidate, _MODELS[candidate]
    return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL]
def _read_codex_access_token() -> Optional[str]:
    """Return a usable Codex OAuth token, or ``None`` when unavailable.

    Delegates to the canonical reader in ``agent.auxiliary_client`` so token
    expiry, credential pool selection, and JWT decoding stay in one place.
    """
    try:
        from agent.auxiliary_client import _read_codex_access_token as _delegate

        raw = _delegate()
        if not isinstance(raw, str):
            return None
        token = raw.strip()
        return token if token else None
    except Exception as exc:
        logger.debug("Could not resolve Codex access token: %s", exc)
        return None
def _build_codex_client():
    """Construct an OpenAI client aimed at the ChatGPT/Codex backend.

    Returns ``None`` when no OAuth token is available, the ``openai`` package
    is missing, or client construction fails for any other reason.
    """
    token = _read_codex_access_token()
    if not token:
        return None
    try:
        import openai
        from agent.auxiliary_client import _codex_cloudflare_headers

        headers = _codex_cloudflare_headers(token)
        return openai.OpenAI(
            api_key=token,
            base_url=_CODEX_BASE_URL,
            default_headers=headers,
        )
    except Exception as exc:
        logger.debug("Could not build Codex image client: %s", exc)
        return None
def _collect_image_b64(client: Any, *, prompt: str, size: str, quality: str) -> Optional[str]:
    """Run one Codex Responses ``image_generation`` call; return the b64 image.

    Keeps the most recent image seen: partial-image preview frames are
    superseded by the completed tool call's result, and a post-stream sweep
    of the final response recovers an image whose done-event never arrived.
    Returns ``None`` when no image material was observed at all.
    """
    request: Dict[str, Any] = dict(
        model=_CODEX_CHAT_MODEL,
        store=False,
        instructions=_CODEX_INSTRUCTIONS,
        input=[{
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": prompt}],
        }],
        tools=[{
            "type": "image_generation",
            "model": API_MODEL,
            "size": size,
            "quality": quality,
            "output_format": "png",
            "background": "opaque",
            "partial_images": 1,
        }],
        tool_choice={
            "type": "allowed_tools",
            "mode": "required",
            "tools": [{"type": "image_generation"}],
        },
    )
    latest: Optional[str] = None
    with client.responses.stream(**request) as stream:
        for event in stream:
            kind = getattr(event, "type", "")
            if kind == "response.image_generation_call.partial_image":
                frame = getattr(event, "partial_image_b64", None)
                if isinstance(frame, str) and frame:
                    latest = frame
            elif kind == "response.output_item.done":
                item = getattr(event, "item", None)
                if getattr(item, "type", None) == "image_generation_call":
                    payload = getattr(item, "result", None)
                    if isinstance(payload, str) and payload:
                        latest = payload
        final = stream.get_final_response()
    # Final-response sweep covers the case where the stream finished before
    # we observed the ``output_item.done`` event for the image call.
    for item in getattr(final, "output", None) or []:
        if getattr(item, "type", None) == "image_generation_call":
            payload = getattr(item, "result", None)
            if isinstance(payload, str) and payload:
                latest = payload
    return latest
# ---------------------------------------------------------------------------
# Provider
# ---------------------------------------------------------------------------
class OpenAICodexImageGenProvider(ImageGenProvider):
    """gpt-image-2 routed through ChatGPT/Codex OAuth instead of an API key.

    Exposes the same three-tier catalog as the API-key ``openai`` plugin but
    sends the request through the Codex Responses ``image_generation`` tool
    (see :func:`_collect_image_b64`). All failure modes are mapped to
    ``error_response`` dicts; :meth:`generate` never raises.
    """

    @property
    def name(self) -> str:
        # Stable provider ID — referenced by image_gen.provider in config.yaml.
        return "openai-codex"

    @property
    def display_name(self) -> str:
        # Human-facing label shown in the provider picker.
        return "OpenAI (Codex auth)"

    def is_available(self) -> bool:
        """Available only when both a Codex OAuth token and the openai SDK exist."""
        if not _read_codex_access_token():
            return False
        try:
            import openai  # noqa: F401
        except ImportError:
            return False
        return True

    def list_models(self) -> List[Dict[str, Any]]:
        """Return picker entries for the three gpt-image-2 quality tiers."""
        return [
            {
                "id": model_id,
                "display": meta["display"],
                "speed": meta["speed"],
                "strengths": meta["strengths"],
                # No fixed per-image price is surfaced here — presumably
                # covered by the user's ChatGPT plan (TODO confirm).
                "price": "varies",
            }
            for model_id, meta in _MODELS.items()
        ]

    def default_model(self) -> Optional[str]:
        # Medium tier — see DEFAULT_MODEL and _resolve_model() precedence.
        return DEFAULT_MODEL

    def get_setup_schema(self) -> Dict[str, Any]:
        """Describe setup requirements: no env vars, only Codex OAuth sign-in."""
        return {
            "name": "OpenAI (Codex auth)",
            "badge": "free",
            "tag": "gpt-image-2 via ChatGPT/Codex OAuth — no API key required",
            "env_vars": [],
            "post_setup_hint": (
                "Sign in with `hermes auth codex` (or `hermes setup` → Codex) "
                "if you haven't already. No API key needed."
            ),
        }

    def generate(
        self,
        prompt: str,
        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Generate one PNG for *prompt* and save it under the image cache.

        Returns a success/error response dict built by the shared helpers
        from ``agent.image_gen_provider``; never raises to the caller.
        """
        prompt = (prompt or "").strip()
        aspect = resolve_aspect_ratio(aspect_ratio)
        # Input validation first — cheap, no auth or network needed.
        if not prompt:
            return error_response(
                error="Prompt is required and must be a non-empty string",
                error_type="invalid_argument",
                provider="openai-codex",
                aspect_ratio=aspect,
            )
        # Precondition: Codex OAuth credentials must resolve.
        if not _read_codex_access_token():
            return error_response(
                error=(
                    "No Codex/ChatGPT OAuth credentials available. Run "
                    "`hermes auth codex` (or `hermes setup` → Codex) to sign in."
                ),
                error_type="auth_required",
                provider="openai-codex",
                aspect_ratio=aspect,
            )
        # Precondition: the openai SDK must be importable.
        try:
            import openai  # noqa: F401
        except ImportError:
            return error_response(
                error="openai Python package not installed (pip install openai)",
                error_type="missing_dependency",
                provider="openai-codex",
                aspect_ratio=aspect,
            )
        tier_id, meta = _resolve_model()
        # Unknown aspect keywords degrade to square rather than erroring.
        size = _SIZES.get(aspect, _SIZES["square"])
        client = _build_codex_client()
        if client is None:
            return error_response(
                error="Could not initialize Codex image client",
                error_type="auth_required",
                provider="openai-codex",
                model=tier_id,
                prompt=prompt,
                aspect_ratio=aspect,
            )
        # Run the streamed image_generation call; any SDK/network failure is
        # logged at debug (with traceback) and converted to an api_error.
        try:
            b64 = _collect_image_b64(
                client,
                prompt=prompt,
                size=size,
                quality=meta["quality"],
            )
        except Exception as exc:
            logger.debug("Codex image generation failed", exc_info=True)
            return error_response(
                error=f"OpenAI image generation via Codex auth failed: {exc}",
                error_type="api_error",
                provider="openai-codex",
                model=tier_id,
                prompt=prompt,
                aspect_ratio=aspect,
            )
        if not b64:
            return error_response(
                error="Codex response contained no image_generation_call result",
                error_type="empty_response",
                provider="openai-codex",
                model=tier_id,
                prompt=prompt,
                aspect_ratio=aspect,
            )
        # Persist to the image cache; prefix distinguishes this backend's
        # files from the API-key plugin's output.
        try:
            saved_path = save_b64_image(b64, prefix=f"openai_codex_{tier_id}")
        except Exception as exc:
            return error_response(
                error=f"Could not save image to cache: {exc}",
                error_type="io_error",
                provider="openai-codex",
                model=tier_id,
                prompt=prompt,
                aspect_ratio=aspect,
            )
        return success_response(
            image=str(saved_path),
            model=tier_id,
            prompt=prompt,
            aspect_ratio=aspect,
            provider="openai-codex",
            extra={"size": size, "quality": meta["quality"]},
        )
# ---------------------------------------------------------------------------
# Plugin entry point
# ---------------------------------------------------------------------------
def register(ctx) -> None:
    """Plugin entry point: install the Codex-backed image-gen provider on *ctx*."""
    provider = OpenAICodexImageGenProvider()
    ctx.register_image_gen_provider(provider)

View file

@ -0,0 +1,5 @@
name: openai-codex
version: 1.0.0
description: "OpenAI image generation backed by ChatGPT/Codex OAuth (gpt-image-2 via the Responses image_generation tool). Saves generated images to $HERMES_HOME/cache/images/."
author: NousResearch
kind: backend

View file

@ -0,0 +1,299 @@
"""Tests for the bundled ``openai-codex`` image_gen plugin.
Mirrors ``test_openai_provider.py`` but targets the standalone
Codex/ChatGPT-OAuth-backed provider that uses the Responses
``image_generation`` tool path instead of the ``images.generate`` REST
endpoint.
"""
from __future__ import annotations
import importlib
from pathlib import Path
from types import SimpleNamespace
import pytest
# The plugin directory uses a hyphen, which is not a valid Python identifier
# for the dotted-import form. Load it via importlib so tests don't need to
# touch sys.path or rename the directory.
codex_plugin = importlib.import_module("plugins.image_gen.openai-codex")
# 1×1 transparent PNG — valid bytes for save_b64_image()
# 1×1 transparent PNG — valid bytes for save_b64_image()
_PNG_HEX = (
    "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4"
    "890000000d49444154789c6300010000000500010d0a2db40000000049454e44"
    "ae426082"
)


def _b64_png() -> str:
    """Return the fixture PNG above as a base64 string (what the API emits)."""
    import base64

    raw = bytes.fromhex(_PNG_HEX)
    return base64.b64encode(raw).decode()
class _FakeStream:
    """Minimal stand-in for the openai Responses streaming context manager.

    Replays a fixed event list on iteration and hands back a canned final
    response; supports the ``with`` protocol so production code runs unchanged.
    """

    def __init__(self, events, final_response):
        # Materialize once so the stream can be iterated repeatedly.
        self._events = list(events)
        self._final = final_response

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # Never suppress exceptions raised inside the with-block.
        return False

    def __iter__(self):
        yield from self._events

    def get_final_response(self):
        return self._final
@pytest.fixture(autouse=True)
def _tmp_hermes_home(tmp_path, monkeypatch):
    # Point HERMES_HOME at a per-test temp dir so saved images never touch
    # the user's real cache; autouse so every test in this module is isolated.
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    yield tmp_path
@pytest.fixture
def provider(monkeypatch):
    # Codex plugin is API-key-independent; clear it to make the test honest.
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    return codex_plugin.OpenAICodexImageGenProvider()
# ── Metadata ────────────────────────────────────────────────────────────────
class TestMetadata:
    """Static provider metadata: IDs, model catalog, and setup schema."""

    def test_name(self, provider):
        assert provider.name == "openai-codex"

    def test_display_name(self, provider):
        assert provider.display_name == "OpenAI (Codex auth)"

    def test_default_model(self, provider):
        assert provider.default_model() == "gpt-image-2-medium"

    def test_list_models_three_tiers(self, provider):
        tier_ids = [entry["id"] for entry in provider.list_models()]
        expected = ["gpt-image-2-low", "gpt-image-2-medium", "gpt-image-2-high"]
        assert tier_ids == expected

    def test_setup_schema_has_no_required_env_vars(self, provider):
        schema = provider.get_setup_schema()
        assert schema["badge"] == "free"
        assert schema["env_vars"] == []
# ── Availability ────────────────────────────────────────────────────────────
class TestAvailability:
    """is_available() keys off the Codex OAuth token, never the API key."""

    def test_unavailable_without_codex_token(self, monkeypatch):
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: None)
        prov = codex_plugin.OpenAICodexImageGenProvider()
        assert prov.is_available() is False

    def test_available_with_codex_token(self, monkeypatch):
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        prov = codex_plugin.OpenAICodexImageGenProvider()
        assert prov.is_available() is True

    def test_openai_api_key_alone_is_not_enough(self, monkeypatch):
        # The Codex plugin is intentionally orthogonal to the API-key plugin:
        # OPENAI_API_KEY by itself must NOT make it report available.
        monkeypatch.setenv("OPENAI_API_KEY", "sk-test")
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: None)
        prov = codex_plugin.OpenAICodexImageGenProvider()
        assert prov.is_available() is False
# ── Generate ────────────────────────────────────────────────────────────────
class TestGenerate:
    """End-to-end generate() behavior with the Codex stream fully faked out."""

    def test_returns_auth_error_without_codex_token(self, provider, monkeypatch):
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: None)
        result = provider.generate("a cat")
        assert result["success"] is False
        assert result["error_type"] == "auth_required"

    def test_returns_invalid_argument_for_empty_prompt(self, provider, monkeypatch):
        # Whitespace-only prompts must fail validation even when auth is fine.
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        result = provider.generate(" ")
        assert result["success"] is False
        assert result["error_type"] == "invalid_argument"

    def test_generate_uses_codex_stream_path(self, provider, monkeypatch, tmp_path):
        # Happy path: a single output_item.done event carrying the image.
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        output_item = SimpleNamespace(
            type="image_generation_call",
            status="generating",
            id="ig_test",
            result=_b64_png(),
        )
        done_event = SimpleNamespace(type="response.output_item.done", item=output_item)
        final_response = SimpleNamespace(output=[], status="completed", output_text="")
        fake_client = SimpleNamespace(
            responses=SimpleNamespace(
                stream=lambda **kwargs: _FakeStream([done_event], final_response)
            )
        )
        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
        result = provider.generate("a cat", aspect_ratio="landscape")
        assert result["success"] is True
        assert result["model"] == "gpt-image-2-medium"
        assert result["provider"] == "openai-codex"
        assert result["quality"] == "medium"
        saved = Path(result["image"])
        assert saved.exists()
        # tmp_path doubles as HERMES_HOME via the autouse fixture.
        assert saved.parent == tmp_path / "cache" / "images"
        # Filename prefix differs from the API-key plugin so cache audits can
        # tell the two backends apart.
        assert saved.name.startswith("openai_codex_")

    def test_codex_stream_request_shape(self, provider, monkeypatch):
        # Capture the kwargs passed to responses.stream and pin the full
        # request contract: host model, input message, tool config, tool_choice.
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        captured = {}
        def _stream(**kwargs):
            captured.update(kwargs)
            output_item = SimpleNamespace(
                type="image_generation_call",
                status="generating",
                id="ig_test",
                result=_b64_png(),
            )
            done_event = SimpleNamespace(type="response.output_item.done", item=output_item)
            final_response = SimpleNamespace(output=[], status="completed", output_text="")
            return _FakeStream([done_event], final_response)
        fake_client = SimpleNamespace(responses=SimpleNamespace(stream=_stream))
        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
        result = provider.generate("a cat", aspect_ratio="portrait")
        assert result["success"] is True
        assert captured["model"] == "gpt-5.4"
        assert captured["store"] is False
        assert captured["input"][0]["type"] == "message"
        assert captured["input"][0]["role"] == "user"
        assert captured["input"][0]["content"][0]["type"] == "input_text"
        assert captured["tool_choice"]["type"] == "allowed_tools"
        assert captured["tool_choice"]["mode"] == "required"
        assert captured["tool_choice"]["tools"] == [{"type": "image_generation"}]
        tool = captured["tools"][0]
        assert tool["type"] == "image_generation"
        assert tool["model"] == "gpt-image-2"
        assert tool["quality"] == "medium"
        # portrait aspect maps to 1024x1536 (see _SIZES in the plugin).
        assert tool["size"] == "1024x1536"
        assert tool["output_format"] == "png"
        assert tool["background"] == "opaque"
        assert tool["partial_images"] == 1

    def test_partial_image_event_used_when_done_missing(self, provider, monkeypatch):
        """If the stream never emits output_item.done, fall back to the
        partial_image event so users at least get the latest preview frame."""
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        partial_event = SimpleNamespace(
            type="response.image_generation_call.partial_image",
            partial_image_b64=_b64_png(),
        )
        final_response = SimpleNamespace(output=[], status="completed", output_text="")
        fake_client = SimpleNamespace(
            responses=SimpleNamespace(
                stream=lambda **kwargs: _FakeStream([partial_event], final_response)
            )
        )
        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
        result = provider.generate("a cat")
        assert result["success"] is True
        assert Path(result["image"]).exists()

    def test_final_response_sweep_recovers_image(self, provider, monkeypatch):
        """If no image_generation_call event arrives mid-stream, the
        post-stream final-response sweep should still find the image."""
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        final_item = SimpleNamespace(
            type="image_generation_call",
            status="completed",
            id="ig_final",
            result=_b64_png(),
        )
        final_response = SimpleNamespace(output=[final_item], status="completed", output_text="")
        fake_client = SimpleNamespace(
            responses=SimpleNamespace(
                stream=lambda **kwargs: _FakeStream([], final_response)
            )
        )
        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
        result = provider.generate("a cat")
        assert result["success"] is True
        assert Path(result["image"]).exists()

    def test_empty_response_returns_error(self, provider, monkeypatch):
        # No events and an empty final output -> empty_response error.
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        final_response = SimpleNamespace(output=[], status="completed", output_text="")
        fake_client = SimpleNamespace(
            responses=SimpleNamespace(
                stream=lambda **kwargs: _FakeStream([], final_response)
            )
        )
        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
        result = provider.generate("a cat")
        assert result["success"] is False
        assert result["error_type"] == "empty_response"

    def test_client_init_failure_returns_auth_error(self, provider, monkeypatch):
        # Token resolves but client construction fails -> treated as auth issue.
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: None)
        result = provider.generate("a cat")
        assert result["success"] is False
        assert result["error_type"] == "auth_required"

    def test_stream_exception_returns_api_error(self, provider, monkeypatch):
        # Exceptions from the stream call are caught and surfaced verbatim
        # in the error message with error_type == "api_error".
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        def _boom(**kwargs):
            raise RuntimeError("cloudflare 403")
        fake_client = SimpleNamespace(responses=SimpleNamespace(stream=_boom))
        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
        result = provider.generate("a cat")
        assert result["success"] is False
        assert result["error_type"] == "api_error"
        assert "cloudflare 403" in result["error"]
# ── Plugin entry point ──────────────────────────────────────────────────────
class TestRegistration:
    """register() must hand exactly one provider instance to the context."""

    def test_register_calls_register_image_gen_provider(self):
        captured = []

        class _Ctx:
            def register_image_gen_provider(self, prov):
                captured.append(prov)

        codex_plugin.register(_Ctx())
        assert len(captured) == 1
        assert captured[0].name == "openai-codex"