feat(image_gen): add openai-codex plugin (gpt-image-2 via Codex OAuth) (#14317)

New built-in image_gen backend at plugins/image_gen/openai-codex/ that
exposes the same gpt-image-2 low/medium/high tier catalog as the
existing 'openai' plugin, but routes generation through the ChatGPT/
Codex Responses image_generation tool path. Available whenever the user
has Codex OAuth signed in; no OPENAI_API_KEY required.

The two plugins are independent — users select between them via
'hermes tools' → Image Generation, and image_gen.provider in
config.yaml. The existing 'openai' (API-key) plugin is unchanged.

Reuses _read_codex_access_token() and _codex_cloudflare_headers() from
agent.auxiliary_client so token expiry / cred-pool / Cloudflare
originator handling stays in one place.

Inspired by #14047 by @Hygaard, but re-implemented as a separate
plugin instead of an in-place fork of the openai plugin.

Closes #11195
This commit is contained in:
Teknium 2026-04-22 20:43:21 -07:00 committed by GitHub
parent 563ed0e61f
commit eda5ae5a5e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 682 additions and 0 deletions

View file

@ -0,0 +1,378 @@
"""OpenAI image generation backend — ChatGPT/Codex OAuth variant.
Identical model catalog and tier semantics to the ``openai`` image-gen plugin
(``gpt-image-2`` at low/medium/high quality), but routes the request through
the Codex Responses API ``image_generation`` tool instead of the
``images.generate`` REST endpoint. This lets users who are already
authenticated with Codex/ChatGPT generate images without configuring a
separate ``OPENAI_API_KEY``.
Selection precedence for the tier (first hit wins):
1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests)
2. ``image_gen.openai-codex.model`` in ``config.yaml``
3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs)
4. :data:`DEFAULT_MODEL` ``gpt-image-2-medium``
Output is saved as PNG under ``$HERMES_HOME/cache/images/``.
"""
from __future__ import annotations
import logging
from typing import Any, Dict, List, Optional, Tuple
from agent.image_gen_provider import (
DEFAULT_ASPECT_RATIO,
ImageGenProvider,
error_response,
resolve_aspect_ratio,
save_b64_image,
success_response,
)
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Model catalog — mirrors the ``openai`` plugin so the picker UX is identical.
# ---------------------------------------------------------------------------
# The underlying image model invoked by the image_generation tool.
API_MODEL = "gpt-image-2"

# Picker catalog: tier ID -> display metadata. The "quality" value is passed
# straight through to the image_generation tool request.
_MODELS: Dict[str, Dict[str, Any]] = {
    "gpt-image-2-low": {
        "display": "GPT Image 2 (Low)",
        "speed": "~15s",
        "strengths": "Fast iteration, lowest cost",
        "quality": "low",
    },
    "gpt-image-2-medium": {
        "display": "GPT Image 2 (Medium)",
        "speed": "~40s",
        "strengths": "Balanced — default",
        "quality": "medium",
    },
    "gpt-image-2-high": {
        "display": "GPT Image 2 (High)",
        "speed": "~2min",
        "strengths": "Highest fidelity, strongest prompt adherence",
        "quality": "high",
    },
}

# Tier used when neither env var nor config selects one (see _resolve_model).
DEFAULT_MODEL = "gpt-image-2-medium"

# Aspect-ratio keyword -> pixel size string sent to the image_generation tool.
_SIZES = {
    "landscape": "1536x1024",
    "square": "1024x1024",
    "portrait": "1024x1536",
}

# Codex Responses surface used for the request. The chat model itself is only
# the host that calls the ``image_generation`` tool; the actual image work is
# done by ``API_MODEL``.
_CODEX_CHAT_MODEL = "gpt-5.4"
_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
_CODEX_INSTRUCTIONS = (
    "You are an assistant that must fulfill image generation requests by "
    "using the image_generation tool when provided."
)
# ---------------------------------------------------------------------------
# Config + auth helpers
# ---------------------------------------------------------------------------
def _load_image_gen_config() -> Dict[str, Any]:
    """Return the ``image_gen`` section of config.yaml, or ``{}`` on any failure.

    Never raises: a missing/unreadable config, or a section that is not a
    mapping, simply yields an empty dict so callers fall back to defaults.
    """
    try:
        from hermes_cli.config import load_config

        raw = load_config()
        if not isinstance(raw, dict):
            return {}
        section = raw.get("image_gen")
        if not isinstance(section, dict):
            return {}
        return section
    except Exception as exc:
        logger.debug("Could not load image_gen config: %s", exc)
        return {}
def _resolve_model() -> Tuple[str, Dict[str, Any]]:
    """Decide which gpt-image-2 tier to use and return ``(model_id, meta)``.

    Precedence (first valid hit wins):
      1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests)
      2. ``image_gen.openai-codex.model`` in config.yaml
      3. ``image_gen.model`` in config.yaml
      4. :data:`DEFAULT_MODEL`

    Unknown values at any level are skipped rather than erroring, so a stale
    config entry degrades gracefully to the default tier.
    """
    import os

    env_override = os.environ.get("OPENAI_IMAGE_MODEL")
    if env_override and env_override in _MODELS:
        return env_override, _MODELS[env_override]

    cfg = _load_image_gen_config()
    # Single lookup of the plugin sub-section (the original fetched it twice
    # and re-checked isinstance redundantly).
    sub = cfg.get("openai-codex")
    candidates = (
        sub.get("model") if isinstance(sub, dict) else None,  # plugin-specific key
        cfg.get("model"),  # top-level image_gen.model (may belong to another provider)
    )
    for candidate in candidates:
        if isinstance(candidate, str) and candidate in _MODELS:
            return candidate, _MODELS[candidate]
    return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL]
def _read_codex_access_token() -> Optional[str]:
    """Return a usable Codex OAuth token, or ``None`` when unavailable.

    Delegates to the canonical reader in ``agent.auxiliary_client`` so token
    expiry, credential pool selection, and JWT decoding stay in one place.
    """
    try:
        from agent.auxiliary_client import _read_codex_access_token as _delegate

        raw = _delegate()
        if not isinstance(raw, str):
            return None
        token = raw.strip()
        return token if token else None
    except Exception as exc:
        logger.debug("Could not resolve Codex access token: %s", exc)
        return None
def _build_codex_client():
    """Construct an OpenAI client aimed at the ChatGPT/Codex backend.

    Returns ``None`` when no OAuth token is available, the ``openai`` package
    is missing, or client construction fails for any other reason.
    """
    token = _read_codex_access_token()
    if not token:
        return None
    try:
        import openai
        from agent.auxiliary_client import _codex_cloudflare_headers

        headers = _codex_cloudflare_headers(token)
        return openai.OpenAI(
            api_key=token,
            base_url=_CODEX_BASE_URL,
            default_headers=headers,
        )
    except Exception as exc:
        logger.debug("Could not build Codex image client: %s", exc)
        return None
def _collect_image_b64(client: Any, *, prompt: str, size: str, quality: str) -> Optional[str]:
    """Run one Codex Responses ``image_generation`` call; return the b64 image.

    Keeps the most recent image seen: partial-image preview frames are
    superseded by the completed tool call's result, and a post-stream sweep
    of the final response recovers an image whose done-event never arrived.
    Returns ``None`` when no image material was observed at all.
    """
    request: Dict[str, Any] = dict(
        model=_CODEX_CHAT_MODEL,
        store=False,
        instructions=_CODEX_INSTRUCTIONS,
        input=[{
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": prompt}],
        }],
        tools=[{
            "type": "image_generation",
            "model": API_MODEL,
            "size": size,
            "quality": quality,
            "output_format": "png",
            "background": "opaque",
            "partial_images": 1,
        }],
        tool_choice={
            "type": "allowed_tools",
            "mode": "required",
            "tools": [{"type": "image_generation"}],
        },
    )
    latest: Optional[str] = None
    with client.responses.stream(**request) as stream:
        for event in stream:
            kind = getattr(event, "type", "")
            if kind == "response.image_generation_call.partial_image":
                frame = getattr(event, "partial_image_b64", None)
                if isinstance(frame, str) and frame:
                    latest = frame
            elif kind == "response.output_item.done":
                item = getattr(event, "item", None)
                if getattr(item, "type", None) == "image_generation_call":
                    payload = getattr(item, "result", None)
                    if isinstance(payload, str) and payload:
                        latest = payload
        final = stream.get_final_response()
    # Final-response sweep covers the case where the stream finished before
    # we observed the ``output_item.done`` event for the image call.
    for item in getattr(final, "output", None) or []:
        if getattr(item, "type", None) == "image_generation_call":
            payload = getattr(item, "result", None)
            if isinstance(payload, str) and payload:
                latest = payload
    return latest
# ---------------------------------------------------------------------------
# Provider
# ---------------------------------------------------------------------------
class OpenAICodexImageGenProvider(ImageGenProvider):
    """gpt-image-2 routed through ChatGPT/Codex OAuth instead of an API key.

    Exposes the same three-tier catalog as the API-key ``openai`` plugin but
    sends the request through the Codex Responses ``image_generation`` tool
    (see :func:`_collect_image_b64`). All failure modes are mapped to
    ``error_response`` dicts; :meth:`generate` never raises.
    """

    @property
    def name(self) -> str:
        # Stable provider ID — referenced by image_gen.provider in config.yaml.
        return "openai-codex"

    @property
    def display_name(self) -> str:
        # Human-facing label shown in the provider picker.
        return "OpenAI (Codex auth)"

    def is_available(self) -> bool:
        """Available only when both a Codex OAuth token and the openai SDK exist."""
        if not _read_codex_access_token():
            return False
        try:
            import openai  # noqa: F401
        except ImportError:
            return False
        return True

    def list_models(self) -> List[Dict[str, Any]]:
        """Return picker entries for the three gpt-image-2 quality tiers."""
        return [
            {
                "id": model_id,
                "display": meta["display"],
                "speed": meta["speed"],
                "strengths": meta["strengths"],
                # No fixed per-image price is surfaced here — presumably
                # covered by the user's ChatGPT plan (TODO confirm).
                "price": "varies",
            }
            for model_id, meta in _MODELS.items()
        ]

    def default_model(self) -> Optional[str]:
        # Medium tier — see DEFAULT_MODEL and _resolve_model() precedence.
        return DEFAULT_MODEL

    def get_setup_schema(self) -> Dict[str, Any]:
        """Describe setup requirements: no env vars, only Codex OAuth sign-in."""
        return {
            "name": "OpenAI (Codex auth)",
            "badge": "free",
            "tag": "gpt-image-2 via ChatGPT/Codex OAuth — no API key required",
            "env_vars": [],
            "post_setup_hint": (
                "Sign in with `hermes auth codex` (or `hermes setup` → Codex) "
                "if you haven't already. No API key needed."
            ),
        }

    def generate(
        self,
        prompt: str,
        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Generate one PNG for *prompt* and save it under the image cache.

        Returns a success/error response dict built by the shared helpers
        from ``agent.image_gen_provider``; never raises to the caller.
        """
        prompt = (prompt or "").strip()
        aspect = resolve_aspect_ratio(aspect_ratio)
        # Input validation first — cheap, no auth or network needed.
        if not prompt:
            return error_response(
                error="Prompt is required and must be a non-empty string",
                error_type="invalid_argument",
                provider="openai-codex",
                aspect_ratio=aspect,
            )
        # Precondition: Codex OAuth credentials must resolve.
        if not _read_codex_access_token():
            return error_response(
                error=(
                    "No Codex/ChatGPT OAuth credentials available. Run "
                    "`hermes auth codex` (or `hermes setup` → Codex) to sign in."
                ),
                error_type="auth_required",
                provider="openai-codex",
                aspect_ratio=aspect,
            )
        # Precondition: the openai SDK must be importable.
        try:
            import openai  # noqa: F401
        except ImportError:
            return error_response(
                error="openai Python package not installed (pip install openai)",
                error_type="missing_dependency",
                provider="openai-codex",
                aspect_ratio=aspect,
            )
        tier_id, meta = _resolve_model()
        # Unknown aspect keywords degrade to square rather than erroring.
        size = _SIZES.get(aspect, _SIZES["square"])
        client = _build_codex_client()
        if client is None:
            return error_response(
                error="Could not initialize Codex image client",
                error_type="auth_required",
                provider="openai-codex",
                model=tier_id,
                prompt=prompt,
                aspect_ratio=aspect,
            )
        # Run the streamed image_generation call; any SDK/network failure is
        # logged at debug (with traceback) and converted to an api_error.
        try:
            b64 = _collect_image_b64(
                client,
                prompt=prompt,
                size=size,
                quality=meta["quality"],
            )
        except Exception as exc:
            logger.debug("Codex image generation failed", exc_info=True)
            return error_response(
                error=f"OpenAI image generation via Codex auth failed: {exc}",
                error_type="api_error",
                provider="openai-codex",
                model=tier_id,
                prompt=prompt,
                aspect_ratio=aspect,
            )
        if not b64:
            return error_response(
                error="Codex response contained no image_generation_call result",
                error_type="empty_response",
                provider="openai-codex",
                model=tier_id,
                prompt=prompt,
                aspect_ratio=aspect,
            )
        # Persist to the image cache; prefix distinguishes this backend's
        # files from the API-key plugin's output.
        try:
            saved_path = save_b64_image(b64, prefix=f"openai_codex_{tier_id}")
        except Exception as exc:
            return error_response(
                error=f"Could not save image to cache: {exc}",
                error_type="io_error",
                provider="openai-codex",
                model=tier_id,
                prompt=prompt,
                aspect_ratio=aspect,
            )
        return success_response(
            image=str(saved_path),
            model=tier_id,
            prompt=prompt,
            aspect_ratio=aspect,
            provider="openai-codex",
            extra={"size": size, "quality": meta["quality"]},
        )
# ---------------------------------------------------------------------------
# Plugin entry point
# ---------------------------------------------------------------------------
def register(ctx) -> None:
    """Plugin entry point: install the Codex-backed image-gen provider on *ctx*."""
    provider = OpenAICodexImageGenProvider()
    ctx.register_image_gen_provider(provider)

View file

@ -0,0 +1,5 @@
name: openai-codex
version: 1.0.0
description: "OpenAI image generation backed by ChatGPT/Codex OAuth (gpt-image-2 via the Responses image_generation tool). Saves generated images to $HERMES_HOME/cache/images/."
author: NousResearch
kind: backend

View file

@ -0,0 +1,299 @@
"""Tests for the bundled ``openai-codex`` image_gen plugin.
Mirrors ``test_openai_provider.py`` but targets the standalone
Codex/ChatGPT-OAuth-backed provider that uses the Responses
``image_generation`` tool path instead of the ``images.generate`` REST
endpoint.
"""
from __future__ import annotations
import importlib
from pathlib import Path
from types import SimpleNamespace
import pytest
# The plugin directory uses a hyphen, which is not a valid Python identifier
# for the dotted-import form. Load it via importlib so tests don't need to
# touch sys.path or rename the directory.
codex_plugin = importlib.import_module("plugins.image_gen.openai-codex")
# 1×1 transparent PNG — valid bytes for save_b64_image()
# 1×1 transparent PNG — valid bytes for save_b64_image()
_PNG_HEX = (
    "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4"
    "890000000d49444154789c6300010000000500010d0a2db40000000049454e44"
    "ae426082"
)


def _b64_png() -> str:
    """Return the fixture PNG above as a base64 string (what the API emits)."""
    import base64

    raw = bytes.fromhex(_PNG_HEX)
    return base64.b64encode(raw).decode()
class _FakeStream:
    """Minimal stand-in for the openai Responses streaming context manager.

    Replays a fixed event list on iteration and hands back a canned final
    response; supports the ``with`` protocol so production code runs unchanged.
    """

    def __init__(self, events, final_response):
        # Materialize once so the stream can be iterated repeatedly.
        self._events = list(events)
        self._final = final_response

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # Never suppress exceptions raised inside the with-block.
        return False

    def __iter__(self):
        yield from self._events

    def get_final_response(self):
        return self._final
@pytest.fixture(autouse=True)
def _tmp_hermes_home(tmp_path, monkeypatch):
    # Point HERMES_HOME at a per-test temp dir so saved images never touch
    # the user's real cache; autouse so every test in this module is isolated.
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    yield tmp_path
@pytest.fixture
def provider(monkeypatch):
    # Codex plugin is API-key-independent; clear it to make the test honest.
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    return codex_plugin.OpenAICodexImageGenProvider()
# ── Metadata ────────────────────────────────────────────────────────────────
class TestMetadata:
    """Static provider metadata: IDs, model catalog, and setup schema."""

    def test_name(self, provider):
        assert provider.name == "openai-codex"

    def test_display_name(self, provider):
        assert provider.display_name == "OpenAI (Codex auth)"

    def test_default_model(self, provider):
        assert provider.default_model() == "gpt-image-2-medium"

    def test_list_models_three_tiers(self, provider):
        tier_ids = [entry["id"] for entry in provider.list_models()]
        expected = ["gpt-image-2-low", "gpt-image-2-medium", "gpt-image-2-high"]
        assert tier_ids == expected

    def test_setup_schema_has_no_required_env_vars(self, provider):
        schema = provider.get_setup_schema()
        assert schema["badge"] == "free"
        assert schema["env_vars"] == []
# ── Availability ────────────────────────────────────────────────────────────
class TestAvailability:
    """is_available() keys off the Codex OAuth token, never the API key."""

    def test_unavailable_without_codex_token(self, monkeypatch):
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: None)
        prov = codex_plugin.OpenAICodexImageGenProvider()
        assert prov.is_available() is False

    def test_available_with_codex_token(self, monkeypatch):
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        prov = codex_plugin.OpenAICodexImageGenProvider()
        assert prov.is_available() is True

    def test_openai_api_key_alone_is_not_enough(self, monkeypatch):
        # The Codex plugin is intentionally orthogonal to the API-key plugin:
        # OPENAI_API_KEY by itself must NOT make it report available.
        monkeypatch.setenv("OPENAI_API_KEY", "sk-test")
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: None)
        prov = codex_plugin.OpenAICodexImageGenProvider()
        assert prov.is_available() is False
# ── Generate ────────────────────────────────────────────────────────────────
class TestGenerate:
    """End-to-end generate() behavior with the Codex stream fully faked out."""

    def test_returns_auth_error_without_codex_token(self, provider, monkeypatch):
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: None)
        result = provider.generate("a cat")
        assert result["success"] is False
        assert result["error_type"] == "auth_required"

    def test_returns_invalid_argument_for_empty_prompt(self, provider, monkeypatch):
        # Whitespace-only prompts must fail validation even when auth is fine.
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        result = provider.generate(" ")
        assert result["success"] is False
        assert result["error_type"] == "invalid_argument"

    def test_generate_uses_codex_stream_path(self, provider, monkeypatch, tmp_path):
        # Happy path: a single output_item.done event carrying the image.
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        output_item = SimpleNamespace(
            type="image_generation_call",
            status="generating",
            id="ig_test",
            result=_b64_png(),
        )
        done_event = SimpleNamespace(type="response.output_item.done", item=output_item)
        final_response = SimpleNamespace(output=[], status="completed", output_text="")
        fake_client = SimpleNamespace(
            responses=SimpleNamespace(
                stream=lambda **kwargs: _FakeStream([done_event], final_response)
            )
        )
        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
        result = provider.generate("a cat", aspect_ratio="landscape")
        assert result["success"] is True
        assert result["model"] == "gpt-image-2-medium"
        assert result["provider"] == "openai-codex"
        assert result["quality"] == "medium"
        saved = Path(result["image"])
        assert saved.exists()
        # tmp_path doubles as HERMES_HOME via the autouse fixture.
        assert saved.parent == tmp_path / "cache" / "images"
        # Filename prefix differs from the API-key plugin so cache audits can
        # tell the two backends apart.
        assert saved.name.startswith("openai_codex_")

    def test_codex_stream_request_shape(self, provider, monkeypatch):
        # Capture the kwargs passed to responses.stream and pin the full
        # request contract: host model, input message, tool config, tool_choice.
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        captured = {}
        def _stream(**kwargs):
            captured.update(kwargs)
            output_item = SimpleNamespace(
                type="image_generation_call",
                status="generating",
                id="ig_test",
                result=_b64_png(),
            )
            done_event = SimpleNamespace(type="response.output_item.done", item=output_item)
            final_response = SimpleNamespace(output=[], status="completed", output_text="")
            return _FakeStream([done_event], final_response)
        fake_client = SimpleNamespace(responses=SimpleNamespace(stream=_stream))
        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
        result = provider.generate("a cat", aspect_ratio="portrait")
        assert result["success"] is True
        assert captured["model"] == "gpt-5.4"
        assert captured["store"] is False
        assert captured["input"][0]["type"] == "message"
        assert captured["input"][0]["role"] == "user"
        assert captured["input"][0]["content"][0]["type"] == "input_text"
        assert captured["tool_choice"]["type"] == "allowed_tools"
        assert captured["tool_choice"]["mode"] == "required"
        assert captured["tool_choice"]["tools"] == [{"type": "image_generation"}]
        tool = captured["tools"][0]
        assert tool["type"] == "image_generation"
        assert tool["model"] == "gpt-image-2"
        assert tool["quality"] == "medium"
        # portrait aspect maps to 1024x1536 (see _SIZES in the plugin).
        assert tool["size"] == "1024x1536"
        assert tool["output_format"] == "png"
        assert tool["background"] == "opaque"
        assert tool["partial_images"] == 1

    def test_partial_image_event_used_when_done_missing(self, provider, monkeypatch):
        """If the stream never emits output_item.done, fall back to the
        partial_image event so users at least get the latest preview frame."""
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        partial_event = SimpleNamespace(
            type="response.image_generation_call.partial_image",
            partial_image_b64=_b64_png(),
        )
        final_response = SimpleNamespace(output=[], status="completed", output_text="")
        fake_client = SimpleNamespace(
            responses=SimpleNamespace(
                stream=lambda **kwargs: _FakeStream([partial_event], final_response)
            )
        )
        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
        result = provider.generate("a cat")
        assert result["success"] is True
        assert Path(result["image"]).exists()

    def test_final_response_sweep_recovers_image(self, provider, monkeypatch):
        """If no image_generation_call event arrives mid-stream, the
        post-stream final-response sweep should still find the image."""
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        final_item = SimpleNamespace(
            type="image_generation_call",
            status="completed",
            id="ig_final",
            result=_b64_png(),
        )
        final_response = SimpleNamespace(output=[final_item], status="completed", output_text="")
        fake_client = SimpleNamespace(
            responses=SimpleNamespace(
                stream=lambda **kwargs: _FakeStream([], final_response)
            )
        )
        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
        result = provider.generate("a cat")
        assert result["success"] is True
        assert Path(result["image"]).exists()

    def test_empty_response_returns_error(self, provider, monkeypatch):
        # No events and an empty final output -> empty_response error.
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        final_response = SimpleNamespace(output=[], status="completed", output_text="")
        fake_client = SimpleNamespace(
            responses=SimpleNamespace(
                stream=lambda **kwargs: _FakeStream([], final_response)
            )
        )
        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
        result = provider.generate("a cat")
        assert result["success"] is False
        assert result["error_type"] == "empty_response"

    def test_client_init_failure_returns_auth_error(self, provider, monkeypatch):
        # Token resolves but client construction fails -> treated as auth issue.
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: None)
        result = provider.generate("a cat")
        assert result["success"] is False
        assert result["error_type"] == "auth_required"

    def test_stream_exception_returns_api_error(self, provider, monkeypatch):
        # Exceptions from the stream call are caught and surfaced verbatim
        # in the error message with error_type == "api_error".
        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
        def _boom(**kwargs):
            raise RuntimeError("cloudflare 403")
        fake_client = SimpleNamespace(responses=SimpleNamespace(stream=_boom))
        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
        result = provider.generate("a cat")
        assert result["success"] is False
        assert result["error_type"] == "api_error"
        assert "cloudflare 403" in result["error"]
# ── Plugin entry point ──────────────────────────────────────────────────────
class TestRegistration:
    """register() must hand exactly one provider instance to the context."""

    def test_register_calls_register_image_gen_provider(self):
        captured = []

        class _Ctx:
            def register_image_gen_provider(self, prov):
                captured.append(prov)

        codex_plugin.register(_Ctx())
        assert len(captured) == 1
        assert captured[0].name == "openai-codex"