diff --git a/plugins/image_gen/openai-codex/__init__.py b/plugins/image_gen/openai-codex/__init__.py new file mode 100644 index 000000000..ab524dbdd --- /dev/null +++ b/plugins/image_gen/openai-codex/__init__.py @@ -0,0 +1,378 @@ +"""OpenAI image generation backend — ChatGPT/Codex OAuth variant. + +Identical model catalog and tier semantics to the ``openai`` image-gen plugin +(``gpt-image-2`` at low/medium/high quality), but routes the request through +the Codex Responses API ``image_generation`` tool instead of the +``images.generate`` REST endpoint. This lets users who are already +authenticated with Codex/ChatGPT generate images without configuring a +separate ``OPENAI_API_KEY``. + +Selection precedence for the tier (first hit wins): + +1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests) +2. ``image_gen.openai-codex.model`` in ``config.yaml`` +3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs) +4. :data:`DEFAULT_MODEL` — ``gpt-image-2-medium`` + +Output is saved as PNG under ``$HERMES_HOME/cache/images/``. +""" + +from __future__ import annotations + +import logging +from typing import Any, Dict, List, Optional, Tuple + +from agent.image_gen_provider import ( + DEFAULT_ASPECT_RATIO, + ImageGenProvider, + error_response, + resolve_aspect_ratio, + save_b64_image, + success_response, +) + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Model catalog — mirrors the ``openai`` plugin so the picker UX is identical. +# --------------------------------------------------------------------------- + +API_MODEL = "gpt-image-2" + +_MODELS: Dict[str, Dict[str, Any]] = { + "gpt-image-2-low": { + "display": "GPT Image 2 (Low)", + "speed": "~15s", + "strengths": "Fast iteration, lowest cost", + "quality": "low", + }, + "gpt-image-2-medium": { + "display": "GPT Image 2 (Medium)", + "speed": "~40s", + "strengths": "Balanced — default", + "quality": "medium", + }, + "gpt-image-2-high": { + "display": "GPT Image 2 (High)", + "speed": "~2min", + "strengths": "Highest fidelity, strongest prompt adherence", + "quality": "high", + }, +} + +DEFAULT_MODEL = "gpt-image-2-medium" + +_SIZES = { + "landscape": "1536x1024", + "square": "1024x1024", + "portrait": "1024x1536", +} + +# Codex Responses surface used for the request. The chat model itself is only +# the host that calls the ``image_generation`` tool; the actual image work is +# done by ``API_MODEL``. +_CODEX_CHAT_MODEL = "gpt-5.4" +_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex" +_CODEX_INSTRUCTIONS = ( + "You are an assistant that must fulfill image generation requests by " + "using the image_generation tool when provided." 
+) + + +# --------------------------------------------------------------------------- +# Config + auth helpers +# --------------------------------------------------------------------------- + + +def _load_image_gen_config() -> Dict[str, Any]: + """Read ``image_gen`` from config.yaml (returns {} on any failure).""" + try: + from hermes_cli.config import load_config + + cfg = load_config() + section = cfg.get("image_gen") if isinstance(cfg, dict) else None + return section if isinstance(section, dict) else {} + except Exception as exc: + logger.debug("Could not load image_gen config: %s", exc) + return {} + + +def _resolve_model() -> Tuple[str, Dict[str, Any]]: + """Decide which tier to use and return ``(model_id, meta)``.""" + import os + + env_override = os.environ.get("OPENAI_IMAGE_MODEL") + if env_override and env_override in _MODELS: + return env_override, _MODELS[env_override] + + cfg = _load_image_gen_config() + sub = cfg.get("openai-codex") if isinstance(cfg.get("openai-codex"), dict) else {} + candidate: Optional[str] = None + if isinstance(sub, dict): + value = sub.get("model") + if isinstance(value, str) and value in _MODELS: + candidate = value + if candidate is None: + top = cfg.get("model") + if isinstance(top, str) and top in _MODELS: + candidate = top + + if candidate is not None: + return candidate, _MODELS[candidate] + + return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL] + + +def _read_codex_access_token() -> Optional[str]: + """Return a usable Codex OAuth token, or None. + + Delegates to the canonical reader in ``agent.auxiliary_client`` so token + expiry, credential pool selection, and JWT decoding stay in one place. + """ + try: + from agent.auxiliary_client import _read_codex_access_token as _reader + + token = _reader() + if isinstance(token, str) and token.strip(): + return token.strip() + return None + except Exception as exc: + logger.debug("Could not resolve Codex access token: %s", exc) + return None + + +def _build_codex_client(): + """Return an OpenAI client pointed at the ChatGPT/Codex backend, or None.""" + token = _read_codex_access_token() + if not token: + return None + try: + import openai + from agent.auxiliary_client import _codex_cloudflare_headers + + return openai.OpenAI( + api_key=token, + base_url=_CODEX_BASE_URL, + default_headers=_codex_cloudflare_headers(token), + ) + except Exception as exc: + logger.debug("Could not build Codex image client: %s", exc) + return None + + +def _collect_image_b64(client: Any, *, prompt: str, size: str, quality: str) -> Optional[str]: + """Stream a Codex Responses image_generation call and return the b64 image.""" + image_b64: Optional[str] = None + + with client.responses.stream( + model=_CODEX_CHAT_MODEL, + store=False, + instructions=_CODEX_INSTRUCTIONS, + input=[{ + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": prompt}], + }], + tools=[{ + "type": "image_generation", + "model": API_MODEL, + "size": size, + "quality": quality, + "output_format": "png", + "background": "opaque", + "partial_images": 1, + }], + tool_choice={ + "type": "allowed_tools", + "mode": "required", + "tools": [{"type": "image_generation"}], + }, + ) as stream: + for event in stream: + event_type = getattr(event, "type", "") + if event_type == "response.output_item.done": + item = getattr(event, "item", None) + if getattr(item, "type", None) == "image_generation_call": + result = getattr(item, "result", None) + if isinstance(result, str) and result: + image_b64 = result + elif event_type == 
"response.image_generation_call.partial_image": + partial = getattr(event, "partial_image_b64", None) + if isinstance(partial, str) and partial: + image_b64 = partial + final = stream.get_final_response() + + # Final-response sweep covers the case where the stream finished before + # we observed the ``output_item.done`` event for the image call. + for item in getattr(final, "output", None) or []: + if getattr(item, "type", None) == "image_generation_call": + result = getattr(item, "result", None) + if isinstance(result, str) and result: + image_b64 = result + + return image_b64 + + +# --------------------------------------------------------------------------- +# Provider +# --------------------------------------------------------------------------- + + +class OpenAICodexImageGenProvider(ImageGenProvider): + """gpt-image-2 routed through ChatGPT/Codex OAuth instead of an API key.""" + + @property + def name(self) -> str: + return "openai-codex" + + @property + def display_name(self) -> str: + return "OpenAI (Codex auth)" + + def is_available(self) -> bool: + if not _read_codex_access_token(): + return False + try: + import openai # noqa: F401 + except ImportError: + return False + return True + + def list_models(self) -> List[Dict[str, Any]]: + return [ + { + "id": model_id, + "display": meta["display"], + "speed": meta["speed"], + "strengths": meta["strengths"], + "price": "varies", + } + for model_id, meta in _MODELS.items() + ] + + def default_model(self) -> Optional[str]: + return DEFAULT_MODEL + + def get_setup_schema(self) -> Dict[str, Any]: + return { + "name": "OpenAI (Codex auth)", + "badge": "free", + "tag": "gpt-image-2 via ChatGPT/Codex OAuth — no API key required", + "env_vars": [], + "post_setup_hint": ( + "Sign in with `hermes auth codex` (or `hermes setup` → Codex) " + "if you haven't already. No API key needed." + ), + } + + def generate( + self, + prompt: str, + aspect_ratio: str = DEFAULT_ASPECT_RATIO, + **kwargs: Any, + ) -> Dict[str, Any]: + prompt = (prompt or "").strip() + aspect = resolve_aspect_ratio(aspect_ratio) + + if not prompt: + return error_response( + error="Prompt is required and must be a non-empty string", + error_type="invalid_argument", + provider="openai-codex", + aspect_ratio=aspect, + ) + + if not _read_codex_access_token(): + return error_response( + error=( + "No Codex/ChatGPT OAuth credentials available. Run " + "`hermes auth codex` (or `hermes setup` → Codex) to sign in." 
+ ), + error_type="auth_required", + provider="openai-codex", + aspect_ratio=aspect, + ) + + try: + import openai # noqa: F401 + except ImportError: + return error_response( + error="openai Python package not installed (pip install openai)", + error_type="missing_dependency", + provider="openai-codex", + aspect_ratio=aspect, + ) + + tier_id, meta = _resolve_model() + size = _SIZES.get(aspect, _SIZES["square"]) + + client = _build_codex_client() + if client is None: + return error_response( + error="Could not initialize Codex image client", + error_type="auth_required", + provider="openai-codex", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + try: + b64 = _collect_image_b64( + client, + prompt=prompt, + size=size, + quality=meta["quality"], + ) + except Exception as exc: + logger.debug("Codex image generation failed", exc_info=True) + return error_response( + error=f"OpenAI image generation via Codex auth failed: {exc}", + error_type="api_error", + provider="openai-codex", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + if not b64: + return error_response( + error="Codex response contained no image_generation_call result", + error_type="empty_response", + provider="openai-codex", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + try: + saved_path = save_b64_image(b64, prefix=f"openai_codex_{tier_id}") + except Exception as exc: + return error_response( + error=f"Could not save image to cache: {exc}", + error_type="io_error", + provider="openai-codex", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + return success_response( + image=str(saved_path), + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + provider="openai-codex", + extra={"size": size, "quality": meta["quality"]}, + ) + + +# --------------------------------------------------------------------------- +# Plugin entry point +# --------------------------------------------------------------------------- + + +def register(ctx) -> None: + """Plugin entry point — register the Codex-backed image-gen provider.""" + ctx.register_image_gen_provider(OpenAICodexImageGenProvider()) diff --git a/plugins/image_gen/openai-codex/plugin.yaml b/plugins/image_gen/openai-codex/plugin.yaml new file mode 100644 index 000000000..61757773e --- /dev/null +++ b/plugins/image_gen/openai-codex/plugin.yaml @@ -0,0 +1,5 @@ +name: openai-codex +version: 1.0.0 +description: "OpenAI image generation backed by ChatGPT/Codex OAuth (gpt-image-2 via the Responses image_generation tool). Saves generated images to $HERMES_HOME/cache/images/." +author: NousResearch +kind: backend diff --git a/tests/plugins/image_gen/test_openai_codex_provider.py b/tests/plugins/image_gen/test_openai_codex_provider.py new file mode 100644 index 000000000..3c8cf86c0 --- /dev/null +++ b/tests/plugins/image_gen/test_openai_codex_provider.py @@ -0,0 +1,299 @@ +"""Tests for the bundled ``openai-codex`` image_gen plugin. + +Mirrors ``test_openai_provider.py`` but targets the standalone +Codex/ChatGPT-OAuth-backed provider that uses the Responses +``image_generation`` tool path instead of the ``images.generate`` REST +endpoint. +""" + +from __future__ import annotations + +import importlib +from pathlib import Path +from types import SimpleNamespace + +import pytest + +# The plugin directory uses a hyphen, which is not a valid Python identifier +# for the dotted-import form. Load it via importlib so tests don't need to +# touch sys.path or rename the directory. 
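+# (This assumes ``plugins`` and ``plugins.image_gen`` resolve as packages from
+# the repo root; if the dotted form ever stops resolving, a file-based loader
+# via importlib.util.spec_from_file_location would be the usual fallback.)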
+codex_plugin = importlib.import_module("plugins.image_gen.openai-codex") + + +# 1×1 transparent PNG — valid bytes for save_b64_image() +_PNG_HEX = ( + "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4" + "890000000d49444154789c6300010000000500010d0a2db40000000049454e44" + "ae426082" +) + + +def _b64_png() -> str: + import base64 + return base64.b64encode(bytes.fromhex(_PNG_HEX)).decode() + + +class _FakeStream: + def __init__(self, events, final_response): + self._events = list(events) + self._final = final_response + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def __iter__(self): + return iter(self._events) + + def get_final_response(self): + return self._final + + +@pytest.fixture(autouse=True) +def _tmp_hermes_home(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + yield tmp_path + + +@pytest.fixture +def provider(monkeypatch): + # Codex plugin is API-key-independent; clear it to make the test honest. + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + return codex_plugin.OpenAICodexImageGenProvider() + + +# ── Metadata ──────────────────────────────────────────────────────────────── + + +class TestMetadata: + def test_name(self, provider): + assert provider.name == "openai-codex" + + def test_display_name(self, provider): + assert provider.display_name == "OpenAI (Codex auth)" + + def test_default_model(self, provider): + assert provider.default_model() == "gpt-image-2-medium" + + def test_list_models_three_tiers(self, provider): + ids = [m["id"] for m in provider.list_models()] + assert ids == ["gpt-image-2-low", "gpt-image-2-medium", "gpt-image-2-high"] + + def test_setup_schema_has_no_required_env_vars(self, provider): + schema = provider.get_setup_schema() + assert schema["env_vars"] == [] + assert schema["badge"] == "free" + + +# ── Availability ──────────────────────────────────────────────────────────── + + +class TestAvailability: + def test_unavailable_without_codex_token(self, monkeypatch): + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: None) + assert codex_plugin.OpenAICodexImageGenProvider().is_available() is False + + def test_available_with_codex_token(self, monkeypatch): + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token") + assert codex_plugin.OpenAICodexImageGenProvider().is_available() is True + + def test_openai_api_key_alone_is_not_enough(self, monkeypatch): + # Codex plugin is intentionally orthogonal to the API-key plugin — + # the API key alone must NOT make it appear available. 
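+        # The env var set below simulates a user who has configured only the
+        # API-key plugin and has no Codex OAuth session at all.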
+ monkeypatch.setenv("OPENAI_API_KEY", "sk-test") + monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: None) + assert codex_plugin.OpenAICodexImageGenProvider().is_available() is False + + +# ── Generate ──────────────────────────────────────────────────────────────── + + +class TestGenerate: + def test_returns_auth_error_without_codex_token(self, provider, monkeypatch): + monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: None) + result = provider.generate("a cat") + assert result["success"] is False + assert result["error_type"] == "auth_required" + + def test_returns_invalid_argument_for_empty_prompt(self, provider, monkeypatch): + monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token") + result = provider.generate(" ") + assert result["success"] is False + assert result["error_type"] == "invalid_argument" + + def test_generate_uses_codex_stream_path(self, provider, monkeypatch, tmp_path): + monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token") + + output_item = SimpleNamespace( + type="image_generation_call", + status="generating", + id="ig_test", + result=_b64_png(), + ) + done_event = SimpleNamespace(type="response.output_item.done", item=output_item) + final_response = SimpleNamespace(output=[], status="completed", output_text="") + + fake_client = SimpleNamespace( + responses=SimpleNamespace( + stream=lambda **kwargs: _FakeStream([done_event], final_response) + ) + ) + monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client) + + result = provider.generate("a cat", aspect_ratio="landscape") + + assert result["success"] is True + assert result["model"] == "gpt-image-2-medium" + assert result["provider"] == "openai-codex" + assert result["quality"] == "medium" + + saved = Path(result["image"]) + assert saved.exists() + assert saved.parent == tmp_path / "cache" / "images" + # Filename prefix differs from the API-key plugin so cache audits can + # tell the two backends apart. 
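+        # (The prefix comes from the provider's save_b64_image call, which
+        # passes prefix=f"openai_codex_{tier_id}".)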
+ assert saved.name.startswith("openai_codex_") + + def test_codex_stream_request_shape(self, provider, monkeypatch): + monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token") + + captured = {} + + def _stream(**kwargs): + captured.update(kwargs) + output_item = SimpleNamespace( + type="image_generation_call", + status="generating", + id="ig_test", + result=_b64_png(), + ) + done_event = SimpleNamespace(type="response.output_item.done", item=output_item) + final_response = SimpleNamespace(output=[], status="completed", output_text="") + return _FakeStream([done_event], final_response) + + fake_client = SimpleNamespace(responses=SimpleNamespace(stream=_stream)) + monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client) + + result = provider.generate("a cat", aspect_ratio="portrait") + assert result["success"] is True + + assert captured["model"] == "gpt-5.4" + assert captured["store"] is False + assert captured["input"][0]["type"] == "message" + assert captured["input"][0]["role"] == "user" + assert captured["input"][0]["content"][0]["type"] == "input_text" + assert captured["tool_choice"]["type"] == "allowed_tools" + assert captured["tool_choice"]["mode"] == "required" + assert captured["tool_choice"]["tools"] == [{"type": "image_generation"}] + + tool = captured["tools"][0] + assert tool["type"] == "image_generation" + assert tool["model"] == "gpt-image-2" + assert tool["quality"] == "medium" + assert tool["size"] == "1024x1536" + assert tool["output_format"] == "png" + assert tool["background"] == "opaque" + assert tool["partial_images"] == 1 + + def test_partial_image_event_used_when_done_missing(self, provider, monkeypatch): + """If the stream never emits output_item.done, fall back to the + partial_image event so users at least get the latest preview frame.""" + monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token") + + partial_event = SimpleNamespace( + type="response.image_generation_call.partial_image", + partial_image_b64=_b64_png(), + ) + final_response = SimpleNamespace(output=[], status="completed", output_text="") + + fake_client = SimpleNamespace( + responses=SimpleNamespace( + stream=lambda **kwargs: _FakeStream([partial_event], final_response) + ) + ) + monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client) + + result = provider.generate("a cat") + assert result["success"] is True + assert Path(result["image"]).exists() + + def test_final_response_sweep_recovers_image(self, provider, monkeypatch): + """If no image_generation_call event arrives mid-stream, the + post-stream final-response sweep should still find the image.""" + monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token") + + final_item = SimpleNamespace( + type="image_generation_call", + status="completed", + id="ig_final", + result=_b64_png(), + ) + final_response = SimpleNamespace(output=[final_item], status="completed", output_text="") + + fake_client = SimpleNamespace( + responses=SimpleNamespace( + stream=lambda **kwargs: _FakeStream([], final_response) + ) + ) + monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client) + + result = provider.generate("a cat") + assert result["success"] is True + assert Path(result["image"]).exists() + + def test_empty_response_returns_error(self, provider, monkeypatch): + monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token") + + final_response = SimpleNamespace(output=[], status="completed", 
output_text="") + fake_client = SimpleNamespace( + responses=SimpleNamespace( + stream=lambda **kwargs: _FakeStream([], final_response) + ) + ) + monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client) + + result = provider.generate("a cat") + assert result["success"] is False + assert result["error_type"] == "empty_response" + + def test_client_init_failure_returns_auth_error(self, provider, monkeypatch): + monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token") + monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: None) + + result = provider.generate("a cat") + assert result["success"] is False + assert result["error_type"] == "auth_required" + + def test_stream_exception_returns_api_error(self, provider, monkeypatch): + monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token") + + def _boom(**kwargs): + raise RuntimeError("cloudflare 403") + + fake_client = SimpleNamespace(responses=SimpleNamespace(stream=_boom)) + monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client) + + result = provider.generate("a cat") + assert result["success"] is False + assert result["error_type"] == "api_error" + assert "cloudflare 403" in result["error"] + + +# ── Plugin entry point ────────────────────────────────────────────────────── + + +class TestRegistration: + def test_register_calls_register_image_gen_provider(self): + registered = [] + + class _Ctx: + def register_image_gen_provider(self, prov): + registered.append(prov) + + codex_plugin.register(_Ctx()) + assert len(registered) == 1 + assert registered[0].name == "openai-codex"