"""OpenAI image generation backend — ChatGPT/Codex OAuth variant. Identical model catalog and tier semantics to the ``openai`` image-gen plugin (``gpt-image-2`` at low/medium/high quality), but routes the request through the Codex Responses API ``image_generation`` tool instead of the ``images.generate`` REST endpoint. This lets users who are already authenticated with Codex/ChatGPT generate images without configuring a separate ``OPENAI_API_KEY``. Selection precedence for the tier (first hit wins): 1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests) 2. ``image_gen.openai-codex.model`` in ``config.yaml`` 3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs) 4. :data:`DEFAULT_MODEL` — ``gpt-image-2-medium`` Output is saved as PNG under ``$HERMES_HOME/cache/images/``. """ from __future__ import annotations import logging from typing import Any, Dict, List, Optional, Tuple from agent.image_gen_provider import ( DEFAULT_ASPECT_RATIO, ImageGenProvider, error_response, resolve_aspect_ratio, save_b64_image, success_response, ) logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Model catalog — mirrors the ``openai`` plugin so the picker UX is identical. # --------------------------------------------------------------------------- API_MODEL = "gpt-image-2" _MODELS: Dict[str, Dict[str, Any]] = { "gpt-image-2-low": { "display": "GPT Image 2 (Low)", "speed": "~15s", "strengths": "Fast iteration, lowest cost", "quality": "low", }, "gpt-image-2-medium": { "display": "GPT Image 2 (Medium)", "speed": "~40s", "strengths": "Balanced — default", "quality": "medium", }, "gpt-image-2-high": { "display": "GPT Image 2 (High)", "speed": "~2min", "strengths": "Highest fidelity, strongest prompt adherence", "quality": "high", }, } DEFAULT_MODEL = "gpt-image-2-medium" _SIZES = { "landscape": "1536x1024", "square": "1024x1024", "portrait": "1024x1536", } # Codex Responses surface used for the request. The chat model itself is only # the host that calls the ``image_generation`` tool; the actual image work is # done by ``API_MODEL``. _CODEX_CHAT_MODEL = "gpt-5.4" _CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex" _CODEX_INSTRUCTIONS = ( "You are an assistant that must fulfill image generation requests by " "using the image_generation tool when provided." ) # --------------------------------------------------------------------------- # Config + auth helpers # --------------------------------------------------------------------------- def _load_image_gen_config() -> Dict[str, Any]: """Read ``image_gen`` from config.yaml (returns {} on any failure).""" try: from hermes_cli.config import load_config cfg = load_config() section = cfg.get("image_gen") if isinstance(cfg, dict) else None return section if isinstance(section, dict) else {} except Exception as exc: logger.debug("Could not load image_gen config: %s", exc) return {} def _resolve_model() -> Tuple[str, Dict[str, Any]]: """Decide which tier to use and return ``(model_id, meta)``.""" import os env_override = os.environ.get("OPENAI_IMAGE_MODEL") if env_override and env_override in _MODELS: return env_override, _MODELS[env_override] cfg = _load_image_gen_config() sub = cfg.get("openai-codex") if isinstance(cfg.get("openai-codex"), dict) else {} candidate: Optional[str] = None if isinstance(sub, dict): value = sub.get("model") if isinstance(value, str) and value in _MODELS: candidate = value if candidate is None: top = cfg.get("model") if isinstance(top, str) and top in _MODELS: candidate = top if candidate is not None: return candidate, _MODELS[candidate] return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL] def _read_codex_access_token() -> Optional[str]: """Return a usable Codex OAuth token, or None. Delegates to the canonical reader in ``agent.auxiliary_client`` so token expiry, credential pool selection, and JWT decoding stay in one place. """ try: from agent.auxiliary_client import _read_codex_access_token as _reader token = _reader() if isinstance(token, str) and token.strip(): return token.strip() return None except Exception as exc: logger.debug("Could not resolve Codex access token: %s", exc) return None def _build_codex_client(): """Return an OpenAI client pointed at the ChatGPT/Codex backend, or None.""" token = _read_codex_access_token() if not token: return None try: import openai from agent.auxiliary_client import _codex_cloudflare_headers return openai.OpenAI( api_key=token, base_url=_CODEX_BASE_URL, default_headers=_codex_cloudflare_headers(token), ) except Exception as exc: logger.debug("Could not build Codex image client: %s", exc) return None def _collect_image_b64(client: Any, *, prompt: str, size: str, quality: str) -> Optional[str]: """Stream a Codex Responses image_generation call and return the b64 image.""" image_b64: Optional[str] = None with client.responses.stream( model=_CODEX_CHAT_MODEL, store=False, instructions=_CODEX_INSTRUCTIONS, input=[{ "type": "message", "role": "user", "content": [{"type": "input_text", "text": prompt}], }], tools=[{ "type": "image_generation", "model": API_MODEL, "size": size, "quality": quality, "output_format": "png", "background": "opaque", "partial_images": 1, }], tool_choice={ "type": "allowed_tools", "mode": "required", "tools": [{"type": "image_generation"}], }, ) as stream: for event in stream: event_type = getattr(event, "type", "") if event_type == "response.output_item.done": item = getattr(event, "item", None) if getattr(item, "type", None) == "image_generation_call": result = getattr(item, "result", None) if isinstance(result, str) and result: image_b64 = result elif event_type == "response.image_generation_call.partial_image": partial = getattr(event, "partial_image_b64", None) if isinstance(partial, str) and partial: image_b64 = partial final = stream.get_final_response() # Final-response sweep covers the case where the stream finished before # we observed the ``output_item.done`` event for the image call. for item in getattr(final, "output", None) or []: if getattr(item, "type", None) == "image_generation_call": result = getattr(item, "result", None) if isinstance(result, str) and result: image_b64 = result return image_b64 # --------------------------------------------------------------------------- # Provider # --------------------------------------------------------------------------- class OpenAICodexImageGenProvider(ImageGenProvider): """gpt-image-2 routed through ChatGPT/Codex OAuth instead of an API key.""" @property def name(self) -> str: return "openai-codex" @property def display_name(self) -> str: return "OpenAI (Codex auth)" def is_available(self) -> bool: if not _read_codex_access_token(): return False try: import openai # noqa: F401 except ImportError: return False return True def list_models(self) -> List[Dict[str, Any]]: return [ { "id": model_id, "display": meta["display"], "speed": meta["speed"], "strengths": meta["strengths"], "price": "varies", } for model_id, meta in _MODELS.items() ] def default_model(self) -> Optional[str]: return DEFAULT_MODEL def get_setup_schema(self) -> Dict[str, Any]: return { "name": "OpenAI (Codex auth)", "badge": "free", "tag": "gpt-image-2 via ChatGPT/Codex OAuth — no API key required", "env_vars": [], "post_setup_hint": ( "Sign in with `hermes auth codex` (or `hermes setup` → Codex) " "if you haven't already. No API key needed." ), } def generate( self, prompt: str, aspect_ratio: str = DEFAULT_ASPECT_RATIO, **kwargs: Any, ) -> Dict[str, Any]: prompt = (prompt or "").strip() aspect = resolve_aspect_ratio(aspect_ratio) if not prompt: return error_response( error="Prompt is required and must be a non-empty string", error_type="invalid_argument", provider="openai-codex", aspect_ratio=aspect, ) if not _read_codex_access_token(): return error_response( error=( "No Codex/ChatGPT OAuth credentials available. Run " "`hermes auth codex` (or `hermes setup` → Codex) to sign in." ), error_type="auth_required", provider="openai-codex", aspect_ratio=aspect, ) try: import openai # noqa: F401 except ImportError: return error_response( error="openai Python package not installed (pip install openai)", error_type="missing_dependency", provider="openai-codex", aspect_ratio=aspect, ) tier_id, meta = _resolve_model() size = _SIZES.get(aspect, _SIZES["square"]) client = _build_codex_client() if client is None: return error_response( error="Could not initialize Codex image client", error_type="auth_required", provider="openai-codex", model=tier_id, prompt=prompt, aspect_ratio=aspect, ) try: b64 = _collect_image_b64( client, prompt=prompt, size=size, quality=meta["quality"], ) except Exception as exc: logger.debug("Codex image generation failed", exc_info=True) return error_response( error=f"OpenAI image generation via Codex auth failed: {exc}", error_type="api_error", provider="openai-codex", model=tier_id, prompt=prompt, aspect_ratio=aspect, ) if not b64: return error_response( error="Codex response contained no image_generation_call result", error_type="empty_response", provider="openai-codex", model=tier_id, prompt=prompt, aspect_ratio=aspect, ) try: saved_path = save_b64_image(b64, prefix=f"openai_codex_{tier_id}") except Exception as exc: return error_response( error=f"Could not save image to cache: {exc}", error_type="io_error", provider="openai-codex", model=tier_id, prompt=prompt, aspect_ratio=aspect, ) return success_response( image=str(saved_path), model=tier_id, prompt=prompt, aspect_ratio=aspect, provider="openai-codex", extra={"size": size, "quality": meta["quality"]}, ) # --------------------------------------------------------------------------- # Plugin entry point # --------------------------------------------------------------------------- def register(ctx) -> None: """Plugin entry point — register the Codex-backed image-gen provider.""" ctx.register_image_gen_provider(OpenAICodexImageGenProvider())