mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
xAI's grok-imagine-image API returns ephemeral imgen.x.ai/xai-tmp-* URLs that 404 within minutes — long before downstream consumers (Telegram send_photo, browser preview, multi-tier delivery fallback) get a chance to fetch them. The xAI image_gen provider was passing those URLs through unchanged on the elif url: branch; b64 responses were already cached locally via save_b64_image. Result: every image_generate call on a Telegram-routed xai-oauth profile delivered no image, falling through to text-only. Adds agent.image_gen_provider.save_url_image() — a sibling helper to save_b64_image that downloads URL bytes to $HERMES_HOME/cache/images/. Content-type-aware extension inference with URL-suffix fallback; oversize cap (25MB default) with partial-write cleanup; empty-body refusal. Mirrors the audio_cache pattern used by text_to_speech. Wires save_url_image into both the xAI and OpenAI providers' URL branches. When the download fails (network blip, 404 in-flight) we log a warning and fall back to the bare URL rather than turning the tool call into a hard error — the gateway's existing URL-send fallback then gets a chance to surface the original error legibly. Test plan: - tests/agent/test_save_url_image.py — 8 direct tests against a real in-process HTTP server: bytes round-trip, content-type → extension, URL-suffix fallback, default-to-png, 404 propagation, empty-body refusal, oversize cap + cleanup, filename uniqueness. - tests/plugins/image_gen/test_xai_provider.py — flip test_successful_url_response (was asserting the bug), add test_url_response_falls_back_to_bare_url_when_download_fails. - tests/plugins/image_gen/test_openai_provider.py — symmetric pair. 160/160 in the broader image_gen test surface.
324 lines
10 KiB
Python
324 lines
10 KiB
Python
"""
|
|
Image Generation Provider ABC
|
|
=============================
|
|
|
|
Defines the pluggable-backend interface for image generation. Providers register
|
|
instances via ``PluginContext.register_image_gen_provider()``; the active one
|
|
(selected via ``image_gen.provider`` in ``config.yaml``) services every
|
|
``image_generate`` tool call.
|
|
|
|
Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
|
|
as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
|
|
via ``plugins.enabled``).
|
|
|
|
Response shape
|
|
--------------
|
|
All providers return a dict that :func:`success_response` / :func:`error_response`
|
|
produce. The tool wrapper JSON-serializes it. Keys:
|
|
|
|
success bool
|
|
image str | None URL or absolute file path
|
|
model str provider-specific model identifier
|
|
prompt str echoed prompt
|
|
aspect_ratio str "landscape" | "square" | "portrait"
|
|
provider str provider name (for diagnostics)
|
|
error str only when success=False
|
|
error_type str only when success=False
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import abc
|
|
import base64
|
|
import datetime
|
|
import logging
|
|
import uuid
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Closed set of aspect-ratio names; ``resolve_aspect_ratio`` returns a value
# unchanged (after strip/lowercase) only when it is one of these.
VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
|
|
# Value ``resolve_aspect_ratio`` falls back to for non-string or unrecognised
# input; also the default ``aspect_ratio`` of ``generate`` and ``error_response``.
DEFAULT_ASPECT_RATIO = "landscape"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ABC
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class ImageGenProvider(abc.ABC):
    """Contract that every image generation backend fulfils.

    Two members are abstract and must be supplied by a subclass: the
    :attr:`name` property and :meth:`generate`. All remaining hooks ship
    with working defaults, so a backend overrides only the ones it needs.
    """

    @property
    @abc.abstractmethod
    def name(self) -> str:
        """Short, stable identifier referenced by ``image_gen.provider`` in config.

        Should be lowercase without spaces, e.g. ``fal``, ``openai``,
        ``replicate``.
        """

    @property
    def display_name(self) -> str:
        """Label shown to humans in ``hermes tools``; title-cased ``name`` by default."""
        return self.name.title()

    def is_available(self) -> bool:
        """Tell whether this provider is currently able to handle calls.

        Overrides typically verify that a required API key is present. The
        base implementation always answers ``True``, which suits providers
        without external dependencies.
        """
        return True

    def list_models(self) -> List[Dict[str, Any]]:
        """List the catalog entries offered in the ``hermes tools`` model picker.

        Entry shape::

            {
                "id": "gpt-image-1.5",       # required
                "display": "GPT Image 1.5",  # optional; defaults to id
                "speed": "~10s",             # optional
                "strengths": "...",          # optional
                "price": "$...",             # optional
            }

        The base implementation returns an empty list, meaning the provider
        exposes no user-selectable models.
        """
        return []

    def get_setup_schema(self) -> Dict[str, Any]:
        """Describe this provider for the ``hermes tools`` picker.

        ``tools_config.py`` consumes the result to add a row for this
        provider to the Image Generation provider list. Shape::

            {
                "name": "OpenAI",                  # picker label
                "badge": "paid",                   # optional short tag
                "tag": "One-line description...",  # optional subtitle
                "env_vars": [                      # keys to prompt for
                    {"key": "OPENAI_API_KEY",
                     "prompt": "OpenAI API key",
                     "url": "https://platform.openai.com/api-keys"},
                ],
            }

        The base implementation returns a minimal entry whose label is
        ``display_name`` and whose other fields are empty. Override it to
        surface API key prompts or a custom badge.
        """
        return dict(
            name=self.display_name,
            badge="",
            tag="",
            env_vars=[],
        )

    def default_model(self) -> Optional[str]:
        """Return the ``id`` of the first catalog entry, or ``None`` when the catalog is empty."""
        catalog = self.list_models()
        return catalog[0].get("id") if catalog else None

    @abc.abstractmethod
    def generate(
        self,
        prompt: str,
        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Produce an image for ``prompt``.

        Implementations are expected to return a dict built by
        :func:`success_response` or :func:`error_response`. ``kwargs`` is a
        forward-compatibility channel for parameters that later schema
        versions may introduce; unknown keys should simply be ignored.
        """
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def resolve_aspect_ratio(value: Optional[str]) -> str:
    """Normalise ``value`` to a supported aspect ratio.

    A string is stripped and lowercased, then returned if it is a member of
    ``VALID_ASPECT_RATIOS``. Anything else (non-strings, unknown names) is
    silently replaced by ``DEFAULT_ASPECT_RATIO`` instead of raising, so the
    tool surface tolerates agent mistakes.
    """
    if isinstance(value, str):
        candidate = value.strip().lower()
        if candidate in VALID_ASPECT_RATIOS:
            return candidate
    return DEFAULT_ASPECT_RATIO
|
|
|
|
|
|
def _images_cache_dir() -> Path:
    """Ensure ``$HERMES_HOME/cache/images/`` exists and return its path."""
    # Imported lazily, inside the function, exactly as the module has always done.
    from hermes_constants import get_hermes_home

    cache_root = get_hermes_home().joinpath("cache", "images")
    cache_root.mkdir(parents=True, exist_ok=True)
    return cache_root
|
|
|
|
|
|
def save_b64_image(
    b64_data: str,
    *,
    prefix: str = "image",
    extension: str = "png",
) -> Path:
    """Write base64-encoded image bytes into ``$HERMES_HOME/cache/images/``.

    The payload is decoded first, so malformed input fails before the cache
    directory is touched. The file is named
    ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.

    Returns:
        The :class:`Path` of the file that was written.
    """
    decoded = base64.b64decode(b64_data)

    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    token = uuid.uuid4().hex[:8]
    filename = f"{prefix}_{stamp}_{token}.{extension}"

    target = _images_cache_dir() / filename
    target.write_bytes(decoded)
    return target
|
|
|
|
|
|
# Extension inference for save_url_image — keep small and explicit. We don't
|
|
# want to import mimetypes for a handful of formats every image_gen provider
|
|
# actually returns, and we never want to inherit a content-type that points
|
|
# at HTML or JSON when the API gives us a degenerate response.
|
|
# Maps a ``Content-Type`` value (already lowercased and stripped of any
# ``;``-parameters by ``save_url_image``) to the file extension used for the
# cached image. Both ``image/jpeg`` and ``image/jpg`` map to ``jpg``.
_URL_IMAGE_CONTENT_TYPES = {
|
|
"image/png": "png",
|
|
"image/jpeg": "jpg",
|
|
"image/jpg": "jpg",
|
|
"image/webp": "webp",
|
|
"image/gif": "gif",
|
|
}
|
|
|
|
|
|
def _infer_image_extension(content_type: str, url: str) -> str:
    """Choose the file extension for an image downloaded from ``url``.

    Resolution order: a ``content_type`` known to ``_URL_IMAGE_CONTENT_TYPES``,
    then the suffix of the URL *path* (query string and fragment ignored),
    then ``"png"``.

    Raises:
        ValueError: ``content_type`` is textual or JSON/XML, i.e. the server
            answered with an error page or API payload instead of an image.
    """
    from urllib.parse import urlsplit

    known = _URL_IMAGE_CONTENT_TYPES.get(content_type)
    if known is not None:
        return known

    # Vague types such as ``application/octet-stream`` still fall through to
    # the URL-suffix heuristic; only unmistakably non-image payloads are refused.
    if content_type.startswith("text/") or content_type in (
        "application/json",
        "application/xml",
    ):
        raise ValueError(
            f"URL {url} returned non-image content type {content_type!r}; refusing to cache."
        )

    url_path = urlsplit(url).path.lower()
    for suffix, ext in (
        (".png", "png"),
        (".jpg", "jpg"),
        (".jpeg", "jpg"),
        (".webp", "webp"),
        (".gif", "gif"),
    ):
        if url_path.endswith(suffix):
            return ext
    return "png"


def save_url_image(
    url: str,
    *,
    prefix: str = "image",
    timeout: float = 60.0,
    max_bytes: int = 25 * 1024 * 1024,
) -> Path:
    """Download an image URL and write it under ``$HERMES_HOME/cache/images/``.

    Used by providers whose API returns an *ephemeral* URL instead of inline
    base64. Such URLs frequently expire before a downstream consumer
    (Telegram ``send_photo``, browser fetch) can resolve them, so the bytes
    are materialised locally at tool-completion time. Mirrors
    :func:`save_b64_image`'s shape so providers can swap it in with one line.

    Args:
        url: Location of the image to fetch.
        prefix: Leading component of the generated filename.
        timeout: Timeout in seconds handed to ``requests.get``.
        max_bytes: Largest body that will be cached.

    Returns:
        The :class:`Path` of the saved file, named
        ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.

    Raises:
        requests.RequestException: On network failures or a non-2xx status.
        ValueError: When the body is empty, exceeds ``max_bytes``, or is
            served with a textual / JSON content type.

    No partial file is left in the cache when an error is raised, and the
    HTTP response is always closed.
    """
    import requests

    response = requests.get(url, timeout=timeout, stream=True)
    # try/finally rather than ``with``: a streamed response keeps its
    # connection checked out until it is fully read or explicitly closed.
    try:
        response.raise_for_status()

        content_type = (
            (response.headers.get("Content-Type") or "").split(";", 1)[0].strip().lower()
        )
        extension = _infer_image_extension(content_type, url)

        oversize_message = (
            f"Image at {url} exceeds {max_bytes // (1024 * 1024)}MB cap; refusing to cache."
        )

        # Fail fast when the server already declares an oversize body, so no
        # file is created and nothing is downloaded just to be thrown away.
        declared = response.headers.get("Content-Length")
        if isinstance(declared, str) and declared.isdigit() and int(declared) > max_bytes:
            raise ValueError(oversize_message)

        ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        short = uuid.uuid4().hex[:8]
        path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"

        bytes_written = 0
        try:
            with path.open("wb") as fh:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    if not chunk:
                        continue
                    bytes_written += len(chunk)
                    # Content-Length may be absent or wrong, so the cap is
                    # also enforced on the bytes actually received.
                    if bytes_written > max_bytes:
                        raise ValueError(oversize_message)
                    fh.write(chunk)
            if bytes_written == 0:
                raise ValueError(f"Image at {url} returned 0 bytes; refusing to cache.")
        except BaseException:
            # Covers oversize, empty body and mid-stream network errors alike.
            # The file handle is already closed here, so the unlink is safe on
            # every platform; a failed unlink must not mask the real error.
            try:
                path.unlink()
            except OSError:
                pass
            raise

        return path
    finally:
        response.close()
|
|
|
|
|
|
def success_response(
    *,
    image: str,
    model: str,
    prompt: str,
    aspect_ratio: str,
    provider: str,
    extra: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Assemble the standard success payload returned by providers.

    ``image`` is either an HTTP URL or an absolute filesystem path (the
    latter for providers that cache bytes locally). Backend-specific fields
    can be passed through ``extra``; they are appended after the core keys
    and never overwrite one of them.
    """
    payload: Dict[str, Any] = dict(
        success=True,
        image=image,
        model=model,
        prompt=prompt,
        aspect_ratio=aspect_ratio,
        provider=provider,
    )
    if not extra:
        return payload

    # Core keys take precedence: only extras whose key is not already present are added.
    additions = {key: val for key, val in extra.items() if key not in payload}
    payload.update(additions)
    return payload
|
|
|
|
|
|
def error_response(
    *,
    error: str,
    error_type: str = "provider_error",
    provider: str = "",
    model: str = "",
    prompt: str = "",
    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
) -> Dict[str, Any]:
    """Assemble the standard failure payload returned by providers.

    ``success`` is always ``False`` and ``image`` is always ``None``; the
    remaining keys echo the arguments.
    """
    return dict(
        success=False,
        image=None,
        error=error,
        error_type=error_type,
        model=model,
        prompt=prompt,
        aspect_ratio=aspect_ratio,
        provider=provider,
    )
|