hermes-agent/tests/tools/test_image_generation_image_to_image.py
hakanpak d45addc2f1 fix(tools): never let a model whitelist strip the prompt / source images
_build_fal_payload and _build_fal_edit_payload assemble the request and then
filter it down to the model's supports / edit_supports whitelist. That filter
also covers prompt (and image_urls for edits), which every FAL endpoint
requires. Today all model configs happen to list those keys, but a single
config that omits one would silently produce a request with no prompt or no
source images — a broken generation with no error.

Always keep the mandatory keys regardless of the whitelist so a missing
whitelist entry can only drop optional knobs, never the prompt or the images.
2026-06-19 16:59:54 -07:00

383 lines
15 KiB
Python

"""Tests for the image-to-image / editing surface of ``image_generate``.
Mirrors the video-gen image-to-video tests: the unified ``image_generate``
tool routes to a provider's edit endpoint when ``image_url`` /
``reference_image_urls`` is supplied, otherwise to text-to-image. Coverage:
- In-tree FAL edit payload construction (``_build_fal_edit_payload``)
- In-tree FAL routing (text vs edit endpoint) via ``image_generate_tool``
- Plugin dispatch forwards image_url / reference_image_urls to ``generate()``
- ``capabilities()`` honesty drives the dynamic tool-schema description
- Models without an edit endpoint reject image inputs with a clear error
"""
from __future__ import annotations
import json
from typing import Any, Dict, List, Optional
import pytest
import yaml
from agent import image_gen_registry
from agent.image_gen_provider import ImageGenProvider
@pytest.fixture(autouse=True)
def _reset_registry():
    """Call ``image_gen_registry._reset_for_tests()`` around every test.

    Autouse, so each test in this module gets the reset both before it runs
    and again after it finishes.
    """
    # Setup: reset before the test body executes.
    image_gen_registry._reset_for_tests()
    yield
    # Teardown: reset again so nothing this test registered lingers.
    image_gen_registry._reset_for_tests()
@pytest.fixture
def cfg_home(tmp_path, monkeypatch):
    """Set ``HERMES_HOME`` to the per-test temp directory and return that path."""
    home_dir = tmp_path
    # monkeypatch restores the previous environment value after the test.
    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    return home_dir
def _write_cfg(home, cfg: dict):
    """Dump *cfg* as YAML into ``config.yaml`` inside the *home* directory."""
    config_path = home / "config.yaml"
    rendered = yaml.safe_dump(cfg)
    config_path.write_text(rendered)
# ---------------------------------------------------------------------------
# In-tree FAL edit payload + routing
# ---------------------------------------------------------------------------
class TestFalEditPayload:
    """Checks on the request dict produced by ``_build_fal_edit_payload``."""

    def test_edit_payload_includes_image_urls(self):
        """Prompt, source images and the mapped aspect ratio all reach the payload."""
        from tools.image_generation_tool import _build_fal_edit_payload

        edit_request = _build_fal_edit_payload(
            "fal-ai/nano-banana-pro",
            "make it night",
            ["https://x/y.png"],
            "landscape",
        )

        assert edit_request["prompt"] == "make it night"
        assert edit_request["image_urls"] == ["https://x/y.png"]
        # nano-banana edit advertises aspect_ratio in edit_supports
        assert edit_request.get("aspect_ratio") == "16:9"

    def test_edit_payload_strips_keys_outside_edit_supports(self):
        """Keys missing from ``edit_supports`` are dropped from the edit payload."""
        from tools.image_generation_tool import _build_fal_edit_payload

        # gpt-image-2 edit does NOT advertise image_size (auto-inferred), so
        # it must be stripped even though the text-to-image path sets it.
        edit_request = _build_fal_edit_payload(
            "fal-ai/gpt-image-2",
            "swap bg",
            ["https://x/y.png"],
            "square",
        )

        assert "image_size" not in edit_request
        assert edit_request["image_urls"] == ["https://x/y.png"]
        assert edit_request["quality"] == "medium"

    def test_text_only_model_has_no_edit_endpoint(self):
        """The model table marks which models are edit-capable."""
        from tools.image_generation_tool import FAL_MODELS

        text_only_cfg = FAL_MODELS["fal-ai/z-image/turbo"]
        edit_capable_cfg = FAL_MODELS["fal-ai/nano-banana-pro"]

        # z-image/turbo is a pure text-to-image model — no edit endpoint.
        assert "edit_endpoint" not in text_only_cfg
        # while nano-banana-pro is edit-capable
        assert edit_capable_cfg.get("edit_endpoint")
class TestMandatoryKeysSurviveWhitelist:
    """Mandatory request keys must outlive an incomplete model whitelist.

    Each test registers a fake model whose whitelist leaves out the mandatory
    keys, then asserts that the built payload still carries the prompt (and,
    for edits, the source images).
    """

    # Size table shared by the fake model configs below.
    _SIZES = {"square": "1024x1024", "landscape": "1536x1024", "portrait": "1024x1536"}

    def test_edit_keeps_prompt_and_image_urls(self, monkeypatch):
        """Edit payload keeps ``prompt`` and ``image_urls`` despite the whitelist."""
        from tools import image_generation_tool as t

        model_cfg = dict(
            size_style="image_size_preset",
            sizes=self._SIZES,
            edit_supports={"seed"},  # intentionally omits prompt + image_urls
        )
        monkeypatch.setitem(t.FAL_MODELS, "test/edit-model", model_cfg)

        built = t._build_fal_edit_payload(
            "test/edit-model",
            "make it blue",
            ["https://x/y.png"],
            "square",
        )

        assert built["prompt"] == "make it blue"
        assert built["image_urls"] == ["https://x/y.png"]

    def test_text_keeps_prompt(self, monkeypatch):
        """Text-to-image payload keeps ``prompt`` despite the whitelist."""
        from tools import image_generation_tool as t

        model_cfg = dict(
            size_style="image_size_preset",
            sizes=self._SIZES,
            supports={"seed"},  # intentionally omits prompt
        )
        monkeypatch.setitem(t.FAL_MODELS, "test/text-model", model_cfg)

        built = t._build_fal_payload("test/text-model", "a cat", aspect_ratio="square")

        assert built["prompt"] == "a cat"
class TestFalRouting:
    """Routing of ``image_generate_tool`` between the FAL base and edit endpoints.

    Submission is stubbed via ``_patch_submit`` so each test can inspect the
    endpoint and arguments that would have been sent.
    """

    def _patch_submit(self, monkeypatch, image_tool, capture: dict):
        """Swap the submit / credential hooks on *image_tool* for stubs.

        The stub submitter stores the endpoint and arguments it receives in
        *capture* and returns a handle whose ``get()`` yields one fake image.
        """

        class _StubHandle:
            def get(handle):
                fake_image = {"url": "https://out/img.png", "width": 1, "height": 1}
                return {"images": [fake_image]}

        def _record_submit(endpoint, arguments):
            capture.update(endpoint=endpoint, arguments=arguments)
            return _StubHandle()

        monkeypatch.setattr(image_tool, "_submit_fal_request", _record_submit)
        monkeypatch.setattr(image_tool, "fal_key_is_configured", lambda: True)
        monkeypatch.setattr(image_tool, "_resolve_managed_fal_gateway", lambda: None)

    def test_text_to_image_uses_base_endpoint(self, cfg_home, monkeypatch):
        """A prompt-only call goes to the model's base endpoint."""
        import tools.image_generation_tool as image_tool

        _write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/nano-banana-pro"}})
        capture: dict = {}
        self._patch_submit(monkeypatch, image_tool, capture)

        result = json.loads(
            image_tool.image_generate_tool(prompt="a cat", aspect_ratio="square")
        )

        assert result["success"] is True
        assert result["modality"] == "text"
        assert capture["endpoint"] == "fal-ai/nano-banana-pro"
        assert "image_urls" not in capture["arguments"]

    def test_image_to_image_routes_to_edit_endpoint(self, cfg_home, monkeypatch):
        """Supplying ``image_url`` switches the call to the ``/edit`` endpoint."""
        import tools.image_generation_tool as image_tool

        _write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/nano-banana-pro"}})
        capture: dict = {}
        self._patch_submit(monkeypatch, image_tool, capture)

        response = image_tool.image_generate_tool(
            prompt="make it night",
            aspect_ratio="square",
            image_url="https://in/src.png",
        )
        result = json.loads(response)

        assert result["success"] is True
        assert result["modality"] == "image"
        assert capture["endpoint"] == "fal-ai/nano-banana-pro/edit"
        assert capture["arguments"]["image_urls"] == ["https://in/src.png"]

    def test_reference_images_clamped_to_model_cap(self, cfg_home, monkeypatch):
        """Extra reference images beyond the model's cap are dropped."""
        import tools.image_generation_tool as image_tool

        # nano-banana-pro caps at 2 reference images.
        _write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/nano-banana-pro"}})
        capture: dict = {}
        self._patch_submit(monkeypatch, image_tool, capture)

        response = image_tool.image_generate_tool(
            prompt="blend",
            image_url="https://in/a.png",
            reference_image_urls=[
                "https://in/b.png",
                "https://in/c.png",
                "https://in/d.png",
            ],
        )
        result = json.loads(response)

        assert result["success"] is True
        submitted_urls = capture["arguments"]["image_urls"]
        assert submitted_urls == ["https://in/a.png", "https://in/b.png"]

    def test_text_only_model_rejects_image_url(self, cfg_home, monkeypatch):
        """A model with no edit endpoint refuses image input without submitting."""
        import tools.image_generation_tool as image_tool

        _write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/z-image/turbo"}})
        capture: dict = {}
        self._patch_submit(monkeypatch, image_tool, capture)

        response = image_tool.image_generate_tool(
            prompt="edit this",
            image_url="https://in/src.png",
        )
        result = json.loads(response)

        assert result["success"] is False
        assert "image-to-image" in result["error"]
        # Must NOT have submitted anything.
        assert capture == {}

    def test_edit_skips_upscaler(self, cfg_home, monkeypatch):
        """Edit requests never invoke ``_upscale_image``."""
        import tools.image_generation_tool as image_tool

        # flux-2-pro has upscale=True for text-to-image, but edits must skip it.
        _write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/flux-2-pro"}})
        capture: dict = {}
        self._patch_submit(monkeypatch, image_tool, capture)

        upscale_seen = {"hit": False}

        def _flag_upscale(*args, **kwargs):
            # Record that the upscaler was reached; return nothing.
            upscale_seen["hit"] = True
            return None

        monkeypatch.setattr(image_tool, "_upscale_image", _flag_upscale)

        response = image_tool.image_generate_tool(
            prompt="tweak",
            image_url="https://in/src.png",
        )
        result = json.loads(response)

        assert result["success"] is True
        assert result["modality"] == "image"
        assert upscale_seen["hit"] is False
# ---------------------------------------------------------------------------
# Plugin dispatch forwarding
# ---------------------------------------------------------------------------
class _EditCapableProvider(ImageGenProvider):
    """Test provider that accepts image inputs and records what it was given."""

    def __init__(self):
        # Arguments of the most recent generate() call; empty until then.
        self.received: Dict[str, Any] = {}

    @property
    def name(self) -> str:
        return "editcap"

    def capabilities(self) -> Dict[str, Any]:
        """Advertise both text and image modalities with up to 4 references."""
        return {"modalities": ["text", "image"], "max_reference_images": 4}

    def generate(self, prompt, aspect_ratio="landscape", *, image_url=None,
                 reference_image_urls=None, **kwargs):
        """Record the call arguments and return a canned success result.

        The reported modality is ``"image"`` when *image_url* is truthy and
        ``"text"`` otherwise.
        """
        self.received = dict(
            prompt=prompt,
            aspect_ratio=aspect_ratio,
            image_url=image_url,
            reference_image_urls=reference_image_urls,
        )
        if image_url:
            modality = "image"
        else:
            modality = "text"
        return dict(
            success=True,
            image="/tmp/out.png",
            model="editcap-1",
            prompt=prompt,
            aspect_ratio=aspect_ratio,
            modality=modality,
            provider="editcap",
        )
class _LegacyProvider(ImageGenProvider):
    """Provider whose generate() predates image_url (no **kwargs absorb)."""

    @property
    def name(self) -> str:
        return "legacy"

    # Deliberately narrow signature: no image_url / reference_image_urls and
    # no **kwargs to soak them up.
    def generate(self, prompt, aspect_ratio="landscape"):
        """Return a fixed success result, ignoring both arguments."""
        canned = {"success": True, "image": "/tmp/legacy.png", "provider": "legacy"}
        return canned
class TestPluginDispatchImageToImage:
    """``_dispatch_to_plugin_provider`` forwarding of image inputs to plugins."""

    def _activate(self, monkeypatch, provider):
        """Register *provider* and make it the configured image provider.

        Patches the configured-provider reader, disables plugin discovery and
        pins ``get_provider`` so only *provider*'s name resolves. Returns the
        imported ``tools.image_generation_tool`` module.
        """
        import tools.image_generation_tool as image_tool
        from hermes_cli import plugins as plugins_module
        from agent import image_gen_registry as reg

        active_name = provider.name
        reg.register_provider(provider)
        monkeypatch.setattr(
            image_tool, "_read_configured_image_provider", lambda: active_name,
        )
        monkeypatch.setattr(
            plugins_module, "_ensure_plugins_discovered", lambda *a, **k: None,
        )
        monkeypatch.setattr(
            reg, "get_provider", lambda n: provider if n == active_name else None,
        )
        return image_tool

    def test_dispatch_forwards_image_url(self, cfg_home, monkeypatch):
        """Both ``image_url`` and ``reference_image_urls`` reach ``generate()``."""
        provider = _EditCapableProvider()
        image_tool = self._activate(monkeypatch, provider)

        response = image_tool._dispatch_to_plugin_provider(
            "make night",
            "square",
            image_url="https://in/src.png",
            reference_image_urls=["https://in/ref.png"],
        )
        result = json.loads(response)

        assert result["success"] is True
        assert result["modality"] == "image"
        assert provider.received["image_url"] == "https://in/src.png"
        assert provider.received["reference_image_urls"] == ["https://in/ref.png"]

    def test_dispatch_text_only_when_no_image(self, cfg_home, monkeypatch):
        """Without image inputs the provider sees no image arguments."""
        provider = _EditCapableProvider()
        image_tool = self._activate(monkeypatch, provider)

        result = json.loads(
            image_tool._dispatch_to_plugin_provider("a dog", "landscape")
        )

        assert result["success"] is True
        assert provider.received["image_url"] is None
        # Absent key and explicit None are both acceptable.
        assert provider.received.get("reference_image_urls") is None

    def test_legacy_provider_edit_request_surfaces_clear_error(self, cfg_home, monkeypatch):
        """An edit request to a narrow-signature provider fails with a typed error."""
        provider = _LegacyProvider()
        image_tool = self._activate(monkeypatch, provider)

        response = image_tool._dispatch_to_plugin_provider(
            "edit it",
            "square",
            image_url="https://in/src.png",
        )
        result = json.loads(response)

        assert result["success"] is False
        assert result["error_type"] == "modality_unsupported"
# ---------------------------------------------------------------------------
# Dynamic schema reflects active capabilities
# ---------------------------------------------------------------------------
class _PluginBothProvider(ImageGenProvider):
    """Always-available test provider advertising text and image modalities."""

    @property
    def name(self) -> str:
        return "both"

    def is_available(self) -> bool:
        return True

    def default_model(self) -> Optional[str]:
        return "both-v1"

    def capabilities(self) -> Dict[str, Any]:
        """Advertise both modalities with up to 5 reference images."""
        advertised = {"modalities": ["text", "image"], "max_reference_images": 5}
        return advertised

    def generate(self, prompt, aspect_ratio="landscape", *, image_url=None,
                 reference_image_urls=None, **kwargs):
        """Return a bare success result; every argument is ignored."""
        return {"success": True}
class TestDynamicSchema:
    """The dynamic tool-schema description tracks the active model / provider."""

    def _no_discovery(self, monkeypatch):
        """Turn plugin discovery into a no-op for the duration of the test."""
        import hermes_cli.plugins as plugins_module

        monkeypatch.setattr(
            plugins_module, "_ensure_plugins_discovered", lambda *a, **k: None,
        )

    def test_fal_edit_model_advertises_both(self, cfg_home, monkeypatch):
        """An edit-capable FAL model advertises both modalities."""
        from tools.image_generation_tool import _build_dynamic_image_schema

        _write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/nano-banana-pro"}})
        description = _build_dynamic_image_schema()["description"]

        assert "text-to-image" in description
        assert "image-to-image" in description
        assert "routes automatically" in description

    def test_fal_text_only_model_warns(self, cfg_home, monkeypatch):
        """A text-only FAL model says image-to-image is unavailable."""
        from tools.image_generation_tool import _build_dynamic_image_schema

        _write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/z-image/turbo"}})
        description = _build_dynamic_image_schema()["description"]

        assert "text-to-image only" in description
        assert "NOT capable of image-to-image" in description

    def test_plugin_both_provider_advertises_refs(self, cfg_home, monkeypatch):
        """A plugin provider's reference-image cap appears in the description."""
        from tools.image_generation_tool import _build_dynamic_image_schema
        from agent import image_gen_registry as reg

        _write_cfg(cfg_home, {"image_gen": {"provider": "both"}})
        reg.register_provider(_PluginBothProvider())
        self._no_discovery(monkeypatch)
        description = _build_dynamic_image_schema()["description"]

        assert "image-to-image / editing" in description
        assert "up to 5 reference image(s)" in description

    def test_builder_wired_into_registry(self):
        """The ``image_generate`` registry entry carries a working schema override."""
        from tools.registry import discover_builtin_tools, registry

        discover_builtin_tools()
        tool_entry = registry._tools["image_generate"]

        assert tool_entry.dynamic_schema_overrides is not None
        overrides = tool_entry.dynamic_schema_overrides()
        assert "description" in overrides