mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-21 10:22:18 +00:00
_build_fal_payload and _build_fal_edit_payload assemble the request and then filter it down to the model's supports / edit_supports whitelist. That filter also covers prompt (and image_urls for edits), which every FAL endpoint requires. Today all model configs happen to list those keys, but a single config that omits one would silently produce a request with no prompt or no source images — a broken generation with no error. Always keep the mandatory keys regardless of the whitelist so a missing whitelist entry can only drop optional knobs, never the prompt or the images.
383 lines
15 KiB
Python
383 lines
15 KiB
Python
"""Tests for the image-to-image / editing surface of ``image_generate``.
|
|
|
|
Mirrors the video-gen image-to-video tests: the unified ``image_generate``
|
|
tool routes to a provider's edit endpoint when ``image_url`` /
|
|
``reference_image_urls`` is supplied, otherwise to text-to-image. Coverage:
|
|
|
|
- In-tree FAL edit payload construction (``_build_fal_edit_payload``)
|
|
- In-tree FAL routing (text vs edit endpoint) via ``image_generate_tool``
|
|
- Plugin dispatch forwards image_url / reference_image_urls to ``generate()``
|
|
- ``capabilities()`` honesty drives the dynamic tool-schema description
|
|
- Models without an edit endpoint reject image inputs with a clear error
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
import pytest
|
|
import yaml
|
|
|
|
from agent import image_gen_registry
|
|
from agent.image_gen_provider import ImageGenProvider
|
|
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def _reset_registry():
|
|
image_gen_registry._reset_for_tests()
|
|
yield
|
|
image_gen_registry._reset_for_tests()
|
|
|
|
|
|
@pytest.fixture
|
|
def cfg_home(tmp_path, monkeypatch):
|
|
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
|
return tmp_path
|
|
|
|
|
|
def _write_cfg(home, cfg: dict):
|
|
(home / "config.yaml").write_text(yaml.safe_dump(cfg))
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# In-tree FAL edit payload + routing
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestFalEditPayload:
|
|
def test_edit_payload_includes_image_urls(self):
|
|
from tools.image_generation_tool import _build_fal_edit_payload
|
|
|
|
payload = _build_fal_edit_payload(
|
|
"fal-ai/nano-banana-pro", "make it night", ["https://x/y.png"],
|
|
"landscape",
|
|
)
|
|
assert payload["prompt"] == "make it night"
|
|
assert payload["image_urls"] == ["https://x/y.png"]
|
|
# nano-banana edit advertises aspect_ratio in edit_supports
|
|
assert payload.get("aspect_ratio") == "16:9"
|
|
|
|
def test_edit_payload_strips_keys_outside_edit_supports(self):
|
|
from tools.image_generation_tool import _build_fal_edit_payload
|
|
|
|
# gpt-image-2 edit does NOT advertise image_size (auto-inferred), so
|
|
# it must be stripped even though the text-to-image path sets it.
|
|
payload = _build_fal_edit_payload(
|
|
"fal-ai/gpt-image-2", "swap bg", ["https://x/y.png"], "square",
|
|
)
|
|
assert "image_size" not in payload
|
|
assert payload["image_urls"] == ["https://x/y.png"]
|
|
assert payload["quality"] == "medium"
|
|
|
|
def test_text_only_model_has_no_edit_endpoint(self):
|
|
from tools.image_generation_tool import FAL_MODELS
|
|
|
|
# z-image/turbo is a pure text-to-image model — no edit endpoint.
|
|
assert "edit_endpoint" not in FAL_MODELS["fal-ai/z-image/turbo"]
|
|
# while nano-banana-pro is edit-capable
|
|
assert FAL_MODELS["fal-ai/nano-banana-pro"].get("edit_endpoint")
|
|
|
|
|
|
class TestMandatoryKeysSurviveWhitelist:
|
|
"""A model whose whitelist forgets the mandatory keys must not produce a
|
|
request with the prompt / source images silently stripped."""
|
|
|
|
_SIZES = {"square": "1024x1024", "landscape": "1536x1024", "portrait": "1024x1536"}
|
|
|
|
def test_edit_keeps_prompt_and_image_urls(self, monkeypatch):
|
|
from tools import image_generation_tool as t
|
|
|
|
fake = {
|
|
"size_style": "image_size_preset",
|
|
"sizes": self._SIZES,
|
|
"edit_supports": {"seed"}, # intentionally omits prompt + image_urls
|
|
}
|
|
monkeypatch.setitem(t.FAL_MODELS, "test/edit-model", fake)
|
|
payload = t._build_fal_edit_payload(
|
|
"test/edit-model", "make it blue", ["https://x/y.png"], "square",
|
|
)
|
|
assert payload["prompt"] == "make it blue"
|
|
assert payload["image_urls"] == ["https://x/y.png"]
|
|
|
|
def test_text_keeps_prompt(self, monkeypatch):
|
|
from tools import image_generation_tool as t
|
|
|
|
fake = {
|
|
"size_style": "image_size_preset",
|
|
"sizes": self._SIZES,
|
|
"supports": {"seed"}, # intentionally omits prompt
|
|
}
|
|
monkeypatch.setitem(t.FAL_MODELS, "test/text-model", fake)
|
|
payload = t._build_fal_payload("test/text-model", "a cat", aspect_ratio="square")
|
|
assert payload["prompt"] == "a cat"
|
|
|
|
|
|
class TestFalRouting:
|
|
def _patch_submit(self, monkeypatch, image_tool, capture: dict):
|
|
class _Handler:
|
|
def get(self_inner):
|
|
return {"images": [{"url": "https://out/img.png", "width": 1, "height": 1}]}
|
|
|
|
def fake_submit(endpoint, arguments):
|
|
capture["endpoint"] = endpoint
|
|
capture["arguments"] = arguments
|
|
return _Handler()
|
|
|
|
monkeypatch.setattr(image_tool, "_submit_fal_request", fake_submit)
|
|
monkeypatch.setattr(image_tool, "fal_key_is_configured", lambda: True)
|
|
monkeypatch.setattr(image_tool, "_resolve_managed_fal_gateway", lambda: None)
|
|
|
|
def test_text_to_image_uses_base_endpoint(self, cfg_home, monkeypatch):
|
|
import tools.image_generation_tool as image_tool
|
|
|
|
_write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/nano-banana-pro"}})
|
|
capture: dict = {}
|
|
self._patch_submit(monkeypatch, image_tool, capture)
|
|
|
|
raw = image_tool.image_generate_tool(prompt="a cat", aspect_ratio="square")
|
|
out = json.loads(raw)
|
|
assert out["success"] is True
|
|
assert out["modality"] == "text"
|
|
assert capture["endpoint"] == "fal-ai/nano-banana-pro"
|
|
assert "image_urls" not in capture["arguments"]
|
|
|
|
def test_image_to_image_routes_to_edit_endpoint(self, cfg_home, monkeypatch):
|
|
import tools.image_generation_tool as image_tool
|
|
|
|
_write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/nano-banana-pro"}})
|
|
capture: dict = {}
|
|
self._patch_submit(monkeypatch, image_tool, capture)
|
|
|
|
raw = image_tool.image_generate_tool(
|
|
prompt="make it night",
|
|
aspect_ratio="square",
|
|
image_url="https://in/src.png",
|
|
)
|
|
out = json.loads(raw)
|
|
assert out["success"] is True
|
|
assert out["modality"] == "image"
|
|
assert capture["endpoint"] == "fal-ai/nano-banana-pro/edit"
|
|
assert capture["arguments"]["image_urls"] == ["https://in/src.png"]
|
|
|
|
def test_reference_images_clamped_to_model_cap(self, cfg_home, monkeypatch):
|
|
import tools.image_generation_tool as image_tool
|
|
|
|
# nano-banana-pro caps at 2 reference images.
|
|
_write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/nano-banana-pro"}})
|
|
capture: dict = {}
|
|
self._patch_submit(monkeypatch, image_tool, capture)
|
|
|
|
raw = image_tool.image_generate_tool(
|
|
prompt="blend",
|
|
image_url="https://in/a.png",
|
|
reference_image_urls=["https://in/b.png", "https://in/c.png", "https://in/d.png"],
|
|
)
|
|
out = json.loads(raw)
|
|
assert out["success"] is True
|
|
assert capture["arguments"]["image_urls"] == ["https://in/a.png", "https://in/b.png"]
|
|
|
|
def test_text_only_model_rejects_image_url(self, cfg_home, monkeypatch):
|
|
import tools.image_generation_tool as image_tool
|
|
|
|
_write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/z-image/turbo"}})
|
|
capture: dict = {}
|
|
self._patch_submit(monkeypatch, image_tool, capture)
|
|
|
|
raw = image_tool.image_generate_tool(
|
|
prompt="edit this", image_url="https://in/src.png",
|
|
)
|
|
out = json.loads(raw)
|
|
assert out["success"] is False
|
|
assert "image-to-image" in out["error"]
|
|
# Must NOT have submitted anything.
|
|
assert capture == {}
|
|
|
|
def test_edit_skips_upscaler(self, cfg_home, monkeypatch):
|
|
import tools.image_generation_tool as image_tool
|
|
|
|
# flux-2-pro has upscale=True for text-to-image, but edits must skip it.
|
|
_write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/flux-2-pro"}})
|
|
capture: dict = {}
|
|
self._patch_submit(monkeypatch, image_tool, capture)
|
|
upscale_called = {"hit": False}
|
|
monkeypatch.setattr(
|
|
image_tool, "_upscale_image",
|
|
lambda *a, **k: upscale_called.__setitem__("hit", True) or None,
|
|
)
|
|
|
|
raw = image_tool.image_generate_tool(
|
|
prompt="tweak", image_url="https://in/src.png",
|
|
)
|
|
out = json.loads(raw)
|
|
assert out["success"] is True
|
|
assert out["modality"] == "image"
|
|
assert upscale_called["hit"] is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Plugin dispatch forwarding
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class _EditCapableProvider(ImageGenProvider):
|
|
def __init__(self):
|
|
self.received: Dict[str, Any] = {}
|
|
|
|
@property
|
|
def name(self) -> str:
|
|
return "editcap"
|
|
|
|
def capabilities(self) -> Dict[str, Any]:
|
|
return {"modalities": ["text", "image"], "max_reference_images": 4}
|
|
|
|
def generate(self, prompt, aspect_ratio="landscape", *, image_url=None,
|
|
reference_image_urls=None, **kwargs):
|
|
self.received = {
|
|
"prompt": prompt,
|
|
"aspect_ratio": aspect_ratio,
|
|
"image_url": image_url,
|
|
"reference_image_urls": reference_image_urls,
|
|
}
|
|
return {
|
|
"success": True, "image": "/tmp/out.png", "model": "editcap-1",
|
|
"prompt": prompt, "aspect_ratio": aspect_ratio,
|
|
"modality": "image" if image_url else "text", "provider": "editcap",
|
|
}
|
|
|
|
|
|
class _LegacyProvider(ImageGenProvider):
|
|
"""Provider whose generate() predates image_url (no **kwargs absorb)."""
|
|
|
|
@property
|
|
def name(self) -> str:
|
|
return "legacy"
|
|
|
|
def generate(self, prompt, aspect_ratio="landscape"): # narrow signature
|
|
return {"success": True, "image": "/tmp/legacy.png", "provider": "legacy"}
|
|
|
|
|
|
class TestPluginDispatchImageToImage:
|
|
def test_dispatch_forwards_image_url(self, cfg_home, monkeypatch):
|
|
import tools.image_generation_tool as image_tool
|
|
from hermes_cli import plugins as plugins_module
|
|
from agent import image_gen_registry as reg
|
|
|
|
provider = _EditCapableProvider()
|
|
reg.register_provider(provider)
|
|
monkeypatch.setattr(image_tool, "_read_configured_image_provider", lambda: "editcap")
|
|
monkeypatch.setattr(plugins_module, "_ensure_plugins_discovered", lambda *a, **k: None)
|
|
monkeypatch.setattr(reg, "get_provider", lambda n: provider if n == "editcap" else None)
|
|
|
|
raw = image_tool._dispatch_to_plugin_provider(
|
|
"make night", "square",
|
|
image_url="https://in/src.png",
|
|
reference_image_urls=["https://in/ref.png"],
|
|
)
|
|
out = json.loads(raw)
|
|
assert out["success"] is True
|
|
assert out["modality"] == "image"
|
|
assert provider.received["image_url"] == "https://in/src.png"
|
|
assert provider.received["reference_image_urls"] == ["https://in/ref.png"]
|
|
|
|
def test_dispatch_text_only_when_no_image(self, cfg_home, monkeypatch):
|
|
import tools.image_generation_tool as image_tool
|
|
from hermes_cli import plugins as plugins_module
|
|
from agent import image_gen_registry as reg
|
|
|
|
provider = _EditCapableProvider()
|
|
reg.register_provider(provider)
|
|
monkeypatch.setattr(image_tool, "_read_configured_image_provider", lambda: "editcap")
|
|
monkeypatch.setattr(plugins_module, "_ensure_plugins_discovered", lambda *a, **k: None)
|
|
monkeypatch.setattr(reg, "get_provider", lambda n: provider if n == "editcap" else None)
|
|
|
|
raw = image_tool._dispatch_to_plugin_provider("a dog", "landscape")
|
|
out = json.loads(raw)
|
|
assert out["success"] is True
|
|
assert provider.received["image_url"] is None
|
|
assert "reference_image_urls" not in provider.received or provider.received["reference_image_urls"] is None
|
|
|
|
def test_legacy_provider_edit_request_surfaces_clear_error(self, cfg_home, monkeypatch):
|
|
import tools.image_generation_tool as image_tool
|
|
from hermes_cli import plugins as plugins_module
|
|
from agent import image_gen_registry as reg
|
|
|
|
provider = _LegacyProvider()
|
|
reg.register_provider(provider)
|
|
monkeypatch.setattr(image_tool, "_read_configured_image_provider", lambda: "legacy")
|
|
monkeypatch.setattr(plugins_module, "_ensure_plugins_discovered", lambda *a, **k: None)
|
|
monkeypatch.setattr(reg, "get_provider", lambda n: provider if n == "legacy" else None)
|
|
|
|
raw = image_tool._dispatch_to_plugin_provider(
|
|
"edit it", "square", image_url="https://in/src.png",
|
|
)
|
|
out = json.loads(raw)
|
|
assert out["success"] is False
|
|
assert out["error_type"] == "modality_unsupported"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Dynamic schema reflects active capabilities
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class _PluginBothProvider(ImageGenProvider):
|
|
@property
|
|
def name(self) -> str:
|
|
return "both"
|
|
|
|
def is_available(self) -> bool:
|
|
return True
|
|
|
|
def default_model(self) -> Optional[str]:
|
|
return "both-v1"
|
|
|
|
def capabilities(self) -> Dict[str, Any]:
|
|
return {"modalities": ["text", "image"], "max_reference_images": 5}
|
|
|
|
def generate(self, prompt, aspect_ratio="landscape", *, image_url=None,
|
|
reference_image_urls=None, **kwargs):
|
|
return {"success": True}
|
|
|
|
|
|
class TestDynamicSchema:
|
|
def _no_discovery(self, monkeypatch):
|
|
import hermes_cli.plugins as plugins_module
|
|
monkeypatch.setattr(plugins_module, "_ensure_plugins_discovered", lambda *a, **k: None)
|
|
|
|
def test_fal_edit_model_advertises_both(self, cfg_home, monkeypatch):
|
|
from tools.image_generation_tool import _build_dynamic_image_schema
|
|
|
|
_write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/nano-banana-pro"}})
|
|
desc = _build_dynamic_image_schema()["description"]
|
|
assert "text-to-image" in desc and "image-to-image" in desc
|
|
assert "routes automatically" in desc
|
|
|
|
def test_fal_text_only_model_warns(self, cfg_home, monkeypatch):
|
|
from tools.image_generation_tool import _build_dynamic_image_schema
|
|
|
|
_write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/z-image/turbo"}})
|
|
desc = _build_dynamic_image_schema()["description"]
|
|
assert "text-to-image only" in desc
|
|
assert "NOT capable of image-to-image" in desc
|
|
|
|
def test_plugin_both_provider_advertises_refs(self, cfg_home, monkeypatch):
|
|
from tools.image_generation_tool import _build_dynamic_image_schema
|
|
from agent import image_gen_registry as reg
|
|
|
|
_write_cfg(cfg_home, {"image_gen": {"provider": "both"}})
|
|
reg.register_provider(_PluginBothProvider())
|
|
self._no_discovery(monkeypatch)
|
|
|
|
desc = _build_dynamic_image_schema()["description"]
|
|
assert "image-to-image / editing" in desc
|
|
assert "up to 5 reference image(s)" in desc
|
|
|
|
def test_builder_wired_into_registry(self):
|
|
from tools.registry import discover_builtin_tools, registry
|
|
|
|
discover_builtin_tools()
|
|
entry = registry._tools["image_generate"]
|
|
assert entry.dynamic_schema_overrides is not None
|
|
out = entry.dynamic_schema_overrides()
|
|
assert "description" in out
|