hermes-agent/tests/tools/test_image_generation_artifacts.py
Teknium c02192ff6a
feat(image-gen): add image-to-image / editing to image_generate (#48705)
* feat(image-gen): add image-to-image / editing to image_generate

Brings image generation to parity with video generation: the unified
image_generate tool now edits/transforms a source image (image-to-image)
when given image_url / reference_image_urls, routing to each backend's
edit endpoint, exactly as video_generate routes to image-to-video.

- ImageGenProvider ABC: generate() gains keyword-only image_url +
  reference_image_urls; new capabilities() declares modalities +
  max_reference_images (defaults to text-only, backward compatible).
  success_response gains a modality field; adds normalize_reference_images.
- image_generate tool: schema exposes image_url + reference_image_urls;
  dynamic schema reflects the active model's actual edit capability so the
  agent knows when image_url is honored. Handler + plugin dispatch forward
  the new inputs; legacy/text-only providers get a clear modality_unsupported
  error instead of silently dropping the source image.
- In-tree FAL: 7 models gain edit endpoints (flux-2-klein, flux-2-pro,
  nano-banana-pro, gpt-image-1.5, gpt-image-2, ideogram/v3, qwen-image)
  with per-model edit_supports whitelists + reference caps; routes to the
  /edit endpoint and skips the upscaler for edits.
- Plugins: openai (images.edit, 16 refs), xai (/v1/images/edits via
  grok-imagine-image-quality, JSON body per xAI docs), krea
  (image_style_references, 10 refs). openai-codex stays text-only and
  rejects edits with an actionable error.
- Tests: 15 new (payload, routing, dispatch forwarding, dynamic schema,
  capabilities); updated 2 change-detector/lambda tests for the new schema.
- Docs: image-generation feature page, image-gen provider plugin guide,
  tools reference.

* fix(image-gen): preserve legacy passthrough in fal/krea plugin tests

Two existing plugin tests asserted pre-image-to-image behavior:
- fal: forward image_url/reference_image_urls only when supplied, so a
  text-to-image delegation stays byte-identical (no None kwargs).
- krea: keep dict-shaped image_style_references refs verbatim (the unified
  string refs go through normalize_reference_images; legacy non-string ref
  objects pass through unchanged) — fixes KeyError when callers pass the
  richer Krea ref-object shape.

* fix(image-gen): clearer not-capable message for text-to-image-only models

When a text-to-image-only model (incl. gpt-image-2 on the Codex OAuth path,
which can't do editing through the Responses image_generation tool) gets a
source image, say 'this model is not capable of image-to-image / editing —
provide a text-only prompt' rather than sending the user shopping for other
backends. Applies to the openai-codex guard, the in-tree FAL no-edit-endpoint
error, and the dynamic tool-schema text-only line.
2026-06-18 22:13:07 -07:00

124 lines
4.3 KiB
Python

import json
from types import SimpleNamespace
def test_postprocess_adds_agent_visible_image_for_active_ssh_env(monkeypatch, tmp_path):
    """Check path mapping and forced sync when an active terminal env exists.

    A stub env exposing ``_remote_home`` and a recording ``_sync_manager`` is
    returned by the patched ``_active_terminal_env``. The postprocessed result
    must keep ``image`` on the host path, report the same path as
    ``host_image``, expose ``agent_visible_image`` under the remote home, and
    have triggered exactly one ``sync(force=True)``.
    """
    from tools import image_generation_tool as tool

    # Lay out a fake HERMES_HOME containing one cached image on the host side.
    home = tmp_path / ".hermes"
    cache_dir = home.joinpath("cache", "images")
    cache_dir.mkdir(parents=True)
    host_file = cache_dir / "xai_grok-imagine-image_test.jpg"
    host_file.write_bytes(b"jpg")
    host_path = str(host_file)

    recorded_force_flags = []

    class RecordingSyncManager:
        """Stub sync manager that records the ``force`` flag of each call."""

        def sync(self, *, force=False):
            recorded_force_flags.append(force)

    fake_env = SimpleNamespace(
        _remote_home="/home/remotesshuser",
        _sync_manager=RecordingSyncManager(),
    )

    def active_env_stub(task_id):
        # Every task id resolves to the same stub env in this test.
        return fake_env

    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setattr(tool, "_active_terminal_env", active_env_stub)

    payload = json.dumps({"success": True, "image": host_path})
    processed = tool._postprocess_image_generate_result(payload, task_id="task-1")
    decoded = json.loads(processed)

    assert decoded["image"] == host_path
    assert decoded["host_image"] == host_path
    assert decoded["agent_visible_image"] == (
        "/home/remotesshuser/.hermes/cache/images/xai_grok-imagine-image_test.jpg"
    )
    assert recorded_force_flags == [True]
def test_postprocess_maps_docker_cache_path_without_active_env(monkeypatch, tmp_path):
    """Check the docker fallback mapping when no terminal env is active.

    With ``TERMINAL_ENV`` set to ``docker`` and ``_active_terminal_env``
    patched to return ``None``, the result must keep ``image`` on the host
    path and report ``agent_visible_image`` under ``/root/.hermes``.
    """
    from tools import image_generation_tool as tool

    # Fake HERMES_HOME with a single cached image.
    home = tmp_path / ".hermes"
    cache_dir = home.joinpath("cache", "images")
    cache_dir.mkdir(parents=True)
    host_file = cache_dir / "generated.png"
    host_file.write_bytes(b"png")
    host_path = str(host_file)

    def no_active_env(task_id):
        # Simulate the state before any terminal env has been created.
        return None

    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setenv("TERMINAL_ENV", "docker")
    monkeypatch.setattr(tool, "_active_terminal_env", no_active_env)

    payload = json.dumps({"success": True, "image": host_path})
    processed = tool._postprocess_image_generate_result(payload)
    decoded = json.loads(processed)

    assert decoded["image"] == host_path
    assert decoded["agent_visible_image"] == "/root/.hermes/cache/images/generated.png"
def test_postprocess_maps_ssh_cache_path_without_active_env(monkeypatch, tmp_path):
    """Check the ssh fallback mapping when no terminal env is active.

    With ``TERMINAL_ENV`` set to ``ssh`` and ``_active_terminal_env`` patched
    to return ``None``, the result must keep ``image`` on the host path and
    report ``agent_visible_image`` as a ``~/.hermes``-relative path.
    """
    from tools import image_generation_tool as tool

    # Fake HERMES_HOME with a single cached image.
    home = tmp_path / ".hermes"
    cache_dir = home.joinpath("cache", "images")
    cache_dir.mkdir(parents=True)
    host_file = cache_dir / "first-call.png"
    host_file.write_bytes(b"png")
    host_path = str(host_file)

    def no_active_env(task_id):
        # Simulate the state before any terminal env has been created.
        return None

    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setenv("TERMINAL_ENV", "ssh")
    monkeypatch.setattr(tool, "_active_terminal_env", no_active_env)

    payload = json.dumps({"success": True, "image": host_path})
    processed = tool._postprocess_image_generate_result(payload)
    decoded = json.loads(processed)

    assert decoded["image"] == host_path
    assert decoded["agent_visible_image"] == "~/.hermes/cache/images/first-call.png"
def test_postprocess_leaves_remote_image_urls_unchanged(monkeypatch):
    """Check that a result whose ``image`` is an https URL comes back verbatim.

    The comparison is on the raw JSON string, so the postprocessor must
    return its input unmodified rather than an equivalent re-serialisation.
    """
    from tools import image_generation_tool as tool

    def no_active_env(task_id):
        # No terminal env is active for any task in this test.
        return None

    monkeypatch.setattr(tool, "_active_terminal_env", no_active_env)

    payload = json.dumps({"success": True, "image": "https://example.com/image.png"})
    processed = tool._postprocess_image_generate_result(payload)

    assert processed == payload
def test_handle_image_generate_postprocesses_plugin_result(monkeypatch, tmp_path):
    """Check that the handler postprocesses results from a plugin provider.

    ``_dispatch_to_plugin_provider`` is stubbed to return a successful result
    pointing at a cached host image. The handler must look up the active
    terminal env with the ``task_id`` it was given and expose
    ``agent_visible_image`` under that env's ``_remote_home``.
    """
    from tools import image_generation_tool as tool

    # Fake HERMES_HOME with a single cached image produced by the "plugin".
    home = tmp_path / ".hermes"
    cache_dir = home.joinpath("cache", "images")
    cache_dir.mkdir(parents=True)
    host_file = cache_dir / "plugin.png"
    host_file.write_bytes(b"png")

    fake_env = SimpleNamespace(_remote_home="/home/remote", _sync_manager=None)
    observed_task_ids = []

    def fake_active_env(task_id):
        # Record each lookup so the test can verify task_id forwarding.
        observed_task_ids.append(task_id)
        return fake_env

    def plugin_dispatch_stub(prompt, aspect_ratio, **_unused):
        # Accept and ignore any extra keyword arguments the handler forwards.
        return json.dumps({"success": True, "image": str(host_file)})

    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setattr(tool, "_active_terminal_env", fake_active_env)
    monkeypatch.setattr(tool, "_dispatch_to_plugin_provider", plugin_dispatch_stub)

    request_args = {"prompt": "draw", "aspect_ratio": "square"}
    handled = tool._handle_image_generate(request_args, task_id="plugin-task")
    decoded = json.loads(handled)

    assert observed_task_ids == ["plugin-task"]
    assert decoded["agent_visible_image"] == "/home/remote/.hermes/cache/images/plugin.png"