mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-21 10:22:18 +00:00
* feat(image-gen): add image-to-image / editing to image_generate Brings image generation to parity with video generation: the unified image_generate tool now edits/transforms a source image (image-to-image) when given image_url / reference_image_urls, routing to each backend's edit endpoint, exactly as video_generate routes to image-to-video. - ImageGenProvider ABC: generate() gains keyword-only image_url + reference_image_urls; new capabilities() declares modalities + max_reference_images (defaults to text-only, backward compatible). success_response gains a modality field; adds normalize_reference_images. - image_generate tool: schema exposes image_url + reference_image_urls; dynamic schema reflects the active model's actual edit capability so the agent knows when image_url is honored. Handler + plugin dispatch forward the new inputs; legacy/text-only providers get a clear modality_unsupported error instead of silently dropping the source image. - In-tree FAL: 7 models gain edit endpoints (flux-2-klein, flux-2-pro, nano-banana-pro, gpt-image-1.5, gpt-image-2, ideogram/v3, qwen-image) with per-model edit_supports whitelists + reference caps; routes to the /edit endpoint and skips the upscaler for edits. - Plugins: openai (images.edit, 16 refs), xai (/v1/images/edits via grok-imagine-image-quality, JSON body per xAI docs), krea (image_style_references, 10 refs). openai-codex stays text-only and rejects edits with an actionable error. - Tests: 15 new (payload, routing, dispatch forwarding, dynamic schema, capabilities); updated 2 change-detector/lambda tests for the new schema. - Docs: image-generation feature page, image-gen provider plugin guide, tools reference. * fix(image-gen): preserve legacy passthrough in fal/krea plugin tests Two existing plugin tests asserted pre-image-to-image behavior: - fal: forward image_url/reference_image_urls only when supplied, so a text-to-image delegation stays byte-identical (no None kwargs). 
- krea: keep dict-shaped image_style_references refs verbatim (the unified string refs go through normalize_reference_images; legacy non-string ref objects pass through unchanged) — fixes KeyError when callers pass the richer Krea ref-object shape. * fix(image-gen): clearer not-capable message for text-to-image-only models When a text-to-image-only model (incl. gpt-image-2 on the Codex OAuth path, which can't do editing through the Responses image_generation tool) gets a source image, say 'this model is not capable of image-to-image / editing — provide a text-only prompt' rather than sending the user shopping for other backends. Applies to the openai-codex guard, the in-tree FAL no-edit-endpoint error, and the dynamic tool-schema text-only line.
124 lines
4.3 KiB
Python
124 lines
4.3 KiB
Python
import json
|
|
from types import SimpleNamespace
|
|
|
|
|
|
def test_postprocess_adds_agent_visible_image_for_active_ssh_env(monkeypatch, tmp_path):
    """Post-processing with an active env adds host and agent-visible paths.

    A fake environment exposing ``_remote_home`` and ``_sync_manager`` is
    returned by the patched ``_active_terminal_env``. The test checks that
    the original ``image`` value is kept, ``host_image`` mirrors it,
    ``agent_visible_image`` is rooted at the fake remote home, and the sync
    manager was invoked exactly once with ``force=True``.
    """
    from tools import image_generation_tool

    # Build a throwaway HERMES_HOME containing one cached image file.
    home_dir = tmp_path / ".hermes"
    cache_dir = home_dir / "cache" / "images"
    cache_dir.mkdir(parents=True)
    local_image = cache_dir / "xai_grok-imagine-image_test.jpg"
    local_image.write_bytes(b"jpg")

    recorded_force_flags = []

    class FakeSyncManager:
        # Records the ``force`` flag of every sync request.
        def sync(self, *, force=False):
            recorded_force_flags.append(force)

    fake_env = SimpleNamespace(
        _remote_home="/home/remotesshuser",
        _sync_manager=FakeSyncManager(),
    )

    def always_fake_env(task_id):
        return fake_env

    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    monkeypatch.setattr(image_generation_tool, "_active_terminal_env", always_fake_env)

    payload = json.dumps({"success": True, "image": str(local_image)})
    processed = image_generation_tool._postprocess_image_generate_result(
        payload, task_id="task-1"
    )
    result = json.loads(processed)

    expected_remote_path = (
        "/home/remotesshuser/.hermes/cache/images/xai_grok-imagine-image_test.jpg"
    )
    assert result["image"] == str(local_image)
    assert result["host_image"] == str(local_image)
    assert result["agent_visible_image"] == expected_remote_path
    assert recorded_force_flags == [True]
|
|
|
|
|
|
def test_postprocess_maps_docker_cache_path_without_active_env(monkeypatch, tmp_path):
    """With ``TERMINAL_ENV=docker`` and no active env, the path maps under ``/root``.

    ``_active_terminal_env`` is patched to return ``None``; the test then
    expects ``agent_visible_image`` to be the cache path rooted at
    ``/root/.hermes`` while ``image`` stays the host path.
    """
    from tools import image_generation_tool

    # Throwaway HERMES_HOME with a single cached image.
    home_dir = tmp_path / ".hermes"
    cache_dir = home_dir / "cache" / "images"
    cache_dir.mkdir(parents=True)
    local_image = cache_dir / "generated.png"
    local_image.write_bytes(b"png")

    def no_active_env(task_id):
        return None

    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    monkeypatch.setenv("TERMINAL_ENV", "docker")
    monkeypatch.setattr(image_generation_tool, "_active_terminal_env", no_active_env)

    payload = json.dumps({"success": True, "image": str(local_image)})
    processed = image_generation_tool._postprocess_image_generate_result(payload)
    result = json.loads(processed)

    assert result["image"] == str(local_image)
    assert result["agent_visible_image"] == "/root/.hermes/cache/images/generated.png"
|
|
|
|
|
|
def test_postprocess_maps_ssh_cache_path_without_active_env(monkeypatch, tmp_path):
    """With ``TERMINAL_ENV=ssh`` and no active env, the path maps under ``~``.

    ``_active_terminal_env`` is patched to return ``None``; the test then
    expects ``agent_visible_image`` to be the cache path expressed relative
    to ``~/.hermes`` while ``image`` stays the host path.
    """
    from tools import image_generation_tool

    # Throwaway HERMES_HOME with a single cached image.
    home_dir = tmp_path / ".hermes"
    cache_dir = home_dir / "cache" / "images"
    cache_dir.mkdir(parents=True)
    local_image = cache_dir / "first-call.png"
    local_image.write_bytes(b"png")

    def no_active_env(task_id):
        return None

    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    monkeypatch.setenv("TERMINAL_ENV", "ssh")
    monkeypatch.setattr(image_generation_tool, "_active_terminal_env", no_active_env)

    payload = json.dumps({"success": True, "image": str(local_image)})
    processed = image_generation_tool._postprocess_image_generate_result(payload)
    result = json.loads(processed)

    assert result["image"] == str(local_image)
    assert result["agent_visible_image"] == "~/.hermes/cache/images/first-call.png"
|
|
|
|
|
|
def test_postprocess_leaves_remote_image_urls_unchanged(monkeypatch):
    """A result whose ``image`` is an https URL comes back byte-for-byte unchanged."""
    from tools import image_generation_tool

    def no_active_env(task_id):
        return None

    monkeypatch.setattr(image_generation_tool, "_active_terminal_env", no_active_env)

    payload = json.dumps({"success": True, "image": "https://example.com/image.png"})
    processed = image_generation_tool._postprocess_image_generate_result(payload)

    # The comparison is on the raw JSON string, not on the parsed dict.
    assert processed == payload
|
|
|
|
|
|
def test_handle_image_generate_postprocesses_plugin_result(monkeypatch, tmp_path):
    """``_handle_image_generate`` post-processes a plugin provider's result.

    The plugin dispatch is stubbed to return a local cached image path. The
    test checks that the handler's ``task_id`` reaches the patched
    ``_active_terminal_env`` exactly once and that ``agent_visible_image``
    is rooted at the fake env's ``_remote_home``.
    """
    from tools import image_generation_tool

    # Throwaway HERMES_HOME with a single cached image.
    home_dir = tmp_path / ".hermes"
    cache_dir = home_dir / "cache" / "images"
    cache_dir.mkdir(parents=True)
    local_image = cache_dir / "plugin.png"
    local_image.write_bytes(b"png")

    # No sync manager on this env, unlike the active-SSH test.
    fake_env = SimpleNamespace(_remote_home="/home/remote", _sync_manager=None)
    observed_task_ids = []

    def fake_active_env(task_id):
        observed_task_ids.append(task_id)
        return fake_env

    def fake_dispatch(prompt, aspect_ratio, **kw):
        return json.dumps({"success": True, "image": str(local_image)})

    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    monkeypatch.setattr(image_generation_tool, "_active_terminal_env", fake_active_env)
    monkeypatch.setattr(
        image_generation_tool, "_dispatch_to_plugin_provider", fake_dispatch
    )

    tool_args = {"prompt": "draw", "aspect_ratio": "square"}
    handled = image_generation_tool._handle_image_generate(
        tool_args, task_id="plugin-task"
    )
    result = json.loads(handled)

    assert observed_task_ids == ["plugin-task"]
    assert result["agent_visible_image"] == "/home/remote/.hermes/cache/images/plugin.png"
|