diff --git a/agent/image_gen_provider.py b/agent/image_gen_provider.py
index a7f1b8c31ff..a3eeb1e4c8c 100644
--- a/agent/image_gen_provider.py
+++ b/agent/image_gen_provider.py
@@ -11,6 +11,18 @@ Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
 as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
 via ``plugins.enabled``).
 
+Unified surface
+---------------
+One tool — ``image_generate`` — covers **text-to-image** and
+**image-to-image / image editing**. The router is the presence of
+``image_url`` (and/or ``reference_image_urls``): if any source image is
+provided, the provider routes to its image-to-image / edit endpoint; if
+omitted, the provider routes to text-to-image. Users pick one **model**
+(e.g. nano-banana-pro, gpt-image-2, grok-imagine-image); the provider
+handles which underlying endpoint to hit. This mirrors the ``video_gen``
+provider design (``agent/video_gen_provider.py``) so the two surfaces
+stay learnable together.
+
 Response shape
 --------------
 All providers return a dict that :func:`success_response` / :func:`error_response`
@@ -21,6 +33,7 @@ produce. The tool wrapper JSON-serializes it. Keys:
     model          str              provider-specific model identifier
     prompt         str              echoed prompt
     aspect_ratio   str              "landscape" | "square" | "portrait"
+    modality       str              "text" | "image" (which mode was used)
     provider       str              provider name (for diagnostics)
     error          str              only when success=False
     error_type     str              only when success=False
@@ -127,19 +140,51 @@ class ImageGenProvider(abc.ABC):
             return models[0].get("id")
         return None
 
+    def capabilities(self) -> Dict[str, Any]:
+        """Return what this provider supports.
+
+        Returned dict (all keys optional)::
+
+            {
+                "modalities": ["text", "image"],   # which inputs the backend accepts
+                "max_reference_images": 9,          # cap for reference_image_urls
+            }
+
+        ``modalities`` declares whether the active backend/model supports
+        text-to-image (``"text"``), image-to-image / editing (``"image"``),
+        or both. The tool layer surfaces this in the dynamic schema so the
+        model knows when ``image_url`` is honored. Used by ``hermes tools``
+        for the picker too. Default: text-only (backward compatible — a
+        provider that doesn't override this advertises text-to-image only).
+        """
+        return {
+            "modalities": ["text"],
+            "max_reference_images": 0,
+        }
+
     @abc.abstractmethod
     def generate(
         self,
         prompt: str,
         aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        *,
+        image_url: Optional[str] = None,
+        reference_image_urls: Optional[List[str]] = None,
         **kwargs: Any,
     ) -> Dict[str, Any]:
-        """Generate an image.
+        """Generate an image from a text prompt, or edit/transform a source image.
+
+        Routing: if ``image_url`` (or any ``reference_image_urls``) is
+        provided, the provider should route to its image-to-image / edit
+        endpoint; otherwise text-to-image. ``image_url`` is the primary
+        source image to edit; ``reference_image_urls`` are additional
+        style/composition references (provider clamps to its declared
+        ``max_reference_images``).
 
         Implementations should return the dict from :func:`success_response`
         or :func:`error_response`. ``kwargs`` may contain forward-compat
-        parameters future versions of the schema will expose — implementations
-        should ignore unknown keys.
+        parameters future versions of the schema will expose —
+        implementations MUST ignore unknown keys (no TypeError).
         """
 
 
@@ -162,6 +207,26 @@ def resolve_aspect_ratio(value: Optional[str]) -> str:
     return DEFAULT_ASPECT_RATIO
 
 
+def normalize_reference_images(value: Any) -> Optional[List[str]]:
+    """Coerce a reference-image argument into a clean list of URL/path strings.
+
+    Accepts a string or a list/tuple; non-string and blank items are dropped,
+    the rest stripped. Returns ``None`` for other types or when nothing usable
+    remains, so providers can treat "no refs" as a single sentinel.
+    """
+    if value is None:
+        return None
+    if isinstance(value, str):
+        value = [value]
+    if not isinstance(value, (list, tuple)):
+        return None
+    out: List[str] = []
+    for item in value:
+        if isinstance(item, str) and item.strip():
+            out.append(item.strip())
+    return out or None
+
+
 def _images_cache_dir() -> Path:
     """Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
     from hermes_constants import get_hermes_home
@@ -280,13 +345,16 @@ def success_response(
     prompt: str,
     aspect_ratio: str,
     provider: str,
+    modality: str = "text",
     extra: Optional[Dict[str, Any]] = None,
 ) -> Dict[str, Any]:
     """Build a uniform success response dict.
 
     ``image`` may be an HTTP URL or an absolute filesystem path (for b64
-    providers like OpenAI). Callers that need to pass through additional
-    backend-specific fields can supply ``extra``.
+    providers like OpenAI). ``modality`` is ``"text"`` (text-to-image) or
+    ``"image"`` (image-to-image / editing) — indicates which endpoint was
+    actually hit, useful for diagnostics. Callers that need to pass through
+    additional backend-specific fields can supply ``extra``.
     """
     payload: Dict[str, Any] = {
         "success": True,
@@ -294,6 +362,7 @@ def success_response(
         "model": model,
         "prompt": prompt,
         "aspect_ratio": aspect_ratio,
+        "modality": modality,
         "provider": provider,
     }
     if extra:
diff --git a/plugins/image_gen/fal/__init__.py b/plugins/image_gen/fal/__init__.py
index 21b88f37f34..3e7777c7149 100644
--- a/plugins/image_gen/fal/__init__.py
+++ b/plugins/image_gen/fal/__init__.py
@@ -87,7 +87,7 @@ class FalImageGenProvider(ImageGenProvider):
         return {
             "name": "FAL.ai",
             "badge": "paid",
-            "tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
+            "tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc. — text-to-image & image editing",
             "env_vars": [
                 {
                     "key": "FAL_KEY",
@@ -97,18 +97,40 @@ class FalImageGenProvider(ImageGenProvider):
             ],
         }
 
+    def capabilities(self) -> Dict[str, Any]:
+        # Whether image-to-image is available depends on the currently-
+        # selected FAL model (each model entry declares an edit_endpoint or
+        # not). Report the active model's actual surface so the dynamic tool
+        # schema is accurate.
+        import tools.image_generation_tool as _it
+
+        try:
+            _model_id, meta = _it._resolve_fal_model()
+        except Exception:  # noqa: BLE001
+            return {"modalities": ["text"], "max_reference_images": 0}
+        if meta.get("edit_endpoint"):
+            return {
+                "modalities": ["text", "image"],
+                "max_reference_images": int(meta.get("max_reference_images") or 1),
+            }
+        return {"modalities": ["text"], "max_reference_images": 0}
+
     def generate(
         self,
         prompt: str,
         aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        *,
+        image_url: Optional[str] = None,
+        reference_image_urls: Optional[List[str]] = None,
         **kwargs: Any,
     ) -> Dict[str, Any]:
-        """Generate an image via the legacy FAL pipeline.
+        """Generate or edit an image via the legacy FAL pipeline.
 
-        Forwards prompt + aspect_ratio (and any forward-compat extras
-        the schema supports) into :func:`tools.image_generation_tool.image_generate_tool`,
-        then reshapes its JSON-string response into the provider-ABC
-        dict format consumed by ``_dispatch_to_plugin_provider``.
+        Forwards prompt + aspect_ratio + image_url/reference_image_urls (and
+        any forward-compat extras the schema supports) into
+        :func:`tools.image_generation_tool.image_generate_tool`, then reshapes
+        its JSON-string response into the provider-ABC dict format consumed by
+        ``_dispatch_to_plugin_provider``.
         """
         import tools.image_generation_tool as _it
 
@@ -124,6 +146,13 @@ class FalImageGenProvider(ImageGenProvider):
             )
             if key in kwargs and kwargs[key] is not None
         }
+        # Only forward the image-to-image inputs when actually supplied, so a
+        # plain text-to-image call delegates exactly as it did before (no
+        # noisy None kwargs).
+        if image_url is not None:
+            passthrough["image_url"] = image_url
+        if reference_image_urls is not None:
+            passthrough["reference_image_urls"] = reference_image_urls
 
         try:
             raw = _it.image_generate_tool(
diff --git a/plugins/image_gen/krea/__init__.py b/plugins/image_gen/krea/__init__.py
index 552f2ae71fe..a897302175b 100644
--- a/plugins/image_gen/krea/__init__.py
+++ b/plugins/image_gen/krea/__init__.py
@@ -33,6 +33,7 @@ from agent.image_gen_provider import (
     DEFAULT_ASPECT_RATIO,
     ImageGenProvider,
     error_response,
+    normalize_reference_images,
     resolve_aspect_ratio,
     save_url_image,
     success_response,
@@ -191,7 +192,7 @@ class KreaImageGenProvider(ImageGenProvider):
         return {
             "name": "Krea",
             "badge": "paid",
-            "tag": "Krea 2 foundation model — Medium ($0.03) + Large ($0.06). Strong style transfer + moodboards.",
+            "tag": "Krea 2 foundation model — Medium ($0.03) + Large ($0.06). Style transfer, moodboards, reference-guided generation.",
             "env_vars": [
                 {
                     "key": "KREA_API_KEY",
@@ -201,6 +202,11 @@ class KreaImageGenProvider(ImageGenProvider):
             ],
         }
 
+    def capabilities(self) -> Dict[str, Any]:
+        # Krea supports reference-guided generation (image-to-image style
+        # transfer) via image_style_references — up to 10 refs.
+        return {"modalities": ["text", "image"], "max_reference_images": 10}
+
     # ------------------------------------------------------------------
     # generate()
     # ------------------------------------------------------------------
@@ -209,12 +215,48 @@ class KreaImageGenProvider(ImageGenProvider):
         self,
         prompt: str,
         aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        *,
+        image_url: Optional[str] = None,
+        reference_image_urls: Optional[List[str]] = None,
         **kwargs: Any,
     ) -> Dict[str, Any]:
         prompt = (prompt or "").strip()
         aspect = resolve_aspect_ratio(aspect_ratio)
         krea_ar = _ASPECT_MAP.get(aspect, "1:1")
 
+        # Collect reference images for reference-guided generation (image-to-
+        # image style transfer). Sources, in order:
+        #   1. unified image_url (primary source) + reference_image_urls (strings)
+        #   2. legacy image_style_references kwarg — may be plain URL strings OR
+        #      Krea's richer ref objects (e.g. {"url": ..., "strength": ...}),
+        #      which are passed through verbatim for backward compatibility.
+        style_refs: List[Any] = []
+        if isinstance(image_url, str) and image_url.strip():
+            style_refs.append(image_url.strip())
+        for ref in (normalize_reference_images(reference_image_urls) or []):
+            style_refs.append(ref)
+        legacy_refs = kwargs.get("image_style_references")
+        if isinstance(legacy_refs, list):
+            for ref in legacy_refs:
+                if isinstance(ref, str):
+                    if ref.strip():
+                        style_refs.append(ref.strip())
+                elif ref:
+                    # Non-string ref object (dict, etc.) — pass through as-is.
+                    style_refs.append(ref)
+        # Dedupe string entries while preserving order (dict refs aren't
+        # hashable, so they're kept verbatim); Krea caps at 10.
+        seen: set = set()
+        deduped: List[Any] = []
+        for r in style_refs:
+            if isinstance(r, str):
+                if r in seen:
+                    continue
+                seen.add(r)
+            deduped.append(r)
+        style_refs = deduped[:10]
+        modality = "image" if style_refs else "text"
+
         if not prompt:
             return error_response(
                 error="Prompt is required and must be a non-empty string",
@@ -256,10 +298,10 @@ class KreaImageGenProvider(ImageGenProvider):
         if isinstance(styles, list) and styles:
             payload["styles"] = styles
 
-        image_style_references = kwargs.get("image_style_references")
-        if isinstance(image_style_references, list) and image_style_references:
-            # Krea caps at 10 refs per request.
-            payload["image_style_references"] = image_style_references[:10]
+        if style_refs:
+            # Reference-guided generation (image-to-image style transfer).
+            # Krea caps at 10 refs per request (already clamped above).
+            payload["image_style_references"] = style_refs
 
         moodboards = kwargs.get("moodboards")
         if isinstance(moodboards, list) and moodboards:
@@ -483,19 +525,19 @@ class KreaImageGenProvider(ImageGenProvider):
         # Per Krea's job-lifecycle docs the completed payload exposes
         # ``result.urls`` (an array). Fall back to a single ``url`` field
         # for forward/backward compatibility.
-        image_url: Optional[str] = None
+        result_image_url: Optional[str] = None
         urls = result.get("urls")
         if isinstance(urls, list) and urls:
             for candidate in urls:
                 if isinstance(candidate, str) and candidate.strip():
-                    image_url = candidate.strip()
+                    result_image_url = candidate.strip()
                     break
-        if image_url is None:
+        if result_image_url is None:
             single = result.get("url")
             if isinstance(single, str) and single.strip():
-                image_url = single.strip()
+                result_image_url = single.strip()
 
-        if image_url is None:
+        if result_image_url is None:
             return error_response(
                 error="Krea result contained no image URL",
                 error_type="empty_response",
@@ -508,14 +550,14 @@ class KreaImageGenProvider(ImageGenProvider):
         # Materialise locally — Krea result URLs may expire, mirroring
         # what we do for xAI / OpenAI URL responses (#26942).
         try:
-            saved_path = save_url_image(image_url, prefix=f"krea_{model_id}")
+            saved_path = save_url_image(result_image_url, prefix=f"krea_{model_id}")
         except Exception as exc:  # noqa: BLE001
             logger.warning(
                 "Krea image URL %s could not be cached (%s); falling back to bare URL.",
-                image_url,
+                result_image_url,
                 exc,
             )
-            image_ref = image_url
+            image_ref = result_image_url
         else:
             image_ref = str(saved_path)
 
@@ -534,6 +576,7 @@ class KreaImageGenProvider(ImageGenProvider):
             prompt=prompt,
             aspect_ratio=aspect,
             provider="krea",
+            modality=modality,
             extra=extra,
         )
 
diff --git a/plugins/image_gen/openai-codex/__init__.py b/plugins/image_gen/openai-codex/__init__.py
index 6fde2d60bbb..0bd61267db1 100644
--- a/plugins/image_gen/openai-codex/__init__.py
+++ b/plugins/image_gen/openai-codex/__init__.py
@@ -319,7 +319,7 @@ class OpenAICodexImageGenProvider(ImageGenProvider):
         return {
             "name": "OpenAI (Codex auth)",
             "badge": "free",
-            "tag": "gpt-image-2 via ChatGPT/Codex OAuth — no API key required",
+            "tag": "gpt-image-2 via ChatGPT/Codex OAuth — no API key required (text-to-image only)",
             "env_vars": [],
             "post_setup_hint": (
                 "Sign in with `hermes auth codex` (or `hermes setup` → Codex) "
@@ -327,15 +327,41 @@ class OpenAICodexImageGenProvider(ImageGenProvider):
             ),
         }
 
+    def capabilities(self) -> Dict[str, Any]:
+        # The Codex Responses image_generation tool path is text-to-image
+        # only here. Image-to-image / editing via Codex OAuth is not wired —
+        # users who need editing should use the `openai` (API key), `fal`, or
+        # `xai` backends. Declaring text-only keeps the dynamic tool schema
+        # honest so the model doesn't attempt an unsupported edit.
+        return {"modalities": ["text"], "max_reference_images": 0}
+
     def generate(
         self,
         prompt: str,
         aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        *,
+        image_url: Optional[str] = None,
+        reference_image_urls: Optional[List[str]] = None,
         **kwargs: Any,
     ) -> Dict[str, Any]:
         prompt = (prompt or "").strip()
         aspect = resolve_aspect_ratio(aspect_ratio)
 
+        # Image-to-image / editing is not supported on the Codex OAuth path.
+        # Surface a clear, actionable error instead of silently ignoring the
+        # source image and producing an unrelated picture.
+        if (isinstance(image_url, str) and image_url.strip()) or reference_image_urls:
+            return error_response(
+                error=(
+                    "This model is not capable of image-to-image / editing. "
+                    "Please provide a text-only prompt (drop image_url and "
+                    "reference_image_urls)."
+                ),
+                error_type="modality_unsupported",
+                provider="openai-codex",
+                aspect_ratio=aspect,
+            )
+
         if not prompt:
             return error_response(
                 error="Prompt is required and must be a non-empty string",
diff --git a/plugins/image_gen/openai/__init__.py b/plugins/image_gen/openai/__init__.py
index 448f5bc45af..e214271bcd9 100644
--- a/plugins/image_gen/openai/__init__.py
+++ b/plugins/image_gen/openai/__init__.py
@@ -31,6 +31,7 @@ from agent.image_gen_provider import (
     DEFAULT_ASPECT_RATIO,
     ImageGenProvider,
     error_response,
+    normalize_reference_images,
     resolve_aspect_ratio,
     save_b64_image,
     save_url_image,
@@ -117,13 +118,48 @@ def _resolve_model() -> Tuple[str, Dict[str, Any]]:
     return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL]
 
 
+# ---------------------------------------------------------------------------
+# Source-image loading (for image-to-image / edit)
+# ---------------------------------------------------------------------------
+
+
+def _load_image_bytes(ref: str) -> Tuple[bytes, str]:
+    """Load image bytes from an http(s) URL, a ``data:`` URI, or a local path.
+
+    Returns ``(data, filename)``. Raises on any network / IO error so the
+    caller can surface a clean error_response.
+    """
+    ref = ref.strip()
+    lower = ref.lower()
+    if lower.startswith(("http://", "https://")):
+        import requests
+
+        resp = requests.get(ref, timeout=60)
+        resp.raise_for_status()
+        name = ref.split("?", 1)[0].rsplit("/", 1)[-1] or "image.png"  # last URL segment, query dropped
+        return resp.content, name
+    if lower.startswith("data:"):
+        import base64
+
+        header, _, b64 = ref.partition(",")
+        ext = "png"
+        if "image/" in header:
+            ext = header.split("image/", 1)[1].split(";", 1)[0] or "png"  # MIME subtype used as extension
+        return base64.b64decode(b64), f"image.{ext}"
+    # Anything else is treated as a local file path.
+    with open(ref, "rb") as fh:
+        data = fh.read()
+    name = os.path.basename(ref) or "image.png"
+    return data, name
+
+
 # ---------------------------------------------------------------------------
 # Provider
 # ---------------------------------------------------------------------------
 
 
 class OpenAIImageGenProvider(ImageGenProvider):
-    """OpenAI ``images.generate`` backend — gpt-image-2 at low/medium/high."""
+    """OpenAI ``images.generate`` / ``images.edit`` backend — gpt-image-2."""
 
     @property
     def name(self) -> str:
@@ -161,7 +197,7 @@ class OpenAIImageGenProvider(ImageGenProvider):
         return {
             "name": "OpenAI",
             "badge": "paid",
-            "tag": "gpt-image-2 at low/medium/high quality tiers",
+            "tag": "gpt-image-2 at low/medium/high quality tiers — text-to-image & image editing",
             "env_vars": [
                 {
                     "key": "OPENAI_API_KEY",
@@ -171,10 +207,18 @@ class OpenAIImageGenProvider(ImageGenProvider):
             ],
         }
 
+    def capabilities(self) -> Dict[str, Any]:
+        # gpt-image-2 supports editing via images.edit() with up to 16 source
+        # images.
+        return {"modalities": ["text", "image"], "max_reference_images": 16}
+
     def generate(
         self,
         prompt: str,
         aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        *,
+        image_url: Optional[str] = None,
+        reference_image_urls: Optional[List[str]] = None,
         **kwargs: Any,
     ) -> Dict[str, Any]:
         prompt = (prompt or "").strip()
@@ -213,29 +257,82 @@ class OpenAIImageGenProvider(ImageGenProvider):
         tier_id, meta = _resolve_model()
         size = _SIZES.get(aspect, _SIZES["square"])
 
-        # gpt-image-2 returns b64_json unconditionally and REJECTS
-        # ``response_format`` as an unknown parameter. Don't send it.
-        payload: Dict[str, Any] = {
-            "model": API_MODEL,
-            "prompt": prompt,
-            "size": size,
-            "n": 1,
-            "quality": meta["quality"],
-        }
+        # Collect source images (primary + references) for image-to-image.
+        sources: List[str] = []
+        if isinstance(image_url, str) and image_url.strip():
+            sources.append(image_url.strip())
+        for ref in (normalize_reference_images(reference_image_urls) or []):
+            sources.append(ref)
+        sources = sources[:16]  # gpt-image-2 edit caps at 16 images
+        is_edit = bool(sources)
+        modality = "image" if is_edit else "text"
 
-        try:
-            client = openai.OpenAI()
-            response = client.images.generate(**payload)
-        except Exception as exc:
-            logger.debug("OpenAI image generation failed", exc_info=True)
-            return error_response(
-                error=f"OpenAI image generation failed: {exc}",
-                error_type="api_error",
-                provider="openai",
-                model=tier_id,
-                prompt=prompt,
-                aspect_ratio=aspect,
-            )
+        if is_edit:
+            # images.edit() expects file-like objects. Download/read each
+            # source into a named BytesIO so the SDK sends correct multipart.
+            import io
+
+            try:
+                files = []
+                for ref in sources:
+                    data, fname = _load_image_bytes(ref)
+                    bio = io.BytesIO(data)
+                    bio.name = fname
+                    files.append(bio)
+            except Exception as exc:
+                return error_response(
+                    error=f"Could not load source image for editing: {exc}",
+                    error_type="io_error",
+                    provider="openai",
+                    model=tier_id,
+                    prompt=prompt,
+                    aspect_ratio=aspect,
+                )
+
+            try:
+                client = openai.OpenAI()  # inside try: constructor errors become an error_response
+                response = client.images.edit(
+                    model=API_MODEL,
+                    image=files if len(files) > 1 else files[0],
+                    prompt=prompt,
+                    size=size,  # type: ignore[arg-type]  # _SIZES values are valid gpt-image sizes
+                    quality=meta["quality"],
+                    n=1,
+                )
+            except Exception as exc:
+                logger.debug("OpenAI image edit failed", exc_info=True)
+                return error_response(
+                    error=f"OpenAI image editing failed: {exc}",
+                    error_type="api_error",
+                    provider="openai",
+                    model=tier_id,
+                    prompt=prompt,
+                    aspect_ratio=aspect,
+                )
+        else:
+            # gpt-image-2 returns b64_json unconditionally and REJECTS
+            # ``response_format`` as an unknown parameter. Don't send it.
+            payload: Dict[str, Any] = {
+                "model": API_MODEL,
+                "prompt": prompt,
+                "size": size,
+                "n": 1,
+                "quality": meta["quality"],
+            }
+
+            try:
+                client = openai.OpenAI()  # inside try, as before this patch: constructor errors are reported, not raised
+                response = client.images.generate(**payload)
+            except Exception as exc:
+                logger.debug("OpenAI image generation failed", exc_info=True)
+                return error_response(
+                    error=f"OpenAI image generation failed: {exc}",
+                    error_type="api_error",
+                    provider="openai",
+                    model=tier_id,
+                    prompt=prompt,
+                    aspect_ratio=aspect,
+                )
 
         data = getattr(response, "data", None) or []
         if not data:
@@ -302,6 +399,7 @@ class OpenAIImageGenProvider(ImageGenProvider):
             prompt=prompt,
             aspect_ratio=aspect,
             provider="openai",
+            modality=modality,
             extra=extra,
         )
 
diff --git a/plugins/image_gen/xai/__init__.py b/plugins/image_gen/xai/__init__.py
index a8982393f7e..f487d90ada6 100644
--- a/plugins/image_gen/xai/__init__.py
+++ b/plugins/image_gen/xai/__init__.py
@@ -27,6 +27,7 @@ from agent.image_gen_provider import (
     DEFAULT_ASPECT_RATIO,
     ImageGenProvider,
     error_response,
+    normalize_reference_images,
     resolve_aspect_ratio,
     save_b64_image,
     save_url_image,
@@ -114,6 +115,31 @@ def _resolve_resolution() -> str:
     return DEFAULT_RESOLUTION
 
 
+def _xai_image_field(source: str) -> Dict[str, str]:
+    """Build the xAI ``image`` field for an edit request.
+
+    xAI's ``/v1/images/edits`` accepts ``{"url": <ref>, "type": "image_url"}``
+    where ``<ref>`` is a public URL or a base64 data URI. Public URLs and
+    existing data URIs pass through unchanged; local file paths are read and
+    encoded into a ``data:`` URI.
+    """
+    source = source.strip()
+    lower = source.lower()
+    if lower.startswith(("http://", "https://", "data:")):
+        return {"url": source, "type": "image_url"}
+    # Local file path → base64 data URI.
+    import base64
+    import os as _os
+
+    with open(source, "rb") as fh:
+        raw = fh.read()
+    ext = (_os.path.splitext(source)[1].lstrip(".") or "png").lower()  # no extension → assume png
+    if ext == "jpg":
+        ext = "jpeg"  # the registered MIME subtype is image/jpeg, not image/jpg
+    b64 = base64.b64encode(raw).decode("utf-8")
+    return {"url": f"data:image/{ext};base64,{b64}", "type": "image_url"}
+
+
 # ---------------------------------------------------------------------------
 # Provider
 # ---------------------------------------------------------------------------
@@ -153,18 +179,34 @@ class XAIImageGenProvider(ImageGenProvider):
         return {
             "name": "xAI Grok Imagine (image)",
             "badge": "paid",
-            "tag": "grok-imagine-image — text-to-image; uses xAI Grok OAuth or XAI_API_KEY",
+            "tag": "grok-imagine-image — text-to-image & image editing; uses xAI Grok OAuth or XAI_API_KEY",
             "env_vars": [],
             "post_setup": "xai_grok",
         }
 
+    def capabilities(self) -> Dict[str, Any]:
+        # xAI's /v1/images/edits supports image editing via the
+        # grok-imagine-image-quality model. Single primary source image only
+        # (multi-image editing is a separate capability, not surfaced here).
+        return {"modalities": ["text", "image"], "max_reference_images": 1}
+
     def generate(
         self,
         prompt: str,
         aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        *,
+        image_url: Optional[str] = None,
+        reference_image_urls: Optional[List[str]] = None,
         **kwargs: Any,
     ) -> Dict[str, Any]:
-        """Generate an image using xAI's grok-imagine-image."""
+        """Generate an image (text-to-image) or edit a source image (image-to-image).
+
+        Routing: if ``image_url`` (or, failing that, a ``reference_image_urls``
+        entry) is given, POST to ``/v1/images/edits``; else ``/v1/images/generations``.
+        Per xAI docs, editing uses the ``grok-imagine-image-quality`` model and
+        a JSON body (the OpenAI SDK's multipart ``images.edit()`` is NOT
+        supported by xAI).
+        """
         creds = resolve_xai_http_credentials()
         api_key = str(creds.get("api_key") or "").strip()
         provider_name = str(creds.get("provider") or "xai").strip() or "xai"
@@ -182,12 +224,17 @@ class XAIImageGenProvider(ImageGenProvider):
         resolution = _resolve_resolution()
         xai_res = resolution if resolution in _XAI_RESOLUTIONS else DEFAULT_RESOLUTION
 
-        payload: Dict[str, Any] = {
-            "model": model_id,
-            "prompt": prompt,
-            "aspect_ratio": xai_ar,
-            "resolution": xai_res,
-        }
+        # Pick the primary source image: explicit image_url wins, else the
+        # first reference image.
+        source_image = None
+        if isinstance(image_url, str) and image_url.strip():
+            source_image = image_url.strip()
+        else:
+            refs = normalize_reference_images(reference_image_urls)
+            if refs:
+                source_image = refs[0]
+        is_edit = bool(source_image)
+        modality = "image" if is_edit else "text"
 
         headers = {
             "Authorization": f"Bearer {api_key}",
@@ -197,9 +244,41 @@ class XAIImageGenProvider(ImageGenProvider):
 
         base_url = str(creds.get("base_url") or "https://api.x.ai/v1").strip().rstrip("/")
 
+        if is_edit:
+            # Editing requires the quality model per xAI docs. The source
+            # image may be a public URL or a base64 data URI; local file paths
+            # are converted to a data URI here.
+            edit_model = "grok-imagine-image-quality"
+            try:
+                image_field = _xai_image_field(source_image)
+            except Exception as exc:
+                return error_response(
+                    error=f"Could not load source image for editing: {exc}",
+                    error_type="io_error",
+                    provider=provider_name,
+                    model=edit_model,
+                    prompt=prompt,
+                    aspect_ratio=aspect,
+                )
+            payload: Dict[str, Any] = {
+                "model": edit_model,
+                "prompt": prompt,
+                "image": image_field,
+            }
+            endpoint_url = f"{base_url}/images/edits"
+            model_id = edit_model
+        else:
+            payload = {
+                "model": model_id,
+                "prompt": prompt,
+                "aspect_ratio": xai_ar,
+                "resolution": xai_res,
+            }
+            endpoint_url = f"{base_url}/images/generations"
+
         try:
             response = requests.post(
-                f"{base_url}/images/generations",
+                endpoint_url,
                 headers=headers,
                 json=payload,
                 timeout=120,
@@ -310,9 +389,9 @@ class XAIImageGenProvider(ImageGenProvider):
                 aspect_ratio=aspect,
             )
 
-        extra: Dict[str, Any] = {
-            "resolution": xai_res,
-        }
+        extra: Dict[str, Any] = {}
+        if not is_edit:
+            extra["resolution"] = xai_res
 
         return success_response(
             image=image_ref,
@@ -320,6 +399,7 @@ class XAIImageGenProvider(ImageGenProvider):
             prompt=prompt,
             aspect_ratio=aspect,
             provider="xai",
+            modality=modality,
             extra=extra,
         )
 
diff --git a/tests/tools/test_image_generation.py b/tests/tools/test_image_generation.py
index b24e6bc1fcc..df7d3a34abb 100644
--- a/tests/tools/test_image_generation.py
+++ b/tests/tools/test_image_generation.py
@@ -363,11 +363,16 @@ class TestAspectRatioNormalization:
 
 class TestRegistryIntegration:
 
-    def test_schema_exposes_only_prompt_and_aspect_ratio_to_agent(self, image_tool):
-        """The agent-facing schema must stay tight — model selection is a
-        user-level config choice, not an agent-level arg."""
+    def test_schema_exposes_expected_agent_params(self, image_tool):
+        """The agent-facing schema exposes the unified text+image surface:
+        prompt (required), aspect_ratio, and the image-to-image inputs
+        image_url + reference_image_urls. Model selection stays a user-level
+        config choice, never an agent-level arg."""
         props = image_tool.IMAGE_GENERATE_SCHEMA["parameters"]["properties"]
-        assert set(props.keys()) == {"prompt", "aspect_ratio"}
+        assert set(props.keys()) == {
+            "prompt", "aspect_ratio", "image_url", "reference_image_urls",
+        }
+        assert image_tool.IMAGE_GENERATE_SCHEMA["parameters"]["required"] == ["prompt"]
 
     def test_aspect_ratio_enum_is_three_values(self, image_tool):
         enum = image_tool.IMAGE_GENERATE_SCHEMA["parameters"]["properties"]["aspect_ratio"]["enum"]
diff --git a/tests/tools/test_image_generation_artifacts.py b/tests/tools/test_image_generation_artifacts.py
index 2a1ce111353..ea4fd37d01c 100644
--- a/tests/tools/test_image_generation_artifacts.py
+++ b/tests/tools/test_image_generation_artifacts.py
@@ -110,7 +110,7 @@ def test_handle_image_generate_postprocesses_plugin_result(monkeypatch, tmp_path
     monkeypatch.setattr(
         image_generation_tool,
         "_dispatch_to_plugin_provider",
-        lambda prompt, aspect_ratio: json.dumps({"success": True, "image": str(image_path)}),
+        lambda prompt, aspect_ratio, **kw: json.dumps({"success": True, "image": str(image_path)}),
     )
 
     result = json.loads(
diff --git a/tests/tools/test_image_generation_image_to_image.py b/tests/tools/test_image_generation_image_to_image.py
new file mode 100644
index 00000000000..4e9d457a49f
--- /dev/null
+++ b/tests/tools/test_image_generation_image_to_image.py
@@ -0,0 +1,349 @@
+"""Tests for the image-to-image / editing surface of ``image_generate``.
+
+Mirrors the video-gen image-to-video tests: the unified ``image_generate``
+tool routes to a provider's edit endpoint when ``image_url`` /
+``reference_image_urls`` is supplied, otherwise to text-to-image. Coverage:
+
+- In-tree FAL edit payload construction (``_build_fal_edit_payload``)
+- In-tree FAL routing (text vs edit endpoint) via ``image_generate_tool``
+- Plugin dispatch forwards image_url / reference_image_urls to ``generate()``
+- ``capabilities()`` honesty drives the dynamic tool-schema description
+- Models without an edit endpoint reject image inputs with a clear error
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any, Dict, List, Optional
+
+import pytest
+import yaml
+
+from agent import image_gen_registry
+from agent.image_gen_provider import ImageGenProvider
+
+
+@pytest.fixture(autouse=True)
+def _reset_registry():
+    image_gen_registry._reset_for_tests()  # reset the provider registry before the test
+    yield
+    image_gen_registry._reset_for_tests()  # and once more after the test has run
+
+
+@pytest.fixture
+def cfg_home(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))  # point HERMES_HOME at the per-test tmp dir
+    return tmp_path
+
+
+def _write_cfg(home, cfg: dict):
+    (home / "config.yaml").write_text(yaml.safe_dump(cfg))  # dump cfg as <home>/config.yaml
+
+
+# ---------------------------------------------------------------------------
+# In-tree FAL edit payload + routing
+# ---------------------------------------------------------------------------
+
+
+class TestFalEditPayload:
+    def test_edit_payload_includes_image_urls(self):
+        from tools.image_generation_tool import _build_fal_edit_payload
+
+        sources = ["https://x/y.png"]
+        built = _build_fal_edit_payload(
+            "fal-ai/nano-banana-pro", "make it night", sources, "landscape",
+        )
+        assert built["prompt"] == "make it night"
+        assert built["image_urls"] == sources
+        # aspect_ratio is listed in nano-banana-pro's edit_supports, so it survives
+        assert built.get("aspect_ratio") == "16:9"
+
+    def test_edit_payload_strips_keys_outside_edit_supports(self):
+        from tools.image_generation_tool import _build_fal_edit_payload
+
+        # image_size is absent from gpt-image-2's edit_supports (size is
+        # auto-inferred on edit), so the filtered payload must not carry it.
+        built = _build_fal_edit_payload(
+            "fal-ai/gpt-image-2", "swap bg", ["https://x/y.png"], "square",
+        )
+        assert built["image_urls"] == ["https://x/y.png"]
+        assert built["quality"] == "medium"
+        assert "image_size" not in built
+
+    def test_text_only_model_has_no_edit_endpoint(self):
+        from tools.image_generation_tool import FAL_MODELS
+
+        # nano-banana-pro declares an edit endpoint ...
+        assert FAL_MODELS["fal-ai/nano-banana-pro"].get("edit_endpoint")
+        # ... whereas z-image/turbo is text-to-image only and declares none.
+        assert "edit_endpoint" not in FAL_MODELS["fal-ai/z-image/turbo"]
+
+
+class TestFalRouting:
+    def _patch_submit(self, monkeypatch, image_tool, capture: dict):
+        """Stub the FAL submit path and record each call in ``capture``."""
+        class _Handler:
+            def get(self):
+                return {"images": [{"url": "https://out/img.png", "width": 1, "height": 1}]}
+
+        def fake_submit(endpoint, arguments):
+            capture.update(endpoint=endpoint, arguments=arguments)
+            return _Handler()
+
+        monkeypatch.setattr(image_tool, "_resolve_managed_fal_gateway", lambda: None)
+        monkeypatch.setattr(image_tool, "fal_key_is_configured", lambda: True)
+        monkeypatch.setattr(image_tool, "_submit_fal_request", fake_submit)
+
+    def test_text_to_image_uses_base_endpoint(self, cfg_home, monkeypatch):
+        """No source image: the model id itself is the endpoint."""
+        import tools.image_generation_tool as image_tool
+
+        _write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/nano-banana-pro"}})
+        seen: dict = {}
+        self._patch_submit(monkeypatch, image_tool, seen)
+
+        result = json.loads(image_tool.image_generate_tool(prompt="a cat", aspect_ratio="square"))
+        assert result["success"] is True
+        assert result["modality"] == "text"
+        assert seen["endpoint"] == "fal-ai/nano-banana-pro"
+        assert "image_urls" not in seen["arguments"]
+
+    def test_image_to_image_routes_to_edit_endpoint(self, cfg_home, monkeypatch):
+        """An ``image_url`` sends the request to the model's edit endpoint."""
+        import tools.image_generation_tool as image_tool
+
+        _write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/nano-banana-pro"}})
+        seen: dict = {}
+        self._patch_submit(monkeypatch, image_tool, seen)
+
+        raw_json = image_tool.image_generate_tool(
+            prompt="make it night", aspect_ratio="square",
+            image_url="https://in/src.png",
+        )
+        result = json.loads(raw_json)
+        assert result["success"] is True
+        assert result["modality"] == "image"
+        assert seen["endpoint"] == "fal-ai/nano-banana-pro/edit"
+        assert seen["arguments"]["image_urls"] == ["https://in/src.png"]
+
+    def test_reference_images_clamped_to_model_cap(self, cfg_home, monkeypatch):
+        """Sources beyond ``max_reference_images`` (2 for nano-banana-pro) are dropped."""
+        import tools.image_generation_tool as image_tool
+
+        _write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/nano-banana-pro"}})
+        seen: dict = {}
+        self._patch_submit(monkeypatch, image_tool, seen)
+
+        extra_refs = ["https://in/b.png", "https://in/c.png", "https://in/d.png"]
+        raw_json = image_tool.image_generate_tool(
+            prompt="blend", image_url="https://in/a.png",
+            reference_image_urls=extra_refs,
+        )
+        result = json.loads(raw_json)
+        assert result["success"] is True
+        assert seen["arguments"]["image_urls"] == ["https://in/a.png", "https://in/b.png"]
+
+    def test_text_only_model_rejects_image_url(self, cfg_home, monkeypatch):
+        """A model without an edit endpoint fails instead of ignoring the image."""
+        import tools.image_generation_tool as image_tool
+
+        _write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/z-image/turbo"}})
+        seen: dict = {}
+        self._patch_submit(monkeypatch, image_tool, seen)
+
+        raw_json = image_tool.image_generate_tool(
+            prompt="edit this", image_url="https://in/src.png",
+        )
+        result = json.loads(raw_json)
+        assert result["success"] is False
+        assert "image-to-image" in result["error"]
+        assert seen == {}  # nothing may have been submitted
+
+    def test_edit_skips_upscaler(self, cfg_home, monkeypatch):
+        """flux-2-pro upscales text-to-image output, but an edit must not."""
+        import tools.image_generation_tool as image_tool
+
+        _write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/flux-2-pro"}})
+        seen: dict = {}
+        self._patch_submit(monkeypatch, image_tool, seen)
+        upscale_state = {"hit": False}
+
+        def fake_upscale(*args, **kwargs):
+            upscale_state["hit"] = True
+        monkeypatch.setattr(image_tool, "_upscale_image", fake_upscale)
+
+        raw_json = image_tool.image_generate_tool(
+            prompt="tweak", image_url="https://in/src.png",
+        )
+        result = json.loads(raw_json)
+        assert result["success"] is True
+        assert result["modality"] == "image"
+        assert upscale_state["hit"] is False
+
+
+# ---------------------------------------------------------------------------
+# Plugin dispatch forwarding
+# ---------------------------------------------------------------------------
+
+
+class _EditCapableProvider(ImageGenProvider):
+    """Fake provider advertising text+image support; records generate() inputs."""
+
+    def __init__(self):
+        self.received: Dict[str, Any] = {}
+
+    @property
+    def name(self) -> str:
+        return "editcap"
+
+    def capabilities(self) -> Dict[str, Any]:
+        return dict(modalities=["text", "image"], max_reference_images=4)
+
+    def generate(self, prompt, aspect_ratio="landscape", *, image_url=None,
+                 reference_image_urls=None, **kwargs):
+        self.received = dict(
+            prompt=prompt, aspect_ratio=aspect_ratio,
+            image_url=image_url, reference_image_urls=reference_image_urls,
+        )
+        return {
+            "success": True, "image": "/tmp/out.png", "model": "editcap-1",
+            "prompt": prompt, "aspect_ratio": aspect_ratio,
+            "modality": "image" if image_url else "text", "provider": "editcap",
+        }
+
+
+class _LegacyProvider(ImageGenProvider):
+    """Provider with the pre-image_url ``generate()`` signature (no ``**kwargs``)."""
+
+    @property
+    def name(self) -> str:
+        return "legacy"
+
+    def generate(self, prompt, aspect_ratio="landscape"):  # deliberately narrow
+        return dict(success=True, image="/tmp/legacy.png", provider="legacy")
+
+
+class TestPluginDispatchImageToImage:
+    """``_dispatch_to_plugin_provider`` must forward image inputs to plugins."""
+
+    @staticmethod
+    def _activate(monkeypatch, provider):
+        """Make ``provider`` the configured plugin; return the tool module.
+
+        Plugin discovery is stubbed out so only the registered fake is visible.
+        """
+        import tools.image_generation_tool as image_tool
+        from hermes_cli import plugins as plugins_module
+
+        image_gen_registry.register_provider(provider)
+        monkeypatch.setattr(
+            image_tool, "_read_configured_image_provider", lambda: provider.name,
+        )
+        monkeypatch.setattr(plugins_module, "_ensure_plugins_discovered", lambda *a, **k: None)
+        monkeypatch.setattr(
+            image_gen_registry, "get_provider",
+            lambda n: provider if n == provider.name else None,
+        )
+        return image_tool
+
+    def test_dispatch_forwards_image_url(self, cfg_home, monkeypatch):
+        provider = _EditCapableProvider()
+        image_tool = self._activate(monkeypatch, provider)
+
+        raw = image_tool._dispatch_to_plugin_provider(
+            "make night", "square",
+            image_url="https://in/src.png",
+            reference_image_urls=["https://in/ref.png"],
+        )
+        out = json.loads(raw)
+        assert out["success"] is True
+        assert out["modality"] == "image"
+        assert provider.received["image_url"] == "https://in/src.png"
+        assert provider.received["reference_image_urls"] == ["https://in/ref.png"]
+
+    def test_dispatch_text_only_when_no_image(self, cfg_home, monkeypatch):
+        provider = _EditCapableProvider()
+        image_tool = self._activate(monkeypatch, provider)
+
+        out = json.loads(image_tool._dispatch_to_plugin_provider("a dog", "landscape"))
+        assert out["success"] is True
+        # generate() records both keys on every call, so None means no image input arrived.
+        assert provider.received["image_url"] is None
+        assert provider.received["reference_image_urls"] is None
+
+    def test_legacy_provider_edit_request_surfaces_clear_error(self, cfg_home, monkeypatch):
+        image_tool = self._activate(monkeypatch, _LegacyProvider())
+
+        raw = image_tool._dispatch_to_plugin_provider(
+            "edit it", "square", image_url="https://in/src.png",
+        )
+        out = json.loads(raw)
+        assert out["success"] is False
+        assert out["error_type"] == "modality_unsupported"
+
+
+# ---------------------------------------------------------------------------
+# Dynamic schema reflects active capabilities
+# ---------------------------------------------------------------------------
+
+
+class _PluginBothProvider(ImageGenProvider):
+    @property
+    def name(self) -> str:
+        return "both"
+
+    def capabilities(self) -> Dict[str, Any]:
+        return dict(modalities=["text", "image"], max_reference_images=5)
+
+    def default_model(self) -> Optional[str]:
+        return "both-v1"
+
+    def is_available(self) -> bool:
+        return True
+
+    def generate(self, prompt, aspect_ratio="landscape", *, image_url=None,
+                 reference_image_urls=None, **kwargs):
+        return dict(success=True)  # minimal success result for the schema tests
+
+
+class TestDynamicSchema:
+    def _no_discovery(self, monkeypatch):
+        import hermes_cli.plugins as plugins_module
+        monkeypatch.setattr(plugins_module, "_ensure_plugins_discovered", lambda *a, **k: None)
+
+    def test_fal_edit_model_advertises_both(self, cfg_home, monkeypatch):
+        from tools.image_generation_tool import _build_dynamic_image_schema
+
+        _write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/nano-banana-pro"}})
+        description = _build_dynamic_image_schema()["description"]
+        for phrase in ("text-to-image", "image-to-image", "routes automatically"):
+            assert phrase in description
+
+    def test_fal_text_only_model_warns(self, cfg_home, monkeypatch):
+        from tools.image_generation_tool import _build_dynamic_image_schema
+
+        _write_cfg(cfg_home, {"image_gen": {"model": "fal-ai/z-image/turbo"}})
+        description = _build_dynamic_image_schema()["description"]
+        for phrase in ("text-to-image only", "NOT capable of image-to-image"):
+            assert phrase in description
+
+    def test_plugin_both_provider_advertises_refs(self, cfg_home, monkeypatch):
+        from tools.image_generation_tool import _build_dynamic_image_schema
+        from agent import image_gen_registry as reg
+
+        _write_cfg(cfg_home, {"image_gen": {"provider": "both"}})
+        reg.register_provider(_PluginBothProvider())
+        self._no_discovery(monkeypatch)
+
+        description = _build_dynamic_image_schema()["description"]
+        for phrase in ("image-to-image / editing", "up to 5 reference image(s)"):
+            assert phrase in description
+
+    def test_builder_wired_into_registry(self):
+        from tools.registry import discover_builtin_tools, registry
+
+        discover_builtin_tools()
+        # The image_generate registry entry must carry a dynamic-override callable.
+        overrides_fn = registry._tools["image_generate"].dynamic_schema_overrides
+        assert overrides_fn is not None
+        assert "description" in overrides_fn()
diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py
index d7eeb30d175..3213068ddd9 100644
--- a/tools/image_generation_tool.py
+++ b/tools/image_generation_tool.py
@@ -116,6 +116,14 @@ FAL_MODELS: Dict[str, Dict[str, Any]] = {
             "output_format", "enable_safety_checker",
         },
         "upscale": False,
+        # Image-to-image / editing: FLUX.2 [klein] 9B edit endpoint takes
+        # `image_urls` (list). Natural-language edits, multi-ref.
+        "edit_endpoint": "fal-ai/flux-2/klein/9b/edit",
+        "edit_supports": {
+            "prompt", "image_urls", "num_inference_steps", "seed",
+            "output_format", "enable_safety_checker",
+        },
+        "max_reference_images": 9,
     },
     "fal-ai/flux-2-pro": {
         "display": "FLUX 2 Pro",
@@ -143,6 +151,14 @@ FAL_MODELS: Dict[str, Dict[str, Any]] = {
             "safety_tolerance", "sync_mode", "seed",
         },
         "upscale": True,   # Backward-compat: current default behavior.
+        # Edit endpoint accepts up to 9 reference images.
+        "edit_endpoint": "fal-ai/flux-2-pro/edit",
+        "edit_supports": {
+            "prompt", "image_urls", "num_inference_steps", "guidance_scale",
+            "num_images", "output_format", "enable_safety_checker",
+            "safety_tolerance", "sync_mode", "seed",
+        },
+        "max_reference_images": 9,
     },
     "fal-ai/z-image/turbo": {
         "display": "Z-Image Turbo",
@@ -194,6 +210,15 @@ FAL_MODELS: Dict[str, Dict[str, Any]] = {
             "enable_web_search", "limit_generations",
         },
         "upscale": False,
+        # Nano Banana Pro edit (Gemini 3 Pro Image): natural-language edits
+        # with up to 2 reference images via `image_urls`.
+        "edit_endpoint": "fal-ai/nano-banana-pro/edit",
+        "edit_supports": {
+            "prompt", "image_urls", "aspect_ratio", "num_images",
+            "output_format", "safety_tolerance", "seed", "sync_mode",
+            "resolution", "enable_web_search", "limit_generations",
+        },
+        "max_reference_images": 2,
     },
     "fal-ai/gpt-image-1.5": {
         "display": "GPT Image 1.5",
@@ -218,6 +243,13 @@ FAL_MODELS: Dict[str, Dict[str, Any]] = {
             "background", "sync_mode",
         },
         "upscale": False,
+        # Edit endpoint: high-fidelity edits preserving composition/lighting.
+        "edit_endpoint": "fal-ai/gpt-image-1.5/edit",
+        "edit_supports": {
+            "prompt", "image_urls", "image_size", "quality", "num_images",
+            "output_format", "sync_mode",
+        },
+        "max_reference_images": 16,
     },
     "fal-ai/gpt-image-2": {
         "display": "GPT Image 2",
@@ -250,6 +282,15 @@ FAL_MODELS: Dict[str, Dict[str, Any]] = {
             # through the shared FAL billing path.
         },
         "upscale": False,
+        # GPT Image 2 edit endpoint lives under the OpenAI namespace on FAL
+        # (NOT fal-ai/). Takes `image_urls` (list) + optional mask. We don't
+        # send `image_size` on edit so the model auto-infers from input.
+        "edit_endpoint": "openai/gpt-image-2/edit",
+        "edit_supports": {
+            "prompt", "image_urls", "quality", "num_images", "output_format",
+            "sync_mode", "mask_image_url",
+        },
+        "max_reference_images": 16,
     },
     "fal-ai/ideogram/v3": {
         "display": "Ideogram V3",
@@ -272,6 +313,13 @@ FAL_MODELS: Dict[str, Dict[str, Any]] = {
             "style", "seed",
         },
         "upscale": False,
+        # Ideogram V3 edit endpoint takes `image_urls` (list).
+        "edit_endpoint": "fal-ai/ideogram/v3/edit",
+        "edit_supports": {
+            "prompt", "image_urls", "rendering_speed", "expand_prompt",
+            "style", "seed",
+        },
+        "max_reference_images": 1,
     },
     "fal-ai/recraft/v4/pro/text-to-image": {
         "display": "Recraft V4 Pro",
@@ -317,6 +365,14 @@ FAL_MODELS: Dict[str, Dict[str, Any]] = {
             "num_images", "output_format", "acceleration", "seed", "sync_mode",
         },
         "upscale": False,
+        # Qwen edit uses the Qwen Image 2.0 Pro editing endpoint, which takes
+        # `image_urls` (list) + natural-language edit instructions.
+        "edit_endpoint": "fal-ai/qwen-image-2/pro/edit",
+        "edit_supports": {
+            "prompt", "image_urls", "num_inference_steps", "guidance_scale",
+            "num_images", "output_format", "acceleration", "seed", "sync_mode",
+        },
+        "max_reference_images": 3,
     },
     # Krea 2 — Krea's first foundation image model, day-0 partner launch on
     # fal (2026-05-27). Same model family as our direct ``plugins/image_gen/krea``
@@ -554,6 +610,55 @@ def _build_fal_payload(
     return {k: v for k, v in payload.items() if k in supports}
 
 
+def _build_fal_edit_payload(
+    model_id: str,
+    prompt: str,
+    image_urls: list,
+    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+    seed: Optional[int] = None,
+    overrides: Optional[Dict[str, Any]] = None,
+) -> Dict[str, Any]:
+    """Assemble the request body for a FAL image-to-image (*edit*) call.
+
+    Starts from the model's ``defaults``, adds the stripped prompt and a copy
+    of ``image_urls``, then an output-size key only if ``edit_supports`` lists
+    it. An integer ``seed`` and non-``None`` ``overrides`` are layered on top.
+
+    Returns the payload restricted to keys in the model's ``edit_supports``
+    set. Raises ``KeyError`` when ``model_id`` is not in ``FAL_MODELS``.
+    """
+    model_meta = FAL_MODELS[model_id]
+    allowed_keys = model_meta.get("edit_supports") or set()
+    style = model_meta["size_style"]
+    size_table = model_meta["sizes"]
+
+    ratio = (aspect_ratio or DEFAULT_ASPECT_RATIO).lower().strip()
+    ratio = ratio if ratio in size_table else DEFAULT_ASPECT_RATIO
+
+    body: Dict[str, Any] = {
+        **model_meta.get("defaults", {}),
+        "prompt": (prompt or "").strip(),
+        "image_urls": list(image_urls),
+    }
+
+    # The size key depends on the model's size style; it is only set when the
+    # edit whitelist names it (gpt-image-2 edit omits `image_size` on purpose).
+    size_key = "aspect_ratio" if style == "aspect_ratio" else "image_size"
+    sized_styles = {"image_size_preset", "gpt_literal", "aspect_ratio"}
+    if style in sized_styles and size_key in allowed_keys:
+        body[size_key] = size_table[ratio]
+
+    if isinstance(seed, int):
+        body["seed"] = seed
+
+    if overrides:
+        body.update(
+            (key, value) for key, value in overrides.items() if value is not None
+        )
+
+    return {key: value for key, value in body.items() if key in allowed_keys}
+
+
 # ---------------------------------------------------------------------------
 # Upscaler
 # ---------------------------------------------------------------------------
@@ -729,19 +834,39 @@ def image_generate_tool(
     num_images: Optional[int] = None,
     output_format: Optional[str] = None,
     seed: Optional[int] = None,
+    image_url: Optional[str] = None,
+    reference_image_urls: Optional[list] = None,
 ) -> str:
-    """Generate an image from a text prompt using the configured FAL model.
+    """Generate an image from a text prompt, or edit a source image, via FAL.
 
-    The agent-facing schema exposes only ``prompt`` and ``aspect_ratio``; the
-    remaining kwargs are overrides for direct Python callers and are filtered
-    per-model via the ``supports`` whitelist (unsupported overrides are
-    silently dropped so legacy callers don't break when switching models).
+    Routing: when ``image_url`` (or ``reference_image_urls``) is provided the
+    call goes to the configured model's ``edit_endpoint`` and fails with an
+    error if the model declares none; with no images it's plain text-to-image.
+
+    The agent-facing schema exposes ``prompt``, ``aspect_ratio``, ``image_url``
+    and ``reference_image_urls``; the remaining kwargs are overrides for direct
+    Python callers and are filtered per-model via the ``supports`` /
+    ``edit_supports`` whitelist (unsupported overrides are silently dropped so
+    legacy callers don't break when switching models).
 
     Returns a JSON string with ``{"success": bool, "image": url | None,
-    "error": str, "error_type": str}``.
+    "modality": "text" | "image", "error": str, "error_type": str}``.
     """
     model_id, meta = _resolve_fal_model()
 
+    # Collect any source images (primary + references) into one ordered list.
+    source_images: list = []
+    if isinstance(image_url, str) and image_url.strip():
+        source_images.append(image_url.strip())
+    if isinstance(reference_image_urls, (list, tuple)):
+        for ref in reference_image_urls:
+            if isinstance(ref, str) and ref.strip():
+                source_images.append(ref.strip())
+
+    edit_endpoint = meta.get("edit_endpoint")
+    use_edit = bool(source_images) and bool(edit_endpoint)
+    modality = "image" if use_edit else "text"
+
     debug_call_data = {
         "model": model_id,
         "parameters": {
@@ -752,6 +877,8 @@ def image_generate_tool(
             "num_images": num_images,
             "output_format": output_format,
             "seed": seed,
+            "modality": modality,
+            "source_images": len(source_images),
         },
         "error": None,
         "success": False,
@@ -768,6 +895,17 @@ def image_generate_tool(
         if not (fal_key_is_configured() or _resolve_managed_fal_gateway()):
             raise ValueError(_build_no_backend_setup_message())
 
+        # If the caller supplied source images but the active model has no
+        # edit endpoint, fail with a clear, actionable message instead of
+        # silently dropping the images and producing an unrelated picture.
+        if source_images and not edit_endpoint:
+            raise ValueError(
+                f"Model '{meta.get('display', model_id)}' ({model_id}) is not "
+                f"capable of image-to-image / editing. Provide a text-only "
+                f"prompt (omit image_url), or switch to an edit-capable model "
+                f"via `hermes tools` → Image Generation."
+            )
+
         aspect_lc = (aspect_ratio or DEFAULT_ASPECT_RATIO).lower().strip()
         if aspect_lc not in VALID_ASPECT_RATIOS:
             logger.warning(
@@ -786,16 +924,31 @@ def image_generate_tool(
         if output_format is not None:
             overrides["output_format"] = output_format
 
-        arguments = _build_fal_payload(
-            model_id, prompt, aspect_lc, seed=seed, overrides=overrides,
-        )
+        if use_edit:
+            # Clamp reference count to the model's declared cap.
+            max_refs = int(meta.get("max_reference_images") or 1)
+            clamped_sources = source_images[:max_refs] if max_refs > 0 else source_images
+            arguments = _build_fal_edit_payload(
+                model_id, prompt, clamped_sources, aspect_lc,
+                seed=seed, overrides=overrides,
+            )
+            endpoint = edit_endpoint
+            logger.info(
+                "Editing image with %s (%s) — %d source image(s), prompt: %s",
+                meta.get("display", model_id), endpoint, len(clamped_sources),
+                prompt[:80],
+            )
+        else:
+            arguments = _build_fal_payload(
+                model_id, prompt, aspect_lc, seed=seed, overrides=overrides,
+            )
+            endpoint = model_id
+            logger.info(
+                "Generating image with %s (%s) — prompt: %s",
+                meta.get("display", model_id), model_id, prompt[:80],
+            )
 
-        logger.info(
-            "Generating image with %s (%s) — prompt: %s",
-            meta.get("display", model_id), model_id, prompt[:80],
-        )
-
-        handler = _submit_fal_request(model_id, arguments=arguments)
+        handler = _submit_fal_request(endpoint, arguments=arguments)
         result = handler.get()
 
         generation_time = (datetime.datetime.now() - start_time).total_seconds()
@@ -807,7 +960,9 @@ def image_generate_tool(
         if not images:
             raise ValueError("No images were generated")
 
-        should_upscale = bool(meta.get("upscale", False))
+        # Edit endpoints already return the final composition; the Clarity
+        # upscaler is a text-to-image quality pass, so skip it for edits.
+        should_upscale = bool(meta.get("upscale", False)) and not use_edit
 
         formatted_images = []
         for img in images:
@@ -834,13 +989,15 @@ def image_generate_tool(
 
         upscaled_count = sum(1 for img in formatted_images if img.get("upscaled"))
         logger.info(
-            "Generated %s image(s) in %.1fs (%s upscaled) via %s",
-            len(formatted_images), generation_time, upscaled_count, model_id,
+            "Generated %s image(s) in %.1fs (%s upscaled) via %s [%s]",
+            len(formatted_images), generation_time, upscaled_count, endpoint,
+            modality,
         )
 
         response_data = {
             "success": True,
             "image": formatted_images[0]["url"] if formatted_images else None,
+            "modality": modality,
         }
 
         debug_call_data["success"] = True
@@ -1001,22 +1158,34 @@ from tools.registry import registry, tool_error
 
 IMAGE_GENERATE_SCHEMA = {
     "name": "image_generate",
+    # Placeholder — the real description is rebuilt dynamically at
+    # get_tool_definitions() time so it reflects the active backend's actual
+    # capabilities (whether the selected model supports image-to-image /
+    # editing). See _build_dynamic_image_schema() below and the
+    # dynamic-tool-schemas skill.
     "description": (
-        "Generate high-quality images from text prompts. The underlying "
-        "backend (FAL, OpenAI, etc.) and model are user-configured and not "
-        "selectable by the agent. Returns either a URL or an absolute file "
-        "path in the `image` field; display it with markdown "
-        "![description](url-or-path) and the gateway will deliver it. When "
-        "the active terminal backend has a different filesystem, successful "
-        "local-file results may also include `agent_visible_image` for "
-        "follow-up terminal/file operations."
+        "Generate high-quality images from text prompts (text-to-image), or "
+        "edit / transform an existing image (image-to-image) when the active "
+        "model supports it. Pass `image_url` to edit that image; add "
+        "`reference_image_urls` for style/composition references; omit both "
+        "for text-to-image. The underlying backend (FAL, OpenAI, xAI, etc.) "
+        "and model are user-configured and not selectable by the agent. "
+        "Returns either a URL or an absolute file path in the `image` field; "
+        "display it with markdown ![description](url-or-path) and the gateway "
+        "will deliver it. When the active terminal backend has a different "
+        "filesystem, successful local-file results may also include "
+        "`agent_visible_image` for follow-up terminal/file operations."
     ),
     "parameters": {
         "type": "object",
         "properties": {
             "prompt": {
                 "type": "string",
-                "description": "The text prompt describing the desired image. Be detailed and descriptive.",
+                "description": (
+                    "The text prompt describing the desired image (text-to-"
+                    "image) or the edit to apply (image-to-image). Be detailed "
+                    "and descriptive."
+                ),
             },
             "aspect_ratio": {
                 "type": "string",
@@ -1024,6 +1193,28 @@ IMAGE_GENERATE_SCHEMA = {
                 "description": "The aspect ratio of the generated image. 'landscape' is 16:9 wide, 'portrait' is 16:9 tall, 'square' is 1:1.",
                 "default": DEFAULT_ASPECT_RATIO,
             },
+            "image_url": {
+                "type": "string",
+                "description": (
+                    "Optional source image to edit/transform (image-to-image). "
+                    "When provided, the active backend routes to its image "
+                    "editing endpoint; when omitted, it generates from text "
+                    "alone. Pass a public URL or an absolute local file path "
+                    "from the conversation. Only honored by models that "
+                    "support editing — the description above indicates whether "
+                    "the active model does."
+                ),
+            },
+            "reference_image_urls": {
+                "type": "array",
+                "items": {"type": "string"},
+                "description": (
+                    "Optional list of additional reference image URLs / paths "
+                    "(style, character, or composition references) to guide an "
+                    "image-to-image edit. Supported only by some models and "
+                    "capped per-model; the description above indicates the max."
+                ),
+            },
         },
         "required": ["prompt"],
     },
@@ -1069,7 +1260,12 @@ def _read_configured_image_provider():
     return None
 
 
-def _dispatch_to_plugin_provider(prompt: str, aspect_ratio: str):
+def _dispatch_to_plugin_provider(
+    prompt: str,
+    aspect_ratio: str,
+    image_url: Optional[str] = None,
+    reference_image_urls: Optional[list] = None,
+):
     """Route the call to a plugin-registered provider when one is selected.
 
     Returns a JSON string on dispatch, or ``None`` to fall through to the
@@ -1080,6 +1276,10 @@ def _dispatch_to_plugin_provider(prompt: str, aspect_ratio: str):
     ``plugins/image_gen/fal/`` plugin (the plugin re-enters this module's
     pipeline via ``_it`` indirection so behavior is identical to the
     direct call, just routed through the registry).
+
+    ``image_url`` / ``reference_image_urls`` enable image-to-image / editing:
+    they are forwarded to the provider's ``generate()`` so the backend can
+    route to its edit endpoint.
     """
     configured = _read_configured_image_provider()
     if not configured:
@@ -1122,11 +1322,53 @@ def _dispatch_to_plugin_provider(prompt: str, aspect_ratio: str):
             "error_type": "provider_not_registered",
         })
 
+    kwargs: Dict[str, Any] = {"prompt": prompt, "aspect_ratio": aspect_ratio}
     try:
-        kwargs = {"prompt": prompt, "aspect_ratio": aspect_ratio}
         if configured_model:
             kwargs["model"] = configured_model
+        if isinstance(image_url, str) and image_url.strip():
+            kwargs["image_url"] = image_url.strip()
+        norm_refs = None
+        if reference_image_urls is not None:
+            from agent.image_gen_provider import normalize_reference_images
+
+            norm_refs = normalize_reference_images(reference_image_urls)
+        if norm_refs:
+            kwargs["reference_image_urls"] = norm_refs
         result = provider.generate(**kwargs)
+    except TypeError as exc:
+        # Likely a provider whose generate() signature predates image_url
+        # support (third-party plugin not yet updated). No retry is attempted:
+        # if edit kwargs were passed, report modality_unsupported; otherwise
+        # surface the TypeError as an ordinary provider_exception.
+        if "image_url" in kwargs or "reference_image_urls" in kwargs:
+            logger.warning(
+                "image_gen provider '%s' rejected image-to-image kwargs "
+                "(signature too narrow): %s",
+                getattr(provider, "name", "?"), exc,
+            )
+            return json.dumps({
+                "success": False,
+                "image": None,
+                "error": (
+                    f"Provider '{getattr(provider, 'name', '?')}' does not "
+                    f"support image-to-image / editing (its generate() "
+                    f"signature is out of date with the image_generate schema). "
+                    f"Omit image_url for text-to-image, or pick a backend that "
+                    f"supports editing via `hermes tools` → Image Generation."
+                ),
+                "error_type": "modality_unsupported",
+            })
+        logger.warning(
+            "Image gen provider '%s' raised TypeError: %s",
+            getattr(provider, "name", "?"), exc,
+        )
+        return json.dumps({
+            "success": False,
+            "image": None,
+            "error": f"Provider '{getattr(provider, 'name', '?')}' error: {exc}",
+            "error_type": "provider_exception",
+        })
     except Exception as exc:
         logger.warning(
             "Image gen provider '%s' raised: %s",
@@ -1153,21 +1395,144 @@ def _handle_image_generate(args, **kw):
     if not prompt:
         return tool_error("prompt is required for image generation")
     aspect_ratio = args.get("aspect_ratio", DEFAULT_ASPECT_RATIO)
+    image_url = args.get("image_url")
+    reference_image_urls = args.get("reference_image_urls")
     task_id = kw.get("task_id")
 
     # Route to a plugin-registered provider if one is active (and it's
     # not the in-tree FAL path).
-    dispatched = _dispatch_to_plugin_provider(prompt, aspect_ratio)
+    dispatched = _dispatch_to_plugin_provider(
+        prompt, aspect_ratio,
+        image_url=image_url,
+        reference_image_urls=reference_image_urls,
+    )
     if dispatched is not None:
         return _postprocess_image_generate_result(dispatched, task_id=task_id)
 
     raw = image_generate_tool(
         prompt=prompt,
         aspect_ratio=aspect_ratio,
+        image_url=image_url,
+        reference_image_urls=reference_image_urls,
     )
     return _postprocess_image_generate_result(raw, task_id=task_id)
 
 
+# ---------------------------------------------------------------------------
+# Dynamic schema — reflect the active backend's image-to-image capability
+# ---------------------------------------------------------------------------
+#
+# Why dynamic: whether the active model supports image-to-image / editing
+# depends entirely on the user's configured backend + model. Telling the
+# model up front ("the active model is text-to-image only — image_url will be
+# rejected") saves a wasted turn. Memoized by config.yaml mtime in
+# model_tools.get_tool_definitions(), so it rebuilds when the user switches
+# model/provider via `hermes tools` or `/skills`.
+
+
+_GENERIC_IMAGE_DESCRIPTION = IMAGE_GENERATE_SCHEMA["description"]
+
+
+def _active_image_capabilities() -> Dict[str, Any]:
+    """Describe what the currently selected image backend/model can do.
+
+    A non-FAL ``image_gen.provider`` is asked for its ``capabilities()`` via
+    the plugin registry; when none is configured, it is "fal", or that lookup
+    yields nothing or raises, in-tree FAL model metadata is used instead. The
+    dict starts text-only with zero reference images and gains "provider" /
+    "model" as they resolve. Lookup exceptions are swallowed.
+    """
+    result: Dict[str, Any] = {"modalities": ["text"], "max_reference_images": 0}
+
+    selected = _read_configured_image_provider()
+    if selected and selected != "fal":
+        try:
+            from agent.image_gen_registry import get_provider
+            from hermes_cli.plugins import _ensure_plugins_discovered
+
+            _ensure_plugins_discovered()
+            plugin = get_provider(selected)
+            if plugin is not None:
+                try:
+                    declared = plugin.capabilities() or {}
+                except Exception:  # noqa: BLE001
+                    declared = {}  # broken capabilities(): keep the defaults
+                result["provider"] = plugin.display_name
+                result["model"] = _read_configured_image_model() or (plugin.default_model() or "")
+                modes = declared.get("modalities")
+                if modes:
+                    result["modalities"] = list(modes)
+                ref_cap = declared.get("max_reference_images")
+                if ref_cap:
+                    result["max_reference_images"] = int(ref_cap)
+                return result
+        except Exception:  # noqa: BLE001
+            pass
+
+    # Built-in FAL catalog — also reached when the plugin route did not return.
+    try:
+        fal_id, fal_meta = _resolve_fal_model()
+        result["provider"] = "FAL.ai"
+        result["model"] = fal_meta.get("display", fal_id)
+        if not fal_meta.get("edit_endpoint"):
+            result["modalities"] = ["text"]
+            result["max_reference_images"] = 0
+        else:
+            result["modalities"] = ["text", "image"]
+            result["max_reference_images"] = int(fal_meta.get("max_reference_images") or 1)
+    except Exception:  # noqa: BLE001
+        pass
+
+    return result
+
+
+def _build_dynamic_image_schema() -> Dict[str, Any]:
+    """Produce the ``description`` override for the ``image_generate`` schema.
+
+    The generic description is followed by an "Active backend" line and one
+    bullet stating whether the active model does text-to-image, editing, or
+    both. If the capability lookup raises, the generic text is returned alone.
+    """
+    try:
+        caps = _active_image_capabilities()
+    except Exception:  # noqa: BLE001
+        return {"description": _GENERIC_IMAGE_DESCRIPTION}
+
+    backend = caps.get("provider")
+    model_name = caps.get("model")
+    modes = set(caps.get("modalities") or ["text"])
+    edits = "image" in modes
+    generates = "text" in modes
+
+    # Header names whatever resolved; it may carry neither a provider nor a model.
+    header = "\nActive backend"
+    header += f": {backend}" if backend else ""
+    header += f" · model: {model_name}" if model_name else ""
+
+    if edits and generates:
+        ref_limit = caps.get("max_reference_images") or 0
+        if ref_limit and ref_limit > 1:
+            ref_note = f"; up to {ref_limit} reference image(s) via reference_image_urls"
+        else:
+            ref_note = ""
+        bullet = (
+            "- supports both text-to-image (omit image_url) and "
+            f"image-to-image / editing (pass image_url){ref_note} — "
+            "routes automatically"
+        )
+    elif edits:
+        bullet = "- this model is image-to-image / edit only — image_url is REQUIRED"
+    else:
+        bullet = (
+            "- this model is text-to-image only — it is NOT capable of "
+            "image-to-image / editing; do not pass image_url or "
+            "reference_image_urls (they will be rejected). Provide a "
+            "text-only prompt."
+        )
+
+    return {"description": "\n".join([_GENERIC_IMAGE_DESCRIPTION, header, bullet])}
+
+
 registry.register(
     name="image_generate",
     toolset="image_gen",
@@ -1177,4 +1542,5 @@ registry.register(
     requires_env=[],
     is_async=False,   # sync fal_client API to avoid "Event loop is closed" in gateway
     emoji="🎨",
+    dynamic_schema_overrides=_build_dynamic_image_schema,
 )
diff --git a/website/docs/developer-guide/image-gen-provider-plugin.md b/website/docs/developer-guide/image-gen-provider-plugin.md
index c9823d1cedd..b746ce82229 100644
--- a/website/docs/developer-guide/image-gen-provider-plugin.md
+++ b/website/docs/developer-guide/image-gen-provider-plugin.md
@@ -47,6 +47,7 @@ from agent.image_gen_provider import (
     DEFAULT_ASPECT_RATIO,
     ImageGenProvider,
     error_response,
+    normalize_reference_images,
     resolve_aspect_ratio,
     save_b64_image,
     success_response,
@@ -112,10 +113,20 @@ class MyBackendImageGenProvider(ImageGenProvider):
             ],
         }
 
+    def capabilities(self) -> Dict[str, Any]:
+        # Declare whether this backend supports image-to-image / editing.
+        # The tool layer surfaces this in the dynamic schema so the model
+        # knows when `image_url` is honored. Default (if you omit this) is
+        # text-only: {"modalities": ["text"], "max_reference_images": 0}.
+        return {"modalities": ["text", "image"], "max_reference_images": 4}
+
     def generate(
         self,
         prompt: str,
         aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        *,
+        image_url: Optional[str] = None,
+        reference_image_urls: Optional[List[str]] = None,
         **kwargs: Any,
     ) -> Dict[str, Any]:
         prompt = (prompt or "").strip()
@@ -130,6 +141,15 @@ class MyBackendImageGenProvider(ImageGenProvider):
                 aspect_ratio=aspect_ratio,
             )
 
+        # Routing: if image_url (or reference_image_urls) is set, the call is
+        # an image-to-image / edit request; otherwise text-to-image. Report
+        # which path you took via the `modality` field of success_response.
+        sources = []
+        if image_url:
+            sources.append(image_url)
+        sources.extend(normalize_reference_images(reference_image_urls) or [])
+        modality = "image" if sources else "text"
+
         # Model selection precedence: env var → config → default. The helper
         # _resolve_model() in the built-in openai plugin is a good reference.
         model_id = kwargs.get("model") or self.default_model() or "my-model-fast"
@@ -137,11 +157,18 @@ class MyBackendImageGenProvider(ImageGenProvider):
         try:
             import my_backend_sdk
             client = my_backend_sdk.Client(api_key=os.environ["MY_BACKEND_API_KEY"])
-            result = client.generate(
-                prompt=prompt,
-                model=model_id,
-                aspect_ratio=aspect_ratio,
-            )
+            if modality == "image":
+                result = client.edit(
+                    prompt=prompt,
+                    model=model_id,
+                    image_urls=sources,
+                )
+            else:
+                result = client.generate(
+                    prompt=prompt,
+                    model=model_id,
+                    aspect_ratio=aspect_ratio,
+                )
 
             # Two shapes supported:
             #   - URL string: return it as `image`
@@ -162,6 +189,7 @@ class MyBackendImageGenProvider(ImageGenProvider):
                 prompt=prompt,
                 aspect_ratio=aspect_ratio,
                 provider=self.name,
+                modality=modality,
             )
         except Exception as exc:
             return error_response(
diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md
index 2393a9db7d1..1f6b86c0063 100644
--- a/website/docs/reference/tools-reference.md
+++ b/website/docs/reference/tools-reference.md
@@ -114,7 +114,7 @@ Scoped to the Feishu document-comment handler. Drives comment read/write operati
 
 | Tool | Description | Requires environment |
 |------|-------------|----------------------|
-| `image_generate` | Generate high-quality images from text prompts using FAL.ai. The underlying model is user-configured (default: FLUX 2 Klein 9B, sub-1s generation) and is not selectable by the agent. Returns a single image URL. Display it using… | FAL_KEY |
+| `image_generate` | Generate images from text prompts (text-to-image) or edit/transform an existing image (image-to-image) via the user-configured backend (FAL.ai, OpenAI, xAI, Krea). Pass `image_url` to edit an image and `reference_image_urls` for style references; omit both for text-to-image. The model is user-configured and not selectable by the agent. Returns a single image URL or local path. | FAL_KEY / OPENAI_API_KEY / xAI OAuth / KREA_API_KEY |
 
 ## `kanban` toolset
 
diff --git a/website/docs/user-guide/features/image-generation.md b/website/docs/user-guide/features/image-generation.md
index 4f225ee00b1..62dfe7bd127 100644
--- a/website/docs/user-guide/features/image-generation.md
+++ b/website/docs/user-guide/features/image-generation.md
@@ -86,6 +86,46 @@ Create a square portrait of a wise old owl — use the typography model
 Make me a futuristic cityscape, landscape orientation
 ```
 
+## Image-to-Image / Editing
+
+The same `image_generate` tool also **edits existing images** when the active
+model supports it — pass a source image and the backend routes to its editing
+endpoint automatically (mirrors how `video_generate` handles image-to-video).
+Omit the source image and it's plain text-to-image.
+
+```
+Take this photo and make it a rainy Tokyo street at night → <image>
+```
+
+```
+Blend these two product shots into one hero image → <image1> <image2>
+```
+
+Two inputs drive the edit:
+
+- **`image_url`** — the primary source image to edit/transform (public URL or local path).
+- **`reference_image_urls`** — additional style/composition references (capped per-model).
+
+### Which backends support editing
+
+| Backend | Image-to-image | Reference cap | How |
+|---|---|---|---|
+| **FAL.ai** (edit-capable models below) | ✓ | up to 9 | routes to the model's `/edit` endpoint |
+| **OpenAI** (`gpt-image-2`) | ✓ | up to 16 | `images.edit()` |
+| **xAI** (Grok Imagine) | ✓ | 1 | `/v1/images/edits` (`grok-imagine-image-quality`) |
+| **Krea** (`Krea 2`) | ✓ | up to 10 | reference-guided generation (`image_style_references`) |
+| **OpenAI (Codex auth)** | ✗ | — | text-to-image only |
+
+FAL models with an editing endpoint: `flux-2/klein/9b`, `flux-2-pro`,
+`nano-banana-pro`, `gpt-image-1.5`, `gpt-image-2`, `ideogram/v3`, and
+`qwen-image`. Pure text-to-image FAL models (`z-image/turbo`, `recraft`,
+`krea/*`) reject image inputs with a clear error pointing you at an
+edit-capable model.
+
+The active model's editing capability is surfaced in the tool description at
+runtime, so the agent knows whether `image_url` will be honored before it
+calls the tool.
+
 ## Aspect Ratios
 
 Every model accepts the same three aspect ratios from the agent's perspective. Internally, each model's native size spec is filled in automatically:
@@ -152,7 +192,7 @@ Debug logs go to `./logs/image_tools_debug_<session_id>.json` with per-call deta
 
 ## Limitations
 
-- **Requires FAL credentials** (direct `FAL_KEY` or Nous Subscription)
-- **Text-to-image only** — no inpainting, img2img, or editing via this tool
-- **Temporary URLs** — FAL returns hosted URLs that expire after hours/days; save locally if needed
-- **Per-model constraints** — some models don't support `seed`, `num_inference_steps`, etc. The `supports` filter silently drops unsupported params; this is expected behavior
+- **Requires credentials** for the active backend (FAL `FAL_KEY` / Nous Subscription, `OPENAI_API_KEY`, xAI OAuth, `KREA_API_KEY`)
+- **Editing is model-dependent** — image-to-image works only on edit-capable models (see the table above); text-to-image-only models reject image inputs with a clear error
+- **Temporary URLs** — backends return hosted URLs that expire after hours/days; Hermes materializes them to the local cache so delivery still works after expiry
+- **Per-model constraints** — some models don't support `seed`, `num_inference_steps`, etc. The `supports` / `edit_supports` filter silently drops unsupported params; this is expected behavior