feat(xai): Imagine public-URL storage, chaining & video edit/extend

Add durable public-URL output and URL-based chaining to xAI Grok Imagine:

- Store generated media on files-cdn with permanent public HTTPS URLs (public_url: true, no expiry by default).
- Chain by URL: generate -> edit -> extend each take a prior result's public HTTPS URL (or a data URI / local file for inputs).
- Add provider-specific xai_video_edit and xai_video_extend tools.
- Image generation: public-URL/storage output, multi-reference edits, and ~/ local-path support for image edits.

Credentials use xAI Grok device-code OAuth (separate PR).
2026-07-02 12:13:05 +00:00 · 2026-06-29 18:18:30 +00:00 · 2026-06-29 18:18:30 +00:00 · 9ce79cd642
commit 9ce79cd642
parent 184c10cf97
15 changed files with 1694 additions and 294 deletions
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@ -66,7 +66,7 @@ CONFIGURABLE_TOOLSETS = [
    ("vision",          "👁️  Vision / Image Analysis",  "vision_analyze"),
    ("video",           "🎬 Video Analysis",            "video_analyze (requires video-capable model)"),
    ("image_gen",       "🎨 Image Generation",          "image_generate"),
-    ("video_gen",       "🎬 Video Generation",          "video_generate (text-to-video + image-to-video)"),
+    ("video_gen",       "🎬 Video Generation",          "video_generate (text/image/reference)"),
    ("x_search",        "🐦 X (Twitter) Search",        "x_search (requires xAI OAuth or XAI_API_KEY)"),
    ("tts",             "🔊 Text-to-Speech",            "text_to_speech"),
    ("skills",          "📚 Skills",                    "list, view, manage"),
@ -2785,6 +2785,49 @@ def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None
    _print_success(f"  Model set to: {chosen}")


+def _configure_xai_imagine_storage(section_name: str, config: dict) -> None:
+    """Ask how xAI Imagine should store output and record the answer.
+
+    The answer is written to ``config[section_name]["xai"]["storage"]``.
+    Missing levels are created on the way down, and a non-dict value found at
+    any level is replaced with a fresh dict.
+    """
+    node = config
+    for key in (section_name, "xai", "storage"):
+        child = node.setdefault(key, {})
+        if not isinstance(child, dict):
+            child = {}
+            node[key] = child
+        node = child
+    storage_cfg = node
+
+    _print_warning(
+        "  xAI Imagine can store generated media and create reusable public URLs. "
+        "xAI may bill for stored files and public URL hosting."
+    )
+    options = [
+        "Enable public URLs without automatic expiry (recommended)",
+        "Disable stored public URLs",
+        "Enable public URLs for 2 days",
+    ]
+    choice = _prompt_choice("  Stored public URLs:", options, default=0)
+
+    if choice == 1:
+        # Opting out only flips the flag; other stored keys are left untouched.
+        storage_cfg["enabled"] = False
+        _print_success("  xAI stored public URLs disabled")
+        return
+
+    storage_cfg["enabled"] = True
+    storage_cfg["public_url"] = True
+    if choice == 2:
+        # 2 days; presumably expressed in seconds -- confirm against the xAI API.
+        storage_cfg["expires_after"] = 2 * 24 * 60 * 60
+        _print_success("  xAI stored public URLs enabled for 2 days")
+    else:
+        # Index 0 (or anything else the prompt returns) means "no expiry".
+        storage_cfg["expires_after"] = None
+        _print_success("  xAI stored public URLs enabled without automatic expiry")
+
+
 def _select_plugin_image_gen_provider(plugin_name: str, config: dict) -> None:
    """Persist a plugin-backed image generation provider selection."""
    img_cfg = config.setdefault("image_gen", {})
@ -2795,6 +2838,8 @@ def _select_plugin_image_gen_provider(plugin_name: str, config: dict) -> None:
    img_cfg["use_gateway"] = False
    _print_success(f"  image_gen.provider set to: {plugin_name}")
    _configure_imagegen_model_for_plugin(plugin_name, config)
+    if plugin_name == "xai":
+        _configure_xai_imagine_storage("image_gen", config)


 # ─── Video Generation Model Pickers ───────────────────────────────────────────
@ -2895,6 +2940,8 @@ def _select_plugin_video_gen_provider(plugin_name: str, config: dict, *, use_gat
    vid_cfg["use_gateway"] = use_gateway
    _print_success(f"  video_gen.provider set to: {plugin_name}")
    _configure_videogen_model_for_plugin(plugin_name, config)
+    if plugin_name == "xai":
+        _configure_xai_imagine_storage("video_gen", config)


 def _write_provider_config(provider: dict, config: dict, *, managed_feature) -> None:
--- a/plugins/image_gen/xai/init.py
+++ b/plugins/image_gen/xai/init.py
@ -19,6 +19,7 @@ from __future__ import annotations

 import logging
 import os
+from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple

 import requests
@ -33,7 +34,14 @@ from agent.image_gen_provider import (
    save_url_image,
    success_response,
 )
-from tools.xai_http import hermes_xai_user_agent, resolve_xai_http_credentials
+from tools.xai_http import (
+    build_xai_storage_options,
+    hermes_xai_user_agent,
+    maybe_mark_xai_storage_notice_seen,
+    read_xai_imagine_storage_config,
+    resolve_xai_http_credentials,
+    xai_storage_notice_text,
+)

 logger = logging.getLogger(__name__)

@ -118,10 +126,8 @@ def _resolve_resolution() -> str:
 def _xai_image_field(source: str) -> Dict[str, str]:
    """Build the xAI ``image`` field for an edit request.

-    xAI's ``/v1/images/edits`` accepts ``{"url": <ref>, "type": "image_url"}``
-    where ``<ref>`` is a public URL or a base64 data URI. Public URLs and
-    existing data URIs pass through unchanged; local file paths are read and
-    encoded into a ``data:`` URI.
+    xAI's ``/v1/images/edits`` accepts a public HTTPS URL or a base64 data URI.
+    Local file paths are read and encoded into a ``data:`` URI.
    """
    source = source.strip()
    lower = source.lower()
@ -131,7 +137,7 @@ def _xai_image_field(source: str) -> Dict[str, str]:
    import base64
    import os as _os

-    with open(source, "rb") as fh:
+    with open(_os.path.expanduser(source), "rb") as fh:
        raw = fh.read()
    ext = (_os.path.splitext(source)[1].lstrip(".") or "png").lower()
    if ext == "jpg":
@ -176,19 +182,29 @@ class XAIImageGenProvider(ImageGenProvider):
        # hook (``hermes_cli/tools_config.py``); identical to the TTS / video
        # gen entries so users see the same OAuth-or-API-key choice for every
        # xAI service.
+        storage_notice = xai_storage_notice_text("image_gen")
+        tag = (
+            "grok-imagine-image - text-to-image & image editing; uses xAI "
+            "Grok OAuth or XAI_API_KEY"
+        )
+        if storage_notice:
+            tag += f". {storage_notice}"
        return {
            "name": "xAI Grok Imagine (image)",
            "badge": "paid",
-            "tag": "grok-imagine-image — text-to-image & image editing; uses xAI Grok OAuth or XAI_API_KEY",
+            "tag": tag,
            "env_vars": [],
            "post_setup": "xai_grok",
        }

    def capabilities(self) -> Dict[str, Any]:
        # xAI's /v1/images/edits supports image editing via grok-imagine-image
-        # -quality. Single primary source image (multi-image editing exists as
-        # a separate capability but we keep the primary edit surface here).
-        return {"modalities": ["text", "image"], "max_reference_images": 1}
+        # -quality, including up to 3 total source images.
+        # NOTE(review): presumably max_source_images counts image_url plus
+        # reference_image_urls together (1 primary + 2 references) -- confirm.
+        return {
+            "modalities": ["text", "image"],
+            "max_reference_images": 2,
+            "max_source_images": 3,
+        }

    def generate(
        self,
@ -224,16 +240,39 @@ class XAIImageGenProvider(ImageGenProvider):
        resolution = _resolve_resolution()
        xai_res = resolution if resolution in _XAI_RESOLUTIONS else DEFAULT_RESOLUTION

-        # Pick the primary source image: explicit image_url wins, else the
-        # first reference image.
-        source_image = None
+        source_images: List[str] = []
        if isinstance(image_url, str) and image_url.strip():
-            source_image = image_url.strip()
-        else:
-            refs = normalize_reference_images(reference_image_urls)
-            if refs:
-                source_image = refs[0]
-        is_edit = bool(source_image)
+            source_images.append(image_url.strip())
+        refs = normalize_reference_images(reference_image_urls)
+        if refs:
+            source_images.extend(refs)
+        if len(source_images) > 3:
+            return error_response(
+                error="xAI image editing supports at most 3 source images",
+                error_type="too_many_references",
+                provider=provider_name,
+                model="grok-imagine-image-quality",
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+        for index, source in enumerate(source_images):
+            field = "image_url" if index == 0 and image_url and image_url.strip() == source else "reference_image_urls"
+            lower = source.lower()
+            if not lower.startswith(("http://", "https://", "data:")):
+                path = Path(source).expanduser()
+                if not path.is_file():
+                    return error_response(
+                        error=(
+                            f"{field} must be a public HTTPS URL or data URI "
+                            "(e.g. the `image`/`public_url` from a prior Imagine result)"
+                        ),
+                        error_type="invalid_image_url",
+                        provider=provider_name,
+                        model="grok-imagine-image-quality",
+                        prompt=prompt,
+                        aspect_ratio=aspect,
+                    )
+        is_edit = bool(source_images)
        modality = "image" if is_edit else "text"

        headers = {
@ -243,6 +282,13 @@ class XAIImageGenProvider(ImageGenProvider):
        }

        base_url = str(creds.get("base_url") or "https://api.x.ai/v1").strip().rstrip("/")
+        storage_options = build_xai_storage_options(
+            "image_gen",
+            filename_prefix="hermes-xai-image",
+            extension="png",
+        )
+        storage_notice = maybe_mark_xai_storage_notice_seen("image_gen")
+        storage_cfg = read_xai_imagine_storage_config("image_gen")

        if is_edit:
            # Editing requires the quality model per xAI docs. The source
@ -250,7 +296,7 @@ class XAIImageGenProvider(ImageGenProvider):
            # are converted to a data URI here.
            edit_model = "grok-imagine-image-quality"
            try:
-                image_field = _xai_image_field(source_image)
+                image_fields = [_xai_image_field(source) for source in source_images]
            except Exception as exc:
                return error_response(
                    error=f"Could not load source image for editing: {exc}",
@ -263,8 +309,11 @@ class XAIImageGenProvider(ImageGenProvider):
            payload: Dict[str, Any] = {
                "model": edit_model,
                "prompt": prompt,
-                "image": image_field,
            }
+            if len(image_fields) == 1:
+                payload["image"] = image_fields[0]
+            else:
+                payload["images"] = image_fields
            endpoint_url = f"{base_url}/images/edits"
            model_id = edit_model
        else:
@ -275,6 +324,8 @@ class XAIImageGenProvider(ImageGenProvider):
                "resolution": xai_res,
            }
            endpoint_url = f"{base_url}/images/generations"
+        if storage_options is not None:
+            payload["storage_options"] = storage_options

        try:
            response = requests.post(
@ -331,7 +382,8 @@ class XAIImageGenProvider(ImageGenProvider):
                aspect_ratio=aspect,
            )

-        # Parse response — xAI returns data[0].b64_json or data[0].url
+        # Parse response - xAI returns data[0].b64_json, data[0].url, and
+        # optionally data[0].file_output when storage_options were requested.
        data = result.get("data", [])
        if not data:
            return error_response(
@ -346,8 +398,13 @@ class XAIImageGenProvider(ImageGenProvider):
        first = data[0]
        b64 = first.get("b64_json")
        url = first.get("url")
+        file_output = first.get("file_output") if isinstance(first, dict) else None
+        file_output = file_output if isinstance(file_output, dict) else {}
+        public_url = file_output.get("public_url") if isinstance(file_output.get("public_url"), str) else None

-        if b64:
+        if public_url:
+            image_ref = public_url
+        elif b64:
            try:
                saved_path = save_b64_image(b64, prefix=f"xai_{model_id}")
            except Exception as exc:
@ -389,9 +446,27 @@ class XAIImageGenProvider(ImageGenProvider):
                aspect_ratio=aspect,
            )

-        extra: Dict[str, Any] = {}
+        extra: Dict[str, Any] = {
+            "storage_enabled": bool(storage_cfg["enabled"]),
+        }
        if not is_edit:
            extra["resolution"] = xai_res
+        if storage_notice:
+            extra["storage_notice"] = storage_notice
+        if public_url:
+            extra["public_url"] = public_url
+        if file_output:
+            for key in (
+                "filename",
+                "expires_at",
+                "public_url_expires_at",
+                "public_url_error",
+                "storage_error",
+            ):
+                if key in file_output:
+                    extra[key] = file_output[key]
+        if result.get("usage"):
+            extra["usage"] = result["usage"]

        return success_response(
            image=image_ref,
--- a/plugins/video_gen/xai/init.py
+++ b/plugins/video_gen/xai/init.py
@ -1,10 +1,7 @@
 """xAI Grok-Imagine video generation backend.

-Surface: text-to-video and image-to-video (animate an input image)
-through xAI's ``/videos/generations`` endpoint. Edit and extend are not
-exposed in this unified surface — xAI is the only backend that supports
-them and the inconsistency would force per-backend prose in the agent's
-tool description.
+Surface: text-to-video, image-to-video, and reference-to-video through the
+unified video provider. xAI edit/extend are exposed through separate tools.

 Originally salvaged from PR #10600 by @Jaaneek; reshaped into the
 :class:`VideoGenProvider` plugin interface and trimmed to the
@ -14,8 +11,9 @@ Authentication: xAI Grok OAuth tokens (preferred — billed against the
 user's SuperGrok or X Premium+ subscription) or ``XAI_API_KEY``. Both routes are
 resolved through ``tools.xai_http.resolve_xai_http_credentials`` so a
 single login covers chat + TTS + image gen + video gen + transcription.
-Output is an HTTPS URL from xAI's CDN; the gateway downloads and
-delivers it.
+When xAI storage is enabled, the primary ``video`` / ``public_url`` fields are the
+stored files-cdn HTTPS link. Pass that public MP4 URL as ``video_url`` for
+edit/extend; it is sent to xAI as ``video.url``.
 """

 from __future__ import annotations
@ -46,13 +44,14 @@ logger = logging.getLogger(__name__)

 DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1"
 DEFAULT_TEXT_TO_VIDEO_MODEL = "grok-imagine-video"
-DEFAULT_IMAGE_TO_VIDEO_MODEL = "grok-imagine-video-1.5-preview"
+DEFAULT_IMAGE_TO_VIDEO_MODEL = "grok-imagine-video-1.5"
 DEFAULT_MODEL = DEFAULT_TEXT_TO_VIDEO_MODEL
 DEFAULT_DURATION = 8
 DEFAULT_ASPECT_RATIO = "16:9"
 DEFAULT_RESOLUTION = "720p"
 DEFAULT_TIMEOUT_SECONDS = 240
 DEFAULT_POLL_INTERVAL_SECONDS = 5
+DEFAULT_EXTEND_DURATION = 6

 VALID_ASPECT_RATIOS = {"1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3"}
 VALID_RESOLUTIONS = {"480p", "720p"}
@ -67,16 +66,20 @@ _MODELS: Dict[str, Dict[str, Any]] = {
        "price": "see https://docs.x.ai/developers/models/grok-imagine-video",
        "modalities": ["text", "image"],
    },
-    "grok-imagine-video-1.5-preview": {
-        "display": "Grok Imagine Video 1.5 Preview",
+    "grok-imagine-video-1.5": {
+        "display": "Grok Imagine Video 1.5",
        "speed": "~60-240s",
        "strengths": "Latest xAI image-to-video model.",
-        "price": "see https://docs.x.ai/developers/models/grok-imagine-video-1.5-preview",
+        "price": "see https://docs.x.ai/developers/pricing",
        "modalities": ["image"],
-        "aliases": ["grok-imagine-video-1.5-2026-05-30"],
    },
 }

+_IMAGE_TO_VIDEO_COMPAT_MODEL_IDS = {
+    "grok-imagine-video-1.5-preview",
+    "grok-imagine-video-1.5-2026-05-30",
+}
+

 # ---------------------------------------------------------------------------
 # HTTP helpers
@ -145,21 +148,114 @@ def _image_ref_to_xai_url(value: str) -> str:
    return f"data:{mime};base64,{encoded}"


-def _normalize_reference_images(reference_image_urls: Optional[List[str]]):
-    refs = []
+def _image_ref_to_xai_input(value: str) -> Optional[Dict[str, str]]:
+    """Wrap an image reference as an xAI ``{"url": ...}`` input.
+
+    The value is first normalized by ``_image_ref_to_xai_url``. Returns
+    ``None`` when the normalized result is empty or does not start
+    (case-insensitively) with ``http://``, ``https://`` or ``data:image/``.
+    """
+    accepted_prefixes = ("http://", "https://", "data:image/")
+    normalized = _image_ref_to_xai_url(value)
+    if normalized and normalized.lower().startswith(accepted_prefixes):
+        return {"url": normalized}
+    return None
+
+
+def _xai_video_output_urls(
+    video: Dict[str, Any],
+) -> Tuple[str, Optional[str], Optional[str]]:
+    """Return ``(public_video_url, temporary_url, stored_public_url)``.
+
+    ``public_video_url`` is ``file_output.public_url`` when present, otherwise
+    ``video.url``, otherwise ``""``. ``temporary_url`` is only reported when
+    both URLs exist and differ. String values are stripped; non-string values
+    are treated as missing.
+    """
+    raw_output = video.get("file_output")
+    file_output = raw_output if isinstance(raw_output, dict) else {}
+
+    raw_public = file_output.get("public_url")
+    stored_public = raw_public.strip() if isinstance(raw_public, str) else None
+
+    raw_temporary = video.get("url")
+    temporary = raw_temporary.strip() if isinstance(raw_temporary, str) else None
+
+    public_video_url = stored_public or temporary or ""
+    if temporary and stored_public and temporary != stored_public:
+        temporary_out = temporary
+    else:
+        temporary_out = None
+    return public_video_url, temporary_out, stored_public
+
+
+def _video_ref_to_xai_url(value: str) -> str:
+    """Normalize a video reference for use as an xAI video input.
+
+    Empty input yields ``""``. ``http(s)://`` URLs and ``data:video/`` URIs are
+    returned stripped but otherwise unchanged. An existing local file whose
+    guessed MIME type is ``video/*`` (defaulting to ``video/mp4`` when unknown)
+    is read in full and returned as a base64 ``data:`` URI. Anything else is
+    returned as the stripped input.
+    """
+    ref = (value or "").strip()
+    if not ref or ref.lower().startswith(("http://", "https://", "data:video/")):
+        return ref
+
+    candidate = Path(ref).expanduser()
+    if not candidate.is_file():
+        return ref
+
+    guessed, _encoding = mimetypes.guess_type(candidate.name)
+    mime = guessed or "video/mp4"
+    if not mime.startswith("video/"):
+        return ref
+
+    payload = base64.b64encode(candidate.read_bytes()).decode("ascii")
+    return "data:" + mime + ";base64," + payload
+
+
+async def _video_input_from_public_url(
+    value: str,
+    *,
+    api_key: str,
+    base_url: str,
+) -> Optional[Dict[str, str]]:
+    """Build the xAI ``video`` input (``{"url": ...}``) for edit/extend.
+
+    Accepts an ``http(s)://`` URL, which is passed through, or a local video
+    file, which is inlined as a ``data:video/...`` URI. Returns ``None`` for
+    empty input, for anything that is neither a URL nor an existing video
+    file, and for strings that cannot be probed as a path.
+
+    ``api_key`` and ``base_url`` are accepted for interface compatibility and
+    are not used here.
+    """
+    ref = (value or "").strip()
+    if not ref:
+        return None
+
+    # Check the scheme first so URLs never reach the filesystem probe below.
+    if ref.lower().startswith(("http://", "https://")):
+        return {"url": ref}
+
+    try:
+        is_local_file = Path(ref).expanduser().is_file()
+    except (OSError, RuntimeError, ValueError):
+        # Over-long strings (e.g. a pasted data URI) or an unresolvable
+        # ``~user`` prefix are invalid input, not an unexpected failure.
+        return None
+    if not is_local_file:
+        return None
+
+    data_ref = _video_ref_to_xai_url(ref)
+    # ``_video_ref_to_xai_url`` hands back the raw path when the file is not a
+    # video; never forward a local path to the API as if it were a URL.
+    if not data_ref.startswith("data:"):
+        return None
+    return {"url": data_ref}
+
+
+def _normalize_reference_images(
+    reference_image_urls: Optional[List[str]],
+) -> Tuple[Optional[List[Dict[str, str]]], Optional[str]]:
+    """Convert reference image inputs into xAI ``{"url": ...}`` entries.
+
+    Returns ``(refs, error)``. Blank or ``None`` entries are skipped. The
+    first entry that ``_image_ref_to_xai_input`` rejects aborts the whole call
+    with ``(None, <message>)``. On success ``error`` is ``None`` and ``refs``
+    is the list of entries, or ``None`` when no usable entry was given.
+    """
+    refs: List[Dict[str, str]] = []
    for url in reference_image_urls or []:
-        normalized = _image_ref_to_xai_url(url)
-        if normalized:
-            refs.append({"url": normalized})
-    return refs or None
+        cleaned = (url or "").strip()
+        if not cleaned:
+            continue
+        normalized = _image_ref_to_xai_input(cleaned)
+        if not normalized:
+            return None, (
+                "reference_image_urls must be public HTTPS URLs or data URIs "
+                "(e.g. the `image`/`public_url` from a prior Imagine result)"
+            )
+        refs.append(normalized)
+    return (refs if refs else None), None


-def _clamp_duration(duration: Optional[int], has_reference_images: bool) -> int:
-    value = duration if duration is not None else DEFAULT_DURATION
+def _clamp_duration(
+    duration: Optional[int],
+    *,
+    has_reference_images: bool = False,
+    max_seconds: int = 15,
+    default: int = DEFAULT_DURATION,
+) -> int:
+    """Clamp a requested duration into the supported range.
+
+    ``None`` falls back to ``default``. The value is raised to at least 1,
+    lowered to at most ``max_seconds``, and additionally capped at 10 when
+    ``has_reference_images`` is true.
+    """
+    value = duration if duration is not None else default
    if value < 1:
        value = 1
-    if value > 15:
-        value = 15
+    if value > max_seconds:
+        value = max_seconds
    if has_reference_images and value > 10:
        value = 10
    return value
@ -173,7 +269,7 @@ def _resolve_model_for_modality(
 ) -> str:
    """Select xAI's text/video model without treating config as a prompt override.

-    ``grok-imagine-video-1.5-preview`` currently rejects text-only video
+    ``grok-imagine-video-1.5`` currently rejects text-only video
    generation, but it is the desired image-to-video backend. Explicit tool
    ``model=`` still wins for users who intentionally request another model.
    """
@ -182,7 +278,7 @@ def _resolve_model_for_modality(
        return requested
    if modality == "image":
        return DEFAULT_IMAGE_TO_VIDEO_MODEL
-    if requested == DEFAULT_IMAGE_TO_VIDEO_MODEL:
+    if requested == DEFAULT_IMAGE_TO_VIDEO_MODEL or requested in _IMAGE_TO_VIDEO_COMPAT_MODEL_IDS:
        return DEFAULT_TEXT_TO_VIDEO_MODEL
    return requested or DEFAULT_TEXT_TO_VIDEO_MODEL

@ -193,11 +289,11 @@ async def _submit(
    *,
    api_key: str,
    base_url: str,
+    endpoint: str = "generations",
 ) -> str:
-    """POST to /videos/generations — xAI's only public endpoint for our
-    text-to-video and image-to-video surface."""
+    """POST to one of xAI's async video endpoints and return request_id."""
    response = await client.post(
-        f"{base_url}/videos/generations",
+        f"{base_url}/videos/{endpoint}",
        headers={**_xai_headers(api_key), "x-idempotency-key": str(uuid.uuid4())},
        json=payload,
        timeout=60,
@ -248,7 +344,7 @@ async def _poll(


 class XAIVideoGenProvider(VideoGenProvider):
-    """xAI Grok Imagine video backend (text-to-video + image-to-video)."""
+    """xAI Grok Imagine video backend."""

    @property
    def name(self) -> str:
@ -275,10 +371,25 @@ class XAIVideoGenProvider(VideoGenProvider):
        # Grok OAuth (SuperGrok / Premium+) — TTS / image gen / video gen
        # all share the same credential resolver. The hook offers an
        # OAuth-vs-API-key choice when neither is configured.
+        try:
+            from tools.xai_http import xai_storage_notice_text
+
+            storage_notice = xai_storage_notice_text("video_gen")
+        except Exception:
+            storage_notice = ""
+        tag = (
+            "grok-imagine-video for text/reference; "
+            "grok-imagine-video-1.5 for image-to-video; "
+            "edit/extend: pass the stored public HTTPS MP4 (`video` / "
+            "`public_url` from a prior Imagine result); uses xAI Grok OAuth "
+            "or XAI_API_KEY"
+        )
+        if storage_notice:
+            tag += f". {storage_notice}"
        return {
            "name": "xAI Grok Imagine",
            "badge": "paid",
-            "tag": "grok-imagine-video for text-to-video; grok-imagine-video-1.5-preview for image-to-video; uses xAI Grok OAuth or XAI_API_KEY",
+            "tag": tag,
            "env_vars": [],
            "post_setup": "xai_grok",
        }
@ -310,189 +421,479 @@ class XAIVideoGenProvider(VideoGenProvider):
        seed: Optional[int] = None,
        **kwargs: Any,
    ) -> Dict[str, Any]:
-        try:
-            loop = asyncio.new_event_loop()
-            try:
-                return loop.run_until_complete(self._generate_async(
-                    prompt=prompt,
-                    model=model,
-                    explicit_model=bool(kwargs.get("_model_override_explicit")),
-                    image_url=image_url,
-                    reference_image_urls=reference_image_urls,
-                    duration=duration,
-                    aspect_ratio=aspect_ratio,
-                    resolution=resolution,
-                ))
-            finally:
-                loop.close()
-        except Exception as exc:
-            logger.warning("xAI video gen unexpected failure: %s", exc, exc_info=True)
-            return error_response(
-                error=f"xAI video generation failed: {exc}",
-                error_type="api_error",
-                provider="xai",
-                model=model or DEFAULT_MODEL,
-                prompt=prompt,
-                aspect_ratio=aspect_ratio,
-            )
-
-    async def _generate_async(
-        self,
-        *,
-        prompt: str,
-        model: Optional[str],
-        explicit_model: bool,
-        image_url: Optional[str],
-        reference_image_urls: Optional[List[str]],
-        duration: Optional[int],
-        aspect_ratio: str,
-        resolution: str,
-    ) -> Dict[str, Any]:
-        api_key, base_url = _resolve_xai_credentials()
-        if not api_key:
-            return error_response(
-                error=(
-                    "No xAI credentials found. Sign in via `hermes auth add xai-oauth` "
-                    "(SuperGrok / Premium+) or set XAI_API_KEY from "
-                    "https://console.x.ai/."
-                ),
-                error_type="auth_required",
-                provider="xai", prompt=prompt,
-            )
-
-        prompt = (prompt or "").strip()
-        image_url_norm = _image_ref_to_xai_url(image_url or "") or None
-        normalized_aspect_ratio = (aspect_ratio or DEFAULT_ASPECT_RATIO).strip()
-        normalized_resolution = (resolution or DEFAULT_RESOLUTION).strip().lower()
-        modality_used = "image" if image_url_norm else "text"
-        resolved_model = _resolve_model_for_modality(
-            model,
-            modality=modality_used,
-            explicit_model=explicit_model,
+        return run_xai_video_generation(
+            prompt=prompt,
+            model=model,
+            explicit_model=bool(kwargs.get("_model_override_explicit")),
+            image_url=image_url,
+            reference_image_urls=reference_image_urls,
+            duration=duration,
+            aspect_ratio=aspect_ratio,
+            resolution=resolution,
        )

-        if not prompt:
+
+def has_xai_video_credentials() -> bool:
+    """Report whether ``_resolve_xai_credentials`` yields a non-empty key."""
+    resolved_key, _resolved_base_url = _resolve_xai_credentials()
+    return bool(resolved_key)
+
+
+def run_xai_video_generation(
+    *,
+    prompt: str,
+    model: Optional[str],
+    explicit_model: bool,
+    image_url: Optional[str],
+    reference_image_urls: Optional[List[str]],
+    duration: Optional[int],
+    aspect_ratio: str,
+    resolution: str,
+) -> Dict[str, Any]:
+    """Run an xAI video generation synchronously and return its response dict.
+
+    Builds the ``_generate_xai_video_async`` coroutine from the given arguments
+    and drives it through ``_run_xai_video_coroutine`` with the operation
+    labelled ``"generation"``.
+    """
+    job = _generate_xai_video_async(
+        prompt=prompt,
+        model=model,
+        explicit_model=explicit_model,
+        image_url=image_url,
+        reference_image_urls=reference_image_urls,
+        duration=duration,
+        aspect_ratio=aspect_ratio,
+        resolution=resolution,
+    )
+    return _run_xai_video_coroutine(
+        job,
+        operation_label="generation",
+        model=model,
+        prompt=prompt,
+        aspect_ratio=aspect_ratio,
+    )
+
+
+def run_xai_video_edit(
+    *,
+    prompt: str,
+    video_url: str,
+    model: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Run an xAI video edit synchronously and return its response dict.
+
+    Drives ``_edit_xai_video_async`` through ``_run_xai_video_coroutine`` with
+    the operation labelled ``"edit"``; ``DEFAULT_ASPECT_RATIO`` is used for
+    the error-response metadata.
+    """
+    job = _edit_xai_video_async(prompt=prompt, video_url=video_url, model=model)
+    return _run_xai_video_coroutine(
+        job,
+        operation_label="edit",
+        model=model,
+        prompt=prompt,
+        aspect_ratio=DEFAULT_ASPECT_RATIO,
+    )
+
+
+def run_xai_video_extend(
+    *,
+    prompt: str,
+    video_url: str,
+    duration: Optional[int] = None,
+    model: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Run an xAI video extension synchronously and return its response dict.
+
+    Drives ``_extend_xai_video_async`` through ``_run_xai_video_coroutine``
+    with the operation labelled ``"extend"``; ``DEFAULT_ASPECT_RATIO`` is used
+    for the error-response metadata.
+    """
+    job = _extend_xai_video_async(
+        prompt=prompt,
+        video_url=video_url,
+        duration=duration,
+        model=model,
+    )
+    return _run_xai_video_coroutine(
+        job,
+        operation_label="extend",
+        model=model,
+        prompt=prompt,
+        aspect_ratio=DEFAULT_ASPECT_RATIO,
+    )
+
+
+def _run_xai_video_coroutine(
+    coro,
+    *,
+    operation_label: str,
+    model: Optional[str],
+    prompt: str,
+    aspect_ratio: str,
+) -> Dict[str, Any]:
+    """Run ``coro`` to completion on a private event loop.
+
+    Returns whatever the coroutine returns. Any exception raised while
+    creating the loop or running the coroutine is logged and converted into
+    an ``error_response`` with ``error_type="api_error"``; ``operation_label``
+    is used in the log line and error text, and ``model`` / ``prompt`` /
+    ``aspect_ratio`` are echoed into that response.
+    """
+    try:
+        loop = asyncio.new_event_loop()
+        try:
+            return loop.run_until_complete(coro)
+        finally:
+            loop.close()
+    except Exception as exc:
+        logger.warning("xAI video %s unexpected failure: %s", operation_label, exc, exc_info=True)
+        return error_response(
+            error=f"xAI video {operation_label} failed: {exc}",
+            error_type="api_error",
+            provider="xai",
+            model=model or DEFAULT_MODEL,
+            prompt=prompt,
+            aspect_ratio=aspect_ratio,
+        )
+    finally:
+        # If the loop could not start (e.g. another loop is already running in
+        # this thread) the coroutine was never awaited; closing it releases it
+        # and avoids a "never awaited" RuntimeWarning. Closing a coroutine
+        # that already finished is a no-op.
+        close = getattr(coro, "close", None)
+        if callable(close):
+            close()
+
+
+async def _generate_xai_video_async(
+    *,
+    prompt: str,
+    model: Optional[str],
+    explicit_model: bool,
+    image_url: Optional[str],
+    reference_image_urls: Optional[List[str]],
+    duration: Optional[int],
+    aspect_ratio: str,
+    resolution: str,
+) -> Dict[str, Any]:
+    api_key, base_url = _resolve_xai_credentials()
+    if not api_key:
+        return _auth_required_response(prompt)
+
+    prompt = (prompt or "").strip()
+    image_input = None
+    if (image_url or "").strip():
+        image_input = _image_ref_to_xai_input(image_url)
+        if not image_input:
            return error_response(
                error=(
-                    "prompt is required for xAI video generation "
-                    "(text-to-video or image-to-video)"
+                    "image_url must be a public HTTPS URL or data URI "
+                    "(e.g. the `image`/`public_url` from a prior Imagine result)"
                ),
-                error_type="missing_prompt",
-                provider="xai", prompt=prompt,
-            )
-
-        refs = _normalize_reference_images(reference_image_urls)
-        if refs and len(refs) > MAX_REFERENCE_IMAGES:
-            return error_response(
-                error=f"reference_image_urls supports at most {MAX_REFERENCE_IMAGES} images on xAI",
-                error_type="too_many_references",
-                provider="xai", prompt=prompt,
-            )
-        if image_url_norm and refs:
-            return error_response(
-                error="image_url and reference_image_urls cannot be combined on xAI",
-                error_type="conflicting_inputs",
-                provider="xai", prompt=prompt,
-            )
-
-        clamped_duration = _clamp_duration(duration, has_reference_images=bool(refs))
-
-        if normalized_aspect_ratio not in VALID_ASPECT_RATIOS:
-            normalized_aspect_ratio = DEFAULT_ASPECT_RATIO
-        if normalized_resolution not in VALID_RESOLUTIONS:
-            normalized_resolution = DEFAULT_RESOLUTION
-
-        payload: Dict[str, Any] = {
-            "model": resolved_model,
-            "prompt": prompt,
-            "duration": clamped_duration,
-            "aspect_ratio": normalized_aspect_ratio,
-            "resolution": normalized_resolution,
-        }
-        if image_url_norm:
-            payload["image"] = {"url": image_url_norm}
-        if refs:
-            payload["reference_images"] = refs
-
-        async with httpx.AsyncClient() as client:
-            try:
-                request_id = await _submit(
-                    client, payload, api_key=api_key, base_url=base_url
-                )
-            except httpx.HTTPStatusError as exc:
-                detail = ""
-                try:
-                    detail = exc.response.text[:500]
-                except Exception:
-                    pass
-                return error_response(
-                    error=f"xAI submit failed ({exc.response.status_code}): {detail or exc}",
-                    error_type="api_error",
-                    provider="xai",
-                    model=resolved_model,
-                    prompt=prompt,
-                )
-
-            poll_result = await _poll(
-                client, request_id,
-                api_key=api_key, base_url=base_url,
-                timeout_seconds=DEFAULT_TIMEOUT_SECONDS,
-                poll_interval=DEFAULT_POLL_INTERVAL_SECONDS,
-            )
-
-        status = poll_result["status"]
-        body = poll_result["body"]
-
-        if status == "done":
-            video = body.get("video") or {}
-            url = video.get("url")
-            if not url:
-                return error_response(
-                    error="xAI video generation completed without a video URL",
-                    error_type="empty_response",
-                    provider="xai",
-                    model=body.get("model") or resolved_model,
-                    prompt=prompt,
-                )
-            extra: Dict[str, Any] = {
-                "request_id": request_id,
-                "resolution": normalized_resolution,
-            }
-            if body.get("usage"):
-                extra["usage"] = body["usage"]
-            return success_response(
-                video=url,
-                model=body.get("model") or resolved_model,
-                prompt=prompt,
-                modality=modality_used,
-                aspect_ratio=normalized_aspect_ratio,
-                duration=video.get("duration") or clamped_duration,
+                error_type="invalid_image_url",
                provider="xai",
-                extra=extra,
+                prompt=prompt,
            )
+    normalized_aspect_ratio = (aspect_ratio or DEFAULT_ASPECT_RATIO).strip()
+    normalized_resolution = (resolution or DEFAULT_RESOLUTION).strip().lower()
+    refs, refs_error = _normalize_reference_images(reference_image_urls)
+    if refs_error:
+        return error_response(
+            error=refs_error,
+            error_type="invalid_reference_image_urls",
+            provider="xai",
+            prompt=prompt,
+        )

-        if status == "timeout":
+    if not prompt:
+        return error_response(
+            error="prompt is required for xAI video generation",
+            error_type="missing_prompt",
+            provider="xai", prompt=prompt,
+        )
+    if refs and len(refs) > MAX_REFERENCE_IMAGES:
+        return error_response(
+            error=f"reference_image_urls supports at most {MAX_REFERENCE_IMAGES} images on xAI",
+            error_type="too_many_references",
+            provider="xai", prompt=prompt,
+        )
+    if image_input and refs:
+        return error_response(
+            error="image_url and reference_image_urls cannot be combined on xAI",
+            error_type="conflicting_inputs",
+            provider="xai", prompt=prompt,
+        )
+
+    if normalized_aspect_ratio not in VALID_ASPECT_RATIOS:
+        normalized_aspect_ratio = DEFAULT_ASPECT_RATIO
+    if normalized_resolution not in VALID_RESOLUTIONS:
+        normalized_resolution = DEFAULT_RESOLUTION
+
+    modality_used = "reference" if refs else ("image" if image_input else "text")
+    resolved_model = _resolve_model_for_modality(
+        model,
+        modality=modality_used,
+        explicit_model=explicit_model,
+    )
+    if refs and resolved_model != DEFAULT_TEXT_TO_VIDEO_MODEL:
+        if explicit_model:
            return error_response(
-                error=f"Timed out waiting for video generation after {DEFAULT_TIMEOUT_SECONDS}s",
-                error_type="timeout",
+                error=(
+                    "xAI reference-to-video requires "
+                    f"{DEFAULT_TEXT_TO_VIDEO_MODEL}; got {resolved_model}"
+                ),
+                error_type="unsupported_model",
+                provider="xai",
+                model=resolved_model,
+                prompt=prompt,
+            )
+        resolved_model = DEFAULT_TEXT_TO_VIDEO_MODEL
+
+    clamped_duration = _clamp_duration(duration, has_reference_images=bool(refs))
+    payload = {
+        "model": resolved_model,
+        "prompt": prompt,
+        "duration": clamped_duration,
+        "aspect_ratio": normalized_aspect_ratio,
+        "resolution": normalized_resolution,
+    }
+    if image_input:
+        payload["image"] = image_input
+    if refs:
+        payload["reference_images"] = refs
+
+    return await _submit_xai_video_payload(
+        api_key=api_key,
+        base_url=base_url,
+        endpoint="generations",
+        payload=payload,
+        prompt=prompt,
+        resolved_model=resolved_model,
+        modality=modality_used,
+        aspect_ratio=normalized_aspect_ratio,
+        duration=clamped_duration,
+        operation="generate",
+        resolution=normalized_resolution,
+    )
+
+
+async def _run_xai_video_mutation(
+    *,
+    prompt: str,
+    video_url: str,
+    model: Optional[str],
+    endpoint: str,
+    operation: str,
+    duration: int,
+) -> Dict[str, Any]:
+    """Edit or extend using a public HTTPS ``video_url`` input (``url`` on the wire).
+
+    Args:
+        prompt: Instruction text; surrounding whitespace is stripped.
+        video_url: Source video reference, resolved through
+            ``_video_input_from_public_url``.
+        model: Optional model override; treated as explicit when truthy.
+        endpoint: Endpoint name forwarded to ``_submit_xai_video_payload``
+            (callers in this module pass ``"edits"`` or ``"extensions"``).
+        operation: Label reported back as the modality/operation of the result.
+        duration: Duration in seconds; only placed in the request payload
+            when ``endpoint == "extensions"``.
+
+    Returns:
+        The dict produced by ``_submit_xai_video_payload``, or an error
+        response dict when credentials, the prompt, or the video input are
+        missing.
+    """
+    api_key, base_url = _resolve_xai_credentials()
+    if not api_key:
+        return _auth_required_response(prompt)
+
+    prompt = (prompt or "").strip()
+    # Check the prompt before resolving the video input: the resolver is an
+    # awaited helper that receives the API credentials, so an empty prompt
+    # should fail fast without invoking it. Error precedence is unchanged.
+    if not prompt:
+        return error_response(
+            error="prompt is required for xAI video edit/extend",
+            error_type="missing_prompt",
+            provider="xai",
+            prompt=prompt,
+        )
+
+    video_input = await _video_input_from_public_url(
+        video_url or "",
+        api_key=api_key,
+        base_url=base_url,
+    )
+    if not video_input:
+        return error_response(
+            error=(
+                "video_url must be a public HTTPS MP4 URL "
+                "(the `video`/`public_url` from a prior Imagine result)"
+            ),
+            error_type="missing_video",
+            provider="xai",
+            prompt=prompt,
+        )
+
+    resolved_model = _resolve_model_for_modality(
+        model,
+        modality="text",
+        explicit_model=bool(model),
+    )
+    payload: Dict[str, Any] = {
+        "model": resolved_model,
+        "prompt": prompt,
+        "video": video_input,
+    }
+    # Only the extensions endpoint receives a duration on the wire.
+    if endpoint == "extensions":
+        payload["duration"] = duration
+
+    return await _submit_xai_video_payload(
+        api_key=api_key,
+        base_url=base_url,
+        endpoint=endpoint,
+        payload=payload,
+        prompt=prompt,
+        resolved_model=resolved_model,
+        modality=operation,
+        aspect_ratio=DEFAULT_ASPECT_RATIO,
+        duration=duration,
+        operation=operation,
+    )
+
+
+async def _edit_xai_video_async(
+    *,
+    prompt: str,
+    video_url: str,
+    model: Optional[str],
+) -> Dict[str, Any]:
+    """Run an xAI video edit through ``_run_xai_video_mutation``.
+
+    Uses the ``"edits"`` endpoint, the ``"edit"`` operation label, and
+    ``DEFAULT_DURATION`` as the duration argument.
+    """
+    mutation_kwargs: Dict[str, Any] = {
+        "prompt": prompt,
+        "video_url": video_url,
+        "model": model,
+        "endpoint": "edits",
+        "operation": "edit",
+        "duration": DEFAULT_DURATION,
+    }
+    return await _run_xai_video_mutation(**mutation_kwargs)
+
+
+async def _extend_xai_video_async(
+    *,
+    prompt: str,
+    video_url: str,
+    duration: Optional[int],
+    model: Optional[str],
+) -> Dict[str, Any]:
+    """Run an xAI video extension through ``_run_xai_video_mutation``.
+
+    The requested duration is first passed through ``_clamp_duration`` with
+    ``max_seconds=10`` and ``default=DEFAULT_EXTEND_DURATION``.
+    """
+    extend_seconds = _clamp_duration(
+        duration, max_seconds=10, default=DEFAULT_EXTEND_DURATION,
+    )
+    outcome = await _run_xai_video_mutation(
+        prompt=prompt,
+        video_url=video_url,
+        model=model,
+        endpoint="extensions",
+        operation="extend",
+        duration=extend_seconds,
+    )
+    return outcome
+
+
+def _auth_required_response(prompt: str) -> Dict[str, Any]:
+    """Build the ``auth_required`` error response used when no xAI API key resolves."""
+    message = (
+        "No xAI credentials found. Sign in via `hermes auth add xai-oauth` "
+        "(SuperGrok / Premium+) or set XAI_API_KEY from "
+        "https://console.x.ai/."
+    )
+    return error_response(
+        error=message,
+        error_type="auth_required",
+        provider="xai",
+        prompt=prompt,
+    )
+
+
+async def _submit_xai_video_payload(
+    *,
+    api_key: str,
+    base_url: str,
+    endpoint: str,
+    payload: Dict[str, Any],
+    prompt: str,
+    resolved_model: str,
+    modality: str,
+    aspect_ratio: str,
+    duration: int,
+    operation: str,
+    resolution: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Submit a video payload to xAI, poll it, and shape the result dict.
+
+    Storage options from ``tools.xai_http`` are attached to ``payload`` when
+    they can be built; if importing or calling those helpers raises, the
+    request proceeds without ``storage_options``.
+
+    Returns a ``success_response`` dict when polling ends with status
+    ``"done"`` and a video URL is available; otherwise an ``error_response``
+    dict with ``error_type`` of ``api_error``, ``empty_response``,
+    ``timeout`` or ``xai_<status>``.
+    """
+    try:
+        from tools.xai_http import (
+            build_xai_storage_options,
+            maybe_mark_xai_storage_notice_seen,
+            read_xai_imagine_storage_config,
+        )
+
+        storage_options = build_xai_storage_options(
+            "video_gen",
+            filename_prefix="hermes-xai-video",
+            extension="mp4",
+        )
+        storage_notice = maybe_mark_xai_storage_notice_seen("video_gen")
+        storage_cfg = read_xai_imagine_storage_config("video_gen")
+    except Exception:
+        # Best-effort: any failure above falls back to "storage disabled".
+        storage_options = None
+        storage_notice = None
+        storage_cfg = {"enabled": False}
+    # NOTE: this writes into the caller-supplied payload dict in place.
+    if storage_options is not None:
+        payload["storage_options"] = storage_options
+
+    async with httpx.AsyncClient() as client:
+        try:
+            request_id = await _submit(
+                client, payload, api_key=api_key, base_url=base_url,
+                endpoint=endpoint,
+            )
+        except httpx.HTTPStatusError as exc:
+            detail = ""
+            try:
+                detail = exc.response.text[:500]
+            except Exception:
+                # Reading the body is optional; the message below falls back to ``exc``.
+                pass
+            return error_response(
+                error=f"xAI submit failed ({exc.response.status_code}): {detail or exc}",
+                error_type="api_error",
                 provider="xai",
                 model=resolved_model,
                 prompt=prompt,
             )

-        message = (
-            (body.get("error", {}) or {}).get("message")
-            or body.get("message")
-            or f"xAI video generation ended with status '{status}'"
+        poll_result = await _poll(
+            client, request_id,
+            api_key=api_key, base_url=base_url,
+            timeout_seconds=DEFAULT_TIMEOUT_SECONDS,
+            poll_interval=DEFAULT_POLL_INTERVAL_SECONDS,
         )
+
+    status = poll_result["status"]
+    body = poll_result["body"]
+
+    if status == "done":
+        video = body.get("video") or {}
+        if not isinstance(video, dict):
+            video = {}
+        file_output = video.get("file_output") if isinstance(video.get("file_output"), dict) else {}
+        file_output = file_output or {}
+        # ``public_video_url`` becomes the returned ``video``; the other two
+        # URLs are only surfaced in ``extra`` when they are truthy.
+        public_video_url, temporary_url, stored_public_url = _xai_video_output_urls(video)
+        if not public_video_url:
+            return error_response(
+                error="xAI video request completed without a video URL",
+                error_type="empty_response",
+                provider="xai",
+                model=body.get("model") or resolved_model,
+                prompt=prompt,
+            )
+        extra: Dict[str, Any] = {
+            "request_id": request_id,
+            "operation": operation,
+            "storage_enabled": bool(storage_cfg.get("enabled")),
+        }
+        if resolution:
+            extra["resolution"] = resolution
+        if storage_notice:
+            extra["storage_notice"] = storage_notice
+        if stored_public_url:
+            extra["public_url"] = stored_public_url
+        if temporary_url:
+            extra["temporary_url"] = temporary_url
+        if file_output:
+            # Copy through only this fixed set of ``file_output`` keys.
+            for key in (
+                "filename",
+                "expires_at",
+                "public_url_expires_at",
+                "public_url_error",
+                "storage_error",
+            ):
+                if key in file_output:
+                    extra[key] = file_output[key]
+        if body.get("usage"):
+            extra["usage"] = body["usage"]
+        return success_response(
+            video=public_video_url,
+            model=body.get("model") or resolved_model,
+            prompt=prompt,
+            modality=modality,
+            aspect_ratio=aspect_ratio,
+            duration=video.get("duration") or duration,
+            provider="xai",
+            extra=extra,
+        )
+
+    if status == "timeout":
         return error_response(
-            error=message,
-            error_type=f"xai_{status}",
+            error=f"Timed out waiting for xAI video request after {DEFAULT_TIMEOUT_SECONDS}s",
+            error_type="timeout",
             provider="xai",
             model=resolved_model,
             prompt=prompt,
         )

+    # Any other status: prefer a message from the response body, else a generic one.
+    message = (
+        (body.get("error", {}) or {}).get("message")
+        or body.get("message")
+        or f"xAI video request ended with status '{status}'"
+    )
+    return error_response(
+        error=message,
+        error_type=f"xai_{status}",
+        provider="xai",
+        model=resolved_model,
+        prompt=prompt,
+    )
+

 # ---------------------------------------------------------------------------
 # Plugin entry point
--- a/plugins/video_gen/xai/plugin.yaml
+++ b/plugins/video_gen/xai/plugin.yaml
@ -1,6 +1,6 @@
 name: xai
 version: 1.0.0
-description: "xAI Grok Imagine video generation backend. Supports text-to-video, image-to-video, and reference-image-guided generation via the xAI async videos API."
+description: "xAI Grok Imagine video generation backend. Supports text-to-video, image-to-video, reference-to-video, video editing, video extension, and stored public URLs via the xAI async videos API."
 author: NousResearch
 kind: backend
 requires_env:
--- a/tests/plugins/image_gen/test_xai_provider.py
+++ b/tests/plugins/image_gen/test_xai_provider.py
@ -16,9 +16,17 @@ import pytest


 @pytest.fixture(autouse=True)
-def _fake_api_key(monkeypatch):
+def _fake_api_key(monkeypatch, tmp_path):
     """Ensure XAI_API_KEY is set for all tests."""
     monkeypatch.setenv("XAI_API_KEY", "test-key-12345")
+    # Also point HERMES_HOME at the per-test temp directory
+    # (presumably so config reads are isolated per test; confirm against hermes_cli.config).
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    # Best-effort: invalidate the config cache when the helper exists; any failure is ignored.
+    try:
+        import hermes_cli.config as cfg_mod
+
+        if hasattr(cfg_mod, "_invalidate_load_config_cache"):
+            cfg_mod._invalidate_load_config_cache()
+    except Exception:
+        pass


 # ---------------------------------------------------------------------------
@ -80,6 +88,13 @@ class TestXAIImageGenProvider:
        assert schema["env_vars"] == []
        assert schema["post_setup"] == "xai_grok"

+    def test_capabilities_expose_total_source_image_limit(self):
+        """Capabilities report 2 reference images and 3 source images in total."""
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        provider = XAIImageGenProvider()
+        reported = provider.capabilities()
+        assert reported["max_reference_images"] == 2
+        assert reported["max_source_images"] == 3
+

 # ---------------------------------------------------------------------------
 # Config tests
@ -318,6 +333,131 @@ class TestGenerate:
            f"resolution must be the literal '1k' or '2k', got {payload['resolution']!r}"
        )

+    def test_image_edit_rejects_bare_file_id_input(self):
+        """A bare ``file_...`` id as ``image_url`` is rejected before any HTTP call."""
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        fake_response = MagicMock()
+        fake_response.status_code = 200
+        fake_response.raise_for_status = MagicMock()
+        fake_response.json.return_value = {"data": [{"url": "https://xai.image/edited.png"}]}
+
+        post_patch = patch("plugins.image_gen.xai.requests.post", return_value=fake_response)
+        save_patch = patch("plugins.image_gen.xai.save_url_image", return_value="/tmp/edited.png")
+        with post_patch as mock_post, save_patch:
+            outcome = XAIImageGenProvider().generate(
+                prompt="make the robot red",
+                image_url="file_03eb65b1-aa97-482f-9ef0-b04f9172ea00",
+            )
+
+        assert outcome["success"] is False
+        assert outcome["error_type"] == "invalid_image_url"
+        mock_post.assert_not_called()
+
+    def test_image_edit_accepts_public_https_url(self):
+        """A public HTTPS ``image_url`` is forwarded as an ``image_url``-typed field."""
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        source_url = "https://files-cdn.x.ai/token/file_abc.png"
+
+        fake_response = MagicMock()
+        fake_response.status_code = 200
+        fake_response.raise_for_status = MagicMock()
+        fake_response.json.return_value = {"data": [{"url": "https://xai.image/edited.png"}]}
+
+        post_patch = patch("plugins.image_gen.xai.requests.post", return_value=fake_response)
+        save_patch = patch("plugins.image_gen.xai.save_url_image", return_value="/tmp/edited.png")
+        with post_patch as mock_post, save_patch:
+            outcome = XAIImageGenProvider().generate(
+                prompt="make the robot red",
+                image_url=source_url,
+            )
+
+        assert outcome["success"] is True
+        recorded = mock_post.call_args
+        sent = recorded.kwargs.get("json") or recorded[1].get("json")
+        assert sent["image"] == {"url": source_url, "type": "image_url"}
+
+    def test_multi_image_edit_rejects_bare_file_id_inputs(self):
+        """Bare file ids for the primary and reference images fail with ``invalid_image_url``."""
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        fake_response = MagicMock()
+        fake_response.status_code = 200
+        fake_response.raise_for_status = MagicMock()
+        fake_response.json.return_value = {"data": [{"url": "https://xai.image/edited.png"}]}
+
+        reference_ids = [
+            "file_54b48d6d-28ad-4982-9d72-bd3ac677c9bc",
+            "file_aa11bb22-cc33-44dd-88ee-ff0011223344",
+        ]
+
+        post_patch = patch("plugins.image_gen.xai.requests.post", return_value=fake_response)
+        save_patch = patch("plugins.image_gen.xai.save_url_image", return_value="/tmp/edited.png")
+        with post_patch as mock_post, save_patch:
+            outcome = XAIImageGenProvider().generate(
+                prompt="combine these robots into one product shot",
+                image_url="file_03eb65b1-aa97-482f-9ef0-b04f9172ea00",
+                reference_image_urls=reference_ids,
+            )
+
+        assert outcome["success"] is False
+        assert outcome["error_type"] == "invalid_image_url"
+        mock_post.assert_not_called()
+
+    def test_multi_image_edit_rejects_more_than_three_sources(self):
+        """One primary image plus three references is rejected as ``too_many_references``.
+
+        ``requests.post`` is patched so that a regression in the pre-flight
+        check cannot turn this test into a live network call, and so the
+        test can assert that no request was attempted.
+        """
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        with patch("plugins.image_gen.xai.requests.post") as mock_post:
+            provider = XAIImageGenProvider()
+            result = provider.generate(
+                prompt="combine too many references",
+                image_url="file_1",
+                reference_image_urls=["file_2", "file_3", "file_4"],
+            )
+
+        assert result["success"] is False
+        assert result["error_type"] == "too_many_references"
+        mock_post.assert_not_called()
+
+    def test_storage_options_are_sent_by_default(self):
+        """A plain generate call sends public-URL storage options with no ``expires_after``."""
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        fake_response = MagicMock()
+        fake_response.status_code = 200
+        fake_response.raise_for_status = MagicMock()
+        fake_response.json.return_value = {"data": [{"b64_json": "dGVzdA=="}]}
+
+        post_patch = patch("plugins.image_gen.xai.requests.post", return_value=fake_response)
+        save_patch = patch("plugins.image_gen.xai.save_b64_image", return_value="/tmp/test.png")
+        with post_patch as mock_post, save_patch:
+            XAIImageGenProvider().generate(prompt="test")
+
+        recorded = mock_post.call_args
+        sent = recorded.kwargs.get("json") or recorded[1].get("json")
+        storage = sent["storage_options"]
+        assert storage["public_url"] is True
+        assert "expires_after" not in sent["storage_options"]
+        assert sent["storage_options"]["filename"].endswith(".png")
+
+    def test_public_url_file_output_wins_over_temporary_url(self):
+        """``file_output.public_url`` is returned as the image; the temp URL is not downloaded."""
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        stored_url = "https://xai-files.example/stored.png"
+        api_item = {
+            "url": "https://imgen.x.ai/xai-tmp-imgen-test.jpeg",
+            "file_output": {
+                "file_id": "file-123",
+                "filename": "stored.png",
+                "public_url": stored_url,
+                "public_url_expires_at": 1234567890,
+            },
+        }
+
+        fake_response = MagicMock()
+        fake_response.status_code = 200
+        fake_response.raise_for_status = MagicMock()
+        fake_response.json.return_value = {"data": [api_item]}
+
+        post_patch = patch("plugins.image_gen.xai.requests.post", return_value=fake_response)
+        save_patch = patch("plugins.image_gen.xai.save_url_image")
+        with post_patch, save_patch as mock_save_url:
+            outcome = XAIImageGenProvider().generate(prompt="A cat playing piano")
+
+        assert outcome["success"] is True
+        assert outcome["image"] == "https://xai-files.example/stored.png"
+        assert outcome["public_url"] == "https://xai-files.example/stored.png"
+        assert "file_id" not in outcome
+        mock_save_url.assert_not_called()
+

 # ---------------------------------------------------------------------------
 # Registration test
@ -334,3 +474,21 @@ class TestRegistration:
        provider = mock_ctx.register_image_gen_provider.call_args[0][0]
        assert isinstance(provider, XAIImageGenProvider)
        assert provider.name == "xai"
+
+
+def test_xai_image_field_expands_user_home(tmp_path, monkeypatch):
+    """A ``~``-prefixed local image path is loaded and returned as a PNG data URI.
+
+    HOME and USERPROFILE are both pointed at ``tmp_path`` so that ``~/pic.png``
+    refers to the file written below.
+    """
+    from plugins.image_gen.xai import _xai_image_field
+
+    for env_name in ("HOME", "USERPROFILE"):
+        monkeypatch.setenv(env_name, str(tmp_path))
+    picture = tmp_path / "pic.png"
+    picture.write_bytes(b"\x89PNG\r\n\x1a\n")
+
+    image_field = _xai_image_field("~/pic.png")
+    assert image_field["type"] == "image_url"
+    assert image_field["url"].startswith("data:image/png;base64,")
--- a/tests/plugins/video_gen/test_xai_plugin.py
+++ b/tests/plugins/video_gen/test_xai_plugin.py
@ -32,9 +32,9 @@ def test_xai_provider_lists_text_and_current_image_video_models():
    ids = [model["id"] for model in models]

    assert ids[0] == "grok-imagine-video"
-    assert ids[1] == "grok-imagine-video-1.5-preview"
+    assert ids[1] == "grok-imagine-video-1.5"
    assert models[1]["modalities"] == ["image"]
-    assert models[1]["aliases"] == ["grok-imagine-video-1.5-2026-05-30"]
+    assert "aliases" not in models[1]


 def test_xai_routes_default_models_by_modality():
@ -49,7 +49,7 @@ def test_xai_routes_default_models_by_modality():
        "grok-imagine-video",
        modality="image",
        explicit_model=False,
-    ) == "grok-imagine-video-1.5-preview"
+    ) == "grok-imagine-video-1.5"
    assert _resolve_model_for_modality(
        "grok-imagine-video-1.5-preview",
        modality="text",
@ -62,15 +62,11 @@ def test_xai_routes_default_models_by_modality():
    ) == "grok-imagine-video-1.5-preview"


-def test_xai_capabilities_text_and_image_only():
-    """xAI was previously advertised with edit/extend operations. The
-    simplified surface only exposes text-to-video and image-to-video —
-    confirm those are the only modalities advertised."""
+def test_xai_capabilities_keep_generate_surface_only():
+    """``capabilities()`` lists only text/image modalities and has no ``operations`` key."""
     from plugins.video_gen.xai import XAIVideoGenProvider

     caps = XAIVideoGenProvider().capabilities()
     assert caps["modalities"] == ["text", "image"]
-    # No 'operations' key in the simplified surface
     assert "operations" not in caps
     assert caps["max_reference_images"] == 7

@ -148,3 +144,45 @@ def test_xai_no_operation_kwarg():
    assert result["success"] is False
    # auth_required, NOT some signature error
    assert result["error_type"] in {"auth_required", "api_error"}
+
+
+def test_xai_video_output_urls_prefers_stored_public_url():
+    """The stored ``file_output.public_url`` is returned first, ahead of the temporary ``url``."""
+    from plugins.video_gen.xai import _xai_video_output_urls
+
+    stored_url = "https://files-cdn.x.ai/token/file_abc.mp4"
+    temp_url = "https://vidgen.x.ai/xai-vidgen-bucket/out.mp4"
+    video_body = {
+        "url": temp_url,
+        "file_output": {
+            "public_url": stored_url,
+            "file_id": "file_abc",
+        },
+    }
+
+    primary, temporary, stored = _xai_video_output_urls(video_body)
+
+    assert primary == "https://files-cdn.x.ai/token/file_abc.mp4"
+    assert stored == "https://files-cdn.x.ai/token/file_abc.mp4"
+    assert temporary == "https://vidgen.x.ai/xai-vidgen-bucket/out.mp4"
+
+
+@pytest.mark.asyncio
+async def test_video_input_from_public_url_uses_url_field():
+    """A public HTTPS video URL is wrapped as ``{"url": <url>}``."""
+    from plugins.video_gen.xai import _video_input_from_public_url
+
+    public_video = "https://files-cdn.x.ai/kRQVP6PRQlioVAUNC3GAdg/file_1faca9c3-9411-46ad-bb41-b9b8527789e6.mp4"
+    credentials = {"api_key": "test-key", "base_url": "https://api.x.ai/v1"}
+
+    video_input = await _video_input_from_public_url(public_video, **credentials)
+
+    assert video_input == {"url": public_video}
+
+
+def test_video_input_from_public_url_rejects_bare_file_id():
+    """A bare ``file_...`` id is not accepted as a video input (result is ``None``)."""
+    import asyncio
+    from plugins.video_gen.xai import _video_input_from_public_url
+
+    pending = _video_input_from_public_url(
+        "file_1faca9c3-9411-46ad-bb41-b9b8527789e6",
+        api_key="test-key",
+        base_url="https://api.x.ai/v1",
+    )
+    video_input = asyncio.run(pending)
+    assert video_input is None
--- a/tests/plugins/video_gen/test_xai_plugin_integration.py
+++ b/tests/plugins/video_gen/test_xai_plugin_integration.py
@ -122,7 +122,7 @@ class TestXAIPayload:
        provider, captured = xai_provider
        provider.generate("animate this", image_url="https://example.com/cat.png")
        payload = _last_post(captured)["json"]
-        assert payload["model"] == "grok-imagine-video-1.5-preview"
+        assert payload["model"] == "grok-imagine-video-1.5"
        assert payload["image"] == {"url": "https://example.com/cat.png"}

    def test_local_image_path_is_sent_as_data_uri(self, xai_provider, tmp_path):
@ -133,7 +133,7 @@ class TestXAIPayload:
        provider.generate("animate this", image_url=str(image_path))

        payload = _last_post(captured)["json"]
-        assert payload["model"] == "grok-imagine-video-1.5-preview"
+        assert payload["model"] == "grok-imagine-video-1.5"
        assert payload["image"]["url"].startswith("data:image/png;base64,")

    def test_explicit_model_override_is_honored_for_image(self, xai_provider):
--- a/tests/tools/test_video_generation_dispatch.py
+++ b/tests/tools/test_video_generation_dispatch.py
@ -35,6 +35,9 @@ class _RecordingProvider(VideoGenProvider):
    def default_model(self) -> Optional[str]:
        return "model-a"

+    def capabilities(self) -> Dict[str, Any]:
+        return {"modalities": ["text", "image"]}
+
    def generate(self, prompt, **kwargs):
        self.last_kwargs = {"prompt": prompt, **kwargs}
        modality = "image" if kwargs.get("image_url") else "text"
@ -113,14 +116,25 @@ class TestUnifiedDispatch:
        assert "error" in result
        assert "prompt" in result["error"].lower()

+    def test_edit_extend_args_are_rejected_by_generate_tool(self):
+        """Passing ``operation``/``video_url`` yields an error pointing at a provider-specific tool."""
+        video_gen_registry.register_provider(_RecordingProvider("rec"))
+        tool_args = {
+            "prompt": "make it rain",
+            "operation": "edit",
+            "video_url": "https://example.com/in.mp4",
+        }
+        outcome = self._run(tool_args)
+        assert "error" in outcome
+        assert "provider-specific tool" in outcome["error"]
+
    def test_provider_exception_caught(self):
        video_gen_registry.register_provider(_RaisingProvider())
        result = self._run({"prompt": "x"})
        assert result["success"] is False
        assert result["error_type"] == "provider_exception"

-    def test_operation_field_not_in_schema(self):
-        """Make sure we removed the operation field from the schema."""
+    def test_edit_extend_fields_not_in_schema(self):
+        """The generate schema exposes neither ``operation`` nor ``video_url``."""
         from tools.video_generation_tool import VIDEO_GENERATE_SCHEMA
-        assert "operation" not in VIDEO_GENERATE_SCHEMA["parameters"]["properties"]
-        assert "video_url" not in VIDEO_GENERATE_SCHEMA["parameters"]["properties"]
+        props = VIDEO_GENERATE_SCHEMA["parameters"]["properties"]
+        assert "operation" not in props
+        assert "video_url" not in props
--- a/tests/tools/test_video_generation_dynamic_schema.py
+++ b/tests/tools/test_video_generation_dynamic_schema.py
@ -1,4 +1,4 @@
-"""Tests for the dynamic schema builder under the simplified surface."""
+"""Tests for the dynamic schema builder."""

 from __future__ import annotations

@ -91,20 +91,13 @@ class TestDynamicSchemaBuilder:
        assert "No video backend is configured" in desc
        assert "hermes tools" in desc

-    def test_does_not_mention_edit_or_extend(self, cfg_home):
-        """The simplified surface only does text→video and image→video.
-        The description must not mention edit/extend anywhere."""
+    def test_generic_description_keeps_edit_extend_out_of_surface(self, cfg_home):
+        """The description states edit/extend are outside the unified surface.
+
+        The generic blurb must also not contain ``operation='edit'`` or
+        ``operation='extend'``.
+        """
         from tools.video_generation_tool import _build_dynamic_video_schema, _GENERIC_DESCRIPTION

         desc = _build_dynamic_video_schema()["description"]
-        # Block words that would suggest functionality we removed
-        assert "edit" not in desc.lower() or "audio" in desc.lower()  # 'audio' contains 'audi' not 'edit'
-        # Stronger: no occurrence of the words "edit" or "extend" as standalone
-        for forbidden in (" edit ", " edits ", " extend ", " extends "):
-            assert forbidden not in desc.lower(), f"description leaks '{forbidden.strip()}'"
-        # Sanity: the generic blurb itself is also clean
-        for forbidden in ("edit", "extend"):
-            assert forbidden not in _GENERIC_DESCRIPTION.lower()
+        assert "Video edit/extend workflows are not part of this unified surface" in desc
+        assert "operation='edit'" not in _GENERIC_DESCRIPTION
+        assert "operation='extend'" not in _GENERIC_DESCRIPTION

    def test_both_modalities_advertises_auto_routing(self, cfg_home):
        from tools.video_generation_tool import _build_dynamic_video_schema
@ -123,7 +116,6 @@ class TestDynamicSchemaBuilder:
        assert "Active backend: Both" in desc
        assert "text-to-video" in desc and "image-to-video" in desc
        assert "routes automatically" in desc
-        # operations bullet is gone
        assert "operations supported" not in desc

    def test_image_only_model_warns_about_required_image_url(self, cfg_home):
--- a/tests/tools/test_video_generation_tool_surface_matrix.py
+++ b/tests/tools/test_video_generation_tool_surface_matrix.py
@ -79,10 +79,21 @@ def matrix_env(tmp_path, monkeypatch):
            xai_calls.append({"url": url, "json": json})
            return _Resp({"request_id": "req-1"})
        async def get(self, url, headers=None, timeout=None):
+            payload = xai_calls[-1]["json"]
+            storage_options = payload.get("storage_options") or {}
            return _Resp({
                "status": "done",
-                "video": {"url": "https://xai-cdn/out.mp4", "duration": 8},
-                "model": xai_calls[-1]["json"].get("model", "grok-imagine-video"),
+                "video": {
+                    "url": "https://xai-cdn/out.mp4",
+                    "duration": 8,
+                    "file_output": {
+                        "file_id": "file-123",
+                        "filename": storage_options.get("filename", "out.mp4"),
+                        "public_url": "https://xai-files.example/out.mp4",
+                        "public_url_expires_at": 1234567890,
+                    },
+                },
+                "model": payload.get("model", "grok-imagine-video"),
            })
    import plugins.video_gen.xai as xai_plugin
    monkeypatch.setattr(xai_plugin.httpx, "AsyncClient", lambda: _Client())
@ -100,7 +111,7 @@ def matrix_env(tmp_path, monkeypatch):
    return tmp_path, fal_calls, xai_calls


-def _invoke_tool(home, cfg: dict, args: dict) -> dict:
+def _invoke_tool(home, cfg: dict, args: dict, tool_name: str = "video_generate") -> dict:
    """Write config, invoke the registered tool handler, return parsed JSON."""
    (home / "config.yaml").write_text(yaml.safe_dump(cfg))
    import hermes_cli.config as cfg_mod
@ -108,9 +119,9 @@ def _invoke_tool(home, cfg: dict, args: dict) -> dict:
        cfg_mod._invalidate_load_config_cache()

    from tools.registry import discover_builtin_tools, registry
-    if "video_generate" not in registry._tools:
+    if tool_name not in registry._tools:
        discover_builtin_tools()
-    handler = registry._tools["video_generate"].handler
+    handler = registry._tools[tool_name].handler
    return json.loads(handler(args))


@ -205,6 +216,11 @@ def test_xai_text_only_via_tool_surface(matrix_env):
    assert payload["model"] == "grok-imagine-video"
    assert "image" not in payload
    assert "reference_images" not in payload
+    assert payload["storage_options"]["public_url"] is True
+    assert "expires_after" not in payload["storage_options"]
+    assert result["video"] == "https://xai-files.example/out.mp4"
+    assert result["public_url"] == "https://xai-files.example/out.mp4"
+    assert result.get("temporary_url") == "https://xai-cdn/out.mp4"


 def test_xai_text_plus_image_via_tool_surface(matrix_env):
@ -222,10 +238,157 @@ def test_xai_text_plus_image_via_tool_surface(matrix_env):
    assert len(xai_calls) == 1
    assert xai_calls[0]["url"].endswith("/videos/generations")
    payload = xai_calls[0]["json"] or {}
-    assert payload["model"] == "grok-imagine-video-1.5-preview"
+    assert payload["model"] == "grok-imagine-video-1.5"
    assert payload["image"] == {"url": "https://example.com/img.png"}


+def test_xai_image_to_video_rejects_bare_file_id_via_tool_surface(matrix_env):
+    """A bare xAI file ID passed as ``image_url`` is rejected before any HTTP call.
+
+    Expects ``error_type == "invalid_image_url"`` and no recorded xAI request.
+    """
+    home, _, xai_calls = matrix_env
+
+    result = _invoke_tool(
+        home,
+        {"video_gen": {"provider": "xai"}},
+        {
+            "prompt": "animate this robot waving",
+            "image_url": "file_03eb65b1-aa97-482f-9ef0-b04f9172ea00",
+        },
+    )
+    assert result["success"] is False
+    assert result.get("error_type") == "invalid_image_url"
+    assert len(xai_calls) == 0
+
+
+def test_xai_reference_to_video_via_tool_surface(matrix_env):
+    """Reference images route to ``/videos/generations`` as ``reference_images``.
+
+    The requested duration of 15 is expected to reach the wire as 10, and
+    each reference URL is expected to be wrapped as ``{"url": ...}``.
+    """
+    home, _, xai_calls = matrix_env
+
+    result = _invoke_tool(
+        home,
+        {"video_gen": {"provider": "xai"}},
+        {
+            "prompt": "put the jacket from the reference on the runway model",
+            "reference_image_urls": [
+                "https://example.com/model.png",
+                "https://example.com/jacket.png",
+            ],
+            "duration": 15,
+        },
+    )
+    assert result["success"] is True
+    assert result["modality"] == "reference"
+    assert result["provider"] == "xai"
+
+    payload = xai_calls[0]["json"] or {}
+    assert xai_calls[0]["url"].endswith("/videos/generations")
+    assert payload["model"] == "grok-imagine-video"
+    assert payload["duration"] == 10
+    assert payload["reference_images"] == [
+        {"url": "https://example.com/model.png"},
+        {"url": "https://example.com/jacket.png"},
+    ]
+
+
+def test_xai_reference_to_video_rejects_bare_file_ids_via_tool_surface(matrix_env):
+    """Bare file IDs in ``reference_image_urls`` fail validation with no HTTP call.
+
+    Expects ``error_type == "invalid_reference_image_urls"``.
+    """
+    home, _, xai_calls = matrix_env
+
+    result = _invoke_tool(
+        home,
+        {"video_gen": {"provider": "xai"}},
+        {
+            "prompt": "use these references for a robot product shot",
+            "reference_image_urls": [
+                "file_03eb65b1-aa97-482f-9ef0-b04f9172ea00",
+                "file_54b48d6d-28ad-4982-9d72-bd3ac677c9bc",
+            ],
+        },
+    )
+    assert result["success"] is False
+    assert result.get("error_type") == "invalid_reference_image_urls"
+    assert len(xai_calls) == 0
+
+
+def test_xai_video_edit_via_tool_surface(matrix_env):
+    """``xai_video_edit`` posts to ``/videos/edits`` with the source video URL.
+
+    The edit payload is expected to omit ``duration``, ``aspect_ratio`` and
+    ``resolution``.
+    """
+    home, _, xai_calls = matrix_env
+
+    result = _invoke_tool(
+        home,
+        {"video_gen": {"provider": "xai"}},
+        {
+            "prompt": "make the sky stormy",
+            "video_url": "https://example.com/source.mp4",
+        },
+        tool_name="xai_video_edit",
+    )
+    assert result["success"] is True
+    assert result["modality"] == "edit"
+
+    payload = xai_calls[0]["json"] or {}
+    assert xai_calls[0]["url"].endswith("/videos/edits")
+    assert payload["model"] == "grok-imagine-video"
+    assert payload["video"] == {"url": "https://example.com/source.mp4"}
+    assert "duration" not in payload
+    assert "aspect_ratio" not in payload
+    assert "resolution" not in payload
+
+
+def test_xai_video_extend_via_tool_surface(matrix_env):
+    """``xai_video_extend`` posts to ``/videos/extensions`` with the source video URL.
+
+    The requested duration of 15 is expected to reach the wire as 10.
+    """
+    home, _, xai_calls = matrix_env
+
+    result = _invoke_tool(
+        home,
+        {"video_gen": {"provider": "xai"}},
+        {
+            "prompt": "the camera pulls back to reveal the city",
+            "video_url": "https://example.com/source.mp4",
+            "duration": 15,
+        },
+        tool_name="xai_video_extend",
+    )
+    assert result["success"] is True
+    assert result["modality"] == "extend"
+
+    payload = xai_calls[0]["json"] or {}
+    assert xai_calls[0]["url"].endswith("/videos/extensions")
+    assert payload["model"] == "grok-imagine-video"
+    assert payload["video"] == {"url": "https://example.com/source.mp4"}
+    assert payload["duration"] == 10
+
+
+def test_xai_video_edit_rejects_bare_file_id_via_tool_surface(matrix_env):
+    """``xai_video_edit`` refuses a bare file ID as ``video_url`` without calling xAI.
+
+    Only checks that an error mentioning "url" is returned; the exact error
+    shape is deliberately not pinned.
+    """
+    home, _, xai_calls = matrix_env
+
+    result = _invoke_tool(
+        home,
+        {"video_gen": {"provider": "xai"}},
+        {
+            "prompt": "make the sky stormy",
+            "video_url": "file-123",
+        },
+        tool_name="xai_video_edit",
+    )
+    assert result.get("success") is not True
+    assert "error" in result
+    assert "url" in result["error"].lower()
+    assert len(xai_calls) == 0
+
+
+def test_xai_video_extend_rejects_bare_file_id_via_tool_surface(matrix_env):
+    """``xai_video_extend`` refuses a bare file ID as ``video_url`` without calling xAI.
+
+    Only checks that an error mentioning "url" is returned; the exact error
+    shape is deliberately not pinned.
+    """
+    home, _, xai_calls = matrix_env
+
+    result = _invoke_tool(
+        home,
+        {"video_gen": {"provider": "xai"}},
+        {
+            "prompt": "continue into a sunrise",
+            "video_url": "file_25ac1c31-d6d8-48b2-8504-a97d282310c4",
+        },
+        tool_name="xai_video_extend",
+    )
+    assert result.get("success") is not True
+    assert "error" in result
+    assert "url" in result["error"].lower()
+    assert len(xai_calls) == 0
+
+
 def test_xai_explicit_model_override_via_tool_surface(matrix_env):
    home, _, xai_calls = matrix_env

--- a/tests/tools/test_xai_http_storage.py
+++ b/tests/tools/test_xai_http_storage.py
@ -0,0 +1,132 @@
+"""Tests for xAI Imagine storage helper behavior."""
+
+from __future__ import annotations
+
+import yaml
+
+
+def _invalidate_config_cache():
+    """Best-effort reset of the Hermes config cache between tests.
+
+    Silently does nothing when ``hermes_cli.config`` cannot be imported or
+    does not expose ``_invalidate_load_config_cache``.
+    """
+    try:
+        import hermes_cli.config as cfg_mod
+
+        invalidate = getattr(cfg_mod, "_invalidate_load_config_cache", None)
+        if invalidate is not None:
+            invalidate()
+    except Exception:
+        pass
+
+
+def test_storage_defaults_to_permanent_public_urls(tmp_path, monkeypatch):
+    """With HERMES_HOME pointed at an empty temp dir, storage defaults apply.
+
+    Expects a payload with ``public_url`` True, no ``expires_after``, and a
+    filename built from the given prefix and extension.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _invalidate_config_cache()
+
+    from tools.xai_http import build_xai_storage_options
+
+    storage = build_xai_storage_options(
+        "image_gen",
+        filename_prefix="hermes-xai-image",
+        extension="png",
+    )
+
+    assert storage is not None
+    assert storage["public_url"] is True
+    assert "expires_after" not in storage
+    assert storage["filename"].startswith("hermes-xai-image-")
+    assert storage["filename"].endswith(".png")
+
+
+def test_storage_can_be_disabled(tmp_path, monkeypatch):
+    """``video_gen.xai.storage.enabled: false`` disables storage entirely.
+
+    Expects no ``storage_options`` payload and an empty storage notice.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    (tmp_path / "config.yaml").write_text(yaml.safe_dump({
+        "video_gen": {
+            "xai": {
+                "storage": {
+                    "enabled": False,
+                },
+            },
+        },
+    }))
+    _invalidate_config_cache()
+
+    from tools.xai_http import build_xai_storage_options, xai_storage_notice_text
+
+    assert build_xai_storage_options(
+        "video_gen",
+        filename_prefix="hermes-xai-video",
+        extension="mp4",
+    ) is None
+    assert xai_storage_notice_text("video_gen") == ""
+
+
+def test_storage_can_be_permanent(tmp_path, monkeypatch):
+    """``expires_after: "permanent"`` yields a payload without ``expires_after``."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    (tmp_path / "config.yaml").write_text(yaml.safe_dump({
+        "image_gen": {
+            "xai": {
+                "storage": {
+                    "expires_after": "permanent",
+                },
+            },
+        },
+    }))
+    _invalidate_config_cache()
+
+    from tools.xai_http import build_xai_storage_options
+
+    storage = build_xai_storage_options(
+        "image_gen",
+        filename_prefix="hermes-xai-image",
+        extension="png",
+    )
+
+    assert storage is not None
+    assert "expires_after" not in storage
+
+
+def test_storage_can_use_finite_retention(tmp_path, monkeypatch):
+    """An integer ``expires_after`` (172800 s, i.e. two days) is passed through unchanged."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    (tmp_path / "config.yaml").write_text(yaml.safe_dump({
+        "image_gen": {
+            "xai": {
+                "storage": {
+                    "expires_after": 172800,
+                },
+            },
+        },
+    }))
+    _invalidate_config_cache()
+
+    from tools.xai_http import build_xai_storage_options
+
+    storage = build_xai_storage_options(
+        "image_gen",
+        filename_prefix="hermes-xai-image",
+        extension="png",
+    )
+
+    assert storage is not None
+    assert storage["expires_after"] == 172800
+
+
+def test_invalid_storage_retention_falls_back_to_bounded_ttl(tmp_path, monkeypatch):
+    """An unparseable ``expires_after`` string falls back to a 172800-second TTL.
+
+    Guards against a typo in the config silently producing permanent storage.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    (tmp_path / "config.yaml").write_text(yaml.safe_dump({
+        "video_gen": {
+            "xai": {
+                "storage": {
+                    "expires_after": "definitely-not-a-duration",
+                },
+            },
+        },
+    }))
+    _invalidate_config_cache()
+
+    from tools.xai_http import build_xai_storage_options
+
+    storage = build_xai_storage_options(
+        "video_gen",
+        filename_prefix="hermes-xai-video",
+        extension="mp4",
+    )
+
+    assert storage is not None
+    assert storage["expires_after"] == 172800
--- a/tools/video_generation_tool.py
+++ b/tools/video_generation_tool.py
@ -18,13 +18,11 @@ Generation.

 Unified surface
 ---------------
-One tool covers the common cases — text-to-video, image-to-video, video
-edit, video extend — with a compact schema:
+One tool covers the common cases - text-to-video, image-to-video, and
+reference-to-video - with a compact schema:

-    prompt                   text instruction (required for generate/edit)
-    operation                "generate" | "edit" | "extend"
-    image_url                drives image-to-video when operation=generate
-    video_url                source video for edit/extend
+    prompt                   text instruction (required)
+    image_url                drives image-to-video
    reference_image_urls     list, up to provider-declared cap
    duration                 seconds (provider clamps)
    aspect_ratio             "16:9" | "9:16" | "1:1" | ...
@ -38,6 +36,9 @@ Providers ignore parameters they do not support. The tool layer does
 **lightweight** validation (type/required-prompt) and lets each provider
 do its own clamping inside :meth:`VideoGenProvider.generate` — that keeps
 the tool surface stable as new providers ship with different capabilities.
+
+Video edit and video extend are intentionally not exposed here; providers with
+those workflows should expose separate tools.
 """

 from __future__ import annotations
@ -80,21 +81,20 @@ VIDEO_GENERATE_SCHEMA: Dict[str, Any] = {
            "image_url": {
                "type": "string",
                "description": (
-                    "Optional public URL of a still image. When provided, "
+                    "Optional public HTTPS URL of a still image. When provided, "
                    "the active backend routes to its image-to-video "
                    "endpoint (animate the image); when omitted, it routes "
-                    "to text-to-video. Pass either a URL the user supplied "
-                    "or a path/URL from the conversation."
+                    "to text-to-video. For xAI chaining, use the `image` or "
+                    "`public_url` HTTPS URL from a prior Imagine result."
                ),
            },
            "reference_image_urls": {
                "type": "array",
                "items": {"type": "string"},
                "description": (
-                    "Optional list of reference image URLs (style or "
-                    "character refs). Only supported by some backends; "
-                    "the active backend's description below indicates whether "
-                    "this is honored and what the max is."
+                    "Optional list of public HTTPS reference image URLs "
+                    "(style or character refs). For xAI chaining, use "
+                    "`image` or `public_url` from prior Imagine results."
                ),
            },
            "duration": {
@ -324,6 +324,11 @@ def _handle_video_generate(args: Dict[str, Any], **_kw: Any) -> str:
    # endpoint but our surface always needs a prompt.
    if not prompt:
        return tool_error("prompt is required for video generation")
+    if "operation" in args or "video_url" in args:
+        return tool_error(
+            "video_generate only supports text-to-video, image-to-video, and "
+            "reference-to-video; use a provider-specific tool for video edit/extend"
+        )

    # Resolve the active provider.
    configured = _read_configured_video_provider()
@ -398,13 +403,13 @@ def _handle_video_generate(args: Dict[str, Any], **_kw: Any) -> str:
 # Dynamic schema — reflect the active backend's actual capabilities
 # ---------------------------------------------------------------------------
 #
-# Why dynamic: the user's configured backend determines which operations
-# (generate/edit/extend), modalities (text / image / refs), aspect ratios,
-# resolutions, durations, and audio/negative-prompt flags are real. A model
-# that calls video_generate without knowing the active backend wastes a
-# turn on something like "fal-ai/veo3.1/image-to-video requires image_url".
-# Surfacing the per-model surface in the description means the model
-# usually gets the call right on the first try.
+# Why dynamic: the user's configured backend determines which modalities
+# (text / image / refs), aspect ratios, resolutions, durations, and
+# audio/negative-prompt flags are real. A model that calls video_generate
+# without knowing the active backend wastes a turn on something like
+# "fal-ai/veo3.1/image-to-video requires image_url". Surfacing the per-model
+# surface in the description means the model usually gets the call right on
+# the first try.
 #
 # Memoization: model_tools.get_tool_definitions() keys its cache on
 # config.yaml mtime, so when the user changes provider/model via
@ -412,11 +417,12 @@ def _handle_video_generate(args: Dict[str, Any], **_kw: Any) -> str:


 _GENERIC_DESCRIPTION = (
-    "Generate a video from a text prompt (text-to-video) or animate a "
-    "still image (image-to-video) using the user's configured video "
-    "generation backend. Pass `image_url` to animate that image; omit it "
-    "to generate from text alone. The backend auto-routes to the right "
-    "endpoint. The backend and model family are user-configured via "
+    "Generate a video from a text prompt (text-to-video), animate a "
+    "still image (image-to-video), or guide generation with reference images. "
+    "Pass `image_url` to animate an image or `reference_image_urls` for "
+    "reference-to-video. Video edit/extend workflows are not part of this "
+    "unified surface; use a dedicated provider-specific tool when one is "
+    "available. The backend and model family are user-configured via "
    "`hermes tools` → Video Generation; the agent does not pick them. "
    "Long-running generations may take 30 seconds to several minutes — "
    "the call blocks until the video is ready. Returns the result in the "
@ -542,6 +548,21 @@ def _build_dynamic_video_schema() -> Dict[str, Any]:
    max_refs = caps.get("max_reference_images") or 0
    if max_refs:
        parts.append(f"- reference_image_urls: up to {max_refs} images")
+    if configured == "xai":
+        parts.append(
+            "- chaining: for edit/extend pass the public HTTPS MP4 in `video` "
+            "or `public_url` from the prior Imagine result (files-cdn). For "
+            "image-to-video / reference-to-video pass public image URLs the "
+            "same way"
+        )
+        try:
+            from tools.xai_http import xai_storage_notice_text
+
+            notice = xai_storage_notice_text("video_gen")
+        except Exception:
+            notice = ""
+        if notice:
+            parts.append(f"- storage: {notice}")

    return {"description": "\n".join(parts)}

--- a/tools/xai_http.py
+++ b/tools/xai_http.py
@ -2,9 +2,15 @@

 from __future__ import annotations

+import datetime
 import json
 import os
-from typing import Dict
+import uuid
+from typing import Any, Dict, Optional
+
+
+MAX_XAI_STORAGE_EXPIRES_AFTER_SECONDS = 30 * 24 * 60 * 60
+SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS = 2 * 24 * 60 * 60


 def has_xai_credentials() -> bool:
@ -72,6 +78,149 @@ def hermes_xai_user_agent() -> str:
    return f"Hermes-Agent/{__version__}"


+def _load_config_section(section_name: str) -> Dict[str, Any]:
+    """Fetch one top-level section of the Hermes config.
+
+    Any failure (import error, config load error, non-mapping config or
+    non-mapping section) yields an empty dict so callers fall back to
+    their defaults.
+    """
+    try:
+        from hermes_cli.config import load_config
+
+        loaded = load_config()
+        if not isinstance(loaded, dict):
+            return {}
+        candidate = loaded.get(section_name)
+        if isinstance(candidate, dict):
+            return candidate
+        return {}
+    except Exception:
+        return {}
+
+
+def _coerce_bool(value: Any, default: bool) -> bool:
+    """Interpret a loosely-typed config value as a boolean.
+
+    Args:
+        value: Raw value read from config (bool, int, str, None, ...).
+        default: Result to use when ``value`` is not recognizable.
+
+    Returns:
+        ``value`` itself for real booleans; True/False for the integers 1/0
+        and for the common on/off spellings (case-insensitive, surrounding
+        whitespace ignored); ``default`` for everything else.
+    """
+    if isinstance(value, bool):
+        return value
+    # ``enabled: 0`` / ``enabled: 1`` in YAML load as ints, not strings.
+    # The string forms "0"/"1" are accepted below, so honor the ints too
+    # rather than silently falling back to the default.
+    if isinstance(value, int) and value in (0, 1):
+        return value == 1
+    if isinstance(value, str):
+        normalized = value.strip().lower()
+        if normalized in {"1", "true", "yes", "on", "enabled"}:
+            return True
+        if normalized in {"0", "false", "no", "off", "disabled"}:
+            return False
+    return default
+
+
+def _coerce_expires_after(value: Any) -> Optional[int]:
+    """Normalize an xAI storage TTL.
+
+    Args:
+        value: Raw ``expires_after`` config value (None, bool, str, number).
+
+    Returns:
+        int seconds for an expiring file, capped at
+        ``MAX_XAI_STORAGE_EXPIRES_AFTER_SECONDS``;
+        None for permanent storage (omit expires_after on the wire).
+        Values that cannot be interpreted fall back to
+        ``SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS`` so a typo never silently
+        produces permanent storage.
+    """
+    if value is None:
+        return None
+    if isinstance(value, bool):
+        # bool is an int subclass: without this guard ``expires_after: true``
+        # would be read as a one-second TTL. False keeps meaning "no expiry";
+        # True asks for expiry without a duration, so use the safe TTL.
+        return SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS if value else None
+    if isinstance(value, str):
+        normalized = value.strip().lower()
+        if normalized in {"", "default"}:
+            return None
+        if normalized in {"none", "null", "never", "permanent", "forever", "0"}:
+            return None
+        try:
+            value = int(normalized)
+        except ValueError:
+            return SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS
+    if isinstance(value, (int, float)):
+        try:
+            seconds = int(value)
+        except (OverflowError, ValueError):
+            # Non-finite floats (YAML ``.inf`` / ``.nan``) cannot become ints.
+            return SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS
+        if seconds <= 0:
+            return None
+        return min(seconds, MAX_XAI_STORAGE_EXPIRES_AFTER_SECONDS)
+    return SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS
+
+
+def read_xai_imagine_storage_config(section_name: str) -> Dict[str, Any]:
+    """Resolve xAI Imagine storage settings from the Hermes config.
+
+    ``section_name`` names the top-level config section to look under
+    (``image_gen`` or ``video_gen``); the settings live at
+    ``<section>.xai.storage``:
+
+        video_gen:
+          xai:
+            storage:
+              enabled: true
+              public_url: true
+              expires_after: null     # null or omitted means no expiry
+
+    Missing or malformed levels are treated as empty, so ``enabled`` and
+    ``public_url`` default to True and ``expires_after`` to None.
+
+    Returns:
+        A dict with the keys ``enabled``, ``public_url`` and ``expires_after``.
+    """
+    settings: Any = _load_config_section(section_name)
+    # Walk section -> xai -> storage, tolerating non-mapping values on the way.
+    for key in ("xai", "storage"):
+        settings = settings.get(key) if isinstance(settings, dict) else None
+    if not isinstance(settings, dict):
+        settings = {}
+
+    return {
+        "enabled": _coerce_bool(settings.get("enabled"), True),
+        "public_url": _coerce_bool(settings.get("public_url"), True),
+        "expires_after": _coerce_expires_after(settings.get("expires_after")),
+    }
+
+
+def build_xai_storage_options(
+    section_name: str,
+    *,
+    filename_prefix: str,
+    extension: str,
+) -> Optional[Dict[str, Any]]:
+    """Build the ``storage_options`` object for an xAI Imagine request.
+
+    Returns None when storage is disabled for ``section_name``. Otherwise
+    the payload carries a unique filename of the form
+    ``<prefix>-<UTC timestamp>-<8 hex chars>.<ext>`` (``bin`` stands in for
+    an empty extension) plus the ``public_url`` flag; ``expires_after`` is
+    included only when a finite retention is configured.
+    """
+    settings = read_xai_imagine_storage_config(section_name)
+    if not settings["enabled"]:
+        return None
+
+    stamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d-%H%M%S")
+    token = uuid.uuid4().hex[:8]
+    suffix = extension.lstrip(".") or "bin"
+
+    options: Dict[str, Any] = {
+        "filename": f"{filename_prefix}-{stamp}-{token}.{suffix}",
+        "public_url": bool(settings["public_url"]),
+    }
+    ttl = settings["expires_after"]
+    if ttl is not None:
+        options["expires_after"] = ttl
+    return options
+
+
+def xai_storage_notice_text(section_name: str) -> str:
+    """Compose the notice explaining that xAI Imagine storage is active.
+
+    Returns an empty string when storage is disabled for ``section_name``.
+    Otherwise the text states the configured retention (no automatic
+    expiry, or the TTL expressed in days) and how to turn storage off.
+    """
+    settings = read_xai_imagine_storage_config(section_name)
+    if not settings["enabled"]:
+        return ""
+
+    ttl_seconds = settings["expires_after"]
+    if ttl_seconds is not None:
+        days = ttl_seconds / (24 * 60 * 60)
+        plural = "" if days == 1 else "s"
+        retention = f"for about {days:g} day{plural}"
+    else:
+        retention = "without an automatic expiry"
+
+    return (
+        "xAI Imagine storage is enabled so generated media gets a reusable "
+        f"public URL {retention}. xAI may bill for stored files and public URL "
+        f"hosting. Disable this with `{section_name}.xai.storage.enabled: false` "
+        "or set `expires_after` to change the retention."
+    )
+
+
+def maybe_mark_xai_storage_notice_seen(section_name: str) -> Optional[str]:
+    """Hand out the storage notice the first time, then remember it was shown.
+
+    A marker file ``state/<section_name>_xai_storage_notice_seen`` under the
+    Hermes home records that the notice was already returned.
+
+    Returns:
+        The notice text on first use, or None when storage is disabled or
+        the marker already exists. If the marker cannot be checked or
+        written, the notice is returned anyway.
+    """
+    text = xai_storage_notice_text(section_name)
+    if not text:
+        return None
+    try:
+        from hermes_constants import get_hermes_home
+
+        state_dir = get_hermes_home() / "state"
+        state_dir.mkdir(parents=True, exist_ok=True)
+        seen_flag = state_dir / f"{section_name}_xai_storage_notice_seen"
+        already_seen = seen_flag.exists()
+        if not already_seen:
+            stamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
+            seen_flag.write_text(stamp + "\n")
+    except Exception:
+        # Best effort: failing to persist the marker must not hide the notice.
+        return text
+    return None if already_seen else text
+
+
 def resolve_xai_http_credentials(*, force_refresh: bool = False) -> Dict[str, str]:
    """Resolve bearer credentials for direct xAI HTTP endpoints.

@ -88,6 +237,21 @@ def resolve_xai_http_credentials(*, force_refresh: bool = False) -> Dict[str, st
    tokens where the proactive JWT check is a no-op, etc.), not as a default —
    the auth-store lock is held for the duration of the refresh.
    """
+    try:
+        from hermes_cli.auth import resolve_xai_oauth_runtime_credentials
+
+        creds = resolve_xai_oauth_runtime_credentials(force_refresh=force_refresh)
+        access_token = str(creds.get("api_key") or "").strip()
+        base_url = str(creds.get("base_url") or "").strip().rstrip("/")
+        if access_token:
+            return {
+                "provider": "xai-oauth",
+                "api_key": access_token,
+                "base_url": base_url or "https://api.x.ai/v1",
+            }
+    except Exception:
+        pass
+
    if not force_refresh:
        try:
            from hermes_cli.runtime_provider import resolve_runtime_provider
@ -104,21 +268,6 @@ def resolve_xai_http_credentials(*, force_refresh: bool = False) -> Dict[str, st
        except Exception:
            pass

-    try:
-        from hermes_cli.auth import resolve_xai_oauth_runtime_credentials
-
-        creds = resolve_xai_oauth_runtime_credentials(force_refresh=force_refresh)
-        access_token = str(creds.get("api_key") or "").strip()
-        base_url = str(creds.get("base_url") or "").strip().rstrip("/")
-        if access_token:
-            return {
-                "provider": "xai-oauth",
-                "api_key": access_token,
-                "base_url": base_url or "https://api.x.ai/v1",
-            }
-    except Exception:
-        pass
-
    api_key = str(get_env_value("XAI_API_KEY") or "").strip()
    base_url = str(get_env_value("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/")
    return {
--- a/tools/xai_video_tools.py
+++ b/tools/xai_video_tools.py
@ -0,0 +1,209 @@
+#!/usr/bin/env python3
+"""xAI-specific Imagine video edit and extend tools."""
+
+from __future__ import annotations
+
+import json
+from typing import Any, Dict, Optional
+
+from hermes_cli.config import load_config
+from plugins.video_gen.xai import (
+    has_xai_video_credentials,
+    run_xai_video_edit,
+    run_xai_video_extend,
+)
+from tools.registry import registry, tool_error
+
+
+def _configured_for_xai_video() -> bool:
+    """Report whether ``video_gen.provider`` in the Hermes config is ``xai``.
+
+    A config that cannot be loaded, or that lacks a mapping ``video_gen``
+    section, counts as not configured.
+    """
+    try:
+        loaded = load_config()
+    except Exception:
+        return False
+    if not isinstance(loaded, dict):
+        return False
+    video_section = loaded.get("video_gen")
+    if not isinstance(video_section, dict):
+        return False
+    return video_section.get("provider") == "xai"
+
+
+def _check_xai_video_requirements() -> bool:
+    """Check that xAI is the configured video provider and credentials exist.
+
+    The credential lookup is skipped when the provider is not xAI.
+    """
+    if not _configured_for_xai_video():
+        return False
+    return has_xai_video_credentials()
+
+
+def _clean_string(value: Any) -> Optional[str]:
+    """Return ``value`` stripped of surrounding whitespace.
+
+    Non-strings and blank strings yield None.
+    """
+    if not isinstance(value, str):
+        return None
+    trimmed = value.strip()
+    return trimmed or None
+
+
+def _coerce_int(value: Any) -> Optional[int]:
+    """Best-effort conversion of a tool argument to ``int``.
+
+    Returns:
+        The integer value, or None when ``value`` is None, a bool, or
+        anything ``int()`` cannot convert (including non-finite floats).
+    """
+    # Booleans are ints in Python; a stray true/false is not a duration.
+    if value is None or isinstance(value, bool):
+        return None
+    try:
+        return int(value)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError covers float infinity, which json.loads produces
+        # for the non-standard ``Infinity`` literal.
+        return None
+
+
+def _provider_not_configured_error() -> str:
+    """Serialize the error returned when the video provider is not xAI."""
+    message = (
+        "xAI video edit/extend tools require `video_gen.provider` to be "
+        "configured as `xai` via `hermes tools` -> Video Generation."
+    )
+    body: Dict[str, Any] = {
+        "success": False,
+        "error": message,
+        "error_type": "provider_not_configured",
+        "provider": "xai",
+    }
+    return json.dumps(body)
+
+
+def _normalize_public_video_url(video_url: Any) -> Optional[str]:
+    """Return the trimmed URL when it starts with ``http://`` or ``https://``.
+
+    Only the scheme prefix is checked (case-insensitively). Bare file IDs,
+    local paths, other schemes, blanks and non-strings yield None.
+    """
+    candidate = _clean_string(video_url)
+    if candidate is None:
+        return None
+    has_web_scheme = candidate.lower().startswith(("http://", "https://"))
+    return candidate if has_web_scheme else None
+
+
+# Tool schema for ``xai_video_edit``: ``prompt`` and ``video_url`` are
+# required, ``model`` is an optional override. The descriptions steer the
+# model toward chaining from a prior Imagine result's URL.
+XAI_VIDEO_EDIT_SCHEMA: Dict[str, Any] = {
+    "name": "xai_video_edit",
+    "description": (
+        "Edit an existing video with xAI Imagine. This is separate from "
+        "`video_generate` because video editing is provider-specific. "
+        "`video_url` must be the public HTTPS MP4 URL from a prior Imagine "
+        "result (`video` or `public_url` on files-cdn)."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "prompt": {
+                "type": "string",
+                "description": "Instruction for how xAI should modify the source video.",
+            },
+            "video_url": {
+                "type": "string",
+                "description": (
+                    "Public HTTPS MP4 URL of the source video — the `video` or "
+                    "`public_url` from a prior xAI Imagine result."
+                ),
+            },
+            "model": {
+                "type": "string",
+                "description": "Optional xAI Imagine model override.",
+            },
+        },
+        "required": ["prompt", "video_url"],
+    },
+}
+
+
+# Tool schema for ``xai_video_extend``: ``prompt`` and ``video_url`` are
+# required; ``duration`` (seconds) and ``model`` are optional.
+XAI_VIDEO_EXTEND_SCHEMA: Dict[str, Any] = {
+    "name": "xai_video_extend",
+    "description": (
+        "Extend an existing video with xAI Imagine. This is separate from "
+        "`video_generate` because video extension is provider-specific. "
+        "`video_url` must be the public HTTPS MP4 URL from a prior Imagine "
+        "result (`video` or `public_url` on files-cdn)."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "prompt": {
+                "type": "string",
+                "description": "Instruction for how xAI should continue the source video.",
+            },
+            "video_url": {
+                "type": "string",
+                "description": (
+                    "Public HTTPS MP4 URL of the source video — the `video` or "
+                    "`public_url` from a prior xAI Imagine result."
+                ),
+            },
+            "duration": {
+                "type": "integer",
+                "description": (
+                    "Desired extension duration in seconds. xAI clamps this "
+                    "to its supported range."
+                ),
+            },
+            "model": {
+                "type": "string",
+                "description": "Optional xAI Imagine model override.",
+            },
+        },
+        "required": ["prompt", "video_url"],
+    },
+}
+
+
+def _handle_xai_video_edit(args: Dict[str, Any], **_kw: Any) -> str:
+    """Tool handler for ``xai_video_edit``.
+
+    Validates ``prompt`` and ``video_url``, confirms xAI is the configured
+    video provider, then delegates to ``run_xai_video_edit`` and returns
+    its result serialized as JSON. Validation failures are returned as
+    tool errors and never reach the provider.
+    """
+    prompt = _clean_string(args.get("prompt"))
+    if not prompt:
+        return tool_error("prompt is required for xAI video edit")
+
+    source_url = _normalize_public_video_url(args.get("video_url"))
+    if not source_url:
+        return tool_error(
+            "video_url must be a public HTTPS MP4 URL (the `video`/`public_url` "
+            "from a prior Imagine result)"
+        )
+
+    if not _configured_for_xai_video():
+        return _provider_not_configured_error()
+
+    outcome = run_xai_video_edit(
+        prompt=prompt,
+        video_url=source_url,
+        model=_clean_string(args.get("model")),
+    )
+    return json.dumps(outcome)
+
+
+def _handle_xai_video_extend(args: Dict[str, Any], **_kw: Any) -> str:
+    """Tool handler for ``xai_video_extend``.
+
+    Validates ``prompt`` and ``video_url``, confirms xAI is the configured
+    video provider, then delegates to ``run_xai_video_extend`` and returns
+    its result serialized as JSON. ``duration`` is forwarded as an int, or
+    as None when absent or not convertible.
+    """
+    prompt = _clean_string(args.get("prompt"))
+    if not prompt:
+        return tool_error("prompt is required for xAI video extend")
+
+    source_url = _normalize_public_video_url(args.get("video_url"))
+    if not source_url:
+        return tool_error(
+            "video_url must be a public HTTPS MP4 URL (the `video`/`public_url` "
+            "from a prior Imagine result)"
+        )
+
+    if not _configured_for_xai_video():
+        return _provider_not_configured_error()
+
+    outcome = run_xai_video_extend(
+        prompt=prompt,
+        video_url=source_url,
+        duration=_coerce_int(args.get("duration")),
+        model=_clean_string(args.get("model")),
+    )
+    return json.dumps(outcome)
+
+
+# Register both xAI-specific tools in the ``video_gen`` toolset at import
+# time. Both share ``_check_xai_video_requirements`` as their check_fn
+# (presumably used by the registry to gate availability; confirm in
+# tools/registry.py). No env vars are declared as required.
+registry.register(
+    name="xai_video_edit",
+    toolset="video_gen",
+    schema=XAI_VIDEO_EDIT_SCHEMA,
+    handler=_handle_xai_video_edit,
+    check_fn=_check_xai_video_requirements,
+    requires_env=[],
+    is_async=False,
+    emoji="video",
+)
+
+registry.register(
+    name="xai_video_extend",
+    toolset="video_gen",
+    schema=XAI_VIDEO_EXTEND_SCHEMA,
+    handler=_handle_xai_video_extend,
+    check_fn=_check_xai_video_requirements,
+    requires_env=[],
+    is_async=False,
+    emoji="video",
+)
--- a/toolsets.py
+++ b/toolsets.py
@ -139,10 +139,11 @@ TOOLSETS = {
        "description": (
            "Video generation tools. Single ``video_generate`` tool covers "
            "text-to-video (prompt only) and image-to-video (prompt + "
-            "image_url) — the active backend auto-routes. Configure via "
+            "image_url), plus reference-to-video. Provider-specific edit/"
+            "extend workflows may appear as separate tools. Configure via "
            "``hermes tools`` → Video Generation."
        ),
-        "tools": ["video_generate"],
+        "tools": ["video_generate", "xai_video_edit", "xai_video_extend"],
        "includes": []
    },