diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index d0b024b7449..404796322a8 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -66,7 +66,7 @@ CONFIGURABLE_TOOLSETS = [ ("vision", "πŸ‘οΈ Vision / Image Analysis", "vision_analyze"), ("video", "🎬 Video Analysis", "video_analyze (requires video-capable model)"), ("image_gen", "🎨 Image Generation", "image_generate"), - ("video_gen", "🎬 Video Generation", "video_generate (text-to-video + image-to-video)"), + ("video_gen", "🎬 Video Generation", "video_generate (text/image/reference)"), ("x_search", "🐦 X (Twitter) Search", "x_search (requires xAI OAuth or XAI_API_KEY)"), ("tts", "πŸ”Š Text-to-Speech", "text_to_speech"), ("skills", "πŸ“š Skills", "list, view, manage"), @@ -2785,6 +2785,49 @@ def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None _print_success(f" Model set to: {chosen}") +def _configure_xai_imagine_storage(section_name: str, config: dict) -> None: + """Prompt for xAI Imagine stored public URL behavior.""" + section = config.setdefault(section_name, {}) + if not isinstance(section, dict): + section = {} + config[section_name] = section + xai_cfg = section.setdefault("xai", {}) + if not isinstance(xai_cfg, dict): + xai_cfg = {} + section["xai"] = xai_cfg + storage_cfg = xai_cfg.setdefault("storage", {}) + if not isinstance(storage_cfg, dict): + storage_cfg = {} + xai_cfg["storage"] = storage_cfg + + _print_warning( + " xAI Imagine can store generated media and create reusable public URLs. " + "xAI may bill for stored files and public URL hosting." 
+ ) + idx = _prompt_choice( + " Stored public URLs:", + [ + "Enable public URLs without automatic expiry (recommended)", + "Disable stored public URLs", + "Enable public URLs for 2 days", + ], + default=0, + ) + if idx == 1: + storage_cfg["enabled"] = False + _print_success(" xAI stored public URLs disabled") + elif idx == 2: + storage_cfg["enabled"] = True + storage_cfg["public_url"] = True + storage_cfg["expires_after"] = 2 * 24 * 60 * 60 + _print_success(" xAI stored public URLs enabled for 2 days") + else: + storage_cfg["enabled"] = True + storage_cfg["public_url"] = True + storage_cfg["expires_after"] = None + _print_success(" xAI stored public URLs enabled without automatic expiry") + + def _select_plugin_image_gen_provider(plugin_name: str, config: dict) -> None: """Persist a plugin-backed image generation provider selection.""" img_cfg = config.setdefault("image_gen", {}) @@ -2795,6 +2838,8 @@ def _select_plugin_image_gen_provider(plugin_name: str, config: dict) -> None: img_cfg["use_gateway"] = False _print_success(f" image_gen.provider set to: {plugin_name}") _configure_imagegen_model_for_plugin(plugin_name, config) + if plugin_name == "xai": + _configure_xai_imagine_storage("image_gen", config) # ─── Video Generation Model Pickers ─────────────────────────────────────────── @@ -2895,6 +2940,8 @@ def _select_plugin_video_gen_provider(plugin_name: str, config: dict, *, use_gat vid_cfg["use_gateway"] = use_gateway _print_success(f" video_gen.provider set to: {plugin_name}") _configure_videogen_model_for_plugin(plugin_name, config) + if plugin_name == "xai": + _configure_xai_imagine_storage("video_gen", config) def _write_provider_config(provider: dict, config: dict, *, managed_feature) -> None: diff --git a/plugins/image_gen/xai/__init__.py b/plugins/image_gen/xai/__init__.py index f487d90ada6..a611a0aa031 100644 --- a/plugins/image_gen/xai/__init__.py +++ b/plugins/image_gen/xai/__init__.py @@ -19,6 +19,7 @@ from __future__ import annotations import 
logging import os +from pathlib import Path from typing import Any, Dict, List, Optional, Tuple import requests @@ -33,7 +34,14 @@ from agent.image_gen_provider import ( save_url_image, success_response, ) -from tools.xai_http import hermes_xai_user_agent, resolve_xai_http_credentials +from tools.xai_http import ( + build_xai_storage_options, + hermes_xai_user_agent, + maybe_mark_xai_storage_notice_seen, + read_xai_imagine_storage_config, + resolve_xai_http_credentials, + xai_storage_notice_text, +) logger = logging.getLogger(__name__) @@ -118,10 +126,8 @@ def _resolve_resolution() -> str: def _xai_image_field(source: str) -> Dict[str, str]: """Build the xAI ``image`` field for an edit request. - xAI's ``/v1/images/edits`` accepts ``{"url": , "type": "image_url"}`` - where ```` is a public URL or a base64 data URI. Public URLs and - existing data URIs pass through unchanged; local file paths are read and - encoded into a ``data:`` URI. + xAI's ``/v1/images/edits`` accepts a public HTTPS URL or a base64 data URI. + Local file paths are read and encoded into a ``data:`` URI. """ source = source.strip() lower = source.lower() @@ -131,7 +137,7 @@ def _xai_image_field(source: str) -> Dict[str, str]: import base64 import os as _os - with open(source, "rb") as fh: + with open(_os.path.expanduser(source), "rb") as fh: raw = fh.read() ext = (_os.path.splitext(source)[1].lstrip(".") or "png").lower() if ext == "jpg": @@ -176,19 +182,29 @@ class XAIImageGenProvider(ImageGenProvider): # hook (``hermes_cli/tools_config.py``); identical to the TTS / video # gen entries so users see the same OAuth-or-API-key choice for every # xAI service. + storage_notice = xai_storage_notice_text("image_gen") + tag = ( + "grok-imagine-image - text-to-image & image editing; uses xAI " + "Grok OAuth or XAI_API_KEY" + ) + if storage_notice: + tag += f". 
{storage_notice}" return { "name": "xAI Grok Imagine (image)", "badge": "paid", - "tag": "grok-imagine-image β€” text-to-image & image editing; uses xAI Grok OAuth or XAI_API_KEY", + "tag": tag, "env_vars": [], "post_setup": "xai_grok", } def capabilities(self) -> Dict[str, Any]: # xAI's /v1/images/edits supports image editing via grok-imagine-image - # -quality. Single primary source image (multi-image editing exists as - # a separate capability but we keep the primary edit surface here). - return {"modalities": ["text", "image"], "max_reference_images": 1} + # -quality, including up to 3 total source images. + return { + "modalities": ["text", "image"], + "max_reference_images": 2, + "max_source_images": 3, + } def generate( self, @@ -224,16 +240,39 @@ class XAIImageGenProvider(ImageGenProvider): resolution = _resolve_resolution() xai_res = resolution if resolution in _XAI_RESOLUTIONS else DEFAULT_RESOLUTION - # Pick the primary source image: explicit image_url wins, else the - # first reference image. 
- source_image = None + source_images: List[str] = [] if isinstance(image_url, str) and image_url.strip(): - source_image = image_url.strip() - else: - refs = normalize_reference_images(reference_image_urls) - if refs: - source_image = refs[0] - is_edit = bool(source_image) + source_images.append(image_url.strip()) + refs = normalize_reference_images(reference_image_urls) + if refs: + source_images.extend(refs) + if len(source_images) > 3: + return error_response( + error="xAI image editing supports at most 3 source images", + error_type="too_many_references", + provider=provider_name, + model="grok-imagine-image-quality", + prompt=prompt, + aspect_ratio=aspect, + ) + for index, source in enumerate(source_images): + field = "image_url" if index == 0 and image_url and image_url.strip() == source else "reference_image_urls" + lower = source.lower() + if not lower.startswith(("http://", "https://", "data:")): + path = Path(source).expanduser() + if not path.is_file(): + return error_response( + error=( + f"{field} must be a public HTTPS URL or data URI " + "(e.g. the `image`/`public_url` from a prior Imagine result)" + ), + error_type="invalid_image_url", + provider=provider_name, + model="grok-imagine-image-quality", + prompt=prompt, + aspect_ratio=aspect, + ) + is_edit = bool(source_images) modality = "image" if is_edit else "text" headers = { @@ -243,6 +282,13 @@ class XAIImageGenProvider(ImageGenProvider): } base_url = str(creds.get("base_url") or "https://api.x.ai/v1").strip().rstrip("/") + storage_options = build_xai_storage_options( + "image_gen", + filename_prefix="hermes-xai-image", + extension="png", + ) + storage_notice = maybe_mark_xai_storage_notice_seen("image_gen") + storage_cfg = read_xai_imagine_storage_config("image_gen") if is_edit: # Editing requires the quality model per xAI docs. The source @@ -250,7 +296,7 @@ class XAIImageGenProvider(ImageGenProvider): # are converted to a data URI here. 
edit_model = "grok-imagine-image-quality" try: - image_field = _xai_image_field(source_image) + image_fields = [_xai_image_field(source) for source in source_images] except Exception as exc: return error_response( error=f"Could not load source image for editing: {exc}", @@ -263,8 +309,11 @@ class XAIImageGenProvider(ImageGenProvider): payload: Dict[str, Any] = { "model": edit_model, "prompt": prompt, - "image": image_field, } + if len(image_fields) == 1: + payload["image"] = image_fields[0] + else: + payload["images"] = image_fields endpoint_url = f"{base_url}/images/edits" model_id = edit_model else: @@ -275,6 +324,8 @@ class XAIImageGenProvider(ImageGenProvider): "resolution": xai_res, } endpoint_url = f"{base_url}/images/generations" + if storage_options is not None: + payload["storage_options"] = storage_options try: response = requests.post( @@ -331,7 +382,8 @@ class XAIImageGenProvider(ImageGenProvider): aspect_ratio=aspect, ) - # Parse response β€” xAI returns data[0].b64_json or data[0].url + # Parse response - xAI returns data[0].b64_json, data[0].url, and + # optionally data[0].file_output when storage_options were requested. 
data = result.get("data", []) if not data: return error_response( @@ -346,8 +398,13 @@ class XAIImageGenProvider(ImageGenProvider): first = data[0] b64 = first.get("b64_json") url = first.get("url") + file_output = first.get("file_output") if isinstance(first, dict) else None + file_output = file_output if isinstance(file_output, dict) else {} + public_url = file_output.get("public_url") if isinstance(file_output.get("public_url"), str) else None - if b64: + if public_url: + image_ref = public_url + elif b64: try: saved_path = save_b64_image(b64, prefix=f"xai_{model_id}") except Exception as exc: @@ -389,9 +446,27 @@ class XAIImageGenProvider(ImageGenProvider): aspect_ratio=aspect, ) - extra: Dict[str, Any] = {} + extra: Dict[str, Any] = { + "storage_enabled": bool(storage_cfg["enabled"]), + } if not is_edit: extra["resolution"] = xai_res + if storage_notice: + extra["storage_notice"] = storage_notice + if public_url: + extra["public_url"] = public_url + if file_output: + for key in ( + "filename", + "expires_at", + "public_url_expires_at", + "public_url_error", + "storage_error", + ): + if key in file_output: + extra[key] = file_output[key] + if result.get("usage"): + extra["usage"] = result["usage"] return success_response( image=image_ref, diff --git a/plugins/video_gen/xai/__init__.py b/plugins/video_gen/xai/__init__.py index 308837b6131..edc981c78ab 100644 --- a/plugins/video_gen/xai/__init__.py +++ b/plugins/video_gen/xai/__init__.py @@ -1,10 +1,7 @@ """xAI Grok-Imagine video generation backend. -Surface: text-to-video and image-to-video (animate an input image) -through xAI's ``/videos/generations`` endpoint. Edit and extend are not -exposed in this unified surface β€” xAI is the only backend that supports -them and the inconsistency would force per-backend prose in the agent's -tool description. +Surface: text-to-video, image-to-video, and reference-to-video through the +unified video provider. xAI edit/extend are exposed through separate tools. 
Originally salvaged from PR #10600 by @Jaaneek; reshaped into the :class:`VideoGenProvider` plugin interface and trimmed to the @@ -14,8 +11,9 @@ Authentication: xAI Grok OAuth tokens (preferred β€” billed against the user's SuperGrok or X Premium+ subscription) or ``XAI_API_KEY``. Both routes are resolved through ``tools.xai_http.resolve_xai_http_credentials`` so a single login covers chat + TTS + image gen + video gen + transcription. -Output is an HTTPS URL from xAI's CDN; the gateway downloads and -delivers it. +When xAI storage is enabled, the primary ``video`` / ``public_url`` fields are the +stored files-cdn HTTPS link. Pass that public MP4 URL as ``video_url`` for +edit/extend; it is sent to xAI as ``video.url``. """ from __future__ import annotations @@ -46,13 +44,14 @@ logger = logging.getLogger(__name__) DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1" DEFAULT_TEXT_TO_VIDEO_MODEL = "grok-imagine-video" -DEFAULT_IMAGE_TO_VIDEO_MODEL = "grok-imagine-video-1.5-preview" +DEFAULT_IMAGE_TO_VIDEO_MODEL = "grok-imagine-video-1.5" DEFAULT_MODEL = DEFAULT_TEXT_TO_VIDEO_MODEL DEFAULT_DURATION = 8 DEFAULT_ASPECT_RATIO = "16:9" DEFAULT_RESOLUTION = "720p" DEFAULT_TIMEOUT_SECONDS = 240 DEFAULT_POLL_INTERVAL_SECONDS = 5 +DEFAULT_EXTEND_DURATION = 6 VALID_ASPECT_RATIOS = {"1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3"} VALID_RESOLUTIONS = {"480p", "720p"} @@ -67,16 +66,20 @@ _MODELS: Dict[str, Dict[str, Any]] = { "price": "see https://docs.x.ai/developers/models/grok-imagine-video", "modalities": ["text", "image"], }, - "grok-imagine-video-1.5-preview": { - "display": "Grok Imagine Video 1.5 Preview", + "grok-imagine-video-1.5": { + "display": "Grok Imagine Video 1.5", "speed": "~60-240s", "strengths": "Latest xAI image-to-video model.", - "price": "see https://docs.x.ai/developers/models/grok-imagine-video-1.5-preview", + "price": "see https://docs.x.ai/developers/pricing", "modalities": ["image"], - "aliases": ["grok-imagine-video-1.5-2026-05-30"], }, } 
+_IMAGE_TO_VIDEO_COMPAT_MODEL_IDS = { + "grok-imagine-video-1.5-preview", + "grok-imagine-video-1.5-2026-05-30", +} + # --------------------------------------------------------------------------- # HTTP helpers @@ -145,21 +148,114 @@ def _image_ref_to_xai_url(value: str) -> str: return f"data:{mime};base64,{encoded}" -def _normalize_reference_images(reference_image_urls: Optional[List[str]]): - refs = [] +def _image_ref_to_xai_input(value: str) -> Optional[Dict[str, str]]: + ref = _image_ref_to_xai_url(value) + if not ref: + return None + lower = ref.lower() + if lower.startswith(("http://", "https://", "data:image/")): + return {"url": ref} + return None + + +def _xai_video_output_urls( + video: Dict[str, Any], +) -> Tuple[str, Optional[str], Optional[str]]: + """Return ``(public_video_url, temporary_url, stored_public_url)``. + + ``public_video_url`` is the stored files-cdn HTTPS MP4 (``public_url``) when + storage is enabled; otherwise xAI's temporary ``video.url``. Pass this value + as ``video_url`` for edit/extend chaining. 
+ """ + file_output = video.get("file_output") if isinstance(video.get("file_output"), dict) else {} + file_output = file_output or {} + stored_public = file_output.get("public_url") + stored_public = stored_public.strip() if isinstance(stored_public, str) else None + temporary = video.get("url") + temporary = temporary.strip() if isinstance(temporary, str) else None + public_video_url = stored_public or temporary or "" + temporary_out = ( + temporary + if temporary and stored_public and temporary != stored_public + else None + ) + return public_video_url, temporary_out, stored_public + + +def _video_ref_to_xai_url(value: str) -> str: + """Return a URL/data URI accepted by xAI for video inputs.""" + ref = (value or "").strip() + if not ref: + return "" + lower = ref.lower() + if lower.startswith(("http://", "https://", "data:video/")): + return ref + + path = Path(ref).expanduser() + if not path.is_file(): + return ref + + mime = mimetypes.guess_type(path.name)[0] or "video/mp4" + if not mime.startswith("video/"): + return ref + + encoded = base64.b64encode(path.read_bytes()).decode("ascii") + return f"data:{mime};base64,{encoded}" + + +async def _video_input_from_public_url( + value: str, + *, + api_key: str, + base_url: str, +) -> Optional[Dict[str, str]]: + """Build xAI ``video`` input using a public HTTPS URL (``url`` field only).""" + ref = (value or "").strip() + if not ref: + return None + + path = Path(ref).expanduser() + if path.is_file(): + data_ref = _video_ref_to_xai_url(ref) + return {"url": data_ref} if data_ref else None + + lower = ref.lower() + if not lower.startswith(("http://", "https://")): + return None + + return {"url": ref} + + +def _normalize_reference_images( + reference_image_urls: Optional[List[str]], +) -> Tuple[Optional[List[Dict[str, str]]], Optional[str]]: + refs: List[Dict[str, str]] = [] for url in reference_image_urls or []: - normalized = _image_ref_to_xai_url(url) - if normalized: - refs.append({"url": normalized}) - return refs 
or None + cleaned = (url or "").strip() + if not cleaned: + continue + normalized = _image_ref_to_xai_input(cleaned) + if not normalized: + return None, ( + "reference_image_urls must be public HTTPS URLs or data URIs " + "(e.g. the `image`/`public_url` from a prior Imagine result)" + ) + refs.append(normalized) + return (refs if refs else None), None -def _clamp_duration(duration: Optional[int], has_reference_images: bool) -> int: - value = duration if duration is not None else DEFAULT_DURATION +def _clamp_duration( + duration: Optional[int], + *, + has_reference_images: bool = False, + max_seconds: int = 15, + default: int = DEFAULT_DURATION, +) -> int: + value = duration if duration is not None else default if value < 1: value = 1 - if value > 15: - value = 15 + if value > max_seconds: + value = max_seconds if has_reference_images and value > 10: value = 10 return value @@ -173,7 +269,7 @@ def _resolve_model_for_modality( ) -> str: """Select xAI's text/video model without treating config as a prompt override. - ``grok-imagine-video-1.5-preview`` currently rejects text-only video + ``grok-imagine-video-1.5`` currently rejects text-only video generation, but it is the desired image-to-video backend. Explicit tool ``model=`` still wins for users who intentionally request another model. 
""" @@ -182,7 +278,7 @@ def _resolve_model_for_modality( return requested if modality == "image": return DEFAULT_IMAGE_TO_VIDEO_MODEL - if requested == DEFAULT_IMAGE_TO_VIDEO_MODEL: + if requested == DEFAULT_IMAGE_TO_VIDEO_MODEL or requested in _IMAGE_TO_VIDEO_COMPAT_MODEL_IDS: return DEFAULT_TEXT_TO_VIDEO_MODEL return requested or DEFAULT_TEXT_TO_VIDEO_MODEL @@ -193,11 +289,11 @@ async def _submit( *, api_key: str, base_url: str, + endpoint: str = "generations", ) -> str: - """POST to /videos/generations β€” xAI's only public endpoint for our - text-to-video and image-to-video surface.""" + """POST to one of xAI's async video endpoints and return request_id.""" response = await client.post( - f"{base_url}/videos/generations", + f"{base_url}/videos/{endpoint}", headers={**_xai_headers(api_key), "x-idempotency-key": str(uuid.uuid4())}, json=payload, timeout=60, @@ -248,7 +344,7 @@ async def _poll( class XAIVideoGenProvider(VideoGenProvider): - """xAI Grok Imagine video backend (text-to-video + image-to-video).""" + """xAI Grok Imagine video backend.""" @property def name(self) -> str: @@ -275,10 +371,25 @@ class XAIVideoGenProvider(VideoGenProvider): # Grok OAuth (SuperGrok / Premium+) β€” TTS / image gen / video gen # all share the same credential resolver. The hook offers an # OAuth-vs-API-key choice when neither is configured. + try: + from tools.xai_http import xai_storage_notice_text + + storage_notice = xai_storage_notice_text("video_gen") + except Exception: + storage_notice = "" + tag = ( + "grok-imagine-video for text/reference; " + "grok-imagine-video-1.5 for image-to-video; " + "edit/extend: pass the stored public HTTPS MP4 (`video` / " + "`public_url` from a prior Imagine result); uses xAI Grok OAuth " + "or XAI_API_KEY" + ) + if storage_notice: + tag += f". 
{storage_notice}" return { "name": "xAI Grok Imagine", "badge": "paid", - "tag": "grok-imagine-video for text-to-video; grok-imagine-video-1.5-preview for image-to-video; uses xAI Grok OAuth or XAI_API_KEY", + "tag": tag, "env_vars": [], "post_setup": "xai_grok", } @@ -310,189 +421,479 @@ class XAIVideoGenProvider(VideoGenProvider): seed: Optional[int] = None, **kwargs: Any, ) -> Dict[str, Any]: - try: - loop = asyncio.new_event_loop() - try: - return loop.run_until_complete(self._generate_async( - prompt=prompt, - model=model, - explicit_model=bool(kwargs.get("_model_override_explicit")), - image_url=image_url, - reference_image_urls=reference_image_urls, - duration=duration, - aspect_ratio=aspect_ratio, - resolution=resolution, - )) - finally: - loop.close() - except Exception as exc: - logger.warning("xAI video gen unexpected failure: %s", exc, exc_info=True) - return error_response( - error=f"xAI video generation failed: {exc}", - error_type="api_error", - provider="xai", - model=model or DEFAULT_MODEL, - prompt=prompt, - aspect_ratio=aspect_ratio, - ) - - async def _generate_async( - self, - *, - prompt: str, - model: Optional[str], - explicit_model: bool, - image_url: Optional[str], - reference_image_urls: Optional[List[str]], - duration: Optional[int], - aspect_ratio: str, - resolution: str, - ) -> Dict[str, Any]: - api_key, base_url = _resolve_xai_credentials() - if not api_key: - return error_response( - error=( - "No xAI credentials found. Sign in via `hermes auth add xai-oauth` " - "(SuperGrok / Premium+) or set XAI_API_KEY from " - "https://console.x.ai/." 
- ), - error_type="auth_required", - provider="xai", prompt=prompt, - ) - - prompt = (prompt or "").strip() - image_url_norm = _image_ref_to_xai_url(image_url or "") or None - normalized_aspect_ratio = (aspect_ratio or DEFAULT_ASPECT_RATIO).strip() - normalized_resolution = (resolution or DEFAULT_RESOLUTION).strip().lower() - modality_used = "image" if image_url_norm else "text" - resolved_model = _resolve_model_for_modality( - model, - modality=modality_used, - explicit_model=explicit_model, + return run_xai_video_generation( + prompt=prompt, + model=model, + explicit_model=bool(kwargs.get("_model_override_explicit")), + image_url=image_url, + reference_image_urls=reference_image_urls, + duration=duration, + aspect_ratio=aspect_ratio, + resolution=resolution, ) - if not prompt: + +def has_xai_video_credentials() -> bool: + api_key, _ = _resolve_xai_credentials() + return bool(api_key) + + +def run_xai_video_generation( + *, + prompt: str, + model: Optional[str], + explicit_model: bool, + image_url: Optional[str], + reference_image_urls: Optional[List[str]], + duration: Optional[int], + aspect_ratio: str, + resolution: str, +) -> Dict[str, Any]: + return _run_xai_video_coroutine( + _generate_xai_video_async( + prompt=prompt, + model=model, + explicit_model=explicit_model, + image_url=image_url, + reference_image_urls=reference_image_urls, + duration=duration, + aspect_ratio=aspect_ratio, + resolution=resolution, + ), + operation_label="generation", + model=model, + prompt=prompt, + aspect_ratio=aspect_ratio, + ) + + +def run_xai_video_edit( + *, + prompt: str, + video_url: str, + model: Optional[str] = None, +) -> Dict[str, Any]: + return _run_xai_video_coroutine( + _edit_xai_video_async(prompt=prompt, video_url=video_url, model=model), + operation_label="edit", + model=model, + prompt=prompt, + aspect_ratio=DEFAULT_ASPECT_RATIO, + ) + + +def run_xai_video_extend( + *, + prompt: str, + video_url: str, + duration: Optional[int] = None, + model: Optional[str] = None, 
+) -> Dict[str, Any]: + return _run_xai_video_coroutine( + _extend_xai_video_async( + prompt=prompt, + video_url=video_url, + duration=duration, + model=model, + ), + operation_label="extend", + model=model, + prompt=prompt, + aspect_ratio=DEFAULT_ASPECT_RATIO, + ) + + +def _run_xai_video_coroutine( + coro, + *, + operation_label: str, + model: Optional[str], + prompt: str, + aspect_ratio: str, +) -> Dict[str, Any]: + try: + loop = asyncio.new_event_loop() + try: + return loop.run_until_complete(coro) + finally: + loop.close() + except Exception as exc: + logger.warning("xAI video %s unexpected failure: %s", operation_label, exc, exc_info=True) + return error_response( + error=f"xAI video {operation_label} failed: {exc}", + error_type="api_error", + provider="xai", + model=model or DEFAULT_MODEL, + prompt=prompt, + aspect_ratio=aspect_ratio, + ) + + +async def _generate_xai_video_async( + *, + prompt: str, + model: Optional[str], + explicit_model: bool, + image_url: Optional[str], + reference_image_urls: Optional[List[str]], + duration: Optional[int], + aspect_ratio: str, + resolution: str, +) -> Dict[str, Any]: + api_key, base_url = _resolve_xai_credentials() + if not api_key: + return _auth_required_response(prompt) + + prompt = (prompt or "").strip() + image_input = None + if (image_url or "").strip(): + image_input = _image_ref_to_xai_input(image_url) + if not image_input: return error_response( error=( - "prompt is required for xAI video generation " - "(text-to-video or image-to-video)" + "image_url must be a public HTTPS URL or data URI " + "(e.g. 
the `image`/`public_url` from a prior Imagine result)" ), - error_type="missing_prompt", - provider="xai", prompt=prompt, - ) - - refs = _normalize_reference_images(reference_image_urls) - if refs and len(refs) > MAX_REFERENCE_IMAGES: - return error_response( - error=f"reference_image_urls supports at most {MAX_REFERENCE_IMAGES} images on xAI", - error_type="too_many_references", - provider="xai", prompt=prompt, - ) - if image_url_norm and refs: - return error_response( - error="image_url and reference_image_urls cannot be combined on xAI", - error_type="conflicting_inputs", - provider="xai", prompt=prompt, - ) - - clamped_duration = _clamp_duration(duration, has_reference_images=bool(refs)) - - if normalized_aspect_ratio not in VALID_ASPECT_RATIOS: - normalized_aspect_ratio = DEFAULT_ASPECT_RATIO - if normalized_resolution not in VALID_RESOLUTIONS: - normalized_resolution = DEFAULT_RESOLUTION - - payload: Dict[str, Any] = { - "model": resolved_model, - "prompt": prompt, - "duration": clamped_duration, - "aspect_ratio": normalized_aspect_ratio, - "resolution": normalized_resolution, - } - if image_url_norm: - payload["image"] = {"url": image_url_norm} - if refs: - payload["reference_images"] = refs - - async with httpx.AsyncClient() as client: - try: - request_id = await _submit( - client, payload, api_key=api_key, base_url=base_url - ) - except httpx.HTTPStatusError as exc: - detail = "" - try: - detail = exc.response.text[:500] - except Exception: - pass - return error_response( - error=f"xAI submit failed ({exc.response.status_code}): {detail or exc}", - error_type="api_error", - provider="xai", - model=resolved_model, - prompt=prompt, - ) - - poll_result = await _poll( - client, request_id, - api_key=api_key, base_url=base_url, - timeout_seconds=DEFAULT_TIMEOUT_SECONDS, - poll_interval=DEFAULT_POLL_INTERVAL_SECONDS, - ) - - status = poll_result["status"] - body = poll_result["body"] - - if status == "done": - video = body.get("video") or {} - url = 
video.get("url") - if not url: - return error_response( - error="xAI video generation completed without a video URL", - error_type="empty_response", - provider="xai", - model=body.get("model") or resolved_model, - prompt=prompt, - ) - extra: Dict[str, Any] = { - "request_id": request_id, - "resolution": normalized_resolution, - } - if body.get("usage"): - extra["usage"] = body["usage"] - return success_response( - video=url, - model=body.get("model") or resolved_model, - prompt=prompt, - modality=modality_used, - aspect_ratio=normalized_aspect_ratio, - duration=video.get("duration") or clamped_duration, + error_type="invalid_image_url", provider="xai", - extra=extra, + prompt=prompt, ) + normalized_aspect_ratio = (aspect_ratio or DEFAULT_ASPECT_RATIO).strip() + normalized_resolution = (resolution or DEFAULT_RESOLUTION).strip().lower() + refs, refs_error = _normalize_reference_images(reference_image_urls) + if refs_error: + return error_response( + error=refs_error, + error_type="invalid_reference_image_urls", + provider="xai", + prompt=prompt, + ) - if status == "timeout": + if not prompt: + return error_response( + error="prompt is required for xAI video generation", + error_type="missing_prompt", + provider="xai", prompt=prompt, + ) + if refs and len(refs) > MAX_REFERENCE_IMAGES: + return error_response( + error=f"reference_image_urls supports at most {MAX_REFERENCE_IMAGES} images on xAI", + error_type="too_many_references", + provider="xai", prompt=prompt, + ) + if image_input and refs: + return error_response( + error="image_url and reference_image_urls cannot be combined on xAI", + error_type="conflicting_inputs", + provider="xai", prompt=prompt, + ) + + if normalized_aspect_ratio not in VALID_ASPECT_RATIOS: + normalized_aspect_ratio = DEFAULT_ASPECT_RATIO + if normalized_resolution not in VALID_RESOLUTIONS: + normalized_resolution = DEFAULT_RESOLUTION + + modality_used = "reference" if refs else ("image" if image_input else "text") + resolved_model = 
_resolve_model_for_modality( + model, + modality=modality_used, + explicit_model=explicit_model, + ) + if refs and resolved_model != DEFAULT_TEXT_TO_VIDEO_MODEL: + if explicit_model: return error_response( - error=f"Timed out waiting for video generation after {DEFAULT_TIMEOUT_SECONDS}s", - error_type="timeout", + error=( + "xAI reference-to-video requires " + f"{DEFAULT_TEXT_TO_VIDEO_MODEL}; got {resolved_model}" + ), + error_type="unsupported_model", + provider="xai", + model=resolved_model, + prompt=prompt, + ) + resolved_model = DEFAULT_TEXT_TO_VIDEO_MODEL + + clamped_duration = _clamp_duration(duration, has_reference_images=bool(refs)) + payload = { + "model": resolved_model, + "prompt": prompt, + "duration": clamped_duration, + "aspect_ratio": normalized_aspect_ratio, + "resolution": normalized_resolution, + } + if image_input: + payload["image"] = image_input + if refs: + payload["reference_images"] = refs + + return await _submit_xai_video_payload( + api_key=api_key, + base_url=base_url, + endpoint="generations", + payload=payload, + prompt=prompt, + resolved_model=resolved_model, + modality=modality_used, + aspect_ratio=normalized_aspect_ratio, + duration=clamped_duration, + operation="generate", + resolution=normalized_resolution, + ) + + +async def _run_xai_video_mutation( + *, + prompt: str, + video_url: str, + model: Optional[str], + endpoint: str, + operation: str, + duration: int, +) -> Dict[str, Any]: + """Edit or extend using a public HTTPS ``video_url`` input (``url`` on the wire).""" + api_key, base_url = _resolve_xai_credentials() + if not api_key: + return _auth_required_response(prompt) + + prompt = (prompt or "").strip() + video_input = await _video_input_from_public_url( + video_url or "", + api_key=api_key, + base_url=base_url, + ) + if not prompt: + return error_response( + error="prompt is required for xAI video edit/extend", + error_type="missing_prompt", + provider="xai", + prompt=prompt, + ) + if not video_input: + return 
error_response( + error=( + "video_url must be a public HTTPS MP4 URL " + "(the `video`/`public_url` from a prior Imagine result)" + ), + error_type="missing_video", + provider="xai", + prompt=prompt, + ) + + resolved_model = _resolve_model_for_modality( + model, + modality="text", + explicit_model=bool(model), + ) + payload: Dict[str, Any] = { + "model": resolved_model, + "prompt": prompt, + "video": video_input, + } + if endpoint == "extensions": + payload["duration"] = duration + + return await _submit_xai_video_payload( + api_key=api_key, + base_url=base_url, + endpoint=endpoint, + payload=payload, + prompt=prompt, + resolved_model=resolved_model, + modality=operation, + aspect_ratio=DEFAULT_ASPECT_RATIO, + duration=duration, + operation=operation, + ) + + +async def _edit_xai_video_async( + *, + prompt: str, + video_url: str, + model: Optional[str], +) -> Dict[str, Any]: + return await _run_xai_video_mutation( + prompt=prompt, + video_url=video_url, + model=model, + endpoint="edits", + operation="edit", + duration=DEFAULT_DURATION, + ) + + +async def _extend_xai_video_async( + *, + prompt: str, + video_url: str, + duration: Optional[int], + model: Optional[str], +) -> Dict[str, Any]: + clamped_duration = _clamp_duration( + duration, + max_seconds=10, + default=DEFAULT_EXTEND_DURATION, + ) + return await _run_xai_video_mutation( + prompt=prompt, + video_url=video_url, + model=model, + endpoint="extensions", + operation="extend", + duration=clamped_duration, + ) + + +def _auth_required_response(prompt: str) -> Dict[str, Any]: + return error_response( + error=( + "No xAI credentials found. Sign in via `hermes auth add xai-oauth` " + "(SuperGrok / Premium+) or set XAI_API_KEY from " + "https://console.x.ai/." 
+ ), + error_type="auth_required", + provider="xai", prompt=prompt, + ) + + +async def _submit_xai_video_payload( + *, + api_key: str, + base_url: str, + endpoint: str, + payload: Dict[str, Any], + prompt: str, + resolved_model: str, + modality: str, + aspect_ratio: str, + duration: int, + operation: str, + resolution: Optional[str] = None, +) -> Dict[str, Any]: + try: + from tools.xai_http import ( + build_xai_storage_options, + maybe_mark_xai_storage_notice_seen, + read_xai_imagine_storage_config, + ) + + storage_options = build_xai_storage_options( + "video_gen", + filename_prefix="hermes-xai-video", + extension="mp4", + ) + storage_notice = maybe_mark_xai_storage_notice_seen("video_gen") + storage_cfg = read_xai_imagine_storage_config("video_gen") + except Exception: + storage_options = None + storage_notice = None + storage_cfg = {"enabled": False} + if storage_options is not None: + payload["storage_options"] = storage_options + + async with httpx.AsyncClient() as client: + try: + request_id = await _submit( + client, payload, api_key=api_key, base_url=base_url, + endpoint=endpoint, + ) + except httpx.HTTPStatusError as exc: + detail = "" + try: + detail = exc.response.text[:500] + except Exception: + pass + return error_response( + error=f"xAI submit failed ({exc.response.status_code}): {detail or exc}", + error_type="api_error", provider="xai", model=resolved_model, prompt=prompt, ) - message = ( - (body.get("error", {}) or {}).get("message") - or body.get("message") - or f"xAI video generation ended with status '{status}'" + poll_result = await _poll( + client, request_id, + api_key=api_key, base_url=base_url, + timeout_seconds=DEFAULT_TIMEOUT_SECONDS, + poll_interval=DEFAULT_POLL_INTERVAL_SECONDS, ) + + status = poll_result["status"] + body = poll_result["body"] + + if status == "done": + video = body.get("video") or {} + if not isinstance(video, dict): + video = {} + file_output = video.get("file_output") if isinstance(video.get("file_output"), dict) 
else {} + file_output = file_output or {} + public_video_url, temporary_url, stored_public_url = _xai_video_output_urls(video) + if not public_video_url: + return error_response( + error="xAI video request completed without a video URL", + error_type="empty_response", + provider="xai", + model=body.get("model") or resolved_model, + prompt=prompt, + ) + extra: Dict[str, Any] = { + "request_id": request_id, + "operation": operation, + "storage_enabled": bool(storage_cfg.get("enabled")), + } + if resolution: + extra["resolution"] = resolution + if storage_notice: + extra["storage_notice"] = storage_notice + if stored_public_url: + extra["public_url"] = stored_public_url + if temporary_url: + extra["temporary_url"] = temporary_url + if file_output: + for key in ( + "filename", + "expires_at", + "public_url_expires_at", + "public_url_error", + "storage_error", + ): + if key in file_output: + extra[key] = file_output[key] + if body.get("usage"): + extra["usage"] = body["usage"] + return success_response( + video=public_video_url, + model=body.get("model") or resolved_model, + prompt=prompt, + modality=modality, + aspect_ratio=aspect_ratio, + duration=video.get("duration") or duration, + provider="xai", + extra=extra, + ) + + if status == "timeout": return error_response( - error=message, - error_type=f"xai_{status}", + error=f"Timed out waiting for xAI video request after {DEFAULT_TIMEOUT_SECONDS}s", + error_type="timeout", provider="xai", model=resolved_model, prompt=prompt, ) + message = ( + (body.get("error", {}) or {}).get("message") + or body.get("message") + or f"xAI video request ended with status '{status}'" + ) + return error_response( + error=message, + error_type=f"xai_{status}", + provider="xai", + model=resolved_model, + prompt=prompt, + ) + # --------------------------------------------------------------------------- # Plugin entry point diff --git a/plugins/video_gen/xai/plugin.yaml b/plugins/video_gen/xai/plugin.yaml index 5e3e8b1ac21..3fec62e08fa 100644 
--- a/plugins/video_gen/xai/plugin.yaml +++ b/plugins/video_gen/xai/plugin.yaml @@ -1,6 +1,6 @@ name: xai version: 1.0.0 -description: "xAI Grok Imagine video generation backend. Supports text-to-video, image-to-video, and reference-image-guided generation via the xAI async videos API." +description: "xAI Grok Imagine video generation backend. Supports text-to-video, image-to-video, reference-to-video, video editing, video extension, and stored public URLs via the xAI async videos API." author: NousResearch kind: backend requires_env: diff --git a/tests/plugins/image_gen/test_xai_provider.py b/tests/plugins/image_gen/test_xai_provider.py index d1e7beab15e..cf9708dae1d 100644 --- a/tests/plugins/image_gen/test_xai_provider.py +++ b/tests/plugins/image_gen/test_xai_provider.py @@ -16,9 +16,17 @@ import pytest @pytest.fixture(autouse=True) -def _fake_api_key(monkeypatch): +def _fake_api_key(monkeypatch, tmp_path): """Ensure XAI_API_KEY is set for all tests.""" monkeypatch.setenv("XAI_API_KEY", "test-key-12345") + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + try: + import hermes_cli.config as cfg_mod + + if hasattr(cfg_mod, "_invalidate_load_config_cache"): + cfg_mod._invalidate_load_config_cache() + except Exception: + pass # --------------------------------------------------------------------------- @@ -80,6 +88,13 @@ class TestXAIImageGenProvider: assert schema["env_vars"] == [] assert schema["post_setup"] == "xai_grok" + def test_capabilities_expose_total_source_image_limit(self): + from plugins.image_gen.xai import XAIImageGenProvider + + caps = XAIImageGenProvider().capabilities() + assert caps["max_reference_images"] == 2 + assert caps["max_source_images"] == 3 + # --------------------------------------------------------------------------- # Config tests @@ -318,6 +333,131 @@ class TestGenerate: f"resolution must be the literal '1k' or '2k', got {payload['resolution']!r}" ) + def test_image_edit_rejects_bare_file_id_input(self): + from 
plugins.image_gen.xai import XAIImageGenProvider + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = {"data": [{"url": "https://xai.image/edited.png"}]} + + with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp) as mock_post, \ + patch("plugins.image_gen.xai.save_url_image", return_value="/tmp/edited.png"): + provider = XAIImageGenProvider() + result = provider.generate( + prompt="make the robot red", + image_url="file_03eb65b1-aa97-482f-9ef0-b04f9172ea00", + ) + + assert result["success"] is False + assert result["error_type"] == "invalid_image_url" + mock_post.assert_not_called() + + def test_image_edit_accepts_public_https_url(self): + from plugins.image_gen.xai import XAIImageGenProvider + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = {"data": [{"url": "https://xai.image/edited.png"}]} + + public_url = "https://files-cdn.x.ai/token/file_abc.png" + with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp) as mock_post, \ + patch("plugins.image_gen.xai.save_url_image", return_value="/tmp/edited.png"): + provider = XAIImageGenProvider() + result = provider.generate( + prompt="make the robot red", + image_url=public_url, + ) + + assert result["success"] is True + payload = mock_post.call_args.kwargs.get("json") or mock_post.call_args[1].get("json") + assert payload["image"] == {"url": public_url, "type": "image_url"} + + def test_multi_image_edit_rejects_bare_file_id_inputs(self): + from plugins.image_gen.xai import XAIImageGenProvider + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = {"data": [{"url": "https://xai.image/edited.png"}]} + + with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp) as mock_post, \ + patch("plugins.image_gen.xai.save_url_image", 
return_value="/tmp/edited.png"): + provider = XAIImageGenProvider() + result = provider.generate( + prompt="combine these robots into one product shot", + image_url="file_03eb65b1-aa97-482f-9ef0-b04f9172ea00", + reference_image_urls=[ + "file_54b48d6d-28ad-4982-9d72-bd3ac677c9bc", + "file_aa11bb22-cc33-44dd-88ee-ff0011223344", + ], + ) + + assert result["success"] is False + assert result["error_type"] == "invalid_image_url" + mock_post.assert_not_called() + + def test_multi_image_edit_rejects_more_than_three_sources(self): + from plugins.image_gen.xai import XAIImageGenProvider + + provider = XAIImageGenProvider() + result = provider.generate( + prompt="combine too many references", + image_url="file_1", + reference_image_urls=["file_2", "file_3", "file_4"], + ) + + assert result["success"] is False + assert result["error_type"] == "too_many_references" + + def test_storage_options_are_sent_by_default(self): + from plugins.image_gen.xai import XAIImageGenProvider + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = {"data": [{"b64_json": "dGVzdA=="}]} + + with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp) as mock_post, \ + patch("plugins.image_gen.xai.save_b64_image", return_value="/tmp/test.png"): + provider = XAIImageGenProvider() + provider.generate(prompt="test") + + payload = mock_post.call_args.kwargs.get("json") or mock_post.call_args[1].get("json") + assert payload["storage_options"]["public_url"] is True + assert "expires_after" not in payload["storage_options"] + assert payload["storage_options"]["filename"].endswith(".png") + + def test_public_url_file_output_wins_over_temporary_url(self): + from plugins.image_gen.xai import XAIImageGenProvider + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = { + "data": [{ + "url": "https://imgen.x.ai/xai-tmp-imgen-test.jpeg", + 
"file_output": { + "file_id": "file-123", + "filename": "stored.png", + "public_url": "https://xai-files.example/stored.png", + "public_url_expires_at": 1234567890, + }, + }], + } + + with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp), \ + patch("plugins.image_gen.xai.save_url_image") as mock_save_url: + provider = XAIImageGenProvider() + result = provider.generate(prompt="A cat playing piano") + + assert result["success"] is True + assert result["image"] == "https://xai-files.example/stored.png" + assert result["public_url"] == "https://xai-files.example/stored.png" + assert "file_id" not in result + mock_save_url.assert_not_called() + # --------------------------------------------------------------------------- # Registration test @@ -334,3 +474,21 @@ class TestRegistration: provider = mock_ctx.register_image_gen_provider.call_args[0][0] assert isinstance(provider, XAIImageGenProvider) assert provider.name == "xai" + + +def test_xai_image_field_expands_user_home(tmp_path, monkeypatch): + """A ~-prefixed local image path must load (expanduser), not raise io_error. + + Pre-flight validation uses ``Path(source).expanduser()`` so a ``~/...`` path + passes; ``_xai_image_field`` must expand it too or the load fails spuriously. 
+ """ + from plugins.image_gen.xai import _xai_image_field + + monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.setenv("USERPROFILE", str(tmp_path)) + img = tmp_path / "pic.png" + img.write_bytes(b"\x89PNG\r\n\x1a\n") + + field = _xai_image_field("~/pic.png") + assert field["type"] == "image_url" + assert field["url"].startswith("data:image/png;base64,") diff --git a/tests/plugins/video_gen/test_xai_plugin.py b/tests/plugins/video_gen/test_xai_plugin.py index 3df9086afab..eb495b96951 100644 --- a/tests/plugins/video_gen/test_xai_plugin.py +++ b/tests/plugins/video_gen/test_xai_plugin.py @@ -32,9 +32,9 @@ def test_xai_provider_lists_text_and_current_image_video_models(): ids = [model["id"] for model in models] assert ids[0] == "grok-imagine-video" - assert ids[1] == "grok-imagine-video-1.5-preview" + assert ids[1] == "grok-imagine-video-1.5" assert models[1]["modalities"] == ["image"] - assert models[1]["aliases"] == ["grok-imagine-video-1.5-2026-05-30"] + assert "aliases" not in models[1] def test_xai_routes_default_models_by_modality(): @@ -49,7 +49,7 @@ def test_xai_routes_default_models_by_modality(): "grok-imagine-video", modality="image", explicit_model=False, - ) == "grok-imagine-video-1.5-preview" + ) == "grok-imagine-video-1.5" assert _resolve_model_for_modality( "grok-imagine-video-1.5-preview", modality="text", @@ -62,15 +62,11 @@ def test_xai_routes_default_models_by_modality(): ) == "grok-imagine-video-1.5-preview" -def test_xai_capabilities_text_and_image_only(): - """xAI was previously advertised with edit/extend operations. 
The - simplified surface only exposes text-to-video and image-to-video — - confirm those are the only modalities advertised.""" +def test_xai_capabilities_keep_generate_surface_only(): from plugins.video_gen.xai import XAIVideoGenProvider caps = XAIVideoGenProvider().capabilities() assert caps["modalities"] == ["text", "image"] - # No 'operations' key in the simplified surface assert "operations" not in caps assert caps["max_reference_images"] == 7 @@ -148,3 +144,45 @@ def test_xai_no_operation_kwarg(): assert result["success"] is False # auth_required, NOT some signature error assert result["error_type"] in {"auth_required", "api_error"} + + +def test_xai_video_output_urls_prefers_stored_public_url(): + from plugins.video_gen.xai import _xai_video_output_urls + + public_url, temporary, stored = _xai_video_output_urls({ + "url": "https://vidgen.x.ai/xai-vidgen-bucket/out.mp4", + "file_output": { + "public_url": "https://files-cdn.x.ai/token/file_abc.mp4", + "file_id": "file_abc", + }, + }) + assert public_url == "https://files-cdn.x.ai/token/file_abc.mp4" + assert stored == "https://files-cdn.x.ai/token/file_abc.mp4" + assert temporary == "https://vidgen.x.ai/xai-vidgen-bucket/out.mp4" + + +@pytest.mark.asyncio +async def test_video_input_from_public_url_uses_url_field(): + from plugins.video_gen.xai import _video_input_from_public_url + + url = "https://files-cdn.x.ai/kRQVP6PRQlioVAUNC3GAdg/file_1faca9c3-9411-46ad-bb41-b9b8527789e6.mp4" + result = await _video_input_from_public_url( + url, + api_key="test-key", + base_url="https://api.x.ai/v1", + ) + assert result == {"url": url} + + +def test_video_input_from_public_url_rejects_bare_file_id(): + import asyncio + from plugins.video_gen.xai import _video_input_from_public_url + + result = asyncio.run( + _video_input_from_public_url( + "file_1faca9c3-9411-46ad-bb41-b9b8527789e6", + api_key="test-key", + base_url="https://api.x.ai/v1", + ) + ) + assert result is None diff --git
a/tests/plugins/video_gen/test_xai_plugin_integration.py b/tests/plugins/video_gen/test_xai_plugin_integration.py index 22693d763e9..5a7c9930ed1 100644 --- a/tests/plugins/video_gen/test_xai_plugin_integration.py +++ b/tests/plugins/video_gen/test_xai_plugin_integration.py @@ -122,7 +122,7 @@ class TestXAIPayload: provider, captured = xai_provider provider.generate("animate this", image_url="https://example.com/cat.png") payload = _last_post(captured)["json"] - assert payload["model"] == "grok-imagine-video-1.5-preview" + assert payload["model"] == "grok-imagine-video-1.5" assert payload["image"] == {"url": "https://example.com/cat.png"} def test_local_image_path_is_sent_as_data_uri(self, xai_provider, tmp_path): @@ -133,7 +133,7 @@ class TestXAIPayload: provider.generate("animate this", image_url=str(image_path)) payload = _last_post(captured)["json"] - assert payload["model"] == "grok-imagine-video-1.5-preview" + assert payload["model"] == "grok-imagine-video-1.5" assert payload["image"]["url"].startswith("data:image/png;base64,") def test_explicit_model_override_is_honored_for_image(self, xai_provider): diff --git a/tests/tools/test_video_generation_dispatch.py b/tests/tools/test_video_generation_dispatch.py index 36551acbe02..0c4ded193a5 100644 --- a/tests/tools/test_video_generation_dispatch.py +++ b/tests/tools/test_video_generation_dispatch.py @@ -35,6 +35,9 @@ class _RecordingProvider(VideoGenProvider): def default_model(self) -> Optional[str]: return "model-a" + def capabilities(self) -> Dict[str, Any]: + return {"modalities": ["text", "image"]} + def generate(self, prompt, **kwargs): self.last_kwargs = {"prompt": prompt, **kwargs} modality = "image" if kwargs.get("image_url") else "text" @@ -113,14 +116,25 @@ class TestUnifiedDispatch: assert "error" in result assert "prompt" in result["error"].lower() + def test_edit_extend_args_are_rejected_by_generate_tool(self): + provider = _RecordingProvider("rec") + video_gen_registry.register_provider(provider) + 
result = self._run({ + "prompt": "make it rain", + "operation": "edit", + "video_url": "https://example.com/in.mp4", + }) + assert "error" in result + assert "provider-specific tool" in result["error"] + def test_provider_exception_caught(self): video_gen_registry.register_provider(_RaisingProvider()) result = self._run({"prompt": "x"}) assert result["success"] is False assert result["error_type"] == "provider_exception" - def test_operation_field_not_in_schema(self): - """Make sure we removed the operation field from the schema.""" + def test_edit_extend_fields_not_in_schema(self): from tools.video_generation_tool import VIDEO_GENERATE_SCHEMA - assert "operation" not in VIDEO_GENERATE_SCHEMA["parameters"]["properties"] - assert "video_url" not in VIDEO_GENERATE_SCHEMA["parameters"]["properties"] + props = VIDEO_GENERATE_SCHEMA["parameters"]["properties"] + assert "operation" not in props + assert "video_url" not in props diff --git a/tests/tools/test_video_generation_dynamic_schema.py b/tests/tools/test_video_generation_dynamic_schema.py index 590215468b5..a9565dab3e9 100644 --- a/tests/tools/test_video_generation_dynamic_schema.py +++ b/tests/tools/test_video_generation_dynamic_schema.py @@ -1,4 +1,4 @@ -"""Tests for the dynamic schema builder under the simplified surface.""" +"""Tests for the dynamic schema builder.""" from __future__ import annotations @@ -91,20 +91,13 @@ class TestDynamicSchemaBuilder: assert "No video backend is configured" in desc assert "hermes tools" in desc - def test_does_not_mention_edit_or_extend(self, cfg_home): - """The simplified surface only does text→video and image→video.
- The description must not mention edit/extend anywhere.""" + def test_generic_description_keeps_edit_extend_out_of_surface(self, cfg_home): from tools.video_generation_tool import _build_dynamic_video_schema, _GENERIC_DESCRIPTION desc = _build_dynamic_video_schema()["description"] - # Block words that would suggest functionality we removed - assert "edit" not in desc.lower() or "audio" in desc.lower() # 'audio' contains 'audi' not 'edit' - # Stronger: no occurrence of the words "edit" or "extend" as standalone - for forbidden in (" edit ", " edits ", " extend ", " extends "): - assert forbidden not in desc.lower(), f"description leaks '{forbidden.strip()}'" - # Sanity: the generic blurb itself is also clean - for forbidden in ("edit", "extend"): - assert forbidden not in _GENERIC_DESCRIPTION.lower() + assert "Video edit/extend workflows are not part of this unified surface" in desc + assert "operation='edit'" not in _GENERIC_DESCRIPTION + assert "operation='extend'" not in _GENERIC_DESCRIPTION def test_both_modalities_advertises_auto_routing(self, cfg_home): from tools.video_generation_tool import _build_dynamic_video_schema @@ -123,7 +116,6 @@ class TestDynamicSchemaBuilder: assert "Active backend: Both" in desc assert "text-to-video" in desc and "image-to-video" in desc assert "routes automatically" in desc - # operations bullet is gone assert "operations supported" not in desc def test_image_only_model_warns_about_required_image_url(self, cfg_home): diff --git a/tests/tools/test_video_generation_tool_surface_matrix.py b/tests/tools/test_video_generation_tool_surface_matrix.py index dfe1c762bb0..a338b20a94d 100644 --- a/tests/tools/test_video_generation_tool_surface_matrix.py +++ b/tests/tools/test_video_generation_tool_surface_matrix.py @@ -79,10 +79,21 @@ def matrix_env(tmp_path, monkeypatch): xai_calls.append({"url": url, "json": json}) return _Resp({"request_id": "req-1"}) async def get(self, url, headers=None, timeout=None): + payload = 
xai_calls[-1]["json"] + storage_options = payload.get("storage_options") or {} return _Resp({ "status": "done", - "video": {"url": "https://xai-cdn/out.mp4", "duration": 8}, - "model": xai_calls[-1]["json"].get("model", "grok-imagine-video"), + "video": { + "url": "https://xai-cdn/out.mp4", + "duration": 8, + "file_output": { + "file_id": "file-123", + "filename": storage_options.get("filename", "out.mp4"), + "public_url": "https://xai-files.example/out.mp4", + "public_url_expires_at": 1234567890, + }, + }, + "model": payload.get("model", "grok-imagine-video"), }) import plugins.video_gen.xai as xai_plugin monkeypatch.setattr(xai_plugin.httpx, "AsyncClient", lambda: _Client()) @@ -100,7 +111,7 @@ def matrix_env(tmp_path, monkeypatch): return tmp_path, fal_calls, xai_calls -def _invoke_tool(home, cfg: dict, args: dict) -> dict: +def _invoke_tool(home, cfg: dict, args: dict, tool_name: str = "video_generate") -> dict: """Write config, invoke the registered tool handler, return parsed JSON.""" (home / "config.yaml").write_text(yaml.safe_dump(cfg)) import hermes_cli.config as cfg_mod @@ -108,9 +119,9 @@ def _invoke_tool(home, cfg: dict, args: dict) -> dict: cfg_mod._invalidate_load_config_cache() from tools.registry import discover_builtin_tools, registry - if "video_generate" not in registry._tools: + if tool_name not in registry._tools: discover_builtin_tools() - handler = registry._tools["video_generate"].handler + handler = registry._tools[tool_name].handler return json.loads(handler(args)) @@ -205,6 +216,11 @@ def test_xai_text_only_via_tool_surface(matrix_env): assert payload["model"] == "grok-imagine-video" assert "image" not in payload assert "reference_images" not in payload + assert payload["storage_options"]["public_url"] is True + assert "expires_after" not in payload["storage_options"] + assert result["video"] == "https://xai-files.example/out.mp4" + assert result["public_url"] == "https://xai-files.example/out.mp4" + assert result.get("temporary_url") == 
"https://xai-cdn/out.mp4" def test_xai_text_plus_image_via_tool_surface(matrix_env): @@ -222,10 +238,157 @@ def test_xai_text_plus_image_via_tool_surface(matrix_env): assert len(xai_calls) == 1 assert xai_calls[0]["url"].endswith("/videos/generations") payload = xai_calls[0]["json"] or {} - assert payload["model"] == "grok-imagine-video-1.5-preview" + assert payload["model"] == "grok-imagine-video-1.5" assert payload["image"] == {"url": "https://example.com/img.png"} +def test_xai_image_to_video_rejects_bare_file_id_via_tool_surface(matrix_env): + home, _, xai_calls = matrix_env + + result = _invoke_tool( + home, + {"video_gen": {"provider": "xai"}}, + { + "prompt": "animate this robot waving", + "image_url": "file_03eb65b1-aa97-482f-9ef0-b04f9172ea00", + }, + ) + assert result["success"] is False + assert result.get("error_type") == "invalid_image_url" + assert len(xai_calls) == 0 + + +def test_xai_reference_to_video_via_tool_surface(matrix_env): + home, _, xai_calls = matrix_env + + result = _invoke_tool( + home, + {"video_gen": {"provider": "xai"}}, + { + "prompt": "put the jacket from the reference on the runway model", + "reference_image_urls": [ + "https://example.com/model.png", + "https://example.com/jacket.png", + ], + "duration": 15, + }, + ) + assert result["success"] is True + assert result["modality"] == "reference" + assert result["provider"] == "xai" + + payload = xai_calls[0]["json"] or {} + assert xai_calls[0]["url"].endswith("/videos/generations") + assert payload["model"] == "grok-imagine-video" + assert payload["duration"] == 10 + assert payload["reference_images"] == [ + {"url": "https://example.com/model.png"}, + {"url": "https://example.com/jacket.png"}, + ] + + +def test_xai_reference_to_video_rejects_bare_file_ids_via_tool_surface(matrix_env): + home, _, xai_calls = matrix_env + + result = _invoke_tool( + home, + {"video_gen": {"provider": "xai"}}, + { + "prompt": "use these references for a robot product shot", + "reference_image_urls": [ 
+ "file_03eb65b1-aa97-482f-9ef0-b04f9172ea00", + "file_54b48d6d-28ad-4982-9d72-bd3ac677c9bc", + ], + }, + ) + assert result["success"] is False + assert result.get("error_type") == "invalid_reference_image_urls" + assert len(xai_calls) == 0 + + +def test_xai_video_edit_via_tool_surface(matrix_env): + home, _, xai_calls = matrix_env + + result = _invoke_tool( + home, + {"video_gen": {"provider": "xai"}}, + { + "prompt": "make the sky stormy", + "video_url": "https://example.com/source.mp4", + }, + tool_name="xai_video_edit", + ) + assert result["success"] is True + assert result["modality"] == "edit" + + payload = xai_calls[0]["json"] or {} + assert xai_calls[0]["url"].endswith("/videos/edits") + assert payload["model"] == "grok-imagine-video" + assert payload["video"] == {"url": "https://example.com/source.mp4"} + assert "duration" not in payload + assert "aspect_ratio" not in payload + assert "resolution" not in payload + + +def test_xai_video_extend_via_tool_surface(matrix_env): + home, _, xai_calls = matrix_env + + result = _invoke_tool( + home, + {"video_gen": {"provider": "xai"}}, + { + "prompt": "the camera pulls back to reveal the city", + "video_url": "https://example.com/source.mp4", + "duration": 15, + }, + tool_name="xai_video_extend", + ) + assert result["success"] is True + assert result["modality"] == "extend" + + payload = xai_calls[0]["json"] or {} + assert xai_calls[0]["url"].endswith("/videos/extensions") + assert payload["model"] == "grok-imagine-video" + assert payload["video"] == {"url": "https://example.com/source.mp4"} + assert payload["duration"] == 10 + + +def test_xai_video_edit_rejects_bare_file_id_via_tool_surface(matrix_env): + home, _, xai_calls = matrix_env + + result = _invoke_tool( + home, + {"video_gen": {"provider": "xai"}}, + { + "prompt": "make the sky stormy", + "video_url": "file-123", + }, + tool_name="xai_video_edit", + ) + assert result.get("success") is not True + assert "error" in result + assert "url" in 
result["error"].lower() + assert len(xai_calls) == 0 + + +def test_xai_video_extend_rejects_bare_file_id_via_tool_surface(matrix_env): + home, _, xai_calls = matrix_env + + result = _invoke_tool( + home, + {"video_gen": {"provider": "xai"}}, + { + "prompt": "continue into a sunrise", + "video_url": "file_25ac1c31-d6d8-48b2-8504-a97d282310c4", + }, + tool_name="xai_video_extend", + ) + assert result.get("success") is not True + assert "error" in result + assert "url" in result["error"].lower() + assert len(xai_calls) == 0 + + def test_xai_explicit_model_override_via_tool_surface(matrix_env): home, _, xai_calls = matrix_env diff --git a/tests/tools/test_xai_http_storage.py b/tests/tools/test_xai_http_storage.py new file mode 100644 index 00000000000..536077f4292 --- /dev/null +++ b/tests/tools/test_xai_http_storage.py @@ -0,0 +1,132 @@ +"""Tests for xAI Imagine storage helper behavior.""" + +from __future__ import annotations + +import yaml + + +def _invalidate_config_cache(): + try: + import hermes_cli.config as cfg_mod + + if hasattr(cfg_mod, "_invalidate_load_config_cache"): + cfg_mod._invalidate_load_config_cache() + except Exception: + pass + + +def test_storage_defaults_to_permanent_public_urls(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _invalidate_config_cache() + + from tools.xai_http import build_xai_storage_options + + storage = build_xai_storage_options( + "image_gen", + filename_prefix="hermes-xai-image", + extension="png", + ) + + assert storage is not None + assert storage["public_url"] is True + assert "expires_after" not in storage + assert storage["filename"].startswith("hermes-xai-image-") + assert storage["filename"].endswith(".png") + + +def test_storage_can_be_disabled(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + (tmp_path / "config.yaml").write_text(yaml.safe_dump({ + "video_gen": { + "xai": { + "storage": { + "enabled": False, + }, + }, + }, + })) + _invalidate_config_cache() + + 
from tools.xai_http import build_xai_storage_options, xai_storage_notice_text + + assert build_xai_storage_options( + "video_gen", + filename_prefix="hermes-xai-video", + extension="mp4", + ) is None + assert xai_storage_notice_text("video_gen") == "" + + +def test_storage_can_be_permanent(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + (tmp_path / "config.yaml").write_text(yaml.safe_dump({ + "image_gen": { + "xai": { + "storage": { + "expires_after": "permanent", + }, + }, + }, + })) + _invalidate_config_cache() + + from tools.xai_http import build_xai_storage_options + + storage = build_xai_storage_options( + "image_gen", + filename_prefix="hermes-xai-image", + extension="png", + ) + + assert storage is not None + assert "expires_after" not in storage + + +def test_storage_can_use_finite_retention(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + (tmp_path / "config.yaml").write_text(yaml.safe_dump({ + "image_gen": { + "xai": { + "storage": { + "expires_after": 172800, + }, + }, + }, + })) + _invalidate_config_cache() + + from tools.xai_http import build_xai_storage_options + + storage = build_xai_storage_options( + "image_gen", + filename_prefix="hermes-xai-image", + extension="png", + ) + + assert storage is not None + assert storage["expires_after"] == 172800 + + +def test_invalid_storage_retention_falls_back_to_bounded_ttl(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + (tmp_path / "config.yaml").write_text(yaml.safe_dump({ + "video_gen": { + "xai": { + "storage": { + "expires_after": "definitely-not-a-duration", + }, + }, + }, + })) + _invalidate_config_cache() + + from tools.xai_http import build_xai_storage_options + + storage = build_xai_storage_options( + "video_gen", + filename_prefix="hermes-xai-video", + extension="mp4", + ) + + assert storage is not None + assert storage["expires_after"] == 172800 diff --git a/tools/video_generation_tool.py 
b/tools/video_generation_tool.py index 789ead6a054..fe20ca0de0b 100644 --- a/tools/video_generation_tool.py +++ b/tools/video_generation_tool.py @@ -18,13 +18,11 @@ Generation. Unified surface --------------- -One tool covers the common cases β€” text-to-video, image-to-video, video -edit, video extend β€” with a compact schema: +One tool covers the common cases - text-to-video, image-to-video, and +reference-to-video - with a compact schema: - prompt text instruction (required for generate/edit) - operation "generate" | "edit" | "extend" - image_url drives image-to-video when operation=generate - video_url source video for edit/extend + prompt text instruction (required) + image_url drives image-to-video reference_image_urls list, up to provider-declared cap duration seconds (provider clamps) aspect_ratio "16:9" | "9:16" | "1:1" | ... @@ -38,6 +36,9 @@ Providers ignore parameters they do not support. The tool layer does **lightweight** validation (type/required-prompt) and lets each provider do its own clamping inside :meth:`VideoGenProvider.generate` β€” that keeps the tool surface stable as new providers ship with different capabilities. + +Video edit and video extend are intentionally not exposed here; providers with +those workflows should expose separate tools. """ from __future__ import annotations @@ -80,21 +81,20 @@ VIDEO_GENERATE_SCHEMA: Dict[str, Any] = { "image_url": { "type": "string", "description": ( - "Optional public URL of a still image. When provided, " + "Optional public HTTPS URL of a still image. When provided, " "the active backend routes to its image-to-video " "endpoint (animate the image); when omitted, it routes " - "to text-to-video. Pass either a URL the user supplied " - "or a path/URL from the conversation." + "to text-to-video. For xAI chaining, use the `image` or " + "`public_url` HTTPS URL from a prior Imagine result." 
), }, "reference_image_urls": { "type": "array", "items": {"type": "string"}, "description": ( - "Optional list of reference image URLs (style or " - "character refs). Only supported by some backends; " - "the active backend's description below indicates whether " - "this is honored and what the max is." + "Optional list of public HTTPS reference image URLs " + "(style or character refs). For xAI chaining, use " + "`image` or `public_url` from prior Imagine results." ), }, "duration": { @@ -324,6 +324,11 @@ def _handle_video_generate(args: Dict[str, Any], **_kw: Any) -> str: # endpoint but our surface always needs a prompt. if not prompt: return tool_error("prompt is required for video generation") + if "operation" in args or "video_url" in args: + return tool_error( + "video_generate only supports text-to-video, image-to-video, and " + "reference-to-video; use a provider-specific tool for video edit/extend" + ) # Resolve the active provider. configured = _read_configured_video_provider() @@ -398,13 +403,13 @@ def _handle_video_generate(args: Dict[str, Any], **_kw: Any) -> str: # Dynamic schema β€” reflect the active backend's actual capabilities # --------------------------------------------------------------------------- # -# Why dynamic: the user's configured backend determines which operations -# (generate/edit/extend), modalities (text / image / refs), aspect ratios, -# resolutions, durations, and audio/negative-prompt flags are real. A model -# that calls video_generate without knowing the active backend wastes a -# turn on something like "fal-ai/veo3.1/image-to-video requires image_url". -# Surfacing the per-model surface in the description means the model -# usually gets the call right on the first try. +# Why dynamic: the user's configured backend determines which modalities +# (text / image / refs), aspect ratios, resolutions, durations, and +# audio/negative-prompt flags are real. 
A model that calls video_generate +# without knowing the active backend wastes a turn on something like +# "fal-ai/veo3.1/image-to-video requires image_url". Surfacing the per-model +# surface in the description means the model usually gets the call right on +# the first try. # # Memoization: model_tools.get_tool_definitions() keys its cache on # config.yaml mtime, so when the user changes provider/model via @@ -412,11 +417,12 @@ def _handle_video_generate(args: Dict[str, Any], **_kw: Any) -> str: _GENERIC_DESCRIPTION = ( - "Generate a video from a text prompt (text-to-video) or animate a " - "still image (image-to-video) using the user's configured video " - "generation backend. Pass `image_url` to animate that image; omit it " - "to generate from text alone. The backend auto-routes to the right " - "endpoint. The backend and model family are user-configured via " + "Generate a video from a text prompt (text-to-video), animate a " + "still image (image-to-video), or guide generation with reference images. " + "Pass `image_url` to animate an image or `reference_image_urls` for " + "reference-to-video. Video edit/extend workflows are not part of this " + "unified surface; use a dedicated provider-specific tool when one is " + "available. The backend and model family are user-configured via " "`hermes tools` β†’ Video Generation; the agent does not pick them. " "Long-running generations may take 30 seconds to several minutes β€” " "the call blocks until the video is ready. Returns the result in the " @@ -542,6 +548,21 @@ def _build_dynamic_video_schema() -> Dict[str, Any]: max_refs = caps.get("max_reference_images") or 0 if max_refs: parts.append(f"- reference_image_urls: up to {max_refs} images") + if configured == "xai": + parts.append( + "- chaining: for edit/extend pass the public HTTPS MP4 in `video` " + "or `public_url` from the prior Imagine result (files-cdn). 
For " + "image-to-video / reference-to-video pass public image URLs the " + "same way" + ) + try: + from tools.xai_http import xai_storage_notice_text + + notice = xai_storage_notice_text("video_gen") + except Exception: + notice = "" + if notice: + parts.append(f"- storage: {notice}") return {"description": "\n".join(parts)} diff --git a/tools/xai_http.py b/tools/xai_http.py index 8e94b64aa4b..fb1f523175f 100644 --- a/tools/xai_http.py +++ b/tools/xai_http.py @@ -2,9 +2,15 @@ from __future__ import annotations +import datetime import json import os -from typing import Dict +import uuid +from typing import Any, Dict, Optional + + +MAX_XAI_STORAGE_EXPIRES_AFTER_SECONDS = 30 * 24 * 60 * 60 +SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS = 2 * 24 * 60 * 60 def has_xai_credentials() -> bool: @@ -72,6 +78,149 @@ def hermes_xai_user_agent() -> str: return f"Hermes-Agent/{__version__}" +def _load_config_section(section_name: str) -> Dict[str, Any]: + """Return a top-level Hermes config section as a dict, or empty.""" + try: + from hermes_cli.config import load_config + + cfg = load_config() + section = cfg.get(section_name) if isinstance(cfg, dict) else None + return section if isinstance(section, dict) else {} + except Exception: + return {} + + +def _coerce_bool(value: Any, default: bool) -> bool: + if isinstance(value, bool): + return value + if isinstance(value, str): + normalized = value.strip().lower() + if normalized in {"1", "true", "yes", "on", "enabled"}: + return True + if normalized in {"0", "false", "no", "off", "disabled"}: + return False + return default + + +def _coerce_expires_after(value: Any) -> Optional[int]: + """Normalize an xAI storage TTL. + + Returns: + int seconds for an expiring file, + None for permanent storage (omit expires_after on the wire). 
+ """ + if value is None: + return None + if isinstance(value, str): + normalized = value.strip().lower() + if normalized in {"", "default"}: + return None + if normalized in {"none", "null", "never", "permanent", "forever", "0"}: + return None + try: + value = int(normalized) + except ValueError: + return SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS + if isinstance(value, (int, float)): + seconds = int(value) + if seconds <= 0: + return None + return min(seconds, MAX_XAI_STORAGE_EXPIRES_AFTER_SECONDS) + return SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS + + +def read_xai_imagine_storage_config(section_name: str) -> Dict[str, Any]: + """Read storage settings for xAI Imagine under image_gen/video_gen config. + + Supported config shape: + + image_gen: + xai: + storage: + enabled: true + public_url: true + expires_after: null # omit for permanent public URLs + + The same shape is accepted under ``video_gen.xai.storage``. Storage is on + by default so xAI returns permanent public URLs instead of short-lived CDN URLs. 
def build_xai_storage_options(
    section_name: str,
    *,
    filename_prefix: str,
    extension: str,
) -> Optional[Dict[str, Any]]:
    """Return an xAI ``storage_options`` payload, or None when disabled.

    Args:
        section_name: Config section passed through to
            ``read_xai_imagine_storage_config``.
        filename_prefix: Leading part of the generated filename.
        extension: File extension, with or without leading dots; an empty
            extension becomes ``bin``.

    Returns:
        ``None`` when the storage config has ``enabled`` false. Otherwise a
        dict with ``filename`` (``<prefix>-<YYYYmmdd-HHMMSS>-<8 hex>.<ext>``,
        timestamp in UTC) and ``public_url``; ``expires_after`` is included
        only when the configured value is not ``None``.
    """
    cfg = read_xai_imagine_storage_config(section_name)
    if not cfg["enabled"]:
        return None

    # ``datetime.timezone.utc`` instead of the ``datetime.UTC`` alias: the
    # alias only exists on Python 3.11+, the timezone object is the same.
    now = datetime.datetime.now(datetime.timezone.utc)
    ts = now.strftime("%Y%m%d-%H%M%S")
    # Random suffix keeps filenames distinct within the same second.
    short = uuid.uuid4().hex[:8]
    ext = extension.lstrip(".") or "bin"
    payload: Dict[str, Any] = {
        "filename": f"{filename_prefix}-{ts}-{short}.{ext}",
        "public_url": bool(cfg["public_url"]),
    }
    if cfg["expires_after"] is not None:
        payload["expires_after"] = cfg["expires_after"]
    return payload
def maybe_mark_xai_storage_notice_seen(section_name: str) -> Optional[str]:
    """Return the storage notice once per Hermes home, then mark it seen.

    Args:
        section_name: Config section used both to build the notice text and
            to name the marker file under ``<hermes home>/state``.

    Returns:
        ``None`` when there is no notice to show or the marker file already
        exists. Otherwise the notice text; the marker file is created as a
        side effect. If the marker cannot be checked or written, the notice
        is still returned (best effort).
    """
    notice = xai_storage_notice_text(section_name)
    if not notice:
        return None
    try:
        from hermes_constants import get_hermes_home

        marker_dir = get_hermes_home() / "state"
        marker_dir.mkdir(parents=True, exist_ok=True)
        marker = marker_dir / f"{section_name}_xai_storage_notice_seen"
        # ``datetime.timezone.utc`` works on every supported Python; the
        # ``datetime.UTC`` alias is 3.11+ and its AttributeError would be
        # swallowed below, leaving the marker unwritten forever.
        stamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
        try:
            # Exclusive create: checks for and claims the marker in one step,
            # so two concurrent callers cannot both report "first use".
            with marker.open("x", encoding="utf-8") as handle:
                handle.write(stamp + "\n")
        except FileExistsError:
            return None
        return notice
    except Exception:
        # Deliberate best effort: failing to persist the marker must not
        # hide the billing notice from the user.
        return notice
""" + try: + from hermes_cli.auth import resolve_xai_oauth_runtime_credentials + + creds = resolve_xai_oauth_runtime_credentials(force_refresh=force_refresh) + access_token = str(creds.get("api_key") or "").strip() + base_url = str(creds.get("base_url") or "").strip().rstrip("/") + if access_token: + return { + "provider": "xai-oauth", + "api_key": access_token, + "base_url": base_url or "https://api.x.ai/v1", + } + except Exception: + pass + if not force_refresh: try: from hermes_cli.runtime_provider import resolve_runtime_provider @@ -104,21 +268,6 @@ def resolve_xai_http_credentials(*, force_refresh: bool = False) -> Dict[str, st except Exception: pass - try: - from hermes_cli.auth import resolve_xai_oauth_runtime_credentials - - creds = resolve_xai_oauth_runtime_credentials(force_refresh=force_refresh) - access_token = str(creds.get("api_key") or "").strip() - base_url = str(creds.get("base_url") or "").strip().rstrip("/") - if access_token: - return { - "provider": "xai-oauth", - "api_key": access_token, - "base_url": base_url or "https://api.x.ai/v1", - } - except Exception: - pass - api_key = str(get_env_value("XAI_API_KEY") or "").strip() base_url = str(get_env_value("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/") return { diff --git a/tools/xai_video_tools.py b/tools/xai_video_tools.py new file mode 100644 index 00000000000..db63cc925cf --- /dev/null +++ b/tools/xai_video_tools.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python3 +"""xAI-specific Imagine video edit and extend tools.""" + +from __future__ import annotations + +import json +from typing import Any, Dict, Optional + +from hermes_cli.config import load_config +from plugins.video_gen.xai import ( + has_xai_video_credentials, + run_xai_video_edit, + run_xai_video_extend, +) +from tools.registry import registry, tool_error + + +def _configured_for_xai_video() -> bool: + try: + cfg = load_config() + except Exception: + return False + section = cfg.get("video_gen") if isinstance(cfg, dict) 
else None + return isinstance(section, dict) and section.get("provider") == "xai" + + +def _check_xai_video_requirements() -> bool: + return _configured_for_xai_video() and has_xai_video_credentials() + + +def _clean_string(value: Any) -> Optional[str]: + if isinstance(value, str) and value.strip(): + return value.strip() + return None + + +def _coerce_int(value: Any) -> Optional[int]: + if value is None: + return None + if isinstance(value, bool): + return None + try: + return int(value) + except (TypeError, ValueError): + return None + + +def _provider_not_configured_error() -> str: + return json.dumps({ + "success": False, + "error": ( + "xAI video edit/extend tools require `video_gen.provider` to be " + "configured as `xai` via `hermes tools` -> Video Generation." + ), + "error_type": "provider_not_configured", + "provider": "xai", + }) + + +def _normalize_public_video_url(video_url: Any) -> Optional[str]: + """Require a public HTTPS MP4 URL (``http``/``https`` only).""" + cleaned = _clean_string(video_url) + if not cleaned: + return None + if cleaned.lower().startswith(("http://", "https://")): + return cleaned + return None + + +XAI_VIDEO_EDIT_SCHEMA: Dict[str, Any] = { + "name": "xai_video_edit", + "description": ( + "Edit an existing video with xAI Imagine. This is separate from " + "`video_generate` because video editing is provider-specific. " + "`video_url` must be the public HTTPS MP4 URL from a prior Imagine " + "result (`video` or `public_url` on files-cdn)." + ), + "parameters": { + "type": "object", + "properties": { + "prompt": { + "type": "string", + "description": "Instruction for how xAI should modify the source video.", + }, + "video_url": { + "type": "string", + "description": ( + "Public HTTPS MP4 URL of the source video β€” the `video` or " + "`public_url` from a prior xAI Imagine result." 
+ ), + }, + "model": { + "type": "string", + "description": "Optional xAI Imagine model override.", + }, + }, + "required": ["prompt", "video_url"], + }, +} + + +XAI_VIDEO_EXTEND_SCHEMA: Dict[str, Any] = { + "name": "xai_video_extend", + "description": ( + "Extend an existing video with xAI Imagine. This is separate from " + "`video_generate` because video extension is provider-specific. " + "`video_url` must be the public HTTPS MP4 URL from a prior Imagine " + "result (`video` or `public_url` on files-cdn)." + ), + "parameters": { + "type": "object", + "properties": { + "prompt": { + "type": "string", + "description": "Instruction for how xAI should continue the source video.", + }, + "video_url": { + "type": "string", + "description": ( + "Public HTTPS MP4 URL of the source video β€” the `video` or " + "`public_url` from a prior xAI Imagine result." + ), + }, + "duration": { + "type": "integer", + "description": ( + "Desired extension duration in seconds. xAI clamps this " + "to its supported range." 
+ ), + }, + "model": { + "type": "string", + "description": "Optional xAI Imagine model override.", + }, + }, + "required": ["prompt", "video_url"], + }, +} + + +def _handle_xai_video_edit(args: Dict[str, Any], **_kw: Any) -> str: + prompt = _clean_string(args.get("prompt")) + video_url = _normalize_public_video_url(args.get("video_url")) + model = _clean_string(args.get("model")) + + if not prompt: + return tool_error("prompt is required for xAI video edit") + if not video_url: + return tool_error( + "video_url must be a public HTTPS MP4 URL (the `video`/`public_url` " + "from a prior Imagine result)" + ) + if not _configured_for_xai_video(): + return _provider_not_configured_error() + + result = run_xai_video_edit( + prompt=prompt, + video_url=video_url, + model=model, + ) + return json.dumps(result) + + +def _handle_xai_video_extend(args: Dict[str, Any], **_kw: Any) -> str: + prompt = _clean_string(args.get("prompt")) + video_url = _normalize_public_video_url(args.get("video_url")) + model = _clean_string(args.get("model")) + duration = _coerce_int(args.get("duration")) + + if not prompt: + return tool_error("prompt is required for xAI video extend") + if not video_url: + return tool_error( + "video_url must be a public HTTPS MP4 URL (the `video`/`public_url` " + "from a prior Imagine result)" + ) + if not _configured_for_xai_video(): + return _provider_not_configured_error() + + result = run_xai_video_extend( + prompt=prompt, + video_url=video_url, + duration=duration, + model=model, + ) + return json.dumps(result) + + +registry.register( + name="xai_video_edit", + toolset="video_gen", + schema=XAI_VIDEO_EDIT_SCHEMA, + handler=_handle_xai_video_edit, + check_fn=_check_xai_video_requirements, + requires_env=[], + is_async=False, + emoji="video", +) + +registry.register( + name="xai_video_extend", + toolset="video_gen", + schema=XAI_VIDEO_EXTEND_SCHEMA, + handler=_handle_xai_video_extend, + check_fn=_check_xai_video_requirements, + requires_env=[], + 
is_async=False, + emoji="video", +) diff --git a/toolsets.py b/toolsets.py index 1453c3505f8..083ab9d8913 100644 --- a/toolsets.py +++ b/toolsets.py @@ -139,10 +139,11 @@ TOOLSETS = { "description": ( "Video generation tools. Single ``video_generate`` tool covers " "text-to-video (prompt only) and image-to-video (prompt + " - "image_url) β€” the active backend auto-routes. Configure via " + "image_url), plus reference-to-video. Provider-specific edit/" + "extend workflows may appear as separate tools. Configure via " "``hermes tools`` β†’ Video Generation." ), - "tools": ["video_generate"], + "tools": ["video_generate", "xai_video_edit", "xai_video_extend"], "includes": [] },