mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-02 12:13:05 +00:00
feat(xai): Imagine public-URL storage, chaining & video edit/extend
Add durable public-URL output and URL-based chaining to xAI Grok Imagine: - Store generated media on files-cdn with permanent public HTTPS URLs (public_url: true, no expiry by default). - Chain by URL: generate -> edit -> extend each take a prior result's public HTTPS URL (or a data URI / local file for inputs). - Add provider-specific xai_video_edit and xai_video_extend tools. - Image generation: public-URL/storage output, multi-reference edits, and ~/ local-path support for image edits. Credentials use xAI Grok device-code OAuth (separate PR).
This commit is contained in:
parent
184c10cf97
commit
9ce79cd642
15 changed files with 1694 additions and 294 deletions
|
|
@ -66,7 +66,7 @@ CONFIGURABLE_TOOLSETS = [
|
|||
("vision", "👁️ Vision / Image Analysis", "vision_analyze"),
|
||||
("video", "🎬 Video Analysis", "video_analyze (requires video-capable model)"),
|
||||
("image_gen", "🎨 Image Generation", "image_generate"),
|
||||
("video_gen", "🎬 Video Generation", "video_generate (text-to-video + image-to-video)"),
|
||||
("video_gen", "🎬 Video Generation", "video_generate (text/image/reference)"),
|
||||
("x_search", "🐦 X (Twitter) Search", "x_search (requires xAI OAuth or XAI_API_KEY)"),
|
||||
("tts", "🔊 Text-to-Speech", "text_to_speech"),
|
||||
("skills", "📚 Skills", "list, view, manage"),
|
||||
|
|
@ -2785,6 +2785,49 @@ def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None
|
|||
_print_success(f" Model set to: {chosen}")
|
||||
|
||||
|
||||
def _configure_xai_imagine_storage(section_name: str, config: dict) -> None:
    """Ask how xAI Imagine should store media and record the answer.

    The answer is written to ``config[section_name]["xai"]["storage"]``.
    Any level of that path that is missing, or present but not a dict, is
    replaced with a fresh dict before the settings are written.

    Args:
        section_name: Top-level config section to update (the visible callers
            pass ``"image_gen"`` and ``"video_gen"``).
        config: Mutable configuration mapping; modified in place.
    """
    # Walk down to the storage settings, repairing each level on the way.
    node = config
    for key in (section_name, "xai", "storage"):
        child = node.setdefault(key, {})
        if not isinstance(child, dict):
            child = {}
            node[key] = child
        node = child
    storage_cfg = node

    _print_warning(
        " xAI Imagine can store generated media and create reusable public URLs. "
        "xAI may bill for stored files and public URL hosting."
    )
    options = [
        "Enable public URLs without automatic expiry (recommended)",
        "Disable stored public URLs",
        "Enable public URLs for 2 days",
    ]
    choice = _prompt_choice(" Stored public URLs:", options, default=0)

    if choice == 1:
        # Only the master switch is flipped; other stored keys are left alone.
        storage_cfg["enabled"] = False
        _print_success(" xAI stored public URLs disabled")
        return

    # Every other answer enables storage with a public URL; only the expiry
    # (in seconds, or None for "never") differs.
    two_days = choice == 2
    storage_cfg.update(
        enabled=True,
        public_url=True,
        expires_after=2 * 24 * 60 * 60 if two_days else None,
    )
    if two_days:
        _print_success(" xAI stored public URLs enabled for 2 days")
    else:
        _print_success(" xAI stored public URLs enabled without automatic expiry")
|
||||
|
||||
|
||||
def _select_plugin_image_gen_provider(plugin_name: str, config: dict) -> None:
|
||||
"""Persist a plugin-backed image generation provider selection."""
|
||||
img_cfg = config.setdefault("image_gen", {})
|
||||
|
|
@ -2795,6 +2838,8 @@ def _select_plugin_image_gen_provider(plugin_name: str, config: dict) -> None:
|
|||
img_cfg["use_gateway"] = False
|
||||
_print_success(f" image_gen.provider set to: {plugin_name}")
|
||||
_configure_imagegen_model_for_plugin(plugin_name, config)
|
||||
if plugin_name == "xai":
|
||||
_configure_xai_imagine_storage("image_gen", config)
|
||||
|
||||
|
||||
# ─── Video Generation Model Pickers ───────────────────────────────────────────
|
||||
|
|
@ -2895,6 +2940,8 @@ def _select_plugin_video_gen_provider(plugin_name: str, config: dict, *, use_gat
|
|||
vid_cfg["use_gateway"] = use_gateway
|
||||
_print_success(f" video_gen.provider set to: {plugin_name}")
|
||||
_configure_videogen_model_for_plugin(plugin_name, config)
|
||||
if plugin_name == "xai":
|
||||
_configure_xai_imagine_storage("video_gen", config)
|
||||
|
||||
|
||||
def _write_provider_config(provider: dict, config: dict, *, managed_feature) -> None:
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ from __future__ import annotations
|
|||
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import requests
|
||||
|
|
@ -33,7 +34,14 @@ from agent.image_gen_provider import (
|
|||
save_url_image,
|
||||
success_response,
|
||||
)
|
||||
from tools.xai_http import hermes_xai_user_agent, resolve_xai_http_credentials
|
||||
from tools.xai_http import (
|
||||
build_xai_storage_options,
|
||||
hermes_xai_user_agent,
|
||||
maybe_mark_xai_storage_notice_seen,
|
||||
read_xai_imagine_storage_config,
|
||||
resolve_xai_http_credentials,
|
||||
xai_storage_notice_text,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -118,10 +126,8 @@ def _resolve_resolution() -> str:
|
|||
def _xai_image_field(source: str) -> Dict[str, str]:
|
||||
"""Build the xAI ``image`` field for an edit request.
|
||||
|
||||
xAI's ``/v1/images/edits`` accepts ``{"url": <ref>, "type": "image_url"}``
|
||||
where ``<ref>`` is a public URL or a base64 data URI. Public URLs and
|
||||
existing data URIs pass through unchanged; local file paths are read and
|
||||
encoded into a ``data:`` URI.
|
||||
xAI's ``/v1/images/edits`` accepts a public HTTPS URL or a base64 data URI.
|
||||
Local file paths are read and encoded into a ``data:`` URI.
|
||||
"""
|
||||
source = source.strip()
|
||||
lower = source.lower()
|
||||
|
|
@ -131,7 +137,7 @@ def _xai_image_field(source: str) -> Dict[str, str]:
|
|||
import base64
|
||||
import os as _os
|
||||
|
||||
with open(source, "rb") as fh:
|
||||
with open(_os.path.expanduser(source), "rb") as fh:
|
||||
raw = fh.read()
|
||||
ext = (_os.path.splitext(source)[1].lstrip(".") or "png").lower()
|
||||
if ext == "jpg":
|
||||
|
|
@ -176,19 +182,29 @@ class XAIImageGenProvider(ImageGenProvider):
|
|||
# hook (``hermes_cli/tools_config.py``); identical to the TTS / video
|
||||
# gen entries so users see the same OAuth-or-API-key choice for every
|
||||
# xAI service.
|
||||
storage_notice = xai_storage_notice_text("image_gen")
|
||||
tag = (
|
||||
"grok-imagine-image - text-to-image & image editing; uses xAI "
|
||||
"Grok OAuth or XAI_API_KEY"
|
||||
)
|
||||
if storage_notice:
|
||||
tag += f". {storage_notice}"
|
||||
return {
|
||||
"name": "xAI Grok Imagine (image)",
|
||||
"badge": "paid",
|
||||
"tag": "grok-imagine-image — text-to-image & image editing; uses xAI Grok OAuth or XAI_API_KEY",
|
||||
"tag": tag,
|
||||
"env_vars": [],
|
||||
"post_setup": "xai_grok",
|
||||
}
|
||||
|
||||
def capabilities(self) -> Dict[str, Any]:
    """Return the static capability description for this image provider."""
    # xAI's /v1/images/edits supports image editing via
    # grok-imagine-image-quality, including up to 3 total source images.
    supported: Dict[str, Any] = dict(
        modalities=["text", "image"],
        max_reference_images=2,
        max_source_images=3,
    )
    return supported
|
||||
|
||||
def generate(
|
||||
self,
|
||||
|
|
@ -224,16 +240,39 @@ class XAIImageGenProvider(ImageGenProvider):
|
|||
resolution = _resolve_resolution()
|
||||
xai_res = resolution if resolution in _XAI_RESOLUTIONS else DEFAULT_RESOLUTION
|
||||
|
||||
# Pick the primary source image: explicit image_url wins, else the
|
||||
# first reference image.
|
||||
source_image = None
|
||||
source_images: List[str] = []
|
||||
if isinstance(image_url, str) and image_url.strip():
|
||||
source_image = image_url.strip()
|
||||
else:
|
||||
refs = normalize_reference_images(reference_image_urls)
|
||||
if refs:
|
||||
source_image = refs[0]
|
||||
is_edit = bool(source_image)
|
||||
source_images.append(image_url.strip())
|
||||
refs = normalize_reference_images(reference_image_urls)
|
||||
if refs:
|
||||
source_images.extend(refs)
|
||||
if len(source_images) > 3:
|
||||
return error_response(
|
||||
error="xAI image editing supports at most 3 source images",
|
||||
error_type="too_many_references",
|
||||
provider=provider_name,
|
||||
model="grok-imagine-image-quality",
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
for index, source in enumerate(source_images):
|
||||
field = "image_url" if index == 0 and image_url and image_url.strip() == source else "reference_image_urls"
|
||||
lower = source.lower()
|
||||
if not lower.startswith(("http://", "https://", "data:")):
|
||||
path = Path(source).expanduser()
|
||||
if not path.is_file():
|
||||
return error_response(
|
||||
error=(
|
||||
f"{field} must be a public HTTPS URL or data URI "
|
||||
"(e.g. the `image`/`public_url` from a prior Imagine result)"
|
||||
),
|
||||
error_type="invalid_image_url",
|
||||
provider=provider_name,
|
||||
model="grok-imagine-image-quality",
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
is_edit = bool(source_images)
|
||||
modality = "image" if is_edit else "text"
|
||||
|
||||
headers = {
|
||||
|
|
@ -243,6 +282,13 @@ class XAIImageGenProvider(ImageGenProvider):
|
|||
}
|
||||
|
||||
base_url = str(creds.get("base_url") or "https://api.x.ai/v1").strip().rstrip("/")
|
||||
storage_options = build_xai_storage_options(
|
||||
"image_gen",
|
||||
filename_prefix="hermes-xai-image",
|
||||
extension="png",
|
||||
)
|
||||
storage_notice = maybe_mark_xai_storage_notice_seen("image_gen")
|
||||
storage_cfg = read_xai_imagine_storage_config("image_gen")
|
||||
|
||||
if is_edit:
|
||||
# Editing requires the quality model per xAI docs. The source
|
||||
|
|
@ -250,7 +296,7 @@ class XAIImageGenProvider(ImageGenProvider):
|
|||
# are converted to a data URI here.
|
||||
edit_model = "grok-imagine-image-quality"
|
||||
try:
|
||||
image_field = _xai_image_field(source_image)
|
||||
image_fields = [_xai_image_field(source) for source in source_images]
|
||||
except Exception as exc:
|
||||
return error_response(
|
||||
error=f"Could not load source image for editing: {exc}",
|
||||
|
|
@ -263,8 +309,11 @@ class XAIImageGenProvider(ImageGenProvider):
|
|||
payload: Dict[str, Any] = {
|
||||
"model": edit_model,
|
||||
"prompt": prompt,
|
||||
"image": image_field,
|
||||
}
|
||||
if len(image_fields) == 1:
|
||||
payload["image"] = image_fields[0]
|
||||
else:
|
||||
payload["images"] = image_fields
|
||||
endpoint_url = f"{base_url}/images/edits"
|
||||
model_id = edit_model
|
||||
else:
|
||||
|
|
@ -275,6 +324,8 @@ class XAIImageGenProvider(ImageGenProvider):
|
|||
"resolution": xai_res,
|
||||
}
|
||||
endpoint_url = f"{base_url}/images/generations"
|
||||
if storage_options is not None:
|
||||
payload["storage_options"] = storage_options
|
||||
|
||||
try:
|
||||
response = requests.post(
|
||||
|
|
@ -331,7 +382,8 @@ class XAIImageGenProvider(ImageGenProvider):
|
|||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
# Parse response — xAI returns data[0].b64_json or data[0].url
|
||||
# Parse response - xAI returns data[0].b64_json, data[0].url, and
|
||||
# optionally data[0].file_output when storage_options were requested.
|
||||
data = result.get("data", [])
|
||||
if not data:
|
||||
return error_response(
|
||||
|
|
@ -346,8 +398,13 @@ class XAIImageGenProvider(ImageGenProvider):
|
|||
first = data[0]
|
||||
b64 = first.get("b64_json")
|
||||
url = first.get("url")
|
||||
file_output = first.get("file_output") if isinstance(first, dict) else None
|
||||
file_output = file_output if isinstance(file_output, dict) else {}
|
||||
public_url = file_output.get("public_url") if isinstance(file_output.get("public_url"), str) else None
|
||||
|
||||
if b64:
|
||||
if public_url:
|
||||
image_ref = public_url
|
||||
elif b64:
|
||||
try:
|
||||
saved_path = save_b64_image(b64, prefix=f"xai_{model_id}")
|
||||
except Exception as exc:
|
||||
|
|
@ -389,9 +446,27 @@ class XAIImageGenProvider(ImageGenProvider):
|
|||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
extra: Dict[str, Any] = {}
|
||||
extra: Dict[str, Any] = {
|
||||
"storage_enabled": bool(storage_cfg["enabled"]),
|
||||
}
|
||||
if not is_edit:
|
||||
extra["resolution"] = xai_res
|
||||
if storage_notice:
|
||||
extra["storage_notice"] = storage_notice
|
||||
if public_url:
|
||||
extra["public_url"] = public_url
|
||||
if file_output:
|
||||
for key in (
|
||||
"filename",
|
||||
"expires_at",
|
||||
"public_url_expires_at",
|
||||
"public_url_error",
|
||||
"storage_error",
|
||||
):
|
||||
if key in file_output:
|
||||
extra[key] = file_output[key]
|
||||
if result.get("usage"):
|
||||
extra["usage"] = result["usage"]
|
||||
|
||||
return success_response(
|
||||
image=image_ref,
|
||||
|
|
|
|||
|
|
@ -1,10 +1,7 @@
|
|||
"""xAI Grok-Imagine video generation backend.
|
||||
|
||||
Surface: text-to-video and image-to-video (animate an input image)
|
||||
through xAI's ``/videos/generations`` endpoint. Edit and extend are not
|
||||
exposed in this unified surface — xAI is the only backend that supports
|
||||
them and the inconsistency would force per-backend prose in the agent's
|
||||
tool description.
|
||||
Surface: text-to-video, image-to-video, and reference-to-video through the
|
||||
unified video provider. xAI edit/extend are exposed through separate tools.
|
||||
|
||||
Originally salvaged from PR #10600 by @Jaaneek; reshaped into the
|
||||
:class:`VideoGenProvider` plugin interface and trimmed to the
|
||||
|
|
@ -14,8 +11,9 @@ Authentication: xAI Grok OAuth tokens (preferred — billed against the
|
|||
user's SuperGrok or X Premium+ subscription) or ``XAI_API_KEY``. Both routes are
|
||||
resolved through ``tools.xai_http.resolve_xai_http_credentials`` so a
|
||||
single login covers chat + TTS + image gen + video gen + transcription.
|
||||
Output is an HTTPS URL from xAI's CDN; the gateway downloads and
|
||||
delivers it.
|
||||
When xAI storage is enabled, the primary ``video`` / ``public_url`` fields are the
|
||||
stored files-cdn HTTPS link. Pass that public MP4 URL as ``video_url`` for
|
||||
edit/extend; it is sent to xAI as ``video.url``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -46,13 +44,14 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1"
|
||||
DEFAULT_TEXT_TO_VIDEO_MODEL = "grok-imagine-video"
|
||||
DEFAULT_IMAGE_TO_VIDEO_MODEL = "grok-imagine-video-1.5-preview"
|
||||
DEFAULT_IMAGE_TO_VIDEO_MODEL = "grok-imagine-video-1.5"
|
||||
DEFAULT_MODEL = DEFAULT_TEXT_TO_VIDEO_MODEL
|
||||
DEFAULT_DURATION = 8
|
||||
DEFAULT_ASPECT_RATIO = "16:9"
|
||||
DEFAULT_RESOLUTION = "720p"
|
||||
DEFAULT_TIMEOUT_SECONDS = 240
|
||||
DEFAULT_POLL_INTERVAL_SECONDS = 5
|
||||
DEFAULT_EXTEND_DURATION = 6
|
||||
|
||||
VALID_ASPECT_RATIOS = {"1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3"}
|
||||
VALID_RESOLUTIONS = {"480p", "720p"}
|
||||
|
|
@ -67,16 +66,20 @@ _MODELS: Dict[str, Dict[str, Any]] = {
|
|||
"price": "see https://docs.x.ai/developers/models/grok-imagine-video",
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"grok-imagine-video-1.5-preview": {
|
||||
"display": "Grok Imagine Video 1.5 Preview",
|
||||
"grok-imagine-video-1.5": {
|
||||
"display": "Grok Imagine Video 1.5",
|
||||
"speed": "~60-240s",
|
||||
"strengths": "Latest xAI image-to-video model.",
|
||||
"price": "see https://docs.x.ai/developers/models/grok-imagine-video-1.5-preview",
|
||||
"price": "see https://docs.x.ai/developers/pricing",
|
||||
"modalities": ["image"],
|
||||
"aliases": ["grok-imagine-video-1.5-2026-05-30"],
|
||||
},
|
||||
}
|
||||
|
||||
_IMAGE_TO_VIDEO_COMPAT_MODEL_IDS = {
|
||||
"grok-imagine-video-1.5-preview",
|
||||
"grok-imagine-video-1.5-2026-05-30",
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTTP helpers
|
||||
|
|
@ -145,21 +148,114 @@ def _image_ref_to_xai_url(value: str) -> str:
|
|||
return f"data:{mime};base64,{encoded}"
|
||||
|
||||
|
||||
def _normalize_reference_images(reference_image_urls: Optional[List[str]]):
|
||||
refs = []
|
||||
def _image_ref_to_xai_input(value: str) -> Optional[Dict[str, str]]:
    """Wrap an image reference as an xAI ``{"url": ...}`` input, or return None.

    The reference is first passed through ``_image_ref_to_xai_url``. The
    result is accepted only when it is non-empty and starts (case-insensitive)
    with ``http://``, ``https://`` or ``data:image/``; anything else yields
    ``None`` so the caller can report an invalid input.
    """
    accepted_prefixes = ("http://", "https://", "data:image/")
    normalized = _image_ref_to_xai_url(value)
    if normalized and normalized.lower().startswith(accepted_prefixes):
        return {"url": normalized}
    return None
|
||||
|
||||
|
||||
def _xai_video_output_urls(
|
||||
video: Dict[str, Any],
|
||||
) -> Tuple[str, Optional[str], Optional[str]]:
|
||||
"""Return ``(public_video_url, temporary_url, stored_public_url)``.
|
||||
|
||||
``public_video_url`` is the stored files-cdn HTTPS MP4 (``public_url``) when
|
||||
storage is enabled; otherwise xAI's temporary ``video.url``. Pass this value
|
||||
as ``video_url`` for edit/extend chaining.
|
||||
"""
|
||||
file_output = video.get("file_output") if isinstance(video.get("file_output"), dict) else {}
|
||||
file_output = file_output or {}
|
||||
stored_public = file_output.get("public_url")
|
||||
stored_public = stored_public.strip() if isinstance(stored_public, str) else None
|
||||
temporary = video.get("url")
|
||||
temporary = temporary.strip() if isinstance(temporary, str) else None
|
||||
public_video_url = stored_public or temporary or ""
|
||||
temporary_out = (
|
||||
temporary
|
||||
if temporary and stored_public and temporary != stored_public
|
||||
else None
|
||||
)
|
||||
return public_video_url, temporary_out, stored_public
|
||||
|
||||
|
||||
def _video_ref_to_xai_url(value: str) -> str:
|
||||
"""Return a URL/data URI accepted by xAI for video inputs."""
|
||||
ref = (value or "").strip()
|
||||
if not ref:
|
||||
return ""
|
||||
lower = ref.lower()
|
||||
if lower.startswith(("http://", "https://", "data:video/")):
|
||||
return ref
|
||||
|
||||
path = Path(ref).expanduser()
|
||||
if not path.is_file():
|
||||
return ref
|
||||
|
||||
mime = mimetypes.guess_type(path.name)[0] or "video/mp4"
|
||||
if not mime.startswith("video/"):
|
||||
return ref
|
||||
|
||||
encoded = base64.b64encode(path.read_bytes()).decode("ascii")
|
||||
return f"data:{mime};base64,{encoded}"
|
||||
|
||||
|
||||
async def _video_input_from_public_url(
|
||||
value: str,
|
||||
*,
|
||||
api_key: str,
|
||||
base_url: str,
|
||||
) -> Optional[Dict[str, str]]:
|
||||
"""Build xAI ``video`` input using a public HTTPS URL (``url`` field only)."""
|
||||
ref = (value or "").strip()
|
||||
if not ref:
|
||||
return None
|
||||
|
||||
path = Path(ref).expanduser()
|
||||
if path.is_file():
|
||||
data_ref = _video_ref_to_xai_url(ref)
|
||||
return {"url": data_ref} if data_ref else None
|
||||
|
||||
lower = ref.lower()
|
||||
if not lower.startswith(("http://", "https://")):
|
||||
return None
|
||||
|
||||
return {"url": ref}
|
||||
|
||||
|
||||
def _normalize_reference_images(
    reference_image_urls: Optional[List[str]],
) -> Tuple[Optional[List[Dict[str, str]]], Optional[str]]:
    """Convert reference image references into xAI inputs.

    Returns a ``(refs, error)`` pair. Blank entries are ignored. The first
    entry that ``_image_ref_to_xai_input`` rejects aborts the conversion and
    yields ``(None, <message>)``. Otherwise ``error`` is ``None`` and ``refs``
    is the list of ``{"url": ...}`` dicts, or ``None`` when nothing usable was
    supplied.
    """
    collected: List[Dict[str, str]] = []
    for raw in reference_image_urls or []:
        candidate = (raw or "").strip()
        if candidate:
            converted = _image_ref_to_xai_input(candidate)
            if not converted:
                message = (
                    "reference_image_urls must be public HTTPS URLs or data URIs "
                    "(e.g. the `image`/`public_url` from a prior Imagine result)"
                )
                return None, message
            collected.append(converted)
    if not collected:
        return None, None
    return collected, None
|
||||
|
||||
|
||||
def _clamp_duration(
    duration: Optional[int],
    *,
    has_reference_images: bool = False,
    max_seconds: int = 15,
    default: int = DEFAULT_DURATION,
) -> int:
    """Clamp a requested clip length to the allowed range.

    ``None`` falls back to ``default``. The value is raised to at least 1,
    then lowered to at most ``max_seconds``; when reference images are in use
    it is additionally capped at 10.
    """
    seconds = default if duration is None else duration
    # Lower bound first, then upper bound, matching the order of the checks
    # this replaces (matters only if max_seconds is below 1).
    seconds = min(max(seconds, 1), max_seconds)
    if has_reference_images:
        seconds = min(seconds, 10)
    return seconds
|
||||
|
|
@ -173,7 +269,7 @@ def _resolve_model_for_modality(
|
|||
) -> str:
|
||||
"""Select xAI's text/video model without treating config as a prompt override.
|
||||
|
||||
``grok-imagine-video-1.5-preview`` currently rejects text-only video
|
||||
``grok-imagine-video-1.5`` currently rejects text-only video
|
||||
generation, but it is the desired image-to-video backend. Explicit tool
|
||||
``model=`` still wins for users who intentionally request another model.
|
||||
"""
|
||||
|
|
@ -182,7 +278,7 @@ def _resolve_model_for_modality(
|
|||
return requested
|
||||
if modality == "image":
|
||||
return DEFAULT_IMAGE_TO_VIDEO_MODEL
|
||||
if requested == DEFAULT_IMAGE_TO_VIDEO_MODEL:
|
||||
if requested == DEFAULT_IMAGE_TO_VIDEO_MODEL or requested in _IMAGE_TO_VIDEO_COMPAT_MODEL_IDS:
|
||||
return DEFAULT_TEXT_TO_VIDEO_MODEL
|
||||
return requested or DEFAULT_TEXT_TO_VIDEO_MODEL
|
||||
|
||||
|
|
@ -193,11 +289,11 @@ async def _submit(
|
|||
*,
|
||||
api_key: str,
|
||||
base_url: str,
|
||||
endpoint: str = "generations",
|
||||
) -> str:
|
||||
"""POST to /videos/generations — xAI's only public endpoint for our
|
||||
text-to-video and image-to-video surface."""
|
||||
"""POST to one of xAI's async video endpoints and return request_id."""
|
||||
response = await client.post(
|
||||
f"{base_url}/videos/generations",
|
||||
f"{base_url}/videos/{endpoint}",
|
||||
headers={**_xai_headers(api_key), "x-idempotency-key": str(uuid.uuid4())},
|
||||
json=payload,
|
||||
timeout=60,
|
||||
|
|
@ -248,7 +344,7 @@ async def _poll(
|
|||
|
||||
|
||||
class XAIVideoGenProvider(VideoGenProvider):
|
||||
"""xAI Grok Imagine video backend (text-to-video + image-to-video)."""
|
||||
"""xAI Grok Imagine video backend."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
|
|
@ -275,10 +371,25 @@ class XAIVideoGenProvider(VideoGenProvider):
|
|||
# Grok OAuth (SuperGrok / Premium+) — TTS / image gen / video gen
|
||||
# all share the same credential resolver. The hook offers an
|
||||
# OAuth-vs-API-key choice when neither is configured.
|
||||
try:
|
||||
from tools.xai_http import xai_storage_notice_text
|
||||
|
||||
storage_notice = xai_storage_notice_text("video_gen")
|
||||
except Exception:
|
||||
storage_notice = ""
|
||||
tag = (
|
||||
"grok-imagine-video for text/reference; "
|
||||
"grok-imagine-video-1.5 for image-to-video; "
|
||||
"edit/extend: pass the stored public HTTPS MP4 (`video` / "
|
||||
"`public_url` from a prior Imagine result); uses xAI Grok OAuth "
|
||||
"or XAI_API_KEY"
|
||||
)
|
||||
if storage_notice:
|
||||
tag += f". {storage_notice}"
|
||||
return {
|
||||
"name": "xAI Grok Imagine",
|
||||
"badge": "paid",
|
||||
"tag": "grok-imagine-video for text-to-video; grok-imagine-video-1.5-preview for image-to-video; uses xAI Grok OAuth or XAI_API_KEY",
|
||||
"tag": tag,
|
||||
"env_vars": [],
|
||||
"post_setup": "xai_grok",
|
||||
}
|
||||
|
|
@ -310,189 +421,479 @@ class XAIVideoGenProvider(VideoGenProvider):
|
|||
seed: Optional[int] = None,
|
||||
**kwargs: Any,
|
||||
) -> Dict[str, Any]:
|
||||
try:
|
||||
loop = asyncio.new_event_loop()
|
||||
try:
|
||||
return loop.run_until_complete(self._generate_async(
|
||||
prompt=prompt,
|
||||
model=model,
|
||||
explicit_model=bool(kwargs.get("_model_override_explicit")),
|
||||
image_url=image_url,
|
||||
reference_image_urls=reference_image_urls,
|
||||
duration=duration,
|
||||
aspect_ratio=aspect_ratio,
|
||||
resolution=resolution,
|
||||
))
|
||||
finally:
|
||||
loop.close()
|
||||
except Exception as exc:
|
||||
logger.warning("xAI video gen unexpected failure: %s", exc, exc_info=True)
|
||||
return error_response(
|
||||
error=f"xAI video generation failed: {exc}",
|
||||
error_type="api_error",
|
||||
provider="xai",
|
||||
model=model or DEFAULT_MODEL,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect_ratio,
|
||||
)
|
||||
|
||||
async def _generate_async(
|
||||
self,
|
||||
*,
|
||||
prompt: str,
|
||||
model: Optional[str],
|
||||
explicit_model: bool,
|
||||
image_url: Optional[str],
|
||||
reference_image_urls: Optional[List[str]],
|
||||
duration: Optional[int],
|
||||
aspect_ratio: str,
|
||||
resolution: str,
|
||||
) -> Dict[str, Any]:
|
||||
api_key, base_url = _resolve_xai_credentials()
|
||||
if not api_key:
|
||||
return error_response(
|
||||
error=(
|
||||
"No xAI credentials found. Sign in via `hermes auth add xai-oauth` "
|
||||
"(SuperGrok / Premium+) or set XAI_API_KEY from "
|
||||
"https://console.x.ai/."
|
||||
),
|
||||
error_type="auth_required",
|
||||
provider="xai", prompt=prompt,
|
||||
)
|
||||
|
||||
prompt = (prompt or "").strip()
|
||||
image_url_norm = _image_ref_to_xai_url(image_url or "") or None
|
||||
normalized_aspect_ratio = (aspect_ratio or DEFAULT_ASPECT_RATIO).strip()
|
||||
normalized_resolution = (resolution or DEFAULT_RESOLUTION).strip().lower()
|
||||
modality_used = "image" if image_url_norm else "text"
|
||||
resolved_model = _resolve_model_for_modality(
|
||||
model,
|
||||
modality=modality_used,
|
||||
explicit_model=explicit_model,
|
||||
return run_xai_video_generation(
|
||||
prompt=prompt,
|
||||
model=model,
|
||||
explicit_model=bool(kwargs.get("_model_override_explicit")),
|
||||
image_url=image_url,
|
||||
reference_image_urls=reference_image_urls,
|
||||
duration=duration,
|
||||
aspect_ratio=aspect_ratio,
|
||||
resolution=resolution,
|
||||
)
|
||||
|
||||
if not prompt:
|
||||
|
||||
def has_xai_video_credentials() -> bool:
    """Return True when ``_resolve_xai_credentials`` yields a non-empty API key."""
    credentials = _resolve_xai_credentials()
    # The resolver returns a pair; only its first element (the key) matters.
    return bool(credentials[0])
|
||||
|
||||
|
||||
def run_xai_video_generation(
    *,
    prompt: str,
    model: Optional[str],
    explicit_model: bool,
    image_url: Optional[str],
    reference_image_urls: Optional[List[str]],
    duration: Optional[int],
    aspect_ratio: str,
    resolution: str,
) -> Dict[str, Any]:
    """Blocking entry point for xAI video generation.

    Builds the ``_generate_xai_video_async`` coroutine from the given
    arguments and hands it to ``_run_xai_video_coroutine``, labelled
    ``"generation"``, returning whatever that runner returns.
    """
    request = {
        "prompt": prompt,
        "model": model,
        "explicit_model": explicit_model,
        "image_url": image_url,
        "reference_image_urls": reference_image_urls,
        "duration": duration,
        "aspect_ratio": aspect_ratio,
        "resolution": resolution,
    }
    pending = _generate_xai_video_async(**request)
    return _run_xai_video_coroutine(
        pending,
        operation_label="generation",
        model=model,
        prompt=prompt,
        aspect_ratio=aspect_ratio,
    )
|
||||
|
||||
|
||||
def run_xai_video_edit(
    *,
    prompt: str,
    video_url: str,
    model: Optional[str] = None,
) -> Dict[str, Any]:
    """Blocking entry point for editing an existing video with xAI.

    Builds the ``_edit_xai_video_async`` coroutine and hands it to
    ``_run_xai_video_coroutine``, labelled ``"edit"``. The default aspect
    ratio is passed to the runner, since no aspect ratio is taken here.
    """
    pending = _edit_xai_video_async(prompt=prompt, video_url=video_url, model=model)
    runner_context = {
        "operation_label": "edit",
        "model": model,
        "prompt": prompt,
        "aspect_ratio": DEFAULT_ASPECT_RATIO,
    }
    return _run_xai_video_coroutine(pending, **runner_context)
|
||||
|
||||
|
||||
def run_xai_video_extend(
    *,
    prompt: str,
    video_url: str,
    duration: Optional[int] = None,
    model: Optional[str] = None,
) -> Dict[str, Any]:
    """Blocking entry point for extending an existing video with xAI.

    Builds the ``_extend_xai_video_async`` coroutine and hands it to
    ``_run_xai_video_coroutine``, labelled ``"extend"``. The default aspect
    ratio is passed to the runner, since no aspect ratio is taken here.
    """
    pending = _extend_xai_video_async(
        prompt=prompt,
        video_url=video_url,
        duration=duration,
        model=model,
    )
    runner_context = {
        "operation_label": "extend",
        "model": model,
        "prompt": prompt,
        "aspect_ratio": DEFAULT_ASPECT_RATIO,
    }
    return _run_xai_video_coroutine(pending, **runner_context)
|
||||
|
||||
|
||||
def _run_xai_video_coroutine(
|
||||
coro,
|
||||
*,
|
||||
operation_label: str,
|
||||
model: Optional[str],
|
||||
prompt: str,
|
||||
aspect_ratio: str,
|
||||
) -> Dict[str, Any]:
|
||||
try:
|
||||
loop = asyncio.new_event_loop()
|
||||
try:
|
||||
return loop.run_until_complete(coro)
|
||||
finally:
|
||||
loop.close()
|
||||
except Exception as exc:
|
||||
logger.warning("xAI video %s unexpected failure: %s", operation_label, exc, exc_info=True)
|
||||
return error_response(
|
||||
error=f"xAI video {operation_label} failed: {exc}",
|
||||
error_type="api_error",
|
||||
provider="xai",
|
||||
model=model or DEFAULT_MODEL,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect_ratio,
|
||||
)
|
||||
|
||||
|
||||
async def _generate_xai_video_async(
|
||||
*,
|
||||
prompt: str,
|
||||
model: Optional[str],
|
||||
explicit_model: bool,
|
||||
image_url: Optional[str],
|
||||
reference_image_urls: Optional[List[str]],
|
||||
duration: Optional[int],
|
||||
aspect_ratio: str,
|
||||
resolution: str,
|
||||
) -> Dict[str, Any]:
|
||||
api_key, base_url = _resolve_xai_credentials()
|
||||
if not api_key:
|
||||
return _auth_required_response(prompt)
|
||||
|
||||
prompt = (prompt or "").strip()
|
||||
image_input = None
|
||||
if (image_url or "").strip():
|
||||
image_input = _image_ref_to_xai_input(image_url)
|
||||
if not image_input:
|
||||
return error_response(
|
||||
error=(
|
||||
"prompt is required for xAI video generation "
|
||||
"(text-to-video or image-to-video)"
|
||||
"image_url must be a public HTTPS URL or data URI "
|
||||
"(e.g. the `image`/`public_url` from a prior Imagine result)"
|
||||
),
|
||||
error_type="missing_prompt",
|
||||
provider="xai", prompt=prompt,
|
||||
)
|
||||
|
||||
refs = _normalize_reference_images(reference_image_urls)
|
||||
if refs and len(refs) > MAX_REFERENCE_IMAGES:
|
||||
return error_response(
|
||||
error=f"reference_image_urls supports at most {MAX_REFERENCE_IMAGES} images on xAI",
|
||||
error_type="too_many_references",
|
||||
provider="xai", prompt=prompt,
|
||||
)
|
||||
if image_url_norm and refs:
|
||||
return error_response(
|
||||
error="image_url and reference_image_urls cannot be combined on xAI",
|
||||
error_type="conflicting_inputs",
|
||||
provider="xai", prompt=prompt,
|
||||
)
|
||||
|
||||
clamped_duration = _clamp_duration(duration, has_reference_images=bool(refs))
|
||||
|
||||
if normalized_aspect_ratio not in VALID_ASPECT_RATIOS:
|
||||
normalized_aspect_ratio = DEFAULT_ASPECT_RATIO
|
||||
if normalized_resolution not in VALID_RESOLUTIONS:
|
||||
normalized_resolution = DEFAULT_RESOLUTION
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"model": resolved_model,
|
||||
"prompt": prompt,
|
||||
"duration": clamped_duration,
|
||||
"aspect_ratio": normalized_aspect_ratio,
|
||||
"resolution": normalized_resolution,
|
||||
}
|
||||
if image_url_norm:
|
||||
payload["image"] = {"url": image_url_norm}
|
||||
if refs:
|
||||
payload["reference_images"] = refs
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
try:
|
||||
request_id = await _submit(
|
||||
client, payload, api_key=api_key, base_url=base_url
|
||||
)
|
||||
except httpx.HTTPStatusError as exc:
|
||||
detail = ""
|
||||
try:
|
||||
detail = exc.response.text[:500]
|
||||
except Exception:
|
||||
pass
|
||||
return error_response(
|
||||
error=f"xAI submit failed ({exc.response.status_code}): {detail or exc}",
|
||||
error_type="api_error",
|
||||
provider="xai",
|
||||
model=resolved_model,
|
||||
prompt=prompt,
|
||||
)
|
||||
|
||||
poll_result = await _poll(
|
||||
client, request_id,
|
||||
api_key=api_key, base_url=base_url,
|
||||
timeout_seconds=DEFAULT_TIMEOUT_SECONDS,
|
||||
poll_interval=DEFAULT_POLL_INTERVAL_SECONDS,
|
||||
)
|
||||
|
||||
status = poll_result["status"]
|
||||
body = poll_result["body"]
|
||||
|
||||
if status == "done":
|
||||
video = body.get("video") or {}
|
||||
url = video.get("url")
|
||||
if not url:
|
||||
return error_response(
|
||||
error="xAI video generation completed without a video URL",
|
||||
error_type="empty_response",
|
||||
provider="xai",
|
||||
model=body.get("model") or resolved_model,
|
||||
prompt=prompt,
|
||||
)
|
||||
extra: Dict[str, Any] = {
|
||||
"request_id": request_id,
|
||||
"resolution": normalized_resolution,
|
||||
}
|
||||
if body.get("usage"):
|
||||
extra["usage"] = body["usage"]
|
||||
return success_response(
|
||||
video=url,
|
||||
model=body.get("model") or resolved_model,
|
||||
prompt=prompt,
|
||||
modality=modality_used,
|
||||
aspect_ratio=normalized_aspect_ratio,
|
||||
duration=video.get("duration") or clamped_duration,
|
||||
error_type="invalid_image_url",
|
||||
provider="xai",
|
||||
extra=extra,
|
||||
prompt=prompt,
|
||||
)
|
||||
normalized_aspect_ratio = (aspect_ratio or DEFAULT_ASPECT_RATIO).strip()
|
||||
normalized_resolution = (resolution or DEFAULT_RESOLUTION).strip().lower()
|
||||
refs, refs_error = _normalize_reference_images(reference_image_urls)
|
||||
if refs_error:
|
||||
return error_response(
|
||||
error=refs_error,
|
||||
error_type="invalid_reference_image_urls",
|
||||
provider="xai",
|
||||
prompt=prompt,
|
||||
)
|
||||
|
||||
if status == "timeout":
|
||||
if not prompt:
|
||||
return error_response(
|
||||
error="prompt is required for xAI video generation",
|
||||
error_type="missing_prompt",
|
||||
provider="xai", prompt=prompt,
|
||||
)
|
||||
if refs and len(refs) > MAX_REFERENCE_IMAGES:
|
||||
return error_response(
|
||||
error=f"reference_image_urls supports at most {MAX_REFERENCE_IMAGES} images on xAI",
|
||||
error_type="too_many_references",
|
||||
provider="xai", prompt=prompt,
|
||||
)
|
||||
if image_input and refs:
|
||||
return error_response(
|
||||
error="image_url and reference_image_urls cannot be combined on xAI",
|
||||
error_type="conflicting_inputs",
|
||||
provider="xai", prompt=prompt,
|
||||
)
|
||||
|
||||
if normalized_aspect_ratio not in VALID_ASPECT_RATIOS:
|
||||
normalized_aspect_ratio = DEFAULT_ASPECT_RATIO
|
||||
if normalized_resolution not in VALID_RESOLUTIONS:
|
||||
normalized_resolution = DEFAULT_RESOLUTION
|
||||
|
||||
modality_used = "reference" if refs else ("image" if image_input else "text")
|
||||
resolved_model = _resolve_model_for_modality(
|
||||
model,
|
||||
modality=modality_used,
|
||||
explicit_model=explicit_model,
|
||||
)
|
||||
if refs and resolved_model != DEFAULT_TEXT_TO_VIDEO_MODEL:
|
||||
if explicit_model:
|
||||
return error_response(
|
||||
error=f"Timed out waiting for video generation after {DEFAULT_TIMEOUT_SECONDS}s",
|
||||
error_type="timeout",
|
||||
error=(
|
||||
"xAI reference-to-video requires "
|
||||
f"{DEFAULT_TEXT_TO_VIDEO_MODEL}; got {resolved_model}"
|
||||
),
|
||||
error_type="unsupported_model",
|
||||
provider="xai",
|
||||
model=resolved_model,
|
||||
prompt=prompt,
|
||||
)
|
||||
resolved_model = DEFAULT_TEXT_TO_VIDEO_MODEL
|
||||
|
||||
clamped_duration = _clamp_duration(duration, has_reference_images=bool(refs))
|
||||
payload = {
|
||||
"model": resolved_model,
|
||||
"prompt": prompt,
|
||||
"duration": clamped_duration,
|
||||
"aspect_ratio": normalized_aspect_ratio,
|
||||
"resolution": normalized_resolution,
|
||||
}
|
||||
if image_input:
|
||||
payload["image"] = image_input
|
||||
if refs:
|
||||
payload["reference_images"] = refs
|
||||
|
||||
return await _submit_xai_video_payload(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
endpoint="generations",
|
||||
payload=payload,
|
||||
prompt=prompt,
|
||||
resolved_model=resolved_model,
|
||||
modality=modality_used,
|
||||
aspect_ratio=normalized_aspect_ratio,
|
||||
duration=clamped_duration,
|
||||
operation="generate",
|
||||
resolution=normalized_resolution,
|
||||
)
|
||||
|
||||
|
||||
async def _run_xai_video_mutation(
    *,
    prompt: str,
    video_url: str,
    model: Optional[str],
    endpoint: str,
    operation: str,
    duration: int,
) -> Dict[str, Any]:
    """Edit or extend using a public HTTPS ``video_url`` input (``url`` on the wire).

    Shared implementation behind the edit and extend entry points.

    Args:
        prompt: Instruction text; required (whitespace-only counts as missing).
        video_url: Source video reference, resolved through
            ``_video_input_from_public_url``.
        model: Optional model override; a truthy value is treated as an
            explicit choice when resolving the model.
        endpoint: Endpoint name forwarded to ``_submit_xai_video_payload``;
            ``"extensions"`` additionally sends ``duration`` in the payload.
        operation: Operation label reported back (also used as the modality).
        duration: Seconds; sent on the wire only for ``"extensions"`` but
            always forwarded to ``_submit_xai_video_payload``.

    Returns:
        The response dict from ``_submit_xai_video_payload``, or an error
        response with ``error_type`` ``auth_required``, ``missing_prompt`` or
        ``missing_video`` when a precondition fails.
    """
    api_key, base_url = _resolve_xai_credentials()
    if not api_key:
        return _auth_required_response(prompt)

    prompt = (prompt or "").strip()
    # Check the cheap local precondition before awaiting the video-input
    # resolution, so a missing prompt never triggers that extra work.
    if not prompt:
        return error_response(
            error="prompt is required for xAI video edit/extend",
            error_type="missing_prompt",
            provider="xai",
            prompt=prompt,
        )

    video_input = await _video_input_from_public_url(
        video_url or "",
        api_key=api_key,
        base_url=base_url,
    )
    if not video_input:
        return error_response(
            error=(
                "video_url must be a public HTTPS MP4 URL "
                "(the `video`/`public_url` from a prior Imagine result)"
            ),
            error_type="missing_video",
            provider="xai",
            prompt=prompt,
        )

    resolved_model = _resolve_model_for_modality(
        model,
        modality="text",
        explicit_model=bool(model),
    )
    payload: Dict[str, Any] = {
        "model": resolved_model,
        "prompt": prompt,
        "video": video_input,
    }
    # Only the extensions endpoint receives a duration in the request body.
    if endpoint == "extensions":
        payload["duration"] = duration

    return await _submit_xai_video_payload(
        api_key=api_key,
        base_url=base_url,
        endpoint=endpoint,
        payload=payload,
        prompt=prompt,
        resolved_model=resolved_model,
        modality=operation,
        aspect_ratio=DEFAULT_ASPECT_RATIO,
        duration=duration,
        operation=operation,
    )
|
||||
|
||||
|
||||
async def _edit_xai_video_async(
    *,
    prompt: str,
    video_url: str,
    model: Optional[str],
) -> Dict[str, Any]:
    """Run an xAI video edit through the shared mutation helper.

    Uses the ``"edits"`` endpoint with operation ``"edit"`` and passes
    ``DEFAULT_DURATION`` as the duration value.
    """
    outcome = await _run_xai_video_mutation(
        endpoint="edits",
        operation="edit",
        duration=DEFAULT_DURATION,
        prompt=prompt,
        video_url=video_url,
        model=model,
    )
    return outcome
|
||||
|
||||
|
||||
async def _extend_xai_video_async(
    *,
    prompt: str,
    video_url: str,
    duration: Optional[int],
    model: Optional[str],
) -> Dict[str, Any]:
    """Run an xAI video extension through the shared mutation helper.

    The requested ``duration`` is first normalised by ``_clamp_duration``
    (``max_seconds=10``, falling back to ``DEFAULT_EXTEND_DURATION``), then the
    request goes to the ``"extensions"`` endpoint with operation ``"extend"``.
    """
    extend_seconds = _clamp_duration(
        duration,
        max_seconds=10,
        default=DEFAULT_EXTEND_DURATION,
    )
    outcome = await _run_xai_video_mutation(
        endpoint="extensions",
        operation="extend",
        duration=extend_seconds,
        prompt=prompt,
        video_url=video_url,
        model=model,
    )
    return outcome
|
||||
|
||||
|
||||
def _auth_required_response(prompt: str) -> Dict[str, Any]:
    """Build the ``auth_required`` error response used when no xAI credentials resolve."""
    guidance = (
        "No xAI credentials found. Sign in via `hermes auth add xai-oauth` "
        "(SuperGrok / Premium+) or set XAI_API_KEY from "
        "https://console.x.ai/."
    )
    return error_response(
        error=guidance,
        error_type="auth_required",
        provider="xai",
        prompt=prompt,
    )
|
||||
|
||||
|
||||
async def _submit_xai_video_payload(
    *,
    api_key: str,
    base_url: str,
    endpoint: str,
    payload: Dict[str, Any],
    prompt: str,
    resolved_model: str,
    modality: str,
    aspect_ratio: str,
    duration: int,
    operation: str,
    resolution: Optional[str] = None,
) -> Dict[str, Any]:
    """Submit an xAI video request, poll it to completion, and shape the response.

    Args:
        api_key: Credential forwarded to ``_submit`` and ``_poll``.
        base_url: API base URL forwarded to ``_submit`` and ``_poll``.
        endpoint: Endpoint name forwarded to ``_submit``.
        payload: Request body. NOTE: mutated in place; ``storage_options`` is
            added when storage options could be built.
        prompt: Echoed back in every response.
        resolved_model: Model reported when the API response names none.
        modality: Modality label reported on success.
        aspect_ratio: Aspect ratio reported on success.
        duration: Fallback duration reported when the API returns none.
        operation: Operation label stored in the response extras.
        resolution: Optional resolution; added to the extras when truthy.

    Returns:
        A success response whose ``video`` is the URL selected by
        ``_xai_video_output_urls``, or an error response with ``error_type``
        ``api_error`` (submit failed), ``empty_response`` (done without a URL),
        ``timeout``, or ``xai_<status>`` for any other terminal status.
    """
    # Storage configuration is best-effort: if the helper module or config is
    # unavailable, the request is still sent, just without storage options.
    try:
        from tools.xai_http import (
            build_xai_storage_options,
            maybe_mark_xai_storage_notice_seen,
            read_xai_imagine_storage_config,
        )

        storage_options = build_xai_storage_options(
            "video_gen",
            filename_prefix="hermes-xai-video",
            extension="mp4",
        )
        storage_notice = maybe_mark_xai_storage_notice_seen("video_gen")
        storage_cfg = read_xai_imagine_storage_config("video_gen")
    except Exception:
        storage_options = None
        storage_notice = None
        storage_cfg = {"enabled": False}
    if storage_options is not None:
        payload["storage_options"] = storage_options

    async with httpx.AsyncClient() as client:
        try:
            request_id = await _submit(
                client, payload, api_key=api_key, base_url=base_url,
                endpoint=endpoint,
            )
        except httpx.HTTPStatusError as exc:
            detail = ""
            try:
                detail = exc.response.text[:500]
            except Exception:
                # Reading the body is optional; fall back to str(exc) below.
                pass
            return error_response(
                error=f"xAI submit failed ({exc.response.status_code}): {detail or exc}",
                error_type="api_error",
                provider="xai",
                model=resolved_model,
                prompt=prompt,
            )

        poll_result = await _poll(
            client, request_id,
            api_key=api_key, base_url=base_url,
            timeout_seconds=DEFAULT_TIMEOUT_SECONDS,
            poll_interval=DEFAULT_POLL_INTERVAL_SECONDS,
        )

    status = poll_result["status"]
    body = poll_result["body"]

    if status == "done":
        video = body.get("video") or {}
        if not isinstance(video, dict):
            video = {}
        file_output = video.get("file_output")
        if not isinstance(file_output, dict):
            file_output = {}
        public_video_url, temporary_url, stored_public_url = _xai_video_output_urls(video)
        if not public_video_url:
            return error_response(
                error="xAI video request completed without a video URL",
                error_type="empty_response",
                provider="xai",
                model=body.get("model") or resolved_model,
                prompt=prompt,
            )
        extra: Dict[str, Any] = {
            "request_id": request_id,
            "operation": operation,
            "storage_enabled": bool(storage_cfg.get("enabled")),
        }
        if resolution:
            extra["resolution"] = resolution
        if storage_notice:
            extra["storage_notice"] = storage_notice
        if stored_public_url:
            extra["public_url"] = stored_public_url
        if temporary_url:
            extra["temporary_url"] = temporary_url
        # Surface selected storage metadata; file ids are intentionally not
        # in this list.
        for key in (
            "filename",
            "expires_at",
            "public_url_expires_at",
            "public_url_error",
            "storage_error",
        ):
            if key in file_output:
                extra[key] = file_output[key]
        if body.get("usage"):
            extra["usage"] = body["usage"]
        return success_response(
            video=public_video_url,
            model=body.get("model") or resolved_model,
            prompt=prompt,
            modality=modality,
            aspect_ratio=aspect_ratio,
            duration=video.get("duration") or duration,
            provider="xai",
            extra=extra,
        )

    if status == "timeout":
        return error_response(
            error=f"Timed out waiting for xAI video request after {DEFAULT_TIMEOUT_SECONDS}s",
            error_type="timeout",
            provider="xai",
            model=resolved_model,
            prompt=prompt,
        )

    # ``error`` may be an object with a ``message`` or a bare string; accept
    # both instead of raising AttributeError on the string form.
    error_field = body.get("error")
    if isinstance(error_field, dict):
        error_message = error_field.get("message")
    elif isinstance(error_field, str):
        error_message = error_field
    else:
        error_message = None
    message = (
        error_message
        or body.get("message")
        or f"xAI video request ended with status '{status}'"
    )
    return error_response(
        error=message,
        error_type=f"xai_{status}",
        provider="xai",
        model=resolved_model,
        prompt=prompt,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plugin entry point
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
name: xai
|
||||
version: 1.0.0
|
||||
description: "xAI Grok Imagine video generation backend. Supports text-to-video, image-to-video, and reference-image-guided generation via the xAI async videos API."
|
||||
description: "xAI Grok Imagine video generation backend. Supports text-to-video, image-to-video, reference-to-video, video editing, video extension, and stored public URLs via the xAI async videos API."
|
||||
author: NousResearch
|
||||
kind: backend
|
||||
requires_env:
|
||||
|
|
|
|||
|
|
@ -16,9 +16,17 @@ import pytest
|
|||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _fake_api_key(monkeypatch, tmp_path):
    """Set a fake XAI_API_KEY and point HERMES_HOME at a temp dir for every test.

    The cached ``hermes_cli.config`` load is also invalidated when that hook
    exists.
    """
    monkeypatch.setenv("XAI_API_KEY", "test-key-12345")
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    try:
        import hermes_cli.config as cfg_mod

        if hasattr(cfg_mod, "_invalidate_load_config_cache"):
            cfg_mod._invalidate_load_config_cache()
    except Exception:
        # Best-effort: cache invalidation is optional and must not fail tests.
        pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -80,6 +88,13 @@ class TestXAIImageGenProvider:
|
|||
assert schema["env_vars"] == []
|
||||
assert schema["post_setup"] == "xai_grok"
|
||||
|
||||
def test_capabilities_expose_total_source_image_limit(self):
    """Capabilities report both the reference-image and total source-image limits."""
    from plugins.image_gen.xai import XAIImageGenProvider

    advertised = XAIImageGenProvider().capabilities()

    assert advertised["max_reference_images"] == 2
    assert advertised["max_source_images"] == 3
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config tests
|
||||
|
|
@ -318,6 +333,131 @@ class TestGenerate:
|
|||
f"resolution must be the literal '1k' or '2k', got {payload['resolution']!r}"
|
||||
)
|
||||
|
||||
def test_image_edit_rejects_bare_file_id_input(self):
    """A bare ``file_...`` id as ``image_url`` fails with ``invalid_image_url`` and no POST."""
    from plugins.image_gen.xai import XAIImageGenProvider

    fake_response = MagicMock(status_code=200)
    fake_response.raise_for_status = MagicMock()
    fake_response.json.return_value = {"data": [{"url": "https://xai.image/edited.png"}]}

    post_patch = patch("plugins.image_gen.xai.requests.post", return_value=fake_response)
    save_patch = patch("plugins.image_gen.xai.save_url_image", return_value="/tmp/edited.png")
    with post_patch as mock_post, save_patch:
        outcome = XAIImageGenProvider().generate(
            prompt="make the robot red",
            image_url="file_03eb65b1-aa97-482f-9ef0-b04f9172ea00",
        )

    assert outcome["success"] is False
    assert outcome["error_type"] == "invalid_image_url"
    mock_post.assert_not_called()
|
||||
|
||||
def test_image_edit_accepts_public_https_url(self):
    """A public HTTPS ``image_url`` is sent as an ``image_url``-typed image field."""
    from plugins.image_gen.xai import XAIImageGenProvider

    public_url = "https://files-cdn.x.ai/token/file_abc.png"

    fake_response = MagicMock(status_code=200)
    fake_response.raise_for_status = MagicMock()
    fake_response.json.return_value = {"data": [{"url": "https://xai.image/edited.png"}]}

    post_patch = patch("plugins.image_gen.xai.requests.post", return_value=fake_response)
    save_patch = patch("plugins.image_gen.xai.save_url_image", return_value="/tmp/edited.png")
    with post_patch as mock_post, save_patch:
        outcome = XAIImageGenProvider().generate(
            prompt="make the robot red",
            image_url=public_url,
        )

    assert outcome["success"] is True
    sent_body = mock_post.call_args.kwargs.get("json")
    assert sent_body["image"] == {"url": public_url, "type": "image_url"}
|
||||
|
||||
def test_multi_image_edit_rejects_bare_file_id_inputs(self):
    """Bare file ids for the primary and reference images are rejected without a POST."""
    from plugins.image_gen.xai import XAIImageGenProvider

    fake_response = MagicMock(status_code=200)
    fake_response.raise_for_status = MagicMock()
    fake_response.json.return_value = {"data": [{"url": "https://xai.image/edited.png"}]}

    bare_reference_ids = [
        "file_54b48d6d-28ad-4982-9d72-bd3ac677c9bc",
        "file_aa11bb22-cc33-44dd-88ee-ff0011223344",
    ]

    post_patch = patch("plugins.image_gen.xai.requests.post", return_value=fake_response)
    save_patch = patch("plugins.image_gen.xai.save_url_image", return_value="/tmp/edited.png")
    with post_patch as mock_post, save_patch:
        outcome = XAIImageGenProvider().generate(
            prompt="combine these robots into one product shot",
            image_url="file_03eb65b1-aa97-482f-9ef0-b04f9172ea00",
            reference_image_urls=bare_reference_ids,
        )

    assert outcome["success"] is False
    assert outcome["error_type"] == "invalid_image_url"
    mock_post.assert_not_called()
|
||||
|
||||
def test_multi_image_edit_rejects_more_than_three_sources(self):
    """One primary image plus three references yields ``too_many_references``."""
    from plugins.image_gen.xai import XAIImageGenProvider

    outcome = XAIImageGenProvider().generate(
        prompt="combine too many references",
        image_url="file_1",
        reference_image_urls=["file_2", "file_3", "file_4"],
    )

    assert outcome["success"] is False
    assert outcome["error_type"] == "too_many_references"
|
||||
|
||||
def test_storage_options_are_sent_by_default(self):
    """With default config the POST body carries public, non-expiring ``.png`` storage options."""
    from plugins.image_gen.xai import XAIImageGenProvider

    fake_response = MagicMock(status_code=200)
    fake_response.raise_for_status = MagicMock()
    fake_response.json.return_value = {"data": [{"b64_json": "dGVzdA=="}]}

    post_patch = patch("plugins.image_gen.xai.requests.post", return_value=fake_response)
    save_patch = patch("plugins.image_gen.xai.save_b64_image", return_value="/tmp/test.png")
    with post_patch as mock_post, save_patch:
        XAIImageGenProvider().generate(prompt="test")

    storage = mock_post.call_args.kwargs.get("json")["storage_options"]
    assert storage["public_url"] is True
    assert "expires_after" not in storage
    assert storage["filename"].endswith(".png")
|
||||
|
||||
def test_public_url_file_output_wins_over_temporary_url(self):
    """The stored ``file_output.public_url`` is returned instead of the temporary URL.

    The result must not expose ``file_id`` and nothing is downloaded locally.
    """
    from plugins.image_gen.xai import XAIImageGenProvider

    stored_output = {
        "file_id": "file-123",
        "filename": "stored.png",
        "public_url": "https://xai-files.example/stored.png",
        "public_url_expires_at": 1234567890,
    }
    fake_response = MagicMock(status_code=200)
    fake_response.raise_for_status = MagicMock()
    fake_response.json.return_value = {
        "data": [
            {
                "url": "https://imgen.x.ai/xai-tmp-imgen-test.jpeg",
                "file_output": stored_output,
            }
        ],
    }

    post_patch = patch("plugins.image_gen.xai.requests.post", return_value=fake_response)
    save_patch = patch("plugins.image_gen.xai.save_url_image")
    with post_patch, save_patch as mock_save_url:
        outcome = XAIImageGenProvider().generate(prompt="A cat playing piano")

    assert outcome["success"] is True
    assert outcome["image"] == "https://xai-files.example/stored.png"
    assert outcome["public_url"] == "https://xai-files.example/stored.png"
    assert "file_id" not in outcome
    mock_save_url.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registration test
|
||||
|
|
@ -334,3 +474,21 @@ class TestRegistration:
|
|||
provider = mock_ctx.register_image_gen_provider.call_args[0][0]
|
||||
assert isinstance(provider, XAIImageGenProvider)
|
||||
assert provider.name == "xai"
|
||||
|
||||
|
||||
def test_xai_image_field_expands_user_home(tmp_path, monkeypatch):
    """``_xai_image_field`` must expand a leading ``~`` in a local image path.

    Pre-flight validation goes through ``Path(source).expanduser()``, so a
    ``~/...`` path is accepted there; if ``_xai_image_field`` did not expand it
    as well, loading would fail with a spurious io_error.
    """
    from plugins.image_gen.xai import _xai_image_field

    fake_home = str(tmp_path)
    for home_var in ("HOME", "USERPROFILE"):
        monkeypatch.setenv(home_var, fake_home)
    (tmp_path / "pic.png").write_bytes(b"\x89PNG\r\n\x1a\n")

    image_field = _xai_image_field("~/pic.png")

    assert image_field["type"] == "image_url"
    assert image_field["url"].startswith("data:image/png;base64,")
|
||||
|
|
|
|||
|
|
@ -32,9 +32,9 @@ def test_xai_provider_lists_text_and_current_image_video_models():
|
|||
ids = [model["id"] for model in models]
|
||||
|
||||
assert ids[0] == "grok-imagine-video"
|
||||
assert ids[1] == "grok-imagine-video-1.5-preview"
|
||||
assert ids[1] == "grok-imagine-video-1.5"
|
||||
assert models[1]["modalities"] == ["image"]
|
||||
assert models[1]["aliases"] == ["grok-imagine-video-1.5-2026-05-30"]
|
||||
assert "aliases" not in models[1]
|
||||
|
||||
|
||||
def test_xai_routes_default_models_by_modality():
|
||||
|
|
@ -49,7 +49,7 @@ def test_xai_routes_default_models_by_modality():
|
|||
"grok-imagine-video",
|
||||
modality="image",
|
||||
explicit_model=False,
|
||||
) == "grok-imagine-video-1.5-preview"
|
||||
) == "grok-imagine-video-1.5"
|
||||
assert _resolve_model_for_modality(
|
||||
"grok-imagine-video-1.5-preview",
|
||||
modality="text",
|
||||
|
|
@ -62,15 +62,11 @@ def test_xai_routes_default_models_by_modality():
|
|||
) == "grok-imagine-video-1.5-preview"
|
||||
|
||||
|
||||
def test_xai_capabilities_text_and_image_only():
|
||||
"""xAI was previously advertised with edit/extend operations. The
|
||||
simplified surface only exposes text-to-video and image-to-video —
|
||||
confirm those are the only modalities advertised."""
|
||||
def test_xai_capabilities_keep_generate_surface_only():
|
||||
from plugins.video_gen.xai import XAIVideoGenProvider
|
||||
|
||||
caps = XAIVideoGenProvider().capabilities()
|
||||
assert caps["modalities"] == ["text", "image"]
|
||||
# No 'operations' key in the simplified surface
|
||||
assert "operations" not in caps
|
||||
assert caps["max_reference_images"] == 7
|
||||
|
||||
|
|
@ -148,3 +144,45 @@ def test_xai_no_operation_kwarg():
|
|||
assert result["success"] is False
|
||||
# auth_required, NOT some signature error
|
||||
assert result["error_type"] in {"auth_required", "api_error"}
|
||||
|
||||
|
||||
def test_xai_video_output_urls_prefers_stored_public_url():
    """The stored public URL is returned as primary; the raw ``url`` as temporary."""
    from plugins.video_gen.xai import _xai_video_output_urls

    stored_url = "https://files-cdn.x.ai/token/file_abc.mp4"
    temp_url = "https://vidgen.x.ai/xai-vidgen-bucket/out.mp4"
    video_body = {
        "url": temp_url,
        "file_output": {
            "public_url": stored_url,
            "file_id": "file_abc",
        },
    }

    public_url, temporary, stored = _xai_video_output_urls(video_body)

    assert public_url == "https://files-cdn.x.ai/token/file_abc.mp4"
    assert stored == "https://files-cdn.x.ai/token/file_abc.mp4"
    assert temporary == "https://vidgen.x.ai/xai-vidgen-bucket/out.mp4"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_video_input_from_public_url_uses_url_field():
    """A public HTTPS video URL is wrapped as ``{"url": ...}``."""
    from plugins.video_gen.xai import _video_input_from_public_url

    url = "https://files-cdn.x.ai/kRQVP6PRQlioVAUNC3GAdg/file_1faca9c3-9411-46ad-bb41-b9b8527789e6.mp4"

    video_input = await _video_input_from_public_url(
        url,
        api_key="test-key",
        base_url="https://api.x.ai/v1",
    )

    assert video_input == {"url": url}
|
||||
|
||||
|
||||
def test_video_input_from_public_url_rejects_bare_file_id():
    """A bare ``file_...`` id is not a usable video input and resolves to ``None``."""
    import asyncio

    from plugins.video_gen.xai import _video_input_from_public_url

    pending = _video_input_from_public_url(
        "file_1faca9c3-9411-46ad-bb41-b9b8527789e6",
        api_key="test-key",
        base_url="https://api.x.ai/v1",
    )
    resolved = asyncio.run(pending)

    assert resolved is None
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ class TestXAIPayload:
|
|||
provider, captured = xai_provider
|
||||
provider.generate("animate this", image_url="https://example.com/cat.png")
|
||||
payload = _last_post(captured)["json"]
|
||||
assert payload["model"] == "grok-imagine-video-1.5-preview"
|
||||
assert payload["model"] == "grok-imagine-video-1.5"
|
||||
assert payload["image"] == {"url": "https://example.com/cat.png"}
|
||||
|
||||
def test_local_image_path_is_sent_as_data_uri(self, xai_provider, tmp_path):
|
||||
|
|
@ -133,7 +133,7 @@ class TestXAIPayload:
|
|||
provider.generate("animate this", image_url=str(image_path))
|
||||
|
||||
payload = _last_post(captured)["json"]
|
||||
assert payload["model"] == "grok-imagine-video-1.5-preview"
|
||||
assert payload["model"] == "grok-imagine-video-1.5"
|
||||
assert payload["image"]["url"].startswith("data:image/png;base64,")
|
||||
|
||||
def test_explicit_model_override_is_honored_for_image(self, xai_provider):
|
||||
|
|
|
|||
|
|
@ -35,6 +35,9 @@ class _RecordingProvider(VideoGenProvider):
|
|||
def default_model(self) -> Optional[str]:
|
||||
return "model-a"
|
||||
|
||||
def capabilities(self) -> Dict[str, Any]:
    """Advertise the text and image modalities for this recording test provider."""
    supported_modalities = ["text", "image"]
    return {"modalities": supported_modalities}
|
||||
|
||||
def generate(self, prompt, **kwargs):
|
||||
self.last_kwargs = {"prompt": prompt, **kwargs}
|
||||
modality = "image" if kwargs.get("image_url") else "text"
|
||||
|
|
@ -113,14 +116,25 @@ class TestUnifiedDispatch:
|
|||
assert "error" in result
|
||||
assert "prompt" in result["error"].lower()
|
||||
|
||||
def test_edit_extend_args_are_rejected_by_generate_tool(self):
    """Passing ``operation``/``video_url`` to the generate tool returns an error
    that points at the provider-specific tool."""
    video_gen_registry.register_provider(_RecordingProvider("rec"))

    edit_style_args = {
        "prompt": "make it rain",
        "operation": "edit",
        "video_url": "https://example.com/in.mp4",
    }
    result = self._run(edit_style_args)

    assert "error" in result
    assert "provider-specific tool" in result["error"]
|
||||
|
||||
def test_provider_exception_caught(self):
    """An exception raised by the provider is reported as ``provider_exception``."""
    failing_provider = _RaisingProvider()
    video_gen_registry.register_provider(failing_provider)

    result = self._run({"prompt": "x"})

    assert result["success"] is False
    assert result["error_type"] == "provider_exception"
|
||||
|
||||
def test_operation_field_not_in_schema(self):
|
||||
"""Make sure we removed the operation field from the schema."""
|
||||
def test_edit_extend_fields_not_in_schema(self):
|
||||
from tools.video_generation_tool import VIDEO_GENERATE_SCHEMA
|
||||
assert "operation" not in VIDEO_GENERATE_SCHEMA["parameters"]["properties"]
|
||||
assert "video_url" not in VIDEO_GENERATE_SCHEMA["parameters"]["properties"]
|
||||
props = VIDEO_GENERATE_SCHEMA["parameters"]["properties"]
|
||||
assert "operation" not in props
|
||||
assert "video_url" not in props
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
"""Tests for the dynamic schema builder under the simplified surface."""
|
||||
"""Tests for the dynamic schema builder."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
@ -91,20 +91,13 @@ class TestDynamicSchemaBuilder:
|
|||
assert "No video backend is configured" in desc
|
||||
assert "hermes tools" in desc
|
||||
|
||||
def test_does_not_mention_edit_or_extend(self, cfg_home):
|
||||
"""The simplified surface only does text→video and image→video.
|
||||
The description must not mention edit/extend anywhere."""
|
||||
def test_generic_description_keeps_edit_extend_out_of_surface(self, cfg_home):
|
||||
from tools.video_generation_tool import _build_dynamic_video_schema, _GENERIC_DESCRIPTION
|
||||
|
||||
desc = _build_dynamic_video_schema()["description"]
|
||||
# Block words that would suggest functionality we removed
|
||||
assert "edit" not in desc.lower() or "audio" in desc.lower() # 'audio' contains 'audi' not 'edit'
|
||||
# Stronger: no occurrence of the words "edit" or "extend" as standalone
|
||||
for forbidden in (" edit ", " edits ", " extend ", " extends "):
|
||||
assert forbidden not in desc.lower(), f"description leaks '{forbidden.strip()}'"
|
||||
# Sanity: the generic blurb itself is also clean
|
||||
for forbidden in ("edit", "extend"):
|
||||
assert forbidden not in _GENERIC_DESCRIPTION.lower()
|
||||
assert "Video edit/extend workflows are not part of this unified surface" in desc
|
||||
assert "operation='edit'" not in _GENERIC_DESCRIPTION
|
||||
assert "operation='extend'" not in _GENERIC_DESCRIPTION
|
||||
|
||||
def test_both_modalities_advertises_auto_routing(self, cfg_home):
|
||||
from tools.video_generation_tool import _build_dynamic_video_schema
|
||||
|
|
@ -123,7 +116,6 @@ class TestDynamicSchemaBuilder:
|
|||
assert "Active backend: Both" in desc
|
||||
assert "text-to-video" in desc and "image-to-video" in desc
|
||||
assert "routes automatically" in desc
|
||||
# operations bullet is gone
|
||||
assert "operations supported" not in desc
|
||||
|
||||
def test_image_only_model_warns_about_required_image_url(self, cfg_home):
|
||||
|
|
|
|||
|
|
@ -79,10 +79,21 @@ def matrix_env(tmp_path, monkeypatch):
|
|||
xai_calls.append({"url": url, "json": json})
|
||||
return _Resp({"request_id": "req-1"})
|
||||
async def get(self, url, headers=None, timeout=None):
|
||||
payload = xai_calls[-1]["json"]
|
||||
storage_options = payload.get("storage_options") or {}
|
||||
return _Resp({
|
||||
"status": "done",
|
||||
"video": {"url": "https://xai-cdn/out.mp4", "duration": 8},
|
||||
"model": xai_calls[-1]["json"].get("model", "grok-imagine-video"),
|
||||
"video": {
|
||||
"url": "https://xai-cdn/out.mp4",
|
||||
"duration": 8,
|
||||
"file_output": {
|
||||
"file_id": "file-123",
|
||||
"filename": storage_options.get("filename", "out.mp4"),
|
||||
"public_url": "https://xai-files.example/out.mp4",
|
||||
"public_url_expires_at": 1234567890,
|
||||
},
|
||||
},
|
||||
"model": payload.get("model", "grok-imagine-video"),
|
||||
})
|
||||
import plugins.video_gen.xai as xai_plugin
|
||||
monkeypatch.setattr(xai_plugin.httpx, "AsyncClient", lambda: _Client())
|
||||
|
|
@ -100,7 +111,7 @@ def matrix_env(tmp_path, monkeypatch):
|
|||
return tmp_path, fal_calls, xai_calls
|
||||
|
||||
|
||||
def _invoke_tool(home, cfg: dict, args: dict) -> dict:
|
||||
def _invoke_tool(home, cfg: dict, args: dict, tool_name: str = "video_generate") -> dict:
|
||||
"""Write config, invoke the registered tool handler, return parsed JSON."""
|
||||
(home / "config.yaml").write_text(yaml.safe_dump(cfg))
|
||||
import hermes_cli.config as cfg_mod
|
||||
|
|
@ -108,9 +119,9 @@ def _invoke_tool(home, cfg: dict, args: dict) -> dict:
|
|||
cfg_mod._invalidate_load_config_cache()
|
||||
|
||||
from tools.registry import discover_builtin_tools, registry
|
||||
if "video_generate" not in registry._tools:
|
||||
if tool_name not in registry._tools:
|
||||
discover_builtin_tools()
|
||||
handler = registry._tools["video_generate"].handler
|
||||
handler = registry._tools[tool_name].handler
|
||||
return json.loads(handler(args))
|
||||
|
||||
|
||||
|
|
@ -205,6 +216,11 @@ def test_xai_text_only_via_tool_surface(matrix_env):
|
|||
assert payload["model"] == "grok-imagine-video"
|
||||
assert "image" not in payload
|
||||
assert "reference_images" not in payload
|
||||
assert payload["storage_options"]["public_url"] is True
|
||||
assert "expires_after" not in payload["storage_options"]
|
||||
assert result["video"] == "https://xai-files.example/out.mp4"
|
||||
assert result["public_url"] == "https://xai-files.example/out.mp4"
|
||||
assert result.get("temporary_url") == "https://xai-cdn/out.mp4"
|
||||
|
||||
|
||||
def test_xai_text_plus_image_via_tool_surface(matrix_env):
|
||||
|
|
@ -222,10 +238,157 @@ def test_xai_text_plus_image_via_tool_surface(matrix_env):
|
|||
assert len(xai_calls) == 1
|
||||
assert xai_calls[0]["url"].endswith("/videos/generations")
|
||||
payload = xai_calls[0]["json"] or {}
|
||||
assert payload["model"] == "grok-imagine-video-1.5-preview"
|
||||
assert payload["model"] == "grok-imagine-video-1.5"
|
||||
assert payload["image"] == {"url": "https://example.com/img.png"}
|
||||
|
||||
|
||||
def test_xai_image_to_video_rejects_bare_file_id_via_tool_surface(matrix_env):
    """A bare xAI file id passed as ``image_url`` fails before any xAI request."""
    hermes_home, _fal_calls, recorded_xai = matrix_env
    config = {"video_gen": {"provider": "xai"}}
    tool_args = {
        "prompt": "animate this robot waving",
        "image_url": "file_03eb65b1-aa97-482f-9ef0-b04f9172ea00",
    }

    outcome = _invoke_tool(hermes_home, config, tool_args)

    assert outcome["success"] is False
    assert outcome.get("error_type") == "invalid_image_url"
    # Validation must short-circuit: nothing may have reached the fake client.
    assert len(recorded_xai) == 0
|
||||
|
||||
|
||||
def test_xai_reference_to_video_via_tool_surface(matrix_env):
    """Reference images route to /videos/generations with a clamped duration."""
    hermes_home, _fal_calls, recorded_xai = matrix_env
    reference_urls = [
        "https://example.com/model.png",
        "https://example.com/jacket.png",
    ]
    tool_args = {
        "prompt": "put the jacket from the reference on the runway model",
        "reference_image_urls": reference_urls,
        "duration": 15,
    }

    outcome = _invoke_tool(
        hermes_home,
        {"video_gen": {"provider": "xai"}},
        tool_args,
    )

    assert outcome["success"] is True
    assert outcome["modality"] == "reference"
    assert outcome["provider"] == "xai"

    first_call = recorded_xai[0]
    sent = first_call["json"] or {}
    assert first_call["url"].endswith("/videos/generations")
    assert sent["model"] == "grok-imagine-video"
    # The requested 15 seconds is expected to go out on the wire as 10.
    assert sent["duration"] == 10
    assert sent["reference_images"] == [
        {"url": "https://example.com/model.png"},
        {"url": "https://example.com/jacket.png"},
    ]
|
||||
|
||||
|
||||
def test_xai_reference_to_video_rejects_bare_file_ids_via_tool_surface(matrix_env):
    """Bare file ids in ``reference_image_urls`` fail without any xAI request."""
    hermes_home, _fal_calls, recorded_xai = matrix_env
    bare_file_ids = [
        "file_03eb65b1-aa97-482f-9ef0-b04f9172ea00",
        "file_54b48d6d-28ad-4982-9d72-bd3ac677c9bc",
    ]

    outcome = _invoke_tool(
        hermes_home,
        {"video_gen": {"provider": "xai"}},
        {
            "prompt": "use these references for a robot product shot",
            "reference_image_urls": bare_file_ids,
        },
    )

    assert outcome["success"] is False
    assert outcome.get("error_type") == "invalid_reference_image_urls"
    assert len(recorded_xai) == 0
|
||||
|
||||
|
||||
def test_xai_video_edit_via_tool_surface(matrix_env):
    """``xai_video_edit`` posts to /videos/edits without generation-only fields."""
    hermes_home, _fal_calls, recorded_xai = matrix_env
    source_url = "https://example.com/source.mp4"

    outcome = _invoke_tool(
        hermes_home,
        {"video_gen": {"provider": "xai"}},
        {"prompt": "make the sky stormy", "video_url": source_url},
        tool_name="xai_video_edit",
    )

    assert outcome["success"] is True
    assert outcome["modality"] == "edit"

    first_call = recorded_xai[0]
    sent = first_call["json"] or {}
    assert first_call["url"].endswith("/videos/edits")
    assert sent["model"] == "grok-imagine-video"
    assert sent["video"] == {"url": "https://example.com/source.mp4"}
    # Edit requests must not carry generation-only sizing parameters.
    assert "duration" not in sent
    assert "aspect_ratio" not in sent
    assert "resolution" not in sent
|
||||
|
||||
|
||||
def test_xai_video_extend_via_tool_surface(matrix_env):
    """``xai_video_extend`` posts to /videos/extensions with a clamped duration."""
    hermes_home, _fal_calls, recorded_xai = matrix_env
    tool_args = {
        "prompt": "the camera pulls back to reveal the city",
        "video_url": "https://example.com/source.mp4",
        "duration": 15,
    }

    outcome = _invoke_tool(
        hermes_home,
        {"video_gen": {"provider": "xai"}},
        tool_args,
        tool_name="xai_video_extend",
    )

    assert outcome["success"] is True
    assert outcome["modality"] == "extend"

    first_call = recorded_xai[0]
    sent = first_call["json"] or {}
    assert first_call["url"].endswith("/videos/extensions")
    assert sent["model"] == "grok-imagine-video"
    assert sent["video"] == {"url": "https://example.com/source.mp4"}
    # The requested 15 seconds is expected to go out on the wire as 10.
    assert sent["duration"] == 10
|
||||
|
||||
|
||||
def test_xai_video_edit_rejects_bare_file_id_via_tool_surface(matrix_env):
    """A non-URL ``video_url`` makes ``xai_video_edit`` fail with a URL error."""
    hermes_home, _fal_calls, recorded_xai = matrix_env
    tool_args = {"prompt": "make the sky stormy", "video_url": "file-123"}

    outcome = _invoke_tool(
        hermes_home,
        {"video_gen": {"provider": "xai"}},
        tool_args,
        tool_name="xai_video_edit",
    )

    assert outcome.get("success") is not True
    assert "error" in outcome
    message = outcome["error"].lower()
    assert "url" in message
    assert len(recorded_xai) == 0
|
||||
|
||||
|
||||
def test_xai_video_extend_rejects_bare_file_id_via_tool_surface(matrix_env):
    """A bare file id as ``video_url`` makes ``xai_video_extend`` fail early."""
    hermes_home, _fal_calls, recorded_xai = matrix_env
    tool_args = {
        "prompt": "continue into a sunrise",
        "video_url": "file_25ac1c31-d6d8-48b2-8504-a97d282310c4",
    }

    outcome = _invoke_tool(
        hermes_home,
        {"video_gen": {"provider": "xai"}},
        tool_args,
        tool_name="xai_video_extend",
    )

    assert outcome.get("success") is not True
    assert "error" in outcome
    message = outcome["error"].lower()
    assert "url" in message
    assert len(recorded_xai) == 0
|
||||
|
||||
|
||||
def test_xai_explicit_model_override_via_tool_surface(matrix_env):
|
||||
home, _, xai_calls = matrix_env
|
||||
|
||||
|
|
|
|||
132
tests/tools/test_xai_http_storage.py
Normal file
132
tests/tools/test_xai_http_storage.py
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
"""Tests for xAI Imagine storage helper behavior."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
def _invalidate_config_cache():
    """Best-effort reset of the Hermes config cache between tests.

    Any failure (module missing, hook absent, hook raising) is ignored so the
    tests stay runnable against builds that lack the cache hook.
    """
    try:
        import hermes_cli.config as config_module

        if not hasattr(config_module, "_invalidate_load_config_cache"):
            return
        config_module._invalidate_load_config_cache()
    except Exception:
        return
|
||||
|
||||
|
||||
def test_storage_defaults_to_permanent_public_urls(tmp_path, monkeypatch):
    """With no config file, storage is on, public, and has no expiry."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _invalidate_config_cache()

    from tools.xai_http import build_xai_storage_options

    options = build_xai_storage_options(
        "image_gen",
        filename_prefix="hermes-xai-image",
        extension="png",
    )

    assert options is not None
    assert options["public_url"] is True
    assert "expires_after" not in options
    filename = options["filename"]
    assert filename.startswith("hermes-xai-image-")
    assert filename.endswith(".png")
|
||||
|
||||
|
||||
def test_storage_can_be_disabled(tmp_path, monkeypatch):
    """``storage.enabled: false`` yields no payload and an empty notice."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    config = {"video_gen": {"xai": {"storage": {"enabled": False}}}}
    (tmp_path / "config.yaml").write_text(yaml.safe_dump(config))
    _invalidate_config_cache()

    from tools.xai_http import build_xai_storage_options, xai_storage_notice_text

    options = build_xai_storage_options(
        "video_gen",
        filename_prefix="hermes-xai-video",
        extension="mp4",
    )
    assert options is None
    assert xai_storage_notice_text("video_gen") == ""
|
||||
|
||||
|
||||
def test_storage_can_be_permanent(tmp_path, monkeypatch):
    """``expires_after: permanent`` omits ``expires_after`` from the payload."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    config = {"image_gen": {"xai": {"storage": {"expires_after": "permanent"}}}}
    (tmp_path / "config.yaml").write_text(yaml.safe_dump(config))
    _invalidate_config_cache()

    from tools.xai_http import build_xai_storage_options

    options = build_xai_storage_options(
        "image_gen",
        filename_prefix="hermes-xai-image",
        extension="png",
    )

    assert options is not None
    assert "expires_after" not in options
|
||||
|
||||
|
||||
def test_storage_can_use_finite_retention(tmp_path, monkeypatch):
    """A numeric ``expires_after`` is passed through to the payload."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    config = {"image_gen": {"xai": {"storage": {"expires_after": 172800}}}}
    (tmp_path / "config.yaml").write_text(yaml.safe_dump(config))
    _invalidate_config_cache()

    from tools.xai_http import build_xai_storage_options

    options = build_xai_storage_options(
        "image_gen",
        filename_prefix="hermes-xai-image",
        extension="png",
    )

    assert options is not None
    assert options["expires_after"] == 172800
|
||||
|
||||
|
||||
def test_invalid_storage_retention_falls_back_to_bounded_ttl(tmp_path, monkeypatch):
    """An unparseable ``expires_after`` falls back to the 172800-second TTL."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    config = {
        "video_gen": {
            "xai": {"storage": {"expires_after": "definitely-not-a-duration"}},
        },
    }
    (tmp_path / "config.yaml").write_text(yaml.safe_dump(config))
    _invalidate_config_cache()

    from tools.xai_http import build_xai_storage_options

    options = build_xai_storage_options(
        "video_gen",
        filename_prefix="hermes-xai-video",
        extension="mp4",
    )

    assert options is not None
    assert options["expires_after"] == 172800
|
||||
|
|
@ -18,13 +18,11 @@ Generation.
|
|||
|
||||
Unified surface
|
||||
---------------
|
||||
One tool covers the common cases — text-to-video, image-to-video, video
|
||||
edit, video extend — with a compact schema:
|
||||
One tool covers the common cases - text-to-video, image-to-video, and
|
||||
reference-to-video - with a compact schema:
|
||||
|
||||
prompt text instruction (required for generate/edit)
|
||||
operation "generate" | "edit" | "extend"
|
||||
image_url drives image-to-video when operation=generate
|
||||
video_url source video for edit/extend
|
||||
prompt text instruction (required)
|
||||
image_url drives image-to-video
|
||||
reference_image_urls list, up to provider-declared cap
|
||||
duration seconds (provider clamps)
|
||||
aspect_ratio "16:9" | "9:16" | "1:1" | ...
|
||||
|
|
@ -38,6 +36,9 @@ Providers ignore parameters they do not support. The tool layer does
|
|||
**lightweight** validation (type/required-prompt) and lets each provider
|
||||
do its own clamping inside :meth:`VideoGenProvider.generate` — that keeps
|
||||
the tool surface stable as new providers ship with different capabilities.
|
||||
|
||||
Video edit and video extend are intentionally not exposed here; providers with
|
||||
those workflows should expose separate tools.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -80,21 +81,20 @@ VIDEO_GENERATE_SCHEMA: Dict[str, Any] = {
|
|||
"image_url": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Optional public URL of a still image. When provided, "
|
||||
"Optional public HTTPS URL of a still image. When provided, "
|
||||
"the active backend routes to its image-to-video "
|
||||
"endpoint (animate the image); when omitted, it routes "
|
||||
"to text-to-video. Pass either a URL the user supplied "
|
||||
"or a path/URL from the conversation."
|
||||
"to text-to-video. For xAI chaining, use the `image` or "
|
||||
"`public_url` HTTPS URL from a prior Imagine result."
|
||||
),
|
||||
},
|
||||
"reference_image_urls": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"description": (
|
||||
"Optional list of reference image URLs (style or "
|
||||
"character refs). Only supported by some backends; "
|
||||
"the active backend's description below indicates whether "
|
||||
"this is honored and what the max is."
|
||||
"Optional list of public HTTPS reference image URLs "
|
||||
"(style or character refs). For xAI chaining, use "
|
||||
"`image` or `public_url` from prior Imagine results."
|
||||
),
|
||||
},
|
||||
"duration": {
|
||||
|
|
@ -324,6 +324,11 @@ def _handle_video_generate(args: Dict[str, Any], **_kw: Any) -> str:
|
|||
# endpoint but our surface always needs a prompt.
|
||||
if not prompt:
|
||||
return tool_error("prompt is required for video generation")
|
||||
if "operation" in args or "video_url" in args:
|
||||
return tool_error(
|
||||
"video_generate only supports text-to-video, image-to-video, and "
|
||||
"reference-to-video; use a provider-specific tool for video edit/extend"
|
||||
)
|
||||
|
||||
# Resolve the active provider.
|
||||
configured = _read_configured_video_provider()
|
||||
|
|
@ -398,13 +403,13 @@ def _handle_video_generate(args: Dict[str, Any], **_kw: Any) -> str:
|
|||
# Dynamic schema — reflect the active backend's actual capabilities
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Why dynamic: the user's configured backend determines which operations
|
||||
# (generate/edit/extend), modalities (text / image / refs), aspect ratios,
|
||||
# resolutions, durations, and audio/negative-prompt flags are real. A model
|
||||
# that calls video_generate without knowing the active backend wastes a
|
||||
# turn on something like "fal-ai/veo3.1/image-to-video requires image_url".
|
||||
# Surfacing the per-model surface in the description means the model
|
||||
# usually gets the call right on the first try.
|
||||
# Why dynamic: the user's configured backend determines which modalities
|
||||
# (text / image / refs), aspect ratios, resolutions, durations, and
|
||||
# audio/negative-prompt flags are real. A model that calls video_generate
|
||||
# without knowing the active backend wastes a turn on something like
|
||||
# "fal-ai/veo3.1/image-to-video requires image_url". Surfacing the per-model
|
||||
# surface in the description means the model usually gets the call right on
|
||||
# the first try.
|
||||
#
|
||||
# Memoization: model_tools.get_tool_definitions() keys its cache on
|
||||
# config.yaml mtime, so when the user changes provider/model via
|
||||
|
|
@ -412,11 +417,12 @@ def _handle_video_generate(args: Dict[str, Any], **_kw: Any) -> str:
|
|||
|
||||
|
||||
_GENERIC_DESCRIPTION = (
|
||||
"Generate a video from a text prompt (text-to-video) or animate a "
|
||||
"still image (image-to-video) using the user's configured video "
|
||||
"generation backend. Pass `image_url` to animate that image; omit it "
|
||||
"to generate from text alone. The backend auto-routes to the right "
|
||||
"endpoint. The backend and model family are user-configured via "
|
||||
"Generate a video from a text prompt (text-to-video), animate a "
|
||||
"still image (image-to-video), or guide generation with reference images. "
|
||||
"Pass `image_url` to animate an image or `reference_image_urls` for "
|
||||
"reference-to-video. Video edit/extend workflows are not part of this "
|
||||
"unified surface; use a dedicated provider-specific tool when one is "
|
||||
"available. The backend and model family are user-configured via "
|
||||
"`hermes tools` → Video Generation; the agent does not pick them. "
|
||||
"Long-running generations may take 30 seconds to several minutes — "
|
||||
"the call blocks until the video is ready. Returns the result in the "
|
||||
|
|
@ -542,6 +548,21 @@ def _build_dynamic_video_schema() -> Dict[str, Any]:
|
|||
max_refs = caps.get("max_reference_images") or 0
|
||||
if max_refs:
|
||||
parts.append(f"- reference_image_urls: up to {max_refs} images")
|
||||
if configured == "xai":
|
||||
parts.append(
|
||||
"- chaining: for edit/extend pass the public HTTPS MP4 in `video` "
|
||||
"or `public_url` from the prior Imagine result (files-cdn). For "
|
||||
"image-to-video / reference-to-video pass public image URLs the "
|
||||
"same way"
|
||||
)
|
||||
try:
|
||||
from tools.xai_http import xai_storage_notice_text
|
||||
|
||||
notice = xai_storage_notice_text("video_gen")
|
||||
except Exception:
|
||||
notice = ""
|
||||
if notice:
|
||||
parts.append(f"- storage: {notice}")
|
||||
|
||||
return {"description": "\n".join(parts)}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,9 +2,15 @@
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
from typing import Dict
|
||||
import uuid
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
# Upper bound for a configured storage TTL: 30 days, expressed in seconds.
MAX_XAI_STORAGE_EXPIRES_AFTER_SECONDS = 30 * 24 * 60 * 60
# Fallback TTL (2 days, in seconds) used when a configured value cannot be parsed.
SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS = 2 * 24 * 60 * 60
|
||||
|
||||
|
||||
def has_xai_credentials() -> bool:
|
||||
|
|
@ -72,6 +78,149 @@ def hermes_xai_user_agent() -> str:
|
|||
return f"Hermes-Agent/{__version__}"
|
||||
|
||||
|
||||
def _load_config_section(section_name: str) -> Dict[str, Any]:
    """Fetch one top-level section of the Hermes config as a dict.

    Returns an empty dict when the config cannot be loaded, is not a mapping,
    or the requested section is missing or not a mapping.
    """
    try:
        from hermes_cli.config import load_config

        config = load_config()
        if not isinstance(config, dict):
            return {}
        candidate = config.get(section_name)
        if isinstance(candidate, dict):
            return candidate
        return {}
    except Exception:
        # Config access is best-effort here; callers fall back to defaults.
        return {}
|
||||
|
||||
|
||||
def _coerce_bool(value: Any, default: bool) -> bool:
    """Interpret a loosely-typed config value as a boolean.

    Args:
        value: Raw value from the config (bool, number, string, or anything else).
        default: Result to use when ``value`` is not recognizable.

    Returns:
        ``value`` itself when it is a bool; True/False for the numbers 1/0 and
        for the usual on/off spellings (case- and whitespace-insensitive);
        otherwise ``default``.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, (int, float)):
        # YAML `enabled: 0` / `enabled: 1` arrive as numbers, not strings;
        # honor them the same way the "0"/"1" string spellings are honored.
        if value == 1:
            return True
        if value == 0:
            return False
        return default
    if isinstance(value, str):
        normalized = value.strip().lower()
        if normalized in {"1", "true", "yes", "on", "enabled"}:
            return True
        if normalized in {"0", "false", "no", "off", "disabled"}:
            return False
    return default
|
||||
|
||||
|
||||
def _coerce_expires_after(value: Any) -> Optional[int]:
    """Normalize an xAI storage TTL.

    Args:
        value: Raw ``expires_after`` config value (None, bool, number, or string).

    Returns:
        int seconds for an expiring file (capped at
        ``MAX_XAI_STORAGE_EXPIRES_AFTER_SECONDS``),
        None for permanent storage (omit expires_after on the wire).
        Values that cannot be interpreted fall back to
        ``SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS``.
    """
    if value is None:
        return None
    if isinstance(value, bool):
        # bool is an int subclass: without this guard `expires_after: true`
        # would become a 1-second TTL. False keeps its old meaning (no expiry).
        return SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS if value else None
    if isinstance(value, str):
        normalized = value.strip().lower()
        if normalized in {"", "default"}:
            return None
        if normalized in {"none", "null", "never", "permanent", "forever", "0"}:
            return None
        try:
            value = int(normalized)
        except ValueError:
            return SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS
    if isinstance(value, (int, float)):
        try:
            seconds = int(value)
        except (ValueError, OverflowError):
            # int() raises ValueError for NaN and OverflowError for +/-inf
            # (YAML `.nan` / `.inf`); treat them like any other invalid value.
            return SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS
        if seconds <= 0:
            return None
        return min(seconds, MAX_XAI_STORAGE_EXPIRES_AFTER_SECONDS)
    return SAFE_XAI_STORAGE_EXPIRES_AFTER_SECONDS
|
||||
|
||||
|
||||
def read_xai_imagine_storage_config(section_name: str) -> Dict[str, Any]:
    """Resolve the xAI Imagine storage settings for ``section_name``.

    The settings live under ``<section>.xai.storage`` where the section is
    ``image_gen`` or ``video_gen``:

        image_gen:
          xai:
            storage:
              enabled: true
              public_url: true
              expires_after: null  # omit for permanent public URLs

    Missing or malformed levels are treated as an empty mapping, so storage is
    enabled with a public URL and no expiry unless the config says otherwise.

    Returns:
        Dict with normalized ``enabled``, ``public_url`` and ``expires_after``.
    """
    storage: Dict[str, Any] = {}
    section = _load_config_section(section_name)
    if isinstance(section, dict):
        provider_cfg = section.get("xai")
        if isinstance(provider_cfg, dict):
            candidate = provider_cfg.get("storage")
            if isinstance(candidate, dict):
                storage = candidate

    return {
        "enabled": _coerce_bool(storage.get("enabled"), True),
        "public_url": _coerce_bool(storage.get("public_url"), True),
        "expires_after": _coerce_expires_after(storage.get("expires_after")),
    }
|
||||
|
||||
|
||||
def build_xai_storage_options(
    section_name: str,
    *,
    filename_prefix: str,
    extension: str,
) -> Optional[Dict[str, Any]]:
    """Build the xAI ``storage_options`` request payload.

    Args:
        section_name: Config section to read settings from.
        filename_prefix: Leading part of the generated filename.
        extension: File extension, with or without a leading dot; an empty
            extension becomes ``bin``.

    Returns:
        None when storage is disabled; otherwise a dict with a unique
        ``filename`` (prefix, UTC timestamp, 8 hex chars), the ``public_url``
        flag, and ``expires_after`` only when a finite TTL is configured.
    """
    settings = read_xai_imagine_storage_config(section_name)
    if not settings["enabled"]:
        return None

    stamp = datetime.datetime.now(datetime.UTC).strftime("%Y%m%d-%H%M%S")
    unique = uuid.uuid4().hex[:8]
    file_ext = extension.lstrip(".") or "bin"

    options: Dict[str, Any] = {
        "filename": f"{filename_prefix}-{stamp}-{unique}.{file_ext}",
        "public_url": bool(settings["public_url"]),
    }
    ttl = settings["expires_after"]
    if ttl is not None:
        options["expires_after"] = ttl
    return options
|
||||
|
||||
|
||||
def xai_storage_notice_text(section_name: str) -> str:
    """User-facing notice for first xAI Imagine storage use.

    Args:
        section_name: Config section whose storage settings are described.

    Returns:
        An empty string when storage is disabled; otherwise a sentence that
        states the retention period and how to change or disable it.
    """
    cfg = read_xai_imagine_storage_config(section_name)
    if not cfg["enabled"]:
        return ""

    ttl = cfg["expires_after"]
    seconds_per_day = 24 * 60 * 60
    if ttl is None:
        retention = "without an automatic expiry"
    elif ttl >= seconds_per_day:
        days = ttl / seconds_per_day
        retention = f"for about {days:g} day{'s' if days != 1 else ''}"
    else:
        # Sub-day TTLs used to render as e.g. "0.0416667 days"; describe them
        # in the largest unit that fits instead.
        unit_seconds, unit_name = 1, "second"
        for candidate_seconds, candidate_name in ((3600, "hour"), (60, "minute")):
            if ttl >= candidate_seconds:
                unit_seconds, unit_name = candidate_seconds, candidate_name
                break
        amount = round(ttl / unit_seconds, 1)
        retention = f"for about {amount:g} {unit_name}{'s' if amount != 1 else ''}"

    return (
        "xAI Imagine storage is enabled so generated media gets a reusable "
        f"public URL {retention}. xAI may bill for stored files and public URL "
        f"hosting. Disable this with `{section_name}.xai.storage.enabled: false` "
        "or set `expires_after` to change the retention."
    )
|
||||
|
||||
|
||||
def maybe_mark_xai_storage_notice_seen(section_name: str) -> Optional[str]:
    """Return the storage notice once per Hermes home, then mark it seen.

    A marker file under ``<hermes home>/state`` records that the notice was
    shown. If the marker cannot be checked or written, the notice is returned
    anyway.

    Returns:
        The notice text on first use (or on marker failure); None when storage
        is disabled or the marker already exists.
    """
    text = xai_storage_notice_text(section_name)
    if not text:
        return None

    try:
        from hermes_constants import get_hermes_home

        state_dir = get_hermes_home() / "state"
        state_dir.mkdir(parents=True, exist_ok=True)
        seen_marker = state_dir / f"{section_name}_xai_storage_notice_seen"
        already_seen = seen_marker.exists()
        if not already_seen:
            seen_marker.write_text(datetime.datetime.now(datetime.UTC).isoformat() + "\n")
    except Exception:
        # Marker bookkeeping is best-effort: still surface the notice.
        return text
    return None if already_seen else text
|
||||
|
||||
|
||||
def resolve_xai_http_credentials(*, force_refresh: bool = False) -> Dict[str, str]:
|
||||
"""Resolve bearer credentials for direct xAI HTTP endpoints.
|
||||
|
||||
|
|
@ -88,6 +237,21 @@ def resolve_xai_http_credentials(*, force_refresh: bool = False) -> Dict[str, st
|
|||
tokens where the proactive JWT check is a no-op, etc.), not as a default —
|
||||
the auth-store lock is held for the duration of the refresh.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.auth import resolve_xai_oauth_runtime_credentials
|
||||
|
||||
creds = resolve_xai_oauth_runtime_credentials(force_refresh=force_refresh)
|
||||
access_token = str(creds.get("api_key") or "").strip()
|
||||
base_url = str(creds.get("base_url") or "").strip().rstrip("/")
|
||||
if access_token:
|
||||
return {
|
||||
"provider": "xai-oauth",
|
||||
"api_key": access_token,
|
||||
"base_url": base_url or "https://api.x.ai/v1",
|
||||
}
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not force_refresh:
|
||||
try:
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
|
|
@ -104,21 +268,6 @@ def resolve_xai_http_credentials(*, force_refresh: bool = False) -> Dict[str, st
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import resolve_xai_oauth_runtime_credentials
|
||||
|
||||
creds = resolve_xai_oauth_runtime_credentials(force_refresh=force_refresh)
|
||||
access_token = str(creds.get("api_key") or "").strip()
|
||||
base_url = str(creds.get("base_url") or "").strip().rstrip("/")
|
||||
if access_token:
|
||||
return {
|
||||
"provider": "xai-oauth",
|
||||
"api_key": access_token,
|
||||
"base_url": base_url or "https://api.x.ai/v1",
|
||||
}
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
api_key = str(get_env_value("XAI_API_KEY") or "").strip()
|
||||
base_url = str(get_env_value("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/")
|
||||
return {
|
||||
|
|
|
|||
209
tools/xai_video_tools.py
Normal file
209
tools/xai_video_tools.py
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
#!/usr/bin/env python3
|
||||
"""xAI-specific Imagine video edit and extend tools."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from hermes_cli.config import load_config
|
||||
from plugins.video_gen.xai import (
|
||||
has_xai_video_credentials,
|
||||
run_xai_video_edit,
|
||||
run_xai_video_extend,
|
||||
)
|
||||
from tools.registry import registry, tool_error
|
||||
|
||||
|
||||
def _configured_for_xai_video() -> bool:
    """Report whether ``video_gen.provider`` in the Hermes config is ``xai``.

    Returns False when the config cannot be loaded or has an unexpected shape.
    """
    try:
        config = load_config()
    except Exception:
        return False
    if not isinstance(config, dict):
        return False
    video_section = config.get("video_gen")
    if not isinstance(video_section, dict):
        return False
    return video_section.get("provider") == "xai"
|
||||
|
||||
|
||||
def _check_xai_video_requirements() -> bool:
    """Availability gate: xAI must be the video provider and have credentials."""
    if not _configured_for_xai_video():
        return False
    return has_xai_video_credentials()
|
||||
|
||||
|
||||
def _clean_string(value: Any) -> Optional[str]:
    """Return ``value`` stripped of surrounding whitespace, or None.

    None is returned for non-strings and for strings that are empty after
    stripping.
    """
    if not isinstance(value, str):
        return None
    stripped = value.strip()
    return stripped or None
|
||||
|
||||
|
||||
def _coerce_int(value: Any) -> Optional[int]:
    """Convert a loosely-typed tool argument to an int.

    Args:
        value: Raw argument (number, numeric string, or anything else).

    Returns:
        ``int(value)`` when the conversion succeeds; None for ``None``, bools,
        and any value ``int()`` cannot convert.
    """
    # Bools are rejected explicitly: int(True) would otherwise yield 1.
    if value is None or isinstance(value, bool):
        return None
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers float +/-inf, which int() cannot represent.
        return None
|
||||
|
||||
|
||||
def _provider_not_configured_error() -> str:
    """Return the JSON error payload used when xAI is not the video provider."""
    message = (
        "xAI video edit/extend tools require `video_gen.provider` to be "
        "configured as `xai` via `hermes tools` -> Video Generation."
    )
    payload = {
        "success": False,
        "error": message,
        "error_type": "provider_not_configured",
        "provider": "xai",
    }
    return json.dumps(payload)
|
||||
|
||||
|
||||
def _normalize_public_video_url(video_url: Any) -> Optional[str]:
    """Validate a source-video URL (``http``/``https`` only, host required).

    Args:
        video_url: Raw ``video_url`` tool argument.

    Returns:
        The URL with surrounding whitespace removed when it is a string with
        an ``http``/``https`` scheme and a host; None otherwise (non-strings,
        blank strings, bare file ids, other schemes, host-less values such as
        ``"https://"``).
    """
    from urllib.parse import urlsplit

    if not isinstance(video_url, str):
        return None
    cleaned = video_url.strip()
    if not cleaned:
        return None
    # Keep the original "scheme://" prefix requirement.
    if not cleaned.lower().startswith(("http://", "https://")):
        return None
    try:
        host = urlsplit(cleaned).hostname
    except ValueError:
        # urlsplit rejects malformed authorities such as an unclosed "[".
        return None
    # A scheme with no host can never be fetched by the provider.
    return cleaned if host else None
|
||||
|
||||
|
||||
# Tool schema for `xai_video_edit`: requires `prompt` and `video_url`, with an
# optional `model` override. Unlike the extend schema it has no `duration`.
XAI_VIDEO_EDIT_SCHEMA: Dict[str, Any] = {
    "name": "xai_video_edit",
    "description": (
        "Edit an existing video with xAI Imagine. This is separate from "
        "`video_generate` because video editing is provider-specific. "
        "`video_url` must be the public HTTPS MP4 URL from a prior Imagine "
        "result (`video` or `public_url` on files-cdn)."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "prompt": {
                "type": "string",
                "description": "Instruction for how xAI should modify the source video.",
            },
            "video_url": {
                "type": "string",
                "description": (
                    "Public HTTPS MP4 URL of the source video — the `video` or "
                    "`public_url` from a prior xAI Imagine result."
                ),
            },
            "model": {
                "type": "string",
                "description": "Optional xAI Imagine model override.",
            },
        },
        "required": ["prompt", "video_url"],
    },
}
|
||||
|
||||
|
||||
# Tool schema for `xai_video_extend`: requires `prompt` and `video_url`, with
# optional `duration` (seconds) and `model` override.
XAI_VIDEO_EXTEND_SCHEMA: Dict[str, Any] = {
    "name": "xai_video_extend",
    "description": (
        "Extend an existing video with xAI Imagine. This is separate from "
        "`video_generate` because video extension is provider-specific. "
        "`video_url` must be the public HTTPS MP4 URL from a prior Imagine "
        "result (`video` or `public_url` on files-cdn)."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "prompt": {
                "type": "string",
                "description": "Instruction for how xAI should continue the source video.",
            },
            "video_url": {
                "type": "string",
                "description": (
                    "Public HTTPS MP4 URL of the source video — the `video` or "
                    "`public_url` from a prior xAI Imagine result."
                ),
            },
            "duration": {
                "type": "integer",
                "description": (
                    "Desired extension duration in seconds. xAI clamps this "
                    "to its supported range."
                ),
            },
            "model": {
                "type": "string",
                "description": "Optional xAI Imagine model override.",
            },
        },
        "required": ["prompt", "video_url"],
    },
}
|
||||
|
||||
|
||||
def _handle_xai_video_edit(args: Dict[str, Any], **_kw: Any) -> str:
    """Tool handler for ``xai_video_edit``.

    Validates ``prompt`` and ``video_url``, confirms xAI is the configured
    video provider, then runs the edit.

    Returns:
        A JSON string: a tool error, the provider-not-configured payload, or
        the serialized result of ``run_xai_video_edit``.
    """
    edit_prompt = _clean_string(args.get("prompt"))
    if not edit_prompt:
        return tool_error("prompt is required for xAI video edit")

    source_url = _normalize_public_video_url(args.get("video_url"))
    if not source_url:
        return tool_error(
            "video_url must be a public HTTPS MP4 URL (the `video`/`public_url` "
            "from a prior Imagine result)"
        )

    if not _configured_for_xai_video():
        return _provider_not_configured_error()

    outcome = run_xai_video_edit(
        prompt=edit_prompt,
        video_url=source_url,
        model=_clean_string(args.get("model")),
    )
    return json.dumps(outcome)
|
||||
|
||||
|
||||
def _handle_xai_video_extend(args: Dict[str, Any], **_kw: Any) -> str:
    """Tool handler for ``xai_video_extend``.

    Validates ``prompt`` and ``video_url``, confirms xAI is the configured
    video provider, then runs the extension with the optional ``duration``.

    Returns:
        A JSON string: a tool error, the provider-not-configured payload, or
        the serialized result of ``run_xai_video_extend``.
    """
    extend_prompt = _clean_string(args.get("prompt"))
    if not extend_prompt:
        return tool_error("prompt is required for xAI video extend")

    source_url = _normalize_public_video_url(args.get("video_url"))
    if not source_url:
        return tool_error(
            "video_url must be a public HTTPS MP4 URL (the `video`/`public_url` "
            "from a prior Imagine result)"
        )

    if not _configured_for_xai_video():
        return _provider_not_configured_error()

    outcome = run_xai_video_extend(
        prompt=extend_prompt,
        video_url=source_url,
        duration=_coerce_int(args.get("duration")),
        model=_clean_string(args.get("model")),
    )
    return json.dumps(outcome)
|
||||
|
||||
|
||||
# Register both xAI-specific tools in the `video_gen` toolset. They share
# `_check_xai_video_requirements` as their availability check, which requires
# the xAI provider to be configured and credentials to be present.
registry.register(
    name="xai_video_edit",
    toolset="video_gen",
    schema=XAI_VIDEO_EDIT_SCHEMA,
    handler=_handle_xai_video_edit,
    check_fn=_check_xai_video_requirements,
    requires_env=[],
    is_async=False,
    emoji="video",
)

registry.register(
    name="xai_video_extend",
    toolset="video_gen",
    schema=XAI_VIDEO_EXTEND_SCHEMA,
    handler=_handle_xai_video_extend,
    check_fn=_check_xai_video_requirements,
    requires_env=[],
    is_async=False,
    emoji="video",
)
|
||||
|
|
@ -139,10 +139,11 @@ TOOLSETS = {
|
|||
"description": (
|
||||
"Video generation tools. Single ``video_generate`` tool covers "
|
||||
"text-to-video (prompt only) and image-to-video (prompt + "
|
||||
"image_url) — the active backend auto-routes. Configure via "
|
||||
"image_url), plus reference-to-video. Provider-specific edit/"
|
||||
"extend workflows may appear as separate tools. Configure via "
|
||||
"``hermes tools`` → Video Generation."
|
||||
),
|
||||
"tools": ["video_generate"],
|
||||
"tools": ["video_generate", "xai_video_edit", "xai_video_extend"],
|
||||
"includes": []
|
||||
},
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue