feat(xai): upgrade to Responses API, add TTS provider

Cherry-picked and trimmed from PR #10600 by Jaaneek.

- Switch xAI transport from openai_chat to codex_responses (Responses API)
- Add codex_responses detection for xAI in all runtime_provider resolution paths
- Add xAI api_mode detection in AIAgent.__init__ (provider name + URL auto-detect)
- Add extra_headers passthrough for codex_responses requests
- Add x-grok-conv-id session header for xAI prompt caching
- Add xAI reasoning support (encrypted_content include, no effort param)
- Move x-grok-conv-id from chat_completions path to codex_responses path
- Add xAI TTS provider (dedicated /v1/tts endpoint with Opus conversion)
- Add xAI provider aliases (grok, x-ai, x.ai) across auth, models, providers, auxiliary
- Trim xAI model list to agentic models (grok-4.20-reasoning, grok-4-1-fast-reasoning)
- Add XAI_API_KEY/XAI_BASE_URL to OPTIONAL_ENV_VARS
- Add xAI TTS config section, setup wizard entry, tools_config provider option
- Add shared xai_http.py helper for User-Agent string

Co-authored-by: Jaaneek <Jaaneek@users.noreply.github.com>
This commit is contained in:
Teknium 2026-04-15 22:27:26 -07:00 committed by Teknium
parent 330ed12fb1
commit 0c1217d01e
14 changed files with 189 additions and 24 deletions

View file

@ -58,6 +58,9 @@ _PROVIDER_ALIASES = {
"google": "gemini", "google": "gemini",
"google-gemini": "gemini", "google-gemini": "gemini",
"google-ai-studio": "gemini", "google-ai-studio": "gemini",
"x-ai": "xai",
"x.ai": "xai",
"grok": "xai",
"glm": "zai", "glm": "zai",
"z-ai": "zai", "z-ai": "zai",
"z.ai": "zai", "z.ai": "zai",

View file

@ -928,6 +928,7 @@ def resolve_provider(
_PROVIDER_ALIASES = { _PROVIDER_ALIASES = {
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai", "glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
"google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini", "google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
"x-ai": "xai", "x.ai": "xai", "grok": "xai",
"kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding", "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn", "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
"arcee-ai": "arcee", "arceeai": "arcee", "arcee-ai": "arcee", "arceeai": "arcee",

View file

@ -566,7 +566,7 @@ DEFAULT_CONFIG = {
# Text-to-speech configuration # Text-to-speech configuration
"tts": { "tts": {
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "minimax" | "mistral" | "neutts" (local) "provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
"edge": { "edge": {
"voice": "en-US-AriaNeural", "voice": "en-US-AriaNeural",
# Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural # Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
@ -580,6 +580,12 @@ DEFAULT_CONFIG = {
"voice": "alloy", "voice": "alloy",
# Voices: alloy, echo, fable, onyx, nova, shimmer # Voices: alloy, echo, fable, onyx, nova, shimmer
}, },
"xai": {
"voice_id": "eve",
"language": "en",
"sample_rate": 24000,
"bit_rate": 128000,
},
"mistral": { "mistral": {
"model": "voxtral-mini-tts-2603", "model": "voxtral-mini-tts-2603",
"voice_id": "c69964a6-ab8b-4f8a-9465-ec0925096ec8", # Paul - Neutral "voice_id": "c69964a6-ab8b-4f8a-9465-ec0925096ec8", # Paul - Neutral
@ -836,6 +842,22 @@ OPTIONAL_ENV_VARS = {
"category": "provider", "category": "provider",
"advanced": True, "advanced": True,
}, },
"XAI_API_KEY": {
"description": "xAI API key",
"prompt": "xAI API key",
"url": "https://console.x.ai/",
"password": True,
"category": "provider",
"advanced": True,
},
"XAI_BASE_URL": {
"description": "xAI base URL override",
"prompt": "xAI base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"GLM_API_KEY": { "GLM_API_KEY": {
"description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)", "description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
"prompt": "Z.AI / GLM API key", "prompt": "Z.AI / GLM API key",

View file

@ -4890,7 +4890,7 @@ For more help on a command:
) )
chat_parser.add_argument( chat_parser.add_argument(
"--provider", "--provider",
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"], choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "xai", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
default=None, default=None,
help="Inference provider (default: auto)" help="Inference provider (default: auto)"
) )

View file

@ -145,17 +145,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"glm-4.5-flash", "glm-4.5-flash",
], ],
"xai": [ "xai": [
"grok-4.20-0309-reasoning", "grok-4.20-reasoning",
"grok-4.20-0309-non-reasoning",
"grok-4.20-multi-agent-0309",
"grok-4-1-fast-reasoning", "grok-4-1-fast-reasoning",
"grok-4-1-fast-non-reasoning",
"grok-4-fast-reasoning",
"grok-4-fast-non-reasoning",
"grok-4-0709",
"grok-code-fast-1",
"grok-3",
"grok-3-mini",
], ],
"kimi-coding": [ "kimi-coding": [
"kimi-for-coding", "kimi-for-coding",

View file

@ -143,6 +143,7 @@ def _tts_label(current_provider: str) -> str:
"openai": "OpenAI TTS", "openai": "OpenAI TTS",
"elevenlabs": "ElevenLabs", "elevenlabs": "ElevenLabs",
"edge": "Edge TTS", "edge": "Edge TTS",
"xai": "xAI TTS",
"mistral": "Mistral Voxtral TTS", "mistral": "Mistral Voxtral TTS",
"neutts": "NeuTTS", "neutts": "NeuTTS",
} }

View file

@ -128,7 +128,7 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_env_var="HF_BASE_URL", base_url_env_var="HF_BASE_URL",
), ),
"xai": HermesOverlay( "xai": HermesOverlay(
transport="openai_chat", transport="codex_responses",
base_url_override="https://api.x.ai/v1", base_url_override="https://api.x.ai/v1",
base_url_env_var="XAI_BASE_URL", base_url_env_var="XAI_BASE_URL",
), ),
@ -184,6 +184,7 @@ ALIASES: Dict[str, str] = {
# xai # xai
"x-ai": "xai", "x-ai": "xai",
"x.ai": "xai", "x.ai": "xai",
"grok": "xai",
# kimi-for-coding (models.dev ID) # kimi-for-coding (models.dev ID)
"kimi": "kimi-for-coding", "kimi": "kimi-for-coding",

View file

@ -41,6 +41,8 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
tool calls with reasoning (chat/completions returns 400). tool calls with reasoning (chat/completions returns 400).
""" """
normalized = (base_url or "").strip().lower().rstrip("/") normalized = (base_url or "").strip().lower().rstrip("/")
if "api.x.ai" in normalized:
return "codex_responses"
if "api.openai.com" in normalized and "openrouter" not in normalized: if "api.openai.com" in normalized and "openrouter" not in normalized:
return "codex_responses" return "codex_responses"
return None return None
@ -163,6 +165,8 @@ def _resolve_runtime_from_pool_entry(
base_url = cfg_base_url or base_url or "https://api.anthropic.com" base_url = cfg_base_url or base_url or "https://api.anthropic.com"
elif provider == "openrouter": elif provider == "openrouter":
base_url = base_url or OPENROUTER_BASE_URL base_url = base_url or OPENROUTER_BASE_URL
elif provider == "xai":
api_mode = "codex_responses"
elif provider == "nous": elif provider == "nous":
api_mode = "chat_completions" api_mode = "chat_completions"
elif provider == "copilot": elif provider == "copilot":
@ -628,6 +632,8 @@ def _resolve_explicit_runtime(
api_mode = "chat_completions" api_mode = "chat_completions"
if provider == "copilot": if provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, api_key) api_mode = _copilot_runtime_api_mode(model_cfg, api_key)
elif provider == "xai":
api_mode = "codex_responses"
else: else:
configured_mode = _parse_api_mode(model_cfg.get("api_mode")) configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
if configured_mode: if configured_mode:
@ -924,6 +930,8 @@ def resolve_runtime_provider(
api_mode = "chat_completions" api_mode = "chat_completions"
if provider == "copilot": if provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", "")) api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
elif provider == "xai":
api_mode = "codex_responses"
else: else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower() configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Only honor persisted api_mode when it belongs to the same provider family. # Only honor persisted api_mode when it belongs to the same provider family.

View file

@ -920,6 +920,7 @@ def _setup_tts_provider(config: dict):
"edge": "Edge TTS", "edge": "Edge TTS",
"elevenlabs": "ElevenLabs", "elevenlabs": "ElevenLabs",
"openai": "OpenAI TTS", "openai": "OpenAI TTS",
"xai": "xAI TTS",
"minimax": "MiniMax TTS", "minimax": "MiniMax TTS",
"mistral": "Mistral Voxtral TTS", "mistral": "Mistral Voxtral TTS",
"neutts": "NeuTTS", "neutts": "NeuTTS",
@ -941,12 +942,13 @@ def _setup_tts_provider(config: dict):
"Edge TTS (free, cloud-based, no setup needed)", "Edge TTS (free, cloud-based, no setup needed)",
"ElevenLabs (premium quality, needs API key)", "ElevenLabs (premium quality, needs API key)",
"OpenAI TTS (good quality, needs API key)", "OpenAI TTS (good quality, needs API key)",
"xAI TTS (Grok voices, needs API key)",
"MiniMax TTS (high quality with voice cloning, needs API key)", "MiniMax TTS (high quality with voice cloning, needs API key)",
"Mistral Voxtral TTS (multilingual, native Opus, needs API key)", "Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
"NeuTTS (local on-device, free, ~300MB model download)", "NeuTTS (local on-device, free, ~300MB model download)",
] ]
) )
providers.extend(["edge", "elevenlabs", "openai", "minimax", "mistral", "neutts"]) providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "neutts"])
choices.append(f"Keep current ({current_label})") choices.append(f"Keep current ({current_label})")
keep_current_idx = len(choices) - 1 keep_current_idx = len(choices) - 1
idx = prompt_choice("Select TTS provider:", choices, keep_current_idx) idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
@ -1012,6 +1014,23 @@ def _setup_tts_provider(config: dict):
print_warning("No API key provided. Falling back to Edge TTS.") print_warning("No API key provided. Falling back to Edge TTS.")
selected = "edge" selected = "edge"
elif selected == "xai":
existing = get_env_value("XAI_API_KEY")
if not existing:
print()
api_key = prompt("xAI API key for TTS", password=True)
if api_key:
save_env_value("XAI_API_KEY", api_key)
print_success("xAI TTS API key saved")
else:
from hermes_constants import display_hermes_home as _dhh
print_warning(
"No xAI API key provided for TTS. Configure XAI_API_KEY via "
f"hermes setup model or {_dhh()}/.env to use xAI TTS. "
"Falling back to Edge TTS."
)
selected = "edge"
elif selected == "minimax": elif selected == "minimax":
existing = get_env_value("MINIMAX_API_KEY") existing = get_env_value("MINIMAX_API_KEY")
if not existing: if not existing:

View file

@ -146,6 +146,14 @@ TOOL_CATEGORIES = {
], ],
"tts_provider": "openai", "tts_provider": "openai",
}, },
{
"name": "xAI TTS",
"tag": "Grok voices - requires xAI API key",
"env_vars": [
{"key": "XAI_API_KEY", "prompt": "xAI API key", "url": "https://console.x.ai/"},
],
"tts_provider": "xai",
},
{ {
"name": "ElevenLabs", "name": "ElevenLabs",
"badge": "paid", "badge": "paid",

View file

@ -691,9 +691,14 @@ class AIAgent:
self.api_mode = api_mode self.api_mode = api_mode
elif self.provider == "openai-codex": elif self.provider == "openai-codex":
self.api_mode = "codex_responses" self.api_mode = "codex_responses"
elif self.provider == "xai":
self.api_mode = "codex_responses"
elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower: elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower:
self.api_mode = "codex_responses" self.api_mode = "codex_responses"
self.provider = "openai-codex" self.provider = "openai-codex"
elif (provider_name is None) and "api.x.ai" in self._base_url_lower:
self.api_mode = "codex_responses"
self.provider = "xai"
elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower): elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower):
self.api_mode = "anthropic_messages" self.api_mode = "anthropic_messages"
self.provider = "anthropic" self.provider = "anthropic"
@ -4032,6 +4037,7 @@ class AIAgent:
"model", "instructions", "input", "tools", "store", "model", "instructions", "input", "tools", "store",
"reasoning", "include", "max_output_tokens", "temperature", "reasoning", "include", "max_output_tokens", "temperature",
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier", "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
"extra_headers",
} }
normalized: Dict[str, Any] = { normalized: Dict[str, Any] = {
"model": model, "model": model,
@ -4067,6 +4073,20 @@ class AIAgent:
if val is not None: if val is not None:
normalized[passthrough_key] = val normalized[passthrough_key] = val
extra_headers = api_kwargs.get("extra_headers")
if extra_headers is not None:
if not isinstance(extra_headers, dict):
raise ValueError("Codex Responses request 'extra_headers' must be an object.")
normalized_headers: Dict[str, str] = {}
for key, value in extra_headers.items():
if not isinstance(key, str) or not key.strip():
raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.")
if value is None:
continue
normalized_headers[key.strip()] = str(value)
if normalized_headers:
normalized["extra_headers"] = normalized_headers
if allow_stream: if allow_stream:
stream = api_kwargs.get("stream") stream = api_kwargs.get("stream")
if stream is not None and stream is not True: if stream is not None and stream is not True:
@ -6504,7 +6524,12 @@ class AIAgent:
if not is_github_responses: if not is_github_responses:
kwargs["prompt_cache_key"] = self.session_id kwargs["prompt_cache_key"] = self.session_id
if reasoning_enabled: is_xai_responses = self.provider == "xai" or "api.x.ai" in (self.base_url or "").lower()
if reasoning_enabled and is_xai_responses:
# xAI reasons automatically — no effort param, just include encrypted content
kwargs["include"] = ["reasoning.encrypted_content"]
elif reasoning_enabled:
if is_github_responses: if is_github_responses:
# Copilot's Responses route advertises reasoning-effort support, # Copilot's Responses route advertises reasoning-effort support,
# but not OpenAI-specific prompt cache or encrypted reasoning # but not OpenAI-specific prompt cache or encrypted reasoning
@ -6515,7 +6540,7 @@ class AIAgent:
else: else:
kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"} kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
kwargs["include"] = ["reasoning.encrypted_content"] kwargs["include"] = ["reasoning.encrypted_content"]
elif not is_github_responses: elif not is_github_responses and not is_xai_responses:
kwargs["include"] = [] kwargs["include"] = []
if self.request_overrides: if self.request_overrides:
@ -6524,6 +6549,9 @@ class AIAgent:
if self.max_tokens is not None and not is_codex_backend: if self.max_tokens is not None and not is_codex_backend:
kwargs["max_output_tokens"] = self.max_tokens kwargs["max_output_tokens"] = self.max_tokens
if is_xai_responses and getattr(self, "session_id", None):
kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id}
return kwargs return kwargs
sanitized_messages = api_messages sanitized_messages = api_messages
@ -6706,12 +6734,6 @@ class AIAgent:
if extra_body: if extra_body:
api_kwargs["extra_body"] = extra_body api_kwargs["extra_body"] = extra_body
# xAI prompt caching: send x-grok-conv-id header to route requests
# to the same server, maximizing automatic cache hits.
# https://docs.x.ai/developers/advanced-api-usage/prompt-caching
if "x.ai" in self._base_url_lower and hasattr(self, "session_id") and self.session_id:
api_kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id}
# Priority Processing / generic request overrides (e.g. service_tier). # Priority Processing / generic request overrides (e.g. service_tier).
# Applied last so overrides win over any defaults set above. # Applied last so overrides win over any defaults set above.
if self.request_overrides: if self.request_overrides:

View file

@ -45,6 +45,7 @@ from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
from tools.managed_tool_gateway import resolve_managed_tool_gateway from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_openai_audio_api_key from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_openai_audio_api_key
from tools.xai_http import hermes_xai_user_agent
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Lazy imports -- providers are imported only when actually used to avoid # Lazy imports -- providers are imported only when actually used to avoid
@ -93,6 +94,11 @@ DEFAULT_MINIMAX_VOICE_ID = "English_Graceful_Lady"
DEFAULT_MINIMAX_BASE_URL = "https://api.minimax.io/v1/t2a_v2" DEFAULT_MINIMAX_BASE_URL = "https://api.minimax.io/v1/t2a_v2"
DEFAULT_MISTRAL_TTS_MODEL = "voxtral-mini-tts-2603" DEFAULT_MISTRAL_TTS_MODEL = "voxtral-mini-tts-2603"
DEFAULT_MISTRAL_TTS_VOICE_ID = "c69964a6-ab8b-4f8a-9465-ec0925096ec8" # Paul - Neutral DEFAULT_MISTRAL_TTS_VOICE_ID = "c69964a6-ab8b-4f8a-9465-ec0925096ec8" # Paul - Neutral
DEFAULT_XAI_VOICE_ID = "eve"
DEFAULT_XAI_LANGUAGE = "en"
DEFAULT_XAI_SAMPLE_RATE = 24000
DEFAULT_XAI_BIT_RATE = 128000
DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1"
def _get_default_output_dir() -> str: def _get_default_output_dir() -> str:
from hermes_constants import get_hermes_dir from hermes_constants import get_hermes_dir
@ -299,6 +305,71 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any]
close() close()
# ===========================================================================
# Provider: xAI TTS
# ===========================================================================
def _generate_xai_tts(text: str, output_path: str, tts_config: Dict[str, Any]) -> str:
"""
Generate audio using xAI TTS.
xAI exposes a dedicated /v1/tts endpoint instead of the OpenAI audio.speech
API shape, so this is implemented as a separate backend.
"""
import requests
api_key = os.getenv("XAI_API_KEY", "").strip()
if not api_key:
raise ValueError("XAI_API_KEY not set. Get one at https://console.x.ai/")
xai_config = tts_config.get("xai", {})
voice_id = str(xai_config.get("voice_id", DEFAULT_XAI_VOICE_ID)).strip() or DEFAULT_XAI_VOICE_ID
language = str(xai_config.get("language", DEFAULT_XAI_LANGUAGE)).strip() or DEFAULT_XAI_LANGUAGE
sample_rate = int(xai_config.get("sample_rate", DEFAULT_XAI_SAMPLE_RATE))
bit_rate = int(xai_config.get("bit_rate", DEFAULT_XAI_BIT_RATE))
base_url = str(
xai_config.get("base_url")
or os.getenv("XAI_BASE_URL")
or DEFAULT_XAI_BASE_URL
).strip().rstrip("/")
# Match the documented minimal POST /v1/tts shape by default. Only send
# output_format when Hermes actually needs a non-default format/override.
codec = "wav" if output_path.endswith(".wav") else "mp3"
payload: Dict[str, Any] = {
"text": text,
"voice_id": voice_id,
"language": language,
}
if (
codec != "mp3"
or sample_rate != DEFAULT_XAI_SAMPLE_RATE
or (codec == "mp3" and bit_rate != DEFAULT_XAI_BIT_RATE)
):
output_format: Dict[str, Any] = {"codec": codec}
if sample_rate:
output_format["sample_rate"] = sample_rate
if codec == "mp3" and bit_rate:
output_format["bit_rate"] = bit_rate
payload["output_format"] = output_format
response = requests.post(
f"{base_url}/tts",
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
"User-Agent": hermes_xai_user_agent(),
},
json=payload,
timeout=60,
)
response.raise_for_status()
with open(output_path, "wb") as f:
f.write(response.content)
return output_path
# =========================================================================== # ===========================================================================
# Provider: MiniMax TTS # Provider: MiniMax TTS
# =========================================================================== # ===========================================================================
@ -600,6 +671,10 @@ def text_to_speech_tool(
logger.info("Generating speech with MiniMax TTS...") logger.info("Generating speech with MiniMax TTS...")
_generate_minimax_tts(text, file_str, tts_config) _generate_minimax_tts(text, file_str, tts_config)
elif provider == "xai":
logger.info("Generating speech with xAI TTS...")
_generate_xai_tts(text, file_str, tts_config)
elif provider == "mistral": elif provider == "mistral":
try: try:
_import_mistral_client() _import_mistral_client()
@ -661,7 +736,7 @@ def text_to_speech_tool(
# Try Opus conversion for Telegram compatibility # Try Opus conversion for Telegram compatibility
# Edge TTS outputs MP3, NeuTTS outputs WAV — both need ffmpeg conversion # Edge TTS outputs MP3, NeuTTS outputs WAV — both need ffmpeg conversion
voice_compatible = False voice_compatible = False
if provider in ("edge", "neutts", "minimax") and not file_str.endswith(".ogg"): if provider in ("edge", "neutts", "minimax", "xai") and not file_str.endswith(".ogg"):
opus_path = _convert_to_opus(file_str) opus_path = _convert_to_opus(file_str)
if opus_path: if opus_path:
file_str = opus_path file_str = opus_path
@ -734,6 +809,8 @@ def check_tts_requirements() -> bool:
pass pass
if os.getenv("MINIMAX_API_KEY"): if os.getenv("MINIMAX_API_KEY"):
return True return True
if os.getenv("XAI_API_KEY"):
return True
try: try:
_import_mistral_client() _import_mistral_client()
if os.getenv("MISTRAL_API_KEY"): if os.getenv("MISTRAL_API_KEY"):

12
tools/xai_http.py Normal file
View file

@ -0,0 +1,12 @@
"""Shared helpers for direct xAI HTTP integrations."""
from __future__ import annotations
def hermes_xai_user_agent() -> str:
    """Return a stable Hermes-specific User-Agent for xAI HTTP calls.

    Falls back to "unknown" when the Hermes CLI package (and thus its
    version) is not importable, so the header is always well-formed.
    """
    try:
        from hermes_cli import __version__ as hermes_version
    except Exception:
        hermes_version = "unknown"
    return f"Hermes-Agent/{hermes_version}"

View file

@ -151,7 +151,7 @@ TOOLSETS = {
}, },
"tts": { "tts": {
"description": "Text-to-speech: convert text to audio with Edge TTS (free), ElevenLabs, or OpenAI", "description": "Text-to-speech: convert text to audio with Edge TTS (free), ElevenLabs, OpenAI, or xAI",
"tools": ["text_to_speech"], "tools": ["text_to_speech"],
"includes": [] "includes": []
}, },