mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(xai): upgrade to Responses API, add TTS provider
Cherry-picked and trimmed from PR #10600 by Jaaneek.

- Switch xAI transport from openai_chat to codex_responses (Responses API)
- Add codex_responses detection for xAI in all runtime_provider resolution paths
- Add xAI api_mode detection in AIAgent.__init__ (provider name + URL auto-detect)
- Add extra_headers passthrough for codex_responses requests
- Add x-grok-conv-id session header for xAI prompt caching
- Add xAI reasoning support (encrypted_content include, no effort param)
- Move x-grok-conv-id from chat_completions path to codex_responses path
- Add xAI TTS provider (dedicated /v1/tts endpoint with Opus conversion)
- Add xAI provider aliases (grok, x-ai, x.ai) across auth, models, providers, auxiliary
- Trim xAI model list to agentic models (grok-4.20-reasoning, grok-4-1-fast-reasoning)
- Add XAI_API_KEY/XAI_BASE_URL to OPTIONAL_ENV_VARS
- Add xAI TTS config section, setup wizard entry, tools_config provider option
- Add shared xai_http.py helper for User-Agent string

Co-authored-by: Jaaneek <Jaaneek@users.noreply.github.com>
This commit is contained in:
parent
330ed12fb1
commit
0c1217d01e
14 changed files with 189 additions and 24 deletions
|
|
@ -58,6 +58,9 @@ _PROVIDER_ALIASES = {
|
||||||
"google": "gemini",
|
"google": "gemini",
|
||||||
"google-gemini": "gemini",
|
"google-gemini": "gemini",
|
||||||
"google-ai-studio": "gemini",
|
"google-ai-studio": "gemini",
|
||||||
|
"x-ai": "xai",
|
||||||
|
"x.ai": "xai",
|
||||||
|
"grok": "xai",
|
||||||
"glm": "zai",
|
"glm": "zai",
|
||||||
"z-ai": "zai",
|
"z-ai": "zai",
|
||||||
"z.ai": "zai",
|
"z.ai": "zai",
|
||||||
|
|
|
||||||
|
|
@ -928,6 +928,7 @@ def resolve_provider(
|
||||||
_PROVIDER_ALIASES = {
|
_PROVIDER_ALIASES = {
|
||||||
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
|
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
|
||||||
"google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
|
"google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
|
||||||
|
"x-ai": "xai", "x.ai": "xai", "grok": "xai",
|
||||||
"kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
|
"kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
|
||||||
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
|
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
|
||||||
"arcee-ai": "arcee", "arceeai": "arcee",
|
"arcee-ai": "arcee", "arceeai": "arcee",
|
||||||
|
|
|
||||||
|
|
@ -566,7 +566,7 @@ DEFAULT_CONFIG = {
|
||||||
|
|
||||||
# Text-to-speech configuration
|
# Text-to-speech configuration
|
||||||
"tts": {
|
"tts": {
|
||||||
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "minimax" | "mistral" | "neutts" (local)
|
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
|
||||||
"edge": {
|
"edge": {
|
||||||
"voice": "en-US-AriaNeural",
|
"voice": "en-US-AriaNeural",
|
||||||
# Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
|
# Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
|
||||||
|
|
@ -580,6 +580,12 @@ DEFAULT_CONFIG = {
|
||||||
"voice": "alloy",
|
"voice": "alloy",
|
||||||
# Voices: alloy, echo, fable, onyx, nova, shimmer
|
# Voices: alloy, echo, fable, onyx, nova, shimmer
|
||||||
},
|
},
|
||||||
|
"xai": {
|
||||||
|
"voice_id": "eve",
|
||||||
|
"language": "en",
|
||||||
|
"sample_rate": 24000,
|
||||||
|
"bit_rate": 128000,
|
||||||
|
},
|
||||||
"mistral": {
|
"mistral": {
|
||||||
"model": "voxtral-mini-tts-2603",
|
"model": "voxtral-mini-tts-2603",
|
||||||
"voice_id": "c69964a6-ab8b-4f8a-9465-ec0925096ec8", # Paul - Neutral
|
"voice_id": "c69964a6-ab8b-4f8a-9465-ec0925096ec8", # Paul - Neutral
|
||||||
|
|
@ -836,6 +842,22 @@ OPTIONAL_ENV_VARS = {
|
||||||
"category": "provider",
|
"category": "provider",
|
||||||
"advanced": True,
|
"advanced": True,
|
||||||
},
|
},
|
||||||
|
"XAI_API_KEY": {
|
||||||
|
"description": "xAI API key",
|
||||||
|
"prompt": "xAI API key",
|
||||||
|
"url": "https://console.x.ai/",
|
||||||
|
"password": True,
|
||||||
|
"category": "provider",
|
||||||
|
"advanced": True,
|
||||||
|
},
|
||||||
|
"XAI_BASE_URL": {
|
||||||
|
"description": "xAI base URL override",
|
||||||
|
"prompt": "xAI base URL (leave empty for default)",
|
||||||
|
"url": None,
|
||||||
|
"password": False,
|
||||||
|
"category": "provider",
|
||||||
|
"advanced": True,
|
||||||
|
},
|
||||||
"GLM_API_KEY": {
|
"GLM_API_KEY": {
|
||||||
"description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
|
"description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
|
||||||
"prompt": "Z.AI / GLM API key",
|
"prompt": "Z.AI / GLM API key",
|
||||||
|
|
|
||||||
|
|
@ -4890,7 +4890,7 @@ For more help on a command:
|
||||||
)
|
)
|
||||||
chat_parser.add_argument(
|
chat_parser.add_argument(
|
||||||
"--provider",
|
"--provider",
|
||||||
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
|
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "xai", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
|
||||||
default=None,
|
default=None,
|
||||||
help="Inference provider (default: auto)"
|
help="Inference provider (default: auto)"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -145,17 +145,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||||
"glm-4.5-flash",
|
"glm-4.5-flash",
|
||||||
],
|
],
|
||||||
"xai": [
|
"xai": [
|
||||||
"grok-4.20-0309-reasoning",
|
"grok-4.20-reasoning",
|
||||||
"grok-4.20-0309-non-reasoning",
|
|
||||||
"grok-4.20-multi-agent-0309",
|
|
||||||
"grok-4-1-fast-reasoning",
|
"grok-4-1-fast-reasoning",
|
||||||
"grok-4-1-fast-non-reasoning",
|
|
||||||
"grok-4-fast-reasoning",
|
|
||||||
"grok-4-fast-non-reasoning",
|
|
||||||
"grok-4-0709",
|
|
||||||
"grok-code-fast-1",
|
|
||||||
"grok-3",
|
|
||||||
"grok-3-mini",
|
|
||||||
],
|
],
|
||||||
"kimi-coding": [
|
"kimi-coding": [
|
||||||
"kimi-for-coding",
|
"kimi-for-coding",
|
||||||
|
|
|
||||||
|
|
@ -143,6 +143,7 @@ def _tts_label(current_provider: str) -> str:
|
||||||
"openai": "OpenAI TTS",
|
"openai": "OpenAI TTS",
|
||||||
"elevenlabs": "ElevenLabs",
|
"elevenlabs": "ElevenLabs",
|
||||||
"edge": "Edge TTS",
|
"edge": "Edge TTS",
|
||||||
|
"xai": "xAI TTS",
|
||||||
"mistral": "Mistral Voxtral TTS",
|
"mistral": "Mistral Voxtral TTS",
|
||||||
"neutts": "NeuTTS",
|
"neutts": "NeuTTS",
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -128,7 +128,7 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||||
base_url_env_var="HF_BASE_URL",
|
base_url_env_var="HF_BASE_URL",
|
||||||
),
|
),
|
||||||
"xai": HermesOverlay(
|
"xai": HermesOverlay(
|
||||||
transport="openai_chat",
|
transport="codex_responses",
|
||||||
base_url_override="https://api.x.ai/v1",
|
base_url_override="https://api.x.ai/v1",
|
||||||
base_url_env_var="XAI_BASE_URL",
|
base_url_env_var="XAI_BASE_URL",
|
||||||
),
|
),
|
||||||
|
|
@ -184,6 +184,7 @@ ALIASES: Dict[str, str] = {
|
||||||
# xai
|
# xai
|
||||||
"x-ai": "xai",
|
"x-ai": "xai",
|
||||||
"x.ai": "xai",
|
"x.ai": "xai",
|
||||||
|
"grok": "xai",
|
||||||
|
|
||||||
# kimi-for-coding (models.dev ID)
|
# kimi-for-coding (models.dev ID)
|
||||||
"kimi": "kimi-for-coding",
|
"kimi": "kimi-for-coding",
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,8 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
|
||||||
tool calls with reasoning (chat/completions returns 400).
|
tool calls with reasoning (chat/completions returns 400).
|
||||||
"""
|
"""
|
||||||
normalized = (base_url or "").strip().lower().rstrip("/")
|
normalized = (base_url or "").strip().lower().rstrip("/")
|
||||||
|
if "api.x.ai" in normalized:
|
||||||
|
return "codex_responses"
|
||||||
if "api.openai.com" in normalized and "openrouter" not in normalized:
|
if "api.openai.com" in normalized and "openrouter" not in normalized:
|
||||||
return "codex_responses"
|
return "codex_responses"
|
||||||
return None
|
return None
|
||||||
|
|
@ -163,6 +165,8 @@ def _resolve_runtime_from_pool_entry(
|
||||||
base_url = cfg_base_url or base_url or "https://api.anthropic.com"
|
base_url = cfg_base_url or base_url or "https://api.anthropic.com"
|
||||||
elif provider == "openrouter":
|
elif provider == "openrouter":
|
||||||
base_url = base_url or OPENROUTER_BASE_URL
|
base_url = base_url or OPENROUTER_BASE_URL
|
||||||
|
elif provider == "xai":
|
||||||
|
api_mode = "codex_responses"
|
||||||
elif provider == "nous":
|
elif provider == "nous":
|
||||||
api_mode = "chat_completions"
|
api_mode = "chat_completions"
|
||||||
elif provider == "copilot":
|
elif provider == "copilot":
|
||||||
|
|
@ -628,6 +632,8 @@ def _resolve_explicit_runtime(
|
||||||
api_mode = "chat_completions"
|
api_mode = "chat_completions"
|
||||||
if provider == "copilot":
|
if provider == "copilot":
|
||||||
api_mode = _copilot_runtime_api_mode(model_cfg, api_key)
|
api_mode = _copilot_runtime_api_mode(model_cfg, api_key)
|
||||||
|
elif provider == "xai":
|
||||||
|
api_mode = "codex_responses"
|
||||||
else:
|
else:
|
||||||
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
|
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
|
||||||
if configured_mode:
|
if configured_mode:
|
||||||
|
|
@ -924,6 +930,8 @@ def resolve_runtime_provider(
|
||||||
api_mode = "chat_completions"
|
api_mode = "chat_completions"
|
||||||
if provider == "copilot":
|
if provider == "copilot":
|
||||||
api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
|
api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
|
||||||
|
elif provider == "xai":
|
||||||
|
api_mode = "codex_responses"
|
||||||
else:
|
else:
|
||||||
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
|
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||||
# Only honor persisted api_mode when it belongs to the same provider family.
|
# Only honor persisted api_mode when it belongs to the same provider family.
|
||||||
|
|
|
||||||
|
|
@ -920,6 +920,7 @@ def _setup_tts_provider(config: dict):
|
||||||
"edge": "Edge TTS",
|
"edge": "Edge TTS",
|
||||||
"elevenlabs": "ElevenLabs",
|
"elevenlabs": "ElevenLabs",
|
||||||
"openai": "OpenAI TTS",
|
"openai": "OpenAI TTS",
|
||||||
|
"xai": "xAI TTS",
|
||||||
"minimax": "MiniMax TTS",
|
"minimax": "MiniMax TTS",
|
||||||
"mistral": "Mistral Voxtral TTS",
|
"mistral": "Mistral Voxtral TTS",
|
||||||
"neutts": "NeuTTS",
|
"neutts": "NeuTTS",
|
||||||
|
|
@ -941,12 +942,13 @@ def _setup_tts_provider(config: dict):
|
||||||
"Edge TTS (free, cloud-based, no setup needed)",
|
"Edge TTS (free, cloud-based, no setup needed)",
|
||||||
"ElevenLabs (premium quality, needs API key)",
|
"ElevenLabs (premium quality, needs API key)",
|
||||||
"OpenAI TTS (good quality, needs API key)",
|
"OpenAI TTS (good quality, needs API key)",
|
||||||
|
"xAI TTS (Grok voices, needs API key)",
|
||||||
"MiniMax TTS (high quality with voice cloning, needs API key)",
|
"MiniMax TTS (high quality with voice cloning, needs API key)",
|
||||||
"Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
|
"Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
|
||||||
"NeuTTS (local on-device, free, ~300MB model download)",
|
"NeuTTS (local on-device, free, ~300MB model download)",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
providers.extend(["edge", "elevenlabs", "openai", "minimax", "mistral", "neutts"])
|
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "neutts"])
|
||||||
choices.append(f"Keep current ({current_label})")
|
choices.append(f"Keep current ({current_label})")
|
||||||
keep_current_idx = len(choices) - 1
|
keep_current_idx = len(choices) - 1
|
||||||
idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
|
idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
|
||||||
|
|
@ -1012,6 +1014,23 @@ def _setup_tts_provider(config: dict):
|
||||||
print_warning("No API key provided. Falling back to Edge TTS.")
|
print_warning("No API key provided. Falling back to Edge TTS.")
|
||||||
selected = "edge"
|
selected = "edge"
|
||||||
|
|
||||||
|
elif selected == "xai":
|
||||||
|
existing = get_env_value("XAI_API_KEY")
|
||||||
|
if not existing:
|
||||||
|
print()
|
||||||
|
api_key = prompt("xAI API key for TTS", password=True)
|
||||||
|
if api_key:
|
||||||
|
save_env_value("XAI_API_KEY", api_key)
|
||||||
|
print_success("xAI TTS API key saved")
|
||||||
|
else:
|
||||||
|
from hermes_constants import display_hermes_home as _dhh
|
||||||
|
print_warning(
|
||||||
|
"No xAI API key provided for TTS. Configure XAI_API_KEY via "
|
||||||
|
f"hermes setup model or {_dhh()}/.env to use xAI TTS. "
|
||||||
|
"Falling back to Edge TTS."
|
||||||
|
)
|
||||||
|
selected = "edge"
|
||||||
|
|
||||||
elif selected == "minimax":
|
elif selected == "minimax":
|
||||||
existing = get_env_value("MINIMAX_API_KEY")
|
existing = get_env_value("MINIMAX_API_KEY")
|
||||||
if not existing:
|
if not existing:
|
||||||
|
|
|
||||||
|
|
@ -146,6 +146,14 @@ TOOL_CATEGORIES = {
|
||||||
],
|
],
|
||||||
"tts_provider": "openai",
|
"tts_provider": "openai",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "xAI TTS",
|
||||||
|
"tag": "Grok voices - requires xAI API key",
|
||||||
|
"env_vars": [
|
||||||
|
{"key": "XAI_API_KEY", "prompt": "xAI API key", "url": "https://console.x.ai/"},
|
||||||
|
],
|
||||||
|
"tts_provider": "xai",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "ElevenLabs",
|
"name": "ElevenLabs",
|
||||||
"badge": "paid",
|
"badge": "paid",
|
||||||
|
|
|
||||||
38
run_agent.py
38
run_agent.py
|
|
@ -691,9 +691,14 @@ class AIAgent:
|
||||||
self.api_mode = api_mode
|
self.api_mode = api_mode
|
||||||
elif self.provider == "openai-codex":
|
elif self.provider == "openai-codex":
|
||||||
self.api_mode = "codex_responses"
|
self.api_mode = "codex_responses"
|
||||||
|
elif self.provider == "xai":
|
||||||
|
self.api_mode = "codex_responses"
|
||||||
elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower:
|
elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower:
|
||||||
self.api_mode = "codex_responses"
|
self.api_mode = "codex_responses"
|
||||||
self.provider = "openai-codex"
|
self.provider = "openai-codex"
|
||||||
|
elif (provider_name is None) and "api.x.ai" in self._base_url_lower:
|
||||||
|
self.api_mode = "codex_responses"
|
||||||
|
self.provider = "xai"
|
||||||
elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower):
|
elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower):
|
||||||
self.api_mode = "anthropic_messages"
|
self.api_mode = "anthropic_messages"
|
||||||
self.provider = "anthropic"
|
self.provider = "anthropic"
|
||||||
|
|
@ -4032,6 +4037,7 @@ class AIAgent:
|
||||||
"model", "instructions", "input", "tools", "store",
|
"model", "instructions", "input", "tools", "store",
|
||||||
"reasoning", "include", "max_output_tokens", "temperature",
|
"reasoning", "include", "max_output_tokens", "temperature",
|
||||||
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
|
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
|
||||||
|
"extra_headers",
|
||||||
}
|
}
|
||||||
normalized: Dict[str, Any] = {
|
normalized: Dict[str, Any] = {
|
||||||
"model": model,
|
"model": model,
|
||||||
|
|
@ -4067,6 +4073,20 @@ class AIAgent:
|
||||||
if val is not None:
|
if val is not None:
|
||||||
normalized[passthrough_key] = val
|
normalized[passthrough_key] = val
|
||||||
|
|
||||||
|
extra_headers = api_kwargs.get("extra_headers")
|
||||||
|
if extra_headers is not None:
|
||||||
|
if not isinstance(extra_headers, dict):
|
||||||
|
raise ValueError("Codex Responses request 'extra_headers' must be an object.")
|
||||||
|
normalized_headers: Dict[str, str] = {}
|
||||||
|
for key, value in extra_headers.items():
|
||||||
|
if not isinstance(key, str) or not key.strip():
|
||||||
|
raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.")
|
||||||
|
if value is None:
|
||||||
|
continue
|
||||||
|
normalized_headers[key.strip()] = str(value)
|
||||||
|
if normalized_headers:
|
||||||
|
normalized["extra_headers"] = normalized_headers
|
||||||
|
|
||||||
if allow_stream:
|
if allow_stream:
|
||||||
stream = api_kwargs.get("stream")
|
stream = api_kwargs.get("stream")
|
||||||
if stream is not None and stream is not True:
|
if stream is not None and stream is not True:
|
||||||
|
|
@ -6504,7 +6524,12 @@ class AIAgent:
|
||||||
if not is_github_responses:
|
if not is_github_responses:
|
||||||
kwargs["prompt_cache_key"] = self.session_id
|
kwargs["prompt_cache_key"] = self.session_id
|
||||||
|
|
||||||
if reasoning_enabled:
|
is_xai_responses = self.provider == "xai" or "api.x.ai" in (self.base_url or "").lower()
|
||||||
|
|
||||||
|
if reasoning_enabled and is_xai_responses:
|
||||||
|
# xAI reasons automatically — no effort param, just include encrypted content
|
||||||
|
kwargs["include"] = ["reasoning.encrypted_content"]
|
||||||
|
elif reasoning_enabled:
|
||||||
if is_github_responses:
|
if is_github_responses:
|
||||||
# Copilot's Responses route advertises reasoning-effort support,
|
# Copilot's Responses route advertises reasoning-effort support,
|
||||||
# but not OpenAI-specific prompt cache or encrypted reasoning
|
# but not OpenAI-specific prompt cache or encrypted reasoning
|
||||||
|
|
@ -6515,7 +6540,7 @@ class AIAgent:
|
||||||
else:
|
else:
|
||||||
kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
|
kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
|
||||||
kwargs["include"] = ["reasoning.encrypted_content"]
|
kwargs["include"] = ["reasoning.encrypted_content"]
|
||||||
elif not is_github_responses:
|
elif not is_github_responses and not is_xai_responses:
|
||||||
kwargs["include"] = []
|
kwargs["include"] = []
|
||||||
|
|
||||||
if self.request_overrides:
|
if self.request_overrides:
|
||||||
|
|
@ -6524,6 +6549,9 @@ class AIAgent:
|
||||||
if self.max_tokens is not None and not is_codex_backend:
|
if self.max_tokens is not None and not is_codex_backend:
|
||||||
kwargs["max_output_tokens"] = self.max_tokens
|
kwargs["max_output_tokens"] = self.max_tokens
|
||||||
|
|
||||||
|
if is_xai_responses and getattr(self, "session_id", None):
|
||||||
|
kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id}
|
||||||
|
|
||||||
return kwargs
|
return kwargs
|
||||||
|
|
||||||
sanitized_messages = api_messages
|
sanitized_messages = api_messages
|
||||||
|
|
@ -6706,12 +6734,6 @@ class AIAgent:
|
||||||
if extra_body:
|
if extra_body:
|
||||||
api_kwargs["extra_body"] = extra_body
|
api_kwargs["extra_body"] = extra_body
|
||||||
|
|
||||||
# xAI prompt caching: send x-grok-conv-id header to route requests
|
|
||||||
# to the same server, maximizing automatic cache hits.
|
|
||||||
# https://docs.x.ai/developers/advanced-api-usage/prompt-caching
|
|
||||||
if "x.ai" in self._base_url_lower and hasattr(self, "session_id") and self.session_id:
|
|
||||||
api_kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id}
|
|
||||||
|
|
||||||
# Priority Processing / generic request overrides (e.g. service_tier).
|
# Priority Processing / generic request overrides (e.g. service_tier).
|
||||||
# Applied last so overrides win over any defaults set above.
|
# Applied last so overrides win over any defaults set above.
|
||||||
if self.request_overrides:
|
if self.request_overrides:
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,7 @@ from hermes_constants import display_hermes_home
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
from tools.managed_tool_gateway import resolve_managed_tool_gateway
|
from tools.managed_tool_gateway import resolve_managed_tool_gateway
|
||||||
from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_openai_audio_api_key
|
from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_openai_audio_api_key
|
||||||
|
from tools.xai_http import hermes_xai_user_agent
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Lazy imports -- providers are imported only when actually used to avoid
|
# Lazy imports -- providers are imported only when actually used to avoid
|
||||||
|
|
@ -93,6 +94,11 @@ DEFAULT_MINIMAX_VOICE_ID = "English_Graceful_Lady"
|
||||||
DEFAULT_MINIMAX_BASE_URL = "https://api.minimax.io/v1/t2a_v2"
|
DEFAULT_MINIMAX_BASE_URL = "https://api.minimax.io/v1/t2a_v2"
|
||||||
DEFAULT_MISTRAL_TTS_MODEL = "voxtral-mini-tts-2603"
|
DEFAULT_MISTRAL_TTS_MODEL = "voxtral-mini-tts-2603"
|
||||||
DEFAULT_MISTRAL_TTS_VOICE_ID = "c69964a6-ab8b-4f8a-9465-ec0925096ec8" # Paul - Neutral
|
DEFAULT_MISTRAL_TTS_VOICE_ID = "c69964a6-ab8b-4f8a-9465-ec0925096ec8" # Paul - Neutral
|
||||||
|
DEFAULT_XAI_VOICE_ID = "eve"
|
||||||
|
DEFAULT_XAI_LANGUAGE = "en"
|
||||||
|
DEFAULT_XAI_SAMPLE_RATE = 24000
|
||||||
|
DEFAULT_XAI_BIT_RATE = 128000
|
||||||
|
DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1"
|
||||||
|
|
||||||
def _get_default_output_dir() -> str:
|
def _get_default_output_dir() -> str:
|
||||||
from hermes_constants import get_hermes_dir
|
from hermes_constants import get_hermes_dir
|
||||||
|
|
@ -299,6 +305,71 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any]
|
||||||
close()
|
close()
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# Provider: xAI TTS
|
||||||
|
# ===========================================================================
|
||||||
|
def _generate_xai_tts(text: str, output_path: str, tts_config: Dict[str, Any]) -> str:
    """
    Generate audio using xAI TTS.

    xAI exposes a dedicated /v1/tts endpoint instead of the OpenAI audio.speech
    API shape, so this is implemented as a separate backend.

    Args:
        text: Text to synthesize.
        output_path: Destination file. A ``.wav`` suffix (any case) selects the
            ``wav`` codec; every other suffix is treated as ``mp3``.
        tts_config: TTS configuration. The optional ``"xai"`` mapping may set
            ``voice_id``, ``language``, ``sample_rate``, ``bit_rate`` and
            ``base_url``; missing, null or blank entries fall back to the
            module defaults (``base_url`` also honors ``XAI_BASE_URL``).

    Returns:
        ``output_path``, after the response body has been written to it.

    Raises:
        ValueError: If ``XAI_API_KEY`` is unset or blank.
        requests.HTTPError: If the endpoint answers with an error status.
    """
    import requests

    api_key = os.getenv("XAI_API_KEY", "").strip()
    if not api_key:
        raise ValueError("XAI_API_KEY not set. Get one at https://console.x.ai/")

    # A YAML section written as ``xai:`` with no body loads as None, so a
    # plain ``.get("xai", {})`` default is not enough to guarantee a mapping.
    xai_config = tts_config.get("xai") or {}
    if not isinstance(xai_config, dict):
        xai_config = {}

    def _text_setting(key: str, default: str) -> str:
        # ``or default`` first so an explicit null never becomes the string "None".
        return str(xai_config.get(key) or default).strip() or default

    def _int_setting(key: str, default: int) -> int:
        # Only null/missing falls back; an explicit 0 is preserved so it can
        # still suppress the corresponding output_format field below.
        raw = xai_config.get(key)
        return default if raw is None else int(raw)

    voice_id = _text_setting("voice_id", DEFAULT_XAI_VOICE_ID)
    language = _text_setting("language", DEFAULT_XAI_LANGUAGE)
    sample_rate = _int_setting("sample_rate", DEFAULT_XAI_SAMPLE_RATE)
    bit_rate = _int_setting("bit_rate", DEFAULT_XAI_BIT_RATE)

    # First non-blank candidate wins: config override, then environment, then
    # the built-in default. Blank values are skipped so the request URL always
    # keeps a scheme and host.
    base_url = DEFAULT_XAI_BASE_URL
    for candidate in (xai_config.get("base_url"), os.getenv("XAI_BASE_URL")):
        cleaned = str(candidate or "").strip().rstrip("/")
        if cleaned:
            base_url = cleaned
            break

    # Match the documented minimal POST /v1/tts shape by default. Only send
    # output_format when Hermes actually needs a non-default format/override.
    codec = "wav" if output_path.lower().endswith(".wav") else "mp3"
    payload: Dict[str, Any] = {
        "text": text,
        "voice_id": voice_id,
        "language": language,
    }
    needs_output_format = (
        codec != "mp3"
        or sample_rate != DEFAULT_XAI_SAMPLE_RATE
        or bit_rate != DEFAULT_XAI_BIT_RATE
    )
    if needs_output_format:
        output_format: Dict[str, Any] = {"codec": codec}
        if sample_rate:
            output_format["sample_rate"] = sample_rate
        # bit_rate is only sent alongside the mp3 codec.
        if codec == "mp3" and bit_rate:
            output_format["bit_rate"] = bit_rate
        payload["output_format"] = output_format

    response = requests.post(
        f"{base_url}/tts",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "User-Agent": hermes_xai_user_agent(),
        },
        json=payload,
        timeout=60,
    )
    response.raise_for_status()

    with open(output_path, "wb") as f:
        f.write(response.content)

    return output_path
|
||||||
|
|
||||||
|
|
||||||
# ===========================================================================
|
# ===========================================================================
|
||||||
# Provider: MiniMax TTS
|
# Provider: MiniMax TTS
|
||||||
# ===========================================================================
|
# ===========================================================================
|
||||||
|
|
@ -600,6 +671,10 @@ def text_to_speech_tool(
|
||||||
logger.info("Generating speech with MiniMax TTS...")
|
logger.info("Generating speech with MiniMax TTS...")
|
||||||
_generate_minimax_tts(text, file_str, tts_config)
|
_generate_minimax_tts(text, file_str, tts_config)
|
||||||
|
|
||||||
|
elif provider == "xai":
|
||||||
|
logger.info("Generating speech with xAI TTS...")
|
||||||
|
_generate_xai_tts(text, file_str, tts_config)
|
||||||
|
|
||||||
elif provider == "mistral":
|
elif provider == "mistral":
|
||||||
try:
|
try:
|
||||||
_import_mistral_client()
|
_import_mistral_client()
|
||||||
|
|
@ -661,7 +736,7 @@ def text_to_speech_tool(
|
||||||
# Try Opus conversion for Telegram compatibility
|
# Try Opus conversion for Telegram compatibility
|
||||||
# Edge TTS outputs MP3, NeuTTS outputs WAV — both need ffmpeg conversion
|
# Edge TTS outputs MP3, NeuTTS outputs WAV — both need ffmpeg conversion
|
||||||
voice_compatible = False
|
voice_compatible = False
|
||||||
if provider in ("edge", "neutts", "minimax") and not file_str.endswith(".ogg"):
|
if provider in ("edge", "neutts", "minimax", "xai") and not file_str.endswith(".ogg"):
|
||||||
opus_path = _convert_to_opus(file_str)
|
opus_path = _convert_to_opus(file_str)
|
||||||
if opus_path:
|
if opus_path:
|
||||||
file_str = opus_path
|
file_str = opus_path
|
||||||
|
|
@ -734,6 +809,8 @@ def check_tts_requirements() -> bool:
|
||||||
pass
|
pass
|
||||||
if os.getenv("MINIMAX_API_KEY"):
|
if os.getenv("MINIMAX_API_KEY"):
|
||||||
return True
|
return True
|
||||||
|
if os.getenv("XAI_API_KEY"):
|
||||||
|
return True
|
||||||
try:
|
try:
|
||||||
_import_mistral_client()
|
_import_mistral_client()
|
||||||
if os.getenv("MISTRAL_API_KEY"):
|
if os.getenv("MISTRAL_API_KEY"):
|
||||||
|
|
|
||||||
12
tools/xai_http.py
Normal file
12
tools/xai_http.py
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
"""Shared helpers for direct xAI HTTP integrations."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
def hermes_xai_user_agent() -> str:
    """Return a stable Hermes-specific User-Agent for xAI HTTP calls.

    The value has the form ``Hermes-Agent/<version>``, where ``<version>`` is
    ``hermes_cli.__version__`` when that import succeeds and the literal
    ``"unknown"`` otherwise.
    """
    try:
        # Imported lazily and bound to a plain local name instead of
        # rebinding a dunder inside the function.
        from hermes_cli import __version__ as version
    except Exception:
        # Deliberately best-effort: a missing or broken hermes_cli must never
        # prevent an HTTP call from being made.
        version = "unknown"
    return f"Hermes-Agent/{version}"
|
||||||
|
|
@ -151,7 +151,7 @@ TOOLSETS = {
|
||||||
},
|
},
|
||||||
|
|
||||||
"tts": {
|
"tts": {
|
||||||
"description": "Text-to-speech: convert text to audio with Edge TTS (free), ElevenLabs, or OpenAI",
|
"description": "Text-to-speech: convert text to audio with Edge TTS (free), ElevenLabs, OpenAI, or xAI",
|
||||||
"tools": ["text_to_speech"],
|
"tools": ["text_to_speech"],
|
||||||
"includes": []
|
"includes": []
|
||||||
},
|
},
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue