diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 73bff981f..14d764d7d 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -644,10 +644,14 @@ platform_toolsets: # Voice Transcription (Speech-to-Text) # ============================================================================= # Automatically transcribe voice messages on messaging platforms. -# Requires OPENAI_API_KEY in .env (uses OpenAI Whisper API directly). +# Providers: local (free, faster-whisper) | groq (free tier) | openai (Whisper API) | mistral (Voxtral Transcribe) +# Set the corresponding API key in .env: GROQ_API_KEY, OPENAI_API_KEY, or MISTRAL_API_KEY. stt: enabled: true + # provider: "local" # auto-detected if omitted model: "whisper-1" # whisper-1 (cheapest) | gpt-4o-mini-transcribe | gpt-4o-transcribe + # mistral: + # model: "voxtral-mini-latest" # voxtral-mini-latest | voxtral-mini-2602 # ============================================================================= # Response Pacing (Messaging Platforms) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 4f1142043..350d99cf8 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -413,7 +413,7 @@ DEFAULT_CONFIG = { "stt": { "enabled": True, - "provider": "local", # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API) + "provider": "local", # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API) | "mistral" (Voxtral Transcribe) "local": { "model": "base", # tiny, base, small, medium, large-v3 "language": "", # auto-detect by default; set to "en", "es", "fr", etc. 
to force @@ -421,6 +421,9 @@ DEFAULT_CONFIG = { "openai": { "model": "whisper-1", # whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe }, + "mistral": { + "model": "voxtral-mini-latest", # voxtral-mini-latest, voxtral-mini-2602 + }, }, "voice": { diff --git a/pyproject.toml b/pyproject.toml index 8982e6e46..de0e61060 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,7 @@ mcp = ["mcp>=1.2.0,<2"] homeassistant = ["aiohttp>=3.9.0,<4"] sms = ["aiohttp>=3.9.0,<4"] acp = ["agent-client-protocol>=0.9.0,<1.0"] +mistral = ["mistralai>=2.3.0,<3"] dingtalk = ["dingtalk-stream>=0.1.0,<1"] feishu = ["lark-oapi>=1.5.3,<2"] rl = [ @@ -94,6 +95,7 @@ all = [ "hermes-agent[voice]", "hermes-agent[dingtalk]", "hermes-agent[feishu]", + "hermes-agent[mistral]", ] [project.scripts] diff --git a/tests/tools/test_transcription_tools.py b/tests/tools/test_transcription_tools.py index 0cd4c8e3c..9f5fab629 100644 --- a/tests/tools/test_transcription_tools.py +++ b/tests/tools/test_transcription_tools.py @@ -48,6 +48,7 @@ def clean_env(monkeypatch): monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False) monkeypatch.delenv("OPENAI_API_KEY", raising=False) monkeypatch.delenv("GROQ_API_KEY", raising=False) + monkeypatch.delenv("MISTRAL_API_KEY", raising=False) monkeypatch.delenv("HERMES_LOCAL_STT_COMMAND", raising=False) monkeypatch.delenv("HERMES_LOCAL_STT_LANGUAGE", raising=False) @@ -858,3 +859,183 @@ class TestGetSttModelFromConfig: from tools.transcription_tools import get_stt_model_from_config assert get_stt_model_from_config() is None + + +# ============================================================================ +# _transcribe_mistral +# ============================================================================ + + +@pytest.fixture +def mock_mistral_module(): + """Inject a fake mistralai module into sys.modules for testing.""" + mock_client = MagicMock() + mock_client.__enter__ = MagicMock(return_value=mock_client) + mock_client.__exit__ = 
MagicMock(return_value=False) + mock_mistral_cls = MagicMock(return_value=mock_client) + fake_module = MagicMock() + fake_module.Mistral = mock_mistral_cls + with patch.dict("sys.modules", {"mistralai": fake_module}): + yield mock_client + + +class TestTranscribeMistral: + def test_no_key(self, monkeypatch): + monkeypatch.delenv("MISTRAL_API_KEY", raising=False) + from tools.transcription_tools import _transcribe_mistral + result = _transcribe_mistral("/tmp/test.ogg", "voxtral-mini-latest") + assert result["success"] is False + assert "MISTRAL_API_KEY" in result["error"] + + def test_successful_transcription(self, monkeypatch, sample_ogg, mock_mistral_module): + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + + mock_result = MagicMock() + mock_result.text = "hello from mistral" + mock_mistral_module.audio.transcriptions.complete.return_value = mock_result + + from tools.transcription_tools import _transcribe_mistral + result = _transcribe_mistral(sample_ogg, "voxtral-mini-latest") + + assert result["success"] is True + assert result["transcript"] == "hello from mistral" + assert result["provider"] == "mistral" + mock_mistral_module.audio.transcriptions.complete.assert_called_once() + mock_mistral_module.__exit__.assert_called_once() + + def test_api_error_returns_failure(self, monkeypatch, sample_ogg, mock_mistral_module): + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + mock_mistral_module.audio.transcriptions.complete.side_effect = RuntimeError("secret-key-leaked") + + from tools.transcription_tools import _transcribe_mistral + result = _transcribe_mistral(sample_ogg, "voxtral-mini-latest") + + assert result["success"] is False + assert "RuntimeError" in result["error"] + assert "secret-key-leaked" not in result["error"] + + def test_permission_error(self, monkeypatch, sample_ogg, mock_mistral_module): + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + mock_mistral_module.audio.transcriptions.complete.side_effect = PermissionError("denied") + + from 
tools.transcription_tools import _transcribe_mistral + result = _transcribe_mistral(sample_ogg, "voxtral-mini-latest") + + assert result["success"] is False + assert "Permission denied" in result["error"] + + +# ============================================================================ +# _get_provider — Mistral +# ============================================================================ + +class TestGetProviderMistral: + """Mistral-specific provider selection tests.""" + + def test_mistral_when_key_and_sdk_available(self, monkeypatch): + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + with patch("tools.transcription_tools._HAS_MISTRAL", True): + from tools.transcription_tools import _get_provider + assert _get_provider({"provider": "mistral"}) == "mistral" + + def test_mistral_explicit_no_key_returns_none(self, monkeypatch): + """Explicit mistral with no key returns none — no cross-provider fallback.""" + monkeypatch.delenv("MISTRAL_API_KEY", raising=False) + with patch("tools.transcription_tools._HAS_MISTRAL", True): + from tools.transcription_tools import _get_provider + assert _get_provider({"provider": "mistral"}) == "none" + + def test_mistral_explicit_no_sdk_returns_none(self, monkeypatch): + """Explicit mistral with key but no SDK returns none.""" + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + with patch("tools.transcription_tools._HAS_MISTRAL", False): + from tools.transcription_tools import _get_provider + assert _get_provider({"provider": "mistral"}) == "none" + + def test_auto_detect_mistral_after_openai(self, monkeypatch): + """Auto-detect: mistral is selected when local, groq, and openai are all unavailable.""" + monkeypatch.delenv("GROQ_API_KEY", raising=False) + monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \ + 
patch("tools.transcription_tools._has_local_command", return_value=False), \ + patch("tools.transcription_tools._HAS_OPENAI", False), \ + patch("tools.transcription_tools._HAS_MISTRAL", True): + from tools.transcription_tools import _get_provider + assert _get_provider({}) == "mistral" + + def test_auto_detect_openai_preferred_over_mistral(self, monkeypatch): + """Auto-detect: openai is preferred over mistral (both paid, openai more common).""" + monkeypatch.setenv("VOICE_TOOLS_OPENAI_KEY", "sk-test") + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + monkeypatch.delenv("GROQ_API_KEY", raising=False) + with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \ + patch("tools.transcription_tools._has_local_command", return_value=False), \ + patch("tools.transcription_tools._HAS_OPENAI", True), \ + patch("tools.transcription_tools._HAS_MISTRAL", True): + from tools.transcription_tools import _get_provider + assert _get_provider({}) == "openai" + + def test_auto_detect_groq_preferred_over_mistral(self, monkeypatch): + """Auto-detect: groq (free) is preferred over mistral (paid).""" + monkeypatch.setenv("GROQ_API_KEY", "gsk-test") + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \ + patch("tools.transcription_tools._has_local_command", return_value=False), \ + patch("tools.transcription_tools._HAS_OPENAI", True), \ + patch("tools.transcription_tools._HAS_MISTRAL", True): + from tools.transcription_tools import _get_provider + assert _get_provider({}) == "groq" + + def test_auto_detect_skips_mistral_without_sdk(self, monkeypatch): + """Auto-detect: mistral skipped when key is set but SDK is not installed.""" + monkeypatch.delenv("GROQ_API_KEY", raising=False) + monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + with patch("tools.transcription_tools._HAS_FASTER_WHISPER", 
False), \ + patch("tools.transcription_tools._has_local_command", return_value=False), \ + patch("tools.transcription_tools._HAS_OPENAI", False), \ + patch("tools.transcription_tools._HAS_MISTRAL", False): + from tools.transcription_tools import _get_provider + assert _get_provider({}) == "none" + + +# ============================================================================ +# transcribe_audio — Mistral dispatch +# ============================================================================ + +class TestTranscribeAudioMistralDispatch: + def test_dispatches_to_mistral(self, sample_ogg): + with patch("tools.transcription_tools._load_stt_config", return_value={"provider": "mistral"}), \ + patch("tools.transcription_tools._get_provider", return_value="mistral"), \ + patch("tools.transcription_tools._transcribe_mistral", + return_value={"success": True, "transcript": "hi", "provider": "mistral"}) as mock_mistral: + from tools.transcription_tools import transcribe_audio + result = transcribe_audio(sample_ogg) + + assert result["success"] is True + assert result["provider"] == "mistral" + mock_mistral.assert_called_once() + + def test_config_mistral_model_used(self, sample_ogg): + config = {"provider": "mistral", "mistral": {"model": "voxtral-mini-2602"}} + with patch("tools.transcription_tools._load_stt_config", return_value=config), \ + patch("tools.transcription_tools._get_provider", return_value="mistral"), \ + patch("tools.transcription_tools._transcribe_mistral", + return_value={"success": True, "transcript": "hi"}) as mock_mistral: + from tools.transcription_tools import transcribe_audio + transcribe_audio(sample_ogg, model=None) + + assert mock_mistral.call_args[0][1] == "voxtral-mini-2602" + + def test_model_override_passed_to_mistral(self, sample_ogg): + with patch("tools.transcription_tools._load_stt_config", return_value={}), \ + patch("tools.transcription_tools._get_provider", return_value="mistral"), \ + 
patch("tools.transcription_tools._transcribe_mistral", + return_value={"success": True, "transcript": "hi"}) as mock_mistral: + from tools.transcription_tools import transcribe_audio + transcribe_audio(sample_ogg, model="voxtral-mini-2602") + + assert mock_mistral.call_args[0][1] == "voxtral-mini-2602" diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index 4f07e5c47..296c74a21 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -57,6 +57,7 @@ def _safe_find_spec(module_name: str) -> bool: _HAS_FASTER_WHISPER = _safe_find_spec("faster_whisper") _HAS_OPENAI = _safe_find_spec("openai") +_HAS_MISTRAL = _safe_find_spec("mistralai") # --------------------------------------------------------------------------- # Constants @@ -67,6 +68,7 @@ DEFAULT_LOCAL_MODEL = "base" DEFAULT_LOCAL_STT_LANGUAGE = "en" DEFAULT_STT_MODEL = os.getenv("STT_OPENAI_MODEL", "whisper-1") DEFAULT_GROQ_STT_MODEL = os.getenv("STT_GROQ_MODEL", "whisper-large-v3-turbo") +DEFAULT_MISTRAL_STT_MODEL = os.getenv("STT_MISTRAL_MODEL", "voxtral-mini-latest") LOCAL_STT_COMMAND_ENV = "HERMES_LOCAL_STT_COMMAND" LOCAL_STT_LANGUAGE_ENV = "HERMES_LOCAL_STT_LANGUAGE" COMMON_LOCAL_BIN_DIRS = ("/opt/homebrew/bin", "/usr/local/bin") @@ -74,7 +76,7 @@ COMMON_LOCAL_BIN_DIRS = ("/opt/homebrew/bin", "/usr/local/bin") GROQ_BASE_URL = os.getenv("GROQ_BASE_URL", "https://api.groq.com/openai/v1") OPENAI_BASE_URL = os.getenv("STT_OPENAI_BASE_URL", "https://api.openai.com/v1") -SUPPORTED_FORMATS = {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".ogg", ".aac"} +SUPPORTED_FORMATS = {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".ogg", ".aac", ".flac"} LOCAL_NATIVE_AUDIO_FORMATS = {".wav", ".aiff", ".aif"} MAX_FILE_SIZE = 25 * 1024 * 1024 # 25 MB @@ -227,9 +229,18 @@ def _get_provider(stt_config: dict) -> str: ) return "none" + if provider == "mistral": + if _HAS_MISTRAL and os.getenv("MISTRAL_API_KEY"): + return "mistral" + logger.warning( + "STT 
provider 'mistral' configured but mistralai package " + "not installed or MISTRAL_API_KEY not set" + ) + return "none" + return provider # Unknown — let it fail downstream - # --- Auto-detect (no explicit provider): local > groq > openai --------- + # --- Auto-detect (no explicit provider): local > groq > openai > mistral - if _HAS_FASTER_WHISPER: return "local" @@ -241,6 +252,9 @@ def _get_provider(stt_config: dict) -> str: if _HAS_OPENAI and _has_openai_audio_backend(): logger.info("No local STT available, using OpenAI Whisper API") return "openai" + if _HAS_MISTRAL and os.getenv("MISTRAL_API_KEY"): + logger.info("No local STT available, using Mistral Voxtral Transcribe API") + return "mistral" return "none" # --------------------------------------------------------------------------- @@ -516,6 +530,45 @@ def _transcribe_openai(file_path: str, model_name: str) -> Dict[str, Any]: logger.error("OpenAI transcription failed: %s", e, exc_info=True) return {"success": False, "transcript": "", "error": f"Transcription failed: {e}"} +# --------------------------------------------------------------------------- +# Provider: mistral (Voxtral Transcribe API) +# --------------------------------------------------------------------------- + + +def _transcribe_mistral(file_path: str, model_name: str) -> Dict[str, Any]: + """Transcribe using Mistral Voxtral Transcribe API. + + Uses the ``mistralai`` Python SDK to call ``/v1/audio/transcriptions``. + Requires ``MISTRAL_API_KEY`` environment variable. 
+ """ + api_key = os.getenv("MISTRAL_API_KEY") + if not api_key: + return {"success": False, "transcript": "", "error": "MISTRAL_API_KEY not set"} + + try: + from mistralai import Mistral + + with Mistral(api_key=api_key) as client: + with open(file_path, "rb") as audio_file: + result = client.audio.transcriptions.complete( + model=model_name, + file={"content": audio_file, "file_name": Path(file_path).name}, + ) + + transcript_text = _extract_transcript_text(result) + logger.info( + "Transcribed %s via Mistral API (%s, %d chars)", + Path(file_path).name, model_name, len(transcript_text), + ) + return {"success": True, "transcript": transcript_text, "provider": "mistral"} + + except PermissionError: + return {"success": False, "transcript": "", "error": f"Permission denied: {file_path}"} + except Exception as e: + logger.error("Mistral transcription failed: %s", e, exc_info=True) + return {"success": False, "transcript": "", "error": f"Mistral transcription failed: {type(e).__name__}"} + + # --------------------------------------------------------------------------- # Public API # --------------------------------------------------------------------------- @@ -577,6 +630,11 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A model_name = model or openai_cfg.get("model", DEFAULT_STT_MODEL) return _transcribe_openai(file_path, model_name) + if provider == "mistral": + mistral_cfg = stt_config.get("mistral", {}) + model_name = model or mistral_cfg.get("model", DEFAULT_MISTRAL_STT_MODEL) + return _transcribe_mistral(file_path, model_name) + # No provider available return { "success": False, @@ -584,7 +642,8 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A "error": ( "No STT provider available. 
Install faster-whisper for free local " f"transcription, configure {LOCAL_STT_COMMAND_ENV} or install a local whisper CLI, " - "set GROQ_API_KEY for free Groq Whisper, or set VOICE_TOOLS_OPENAI_KEY " + "set GROQ_API_KEY for free Groq Whisper, set MISTRAL_API_KEY for Mistral " + "Voxtral Transcribe, or set VOICE_TOOLS_OPENAI_KEY " "or OPENAI_API_KEY for the OpenAI Whisper API." ), } diff --git a/uv.lock b/uv.lock index 8a5db5436..a3df304c9 100644 --- a/uv.lock +++ b/uv.lock @@ -1158,6 +1158,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/97/a8/c070e1340636acb38d4e6a7e45c46d168a462b48b9b3257e14ca0e5af79b/environs-14.6.0-py3-none-any.whl", hash = "sha256:f8fb3d6c6a55872b0c6db077a28f5a8c7b8984b7c32029613d44cef95cfc0812", size = 17205, upload-time = "2026-02-20T04:02:07.299Z" }, ] +[[package]] +name = "eval-type-backport" +version = "0.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fb/a3/cafafb4558fd638aadfe4121dc6cefb8d743368c085acb2f521df0f3d9d7/eval_type_backport-0.3.1.tar.gz", hash = "sha256:57e993f7b5b69d271e37482e62f74e76a0276c82490cf8e4f0dffeb6b332d5ed", size = 9445, upload-time = "2025-12-02T11:51:42.987Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/22/fdc2e30d43ff853720042fa15baa3e6122722be1a7950a98233ebb55cd71/eval_type_backport-0.3.1-py3-none-any.whl", hash = "sha256:279ab641905e9f11129f56a8a78f493518515b83402b860f6f06dd7c011fdfa8", size = 6063, upload-time = "2025-12-02T11:51:41.665Z" }, +] + [[package]] name = "exa-py" version = "2.10.2" @@ -1683,6 +1692,7 @@ all = [ { name = "honcho-ai" }, { name = "lark-oapi" }, { name = "mcp" }, + { name = "mistralai" }, { name = "modal" }, { name = "numpy" }, { name = "ptyprocess", marker = "sys_platform != 'win32'" }, @@ -1738,6 +1748,9 @@ messaging = [ { name = "slack-bolt" }, { name = "slack-sdk" }, ] +mistral = [ + { name = "mistralai" }, +] modal = [ { name = "modal" }, ] @@ -1803,6 +1816,7 @@ requires-dist 
= [ { name = "hermes-agent", extras = ["honcho"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["mcp"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["messaging"], marker = "extra == 'all'" }, + { name = "hermes-agent", extras = ["mistral"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["modal"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["pty"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["slack"], marker = "extra == 'all'" }, @@ -1817,6 +1831,7 @@ requires-dist = [ { name = "matrix-nio", extras = ["e2e"], marker = "extra == 'matrix'", specifier = ">=0.24.0,<1" }, { name = "mcp", marker = "extra == 'dev'", specifier = ">=1.2.0,<2" }, { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.2.0,<2" }, + { name = "mistralai", marker = "extra == 'mistral'", specifier = ">=2.3.0,<3" }, { name = "modal", marker = "extra == 'modal'", specifier = ">=1.0.0,<2" }, { name = "numpy", marker = "extra == 'voice'", specifier = ">=1.24.0,<3" }, { name = "openai", specifier = ">=2.21.0,<3" }, @@ -1846,7 +1861,7 @@ requires-dist = [ { name = "wandb", marker = "extra == 'rl'", specifier = ">=0.15.0,<1" }, { name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git" }, ] -provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "dingtalk", "feishu", "rl", "yc-bench", "all"] +provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "mistral", "dingtalk", "feishu", "rl", "yc-bench", "all"] [[package]] name = "hf-transfer" @@ -2191,6 +2206,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/f8/62/d9ba6323b9202dd2fe166beab8a86d29465c41a0288cbe229fac60c1ab8d/jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55", size = 8701, upload-time = "2023-09-01T12:34:42.563Z" }, ] +[[package]] +name = "jsonpath-python" +version = "1.1.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2d/db/2f4ecc24da35c6142b39c353d5b7c16eef955cc94b35a48d3fa47996d7c3/jsonpath_python-1.1.5.tar.gz", hash = "sha256:ceea2efd9e56add09330a2c9631ea3d55297b9619348c1055e5bfb9cb0b8c538", size = 87352, upload-time = "2026-03-17T06:16:40.597Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/50/1a313fb700526b134c71eb8a225d8b83be0385dbb0204337b4379c698cef/jsonpath_python-1.1.5-py3-none-any.whl", hash = "sha256:a60315404d70a65e76c9a782c84e50600480221d94a58af47b7b4d437351cb4b", size = 14090, upload-time = "2026-03-17T06:16:39.152Z" }, +] + [[package]] name = "jsonschema" version = "4.26.0" @@ -2616,6 +2640,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "mistralai" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "eval-type-backport" }, + { name = "httpx" }, + { name = "jsonpath-python" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "pydantic" }, + { name = "python-dateutil" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4d/05/40c38c8893f0ec858756b30f4a939378fc62cf33565af538a843497f3f24/mistralai-2.3.0.tar.gz", hash = "sha256:eb371a9b3b62552f3d4a274ecf5b2c48b90fd3439ecd1425e7f5163cdd87e29a", 
size = 387145, upload-time = "2026-04-03T15:06:48.927Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/57/d06cbfd96ec6dc45d5c1fe9456f7fcfcb9549c9fa91e213561d1d88729e7/mistralai-2.3.0-py3-none-any.whl", hash = "sha256:22111747c215f1632141660151924f06579f87cd8db2649e0b1f87721d076851", size = 925544, upload-time = "2026-04-03T15:06:47.593Z" }, +] + [[package]] name = "modal" version = "1.3.4" @@ -3073,32 +3116,32 @@ wheels = [ [[package]] name = "opentelemetry-api" -version = "1.40.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "importlib-metadata" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2c/1d/4049a9e8698361cc1a1aa03a6c59e4fa4c71e0c0f94a30f988a6876a2ae6/opentelemetry_api-1.40.0.tar.gz", hash = "sha256:159be641c0b04d11e9ecd576906462773eb97ae1b657730f0ecf64d32071569f", size = 70851, upload-time = "2026-03-04T14:17:21.555Z" } +sdist = { url = "https://files.pythonhosted.org/packages/97/b9/3161be15bb8e3ad01be8be5a968a9237c3027c5be504362ff800fca3e442/opentelemetry_api-1.39.1.tar.gz", hash = "sha256:fbde8c80e1b937a2c61f20347e91c0c18a1940cecf012d62e65a7caf08967c9c", size = 65767, upload-time = "2025-12-11T13:32:39.182Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/bf/93795954016c522008da367da292adceed71cca6ee1717e1d64c83089099/opentelemetry_api-1.40.0-py3-none-any.whl", hash = "sha256:82dd69331ae74b06f6a874704be0cfaa49a1650e1537d4a813b86ecef7d0ecf9", size = 68676, upload-time = "2026-03-04T14:17:01.24Z" }, + { url = "https://files.pythonhosted.org/packages/cf/df/d3f1ddf4bb4cb50ed9b1139cc7b1c54c34a1e7ce8fd1b9a37c0d1551a6bd/opentelemetry_api-1.39.1-py3-none-any.whl", hash = "sha256:2edd8463432a7f8443edce90972169b195e7d6a05500cd29e6d13898187c9950", size = 66356, upload-time = "2025-12-11T13:32:17.304Z" }, ] [[package]] name = "opentelemetry-exporter-otlp-proto-common" -version = "1.40.0" +version = "1.39.1" source = { registry = 
"https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-proto" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/51/bc/1559d46557fe6eca0b46c88d4c2676285f1f3be2e8d06bb5d15fbffc814a/opentelemetry_exporter_otlp_proto_common-1.40.0.tar.gz", hash = "sha256:1cbee86a4064790b362a86601ee7934f368b81cd4cc2f2e163902a6e7818a0fa", size = 20416, upload-time = "2026-03-04T14:17:23.801Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e9/9d/22d241b66f7bbde88a3bfa6847a351d2c46b84de23e71222c6aae25c7050/opentelemetry_exporter_otlp_proto_common-1.39.1.tar.gz", hash = "sha256:763370d4737a59741c89a67b50f9e39271639ee4afc999dadfe768541c027464", size = 20409, upload-time = "2025-12-11T13:32:40.885Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/ca/8f122055c97a932311a3f640273f084e738008933503d0c2563cd5d591fc/opentelemetry_exporter_otlp_proto_common-1.40.0-py3-none-any.whl", hash = "sha256:7081ff453835a82417bf38dccf122c827c3cbc94f2079b03bba02a3165f25149", size = 18369, upload-time = "2026-03-04T14:17:04.796Z" }, + { url = "https://files.pythonhosted.org/packages/8c/02/ffc3e143d89a27ac21fd557365b98bd0653b98de8a101151d5805b5d4c33/opentelemetry_exporter_otlp_proto_common-1.39.1-py3-none-any.whl", hash = "sha256:08f8a5862d64cc3435105686d0216c1365dc5701f86844a8cd56597d0c764fde", size = 18366, upload-time = "2025-12-11T13:32:20.2Z" }, ] [[package]] name = "opentelemetry-exporter-otlp-proto-http" -version = "1.40.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "googleapis-common-protos" }, @@ -3109,14 +3152,14 @@ dependencies = [ { name = "requests" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2e/fa/73d50e2c15c56be4d000c98e24221d494674b0cc95524e2a8cb3856d95a4/opentelemetry_exporter_otlp_proto_http-1.40.0.tar.gz", hash = "sha256:db48f5e0f33217588bbc00274a31517ba830da576e59503507c839b38fa0869c", size = 17772, upload-time = 
"2026-03-04T14:17:25.324Z" } +sdist = { url = "https://files.pythonhosted.org/packages/80/04/2a08fa9c0214ae38880df01e8bfae12b067ec0793446578575e5080d6545/opentelemetry_exporter_otlp_proto_http-1.39.1.tar.gz", hash = "sha256:31bdab9745c709ce90a49a0624c2bd445d31a28ba34275951a6a362d16a0b9cb", size = 17288, upload-time = "2025-12-11T13:32:42.029Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/3a/8865d6754e61c9fb170cdd530a124a53769ee5f740236064816eb0ca7301/opentelemetry_exporter_otlp_proto_http-1.40.0-py3-none-any.whl", hash = "sha256:a8d1dab28f504c5d96577d6509f80a8150e44e8f45f82cdbe0e34c99ab040069", size = 19960, upload-time = "2026-03-04T14:17:07.153Z" }, + { url = "https://files.pythonhosted.org/packages/95/f1/b27d3e2e003cd9a3592c43d099d2ed8d0a947c15281bf8463a256db0b46c/opentelemetry_exporter_otlp_proto_http-1.39.1-py3-none-any.whl", hash = "sha256:d9f5207183dd752a412c4cd564ca8875ececba13be6e9c6c370ffb752fd59985", size = 19641, upload-time = "2025-12-11T13:32:22.248Z" }, ] [[package]] name = "opentelemetry-instrumentation" -version = "0.61b0" +version = "0.60b1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, @@ -3124,14 +3167,14 @@ dependencies = [ { name = "packaging" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/da/37/6bf8e66bfcee5d3c6515b79cb2ee9ad05fe573c20f7ceb288d0e7eeec28c/opentelemetry_instrumentation-0.61b0.tar.gz", hash = "sha256:cb21b48db738c9de196eba6b805b4ff9de3b7f187e4bbf9a466fa170514f1fc7", size = 32606, upload-time = "2026-03-04T14:20:16.825Z" } +sdist = { url = "https://files.pythonhosted.org/packages/41/0f/7e6b713ac117c1f5e4e3300748af699b9902a2e5e34c9cf443dde25a01fa/opentelemetry_instrumentation-0.60b1.tar.gz", hash = "sha256:57ddc7974c6eb35865af0426d1a17132b88b2ed8586897fee187fd5b8944bd6a", size = 31706, upload-time = "2025-12-11T13:36:42.515Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/d8/3e/f6f10f178b6316de67f0dfdbbb699a24fbe8917cf1743c1595fb9dcdd461/opentelemetry_instrumentation-0.61b0-py3-none-any.whl", hash = "sha256:92a93a280e69788e8f88391247cc530fd81f16f2b011979d4d6398f805cfbc63", size = 33448, upload-time = "2026-03-04T14:19:02.447Z" }, + { url = "https://files.pythonhosted.org/packages/77/d2/6788e83c5c86a2690101681aeef27eeb2a6bf22df52d3f263a22cee20915/opentelemetry_instrumentation-0.60b1-py3-none-any.whl", hash = "sha256:04480db952b48fb1ed0073f822f0ee26012b7be7c3eac1a3793122737c78632d", size = 33096, upload-time = "2025-12-11T13:35:33.067Z" }, ] [[package]] name = "opentelemetry-instrumentation-aiohttp-client" -version = "0.61b0" +version = "0.60b1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, @@ -3140,57 +3183,57 @@ dependencies = [ { name = "opentelemetry-util-http" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/24fed4de661de107f2426b28bbd87b51eaab28a2339b62f269a36ae24505/opentelemetry_instrumentation_aiohttp_client-0.61b0.tar.gz", hash = "sha256:c53ab3b88efcb7ce98c1129cc0389f0a1f214eb3675269b6c157770adcf47877", size = 19292, upload-time = "2026-03-04T14:20:18.408Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c0/79/95be90c555fd7efde79dcba36ea5c668815aa2d0a4250b63687e0f91c74a/opentelemetry_instrumentation_aiohttp_client-0.60b1.tar.gz", hash = "sha256:d0e7d5aa057791ca4d9090b0d3c9982f253c1a24b6bc78a734fc18d8dd97927b", size = 15907, upload-time = "2025-12-11T13:36:44.434Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/df/f3/1edc42716521a3f754ac32ffb908f102e0f131f8e43fcd9ab29cab286723/opentelemetry_instrumentation_aiohttp_client-0.61b0-py3-none-any.whl", hash = "sha256:09bc47514c162507b357366ce15578743fd6305078cf7d872db1c99c13fa6972", size = 14534, upload-time = "2026-03-04T14:19:05.165Z" }, + { url = 
"https://files.pythonhosted.org/packages/ca/f4/1a1ec632c86269750ae833c8fbdd4c8d15316eb1c21e3544e34791c805ee/opentelemetry_instrumentation_aiohttp_client-0.60b1-py3-none-any.whl", hash = "sha256:34c5097256a30b16c5a2a88a409ed82b92972a494c43212c85632d204a78c2a1", size = 12694, upload-time = "2025-12-11T13:35:35.034Z" }, ] [[package]] name = "opentelemetry-proto" -version = "1.40.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "protobuf" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4c/77/dd38991db037fdfce45849491cb61de5ab000f49824a00230afb112a4392/opentelemetry_proto-1.40.0.tar.gz", hash = "sha256:03f639ca129ba513f5819810f5b1f42bcb371391405d99c168fe6937c62febcd", size = 45667, upload-time = "2026-03-04T14:17:31.194Z" } +sdist = { url = "https://files.pythonhosted.org/packages/49/1d/f25d76d8260c156c40c97c9ed4511ec0f9ce353f8108ca6e7561f82a06b2/opentelemetry_proto-1.39.1.tar.gz", hash = "sha256:6c8e05144fc0d3ed4d22c2289c6b126e03bcd0e6a7da0f16cedd2e1c2772e2c8", size = 46152, upload-time = "2025-12-11T13:32:48.681Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/b2/189b2577dde745b15625b3214302605b1353436219d42b7912e77fa8dc24/opentelemetry_proto-1.40.0-py3-none-any.whl", hash = "sha256:266c4385d88923a23d63e353e9761af0f47a6ed0d486979777fe4de59dc9b25f", size = 72073, upload-time = "2026-03-04T14:17:16.673Z" }, + { url = "https://files.pythonhosted.org/packages/51/95/b40c96a7b5203005a0b03d8ce8cd212ff23f1793d5ba289c87a097571b18/opentelemetry_proto-1.39.1-py3-none-any.whl", hash = "sha256:22cdc78efd3b3765d09e68bfbd010d4fc254c9818afd0b6b423387d9dee46007", size = 72535, upload-time = "2025-12-11T13:32:33.866Z" }, ] [[package]] name = "opentelemetry-sdk" -version = "1.40.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "opentelemetry-semantic-conventions" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/58/fd/3c3125b20ba18ce2155ba9ea74acb0ae5d25f8cd39cfd37455601b7955cc/opentelemetry_sdk-1.40.0.tar.gz", hash = "sha256:18e9f5ec20d859d268c7cb3c5198c8d105d073714db3de50b593b8c1345a48f2", size = 184252, upload-time = "2026-03-04T14:17:31.87Z" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/fb/c76080c9ba07e1e8235d24cdcc4d125ef7aa3edf23eb4e497c2e50889adc/opentelemetry_sdk-1.39.1.tar.gz", hash = "sha256:cf4d4563caf7bff906c9f7967e2be22d0d6b349b908be0d90fb21c8e9c995cc6", size = 171460, upload-time = "2025-12-11T13:32:49.369Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/c5/6a852903d8bfac758c6dc6e9a68b015d3c33f2f1be5e9591e0f4b69c7e0a/opentelemetry_sdk-1.40.0-py3-none-any.whl", hash = "sha256:787d2154a71f4b3d81f20524a8ce061b7db667d24e46753f32a7bc48f1c1f3f1", size = 141951, upload-time = "2026-03-04T14:17:17.961Z" }, + { url = "https://files.pythonhosted.org/packages/7c/98/e91cf858f203d86f4eccdf763dcf01cf03f1dae80c3750f7e635bfa206b6/opentelemetry_sdk-1.39.1-py3-none-any.whl", hash = "sha256:4d5482c478513ecb0a5d938dcc61394e647066e0cc2676bee9f3af3f3f45f01c", size = 132565, upload-time = "2025-12-11T13:32:35.069Z" }, ] [[package]] name = "opentelemetry-semantic-conventions" -version = "0.61b0" +version = "0.60b1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6d/c0/4ae7973f3c2cfd2b6e321f1675626f0dab0a97027cc7a297474c9c8f3d04/opentelemetry_semantic_conventions-0.61b0.tar.gz", hash = "sha256:072f65473c5d7c6dc0355b27d6c9d1a679d63b6d4b4b16a9773062cb7e31192a", size = 145755, upload-time = "2026-03-04T14:17:32.664Z" } +sdist = { url = "https://files.pythonhosted.org/packages/91/df/553f93ed38bf22f4b999d9be9c185adb558982214f33eae539d3b5cd0858/opentelemetry_semantic_conventions-0.60b1.tar.gz", hash = 
"sha256:87c228b5a0669b748c76d76df6c364c369c28f1c465e50f661e39737e84bc953", size = 137935, upload-time = "2025-12-11T13:32:50.487Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b2/37/cc6a55e448deaa9b27377d087da8615a3416d8ad523d5960b78dbeadd02a/opentelemetry_semantic_conventions-0.61b0-py3-none-any.whl", hash = "sha256:fa530a96be229795f8cef353739b618148b0fe2b4b3f005e60e262926c4d38e2", size = 231621, upload-time = "2026-03-04T14:17:19.33Z" }, + { url = "https://files.pythonhosted.org/packages/7a/5e/5958555e09635d09b75de3c4f8b9cae7335ca545d77392ffe7331534c402/opentelemetry_semantic_conventions-0.60b1-py3-none-any.whl", hash = "sha256:9fa8c8b0c110da289809292b0591220d3a7b53c1526a23021e977d68597893fb", size = 219982, upload-time = "2025-12-11T13:32:36.955Z" }, ] [[package]] name = "opentelemetry-util-http" -version = "0.61b0" +version = "0.60b1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/57/3c/f0196223efc5c4ca19f8fad3d5462b171ac6333013335ce540c01af419e9/opentelemetry_util_http-0.61b0.tar.gz", hash = "sha256:1039cb891334ad2731affdf034d8fb8b48c239af9b6dd295e5fabd07f1c95572", size = 11361, upload-time = "2026-03-04T14:20:57.01Z" } +sdist = { url = "https://files.pythonhosted.org/packages/50/fc/c47bb04a1d8a941a4061307e1eddfa331ed4d0ab13d8a9781e6db256940a/opentelemetry_util_http-0.60b1.tar.gz", hash = "sha256:0d97152ca8c8a41ced7172d29d3622a219317f74ae6bb3027cfbdcf22c3cc0d6", size = 11053, upload-time = "2025-12-11T13:37:25.115Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/e5/c08aaaf2f64288d2b6ef65741d2de5454e64af3e050f34285fb1907492fe/opentelemetry_util_http-0.61b0-py3-none-any.whl", hash = "sha256:8e715e848233e9527ea47e275659ea60a57a75edf5206a3b937e236a6da5fc33", size = 9281, upload-time = "2026-03-04T14:20:08.364Z" }, + { url = 
"https://files.pythonhosted.org/packages/16/5c/d3f1733665f7cd582ef0842fb1d2ed0bc1fba10875160593342d22bba375/opentelemetry_util_http-0.60b1-py3-none-any.whl", hash = "sha256:66381ba28550c91bee14dcba8979ace443444af1ed609226634596b4b0faf199", size = 8947, upload-time = "2025-12-11T13:36:37.151Z" }, ] [[package]] diff --git a/website/docs/user-guide/features/tts.md b/website/docs/user-guide/features/tts.md index ca64170d9..0cd4ed699 100644 --- a/website/docs/user-guide/features/tts.md +++ b/website/docs/user-guide/features/tts.md @@ -102,11 +102,13 @@ Local transcription works out of the box when `faster-whisper` is installed. If ```yaml # In ~/.hermes/config.yaml stt: - provider: "local" # "local" | "groq" | "openai" + provider: "local" # "local" | "groq" | "openai" | "mistral" local: model: "base" # tiny, base, small, medium, large-v3 openai: model: "whisper-1" # whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe + mistral: + model: "voxtral-mini-latest" # voxtral-mini-latest, voxtral-mini-2602 ``` ### Provider Details @@ -125,6 +127,8 @@ stt: **OpenAI API** — Accepts `VOICE_TOOLS_OPENAI_KEY` first and falls back to `OPENAI_API_KEY`. Supports `whisper-1`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`. +**Mistral API (Voxtral Transcribe)** — Requires `MISTRAL_API_KEY`. Uses Mistral's [Voxtral Transcribe](https://docs.mistral.ai/capabilities/audio/speech_to_text/) models. Supports 13 languages, speaker diarization, and word-level timestamps. Install with `pip install hermes-agent[mistral]`. + **Custom local CLI fallback** — Set `HERMES_LOCAL_STT_COMMAND` if you want Hermes to call a local transcription command directly. The command template supports `{input_path}`, `{output_dir}`, `{language}`, and `{model}` placeholders. 
### Fallback Behavior @@ -133,4 +137,5 @@ If your configured provider isn't available, Hermes automatically falls back: - **Local faster-whisper unavailable** → Tries a local `whisper` CLI or `HERMES_LOCAL_STT_COMMAND` before cloud providers - **Groq key not set** → Falls back to local transcription, then OpenAI - **OpenAI key not set** → Falls back to local transcription, then Groq +- **Mistral key not set or SDK not installed** → Skipped in auto-detect; falls through to the next available provider - **Nothing available** → Voice messages pass through with an accurate note to the user