diff --git a/tests/tools/test_transcription_dotenv_fallback.py b/tests/tools/test_transcription_dotenv_fallback.py new file mode 100644 index 0000000000..b99e6cc6c0 --- /dev/null +++ b/tests/tools/test_transcription_dotenv_fallback.py @@ -0,0 +1,208 @@ +"""Regression tests for the transcription_tools variant of #17140. + +Same class of bug as ``tools/tts_tool.py`` (fixed in PR #17163): the STT +provider call sites read API keys via ``os.getenv()``, which bypasses +``~/.hermes/.env`` entries. These tests confirm each STT provider now +consults ``get_env_value()`` and the provider auto-detect + explicit +selection gate (``_get_provider``) do the same. +""" + +from unittest.mock import MagicMock, patch + +import pytest + + +@pytest.fixture(autouse=True) +def isolate_env(monkeypatch): + """Strip every STT-related env var so the test really exercises the + dotenv code path. If any of these survive into the test, the assertion + that ``get_env_value`` was consulted becomes meaningless because + ``os.environ`` already satisfies the lookup. + """ + for key in ( + "GROQ_API_KEY", + "MISTRAL_API_KEY", + "XAI_API_KEY", + "XAI_STT_BASE_URL", + ): + monkeypatch.delenv(key, raising=False) + + +class TestProviderSelectionGate: + """``_get_provider`` picks the STT backend. If it only consulted + ``os.environ`` a user with keys in ``~/.hermes/.env`` would be told + "no STT available" even though the actual transcribe call would + succeed. The gate lives behind ``is_stt_enabled(stt_config)``, so + configure ``{"enabled": True, "provider": ...}`` for explicit tests. + """ + + def test_explicit_groq_sees_dotenv(self): + from tools import transcription_tools as tt + + with patch.object(tt, "_HAS_FASTER_WHISPER", False), \ + patch.object(tt, "_HAS_OPENAI", True), \ + patch.object(tt, "_has_local_command", return_value=False), \ + patch("hermes_cli.config.load_env", + return_value={"GROQ_API_KEY": "dotenv-secret"}): + assert tt._get_provider({"enabled": True, "provider": "groq"}) == "groq" + + def test_explicit_mistral_sees_dotenv(self): + from tools import transcription_tools as tt + + with patch.object(tt, "_HAS_FASTER_WHISPER", False), \ + patch.object(tt, "_HAS_MISTRAL", True), \ + patch.object(tt, "_has_local_command", return_value=False), \ + patch("hermes_cli.config.load_env", + return_value={"MISTRAL_API_KEY": "dotenv-secret"}): + assert tt._get_provider({"enabled": True, "provider": "mistral"}) == "mistral" + + def test_explicit_xai_sees_dotenv(self): + from tools import transcription_tools as tt + + with patch.object(tt, "_HAS_FASTER_WHISPER", False), \ + patch.object(tt, "_has_local_command", return_value=False), \ + patch("hermes_cli.config.load_env", + return_value={"XAI_API_KEY": "dotenv-secret"}): + assert tt._get_provider({"enabled": True, "provider": "xai"}) == "xai" + + def test_auto_detect_sees_dotenv_groq(self): + """No local backend, no explicit provider — auto-detect should fall + through to Groq when its key lives in dotenv only. Before the fix + it would return 'none'.""" + from tools import transcription_tools as tt + + with patch.object(tt, "_HAS_FASTER_WHISPER", False), \ + patch.object(tt, "_HAS_OPENAI", True), \ + patch.object(tt, "_HAS_MISTRAL", False), \ + patch.object(tt, "_has_local_command", return_value=False), \ + patch.object(tt, "_has_openai_audio_backend", return_value=False), \ + patch("hermes_cli.config.load_env", + return_value={"GROQ_API_KEY": "dotenv-secret"}): + # No "provider" key → explicit=False → auto-detect branch + assert tt._get_provider({"enabled": True}) == "groq" + + +class TestTranscribeCallSitesReadDotenv: + """The actual transcribe functions must forward the dotenv-resolved + key into the provider SDK / HTTP call. We mock ``get_env_value`` and + capture what gets passed through.""" + + def test_transcribe_groq_forwards_dotenv_key(self): + from tools import transcription_tools as tt + + seen_keys: list = [] + + class FakeOpenAIClient: + def __init__(self, *, api_key=None, base_url=None, timeout=None, max_retries=None): + seen_keys.append(api_key) + self.audio = MagicMock() + self.audio.transcriptions.create.return_value = "hello" + def close(self): + pass + + fake_openai_module = MagicMock() + fake_openai_module.OpenAI = FakeOpenAIClient + fake_openai_module.APIError = Exception + fake_openai_module.APIConnectionError = Exception + fake_openai_module.APITimeoutError = Exception + + with patch.object(tt, "get_env_value", return_value="groq-dotenv-key"), \ + patch.object(tt, "_HAS_OPENAI", True), \ + patch.dict("sys.modules", {"openai": fake_openai_module}), \ + patch("builtins.open", MagicMock()): + result = tt._transcribe_groq("/tmp/fake.mp3", "whisper-large-v3-turbo") + + assert result["success"] is True + assert seen_keys == ["groq-dotenv-key"] + + def test_transcribe_mistral_forwards_dotenv_key(self): + from tools import transcription_tools as tt + + seen_keys: list = [] + + class FakeMistralClient: + def __init__(self, *, api_key=None): + seen_keys.append(api_key) + self.audio = MagicMock() + completion = MagicMock() + completion.text = "hi" + self.audio.transcriptions.complete.return_value = completion + def __enter__(self): return self + def __exit__(self, *a): return False + + fake_client_module = MagicMock() + fake_client_module.Mistral = FakeMistralClient + + with patch.object(tt, "get_env_value", return_value="mistral-dotenv-key"), \ + patch.dict("sys.modules", {"mistralai.client": fake_client_module}), \ + patch("builtins.open", MagicMock()): + result = tt._transcribe_mistral("/tmp/fake.mp3", "voxtral-mini-latest") + + assert result["success"] is True + assert seen_keys == ["mistral-dotenv-key"] + + def test_transcribe_xai_forwards_dotenv_key(self): + from tools import transcription_tools as tt + + captured: dict = {} + + def fake_post(url, **kwargs): + captured["url"] = url + captured["headers"] = kwargs.get("headers", {}) + response = MagicMock() + response.status_code = 200 + response.raise_for_status = MagicMock() + response.json.return_value = {"text": "hello"} + return response + + # get_env_value is consulted for both XAI_API_KEY and XAI_STT_BASE_URL. + # Return the key for the first call, None for base-url override + # (so it defaults to the module-level XAI_STT_BASE_URL). + def fake_get_env_value(name, default=None): + if name == "XAI_API_KEY": + return "xai-dotenv-key" + return None + + with patch.object(tt, "get_env_value", side_effect=fake_get_env_value), \ + patch("requests.post", side_effect=fake_post), \ + patch("builtins.open", MagicMock()): + result = tt._transcribe_xai("/tmp/fake.mp3", "grok-stt") + + assert result["success"] is True + assert captured["headers"]["Authorization"] == "Bearer xai-dotenv-key" + + +class TestEndToEndRegressionGuard: + """End-to-end probe: patch ``hermes_cli.config.load_env`` to simulate + ``~/.hermes/.env`` carrying the key while ``os.environ`` does not. + Before the fix ``_transcribe_xai`` called ``os.getenv("XAI_API_KEY")`` + directly and returned ``XAI_API_KEY not set``.""" + + def test_xai_key_only_in_dotenv_before_fix(self, monkeypatch): + from tools import transcription_tools as tt + + monkeypatch.delenv("XAI_API_KEY", raising=False) + + captured: dict = {} + + def fake_post(url, **kwargs): + captured["headers"] = kwargs.get("headers", {}) + response = MagicMock() + response.status_code = 200 + response.raise_for_status = MagicMock() + response.json.return_value = {"text": "ok"} + return response + + with patch("hermes_cli.config.load_env", + return_value={"XAI_API_KEY": "dotenv-secret"}): + # Sanity: get_env_value resolves through load_env when + # os.environ is empty. + from hermes_cli.config import get_env_value as live_get + assert live_get("XAI_API_KEY") == "dotenv-secret" + + with patch("requests.post", side_effect=fake_post), \ + patch("builtins.open", MagicMock()): + result = tt._transcribe_xai("/tmp/fake.mp3", "grok-stt") + + assert result["success"] is True + assert captured["headers"]["Authorization"] == "Bearer dotenv-secret" diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index bbc9a10e6a..702f8d1a69 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -42,6 +42,12 @@ from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_opena logger = logging.getLogger(__name__) +try: + from hermes_cli.config import get_env_value +except ImportError: + def get_env_value(name, default=None): + return os.getenv(name, default) + # --------------------------------------------------------------------------- # Optional imports — graceful degradation # --------------------------------------------------------------------------- @@ -222,7 +228,7 @@ def _get_provider(stt_config: dict) -> str: return "none" if provider == "groq": - if _HAS_OPENAI and os.getenv("GROQ_API_KEY"): + if _HAS_OPENAI and get_env_value("GROQ_API_KEY"): return "groq" logger.warning( "STT provider 'groq' configured but GROQ_API_KEY not set" @@ -238,7 +244,7 @@ def _get_provider(stt_config: dict) -> str: return "none" if provider == "mistral": - if _HAS_MISTRAL and os.getenv("MISTRAL_API_KEY"): + if _HAS_MISTRAL and get_env_value("MISTRAL_API_KEY"): return "mistral" logger.warning( "STT provider 'mistral' configured but mistralai package " @@ -247,7 +253,7 @@ def _get_provider(stt_config: dict) -> str: return "none" if provider == "xai": - if os.getenv("XAI_API_KEY"): + if get_env_value("XAI_API_KEY"): return "xai" logger.warning( "STT provider 'xai' configured but XAI_API_KEY not set" @@ -262,16 +268,16 @@ def _get_provider(stt_config: dict) -> str: return "local" if _has_local_command(): return "local_command" - if _HAS_OPENAI and os.getenv("GROQ_API_KEY"): + if _HAS_OPENAI and get_env_value("GROQ_API_KEY"): logger.info("No local STT available, using Groq Whisper API") return "groq" if _HAS_OPENAI and _has_openai_audio_backend(): logger.info("No local STT available, using OpenAI Whisper API") return "openai" - if _HAS_MISTRAL and os.getenv("MISTRAL_API_KEY"): + if _HAS_MISTRAL and get_env_value("MISTRAL_API_KEY"): logger.info("No local STT available, using Mistral Voxtral Transcribe API") return "mistral" - if os.getenv("XAI_API_KEY"): + if get_env_value("XAI_API_KEY"): logger.info("No local STT available, using xAI Grok STT API") return "xai" return "none" @@ -527,7 +533,7 @@ def _transcribe_local_command(file_path: str, model_name: str) -> Dict[str, Any] def _transcribe_groq(file_path: str, model_name: str) -> Dict[str, Any]: """Transcribe using Groq Whisper API (free tier available).""" - api_key = os.getenv("GROQ_API_KEY") + api_key = get_env_value("GROQ_API_KEY") if not api_key: return {"success": False, "transcript": "", "error": "GROQ_API_KEY not set"} @@ -640,7 +646,7 @@ def _transcribe_mistral(file_path: str, model_name: str) -> Dict[str, Any]: Uses the ``mistralai`` Python SDK to call ``/v1/audio/transcriptions``. Requires ``MISTRAL_API_KEY`` environment variable. """ - api_key = os.getenv("MISTRAL_API_KEY") + api_key = get_env_value("MISTRAL_API_KEY") if not api_key: return {"success": False, "transcript": "", "error": "MISTRAL_API_KEY not set"} @@ -680,7 +686,7 @@ def _transcribe_xai(file_path: str, model_name: str) -> Dict[str, Any]: Supports Inverse Text Normalization, diarization, and word-level timestamps. Requires ``XAI_API_KEY`` environment variable. """ - api_key = os.getenv("XAI_API_KEY") + api_key = get_env_value("XAI_API_KEY") if not api_key: return {"success": False, "transcript": "", "error": "XAI_API_KEY not set"} @@ -688,7 +694,7 @@ def _transcribe_xai(file_path: str, model_name: str) -> Dict[str, Any]: xai_config = stt_config.get("xai", {}) base_url = str( xai_config.get("base_url") - or os.getenv("XAI_STT_BASE_URL") + or get_env_value("XAI_STT_BASE_URL") or XAI_STT_BASE_URL ).strip().rstrip("/") language = str(