hermes-agent/tests/tools/test_transcription_command_providers.py

"""
Tests for the STT command-provider registry (``stt.providers.<name>``).

Mirrors ``tests/tools/test_tts_command_providers.py`` — same shape, same
invariants, adapted for the input=audio → output=transcript flow.

Covers:
- Resolution: built-in precedence, missing/unknown name, type/command gating
- Placeholder rendering: shell-quote-aware, doubled-brace preservation
- Helpers: timeout fallback, output_format validation, iter/has-any
- End-to-end via transcribe_audio(): command-provider wins when configured,
  built-ins still win when name collides, plugin coexistence

Nothing here talks to a real STT engine. The shell command writes a static
transcript to ``{output_path}`` using ``python -c`` so the tests run
identically on Linux, macOS, and Windows (with minor quoting differences).
"""

from __future__ import annotations

import os
import subprocess
import sys
import tempfile
import wave
from pathlib import Path
from typing import Optional
from unittest.mock import patch

import pytest

from tools.transcription_tools import (
    BUILTIN_STT_PROVIDERS,
    COMMAND_STT_OUTPUT_FORMATS,
    DEFAULT_COMMAND_STT_LANGUAGE,
    DEFAULT_COMMAND_STT_OUTPUT_FORMAT,
    DEFAULT_COMMAND_STT_TIMEOUT_SECONDS,
    _get_command_stt_output_format,
    _get_command_stt_timeout,
    _get_named_stt_provider_config,
    _has_any_command_stt_provider,
    _is_command_stt_provider_config,
    _iter_command_stt_providers,
    _quote_command_stt_placeholder,
    _render_command_stt_template,
    _resolve_command_stt_provider_config,
    _shell_quote_context_stt,
    _transcribe_command_stt,
    transcribe_audio,
)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _make_silent_wav(path: Path, seconds: float = 0.1) -> Path:
    """Create a short all-zero WAV file at ``path`` and return ``path``.

    The file is mono, 16-bit, 8 kHz and holds ``int(8000 * seconds)`` silent
    frames. Missing parent directories are created first. Used as a minimal
    on-disk audio fixture by the tests in this module.
    """
    sample_rate_hz = 8000
    path.parent.mkdir(parents=True, exist_ok=True)
    with wave.open(str(path), "wb") as wav_out:
        wav_out.setframerate(sample_rate_hz)
        wav_out.setsampwidth(2)  # two bytes per sample (16-bit)
        wav_out.setnchannels(1)  # mono
        silence = b"\x00\x00" * int(sample_rate_hz * seconds)
        wav_out.writeframes(silence)
    return path


def _python_emit_command(transcript_text: str, output_placeholder: str = "{output_path}") -> str:
    """Build a shell command that writes ``transcript_text`` to the output file.

    The command runs the current interpreter with an inline ``-c`` script and
    passes ``output_placeholder`` (``{output_path}`` by default) as the
    script's only argument, which is the path the script opens for writing.
    """
    # repr() embeds the text as a Python literal. The script itself is wrapped
    # in double quotes, so this relies on repr() picking single quotes for the
    # literal; a text containing a single quote would make repr() switch to
    # double quotes and break the outer quoting.
    script = "import sys; open(sys.argv[1], 'w').write(" + repr(transcript_text) + ")"
    return '"{}" -c "{}" {}'.format(sys.executable, script, output_placeholder)


def _python_emit_stdout_command(transcript_text: str) -> str:
    """Build a shell command that prints ``transcript_text`` to stdout only.

    The command runs the current interpreter with an inline ``-c`` script and
    takes no output-path argument, so nothing is written to a file.
    """
    script = "import sys; sys.stdout.write(" + repr(transcript_text) + ")"
    return '"{}" -c "{}"'.format(sys.executable, script)


# ---------------------------------------------------------------------------
# _resolve_command_stt_provider_config / built-in precedence
# ---------------------------------------------------------------------------


class TestResolveCommandSTTProviderConfig:
    """Resolution rules of ``_resolve_command_stt_provider_config``.

    Each test hands the resolver a provider name plus a config dict and
    checks whether the entry is accepted as a command provider (a value is
    returned) or rejected (``None``).
    """

    def test_builtin_names_are_never_command_providers(self):
        # Register a command entry under EVERY built-in name, not only a
        # hand-maintained sample. Otherwise a built-in that is absent from the
        # sample would pass through the "missing entry" path without ever
        # exercising the built-in precedence rule.
        assert BUILTIN_STT_PROVIDERS, "expected at least one built-in STT provider"
        sample_names = ("openai", "groq", "local", "local_command", "mistral", "xai")
        cfg = {
            "providers": {
                name: {"type": "command", "command": "echo hi"}
                for name in (*sample_names, *BUILTIN_STT_PROVIDERS)
            },
        }
        for name in BUILTIN_STT_PROVIDERS:
            assert _resolve_command_stt_provider_config(name, cfg) is None

    def test_missing_provider_returns_none(self):
        cfg = {"providers": {}}
        assert _resolve_command_stt_provider_config("nope", cfg) is None

    def test_empty_provider_returns_none(self):
        # Both the empty string and None count as "no provider selected".
        assert _resolve_command_stt_provider_config("", {}) is None
        assert _resolve_command_stt_provider_config(None, {}) is None  # type: ignore[arg-type]

    def test_none_provider_short_circuits(self):
        # "none" is the auto-detect-failed sentinel; never a command provider.
        cfg = {
            "providers": {
                "none": {"type": "command", "command": "echo hi"},
            },
        }
        assert _resolve_command_stt_provider_config("none", cfg) is None

    def test_provider_without_command_field_returns_none(self):
        cfg = {"providers": {"my-cli": {"type": "command"}}}
        assert _resolve_command_stt_provider_config("my-cli", cfg) is None

    def test_provider_with_empty_command_returns_none(self):
        # A whitespace-only command is treated the same as a missing one.
        cfg = {"providers": {"my-cli": {"type": "command", "command": "  "}}}
        assert _resolve_command_stt_provider_config("my-cli", cfg) is None

    def test_provider_with_explicit_type_other_than_command_returns_none(self):
        cfg = {"providers": {"my-cli": {"type": "http", "command": "echo hi"}}}
        assert _resolve_command_stt_provider_config("my-cli", cfg) is None

    def test_provider_with_command_string_and_no_type_resolves(self):
        # ``type`` is optional: a bare ``command`` string is enough.
        cfg = {"providers": {"my-cli": {"command": "whisper {input_path}"}}}
        result = _resolve_command_stt_provider_config("my-cli", cfg)
        assert result is not None
        assert result["command"] == "whisper {input_path}"

    def test_provider_with_explicit_type_command_resolves(self):
        cfg = {"providers": {"my-cli": {"type": "command", "command": "echo hi"}}}
        result = _resolve_command_stt_provider_config("my-cli", cfg)
        assert result is not None

    def test_resolution_is_case_insensitive(self):
        # Upper-cased and whitespace-padded names resolve to the same entry.
        cfg = {"providers": {"my-cli": {"type": "command", "command": "echo hi"}}}
        assert _resolve_command_stt_provider_config("MY-CLI", cfg) is not None
        assert _resolve_command_stt_provider_config(" my-cli ", cfg) is not None


# ---------------------------------------------------------------------------
# _get_named_stt_provider_config: legacy stt.<name> fallback
# ---------------------------------------------------------------------------


class TestGetNamedSTTProviderConfig:
    """Lookup behaviour of ``_get_named_stt_provider_config``.

    Covers the canonical ``providers.<name>`` location, the legacy top-level
    ``<name>`` fallback, and which of the two wins when both are present.
    """

    def test_canonical_stt_providers_lookup(self):
        stt_section = {"providers": {"my-cli": {"command": "whisper {input_path}"}}}
        found = _get_named_stt_provider_config(stt_section, "my-cli")
        assert found == {"command": "whisper {input_path}"}

    def test_legacy_stt_dot_name_fallback(self):
        # A custom provider configured with the built-in layout
        # (stt.<name>.* instead of stt.providers.<name>.*) is still found.
        stt_section = {"my-cli": {"command": "whisper {input_path}"}}
        found = _get_named_stt_provider_config(stt_section, "my-cli")
        assert found == {"command": "whisper {input_path}"}

    def test_builtin_name_is_not_legacy_resolved(self):
        # stt.openai carries model/language but no command, so it must NOT
        # be picked up as a command provider through the legacy path.
        stt_section = {"openai": {"model": "whisper-1", "language": "en"}}
        found = _get_named_stt_provider_config(stt_section, "openai")
        assert found == {}

    def test_missing_returns_empty(self):
        for stt_section in ({}, {"providers": {}}):
            assert _get_named_stt_provider_config(stt_section, "nope") == {}

    def test_canonical_wins_over_legacy(self):
        stt_section = {
            "providers": {"my-cli": {"command": "canonical"}},
            "my-cli": {"command": "legacy"},
        }
        found = _get_named_stt_provider_config(stt_section, "my-cli")
        assert found["command"] == "canonical"


# ---------------------------------------------------------------------------
# Helpers: timeout / format / iter / has-any
# ---------------------------------------------------------------------------


class TestSTTCommandHelpers:
    """Small config helpers: timeout, output format, provider iteration."""

    # -- timeout ----------------------------------------------------------

    def test_timeout_uses_default_when_missing(self):
        resolved = _get_command_stt_timeout({})
        assert resolved == DEFAULT_COMMAND_STT_TIMEOUT_SECONDS

    def test_timeout_accepts_int_and_float(self):
        for raw, expected in ((5, 5.0), (2.5, 2.5)):
            assert _get_command_stt_timeout({"timeout": raw}) == expected

    def test_timeout_falls_back_when_invalid(self):
        # Non-numeric, negative and zero values all revert to the default.
        for bad_value in ("not-a-number", -5, 0):
            resolved = _get_command_stt_timeout({"timeout": bad_value})
            assert resolved == DEFAULT_COMMAND_STT_TIMEOUT_SECONDS

    def test_timeout_legacy_key(self):
        provider_cfg = {"timeout_seconds": 7}
        assert _get_command_stt_timeout(provider_cfg) == 7.0

    # -- output format ----------------------------------------------------

    def test_output_format_defaults_to_txt(self):
        resolved = _get_command_stt_output_format({})
        assert resolved == DEFAULT_COMMAND_STT_OUTPUT_FORMAT
        assert DEFAULT_COMMAND_STT_OUTPUT_FORMAT == "txt"

    def test_output_format_validates_against_allowed_set(self):
        for allowed in COMMAND_STT_OUTPUT_FORMATS:
            provider_cfg = {"format": allowed}
            assert _get_command_stt_output_format(provider_cfg) == allowed

    def test_output_format_rejects_unknown(self):
        # Unknown extensions and path-like values revert to the default.
        for rejected in ("exe", "../etc/passwd"):
            resolved = _get_command_stt_output_format({"format": rejected})
            assert resolved == DEFAULT_COMMAND_STT_OUTPUT_FORMAT

    def test_output_format_strips_leading_dot(self):
        provider_cfg = {"format": ".json"}
        assert _get_command_stt_output_format(provider_cfg) == "json"

    def test_output_format_legacy_key(self):
        provider_cfg = {"output_format": "srt"}
        assert _get_command_stt_output_format(provider_cfg) == "srt"

    # -- iteration / presence ---------------------------------------------

    def test_iter_command_providers_yields_only_command_type(self):
        providers = {
            "cmd-one": {"type": "command", "command": "x"},
            "no-cmd": {"type": "command"},  # no command field
            "wrong-type": {"type": "http", "command": "x"},
            "cmd-two": {"command": "y"},  # implicit type
        }
        yielded = set()
        for provider_name, _provider_cfg in _iter_command_stt_providers({"providers": providers}):
            yielded.add(provider_name)
        assert yielded == {"cmd-one", "cmd-two"}

    def test_iter_command_providers_excludes_builtins(self):
        # Defense in depth: a built-in name registered as a command provider
        # is silently left out of the iteration.
        providers = {
            "openai": {"type": "command", "command": "x"},
            "groq": {"command": "y"},
            "custom": {"command": "z"},
        }
        yielded = set()
        for provider_name, _provider_cfg in _iter_command_stt_providers({"providers": providers}):
            yielded.add(provider_name)
        assert yielded == {"custom"}

    def test_has_any_command_provider_false_when_none_configured(self):
        empty_cfg = {"providers": {}}
        assert _has_any_command_stt_provider(empty_cfg) is False

    def test_has_any_command_provider_true_when_one_configured(self):
        one_provider_cfg = {"providers": {"custom": {"command": "x"}}}
        assert _has_any_command_stt_provider(one_provider_cfg) is True


# ---------------------------------------------------------------------------
# Template rendering
# ---------------------------------------------------------------------------


class TestRenderCommandSTTTemplate:
    """Placeholder substitution and quoting in ``_render_command_stt_template``.

    Each test renders a small command template against a placeholder dict
    and inspects the resulting command string.
    """

    def test_renders_all_placeholders(self):
        rendered = _render_command_stt_template(
            "whisper {input_path} -o {output_path} --lang {language} --model {model}",
            {
                "input_path": "/tmp/audio.wav",
                "output_path": "/tmp/out.txt",
                "output_dir": "/tmp",
                "format": "txt",
                "language": "en",
                "model": "base",
            },
        )
        assert "/tmp/audio.wav" in rendered
        assert "/tmp/out.txt" in rendered
        assert "en" in rendered
        assert "base" in rendered

    def test_preserves_doubled_braces(self):
        rendered = _render_command_stt_template(
            'echo {{"foo": {input_path}}}',
            {"input_path": "audio.wav"},
        )
        # Doubled braces collapse to single braces — JSON snippets survive.
        assert rendered.startswith('echo {"foo":')
        assert rendered.endswith('}')
        assert "audio.wav" in rendered

    def test_shell_quote_outside_quotes_uses_shlex(self):
        # Render on every platform so a crash in the renderer still surfaces.
        rendered = _render_command_stt_template(
            "whisper {input_path}",
            {"input_path": "/tmp/has space.wav"},
        )
        # The single-quote wrapping below is only asserted off Windows.
        # Report an explicit skip there instead of passing with no assertion.
        if os.name == "nt":
            pytest.skip("single-quote wrapping is only asserted on non-Windows platforms")
        # shlex.quote wraps strings with whitespace in single quotes.
        assert "'/tmp/has space.wav'" in rendered

    def test_shell_quote_inside_single_quotes(self):
        rendered = _render_command_stt_template(
            "whisper '{input_path}'",
            {"input_path": "/tmp/he's-here.wav"},
        )
        # Inside '...': use the '\'' trick.
        assert r"he'\''s-here" in rendered

    def test_shell_quote_inside_double_quotes(self):
        rendered = _render_command_stt_template(
            'whisper "{input_path}"',
            {"input_path": "$VAR.wav"},
        )
        # Inside "...": the dollar sign must come out backslash-escaped.
        assert r"\$VAR.wav" in rendered

    def test_placeholder_not_in_dict_passes_through(self):
        # Unknown placeholder isn't replaced — preserves literal text.
        rendered = _render_command_stt_template(
            "echo {unknown_name}",
            {"input_path": "x"},
        )
        assert rendered == "echo {unknown_name}"


# ---------------------------------------------------------------------------
# _transcribe_command_stt: end-to-end via the runner
# ---------------------------------------------------------------------------


class TestTranscribeCommandSTT:
    """Run ``_transcribe_command_stt`` against real (tiny) subprocess commands.

    Every command is an inline ``python -c`` script launched with the current
    interpreter, so no STT engine is required.
    """

    @staticmethod
    def _echo_placeholder_command(placeholder: str) -> str:
        """Return a command that writes the rendered ``{placeholder}`` value to ``{output_path}``.

        The inline script receives the placeholder value as ``argv[1]`` and
        the output path as ``argv[2]``, so the transcript read back by the
        runner is exactly the value the template renderer substituted.
        """
        payload = "import sys; open(sys.argv[2], 'w').write(sys.argv[1])"
        return f'"{sys.executable}" -c "{payload}" {{{placeholder}}} {{output_path}}'

    def test_writes_transcript_to_output_path(self, tmp_path):
        audio = _make_silent_wav(tmp_path / "input.wav")
        cfg = {
            "type": "command",
            "command": _python_emit_command("hello world"),
        }
        result = _transcribe_command_stt(str(audio), "fake-cli", cfg, {})
        assert result["success"] is True
        assert result["transcript"] == "hello world"
        assert result["provider"] == "fake-cli"

    def test_reads_transcript_from_stdout_when_no_file(self, tmp_path):
        audio = _make_silent_wav(tmp_path / "input.wav")
        cfg = {
            "type": "command",
            "command": _python_emit_stdout_command("stdout transcript"),
        }
        result = _transcribe_command_stt(str(audio), "fake-cli", cfg, {})
        assert result["success"] is True
        assert result["transcript"] == "stdout transcript"

    def test_missing_command_returns_error(self, tmp_path):
        audio = _make_silent_wav(tmp_path / "input.wav")
        result = _transcribe_command_stt(str(audio), "fake-cli", {}, {})
        assert result["success"] is False
        assert "command is not configured" in result["error"]

    def test_missing_audio_returns_error(self, tmp_path):
        cfg = {"command": _python_emit_command("x")}
        result = _transcribe_command_stt(
            str(tmp_path / "does-not-exist.wav"), "fake-cli", cfg, {},
        )
        assert result["success"] is False
        assert "Audio file not found" in result["error"]

    def test_nonzero_exit_returns_error_with_stderr(self, tmp_path):
        audio = _make_silent_wav(tmp_path / "input.wav")
        # Use a command that fails reliably across platforms.
        cfg = {
            "command": (
                f'"{sys.executable}" -c "import sys; sys.stderr.write(\'boom\'); sys.exit(7)"'
            ),
        }
        result = _transcribe_command_stt(str(audio), "fake-cli", cfg, {})
        assert result["success"] is False
        assert "exited with code 7" in result["error"]
        assert "boom" in result["error"]

    def test_timeout_returns_clean_error(self, tmp_path):
        audio = _make_silent_wav(tmp_path / "input.wav")
        # The child sleeps far longer than the configured timeout.
        cfg = {
            "command": f'"{sys.executable}" -c "import time; time.sleep(5)"',
            "timeout": 0.5,
        }
        result = _transcribe_command_stt(str(audio), "slow-cli", cfg, {})
        assert result["success"] is False
        assert "timed out after" in result["error"]

    def test_model_override_passed_to_template(self, tmp_path):
        audio = _make_silent_wav(tmp_path / "input.wav")
        # Write the model into the transcript so we can assert it propagated.
        cfg = {
            "command": self._echo_placeholder_command("model"),
            "model": "config-model",
        }
        result = _transcribe_command_stt(
            str(audio), "fake-cli", cfg, {}, model_override="override-model",
        )
        assert result["success"] is True, result.get("error")
        assert result["transcript"] == "override-model"

    def test_config_model_used_when_no_override(self, tmp_path):
        audio = _make_silent_wav(tmp_path / "input.wav")
        cfg = {
            "command": self._echo_placeholder_command("model"),
            "model": "config-model",
        }
        result = _transcribe_command_stt(str(audio), "fake-cli", cfg, {})
        # Check success first so a failed command reports its error message
        # instead of an opaque KeyError / mismatch on "transcript".
        assert result["success"] is True, result.get("error")
        assert result["transcript"] == "config-model"

    def test_language_from_provider_config_wins(self, tmp_path):
        audio = _make_silent_wav(tmp_path / "input.wav")
        cfg = {
            "command": self._echo_placeholder_command("language"),
            "language": "fr",
        }
        # stt.language is "es" but provider config says "fr" — provider wins.
        result = _transcribe_command_stt(
            str(audio), "fake-cli", cfg, {"language": "es"},
        )
        assert result["success"] is True, result.get("error")
        assert result["transcript"] == "fr"

    def test_language_falls_back_to_stt_section(self, tmp_path):
        audio = _make_silent_wav(tmp_path / "input.wav")
        cfg = {
            "command": self._echo_placeholder_command("language"),
        }
        result = _transcribe_command_stt(
            str(audio), "fake-cli", cfg, {"language": "ja"},
        )
        assert result["success"] is True, result.get("error")
        assert result["transcript"] == "ja"

    def test_language_defaults_to_en(self, tmp_path):
        audio = _make_silent_wav(tmp_path / "input.wav")
        cfg = {
            "command": self._echo_placeholder_command("language"),
        }
        result = _transcribe_command_stt(str(audio), "fake-cli", cfg, {})
        assert result["success"] is True, result.get("error")
        # Neither the provider config nor the stt section names a language.
        assert result["transcript"] == DEFAULT_COMMAND_STT_LANGUAGE


# ---------------------------------------------------------------------------
# End-to-end via transcribe_audio(): dispatcher integration
# ---------------------------------------------------------------------------


class TestTranscribeAudioDispatchToCommandProvider:
    """Dispatcher-level checks: which backend ``transcribe_audio()`` selects.

    Every test patches ``_load_stt_config`` so the STT config dict is
    supplied directly by the test rather than loaded from the environment.
    """

    # Patch target that feeds a hand-built STT config into transcribe_audio().
    _LOAD_CONFIG = "tools.transcription_tools._load_stt_config"

    def _config_with_command_provider(self, name: str, command: str) -> dict:
        """Return an STT config that selects ``name`` and defines it as a command provider."""
        provider_entry = {"type": "command", "command": command}
        return {"provider": name, "providers": {name: provider_entry}}

    def test_command_provider_dispatches_via_transcribe_audio(self, tmp_path):
        wav_path = _make_silent_wav(tmp_path / "audio.wav")
        stt_cfg = self._config_with_command_provider(
            "fake-cli", _python_emit_command("dispatched via command")
        )
        with patch(self._LOAD_CONFIG, return_value=stt_cfg):
            outcome = transcribe_audio(str(wav_path))
        assert outcome["success"] is True
        assert outcome["transcript"] == "dispatched via command"
        assert outcome["provider"] == "fake-cli"

    def test_builtin_name_shadow_does_not_route_to_command(self, tmp_path):
        # A user who mis-configures stt.providers.openai as a command must
        # NOT hijack the real OpenAI built-in: the built-in path owns the name.
        # NOTE(review): if real OpenAI credentials are present in the
        # environment this may attempt a live request — confirm the suite
        # isolates them.
        wav_path = _make_silent_wav(tmp_path / "audio.wav")
        hijack_entry = {"type": "command", "command": _python_emit_command("HIJACK")}
        stt_cfg = {"provider": "openai", "providers": {"openai": hijack_entry}}
        with patch(self._LOAD_CONFIG, return_value=stt_cfg):
            # The built-in dispatch is allowed to fail (e.g. no API key); the
            # only requirement is that the command's output never comes back.
            outcome = transcribe_audio(str(wav_path))
        assert outcome.get("transcript") != "HIJACK"

    def test_unknown_provider_no_command_falls_through_to_error(self, tmp_path):
        wav_path = _make_silent_wav(tmp_path / "audio.wav")
        stt_cfg = {"provider": "unknown-cli"}
        with patch(self._LOAD_CONFIG, return_value=stt_cfg):
            outcome = transcribe_audio(str(wav_path))
        assert outcome["success"] is False
        assert "No STT provider available" in outcome["error"]


# ---------------------------------------------------------------------------
# Command vs plugin precedence
# ---------------------------------------------------------------------------


class TestCommandWinsOverPlugin:
    """Precedence between a configured command provider and a registered plugin.

    When one name has BOTH, the command provider must win — same precedence
    rule as TTS PR #17843 (config is more local than plugin install). With no
    command provider configured, the plugin handles the request.
    """

    @staticmethod
    def _transcribe_with_registered_plugin(plugin_name, stt_cfg, wav_path):
        """Register a fake plugin named ``plugin_name`` and run ``transcribe_audio``.

        The plugin always answers with the transcript ``FROM_PLUGIN``. The
        registry is reset before registration and again afterwards, even when
        the call raises.
        """
        from agent.transcription_provider import TranscriptionProvider
        from agent.transcription_registry import (
            _reset_for_tests,
            register_provider,
        )

        class FakePlugin(TranscriptionProvider):
            @property
            def name(self) -> str:
                return plugin_name

            def transcribe(self, file_path, *, model=None, language=None, **extra):
                return {
                    "success": True,
                    "transcript": "FROM_PLUGIN",
                    "provider": self.name,
                }

        _reset_for_tests()
        try:
            register_provider(FakePlugin())
            with patch("tools.transcription_tools._load_stt_config", return_value=stt_cfg):
                return transcribe_audio(str(wav_path))
        finally:
            _reset_for_tests()

    def test_command_wins_when_both_configured(self, tmp_path):
        wav_path = _make_silent_wav(tmp_path / "audio.wav")
        command_entry = {
            "type": "command",
            "command": _python_emit_command("FROM_COMMAND"),
        }
        stt_cfg = {"provider": "fake-cli", "providers": {"fake-cli": command_entry}}

        # The plugin is registered under the SAME name. It must NOT fire.
        outcome = self._transcribe_with_registered_plugin("fake-cli", stt_cfg, wav_path)

        assert outcome["success"] is True
        assert outcome["transcript"] == "FROM_COMMAND"

    def test_plugin_fires_when_no_command_provider(self, tmp_path):
        wav_path = _make_silent_wav(tmp_path / "audio.wav")
        stt_cfg = {"provider": "fake-plugin"}

        outcome = self._transcribe_with_registered_plugin("fake-plugin", stt_cfg, wav_path)

        assert outcome["success"] is True
        assert outcome["transcript"] == "FROM_PLUGIN"