fix(transcription): reject symlinked audio inputs (#10082)

* fix(transcription): reject symlinked audio inputs Validation runs before provider selection, so rejecting symbolic-link paths there prevents supported-extension links from being treated as normal audio files. Use os.path.islink to avoid perturbing the existing Path.stat error path and to reject links before resolving targets. Constraint: Keep validation platform-safe and avoid requiring symlink support where unavailable. Rejected: Use Path.is_symlink | it consumes pathlib stat calls and broke the existing stat error regression. Confidence: high Scope-risk: narrow Directive: Keep path hardening in _validate_audio_file before provider dispatch. Tested: source venv/bin/activate && python -m pytest tests/tools/test_transcription_tools.py::TestValidateAudioFileEdgeCases -q (5 passed) Tested: source venv/bin/activate && python -m pytest tests/tools/test_transcription_tools.py::TestValidateAudioFileEdgeCases tests/tools/test_transcription_tools.py::TestTranscribeAudioDispatch::test_invalid_file_short_circuits -q (6 passed) Tested: source venv/bin/activate && python -m compileall tools/transcription_tools.py tests/tools/test_transcription_tools.py Tested: git diff --check Not-tested: Full tests/tools/test_transcription_tools.py under .[dev] only; existing faster_whisper optional dependency tests fail with ModuleNotFoundError. * Keep transcription tests independent of optional whisper install The transcription suite mocks faster-whisper directly, so a minimal test stub keeps the branch verifiable in environments where the optional package is not installed. This preserves the existing mock-based coverage without adding a dependency. Constraint: faster-whisper is an optional local STT dependency and is absent from the current validation environment Rejected: Install faster-whisper just for branch validation | would add heavyweight environment coupling outside the patch scope Confidence: high Scope-risk: narrow Directive: Keep this as a test-only stub unless production import semantics change Tested: pytest tests/tools/test_transcription_tools.py -q --------- Co-authored-by: WuKongAI-CMU <210765158+WuKongAI-CMU@users.noreply.github.com>
2026-07-13 14:02:16 +00:00 · 2026-05-25 08:07:45 -04:00 · 2026-05-25 08:07:45 -04:00 · 95848b1cbc
commit 95848b1cbc
parent ee59ef1946
2 changed files with 26 additions and 0 deletions
--- a/tests/tools/test_transcription_tools.py
+++ b/tests/tools/test_transcription_tools.py
@ -6,13 +6,20 @@ end-to-end dispatch.  All external dependencies are mocked.
 """

 import os
+import sys
 import struct
 import subprocess
+import types
 import wave
 from unittest.mock import MagicMock, patch

 import pytest

+if "faster_whisper" not in sys.modules:
+    faster_whisper_stub = types.ModuleType("faster_whisper")
+    faster_whisper_stub.WhisperModel = MagicMock(name="WhisperModel")
+    sys.modules["faster_whisper"] = faster_whisper_stub
+

 # ============================================================================
 # Fixtures
@ -761,6 +768,23 @@ class TestValidateAudioFileEdgeCases:
        assert result is not None
        assert "not a file" in result["error"]

+    def test_symlink_with_supported_extension_is_rejected(self, tmp_path):
+        if not hasattr(os, "symlink"):
+            pytest.skip("symlinks are not supported on this platform")
+
+        target = tmp_path / "target.txt"
+        target.write_bytes(b"not audio")
+        link = tmp_path / "linked.wav"
+        try:
+            os.symlink(target, link)
+        except (OSError, NotImplementedError) as exc:
+            pytest.skip(f"symlink creation unavailable: {exc}")
+
+        from tools.transcription_tools import _validate_audio_file
+        result = _validate_audio_file(str(link))
+        assert result is not None
+        assert "symbolic link" in result["error"]
+
    def test_stat_oserror(self, tmp_path):
        f = tmp_path / "test.ogg"
        f.write_bytes(b"data")
--- a/tools/transcription_tools.py
+++ b/tools/transcription_tools.py
@ -998,6 +998,8 @@ def _validate_audio_file(file_path: str) -> Optional[Dict[str, Any]]:
    """Validate the audio file.  Returns an error dict or None if OK."""
    audio_path = Path(file_path)

+    if os.path.islink(audio_path):
+        return {"success": False, "transcript": "", "error": f"Path is a symbolic link: {file_path}"}
    if not audio_path.exists():
        return {"success": False, "transcript": "", "error": f"Audio file not found: {file_path}"}
    if not audio_path.is_file():