From 95848b1cbcf3549490cc413b701080d26d33e0f5 Mon Sep 17 00:00:00 2001 From: Peter Date: Mon, 25 May 2026 08:07:45 -0400 Subject: [PATCH] fix(transcription): reject symlinked audio inputs (#10082) * fix(transcription): reject symlinked audio inputs Validation runs before provider selection, so rejecting symbolic-link paths there prevents supported-extension links from being treated as normal audio files. Use os.path.islink to avoid perturbing the existing Path.stat error path and to reject links before resolving targets. Constraint: Keep validation platform-safe and avoid requiring symlink support where unavailable. Rejected: Use Path.is_symlink | it consumes pathlib stat calls and broke the existing stat error regression. Confidence: high Scope-risk: narrow Directive: Keep path hardening in _validate_audio_file before provider dispatch. Tested: source venv/bin/activate && python -m pytest tests/tools/test_transcription_tools.py::TestValidateAudioFileEdgeCases -q (5 passed) Tested: source venv/bin/activate && python -m pytest tests/tools/test_transcription_tools.py::TestValidateAudioFileEdgeCases tests/tools/test_transcription_tools.py::TestTranscribeAudioDispatch::test_invalid_file_short_circuits -q (6 passed) Tested: source venv/bin/activate && python -m compileall tools/transcription_tools.py tests/tools/test_transcription_tools.py Tested: git diff --check Not-tested: Full tests/tools/test_transcription_tools.py under .[dev] only; existing faster_whisper optional dependency tests fail with ModuleNotFoundError. * Keep transcription tests independent of optional whisper install The transcription suite mocks faster-whisper directly, so a minimal test stub keeps the branch verifiable in environments where the optional package is not installed. This preserves the existing mock-based coverage without adding a dependency. Constraint: faster-whisper is an optional local STT dependency and is absent from the current validation environment Rejected: Install faster-whisper just for branch validation | would add heavyweight environment coupling outside the patch scope Confidence: high Scope-risk: narrow Directive: Keep this as a test-only stub unless production import semantics change Tested: pytest tests/tools/test_transcription_tools.py -q --------- Co-authored-by: WuKongAI-CMU <210765158+WuKongAI-CMU@users.noreply.github.com> --- tests/tools/test_transcription_tools.py | 24 ++++++++++++++++++++++++ tools/transcription_tools.py | 2 ++ 2 files changed, 26 insertions(+) diff --git a/tests/tools/test_transcription_tools.py b/tests/tools/test_transcription_tools.py index c7cf8950239..a424a06e17a 100644 --- a/tests/tools/test_transcription_tools.py +++ b/tests/tools/test_transcription_tools.py @@ -6,13 +6,20 @@ end-to-end dispatch. All external dependencies are mocked. """ import os +import sys import struct import subprocess +import types import wave from unittest.mock import MagicMock, patch import pytest +if "faster_whisper" not in sys.modules: + faster_whisper_stub = types.ModuleType("faster_whisper") + faster_whisper_stub.WhisperModel = MagicMock(name="WhisperModel") + sys.modules["faster_whisper"] = faster_whisper_stub + # ============================================================================ # Fixtures @@ -761,6 +768,23 @@ class TestValidateAudioFileEdgeCases: assert result is not None assert "not a file" in result["error"] + def test_symlink_with_supported_extension_is_rejected(self, tmp_path): + if not hasattr(os, "symlink"): + pytest.skip("symlinks are not supported on this platform") + + target = tmp_path / "target.txt" + target.write_bytes(b"not audio") + link = tmp_path / "linked.wav" + try: + os.symlink(target, link) + except (OSError, NotImplementedError) as exc: + pytest.skip(f"symlink creation unavailable: {exc}") + + from tools.transcription_tools import _validate_audio_file + result = _validate_audio_file(str(link)) + assert result is not None + assert "symbolic link" in result["error"] + def test_stat_oserror(self, tmp_path): f = tmp_path / "test.ogg" f.write_bytes(b"data") diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index 0a8e6e5054f..91396cca93e 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -998,6 +998,8 @@ def _validate_audio_file(file_path: str) -> Optional[Dict[str, Any]]: """Validate the audio file. Returns an error dict or None if OK.""" audio_path = Path(file_path) + if os.path.islink(audio_path): + return {"success": False, "transcript": "", "error": f"Path is a symbolic link: {file_path}"} if not audio_path.exists(): return {"success": False, "transcript": "", "error": f"Audio file not found: {file_path}"} if not audio_path.is_file():