mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(transcription): reject symlinked audio inputs (#10082)
* fix(transcription): reject symlinked audio inputs

  Validation runs before provider selection, so rejecting symbolic-link paths there prevents supported-extension links from being treated as normal audio files. Use os.path.islink to avoid perturbing the existing Path.stat error path and to reject links before resolving targets.

  Constraint: Keep validation platform-safe and avoid requiring symlink support where unavailable.
  Rejected: Use Path.is_symlink | it consumes pathlib stat calls and broke the existing stat error regression.
  Confidence: high
  Scope-risk: narrow
  Directive: Keep path hardening in _validate_audio_file before provider dispatch.
  Tested: source venv/bin/activate && python -m pytest tests/tools/test_transcription_tools.py::TestValidateAudioFileEdgeCases -q (5 passed)
  Tested: source venv/bin/activate && python -m pytest tests/tools/test_transcription_tools.py::TestValidateAudioFileEdgeCases tests/tools/test_transcription_tools.py::TestTranscribeAudioDispatch::test_invalid_file_short_circuits -q (6 passed)
  Tested: source venv/bin/activate && python -m compileall tools/transcription_tools.py tests/tools/test_transcription_tools.py
  Tested: git diff --check
  Not-tested: Full tests/tools/test_transcription_tools.py under .[dev] only; existing faster_whisper optional dependency tests fail with ModuleNotFoundError.

* Keep transcription tests independent of optional whisper install

  The transcription suite mocks faster-whisper directly, so a minimal test stub keeps the branch verifiable in environments where the optional package is not installed. This preserves the existing mock-based coverage without adding a dependency.

  Constraint: faster-whisper is an optional local STT dependency and is absent from the current validation environment
  Rejected: Install faster-whisper just for branch validation | would add heavyweight environment coupling outside the patch scope
  Confidence: high
  Scope-risk: narrow
  Directive: Keep this as a test-only stub unless production import semantics change
  Tested: pytest tests/tools/test_transcription_tools.py -q

---------

Co-authored-by: WuKongAI-CMU <210765158+WuKongAI-CMU@users.noreply.github.com>
This commit is contained in:
parent
ee59ef1946
commit
95848b1cbc
2 changed files with 26 additions and 0 deletions
|
|
@@ -6,13 +6,20 @@ end-to-end dispatch. All external dependencies are mocked.
|
|||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import struct
|
||||
import subprocess
|
||||
import types
|
||||
import wave
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
if "faster_whisper" not in sys.modules:
|
||||
faster_whisper_stub = types.ModuleType("faster_whisper")
|
||||
faster_whisper_stub.WhisperModel = MagicMock(name="WhisperModel")
|
||||
sys.modules["faster_whisper"] = faster_whisper_stub
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Fixtures
|
||||
|
|
@@ -761,6 +768,23 @@ class TestValidateAudioFileEdgeCases:
|
|||
assert result is not None
|
||||
assert "not a file" in result["error"]
|
||||
|
||||
def test_symlink_with_supported_extension_is_rejected(self, tmp_path):
|
||||
if not hasattr(os, "symlink"):
|
||||
pytest.skip("symlinks are not supported on this platform")
|
||||
|
||||
target = tmp_path / "target.txt"
|
||||
target.write_bytes(b"not audio")
|
||||
link = tmp_path / "linked.wav"
|
||||
try:
|
||||
os.symlink(target, link)
|
||||
except (OSError, NotImplementedError) as exc:
|
||||
pytest.skip(f"symlink creation unavailable: {exc}")
|
||||
|
||||
from tools.transcription_tools import _validate_audio_file
|
||||
result = _validate_audio_file(str(link))
|
||||
assert result is not None
|
||||
assert "symbolic link" in result["error"]
|
||||
|
||||
def test_stat_oserror(self, tmp_path):
|
||||
f = tmp_path / "test.ogg"
|
||||
f.write_bytes(b"data")
|
||||
|
|
|
|||
|
|
@@ -998,6 +998,8 @@ def _validate_audio_file(file_path: str) -> Optional[Dict[str, Any]]:
|
|||
"""Validate the audio file. Returns an error dict or None if OK."""
|
||||
audio_path = Path(file_path)
|
||||
|
||||
if os.path.islink(audio_path):
|
||||
return {"success": False, "transcript": "", "error": f"Path is a symbolic link: {file_path}"}
|
||||
if not audio_path.exists():
|
||||
return {"success": False, "transcript": "", "error": f"Audio file not found: {file_path}"}
|
||||
if not audio_path.is_file():
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue