Merge PR #262: improve error handling and validation in transcription_tools

Authored by aydnOktay. Adds file format and size validation before API calls, specific exception handling, and improved logging.
2026-06-14 09:11:54 +00:00 · 2026-03-04 21:33:03 -08:00 · 2026-03-04 21:33:03 -08:00 · 9306a1e06a
commit 9306a1e06a
parent 141b12bd39 196a13f3dc
1 changed file with 83 additions and 7 deletions
--- a/tools/transcription_tools.py
+++ b/tools/transcription_tools.py
@@ -24,7 +24,7 @@ Usage:
 import logging
 import os
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Dict, Any

 logger = logging.getLogger(__name__)

@@ -32,8 +32,14 @@ logger = logging.getLogger(__name__)
 # Default STT model -- cheapest and widely available
 DEFAULT_STT_MODEL = "whisper-1"

+# Supported audio formats
+SUPPORTED_FORMATS = {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".ogg"}

-def transcribe_audio(file_path: str, model: Optional[str] = None) -> dict:
+# Maximum file size (25MB - OpenAI limit)
+MAX_FILE_SIZE = 25 * 1024 * 1024
+
+
+def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, Any]:
    """
    Transcribe an audio file using OpenAI's Whisper API.

@@ -55,16 +61,50 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> dict:
        return {
            "success": False,
            "transcript": "",
-            "error": "VOICE_TOOLS_OPENAI_KEY not set",
+            "error": "VOICE_TOOLS_OPENAI_KEY or OPENAI_API_KEY not set",
        }

    audio_path = Path(file_path)
-    if not audio_path.is_file():
+    
+    # Validate file exists
+    if not audio_path.exists():
        return {
            "success": False,
            "transcript": "",
            "error": f"Audio file not found: {file_path}",
        }
+    
+    if not audio_path.is_file():
+        return {
+            "success": False,
+            "transcript": "",
+            "error": f"Path is not a file: {file_path}",
+        }
+    
+    # Validate file extension
+    if audio_path.suffix.lower() not in SUPPORTED_FORMATS:
+        return {
+            "success": False,
+            "transcript": "",
+            "error": f"Unsupported file format: {audio_path.suffix}. Supported formats: {', '.join(SUPPORTED_FORMATS)}",
+        }
+    
+    # Validate file size
+    try:
+        file_size = audio_path.stat().st_size
+        if file_size > MAX_FILE_SIZE:
+            return {
+                "success": False,
+                "transcript": "",
+                "error": f"File too large: {file_size / (1024*1024):.1f}MB (max {MAX_FILE_SIZE / (1024*1024)}MB)",
+            }
+    except OSError as e:
+        logger.error("Failed to get file size for %s: %s", file_path, e, exc_info=True)
+        return {
+            "success": False,
+            "transcript": "",
+            "error": f"Failed to access file: {e}",
+        }

    # Use provided model, or fall back to default
    if model is None:
@@ -72,6 +112,7 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> dict:

    try:
        from openai import OpenAI
+        from openai import APIError, APIConnectionError, APITimeoutError

        client = OpenAI(api_key=api_key, base_url="https://api.openai.com/v1")

@@ -92,10 +133,45 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> dict:
            "transcript": transcript_text,
        }

-    except Exception as e:
-        logger.error("Transcription error: %s", e)
+    except FileNotFoundError:
+        logger.error("Audio file not found: %s", file_path, exc_info=True)
        return {
            "success": False,
            "transcript": "",
-            "error": str(e),
+            "error": f"Audio file not found: {file_path}",
+        }
+    except PermissionError:
+        logger.error("Permission denied accessing file: %s", file_path, exc_info=True)
+        return {
+            "success": False,
+            "transcript": "",
+            "error": f"Permission denied: {file_path}",
+        }
+    except APIConnectionError as e:
+        logger.error("API connection error during transcription: %s", e, exc_info=True)
+        return {
+            "success": False,
+            "transcript": "",
+            "error": f"Connection error: {e}",
+        }
+    except APITimeoutError as e:
+        logger.error("API timeout during transcription: %s", e, exc_info=True)
+        return {
+            "success": False,
+            "transcript": "",
+            "error": f"Request timeout: {e}",
+        }
+    except APIError as e:
+        logger.error("OpenAI API error during transcription: %s", e, exc_info=True)
+        return {
+            "success": False,
+            "transcript": "",
+            "error": f"API error: {e}",
+        }
+    except Exception as e:
+        logger.error("Unexpected error during transcription: %s", e, exc_info=True)
+        return {
+            "success": False,
+            "transcript": "",
+            "error": f"Transcription failed: {e}",
        }