fix(security): patch path traversal, size bypass, and prompt injection in document processing

- Sanitize filenames in cache_document_from_bytes to prevent path traversal (strip directory components, null bytes, resolve check)
- Reject documents with None file_size instead of silently allowing download
- Cap text file injection at 100 KB to prevent oversized prompt payloads
- Sanitize display_name in run.py context notes to block prompt injection via filenames
- Add 35 unit tests covering document cache utilities and Telegram document handling

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-27 01:11:40 +00:00 · 2026-02-27 11:53:46 -05:00 · 2026-02-27 11:53:46 -05:00 · fbb1923fad
commit fbb1923fad
parent b2172c4b2e
5 changed files with 516 additions and 6 deletions
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -209,11 +209,21 @@ def cache_document_from_bytes(data: bytes, filename: str) -> str:

    Returns:
        Absolute path to the cached document file as a string.
+
+    Raises:
+        ValueError: If the sanitized path escapes the cache directory.
    """
    cache_dir = get_document_cache_dir()
-    safe_name = filename if filename else "document"
+    # Sanitize: strip directory components, null bytes, and control characters
+    safe_name = Path(filename).name if filename else "document"
+    safe_name = safe_name.replace("\x00", "").strip()
+    if not safe_name or safe_name in (".", ".."):
+        safe_name = "document"
    cached_name = f"doc_{uuid.uuid4().hex[:12]}_{safe_name}"
    filepath = cache_dir / cached_name
+    # Final safety check: ensure path stays inside cache dir
+    if not filepath.resolve().is_relative_to(cache_dir.resolve()):
+        raise ValueError(f"Path traversal rejected: {filename!r}")
    filepath.write_bytes(data)
    return str(filepath)