From 77c4675a50db7abbfd191d4fba4746b4f3e1559e Mon Sep 17 00:00:00 2001
From: kiranvk2011 <kiranvk2011@gmail.com>
Date: Wed, 13 May 2026 03:26:09 +0000
Subject: [PATCH] fix(telegram): route image documents (.png/.jpg/.webp/.gif)
 through vision pipeline

When users send images as documents (Telegram file picker), they were
rejected with "Unsupported document type" because SUPPORTED_DOCUMENT_TYPES
only includes text/office formats. Add SUPPORTED_IMAGE_DOCUMENT_TYPES
to base.py and handle them in telegram.py before the document check.

- Add SUPPORTED_IMAGE_DOCUMENT_TYPES constant to base.py
- Add MIME reverse-lookup for image types in telegram.py
- Route image documents through cache_image_from_bytes + vision pipeline
- Handle media groups for image documents

Closes: #20128, #18620
---
 gateway/platforms/base.py     | 20 ++++++++++++++++++++
 gateway/platforms/telegram.py | 24 ++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index d507c60bd77..5157593ac57 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -837,6 +837,26 @@ SUPPORTED_DOCUMENT_TYPES = {
 }
 
 
+# ---------------------------------------------------------------------------
+# Image document types
+#
+# Image extensions that platforms may deliver as "documents" rather than
+# native photo attachments (Telegram users uploading via the file picker,
+# clients that wrap stickers/screenshots as files, etc.). When we see one
+# of these, we route the bytes through the image cache and the normal
+# vision/photo handling path instead of rejecting them as unsupported
+# documents.
+# ---------------------------------------------------------------------------
+
+SUPPORTED_IMAGE_DOCUMENT_TYPES = {
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".png": "image/png",
+    ".webp": "image/webp",
+    ".gif": "image/gif",
+}
+
+
 def get_document_cache_dir() -> Path:
     """Return the document cache directory, creating it if it doesn't exist."""
     DOCUMENT_CACHE_DIR.mkdir(parents=True, exist_ok=True)
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 19063b47dc5..46aeb14b3d8 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -76,6 +76,7 @@ from gateway.platforms.base import (
     resolve_proxy_url,
     SUPPORTED_VIDEO_TYPES,
     SUPPORTED_DOCUMENT_TYPES,
+    SUPPORTED_IMAGE_DOCUMENT_TYPES,
     utf16_len,
 )
 from gateway.platforms.telegram_network import (
@@ -4624,6 +4625,14 @@ class TelegramAdapter(BasePlatformAdapter):
                     video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()}
                     ext = video_mime_to_ext.get(doc.mime_type, "")
 
+                if not ext and doc.mime_type:
+                    # SUPPORTED_IMAGE_DOCUMENT_TYPES has duplicate values (.jpg + .jpeg
+                    # both map to image/jpeg); keep the first ext we encounter.
+                    image_mime_to_ext: dict[str, str] = {}
+                    for _ext, _mime in SUPPORTED_IMAGE_DOCUMENT_TYPES.items():
+                        image_mime_to_ext.setdefault(_mime, _ext)
+                    ext = image_mime_to_ext.get(doc.mime_type, "")
+
                 if ext in SUPPORTED_VIDEO_TYPES:
                     file_obj = await doc.get_file()
                     video_bytes = await file_obj.download_as_bytearray()
@@ -4635,6 +4644,21 @@ class TelegramAdapter(BasePlatformAdapter):
                     await self.handle_message(event)
                     return
 
+                if ext in SUPPORTED_IMAGE_DOCUMENT_TYPES:
+                    file_obj = await doc.get_file()
+                    image_bytes = await file_obj.download_as_bytearray()
+                    cached_path = cache_image_from_bytes(bytes(image_bytes), ext=ext)
+                    event.media_urls = [cached_path]
+                    event.media_types = [SUPPORTED_IMAGE_DOCUMENT_TYPES[ext]]
+                    event.message_type = MessageType.PHOTO
+                    logger.info("[Telegram] Cached user image document at %s", cached_path)
+                    media_group_id = getattr(msg, "media_group_id", None)
+                    if media_group_id:
+                        await self._queue_media_group_event(str(media_group_id), event)
+                    else:
+                        await self.handle_message(event)
+                    return
+
                 # Check if supported
                 if ext not in SUPPORTED_DOCUMENT_TYPES:
                     supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))