From 77c4675a50db7abbfd191d4fba4746b4f3e1559e Mon Sep 17 00:00:00 2001 From: kiranvk2011 Date: Wed, 13 May 2026 03:26:09 +0000 Subject: [PATCH] fix(telegram): route image documents (.png/.jpg/.webp/.gif) through vision pipeline When users send images as documents (Telegram file picker), they were rejected with "Unsupported document type" because SUPPORTED_DOCUMENT_TYPES only includes text/office formats. Add SUPPORTED_IMAGE_DOCUMENT_TYPES to base.py and handle them in telegram.py before the document check. - Add SUPPORTED_IMAGE_DOCUMENT_TYPES constant to base.py - Add MIME reverse-lookup for image types in telegram.py - Route image documents through cache_image_from_bytes + vision pipeline - Handle media groups for image documents Closes: #20128, closes #18620 --- gateway/platforms/base.py | 20 ++++++++++++++++++++ gateway/platforms/telegram.py | 24 ++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index d507c60bd77..5157593ac57 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -837,6 +837,26 @@ SUPPORTED_DOCUMENT_TYPES = { } +# --------------------------------------------------------------------------- +# Image document types +# +# Image extensions that platforms may deliver as "documents" rather than +# native photo attachments (Telegram users uploading via the file picker, +# clients that wrap stickers/screenshots as files, etc.). When we see one +# of these, we route the bytes through the image cache and the normal +# vision/photo handling path instead of rejecting them as unsupported +# documents. 
+# --------------------------------------------------------------------------- + +SUPPORTED_IMAGE_DOCUMENT_TYPES = { + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".png": "image/png", + ".webp": "image/webp", + ".gif": "image/gif", +} + + def get_document_cache_dir() -> Path: """Return the document cache directory, creating it if it doesn't exist.""" DOCUMENT_CACHE_DIR.mkdir(parents=True, exist_ok=True) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 19063b47dc5..46aeb14b3d8 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -76,6 +76,7 @@ from gateway.platforms.base import ( resolve_proxy_url, SUPPORTED_VIDEO_TYPES, SUPPORTED_DOCUMENT_TYPES, + SUPPORTED_IMAGE_DOCUMENT_TYPES, utf16_len, ) from gateway.platforms.telegram_network import ( @@ -4624,6 +4625,14 @@ class TelegramAdapter(BasePlatformAdapter): video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()} ext = video_mime_to_ext.get(doc.mime_type, "") + if not ext and doc.mime_type: + # SUPPORTED_IMAGE_DOCUMENT_TYPES has duplicate values (.jpg + .jpeg + # both map to image/jpeg); keep the first ext we encounter. 
+ image_mime_to_ext: dict[str, str] = {} + for _ext, _mime in SUPPORTED_IMAGE_DOCUMENT_TYPES.items(): + image_mime_to_ext.setdefault(_mime, _ext) + ext = image_mime_to_ext.get(doc.mime_type, "") + if ext in SUPPORTED_VIDEO_TYPES: file_obj = await doc.get_file() video_bytes = await file_obj.download_as_bytearray() @@ -4635,6 +4644,21 @@ class TelegramAdapter(BasePlatformAdapter): await self.handle_message(event) return + if ext in SUPPORTED_IMAGE_DOCUMENT_TYPES: + file_obj = await doc.get_file() + image_bytes = await file_obj.download_as_bytearray() + cached_path = cache_image_from_bytes(bytes(image_bytes), ext=ext) + event.media_urls = [cached_path] + event.media_types = [SUPPORTED_IMAGE_DOCUMENT_TYPES[ext]] + event.message_type = MessageType.PHOTO + logger.info("[Telegram] Cached user image document at %s", cached_path) + media_group_id = getattr(msg, "media_group_id", None) + if media_group_id: + await self._queue_media_group_event(str(media_group_id), event) + else: + await self.handle_message(event) + return + # Check if supported if ext not in SUPPORTED_DOCUMENT_TYPES: supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))