fix(telegram): route image documents (.png/.jpg/.webp/.gif) through vision pipeline

When users sent images as documents (e.g. via the Telegram file picker),
they were rejected with "Unsupported document type" because
SUPPORTED_DOCUMENT_TYPES only includes text/office formats. Add
SUPPORTED_IMAGE_DOCUMENT_TYPES to base.py and handle these image documents
in telegram.py before the document check.

- Add SUPPORTED_IMAGE_DOCUMENT_TYPES constant to base.py
- Add MIME reverse-lookup for image types in telegram.py
- Route image documents through cache_image_from_bytes + vision pipeline
- Handle media groups for image documents

Closes: #20128, #18620
This commit is contained in:
kiranvk2011 2026-05-13 03:26:09 +00:00 committed by Teknium
parent a4fb0a3ac3
commit 77c4675a50
2 changed files with 44 additions and 0 deletions

View file

@ -837,6 +837,26 @@ SUPPORTED_DOCUMENT_TYPES = {
}
# ---------------------------------------------------------------------------
# Image document types
#
# Image extensions that platforms may deliver as "documents" rather than
# native photo attachments (Telegram users uploading via the file picker,
# clients that wrap stickers/screenshots as files, etc.). When we see one
# of these, we route the bytes through the image cache and the normal
# vision/photo handling path instead of rejecting them as unsupported
# documents.
# ---------------------------------------------------------------------------
SUPPORTED_IMAGE_DOCUMENT_TYPES = {
# Keys are file extensions (with the leading dot); values are the MIME
# type reported for a cached image of that extension.
#
# NOTE: ".jpg" is intentionally listed before ".jpeg". Both map to
# "image/jpeg", so a first-match reverse lookup (MIME type -> extension)
# over this dict resolves "image/jpeg" to ".jpg". Keep this ordering when
# adding entries that share a MIME type.
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".png": "image/png",
".webp": "image/webp",
".gif": "image/gif",
}
def get_document_cache_dir() -> Path:
"""Return the document cache directory, creating it if it doesn't exist."""
DOCUMENT_CACHE_DIR.mkdir(parents=True, exist_ok=True)

View file

@ -76,6 +76,7 @@ from gateway.platforms.base import (
resolve_proxy_url,
SUPPORTED_VIDEO_TYPES,
SUPPORTED_DOCUMENT_TYPES,
SUPPORTED_IMAGE_DOCUMENT_TYPES,
utf16_len,
)
from gateway.platforms.telegram_network import (
@ -4624,6 +4625,14 @@ class TelegramAdapter(BasePlatformAdapter):
video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()}
ext = video_mime_to_ext.get(doc.mime_type, "")
if not ext and doc.mime_type:
# SUPPORTED_IMAGE_DOCUMENT_TYPES has duplicate values (.jpg + .jpeg
# both map to image/jpeg); keep the first ext we encounter.
image_mime_to_ext: dict[str, str] = {}
for _ext, _mime in SUPPORTED_IMAGE_DOCUMENT_TYPES.items():
image_mime_to_ext.setdefault(_mime, _ext)
ext = image_mime_to_ext.get(doc.mime_type, "")
if ext in SUPPORTED_VIDEO_TYPES:
file_obj = await doc.get_file()
video_bytes = await file_obj.download_as_bytearray()
@ -4635,6 +4644,21 @@ class TelegramAdapter(BasePlatformAdapter):
await self.handle_message(event)
return
if ext in SUPPORTED_IMAGE_DOCUMENT_TYPES:
file_obj = await doc.get_file()
image_bytes = await file_obj.download_as_bytearray()
cached_path = cache_image_from_bytes(bytes(image_bytes), ext=ext)
event.media_urls = [cached_path]
event.media_types = [SUPPORTED_IMAGE_DOCUMENT_TYPES[ext]]
event.message_type = MessageType.PHOTO
logger.info("[Telegram] Cached user image document at %s", cached_path)
media_group_id = getattr(msg, "media_group_id", None)
if media_group_id:
await self._queue_media_group_event(str(media_group_id), event)
else:
await self.handle_message(event)
return
# Check if supported
if ext not in SUPPORTED_DOCUMENT_TYPES:
supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))