mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
fix(telegram): route image documents (.png/.jpg/.webp/.gif) through vision pipeline
When users sent images as documents (via the Telegram file picker), they were rejected with "Unsupported document type" because SUPPORTED_DOCUMENT_TYPES only includes text/office formats. Add SUPPORTED_IMAGE_DOCUMENT_TYPES to base.py and handle those types in telegram.py before the document check.

- Add SUPPORTED_IMAGE_DOCUMENT_TYPES constant to base.py
- Add MIME reverse-lookup for image types in telegram.py
- Route image documents through cache_image_from_bytes + vision pipeline
- Handle media groups for image documents

Closes: #20128, #18620
This commit is contained in:
parent
a4fb0a3ac3
commit
77c4675a50
2 changed files with 44 additions and 0 deletions
|
|
@ -837,6 +837,26 @@ SUPPORTED_DOCUMENT_TYPES = {
|
|||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Image document types
|
||||
#
|
||||
# Image extensions that platforms may deliver as "documents" rather than
|
||||
# native photo attachments (Telegram users uploading via the file picker,
|
||||
# clients that wrap stickers/screenshots as files, etc.). When we see one
|
||||
# of these, we route the bytes through the image cache and the normal
|
||||
# vision/photo handling path instead of rejecting them as unsupported
|
||||
# documents.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SUPPORTED_IMAGE_DOCUMENT_TYPES = {
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".png": "image/png",
|
||||
".webp": "image/webp",
|
||||
".gif": "image/gif",
|
||||
}
|
||||
|
||||
|
||||
def get_document_cache_dir() -> Path:
|
||||
"""Return the document cache directory, creating it if it doesn't exist."""
|
||||
DOCUMENT_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
|
|
|||
|
|
@ -76,6 +76,7 @@ from gateway.platforms.base import (
|
|||
resolve_proxy_url,
|
||||
SUPPORTED_VIDEO_TYPES,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
SUPPORTED_IMAGE_DOCUMENT_TYPES,
|
||||
utf16_len,
|
||||
)
|
||||
from gateway.platforms.telegram_network import (
|
||||
|
|
@ -4624,6 +4625,14 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()}
|
||||
ext = video_mime_to_ext.get(doc.mime_type, "")
|
||||
|
||||
if not ext and doc.mime_type:
|
||||
# SUPPORTED_IMAGE_DOCUMENT_TYPES has duplicate values (.jpg + .jpeg
|
||||
# both map to image/jpeg); keep the first ext we encounter.
|
||||
image_mime_to_ext: dict[str, str] = {}
|
||||
for _ext, _mime in SUPPORTED_IMAGE_DOCUMENT_TYPES.items():
|
||||
image_mime_to_ext.setdefault(_mime, _ext)
|
||||
ext = image_mime_to_ext.get(doc.mime_type, "")
|
||||
|
||||
if ext in SUPPORTED_VIDEO_TYPES:
|
||||
file_obj = await doc.get_file()
|
||||
video_bytes = await file_obj.download_as_bytearray()
|
||||
|
|
@ -4635,6 +4644,21 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
await self.handle_message(event)
|
||||
return
|
||||
|
||||
if ext in SUPPORTED_IMAGE_DOCUMENT_TYPES:
|
||||
file_obj = await doc.get_file()
|
||||
image_bytes = await file_obj.download_as_bytearray()
|
||||
cached_path = cache_image_from_bytes(bytes(image_bytes), ext=ext)
|
||||
event.media_urls = [cached_path]
|
||||
event.media_types = [SUPPORTED_IMAGE_DOCUMENT_TYPES[ext]]
|
||||
event.message_type = MessageType.PHOTO
|
||||
logger.info("[Telegram] Cached user image document at %s", cached_path)
|
||||
media_group_id = getattr(msg, "media_group_id", None)
|
||||
if media_group_id:
|
||||
await self._queue_media_group_event(str(media_group_id), event)
|
||||
else:
|
||||
await self.handle_message(event)
|
||||
return
|
||||
|
||||
# Check if supported
|
||||
if ext not in SUPPORTED_DOCUMENT_TYPES:
|
||||
supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue