mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
feat(gateway): support [[as_document]] directive for skill media routing
Skills that produce large/lossless images (e.g. info-graph, where a
rendered JPG is 1-2 MB) currently lose quality in Telegram delivery
because `_IMAGE_EXTS` membership routes the file through
`send_multiple_images` → `sendMediaGroup`, where Telegram's server
re-encodes the image to JPEG at a 1280px max edge. The original bytes only survive
when the file goes through `send_document`, which the dispatch tables
in three places (`_process_message_background`, `_deliver_media_from_response`,
and the `send_message` tool's telegram path) only reach for files
whose extension is NOT in `_IMAGE_EXTS`.
This commit adds an `[[as_document]]` directive that mirrors the
existing `[[audio_as_voice]]` shape: a skill emits the directive once
in its response, and every image-extension MEDIA: file in that response
is delivered via `send_document` instead of `send_multiple_images` /
`sendPhoto`. The directive is detected at the dispatch sites (which see
the raw response) and the directive string is stripped from the
user-visible cleaned text in `extract_media` so it never leaks.
Granularity is intentionally all-or-nothing per response, matching
[[audio_as_voice]]'s scope. Skills that need finer-grained control can split
their output into two responses.
Verified the targeted use case: info-graph emits
信息图已生成(...)
[[as_document]]
MEDIA:/tmp/info-graph-x/infographic.jpg
→ Telegram receives `infographic.jpg` via sendDocument, original 1MB
JPEG bytes preserved, no recompression. Forwarding and download
filenames stay clean (`infographic.jpg`).
Tests: +3 cases in TestExtractMedia covering directive strip, isolation
from voice flag, and coexistence with [[audio_as_voice]]. All
113 pre-existing media/extract/send tests pass.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
8d363f8d54
commit
d34f03c32a
4 changed files with 94 additions and 14 deletions
|
|
@@ -242,6 +242,12 @@ def _handle_send(args):
|
|||
|
||||
from gateway.platforms.base import BasePlatformAdapter
|
||||
|
||||
# Capture [[as_document]] directive before extract_media strips it.
|
||||
# Image-extension files in this batch will route through send_document
|
||||
# instead of send_photo so the original bytes survive (e.g. info-graph
|
||||
# JPGs where Telegram's sendPhoto recompresses to 1280px).
|
||||
force_document_attachments = "[[as_document]]" in message
|
||||
|
||||
media_files, cleaned_message = BasePlatformAdapter.extract_media(message)
|
||||
mirror_text = cleaned_message.strip() or _describe_media_for_mirror(media_files)
|
||||
|
||||
|
|
@@ -277,6 +283,7 @@ def _handle_send(args):
|
|||
cleaned_message,
|
||||
thread_id=thread_id,
|
||||
media_files=media_files,
|
||||
force_document=force_document_attachments,
|
||||
)
|
||||
)
|
||||
if used_home_channel and isinstance(result, dict) and result.get("success"):
|
||||
|
|
@@ -437,7 +444,7 @@ async def _send_via_adapter(platform, pconfig, chat_id, chunk):
|
|||
return {"error": f"No live adapter for platform '{platform.value}'. Is the gateway running with this platform connected?"}
|
||||
|
||||
|
||||
async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, media_files=None):
|
||||
async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, media_files=None, force_document=False):
|
||||
"""Route a message to the appropriate platform sender.
|
||||
|
||||
Long messages are automatically chunked to fit within platform limits
|
||||
|
|
@@ -514,6 +521,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
|
|||
media_files=media_files if is_last else [],
|
||||
thread_id=thread_id,
|
||||
disable_link_previews=disable_link_previews,
|
||||
force_document=force_document,
|
||||
)
|
||||
if isinstance(result, dict) and result.get("error"):
|
||||
return result
|
||||
|
|
@@ -667,7 +675,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
|
|||
return last_result
|
||||
|
||||
|
||||
async def _send_telegram(token, chat_id, message, media_files=None, thread_id=None, disable_link_previews=False):
|
||||
async def _send_telegram(token, chat_id, message, media_files=None, thread_id=None, disable_link_previews=False, force_document=False):
|
||||
"""Send via Telegram Bot API (one-shot, no polling needed).
|
||||
|
||||
Applies markdown→MarkdownV2 formatting (same as the gateway adapter)
|
||||
|
|
@@ -750,7 +758,7 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No
|
|||
ext = os.path.splitext(media_path)[1].lower()
|
||||
try:
|
||||
with open(media_path, "rb") as f:
|
||||
if ext in _IMAGE_EXTS:
|
||||
if ext in _IMAGE_EXTS and not force_document:
|
||||
last_msg = await bot.send_photo(
|
||||
chat_id=int_chat_id, photo=f, **thread_kwargs
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue