mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
fix(gateway): accept any inbound file type across all messaging platforms
Authorization to message the agent is the gate, not the file extension. Previously the inbound-attachment allowlist (SUPPORTED_DOCUMENT_TYPES) was opt-OUT on Discord (allow_any_attachment defaulted false) and had no bypass at all on Telegram/Slack — so an .html (or any non-allowlisted type) was dropped or hard-rejected before the agent saw it. Now every authorized upload is cached and surfaced to the agent regardless of type: - base.cache_media_bytes(): unknown types cache as octet-stream (or the caller-supplied MIME) instead of returning None — fixes the chokepoint that Teams/Telegram-media route through. - discord/telegram/slack adapters: removed the allowlist reject/skip; any non-media attachment is typed DOCUMENT and cached. Known types keep their precise MIME. - Text inlining now gates on a shared _TEXT_INJECT_EXTENSIONS set (text + code + config + markup) instead of a blind UTF-8 decode, so binary formats (PDF/zip/docx) with ASCII headers are never inlined. - gateway/run.py emits the path-pointing context note for every DOCUMENT, including non text/application MIME types. - discord.allow_any_attachment is now a documented no-op kept for config back-compat. Validation: 357 gateway tests pass; E2E confirms .html/.bin/custom types cache, known types stay precise, PDFs are not inlined.
This commit is contained in:
parent
de6b3ae377
commit
4314d451ca
10 changed files with 239 additions and 192 deletions
|
|
@ -1248,6 +1248,33 @@ SUPPORTED_DOCUMENT_TYPES = {
|
|||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Text-injection extension allowlist
|
||||
#
|
||||
# Files whose contents are safe to inline into the prompt (UTF-8 text) when
|
||||
# small enough. This is intentionally an extension/MIME gate, NOT a blind
|
||||
# UTF-8 decode: binary formats like PDF/zip/docx can begin with decodable
|
||||
# ASCII headers and must never be inlined. Any uploaded file is still cached
|
||||
# and surfaced to the agent regardless of whether it lands in this set —
|
||||
# this only controls inline-vs-path-pointer for the prompt.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_TEXT_INJECT_EXTENSIONS = {
|
||||
".txt", ".md", ".markdown", ".csv", ".tsv", ".log",
|
||||
".json", ".jsonl", ".ndjson", ".xml", ".yaml", ".yml", ".toml",
|
||||
".ini", ".cfg", ".conf", ".env", ".properties",
|
||||
".html", ".htm", ".css", ".scss", ".sass", ".less",
|
||||
".py", ".pyi", ".js", ".mjs", ".cjs", ".ts", ".tsx", ".jsx",
|
||||
".sh", ".bash", ".zsh", ".fish", ".ps1", ".bat",
|
||||
".c", ".h", ".cpp", ".cc", ".hpp", ".cs", ".java", ".kt",
|
||||
".go", ".rs", ".rb", ".php", ".pl", ".lua", ".r", ".jl",
|
||||
".swift", ".m", ".scala", ".clj", ".ex", ".exs", ".erl",
|
||||
".sql", ".graphql", ".proto", ".tf", ".hcl",
|
||||
".dockerfile", ".makefile", ".cmake", ".gradle",
|
||||
".rst", ".tex", ".srt", ".vtt", ".diff", ".patch",
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Image document types
|
||||
#
|
||||
|
|
@ -1454,9 +1481,10 @@ def cache_media_bytes(
|
|||
|
||||
``default_kind`` ("image"/"video"/"audio"/"document") biases classification
|
||||
when the extension/MIME are ambiguous — e.g. a Telegram native photo whose
|
||||
file has no usable name. Unsupported document types return None so the
|
||||
caller can record an "unsupported" note. Images that fail validation
|
||||
(``cache_image_from_bytes`` raises ValueError) also return None.
|
||||
file has no usable name. Any non-image/video/audio file is cached as a
|
||||
document and surfaced to the agent (arbitrary types get
|
||||
``application/octet-stream``); only images that fail validation
|
||||
(``cache_image_from_bytes`` raises ValueError) return None.
|
||||
"""
|
||||
from tools.credential_files import to_agent_visible_cache_path
|
||||
|
||||
|
|
@ -1492,11 +1520,20 @@ def cache_media_bytes(
|
|||
out_mime = mime if mime.startswith("audio/") else f"audio/{aud_ext.lstrip('.')}"
|
||||
return CachedMedia(to_agent_visible_cache_path(path), out_mime, "audio", display)
|
||||
|
||||
if ext not in SUPPORTED_DOCUMENT_TYPES:
|
||||
return None
|
||||
|
||||
path = cache_document_from_bytes(data, filename or f"document{ext}")
|
||||
return CachedMedia(to_agent_visible_cache_path(path), SUPPORTED_DOCUMENT_TYPES[ext], "document", display or f"document{ext}")
|
||||
# Any other file type is cached and surfaced to the agent as a local path
|
||||
# so it can be inspected with terminal / read_file / etc. Authorization to
|
||||
# talk to the agent is the gate that matters — once a user is allowed to
|
||||
# message it, the file-extension allowlist must not silently drop their
|
||||
# uploads. Known extensions keep their precise MIME; everything else is
|
||||
# tagged application/octet-stream (or the caller-supplied MIME) so the
|
||||
# agent knows it's an arbitrary file and reaches for terminal tools.
|
||||
fallback_name = filename or (f"document{ext}" if ext else "document.bin")
|
||||
path = cache_document_from_bytes(data, fallback_name)
|
||||
if ext in SUPPORTED_DOCUMENT_TYPES:
|
||||
out_mime = SUPPORTED_DOCUMENT_TYPES[ext]
|
||||
else:
|
||||
out_mime = mime if mime else "application/octet-stream"
|
||||
return CachedMedia(to_agent_visible_cache_path(path), out_mime, "document", display or fallback_name)
|
||||
|
||||
|
||||
class MessageType(Enum):
|
||||
|
|
|
|||
|
|
@ -8688,8 +8688,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
guessed, _ = _mimetypes.guess_type(path)
|
||||
if guessed:
|
||||
mtype = guessed
|
||||
if not mtype.startswith(("application/", "text/")):
|
||||
continue
|
||||
else:
|
||||
mtype = "application/octet-stream"
|
||||
# Any accepted file gets a path-pointing context note — we accept
|
||||
# all file types now, so a non-text/non-application MIME (font/*,
|
||||
# model/*, etc.) must still tell the agent the file exists.
|
||||
|
||||
basename = os.path.basename(path)
|
||||
parts = basename.split("_", 2)
|
||||
|
|
|
|||
|
|
@ -2118,12 +2118,11 @@ DEFAULT_CONFIG = {
|
|||
# list_roles, member_info, search_members, fetch_messages, list_pins,
|
||||
# pin_message, unpin_message, create_thread, add_role, remove_role.
|
||||
"server_actions": "",
|
||||
# Accept arbitrary attachment file types (not just SUPPORTED_DOCUMENT_TYPES).
|
||||
# When True, any uploaded file is cached to disk with mime
|
||||
# application/octet-stream and the path is surfaced to the agent so it
|
||||
# can use terminal/read_file/etc. against it. Default False preserves
|
||||
# the historical allowlist behaviour.
|
||||
# Env override: DISCORD_ALLOW_ANY_ATTACHMENT.
|
||||
# DEPRECATED / no-op. Any uploaded file is now always cached and
|
||||
# surfaced to the agent regardless of file type — authorization to
|
||||
# message the agent is the gate, not the extension. Kept so existing
|
||||
# configs that set it do not error. Env override:
|
||||
# DISCORD_ALLOW_ANY_ATTACHMENT.
|
||||
"allow_any_attachment": False,
|
||||
# Maximum bytes per attachment the gateway will cache. The whole file
|
||||
# is held in memory while being written, so unlimited uploads carry a
|
||||
|
|
|
|||
|
|
@ -116,6 +116,7 @@ from gateway.platforms.base import (
|
|||
cache_audio_from_bytes,
|
||||
cache_document_from_bytes,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
_TEXT_INJECT_EXTENSIONS,
|
||||
validate_inbound_media_size,
|
||||
)
|
||||
from tools.url_safety import is_safe_url
|
||||
|
|
@ -5288,8 +5289,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
if normalized_content.startswith("/"):
|
||||
msg_type = MessageType.COMMAND
|
||||
elif all_attachments:
|
||||
_allow_any = self._discord_allow_any_attachment()
|
||||
# Check attachment types
|
||||
# Check attachment types. Any non-media attachment is treated as a
|
||||
# DOCUMENT regardless of extension — authorization to message the
|
||||
# agent is the gate, not the file type.
|
||||
for att in all_attachments:
|
||||
if att.content_type:
|
||||
if att.content_type.startswith("image/"):
|
||||
|
|
@ -5302,14 +5304,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
else:
|
||||
msg_type = MessageType.AUDIO
|
||||
else:
|
||||
doc_ext = ""
|
||||
if att.filename:
|
||||
_, doc_ext = os.path.splitext(att.filename)
|
||||
doc_ext = doc_ext.lower()
|
||||
if doc_ext in SUPPORTED_DOCUMENT_TYPES or _allow_any:
|
||||
msg_type = MessageType.DOCUMENT
|
||||
msg_type = MessageType.DOCUMENT
|
||||
break
|
||||
elif _allow_any:
|
||||
else:
|
||||
# No content_type at all (rare — discord usually fills it
|
||||
# in). Treat as a document so downstream pipelines surface
|
||||
# the path to the agent.
|
||||
|
|
@ -5398,71 +5395,79 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
if not ext and content_type:
|
||||
mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
|
||||
ext = mime_to_ext.get(content_type, "")
|
||||
allow_any_attachment = self._discord_allow_any_attachment()
|
||||
in_allowlist = ext in SUPPORTED_DOCUMENT_TYPES
|
||||
if not in_allowlist and not allow_any_attachment:
|
||||
# Any file type is accepted — authorization to message the agent
|
||||
# is the gate, not the file extension. Known types keep their
|
||||
# precise MIME; unknown types fall back to the source content_type
|
||||
# or octet-stream so the agent reaches for terminal tools.
|
||||
max_doc_bytes = self._discord_max_attachment_bytes()
|
||||
if max_doc_bytes and att.size and att.size > max_doc_bytes:
|
||||
logger.warning(
|
||||
"[Discord] Unsupported document type '%s' (%s), skipping",
|
||||
ext or "unknown", content_type,
|
||||
"[Discord] Document too large (%s bytes > cap %s), skipping: %s",
|
||||
att.size, max_doc_bytes, att.filename,
|
||||
)
|
||||
else:
|
||||
max_doc_bytes = self._discord_max_attachment_bytes()
|
||||
if max_doc_bytes and att.size and att.size > max_doc_bytes:
|
||||
logger.warning(
|
||||
"[Discord] Document too large (%s bytes > cap %s), skipping: %s",
|
||||
att.size, max_doc_bytes, att.filename,
|
||||
try:
|
||||
raw_bytes = await self._cache_discord_document(att, ext)
|
||||
cached_path = cache_document_from_bytes(
|
||||
raw_bytes, att.filename or f"document{ext or '.bin'}"
|
||||
)
|
||||
else:
|
||||
try:
|
||||
raw_bytes = await self._cache_discord_document(att, ext)
|
||||
cached_path = cache_document_from_bytes(
|
||||
raw_bytes, att.filename or f"document{ext or '.bin'}"
|
||||
)
|
||||
if in_allowlist:
|
||||
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
|
||||
else:
|
||||
# allow_any_attachment path: untyped file. Use the
|
||||
# source content_type if discord gave us one,
|
||||
# otherwise fall back to octet-stream so the agent
|
||||
# knows it's binary and reaches for terminal tools.
|
||||
doc_mime = (
|
||||
content_type
|
||||
if content_type and content_type != "unknown"
|
||||
else "application/octet-stream"
|
||||
)
|
||||
media_urls.append(cached_path)
|
||||
media_types.append(doc_mime)
|
||||
logger.info(
|
||||
"[Discord] Cached user %s: %s",
|
||||
"document" if in_allowlist else "attachment",
|
||||
cached_path,
|
||||
)
|
||||
# Inject text content for plain-text documents (capped at 100 KB)
|
||||
MAX_TEXT_INJECT_BYTES = 100 * 1024
|
||||
if in_allowlist and ext in {".md", ".txt", ".log"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
||||
try:
|
||||
text_content = raw_bytes.decode("utf-8")
|
||||
display_name = att.filename or f"document{ext}"
|
||||
display_name = re.sub(r'[^\w.\- ]', '_', display_name)
|
||||
injection = f"[Content of {display_name}]:\n{text_content}"
|
||||
if pending_text_injection:
|
||||
pending_text_injection = f"{pending_text_injection}\n\n{injection}"
|
||||
else:
|
||||
pending_text_injection = injection
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
# NOTE: for the allow_any_attachment path we deliberately
|
||||
# do NOT inject a path string here. ``gateway/run.py``
|
||||
# already detects DOCUMENT-typed events with
|
||||
# ``application/octet-stream`` MIME and emits a context
|
||||
# note with the sandbox-translated cache path via
|
||||
# ``to_agent_visible_cache_path()`` (important for
|
||||
# Docker/Modal terminal backends).
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"[Discord] Failed to cache document %s: %s",
|
||||
att.filename, e, exc_info=True,
|
||||
if in_allowlist:
|
||||
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
|
||||
else:
|
||||
# Untyped file. Use the source content_type if
|
||||
# discord gave us one, otherwise fall back to
|
||||
# octet-stream so the agent knows it's binary and
|
||||
# reaches for terminal tools.
|
||||
doc_mime = (
|
||||
content_type
|
||||
if content_type and content_type != "unknown"
|
||||
else "application/octet-stream"
|
||||
)
|
||||
media_urls.append(cached_path)
|
||||
media_types.append(doc_mime)
|
||||
logger.info(
|
||||
"[Discord] Cached user %s: %s",
|
||||
"document" if in_allowlist else "attachment",
|
||||
cached_path,
|
||||
)
|
||||
# Inject text content for any text-readable document
|
||||
# Inject text content for text-readable documents
|
||||
# (capped at 100 KB). Gate on a text-like extension/MIME
|
||||
# — NOT a blind UTF-8 decode, since binary formats like
|
||||
# PDF/zip/docx can have decodable ASCII headers. Unknown
|
||||
# but clearly-textual types (text/* MIME or a known text
|
||||
# extension) are inlined too; everything else relies on
|
||||
# ``gateway/run.py`` to emit a path-pointing context note.
|
||||
MAX_TEXT_INJECT_BYTES = 100 * 1024
|
||||
_is_text = (
|
||||
ext in _TEXT_INJECT_EXTENSIONS
|
||||
or (content_type or "").startswith("text/")
|
||||
)
|
||||
if _is_text and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
||||
try:
|
||||
text_content = raw_bytes.decode("utf-8")
|
||||
display_name = att.filename or f"document{ext or '.txt'}"
|
||||
display_name = re.sub(r'[^\w.\- ]', '_', display_name)
|
||||
injection = f"[Content of {display_name}]:\n{text_content}"
|
||||
if pending_text_injection:
|
||||
pending_text_injection = f"{pending_text_injection}\n\n{injection}"
|
||||
else:
|
||||
pending_text_injection = injection
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
# NOTE: for the untyped-attachment path we deliberately
|
||||
# do NOT inject a path string here. ``gateway/run.py``
|
||||
# already detects DOCUMENT-typed events with
|
||||
# ``application/octet-stream`` MIME and emits a context
|
||||
# note with the sandbox-translated cache path via
|
||||
# ``to_agent_visible_cache_path()`` (important for
|
||||
# Docker/Modal terminal backends).
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"[Discord] Failed to cache document %s: %s",
|
||||
att.filename, e, exc_info=True,
|
||||
)
|
||||
|
||||
# Use normalized_content (saved before auto-threading) instead of message.content,
|
||||
# to detect /slash commands in channel messages.
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ from gateway.platforms.base import (
|
|||
SendResult,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
SUPPORTED_VIDEO_TYPES,
|
||||
_TEXT_INJECT_EXTENSIONS,
|
||||
is_host_excluded_by_no_proxy,
|
||||
resolve_proxy_url,
|
||||
safe_url_for_log,
|
||||
|
|
@ -2698,8 +2699,12 @@ class SlackAdapter(BasePlatformAdapter):
|
|||
}
|
||||
ext = mime_to_ext.get(mimetype, "")
|
||||
|
||||
if ext not in SUPPORTED_DOCUMENT_TYPES:
|
||||
continue # Skip unsupported file types silently
|
||||
# Any file type is accepted — authorization to message the
|
||||
# agent is the gate, not the file extension. Known types keep
|
||||
# their precise MIME; unknown types fall back to the source
|
||||
# mimetype or octet-stream so the agent reaches for terminal
|
||||
# tools.
|
||||
in_allowlist = ext in SUPPORTED_DOCUMENT_TYPES
|
||||
|
||||
# Check file size (Slack limit: 20 MB for bots)
|
||||
file_size = f.get("size", 0)
|
||||
|
|
@ -2715,36 +2720,28 @@ class SlackAdapter(BasePlatformAdapter):
|
|||
url, team_id=team_id
|
||||
)
|
||||
cached_path = cache_document_from_bytes(
|
||||
raw_bytes, original_filename or f"document{ext}"
|
||||
raw_bytes, original_filename or f"document{ext or '.bin'}"
|
||||
)
|
||||
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
|
||||
if in_allowlist:
|
||||
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
|
||||
else:
|
||||
doc_mime = mimetype or "application/octet-stream"
|
||||
media_urls.append(cached_path)
|
||||
media_types.append(doc_mime)
|
||||
logger.debug("[Slack] Cached user document: %s", cached_path)
|
||||
logger.debug("[Slack] Cached user document: %s (%s)", cached_path, doc_mime)
|
||||
|
||||
# Inject small text-ish files directly into the prompt so
|
||||
# snippets like JSON/YAML/configs are actually visible to the agent.
|
||||
# snippets like JSON/YAML/configs are actually visible to the
|
||||
# agent. Gate on a text-like extension/MIME — NOT a blind
|
||||
# UTF-8 decode, since binary formats (PDF/zip/docx) can have
|
||||
# decodable ASCII headers. Binary files are surfaced as a
|
||||
# cached path only (run.py emits a path-pointing note).
|
||||
MAX_TEXT_INJECT_BYTES = 100 * 1024
|
||||
TEXT_INJECT_EXTENSIONS = {
|
||||
".md",
|
||||
".txt",
|
||||
".csv",
|
||||
".log",
|
||||
".json",
|
||||
".xml",
|
||||
".yaml",
|
||||
".yml",
|
||||
".toml",
|
||||
".ini",
|
||||
".cfg",
|
||||
}
|
||||
if (
|
||||
ext in TEXT_INJECT_EXTENSIONS
|
||||
and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES
|
||||
):
|
||||
_is_text = ext in _TEXT_INJECT_EXTENSIONS or (mimetype or "").startswith("text/")
|
||||
if _is_text and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
||||
try:
|
||||
text_content = raw_bytes.decode("utf-8")
|
||||
display_name = original_filename or f"document{ext}"
|
||||
display_name = original_filename or f"document{ext or '.txt'}"
|
||||
display_name = re.sub(r"[^\w.\- ]", "_", display_name)
|
||||
injection = f"[Content of {display_name}]:\n{text_content}"
|
||||
if text:
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ from gateway.platforms.base import (
|
|||
SUPPORTED_VIDEO_TYPES,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
SUPPORTED_IMAGE_DOCUMENT_TYPES,
|
||||
_TEXT_INJECT_EXTENSIONS,
|
||||
utf16_len,
|
||||
)
|
||||
from plugins.platforms.telegram.telegram_network import (
|
||||
|
|
@ -6526,33 +6527,30 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
# ext-in-SUPPORTED_IMAGE_DOCUMENT_TYPES branch would be dead
|
||||
# code — the extension sets are identical.
|
||||
|
||||
# Check if supported
|
||||
if ext not in SUPPORTED_DOCUMENT_TYPES:
|
||||
supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
|
||||
event.text = (
|
||||
f"Unsupported document type '{ext or 'unknown'}'. "
|
||||
f"Supported types: {supported_list}"
|
||||
)
|
||||
logger.info("[Telegram] Unsupported document type: %s", ext or "unknown")
|
||||
await self.handle_message(event)
|
||||
return
|
||||
|
||||
# Download and cache
|
||||
# Download and cache. Any file type is accepted — authorization
|
||||
# to message the agent is the gate, not the file extension.
|
||||
# Known types keep their precise MIME; unknown types are tagged
|
||||
# application/octet-stream so the agent reaches for terminal tools.
|
||||
file_obj = await doc.get_file()
|
||||
doc_bytes = await file_obj.download_as_bytearray()
|
||||
raw_bytes = bytes(doc_bytes)
|
||||
cached_path = cache_document_from_bytes(raw_bytes, original_filename or f"document{ext}")
|
||||
mime_type = SUPPORTED_DOCUMENT_TYPES[ext]
|
||||
cached_path = cache_document_from_bytes(raw_bytes, original_filename or f"document{ext or '.bin'}")
|
||||
mime_type = SUPPORTED_DOCUMENT_TYPES.get(ext) or doc.mime_type or "application/octet-stream"
|
||||
event.media_urls = [cached_path]
|
||||
event.media_types = [mime_type]
|
||||
logger.info("[Telegram] Cached user document at %s", cached_path)
|
||||
logger.info("[Telegram] Cached user document at %s (%s)", cached_path, mime_type)
|
||||
|
||||
# For text files, inject content into event.text (capped at 100 KB)
|
||||
# For text-readable files, inject content into event.text (capped
|
||||
# at 100 KB). Gate on a text-like extension/MIME — NOT a blind
|
||||
# UTF-8 decode, since binary formats (PDF/zip/docx) can have
|
||||
# decodable ASCII headers. Binary files are surfaced as a cached
|
||||
# path only (run.py emits a path-pointing context note).
|
||||
MAX_TEXT_INJECT_BYTES = 100 * 1024
|
||||
if ext in {".md", ".txt"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
||||
_is_text = ext in _TEXT_INJECT_EXTENSIONS or (doc_mime or "").startswith("text/")
|
||||
if _is_text and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
||||
try:
|
||||
text_content = raw_bytes.decode("utf-8")
|
||||
display_name = original_filename or f"document{ext}"
|
||||
display_name = original_filename or f"document{ext or '.txt'}"
|
||||
display_name = re.sub(r'[^\w.\- ]', '_', display_name)
|
||||
injection = f"[Content of {display_name}]:\n{text_content}"
|
||||
if event.text:
|
||||
|
|
@ -6560,10 +6558,9 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
else:
|
||||
event.text = injection
|
||||
except UnicodeDecodeError:
|
||||
logger.warning(
|
||||
"[Telegram] Could not decode text file as UTF-8, skipping content injection",
|
||||
exc_info=True,
|
||||
)
|
||||
# Binary file — agent has the cached path and can use
|
||||
# terminal/read_file against it. No inline injection.
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("[Telegram] Failed to cache document: %s", e, exc_info=True)
|
||||
|
|
|
|||
|
|
@ -387,37 +387,18 @@ class TestIncomingDocumentHandling:
|
|||
|
||||
|
||||
class TestAllowAnyAttachment:
|
||||
"""Cover the discord.allow_any_attachment config flag.
|
||||
"""Cover accept-any-file-type inbound handling.
|
||||
|
||||
With the flag off (default), unknown file types are dropped. With it on,
|
||||
they get cached and surfaced to the agent as DOCUMENT events with
|
||||
application/octet-stream MIME so gateway/run.py emits a path-pointing
|
||||
context note.
|
||||
Authorization to message the agent is the gate, not the file extension.
|
||||
Unknown file types are cached and surfaced to the agent as DOCUMENT events
|
||||
with the source content_type (or application/octet-stream) so gateway/run.py
|
||||
emits a path-pointing context note. The legacy ``allow_any_attachment``
|
||||
config flag is now a no-op — acceptance is unconditional.
|
||||
"""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unknown_type_skipped_by_default(self, adapter):
|
||||
"""Default (flag off): unknown extension is dropped.
|
||||
|
||||
With no text + no cached media, the adapter may legitimately decline
|
||||
to dispatch the event at all, so we don't assert on call_args here —
|
||||
we just verify the file wasn't cached.
|
||||
"""
|
||||
with _mock_aiohttp_download(b"should not be cached"):
|
||||
msg = make_message([
|
||||
make_attachment(filename="weird.xyz", content_type="application/x-custom")
|
||||
])
|
||||
await adapter._handle_message(msg)
|
||||
|
||||
if adapter.handle_message.call_args is not None:
|
||||
event = adapter.handle_message.call_args[0][0]
|
||||
assert event.media_urls == []
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unknown_type_cached_when_flag_on(self, adapter):
|
||||
"""Flag on: unknown extension is cached as application/octet-stream."""
|
||||
adapter.config.extra["allow_any_attachment"] = True
|
||||
|
||||
async def test_unknown_type_cached_by_default(self, adapter):
|
||||
"""Default: unknown extension is cached, not dropped."""
|
||||
with _mock_aiohttp_download(b"\x00\x01\x02 binary payload"):
|
||||
msg = make_message([
|
||||
make_attachment(filename="weird.xyz", content_type="application/x-custom")
|
||||
|
|
@ -430,16 +411,29 @@ class TestAllowAnyAttachment:
|
|||
# Falls back to the source content_type when we have one.
|
||||
assert event.media_types == ["application/x-custom"]
|
||||
assert event.message_type == MessageType.DOCUMENT
|
||||
# We deliberately do NOT inline arbitrary bytes — run.py emits the
|
||||
# path-pointing note based on DOCUMENT + octet-stream MIME.
|
||||
# We deliberately do NOT inline arbitrary (non-UTF-8) bytes — run.py
|
||||
# emits the path-pointing note based on DOCUMENT + octet-stream MIME.
|
||||
assert "[Content of" not in (event.text or "")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unknown_type_no_content_type_becomes_octet_stream(self, adapter):
|
||||
"""Flag on + no content_type from discord: MIME falls back to octet-stream."""
|
||||
adapter.config.extra["allow_any_attachment"] = True
|
||||
async def test_html_cached_and_inlined(self, adapter):
|
||||
"""An .html upload is cached and (being UTF-8 text) inlined."""
|
||||
html = b"<html><body>hi</body></html>"
|
||||
with _mock_aiohttp_download(html):
|
||||
msg = make_message([
|
||||
make_attachment(filename="page.html", content_type="text/html")
|
||||
])
|
||||
await adapter._handle_message(msg)
|
||||
|
||||
with _mock_aiohttp_download(b"raw bytes"):
|
||||
event = adapter.handle_message.call_args[0][0]
|
||||
assert len(event.media_urls) == 1
|
||||
assert event.message_type == MessageType.DOCUMENT
|
||||
assert event.media_types == ["text/html"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unknown_type_no_content_type_becomes_octet_stream(self, adapter):
|
||||
"""No content_type from discord: MIME falls back to octet-stream."""
|
||||
with _mock_aiohttp_download(b"\x00raw bytes\x01"):
|
||||
msg = make_message([
|
||||
make_attachment(filename="mystery.bin", content_type=None)
|
||||
])
|
||||
|
|
@ -452,7 +446,6 @@ class TestAllowAnyAttachment:
|
|||
@pytest.mark.asyncio
|
||||
async def test_max_attachment_bytes_caps_uploads(self, adapter):
|
||||
"""discord.max_attachment_bytes overrides the historical 32 MiB cap."""
|
||||
adapter.config.extra["allow_any_attachment"] = True
|
||||
adapter.config.extra["max_attachment_bytes"] = 1024 # 1 KiB
|
||||
|
||||
msg = make_message([
|
||||
|
|
@ -470,7 +463,6 @@ class TestAllowAnyAttachment:
|
|||
@pytest.mark.asyncio
|
||||
async def test_max_attachment_bytes_zero_means_unlimited(self, adapter):
|
||||
"""max_attachment_bytes=0 disables the size cap entirely."""
|
||||
adapter.config.extra["allow_any_attachment"] = True
|
||||
adapter.config.extra["max_attachment_bytes"] = 0
|
||||
|
||||
# 64 MiB — would normally exceed the historical 32 MiB hardcoded cap.
|
||||
|
|
@ -488,14 +480,12 @@ class TestAllowAnyAttachment:
|
|||
assert len(event.media_urls) == 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_allowlisted_doc_unchanged_when_flag_on(self, adapter):
|
||||
"""Flag on must not change handling of types already in SUPPORTED_DOCUMENT_TYPES.
|
||||
async def test_allowlisted_doc_unchanged(self, adapter):
|
||||
"""Types already in SUPPORTED_DOCUMENT_TYPES keep canonical handling.
|
||||
|
||||
A .txt should still get its content inlined (the historical behavior),
|
||||
and the MIME should still be the canonical text/plain — not whatever
|
||||
discord guessed.
|
||||
A .txt should still get its content inlined, and the MIME should still
|
||||
be the canonical text/plain — not whatever discord guessed.
|
||||
"""
|
||||
adapter.config.extra["allow_any_attachment"] = True
|
||||
file_content = b"still a text file"
|
||||
|
||||
with _mock_aiohttp_download(file_content):
|
||||
|
|
@ -510,14 +500,6 @@ class TestAllowAnyAttachment:
|
|||
assert "still a text file" in event.text
|
||||
assert event.media_types == ["text/plain"]
|
||||
|
||||
def test_helper_reads_env_fallback(self, adapter, monkeypatch):
|
||||
"""Helper falls back to DISCORD_ALLOW_ANY_ATTACHMENT env var."""
|
||||
assert adapter._discord_allow_any_attachment() is False
|
||||
monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "true")
|
||||
assert adapter._discord_allow_any_attachment() is True
|
||||
monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "no")
|
||||
assert adapter._discord_allow_any_attachment() is False
|
||||
|
||||
def test_helper_config_overrides_env(self, adapter, monkeypatch):
|
||||
"""config.yaml setting wins over env var."""
|
||||
monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "true")
|
||||
|
|
|
|||
|
|
@ -218,10 +218,25 @@ class TestCacheMediaBytes:
|
|||
assert result.kind == "document"
|
||||
assert result.media_type == "text/csv"
|
||||
|
||||
def test_unsupported_document_returns_none(self):
|
||||
def test_unknown_document_cached_as_octet_stream(self):
|
||||
"""Unknown file types are cached (not dropped) so the agent can inspect them.
|
||||
|
||||
Authorization to message the agent is the gate, not the file extension.
|
||||
"""
|
||||
from gateway.platforms.base import cache_media_bytes
|
||||
result = cache_media_bytes(b"MZ", filename="malware.exe", mime_type="application/x-msdownload")
|
||||
assert result is None
|
||||
result = cache_media_bytes(b"MZ", filename="program.exe", mime_type="application/x-msdownload")
|
||||
assert result is not None
|
||||
assert result.kind == "document"
|
||||
# Caller-supplied MIME is preserved when present.
|
||||
assert result.media_type == "application/x-msdownload"
|
||||
assert os.path.exists(result.path)
|
||||
|
||||
def test_unknown_document_no_mime_falls_back_to_octet_stream(self):
|
||||
from gateway.platforms.base import cache_media_bytes
|
||||
result = cache_media_bytes(b"\x00\x01\x02", filename="mystery.qux", mime_type="")
|
||||
assert result is not None
|
||||
assert result.kind == "document"
|
||||
assert result.media_type == "application/octet-stream"
|
||||
|
||||
def test_invalid_image_returns_none(self):
|
||||
from gateway.platforms.base import cache_media_bytes
|
||||
|
|
|
|||
|
|
@ -336,14 +336,25 @@ class TestDocumentDownloadBlock:
|
|||
assert event.media_types == ["application/pdf"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_missing_filename_and_mime_rejected(self, adapter):
|
||||
doc = _make_document(file_name=None, mime_type=None, file_size=100)
|
||||
async def test_missing_filename_and_mime_cached_as_octet_stream(self, adapter):
|
||||
"""No filename and no mime: cached anyway as application/octet-stream.
|
||||
|
||||
Authorization to message the agent is the gate, not the file type — an
|
||||
untyped upload is still surfaced to the agent as a cached path.
|
||||
"""
|
||||
content = b"\x00\x01\x02 untyped payload"
|
||||
file_obj = _make_file_obj(content)
|
||||
doc = _make_document(
|
||||
file_name=None, mime_type=None, file_size=len(content), file_obj=file_obj,
|
||||
)
|
||||
msg = _make_message(document=doc)
|
||||
update = _make_update(msg)
|
||||
|
||||
await adapter._handle_media_message(update, MagicMock())
|
||||
event = adapter.handle_message.call_args[0][0]
|
||||
assert "Unsupported" in event.text
|
||||
assert len(event.media_urls) == 1
|
||||
assert event.media_types == ["application/octet-stream"]
|
||||
assert "Unsupported" not in (event.text or "")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unicode_decode_error_handled(self, adapter):
|
||||
|
|
|
|||
|
|
@ -617,24 +617,25 @@ Discord's per-upload size limit depends on the server's boost tier (25 MB free,
|
|||
|
||||
## Receiving Arbitrary File Types
|
||||
|
||||
By default the bot caches uploads that match a built-in allowlist — images, audio, video, PDF, text/markdown/csv/log, JSON/XML/YAML/TOML, zip, docx/xlsx/pptx. Anything else (a `.wav`, a `.bin`, a custom-extension dump) gets logged as `Unsupported document type` and dropped before the agent sees it.
|
||||
Any file type a user uploads is accepted. Authorization to message the agent is the gate — not the file extension. Every upload is downloaded, cached under `~/.hermes/cache/documents/`, and surfaced to the agent as a `DOCUMENT`-typed message event so it can inspect the file with `terminal` (`ffprobe`, `unzip`, `file`, `strings`, etc.) or `read_file`.
|
||||
|
||||
To accept arbitrary file types, enable `discord.allow_any_attachment`:
|
||||
- Known types (PDF, docx/xlsx/pptx, zip, images/audio/video, etc.) keep their precise MIME.
|
||||
- Unknown types fall back to the upload's reported content type, or `application/octet-stream` when none is given.
|
||||
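- Small text files — those with a recognized text-like extension (text, code, config, HTML, CSS, JSON, YAML, ...) or a `text/*` content type — have their contents auto-injected into the prompt up to 100 KiB. Inlining is gated on the extension/content type, not on whether the bytes happen to decode as UTF-8, so binary formats with ASCII headers (PDF, zip, docx) are never inlined. Every other file — and any text file that is over the cap or fails to decode — is surfaced as a path-pointing context note only (auto-translated for Docker/Modal sandboxed terminals via `to_agent_visible_cache_path`), so it doesn't blow up the context window.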
|
||||
|
||||
The only inbound limit is the per-file size cap (default 32 MiB):
|
||||
|
||||
```yaml
|
||||
discord:
|
||||
allow_any_attachment: true
|
||||
# Optional — raise/disable the per-file size cap. Default is 32 MiB.
|
||||
# The whole file is held in memory while being cached, so unlimited
|
||||
# uploads carry a real memory cost.
|
||||
max_attachment_bytes: 33554432 # bytes; 0 = unlimited
|
||||
```
|
||||
|
||||
When the flag is on, any uploaded file is downloaded, cached under `~/.hermes/cache/documents/`, and surfaced to the agent as a `DOCUMENT`-typed message event with `application/octet-stream` MIME. The agent receives a context note pointing at the local path (auto-translated for Docker/Modal sandboxed terminals via `to_agent_visible_cache_path`) and can inspect the file with `terminal` (`ffprobe`, `unzip`, `file`, `strings`, etc.) or `read_file`. The file body is **not** inlined into the prompt — only the path — so binary uploads don't blow up the context window.
|
||||
Equivalent env var: `DISCORD_MAX_ATTACHMENT_BYTES=33554432` (or `0` for no cap).
|
||||
|
||||
Known-text formats already in the allowlist (`.txt`, `.md`, `.log`) continue to have their contents auto-injected up to 100 KiB; that behavior is unchanged when the flag is on.
|
||||
|
||||
Equivalent env vars: `DISCORD_ALLOW_ANY_ATTACHMENT=true` and `DISCORD_MAX_ATTACHMENT_BYTES=33554432` (or `0` for no cap).
|
||||
The legacy `discord.allow_any_attachment` flag is now a no-op — any file type is always accepted — and is kept only so existing configs don't error.
|
||||
|
||||
:::warning Memory cost of unlimited
|
||||
Disabling the size cap (`max_attachment_bytes: 0`) means a user can drop a multi-GB file on the bot and the gateway will dutifully buffer it through memory while caching to disk. Only set this in trusted single-user installs. For shared bots, keep the default 32 MiB or raise it conservatively.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue