fix(gateway): accept any inbound file type across all messaging platforms

Authorization to message the agent is the gate, not the file extension.
Previously the inbound-attachment allowlist (SUPPORTED_DOCUMENT_TYPES) was
opt-OUT on Discord (allow_any_attachment defaulted false) and had no bypass
at all on Telegram/Slack — so an .html (or any non-allowlisted type) was
dropped or hard-rejected before the agent saw it.

Now every authorized upload is cached and surfaced to the agent regardless
of type:
- base.cache_media_bytes(): unknown types cache as octet-stream (or the
  caller-supplied MIME) instead of returning None — fixes the chokepoint
  that Teams/Telegram-media route through.
- discord/telegram/slack adapters: removed the allowlist reject/skip; any
  non-media attachment is typed DOCUMENT and cached. Known types keep their
  precise MIME.
- Text inlining now gates on a shared _TEXT_INJECT_EXTENSIONS set (text +
  code + config + markup) instead of a blind UTF-8 decode, so binary formats
  (PDF/zip/docx) with ASCII headers are never inlined.
- gateway/run.py emits the path-pointing context note for every DOCUMENT,
  including non text/application MIME types.
- discord.allow_any_attachment is now a documented no-op kept for config
  back-compat.

Validation: 357 gateway tests pass; E2E confirms .html/.bin/custom types
cache, known types stay precise, PDFs are not inlined.
This commit is contained in:
teknium1 2026-06-21 20:31:40 -07:00 committed by Teknium
parent de6b3ae377
commit 4314d451ca
10 changed files with 239 additions and 192 deletions

View file

@ -1248,6 +1248,33 @@ SUPPORTED_DOCUMENT_TYPES = {
}
# ---------------------------------------------------------------------------
# Text-injection extension allowlist
#
# Files whose contents are safe to inline into the prompt (UTF-8 text) when
# small enough. This is intentionally an extension/MIME gate, NOT a blind
# UTF-8 decode: binary formats like PDF/zip/docx can begin with decodable
# ASCII headers and must never be inlined. Any uploaded file is still cached
# and surfaced to the agent regardless of whether it lands in this set —
# this only controls inline-vs-path-pointer for the prompt.
# ---------------------------------------------------------------------------
_TEXT_INJECT_EXTENSIONS = {
    # Prose, documentation, and subtitle formats.
    ".txt", ".md", ".markdown", ".rst", ".tex", ".srt", ".vtt",
    # Tabular data, logs, and diffs.
    ".csv", ".tsv", ".log", ".diff", ".patch",
    # Structured data and configuration files.
    ".json", ".jsonl", ".ndjson", ".xml", ".yaml", ".yml", ".toml",
    ".ini", ".cfg", ".conf", ".env", ".properties",
    # Web markup and stylesheets.
    ".html", ".htm", ".css", ".scss", ".sass", ".less",
    # Python, JavaScript/TypeScript, and shell scripts.
    ".py", ".pyi", ".js", ".mjs", ".cjs", ".ts", ".tsx", ".jsx",
    ".sh", ".bash", ".zsh", ".fish", ".ps1", ".bat",
    # Other programming-language sources.
    ".c", ".h", ".cpp", ".cc", ".hpp", ".cs", ".java", ".kt",
    ".go", ".rs", ".rb", ".php", ".pl", ".lua", ".r", ".jl",
    ".swift", ".m", ".scala", ".clj", ".ex", ".exs", ".erl",
    # Query, schema, and infrastructure definitions.
    ".sql", ".graphql", ".proto", ".tf", ".hcl",
    # Build tooling.
    ".dockerfile", ".makefile", ".cmake", ".gradle",
}
# ---------------------------------------------------------------------------
# Image document types
#
@ -1454,9 +1481,10 @@ def cache_media_bytes(
``default_kind`` ("image"/"video"/"audio"/"document") biases classification
when the extension/MIME are ambiguous — e.g. a Telegram native photo whose
file has no usable name. Unsupported document types return None so the
caller can record an "unsupported" note. Images that fail validation
(``cache_image_from_bytes`` raises ValueError) also return None.
file has no usable name. Any non-image/video/audio file is cached as a
document and surfaced to the agent (arbitrary types get
``application/octet-stream``); only images that fail validation
(``cache_image_from_bytes`` raises ValueError) return None.
"""
from tools.credential_files import to_agent_visible_cache_path
@ -1492,11 +1520,20 @@ def cache_media_bytes(
out_mime = mime if mime.startswith("audio/") else f"audio/{aud_ext.lstrip('.')}"
return CachedMedia(to_agent_visible_cache_path(path), out_mime, "audio", display)
if ext not in SUPPORTED_DOCUMENT_TYPES:
return None
path = cache_document_from_bytes(data, filename or f"document{ext}")
return CachedMedia(to_agent_visible_cache_path(path), SUPPORTED_DOCUMENT_TYPES[ext], "document", display or f"document{ext}")
# Any other file type is cached and surfaced to the agent as a local path
# so it can be inspected with terminal / read_file / etc. Authorization to
# talk to the agent is the gate that matters — once a user is allowed to
# message it, the file-extension allowlist must not silently drop their
# uploads. Known extensions keep their precise MIME; everything else is
# tagged application/octet-stream (or the caller-supplied MIME) so the
# agent knows it's an arbitrary file and reaches for terminal tools.
fallback_name = filename or (f"document{ext}" if ext else "document.bin")
path = cache_document_from_bytes(data, fallback_name)
if ext in SUPPORTED_DOCUMENT_TYPES:
out_mime = SUPPORTED_DOCUMENT_TYPES[ext]
else:
out_mime = mime if mime else "application/octet-stream"
return CachedMedia(to_agent_visible_cache_path(path), out_mime, "document", display or fallback_name)
class MessageType(Enum):

View file

@ -8688,8 +8688,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
guessed, _ = _mimetypes.guess_type(path)
if guessed:
mtype = guessed
if not mtype.startswith(("application/", "text/")):
continue
else:
mtype = "application/octet-stream"
# Any accepted file gets a path-pointing context note — we accept
# all file types now, so a non-text/non-application MIME (font/*,
# model/*, etc.) must still tell the agent the file exists.
basename = os.path.basename(path)
parts = basename.split("_", 2)

View file

@ -2118,12 +2118,11 @@ DEFAULT_CONFIG = {
# list_roles, member_info, search_members, fetch_messages, list_pins,
# pin_message, unpin_message, create_thread, add_role, remove_role.
"server_actions": "",
# Accept arbitrary attachment file types (not just SUPPORTED_DOCUMENT_TYPES).
# When True, any uploaded file is cached to disk with mime
# application/octet-stream and the path is surfaced to the agent so it
# can use terminal/read_file/etc. against it. Default False preserves
# the historical allowlist behaviour.
# Env override: DISCORD_ALLOW_ANY_ATTACHMENT.
# DEPRECATED / no-op. Any uploaded file is now always cached and
# surfaced to the agent regardless of file type — authorization to
# message the agent is the gate, not the extension. Kept so existing
# configs that set it do not error. Env override:
# DISCORD_ALLOW_ANY_ATTACHMENT.
"allow_any_attachment": False,
# Maximum bytes per attachment the gateway will cache. The whole file
# is held in memory while being written, so unlimited uploads carry a

View file

@ -116,6 +116,7 @@ from gateway.platforms.base import (
cache_audio_from_bytes,
cache_document_from_bytes,
SUPPORTED_DOCUMENT_TYPES,
_TEXT_INJECT_EXTENSIONS,
validate_inbound_media_size,
)
from tools.url_safety import is_safe_url
@ -5288,8 +5289,9 @@ class DiscordAdapter(BasePlatformAdapter):
if normalized_content.startswith("/"):
msg_type = MessageType.COMMAND
elif all_attachments:
_allow_any = self._discord_allow_any_attachment()
# Check attachment types
# Check attachment types. Any non-media attachment is treated as a
# DOCUMENT regardless of extension — authorization to message the
# agent is the gate, not the file type.
for att in all_attachments:
if att.content_type:
if att.content_type.startswith("image/"):
@ -5302,14 +5304,9 @@ class DiscordAdapter(BasePlatformAdapter):
else:
msg_type = MessageType.AUDIO
else:
doc_ext = ""
if att.filename:
_, doc_ext = os.path.splitext(att.filename)
doc_ext = doc_ext.lower()
if doc_ext in SUPPORTED_DOCUMENT_TYPES or _allow_any:
msg_type = MessageType.DOCUMENT
msg_type = MessageType.DOCUMENT
break
elif _allow_any:
else:
# No content_type at all (rare — discord usually fills it
# in). Treat as a document so downstream pipelines surface
# the path to the agent.
@ -5398,71 +5395,79 @@ class DiscordAdapter(BasePlatformAdapter):
if not ext and content_type:
mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
ext = mime_to_ext.get(content_type, "")
allow_any_attachment = self._discord_allow_any_attachment()
in_allowlist = ext in SUPPORTED_DOCUMENT_TYPES
if not in_allowlist and not allow_any_attachment:
# Any file type is accepted — authorization to message the agent
# is the gate, not the file extension. Known types keep their
# precise MIME; unknown types fall back to the source content_type
# or octet-stream so the agent reaches for terminal tools.
max_doc_bytes = self._discord_max_attachment_bytes()
if max_doc_bytes and att.size and att.size > max_doc_bytes:
logger.warning(
"[Discord] Unsupported document type '%s' (%s), skipping",
ext or "unknown", content_type,
"[Discord] Document too large (%s bytes > cap %s), skipping: %s",
att.size, max_doc_bytes, att.filename,
)
else:
max_doc_bytes = self._discord_max_attachment_bytes()
if max_doc_bytes and att.size and att.size > max_doc_bytes:
logger.warning(
"[Discord] Document too large (%s bytes > cap %s), skipping: %s",
att.size, max_doc_bytes, att.filename,
try:
raw_bytes = await self._cache_discord_document(att, ext)
cached_path = cache_document_from_bytes(
raw_bytes, att.filename or f"document{ext or '.bin'}"
)
else:
try:
raw_bytes = await self._cache_discord_document(att, ext)
cached_path = cache_document_from_bytes(
raw_bytes, att.filename or f"document{ext or '.bin'}"
)
if in_allowlist:
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
else:
# allow_any_attachment path: untyped file. Use the
# source content_type if discord gave us one,
# otherwise fall back to octet-stream so the agent
# knows it's binary and reaches for terminal tools.
doc_mime = (
content_type
if content_type and content_type != "unknown"
else "application/octet-stream"
)
media_urls.append(cached_path)
media_types.append(doc_mime)
logger.info(
"[Discord] Cached user %s: %s",
"document" if in_allowlist else "attachment",
cached_path,
)
# Inject text content for plain-text documents (capped at 100 KB)
MAX_TEXT_INJECT_BYTES = 100 * 1024
if in_allowlist and ext in {".md", ".txt", ".log"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
try:
text_content = raw_bytes.decode("utf-8")
display_name = att.filename or f"document{ext}"
display_name = re.sub(r'[^\w.\- ]', '_', display_name)
injection = f"[Content of {display_name}]:\n{text_content}"
if pending_text_injection:
pending_text_injection = f"{pending_text_injection}\n\n{injection}"
else:
pending_text_injection = injection
except UnicodeDecodeError:
pass
# NOTE: for the allow_any_attachment path we deliberately
# do NOT inject a path string here. ``gateway/run.py``
# already detects DOCUMENT-typed events with
# ``application/octet-stream`` MIME and emits a context
# note with the sandbox-translated cache path via
# ``to_agent_visible_cache_path()`` (important for
# Docker/Modal terminal backends).
except Exception as e:
logger.warning(
"[Discord] Failed to cache document %s: %s",
att.filename, e, exc_info=True,
if in_allowlist:
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
else:
# Untyped file. Use the source content_type if
# discord gave us one, otherwise fall back to
# octet-stream so the agent knows it's binary and
# reaches for terminal tools.
doc_mime = (
content_type
if content_type and content_type != "unknown"
else "application/octet-stream"
)
media_urls.append(cached_path)
media_types.append(doc_mime)
logger.info(
"[Discord] Cached user %s: %s",
"document" if in_allowlist else "attachment",
cached_path,
)
# Inject text content for text-readable documents
# (capped at 100 KB). Gate on a text-like extension/MIME
# — NOT a blind UTF-8 decode, since binary formats like
# PDF/zip/docx can have decodable ASCII headers. Unknown
# but clearly-textual types (text/* MIME or a known text
# extension) are inlined too; everything else relies on
# ``gateway/run.py`` to emit a path-pointing context note.
MAX_TEXT_INJECT_BYTES = 100 * 1024
_is_text = (
ext in _TEXT_INJECT_EXTENSIONS
or (content_type or "").startswith("text/")
)
if _is_text and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
try:
text_content = raw_bytes.decode("utf-8")
display_name = att.filename or f"document{ext or '.txt'}"
display_name = re.sub(r'[^\w.\- ]', '_', display_name)
injection = f"[Content of {display_name}]:\n{text_content}"
if pending_text_injection:
pending_text_injection = f"{pending_text_injection}\n\n{injection}"
else:
pending_text_injection = injection
except UnicodeDecodeError:
pass
# NOTE: for the untyped-attachment path we deliberately
# do NOT inject a path string here. ``gateway/run.py``
# already detects DOCUMENT-typed events with
# ``application/octet-stream`` MIME and emits a context
# note with the sandbox-translated cache path via
# ``to_agent_visible_cache_path()`` (important for
# Docker/Modal terminal backends).
except Exception as e:
logger.warning(
"[Discord] Failed to cache document %s: %s",
att.filename, e, exc_info=True,
)
# Use normalized_content (saved before auto-threading) instead of message.content,
# to detect /slash commands in channel messages.

View file

@ -46,6 +46,7 @@ from gateway.platforms.base import (
SendResult,
SUPPORTED_DOCUMENT_TYPES,
SUPPORTED_VIDEO_TYPES,
_TEXT_INJECT_EXTENSIONS,
is_host_excluded_by_no_proxy,
resolve_proxy_url,
safe_url_for_log,
@ -2698,8 +2699,12 @@ class SlackAdapter(BasePlatformAdapter):
}
ext = mime_to_ext.get(mimetype, "")
if ext not in SUPPORTED_DOCUMENT_TYPES:
continue # Skip unsupported file types silently
# Any file type is accepted — authorization to message the
# agent is the gate, not the file extension. Known types keep
# their precise MIME; unknown types fall back to the source
# mimetype or octet-stream so the agent reaches for terminal
# tools.
in_allowlist = ext in SUPPORTED_DOCUMENT_TYPES
# Check file size (Slack limit: 20 MB for bots)
file_size = f.get("size", 0)
@ -2715,36 +2720,28 @@ class SlackAdapter(BasePlatformAdapter):
url, team_id=team_id
)
cached_path = cache_document_from_bytes(
raw_bytes, original_filename or f"document{ext}"
raw_bytes, original_filename or f"document{ext or '.bin'}"
)
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
if in_allowlist:
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
else:
doc_mime = mimetype or "application/octet-stream"
media_urls.append(cached_path)
media_types.append(doc_mime)
logger.debug("[Slack] Cached user document: %s", cached_path)
logger.debug("[Slack] Cached user document: %s (%s)", cached_path, doc_mime)
# Inject small text-ish files directly into the prompt so
# snippets like JSON/YAML/configs are actually visible to the agent.
# snippets like JSON/YAML/configs are actually visible to the
# agent. Gate on a text-like extension/MIME — NOT a blind
# UTF-8 decode, since binary formats (PDF/zip/docx) can have
# decodable ASCII headers. Binary files are surfaced as a
# cached path only (run.py emits a path-pointing note).
MAX_TEXT_INJECT_BYTES = 100 * 1024
TEXT_INJECT_EXTENSIONS = {
".md",
".txt",
".csv",
".log",
".json",
".xml",
".yaml",
".yml",
".toml",
".ini",
".cfg",
}
if (
ext in TEXT_INJECT_EXTENSIONS
and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES
):
_is_text = ext in _TEXT_INJECT_EXTENSIONS or (mimetype or "").startswith("text/")
if _is_text and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
try:
text_content = raw_bytes.decode("utf-8")
display_name = original_filename or f"document{ext}"
display_name = original_filename or f"document{ext or '.txt'}"
display_name = re.sub(r"[^\w.\- ]", "_", display_name)
injection = f"[Content of {display_name}]:\n{text_content}"
if text:

View file

@ -81,6 +81,7 @@ from gateway.platforms.base import (
SUPPORTED_VIDEO_TYPES,
SUPPORTED_DOCUMENT_TYPES,
SUPPORTED_IMAGE_DOCUMENT_TYPES,
_TEXT_INJECT_EXTENSIONS,
utf16_len,
)
from plugins.platforms.telegram.telegram_network import (
@ -6526,33 +6527,30 @@ class TelegramAdapter(BasePlatformAdapter):
# ext-in-SUPPORTED_IMAGE_DOCUMENT_TYPES branch would be dead
# code — the extension sets are identical.
# Check if supported
if ext not in SUPPORTED_DOCUMENT_TYPES:
supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
event.text = (
f"Unsupported document type '{ext or 'unknown'}'. "
f"Supported types: {supported_list}"
)
logger.info("[Telegram] Unsupported document type: %s", ext or "unknown")
await self.handle_message(event)
return
# Download and cache
# Download and cache. Any file type is accepted — authorization
# to message the agent is the gate, not the file extension.
# Known types keep their precise MIME; unknown types are tagged
# application/octet-stream so the agent reaches for terminal tools.
file_obj = await doc.get_file()
doc_bytes = await file_obj.download_as_bytearray()
raw_bytes = bytes(doc_bytes)
cached_path = cache_document_from_bytes(raw_bytes, original_filename or f"document{ext}")
mime_type = SUPPORTED_DOCUMENT_TYPES[ext]
cached_path = cache_document_from_bytes(raw_bytes, original_filename or f"document{ext or '.bin'}")
mime_type = SUPPORTED_DOCUMENT_TYPES.get(ext) or doc.mime_type or "application/octet-stream"
event.media_urls = [cached_path]
event.media_types = [mime_type]
logger.info("[Telegram] Cached user document at %s", cached_path)
logger.info("[Telegram] Cached user document at %s (%s)", cached_path, mime_type)
# For text files, inject content into event.text (capped at 100 KB)
# For text-readable files, inject content into event.text (capped
# at 100 KB). Gate on a text-like extension/MIME — NOT a blind
# UTF-8 decode, since binary formats (PDF/zip/docx) can have
# decodable ASCII headers. Binary files are surfaced as a cached
# path only (run.py emits a path-pointing context note).
MAX_TEXT_INJECT_BYTES = 100 * 1024
if ext in {".md", ".txt"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
_is_text = ext in _TEXT_INJECT_EXTENSIONS or (doc_mime or "").startswith("text/")
if _is_text and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
try:
text_content = raw_bytes.decode("utf-8")
display_name = original_filename or f"document{ext}"
display_name = original_filename or f"document{ext or '.txt'}"
display_name = re.sub(r'[^\w.\- ]', '_', display_name)
injection = f"[Content of {display_name}]:\n{text_content}"
if event.text:
@ -6560,10 +6558,9 @@ class TelegramAdapter(BasePlatformAdapter):
else:
event.text = injection
except UnicodeDecodeError:
logger.warning(
"[Telegram] Could not decode text file as UTF-8, skipping content injection",
exc_info=True,
)
# Binary file — agent has the cached path and can use
# terminal/read_file against it. No inline injection.
pass
except Exception as e:
logger.warning("[Telegram] Failed to cache document: %s", e, exc_info=True)

View file

@ -387,37 +387,18 @@ class TestIncomingDocumentHandling:
class TestAllowAnyAttachment:
"""Cover the discord.allow_any_attachment config flag.
"""Cover accept-any-file-type inbound handling.
With the flag off (default), unknown file types are dropped. With it on,
they get cached and surfaced to the agent as DOCUMENT events with
application/octet-stream MIME so gateway/run.py emits a path-pointing
context note.
Authorization to message the agent is the gate, not the file extension.
Unknown file types are cached and surfaced to the agent as DOCUMENT events
with the source content_type (or application/octet-stream) so gateway/run.py
emits a path-pointing context note. The legacy ``allow_any_attachment``
config flag is now a no-op — acceptance is unconditional.
"""
@pytest.mark.asyncio
async def test_unknown_type_skipped_by_default(self, adapter):
"""Default (flag off): unknown extension is dropped.
With no text + no cached media, the adapter may legitimately decline
to dispatch the event at all, so we don't assert on call_args here —
we just verify the file wasn't cached.
"""
with _mock_aiohttp_download(b"should not be cached"):
msg = make_message([
make_attachment(filename="weird.xyz", content_type="application/x-custom")
])
await adapter._handle_message(msg)
if adapter.handle_message.call_args is not None:
event = adapter.handle_message.call_args[0][0]
assert event.media_urls == []
@pytest.mark.asyncio
async def test_unknown_type_cached_when_flag_on(self, adapter):
"""Flag on: unknown extension is cached as application/octet-stream."""
adapter.config.extra["allow_any_attachment"] = True
async def test_unknown_type_cached_by_default(self, adapter):
"""Default: unknown extension is cached, not dropped."""
with _mock_aiohttp_download(b"\x00\x01\x02 binary payload"):
msg = make_message([
make_attachment(filename="weird.xyz", content_type="application/x-custom")
@ -430,16 +411,29 @@ class TestAllowAnyAttachment:
# Falls back to the source content_type when we have one.
assert event.media_types == ["application/x-custom"]
assert event.message_type == MessageType.DOCUMENT
# We deliberately do NOT inline arbitrary bytes — run.py emits the
# path-pointing note based on DOCUMENT + octet-stream MIME.
# We deliberately do NOT inline arbitrary (non-UTF-8) bytes — run.py
# emits the path-pointing note based on DOCUMENT + octet-stream MIME.
assert "[Content of" not in (event.text or "")
@pytest.mark.asyncio
async def test_unknown_type_no_content_type_becomes_octet_stream(self, adapter):
"""Flag on + no content_type from discord: MIME falls back to octet-stream."""
adapter.config.extra["allow_any_attachment"] = True
async def test_html_cached_and_inlined(self, adapter):
"""An .html upload is cached and (being UTF-8 text) inlined."""
html = b"<html><body>hi</body></html>"
with _mock_aiohttp_download(html):
msg = make_message([
make_attachment(filename="page.html", content_type="text/html")
])
await adapter._handle_message(msg)
with _mock_aiohttp_download(b"raw bytes"):
event = adapter.handle_message.call_args[0][0]
assert len(event.media_urls) == 1
assert event.message_type == MessageType.DOCUMENT
assert event.media_types == ["text/html"]
@pytest.mark.asyncio
async def test_unknown_type_no_content_type_becomes_octet_stream(self, adapter):
"""No content_type from discord: MIME falls back to octet-stream."""
with _mock_aiohttp_download(b"\x00raw bytes\x01"):
msg = make_message([
make_attachment(filename="mystery.bin", content_type=None)
])
@ -452,7 +446,6 @@ class TestAllowAnyAttachment:
@pytest.mark.asyncio
async def test_max_attachment_bytes_caps_uploads(self, adapter):
"""discord.max_attachment_bytes overrides the historical 32 MiB cap."""
adapter.config.extra["allow_any_attachment"] = True
adapter.config.extra["max_attachment_bytes"] = 1024 # 1 KiB
msg = make_message([
@ -470,7 +463,6 @@ class TestAllowAnyAttachment:
@pytest.mark.asyncio
async def test_max_attachment_bytes_zero_means_unlimited(self, adapter):
"""max_attachment_bytes=0 disables the size cap entirely."""
adapter.config.extra["allow_any_attachment"] = True
adapter.config.extra["max_attachment_bytes"] = 0
# 64 MiB — would normally exceed the historical 32 MiB hardcoded cap.
@ -488,14 +480,12 @@ class TestAllowAnyAttachment:
assert len(event.media_urls) == 1
@pytest.mark.asyncio
async def test_allowlisted_doc_unchanged_when_flag_on(self, adapter):
"""Flag on must not change handling of types already in SUPPORTED_DOCUMENT_TYPES.
async def test_allowlisted_doc_unchanged(self, adapter):
"""Types already in SUPPORTED_DOCUMENT_TYPES keep canonical handling.
A .txt should still get its content inlined (the historical behavior),
and the MIME should still be the canonical text/plain not whatever
discord guessed.
A .txt should still get its content inlined, and the MIME should still
be the canonical text/plain — not whatever discord guessed.
"""
adapter.config.extra["allow_any_attachment"] = True
file_content = b"still a text file"
with _mock_aiohttp_download(file_content):
@ -510,14 +500,6 @@ class TestAllowAnyAttachment:
assert "still a text file" in event.text
assert event.media_types == ["text/plain"]
def test_helper_reads_env_fallback(self, adapter, monkeypatch):
"""Helper falls back to DISCORD_ALLOW_ANY_ATTACHMENT env var."""
assert adapter._discord_allow_any_attachment() is False
monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "true")
assert adapter._discord_allow_any_attachment() is True
monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "no")
assert adapter._discord_allow_any_attachment() is False
def test_helper_config_overrides_env(self, adapter, monkeypatch):
"""config.yaml setting wins over env var."""
monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "true")

View file

@ -218,10 +218,25 @@ class TestCacheMediaBytes:
assert result.kind == "document"
assert result.media_type == "text/csv"
def test_unsupported_document_returns_none(self):
def test_unknown_document_cached_as_octet_stream(self):
"""Unknown file types are cached (not dropped) so the agent can inspect them.
Authorization to message the agent is the gate, not the file extension.
"""
from gateway.platforms.base import cache_media_bytes
result = cache_media_bytes(b"MZ", filename="malware.exe", mime_type="application/x-msdownload")
assert result is None
result = cache_media_bytes(b"MZ", filename="program.exe", mime_type="application/x-msdownload")
assert result is not None
assert result.kind == "document"
# Caller-supplied MIME is preserved when present.
assert result.media_type == "application/x-msdownload"
assert os.path.exists(result.path)
def test_unknown_document_no_mime_falls_back_to_octet_stream(self):
from gateway.platforms.base import cache_media_bytes
result = cache_media_bytes(b"\x00\x01\x02", filename="mystery.qux", mime_type="")
assert result is not None
assert result.kind == "document"
assert result.media_type == "application/octet-stream"
def test_invalid_image_returns_none(self):
from gateway.platforms.base import cache_media_bytes

View file

@ -336,14 +336,25 @@ class TestDocumentDownloadBlock:
assert event.media_types == ["application/pdf"]
@pytest.mark.asyncio
async def test_missing_filename_and_mime_rejected(self, adapter):
doc = _make_document(file_name=None, mime_type=None, file_size=100)
async def test_missing_filename_and_mime_cached_as_octet_stream(self, adapter):
"""No filename and no mime: cached anyway as application/octet-stream.
Authorization to message the agent is the gate, not the file type — an
untyped upload is still surfaced to the agent as a cached path.
"""
content = b"\x00\x01\x02 untyped payload"
file_obj = _make_file_obj(content)
doc = _make_document(
file_name=None, mime_type=None, file_size=len(content), file_obj=file_obj,
)
msg = _make_message(document=doc)
update = _make_update(msg)
await adapter._handle_media_message(update, MagicMock())
event = adapter.handle_message.call_args[0][0]
assert "Unsupported" in event.text
assert len(event.media_urls) == 1
assert event.media_types == ["application/octet-stream"]
assert "Unsupported" not in (event.text or "")
@pytest.mark.asyncio
async def test_unicode_decode_error_handled(self, adapter):

View file

@ -617,24 +617,25 @@ Discord's per-upload size limit depends on the server's boost tier (25 MB free,
## Receiving Arbitrary File Types
By default the bot caches uploads that match a built-in allowlist — images, audio, video, PDF, text/markdown/csv/log, JSON/XML/YAML/TOML, zip, docx/xlsx/pptx. Anything else (a `.wav`, a `.bin`, a custom-extension dump) gets logged as `Unsupported document type` and dropped before the agent sees it.
Any file type a user uploads is accepted. Authorization to message the agent is the gate — not the file extension. Every upload is downloaded, cached under `~/.hermes/cache/documents/`, and surfaced to the agent as a `DOCUMENT`-typed message event so it can inspect the file with `terminal` (`ffprobe`, `unzip`, `file`, `strings`, etc.) or `read_file`.
To accept arbitrary file types, enable `discord.allow_any_attachment`:
- Known types (PDF, docx/xlsx/pptx, zip, images/audio/video, etc.) keep their precise MIME.
- Unknown types fall back to the upload's reported content type, or `application/octet-stream` when none is given.
- Small text-like files — those with a recognized text/code/config/markup extension (`.txt`, `.py`, `.json`, `.yaml`, `.html`, `.css`, ...) or a `text/*` content type — have their contents auto-injected into the prompt, up to 100 KiB, provided they decode as UTF-8. Everything else, including binary formats such as PDF/zip/docx, is surfaced as a path-pointing context note only (auto-translated for Docker/Modal sandboxed terminals via `to_agent_visible_cache_path`), so it doesn't blow up the context window.
The only inbound limit is the per-file size cap (default 32 MiB):
```yaml
discord:
allow_any_attachment: true
# Optional — raise/disable the per-file size cap. Default is 32 MiB.
# The whole file is held in memory while being cached, so unlimited
# uploads carry a real memory cost.
max_attachment_bytes: 33554432 # bytes; 0 = unlimited
```
When the flag is on, any uploaded file is downloaded, cached under `~/.hermes/cache/documents/`, and surfaced to the agent as a `DOCUMENT`-typed message event with `application/octet-stream` MIME. The agent receives a context note pointing at the local path (auto-translated for Docker/Modal sandboxed terminals via `to_agent_visible_cache_path`) and can inspect the file with `terminal` (`ffprobe`, `unzip`, `file`, `strings`, etc.) or `read_file`. The file body is **not** inlined into the prompt — only the path — so binary uploads don't blow up the context window.
Equivalent env var: `DISCORD_MAX_ATTACHMENT_BYTES=33554432` (or `0` for no cap).
Known-text formats already in the allowlist (`.txt`, `.md`, `.log`) continue to have their contents auto-injected up to 100 KiB; that behavior is unchanged when the flag is on.
Equivalent env vars: `DISCORD_ALLOW_ANY_ATTACHMENT=true` and `DISCORD_MAX_ATTACHMENT_BYTES=33554432` (or `0` for no cap).
The legacy `discord.allow_any_attachment` flag is now a no-op — any file type is always accepted — and is kept only so existing configs don't error.
:::warning Memory cost of unlimited
Disabling the size cap (`max_attachment_bytes: 0`) means a user can drop a multi-GB file on the bot and the gateway will dutifully buffer it through memory while caching to disk. Only set this in trusted single-user installs. For shared bots, keep the default 32 MiB or raise it conservatively.