diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 38bbec4cd66..46339b81471 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1248,6 +1248,33 @@ SUPPORTED_DOCUMENT_TYPES = { } +# --------------------------------------------------------------------------- +# Text-injection extension allowlist +# +# Files whose contents are safe to inline into the prompt (UTF-8 text) when +# small enough. This is intentionally an extension/MIME gate, NOT a blind +# UTF-8 decode: binary formats like PDF/zip/docx can begin with decodable +# ASCII headers and must never be inlined. Any uploaded file is still cached +# and surfaced to the agent regardless of whether it lands in this set — +# this only controls inline-vs-path-pointer for the prompt. +# --------------------------------------------------------------------------- + +_TEXT_INJECT_EXTENSIONS = { + ".txt", ".md", ".markdown", ".csv", ".tsv", ".log", + ".json", ".jsonl", ".ndjson", ".xml", ".yaml", ".yml", ".toml", + ".ini", ".cfg", ".conf", ".env", ".properties", + ".html", ".htm", ".css", ".scss", ".sass", ".less", + ".py", ".pyi", ".js", ".mjs", ".cjs", ".ts", ".tsx", ".jsx", + ".sh", ".bash", ".zsh", ".fish", ".ps1", ".bat", + ".c", ".h", ".cpp", ".cc", ".hpp", ".cs", ".java", ".kt", + ".go", ".rs", ".rb", ".php", ".pl", ".lua", ".r", ".jl", + ".swift", ".m", ".scala", ".clj", ".ex", ".exs", ".erl", + ".sql", ".graphql", ".proto", ".tf", ".hcl", + ".dockerfile", ".makefile", ".cmake", ".gradle", + ".rst", ".tex", ".srt", ".vtt", ".diff", ".patch", +} + + # --------------------------------------------------------------------------- # Image document types # @@ -1454,9 +1481,10 @@ def cache_media_bytes( ``default_kind`` ("image"/"video"/"audio"/"document") biases classification when the extension/MIME are ambiguous — e.g. a Telegram native photo whose - file has no usable name. Unsupported document types return None so the - caller can record an "unsupported" note. 
Images that fail validation - (``cache_image_from_bytes`` raises ValueError) also return None. + file has no usable name. Any non-image/video/audio file is cached as a + document and surfaced to the agent (arbitrary types get + ``application/octet-stream``); only images that fail validation + (``cache_image_from_bytes`` raises ValueError) return None. """ from tools.credential_files import to_agent_visible_cache_path @@ -1492,11 +1520,20 @@ def cache_media_bytes( out_mime = mime if mime.startswith("audio/") else f"audio/{aud_ext.lstrip('.')}" return CachedMedia(to_agent_visible_cache_path(path), out_mime, "audio", display) - if ext not in SUPPORTED_DOCUMENT_TYPES: - return None - - path = cache_document_from_bytes(data, filename or f"document{ext}") - return CachedMedia(to_agent_visible_cache_path(path), SUPPORTED_DOCUMENT_TYPES[ext], "document", display or f"document{ext}") + # Any other file type is cached and surfaced to the agent as a local path + # so it can be inspected with terminal / read_file / etc. Authorization to + # talk to the agent is the gate that matters — once a user is allowed to + # message it, the file-extension allowlist must not silently drop their + # uploads. Known extensions keep their precise MIME; everything else is + # tagged application/octet-stream (or the caller-supplied MIME) so the + # agent knows it's an arbitrary file and reaches for terminal tools. 
+ fallback_name = filename or (f"document{ext}" if ext else "document.bin") + path = cache_document_from_bytes(data, fallback_name) + if ext in SUPPORTED_DOCUMENT_TYPES: + out_mime = SUPPORTED_DOCUMENT_TYPES[ext] + else: + out_mime = mime if mime else "application/octet-stream" + return CachedMedia(to_agent_visible_cache_path(path), out_mime, "document", display or fallback_name) class MessageType(Enum): diff --git a/gateway/run.py b/gateway/run.py index 3b35d3e3638..5b7c63a42f9 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -8688,8 +8688,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew guessed, _ = _mimetypes.guess_type(path) if guessed: mtype = guessed - if not mtype.startswith(("application/", "text/")): - continue + else: + mtype = "application/octet-stream" + # Any accepted file gets a path-pointing context note — we accept + # all file types now, so a non-text/non-application MIME (font/*, + # model/*, etc.) must still tell the agent the file exists. basename = os.path.basename(path) parts = basename.split("_", 2) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index f51d3ee2fe3..49f516da15d 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -2118,12 +2118,11 @@ DEFAULT_CONFIG = { # list_roles, member_info, search_members, fetch_messages, list_pins, # pin_message, unpin_message, create_thread, add_role, remove_role. "server_actions": "", - # Accept arbitrary attachment file types (not just SUPPORTED_DOCUMENT_TYPES). - # When True, any uploaded file is cached to disk with mime - # application/octet-stream and the path is surfaced to the agent so it - # can use terminal/read_file/etc. against it. Default False preserves - # the historical allowlist behaviour. - # Env override: DISCORD_ALLOW_ANY_ATTACHMENT. + # DEPRECATED / no-op. Any uploaded file is now always cached and + # surfaced to the agent regardless of file type — authorization to + # message the agent is the gate, not the extension. 
Kept so existing + # configs that set it do not error. Env override: + # DISCORD_ALLOW_ANY_ATTACHMENT. "allow_any_attachment": False, # Maximum bytes per attachment the gateway will cache. The whole file # is held in memory while being written, so unlimited uploads carry a diff --git a/plugins/platforms/discord/adapter.py b/plugins/platforms/discord/adapter.py index 1fc6692eac5..dc62aabf763 100644 --- a/plugins/platforms/discord/adapter.py +++ b/plugins/platforms/discord/adapter.py @@ -116,6 +116,7 @@ from gateway.platforms.base import ( cache_audio_from_bytes, cache_document_from_bytes, SUPPORTED_DOCUMENT_TYPES, + _TEXT_INJECT_EXTENSIONS, validate_inbound_media_size, ) from tools.url_safety import is_safe_url @@ -5288,8 +5289,9 @@ class DiscordAdapter(BasePlatformAdapter): if normalized_content.startswith("/"): msg_type = MessageType.COMMAND elif all_attachments: - _allow_any = self._discord_allow_any_attachment() - # Check attachment types + # Check attachment types. Any non-media attachment is treated as a + # DOCUMENT regardless of extension — authorization to message the + # agent is the gate, not the file type. for att in all_attachments: if att.content_type: if att.content_type.startswith("image/"): @@ -5302,14 +5304,9 @@ class DiscordAdapter(BasePlatformAdapter): else: msg_type = MessageType.AUDIO else: - doc_ext = "" - if att.filename: - _, doc_ext = os.path.splitext(att.filename) - doc_ext = doc_ext.lower() - if doc_ext in SUPPORTED_DOCUMENT_TYPES or _allow_any: - msg_type = MessageType.DOCUMENT + msg_type = MessageType.DOCUMENT break - elif _allow_any: + else: # No content_type at all (rare — discord usually fills it # in). Treat as a document so downstream pipelines surface # the path to the agent. 
@@ -5398,71 +5395,79 @@ class DiscordAdapter(BasePlatformAdapter): if not ext and content_type: mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()} ext = mime_to_ext.get(content_type, "") - allow_any_attachment = self._discord_allow_any_attachment() in_allowlist = ext in SUPPORTED_DOCUMENT_TYPES - if not in_allowlist and not allow_any_attachment: + # Any file type is accepted — authorization to message the agent + # is the gate, not the file extension. Known types keep their + # precise MIME; unknown types fall back to the source content_type + # or octet-stream so the agent reaches for terminal tools. + max_doc_bytes = self._discord_max_attachment_bytes() + if max_doc_bytes and att.size and att.size > max_doc_bytes: logger.warning( - "[Discord] Unsupported document type '%s' (%s), skipping", - ext or "unknown", content_type, + "[Discord] Document too large (%s bytes > cap %s), skipping: %s", + att.size, max_doc_bytes, att.filename, ) else: - max_doc_bytes = self._discord_max_attachment_bytes() - if max_doc_bytes and att.size and att.size > max_doc_bytes: - logger.warning( - "[Discord] Document too large (%s bytes > cap %s), skipping: %s", - att.size, max_doc_bytes, att.filename, + try: + raw_bytes = await self._cache_discord_document(att, ext) + cached_path = cache_document_from_bytes( + raw_bytes, att.filename or f"document{ext or '.bin'}" ) - else: - try: - raw_bytes = await self._cache_discord_document(att, ext) - cached_path = cache_document_from_bytes( - raw_bytes, att.filename or f"document{ext or '.bin'}" - ) - if in_allowlist: - doc_mime = SUPPORTED_DOCUMENT_TYPES[ext] - else: - # allow_any_attachment path: untyped file. Use the - # source content_type if discord gave us one, - # otherwise fall back to octet-stream so the agent - # knows it's binary and reaches for terminal tools. 
- doc_mime = ( - content_type - if content_type and content_type != "unknown" - else "application/octet-stream" - ) - media_urls.append(cached_path) - media_types.append(doc_mime) - logger.info( - "[Discord] Cached user %s: %s", - "document" if in_allowlist else "attachment", - cached_path, - ) - # Inject text content for plain-text documents (capped at 100 KB) - MAX_TEXT_INJECT_BYTES = 100 * 1024 - if in_allowlist and ext in {".md", ".txt", ".log"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES: - try: - text_content = raw_bytes.decode("utf-8") - display_name = att.filename or f"document{ext}" - display_name = re.sub(r'[^\w.\- ]', '_', display_name) - injection = f"[Content of {display_name}]:\n{text_content}" - if pending_text_injection: - pending_text_injection = f"{pending_text_injection}\n\n{injection}" - else: - pending_text_injection = injection - except UnicodeDecodeError: - pass - # NOTE: for the allow_any_attachment path we deliberately - # do NOT inject a path string here. ``gateway/run.py`` - # already detects DOCUMENT-typed events with - # ``application/octet-stream`` MIME and emits a context - # note with the sandbox-translated cache path via - # ``to_agent_visible_cache_path()`` (important for - # Docker/Modal terminal backends). - except Exception as e: - logger.warning( - "[Discord] Failed to cache document %s: %s", - att.filename, e, exc_info=True, + if in_allowlist: + doc_mime = SUPPORTED_DOCUMENT_TYPES[ext] + else: + # Untyped file. Use the source content_type if + # discord gave us one, otherwise fall back to + # octet-stream so the agent knows it's binary and + # reaches for terminal tools. 
+ doc_mime = ( + content_type + if content_type and content_type != "unknown" + else "application/octet-stream" ) + media_urls.append(cached_path) + media_types.append(doc_mime) + logger.info( + "[Discord] Cached user %s: %s", + "document" if in_allowlist else "attachment", + cached_path, + ) + # Inject small text-readable documents directly into the + # prompt so their contents are visible to the agent + # (capped at 100 KB). Gate on a text-like extension/MIME + # — NOT a blind UTF-8 decode, since binary formats like + # PDF/zip/docx can have decodable ASCII headers. Unknown + # but clearly-textual types (text/* MIME or a known text + # extension) are inlined too; everything else relies on + # ``gateway/run.py`` to emit a path-pointing context note. + MAX_TEXT_INJECT_BYTES = 100 * 1024 + _is_text = ( + ext in _TEXT_INJECT_EXTENSIONS + or (content_type or "").startswith("text/") + ) + if _is_text and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES: + try: + text_content = raw_bytes.decode("utf-8") + display_name = att.filename or f"document{ext or '.txt'}" + display_name = re.sub(r'[^\w.\- ]', '_', display_name) + injection = f"[Content of {display_name}]:\n{text_content}" + if pending_text_injection: + pending_text_injection = f"{pending_text_injection}\n\n{injection}" + else: + pending_text_injection = injection + except UnicodeDecodeError: + pass + # NOTE: for the untyped-attachment path we deliberately + # do NOT inject a path string here. ``gateway/run.py`` + # already detects DOCUMENT-typed events with + # ``application/octet-stream`` MIME and emits a context + # note with the sandbox-translated cache path via + # ``to_agent_visible_cache_path()`` (important for + # Docker/Modal terminal backends). + except Exception as e: + logger.warning( + "[Discord] Failed to cache document %s: %s", + att.filename, e, exc_info=True, + ) # Use normalized_content (saved before auto-threading) instead of message.content, # to detect /slash commands in channel messages. 
diff --git a/plugins/platforms/slack/adapter.py b/plugins/platforms/slack/adapter.py index 8bc0ed381e5..1ca68ec1666 100644 --- a/plugins/platforms/slack/adapter.py +++ b/plugins/platforms/slack/adapter.py @@ -46,6 +46,7 @@ from gateway.platforms.base import ( SendResult, SUPPORTED_DOCUMENT_TYPES, SUPPORTED_VIDEO_TYPES, + _TEXT_INJECT_EXTENSIONS, is_host_excluded_by_no_proxy, resolve_proxy_url, safe_url_for_log, @@ -2698,8 +2699,12 @@ class SlackAdapter(BasePlatformAdapter): } ext = mime_to_ext.get(mimetype, "") - if ext not in SUPPORTED_DOCUMENT_TYPES: - continue # Skip unsupported file types silently + # Any file type is accepted — authorization to message the + # agent is the gate, not the file extension. Known types keep + # their precise MIME; unknown types fall back to the source + # mimetype or octet-stream so the agent reaches for terminal + # tools. + in_allowlist = ext in SUPPORTED_DOCUMENT_TYPES # Check file size (Slack limit: 20 MB for bots) file_size = f.get("size", 0) @@ -2715,36 +2720,28 @@ class SlackAdapter(BasePlatformAdapter): url, team_id=team_id ) cached_path = cache_document_from_bytes( - raw_bytes, original_filename or f"document{ext}" + raw_bytes, original_filename or f"document{ext or '.bin'}" ) - doc_mime = SUPPORTED_DOCUMENT_TYPES[ext] + if in_allowlist: + doc_mime = SUPPORTED_DOCUMENT_TYPES[ext] + else: + doc_mime = mimetype or "application/octet-stream" media_urls.append(cached_path) media_types.append(doc_mime) - logger.debug("[Slack] Cached user document: %s", cached_path) + logger.debug("[Slack] Cached user document: %s (%s)", cached_path, doc_mime) # Inject small text-ish files directly into the prompt so - # snippets like JSON/YAML/configs are actually visible to the agent. + # snippets like JSON/YAML/configs are actually visible to the + # agent. Gate on a text-like extension/MIME — NOT a blind + # UTF-8 decode, since binary formats (PDF/zip/docx) can have + # decodable ASCII headers. 
Binary files are surfaced as a + # cached path only (run.py emits a path-pointing note). MAX_TEXT_INJECT_BYTES = 100 * 1024 - TEXT_INJECT_EXTENSIONS = { - ".md", - ".txt", - ".csv", - ".log", - ".json", - ".xml", - ".yaml", - ".yml", - ".toml", - ".ini", - ".cfg", - } - if ( - ext in TEXT_INJECT_EXTENSIONS - and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES - ): + _is_text = ext in _TEXT_INJECT_EXTENSIONS or (mimetype or "").startswith("text/") + if _is_text and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES: try: text_content = raw_bytes.decode("utf-8") - display_name = original_filename or f"document{ext}" + display_name = original_filename or f"document{ext or '.txt'}" display_name = re.sub(r"[^\w.\- ]", "_", display_name) injection = f"[Content of {display_name}]:\n{text_content}" if text: diff --git a/plugins/platforms/telegram/adapter.py b/plugins/platforms/telegram/adapter.py index 91cc4c14903..390acb61047 100644 --- a/plugins/platforms/telegram/adapter.py +++ b/plugins/platforms/telegram/adapter.py @@ -81,6 +81,7 @@ from gateway.platforms.base import ( SUPPORTED_VIDEO_TYPES, SUPPORTED_DOCUMENT_TYPES, SUPPORTED_IMAGE_DOCUMENT_TYPES, + _TEXT_INJECT_EXTENSIONS, utf16_len, ) from plugins.platforms.telegram.telegram_network import ( @@ -6526,33 +6527,30 @@ class TelegramAdapter(BasePlatformAdapter): # ext-in-SUPPORTED_IMAGE_DOCUMENT_TYPES branch would be dead # code — the extension sets are identical. - # Check if supported - if ext not in SUPPORTED_DOCUMENT_TYPES: - supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys())) - event.text = ( - f"Unsupported document type '{ext or 'unknown'}'. " - f"Supported types: {supported_list}" - ) - logger.info("[Telegram] Unsupported document type: %s", ext or "unknown") - await self.handle_message(event) - return - - # Download and cache + # Download and cache. Any file type is accepted — authorization + # to message the agent is the gate, not the file extension. 
+ # Known types keep their precise MIME; unknown types are tagged + # application/octet-stream so the agent reaches for terminal tools. file_obj = await doc.get_file() doc_bytes = await file_obj.download_as_bytearray() raw_bytes = bytes(doc_bytes) - cached_path = cache_document_from_bytes(raw_bytes, original_filename or f"document{ext}") - mime_type = SUPPORTED_DOCUMENT_TYPES[ext] + cached_path = cache_document_from_bytes(raw_bytes, original_filename or f"document{ext or '.bin'}") + mime_type = SUPPORTED_DOCUMENT_TYPES.get(ext) or doc.mime_type or "application/octet-stream" event.media_urls = [cached_path] event.media_types = [mime_type] - logger.info("[Telegram] Cached user document at %s", cached_path) + logger.info("[Telegram] Cached user document at %s (%s)", cached_path, mime_type) - # For text files, inject content into event.text (capped at 100 KB) + # For text-readable files, inject content into event.text (capped + # at 100 KB). Gate on a text-like extension/MIME — NOT a blind + # UTF-8 decode, since binary formats (PDF/zip/docx) can have + # decodable ASCII headers. Binary files are surfaced as a cached + # path only (run.py emits a path-pointing context note). 
MAX_TEXT_INJECT_BYTES = 100 * 1024 - if ext in {".md", ".txt"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES: + _is_text = ext in _TEXT_INJECT_EXTENSIONS or (doc_mime or "").startswith("text/") + if _is_text and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES: try: text_content = raw_bytes.decode("utf-8") - display_name = original_filename or f"document{ext}" + display_name = original_filename or f"document{ext or '.txt'}" display_name = re.sub(r'[^\w.\- ]', '_', display_name) injection = f"[Content of {display_name}]:\n{text_content}" if event.text: @@ -6560,10 +6558,9 @@ class TelegramAdapter(BasePlatformAdapter): else: event.text = injection except UnicodeDecodeError: - logger.warning( - "[Telegram] Could not decode text file as UTF-8, skipping content injection", - exc_info=True, - ) + # Binary file — agent has the cached path and can use + # terminal/read_file against it. No inline injection. + pass except Exception as e: logger.warning("[Telegram] Failed to cache document: %s", e, exc_info=True) diff --git a/tests/gateway/test_discord_document_handling.py b/tests/gateway/test_discord_document_handling.py index 7b75c4a07f6..c9f8f53c283 100644 --- a/tests/gateway/test_discord_document_handling.py +++ b/tests/gateway/test_discord_document_handling.py @@ -387,37 +387,18 @@ class TestIncomingDocumentHandling: class TestAllowAnyAttachment: - """Cover the discord.allow_any_attachment config flag. + """Cover accept-any-file-type inbound handling. - With the flag off (default), unknown file types are dropped. With it on, - they get cached and surfaced to the agent as DOCUMENT events with - application/octet-stream MIME so gateway/run.py emits a path-pointing - context note. + Authorization to message the agent is the gate, not the file extension. + Unknown file types are cached and surfaced to the agent as DOCUMENT events + with the source content_type (or application/octet-stream) so gateway/run.py + emits a path-pointing context note. 
The legacy ``allow_any_attachment`` + config flag is now a no-op — acceptance is unconditional. """ @pytest.mark.asyncio - async def test_unknown_type_skipped_by_default(self, adapter): - """Default (flag off): unknown extension is dropped. - - With no text + no cached media, the adapter may legitimately decline - to dispatch the event at all, so we don't assert on call_args here — - we just verify the file wasn't cached. - """ - with _mock_aiohttp_download(b"should not be cached"): - msg = make_message([ - make_attachment(filename="weird.xyz", content_type="application/x-custom") - ]) - await adapter._handle_message(msg) - - if adapter.handle_message.call_args is not None: - event = adapter.handle_message.call_args[0][0] - assert event.media_urls == [] - - @pytest.mark.asyncio - async def test_unknown_type_cached_when_flag_on(self, adapter): - """Flag on: unknown extension is cached as application/octet-stream.""" - adapter.config.extra["allow_any_attachment"] = True - + async def test_unknown_type_cached_by_default(self, adapter): + """Default: unknown extension is cached, not dropped.""" with _mock_aiohttp_download(b"\x00\x01\x02 binary payload"): msg = make_message([ make_attachment(filename="weird.xyz", content_type="application/x-custom") @@ -430,16 +411,29 @@ class TestAllowAnyAttachment: # Falls back to the source content_type when we have one. assert event.media_types == ["application/x-custom"] assert event.message_type == MessageType.DOCUMENT - # We deliberately do NOT inline arbitrary bytes — run.py emits the - # path-pointing note based on DOCUMENT + octet-stream MIME. + # We deliberately do NOT inline arbitrary (non-UTF-8) bytes — run.py + # emits the path-pointing note based on DOCUMENT + octet-stream MIME. 
assert "[Content of" not in (event.text or "") @pytest.mark.asyncio - async def test_unknown_type_no_content_type_becomes_octet_stream(self, adapter): - """Flag on + no content_type from discord: MIME falls back to octet-stream.""" - adapter.config.extra["allow_any_attachment"] = True + async def test_html_cached_and_inlined(self, adapter): + """An .html upload is cached and (being UTF-8 text) inlined.""" + html = b"hi" + with _mock_aiohttp_download(html): + msg = make_message([ + make_attachment(filename="page.html", content_type="text/html") + ]) + await adapter._handle_message(msg) - with _mock_aiohttp_download(b"raw bytes"): + event = adapter.handle_message.call_args[0][0] + assert len(event.media_urls) == 1 + assert event.message_type == MessageType.DOCUMENT + assert event.media_types == ["text/html"] + + @pytest.mark.asyncio + async def test_unknown_type_no_content_type_becomes_octet_stream(self, adapter): + """No content_type from discord: MIME falls back to octet-stream.""" + with _mock_aiohttp_download(b"\x00raw bytes\x01"): msg = make_message([ make_attachment(filename="mystery.bin", content_type=None) ]) @@ -452,7 +446,6 @@ class TestAllowAnyAttachment: @pytest.mark.asyncio async def test_max_attachment_bytes_caps_uploads(self, adapter): """discord.max_attachment_bytes overrides the historical 32 MiB cap.""" - adapter.config.extra["allow_any_attachment"] = True adapter.config.extra["max_attachment_bytes"] = 1024 # 1 KiB msg = make_message([ @@ -470,7 +463,6 @@ class TestAllowAnyAttachment: @pytest.mark.asyncio async def test_max_attachment_bytes_zero_means_unlimited(self, adapter): """max_attachment_bytes=0 disables the size cap entirely.""" - adapter.config.extra["allow_any_attachment"] = True adapter.config.extra["max_attachment_bytes"] = 0 # 64 MiB — would normally exceed the historical 32 MiB hardcoded cap. 
@@ -488,14 +480,12 @@ class TestAllowAnyAttachment: assert len(event.media_urls) == 1 @pytest.mark.asyncio - async def test_allowlisted_doc_unchanged_when_flag_on(self, adapter): - """Flag on must not change handling of types already in SUPPORTED_DOCUMENT_TYPES. + async def test_allowlisted_doc_unchanged(self, adapter): + """Types already in SUPPORTED_DOCUMENT_TYPES keep canonical handling. - A .txt should still get its content inlined (the historical behavior), - and the MIME should still be the canonical text/plain — not whatever - discord guessed. + A .txt should still get its content inlined, and the MIME should still + be the canonical text/plain — not whatever discord guessed. """ - adapter.config.extra["allow_any_attachment"] = True file_content = b"still a text file" with _mock_aiohttp_download(file_content): @@ -510,14 +500,6 @@ class TestAllowAnyAttachment: assert "still a text file" in event.text assert event.media_types == ["text/plain"] - def test_helper_reads_env_fallback(self, adapter, monkeypatch): - """Helper falls back to DISCORD_ALLOW_ANY_ATTACHMENT env var.""" - assert adapter._discord_allow_any_attachment() is False - monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "true") - assert adapter._discord_allow_any_attachment() is True - monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "no") - assert adapter._discord_allow_any_attachment() is False - def test_helper_config_overrides_env(self, adapter, monkeypatch): """config.yaml setting wins over env var.""" monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "true") diff --git a/tests/gateway/test_document_cache.py b/tests/gateway/test_document_cache.py index d3c01e59eb0..38cf510e28d 100644 --- a/tests/gateway/test_document_cache.py +++ b/tests/gateway/test_document_cache.py @@ -218,10 +218,25 @@ class TestCacheMediaBytes: assert result.kind == "document" assert result.media_type == "text/csv" - def test_unsupported_document_returns_none(self): + def 
test_unknown_document_cached_as_octet_stream(self): + """Unknown file types are cached (not dropped) so the agent can inspect them. + + Authorization to message the agent is the gate, not the file extension. + """ from gateway.platforms.base import cache_media_bytes - result = cache_media_bytes(b"MZ", filename="malware.exe", mime_type="application/x-msdownload") - assert result is None + result = cache_media_bytes(b"MZ", filename="program.exe", mime_type="application/x-msdownload") + assert result is not None + assert result.kind == "document" + # Caller-supplied MIME is preserved when present. + assert result.media_type == "application/x-msdownload" + assert os.path.exists(result.path) + + def test_unknown_document_no_mime_falls_back_to_octet_stream(self): + from gateway.platforms.base import cache_media_bytes + result = cache_media_bytes(b"\x00\x01\x02", filename="mystery.qux", mime_type="") + assert result is not None + assert result.kind == "document" + assert result.media_type == "application/octet-stream" def test_invalid_image_returns_none(self): from gateway.platforms.base import cache_media_bytes diff --git a/tests/gateway/test_telegram_documents.py b/tests/gateway/test_telegram_documents.py index b30f809fe39..a459f183c17 100644 --- a/tests/gateway/test_telegram_documents.py +++ b/tests/gateway/test_telegram_documents.py @@ -336,14 +336,25 @@ class TestDocumentDownloadBlock: assert event.media_types == ["application/pdf"] @pytest.mark.asyncio - async def test_missing_filename_and_mime_rejected(self, adapter): - doc = _make_document(file_name=None, mime_type=None, file_size=100) + async def test_missing_filename_and_mime_cached_as_octet_stream(self, adapter): + """No filename and no mime: cached anyway as application/octet-stream. + + Authorization to message the agent is the gate, not the file type — an + untyped upload is still surfaced to the agent as a cached path. 
+ """ + content = b"\x00\x01\x02 untyped payload" + file_obj = _make_file_obj(content) + doc = _make_document( + file_name=None, mime_type=None, file_size=len(content), file_obj=file_obj, + ) msg = _make_message(document=doc) update = _make_update(msg) await adapter._handle_media_message(update, MagicMock()) event = adapter.handle_message.call_args[0][0] - assert "Unsupported" in event.text + assert len(event.media_urls) == 1 + assert event.media_types == ["application/octet-stream"] + assert "Unsupported" not in (event.text or "") @pytest.mark.asyncio async def test_unicode_decode_error_handled(self, adapter): diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md index 6ffa44db6c5..e54d2aef212 100644 --- a/website/docs/user-guide/messaging/discord.md +++ b/website/docs/user-guide/messaging/discord.md @@ -617,24 +617,25 @@ Discord's per-upload size limit depends on the server's boost tier (25 MB free, ## Receiving Arbitrary File Types -By default the bot caches uploads that match a built-in allowlist — images, audio, video, PDF, text/markdown/csv/log, JSON/XML/YAML/TOML, zip, docx/xlsx/pptx. Anything else (a `.wav`, a `.bin`, a custom-extension dump) gets logged as `Unsupported document type` and dropped before the agent sees it. +Any file type a user uploads is accepted. Authorization to message the agent is the gate — not the file extension. Every upload is downloaded, cached under `~/.hermes/cache/documents/`, and surfaced to the agent as a `DOCUMENT`-typed message event so it can inspect the file with `terminal` (`ffprobe`, `unzip`, `file`, `strings`, etc.) or `read_file`. -To accept arbitrary file types, enable `discord.allow_any_attachment`: +- Known types (PDF, docx/xlsx/pptx, zip, images/audio/video, etc.) keep their precise MIME. +- Unknown types fall back to the upload's reported content type, or `application/octet-stream` when none is given. 
+- Small text files — those with a recognized text extension (text, code, config, HTML, CSS, JSON, YAML, ...) or a `text/*` content type — that decode as UTF-8 have their contents auto-injected into the prompt, up to 100 KiB per file. All other files, including any that fail UTF-8 decoding, are surfaced as a path-pointing context note only (auto-translated for Docker/Modal sandboxed terminals via `to_agent_visible_cache_path`), so they don't blow up the context window. + +The only inbound limit is the per-file size cap (default 32 MiB): ```yaml discord: - allow_any_attachment: true # Optional — raise/disable the per-file size cap. Default is 32 MiB. # The whole file is held in memory while being cached, so unlimited # uploads carry a real memory cost. max_attachment_bytes: 33554432 # bytes; 0 = unlimited ``` -When the flag is on, any uploaded file is downloaded, cached under `~/.hermes/cache/documents/`, and surfaced to the agent as a `DOCUMENT`-typed message event with `application/octet-stream` MIME. The agent receives a context note pointing at the local path (auto-translated for Docker/Modal sandboxed terminals via `to_agent_visible_cache_path`) and can inspect the file with `terminal` (`ffprobe`, `unzip`, `file`, `strings`, etc.) or `read_file`. The file body is **not** inlined into the prompt — only the path — so binary uploads don't blow up the context window. +Equivalent env var: `DISCORD_MAX_ATTACHMENT_BYTES=33554432` (or `0` for no cap). -Known-text formats already in the allowlist (`.txt`, `.md`, `.log`) continue to have their contents auto-injected up to 100 KiB; that behavior is unchanged when the flag is on. - -Equivalent env vars: `DISCORD_ALLOW_ANY_ATTACHMENT=true` and `DISCORD_MAX_ATTACHMENT_BYTES=33554432` (or `0` for no cap). +The legacy `discord.allow_any_attachment` flag is now a no-op — any file type is always accepted — and is kept only so existing configs don't error. 
:::warning Memory cost of unlimited Disabling the size cap (`max_attachment_bytes: 0`) means a user can drop a multi-GB file on the bot and the gateway will dutifully buffer it through memory while caching to disk. Only set this in trusted single-user installs. For shared bots, keep the default 32 MiB or raise it conservatively.