diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
index a3904630fa9..9b8285e2a36 100644
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -3564,6 +3564,59 @@ class DiscordAdapter(BasePlatformAdapter):
             return bool(configured)
         return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in {"false", "0", "no", "off"}
 
+    def _discord_allow_any_attachment(self) -> bool:
+        """Return whether Discord attachments bypass the SUPPORTED_DOCUMENT_TYPES allowlist.
+
+        When True, any uploaded file is cached to disk and surfaced to the
+        agent as a local path so it can be inspected via terminal / read_file
+        / ffprobe / etc. Default False preserves the historical behaviour of
+        dropping unsupported types with a warning log.
+
+        The ``allow_any_attachment`` config value wins over the
+        ``DISCORD_ALLOW_ANY_ATTACHMENT`` env var. This is an opt-in flag, so a
+        string from either source enables it only when it is an explicit
+        affirmative (true/1/yes/on); anything unrecognised leaves the
+        allowlist in force.
+        """
+        configured = self.config.extra.get("allow_any_attachment")
+        if configured is None:
+            configured = os.getenv("DISCORD_ALLOW_ANY_ATTACHMENT", "false")
+        if isinstance(configured, str):
+            return configured.strip().lower() in {"true", "1", "yes", "on"}
+        return bool(configured)
+
+    def _discord_max_attachment_bytes(self) -> int:
+        """Return the per-attachment byte cap. 0 means unlimited.
+
+        The whole attachment is held in memory while being written to the
+        cache, so unlimited carries a real memory cost. Default 32 MiB
+        matches the historical hardcoded value.
+
+        The ``max_attachment_bytes`` config value wins over the
+        ``DISCORD_MAX_ATTACHMENT_BYTES`` env var; a missing or empty value
+        falls through to the next source. Unparseable, boolean and negative
+        values fall back to the default with a warning, so only an explicit
+        0 lifts the cap and a typo can never silently disable it.
+        """
+        default_cap = 32 * 1024 * 1024
+        configured = self.config.extra.get("max_attachment_bytes")
+        if configured is None or configured == "":
+            configured = os.getenv("DISCORD_MAX_ATTACHMENT_BYTES")
+        if configured is None or configured == "":
+            return default_cap
+        try:
+            # bool is an int subclass: int(True) would silently mean a 1-byte cap.
+            value = None if isinstance(configured, bool) else int(configured)
+        except (TypeError, ValueError, OverflowError):
+            value = None
+        if value is None or value < 0:
+            logger.warning(
+                "[Discord] Invalid max_attachment_bytes value %r, falling back to 32 MiB",
+                configured,
+            )
+            return default_cap
+        return value
+
     def _discord_free_response_channels(self) -> set:
         """Return Discord channel IDs where no bot mention is required.
 
@@ -4495,6 +4532,7 @@ class DiscordAdapter(BasePlatformAdapter): if normalized_content.startswith("/"): msg_type = MessageType.COMMAND elif all_attachments: + _allow_any = self._discord_allow_any_attachment() # Check attachment types for att in all_attachments: if att.content_type: @@ -4509,9 +4547,15 @@ class DiscordAdapter(BasePlatformAdapter): if att.filename: _, doc_ext = os.path.splitext(att.filename) doc_ext = doc_ext.lower() - if doc_ext in SUPPORTED_DOCUMENT_TYPES: + if doc_ext in SUPPORTED_DOCUMENT_TYPES or _allow_any: msg_type = MessageType.DOCUMENT break + elif _allow_any: + # No content_type at all (rare — discord usually fills it + # in). Treat as a document so downstream pipelines surface + # the path to the agent. + msg_type = MessageType.DOCUMENT + break # When auto-threading kicked in, route responses to the new thread effective_channel = auto_threaded_channel or message.channel @@ -4594,31 +4638,48 @@ class DiscordAdapter(BasePlatformAdapter): if not ext and content_type: mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()} ext = mime_to_ext.get(content_type, "") - if ext not in SUPPORTED_DOCUMENT_TYPES: + allow_any_attachment = self._discord_allow_any_attachment() + in_allowlist = ext in SUPPORTED_DOCUMENT_TYPES + if not in_allowlist and not allow_any_attachment: logger.warning( "[Discord] Unsupported document type '%s' (%s), skipping", ext or "unknown", content_type, ) else: - MAX_DOC_BYTES = 32 * 1024 * 1024 - if att.size and att.size > MAX_DOC_BYTES: + max_doc_bytes = self._discord_max_attachment_bytes() + if max_doc_bytes and att.size and att.size > max_doc_bytes: logger.warning( - "[Discord] Document too large (%s bytes), skipping: %s", - att.size, att.filename, + "[Discord] Document too large (%s bytes > cap %s), skipping: %s", + att.size, max_doc_bytes, att.filename, ) else: try: raw_bytes = await self._cache_discord_document(att, ext) cached_path = cache_document_from_bytes( - raw_bytes, att.filename or f"document{ext}" + 
raw_bytes, att.filename or f"document{ext or '.bin'}" ) - doc_mime = SUPPORTED_DOCUMENT_TYPES[ext] + if in_allowlist: + doc_mime = SUPPORTED_DOCUMENT_TYPES[ext] + else: + # allow_any_attachment path: untyped file. Use the + # source content_type if discord gave us one, + # otherwise fall back to octet-stream so the agent + # knows it's binary and reaches for terminal tools. + doc_mime = ( + content_type + if content_type and content_type != "unknown" + else "application/octet-stream" + ) media_urls.append(cached_path) media_types.append(doc_mime) - logger.info("[Discord] Cached user document: %s", cached_path) + logger.info( + "[Discord] Cached user %s: %s", + "document" if in_allowlist else "attachment", + cached_path, + ) # Inject text content for plain-text documents (capped at 100 KB) MAX_TEXT_INJECT_BYTES = 100 * 1024 - if ext in {".md", ".txt", ".log"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES: + if in_allowlist and ext in {".md", ".txt", ".log"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES: try: text_content = raw_bytes.decode("utf-8") display_name = att.filename or f"document{ext}" @@ -4630,6 +4691,13 @@ class DiscordAdapter(BasePlatformAdapter): pending_text_injection = injection except UnicodeDecodeError: pass + # NOTE: for the allow_any_attachment path we deliberately + # do NOT inject a path string here. ``gateway/run.py`` + # already detects DOCUMENT-typed events with + # ``application/octet-stream`` MIME and emits a context + # note with the sandbox-translated cache path via + # ``to_agent_visible_cache_path()`` (important for + # Docker/Modal terminal backends). 
except Exception as e: logger.warning( "[Discord] Failed to cache document %s: %s", diff --git a/hermes_cli/config.py b/hermes_cli/config.py index c1f68e1c88c..c41158e42ae 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1306,6 +1306,18 @@ DEFAULT_CONFIG = { # list_roles, member_info, search_members, fetch_messages, list_pins, # pin_message, unpin_message, create_thread, add_role, remove_role. "server_actions": "", + # Accept arbitrary attachment file types (not just SUPPORTED_DOCUMENT_TYPES). + # When True, any uploaded file is cached to disk with mime + # application/octet-stream and the path is surfaced to the agent so it + # can use terminal/read_file/etc. against it. Default False preserves + # the historical allowlist behaviour. + # Env override: DISCORD_ALLOW_ANY_ATTACHMENT. + "allow_any_attachment": False, + # Maximum bytes per attachment the gateway will cache. The whole file + # is held in memory while being written, so unlimited uploads carry a + # real memory cost. Default 32 MiB matches the historical hardcoded + # cap. Set to 0 for no cap. Env override: DISCORD_MAX_ATTACHMENT_BYTES. + "max_attachment_bytes": 33554432, }, # WhatsApp platform settings (gateway mode) diff --git a/tests/gateway/test_discord_document_handling.py b/tests/gateway/test_discord_document_handling.py index d3ad137b61c..0685b69663a 100644 --- a/tests/gateway/test_discord_document_handling.py +++ b/tests/gateway/test_discord_document_handling.py @@ -384,3 +384,148 @@ class TestIncomingDocumentHandling: assert event.message_type == MessageType.PHOTO assert event.media_urls == ["/tmp/cached_image.png"] assert event.media_types == ["image/png"] + + +class TestAllowAnyAttachment: + """Cover the discord.allow_any_attachment config flag. + + With the flag off (default), unknown file types are dropped. With it on, + they get cached and surfaced to the agent as DOCUMENT events with + application/octet-stream MIME so gateway/run.py emits a path-pointing + context note. 
+ """ + + @pytest.mark.asyncio + async def test_unknown_type_skipped_by_default(self, adapter): + """Default (flag off): unknown extension is dropped. + + With no text + no cached media, the adapter may legitimately decline + to dispatch the event at all, so we don't assert on call_args here — + we just verify the file wasn't cached. + """ + with _mock_aiohttp_download(b"should not be cached"): + msg = make_message([ + make_attachment(filename="weird.xyz", content_type="application/x-custom") + ]) + await adapter._handle_message(msg) + + if adapter.handle_message.call_args is not None: + event = adapter.handle_message.call_args[0][0] + assert event.media_urls == [] + + @pytest.mark.asyncio + async def test_unknown_type_cached_when_flag_on(self, adapter): + """Flag on: unknown extension is cached as application/octet-stream.""" + adapter.config.extra["allow_any_attachment"] = True + + with _mock_aiohttp_download(b"\x00\x01\x02 binary payload"): + msg = make_message([ + make_attachment(filename="weird.xyz", content_type="application/x-custom") + ]) + await adapter._handle_message(msg) + + event = adapter.handle_message.call_args[0][0] + assert len(event.media_urls) == 1 + assert os.path.exists(event.media_urls[0]) + # Falls back to the source content_type when we have one. + assert event.media_types == ["application/x-custom"] + assert event.message_type == MessageType.DOCUMENT + # We deliberately do NOT inline arbitrary bytes — run.py emits the + # path-pointing note based on DOCUMENT + octet-stream MIME. 
+ assert "[Content of" not in (event.text or "") + + @pytest.mark.asyncio + async def test_unknown_type_no_content_type_becomes_octet_stream(self, adapter): + """Flag on + no content_type from discord: MIME falls back to octet-stream.""" + adapter.config.extra["allow_any_attachment"] = True + + with _mock_aiohttp_download(b"raw bytes"): + msg = make_message([ + make_attachment(filename="mystery.bin", content_type=None) + ]) + await adapter._handle_message(msg) + + event = adapter.handle_message.call_args[0][0] + assert event.message_type == MessageType.DOCUMENT + assert event.media_types == ["application/octet-stream"] + + @pytest.mark.asyncio + async def test_max_attachment_bytes_caps_uploads(self, adapter): + """discord.max_attachment_bytes overrides the historical 32 MiB cap.""" + adapter.config.extra["allow_any_attachment"] = True + adapter.config.extra["max_attachment_bytes"] = 1024 # 1 KiB + + msg = make_message([ + make_attachment( + filename="too_big.xyz", + content_type="application/x-custom", + size=2048, + ) + ]) + await adapter._handle_message(msg) + + event = adapter.handle_message.call_args[0][0] + assert event.media_urls == [] + + @pytest.mark.asyncio + async def test_max_attachment_bytes_zero_means_unlimited(self, adapter): + """max_attachment_bytes=0 disables the size cap entirely.""" + adapter.config.extra["allow_any_attachment"] = True + adapter.config.extra["max_attachment_bytes"] = 0 + + # 64 MiB — would normally exceed the historical 32 MiB hardcoded cap. + with _mock_aiohttp_download(b"x" * 16): + msg = make_message([ + make_attachment( + filename="huge.xyz", + content_type="application/x-custom", + size=64 * 1024 * 1024, + ) + ]) + await adapter._handle_message(msg) + + event = adapter.handle_message.call_args[0][0] + assert len(event.media_urls) == 1 + + @pytest.mark.asyncio + async def test_allowlisted_doc_unchanged_when_flag_on(self, adapter): + """Flag on must not change handling of types already in SUPPORTED_DOCUMENT_TYPES. 
+ + A .txt should still get its content inlined (the historical behavior), + and the MIME should still be the canonical text/plain — not whatever + discord guessed. + """ + adapter.config.extra["allow_any_attachment"] = True + file_content = b"still a text file" + + with _mock_aiohttp_download(file_content): + msg = make_message( + attachments=[make_attachment(filename="notes.txt", content_type="text/plain")], + content="check this", + ) + await adapter._handle_message(msg) + + event = adapter.handle_message.call_args[0][0] + assert "[Content of notes.txt]:" in event.text + assert "still a text file" in event.text + assert event.media_types == ["text/plain"] + + def test_helper_reads_env_fallback(self, adapter, monkeypatch): + """Helper falls back to DISCORD_ALLOW_ANY_ATTACHMENT env var.""" + assert adapter._discord_allow_any_attachment() is False + monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "true") + assert adapter._discord_allow_any_attachment() is True + monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "no") + assert adapter._discord_allow_any_attachment() is False + + def test_helper_config_overrides_env(self, adapter, monkeypatch): + """config.yaml setting wins over env var.""" + monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "true") + adapter.config.extra["allow_any_attachment"] = False + assert adapter._discord_allow_any_attachment() is False + + def test_max_bytes_helper_invalid_value_falls_back(self, adapter): + """Garbage in max_attachment_bytes config falls back to 32 MiB.""" + adapter.config.extra["max_attachment_bytes"] = "not-a-number" + assert adapter._discord_max_attachment_bytes() == 32 * 1024 * 1024 + diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 4866ac083ac..90aecba4412 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -258,6 +258,8 @@ For cloud sandbox backends, persistence is 
filesystem-oriented. `TERMINAL_LIFETI | `DISCORD_REQUIRE_MENTION` | Require an @mention before responding in server channels | | `DISCORD_FREE_RESPONSE_CHANNELS` | Comma-separated channel IDs where mention is not required | | `DISCORD_AUTO_THREAD` | Auto-thread long replies when supported | +| `DISCORD_ALLOW_ANY_ATTACHMENT` | When `true`, accept attachments of any file type (not just the built-in PDF/text/zip/office allowlist). Unknown types are cached and surfaced to the agent as a local path so it can inspect them via `terminal` / `read_file` / `ffprobe`. Default `false`. | +| `DISCORD_MAX_ATTACHMENT_BYTES` | Maximum bytes per attachment the gateway will cache. Default `33554432` (32 MiB). Set to `0` for no cap (attachments are held in memory while being written). | | `DISCORD_REACTIONS` | Enable emoji reactions on messages during processing (default: `true`) | | `DISCORD_IGNORED_CHANNELS` | Comma-separated channel IDs where the bot never responds | | `DISCORD_NO_THREAD_CHANNELS` | Comma-separated channel IDs where bot responds without auto-threading | diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md index 50f1641f093..5cad7a4a535 100644 --- a/website/docs/user-guide/messaging/discord.md +++ b/website/docs/user-guide/messaging/discord.md @@ -294,6 +294,8 @@ Discord behavior is controlled through two files: **`~/.hermes/.env`** for crede | `DISCORD_ALLOW_MENTION_USERS` | No | `true` | When `true` (default), the bot can ping individual users by ID. | | `DISCORD_ALLOW_MENTION_REPLIED_USER` | No | `true` | When `true` (default), replying to a message pings the original author. | | `DISCORD_PROXY` | No | — | Proxy URL for Discord connections (HTTP, WebSocket, REST). Overrides `HTTPS_PROXY`/`ALL_PROXY`. Supports `http://`, `https://`, and `socks5://` schemes. 
| +| `DISCORD_ALLOW_ANY_ATTACHMENT` | No | `false` | When `true`, the bot accepts attachments of any file type (not just the built-in PDF/text/zip/office allowlist). Unknown types are cached to disk and surfaced to the agent as a local path, keeping the content type Discord reported (or `application/octet-stream` when Discord supplies none), so it can inspect them with `terminal` / `read_file` / `ffprobe` / etc. | +| `DISCORD_MAX_ATTACHMENT_BYTES` | No | `33554432` | Maximum bytes per attachment the gateway will download and cache. Default 32 MiB. Set to `0` for no cap (attachments are held in memory while being written, so unlimited carries a real memory cost). | | `HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS` | No | `0.6` | Grace window the adapter waits before flushing a queued text chunk. Useful for smoothing streamed output. | | `HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS` | No | `2.0` | Delay between split chunks when a single message exceeds Discord's length limit. | @@ -613,6 +615,25 @@ The Discord adapter supports native file uploads for every common media type via Discord's per-upload size limit depends on the server's boost tier (25 MB free, up to 500 MB). If Hermes gets an HTTP 413, the adapter falls back to a link pointing at the local cache path rather than failing silently. +## Receiving Arbitrary File Types + +By default the bot caches uploads that match a built-in allowlist — images, audio, video, PDF, text/markdown/csv/log, JSON/XML/YAML/TOML, zip, docx/xlsx/pptx. Anything else (a `.wav`, a `.bin`, a custom-extension dump) gets logged as `Unsupported document type` and dropped before the agent sees it. + +To accept arbitrary file types, enable `discord.allow_any_attachment`: + +```yaml +discord: + allow_any_attachment: true + # Optional — raise/disable the per-file size cap. Default is 32 MiB. + # The whole file is held in memory while being cached, so unlimited + # uploads carry a real memory cost. 
+ max_attachment_bytes: 33554432 # bytes; 0 = unlimited +``` + +When the flag is on, any uploaded file is downloaded, cached under `~/.hermes/cache/documents/`, and surfaced to the agent as a `DOCUMENT`-typed message event. Files outside the allowlist keep the content type Discord reported, falling back to `application/octet-stream` when Discord supplies none. The agent receives a context note pointing at the local path (auto-translated for Docker/Modal sandboxed terminals via `to_agent_visible_cache_path`) and can inspect the file with `terminal` (`ffprobe`, `unzip`, `file`, `strings`, etc.) or `read_file`. The file body is **not** inlined into the prompt — only the path — so binary uploads don't blow up the context window. + +Known-text formats already in the allowlist (`.txt`, `.md`, `.log`) continue to have their contents auto-injected up to 100 KiB; that behavior is unchanged when the flag is on. + ## Home Channel You can designate a "home channel" where the bot sends proactive messages (such as cron job output, reminders, and notifications). There are two ways to set it: