mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
feat(discord): allow_any_attachment config to accept arbitrary file types
The Discord adapter silently dropped any attachment whose extension wasn't
in the SUPPORTED_DOCUMENT_TYPES allowlist (PDF, text family, zip, office).
Users uploading .wav / .bin / other unrecognized formats saw nothing in
their conversation — the file got logged as 'Unsupported document type'
and discarded before the agent ever saw it.
Add discord.allow_any_attachment (default false) to bypass the allowlist.
When on:
- Any file is downloaded, cached under ~/.hermes/cache/documents/, and
surfaced as a DOCUMENT-typed event with application/octet-stream MIME
- gateway/run.py already emits a context note with the cached path,
auto-translated via to_agent_visible_cache_path() for Docker/Modal
sandboxed terminals
- File body is NOT inlined — only the path — so binary uploads don't
blow up the context window
- Allowlisted text formats (.txt/.md/.log) keep their 100 KiB inline
behavior unchanged
Also adds discord.max_attachment_bytes (default 32 MiB matches the
historical hardcoded cap; 0 = unlimited) since users opting into arbitrary
types may want to raise the cap. The whole attachment is held in memory
while being cached, so unlimited carries a real memory cost.
Env overrides: DISCORD_ALLOW_ANY_ATTACHMENT, DISCORD_MAX_ATTACHMENT_BYTES.
Discord-only by deliberate scope. Telegram has hard 20 MB API limits and
Slack has its own caps — extending the same flag there is a separate
follow-up if/when requested.
This commit is contained in:
parent
3b39096904
commit
407a11b419
5 changed files with 258 additions and 10 deletions
|
|
@ -3564,6 +3564,43 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
return bool(configured)
|
||||
return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in {"false", "0", "no", "off"}
|
||||
|
||||
def _discord_allow_any_attachment(self) -> bool:
    """Return whether Discord attachments bypass the SUPPORTED_DOCUMENT_TYPES allowlist.

    When True, any uploaded file is cached to disk and surfaced to the
    agent as a local path so it can be inspected via terminal / read_file
    / ffprobe / etc. Default False preserves the historical behaviour of
    dropping unsupported types with a warning log.

    Resolution order:
      1. ``allow_any_attachment`` in ``self.config.extra`` (if not None)
      2. the ``DISCORD_ALLOW_ANY_ATTACHMENT`` environment variable
      3. False

    String values (from either source) enable the flag only when they
    are one of ``true`` / ``1`` / ``yes`` / ``on``, compared
    case-insensitively with surrounding whitespace ignored. Non-string
    config values are coerced with ``bool()``.
    """
    # This is an opt-in flag, so string parsing must fail closed: an
    # unrecognised or padded value such as "maybe" or " false " must not
    # switch the allowlist off by accident.
    truthy = {"true", "1", "yes", "on"}

    configured = self.config.extra.get("allow_any_attachment")
    if configured is None:
        configured = os.getenv("DISCORD_ALLOW_ANY_ATTACHMENT", "false")

    if isinstance(configured, str):
        return configured.strip().lower() in truthy
    return bool(configured)
|
||||
|
||||
def _discord_max_attachment_bytes(self) -> int:
    """Return the per-attachment byte cap. 0 means unlimited.

    The whole attachment is held in memory while being written to the
    cache, so unlimited carries a real memory cost. Default 32 MiB
    matches the historical hardcoded value.

    Resolution order:
      1. ``max_attachment_bytes`` in ``self.config.extra`` (if not None)
      2. the ``DISCORD_MAX_ATTACHMENT_BYTES`` environment variable
      3. 32 MiB

    An empty string is treated as "not set". A value that cannot be
    converted to an integer is logged and replaced by the default.
    Negative values are clamped to 0 (i.e. unlimited).
    """
    default_cap = 32 * 1024 * 1024

    configured = self.config.extra.get("max_attachment_bytes")
    if configured is None:
        configured = os.getenv("DISCORD_MAX_ATTACHMENT_BYTES")
    if configured is None or configured == "":
        return default_cap

    try:
        value = int(configured)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")), which is what a YAML
        # ``.inf`` value turns into; without it a bad config value would
        # raise out of the message handler instead of falling back.
        logger.warning(
            "[Discord] Invalid max_attachment_bytes value %r, falling back to 32 MiB",
            configured,
        )
        return default_cap

    return max(0, value)
|
||||
|
||||
def _discord_free_response_channels(self) -> set:
|
||||
"""Return Discord channel IDs where no bot mention is required.
|
||||
|
||||
|
|
@ -4495,6 +4532,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
if normalized_content.startswith("/"):
|
||||
msg_type = MessageType.COMMAND
|
||||
elif all_attachments:
|
||||
_allow_any = self._discord_allow_any_attachment()
|
||||
# Check attachment types
|
||||
for att in all_attachments:
|
||||
if att.content_type:
|
||||
|
|
@ -4509,9 +4547,15 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
if att.filename:
|
||||
_, doc_ext = os.path.splitext(att.filename)
|
||||
doc_ext = doc_ext.lower()
|
||||
if doc_ext in SUPPORTED_DOCUMENT_TYPES:
|
||||
if doc_ext in SUPPORTED_DOCUMENT_TYPES or _allow_any:
|
||||
msg_type = MessageType.DOCUMENT
|
||||
break
|
||||
elif _allow_any:
|
||||
# No content_type at all (rare — discord usually fills it
|
||||
# in). Treat as a document so downstream pipelines surface
|
||||
# the path to the agent.
|
||||
msg_type = MessageType.DOCUMENT
|
||||
break
|
||||
|
||||
# When auto-threading kicked in, route responses to the new thread
|
||||
effective_channel = auto_threaded_channel or message.channel
|
||||
|
|
@ -4594,31 +4638,48 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
if not ext and content_type:
|
||||
mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
|
||||
ext = mime_to_ext.get(content_type, "")
|
||||
if ext not in SUPPORTED_DOCUMENT_TYPES:
|
||||
allow_any_attachment = self._discord_allow_any_attachment()
|
||||
in_allowlist = ext in SUPPORTED_DOCUMENT_TYPES
|
||||
if not in_allowlist and not allow_any_attachment:
|
||||
logger.warning(
|
||||
"[Discord] Unsupported document type '%s' (%s), skipping",
|
||||
ext or "unknown", content_type,
|
||||
)
|
||||
else:
|
||||
MAX_DOC_BYTES = 32 * 1024 * 1024
|
||||
if att.size and att.size > MAX_DOC_BYTES:
|
||||
max_doc_bytes = self._discord_max_attachment_bytes()
|
||||
if max_doc_bytes and att.size and att.size > max_doc_bytes:
|
||||
logger.warning(
|
||||
"[Discord] Document too large (%s bytes), skipping: %s",
|
||||
att.size, att.filename,
|
||||
"[Discord] Document too large (%s bytes > cap %s), skipping: %s",
|
||||
att.size, max_doc_bytes, att.filename,
|
||||
)
|
||||
else:
|
||||
try:
|
||||
raw_bytes = await self._cache_discord_document(att, ext)
|
||||
cached_path = cache_document_from_bytes(
|
||||
raw_bytes, att.filename or f"document{ext}"
|
||||
raw_bytes, att.filename or f"document{ext or '.bin'}"
|
||||
)
|
||||
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
|
||||
if in_allowlist:
|
||||
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
|
||||
else:
|
||||
# allow_any_attachment path: untyped file. Use the
|
||||
# source content_type if discord gave us one,
|
||||
# otherwise fall back to octet-stream so the agent
|
||||
# knows it's binary and reaches for terminal tools.
|
||||
doc_mime = (
|
||||
content_type
|
||||
if content_type and content_type != "unknown"
|
||||
else "application/octet-stream"
|
||||
)
|
||||
media_urls.append(cached_path)
|
||||
media_types.append(doc_mime)
|
||||
logger.info("[Discord] Cached user document: %s", cached_path)
|
||||
logger.info(
|
||||
"[Discord] Cached user %s: %s",
|
||||
"document" if in_allowlist else "attachment",
|
||||
cached_path,
|
||||
)
|
||||
# Inject text content for plain-text documents (capped at 100 KB)
|
||||
MAX_TEXT_INJECT_BYTES = 100 * 1024
|
||||
if ext in {".md", ".txt", ".log"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
||||
if in_allowlist and ext in {".md", ".txt", ".log"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
||||
try:
|
||||
text_content = raw_bytes.decode("utf-8")
|
||||
display_name = att.filename or f"document{ext}"
|
||||
|
|
@ -4630,6 +4691,13 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
pending_text_injection = injection
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
# NOTE: for the allow_any_attachment path we deliberately
|
||||
# do NOT inject a path string here. ``gateway/run.py``
|
||||
# already detects DOCUMENT-typed events with
|
||||
# ``application/octet-stream`` MIME and emits a context
|
||||
# note with the sandbox-translated cache path via
|
||||
# ``to_agent_visible_cache_path()`` (important for
|
||||
# Docker/Modal terminal backends).
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"[Discord] Failed to cache document %s: %s",
|
||||
|
|
|
|||
|
|
@ -1306,6 +1306,18 @@ DEFAULT_CONFIG = {
|
|||
# list_roles, member_info, search_members, fetch_messages, list_pins,
|
||||
# pin_message, unpin_message, create_thread, add_role, remove_role.
|
||||
"server_actions": "",
|
||||
# Accept arbitrary attachment file types (not just SUPPORTED_DOCUMENT_TYPES).
|
||||
# When True, any uploaded file is cached to disk with mime
|
||||
# application/octet-stream and the path is surfaced to the agent so it
|
||||
# can use terminal/read_file/etc. against it. Default False preserves
|
||||
# the historical allowlist behaviour.
|
||||
# Env override: DISCORD_ALLOW_ANY_ATTACHMENT.
|
||||
"allow_any_attachment": False,
|
||||
# Maximum bytes per attachment the gateway will cache. The whole file
|
||||
# is held in memory while being written, so unlimited uploads carry a
|
||||
# real memory cost. Default 32 MiB matches the historical hardcoded
|
||||
# cap. Set to 0 for no cap. Env override: DISCORD_MAX_ATTACHMENT_BYTES.
|
||||
"max_attachment_bytes": 33554432,
|
||||
},
|
||||
|
||||
# WhatsApp platform settings (gateway mode)
|
||||
|
|
|
|||
|
|
@ -384,3 +384,148 @@ class TestIncomingDocumentHandling:
|
|||
assert event.message_type == MessageType.PHOTO
|
||||
assert event.media_urls == ["/tmp/cached_image.png"]
|
||||
assert event.media_types == ["image/png"]
|
||||
|
||||
|
||||
class TestAllowAnyAttachment:
    """Cover the discord.allow_any_attachment and max_attachment_bytes settings.

    With the flag off (default), unknown file types are dropped. With it on,
    they get cached and surfaced to the agent as DOCUMENT events whose MIME
    is the content type Discord reported, falling back to
    application/octet-stream when Discord supplied none.
    """

    @pytest.mark.asyncio
    async def test_unknown_type_skipped_by_default(self, adapter):
        """Default (flag off): unknown extension is dropped.

        With no text + no cached media, the adapter may legitimately decline
        to dispatch the event at all, so we don't assert on call_args here —
        we just verify the file wasn't cached.
        """
        with _mock_aiohttp_download(b"should not be cached"):
            msg = make_message([
                make_attachment(filename="weird.xyz", content_type="application/x-custom")
            ])
            await adapter._handle_message(msg)

        if adapter.handle_message.call_args is not None:
            event = adapter.handle_message.call_args[0][0]
            assert event.media_urls == []

    @pytest.mark.asyncio
    async def test_unknown_type_cached_when_flag_on(self, adapter):
        """Flag on: unknown extension is cached and keeps its source MIME."""
        adapter.config.extra["allow_any_attachment"] = True

        with _mock_aiohttp_download(b"\x00\x01\x02 binary payload"):
            msg = make_message([
                make_attachment(filename="weird.xyz", content_type="application/x-custom")
            ])
            await adapter._handle_message(msg)

        event = adapter.handle_message.call_args[0][0]
        assert len(event.media_urls) == 1
        assert os.path.exists(event.media_urls[0])
        # Falls back to the source content_type when we have one.
        assert event.media_types == ["application/x-custom"]
        assert event.message_type == MessageType.DOCUMENT
        # Arbitrary bytes must never be inlined into the message text; only
        # allowlisted text formats get the "[Content of ...]" injection.
        assert "[Content of" not in (event.text or "")

    @pytest.mark.asyncio
    async def test_unknown_type_no_content_type_becomes_octet_stream(self, adapter):
        """Flag on + no content_type from discord: MIME falls back to octet-stream."""
        adapter.config.extra["allow_any_attachment"] = True

        with _mock_aiohttp_download(b"raw bytes"):
            msg = make_message([
                make_attachment(filename="mystery.bin", content_type=None)
            ])
            await adapter._handle_message(msg)

        event = adapter.handle_message.call_args[0][0]
        assert event.message_type == MessageType.DOCUMENT
        assert event.media_types == ["application/octet-stream"]

    @pytest.mark.asyncio
    async def test_max_attachment_bytes_caps_uploads(self, adapter):
        """discord.max_attachment_bytes overrides the historical 32 MiB cap."""
        adapter.config.extra["allow_any_attachment"] = True
        adapter.config.extra["max_attachment_bytes"] = 1024  # 1 KiB

        # Keep the download mocked so the test can only pass because the
        # size cap rejected the file. Without the mock, a failed download
        # would also leave media_urls empty and hide a broken cap.
        with _mock_aiohttp_download(b"x" * 16):
            msg = make_message([
                make_attachment(
                    filename="too_big.xyz",
                    content_type="application/x-custom",
                    size=2048,
                )
            ])
            await adapter._handle_message(msg)

        event = adapter.handle_message.call_args[0][0]
        assert event.media_urls == []

    @pytest.mark.asyncio
    async def test_max_attachment_bytes_zero_means_unlimited(self, adapter):
        """max_attachment_bytes=0 disables the size cap entirely."""
        adapter.config.extra["allow_any_attachment"] = True
        adapter.config.extra["max_attachment_bytes"] = 0

        with _mock_aiohttp_download(b"x" * 16):
            msg = make_message([
                make_attachment(
                    filename="huge.xyz",
                    content_type="application/x-custom",
                    # Reported size is 64 MiB — would normally exceed the
                    # historical 32 MiB hardcoded cap.
                    size=64 * 1024 * 1024,
                )
            ])
            await adapter._handle_message(msg)

        event = adapter.handle_message.call_args[0][0]
        assert len(event.media_urls) == 1

    @pytest.mark.asyncio
    async def test_allowlisted_doc_unchanged_when_flag_on(self, adapter):
        """Flag on must not change handling of types already in SUPPORTED_DOCUMENT_TYPES.

        A .txt should still get its content inlined (the historical behavior),
        and the MIME should still be the canonical text/plain — not whatever
        discord guessed.
        """
        adapter.config.extra["allow_any_attachment"] = True
        file_content = b"still a text file"

        with _mock_aiohttp_download(file_content):
            msg = make_message(
                attachments=[make_attachment(filename="notes.txt", content_type="text/plain")],
                content="check this",
            )
            await adapter._handle_message(msg)

        event = adapter.handle_message.call_args[0][0]
        assert "[Content of notes.txt]:" in event.text
        assert "still a text file" in event.text
        assert event.media_types == ["text/plain"]

    def test_helper_reads_env_fallback(self, adapter, monkeypatch):
        """Helper falls back to DISCORD_ALLOW_ANY_ATTACHMENT env var."""
        # Start from a known-clean environment so a value exported by the
        # developer's shell or CI cannot break the default assertion.
        monkeypatch.delenv("DISCORD_ALLOW_ANY_ATTACHMENT", raising=False)
        assert adapter._discord_allow_any_attachment() is False
        monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "true")
        assert adapter._discord_allow_any_attachment() is True
        monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "no")
        assert adapter._discord_allow_any_attachment() is False

    def test_helper_config_overrides_env(self, adapter, monkeypatch):
        """config.yaml setting wins over env var."""
        monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "true")
        adapter.config.extra["allow_any_attachment"] = False
        assert adapter._discord_allow_any_attachment() is False

    def test_max_bytes_helper_invalid_value_falls_back(self, adapter):
        """Garbage in max_attachment_bytes config falls back to 32 MiB."""
        adapter.config.extra["max_attachment_bytes"] = "not-a-number"
        assert adapter._discord_max_attachment_bytes() == 32 * 1024 * 1024
|
||||
|
||||
|
|
|
|||
|
|
@ -258,6 +258,8 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI
|
|||
| `DISCORD_REQUIRE_MENTION` | Require an @mention before responding in server channels |
|
||||
| `DISCORD_FREE_RESPONSE_CHANNELS` | Comma-separated channel IDs where mention is not required |
|
||||
| `DISCORD_AUTO_THREAD` | Auto-thread long replies when supported |
|
||||
| `DISCORD_ALLOW_ANY_ATTACHMENT` | When `true`, accept attachments of any file type (not just the built-in PDF/text/zip/office allowlist). Unknown types are cached and surfaced to the agent as a local path so it can inspect them via `terminal` / `read_file` / `ffprobe`. Default `false`. |
|
||||
| `DISCORD_MAX_ATTACHMENT_BYTES` | Maximum bytes per attachment the gateway will cache. Default `33554432` (32 MiB). Set to `0` for no cap (attachments are held in memory while being written). |
|
||||
| `DISCORD_REACTIONS` | Enable emoji reactions on messages during processing (default: `true`) |
|
||||
| `DISCORD_IGNORED_CHANNELS` | Comma-separated channel IDs where the bot never responds |
|
||||
| `DISCORD_NO_THREAD_CHANNELS` | Comma-separated channel IDs where bot responds without auto-threading |
|
||||
|
|
|
|||
|
|
@ -294,6 +294,8 @@ Discord behavior is controlled through two files: **`~/.hermes/.env`** for crede
|
|||
| `DISCORD_ALLOW_MENTION_USERS` | No | `true` | When `true` (default), the bot can ping individual users by ID. |
|
||||
| `DISCORD_ALLOW_MENTION_REPLIED_USER` | No | `true` | When `true` (default), replying to a message pings the original author. |
|
||||
| `DISCORD_PROXY` | No | — | Proxy URL for Discord connections (HTTP, WebSocket, REST). Overrides `HTTPS_PROXY`/`ALL_PROXY`. Supports `http://`, `https://`, and `socks5://` schemes. |
|
||||
| `DISCORD_ALLOW_ANY_ATTACHMENT` | No | `false` | When `true`, the bot accepts attachments of any file type (not just the built-in PDF/text/zip/office allowlist). Unknown types are cached to disk and surfaced to the agent as a local path, tagged with the MIME type Discord reported (or `application/octet-stream` when Discord provides none), so it can inspect them with `terminal` / `read_file` / `ffprobe` / etc. |
|
||||
| `DISCORD_MAX_ATTACHMENT_BYTES` | No | `33554432` | Maximum bytes per attachment the gateway will download and cache. Default 32 MiB. Set to `0` for no cap (attachments are held in memory while being written, so unlimited carries a real memory cost). |
|
||||
| `HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS` | No | `0.6` | Grace window the adapter waits before flushing a queued text chunk. Useful for smoothing streamed output. |
|
||||
| `HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS` | No | `2.0` | Delay between split chunks when a single message exceeds Discord's length limit. |
|
||||
|
||||
|
|
@ -613,6 +615,25 @@ The Discord adapter supports native file uploads for every common media type via
|
|||
|
||||
Discord's per-upload size limit depends on the server's boost tier (25 MB free, up to 500 MB). If Hermes gets an HTTP 413, the adapter falls back to a link pointing at the local cache path rather than failing silently.
|
||||
|
||||
## Receiving Arbitrary File Types
|
||||
|
||||
By default the bot caches uploads that match a built-in allowlist — images, audio, video, PDF, text/markdown/csv/log, JSON/XML/YAML/TOML, zip, docx/xlsx/pptx. Anything else (a `.wav`, a `.bin`, a custom-extension dump) gets logged as `Unsupported document type` and dropped before the agent sees it.
|
||||
|
||||
To accept arbitrary file types, enable `discord.allow_any_attachment`:
|
||||
|
||||
```yaml
|
||||
discord:
|
||||
allow_any_attachment: true
|
||||
# Optional — raise/disable the per-file size cap. Default is 32 MiB.
|
||||
# The whole file is held in memory while being cached, so unlimited
|
||||
# uploads carry a real memory cost.
|
||||
max_attachment_bytes: 33554432 # bytes; 0 = unlimited
|
||||
```
|
||||
|
||||
When the flag is on, any uploaded file is downloaded, cached under `~/.hermes/cache/documents/`, and surfaced to the agent as a `DOCUMENT`-typed message event. Files outside the allowlist keep the MIME type Discord reported for them, falling back to `application/octet-stream` when Discord provides none. The agent receives a context note pointing at the local path (auto-translated for Docker/Modal sandboxed terminals via `to_agent_visible_cache_path`) and can inspect the file with `terminal` (`ffprobe`, `unzip`, `file`, `strings`, etc.) or `read_file`. The file body is **not** inlined into the prompt — only the path — so binary uploads don't blow up the context window.
|
||||
|
||||
Known-text formats already in the allowlist (`.txt`, `.md`, `.log`) continue to have their contents auto-injected up to 100 KiB; that behavior is unchanged when the flag is on.
|
||||
|
||||
## Home Channel
|
||||
|
||||
You can designate a "home channel" where the bot sends proactive messages (such as cron job output, reminders, and notifications). There are two ways to set it:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue