fix(slack): harden attachment handling

Multiple overlapping Slack attachment improvements:

1. Upload retry with backoff on transient errors (429, 5xx, connection
   reset, rate_limited, service unavailable). The new
   _is_retryable_upload_error helper is used by three upload paths:
   _upload_file, send_video, and send_document. Each path makes up to 3
   attempts, sleeping 1.5s * attempt number between them (1.5s, then
   3s); non-retryable errors are raised immediately.

2. Thread participation tracking: successful file uploads now add the
   thread_ts to _bot_message_ts, mirroring how text replies are tracked.
   This lets follow-up thread messages auto-trigger the bot (same
   engagement rules as replied threads).

3. Thread metadata preservation in the image redirect-guard fallback
   (send_image → send text fallback) and in two gateway.run.py send
   paths (image + document fallback calls).

4. HTML response rejection in _download_slack_file_bytes. Parallels
   the existing check in _download_slack_file. Guards against Slack
   returning a sign-in / redirect page as document bytes when scopes
   are missing, so the agent doesn't get HTML-as-a-PDF.

5. File lifecycle event acks (file_shared / file_created / file_change).
   These events arrive around snippet uploads. Acking them silences the
   slack_bolt 'Unhandled request' 404 warnings without changing behavior.

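6. Post-loop message type classification: once all files have been
   processed, the message is classified as PHOTO if any attachment is
   an image, else VOICE if any attachment is audio, else DOCUMENT
   (COMMAND messages are left untouched). Previously each file set the
   type inside the inbound loop, so in a mixed upload the type could be
   overwritten by whichever file happened to be handled last.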

7. Expanded text-inject whitelist in inbound document handling to cover
   .csv, .log, .json, .xml, .yaml, .yml, .toml, .ini, .cfg (up to 100KB,
   in addition to the existing .md and .txt) so snippets and config
   files are directly visible to the agent, not just cached as opaque
   uploads. Paired with new MIME entries in SUPPORTED_DOCUMENT_TYPES in
   base.py.

Squashed from two commits in #11819 so the single commit carries the
contributor's GitHub attribution (the original commits were authored
under a local dev hostname).
This commit is contained in:
ghostmfr 2026-04-26 18:16:15 -07:00 committed by Teknium
parent b16f9d438b
commit e818ec520a
5 changed files with 297 additions and 32 deletions

View file

@ -693,7 +693,15 @@ SUPPORTED_DOCUMENT_TYPES = {
".pdf": "application/pdf",
".md": "text/markdown",
".txt": "text/plain",
".csv": "text/csv",
".log": "text/plain",
".json": "application/json",
".xml": "application/xml",
".yaml": "application/yaml",
".yml": "application/yaml",
".toml": "application/toml",
".ini": "text/plain",
".cfg": "text/plain",
".zip": "application/zip",
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",

View file

@ -411,6 +411,21 @@ class SlackAdapter(BasePlatformAdapter):
async def handle_app_mention(event, say):
pass
# File lifecycle events can arrive around snippet uploads even when
# the actual user message is what we care about. Ack them so Slack
# doesn't log noisy 404 "unhandled request" warnings.
@self._app.event("file_shared")
async def handle_file_shared(event, say):
pass
@self._app.event("file_created")
async def handle_file_created(event, say):
pass
@self._app.event("file_change")
async def handle_file_change(event, say):
pass
@self._app.event("assistant_thread_started")
async def handle_assistant_thread_started(event, say):
await self._handle_assistant_thread_lifecycle_event(event)
@ -698,14 +713,61 @@ class SlackAdapter(BasePlatformAdapter):
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
thread_ts = self._resolve_thread_ts(reply_to, metadata)
last_exc = None
for attempt in range(3):
try:
result = await self._get_client(chat_id).files_upload_v2(
channel=chat_id,
file=file_path,
filename=os.path.basename(file_path),
initial_comment=caption or "",
thread_ts=self._resolve_thread_ts(reply_to, metadata),
thread_ts=thread_ts,
)
self._record_uploaded_file_thread(chat_id, thread_ts)
return SendResult(success=True, raw_response=result)
except Exception as exc:
last_exc = exc
if not self._is_retryable_upload_error(exc) or attempt >= 2:
raise
logger.debug(
"[Slack] Upload retry %d/2 for %s: %s",
attempt + 1,
file_path,
exc,
)
await asyncio.sleep(1.5 * (attempt + 1))
raise last_exc
def _record_uploaded_file_thread(self, chat_id: str, thread_ts: Optional[str]) -> None:
    """Register a thread as bot-participated after a successful file upload.

    Args:
        chat_id: Channel the upload went to (not used by the bookkeeping).
        thread_ts: Timestamp of the thread the file was posted into; a
            falsy value means the upload was not threaded and nothing is
            recorded.

    The timestamp is added to ``self._bot_message_ts``. When that set grows
    beyond ``self._BOT_TS_MAX`` entries it is cut back to half of that limit.
    """
    if thread_ts:
        tracked = self._bot_message_ts
        tracked.add(thread_ts)

        limit = self._BOT_TS_MAX
        # Number of entries to drop so that only limit // 2 remain; zero
        # while the set is still within its bound.
        overflow = len(tracked) - limit // 2 if len(tracked) > limit else 0
        # Snapshot first: the set must not change size while being iterated.
        for stale_ts in list(tracked)[:overflow]:
            tracked.discard(stale_ts)
def _is_retryable_upload_error(self, exc: Exception) -> bool:
    """Best-effort detection for transient Slack upload failures.

    Args:
        exc: The exception raised by an upload attempt.

    Returns:
        When ``exc.response.status_code`` is a comparable number, True
        exactly for 429 and for codes >= 500. Otherwise True if the
        lowercased error text mentions rate limiting, a standalone ``429``,
        a connection reset, or service unavailability; failing all of
        those, whatever ``self._is_retryable_error`` returns for that text.
    """
    import re  # function-scope import keeps this block self-contained

    response = getattr(exc, "response", None)
    status_code = getattr(response, "status_code", None)
    if status_code is not None:
        try:
            return status_code == 429 or status_code >= 500
        except TypeError:
            # A non-numeric status (string, mock, ...) must not raise from
            # inside the caller's ``except`` block and hide the real upload
            # error; fall through to the text heuristics instead.
            pass

    body = " ".join(
        str(part)
        for part in (exc, getattr(exc, "message", ""), response)
        if part
    ).lower()

    if "rate_limited" in body or "ratelimited" in body:
        return True
    # Require 429 to stand alone as a number so file names, timestamps or
    # byte counts that merely contain those digits are not read as HTTP 429.
    if re.search(r"(?<!\d)429(?!\d)", body):
        return True

    transient_phrases = (
        "connection reset",
        "service unavailable",
        "temporarily unavailable",
    )
    if any(phrase in body for phrase in transient_phrases):
        return True

    return self._is_retryable_error(body)
# ----- Markdown → mrkdwn conversion -----
@ -978,13 +1040,15 @@ class SlackAdapter(BasePlatformAdapter):
response = await client.get(image_url)
response.raise_for_status()
thread_ts = self._resolve_thread_ts(reply_to, metadata)
result = await self._get_client(chat_id).files_upload_v2(
channel=chat_id,
content=response.content,
filename="image.png",
initial_comment=caption or "",
thread_ts=self._resolve_thread_ts(reply_to, metadata),
thread_ts=thread_ts,
)
self._record_uploaded_file_thread(chat_id, thread_ts)
return SendResult(success=True, raw_response=result)
@ -997,7 +1061,12 @@ class SlackAdapter(BasePlatformAdapter):
)
# Fall back to sending the URL as text
text = f"{caption}\n{image_url}" if caption else image_url
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
return await self.send(
chat_id=chat_id,
content=text,
reply_to=reply_to,
metadata=metadata,
)
async def send_voice(
self,
@ -1037,15 +1106,33 @@ class SlackAdapter(BasePlatformAdapter):
if not os.path.exists(video_path):
return SendResult(success=False, error=f"Video file not found: {video_path}")
try:
thread_ts = self._resolve_thread_ts(reply_to, metadata)
last_exc = None
for attempt in range(3):
try:
result = await self._get_client(chat_id).files_upload_v2(
channel=chat_id,
file=video_path,
filename=os.path.basename(video_path),
initial_comment=caption or "",
thread_ts=self._resolve_thread_ts(reply_to, metadata),
thread_ts=thread_ts,
)
self._record_uploaded_file_thread(chat_id, thread_ts)
return SendResult(success=True, raw_response=result)
except Exception as exc:
last_exc = exc
if not self._is_retryable_upload_error(exc) or attempt >= 2:
raise
logger.debug(
"[Slack] Video upload retry %d/2 for %s: %s",
attempt + 1,
video_path,
exc,
)
await asyncio.sleep(1.5 * (attempt + 1))
raise last_exc
except Exception as e: # pragma: no cover - defensive logging
logger.error(
@ -1077,16 +1164,34 @@ class SlackAdapter(BasePlatformAdapter):
return SendResult(success=False, error=f"File not found: {file_path}")
display_name = file_name or os.path.basename(file_path)
thread_ts = self._resolve_thread_ts(reply_to, metadata)
try:
last_exc = None
for attempt in range(3):
try:
result = await self._get_client(chat_id).files_upload_v2(
channel=chat_id,
file=file_path,
filename=display_name,
initial_comment=caption or "",
thread_ts=self._resolve_thread_ts(reply_to, metadata),
thread_ts=thread_ts,
)
self._record_uploaded_file_thread(chat_id, thread_ts)
return SendResult(success=True, raw_response=result)
except Exception as exc:
last_exc = exc
if not self._is_retryable_upload_error(exc) or attempt >= 2:
raise
logger.debug(
"[Slack] Document upload retry %d/2 for %s: %s",
attempt + 1,
file_path,
exc,
)
await asyncio.sleep(1.5 * (attempt + 1))
raise last_exc
except Exception as e: # pragma: no cover - defensive logging
logger.error(
@ -1544,7 +1649,6 @@ class SlackAdapter(BasePlatformAdapter):
cached = await self._download_slack_file(url, ext, team_id=team_id)
media_urls.append(cached)
media_types.append(mimetype)
msg_type = MessageType.PHOTO
except Exception as e: # pragma: no cover - defensive logging
detail = self._describe_slack_download_failure(e, file_obj=f)
if detail:
@ -1560,7 +1664,6 @@ class SlackAdapter(BasePlatformAdapter):
cached = await self._download_slack_file(url, ext, audio=True, team_id=team_id)
media_urls.append(cached)
media_types.append(mimetype)
msg_type = MessageType.VOICE
except Exception as e: # pragma: no cover - defensive logging
detail = self._describe_slack_download_failure(e, file_obj=f)
if detail:
@ -1600,12 +1703,16 @@ class SlackAdapter(BasePlatformAdapter):
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
media_urls.append(cached_path)
media_types.append(doc_mime)
msg_type = MessageType.DOCUMENT
logger.debug("[Slack] Cached user document: %s", cached_path)
# Inject text content for .txt/.md files (capped at 100 KB)
# Inject small text-ish files directly into the prompt so
# snippets like JSON/YAML/configs are actually visible to the agent.
MAX_TEXT_INJECT_BYTES = 100 * 1024
if ext in (".md", ".txt") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
TEXT_INJECT_EXTENSIONS = {
".md", ".txt", ".csv", ".log", ".json", ".xml",
".yaml", ".yml", ".toml", ".ini", ".cfg",
}
if ext in TEXT_INJECT_EXTENSIONS and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
try:
text_content = raw_bytes.decode("utf-8")
display_name = original_filename or f"document{ext}"
@ -1630,6 +1737,14 @@ class SlackAdapter(BasePlatformAdapter):
notice_block = "[Slack attachment notice]\n" + "\n".join(f"- {n}" for n in attachment_notices)
text = f"{notice_block}\n\n{text}" if text else notice_block
if msg_type != MessageType.COMMAND and media_types:
if any(m.startswith("image/") for m in media_types):
msg_type = MessageType.PHOTO
elif any(m.startswith("audio/") for m in media_types):
msg_type = MessageType.VOICE
else:
msg_type = MessageType.DOCUMENT
# Resolve user display name (cached after first lookup)
user_name = await self._resolve_user_name(user_id, chat_id=channel_id)
@ -2205,10 +2320,19 @@ class SlackAdapter(BasePlatformAdapter):
headers={"Authorization": f"Bearer {bot_token}"},
)
response.raise_for_status()
ct = response.headers.get("content-type", "")
if "text/html" in ct:
raise ValueError(
"Slack returned HTML instead of file bytes "
f"(content-type: {ct}); "
"check bot token scopes and file permissions"
)
return response.content
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
except (httpx.TimeoutException, httpx.HTTPStatusError, ValueError) as exc:
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
raise
if isinstance(exc, ValueError):
raise
if attempt < 2:
logger.debug("Slack file download retry %d/2 for %s: %s",
attempt + 1, url[:80], exc)

View file

@ -6766,6 +6766,7 @@ class GatewayRunner:
chat_id=source.chat_id,
image_url=image_url,
caption=alt_text,
metadata=_thread_metadata,
)
except Exception:
pass
@ -6776,6 +6777,7 @@ class GatewayRunner:
await adapter.send_document(
chat_id=source.chat_id,
file_path=media_path,
metadata=_thread_metadata,
)
except Exception:
pass

View file

@ -735,6 +735,7 @@ class TestSlackDownloadSlackFileBytes:
fake_response = MagicMock()
fake_response.content = b"raw bytes here"
fake_response.raise_for_status = MagicMock()
fake_response.headers = {"content-type": "application/pdf"}
mock_client = AsyncMock()
mock_client.get = AsyncMock(return_value=fake_response)
@ -750,6 +751,29 @@ class TestSlackDownloadSlackFileBytes:
result = asyncio.run(run())
assert result == b"raw bytes here"
def test_rejects_html_response(self):
"""Slack HTML sign-in pages should not be accepted as file bytes."""
adapter = _make_slack_adapter()
fake_response = MagicMock()
fake_response.content = b"<!DOCTYPE html><html><title>Slack</title></html>"
fake_response.raise_for_status = MagicMock()
fake_response.headers = {"content-type": "text/html; charset=utf-8"}
mock_client = AsyncMock()
mock_client.get = AsyncMock(return_value=fake_response)
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=False)
async def run():
with patch("httpx.AsyncClient", return_value=mock_client):
await adapter._download_slack_file_bytes(
"https://files.slack.com/file.bin"
)
with pytest.raises(ValueError, match="HTML instead of file bytes"):
asyncio.run(run())
def test_retries_on_429_then_succeeds(self):
"""429 on first attempt is retried; raw bytes returned on second."""
adapter = _make_slack_adapter()
@ -757,6 +781,7 @@ class TestSlackDownloadSlackFileBytes:
ok_response = MagicMock()
ok_response.content = b"final bytes"
ok_response.raise_for_status = MagicMock()
ok_response.headers = {"content-type": "application/pdf"}
mock_client = AsyncMock()
mock_client.get = AsyncMock(

View file

@ -287,6 +287,40 @@ class TestSendDocument:
call_kwargs = adapter._app.client.files_upload_v2.call_args[1]
assert call_kwargs["thread_ts"] == "1234567890.123456"
@pytest.mark.asyncio
async def test_send_document_thread_upload_marks_bot_participation(self, adapter, tmp_path):
test_file = tmp_path / "notes.txt"
test_file.write_bytes(b"some notes")
adapter._app.client.files_upload_v2 = AsyncMock(return_value={"ok": True})
await adapter.send_document(
chat_id="C123",
file_path=str(test_file),
metadata={"thread_id": "1234567890.123456"},
)
assert "1234567890.123456" in adapter._bot_message_ts
@pytest.mark.asyncio
async def test_send_document_retries_transient_upload_error(self, adapter, tmp_path):
test_file = tmp_path / "notes.txt"
test_file.write_bytes(b"some notes")
adapter._app.client.files_upload_v2 = AsyncMock(
side_effect=[RuntimeError("Connection reset by peer"), {"ok": True}]
)
with patch("asyncio.sleep", new_callable=AsyncMock) as sleep_mock:
result = await adapter.send_document(
chat_id="C123",
file_path=str(test_file),
)
assert result.success
assert adapter._app.client.files_upload_v2.await_count == 2
sleep_mock.assert_awaited_once()
# ---------------------------------------------------------------------------
# TestSendVideo
@ -430,6 +464,36 @@ class TestIncomingDocumentHandling:
msg_event = adapter.handle_message.call_args[0][0]
assert "# Title" in msg_event.text
@pytest.mark.asyncio
async def test_json_snippet_injects_content(self, adapter):
"""A .json snippet should be treated as a text document and injected."""
content = b'{"hello": "world", "count": 2}'
with patch.object(adapter, "_download_slack_file_bytes", new_callable=AsyncMock) as dl:
dl.return_value = content
event = self._make_event(
text="can you parse this",
files=[{
"mimetype": "text/plain",
"name": "zapfile.json",
"filetype": "json",
"pretty_type": "JSON",
"mode": "snippet",
"editable": True,
"url_private_download": "https://files.slack.com/zapfile.json",
"size": len(content),
}],
)
await adapter._handle_slack_message(event)
msg_event = adapter.handle_message.call_args[0][0]
assert msg_event.message_type == MessageType.DOCUMENT
assert len(msg_event.media_urls) == 1
assert msg_event.media_types == ["application/json"]
assert '[Content of zapfile.json]' in msg_event.text
assert '"hello": "world"' in msg_event.text
assert 'can you parse this' in msg_event.text
@pytest.mark.asyncio
async def test_large_txt_not_injected(self, adapter):
"""A .txt file over 100KB should be cached but NOT injected."""
@ -2090,6 +2154,48 @@ class TestSendImageSSRFGuards:
assert "see this" in call_kwargs["text"]
assert "https://public.example/image.png" in call_kwargs["text"]
@pytest.mark.asyncio
async def test_send_image_fallback_preserves_thread_metadata(self, adapter):
redirect_response = MagicMock()
redirect_response.is_redirect = True
redirect_response.next_request = MagicMock(
url="http://169.254.169.254/latest/meta-data"
)
client_kwargs = {}
mock_client = AsyncMock()
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=False)
async def fake_get(_url):
for hook in client_kwargs["event_hooks"]["response"]:
await hook(redirect_response)
mock_client.get = AsyncMock(side_effect=fake_get)
adapter._app.client.files_upload_v2 = AsyncMock(return_value={"ok": True})
adapter._app.client.chat_postMessage = AsyncMock(return_value={"ts": "reply_ts"})
def fake_async_client(*args, **kwargs):
client_kwargs.update(kwargs)
return mock_client
def fake_is_safe_url(url):
return url == "https://public.example/image.png"
with (
patch("tools.url_safety.is_safe_url", side_effect=fake_is_safe_url),
patch("httpx.AsyncClient", side_effect=fake_async_client),
):
await adapter.send_image(
chat_id="C123",
image_url="https://public.example/image.png",
caption="see this",
metadata={"thread_id": "parent_ts_789"},
)
call_kwargs = adapter._app.client.chat_postMessage.call_args.kwargs
assert call_kwargs.get("thread_ts") == "parent_ts_789"
# ---------------------------------------------------------------------------
# TestProgressMessageThread