From e818ec520aa258214333ed0e11057ef8bc840038 Mon Sep 17 00:00:00 2001 From: ghostmfr <170458616+ghostmfr@users.noreply.github.com> Date: Sun, 26 Apr 2026 18:16:15 -0700 Subject: [PATCH] fix(slack): harden attachment handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multiple overlapping Slack attachment improvements: 1. Upload retry with backoff on transient errors (429, 5xx, connection reset, rate_limited, service unavailable). New _is_retryable_upload_error helper covers three upload paths: _upload_file, send_video, send_document. Up to 3 attempts with 1.5s * attempt backoff. 2. Thread participation tracking: successful file uploads now add the thread_ts to _bot_message_ts, mirroring how text replies are tracked. This lets follow-up thread messages auto-trigger the bot (same engagement rules as replied threads). 3. Thread metadata preservation in the image redirect-guard fallback (send_image → send text fallback) and in two gateway/run.py send paths (image + document fallback calls). 4. HTML response rejection in _download_slack_file_bytes. Parallels the existing check in _download_slack_file. Guards against Slack returning a sign-in / redirect page as document bytes when scopes are missing, so the agent doesn't get HTML-as-a-PDF. 5. File lifecycle event acks (file_shared / file_created / file_change). These events arrive around snippet uploads. Acking them silences the slack_bolt 'Unhandled request' 404 warnings without changing behavior. 6. Post-loop message type classification so a mixed upload classifies as PHOTO if any image is attached, else VOICE if any audio is attached, falling back to DOCUMENT. Previously, the type was assigned per file inside the inbound loop, so whichever file was processed last determined the result. 7. Expanded text-inject whitelist in inbound document handling to cover .csv, .log, .json, .xml, .yaml, .yml, .toml, .ini, .cfg (up to 100KB) so snippets and config files are directly visible to the agent, not just cached as opaque uploads. 
Paired with new MIME entries in SUPPORTED_DOCUMENT_TYPES in base.py. Squashed from two commits in #11819 so the single commit carries the contributor's GitHub attribution (the original commits were authored under a local dev hostname). --- gateway/platforms/base.py | 8 + gateway/platforms/slack.py | 188 +++++++++++++++++---- gateway/run.py | 2 + tests/gateway/test_media_download_retry.py | 25 +++ tests/gateway/test_slack.py | 106 ++++++++++++ 5 files changed, 297 insertions(+), 32 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 3068318e41..610cebdd2e 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -693,7 +693,15 @@ SUPPORTED_DOCUMENT_TYPES = { ".pdf": "application/pdf", ".md": "text/markdown", ".txt": "text/plain", + ".csv": "text/csv", ".log": "text/plain", + ".json": "application/json", + ".xml": "application/xml", + ".yaml": "application/yaml", + ".yml": "application/yaml", + ".toml": "application/toml", + ".ini": "text/plain", + ".cfg": "text/plain", ".zip": "application/zip", ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index b45e390665..b4c6ddfe6a 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -411,6 +411,21 @@ class SlackAdapter(BasePlatformAdapter): async def handle_app_mention(event, say): pass + # File lifecycle events can arrive around snippet uploads even when + # the actual user message is what we care about. Ack them so Slack + # doesn't log noisy 404 "unhandled request" warnings. 
+ @self._app.event("file_shared") + async def handle_file_shared(event, say): + pass + + @self._app.event("file_created") + async def handle_file_created(event, say): + pass + + @self._app.event("file_change") + async def handle_file_change(event, say): + pass + @self._app.event("assistant_thread_started") async def handle_assistant_thread_started(event, say): await self._handle_assistant_thread_lifecycle_event(event) @@ -698,14 +713,61 @@ class SlackAdapter(BasePlatformAdapter): if not os.path.exists(file_path): raise FileNotFoundError(f"File not found: {file_path}") - result = await self._get_client(chat_id).files_upload_v2( - channel=chat_id, - file=file_path, - filename=os.path.basename(file_path), - initial_comment=caption or "", - thread_ts=self._resolve_thread_ts(reply_to, metadata), - ) - return SendResult(success=True, raw_response=result) + thread_ts = self._resolve_thread_ts(reply_to, metadata) + last_exc = None + for attempt in range(3): + try: + result = await self._get_client(chat_id).files_upload_v2( + channel=chat_id, + file=file_path, + filename=os.path.basename(file_path), + initial_comment=caption or "", + thread_ts=thread_ts, + ) + self._record_uploaded_file_thread(chat_id, thread_ts) + return SendResult(success=True, raw_response=result) + except Exception as exc: + last_exc = exc + if not self._is_retryable_upload_error(exc) or attempt >= 2: + raise + logger.debug( + "[Slack] Upload retry %d/2 for %s: %s", + attempt + 1, + file_path, + exc, + ) + await asyncio.sleep(1.5 * (attempt + 1)) + + raise last_exc + + def _record_uploaded_file_thread(self, chat_id: str, thread_ts: Optional[str]) -> None: + """Treat successful file uploads as bot participation in a thread.""" + if not thread_ts: + return + self._bot_message_ts.add(thread_ts) + if len(self._bot_message_ts) > self._BOT_TS_MAX: + excess = len(self._bot_message_ts) - self._BOT_TS_MAX // 2 + for old_ts in list(self._bot_message_ts)[:excess]: + self._bot_message_ts.discard(old_ts) + + def 
_is_retryable_upload_error(self, exc: Exception) -> bool: + """Best-effort detection for transient Slack upload failures.""" + status_code = getattr(getattr(exc, "response", None), "status_code", None) + if status_code is not None: + return status_code == 429 or status_code >= 500 + + body = " ".join( + str(part) for part in ( + exc, + getattr(exc, "message", ""), + getattr(exc, "response", None), + ) if part + ).lower() + if "rate_limited" in body or "ratelimited" in body or "429" in body: + return True + if "connection reset" in body or "service unavailable" in body or "temporarily unavailable" in body: + return True + return self._is_retryable_error(body) # ----- Markdown → mrkdwn conversion ----- @@ -978,13 +1040,15 @@ class SlackAdapter(BasePlatformAdapter): response = await client.get(image_url) response.raise_for_status() + thread_ts = self._resolve_thread_ts(reply_to, metadata) result = await self._get_client(chat_id).files_upload_v2( channel=chat_id, content=response.content, filename="image.png", initial_comment=caption or "", - thread_ts=self._resolve_thread_ts(reply_to, metadata), + thread_ts=thread_ts, ) + self._record_uploaded_file_thread(chat_id, thread_ts) return SendResult(success=True, raw_response=result) @@ -997,7 +1061,12 @@ class SlackAdapter(BasePlatformAdapter): ) # Fall back to sending the URL as text text = f"{caption}\n{image_url}" if caption else image_url - return await self.send(chat_id=chat_id, content=text, reply_to=reply_to) + return await self.send( + chat_id=chat_id, + content=text, + reply_to=reply_to, + metadata=metadata, + ) async def send_voice( self, @@ -1038,14 +1107,32 @@ class SlackAdapter(BasePlatformAdapter): return SendResult(success=False, error=f"Video file not found: {video_path}") try: - result = await self._get_client(chat_id).files_upload_v2( - channel=chat_id, - file=video_path, - filename=os.path.basename(video_path), - initial_comment=caption or "", - thread_ts=self._resolve_thread_ts(reply_to, metadata), - ) - 
return SendResult(success=True, raw_response=result) + thread_ts = self._resolve_thread_ts(reply_to, metadata) + last_exc = None + for attempt in range(3): + try: + result = await self._get_client(chat_id).files_upload_v2( + channel=chat_id, + file=video_path, + filename=os.path.basename(video_path), + initial_comment=caption or "", + thread_ts=thread_ts, + ) + self._record_uploaded_file_thread(chat_id, thread_ts) + return SendResult(success=True, raw_response=result) + except Exception as exc: + last_exc = exc + if not self._is_retryable_upload_error(exc) or attempt >= 2: + raise + logger.debug( + "[Slack] Video upload retry %d/2 for %s: %s", + attempt + 1, + video_path, + exc, + ) + await asyncio.sleep(1.5 * (attempt + 1)) + + raise last_exc except Exception as e: # pragma: no cover - defensive logging logger.error( @@ -1077,16 +1164,34 @@ class SlackAdapter(BasePlatformAdapter): return SendResult(success=False, error=f"File not found: {file_path}") display_name = file_name or os.path.basename(file_path) + thread_ts = self._resolve_thread_ts(reply_to, metadata) try: - result = await self._get_client(chat_id).files_upload_v2( - channel=chat_id, - file=file_path, - filename=display_name, - initial_comment=caption or "", - thread_ts=self._resolve_thread_ts(reply_to, metadata), - ) - return SendResult(success=True, raw_response=result) + last_exc = None + for attempt in range(3): + try: + result = await self._get_client(chat_id).files_upload_v2( + channel=chat_id, + file=file_path, + filename=display_name, + initial_comment=caption or "", + thread_ts=thread_ts, + ) + self._record_uploaded_file_thread(chat_id, thread_ts) + return SendResult(success=True, raw_response=result) + except Exception as exc: + last_exc = exc + if not self._is_retryable_upload_error(exc) or attempt >= 2: + raise + logger.debug( + "[Slack] Document upload retry %d/2 for %s: %s", + attempt + 1, + file_path, + exc, + ) + await asyncio.sleep(1.5 * (attempt + 1)) + + raise last_exc except 
Exception as e: # pragma: no cover - defensive logging logger.error( @@ -1544,7 +1649,6 @@ class SlackAdapter(BasePlatformAdapter): cached = await self._download_slack_file(url, ext, team_id=team_id) media_urls.append(cached) media_types.append(mimetype) - msg_type = MessageType.PHOTO except Exception as e: # pragma: no cover - defensive logging detail = self._describe_slack_download_failure(e, file_obj=f) if detail: @@ -1560,7 +1664,6 @@ class SlackAdapter(BasePlatformAdapter): cached = await self._download_slack_file(url, ext, audio=True, team_id=team_id) media_urls.append(cached) media_types.append(mimetype) - msg_type = MessageType.VOICE except Exception as e: # pragma: no cover - defensive logging detail = self._describe_slack_download_failure(e, file_obj=f) if detail: @@ -1600,12 +1703,16 @@ class SlackAdapter(BasePlatformAdapter): doc_mime = SUPPORTED_DOCUMENT_TYPES[ext] media_urls.append(cached_path) media_types.append(doc_mime) - msg_type = MessageType.DOCUMENT logger.debug("[Slack] Cached user document: %s", cached_path) - # Inject text content for .txt/.md files (capped at 100 KB) + # Inject small text-ish files directly into the prompt so + # snippets like JSON/YAML/configs are actually visible to the agent. 
MAX_TEXT_INJECT_BYTES = 100 * 1024 - if ext in (".md", ".txt") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES: + TEXT_INJECT_EXTENSIONS = { + ".md", ".txt", ".csv", ".log", ".json", ".xml", + ".yaml", ".yml", ".toml", ".ini", ".cfg", + } + if ext in TEXT_INJECT_EXTENSIONS and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES: try: text_content = raw_bytes.decode("utf-8") display_name = original_filename or f"document{ext}" @@ -1630,6 +1737,14 @@ class SlackAdapter(BasePlatformAdapter): notice_block = "[Slack attachment notice]\n" + "\n".join(f"- {n}" for n in attachment_notices) text = f"{notice_block}\n\n{text}" if text else notice_block + if msg_type != MessageType.COMMAND and media_types: + if any(m.startswith("image/") for m in media_types): + msg_type = MessageType.PHOTO + elif any(m.startswith("audio/") for m in media_types): + msg_type = MessageType.VOICE + else: + msg_type = MessageType.DOCUMENT + # Resolve user display name (cached after first lookup) user_name = await self._resolve_user_name(user_id, chat_id=channel_id) @@ -2205,10 +2320,19 @@ class SlackAdapter(BasePlatformAdapter): headers={"Authorization": f"Bearer {bot_token}"}, ) response.raise_for_status() + ct = response.headers.get("content-type", "") + if "text/html" in ct: + raise ValueError( + "Slack returned HTML instead of file bytes " + f"(content-type: {ct}); " + "check bot token scopes and file permissions" + ) return response.content - except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: + except (httpx.TimeoutException, httpx.HTTPStatusError, ValueError) as exc: if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise + if isinstance(exc, ValueError): + raise if attempt < 2: logger.debug("Slack file download retry %d/2 for %s: %s", attempt + 1, url[:80], exc) diff --git a/gateway/run.py b/gateway/run.py index 5dcdb05f83..d84ed65f7a 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -6766,6 +6766,7 @@ class GatewayRunner: chat_id=source.chat_id, 
image_url=image_url, caption=alt_text, + metadata=_thread_metadata, ) except Exception: pass @@ -6776,6 +6777,7 @@ class GatewayRunner: await adapter.send_document( chat_id=source.chat_id, file_path=media_path, + metadata=_thread_metadata, ) except Exception: pass diff --git a/tests/gateway/test_media_download_retry.py b/tests/gateway/test_media_download_retry.py index 373ced1017..c43ad0929c 100644 --- a/tests/gateway/test_media_download_retry.py +++ b/tests/gateway/test_media_download_retry.py @@ -735,6 +735,7 @@ class TestSlackDownloadSlackFileBytes: fake_response = MagicMock() fake_response.content = b"raw bytes here" fake_response.raise_for_status = MagicMock() + fake_response.headers = {"content-type": "application/pdf"} mock_client = AsyncMock() mock_client.get = AsyncMock(return_value=fake_response) @@ -750,6 +751,29 @@ class TestSlackDownloadSlackFileBytes: result = asyncio.run(run()) assert result == b"raw bytes here" + def test_rejects_html_response(self): + """Slack HTML sign-in pages should not be accepted as file bytes.""" + adapter = _make_slack_adapter() + + fake_response = MagicMock() + fake_response.content = b"