From 0fd34e8c5a7a16eeb28e1eff46fd258d5e6f06c0 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 12 Jun 2026 02:05:41 -0700 Subject: [PATCH] fix(teams): cache document/video/audio attachments and classify as DOCUMENT (#44778) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Teams adapter only handled image/* attachments — documents (the application/vnd.microsoft.teams.file.download.info consent-free download payload and any direct-URL non-image attachment) never reached media_urls at all, so run.py's document-context injection had nothing to surface. Completes the class-wide sweep from PR #44695 (Signal/Email/SimpleX). - download.info attachments: fetch the pre-authed SharePoint downloadUrl (SSRF-guarded, same guard chain as base.py cache_*_from_url) and route through cache_media_bytes - direct-URL non-image attachments: same fetch + classify path - skip Teams' text/html message-body mirror and adaptive-card attachments - DOCUMENT > PHOTO > VIDEO > AUDIO precedence for mixed attachments, matching the Email precedence rationale from #44695 --- plugins/platforms/teams/adapter.py | 106 ++++++++++++++++++++- tests/gateway/test_teams.py | 146 +++++++++++++++++++++++++++++ 2 files changed, 249 insertions(+), 3 deletions(-) diff --git a/plugins/platforms/teams/adapter.py b/plugins/platforms/teams/adapter.py index 975ef5b4093..a7d024419e1 100644 --- a/plugins/platforms/teams/adapter.py +++ b/plugins/platforms/teams/adapter.py @@ -96,6 +96,7 @@ from gateway.platforms.base import ( MessageType, SendResult, cache_image_from_url, + cache_media_bytes, ) logger = logging.getLogger(__name__) @@ -725,6 +726,34 @@ class TeamsAdapter(BasePlatformAdapter): self._mark_disconnected() logger.info("[teams] Disconnected") + async def _fetch_attachment_bytes(self, url: str, timeout: float = 30.0) -> bytes: + """Download attachment bytes with SSRF protection. + + Teams file attachments carry pre-authenticated SharePoint download + URLs (no extra auth header needed). Validates the URL against the + SSRF guard and follows redirects through the shared redirect guard, + matching the cache_*_from_url helpers in gateway.platforms.base. + """ + from tools.url_safety import is_safe_url + from gateway.platforms.base import _ssrf_redirect_guard + + if not is_safe_url(url): + raise ValueError("Blocked unsafe attachment URL (SSRF protection)") + + import httpx + + async with httpx.AsyncClient( + timeout=timeout, + follow_redirects=True, + event_hooks={"response": [_ssrf_redirect_guard]}, + ) as client: + response = await client.get( + url, + headers={"User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)"}, + ) + response.raise_for_status() + return response.content + async def _on_message(self, ctx: ActivityContext[MessageActivity]) -> None: """Process an incoming Teams message and dispatch to the gateway.""" activity = ctx.activity @@ -779,22 +808,93 @@ class TeamsAdapter(BasePlatformAdapter): guild_id=getattr(conv, "tenant_id", None) or self._tenant_id, ) - # Handle image attachments + # Handle attachments (images, documents, video, audio) media_urls = [] media_types = [] + media_kinds = [] for att in getattr(activity, "attachments", None) or []: content_url = getattr(att, "content_url", None) - content_type = getattr(att, "content_type", None) or "" + content_type = (getattr(att, "content_type", None) or "").lower() + att_name = getattr(att, "name", None) or "" + + # Skip non-file payloads: Teams mirrors the message body as a + # text/html attachment on every message, and adaptive/hero cards + # arrive as application/vnd.microsoft.card.* attachments. + if content_type in ("text/html", "text/plain") and not content_url: + continue + if content_type.startswith("application/vnd.microsoft.card"): + continue + + if content_type == "application/vnd.microsoft.teams.file.download.info": + # File consent-free download: content carries a pre-authed + # SharePoint downloadUrl plus the real file type. + content = getattr(att, "content", None) + if not isinstance(content, dict): + content = getattr(content, "__dict__", None) or {} + download_url = content.get("downloadUrl") or content.get("download_url") + file_type = (content.get("fileType") or content.get("file_type") or "").lstrip(".") + if not download_url: + continue + filename = att_name or (f"document.{file_type}" if file_type else "document") + try: + data = await self._fetch_attachment_bytes(download_url) + cached = cache_media_bytes(data, filename=filename, mime_type="") + if cached: + media_urls.append(cached.path) + media_types.append(cached.media_type) + media_kinds.append(cached.kind) + else: + logger.warning( + "[teams] Unsupported document type for attachment '%s', skipping", + filename, + ) + except Exception as e: + logger.warning("[teams] Failed to cache file attachment '%s': %s", filename, e) + continue + if content_url and content_type.startswith("image/"): try: cached = await cache_image_from_url(content_url) if cached: media_urls.append(cached) media_types.append(content_type) + media_kinds.append("image") except Exception as e: logger.warning("[teams] Failed to cache image attachment: %s", e) + continue - msg_type = MessageType.PHOTO if media_urls else MessageType.TEXT + if content_url: + # Direct-URL non-image attachment (video/audio/document). + try: + data = await self._fetch_attachment_bytes(content_url) + cached = cache_media_bytes( + data, filename=att_name, mime_type=content_type + ) + if cached: + media_urls.append(cached.path) + media_types.append(cached.media_type) + media_kinds.append(cached.kind) + except Exception as e: + logger.warning( + "[teams] Failed to cache attachment '%s' (%s): %s", + att_name or content_url, content_type, e, + ) + + # Classification: DOCUMENT wins over PHOTO/VIDEO/AUDIO for mixed + # attachments — run.py's image handling keys off the per-path image/* + # mime types regardless of message_type, but document-context + # injection gates strictly on MessageType.DOCUMENT (same precedence + # as Email/Signal, PR #44695). + if "document" in media_kinds: + msg_type = MessageType.DOCUMENT + elif "image" in media_kinds: + msg_type = MessageType.PHOTO + elif "video" in media_kinds: + msg_type = MessageType.VIDEO + elif "audio" in media_kinds: + msg_type = MessageType.AUDIO + else: + msg_type = MessageType.TEXT event = MessageEvent( text=text, diff --git a/tests/gateway/test_teams.py b/tests/gateway/test_teams.py index b9f575ef9f5..d4f56104a6a 100644 --- a/tests/gateway/test_teams.py +++ b/tests/gateway/test_teams.py @@ -713,6 +713,152 @@ class TestTeamsMessageHandling: assert adapter.handle_message.await_count == 1 +class TestTeamsAttachmentClassification: + """Document attachments must set MessageType.DOCUMENT so run.py's + document-context injection surfaces the cached file to the agent + (same bug class as Signal/Email/SimpleX, PR #44695).""" + + def _make_adapter(self): + adapter = TeamsAdapter(_make_config( + client_id="bot-id", client_secret="secret", tenant_id="tenant", + )) + adapter._app = MagicMock() + adapter._app.id = "bot-id" + adapter.handle_message = AsyncMock() + return adapter + + def _make_activity(self, attachments, text="see attached"): + activity = MagicMock() + activity.text = text + activity.id = "activity-att-001" + activity.from_ = MagicMock() + activity.from_.id = "user-123" + activity.from_.aad_object_id = "aad-456" + activity.from_.name = "Test User" + activity.conversation = MagicMock() + activity.conversation.id = "19:abc@thread.v2" + activity.conversation.conversation_type = "personal" + activity.conversation.name = "Test Chat" + activity.conversation.tenant_id = "tenant-789" + activity.attachments = attachments + return activity + + def _make_ctx(self, activity): + ctx = MagicMock() + ctx.activity = activity + return ctx + + def _file_download_attachment(self, name="report.pdf", file_type="pdf"): + att = MagicMock() + att.content_type = "application/vnd.microsoft.teams.file.download.info" + att.content_url = None + att.name = name + att.content = { + "downloadUrl": "https://contoso.sharepoint.com/download/x", + "fileType": file_type, + } + return att + + def _image_attachment(self): + att = MagicMock() + att.content_type = "image/png" + att.content_url = "https://smba.example.com/img.png" + att.name = "img.png" + return att + + def _html_body_attachment(self): + # Teams mirrors the message body as a text/html attachment + att = MagicMock() + att.content_type = "text/html" + att.content_url = None + att.name = "" + return att + + @pytest.mark.anyio + async def test_file_download_info_sets_document_type(self): + from gateway.platforms.base import MessageType + + adapter = self._make_adapter() + adapter._fetch_attachment_bytes = AsyncMock(return_value=b"%PDF-1.4 fake") + + activity = self._make_activity([self._file_download_attachment()]) + await adapter._on_message(self._make_ctx(activity)) + + event = adapter.handle_message.call_args[0][0] + assert event.message_type == MessageType.DOCUMENT, ( + f"Expected DOCUMENT, got {event.message_type}. " + "Documents must be classified as DOCUMENT so run.py injects file context." + ) + assert len(event.media_urls) == 1 + assert event.media_types == ["application/pdf"] + + @pytest.mark.anyio + async def test_mixed_image_and_document_prefers_document(self): + from gateway.platforms.base import MessageType + + adapter = self._make_adapter() + adapter._fetch_attachment_bytes = AsyncMock(return_value=b"%PDF-1.4 fake") + + async def fake_cache_image(url, *a, **kw): + return "/tmp/img.png" + + with pytest.MonkeyPatch.context() as mp: + mp.setattr(_teams_mod, "cache_image_from_url", fake_cache_image) + activity = self._make_activity([ + self._image_attachment(), + self._file_download_attachment(), + ]) + await adapter._on_message(self._make_ctx(activity)) + + event = adapter.handle_message.call_args[0][0] + assert event.message_type == MessageType.DOCUMENT + assert len(event.media_urls) == 2 + + @pytest.mark.anyio + async def test_html_body_attachment_stays_text(self): + from gateway.platforms.base import MessageType + + adapter = self._make_adapter() + activity = self._make_activity([self._html_body_attachment()]) + await adapter._on_message(self._make_ctx(activity)) + + event = adapter.handle_message.call_args[0][0] + assert event.message_type == MessageType.TEXT + assert event.media_urls == [] + + @pytest.mark.anyio + async def test_image_only_still_photo(self): + from gateway.platforms.base import MessageType + + adapter = self._make_adapter() + + async def fake_cache_image(url, *a, **kw): + return "/tmp/img.png" + + with pytest.MonkeyPatch.context() as mp: + mp.setattr(_teams_mod, "cache_image_from_url", fake_cache_image) + activity = self._make_activity([self._image_attachment()]) + await adapter._on_message(self._make_ctx(activity)) + + event = adapter.handle_message.call_args[0][0] + assert event.message_type == MessageType.PHOTO + assert event.media_urls == ["/tmp/img.png"] + + @pytest.mark.anyio + async def test_download_failure_degrades_to_text(self): + from gateway.platforms.base import MessageType + + adapter = self._make_adapter() + adapter._fetch_attachment_bytes = AsyncMock(side_effect=Exception("boom")) + + activity = self._make_activity([self._file_download_attachment()]) + await adapter._on_message(self._make_ctx(activity)) + + event = adapter.handle_message.call_args[0][0] + assert event.message_type == MessageType.TEXT + assert event.media_urls == [] + + # ── _standalone_send (out-of-process cron delivery) ──────────────────────