fix(teams): cache document/video/audio attachments and classify as DOCUMENT (#44778)

The Teams adapter only handled image/* attachments — documents (the
application/vnd.microsoft.teams.file.download.info consent-free download
payload and any direct-URL non-image attachment) never reached media_urls
at all, so run.py's document-context injection had nothing to surface.
Completes the class-wide sweep from PR #44695 (Signal/Email/SimpleX).

- download.info attachments: fetch the pre-authed SharePoint downloadUrl
  (SSRF-guarded, same guard chain as base.py cache_*_from_url) and route
  through cache_media_bytes
- direct-URL non-image attachments: same fetch + classify path
- skip Teams' text/html message-body mirror and adaptive-card attachments
- DOCUMENT > PHOTO > VIDEO > AUDIO precedence for mixed attachments,
  matching the Email precedence rationale from #44695
This commit is contained in:
Teknium 2026-06-12 02:05:41 -07:00 committed by GitHub
parent 7ba5df0d52
commit 0fd34e8c5a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 249 additions and 3 deletions

View file

@ -96,6 +96,7 @@ from gateway.platforms.base import (
MessageType,
SendResult,
cache_image_from_url,
cache_media_bytes,
)
logger = logging.getLogger(__name__)
@ -725,6 +726,34 @@ class TeamsAdapter(BasePlatformAdapter):
self._mark_disconnected()
logger.info("[teams] Disconnected")
async def _fetch_attachment_bytes(self, url: str, timeout: float = 30.0) -> bytes:
"""Download attachment bytes with SSRF protection.
Teams file attachments carry pre-authenticated SharePoint download
URLs (no extra auth header needed). Validates the URL against the
SSRF guard and follows redirects through the shared redirect guard,
matching the cache_*_from_url helpers in gateway.platforms.base.
"""
from tools.url_safety import is_safe_url
from gateway.platforms.base import _ssrf_redirect_guard
if not is_safe_url(url):
raise ValueError("Blocked unsafe attachment URL (SSRF protection)")
import httpx
async with httpx.AsyncClient(
timeout=timeout,
follow_redirects=True,
event_hooks={"response": [_ssrf_redirect_guard]},
) as client:
response = await client.get(
url,
headers={"User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)"},
)
response.raise_for_status()
return response.content
async def _on_message(self, ctx: ActivityContext[MessageActivity]) -> None:
"""Process an incoming Teams message and dispatch to the gateway."""
activity = ctx.activity
@ -779,22 +808,93 @@ class TeamsAdapter(BasePlatformAdapter):
guild_id=getattr(conv, "tenant_id", None) or self._tenant_id,
)
# Handle image attachments
# Handle attachments (images, documents, video, audio)
media_urls = []
media_types = []
media_kinds = []
for att in getattr(activity, "attachments", None) or []:
content_url = getattr(att, "content_url", None)
content_type = getattr(att, "content_type", None) or ""
content_type = (getattr(att, "content_type", None) or "").lower()
att_name = getattr(att, "name", None) or ""
# Skip non-file payloads: Teams mirrors the message body as a
# text/html attachment on every message, and adaptive/hero cards
# arrive as application/vnd.microsoft.card.* attachments.
if content_type in ("text/html", "text/plain") and not content_url:
continue
if content_type.startswith("application/vnd.microsoft.card"):
continue
if content_type == "application/vnd.microsoft.teams.file.download.info":
# File consent-free download: content carries a pre-authed
# SharePoint downloadUrl plus the real file type.
content = getattr(att, "content", None)
if not isinstance(content, dict):
content = getattr(content, "__dict__", None) or {}
download_url = content.get("downloadUrl") or content.get("download_url")
file_type = (content.get("fileType") or content.get("file_type") or "").lstrip(".")
if not download_url:
continue
filename = att_name or (f"document.{file_type}" if file_type else "document")
try:
data = await self._fetch_attachment_bytes(download_url)
cached = cache_media_bytes(data, filename=filename, mime_type="")
if cached:
media_urls.append(cached.path)
media_types.append(cached.media_type)
media_kinds.append(cached.kind)
else:
logger.warning(
"[teams] Unsupported document type for attachment '%s', skipping",
filename,
)
except Exception as e:
logger.warning("[teams] Failed to cache file attachment '%s': %s", filename, e)
continue
if content_url and content_type.startswith("image/"):
try:
cached = await cache_image_from_url(content_url)
if cached:
media_urls.append(cached)
media_types.append(content_type)
media_kinds.append("image")
except Exception as e:
logger.warning("[teams] Failed to cache image attachment: %s", e)
continue
msg_type = MessageType.PHOTO if media_urls else MessageType.TEXT
if content_url:
# Direct-URL non-image attachment (video/audio/document).
try:
data = await self._fetch_attachment_bytes(content_url)
cached = cache_media_bytes(
data, filename=att_name, mime_type=content_type
)
if cached:
media_urls.append(cached.path)
media_types.append(cached.media_type)
media_kinds.append(cached.kind)
except Exception as e:
logger.warning(
"[teams] Failed to cache attachment '%s' (%s): %s",
att_name or content_url, content_type, e,
)
# Classification: DOCUMENT wins over PHOTO/VIDEO/AUDIO for mixed
# attachments — run.py's image handling keys off the per-path image/*
# mime types regardless of message_type, but document-context
# injection gates strictly on MessageType.DOCUMENT (same precedence
# as Email/Signal, PR #44695).
if "document" in media_kinds:
msg_type = MessageType.DOCUMENT
elif "image" in media_kinds:
msg_type = MessageType.PHOTO
elif "video" in media_kinds:
msg_type = MessageType.VIDEO
elif "audio" in media_kinds:
msg_type = MessageType.AUDIO
else:
msg_type = MessageType.TEXT
event = MessageEvent(
text=text,

View file

@ -713,6 +713,152 @@ class TestTeamsMessageHandling:
assert adapter.handle_message.await_count == 1
class TestTeamsAttachmentClassification:
"""Document attachments must set MessageType.DOCUMENT so run.py's
document-context injection surfaces the cached file to the agent
(same bug class as Signal/Email/SimpleX, PR #44695)."""
def _make_adapter(self):
adapter = TeamsAdapter(_make_config(
client_id="bot-id", client_secret="secret", tenant_id="tenant",
))
adapter._app = MagicMock()
adapter._app.id = "bot-id"
adapter.handle_message = AsyncMock()
return adapter
def _make_activity(self, attachments, text="see attached"):
activity = MagicMock()
activity.text = text
activity.id = "activity-att-001"
activity.from_ = MagicMock()
activity.from_.id = "user-123"
activity.from_.aad_object_id = "aad-456"
activity.from_.name = "Test User"
activity.conversation = MagicMock()
activity.conversation.id = "19:abc@thread.v2"
activity.conversation.conversation_type = "personal"
activity.conversation.name = "Test Chat"
activity.conversation.tenant_id = "tenant-789"
activity.attachments = attachments
return activity
def _make_ctx(self, activity):
ctx = MagicMock()
ctx.activity = activity
return ctx
def _file_download_attachment(self, name="report.pdf", file_type="pdf"):
att = MagicMock()
att.content_type = "application/vnd.microsoft.teams.file.download.info"
att.content_url = None
att.name = name
att.content = {
"downloadUrl": "https://contoso.sharepoint.com/download/x",
"fileType": file_type,
}
return att
def _image_attachment(self):
att = MagicMock()
att.content_type = "image/png"
att.content_url = "https://smba.example.com/img.png"
att.name = "img.png"
return att
def _html_body_attachment(self):
# Teams mirrors the message body as a text/html attachment
att = MagicMock()
att.content_type = "text/html"
att.content_url = None
att.name = ""
return att
@pytest.mark.anyio
async def test_file_download_info_sets_document_type(self):
from gateway.platforms.base import MessageType
adapter = self._make_adapter()
adapter._fetch_attachment_bytes = AsyncMock(return_value=b"%PDF-1.4 fake")
activity = self._make_activity([self._file_download_attachment()])
await adapter._on_message(self._make_ctx(activity))
event = adapter.handle_message.call_args[0][0]
assert event.message_type == MessageType.DOCUMENT, (
f"Expected DOCUMENT, got {event.message_type}. "
"Documents must be classified as DOCUMENT so run.py injects file context."
)
assert len(event.media_urls) == 1
assert event.media_types == ["application/pdf"]
@pytest.mark.anyio
async def test_mixed_image_and_document_prefers_document(self):
from gateway.platforms.base import MessageType
adapter = self._make_adapter()
adapter._fetch_attachment_bytes = AsyncMock(return_value=b"%PDF-1.4 fake")
async def fake_cache_image(url, *a, **kw):
return "/tmp/img.png"
with pytest.MonkeyPatch.context() as mp:
mp.setattr(_teams_mod, "cache_image_from_url", fake_cache_image)
activity = self._make_activity([
self._image_attachment(),
self._file_download_attachment(),
])
await adapter._on_message(self._make_ctx(activity))
event = adapter.handle_message.call_args[0][0]
assert event.message_type == MessageType.DOCUMENT
assert len(event.media_urls) == 2
@pytest.mark.anyio
async def test_html_body_attachment_stays_text(self):
from gateway.platforms.base import MessageType
adapter = self._make_adapter()
activity = self._make_activity([self._html_body_attachment()])
await adapter._on_message(self._make_ctx(activity))
event = adapter.handle_message.call_args[0][0]
assert event.message_type == MessageType.TEXT
assert event.media_urls == []
@pytest.mark.anyio
async def test_image_only_still_photo(self):
from gateway.platforms.base import MessageType
adapter = self._make_adapter()
async def fake_cache_image(url, *a, **kw):
return "/tmp/img.png"
with pytest.MonkeyPatch.context() as mp:
mp.setattr(_teams_mod, "cache_image_from_url", fake_cache_image)
activity = self._make_activity([self._image_attachment()])
await adapter._on_message(self._make_ctx(activity))
event = adapter.handle_message.call_args[0][0]
assert event.message_type == MessageType.PHOTO
assert event.media_urls == ["/tmp/img.png"]
@pytest.mark.anyio
async def test_download_failure_degrades_to_text(self):
from gateway.platforms.base import MessageType
adapter = self._make_adapter()
adapter._fetch_attachment_bytes = AsyncMock(side_effect=Exception("boom"))
activity = self._make_activity([self._file_download_attachment()])
await adapter._on_message(self._make_ctx(activity))
event = adapter.handle_message.call_args[0][0]
assert event.message_type == MessageType.TEXT
assert event.media_urls == []
# ── _standalone_send (out-of-process cron delivery) ──────────────────────