mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-13 09:01:54 +00:00
fix(teams): cache document/video/audio attachments and classify as DOCUMENT (#44778)
The Teams adapter only handled image/* attachments — documents (the application/vnd.microsoft.teams.file.download.info consent-free download payload and any direct-URL non-image attachment) never reached media_urls at all, so run.py's document-context injection had nothing to surface. Completes the class-wide sweep from PR #44695 (Signal/Email/SimpleX). - download.info attachments: fetch the pre-authed SharePoint downloadUrl (SSRF-guarded, same guard chain as base.py cache_*_from_url) and route through cache_media_bytes - direct-URL non-image attachments: same fetch + classify path - skip Teams' text/html message-body mirror and adaptive-card attachments - DOCUMENT > PHOTO > VIDEO > AUDIO precedence for mixed attachments, matching the Email precedence rationale from #44695
This commit is contained in:
parent
7ba5df0d52
commit
0fd34e8c5a
2 changed files with 249 additions and 3 deletions
|
|
@ -96,6 +96,7 @@ from gateway.platforms.base import (
|
|||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_url,
|
||||
cache_media_bytes,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -725,6 +726,34 @@ class TeamsAdapter(BasePlatformAdapter):
|
|||
self._mark_disconnected()
|
||||
logger.info("[teams] Disconnected")
|
||||
|
||||
async def _fetch_attachment_bytes(self, url: str, timeout: float = 30.0) -> bytes:
|
||||
"""Download attachment bytes with SSRF protection.
|
||||
|
||||
Teams file attachments carry pre-authenticated SharePoint download
|
||||
URLs (no extra auth header needed). Validates the URL against the
|
||||
SSRF guard and follows redirects through the shared redirect guard,
|
||||
matching the cache_*_from_url helpers in gateway.platforms.base.
|
||||
"""
|
||||
from tools.url_safety import is_safe_url
|
||||
from gateway.platforms.base import _ssrf_redirect_guard
|
||||
|
||||
if not is_safe_url(url):
|
||||
raise ValueError("Blocked unsafe attachment URL (SSRF protection)")
|
||||
|
||||
import httpx
|
||||
|
||||
async with httpx.AsyncClient(
|
||||
timeout=timeout,
|
||||
follow_redirects=True,
|
||||
event_hooks={"response": [_ssrf_redirect_guard]},
|
||||
) as client:
|
||||
response = await client.get(
|
||||
url,
|
||||
headers={"User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.content
|
||||
|
||||
async def _on_message(self, ctx: ActivityContext[MessageActivity]) -> None:
|
||||
"""Process an incoming Teams message and dispatch to the gateway."""
|
||||
activity = ctx.activity
|
||||
|
|
@ -779,22 +808,93 @@ class TeamsAdapter(BasePlatformAdapter):
|
|||
guild_id=getattr(conv, "tenant_id", None) or self._tenant_id,
|
||||
)
|
||||
|
||||
# Handle image attachments
|
||||
# Handle attachments (images, documents, video, audio)
|
||||
media_urls = []
|
||||
media_types = []
|
||||
media_kinds = []
|
||||
for att in getattr(activity, "attachments", None) or []:
|
||||
content_url = getattr(att, "content_url", None)
|
||||
content_type = getattr(att, "content_type", None) or ""
|
||||
content_type = (getattr(att, "content_type", None) or "").lower()
|
||||
att_name = getattr(att, "name", None) or ""
|
||||
|
||||
# Skip non-file payloads: Teams mirrors the message body as a
|
||||
# text/html attachment on every message, and adaptive/hero cards
|
||||
# arrive as application/vnd.microsoft.card.* attachments.
|
||||
if content_type in ("text/html", "text/plain") and not content_url:
|
||||
continue
|
||||
if content_type.startswith("application/vnd.microsoft.card"):
|
||||
continue
|
||||
|
||||
if content_type == "application/vnd.microsoft.teams.file.download.info":
|
||||
# File consent-free download: content carries a pre-authed
|
||||
# SharePoint downloadUrl plus the real file type.
|
||||
content = getattr(att, "content", None)
|
||||
if not isinstance(content, dict):
|
||||
content = getattr(content, "__dict__", None) or {}
|
||||
download_url = content.get("downloadUrl") or content.get("download_url")
|
||||
file_type = (content.get("fileType") or content.get("file_type") or "").lstrip(".")
|
||||
if not download_url:
|
||||
continue
|
||||
filename = att_name or (f"document.{file_type}" if file_type else "document")
|
||||
try:
|
||||
data = await self._fetch_attachment_bytes(download_url)
|
||||
cached = cache_media_bytes(data, filename=filename, mime_type="")
|
||||
if cached:
|
||||
media_urls.append(cached.path)
|
||||
media_types.append(cached.media_type)
|
||||
media_kinds.append(cached.kind)
|
||||
else:
|
||||
logger.warning(
|
||||
"[teams] Unsupported document type for attachment '%s', skipping",
|
||||
filename,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("[teams] Failed to cache file attachment '%s': %s", filename, e)
|
||||
continue
|
||||
|
||||
if content_url and content_type.startswith("image/"):
|
||||
try:
|
||||
cached = await cache_image_from_url(content_url)
|
||||
if cached:
|
||||
media_urls.append(cached)
|
||||
media_types.append(content_type)
|
||||
media_kinds.append("image")
|
||||
except Exception as e:
|
||||
logger.warning("[teams] Failed to cache image attachment: %s", e)
|
||||
continue
|
||||
|
||||
msg_type = MessageType.PHOTO if media_urls else MessageType.TEXT
|
||||
if content_url:
|
||||
# Direct-URL non-image attachment (video/audio/document).
|
||||
try:
|
||||
data = await self._fetch_attachment_bytes(content_url)
|
||||
cached = cache_media_bytes(
|
||||
data, filename=att_name, mime_type=content_type
|
||||
)
|
||||
if cached:
|
||||
media_urls.append(cached.path)
|
||||
media_types.append(cached.media_type)
|
||||
media_kinds.append(cached.kind)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"[teams] Failed to cache attachment '%s' (%s): %s",
|
||||
att_name or content_url, content_type, e,
|
||||
)
|
||||
|
||||
# Classification: DOCUMENT wins over PHOTO/VIDEO/AUDIO for mixed
|
||||
# attachments — run.py's image handling keys off the per-path image/*
|
||||
# mime types regardless of message_type, but document-context
|
||||
# injection gates strictly on MessageType.DOCUMENT (same precedence
|
||||
# as Email/Signal, PR #44695).
|
||||
if "document" in media_kinds:
|
||||
msg_type = MessageType.DOCUMENT
|
||||
elif "image" in media_kinds:
|
||||
msg_type = MessageType.PHOTO
|
||||
elif "video" in media_kinds:
|
||||
msg_type = MessageType.VIDEO
|
||||
elif "audio" in media_kinds:
|
||||
msg_type = MessageType.AUDIO
|
||||
else:
|
||||
msg_type = MessageType.TEXT
|
||||
|
||||
event = MessageEvent(
|
||||
text=text,
|
||||
|
|
|
|||
|
|
@ -713,6 +713,152 @@ class TestTeamsMessageHandling:
|
|||
assert adapter.handle_message.await_count == 1
|
||||
|
||||
|
||||
class TestTeamsAttachmentClassification:
|
||||
"""Document attachments must set MessageType.DOCUMENT so run.py's
|
||||
document-context injection surfaces the cached file to the agent
|
||||
(same bug class as Signal/Email/SimpleX, PR #44695)."""
|
||||
|
||||
def _make_adapter(self):
|
||||
adapter = TeamsAdapter(_make_config(
|
||||
client_id="bot-id", client_secret="secret", tenant_id="tenant",
|
||||
))
|
||||
adapter._app = MagicMock()
|
||||
adapter._app.id = "bot-id"
|
||||
adapter.handle_message = AsyncMock()
|
||||
return adapter
|
||||
|
||||
def _make_activity(self, attachments, text="see attached"):
|
||||
activity = MagicMock()
|
||||
activity.text = text
|
||||
activity.id = "activity-att-001"
|
||||
activity.from_ = MagicMock()
|
||||
activity.from_.id = "user-123"
|
||||
activity.from_.aad_object_id = "aad-456"
|
||||
activity.from_.name = "Test User"
|
||||
activity.conversation = MagicMock()
|
||||
activity.conversation.id = "19:abc@thread.v2"
|
||||
activity.conversation.conversation_type = "personal"
|
||||
activity.conversation.name = "Test Chat"
|
||||
activity.conversation.tenant_id = "tenant-789"
|
||||
activity.attachments = attachments
|
||||
return activity
|
||||
|
||||
def _make_ctx(self, activity):
|
||||
ctx = MagicMock()
|
||||
ctx.activity = activity
|
||||
return ctx
|
||||
|
||||
def _file_download_attachment(self, name="report.pdf", file_type="pdf"):
|
||||
att = MagicMock()
|
||||
att.content_type = "application/vnd.microsoft.teams.file.download.info"
|
||||
att.content_url = None
|
||||
att.name = name
|
||||
att.content = {
|
||||
"downloadUrl": "https://contoso.sharepoint.com/download/x",
|
||||
"fileType": file_type,
|
||||
}
|
||||
return att
|
||||
|
||||
def _image_attachment(self):
|
||||
att = MagicMock()
|
||||
att.content_type = "image/png"
|
||||
att.content_url = "https://smba.example.com/img.png"
|
||||
att.name = "img.png"
|
||||
return att
|
||||
|
||||
def _html_body_attachment(self):
|
||||
# Teams mirrors the message body as a text/html attachment
|
||||
att = MagicMock()
|
||||
att.content_type = "text/html"
|
||||
att.content_url = None
|
||||
att.name = ""
|
||||
return att
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_file_download_info_sets_document_type(self):
|
||||
from gateway.platforms.base import MessageType
|
||||
|
||||
adapter = self._make_adapter()
|
||||
adapter._fetch_attachment_bytes = AsyncMock(return_value=b"%PDF-1.4 fake")
|
||||
|
||||
activity = self._make_activity([self._file_download_attachment()])
|
||||
await adapter._on_message(self._make_ctx(activity))
|
||||
|
||||
event = adapter.handle_message.call_args[0][0]
|
||||
assert event.message_type == MessageType.DOCUMENT, (
|
||||
f"Expected DOCUMENT, got {event.message_type}. "
|
||||
"Documents must be classified as DOCUMENT so run.py injects file context."
|
||||
)
|
||||
assert len(event.media_urls) == 1
|
||||
assert event.media_types == ["application/pdf"]
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_mixed_image_and_document_prefers_document(self):
|
||||
from gateway.platforms.base import MessageType
|
||||
|
||||
adapter = self._make_adapter()
|
||||
adapter._fetch_attachment_bytes = AsyncMock(return_value=b"%PDF-1.4 fake")
|
||||
|
||||
async def fake_cache_image(url, *a, **kw):
|
||||
return "/tmp/img.png"
|
||||
|
||||
with pytest.MonkeyPatch.context() as mp:
|
||||
mp.setattr(_teams_mod, "cache_image_from_url", fake_cache_image)
|
||||
activity = self._make_activity([
|
||||
self._image_attachment(),
|
||||
self._file_download_attachment(),
|
||||
])
|
||||
await adapter._on_message(self._make_ctx(activity))
|
||||
|
||||
event = adapter.handle_message.call_args[0][0]
|
||||
assert event.message_type == MessageType.DOCUMENT
|
||||
assert len(event.media_urls) == 2
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_html_body_attachment_stays_text(self):
|
||||
from gateway.platforms.base import MessageType
|
||||
|
||||
adapter = self._make_adapter()
|
||||
activity = self._make_activity([self._html_body_attachment()])
|
||||
await adapter._on_message(self._make_ctx(activity))
|
||||
|
||||
event = adapter.handle_message.call_args[0][0]
|
||||
assert event.message_type == MessageType.TEXT
|
||||
assert event.media_urls == []
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_image_only_still_photo(self):
|
||||
from gateway.platforms.base import MessageType
|
||||
|
||||
adapter = self._make_adapter()
|
||||
|
||||
async def fake_cache_image(url, *a, **kw):
|
||||
return "/tmp/img.png"
|
||||
|
||||
with pytest.MonkeyPatch.context() as mp:
|
||||
mp.setattr(_teams_mod, "cache_image_from_url", fake_cache_image)
|
||||
activity = self._make_activity([self._image_attachment()])
|
||||
await adapter._on_message(self._make_ctx(activity))
|
||||
|
||||
event = adapter.handle_message.call_args[0][0]
|
||||
assert event.message_type == MessageType.PHOTO
|
||||
assert event.media_urls == ["/tmp/img.png"]
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_download_failure_degrades_to_text(self):
|
||||
from gateway.platforms.base import MessageType
|
||||
|
||||
adapter = self._make_adapter()
|
||||
adapter._fetch_attachment_bytes = AsyncMock(side_effect=Exception("boom"))
|
||||
|
||||
activity = self._make_activity([self._file_download_attachment()])
|
||||
await adapter._on_message(self._make_ctx(activity))
|
||||
|
||||
event = adapter.handle_message.call_args[0][0]
|
||||
assert event.message_type == MessageType.TEXT
|
||||
assert event.media_urls == []
|
||||
|
||||
|
||||
# ── _standalone_send (out-of-process cron delivery) ──────────────────────
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue