fix(gateway): classify SimpleX non-image/non-audio files as DOCUMENT

SimpleX tagged unknown files application/octet-stream in media_types
but classification only handled audio/image, leaving msg_type TEXT —
run.py never injected the document context. Same bug class as #12845.
This commit is contained in:
Teknium 2026-06-11 22:58:34 -07:00
parent f03f161b39
commit 74180ebf0b
2 changed files with 76 additions and 0 deletions

View file

@ -625,6 +625,11 @@ class SimplexAdapter(BasePlatformAdapter):
msg_type = MessageType.VOICE
elif any(mt.startswith("image/") for mt in media_types):
msg_type = MessageType.PHOTO
else:
# Catch-all: non-image/non-audio files (tagged
# application/octet-stream above) are documents so run.py's
# document-context injection surfaces the file to the agent.
msg_type = MessageType.DOCUMENT
# Timestamp
ts_str = meta.get("itemTs") or meta.get("createdAt", "")

View file

@ -396,3 +396,74 @@ def test_register_calls_register_platform():
assert callable(kwargs["setup_fn"])
# SimpleX uses opaque IDs only — no PII to redact.
assert kwargs["pii_safe"] is True
# ---------------------------------------------------------------------------
# Inbound attachment message type classification
# ---------------------------------------------------------------------------
def _make_file_chat_item(file_path: str, file_name: str) -> dict:
"""Minimal direct-chat rcvMsgContent item carrying a completed file."""
return {
"chatInfo": {
"type": "direct",
"contact": {"contactId": 42, "localDisplayName": "tester"},
},
"chatItem": {
"chatDir": {"type": "directRcv"},
"meta": {"itemTs": "2026-01-01T00:00:00Z"},
"content": {
"type": "rcvMsgContent",
"msgContent": {"type": "file", "text": "here you go"},
},
"file": {
"fileId": 7,
"fileName": file_name,
"fileSource": {"filePath": file_path},
},
},
}
@pytest.mark.asyncio
async def test_document_file_sets_document_type():
"""A non-image/non-audio file must classify as DOCUMENT, not TEXT,
so run.py's document-context injection surfaces the path to the agent."""
from gateway.config import PlatformConfig
from gateway.platforms.base import MessageType
cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"})
adapter = SimplexAdapter(cfg)
dispatched = []
async def _capture(event):
dispatched.append(event)
adapter.handle_message = _capture
await adapter._handle_chat_item(_make_file_chat_item("/tmp/report.pdf", "report.pdf"))
assert dispatched, "_handle_chat_item did not dispatch any event"
assert dispatched[0].message_type == MessageType.DOCUMENT
assert dispatched[0].media_urls == ["/tmp/report.pdf"]
assert dispatched[0].media_types == ["application/octet-stream"]
@pytest.mark.asyncio
async def test_image_file_still_sets_photo_type():
"""Regression guard: image files keep classifying as PHOTO after the
document catch-all was added."""
from gateway.config import PlatformConfig
from gateway.platforms.base import MessageType
cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"})
adapter = SimplexAdapter(cfg)
dispatched = []
async def _capture(event):
dispatched.append(event)
adapter.handle_message = _capture
await adapter._handle_chat_item(_make_file_chat_item("/tmp/pic.jpg", "pic.jpg"))
assert dispatched, "_handle_chat_item did not dispatch any event"
assert dispatched[0].message_type == MessageType.PHOTO