diff --git a/plugins/platforms/photon/README.md b/plugins/platforms/photon/README.md index f4424b4e74a..7865649cdb6 100644 --- a/plugins/platforms/photon/README.md +++ b/plugins/platforms/photon/README.md @@ -114,14 +114,18 @@ All env vars are documented in `plugin.yaml`. The most important: | `PHOTON_HOME_CHANNEL` | (unset) | Default space id for cron delivery | | `PHOTON_ALLOWED_USERS` | (unset) | Comma-separated E.164 allowlist | | `PHOTON_REQUIRE_MENTION` | false | Gate group chats on a wake word | +| `PHOTON_MAX_INLINE_ATTACHMENT_BYTES` | 20 MiB (20971520 bytes) | Max inbound attachment size, in bytes, the sidecar reads & inlines | -## Limitations (current Photon API) +## Attachments & limitations -- **Inbound attachments are metadata only.** Inbound events carry the - filename + MIME type; the plugin surfaces a text marker - (`[Photon attachment received: …]`) so the agent knows something arrived. - The SDK exposes attachment bytes via `content.read()`/`stream()`, so - downloading them is a sidecar follow-up. +- **Inbound attachments are downloaded.** The sidecar reads the bytes + (`content.read()`) and base64-inlines them on the NDJSON event; the adapter + caches them to the shared media cache and populates `media_urls` / + `media_types`, so the agent sees the real image/file (vision included) — + parity with the BlueBubbles iMessage channel. Attachments larger than + `PHOTON_MAX_INLINE_ATTACHMENT_BYTES` (default 20 MiB), or whose byte read or + local caching fails, fall back to a text marker (`[Photon attachment received: …]`) so the + agent still knows something arrived. 
- **Outbound attachments are supported.** Images, voice notes, video, and documents are sent via `space.send(attachment(...))` / `space.send(voice(...))` through the sidecar's `/send-attachment` diff --git a/plugins/platforms/photon/adapter.py b/plugins/platforms/photon/adapter.py index 3a057b17c39..ae0ab8b25be 100644 --- a/plugins/platforms/photon/adapter.py +++ b/plugins/platforms/photon/adapter.py @@ -24,6 +24,7 @@ Outbound: from __future__ import annotations import asyncio +import base64 import json import logging import os @@ -36,7 +37,7 @@ import sys import time from datetime import datetime, timezone from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional if TYPE_CHECKING: # Type checkers see ``httpx`` as the always-imported module, so every use @@ -422,8 +423,13 @@ class PhotonAdapter(BasePlatformAdapter): "space": {"id": "...", "type": "dm"|"group", "phone": "+E164"}, "sender": {"id": "+E164"}, "content": {"type": "text", "text": "..."} - | {"type": "attachment", "name", "mimeType", "size"}, + | {"type": "attachment", "id", "name", "mimeType", + "size", "data"?, "encoding"?}, "timestamp": "2026-05-14T19:06:32.000Z" + + Attachment content carries the bytes inline as base64 ``data`` (with + ``encoding == "base64"``) when the sidecar could read them within its + size cap; otherwise only metadata is present and we surface a marker. } """ space = event.get("space") or {} @@ -449,6 +455,11 @@ class PhotonAdapter(BasePlatformAdapter): except ValueError: timestamp = datetime.now(tz=timezone.utc) + # Media attachments (local cached paths) handed to the agent via the + # gateway's image-routing path, exactly like the BlueBubbles channel. 
+ media_urls: List[str] = [] + media_types: List[str] = [] + ctype = content.get("type") if ctype == "text": text = content.get("text") or "" @@ -456,8 +467,20 @@ class PhotonAdapter(BasePlatformAdapter): elif ctype == "attachment": name = content.get("name") or "(unnamed)" mime = content.get("mimeType") or "" - text = f"[Photon attachment received: {name} ({mime})]" mtype = _attachment_message_type(mime) + cached = _cache_inbound_attachment(content, name, mime) + if cached: + media_urls.append(cached) + media_types.append(mime or "application/octet-stream") + # The real bytes are attached, so the agent sees the media + # itself — a short marker is enough text, and it keeps group + # mention-gating consistent with plain messages. + text = "(attachment)" + else: + # No bytes (over the sidecar cap, a failed read, or a caching + # failure) — fall back to a metadata marker so the agent still + # knows something arrived. + text = f"[Photon attachment received: {name} ({mime})]" else: text = f"[Photon content type not handled: {ctype}]" mtype = MessageType.TEXT @@ -489,6 +512,8 @@ class PhotonAdapter(BasePlatformAdapter): message_id=event.get("messageId"), raw_message=event, timestamp=timestamp, + media_urls=media_urls, + media_types=media_types, ) await self.handle_message(message_event) @@ -819,6 +844,77 @@ def _attachment_message_type(mime: str) -> MessageType: return MessageType.DOCUMENT +# MIME → file-extension maps for caching inbound attachment bytes. These mirror +# the BlueBubbles iMessage channel so both adapters name cached media the same. 
# MIME -> file-extension fallbacks, consulted only when the sender-supplied
# filename carries no suffix of its own.
# NOTE(review): HEIC/HEIF/TIFF map to ".jpg" even though the bytes are not
# JPEG — presumably to match the BlueBubbles adapter's naming; confirm.
_IMAGE_EXT_BY_MIME = {
    "image/jpeg": ".jpg",
    "image/png": ".png",
    "image/gif": ".gif",
    "image/webp": ".webp",
    "image/heic": ".jpg",
    "image/heif": ".jpg",
    "image/tiff": ".jpg",
}
# NOTE(review): "audio/x-caf" maps to ".mp3" — presumably the same parity
# choice as above; confirm.
_AUDIO_EXT_BY_MIME = {
    "audio/mp3": ".mp3",
    "audio/mpeg": ".mp3",
    "audio/ogg": ".ogg",
    "audio/wav": ".wav",
    "audio/x-caf": ".mp3",
    "audio/mp4": ".m4a",
    "audio/aac": ".m4a",
}


def _safe_attachment_name(name: str) -> str:
    """Return the last path component of a sender-supplied filename."""
    # The filename comes from the remote sender. Treat both separator styles
    # as path separators so "../../x" or "..\\x" cannot carry directory parts
    # into the cache helpers (defence in depth; the helpers may sanitise too).
    base = (name or "").replace("\\", "/").rsplit("/", 1)[-1].strip()
    if base in ("", ".", ".."):
        # Same placeholder the dispatcher uses when the event has no name.
        return "(unnamed)"
    return base


def _cache_inbound_attachment(
    content: Dict[str, Any], name: str, mime: str
) -> Optional[str]:
    """Decode a base64-inlined inbound attachment and cache it locally.

    The sidecar puts the attachment bytes on the event as ``content["data"]``
    with ``content["encoding"] == "base64"``. The bytes are decoded strictly
    and handed to the gateway cache helper that matches the MIME type
    (image, audio, or document for everything else).

    Args:
        content: The normalized ``content`` object of the inbound event.
        name: Sender-supplied filename; only its final path component is used.
        mime: Declared MIME type; may be empty and may carry parameters.

    Returns:
        Whatever the cache helper returns for the stored file (the caller
        appends it to ``media_urls``), or ``None`` when there are no bytes,
        the encoding is not base64, the payload is not valid base64, or
        caching fails. ``None`` tells the caller to fall back to a text
        marker.
    """
    data_b64 = content.get("data")
    if not data_b64:
        return None

    # A missing ``encoding`` is tolerated, but an explicit non-base64 value
    # means ``data`` is not something we know how to decode.
    encoding = content.get("encoding")
    if encoding not in (None, "base64"):
        logger.warning(
            "[photon] unsupported inbound attachment encoding: %r", encoding
        )
        return None

    try:
        # validate=True rejects characters outside the base64 alphabet. The
        # lenient default silently drops them and would cache corrupt bytes.
        raw = base64.b64decode(data_b64, validate=True)
    except (ValueError, TypeError) as exc:
        logger.warning("[photon] failed to decode inbound attachment bytes: %s", exc)
        return None

    # Imported lazily, as in the original code, and only once there are bytes
    # to cache.
    from gateway.platforms.base import (
        cache_audio_from_bytes,
        cache_document_from_bytes,
        cache_image_from_bytes,
    )

    safe_name = _safe_attachment_name(name)
    # Drop MIME parameters ("image/png; x=y") so the extension maps still hit.
    mime = (mime or "").split(";", 1)[0].strip().lower()
    # Prefer the real extension from the filename; fall back to the MIME map.
    suffix = Path(safe_name).suffix
    try:
        if mime.startswith("image/"):
            ext = suffix or _IMAGE_EXT_BY_MIME.get(mime, ".jpg")
            try:
                return cache_image_from_bytes(raw, ext)
            except ValueError:
                # Bytes don't look like a supported image (e.g. HEIC magic) —
                # still deliver them as a document rather than dropping them.
                return cache_document_from_bytes(raw, safe_name)
        if mime.startswith("audio/"):
            ext = suffix or _AUDIO_EXT_BY_MIME.get(mime, ".mp3")
            return cache_audio_from_bytes(raw, ext)
        # Video, application/*, and everything else → document cache.
        return cache_document_from_bytes(raw, safe_name)
    except Exception as exc:
        # Deliberate best-effort boundary: a caching failure must degrade to
        # the text marker, never break inbound dispatch.
        logger.warning(
            "[photon] failed to cache inbound attachment %s: %s", safe_name, exc
        )
        return None
- if first_name is None: - first_name = _prompt(" First name (optional, Enter to skip): ") or None - if email is None: - email = _prompt(" Email (optional, Enter to skip): ") or None try: _user, created = photon_auth.register_user_if_absent( token, dashboard_id, diff --git a/plugins/platforms/photon/sidecar/index.mjs b/plugins/platforms/photon/sidecar/index.mjs index 917560ddbf4..60f5ba2a8c8 100644 --- a/plugins/platforms/photon/sidecar/index.mjs +++ b/plugins/platforms/photon/sidecar/index.mjs @@ -48,6 +48,14 @@ const port = parseInt(process.env.PHOTON_SIDECAR_PORT || "8789", 10); const bind = process.env.PHOTON_SIDECAR_BIND || "127.0.0.1"; const sharedToken = process.env.PHOTON_SIDECAR_TOKEN; +// Inbound attachments are read into memory and base64-inlined on the NDJSON +// event so the Python adapter can cache the real bytes (and the agent can see +// the image). Cap the size we inline — above it we forward metadata only and +// the adapter surfaces a text marker, so one large video can't balloon a +// single NDJSON line. Override via PHOTON_MAX_INLINE_ATTACHMENT_BYTES. +const MAX_INLINE_ATTACHMENT_BYTES = + Number(process.env.PHOTON_MAX_INLINE_ATTACHMENT_BYTES) || 20 * 1024 * 1024; + if (!projectId || !projectSecret || !sharedToken) { console.error( "photon-sidecar: PHOTON_PROJECT_ID, PHOTON_PROJECT_SECRET and " + @@ -118,7 +126,7 @@ async function deliver(line) { } } -function normalizeContent(content) { +async function normalizeContent(content) { if (!content || typeof content !== "object") { return { type: "unknown" }; } @@ -126,20 +134,55 @@ function normalizeContent(content) { return { type: "text", text: content.text || "" }; } if (content.type === "attachment") { - // Bytes are reachable via content.read()/stream(); we surface metadata - // here and leave byte download to a follow-up (keeps the event small). - return { + const meta = { type: "attachment", id: content.id ?? null, name: content.name ?? null, mimeType: content.mimeType ?? 
null, size: typeof content.size === "number" ? content.size : null, }; + // Read the bytes eagerly and base64-inline them as `data` so the Python + // adapter can cache the real file (the agent then sees the image itself). + // The spectrum-ts attachment object may not outlive this stream + // iteration, so a lazy/on-demand fetch isn't safe. Over-cap attachments + // (when size is known up front) are forwarded as metadata only and the + // adapter falls back to a text marker. A read failure must never break + // the inbound loop — we just drop `data` and forward metadata. + if (meta.size !== null && meta.size > MAX_INLINE_ATTACHMENT_BYTES) { + console.error( + `photon-sidecar: attachment ${meta.name ?? meta.id} (${meta.size} bytes) ` + + `exceeds inline cap ${MAX_INLINE_ATTACHMENT_BYTES}; forwarding metadata only` + ); + return meta; + } + if (typeof content.read === "function") { + try { + const buf = await content.read(); + // Guard the case where size was unknown but the bytes turn out to be + // over the cap. + if (buf && buf.length > MAX_INLINE_ATTACHMENT_BYTES) { + console.error( + `photon-sidecar: attachment ${meta.name ?? meta.id} (${buf.length} bytes) ` + + `exceeds inline cap after read; forwarding metadata only` + ); + return meta; + } + meta.data = Buffer.from(buf).toString("base64"); + meta.encoding = "base64"; + } catch (e) { + console.error( + "photon-sidecar: failed to read attachment bytes " + + "(forwarding metadata only): " + + (e && e.stack ? e.stack : String(e)) + ); + } + } + return meta; } return { type: content.type || "unknown" }; } -function normalizeEvent(space, message) { +async function normalizeEvent(space, message) { try { const msgSpace = message.space || {}; const ts = message.timestamp; @@ -153,7 +196,7 @@ function normalizeEvent(space, message) { phone: space.phone ?? msgSpace.phone ?? null, }, sender: { id: message.sender ? 
message.sender.id : null }, - content: normalizeContent(message.content), + content: await normalizeContent(message.content), timestamp: ts instanceof Date ? ts.toISOString() : ts ? String(ts) : null, }; @@ -172,7 +215,7 @@ function normalizeEvent(space, message) { if (message && message.direction && message.direction !== "inbound") { continue; } - const event = normalizeEvent(space, message); + const event = await normalizeEvent(space, message); if (!event) continue; await deliver(JSON.stringify(event)); } diff --git a/tests/plugins/platforms/photon/test_inbound.py b/tests/plugins/platforms/photon/test_inbound.py index 656e91fc630..f3d4bfa328e 100644 --- a/tests/plugins/platforms/photon/test_inbound.py +++ b/tests/plugins/platforms/photon/test_inbound.py @@ -6,7 +6,9 @@ sidecar-event parsing without spawning the Node sidecar or binding ports. """ from __future__ import annotations +import base64 import json +from pathlib import Path from typing import Any, Dict, List import pytest @@ -80,28 +82,112 @@ async def test_dispatch_group_type(monkeypatch: pytest.MonkeyPatch) -> None: assert captured[0].source.chat_type == "group" +# A real 1x1 transparent PNG (passes base.py's _looks_like_image magic check). 
+_PNG_1X1_B64 = ( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYPhf" + "DwAChwGA60e6kgAAAABJRU5ErkJggg==" +) + + +def _attachment_event( + content: Dict[str, Any], msg_id: str = "spc-msg-att" +) -> Dict[str, Any]: + return { + "messageId": msg_id, + "space": {"id": "+15551234567", "type": "dm", "phone": "+15551234567"}, + "sender": {"id": "+15551234567"}, + "content": {"type": "attachment", **content}, + "timestamp": "2026-05-14T19:06:32.000Z", + } + + @pytest.mark.asyncio -async def test_dispatch_attachment_surfaces_marker(monkeypatch: pytest.MonkeyPatch) -> None: +async def test_dispatch_attachment_without_bytes_surfaces_marker( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """No inline ``data`` (over cap / failed sidecar read) -> text marker, no media.""" adapter = _make_adapter(monkeypatch) captured = _capture(adapter, monkeypatch) - event = { - "messageId": "spc-msg-att", - "space": {"id": "+15551234567", "type": "dm", "phone": "+15551234567"}, - "sender": {"id": "+15551234567"}, - "content": { - "type": "attachment", - "name": "IMG_4127.HEIC", - "mimeType": "image/heic", - "size": 12345, - }, - "timestamp": "2026-05-14T19:06:32.000Z", - } + event = _attachment_event( + {"name": "IMG_4127.HEIC", "mimeType": "image/heic", "size": 12345} + ) await adapter._dispatch_inbound(event) assert len(captured) == 1 - assert "Photon attachment received" in captured[0].text - assert "IMG_4127.HEIC" in captured[0].text - assert captured[0].message_type == MessageType.PHOTO + ev = captured[0] + assert "Photon attachment received" in ev.text + assert "IMG_4127.HEIC" in ev.text + assert ev.message_type == MessageType.PHOTO + assert ev.media_urls == [] + assert ev.media_types == [] + + +@pytest.mark.asyncio +async def test_dispatch_attachment_downloads_image( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Inline base64 image bytes are decoded, cached, and exposed as media.""" + adapter = _make_adapter(monkeypatch) + captured = _capture(adapter, 
monkeypatch) + + raw = base64.b64decode(_PNG_1X1_B64) + event = _attachment_event( + { + "name": "photo.png", + "mimeType": "image/png", + "size": len(raw), + "data": _PNG_1X1_B64, + "encoding": "base64", + } + ) + await adapter._dispatch_inbound(event) + + assert len(captured) == 1 + ev = captured[0] + assert ev.message_type == MessageType.PHOTO + assert ev.media_types == ["image/png"] + assert len(ev.media_urls) == 1 + cached = Path(ev.media_urls[0]) + try: + assert cached.is_file() + assert cached.read_bytes() == raw + assert ev.text == "(attachment)" + finally: + cached.unlink(missing_ok=True) + + +@pytest.mark.asyncio +async def test_dispatch_attachment_downloads_document( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Non-image attachments route through the document cache as DOCUMENT.""" + adapter = _make_adapter(monkeypatch) + captured = _capture(adapter, monkeypatch) + + raw = b"%PDF-1.4 hermes test document" + event = _attachment_event( + { + "name": "report.pdf", + "mimeType": "application/pdf", + "size": len(raw), + "data": base64.b64encode(raw).decode("ascii"), + "encoding": "base64", + } + ) + await adapter._dispatch_inbound(event) + + assert len(captured) == 1 + ev = captured[0] + assert ev.message_type == MessageType.DOCUMENT + assert ev.media_types == ["application/pdf"] + assert len(ev.media_urls) == 1 + cached = Path(ev.media_urls[0]) + try: + assert cached.is_file() + assert cached.read_bytes() == raw + assert ev.text == "(attachment)" + finally: + cached.unlink(missing_ok=True) @pytest.mark.asyncio