feat(photon): download and inline inbound attachments

This commit is contained in:
underthestars-zhy 2026-06-08 17:09:20 -07:00 committed by Teknium
parent b3aef57f21
commit 314af28e86
5 changed files with 269 additions and 39 deletions

View file

@ -114,14 +114,18 @@ All env vars are documented in `plugin.yaml`. The most important:
| `PHOTON_HOME_CHANNEL` | (unset) | Default space id for cron delivery |
| `PHOTON_ALLOWED_USERS` | (unset) | Comma-separated E.164 allowlist |
| `PHOTON_REQUIRE_MENTION` | false | Gate group chats on a wake word |
| `PHOTON_MAX_INLINE_ATTACHMENT_BYTES` | 20 MB | Max inbound attachment size the sidecar reads & inlines |
## Limitations (current Photon API)
## Attachments & limitations
- **Inbound attachments are metadata only.** Inbound events carry the
filename + MIME type; the plugin surfaces a text marker
(`[Photon attachment received: …]`) so the agent knows something arrived.
The SDK exposes attachment bytes via `content.read()`/`stream()`, so
downloading them is a sidecar follow-up.
- **Inbound attachments are downloaded.** The sidecar reads the bytes
(`content.read()`) and base64-inlines them on the NDJSON event; the adapter
caches them to the shared media cache and populates `media_urls` /
`media_types`, so the agent sees the real image/file (vision included) —
parity with the BlueBubbles iMessage channel. Attachments larger than
`PHOTON_MAX_INLINE_ATTACHMENT_BYTES` (default 20 MB), or any byte read that
fails, fall back to a text marker (`[Photon attachment received: …]`) so the
agent still knows something arrived.
- **Outbound attachments are supported.** Images, voice notes, video, and
documents are sent via `space.send(attachment(...))` /
`space.send(voice(...))` through the sidecar's `/send-attachment`

View file

@ -24,6 +24,7 @@ Outbound:
from __future__ import annotations
import asyncio
import base64
import json
import logging
import os
@ -36,7 +37,7 @@ import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, Optional
from typing import TYPE_CHECKING, Any, Dict, List, Optional
if TYPE_CHECKING:
# Type checkers see ``httpx`` as the always-imported module, so every use
@ -422,8 +423,13 @@ class PhotonAdapter(BasePlatformAdapter):
"space": {"id": "...", "type": "dm"|"group", "phone": "+E164"},
"sender": {"id": "+E164"},
"content": {"type": "text", "text": "..."}
| {"type": "attachment", "name", "mimeType", "size"},
| {"type": "attachment", "id", "name", "mimeType",
"size", "data"?, "encoding"?},
"timestamp": "2026-05-14T19:06:32.000Z"
Attachment content carries the bytes inline as base64 ``data`` (with
``encoding == "base64"``) when the sidecar could read them within its
size cap; otherwise only metadata is present and we surface a marker.
}
"""
space = event.get("space") or {}
@ -449,6 +455,11 @@ class PhotonAdapter(BasePlatformAdapter):
except ValueError:
timestamp = datetime.now(tz=timezone.utc)
# Media attachments (local cached paths) handed to the agent via the
# gateway's image-routing path, exactly like the BlueBubbles channel.
media_urls: List[str] = []
media_types: List[str] = []
ctype = content.get("type")
if ctype == "text":
text = content.get("text") or ""
@ -456,8 +467,20 @@ class PhotonAdapter(BasePlatformAdapter):
elif ctype == "attachment":
name = content.get("name") or "(unnamed)"
mime = content.get("mimeType") or ""
text = f"[Photon attachment received: {name} ({mime})]"
mtype = _attachment_message_type(mime)
cached = _cache_inbound_attachment(content, name, mime)
if cached:
media_urls.append(cached)
media_types.append(mime or "application/octet-stream")
# The real bytes are attached, so the agent sees the media
# itself — a short marker is enough text, and it keeps group
# mention-gating consistent with plain messages.
text = "(attachment)"
else:
# No bytes (over the sidecar cap, a failed read, or a caching
# failure) — fall back to a metadata marker so the agent still
# knows something arrived.
text = f"[Photon attachment received: {name} ({mime})]"
else:
text = f"[Photon content type not handled: {ctype}]"
mtype = MessageType.TEXT
@ -489,6 +512,8 @@ class PhotonAdapter(BasePlatformAdapter):
message_id=event.get("messageId"),
raw_message=event,
timestamp=timestamp,
media_urls=media_urls,
media_types=media_types,
)
await self.handle_message(message_event)
@ -819,6 +844,77 @@ def _attachment_message_type(mime: str) -> MessageType:
return MessageType.DOCUMENT
# MIME → file-extension maps for caching inbound attachment bytes. These mirror
# the BlueBubbles iMessage channel so both adapters name cached media the same.
#
# Image map: consulted by ``_cache_inbound_attachment`` only when the
# attachment filename carries no suffix of its own; image MIME types missing
# from this table default to ".jpg" at the lookup site.
# NOTE(review): HEIC/HEIF/TIFF are given a ".jpg" extension even though the
# bytes are cached as received (no transcoding happens in this module) —
# presumably inherited from the BlueBubbles mapping; confirm that consumers of
# the cached file do not trust the extension.
_IMAGE_EXT_BY_MIME = {
"image/jpeg": ".jpg",
"image/png": ".png",
"image/gif": ".gif",
"image/webp": ".webp",
"image/heic": ".jpg",
"image/heif": ".jpg",
"image/tiff": ".jpg",
}
# Audio counterpart of the image map: used by ``_cache_inbound_attachment``
# only when the attachment filename has no suffix; audio MIME types missing
# from this table default to ".mp3" at the lookup site.
# NOTE(review): "audio/x-caf" is labelled ".mp3" although the bytes are cached
# as received — presumably mirrors the BlueBubbles channel; confirm.
_AUDIO_EXT_BY_MIME = {
"audio/mp3": ".mp3",
"audio/mpeg": ".mp3",
"audio/ogg": ".ogg",
"audio/wav": ".wav",
"audio/x-caf": ".mp3",
"audio/mp4": ".m4a",
"audio/aac": ".m4a",
}
def _cache_inbound_attachment(
    content: Dict[str, Any], name: str, mime: str
) -> Optional[str]:
    """Decode a base64-inlined inbound attachment and cache it locally.

    The sidecar inlines the attachment bytes as ``content["data"]`` (base64,
    flagged with ``content["encoding"] == "base64"``). The bytes are decoded
    and routed to a cache helper chosen by MIME type; whatever path that
    helper returns is handed back so the caller can populate ``media_urls``.

    Args:
        content: The attachment ``content`` dict from the sidecar event.
        name: Attachment filename. For images and audio its suffix, when
            present, wins over the MIME → extension maps; for documents the
            whole name is passed to the document cache.
        mime: Attachment MIME type (case-insensitive, may be empty).

    Returns:
        The cached path, or ``None`` when there is nothing usable to cache:
        no ``data`` (over the sidecar's inline cap or a failed read), an
        ``encoding`` other than base64, malformed base64, a payload that
        decodes to zero bytes, or a caching failure. ``None`` tells the
        caller to fall back to a text marker.
    """
    data_b64 = content.get("data")
    if not data_b64:
        return None

    # ``encoding`` is optional for backward compatibility, but when the
    # sidecar states one it must be the only encoding we know how to decode.
    encoding = content.get("encoding")
    if encoding and str(encoding).lower() != "base64":
        logger.warning(
            "[photon] unsupported inbound attachment encoding: %r", encoding
        )
        return None

    try:
        # validate=True: without it b64decode silently discards characters
        # outside the base64 alphabet, so a corrupted payload would be cached
        # as if it were the real file. binascii.Error is a ValueError.
        raw = base64.b64decode(data_b64, validate=True)
    except (ValueError, TypeError) as exc:
        logger.warning("[photon] failed to decode inbound attachment bytes: %s", exc)
        return None
    if not raw:
        # Nothing to show the agent; the metadata marker is more useful than
        # an empty cached file.
        return None

    # Imported at call time, as in the original implementation.
    from gateway.platforms.base import (
        cache_audio_from_bytes,
        cache_document_from_bytes,
        cache_image_from_bytes,
    )

    mime = (mime or "").lower()
    # Prefer the real extension from the filename; fall back to the MIME map.
    suffix = Path(name).suffix if name else ""
    try:
        if mime.startswith("image/"):
            ext = suffix or _IMAGE_EXT_BY_MIME.get(mime, ".jpg")
            try:
                return cache_image_from_bytes(raw, ext)
            except ValueError:
                # Bytes don't look like a supported image (e.g. HEIC magic) —
                # still deliver them as a document rather than dropping them.
                return cache_document_from_bytes(raw, name)
        if mime.startswith("audio/"):
            ext = suffix or _AUDIO_EXT_BY_MIME.get(mime, ".mp3")
            return cache_audio_from_bytes(raw, ext)
        # Video, application/*, and everything else → document cache.
        # NOTE(review): ``name`` is sender-controlled; confirm that
        # cache_document_from_bytes sanitises it before building a path.
        return cache_document_from_bytes(raw, name)
    except Exception as exc:
        # Best-effort boundary: a caching failure must not break inbound
        # dispatch, the caller simply falls back to the text marker.
        logger.warning("[photon] failed to cache inbound attachment %s: %s", name, exc)
        return None
# ---------------------------------------------------------------------------
# Standalone (out-of-process) send for cron deliveries when the gateway
# is not co-resident. Reuses a live sidecar already listening on the

View file

@ -25,6 +25,8 @@ import subprocess
import sys
from pathlib import Path
from hermes_cli.colors import Colors, color
from . import auth as photon_auth
_SIDECAR_DIR = Path(__file__).parent / "sidecar"
@ -175,19 +177,18 @@ def _cmd_setup(args: argparse.Namespace) -> int:
# 4. Register the operator's phone number as a Spectrum user (idempotent).
phone = args.phone or _prompt(
"[4/5] Your iMessage phone number (E.164, e.g. +15551234567): "
color(
"[4/5] Your iMessage phone number (E.164, e.g. +15551234567): ",
Colors.CYAN,
)
)
if not phone:
print(" Skipped user registration (no phone given). Re-run with --phone later.")
else:
# Name/email are optional and never prompted for — pass --first-name /
# --email if you want them sent to the dashboard.
first_name = args.first_name
email = args.email
# The dashboard may require a name/email; prompt interactively when
# we have a TTY and they weren't supplied, but allow skipping.
if first_name is None:
first_name = _prompt(" First name (optional, Enter to skip): ") or None
if email is None:
email = _prompt(" Email (optional, Enter to skip): ") or None
try:
_user, created = photon_auth.register_user_if_absent(
token, dashboard_id,

View file

@ -48,6 +48,14 @@ const port = parseInt(process.env.PHOTON_SIDECAR_PORT || "8789", 10);
const bind = process.env.PHOTON_SIDECAR_BIND || "127.0.0.1";
const sharedToken = process.env.PHOTON_SIDECAR_TOKEN;
// Inbound attachments are read into memory and base64-inlined on the NDJSON
// event so the Python adapter can cache the real bytes (and the agent can see
// the image). Cap the size we inline — above it we forward metadata only and
// the adapter surfaces a text marker, so one large video can't balloon a
// single NDJSON line. Override via PHOTON_MAX_INLINE_ATTACHMENT_BYTES.
//
// Parsing note: `Number(value) || default` means an unset, empty, non-numeric
// or "0" value falls back to the default of 20 * 1024 * 1024 bytes. A negative
// value is kept as-is, which makes every `> MAX_INLINE_ATTACHMENT_BYTES` size
// check true, i.e. all attachments are forwarded as metadata only.
const MAX_INLINE_ATTACHMENT_BYTES =
Number(process.env.PHOTON_MAX_INLINE_ATTACHMENT_BYTES) || 20 * 1024 * 1024;
if (!projectId || !projectSecret || !sharedToken) {
console.error(
"photon-sidecar: PHOTON_PROJECT_ID, PHOTON_PROJECT_SECRET and " +
@ -118,7 +126,7 @@ async function deliver(line) {
}
}
function normalizeContent(content) {
async function normalizeContent(content) {
if (!content || typeof content !== "object") {
return { type: "unknown" };
}
@ -126,20 +134,55 @@ function normalizeContent(content) {
return { type: "text", text: content.text || "" };
}
if (content.type === "attachment") {
// Bytes are reachable via content.read()/stream(); we surface metadata
// here and leave byte download to a follow-up (keeps the event small).
return {
const meta = {
type: "attachment",
id: content.id ?? null,
name: content.name ?? null,
mimeType: content.mimeType ?? null,
size: typeof content.size === "number" ? content.size : null,
};
// Read the bytes eagerly and base64-inline them as `data` so the Python
// adapter can cache the real file (the agent then sees the image itself).
// The spectrum-ts attachment object may not outlive this stream
// iteration, so a lazy/on-demand fetch isn't safe. Over-cap attachments
// (when size is known up front) are forwarded as metadata only and the
// adapter falls back to a text marker. A read failure must never break
// the inbound loop — we just drop `data` and forward metadata.
if (meta.size !== null && meta.size > MAX_INLINE_ATTACHMENT_BYTES) {
console.error(
`photon-sidecar: attachment ${meta.name ?? meta.id} (${meta.size} bytes) ` +
`exceeds inline cap ${MAX_INLINE_ATTACHMENT_BYTES}; forwarding metadata only`
);
return meta;
}
if (typeof content.read === "function") {
try {
const buf = await content.read();
// Guard the case where size was unknown but the bytes turn out to be
// over the cap.
if (buf && buf.length > MAX_INLINE_ATTACHMENT_BYTES) {
console.error(
`photon-sidecar: attachment ${meta.name ?? meta.id} (${buf.length} bytes) ` +
`exceeds inline cap after read; forwarding metadata only`
);
return meta;
}
meta.data = Buffer.from(buf).toString("base64");
meta.encoding = "base64";
} catch (e) {
console.error(
"photon-sidecar: failed to read attachment bytes " +
"(forwarding metadata only): " +
(e && e.stack ? e.stack : String(e))
);
}
}
return meta;
}
return { type: content.type || "unknown" };
}
function normalizeEvent(space, message) {
async function normalizeEvent(space, message) {
try {
const msgSpace = message.space || {};
const ts = message.timestamp;
@ -153,7 +196,7 @@ function normalizeEvent(space, message) {
phone: space.phone ?? msgSpace.phone ?? null,
},
sender: { id: message.sender ? message.sender.id : null },
content: normalizeContent(message.content),
content: await normalizeContent(message.content),
timestamp:
ts instanceof Date ? ts.toISOString() : ts ? String(ts) : null,
};
@ -172,7 +215,7 @@ function normalizeEvent(space, message) {
if (message && message.direction && message.direction !== "inbound") {
continue;
}
const event = normalizeEvent(space, message);
const event = await normalizeEvent(space, message);
if (!event) continue;
await deliver(JSON.stringify(event));
}

View file

@ -6,7 +6,9 @@ sidecar-event parsing without spawning the Node sidecar or binding ports.
"""
from __future__ import annotations
import base64
import json
from pathlib import Path
from typing import Any, Dict, List
import pytest
@ -80,28 +82,112 @@ async def test_dispatch_group_type(monkeypatch: pytest.MonkeyPatch) -> None:
assert captured[0].source.chat_type == "group"
# A real 1x1 transparent PNG (passes base.py's _looks_like_image magic check).
_PNG_1X1_B64 = (
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYPhf"
"DwAChwGA60e6kgAAAABJRU5ErkJggg=="
)
def _attachment_event(
    content: Dict[str, Any], msg_id: str = "spc-msg-att"
) -> Dict[str, Any]:
    """Build a sidecar-style inbound DM event wrapping attachment ``content``.

    ``content`` supplies the attachment fields (name, mimeType, size, data, …);
    the ``"type": "attachment"`` discriminator is filled in first, so a
    ``"type"`` key inside ``content`` overrides it.
    """
    phone = "+15551234567"
    attachment: Dict[str, Any] = {"type": "attachment"}
    attachment.update(content)
    event: Dict[str, Any] = {
        "messageId": msg_id,
        "space": {"id": phone, "type": "dm", "phone": phone},
        "sender": {"id": phone},
        "content": attachment,
        "timestamp": "2026-05-14T19:06:32.000Z",
    }
    return event
@pytest.mark.asyncio
async def test_dispatch_attachment_surfaces_marker(monkeypatch: pytest.MonkeyPatch) -> None:
async def test_dispatch_attachment_without_bytes_surfaces_marker(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""No inline ``data`` (over cap / failed sidecar read) -> text marker, no media."""
adapter = _make_adapter(monkeypatch)
captured = _capture(adapter, monkeypatch)
event = {
"messageId": "spc-msg-att",
"space": {"id": "+15551234567", "type": "dm", "phone": "+15551234567"},
"sender": {"id": "+15551234567"},
"content": {
"type": "attachment",
"name": "IMG_4127.HEIC",
"mimeType": "image/heic",
"size": 12345,
},
"timestamp": "2026-05-14T19:06:32.000Z",
}
event = _attachment_event(
{"name": "IMG_4127.HEIC", "mimeType": "image/heic", "size": 12345}
)
await adapter._dispatch_inbound(event)
assert len(captured) == 1
assert "Photon attachment received" in captured[0].text
assert "IMG_4127.HEIC" in captured[0].text
assert captured[0].message_type == MessageType.PHOTO
ev = captured[0]
assert "Photon attachment received" in ev.text
assert "IMG_4127.HEIC" in ev.text
assert ev.message_type == MessageType.PHOTO
assert ev.media_urls == []
assert ev.media_types == []
@pytest.mark.asyncio
async def test_dispatch_attachment_downloads_image(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A base64-inlined PNG is decoded, cached, and surfaced as PHOTO media."""
    adapter = _make_adapter(monkeypatch)
    captured = _capture(adapter, monkeypatch)

    png_bytes = base64.b64decode(_PNG_1X1_B64)
    content = {
        "name": "photo.png",
        "mimeType": "image/png",
        "size": len(png_bytes),
        "data": _PNG_1X1_B64,
        "encoding": "base64",
    }
    await adapter._dispatch_inbound(_attachment_event(content))

    assert len(captured) == 1
    message = captured[0]
    assert message.message_type == MessageType.PHOTO
    assert message.media_types == ["image/png"]
    assert len(message.media_urls) == 1

    cached_path = Path(message.media_urls[0])
    try:
        # The cached file must hold exactly the bytes that were inlined.
        assert cached_path.is_file()
        assert cached_path.read_bytes() == png_bytes
        assert message.text == "(attachment)"
    finally:
        # Remove the file the adapter cached, whether or not the checks passed.
        cached_path.unlink(missing_ok=True)
@pytest.mark.asyncio
async def test_dispatch_attachment_downloads_document(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A non-image attachment is cached as-is and reported as a DOCUMENT."""
    adapter = _make_adapter(monkeypatch)
    captured = _capture(adapter, monkeypatch)

    pdf_bytes = b"%PDF-1.4 hermes test document"
    content = {
        "name": "report.pdf",
        "mimeType": "application/pdf",
        "size": len(pdf_bytes),
        "data": base64.b64encode(pdf_bytes).decode("ascii"),
        "encoding": "base64",
    }
    await adapter._dispatch_inbound(_attachment_event(content))

    assert len(captured) == 1
    message = captured[0]
    assert message.message_type == MessageType.DOCUMENT
    assert message.media_types == ["application/pdf"]
    assert len(message.media_urls) == 1

    cached_path = Path(message.media_urls[0])
    try:
        # The cached file must hold exactly the bytes that were inlined.
        assert cached_path.is_file()
        assert cached_path.read_bytes() == pdf_bytes
        assert message.text == "(attachment)"
    finally:
        # Remove the file the adapter cached, whether or not the checks passed.
        cached_path.unlink(missing_ok=True)
@pytest.mark.asyncio