mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
feat(photon): download and inline inbound attachments
This commit is contained in:
parent
b3aef57f21
commit
314af28e86
5 changed files with 269 additions and 39 deletions
|
|
@ -114,14 +114,18 @@ All env vars are documented in `plugin.yaml`. The most important:
|
|||
| `PHOTON_HOME_CHANNEL` | (unset) | Default space id for cron delivery |
|
||||
| `PHOTON_ALLOWED_USERS` | (unset) | Comma-separated E.164 allowlist |
|
||||
| `PHOTON_REQUIRE_MENTION` | false | Gate group chats on a wake word |
|
||||
| `PHOTON_MAX_INLINE_ATTACHMENT_BYTES` | 20 MiB (20971520) | Max inbound attachment size, in bytes, that the sidecar reads & inlines |
|
||||
|
||||
## Limitations (current Photon API)
|
||||
## Attachments & limitations
|
||||
|
||||
- **Inbound attachments are metadata only.** Inbound events carry the
|
||||
filename + MIME type; the plugin surfaces a text marker
|
||||
(`[Photon attachment received: …]`) so the agent knows something arrived.
|
||||
The SDK exposes attachment bytes via `content.read()`/`stream()`, so
|
||||
downloading them is a sidecar follow-up.
|
||||
- **Inbound attachments are downloaded.** The sidecar reads the bytes
|
||||
(`content.read()`) and base64-inlines them on the NDJSON event; the adapter
|
||||
caches them to the shared media cache and populates `media_urls` /
|
||||
`media_types`, so the agent sees the real image/file (vision included) —
|
||||
parity with the BlueBubbles iMessage channel. Attachments larger than
|
||||
`PHOTON_MAX_INLINE_ATTACHMENT_BYTES` (default 20 MiB, i.e. 20971520 bytes), or any byte read that
|
||||
fails, fall back to a text marker (`[Photon attachment received: …]`) so the
|
||||
agent still knows something arrived.
|
||||
- **Outbound attachments are supported.** Images, voice notes, video, and
|
||||
documents are sent via `space.send(attachment(...))` /
|
||||
`space.send(voice(...))` through the sidecar's `/send-attachment`
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ Outbound:
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
|
@ -36,7 +37,7 @@ import sys
|
|||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# Type checkers see ``httpx`` as the always-imported module, so every use
|
||||
|
|
@ -422,8 +423,13 @@ class PhotonAdapter(BasePlatformAdapter):
|
|||
"space": {"id": "...", "type": "dm"|"group", "phone": "+E164"},
|
||||
"sender": {"id": "+E164"},
|
||||
"content": {"type": "text", "text": "..."}
|
||||
| {"type": "attachment", "name", "mimeType", "size"},
|
||||
| {"type": "attachment", "id", "name", "mimeType",
|
||||
"size", "data"?, "encoding"?},
|
||||
"timestamp": "2026-05-14T19:06:32.000Z"
|
||||
|
||||
Attachment content carries the bytes inline as base64 ``data`` (with
|
||||
``encoding == "base64"``) when the sidecar could read them within its
|
||||
size cap; otherwise only metadata is present and we surface a marker.
|
||||
}
|
||||
"""
|
||||
space = event.get("space") or {}
|
||||
|
|
@ -449,6 +455,11 @@ class PhotonAdapter(BasePlatformAdapter):
|
|||
except ValueError:
|
||||
timestamp = datetime.now(tz=timezone.utc)
|
||||
|
||||
# Media attachments (local cached paths) handed to the agent via the
|
||||
# gateway's image-routing path, exactly like the BlueBubbles channel.
|
||||
media_urls: List[str] = []
|
||||
media_types: List[str] = []
|
||||
|
||||
ctype = content.get("type")
|
||||
if ctype == "text":
|
||||
text = content.get("text") or ""
|
||||
|
|
@ -456,8 +467,20 @@ class PhotonAdapter(BasePlatformAdapter):
|
|||
elif ctype == "attachment":
|
||||
name = content.get("name") or "(unnamed)"
|
||||
mime = content.get("mimeType") or ""
|
||||
text = f"[Photon attachment received: {name} ({mime})]"
|
||||
mtype = _attachment_message_type(mime)
|
||||
cached = _cache_inbound_attachment(content, name, mime)
|
||||
if cached:
|
||||
media_urls.append(cached)
|
||||
media_types.append(mime or "application/octet-stream")
|
||||
# The real bytes are attached, so the agent sees the media
|
||||
# itself — a short marker is enough text, and it keeps group
|
||||
# mention-gating consistent with plain messages.
|
||||
text = "(attachment)"
|
||||
else:
|
||||
# No bytes (over the sidecar cap, a failed read, or a caching
|
||||
# failure) — fall back to a metadata marker so the agent still
|
||||
# knows something arrived.
|
||||
text = f"[Photon attachment received: {name} ({mime})]"
|
||||
else:
|
||||
text = f"[Photon content type not handled: {ctype}]"
|
||||
mtype = MessageType.TEXT
|
||||
|
|
@ -489,6 +512,8 @@ class PhotonAdapter(BasePlatformAdapter):
|
|||
message_id=event.get("messageId"),
|
||||
raw_message=event,
|
||||
timestamp=timestamp,
|
||||
media_urls=media_urls,
|
||||
media_types=media_types,
|
||||
)
|
||||
await self.handle_message(message_event)
|
||||
|
||||
|
|
@ -819,6 +844,77 @@ def _attachment_message_type(mime: str) -> MessageType:
|
|||
return MessageType.DOCUMENT
|
||||
|
||||
|
||||
# MIME → file-extension maps for caching inbound attachment bytes. These mirror
# the BlueBubbles iMessage channel so both adapters name cached media the same.
# They are only consulted by ``_cache_inbound_attachment`` when the attachment
# filename has no suffix of its own; a MIME type missing from the map falls
# back to ".jpg" (images) or ".mp3" (audio) at the lookup site.
_IMAGE_EXT_BY_MIME = {
    "image/jpeg": ".jpg",
    "image/png": ".png",
    "image/gif": ".gif",
    "image/webp": ".webp",
    # NOTE(review): HEIC/HEIF/TIFF are labelled ".jpg" although the bytes are
    # not JPEG — presumably inherited from the BlueBubbles mapping; confirm
    # that downstream consumers sniff content rather than trust the suffix.
    "image/heic": ".jpg",
    "image/heif": ".jpg",
    "image/tiff": ".jpg",
}
_AUDIO_EXT_BY_MIME = {
    "audio/mp3": ".mp3",
    "audio/mpeg": ".mp3",
    "audio/ogg": ".ogg",
    "audio/wav": ".wav",
    # NOTE(review): CAF audio is labelled ".mp3" without any transcoding in
    # this module — presumably mirrors BlueBubbles; verify it is intended.
    "audio/x-caf": ".mp3",
    "audio/mp4": ".m4a",
    "audio/aac": ".m4a",
}
|
||||
|
||||
|
||||
def _cache_inbound_attachment(
|
||||
content: Dict[str, Any], name: str, mime: str
|
||||
) -> Optional[str]:
|
||||
"""Decode a base64-inlined inbound attachment and cache it locally.
|
||||
|
||||
The sidecar inlines the attachment bytes as ``content["data"]`` (base64).
|
||||
We decode them and route to the shared media cache by MIME type, returning
|
||||
the cached absolute path so the caller can populate ``media_urls`` (which
|
||||
the gateway then hands to the model). Returns ``None`` when there are no
|
||||
bytes (over the sidecar's inline cap or a failed read) or when caching
|
||||
fails, so the caller can fall back to a text marker.
|
||||
"""
|
||||
data_b64 = content.get("data")
|
||||
if not data_b64:
|
||||
return None
|
||||
try:
|
||||
raw = base64.b64decode(data_b64)
|
||||
except (ValueError, TypeError) as exc:
|
||||
logger.warning("[photon] failed to decode inbound attachment bytes: %s", exc)
|
||||
return None
|
||||
|
||||
from gateway.platforms.base import (
|
||||
cache_audio_from_bytes,
|
||||
cache_document_from_bytes,
|
||||
cache_image_from_bytes,
|
||||
)
|
||||
|
||||
mime = (mime or "").lower()
|
||||
# Prefer the real extension from the filename; fall back to the MIME map.
|
||||
suffix = Path(name).suffix if name else ""
|
||||
try:
|
||||
if mime.startswith("image/"):
|
||||
ext = suffix or _IMAGE_EXT_BY_MIME.get(mime, ".jpg")
|
||||
try:
|
||||
return cache_image_from_bytes(raw, ext)
|
||||
except ValueError:
|
||||
# Bytes don't look like a supported image (e.g. HEIC magic) —
|
||||
# still deliver them as a document rather than dropping them.
|
||||
return cache_document_from_bytes(raw, name)
|
||||
if mime.startswith("audio/"):
|
||||
ext = suffix or _AUDIO_EXT_BY_MIME.get(mime, ".mp3")
|
||||
return cache_audio_from_bytes(raw, ext)
|
||||
# Video, application/*, and everything else → document cache.
|
||||
return cache_document_from_bytes(raw, name)
|
||||
except Exception as exc:
|
||||
logger.warning("[photon] failed to cache inbound attachment %s: %s", name, exc)
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Standalone (out-of-process) send for cron deliveries when the gateway
|
||||
# is not co-resident. Reuses a live sidecar already listening on the
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@ import subprocess
|
|||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_cli.colors import Colors, color
|
||||
|
||||
from . import auth as photon_auth
|
||||
|
||||
_SIDECAR_DIR = Path(__file__).parent / "sidecar"
|
||||
|
|
@ -175,19 +177,18 @@ def _cmd_setup(args: argparse.Namespace) -> int:
|
|||
|
||||
# 4. Register the operator's phone number as a Spectrum user (idempotent).
|
||||
phone = args.phone or _prompt(
|
||||
"[4/5] Your iMessage phone number (E.164, e.g. +15551234567): "
|
||||
color(
|
||||
"[4/5] Your iMessage phone number (E.164, e.g. +15551234567): ",
|
||||
Colors.CYAN,
|
||||
)
|
||||
)
|
||||
if not phone:
|
||||
print(" Skipped user registration (no phone given). Re-run with --phone later.")
|
||||
else:
|
||||
# Name/email are optional and never prompted for — pass --first-name /
|
||||
# --email if you want them sent to the dashboard.
|
||||
first_name = args.first_name
|
||||
email = args.email
|
||||
# The dashboard may require a name/email; prompt interactively when
|
||||
# we have a TTY and they weren't supplied, but allow skipping.
|
||||
if first_name is None:
|
||||
first_name = _prompt(" First name (optional, Enter to skip): ") or None
|
||||
if email is None:
|
||||
email = _prompt(" Email (optional, Enter to skip): ") or None
|
||||
try:
|
||||
_user, created = photon_auth.register_user_if_absent(
|
||||
token, dashboard_id,
|
||||
|
|
|
|||
|
|
@ -48,6 +48,14 @@ const port = parseInt(process.env.PHOTON_SIDECAR_PORT || "8789", 10);
|
|||
const bind = process.env.PHOTON_SIDECAR_BIND || "127.0.0.1";
|
||||
const sharedToken = process.env.PHOTON_SIDECAR_TOKEN;
|
||||
|
||||
// Inbound attachments are read into memory and base64-inlined on the NDJSON
|
||||
// event so the Python adapter can cache the real bytes (and the agent can see
|
||||
// the image). Cap the size we inline — above it we forward metadata only and
|
||||
// the adapter surfaces a text marker, so one large video can't balloon a
|
||||
// single NDJSON line. Override via PHOTON_MAX_INLINE_ATTACHMENT_BYTES.
|
||||
const MAX_INLINE_ATTACHMENT_BYTES =
|
||||
Number(process.env.PHOTON_MAX_INLINE_ATTACHMENT_BYTES) || 20 * 1024 * 1024;
|
||||
|
||||
if (!projectId || !projectSecret || !sharedToken) {
|
||||
console.error(
|
||||
"photon-sidecar: PHOTON_PROJECT_ID, PHOTON_PROJECT_SECRET and " +
|
||||
|
|
@ -118,7 +126,7 @@ async function deliver(line) {
|
|||
}
|
||||
}
|
||||
|
||||
function normalizeContent(content) {
|
||||
async function normalizeContent(content) {
|
||||
if (!content || typeof content !== "object") {
|
||||
return { type: "unknown" };
|
||||
}
|
||||
|
|
@ -126,20 +134,55 @@ function normalizeContent(content) {
|
|||
return { type: "text", text: content.text || "" };
|
||||
}
|
||||
if (content.type === "attachment") {
|
||||
// Bytes are reachable via content.read()/stream(); we surface metadata
|
||||
// here and leave byte download to a follow-up (keeps the event small).
|
||||
return {
|
||||
const meta = {
|
||||
type: "attachment",
|
||||
id: content.id ?? null,
|
||||
name: content.name ?? null,
|
||||
mimeType: content.mimeType ?? null,
|
||||
size: typeof content.size === "number" ? content.size : null,
|
||||
};
|
||||
// Read the bytes eagerly and base64-inline them as `data` so the Python
|
||||
// adapter can cache the real file (the agent then sees the image itself).
|
||||
// The spectrum-ts attachment object may not outlive this stream
|
||||
// iteration, so a lazy/on-demand fetch isn't safe. Over-cap attachments
|
||||
// (when size is known up front) are forwarded as metadata only and the
|
||||
// adapter falls back to a text marker. A read failure must never break
|
||||
// the inbound loop — we just drop `data` and forward metadata.
|
||||
if (meta.size !== null && meta.size > MAX_INLINE_ATTACHMENT_BYTES) {
|
||||
console.error(
|
||||
`photon-sidecar: attachment ${meta.name ?? meta.id} (${meta.size} bytes) ` +
|
||||
`exceeds inline cap ${MAX_INLINE_ATTACHMENT_BYTES}; forwarding metadata only`
|
||||
);
|
||||
return meta;
|
||||
}
|
||||
if (typeof content.read === "function") {
|
||||
try {
|
||||
const buf = await content.read();
|
||||
// Guard the case where size was unknown but the bytes turn out to be
|
||||
// over the cap.
|
||||
if (buf && buf.length > MAX_INLINE_ATTACHMENT_BYTES) {
|
||||
console.error(
|
||||
`photon-sidecar: attachment ${meta.name ?? meta.id} (${buf.length} bytes) ` +
|
||||
`exceeds inline cap after read; forwarding metadata only`
|
||||
);
|
||||
return meta;
|
||||
}
|
||||
meta.data = Buffer.from(buf).toString("base64");
|
||||
meta.encoding = "base64";
|
||||
} catch (e) {
|
||||
console.error(
|
||||
"photon-sidecar: failed to read attachment bytes " +
|
||||
"(forwarding metadata only): " +
|
||||
(e && e.stack ? e.stack : String(e))
|
||||
);
|
||||
}
|
||||
}
|
||||
return meta;
|
||||
}
|
||||
return { type: content.type || "unknown" };
|
||||
}
|
||||
|
||||
function normalizeEvent(space, message) {
|
||||
async function normalizeEvent(space, message) {
|
||||
try {
|
||||
const msgSpace = message.space || {};
|
||||
const ts = message.timestamp;
|
||||
|
|
@ -153,7 +196,7 @@ function normalizeEvent(space, message) {
|
|||
phone: space.phone ?? msgSpace.phone ?? null,
|
||||
},
|
||||
sender: { id: message.sender ? message.sender.id : null },
|
||||
content: normalizeContent(message.content),
|
||||
content: await normalizeContent(message.content),
|
||||
timestamp:
|
||||
ts instanceof Date ? ts.toISOString() : ts ? String(ts) : null,
|
||||
};
|
||||
|
|
@ -172,7 +215,7 @@ function normalizeEvent(space, message) {
|
|||
if (message && message.direction && message.direction !== "inbound") {
|
||||
continue;
|
||||
}
|
||||
const event = normalizeEvent(space, message);
|
||||
const event = await normalizeEvent(space, message);
|
||||
if (!event) continue;
|
||||
await deliver(JSON.stringify(event));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,9 @@ sidecar-event parsing without spawning the Node sidecar or binding ports.
|
|||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import pytest
|
||||
|
|
@ -80,28 +82,112 @@ async def test_dispatch_group_type(monkeypatch: pytest.MonkeyPatch) -> None:
|
|||
assert captured[0].source.chat_type == "group"
|
||||
|
||||
|
||||
# A real 1x1 transparent PNG (passes base.py's _looks_like_image magic check).
|
||||
_PNG_1X1_B64 = (
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYPhf"
|
||||
"DwAChwGA60e6kgAAAABJRU5ErkJggg=="
|
||||
)
|
||||
|
||||
|
||||
def _attachment_event(
|
||||
content: Dict[str, Any], msg_id: str = "spc-msg-att"
|
||||
) -> Dict[str, Any]:
|
||||
return {
|
||||
"messageId": msg_id,
|
||||
"space": {"id": "+15551234567", "type": "dm", "phone": "+15551234567"},
|
||||
"sender": {"id": "+15551234567"},
|
||||
"content": {"type": "attachment", **content},
|
||||
"timestamp": "2026-05-14T19:06:32.000Z",
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dispatch_attachment_surfaces_marker(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
async def test_dispatch_attachment_without_bytes_surfaces_marker(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""No inline ``data`` (over cap / failed sidecar read) -> text marker, no media."""
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
captured = _capture(adapter, monkeypatch)
|
||||
|
||||
event = {
|
||||
"messageId": "spc-msg-att",
|
||||
"space": {"id": "+15551234567", "type": "dm", "phone": "+15551234567"},
|
||||
"sender": {"id": "+15551234567"},
|
||||
"content": {
|
||||
"type": "attachment",
|
||||
"name": "IMG_4127.HEIC",
|
||||
"mimeType": "image/heic",
|
||||
"size": 12345,
|
||||
},
|
||||
"timestamp": "2026-05-14T19:06:32.000Z",
|
||||
}
|
||||
event = _attachment_event(
|
||||
{"name": "IMG_4127.HEIC", "mimeType": "image/heic", "size": 12345}
|
||||
)
|
||||
await adapter._dispatch_inbound(event)
|
||||
assert len(captured) == 1
|
||||
assert "Photon attachment received" in captured[0].text
|
||||
assert "IMG_4127.HEIC" in captured[0].text
|
||||
assert captured[0].message_type == MessageType.PHOTO
|
||||
ev = captured[0]
|
||||
assert "Photon attachment received" in ev.text
|
||||
assert "IMG_4127.HEIC" in ev.text
|
||||
assert ev.message_type == MessageType.PHOTO
|
||||
assert ev.media_urls == []
|
||||
assert ev.media_types == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_dispatch_attachment_downloads_image(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Inline base64 image bytes are decoded, cached, and exposed as media."""
    adapter = _make_adapter(monkeypatch)
    captured = _capture(adapter, monkeypatch)

    raw = base64.b64decode(_PNG_1X1_B64)
    event = _attachment_event(
        {
            "name": "photo.png",
            "mimeType": "image/png",
            "size": len(raw),
            "data": _PNG_1X1_B64,
            "encoding": "base64",
        }
    )
    await adapter._dispatch_inbound(event)

    # Dispatch writes into the shared media cache, so every assertion sits
    # inside the try: a failure on any of them must still remove the file.
    try:
        assert len(captured) == 1
        ev = captured[0]
        assert ev.message_type == MessageType.PHOTO
        assert ev.media_types == ["image/png"]
        assert len(ev.media_urls) == 1
        cached = Path(ev.media_urls[0])
        assert cached.is_file()
        assert cached.read_bytes() == raw
        assert ev.text == "(attachment)"
    finally:
        for delivered in captured:
            for url in getattr(delivered, "media_urls", None) or []:
                Path(url).unlink(missing_ok=True)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_dispatch_attachment_downloads_document(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Non-image attachments route through the document cache as DOCUMENT."""
    adapter = _make_adapter(monkeypatch)
    captured = _capture(adapter, monkeypatch)

    raw = b"%PDF-1.4 hermes test document"
    event = _attachment_event(
        {
            "name": "report.pdf",
            "mimeType": "application/pdf",
            "size": len(raw),
            "data": base64.b64encode(raw).decode("ascii"),
            "encoding": "base64",
        }
    )
    await adapter._dispatch_inbound(event)

    # Dispatch writes into the shared media cache, so every assertion sits
    # inside the try: a failure on any of them must still remove the file.
    try:
        assert len(captured) == 1
        ev = captured[0]
        assert ev.message_type == MessageType.DOCUMENT
        assert ev.media_types == ["application/pdf"]
        assert len(ev.media_urls) == 1
        cached = Path(ev.media_urls[0])
        assert cached.is_file()
        assert cached.read_bytes() == raw
        assert ev.text == "(attachment)"
    finally:
        for delivered in captured:
            for url in getattr(delivered, "media_urls", None) or []:
                Path(url).unlink(missing_ok=True)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue