feat(photon): wire outbound media via spectrum-ts attachment() (#42397)

Photon now exposes attachment send (Ray Sun, photon-nousresearch), so
the Photon plugin gains outbound media to match the BlueBubbles iMessage
channel.

- sidecar: new /send-attachment endpoint wrapping space.send(attachment())
  / space.send(voice()); caption sent as a trailing text bubble.
- adapter: override send_image/send_image_file/send_voice/send_video/
  send_document/send_animation. URL helpers cache to a local path first
  (cache_image_from_url), file helpers pass through. Defense-in-depth
  path re-validation before the path reaches the Node sidecar.
- _standalone_send (cron): send text first, then each media_file as a
  /send-attachment call (is_voice -> voice builder).
- docs/README: flip the 'outbound attachments not wired' note.
This commit is contained in:
Teknium 2026-06-08 15:29:16 -07:00 committed by GitHub
parent 5e9d7a7661
commit 4615e08d3d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 507 additions and 25 deletions

View file

@ -106,14 +106,16 @@ All env vars are documented in `plugin.yaml`. The most important are:
## Limitations (current Photon API)
- **Attachments are metadata only.** Inbound webhooks include the
- **Inbound attachments are metadata only.** Inbound webhooks include the
filename + MIME type but no download URL. The plugin surfaces a
text marker (`[Photon attachment received: …]`) so the agent knows
something arrived, but cannot read the bytes. Photon's docs note
an attachment retrieval endpoint is on the roadmap.
- **Outbound attachments are not supported yet.** Adding them is
straightforward once the sidecar wires up `attachment(...)` /
`space.send(attachment(...))` from `spectrum-ts`.
- **Outbound attachments are supported.** Images, voice notes, video,
and documents are sent via `space.send(attachment(...))` /
`space.send(voice(...))` through the sidecar's `/send-attachment`
endpoint. A caption is delivered as a separate text bubble after the
media.
- **Reactions, message effects, polls** — not exposed yet; the
`spectrum-ts` SDK supports them, and the sidecar is the natural
place to add them when the agent has reason to use them.

View file

@ -14,8 +14,10 @@ Outbound:
Photon does not currently expose a public HTTP send-message
endpoint, so the adapter spawns a small Node sidecar (see
``sidecar/index.mjs``) that runs the ``spectrum-ts`` SDK. Each
``send`` / ``send_typing`` call from Hermes is a loopback POST to
the sidecar with a shared bearer token.
``send`` / ``send_typing`` / attachment call from Hermes is a
loopback POST to the sidecar with a shared bearer token. Outbound
media (images, voice notes, video, documents) goes through
spectrum-ts' ``attachment()`` / ``voice()`` content builders.
When Photon ships an HTTP send endpoint we can collapse the sidecar
into ``_send_via_http`` and drop the Node dependency entirely.
@ -670,6 +672,99 @@ class PhotonAdapter(BasePlatformAdapter):
) -> SendResult:
return await self._sidecar_send(chat_id, content, reply_to=reply_to)
# -- Outbound media (parity with the BlueBubbles iMessage channel) -----
#
# Photon ships outbound attachments via spectrum-ts' `attachment()` /
# `voice()` content builders. The sidecar's `/send-attachment` endpoint
# wraps `space.send(attachment(path, {...}))`. These overrides mirror
# BlueBubbles: URL-based helpers cache to a local path first, file-based
# helpers pass the path straight through.
async def send_image(
    self,
    chat_id: str,
    image_url: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
    """Send a remote image by caching it locally and attaching the file.

    The URL is fetched with ``cache_image_from_url`` and the resulting
    local path is posted to the sidecar as an attachment. When the fetch
    (or the helper import) fails, the call degrades to the base-class
    ``send_image`` so the recipient still receives the URL as text.

    ``metadata`` is accepted for interface compatibility; it is not used
    by this override.
    """
    try:
        from gateway.platforms.base import cache_image_from_url

        local_path = await cache_image_from_url(image_url)
    except Exception as exc:
        # Deliberate best-effort: any failure degrades to the text
        # fallback. Record the reason so a broken cache or unreachable
        # host is diagnosable instead of silently swallowed.
        logger.warning(
            "[photon] could not cache image for attachment send; "
            "falling back to text: %s",
            exc,
        )
        return await super().send_image(chat_id, image_url, caption, reply_to)
    return await self._sidecar_send_attachment(
        chat_id, local_path, caption=caption, reply_to=reply_to,
    )
async def send_image_file(
    self,
    chat_id: str,
    image_path: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> SendResult:
    """Send an image that already exists on local disk.

    The path is handed to ``_sidecar_send_attachment`` unchanged.
    ``metadata`` and any extra keyword arguments are accepted but unused.
    """
    outcome = await self._sidecar_send_attachment(
        chat_id,
        image_path,
        caption=caption,
        reply_to=reply_to,
    )
    return outcome
async def send_voice(
    self,
    chat_id: str,
    audio_path: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> SendResult:
    """Send a local audio file as a voice note.

    Delegates to ``_sidecar_send_attachment`` with ``kind="voice"``.
    ``metadata`` and any extra keyword arguments are accepted but unused.
    """
    outcome = await self._sidecar_send_attachment(
        chat_id,
        audio_path,
        caption=caption,
        reply_to=reply_to,
        kind="voice",
    )
    return outcome
async def send_video(
    self,
    chat_id: str,
    video_path: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> SendResult:
    """Send a local video file as an ordinary attachment.

    The path is handed to ``_sidecar_send_attachment`` unchanged.
    ``metadata`` and any extra keyword arguments are accepted but unused.
    """
    outcome = await self._sidecar_send_attachment(
        chat_id,
        video_path,
        caption=caption,
        reply_to=reply_to,
    )
    return outcome
async def send_document(
    self,
    chat_id: str,
    file_path: str,
    caption: Optional[str] = None,
    file_name: Optional[str] = None,
    reply_to: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> SendResult:
    """Send a local file as a document attachment.

    ``file_name`` is forwarded to ``_sidecar_send_attachment`` as its
    ``name`` argument. ``metadata`` and any extra keyword arguments are
    accepted but unused.
    """
    outcome = await self._sidecar_send_attachment(
        chat_id,
        file_path,
        name=file_name,
        caption=caption,
        reply_to=reply_to,
    )
    return outcome
async def send_animation(
    self,
    chat_id: str,
    animation_url: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
    """Send an animation (GIF) by reusing the image path.

    iMessage renders GIFs inline as ordinary image attachments, so this
    simply forwards every argument to ``send_image``.
    """
    outcome = await self.send_image(
        chat_id,
        animation_url,
        caption,
        reply_to,
        metadata,
    )
    return outcome
async def send_typing(self, chat_id: str, metadata=None) -> None:
try:
await self._sidecar_call("/typing", {"spaceId": chat_id})
@ -704,6 +799,57 @@ class PhotonAdapter(BasePlatformAdapter):
return SendResult(success=False, error=str(e))
return SendResult(success=True, message_id=data.get("messageId"))
async def _sidecar_send_attachment(
    self,
    space_id: str,
    path: str,
    *,
    name: Optional[str] = None,
    mime_type: Optional[str] = None,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
    kind: str = "attachment",
) -> SendResult:
    """POST a local file to the sidecar's ``/send-attachment`` endpoint.

    ``kind`` is normalised to ``"voice"`` or ``"attachment"``; any value
    other than ``"voice"`` is sent as ``"attachment"``. ``name``,
    ``caption`` and ``reply_to`` are included in the request body only
    when truthy. When no ``mime_type`` is given, one is guessed from the
    file name with ``mimetypes.guess_type`` and sent if the guess
    succeeds.

    Returns a failed ``SendResult`` when the path does not pass
    ``validate_media_delivery_path`` or when the sidecar call raises;
    otherwise a successful one carrying the sidecar's ``messageId``.
    """
    # Defense-in-depth: the path is re-checked here because send_*_file
    # and cron callers may pass arbitrary strings.
    safe_path = self.validate_media_delivery_path(str(path))
    if not safe_path:
        return SendResult(
            success=False, error=f"unsafe or missing attachment path: {path}"
        )

    if not mime_type:
        import mimetypes

        mime_type = mimetypes.guess_type(safe_path)[0] or None

    payload: Dict[str, Any] = {
        "spaceId": space_id,
        "path": safe_path,
        "kind": "voice" if kind == "voice" else "attachment",
    }
    # Optional fields are added in a fixed order and only when truthy.
    optional_fields = (
        ("name", name),
        ("mimeType", mime_type),
        ("caption", caption),
        ("replyTo", reply_to),
    )
    for field, value in optional_fields:
        if value:
            payload[field] = value

    try:
        reply = await self._sidecar_call("/send-attachment", payload)
    except Exception as e:
        return SendResult(success=False, error=str(e))
    return SendResult(success=True, message_id=reply.get("messageId"))
async def _sidecar_call(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]:
if self._http_client is None:
raise RuntimeError("Photon adapter not connected")
@ -753,8 +899,8 @@ async def _standalone_send(
message: str,
*,
thread_id: Optional[str] = None, # noqa: ARG001 — Spectrum has no threads yet
media_files: Optional[list] = None, # noqa: ARG001 — attachment send not supported yet
force_document: bool = False, # noqa: ARG001
media_files: Optional[list] = None,
force_document: bool = False, # noqa: ARG001 — iMessage auto-detects file kind
) -> Dict[str, Any]:
if not HTTPX_AVAILABLE:
return {"error": "httpx not installed"}
@ -771,20 +917,54 @@ async def _standalone_send(
"cannot spawn the sidecar themselves."
)
}
body: Dict[str, Any] = {"spaceId": chat_id, "text": message[:_MAX_MESSAGE_LENGTH]}
base = f"http://{_DEFAULT_SIDECAR_BIND}:{port}"
headers = {"X-Hermes-Sidecar-Token": token}
last_message_id: Optional[str] = None
try:
async with httpx.AsyncClient(timeout=30.0) as client:
resp = await client.post(
f"http://{_DEFAULT_SIDECAR_BIND}:{port}/send",
json=body,
headers={"X-Hermes-Sidecar-Token": token},
)
if resp.status_code != 200:
return {"error": f"sidecar returned {resp.status_code}: {resp.text[:200]}"}
data = resp.json() or {}
if not data.get("ok"):
return {"error": data.get("error") or "sidecar reported failure"}
return {"success": True, "message_id": data.get("messageId")}
# 1. Text body first (if any), so it leads the conversation.
if message:
resp = await client.post(
f"{base}/send",
json={"spaceId": chat_id, "text": message[:_MAX_MESSAGE_LENGTH]},
headers=headers,
)
if resp.status_code != 200:
return {"error": f"sidecar returned {resp.status_code}: {resp.text[:200]}"}
data = resp.json() or {}
if not data.get("ok"):
return {"error": data.get("error") or "sidecar reported failure"}
last_message_id = data.get("messageId")
# 2. Each attachment as a separate /send-attachment call.
# media_files is List[Tuple[path, is_voice]] (see
# BasePlatformAdapter.filter_media_delivery_paths).
import mimetypes
for media_path, is_voice in media_files or []:
safe_path = BasePlatformAdapter.validate_media_delivery_path(str(media_path))
if not safe_path:
logger.warning("[photon] standalone send skipping unsafe path")
continue
guessed, _ = mimetypes.guess_type(safe_path)
att_body: Dict[str, Any] = {
"spaceId": chat_id,
"path": safe_path,
"kind": "voice" if is_voice else "attachment",
}
if guessed:
att_body["mimeType"] = guessed
resp = await client.post(
f"{base}/send-attachment", json=att_body, headers=headers,
)
if resp.status_code != 200:
return {"error": f"sidecar returned {resp.status_code}: {resp.text[:200]}"}
data = resp.json() or {}
if not data.get("ok"):
return {"error": data.get("error") or "sidecar reported failure"}
last_message_id = data.get("messageId") or last_message_id
return {"success": True, "message_id": last_message_id}
except Exception as e:
return {"error": f"Photon standalone send failed: {e}"}

View file

@ -12,6 +12,10 @@
// - POST /healthz -> {"ok": true}
// - POST /send -> {"ok": true, "messageId": "..."}
// body: {"spaceId": "...", "text": "...", "replyTo": "..." | null}
// - POST /send-attachment -> {"ok": true, "messageId": "..."}
// body: {"spaceId": "...", "path": "...", "name": "..." | null,
// "mimeType": "..." | null, "caption": "..." | null,
// "kind": "attachment" | "voice", "replyTo": "..." | null}
// - POST /typing -> {"ok": true}
// body: {"spaceId": "..."}
// - POST /shutdown -> {"ok": true}; then process exits
@ -48,9 +52,9 @@ if (!projectId || !projectSecret || !sharedToken) {
// Lazy-load spectrum-ts so a missing install fails with a clear message
// instead of a cryptic module-resolution error during import.
let Spectrum, imessage;
let Spectrum, imessage, attachment, voice;
try {
({ Spectrum } = await import("spectrum-ts"));
({ Spectrum, attachment, voice } = await import("spectrum-ts"));
({ imessage } = await import("spectrum-ts/providers/imessage"));
} catch (e) {
console.error(
@ -179,6 +183,44 @@ const server = http.createServer(async (req, res) => {
: await space.send(text);
return ok(res, { messageId: result?.id || result?.messageId || null });
}
if (req.url === "/send-attachment") {
const { spaceId, path, name, mimeType, caption, kind, replyTo } =
body || {};
if (!spaceId || typeof path !== "string" || !path) {
return badRequest(res, "spaceId and path are required");
}
const space = await resolveSpace(spaceId);
// spectrum-ts infers name + MIME from the file extension; pass
// overrides only when Hermes supplied them so a known-good
// inference isn't clobbered with an empty string.
const opts = {};
if (name) opts.name = name;
if (mimeType) opts.mimeType = mimeType;
const builder =
kind === "voice"
? voice(path, Object.keys(opts).length ? opts : undefined)
: attachment(path, Object.keys(opts).length ? opts : undefined);
const sendOpts = replyTo ? { replyTo } : undefined;
const result = sendOpts
? await space.send(builder, sendOpts)
: await space.send(builder);
// iMessage delivers the caption as a separate bubble; send it
// after the media so the attachment renders first.
if (caption && typeof caption === "string") {
try {
await space.send(caption);
} catch (e) {
console.error(
"photon-sidecar: attachment sent but caption failed: " +
(e && e.stack ? e.stack : String(e))
);
}
}
return ok(res, { messageId: result?.id || result?.messageId || null });
}
if (req.url === "/typing") {
const { spaceId } = body || {};
if (!spaceId) return badRequest(res, "spaceId is required");

View file

@ -0,0 +1,255 @@
"""Outbound-media tests for PhotonAdapter.
Photon ships outbound attachments via spectrum-ts' ``attachment()`` /
``voice()`` content builders, reached through the Node sidecar's
``/send-attachment`` endpoint. These tests stub ``_sidecar_call`` so we
can assert the endpoint + body shape each ``send_*`` override produces
without spawning Node or binding ports.
"""
from __future__ import annotations
import os
from typing import Any, Dict, List, Tuple
import pytest
from gateway.config import PlatformConfig
from plugins.platforms.photon import adapter as photon_adapter
from plugins.platforms.photon.adapter import PhotonAdapter
def _make_adapter(monkeypatch: pytest.MonkeyPatch) -> PhotonAdapter:
    """Build a ``PhotonAdapter`` with dummy project credentials in the env.

    ``PHOTON_WEBHOOK_SECRET`` is removed from the environment if present.
    """
    credentials = (
        ("PHOTON_PROJECT_ID", "test-project-id"),
        ("PHOTON_PROJECT_SECRET", "test-project-secret"),
    )
    for var_name, var_value in credentials:
        monkeypatch.setenv(var_name, var_value)
    monkeypatch.delenv("PHOTON_WEBHOOK_SECRET", raising=False)
    return PhotonAdapter(PlatformConfig(enabled=True, token="", extra={}))
def _capture_sidecar(adapter: PhotonAdapter) -> List[Tuple[str, Dict[str, Any]]]:
    """Swap ``_sidecar_call`` for a recorder and return its call log.

    Every call is appended to the returned list as ``(path, body)`` and
    answered with a fixed successful response whose id is ``msg-123``.
    """
    recorded: List[Tuple[str, Dict[str, Any]]] = []

    async def _record(path: str, body: Dict[str, Any]) -> Dict[str, Any]:
        recorded.append((path, body))
        return {"ok": True, "messageId": "msg-123"}

    adapter._sidecar_call = _record  # type: ignore[assignment]
    return recorded
@pytest.fixture()
def real_file(tmp_path) -> str:
    """Create ``photo.jpg`` under ``tmp_path`` and return its path as a string."""
    target = tmp_path / "photo.jpg"
    # JPEG magic bytes followed by filler; the content is never decoded.
    target.write_bytes(b"\xff\xd8\xff\xe0fake-jpeg")
    return str(target)
def _patch_safe_path(monkeypatch: pytest.MonkeyPatch) -> None:
    """Relax ``PhotonAdapter`` path validation to a plain existence check.

    With this patch any path that exists on disk is accepted, so tmp
    files outside the cache pass; missing paths still yield ``None``.
    """

    def _existing_or_none(candidate):
        return candidate if os.path.exists(candidate) else None

    monkeypatch.setattr(
        PhotonAdapter,
        "validate_media_delivery_path",
        staticmethod(_existing_or_none),
    )
@pytest.mark.asyncio
async def test_send_image_file_hits_attachment_endpoint(
    monkeypatch: pytest.MonkeyPatch, real_file: str
) -> None:
    """``send_image_file`` makes exactly one ``/send-attachment`` call."""
    _patch_safe_path(monkeypatch)
    adapter = _make_adapter(monkeypatch)
    recorded = _capture_sidecar(adapter)
    space = "any;-;+15551234567"

    outcome = await adapter.send_image_file(space, real_file, caption="look")

    assert outcome.success is True
    assert outcome.message_id == "msg-123"
    assert len(recorded) == 1

    endpoint, payload = recorded[0]
    assert endpoint == "/send-attachment"
    assert payload["spaceId"] == space
    assert payload["path"] == real_file
    assert payload["kind"] == "attachment"
    assert payload["caption"] == "look"
    # The MIME type is inferred from the .jpg extension.
    assert payload["mimeType"] == "image/jpeg"
@pytest.mark.asyncio
async def test_send_voice_marks_kind_voice(
    monkeypatch: pytest.MonkeyPatch, tmp_path
) -> None:
    """``send_voice`` posts to ``/send-attachment`` with ``kind == "voice"``."""
    _patch_safe_path(monkeypatch)
    voice_note = tmp_path / "note.m4a"
    voice_note.write_bytes(b"fake-audio")
    adapter = _make_adapter(monkeypatch)
    recorded = _capture_sidecar(adapter)

    outcome = await adapter.send_voice("any;-;+1", str(voice_note))

    assert outcome.success is True
    endpoint, payload = recorded[0]
    assert endpoint == "/send-attachment"
    assert payload["kind"] == "voice"
@pytest.mark.asyncio
async def test_send_document_passes_filename(
    monkeypatch: pytest.MonkeyPatch, tmp_path
) -> None:
    """``send_document`` forwards ``file_name`` as ``name`` in the body."""
    _patch_safe_path(monkeypatch)
    pdf = tmp_path / "report.pdf"
    pdf.write_bytes(b"%PDF-1.4 fake")
    adapter = _make_adapter(monkeypatch)
    recorded = _capture_sidecar(adapter)

    await adapter.send_document("any;-;+1", str(pdf), file_name="Q3.pdf")

    payload = recorded[0][1]
    assert payload["kind"] == "attachment"
    assert payload["name"] == "Q3.pdf"
    assert payload["mimeType"] == "application/pdf"
@pytest.mark.asyncio
async def test_send_video_passes_through(
    monkeypatch: pytest.MonkeyPatch, tmp_path
) -> None:
    """``send_video`` sends a plain attachment and keeps the caption."""
    _patch_safe_path(monkeypatch)
    clip = tmp_path / "clip.mp4"
    clip.write_bytes(b"fake-mp4")
    adapter = _make_adapter(monkeypatch)
    recorded = _capture_sidecar(adapter)

    await adapter.send_video("any;+;groupguid", str(clip), caption="watch")

    payload = recorded[0][1]
    assert payload["kind"] == "attachment"
    assert payload["caption"] == "watch"
@pytest.mark.asyncio
async def test_send_image_url_caches_then_sends_attachment(
    monkeypatch: pytest.MonkeyPatch, real_file: str
) -> None:
    """``send_image`` caches the URL, then attaches the cached local path."""
    import gateway.platforms.base as base_mod

    _patch_safe_path(monkeypatch)
    adapter = _make_adapter(monkeypatch)
    recorded = _capture_sidecar(adapter)
    image_url = "https://example.com/cat.jpg"

    async def _fake_cache(url: str, *a, **k) -> str:
        assert url == "https://example.com/cat.jpg"
        return real_file

    monkeypatch.setattr(base_mod, "cache_image_from_url", _fake_cache)

    outcome = await adapter.send_image("any;-;+1", image_url, caption="cat")

    assert outcome.success is True
    endpoint, payload = recorded[0]
    assert endpoint == "/send-attachment"
    assert payload["path"] == real_file
    assert payload["caption"] == "cat"
@pytest.mark.asyncio
async def test_send_image_url_fetch_failure_falls_back_to_text(
    monkeypatch: pytest.MonkeyPatch
) -> None:
    """A failed URL fetch makes ``send_image`` fall back to a text send."""
    import gateway.platforms.base as base_mod

    adapter = _make_adapter(monkeypatch)
    recorded = _capture_sidecar(adapter)
    image_url = "https://example.com/cat.jpg"

    async def _boom(url: str, *a, **k) -> str:
        raise RuntimeError("network down")

    monkeypatch.setattr(base_mod, "cache_image_from_url", _boom)

    outcome = await adapter.send_image("any;-;+1", image_url, caption="cat")

    # Fallback path: base send_image() routes to send() → /send (text).
    assert outcome.success is True
    endpoint, payload = recorded[0]
    assert endpoint == "/send"
    assert "https://example.com/cat.jpg" in payload["text"]
@pytest.mark.asyncio
async def test_send_attachment_rejects_unsafe_path(
    monkeypatch: pytest.MonkeyPatch
) -> None:
    """A path rejected by validation fails without reaching the sidecar."""

    def _reject_everything(candidate):
        return None

    # Validation is stubbed to reject every path, so this checks how the
    # adapter reacts to a rejection, not the validator's own rules.
    monkeypatch.setattr(
        PhotonAdapter,
        "validate_media_delivery_path",
        staticmethod(_reject_everything),
    )
    adapter = _make_adapter(monkeypatch)
    recorded = _capture_sidecar(adapter)

    outcome = await adapter.send_image_file("any;-;+1", "/etc/passwd")

    assert outcome.success is False
    assert "unsafe" in (outcome.error or "")
    assert recorded == []  # never reached the sidecar
@pytest.mark.asyncio
async def test_standalone_send_text_then_attachments(
    monkeypatch: pytest.MonkeyPatch, tmp_path
) -> None:
    """``_standalone_send`` posts the text first, then one call per media file."""
    _patch_safe_path(monkeypatch)
    # _standalone_send validates paths through BasePlatformAdapter, not
    # PhotonAdapter, so the subclass-level patch above never applies to it.
    # Patch the base class as seen from the adapter module so the test
    # does not depend on the real validator accepting pytest's tmp dir.
    monkeypatch.setattr(
        photon_adapter.BasePlatformAdapter,
        "validate_media_delivery_path",
        staticmethod(lambda p: p if os.path.exists(p) else None),
    )
    img = tmp_path / "a.png"
    img.write_bytes(b"\x89PNG fake")
    monkeypatch.setenv("PHOTON_SIDECAR_TOKEN", "tok")

    posted: List[Tuple[str, Dict[str, Any]]] = []

    class _Resp:
        # Minimal successful sidecar response.
        status_code = 200

        @staticmethod
        def json() -> Dict[str, Any]:
            return {"ok": True, "messageId": "m-9"}

    class _FakeClient:
        # Stand-in for httpx.AsyncClient that records every POST.
        def __init__(self, *a, **k):
            pass

        async def __aenter__(self):
            return self

        async def __aexit__(self, *a):
            return False

        async def post(self, url: str, json: Dict[str, Any], headers=None):
            posted.append((url, json))
            return _Resp()

    monkeypatch.setattr(photon_adapter.httpx, "AsyncClient", _FakeClient)

    cfg = PlatformConfig(enabled=True, token="", extra={})
    result = await photon_adapter._standalone_send(
        cfg,
        "any;-;+1",
        "hello",
        media_files=[(str(img), False)],
    )

    assert result.get("success") is True
    # First call is the text /send, second is /send-attachment.
    assert posted[0][0].endswith("/send")
    assert posted[0][1]["text"] == "hello"
    assert posted[1][0].endswith("/send-attachment")
    assert posted[1][1]["path"] == str(img)
    assert posted[1][1]["kind"] == "attachment"
    assert posted[1][1]["mimeType"] == "image/png"

View file

@ -206,11 +206,14 @@ hermes photon webhook delete <webhook-id> # remove one
## Limits today
- **Attachments are metadata-only.** Inbound webhooks carry the
- **Inbound attachments are metadata-only.** Inbound webhooks carry the
filename + MIME type but no download URL — Photon documents an
attachment retrieval endpoint as roadmap.
- **Outbound attachments not wired yet.** Easy to add in the sidecar
once the agent has reason to send them.
- **Outbound attachments are supported.** Hermes sends images, voice
notes, video, and documents through spectrum-ts' `attachment()` /
`voice()` content builders via the sidecar's `/send-attachment`
endpoint. Captions arrive as a separate iMessage bubble after the
media.
- **Photon's free quotas:** 5,000 messages per server per day,
50 new-conversation initiations per shared line per day. Increases
available — email `help@photon.codes`.