feat(photon): wire outbound media via spectrum-ts attachment() (#42397)

Photon now exposes attachment send (Ray Sun, photon-nousresearch), so
the Photon plugin gains outbound media to match the BlueBubbles iMessage
channel.

- sidecar: new /send-attachment endpoint wrapping space.send(attachment())
  / space.send(voice()); caption sent as a trailing text bubble.
- adapter: override send_image/send_image_file/send_voice/send_video/
  send_document/send_animation. URL helpers cache to a local path first
  (cache_image_from_url), file helpers pass through. Defense-in-depth
  path re-validation before the path reaches the Node sidecar.
- _standalone_send (cron): send text first, then each media_file as a
  /send-attachment call (is_voice -> voice builder).
- docs/README: flip the 'outbound attachments not wired' note.
This commit is contained in:
Teknium 2026-06-08 15:29:16 -07:00 committed by GitHub
parent 5e9d7a7661
commit 4615e08d3d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 507 additions and 25 deletions

View file

@ -106,14 +106,16 @@ All env vars are documented in `plugin.yaml`. The most important are:
## Limitations (current Photon API)
- **Attachments are metadata only.** Inbound webhooks include the
- **Inbound attachments are metadata only.** Inbound webhooks include the
filename + MIME type but no download URL. The plugin surfaces a
text marker (`[Photon attachment received: …]`) so the agent knows
something arrived, but cannot read the bytes. Photon's docs note
an attachment retrieval endpoint is on the roadmap.
- **Outbound attachments are not supported yet.** Adding them is
straightforward once the sidecar wires up `attachment(...)` /
`space.send(attachment(...))` from `spectrum-ts`.
- **Outbound attachments are supported.** Images, voice notes, video,
and documents are sent via `space.send(attachment(...))` /
`space.send(voice(...))` through the sidecar's `/send-attachment`
endpoint. A caption is delivered as a separate text bubble after the
media.
- **Reactions, message effects, polls** — not exposed yet; the
`spectrum-ts` SDK supports them, and the sidecar is the natural
place to add them when the agent has reason to use them.

View file

@ -14,8 +14,10 @@ Outbound:
Photon does not currently expose a public HTTP send-message
endpoint, so the adapter spawns a small Node sidecar (see
``sidecar/index.mjs``) that runs the ``spectrum-ts`` SDK. Each
``send`` / ``send_typing`` call from Hermes is a loopback POST to
the sidecar with a shared bearer token.
``send`` / ``send_typing`` / attachment call from Hermes is a
loopback POST to the sidecar with a shared bearer token. Outbound
media (images, voice notes, video, documents) goes through
spectrum-ts' ``attachment()`` / ``voice()`` content builders.
When Photon ships an HTTP send endpoint we can collapse the sidecar
into ``_send_via_http`` and drop the Node dependency entirely.
@ -670,6 +672,99 @@ class PhotonAdapter(BasePlatformAdapter):
) -> SendResult:
return await self._sidecar_send(chat_id, content, reply_to=reply_to)
# -- Outbound media (parity with the BlueBubbles iMessage channel) -----
#
# Photon ships outbound attachments via spectrum-ts' `attachment()` /
# `voice()` content builders. The sidecar's `/send-attachment` endpoint
# wraps `space.send(attachment(path, {...}))`. These overrides mirror
# BlueBubbles: URL-based helpers cache to a local path first, file-based
# helpers pass the path straight through.
async def send_image(
self,
chat_id: str,
image_url: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
try:
from gateway.platforms.base import cache_image_from_url
local_path = await cache_image_from_url(image_url)
except Exception:
# Couldn't fetch the URL — fall back to sending it as text.
return await super().send_image(chat_id, image_url, caption, reply_to)
return await self._sidecar_send_attachment(
chat_id, local_path, caption=caption, reply_to=reply_to,
)
async def send_image_file(
self,
chat_id: str,
image_path: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs,
) -> SendResult:
return await self._sidecar_send_attachment(
chat_id, image_path, caption=caption, reply_to=reply_to,
)
async def send_voice(
self,
chat_id: str,
audio_path: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs,
) -> SendResult:
return await self._sidecar_send_attachment(
chat_id, audio_path, caption=caption, reply_to=reply_to, kind="voice",
)
async def send_video(
self,
chat_id: str,
video_path: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs,
) -> SendResult:
return await self._sidecar_send_attachment(
chat_id, video_path, caption=caption, reply_to=reply_to,
)
async def send_document(
self,
chat_id: str,
file_path: str,
caption: Optional[str] = None,
file_name: Optional[str] = None,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs,
) -> SendResult:
return await self._sidecar_send_attachment(
chat_id, file_path, name=file_name, caption=caption, reply_to=reply_to,
)
async def send_animation(
self,
chat_id: str,
animation_url: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
# iMessage renders GIFs inline as ordinary image attachments.
return await self.send_image(
chat_id, animation_url, caption, reply_to, metadata,
)
async def send_typing(self, chat_id: str, metadata=None) -> None:
try:
await self._sidecar_call("/typing", {"spaceId": chat_id})
@ -704,6 +799,57 @@ class PhotonAdapter(BasePlatformAdapter):
return SendResult(success=False, error=str(e))
return SendResult(success=True, message_id=data.get("messageId"))
async def _sidecar_send_attachment(
self,
space_id: str,
path: str,
*,
name: Optional[str] = None,
mime_type: Optional[str] = None,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
kind: str = "attachment",
) -> SendResult:
"""POST a local file to the sidecar's ``/send-attachment`` endpoint.
``kind`` is ``"voice"`` for audio sent as a voice note (downgrades
to a plain audio attachment on platforms without voice notes),
otherwise ``"attachment"``. spectrum-ts infers ``name`` and
``mimeType`` from the file extension; we only pass overrides when
Hermes supplied them.
"""
# Defense-in-depth: re-validate the path before handing it to the
# Node sidecar. The gateway already filters MEDIA paths, but
# send_*_file / cron callers may pass arbitrary strings.
safe_path = self.validate_media_delivery_path(str(path))
if not safe_path:
return SendResult(
success=False, error=f"unsafe or missing attachment path: {path}"
)
if not mime_type:
import mimetypes
guessed, _ = mimetypes.guess_type(safe_path)
mime_type = guessed or None
body: Dict[str, Any] = {
"spaceId": space_id,
"path": safe_path,
"kind": "voice" if kind == "voice" else "attachment",
}
if name:
body["name"] = name
if mime_type:
body["mimeType"] = mime_type
if caption:
body["caption"] = caption
if reply_to:
body["replyTo"] = reply_to
try:
data = await self._sidecar_call("/send-attachment", body)
except Exception as e:
return SendResult(success=False, error=str(e))
return SendResult(success=True, message_id=data.get("messageId"))
async def _sidecar_call(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]:
if self._http_client is None:
raise RuntimeError("Photon adapter not connected")
@ -753,8 +899,8 @@ async def _standalone_send(
message: str,
*,
thread_id: Optional[str] = None, # noqa: ARG001 — Spectrum has no threads yet
media_files: Optional[list] = None, # noqa: ARG001 — attachment send not supported yet
force_document: bool = False, # noqa: ARG001
media_files: Optional[list] = None,
force_document: bool = False, # noqa: ARG001 — iMessage auto-detects file kind
) -> Dict[str, Any]:
if not HTTPX_AVAILABLE:
return {"error": "httpx not installed"}
@ -771,20 +917,54 @@ async def _standalone_send(
"cannot spawn the sidecar themselves."
)
}
body: Dict[str, Any] = {"spaceId": chat_id, "text": message[:_MAX_MESSAGE_LENGTH]}
base = f"http://{_DEFAULT_SIDECAR_BIND}:{port}"
headers = {"X-Hermes-Sidecar-Token": token}
last_message_id: Optional[str] = None
try:
async with httpx.AsyncClient(timeout=30.0) as client:
resp = await client.post(
f"http://{_DEFAULT_SIDECAR_BIND}:{port}/send",
json=body,
headers={"X-Hermes-Sidecar-Token": token},
)
if resp.status_code != 200:
return {"error": f"sidecar returned {resp.status_code}: {resp.text[:200]}"}
data = resp.json() or {}
if not data.get("ok"):
return {"error": data.get("error") or "sidecar reported failure"}
return {"success": True, "message_id": data.get("messageId")}
# 1. Text body first (if any), so it leads the conversation.
if message:
resp = await client.post(
f"{base}/send",
json={"spaceId": chat_id, "text": message[:_MAX_MESSAGE_LENGTH]},
headers=headers,
)
if resp.status_code != 200:
return {"error": f"sidecar returned {resp.status_code}: {resp.text[:200]}"}
data = resp.json() or {}
if not data.get("ok"):
return {"error": data.get("error") or "sidecar reported failure"}
last_message_id = data.get("messageId")
# 2. Each attachment as a separate /send-attachment call.
# media_files is List[Tuple[path, is_voice]] (see
# BasePlatformAdapter.filter_media_delivery_paths).
import mimetypes
for media_path, is_voice in media_files or []:
safe_path = BasePlatformAdapter.validate_media_delivery_path(str(media_path))
if not safe_path:
logger.warning("[photon] standalone send skipping unsafe path")
continue
guessed, _ = mimetypes.guess_type(safe_path)
att_body: Dict[str, Any] = {
"spaceId": chat_id,
"path": safe_path,
"kind": "voice" if is_voice else "attachment",
}
if guessed:
att_body["mimeType"] = guessed
resp = await client.post(
f"{base}/send-attachment", json=att_body, headers=headers,
)
if resp.status_code != 200:
return {"error": f"sidecar returned {resp.status_code}: {resp.text[:200]}"}
data = resp.json() or {}
if not data.get("ok"):
return {"error": data.get("error") or "sidecar reported failure"}
last_message_id = data.get("messageId") or last_message_id
return {"success": True, "message_id": last_message_id}
except Exception as e:
return {"error": f"Photon standalone send failed: {e}"}

View file

@ -12,6 +12,10 @@
// - POST /healthz -> {"ok": true}
// - POST /send -> {"ok": true, "messageId": "..."}
// body: {"spaceId": "...", "text": "...", "replyTo": "..." | null}
// - POST /send-attachment -> {"ok": true, "messageId": "..."}
// body: {"spaceId": "...", "path": "...", "name": "..." | null,
// "mimeType": "..." | null, "caption": "..." | null,
// "kind": "attachment" | "voice", "replyTo": "..." | null}
// - POST /typing -> {"ok": true}
// body: {"spaceId": "..."}
// - POST /shutdown -> {"ok": true}; then process exits
@ -48,9 +52,9 @@ if (!projectId || !projectSecret || !sharedToken) {
// Lazy-load spectrum-ts so a missing install fails with a clear message
// instead of a cryptic module-resolution error during import.
let Spectrum, imessage;
let Spectrum, imessage, attachment, voice;
try {
({ Spectrum } = await import("spectrum-ts"));
({ Spectrum, attachment, voice } = await import("spectrum-ts"));
({ imessage } = await import("spectrum-ts/providers/imessage"));
} catch (e) {
console.error(
@ -179,6 +183,44 @@ const server = http.createServer(async (req, res) => {
: await space.send(text);
return ok(res, { messageId: result?.id || result?.messageId || null });
}
if (req.url === "/send-attachment") {
const { spaceId, path, name, mimeType, caption, kind, replyTo } =
body || {};
if (!spaceId || typeof path !== "string" || !path) {
return badRequest(res, "spaceId and path are required");
}
const space = await resolveSpace(spaceId);
// spectrum-ts infers name + MIME from the file extension; pass
// overrides only when Hermes supplied them so a known-good
// inference isn't clobbered with an empty string.
const opts = {};
if (name) opts.name = name;
if (mimeType) opts.mimeType = mimeType;
const builder =
kind === "voice"
? voice(path, Object.keys(opts).length ? opts : undefined)
: attachment(path, Object.keys(opts).length ? opts : undefined);
const sendOpts = replyTo ? { replyTo } : undefined;
const result = sendOpts
? await space.send(builder, sendOpts)
: await space.send(builder);
// iMessage delivers the caption as a separate bubble; send it
// after the media so the attachment renders first.
if (caption && typeof caption === "string") {
try {
await space.send(caption);
} catch (e) {
console.error(
"photon-sidecar: attachment sent but caption failed: " +
(e && e.stack ? e.stack : String(e))
);
}
}
return ok(res, { messageId: result?.id || result?.messageId || null });
}
if (req.url === "/typing") {
const { spaceId } = body || {};
if (!spaceId) return badRequest(res, "spaceId is required");