feat(desktop+gateway): remote media relay — attach images/PDFs and display gateway images over the network

Desktop connected to a remote gateway can now attach images and PDFs and
display agent-written images. Previously the desktop passed a LOCAL file path
to image.attach; on a remote gateway that path doesn't exist, so the image was
silently dropped ("skipped unreadable path") and the vision model never saw it.
The reverse direction was also broken — images the agent wrote on the gateway
rendered as dead links in the remote client.

Gateway (tui_gateway/server.py):
- image.attach_bytes: base64 byte upload written into the gateway's own images
  dir and queued via the existing native-image-attach pipeline. Magic-byte
  extension sniffing, data-URL prefix + whitespace tolerance, 25 MB cap,
  structured error codes. Accepts content_base64/filename (canonical) and
  data/ext (older-desktop aliases).
- pdf.attach: renders each page to PNG via pdftoppm (poppler-utils) at 150 DPI
  and queues the pages as images; 50 MB / 25-page caps. Accepts host path or
  base64 upload.
- Shared helpers (_decode_attach_base64, _sniff_image_ext, _queue_attached_image)
  so the two methods and the existing image.attach don't duplicate logic.

Gateway (hermes_cli/web_server.py):
- GET /api/media: returns a gateway-local image as a base64 data URL so remote
  clients can display it. Auth-gated like every /api route, extension
  allowlist + size cap, AND confined to the gateway's own media roots
  (images/screenshots/cache, resolved symlink-safe) so an authed caller can't
  read image-extension files anywhere on disk.

Desktop (apps/desktop):
- syncImageAttachmentsForSubmit uploads bytes via image.attach_bytes when the
  connection mode is 'remote'; the local fast path is unchanged.
- media.ts gains isRemoteGateway() + gatewayMediaDataUrl(); directive-text and
  markdown-text fetch images over /api/media in remote mode.

Consolidates the competing remote-media PRs (#38876, #40317, #21908, #39437)
into one coherent implementation, taking the strongest parts of each and adding
shared-helper cleanup plus the /api/media root-confinement hardening on top.
The per-profile gateway switching from #38876 is intentionally left out as a
separable feature. TUI file uploads (#40492) remain a separate surface.

Tested: 11 new tui_gateway tests + 5 /api/media endpoint tests + desktop
media.remote unit tests; full tui_gateway + web_server suites green (472
passed); tsc -b clean; E2E verified the full attach→disk→queue and
gateway-path→data-URL display round-trip plus the out-of-root security block.

Co-authored-by: Max Mitcham <maxmitcham@mac.home>
Co-authored-by: Justlrnal4 <Justlrnal4@users.noreply.github.com>
Co-authored-by: Chris Cook <ccook@nvms.com>
Co-authored-by: Thomas Paquette <thomas.paquette@gmail.com>
This commit is contained in:
teknium1 2026-06-07 04:37:38 -07:00 committed by Teknium
parent 20fd0bde5d
commit 16786f3bb3
11 changed files with 759 additions and 11 deletions

View file

@ -34,6 +34,7 @@ import { requestDesktopOnboarding } from '@/store/onboarding'
import { $activeGatewayProfile, $newChatProfile, ensureGatewayProfile, normalizeProfileKey } from '@/store/profile'
import {
$busy,
$connection,
$messages,
$yoloActive,
setAwaitingResponse,
@ -80,6 +81,28 @@ function inlineErrorMessage(error: unknown, fallback: string): string {
return (raw.match(/Error invoking remote method '[^']+': Error: (.+)$/)?.[1] ?? raw).replace(/^Error:\s*/, '').trim()
}
/**
 * Returns everything after the first comma of a data URL (the encoded payload).
 * Yields an empty string when the input contains no comma.
 */
function base64FromDataUrl(dataUrl: string): string {
  const separatorIndex = dataUrl.indexOf(',')

  if (separatorIndex < 0) {
    return ''
  }

  return dataUrl.substring(separatorIndex + 1)
}
/**
 * Picks the last non-empty segment of a path, treating both `/` and `\` as
 * separators. Falls back to `image.png` when the path has no usable segment.
 */
function imageFilenameFromPath(filePath: string): string {
  const segments = filePath.split(/[\\/]/)

  // Walk backwards so trailing separators (empty segments) are skipped.
  for (let index = segments.length - 1; index >= 0; index -= 1) {
    const segment = segments[index]

    if (segment) {
      return segment
    }
  }

  return 'image.png'
}
// Remote gateway: the composer image sits on THIS machine's disk, which the
// gateway cannot see. Read it here through the desktop bridge so the caller can
// upload the bytes via image.attach_bytes.
//
// Resolves to null when the bridge is missing or yields no usable payload; a
// rejection from readFileDataUrl itself is not caught and propagates.
async function readImageForRemoteAttach(
  filePath: string
): Promise<{ contentBase64: string; filename: string } | null> {
  const dataUrl = await window.hermesDesktop?.readFileDataUrl(filePath)

  if (!dataUrl) {
    return null
  }

  const contentBase64 = base64FromDataUrl(dataUrl)

  if (!contentBase64) {
    return null
  }

  return { contentBase64, filename: imageFilenameFromPath(filePath) }
}
interface PromptActionsOptions {
activeSessionId: string | null
activeSessionIdRef: MutableRefObject<string | null>
@ -197,16 +220,36 @@ export function usePromptActions({
) => {
const updateComposerAttachments = options.updateComposerAttachments ?? true
const images = attachments.filter(attachment => attachment.kind === 'image' && attachment.path)
const remote = $connection.get()?.mode === 'remote'
for (const attachment of images) {
if (attachment.attachedSessionId === sessionId) {
continue
}
const result = await requestGateway<ImageAttachResponse>('image.attach', {
session_id: sessionId,
path: attachment.path
})
let result: ImageAttachResponse
if (remote) {
// The gateway is on another machine — it can't read attachment.path
// (a path on THIS disk). Upload the bytes via image.attach_bytes.
const payload = attachment.path ? await readImageForRemoteAttach(attachment.path) : null
if (!payload) {
const label = attachment.label || (attachment.path ? pathLabel(attachment.path) : 'image')
throw new Error(`Could not read ${label}`)
}
result = await requestGateway<ImageAttachResponse>('image.attach_bytes', {
session_id: sessionId,
content_base64: payload.contentBase64,
filename: payload.filename
})
} else {
result = await requestGateway<ImageAttachResponse>('image.attach', {
session_id: sessionId,
path: attachment.path
})
}
if (!result.attached) {
const label = attachment.label || (attachment.path ? pathLabel(attachment.path) : 'image')

View file

@ -13,6 +13,13 @@ export interface ImageAttachResponse {
path?: string
text?: string
message?: string
// Returned by the byte-upload variant (image.attach_bytes) used in remote mode.
count?: number
bytes?: number
name?: string
width?: number
height?: number
token_estimate?: number
}
export interface ImageDetachResponse {

View file

@ -7,6 +7,7 @@ import { Fragment, useEffect, useMemo, useState } from 'react'
import { ZoomableImage } from '@/components/chat/zoomable-image'
import { extractEmbeddedImages } from '@/lib/embedded-images'
import { gatewayMediaDataUrl, isRemoteGateway } from '@/lib/media'
const HERMES_REF_TYPES = ['file', 'folder', 'url', 'image', 'tool', 'line', 'terminal', 'session'] as const
type HermesRefType = (typeof HERMES_REF_TYPES)[number]
@ -327,25 +328,32 @@ export const DirectiveText: TextMessagePartComponent = ({ text }: TextMessagePar
* messages render after the backend embeds the data URL, so the UX is stable
* across initial send and refresh. */
const DirectiveImage: FC<{ id: string; label: string }> = ({ id, label }) => {
const remote = /^(?:https?|data):/i.test(id)
const [src, setSrc] = useState<string | null>(remote ? id : null)
const isUrl = /^(?:https?|data):/i.test(id)
const [src, setSrc] = useState<string | null>(isUrl ? id : null)
const [failed, setFailed] = useState(false)
useEffect(() => {
if (remote || !id) {
if (isUrl || !id) {
return
}
let alive = true
void window.hermesDesktop
?.readFileDataUrl(id)
.then(url => alive && setSrc(url))
// Remote gateway: the image lives on the gateway's disk, not ours — fetch
// it over the authenticated API. Local: read it straight off this disk.
const load =
window.hermesDesktop && isRemoteGateway()
? gatewayMediaDataUrl(id)
: window.hermesDesktop?.readFileDataUrl(id)
void Promise.resolve(load)
.then(url => alive && url && setSrc(url))
.catch(() => alive && setFailed(true))
return () => {
alive = false
}
}, [id, remote])
}, [id, isUrl])
if (failed) {
return <DirectiveChip id={id} label={label} type="image" />

View file

@ -17,6 +17,8 @@ import { createMemoizedMathPlugin } from '@/lib/katex-memo'
import { preprocessMarkdown } from '@/lib/markdown-preprocess'
import {
filePathFromMediaPath,
gatewayMediaDataUrl,
isRemoteGateway,
mediaExternalUrl,
mediaKind,
mediaName,
@ -51,6 +53,12 @@ async function mediaSrc(path: string): Promise<string> {
return mediaStreamUrl(path)
}
// Remote gateway: the image lives on the gateway machine, so read it over the
// authenticated API rather than this machine's disk.
if (window.hermesDesktop && isRemoteGateway()) {
return gatewayMediaDataUrl(path)
}
if (!window.hermesDesktop?.readFileDataUrl) {
return mediaExternalUrl(path)
}

View file

@ -0,0 +1,58 @@
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import { $connection } from '@/store/session'
import { filePathFromMediaPath, gatewayMediaDataUrl, isRemoteGateway } from './media'
describe('isRemoteGateway', () => {
  // Each case writes to the shared $connection store; clear it afterwards so a
  // mode set by one case cannot leak into the next.
  afterEach(() => {
    $connection.set(null)
  })

  it('is false with no connection', () => {
    $connection.set(null)

    const remote = isRemoteGateway()

    expect(remote).toBe(false)
  })

  it('is false in local mode', () => {
    $connection.set({ mode: 'local' } as never)

    const remote = isRemoteGateway()

    expect(remote).toBe(false)
  })

  it('is true in remote mode', () => {
    $connection.set({ mode: 'remote' } as never)

    const remote = isRemoteGateway()

    expect(remote).toBe(true)
  })
})
describe('filePathFromMediaPath', () => {
  it('passes through a plain path', () => {
    const plainPath = '/home/u/.hermes/images/a.png'

    expect(filePathFromMediaPath(plainPath)).toBe('/home/u/.hermes/images/a.png')
  })

  it('decodes a file:// URL with encoded characters', () => {
    // %20 in the URL must come back as a literal space in the filesystem path.
    const decoded = filePathFromMediaPath('file:///tmp/a%20b.png')

    expect(decoded).toBe('/tmp/a b.png')
  })
})
describe('gatewayMediaDataUrl', () => {
  // Stand-in for the desktop bridge's REST helper; always answers with one
  // fixed data URL.
  const api = vi.fn(async () => ({ data_url: 'data:image/png;base64,ZHVtbXk=' }))

  beforeEach(() => {
    api.mockClear()
    // Install a minimal `window` exposing only the bridge method under test.
    vi.stubGlobal('window', { hermesDesktop: { api } })
  })

  afterEach(() => {
    vi.unstubAllGlobals()
  })

  it('requests the encoded gateway path and returns the data URL', async () => {
    const gatewayPath = '/home/u/.hermes/images/a b.png'
    const expectedRequest = {
      path: '/api/media?path=%2Fhome%2Fu%2F.hermes%2Fimages%2Fa%20b.png'
    }

    const url = await gatewayMediaDataUrl(gatewayPath)

    expect(url).toBe('data:image/png;base64,ZHVtbXk=')
    expect(api).toHaveBeenCalledWith(expectedRequest)
  })
})

View file

@ -1,3 +1,5 @@
import { $connection } from '@/store/session'
export type MediaKind = 'audio' | 'image' | 'video' | 'file'
interface MediaInfo {
@ -89,6 +91,26 @@ export function filePathFromMediaPath(path: string): string {
}
}
// Reports whether the current connection is in 'remote' mode. In that mode
// local-looking media paths refer to the gateway machine's disk, so callers
// fetch them over the API instead of reading this machine's filesystem.
// Returns false when there is no connection at all.
export function isRemoteGateway(): boolean {
  const connection = $connection.get()

  return connection?.mode === 'remote'
}
/**
 * Fetch a gateway-local image as a data URL via the authenticated REST bridge.
 *
 * Used in remote mode, where readFileDataUrl (which reads THIS machine's disk)
 * can't see files the agent wrote on the gateway. Requires the gateway to
 * expose GET /api/media (hermes_cli/web_server.py).
 *
 * @param path - Gateway-side media path; normalised through
 *   filePathFromMediaPath before being sent.
 * @returns The `data:` URL the gateway produced for the file.
 * @throws Error when the desktop bridge is unavailable, or when the gateway
 *   response carries no usable `data_url`. Rejections from the bridge call
 *   itself propagate unchanged.
 */
export async function gatewayMediaDataUrl(path: string): Promise<string> {
  const bridge = window.hermesDesktop

  // Guard instead of a non-null assertion so a missing bridge surfaces as a
  // descriptive error rather than an opaque TypeError.
  if (!bridge) {
    throw new Error('Desktop bridge unavailable: cannot fetch gateway media')
  }

  const file = filePathFromMediaPath(path)
  const result = await bridge.api<{ data_url?: unknown }>({
    path: `/api/media?path=${encodeURIComponent(file)}`
  })
  const dataUrl = result?.data_url

  // The payload comes from the network: never hand callers `undefined` typed
  // as a string. Failing here lets them show their error state instead of
  // waiting forever on an image source that never arrives.
  if (typeof dataUrl !== 'string' || !dataUrl.startsWith('data:')) {
    throw new Error(`Gateway returned no image data for ${file}`)
  }

  return dataUrl
}
export function mediaDisplayLabel(path: string): string {
const escaped = mediaName(path).replace(/[[\]\\]/g, '\\$&')
const kind = mediaKind(path)

View file

@ -796,6 +796,74 @@ def _probe_gateway_health() -> tuple[bool, dict | None]:
return False, None
# File suffix -> MIME type for the images GET /api/media will serve. The
# endpoint rejects any (lower-cased) suffix that is not a key here, so an
# authenticated caller can't pull non-image files through it; the value is
# used as the media type of the returned data URL.
_MEDIA_CONTENT_TYPES = {
".png": "image/png",
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".gif": "image/gif",
".webp": "image/webp",
".svg": "image/svg+xml",
".bmp": "image/bmp",
".ico": "image/x-icon",
}
# Largest file GET /api/media will return (25 MiB), compared against the file's
# on-disk size before it is read.
_MEDIA_MAX_BYTES = 25 * 1024 * 1024
def _media_serve_roots() -> list[Path]:
    """Return the resolved directories ``GET /api/media`` is allowed to read from.

    The candidates are the ``images``, ``screenshots`` and ``cache`` directories
    under the Hermes home. Confining reads to these roots stops an
    authenticated client from reading image-extension files anywhere else on
    disk (e.g. a renamed key, or a screenshot outside these directories) merely
    because the suffix passes the allowlist.

    Each candidate is resolved to an absolute path; a candidate whose
    resolution raises ``OSError`` or ``RuntimeError`` is left out of the result.
    """
    hermes_home = get_hermes_home()
    resolved_roots: list[Path] = []
    for dirname in ("images", "screenshots", "cache"):
        candidate = hermes_home / dirname
        try:
            resolved_roots.append(candidate.resolve())
        except (OSError, RuntimeError):
            continue
    return resolved_roots
@app.get("/api/media")
async def get_media(path: str):
    """Return a gateway-local image file as a base64 data URL.

    Lets remote clients (the desktop app over the network, or the web dashboard
    in a browser) display images the agent wrote to *this* machine's
    filesystem — they can't read the gateway's local disk directly.

    This handler performs no token check itself; authentication is applied
    outside this function. The checks done here, in order:

    1. the path must resolve (``~`` expanded, symlinks followed);
    2. its suffix must be in ``_MEDIA_CONTENT_TYPES``;
    3. the resolved path must lie inside one of ``_media_serve_roots()``;
    4. it must be an existing regular file no larger than ``_MEDIA_MAX_BYTES``.

    Args:
        path: Filesystem path on the gateway host, supplied by the client.

    Returns:
        ``{"data_url": "data:<mime>;base64,<payload>"}``.

    Raises:
        HTTPException: 400 (unresolvable path), 415 (suffix not allowlisted),
            403 (outside the media roots), 404 (missing or unreadable file),
            413 (file larger than the cap).
    """
    try:
        target = Path(path).expanduser().resolve()
    except (OSError, RuntimeError, ValueError):
        # ValueError covers client input the OS layer refuses outright, such as
        # an embedded NUL byte (``%00`` in the query string). Treat it as a bad
        # request instead of letting it surface as a 500.
        raise HTTPException(status_code=400, detail="Invalid path")

    content_type = _MEDIA_CONTENT_TYPES.get(target.suffix.lower())
    if content_type is None:
        raise HTTPException(status_code=415, detail="Unsupported media type")

    roots = _media_serve_roots()
    if not any(target == root or root in target.parents for root in roots):
        raise HTTPException(status_code=403, detail="Path outside media roots")

    try:
        if not target.is_file():
            raise HTTPException(status_code=404, detail="File not found")
        if target.stat().st_size > _MEDIA_MAX_BYTES:
            raise HTTPException(status_code=413, detail="File too large")
        payload = target.read_bytes()
    except OSError:
        # The file vanished or became unreadable between the checks and the
        # read; report it as missing rather than crashing the request.
        raise HTTPException(status_code=404, detail="File not found")

    # Re-check after reading: the file may have grown since stat().
    if len(payload) > _MEDIA_MAX_BYTES:
        raise HTTPException(status_code=413, detail="File too large")

    encoded = base64.b64encode(payload).decode("ascii")
    return {"data_url": f"data:{content_type};base64,{encoded}"}
@app.get("/api/status")
async def get_status():
current_ver, latest_ver = check_config_version()

View file

@ -47,6 +47,9 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
AUTHOR_MAP = {
"yusufalweshdemir@gmail.com": "Dusk1e",
"804436395@qq.com": "LaPhilosophie",
"maxmitcham@mac.home": "maxtrigify",
"ccook@nvms.com": "ccook1963",
"thomas.paquette@gmail.com": "RyTsYdUp",
"266365592+bmoore210@users.noreply.github.com": "bmoore210",
"manishbyatroy@gmail.com": "manishbyatroy",
"chilltulpa@gmail.com": "TheGardenGallery",

View file

@ -243,6 +243,57 @@ class TestWebServerEndpoints:
assert "hermes_home" in data
assert "active_sessions" in data
# ── GET /api/media (remote image display) ───────────────────────────
def test_get_media_serves_image_in_root(self):
    """An image under the gateway's images dir is returned as a data URL."""
    from hermes_constants import get_hermes_home

    images_root = get_hermes_home() / "images"
    images_root.mkdir(parents=True, exist_ok=True)
    png_file = images_root / "shot.png"
    # PNG signature followed by zero padding.
    png_file.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 16)

    response = self.client.get("/api/media", params={"path": str(png_file)})

    assert response.status_code == 200
    body = response.json()
    assert body["data_url"].startswith("data:image/png;base64,")
def test_get_media_rejects_path_outside_roots(self, tmp_path):
    """An image-extension file outside the media roots is forbidden."""
    stray_image = tmp_path / "secret.png"
    stray_image.write_bytes(b"\x89PNG\r\n\x1a\n")

    response = self.client.get("/api/media", params={"path": str(stray_image)})

    # The suffix is allowlisted, so the 403 comes from the root confinement.
    assert response.status_code == 403
def test_get_media_rejects_non_image_extension(self):
    """A non-image suffix is refused with 415 even inside a media root."""
    from hermes_constants import get_hermes_home

    images_root = get_hermes_home() / "images"
    images_root.mkdir(parents=True, exist_ok=True)
    dotenv_file = images_root / "leak.env"
    dotenv_file.write_text("SECRET=1")

    response = self.client.get("/api/media", params={"path": str(dotenv_file)})

    assert response.status_code == 415
def test_get_media_404_for_missing_file(self):
    """A path inside the images root that does not exist yields 404."""
    from hermes_constants import get_hermes_home

    absent = get_hermes_home() / "images" / "nope.png"

    response = self.client.get("/api/media", params={"path": str(absent)})

    assert response.status_code == 404
def test_get_media_requires_auth(self):
    """A request carrying a wrong session token is rejected with 401."""
    from hermes_cli.web_server import _SESSION_HEADER_NAME

    bad_headers = {_SESSION_HEADER_NAME: "wrong-token"}
    query = {"path": "/tmp/x.png"}

    response = self.client.get("/api/media", params=query, headers=bad_headers)

    assert response.status_code == 401
# ── Dashboard font override ─────────────────────────────────────────
def test_get_dashboard_font_defaults_to_theme(self):

View file

@ -5774,3 +5774,215 @@ def test_notification_event_dedup_key_keeps_completions_one_shot():
assert server._notification_event_dedup_key(first) == server._notification_event_dedup_key(
replay
)
# --- image.attach_bytes / pdf.attach (remote-client byte upload) -------------
# Smallest valid 1x1 PNG, base64-encoded. The two adjacent string literals are
# concatenated into one payload; the image.attach_bytes tests below send it as
# the upload body.
_PNG_1X1_B64 = (
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk"
"+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
)
def _attach_bytes_cli(monkeypatch):
fake_cli = types.ModuleType("cli")
fake_cli._IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"}
monkeypatch.setitem(sys.modules, "cli", fake_cli)
def test_image_attach_bytes_writes_to_gateway_dir(monkeypatch, tmp_path):
    """Remote client uploads base64 bytes; gateway writes them to its own disk."""
    _attach_bytes_cli(monkeypatch)
    monkeypatch.setattr(server, "_hermes_home", tmp_path)
    sid = "abx"
    server._sessions[sid] = _session()

    request = {
        "id": "1",
        "method": "image.attach_bytes",
        "params": {
            "session_id": sid,
            "content_base64": _PNG_1X1_B64,
            "filename": "shot.png",
        },
    }
    result = server.handle_request(request)["result"]

    assert result["attached"] is True
    saved = Path(result["path"])
    assert saved.is_file()
    # The file must land in the (patched) gateway home, under images/.
    assert saved.parent == tmp_path / "images"
    assert saved.read_bytes().startswith(b"\x89PNG")
    assert len(server._sessions[sid]["attached_images"]) == 1
    assert result["bytes"] > 0
def test_image_attach_bytes_accepts_data_url_prefix(monkeypatch, tmp_path):
    """A ``data:image/png;base64,`` wrapper around the payload is accepted."""
    _attach_bytes_cli(monkeypatch)
    monkeypatch.setattr(server, "_hermes_home", tmp_path)
    sid = "abx2"
    server._sessions[sid] = _session()

    params = {
        "session_id": sid,
        "content_base64": f"data:image/png;base64,{_PNG_1X1_B64}",
    }
    response = server.handle_request(
        {"id": "1", "method": "image.attach_bytes", "params": params}
    )

    assert response["result"]["attached"] is True
def test_image_attach_bytes_data_alias_and_magic_sniff(monkeypatch, tmp_path):
    """Older desktop builds send `data` (not content_base64); ext sniffed from bytes."""
    _attach_bytes_cli(monkeypatch)
    monkeypatch.setattr(server, "_hermes_home", tmp_path)
    sid = "abx3"
    server._sessions[sid] = _session()

    request = {
        "id": "1",
        "method": "image.attach_bytes",
        "params": {"session_id": sid, "data": _PNG_1X1_B64},
    }
    result = server.handle_request(request)["result"]

    assert result["attached"] is True
    # No filename/ext hint was sent, so the suffix comes from the magic bytes.
    assert Path(result["path"]).suffix == ".png"
def test_image_attach_bytes_rejects_invalid_base64(monkeypatch, tmp_path):
    """A payload that is not base64 is rejected with error code 4017."""
    _attach_bytes_cli(monkeypatch)
    monkeypatch.setattr(server, "_hermes_home", tmp_path)
    sid = "abx4"
    server._sessions[sid] = _session()

    request = {
        "id": "1",
        "method": "image.attach_bytes",
        "params": {"session_id": sid, "content_base64": "!!!not base64!!!"},
    }
    response = server.handle_request(request)

    assert "error" in response
    assert response["error"]["code"] == 4017
def test_image_attach_bytes_rejects_oversize(monkeypatch, tmp_path):
    """A decoded payload larger than the byte cap is rejected with 4018."""
    import base64 as _b64

    _attach_bytes_cli(monkeypatch)
    monkeypatch.setattr(server, "_hermes_home", tmp_path)
    # Shrink the cap to 10 bytes so a ~100-byte payload trips it.
    monkeypatch.setattr(server, "_ATTACH_BYTES_MAX_BYTES", 10)
    sid = "abx5"
    server._sessions[sid] = _session()

    oversized = b"\x89PNG\r\n\x1a\n" + b"0" * 100
    request = {
        "id": "1",
        "method": "image.attach_bytes",
        "params": {
            "session_id": sid,
            "content_base64": _b64.b64encode(oversized).decode("ascii"),
        },
    }
    response = server.handle_request(request)

    assert "error" in response
    assert response["error"]["code"] == 4018
def test_image_attach_bytes_rejects_unsupported_extension(monkeypatch, tmp_path):
    """A filename hint with a non-image extension is rejected with 4016."""
    _attach_bytes_cli(monkeypatch)
    monkeypatch.setattr(server, "_hermes_home", tmp_path)
    sid = "abx6"
    server._sessions[sid] = _session()

    # filename hint forces a non-image extension; magic sniff is bypassed by hint
    params = {
        "session_id": sid,
        "content_base64": _PNG_1X1_B64,
        "filename": "evil.exe",
    }
    response = server.handle_request(
        {"id": "1", "method": "image.attach_bytes", "params": params}
    )

    assert "error" in response
    assert response["error"]["code"] == 4016
def test_pdf_attach_requires_poppler(monkeypatch, tmp_path):
    """Without pdftoppm on PATH, pdf.attach returns a clear 5028."""
    _attach_bytes_cli(monkeypatch)
    monkeypatch.setattr(server, "_hermes_home", tmp_path)
    # Pretend no executable can be found on PATH.
    monkeypatch.setattr("shutil.which", lambda _name: None)
    sid = "pdf1"
    server._sessions[sid] = _session()

    request = {
        "id": "1",
        "method": "pdf.attach",
        "params": {"session_id": sid, "content_base64": "JVBERi0xLjQK"},
    }
    response = server.handle_request(request)

    assert "error" in response
    assert response["error"]["code"] == 5028
def test_pdf_attach_rejects_non_pdf_bytes(monkeypatch, tmp_path):
    """Valid base64 that does not decode to a PDF is rejected with 4017."""
    import base64 as _b64

    _attach_bytes_cli(monkeypatch)
    monkeypatch.setattr(server, "_hermes_home", tmp_path)
    # Report pdftoppm as installed so the handler gets past the tool check.
    monkeypatch.setattr("shutil.which", lambda _name: "/usr/bin/pdftoppm")
    sid = "pdf2"
    server._sessions[sid] = _session()

    payload = _b64.b64encode(b"this is not a pdf").decode("ascii")
    request = {
        "id": "1",
        "method": "pdf.attach",
        "params": {"session_id": sid, "content_base64": payload},
    }
    response = server.handle_request(request)

    assert "error" in response
    assert response["error"]["code"] == 4017
def test_pdf_attach_requires_path_or_bytes(monkeypatch, tmp_path):
    """pdf.attach with neither a path nor a payload is rejected with 4015."""
    _attach_bytes_cli(monkeypatch)
    monkeypatch.setattr(server, "_hermes_home", tmp_path)
    monkeypatch.setattr("shutil.which", lambda _name: "/usr/bin/pdftoppm")
    sid = "pdf3"
    server._sessions[sid] = _session()

    request = {"id": "1", "method": "pdf.attach", "params": {"session_id": sid}}
    response = server.handle_request(request)

    assert "error" in response
    assert response["error"]["code"] == 4015
def test_decode_attach_base64_helper():
    """The decode helper accepts bare, data-URL-wrapped and whitespace-split input."""
    import base64 as _b64

    encoded = _b64.b64encode(b"hello").decode("ascii")
    decode = server._decode_attach_base64

    assert decode(encoded, mime_prefix="image/") == b"hello"
    wrapped = f"data:image/png;base64,{encoded}"
    assert decode(wrapped, mime_prefix="image/") == b"hello"
    # whitespace inside payload is tolerated
    split_payload = encoded[:4] + "\n" + encoded[4:]
    assert decode(split_payload, mime_prefix="image/") == b"hello"
    assert decode("@@@", mime_prefix="image/") is None
def test_sniff_image_ext_magic_and_filename():
    """Extension comes from magic bytes unless a filename hint supplies one."""
    sniff = server._sniff_image_ext
    by_magic = [
        (b"\x89PNG\r\n\x1a\n", ".png"),
        (b"\xff\xd8\xff\xe0", ".jpg"),
        (b"GIF89a....", ".gif"),
        (b"RIFF1234WEBPxxxx", ".webp"),
        (b"BM......", ".bmp"),
        (b"unknown", ".png"),  # fallback
    ]
    for header, expected in by_magic:
        assert sniff(header) == expected
    # filename hint wins over magic bytes
    assert sniff(b"\x89PNG", "photo.jpeg") == ".jpeg"

View file

@ -5097,6 +5097,274 @@ def _(rid, params: dict) -> dict:
return _err(rid, 5027, str(e))
# Byte-upload attach caps. 25 MB matches Anthropic's per-image limit; 50 MB / 25
# pages bounds a single PDF drop so it can't blow the context budget.
# The byte caps are compared against the DECODED payload size (and, for a PDF
# given by path, against its on-disk size).
_ATTACH_BYTES_MAX_BYTES = 25 * 1024 * 1024
_PDF_ATTACH_MAX_BYTES = 50 * 1024 * 1024
_PDF_ATTACH_MAX_PAGES = 25
# Leading magic bytes → file extension, for filename-less uploads.
# Each entry is (signature, extension) and is matched with startswith against
# the head of the payload. WebP is absent on purpose: _sniff_image_ext checks
# its RIFF/WEBP container separately before consulting this table.
_IMAGE_MAGIC: tuple[tuple[bytes, str], ...] = (
(b"\x89PNG\r\n\x1a\n", ".png"),
(b"\xff\xd8\xff", ".jpg"),
(b"GIF87a", ".gif"),
(b"GIF89a", ".gif"),
(b"BM", ".bmp"),
)
def _decode_attach_base64(raw: str, *, mime_prefix: str) -> bytes | None:
"""Decode a base64 (optionally data-URL-wrapped) payload.
Accepts ``data:<mime_prefix>...;base64,<b64>`` plus embedded whitespace.
Returns the decoded bytes, or ``None`` when the input isn't valid base64.
"""
import base64 as _base64
import re as _re
cleaned = raw.strip()
m = _re.match(
rf"^data:{_re.escape(mime_prefix)}[a-zA-Z0-9.+-]*;base64,(.*)$",
cleaned,
_re.DOTALL,
)
if m:
cleaned = m.group(1)
cleaned = _re.sub(r"\s+", "", cleaned)
try:
return _base64.b64decode(cleaned, validate=True)
except Exception:
return None
def _sniff_image_ext(img_bytes: bytes, filename: str = "") -> str:
"""Resolve an image extension from a filename hint, else magic bytes.
Falls back to ``.png``. WebP needs the RIFF/WEBP container check, handled
before the generic table.
"""
if filename:
suffix = Path(filename).suffix.lower()
if suffix:
return suffix
head = img_bytes[:16]
if head.startswith(b"RIFF") and head[8:12] == b"WEBP":
return ".webp"
for sig, ext in _IMAGE_MAGIC:
if head.startswith(sig):
return ext
return ".png"
def _allowed_image_extensions() -> frozenset[str]:
try:
from cli import _IMAGE_EXTENSIONS
return frozenset(_IMAGE_EXTENSIONS)
except Exception:
return frozenset({".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"})
def _queue_attached_image(session: dict, img_bytes: bytes, ext: str, *, prefix: str) -> Path:
    """Write image bytes into the gateway's images dir and queue them.

    Mirrors what ``image.attach`` does for a local path: appends to
    ``session["attached_images"]`` so the next ``prompt.submit`` picks it up via
    the existing native-image-attach pipeline.

    The file name is ``<prefix>_<timestamp>_<counter><ext>``. The timestamp has
    one-second resolution and the counter is per session, so two sessions can
    compute the same name. The file is therefore created exclusively and a
    numeric suffix is appended on a clash, instead of overwriting another
    session's image.

    Args:
        session: Session state; ``image_counter`` and ``attached_images`` are
            updated in place.
        img_bytes: Raw image content to store.
        ext: File extension including the leading dot.
        prefix: Leading part of the generated file name.

    Returns:
        The path of the written file.

    Raises:
        Exception: Whatever the filesystem raises. The counter bump is undone
            and a partially written file is removed before re-raising.
    """
    session["image_counter"] = session.get("image_counter", 0) + 1
    img_path = None
    try:
        img_dir = _hermes_home / "images"
        img_dir.mkdir(parents=True, exist_ok=True)
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        stem = f"{prefix}_{ts}_{session['image_counter']}"
        for attempt in range(1000):
            name = f"{stem}{ext}" if attempt == 0 else f"{stem}_{attempt}{ext}"
            candidate = img_dir / name
            try:
                # "x" = exclusive create: fails rather than truncating a file
                # that another session already wrote under the same name.
                fh = candidate.open("xb")
            except FileExistsError:
                continue
            img_path = candidate
            with fh:
                fh.write(img_bytes)
            break
        else:
            raise FileExistsError(f"no free file name for {stem}{ext}")
    except Exception:
        session["image_counter"] = max(0, session["image_counter"] - 1)
        if img_path is not None:
            # Don't leave a truncated image behind after a failed write.
            try:
                img_path.unlink()
            except OSError:
                pass
        raise
    session.setdefault("attached_images", []).append(str(img_path))
    return img_path
@method("image.attach_bytes")
def _(rid, params: dict) -> dict:
    """Attach an image to the session from uploaded base64 bytes.

    This is the remote-client path: a desktop app or web dashboard running on
    a DIFFERENT machine than the gateway can't hand over a local path, because
    that file only exists on the client's disk. The client uploads the image
    content instead and it is written into the gateway's own images dir. The
    response carries the same core fields as ``image.attach`` so the client can
    treat both identically.

    Params:
        content_base64 / data (str, required): base64 image bytes. A
            ``data:image/...;base64,`` prefix and embedded whitespace are
            accepted. ``data`` is an alias kept for older desktop builds.
        filename / ext (str, optional): extension hint; ``filename`` takes
            precedence over ``ext``. Without either, magic bytes identify
            PNG/JPEG/GIF/WebP/BMP, falling back to ``.png``.

    Errors:
        4015 missing payload, 4017 invalid base64 or empty image, 4018 payload
        over the size cap, 4016 extension not allowed, 5027 write failure.
    """
    session, err = _sess(params, rid)
    if err:
        return err

    encoded = str(params.get("content_base64") or params.get("data") or "").strip()
    if not encoded:
        return _err(rid, 4015, "content_base64 required")

    img_bytes = _decode_attach_base64(encoded, mime_prefix="image/")
    if img_bytes is None:
        return _err(rid, 4017, "data is not valid base64")
    size = len(img_bytes)
    if size == 0:
        return _err(rid, 4017, "image is empty")
    if size > _ATTACH_BYTES_MAX_BYTES:
        mb = _ATTACH_BYTES_MAX_BYTES // (1024 * 1024)
        return _err(rid, 4018, f"image too large ({size} bytes; cap is {mb} MB)")

    filename = str(params.get("filename", "") or "")
    ext_hint = str(params.get("ext", "") or "").strip().lower()
    if ext_hint and ext_hint[0] != ".":
        ext_hint = "." + ext_hint
    # A bare extension hint is turned into a dummy file name so the sniffer can
    # treat both kinds of hint the same way.
    if filename:
        name_hint = filename
    elif ext_hint:
        name_hint = f"x{ext_hint}"
    else:
        name_hint = ""

    ext = _sniff_image_ext(img_bytes, name_hint)
    if ext not in _allowed_image_extensions():
        return _err(rid, 4016, f"unsupported image extension: {ext}")

    try:
        img_path = _queue_attached_image(session, img_bytes, ext, prefix="upload")
    except Exception as e:
        return _err(rid, 5027, f"write failed: {e}")

    payload = {
        "attached": True,
        "path": str(img_path),
        "count": len(session["attached_images"]),
        "remainder": "",
        "text": f"[User attached image: {img_path.name}]",
        "bytes": size,
    }
    # Merged last so image metadata keys take precedence, as in a ** spread.
    payload.update(_image_meta(img_path))
    return _ok(rid, payload)
@method("pdf.attach")
def _(rid, params: dict) -> dict:
    """Attach a PDF by rendering each page to PNG and queuing the pages.

    Anthropic's vision pipeline accepts images, not PDFs, so this runs
    ``pdftoppm`` (poppler-utils) at 150 DPI per page and queues each rendered
    page as an attached image. Accepts either a host ``path`` (local mode) or
    base64 ``content_base64`` (remote upload; ``data`` is an alias). When both
    are given the base64 payload wins. Caps at 50 MB / 25 pages per call.

    Params:
        path (str): PDF path on the gateway host.
        content_base64 / data (str): base64 PDF bytes, optionally wrapped in a
            ``data:application/pdf;base64,`` prefix.
        filename (str, optional): display name for an uploaded PDF.
        first_page / last_page (int, optional): 1-based inclusive page range.
            ``first_page`` defaults to 1; ``last_page`` defaults to the last
            page the cap allows.

    Errors:
        4015 missing/invalid parameters, 4016 path not found or not a PDF,
        4017 invalid base64 or not a PDF, 4018 over the size cap, 4019 page
        range over the cap, 5027 a rendered page could not be stored, 5028
        ``pdftoppm`` missing, failed, timed out or produced nothing.

    Requires ``pdftoppm`` on $PATH (``apt install poppler-utils``); returns 5028
    if missing.
    """
    import shutil
    import subprocess
    import tempfile

    session, err = _sess(params, rid)
    if err:
        return err

    if shutil.which("pdftoppm") is None:
        return _err(rid, 5028, "pdftoppm not installed (poppler-utils package required)")

    raw_path = str(params.get("path", "") or "").strip()
    raw_b64 = str(params.get("content_base64") or params.get("data") or "").strip()
    if not raw_path and not raw_b64:
        return _err(rid, 4015, "path or content_base64 required")

    # The temp dir holds the uploaded PDF (if any) and the rendered pages; it
    # is removed on every exit path, including the early error returns.
    with tempfile.TemporaryDirectory(prefix="pdf_attach_") as td:
        td_path = Path(td)

        if raw_b64:
            pdf_bytes = _decode_attach_base64(raw_b64, mime_prefix="application/pdf")
            if pdf_bytes is None:
                return _err(rid, 4017, "data is not valid base64")
            if not pdf_bytes:
                return _err(rid, 4017, "decoded PDF is empty")
            if len(pdf_bytes) > _PDF_ATTACH_MAX_BYTES:
                mb = _PDF_ATTACH_MAX_BYTES // (1024 * 1024)
                return _err(rid, 4018, f"PDF too large ({len(pdf_bytes)} bytes; cap is {mb} MB)")
            if pdf_bytes[:5] != b"%PDF-":
                return _err(rid, 4017, "payload is not a PDF (missing %PDF- magic bytes)")
            pdf_path = td_path / "input.pdf"
            pdf_path.write_bytes(pdf_bytes)
            display_name = str(params.get("filename", "") or "uploaded.pdf")
        else:
            try:
                from cli import _resolve_attachment_path

                resolved = _resolve_attachment_path(raw_path)
            except Exception:
                resolved = None
            if resolved is None or not Path(resolved).is_file():
                return _err(rid, 4016, f"PDF not found: {raw_path}")
            if Path(resolved).suffix.lower() != ".pdf":
                return _err(rid, 4016, f"not a PDF: {Path(resolved).name}")
            if Path(resolved).stat().st_size > _PDF_ATTACH_MAX_BYTES:
                mb = _PDF_ATTACH_MAX_BYTES // (1024 * 1024)
                return _err(rid, 4018, f"PDF too large; cap is {mb} MB")
            pdf_path = Path(resolved)
            display_name = pdf_path.name

        try:
            first_page = int(params.get("first_page") or 1)
            last_page_param = params.get("last_page")
            last_page = int(last_page_param) if last_page_param is not None else None
        except (TypeError, ValueError):
            return _err(rid, 4015, "first_page/last_page must be integers")
        if first_page < 1:
            return _err(rid, 4015, "first_page must be >= 1")
        if last_page is None:
            last_page = first_page + _PDF_ATTACH_MAX_PAGES - 1
        if last_page < first_page:
            return _err(rid, 4015, "last_page must be >= first_page")
        if last_page - first_page + 1 > _PDF_ATTACH_MAX_PAGES:
            return _err(rid, 4019, f"page range exceeds cap of {_PDF_ATTACH_MAX_PAGES} pages per attach call")

        out_prefix = td_path / "page"
        argv = [
            "pdftoppm", "-png", "-r", "150",
            "-f", str(first_page), "-l", str(last_page),
            str(pdf_path), str(out_prefix),
        ]
        try:
            # errors="replace": tool output may not be valid text in the
            # current locale, and a decode error must not mask the real failure.
            res = subprocess.run(
                argv, capture_output=True, text=True, errors="replace", timeout=120
            )
        except subprocess.TimeoutExpired:
            return _err(rid, 5028, "pdftoppm timed out (>120s)")
        except OSError as e:
            # The binary disappeared or is not executable despite the PATH check.
            return _err(rid, 5028, f"pdftoppm could not be started: {e}")
        if res.returncode != 0:
            tail = (res.stderr or res.stdout or "").strip().splitlines()[-3:]
            return _err(rid, 5028, "pdftoppm failed: " + " | ".join(tail))

        rendered = sorted(td_path.glob("page-*.png"))
        if not rendered:
            return _err(rid, 5028, "pdftoppm produced no pages (corrupt PDF?)")

        attached_pages = []
        queued = []
        for src in rendered:
            # Output files are named "page-<number>.png".
            page_num = src.stem.split("-", 1)[-1]
            try:
                page_int = int(page_num)
            except ValueError:
                page_int = first_page + len(attached_pages)
            try:
                dst = _queue_attached_image(
                    session, src.read_bytes(), ".png", prefix=f"pdf_p{page_num}"
                )
            except Exception as e:
                # Same error code as image.attach_bytes. Undo the pages already
                # queued by this call so the session isn't left with a
                # half-attached PDF the client was told had failed.
                pending = session.get("attached_images", [])
                for done in queued:
                    try:
                        pending.remove(str(done))
                    except ValueError:
                        pass
                    try:
                        done.unlink()
                    except OSError:
                        pass
                return _err(rid, 5027, f"write failed: {e}")
            queued.append(dst)
            attached_pages.append({"path": str(dst), "page": page_int, **_image_meta(dst)})

        return _ok(
            rid,
            {
                "attached": True,
                "filename": display_name,
                "pages_attached": len(attached_pages),
                "pages": attached_pages,
                "count": len(session["attached_images"]),
                "text": f"[User attached PDF: {display_name} ({len(attached_pages)} page(s))]",
            },
        )
@method("image.detach")
def _(rid, params: dict) -> dict:
session, err = _sess(params, rid)