hermes-agent/apps/desktop/src/lib/media.ts
teknium1 16786f3bb3 feat(desktop+gateway): remote media relay — attach images/PDFs and display gateway images over the network
Desktop connected to a remote gateway can now attach images and PDFs and
display agent-written images. Previously the desktop passed a LOCAL file path
to image.attach; on a remote gateway that path doesn't exist, so the image was
silently dropped ("skipped unreadable path") and the vision model never saw it.
The reverse direction was also broken — images the agent wrote on the gateway
rendered as dead links in the remote client.

Gateway (tui_gateway/server.py):
- image.attach_bytes: base64 byte upload written into the gateway's own images
  dir and queued via the existing native-image-attach pipeline. Magic-byte
  extension sniffing, data-URL prefix + whitespace tolerance, 25 MB cap,
  structured error codes. Accepts content_base64/filename (canonical) and
  data/ext (older-desktop aliases).
- pdf.attach: renders each page to PNG via pdftoppm (poppler-utils) at 150 DPI
  and queues the pages as images; 50 MB / 25-page caps. Accepts host path or
  base64 upload.
- Shared helpers (_decode_attach_base64, _sniff_image_ext, _queue_attached_image)
  so the two methods and the existing image.attach don't duplicate logic.

Gateway (hermes_cli/web_server.py):
- GET /api/media: returns a gateway-local image as a base64 data URL so remote
  clients can display it. Auth-gated like every /api route, extension
  allowlist + size cap, AND confined to the gateway's own media roots
  (images/screenshots/cache, resolved symlink-safe) so an authed caller can't
  read image-extension files anywhere on disk.

Desktop (apps/desktop):
- syncImageAttachmentsForSubmit uploads bytes via image.attach_bytes when the
  connection mode is 'remote'; the local fast path is unchanged.
- media.ts gains isRemoteGateway() + gatewayMediaDataUrl(); directive-text and
  markdown-text fetch images over /api/media in remote mode.

Consolidates the competing remote-media PRs (#38876, #40317, #21908, #39437)
into one coherent implementation, taking the strongest parts of each and adding
shared-helper cleanup plus the /api/media root-confinement hardening on top.
The per-profile gateway switching from #38876 is intentionally left out as a
separable feature. TUI file uploads (#40492) remain a separate surface.

Tested: 11 new tui_gateway tests + 5 /api/media endpoint tests + desktop
media.remote unit tests; full tui_gateway + web_server suites green (472
passed); tsc -b clean; E2E verified the full attach→disk→queue and
gateway-path→data-URL display round-trip plus the out-of-root security block.

Co-authored-by: Max Mitcham <maxmitcham@mac.home>
Co-authored-by: Justlrnal4 <Justlrnal4@users.noreply.github.com>
Co-authored-by: Chris Cook <ccook@nvms.com>
Co-authored-by: Thomas Paquette <thomas.paquette@gmail.com>
2026-06-07 10:05:53 -07:00

119 lines
3.7 KiB
TypeScript

import { $connection } from '@/store/session'
// Broad media category. 'file' is the fallback used for anything whose
// extension is not recognized.
export type MediaKind = 'audio' | 'image' | 'video' | 'file'

// Category and MIME type associated with a known file extension.
interface MediaInfo {
  kind: MediaKind
  mime: string
}

// Known extensions (lowercase, without the leading dot) mapped to their media
// category and MIME type.
const MEDIA_BY_EXT: Record<string, MediaInfo> = {
  avi: { kind: 'video', mime: 'video/x-msvideo' },
  bmp: { kind: 'image', mime: 'image/bmp' },
  flac: { kind: 'audio', mime: 'audio/flac' },
  gif: { kind: 'image', mime: 'image/gif' },
  jpeg: { kind: 'image', mime: 'image/jpeg' },
  jpg: { kind: 'image', mime: 'image/jpeg' },
  m4a: { kind: 'audio', mime: 'audio/mp4' },
  mkv: { kind: 'video', mime: 'video/x-matroska' },
  mov: { kind: 'video', mime: 'video/quicktime' },
  mp3: { kind: 'audio', mime: 'audio/mpeg' },
  mp4: { kind: 'video', mime: 'video/mp4' },
  ogg: { kind: 'audio', mime: 'audio/ogg' },
  opus: { kind: 'audio', mime: 'audio/ogg; codecs=opus' },
  png: { kind: 'image', mime: 'image/png' },
  svg: { kind: 'image', mime: 'image/svg+xml' },
  wav: { kind: 'audio', mime: 'audio/wav' },
  webm: { kind: 'video', mime: 'video/webm' },
  webp: { kind: 'image', mime: 'image/webp' }
}

// Looks up media info from the extension of `path`. Everything from the first
// `?` or `#` onward is dropped so URL queries and fragments don't hide the
// extension; the text after the last `.` is then lowercased and used as the
// lookup key (a string containing no `.` is used whole). Returns undefined when
// the extension is not in MEDIA_BY_EXT.
function mediaInfo(path: string): MediaInfo | undefined {
  const ext = path.split(/[?#]/, 1)[0]?.split('.').pop()?.toLowerCase()

  // Own-property check: indexing a plain object also resolves inherited keys
  // such as `constructor` or `__proto__`, which are not MediaInfo entries.
  if (!ext || !Object.prototype.hasOwnProperty.call(MEDIA_BY_EXT, ext)) {
    return undefined
  }

  return MEDIA_BY_EXT[ext]
}
// Media category for `path`, derived from its extension. Paths with an
// unrecognized extension are reported as 'file'.
export function mediaKind(path: string): MediaKind {
  const info = mediaInfo(path)

  return info?.kind ?? 'file'
}
// MIME type for `path`, derived from its extension. Paths with an unrecognized
// extension are reported as 'application/octet-stream'.
export function mediaMime(path: string): string {
  const info = mediaInfo(path)

  return info?.mime ?? 'application/octet-stream'
}
// Returns the last path segment of `path`, accepting absolute URLs as well as
// POSIX and Windows file paths. Falls back to `path` itself when no segment can
// be extracted (e.g. an empty string or a URL with no path).
export function mediaName(path: string): string {
  // `new URL('C:\\dir\\a.png')` succeeds with scheme `c:` and an opaque pathname
  // that still contains the backslashes, so a drive-letter path must skip URL
  // parsing or its whole `\dir\a.png` tail would come back as the name.
  const isDrivePath = /^[a-z]:[\\/]/i.test(path)

  if (!isDrivePath) {
    try {
      const segment = new URL(path).pathname.split('/').filter(Boolean).pop()

      return segment || path
    } catch {
      // Not an absolute URL: handled as a file path below.
    }
  }

  return path.split(/[\\/]/).filter(Boolean).pop() || path
}
// Wraps a media path in a `#media:` fragment href. The path is percent-encoded
// with encodeURIComponent so it survives inside the href.
export function mediaMarkdownHref(path: string): string {
  const encoded = encodeURIComponent(path)

  return '#media:' + encoded
}
// URL form of a media path. Anything already starting with an `http:`,
// `https:` or `file:` scheme (matched case-insensitively) is returned as-is;
// every other value is prefixed with `file://`.
export function mediaExternalUrl(path: string): string {
  if (/^(?:https?|file):/i.test(path)) {
    return path
  }

  return `file://${path}`
}
// Builds a URL on the custom `hermes-media://` Electron scheme (registered in
// electron/main.cjs), which streams a local file with Range support. Audio and
// video go through it so playback can seek and is not bound by the data URL
// size cap. `path` may be a plain path or a `file://…` URL.
export function mediaStreamUrl(path: string): string {
  const localPath = filePathFromMediaPath(path)

  return 'hermes-media://stream/' + encodeURIComponent(localPath)
}
// Extracts the media path from a `#media:` href by stripping the prefix and
// percent-decoding the remainder. Returns null when `href` is missing, does not
// start with the prefix, or contains a malformed percent-encoding.
export function mediaPathFromMarkdownHref(href?: string): string | null {
  const prefix = '#media:'

  if (href == null || !href.startsWith(prefix)) {
    return null
  }

  const encoded = href.slice(prefix.length)

  try {
    return decodeURIComponent(encoded)
  } catch {
    return null
  }
}
// Converts a media path that may be a `file:` URL into a plain filesystem path.
// Values without a `file:` scheme are returned unchanged. The scheme is matched
// case-insensitively, in line with mediaExternalUrl.
export function filePathFromMediaPath(path: string): string {
  if (!/^file:/i.test(path)) {
    return path
  }

  let decoded: string

  try {
    decoded = decodeURIComponent(new URL(path).pathname)
  } catch {
    // Unparseable URL or malformed percent-encoding (e.g. a literal `%` in the
    // file name): fall back to stripping the scheme textually.
    decoded = path.replace(/^file:\/\//i, '')
  }

  // URL pathnames for Windows drives look like `/C:/dir/a.png`. Drop the leading
  // slash so the result has the same shape as a drive-letter path that was
  // passed in without a scheme.
  return /^\/[a-z]:(?:[\\/]|$)/i.test(decoded) ? decoded.slice(1) : decoded
}
// Reports whether the desktop shell is connected to a remote gateway, i.e. the
// current `$connection` value has mode 'remote'. In that case local media paths
// refer to the gateway machine's disk rather than this one, so they have to be
// fetched over the API.
export function isRemoteGateway(): boolean {
  const connection = $connection.get()

  return connection?.mode === 'remote'
}
// Fetch a gateway-local image as a data URL via the authenticated REST bridge.
// Used in remote mode where readFileDataUrl (which reads THIS machine's disk)
// can't see files the agent wrote on the gateway. Requires the gateway to
// expose GET /api/media (hermes_cli/web_server.py).
//
// `path` may be a plain path or a `file:` URL. The returned promise rejects
// when the desktop bridge is not available, when the request itself fails, or
// when the response carries no `data_url` string.
export async function gatewayMediaDataUrl(path: string): Promise<string> {
  // Explicit guard instead of a non-null assertion so a missing bridge produces
  // a descriptive error rather than an opaque property-access TypeError.
  const bridge = window.hermesDesktop

  if (!bridge) {
    throw new Error('gatewayMediaDataUrl: window.hermesDesktop bridge is unavailable')
  }

  const file = filePathFromMediaPath(path)
  const result = await bridge.api<{ data_url: string }>({
    path: `/api/media?path=${encodeURIComponent(file)}`
  })

  // The generic only describes the expected shape; verify it so callers never
  // receive `undefined` typed as a string.
  if (typeof result?.data_url !== 'string') {
    throw new Error(`gatewayMediaDataUrl: /api/media returned no data_url for ${file}`)
  }

  return result.data_url
}
// Label of the form `<Kind>: <name>`, where `<Kind>` is the capitalized media
// kind and `<name>` is the media name with `[`, `]` and `\` backslash-escaped
// (presumably so the label can sit inside markdown link text; confirm against
// callers).
export function mediaDisplayLabel(path: string): string {
  const kind = mediaKind(path)
  const safeName = mediaName(path).replace(/[[\]\\]/g, '\\$&')
  const title = kind.charAt(0).toUpperCase() + kind.slice(1)

  return `${title}: ${safeName}`
}