mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
Desktop connected to a remote gateway can now attach images and PDFs and
display agent-written images. Previously the desktop passed a LOCAL file path
to image.attach; on a remote gateway that path doesn't exist, so the image was
silently dropped ("skipped unreadable path") and the vision model never saw it.
The reverse direction was also broken — images the agent wrote on the gateway
rendered as dead links in the remote client.
Gateway (tui_gateway/server.py):
- image.attach_bytes: base64 byte upload written into the gateway's own images
dir and queued via the existing native-image-attach pipeline. Magic-byte
extension sniffing, data-URL prefix + whitespace tolerance, 25 MB cap,
structured error codes. Accepts content_base64/filename (canonical) and
data/ext (older-desktop aliases).
- pdf.attach: renders each page to PNG via pdftoppm (poppler-utils) at 150 DPI
and queues the pages as images; 50 MB / 25-page caps. Accepts host path or
base64 upload.
- Shared helpers (_decode_attach_base64, _sniff_image_ext, _queue_attached_image)
so the two methods and the existing image.attach don't duplicate logic.
Gateway (hermes_cli/web_server.py):
- GET /api/media: returns a gateway-local image as a base64 data URL so remote
clients can display it. Auth-gated like every /api route, extension
allowlist + size cap, AND confined to the gateway's own media roots
(images/screenshots/cache, resolved symlink-safe) so an authed caller can't
read image-extension files anywhere on disk.
Desktop (apps/desktop):
- syncImageAttachmentsForSubmit uploads bytes via image.attach_bytes when the
connection mode is 'remote'; the local fast path is unchanged.
- media.ts gains isRemoteGateway() + gatewayMediaDataUrl(); directive-text and
markdown-text fetch images over /api/media in remote mode.
Consolidates the competing remote-media PRs (#38876, #40317, #21908, #39437)
into one coherent implementation, taking the strongest parts of each and adding
shared-helper cleanup plus the /api/media root-confinement hardening on top.
The per-profile gateway switching from #38876 is intentionally left out as a
separable feature. TUI file uploads (#40492) remain a separate surface.
Tested: 11 new tui_gateway tests + 5 /api/media endpoint tests + desktop
media.remote unit tests; full tui_gateway + web_server suites green (472
passed); tsc -b clean; E2E verified the full attach→disk→queue and
gateway-path→data-URL display round-trip plus the out-of-root security block.
Co-authored-by: Max Mitcham <maxmitcham@mac.home>
Co-authored-by: Justlrnal4 <Justlrnal4@users.noreply.github.com>
Co-authored-by: Chris Cook <ccook@nvms.com>
Co-authored-by: Thomas Paquette <thomas.paquette@gmail.com>
119 lines
3.7 KiB
TypeScript
119 lines
3.7 KiB
TypeScript
import { $connection } from '@/store/session'
|
|
|
|
/** Coarse category a media path is sorted into; `'file'` is the catch-all. */
export type MediaKind = 'audio' | 'image' | 'video' | 'file'

/** Category and MIME type recorded for a single known file extension. */
interface MediaInfo {
  kind: MediaKind
  mime: string
}

// Extension lookup table. Keys are lowercase extensions without the leading
// dot; entries are grouped by kind and alphabetised within each group.
const MEDIA_BY_EXT: Record<string, MediaInfo> = {
  // Audio
  flac: { kind: 'audio', mime: 'audio/flac' },
  m4a: { kind: 'audio', mime: 'audio/mp4' },
  mp3: { kind: 'audio', mime: 'audio/mpeg' },
  ogg: { kind: 'audio', mime: 'audio/ogg' },
  opus: { kind: 'audio', mime: 'audio/ogg; codecs=opus' },
  wav: { kind: 'audio', mime: 'audio/wav' },

  // Image
  bmp: { kind: 'image', mime: 'image/bmp' },
  gif: { kind: 'image', mime: 'image/gif' },
  jpeg: { kind: 'image', mime: 'image/jpeg' },
  jpg: { kind: 'image', mime: 'image/jpeg' },
  png: { kind: 'image', mime: 'image/png' },
  svg: { kind: 'image', mime: 'image/svg+xml' },
  webp: { kind: 'image', mime: 'image/webp' },

  // Video
  avi: { kind: 'video', mime: 'video/x-msvideo' },
  mkv: { kind: 'video', mime: 'video/x-matroska' },
  mov: { kind: 'video', mime: 'video/quicktime' },
  mp4: { kind: 'video', mime: 'video/mp4' },
  webm: { kind: 'video', mime: 'video/webm' }
}
|
|
|
|
/**
 * Looks up the media classification for a path or URL by its file extension.
 *
 * Everything from the first `?` or `#` onward is ignored so URL queries and
 * fragments cannot leak into the extension. The extension is read from the
 * final path segment only and must follow a dot, so a dot-less name such as
 * `mp3` is not mistaken for a media file.
 *
 * @param path - File path (either separator style) or URL.
 * @returns The table entry for the extension, or `undefined` when the
 *   extension is missing or unknown.
 */
function mediaInfo(path: string): MediaInfo | undefined {
  const withoutQuery = path.split(/[?#]/, 1)[0] ?? ''
  const leaf = withoutQuery.split(/[\\/]/).pop() ?? ''
  const dot = leaf.lastIndexOf('.')

  if (dot === -1) {
    return undefined
  }

  const ext = leaf.slice(dot + 1).toLowerCase()

  // Own-property check: a key like `constructor` would otherwise resolve to a
  // member inherited from Object.prototype rather than a MediaInfo entry.
  return Object.prototype.hasOwnProperty.call(MEDIA_BY_EXT, ext) ? MEDIA_BY_EXT[ext] : undefined
}
|
|
|
|
/**
 * Classifies a path or URL as audio, image, or video based on its extension.
 *
 * @param path - File path or URL to classify.
 * @returns The matching kind, or `'file'` when the extension is not recognised.
 */
export function mediaKind(path: string): MediaKind {
  const info = mediaInfo(path)

  return info?.kind ?? 'file'
}
|
|
|
|
/**
 * Resolves the MIME type for a path or URL based on its extension.
 *
 * @param path - File path or URL to inspect.
 * @returns The known MIME type, or `'application/octet-stream'` when the
 *   extension is not recognised.
 */
export function mediaMime(path: string): string {
  const info = mediaInfo(path)

  return info?.mime ?? 'application/octet-stream'
}
|
|
|
|
/**
 * Extracts a display name (the last non-empty path segment) from a media path
 * or URL.
 *
 * When the input parses as a URL only its pathname is considered, which drops
 * any query string or fragment. Segments are split on both `/` and `\` in
 * every case: a Windows drive path such as `C:\dir\a.png` parses as a URL with
 * scheme `c:` whose pathname still contains backslashes, so splitting on `/`
 * alone would hand back the whole path instead of `a.png`.
 *
 * @param path - File path (POSIX or Windows) or URL.
 * @returns The last path segment, or `path` itself when there is none.
 */
export function mediaName(path: string): string {
  let location = path

  try {
    location = new URL(path).pathname
  } catch {
    // Not a URL; fall through and treat the input as a plain filesystem path.
  }

  return location.split(/[\\/]/).filter(Boolean).pop() || path
}
|
|
|
|
/**
 * Builds the in-app `#media:` href used to reference a media path from
 * Markdown.
 *
 * `encodeURIComponent` leaves `(` and `)` untouched, and an unbalanced
 * parenthesis terminates a Markdown link destination early. Both are therefore
 * percent-encoded as well; `decodeURIComponent` restores them, so the result
 * still round-trips through `mediaPathFromMarkdownHref`.
 *
 * @param path - Media path or URL to embed.
 * @returns A `#media:`-prefixed, fully percent-encoded href.
 */
export function mediaMarkdownHref(path: string): string {
  const encoded = encodeURIComponent(path).replace(
    /[()]/g,
    ch => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`
  )

  return `#media:${encoded}`
}
|
|
|
|
/**
 * Converts a media path into a URL suitable for opening externally.
 *
 * `http:`, `https:` and `file:` inputs are returned unchanged. Any other input
 * is treated as a filesystem path and turned into a `file://` URL:
 *
 * - `%`, `#` and `?` are percent-encoded so a file name containing them is not
 *   read as an escape sequence, fragment, or query string. Other characters
 *   are left as-is, so ordinary paths produce the same URL as before.
 * - A Windows drive path (`C:\dir\a.png`) is rewritten to `/C:/dir/a.png`
 *   first; otherwise the drive letter would be parsed as the URL host.
 *
 * @param path - File path or URL.
 * @returns An `http(s):` or `file:` URL string.
 */
export function mediaExternalUrl(path: string): string {
  if (/^(?:https?|file):/i.test(path)) {
    return path
  }

  const isDrivePath = /^[A-Za-z]:[\\/]/.test(path)
  const urlPath = isDrivePath ? `/${path.replace(/\\/g, '/')}` : path
  const escaped = urlPath.replace(/[%#?]/g, ch => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`)

  return `file://${escaped}`
}
|
|
|
|
/**
 * Builds a `hermes-media://stream/…` URL for a local media file.
 *
 * This is the custom Electron scheme (registered in electron/main.cjs) that
 * streams a local file with Range support. It is used for audio/video so
 * playback bypasses the data URL size cap and supports seeking.
 *
 * @param path - A plain filesystem path or a `file://…` URL.
 * @returns The stream URL with the local path percent-encoded.
 */
export function mediaStreamUrl(path: string): string {
  const localPath = filePathFromMediaPath(path)

  return `hermes-media://stream/${encodeURIComponent(localPath)}`
}
|
|
|
|
/**
 * Recovers the media path from an in-app `#media:` Markdown href.
 *
 * @param href - Link target taken from rendered Markdown; may be absent.
 * @returns The decoded media path, or `null` when `href` is missing, does not
 *   start with `#media:`, or carries malformed percent-encoding.
 */
export function mediaPathFromMarkdownHref(href?: string): string | null {
  const prefix = '#media:'

  if (href == null || !href.startsWith(prefix)) {
    return null
  }

  const encoded = href.slice(prefix.length)

  try {
    return decodeURIComponent(encoded)
  } catch {
    // Malformed escape sequence: treat the link as not being a media link.
    return null
  }
}
|
|
|
|
/**
 * Converts a media path that may be a `file:` URL into a plain filesystem
 * path.
 *
 * - Inputs that are not `file:` URLs are returned unchanged. The scheme check
 *   is case-insensitive, matching the one in `mediaExternalUrl`.
 * - The URL pathname is percent-decoded. If the URL cannot be parsed or
 *   decoded, the leading `file://` is simply stripped.
 * - `file:///C:/dir/a.png` parses to the pathname `/C:/dir/a.png`; the slash
 *   in front of the drive letter is dropped so the result is a usable Windows
 *   path.
 *
 * @param path - Plain path or `file:` URL.
 * @returns The filesystem path.
 */
export function filePathFromMediaPath(path: string): string {
  if (!/^file:/i.test(path)) {
    return path
  }

  let decoded: string

  try {
    decoded = decodeURIComponent(new URL(path).pathname)
  } catch {
    return path.replace(/^file:\/\//i, '')
  }

  return /^\/[A-Za-z]:(?:[\\/]|$)/.test(decoded) ? decoded.slice(1) : decoded
}
|
|
|
|
/**
 * Reports whether this desktop shell is wired to a remote gateway.
 *
 * In that mode local media paths live on the gateway machine, not this disk,
 * so they are fetched over the API.
 *
 * @returns `true` when the current connection's mode is `'remote'`.
 */
export function isRemoteGateway(): boolean {
  const connection = $connection.get()

  return connection?.mode === 'remote'
}
|
|
|
|
/**
 * Fetches a gateway-local image as a data URL via the authenticated REST
 * bridge.
 *
 * Used in remote mode, where readFileDataUrl (which reads THIS machine's disk)
 * can't see files the agent wrote on the gateway. Requires the gateway to
 * expose GET /api/media (hermes_cli/web_server.py).
 *
 * @param path - Gateway-side file path, or a `file:` URL pointing at one.
 * @returns The `data_url` string from the gateway response.
 * @throws Error when the desktop bridge is not available or the response does
 *   not carry a string `data_url`; rejections from the bridge call propagate
 *   unchanged.
 */
export async function gatewayMediaDataUrl(path: string): Promise<string> {
  const bridge = window.hermesDesktop

  // Fail with a descriptive error instead of a TypeError on `undefined`.
  if (!bridge) {
    throw new Error('Desktop bridge is unavailable; cannot fetch gateway media')
  }

  const file = filePathFromMediaPath(path)

  const result = await bridge.api<{ data_url: string }>({
    path: `/api/media?path=${encodeURIComponent(file)}`
  })

  // The generic parameter is only a compile-time promise; check the payload so
  // callers never receive a non-string where a data URL is expected.
  if (typeof result?.data_url !== 'string') {
    throw new Error('Gateway /api/media response did not include a data_url')
  }

  return result.data_url
}
|
|
|
|
/**
 * Builds a human-readable label such as `Image: chart.png` for a media path.
 *
 * Square brackets and backslashes in the file name are backslash-escaped.
 *
 * @param path - File path or URL of the media item.
 * @returns The capitalised media kind, a colon, and the escaped file name.
 */
export function mediaDisplayLabel(path: string): string {
  const kind = mediaKind(path)
  const title = kind.charAt(0).toUpperCase() + kind.slice(1)
  const safeName = mediaName(path).replace(/[[\]\\]/g, '\\$&')

  return `${title}: ${safeName}`
}
|