mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
feat(desktop+gateway): remote media relay — attach images/PDFs and display gateway images over the network
Desktop connected to a remote gateway can now attach images and PDFs and
display agent-written images. Previously the desktop passed a LOCAL file path
to image.attach; on a remote gateway that path doesn't exist, so the image was
silently dropped ("skipped unreadable path") and the vision model never saw it.
The reverse direction was also broken — images the agent wrote on the gateway
rendered as dead links in the remote client.
Gateway (tui_gateway/server.py):
- image.attach_bytes: base64 byte upload written into the gateway's own images
dir and queued via the existing native-image-attach pipeline. Magic-byte
extension sniffing, data-URL prefix + whitespace tolerance, 25 MB cap,
structured error codes. Accepts content_base64/filename (canonical) and
data/ext (older-desktop aliases).
- pdf.attach: renders each page to PNG via pdftoppm (poppler-utils) at 150 DPI
and queues the pages as images; 50 MB / 25-page caps. Accepts host path or
base64 upload.
- Shared helpers (_decode_attach_base64, _sniff_image_ext, _queue_attached_image)
so the two methods and the existing image.attach don't duplicate logic.
Gateway (hermes_cli/web_server.py):
- GET /api/media: returns a gateway-local image as a base64 data URL so remote
clients can display it. Auth-gated like every /api route, extension
allowlist + size cap, AND confined to the gateway's own media roots
(images/screenshots/cache, resolved symlink-safe) so an authed caller can't
read image-extension files anywhere on disk.
Desktop (apps/desktop):
- syncImageAttachmentsForSubmit uploads bytes via image.attach_bytes when the
connection mode is 'remote'; the local fast path is unchanged.
- media.ts gains isRemoteGateway() + gatewayMediaDataUrl(); directive-text and
markdown-text fetch images over /api/media in remote mode.
Consolidates the competing remote-media PRs (#38876, #40317, #21908, #39437)
into one coherent implementation, taking the strongest parts of each and adding
shared-helper cleanup plus the /api/media root-confinement hardening on top.
The per-profile gateway switching from #38876 is intentionally left out as a
separable feature. TUI file uploads (#40492) remain a separate surface.
Tested: 11 new tui_gateway tests + 5 /api/media endpoint tests + desktop
media.remote unit tests; full tui_gateway + web_server suites green (472
passed); tsc -b clean; E2E verified the full attach→disk→queue and
gateway-path→data-URL display round-trip plus the out-of-root security block.
Co-authored-by: Max Mitcham <maxmitcham@mac.home>
Co-authored-by: Justlrnal4 <Justlrnal4@users.noreply.github.com>
Co-authored-by: Chris Cook <ccook@nvms.com>
Co-authored-by: Thomas Paquette <thomas.paquette@gmail.com>
This commit is contained in:
parent
20fd0bde5d
commit
16786f3bb3
11 changed files with 759 additions and 11 deletions
|
|
@ -34,6 +34,7 @@ import { requestDesktopOnboarding } from '@/store/onboarding'
|
|||
import { $activeGatewayProfile, $newChatProfile, ensureGatewayProfile, normalizeProfileKey } from '@/store/profile'
|
||||
import {
|
||||
$busy,
|
||||
$connection,
|
||||
$messages,
|
||||
$yoloActive,
|
||||
setAwaitingResponse,
|
||||
|
|
@ -80,6 +81,28 @@ function inlineErrorMessage(error: unknown, fallback: string): string {
|
|||
return (raw.match(/Error invoking remote method '[^']+': Error: (.+)$/)?.[1] ?? raw).replace(/^Error:\s*/, '').trim()
|
||||
}
|
||||
|
||||
/**
 * Extracts the payload of a data URL: everything after its first comma.
 * Returns an empty string when the input contains no comma at all.
 */
function base64FromDataUrl(dataUrl: string): string {
  const separatorAt = dataUrl.indexOf(',')

  if (separatorAt < 0) {
    return ''
  }

  return dataUrl.slice(separatorAt + 1)
}
|
||||
|
||||
/**
 * Returns the last non-empty segment of a path split on `/` or `\`.
 * Falls back to 'image.png' when the path has no non-empty segment.
 */
function imageFilenameFromPath(filePath: string): string {
  const segments = filePath.split(/[\\/]/).filter(segment => segment.length > 0)
  const last = segments[segments.length - 1]

  return last || 'image.png'
}
|
||||
|
||||
// Remote gateway: the local composer-image file lives on THIS machine's disk,
// not the gateway's, so read the bytes here and upload them via
// image.attach_bytes. Returns null when the file can't be read: the desktop
// bridge is missing, the read rejects, or the result carries no payload after
// the data-URL comma. A rejected read is folded into null (rather than
// propagating) so the caller's single "Could not read <label>" branch covers
// every unreadable-file case.
async function readImageForRemoteAttach(
  filePath: string
): Promise<{ contentBase64: string; filename: string } | null> {
  let dataUrl: string | null | undefined

  try {
    dataUrl = await window.hermesDesktop?.readFileDataUrl(filePath)
  } catch {
    return null
  }

  const contentBase64 = dataUrl ? base64FromDataUrl(dataUrl) : ''

  return contentBase64 ? { contentBase64, filename: imageFilenameFromPath(filePath) } : null
}
|
||||
|
||||
interface PromptActionsOptions {
|
||||
activeSessionId: string | null
|
||||
activeSessionIdRef: MutableRefObject<string | null>
|
||||
|
|
@ -197,16 +220,36 @@ export function usePromptActions({
|
|||
) => {
|
||||
const updateComposerAttachments = options.updateComposerAttachments ?? true
|
||||
const images = attachments.filter(attachment => attachment.kind === 'image' && attachment.path)
|
||||
const remote = $connection.get()?.mode === 'remote'
|
||||
|
||||
for (const attachment of images) {
|
||||
if (attachment.attachedSessionId === sessionId) {
|
||||
continue
|
||||
}
|
||||
|
||||
const result = await requestGateway<ImageAttachResponse>('image.attach', {
|
||||
session_id: sessionId,
|
||||
path: attachment.path
|
||||
})
|
||||
let result: ImageAttachResponse
|
||||
|
||||
if (remote) {
|
||||
// The gateway is on another machine — it can't read attachment.path
|
||||
// (a path on THIS disk). Upload the bytes via image.attach_bytes.
|
||||
const payload = attachment.path ? await readImageForRemoteAttach(attachment.path) : null
|
||||
|
||||
if (!payload) {
|
||||
const label = attachment.label || (attachment.path ? pathLabel(attachment.path) : 'image')
|
||||
throw new Error(`Could not read ${label}`)
|
||||
}
|
||||
|
||||
result = await requestGateway<ImageAttachResponse>('image.attach_bytes', {
|
||||
session_id: sessionId,
|
||||
content_base64: payload.contentBase64,
|
||||
filename: payload.filename
|
||||
})
|
||||
} else {
|
||||
result = await requestGateway<ImageAttachResponse>('image.attach', {
|
||||
session_id: sessionId,
|
||||
path: attachment.path
|
||||
})
|
||||
}
|
||||
|
||||
if (!result.attached) {
|
||||
const label = attachment.label || (attachment.path ? pathLabel(attachment.path) : 'image')
|
||||
|
|
|
|||
|
|
@ -13,6 +13,13 @@ export interface ImageAttachResponse {
|
|||
path?: string
|
||||
text?: string
|
||||
message?: string
|
||||
// Returned by the byte-upload variant (image.attach_bytes) used in remote mode.
|
||||
count?: number
|
||||
bytes?: number
|
||||
name?: string
|
||||
width?: number
|
||||
height?: number
|
||||
token_estimate?: number
|
||||
}
|
||||
|
||||
export interface ImageDetachResponse {
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import { Fragment, useEffect, useMemo, useState } from 'react'
|
|||
|
||||
import { ZoomableImage } from '@/components/chat/zoomable-image'
|
||||
import { extractEmbeddedImages } from '@/lib/embedded-images'
|
||||
import { gatewayMediaDataUrl, isRemoteGateway } from '@/lib/media'
|
||||
|
||||
const HERMES_REF_TYPES = ['file', 'folder', 'url', 'image', 'tool', 'line', 'terminal', 'session'] as const
|
||||
type HermesRefType = (typeof HERMES_REF_TYPES)[number]
|
||||
|
|
@ -327,25 +328,32 @@ export const DirectiveText: TextMessagePartComponent = ({ text }: TextMessagePar
|
|||
* messages render after the backend embeds the data URL, so the UX is stable
|
||||
* across initial send and refresh. */
|
||||
const DirectiveImage: FC<{ id: string; label: string }> = ({ id, label }) => {
|
||||
const remote = /^(?:https?|data):/i.test(id)
|
||||
const [src, setSrc] = useState<string | null>(remote ? id : null)
|
||||
const isUrl = /^(?:https?|data):/i.test(id)
|
||||
const [src, setSrc] = useState<string | null>(isUrl ? id : null)
|
||||
const [failed, setFailed] = useState(false)
|
||||
|
||||
useEffect(() => {
|
||||
if (remote || !id) {
|
||||
if (isUrl || !id) {
|
||||
return
|
||||
}
|
||||
|
||||
let alive = true
|
||||
void window.hermesDesktop
|
||||
?.readFileDataUrl(id)
|
||||
.then(url => alive && setSrc(url))
|
||||
|
||||
// Remote gateway: the image lives on the gateway's disk, not ours — fetch
|
||||
// it over the authenticated API. Local: read it straight off this disk.
|
||||
const load =
|
||||
window.hermesDesktop && isRemoteGateway()
|
||||
? gatewayMediaDataUrl(id)
|
||||
: window.hermesDesktop?.readFileDataUrl(id)
|
||||
|
||||
void Promise.resolve(load)
|
||||
.then(url => alive && url && setSrc(url))
|
||||
.catch(() => alive && setFailed(true))
|
||||
|
||||
return () => {
|
||||
alive = false
|
||||
}
|
||||
}, [id, remote])
|
||||
}, [id, isUrl])
|
||||
|
||||
if (failed) {
|
||||
return <DirectiveChip id={id} label={label} type="image" />
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ import { createMemoizedMathPlugin } from '@/lib/katex-memo'
|
|||
import { preprocessMarkdown } from '@/lib/markdown-preprocess'
|
||||
import {
|
||||
filePathFromMediaPath,
|
||||
gatewayMediaDataUrl,
|
||||
isRemoteGateway,
|
||||
mediaExternalUrl,
|
||||
mediaKind,
|
||||
mediaName,
|
||||
|
|
@ -51,6 +53,12 @@ async function mediaSrc(path: string): Promise<string> {
|
|||
return mediaStreamUrl(path)
|
||||
}
|
||||
|
||||
// Remote gateway: the image lives on the gateway machine, so read it over the
|
||||
// authenticated API rather than this machine's disk.
|
||||
if (window.hermesDesktop && isRemoteGateway()) {
|
||||
return gatewayMediaDataUrl(path)
|
||||
}
|
||||
|
||||
if (!window.hermesDesktop?.readFileDataUrl) {
|
||||
return mediaExternalUrl(path)
|
||||
}
|
||||
|
|
|
|||
58
apps/desktop/src/lib/media.remote.test.ts
Normal file
58
apps/desktop/src/lib/media.remote.test.ts
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import { $connection } from '@/store/session'
|
||||
|
||||
import { filePathFromMediaPath, gatewayMediaDataUrl, isRemoteGateway } from './media'
|
||||
|
||||
describe('isRemoteGateway', () => {
|
||||
afterEach(() => {
|
||||
$connection.set(null)
|
||||
})
|
||||
|
||||
it('is false with no connection', () => {
|
||||
$connection.set(null)
|
||||
expect(isRemoteGateway()).toBe(false)
|
||||
})
|
||||
|
||||
it('is false in local mode', () => {
|
||||
$connection.set({ mode: 'local' } as never)
|
||||
expect(isRemoteGateway()).toBe(false)
|
||||
})
|
||||
|
||||
it('is true in remote mode', () => {
|
||||
$connection.set({ mode: 'remote' } as never)
|
||||
expect(isRemoteGateway()).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('filePathFromMediaPath', () => {
|
||||
it('passes through a plain path', () => {
|
||||
expect(filePathFromMediaPath('/home/u/.hermes/images/a.png')).toBe('/home/u/.hermes/images/a.png')
|
||||
})
|
||||
|
||||
it('decodes a file:// URL with encoded characters', () => {
|
||||
expect(filePathFromMediaPath('file:///tmp/a%20b.png')).toBe('/tmp/a b.png')
|
||||
})
|
||||
})
|
||||
|
||||
describe('gatewayMediaDataUrl', () => {
|
||||
const api = vi.fn(async () => ({ data_url: 'data:image/png;base64,ZHVtbXk=' }))
|
||||
|
||||
beforeEach(() => {
|
||||
api.mockClear()
|
||||
vi.stubGlobal('window', { hermesDesktop: { api } })
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllGlobals()
|
||||
})
|
||||
|
||||
it('requests the encoded gateway path and returns the data URL', async () => {
|
||||
const url = await gatewayMediaDataUrl('/home/u/.hermes/images/a b.png')
|
||||
|
||||
expect(url).toBe('data:image/png;base64,ZHVtbXk=')
|
||||
expect(api).toHaveBeenCalledWith({
|
||||
path: '/api/media?path=%2Fhome%2Fu%2F.hermes%2Fimages%2Fa%20b.png'
|
||||
})
|
||||
})
|
||||
})
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
import { $connection } from '@/store/session'
|
||||
|
||||
export type MediaKind = 'audio' | 'image' | 'video' | 'file'
|
||||
|
||||
interface MediaInfo {
|
||||
|
|
@ -89,6 +91,26 @@ export function filePathFromMediaPath(path: string): string {
|
|||
}
|
||||
}
|
||||
|
||||
// Reports whether the current connection (read from the $connection store) is
// in 'remote' mode. In that mode media paths refer to the gateway machine's
// disk rather than this one, so callers fetch them over the API instead.
// A missing connection counts as not remote.
export function isRemoteGateway(): boolean {
  const connection = $connection.get()

  return connection?.mode === 'remote'
}
|
||||
|
||||
// Fetch a gateway-local image as a data URL via the authenticated REST bridge.
// Used in remote mode where readFileDataUrl (which reads THIS machine's disk)
// can't see files the agent wrote on the gateway. Requires the gateway to
// expose GET /api/media (hermes_cli/web_server.py).
//
// `path` may be a plain path or a media/file URL; it is normalised with
// filePathFromMediaPath and URL-encoded into the `path` query parameter.
// Rejects (instead of resolving to a non-string) when the desktop bridge is
// missing or the response carries no `data_url`, so callers' .catch handlers
// can surface a failed state.
export async function gatewayMediaDataUrl(path: string): Promise<string> {
  const bridge = window.hermesDesktop

  if (!bridge) {
    throw new Error('Desktop bridge unavailable: cannot fetch gateway media')
  }

  const file = filePathFromMediaPath(path)

  const result = await bridge.api<{ data_url: string }>({
    path: `/api/media?path=${encodeURIComponent(file)}`
  })

  if (typeof result?.data_url !== 'string' || !result.data_url) {
    throw new Error(`Gateway returned no image data for ${file}`)
  }

  return result.data_url
}
|
||||
|
||||
export function mediaDisplayLabel(path: string): string {
|
||||
const escaped = mediaName(path).replace(/[[\]\\]/g, '\\$&')
|
||||
const kind = mediaKind(path)
|
||||
|
|
|
|||
|
|
@ -796,6 +796,74 @@ def _probe_gateway_health() -> tuple[bool, dict | None]:
|
|||
return False, None
|
||||
|
||||
|
||||
# Image MIME types this endpoint will serve. Extension-allowlisted so an
# authenticated caller can't pull non-image files through it.
# Keys are lowercase, dot-prefixed file suffixes; get_media lowercases the
# requested path's suffix before looking it up here, and the mapped value is
# used as the media type of the returned data URL.
_MEDIA_CONTENT_TYPES = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".svg": "image/svg+xml",
    ".bmp": "image/bmp",
    ".ico": "image/x-icon",
}
# Largest on-disk file size (25 MiB) get_media will encode; bigger files get a 413.
_MEDIA_MAX_BYTES = 25 * 1024 * 1024
|
||||
|
||||
|
||||
def _media_serve_roots() -> list[Path]:
    """Return the resolved directories ``GET /api/media`` may read from.

    The roots are the ``images``, ``screenshots`` and ``cache`` directories
    under the Hermes home. Limiting reads to these keeps an authenticated
    client from fetching image-extension files elsewhere on disk (e.g. a
    renamed key) merely because the suffix passes the allowlist.

    Each root is passed through ``Path.resolve()``; a root whose resolution
    raises ``OSError`` or ``RuntimeError`` is left out of the result.
    """
    hermes_home = get_hermes_home()
    resolved: list[Path] = []
    for subdir in ("images", "screenshots", "cache"):
        candidate = hermes_home / subdir
        try:
            resolved.append(candidate.resolve())
        except (OSError, RuntimeError):
            # Unresolvable root: skip it rather than fail the whole request.
            continue
    return resolved
|
||||
|
||||
|
||||
@app.get("/api/media")
async def get_media(path: str):
    """Return a gateway-local image file as a base64 data URL.

    Lets remote clients (the desktop app over the network, or the web dashboard
    in a browser) display images the agent wrote to *this* machine's filesystem
    — they can't read the gateway's local disk directly.

    Auth-gated by the session token like every other /api route. Restricted to
    an image-extension allowlist, a size cap, AND the gateway's own media roots
    (resolved, symlink-safe) so it can't be used to read arbitrary files.

    Args:
        path: Client-supplied filesystem path on the gateway host.

    Returns:
        ``{"data_url": "data:<mime>;base64,<payload>"}``.

    Raises:
        HTTPException: 400 when the path can't be resolved, 415 when the suffix
            is not in ``_MEDIA_CONTENT_TYPES``, 403 when the resolved path is
            outside every media root, 404 when it is not an existing regular
            file, 413 when it exceeds ``_MEDIA_MAX_BYTES``.
    """
    try:
        target = Path(path).expanduser().resolve()
    except (OSError, RuntimeError, ValueError):
        # ValueError covers an embedded NUL byte in the client-supplied path,
        # which would otherwise escape as an unhandled 500.
        raise HTTPException(status_code=400, detail="Invalid path")

    content_type = _MEDIA_CONTENT_TYPES.get(target.suffix.lower())
    if content_type is None:
        raise HTTPException(status_code=415, detail="Unsupported media type")

    # Containment is checked on the resolved path, so symlinks and ".." that
    # lead out of a root are rejected.
    roots = _media_serve_roots()
    if not any(target == root or root in target.parents for root in roots):
        raise HTTPException(status_code=403, detail="Path outside media roots")

    if not target.is_file():
        raise HTTPException(status_code=404, detail="File not found")

    try:
        if target.stat().st_size > _MEDIA_MAX_BYTES:
            raise HTTPException(status_code=413, detail="File too large")
        payload = target.read_bytes()
    except FileNotFoundError:
        # The file vanished between the is_file() check and the read.
        raise HTTPException(status_code=404, detail="File not found")

    encoded = base64.b64encode(payload).decode("ascii")
    return {"data_url": f"data:{content_type};base64,{encoded}"}
|
||||
|
||||
|
||||
@app.get("/api/status")
|
||||
async def get_status():
|
||||
current_ver, latest_ver = check_config_version()
|
||||
|
|
|
|||
|
|
@ -47,6 +47,9 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
|
|||
AUTHOR_MAP = {
|
||||
"yusufalweshdemir@gmail.com": "Dusk1e",
|
||||
"804436395@qq.com": "LaPhilosophie",
|
||||
"maxmitcham@mac.home": "maxtrigify",
|
||||
"ccook@nvms.com": "ccook1963",
|
||||
"thomas.paquette@gmail.com": "RyTsYdUp",
|
||||
"266365592+bmoore210@users.noreply.github.com": "bmoore210",
|
||||
"manishbyatroy@gmail.com": "manishbyatroy",
|
||||
"chilltulpa@gmail.com": "TheGardenGallery",
|
||||
|
|
|
|||
|
|
@ -243,6 +243,57 @@ class TestWebServerEndpoints:
|
|||
assert "hermes_home" in data
|
||||
assert "active_sessions" in data
|
||||
|
||||
# ── GET /api/media (remote image display) ───────────────────────────
|
||||
|
||||
def test_get_media_serves_image_in_root(self):
|
||||
"""An image under the gateway's images dir is returned as a data URL."""
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
img_dir = get_hermes_home() / "images"
|
||||
img_dir.mkdir(parents=True, exist_ok=True)
|
||||
img = img_dir / "shot.png"
|
||||
img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 16)
|
||||
|
||||
resp = self.client.get("/api/media", params={"path": str(img)})
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["data_url"].startswith("data:image/png;base64,")
|
||||
|
||||
def test_get_media_rejects_path_outside_roots(self, tmp_path):
|
||||
"""An image-extension file outside the media roots is forbidden."""
|
||||
outside = tmp_path / "secret.png"
|
||||
outside.write_bytes(b"\x89PNG\r\n\x1a\n")
|
||||
|
||||
resp = self.client.get("/api/media", params={"path": str(outside)})
|
||||
assert resp.status_code == 403
|
||||
|
||||
def test_get_media_rejects_non_image_extension(self):
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
img_dir = get_hermes_home() / "images"
|
||||
img_dir.mkdir(parents=True, exist_ok=True)
|
||||
env = img_dir / "leak.env"
|
||||
env.write_text("SECRET=1")
|
||||
|
||||
resp = self.client.get("/api/media", params={"path": str(env)})
|
||||
assert resp.status_code == 415
|
||||
|
||||
def test_get_media_404_for_missing_file(self):
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
missing = get_hermes_home() / "images" / "nope.png"
|
||||
resp = self.client.get("/api/media", params={"path": str(missing)})
|
||||
assert resp.status_code == 404
|
||||
|
||||
def test_get_media_requires_auth(self):
|
||||
from hermes_cli.web_server import _SESSION_HEADER_NAME
|
||||
|
||||
resp = self.client.get(
|
||||
"/api/media",
|
||||
params={"path": "/tmp/x.png"},
|
||||
headers={_SESSION_HEADER_NAME: "wrong-token"},
|
||||
)
|
||||
assert resp.status_code == 401
|
||||
|
||||
# ── Dashboard font override ─────────────────────────────────────────
|
||||
|
||||
def test_get_dashboard_font_defaults_to_theme(self):
|
||||
|
|
|
|||
|
|
@ -5774,3 +5774,215 @@ def test_notification_event_dedup_key_keeps_completions_one_shot():
|
|||
assert server._notification_event_dedup_key(first) == server._notification_event_dedup_key(
|
||||
replay
|
||||
)
|
||||
|
||||
|
||||
# --- image.attach_bytes / pdf.attach (remote-client byte upload) -------------
|
||||
|
||||
# Smallest valid 1x1 PNG, base64-encoded.
|
||||
_PNG_1X1_B64 = (
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk"
|
||||
"+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
|
||||
)
|
||||
|
||||
|
||||
def _attach_bytes_cli(monkeypatch):
|
||||
fake_cli = types.ModuleType("cli")
|
||||
fake_cli._IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"}
|
||||
monkeypatch.setitem(sys.modules, "cli", fake_cli)
|
||||
|
||||
|
||||
def test_image_attach_bytes_writes_to_gateway_dir(monkeypatch, tmp_path):
|
||||
"""Remote client uploads base64 bytes; gateway writes them to its own disk."""
|
||||
_attach_bytes_cli(monkeypatch)
|
||||
monkeypatch.setattr(server, "_hermes_home", tmp_path)
|
||||
server._sessions["abx"] = _session()
|
||||
|
||||
resp = server.handle_request(
|
||||
{
|
||||
"id": "1",
|
||||
"method": "image.attach_bytes",
|
||||
"params": {
|
||||
"session_id": "abx",
|
||||
"content_base64": _PNG_1X1_B64,
|
||||
"filename": "shot.png",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
res = resp["result"]
|
||||
assert res["attached"] is True
|
||||
written = Path(res["path"])
|
||||
assert written.is_file()
|
||||
assert written.parent == tmp_path / "images"
|
||||
assert written.read_bytes().startswith(b"\x89PNG")
|
||||
assert len(server._sessions["abx"]["attached_images"]) == 1
|
||||
assert res["bytes"] > 0
|
||||
|
||||
|
||||
def test_image_attach_bytes_accepts_data_url_prefix(monkeypatch, tmp_path):
|
||||
_attach_bytes_cli(monkeypatch)
|
||||
monkeypatch.setattr(server, "_hermes_home", tmp_path)
|
||||
server._sessions["abx2"] = _session()
|
||||
|
||||
resp = server.handle_request(
|
||||
{
|
||||
"id": "1",
|
||||
"method": "image.attach_bytes",
|
||||
"params": {
|
||||
"session_id": "abx2",
|
||||
"content_base64": f"data:image/png;base64,{_PNG_1X1_B64}",
|
||||
},
|
||||
}
|
||||
)
|
||||
assert resp["result"]["attached"] is True
|
||||
|
||||
|
||||
def test_image_attach_bytes_data_alias_and_magic_sniff(monkeypatch, tmp_path):
|
||||
"""Older desktop builds send `data` (not content_base64); ext sniffed from bytes."""
|
||||
_attach_bytes_cli(monkeypatch)
|
||||
monkeypatch.setattr(server, "_hermes_home", tmp_path)
|
||||
server._sessions["abx3"] = _session()
|
||||
|
||||
resp = server.handle_request(
|
||||
{
|
||||
"id": "1",
|
||||
"method": "image.attach_bytes",
|
||||
"params": {"session_id": "abx3", "data": _PNG_1X1_B64},
|
||||
}
|
||||
)
|
||||
res = resp["result"]
|
||||
assert res["attached"] is True
|
||||
assert Path(res["path"]).suffix == ".png" # sniffed from magic bytes
|
||||
|
||||
|
||||
def test_image_attach_bytes_rejects_invalid_base64(monkeypatch, tmp_path):
|
||||
_attach_bytes_cli(monkeypatch)
|
||||
monkeypatch.setattr(server, "_hermes_home", tmp_path)
|
||||
server._sessions["abx4"] = _session()
|
||||
|
||||
resp = server.handle_request(
|
||||
{
|
||||
"id": "1",
|
||||
"method": "image.attach_bytes",
|
||||
"params": {"session_id": "abx4", "content_base64": "!!!not base64!!!"},
|
||||
}
|
||||
)
|
||||
assert "error" in resp
|
||||
assert resp["error"]["code"] == 4017
|
||||
|
||||
|
||||
def test_image_attach_bytes_rejects_oversize(monkeypatch, tmp_path):
|
||||
import base64 as _b64
|
||||
|
||||
_attach_bytes_cli(monkeypatch)
|
||||
monkeypatch.setattr(server, "_hermes_home", tmp_path)
|
||||
monkeypatch.setattr(server, "_ATTACH_BYTES_MAX_BYTES", 10)
|
||||
server._sessions["abx5"] = _session()
|
||||
|
||||
big = _b64.b64encode(b"\x89PNG\r\n\x1a\n" + b"0" * 100).decode("ascii")
|
||||
resp = server.handle_request(
|
||||
{
|
||||
"id": "1",
|
||||
"method": "image.attach_bytes",
|
||||
"params": {"session_id": "abx5", "content_base64": big},
|
||||
}
|
||||
)
|
||||
assert "error" in resp
|
||||
assert resp["error"]["code"] == 4018
|
||||
|
||||
|
||||
def test_image_attach_bytes_rejects_unsupported_extension(monkeypatch, tmp_path):
|
||||
_attach_bytes_cli(monkeypatch)
|
||||
monkeypatch.setattr(server, "_hermes_home", tmp_path)
|
||||
server._sessions["abx6"] = _session()
|
||||
|
||||
# filename hint forces a non-image extension; magic sniff is bypassed by hint
|
||||
resp = server.handle_request(
|
||||
{
|
||||
"id": "1",
|
||||
"method": "image.attach_bytes",
|
||||
"params": {
|
||||
"session_id": "abx6",
|
||||
"content_base64": _PNG_1X1_B64,
|
||||
"filename": "evil.exe",
|
||||
},
|
||||
}
|
||||
)
|
||||
assert "error" in resp
|
||||
assert resp["error"]["code"] == 4016
|
||||
|
||||
|
||||
def test_pdf_attach_requires_poppler(monkeypatch, tmp_path):
|
||||
"""Without pdftoppm on PATH, pdf.attach returns a clear 5028."""
|
||||
_attach_bytes_cli(monkeypatch)
|
||||
monkeypatch.setattr(server, "_hermes_home", tmp_path)
|
||||
monkeypatch.setattr("shutil.which", lambda _name: None)
|
||||
server._sessions["pdf1"] = _session()
|
||||
|
||||
resp = server.handle_request(
|
||||
{
|
||||
"id": "1",
|
||||
"method": "pdf.attach",
|
||||
"params": {"session_id": "pdf1", "content_base64": "JVBERi0xLjQK"},
|
||||
}
|
||||
)
|
||||
assert "error" in resp
|
||||
assert resp["error"]["code"] == 5028
|
||||
|
||||
|
||||
def test_pdf_attach_rejects_non_pdf_bytes(monkeypatch, tmp_path):
|
||||
import base64 as _b64
|
||||
|
||||
_attach_bytes_cli(monkeypatch)
|
||||
monkeypatch.setattr(server, "_hermes_home", tmp_path)
|
||||
monkeypatch.setattr("shutil.which", lambda _name: "/usr/bin/pdftoppm")
|
||||
server._sessions["pdf2"] = _session()
|
||||
|
||||
not_pdf = _b64.b64encode(b"this is not a pdf").decode("ascii")
|
||||
resp = server.handle_request(
|
||||
{
|
||||
"id": "1",
|
||||
"method": "pdf.attach",
|
||||
"params": {"session_id": "pdf2", "content_base64": not_pdf},
|
||||
}
|
||||
)
|
||||
assert "error" in resp
|
||||
assert resp["error"]["code"] == 4017
|
||||
|
||||
|
||||
def test_pdf_attach_requires_path_or_bytes(monkeypatch, tmp_path):
|
||||
_attach_bytes_cli(monkeypatch)
|
||||
monkeypatch.setattr(server, "_hermes_home", tmp_path)
|
||||
monkeypatch.setattr("shutil.which", lambda _name: "/usr/bin/pdftoppm")
|
||||
server._sessions["pdf3"] = _session()
|
||||
|
||||
resp = server.handle_request(
|
||||
{"id": "1", "method": "pdf.attach", "params": {"session_id": "pdf3"}}
|
||||
)
|
||||
assert "error" in resp
|
||||
assert resp["error"]["code"] == 4015
|
||||
|
||||
|
||||
def test_decode_attach_base64_helper():
|
||||
import base64 as _b64
|
||||
|
||||
raw = _b64.b64encode(b"hello").decode("ascii")
|
||||
assert server._decode_attach_base64(raw, mime_prefix="image/") == b"hello"
|
||||
assert (
|
||||
server._decode_attach_base64(f"data:image/png;base64,{raw}", mime_prefix="image/")
|
||||
== b"hello"
|
||||
)
|
||||
# whitespace inside payload is tolerated
|
||||
assert server._decode_attach_base64(raw[:4] + "\n" + raw[4:], mime_prefix="image/") == b"hello"
|
||||
assert server._decode_attach_base64("@@@", mime_prefix="image/") is None
|
||||
|
||||
|
||||
def test_sniff_image_ext_magic_and_filename():
|
||||
assert server._sniff_image_ext(b"\x89PNG\r\n\x1a\n") == ".png"
|
||||
assert server._sniff_image_ext(b"\xff\xd8\xff\xe0") == ".jpg"
|
||||
assert server._sniff_image_ext(b"GIF89a....") == ".gif"
|
||||
assert server._sniff_image_ext(b"RIFF1234WEBPxxxx") == ".webp"
|
||||
assert server._sniff_image_ext(b"BM......") == ".bmp"
|
||||
assert server._sniff_image_ext(b"unknown") == ".png" # fallback
|
||||
# filename hint wins over magic bytes
|
||||
assert server._sniff_image_ext(b"\x89PNG", "photo.jpeg") == ".jpeg"
|
||||
|
|
|
|||
|
|
@ -5097,6 +5097,274 @@ def _(rid, params: dict) -> dict:
|
|||
return _err(rid, 5027, str(e))
|
||||
|
||||
|
||||
# Byte-upload attach caps. 25 MB matches Anthropic's per-image limit; 50 MB / 25
# pages bounds a single PDF drop so it can't blow the context budget.
# NOTE(review): the provider-limit figure above can't be verified from this file — confirm.
# Cap on decoded image bytes accepted by image.attach_bytes (exceeding it yields error 4018).
_ATTACH_BYTES_MAX_BYTES = 25 * 1024 * 1024
# Cap on decoded PDF bytes accepted by pdf.attach (exceeding it yields error 4018).
_PDF_ATTACH_MAX_BYTES = 50 * 1024 * 1024
# Page cap for a single pdf.attach call.
_PDF_ATTACH_MAX_PAGES = 25

# Leading magic bytes → file extension, for filename-less uploads.
# Scanned in order by _sniff_image_ext with a startswith() test. WebP is not
# listed because it needs a check at two offsets (RIFF....WEBP), done separately.
_IMAGE_MAGIC: tuple[tuple[bytes, str], ...] = (
    (b"\x89PNG\r\n\x1a\n", ".png"),
    (b"\xff\xd8\xff", ".jpg"),
    (b"GIF87a", ".gif"),
    (b"GIF89a", ".gif"),
    (b"BM", ".bmp"),
)
|
||||
|
||||
|
||||
def _decode_attach_base64(raw: str, *, mime_prefix: str) -> bytes | None:
|
||||
"""Decode a base64 (optionally data-URL-wrapped) payload.
|
||||
|
||||
Accepts ``data:<mime_prefix>...;base64,<b64>`` plus embedded whitespace.
|
||||
Returns the decoded bytes, or ``None`` when the input isn't valid base64.
|
||||
"""
|
||||
import base64 as _base64
|
||||
import re as _re
|
||||
|
||||
cleaned = raw.strip()
|
||||
m = _re.match(
|
||||
rf"^data:{_re.escape(mime_prefix)}[a-zA-Z0-9.+-]*;base64,(.*)$",
|
||||
cleaned,
|
||||
_re.DOTALL,
|
||||
)
|
||||
if m:
|
||||
cleaned = m.group(1)
|
||||
cleaned = _re.sub(r"\s+", "", cleaned)
|
||||
try:
|
||||
return _base64.b64decode(cleaned, validate=True)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _sniff_image_ext(img_bytes: bytes, filename: str = "") -> str:
|
||||
"""Resolve an image extension from a filename hint, else magic bytes.
|
||||
|
||||
Falls back to ``.png``. WebP needs the RIFF/WEBP container check, handled
|
||||
before the generic table.
|
||||
"""
|
||||
if filename:
|
||||
suffix = Path(filename).suffix.lower()
|
||||
if suffix:
|
||||
return suffix
|
||||
head = img_bytes[:16]
|
||||
if head.startswith(b"RIFF") and head[8:12] == b"WEBP":
|
||||
return ".webp"
|
||||
for sig, ext in _IMAGE_MAGIC:
|
||||
if head.startswith(sig):
|
||||
return ext
|
||||
return ".png"
|
||||
|
||||
|
||||
def _allowed_image_extensions() -> frozenset[str]:
|
||||
try:
|
||||
from cli import _IMAGE_EXTENSIONS
|
||||
|
||||
return frozenset(_IMAGE_EXTENSIONS)
|
||||
except Exception:
|
||||
return frozenset({".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"})
|
||||
|
||||
|
||||
def _queue_attached_image(session: dict, img_bytes: bytes, ext: str, *, prefix: str) -> Path:
    """Persist image bytes under ``<hermes home>/images`` and queue the file.

    Args:
        session: Session dict; its ``image_counter`` is bumped and the written
            path is appended to its ``attached_images`` list (created if absent).
        img_bytes: Raw image content to write.
        ext: Dot-prefixed extension for the new file.
        prefix: Leading component of the generated file name.

    Returns:
        Path of the written file, named ``<prefix>_<YYYYmmdd_HHMMSS>_<counter><ext>``.

    Raises:
        Exception: Whatever the write raises; the counter bump is undone first.
    """
    counter = session.get("image_counter", 0) + 1
    session["image_counter"] = counter

    images_dir = _hermes_home / "images"
    images_dir.mkdir(parents=True, exist_ok=True)

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    destination = images_dir / f"{prefix}_{stamp}_{counter}{ext}"
    try:
        destination.write_bytes(img_bytes)
    except Exception:
        # Nothing was attached, so give the counter slot back before re-raising.
        session["image_counter"] = max(0, session["image_counter"] - 1)
        raise

    session.setdefault("attached_images", []).append(str(destination))
    return destination
|
||||
|
||||
|
||||
@method("image.attach_bytes")
def _(rid, params: dict) -> dict:
    """Attach an image to the session from base64 bytes (remote-client path).

    A desktop app or web dashboard running on a DIFFERENT machine than the
    gateway can't hand us a local path — that file only exists on the client's
    disk. So it uploads the raw image bytes (base64) and we write them into the
    gateway's own images dir. The response shape mirrors ``image.attach`` so the
    client treats both identically.

    Params:
      content_base64 / data (str, required): base64 image bytes. Accepts a
        ``data:image/...;base64,`` prefix and embedded whitespace. ``data`` is
        an accepted alias for older desktop builds.
      filename / ext (str, optional): extension hint. ``filename``'s suffix is
        used when it has one; otherwise ``ext`` (with or without a leading dot)
        is used. Without either, magic bytes identify PNG/JPEG/GIF/WebP/BMP,
        falling back to ``.png``.

    Errors:
      4015 missing payload · 4016 extension not allowed · 4017 invalid base64 or
      empty image · 4018 over ``_ATTACH_BYTES_MAX_BYTES`` · 5027 write failed.
    """
    session, err = _sess(params, rid)
    if err:
        return err

    raw_b64 = str(params.get("content_base64") or params.get("data") or "").strip()
    if not raw_b64:
        return _err(rid, 4015, "content_base64 required")

    img_bytes = _decode_attach_base64(raw_b64, mime_prefix="image/")
    if img_bytes is None:
        return _err(rid, 4017, "data is not valid base64")
    if not img_bytes:
        return _err(rid, 4017, "image is empty")
    if len(img_bytes) > _ATTACH_BYTES_MAX_BYTES:
        mb = _ATTACH_BYTES_MAX_BYTES // (1024 * 1024)
        return _err(rid, 4018, f"image too large ({len(img_bytes)} bytes; cap is {mb} MB)")

    filename = str(params.get("filename", "") or "")
    ext_hint = str(params.get("ext", "") or "").strip().lower()
    if ext_hint and not ext_hint.startswith("."):
        ext_hint = "." + ext_hint
    # A filename without a suffix carries no extension information, so fall
    # back to the explicit ``ext`` hint instead of silently dropping it.
    if Path(filename).suffix:
        hint_name = filename
    else:
        hint_name = f"x{ext_hint}" if ext_hint else ""
    ext = _sniff_image_ext(img_bytes, hint_name)
    if ext not in _allowed_image_extensions():
        return _err(rid, 4016, f"unsupported image extension: {ext}")

    try:
        img_path = _queue_attached_image(session, img_bytes, ext, prefix="upload")
    except Exception as e:
        return _err(rid, 5027, f"write failed: {e}")

    return _ok(
        rid,
        {
            "attached": True,
            "path": str(img_path),
            "count": len(session["attached_images"]),
            "remainder": "",
            "text": f"[User attached image: {img_path.name}]",
            "bytes": len(img_bytes),
            **_image_meta(img_path),
        },
    )
|
||||
|
||||
|
||||
@method("pdf.attach")
def _(rid, params: dict) -> dict:
    """Attach a PDF by rendering each page to PNG and queuing the pages.

    Anthropic's vision pipeline accepts images, not PDFs, so this runs
    ``pdftoppm`` (poppler-utils) at 150 DPI per page and queues each rendered
    page as an attached image. Accepts either a host ``path`` (local mode) or
    base64 ``content_base64`` (remote upload). Caps at 50 MB / 25 pages per call.

    Requires ``pdftoppm`` on $PATH (``apt install poppler-utils``); returns 5028
    if missing.

    Params:
        path (str, optional): PDF path on the gateway host. Only consulted
            when no base64 payload is supplied.
        content_base64 / data (str, optional): base64 PDF bytes; takes
            precedence over ``path`` when both are given. The decoded payload
            must start with the ``%PDF-`` magic bytes.
        filename (str, optional): display name for an uploaded PDF; defaults
            to ``uploaded.pdf``. Ignored in path mode, where the file's own
            name is used.
        first_page (int, optional): first page to render, 1-based. Missing or
            falsy values default to 1.
        last_page (int, optional): last page to render, inclusive. Defaults
            to ``first_page + _PDF_ATTACH_MAX_PAGES - 1``.

    Error codes:
        4015: missing input, or an invalid ``first_page``/``last_page``.
        4016: ``path`` does not resolve to an existing ``.pdf`` file.
        4017: payload is not valid base64, is empty, or is not a PDF.
        4018: PDF exceeds ``_PDF_ATTACH_MAX_BYTES``.
        4019: requested page range exceeds ``_PDF_ATTACH_MAX_PAGES``.
        5027: writing the uploaded PDF or queuing a rendered page failed.
        5028: ``pdftoppm`` is missing, could not be started, timed out,
            exited non-zero, or produced no pages.

    Returns:
        On success, an ``_ok`` response whose payload carries ``attached``,
        ``filename``, ``pages_attached``, ``pages`` (one entry per rendered
        page with ``path``, ``page`` and the ``_image_meta`` fields),
        ``count`` and ``text``.
    """
    import shutil
    import subprocess
    import tempfile

    session, err = _sess(params, rid)
    if err:
        return err

    if shutil.which("pdftoppm") is None:
        return _err(rid, 5028, "pdftoppm not installed (poppler-utils package required)")

    raw_path = str(params.get("path", "") or "").strip()
    raw_b64 = str(params.get("content_base64") or params.get("data") or "").strip()
    if not raw_path and not raw_b64:
        return _err(rid, 4015, "path or content_base64 required")

    # All intermediate files (the uploaded PDF and the rendered pages) live in
    # a scratch directory that is removed when the block exits, including on
    # every early error return below.
    with tempfile.TemporaryDirectory(prefix="pdf_attach_") as td:
        td_path = Path(td)
        if raw_b64:
            pdf_bytes = _decode_attach_base64(raw_b64, mime_prefix="application/pdf")
            if pdf_bytes is None:
                return _err(rid, 4017, "data is not valid base64")
            if not pdf_bytes:
                return _err(rid, 4017, "decoded PDF is empty")
            if len(pdf_bytes) > _PDF_ATTACH_MAX_BYTES:
                mb = _PDF_ATTACH_MAX_BYTES // (1024 * 1024)
                return _err(rid, 4018, f"PDF too large ({len(pdf_bytes)} bytes; cap is {mb} MB)")
            if pdf_bytes[:5] != b"%PDF-":
                return _err(rid, 4017, "payload is not a PDF (missing %PDF- magic bytes)")
            pdf_path = td_path / "input.pdf"
            try:
                pdf_path.write_bytes(pdf_bytes)
            except OSError as e:
                # Report disk-full / permission problems with the same
                # structured code image.attach_bytes uses for write failures.
                return _err(rid, 5027, f"write failed: {e}")
            display_name = str(params.get("filename", "") or "uploaded.pdf")
        else:
            try:
                from cli import _resolve_attachment_path

                resolved = _resolve_attachment_path(raw_path)
            except Exception:
                # Any resolver failure is reported as "not found" below.
                resolved = None
            if resolved is None or not Path(resolved).is_file():
                return _err(rid, 4016, f"PDF not found: {raw_path}")
            if Path(resolved).suffix.lower() != ".pdf":
                return _err(rid, 4016, f"not a PDF: {Path(resolved).name}")
            if Path(resolved).stat().st_size > _PDF_ATTACH_MAX_BYTES:
                mb = _PDF_ATTACH_MAX_BYTES // (1024 * 1024)
                return _err(rid, 4018, f"PDF too large; cap is {mb} MB")
            pdf_path = Path(resolved)
            display_name = pdf_path.name

        try:
            first_page = int(params.get("first_page") or 1)
            last_page_param = params.get("last_page")
            last_page = int(last_page_param) if last_page_param is not None else None
        except (TypeError, ValueError):
            return _err(rid, 4015, "first_page/last_page must be integers")

        if first_page < 1:
            return _err(rid, 4015, "first_page must be >= 1")
        if last_page is None:
            # No explicit end: request the largest window the cap allows.
            last_page = first_page + _PDF_ATTACH_MAX_PAGES - 1
        if last_page < first_page:
            return _err(rid, 4015, "last_page must be >= first_page")
        if last_page - first_page + 1 > _PDF_ATTACH_MAX_PAGES:
            return _err(rid, 4019, f"page range exceeds cap of {_PDF_ATTACH_MAX_PAGES} pages per attach call")

        out_prefix = td_path / "page"
        # argv list (no shell) so the PDF path is never shell-interpreted.
        argv = [
            "pdftoppm", "-png", "-r", "150",
            "-f", str(first_page), "-l", str(last_page),
            str(pdf_path), str(out_prefix),
        ]
        try:
            res = subprocess.run(argv, capture_output=True, text=True, timeout=120)
        except subprocess.TimeoutExpired:
            return _err(rid, 5028, "pdftoppm timed out (>120s)")
        except OSError as e:
            # The binary can vanish or be non-executable after the
            # shutil.which() check; keep that a structured error too.
            return _err(rid, 5028, f"pdftoppm could not be started: {e}")
        if res.returncode != 0:
            # Only the last few lines of output are surfaced to the client.
            tail = (res.stderr or res.stdout or "").strip().splitlines()[-3:]
            return _err(rid, 5028, "pdftoppm failed: " + " | ".join(tail))

        # Rendered pages are expected as "page-<number>.png" next to out_prefix.
        rendered = sorted(td_path.glob("page-*.png"))
        if not rendered:
            return _err(rid, 5028, "pdftoppm produced no pages (corrupt PDF?)")

        attached_pages = []
        for src in rendered:
            page_num = src.stem.split("-", 1)[-1]
            try:
                page_int = int(page_num)
            except ValueError:
                # Unparseable suffix: fall back to the page's position in the
                # rendered sequence.
                page_int = first_page + len(attached_pages)
            try:
                dst = _queue_attached_image(session, src.read_bytes(), ".png", prefix=f"pdf_p{page_num}")
            except Exception as e:
                # Mirror image.attach_bytes: a failed read/queue becomes a
                # structured 5027 instead of an unhandled exception.
                # NOTE(review): pages queued before the failure are not
                # rolled back here - confirm whether callers need that.
                return _err(rid, 5027, f"write failed: {e}")
            attached_pages.append({"path": str(dst), "page": page_int, **_image_meta(dst)})

        return _ok(
            rid,
            {
                "attached": True,
                "filename": display_name,
                "pages_attached": len(attached_pages),
                "pages": attached_pages,
                "count": len(session["attached_images"]),
                "text": f"[User attached PDF: {display_name} ({len(attached_pages)} page(s))]",
            },
        )
|
||||
|
||||
|
||||
@method("image.detach")
|
||||
def _(rid, params: dict) -> dict:
|
||||
session, err = _sess(params, rid)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue