// Return the payload of a `data:` URL: everything after the first comma.
// Yields '' when the input contains no comma at all.
function base64FromDataUrl(dataUrl: string): string {
  const comma = dataUrl.indexOf(',')

  return comma >= 0 ? dataUrl.slice(comma + 1) : ''
}

// Last non-empty segment of a path, splitting on both '/' and '\' so Windows
// and POSIX paths behave the same. Falls back to 'image.png' when the path has
// no usable segment (empty string, or only separators).
function imageFilenameFromPath(filePath: string): string {
  return filePath.split(/[\\/]/).filter(Boolean).pop() || 'image.png'
}

// Remote gateway: the local composer-image file lives on THIS machine's disk,
// not the gateway's, so read the bytes here and upload them via
// image.attach_bytes. Returns null when the file can't be read — that includes
// a missing desktop bridge, an empty/absent data URL, AND a read that rejects
// (e.g. the file was deleted), so callers only have one failure shape to handle.
async function readImageForRemoteAttach(
  filePath: string
): Promise<{ contentBase64: string; filename: string } | null> {
  try {
    const dataUrl = await window.hermesDesktop?.readFileDataUrl(filePath)
    const contentBase64 = dataUrl ? base64FromDataUrl(dataUrl) : ''

    return contentBase64 ? { contentBase64, filename: imageFilenameFromPath(filePath) } : null
  } catch {
    // A rejected read is "can't be read": report it as null rather than
    // leaking the raw bridge/IPC error past the documented contract.
    return null
  }
}
true const images = attachments.filter(attachment => attachment.kind === 'image' && attachment.path) + const remote = $connection.get()?.mode === 'remote' for (const attachment of images) { if (attachment.attachedSessionId === sessionId) { continue } - const result = await requestGateway('image.attach', { - session_id: sessionId, - path: attachment.path - }) + let result: ImageAttachResponse + + if (remote) { + // The gateway is on another machine — it can't read attachment.path + // (a path on THIS disk). Upload the bytes via image.attach_bytes. + const payload = attachment.path ? await readImageForRemoteAttach(attachment.path) : null + + if (!payload) { + const label = attachment.label || (attachment.path ? pathLabel(attachment.path) : 'image') + throw new Error(`Could not read ${label}`) + } + + result = await requestGateway('image.attach_bytes', { + session_id: sessionId, + content_base64: payload.contentBase64, + filename: payload.filename + }) + } else { + result = await requestGateway('image.attach', { + session_id: sessionId, + path: attachment.path + }) + } if (!result.attached) { const label = attachment.label || (attachment.path ? pathLabel(attachment.path) : 'image') diff --git a/apps/desktop/src/app/types.ts b/apps/desktop/src/app/types.ts index fc39a6b80e2..23fd1c6f48f 100644 --- a/apps/desktop/src/app/types.ts +++ b/apps/desktop/src/app/types.ts @@ -13,6 +13,13 @@ export interface ImageAttachResponse { path?: string text?: string message?: string + // Returned by the byte-upload variant (image.attach_bytes) used in remote mode. 
+ count?: number + bytes?: number + name?: string + width?: number + height?: number + token_estimate?: number } export interface ImageDetachResponse { diff --git a/apps/desktop/src/components/assistant-ui/directive-text.tsx b/apps/desktop/src/components/assistant-ui/directive-text.tsx index c1cde84d4d0..79f772d450f 100644 --- a/apps/desktop/src/components/assistant-ui/directive-text.tsx +++ b/apps/desktop/src/components/assistant-ui/directive-text.tsx @@ -7,6 +7,7 @@ import { Fragment, useEffect, useMemo, useState } from 'react' import { ZoomableImage } from '@/components/chat/zoomable-image' import { extractEmbeddedImages } from '@/lib/embedded-images' +import { gatewayMediaDataUrl, isRemoteGateway } from '@/lib/media' const HERMES_REF_TYPES = ['file', 'folder', 'url', 'image', 'tool', 'line', 'terminal', 'session'] as const type HermesRefType = (typeof HERMES_REF_TYPES)[number] @@ -327,25 +328,32 @@ export const DirectiveText: TextMessagePartComponent = ({ text }: TextMessagePar * messages render after the backend embeds the data URL, so the UX is stable * across initial send and refresh. */ const DirectiveImage: FC<{ id: string; label: string }> = ({ id, label }) => { - const remote = /^(?:https?|data):/i.test(id) - const [src, setSrc] = useState(remote ? id : null) + const isUrl = /^(?:https?|data):/i.test(id) + const [src, setSrc] = useState(isUrl ? id : null) const [failed, setFailed] = useState(false) useEffect(() => { - if (remote || !id) { + if (isUrl || !id) { return } let alive = true - void window.hermesDesktop - ?.readFileDataUrl(id) - .then(url => alive && setSrc(url)) + + // Remote gateway: the image lives on the gateway's disk, not ours — fetch + // it over the authenticated API. Local: read it straight off this disk. + const load = + window.hermesDesktop && isRemoteGateway() + ? 
gatewayMediaDataUrl(id) + : window.hermesDesktop?.readFileDataUrl(id) + + void Promise.resolve(load) + .then(url => alive && url && setSrc(url)) .catch(() => alive && setFailed(true)) return () => { alive = false } - }, [id, remote]) + }, [id, isUrl]) if (failed) { return diff --git a/apps/desktop/src/components/assistant-ui/markdown-text.tsx b/apps/desktop/src/components/assistant-ui/markdown-text.tsx index 3ec9db314ec..30f77234f46 100644 --- a/apps/desktop/src/components/assistant-ui/markdown-text.tsx +++ b/apps/desktop/src/components/assistant-ui/markdown-text.tsx @@ -17,6 +17,8 @@ import { createMemoizedMathPlugin } from '@/lib/katex-memo' import { preprocessMarkdown } from '@/lib/markdown-preprocess' import { filePathFromMediaPath, + gatewayMediaDataUrl, + isRemoteGateway, mediaExternalUrl, mediaKind, mediaName, @@ -51,6 +53,12 @@ async function mediaSrc(path: string): Promise { return mediaStreamUrl(path) } + // Remote gateway: the image lives on the gateway machine, so read it over the + // authenticated API rather than this machine's disk. 
+ if (window.hermesDesktop && isRemoteGateway()) { + return gatewayMediaDataUrl(path) + } + if (!window.hermesDesktop?.readFileDataUrl) { return mediaExternalUrl(path) } diff --git a/apps/desktop/src/lib/media.remote.test.ts b/apps/desktop/src/lib/media.remote.test.ts new file mode 100644 index 00000000000..9de4885a517 --- /dev/null +++ b/apps/desktop/src/lib/media.remote.test.ts @@ -0,0 +1,58 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' + +import { $connection } from '@/store/session' + +import { filePathFromMediaPath, gatewayMediaDataUrl, isRemoteGateway } from './media' + +describe('isRemoteGateway', () => { + afterEach(() => { + $connection.set(null) + }) + + it('is false with no connection', () => { + $connection.set(null) + expect(isRemoteGateway()).toBe(false) + }) + + it('is false in local mode', () => { + $connection.set({ mode: 'local' } as never) + expect(isRemoteGateway()).toBe(false) + }) + + it('is true in remote mode', () => { + $connection.set({ mode: 'remote' } as never) + expect(isRemoteGateway()).toBe(true) + }) +}) + +describe('filePathFromMediaPath', () => { + it('passes through a plain path', () => { + expect(filePathFromMediaPath('/home/u/.hermes/images/a.png')).toBe('/home/u/.hermes/images/a.png') + }) + + it('decodes a file:// URL with encoded characters', () => { + expect(filePathFromMediaPath('file:///tmp/a%20b.png')).toBe('/tmp/a b.png') + }) +}) + +describe('gatewayMediaDataUrl', () => { + const api = vi.fn(async () => ({ data_url: 'data:image/png;base64,ZHVtbXk=' })) + + beforeEach(() => { + api.mockClear() + vi.stubGlobal('window', { hermesDesktop: { api } }) + }) + + afterEach(() => { + vi.unstubAllGlobals() + }) + + it('requests the encoded gateway path and returns the data URL', async () => { + const url = await gatewayMediaDataUrl('/home/u/.hermes/images/a b.png') + + expect(url).toBe('data:image/png;base64,ZHVtbXk=') + expect(api).toHaveBeenCalledWith({ + path: 
// True when this desktop shell is wired to a remote gateway. Local media paths
// then live on the gateway machine, not this disk, so we fetch them over the API.
export function isRemoteGateway(): boolean {
  return $connection.get()?.mode === 'remote'
}

// Fetch a gateway-local image as a data URL via the authenticated REST bridge.
// Used in remote mode where readFileDataUrl (which reads THIS machine's disk)
// can't see files the agent wrote on the gateway. Requires the gateway to
// expose GET /api/media (hermes_cli/web_server.py).
//
// Rejects with an Error when the desktop bridge is unavailable or when the
// gateway response carries no usable `data_url`, so callers' catch paths run
// instead of silently receiving `undefined` typed as a string.
export async function gatewayMediaDataUrl(path: string): Promise<string> {
  const bridge = window.hermesDesktop

  if (!bridge) {
    throw new Error('Desktop bridge unavailable: cannot fetch gateway media')
  }

  const file = filePathFromMediaPath(path)

  const result = await bridge.api<{ data_url: string }>({
    path: `/api/media?path=${encodeURIComponent(file)}`
  })

  // The response crosses a network boundary; don't trust its shape.
  if (typeof result?.data_url !== 'string' || !result.data_url) {
    throw new Error(`Gateway returned no media data for ${file}`)
  }

  return result.data_url
}
# MIME type served for each image extension ``GET /api/media`` will return.
# A suffix that is not a key here is rejected, so an authenticated caller can't
# pull non-image files through the endpoint.
_MEDIA_CONTENT_TYPES = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".svg": "image/svg+xml",
    ".bmp": "image/bmp",
    ".ico": "image/x-icon",
}
# Largest file (in bytes) the endpoint will inline into a data URL.
_MEDIA_MAX_BYTES = 25 * 1024 * 1024


def _media_serve_roots() -> list[Path]:
    """Directories ``GET /api/media`` is allowed to read from.

    Confined to the ``images``, ``screenshots`` and ``cache`` directories under
    the Hermes home. This stops an authenticated client from reading
    image-extension files anywhere else on disk (e.g. a renamed key) merely
    because the suffix passes the allowlist.

    Returns:
        The resolved root paths. A root that fails to resolve is skipped rather
        than aborting the request.
    """
    home = get_hermes_home()
    roots = [home / "images", home / "screenshots", home / "cache"]
    out: list[Path] = []
    for root in roots:
        try:
            out.append(root.resolve())
        except (OSError, RuntimeError):
            continue
    return out


@app.get("/api/media")
async def get_media(path: str):
    """Return a gateway-local image file as a base64 data URL.

    Lets remote clients (the desktop app over the network, or the web dashboard
    in a browser) display images the agent wrote to *this* machine's filesystem
    — they can't read the gateway's local disk directly.

    Restricted to an image-extension allowlist, a size cap, AND the gateway's
    own media roots (compared after ``resolve()``, so symlinks and ``..`` are
    collapsed first) so it can't be used to read arbitrary files.

    Args:
        path: Filesystem path of the image on the gateway host.

    Returns:
        ``{"data_url": "data:<mime>;base64,<payload>"}``.

    Raises:
        HTTPException: 400 for an unresolvable path, 415 for a suffix outside
            the allowlist, 403 for a path outside the media roots, 404 when the
            file is missing or unreadable, 413 when it exceeds the size cap.
    """
    try:
        target = Path(path).expanduser().resolve()
    except (OSError, RuntimeError, ValueError):
        # ValueError covers paths with an embedded NUL byte, which would
        # otherwise escape as an unhandled 500.
        raise HTTPException(status_code=400, detail="Invalid path")

    content_type = _MEDIA_CONTENT_TYPES.get(target.suffix.lower())
    if content_type is None:
        raise HTTPException(status_code=415, detail="Unsupported media type")

    roots = _media_serve_roots()
    if not any(target == root or root in target.parents for root in roots):
        raise HTTPException(status_code=403, detail="Path outside media roots")

    if not target.is_file():
        raise HTTPException(status_code=404, detail="File not found")

    try:
        # Cheap rejection first, then a bounded read so a file that grows (or is
        # swapped) between the stat and the read still can't exceed the cap.
        if target.stat().st_size > _MEDIA_MAX_BYTES:
            raise HTTPException(status_code=413, detail="File too large")
        with target.open("rb") as fh:
            payload = fh.read(_MEDIA_MAX_BYTES + 1)
    except OSError:
        # Deleted or made unreadable after the is_file() check.
        raise HTTPException(status_code=404, detail="File not found")
    if len(payload) > _MEDIA_MAX_BYTES:
        raise HTTPException(status_code=413, detail="File too large")

    encoded = base64.b64encode(payload).decode("ascii")
    return {"data_url": f"data:{content_type};base64,{encoded}"}
dir is returned as a data URL.""" + from hermes_constants import get_hermes_home + + img_dir = get_hermes_home() / "images" + img_dir.mkdir(parents=True, exist_ok=True) + img = img_dir / "shot.png" + img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 16) + + resp = self.client.get("/api/media", params={"path": str(img)}) + assert resp.status_code == 200 + assert resp.json()["data_url"].startswith("data:image/png;base64,") + + def test_get_media_rejects_path_outside_roots(self, tmp_path): + """An image-extension file outside the media roots is forbidden.""" + outside = tmp_path / "secret.png" + outside.write_bytes(b"\x89PNG\r\n\x1a\n") + + resp = self.client.get("/api/media", params={"path": str(outside)}) + assert resp.status_code == 403 + + def test_get_media_rejects_non_image_extension(self): + from hermes_constants import get_hermes_home + + img_dir = get_hermes_home() / "images" + img_dir.mkdir(parents=True, exist_ok=True) + env = img_dir / "leak.env" + env.write_text("SECRET=1") + + resp = self.client.get("/api/media", params={"path": str(env)}) + assert resp.status_code == 415 + + def test_get_media_404_for_missing_file(self): + from hermes_constants import get_hermes_home + + missing = get_hermes_home() / "images" / "nope.png" + resp = self.client.get("/api/media", params={"path": str(missing)}) + assert resp.status_code == 404 + + def test_get_media_requires_auth(self): + from hermes_cli.web_server import _SESSION_HEADER_NAME + + resp = self.client.get( + "/api/media", + params={"path": "/tmp/x.png"}, + headers={_SESSION_HEADER_NAME: "wrong-token"}, + ) + assert resp.status_code == 401 + # ── Dashboard font override ───────────────────────────────────────── def test_get_dashboard_font_defaults_to_theme(self): diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index d8b4723e3a2..9ae79ed0cbf 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -5774,3 +5774,215 @@ def 
test_notification_event_dedup_key_keeps_completions_one_shot(): assert server._notification_event_dedup_key(first) == server._notification_event_dedup_key( replay ) + + +# --- image.attach_bytes / pdf.attach (remote-client byte upload) ------------- + +# Smallest valid 1x1 PNG, base64-encoded. +_PNG_1X1_B64 = ( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk" + "+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" +) + + +def _attach_bytes_cli(monkeypatch): + fake_cli = types.ModuleType("cli") + fake_cli._IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"} + monkeypatch.setitem(sys.modules, "cli", fake_cli) + + +def test_image_attach_bytes_writes_to_gateway_dir(monkeypatch, tmp_path): + """Remote client uploads base64 bytes; gateway writes them to its own disk.""" + _attach_bytes_cli(monkeypatch) + monkeypatch.setattr(server, "_hermes_home", tmp_path) + server._sessions["abx"] = _session() + + resp = server.handle_request( + { + "id": "1", + "method": "image.attach_bytes", + "params": { + "session_id": "abx", + "content_base64": _PNG_1X1_B64, + "filename": "shot.png", + }, + } + ) + + res = resp["result"] + assert res["attached"] is True + written = Path(res["path"]) + assert written.is_file() + assert written.parent == tmp_path / "images" + assert written.read_bytes().startswith(b"\x89PNG") + assert len(server._sessions["abx"]["attached_images"]) == 1 + assert res["bytes"] > 0 + + +def test_image_attach_bytes_accepts_data_url_prefix(monkeypatch, tmp_path): + _attach_bytes_cli(monkeypatch) + monkeypatch.setattr(server, "_hermes_home", tmp_path) + server._sessions["abx2"] = _session() + + resp = server.handle_request( + { + "id": "1", + "method": "image.attach_bytes", + "params": { + "session_id": "abx2", + "content_base64": f"data:image/png;base64,{_PNG_1X1_B64}", + }, + } + ) + assert resp["result"]["attached"] is True + + +def test_image_attach_bytes_data_alias_and_magic_sniff(monkeypatch, tmp_path): + """Older desktop builds send `data` (not 
content_base64); ext sniffed from bytes.""" + _attach_bytes_cli(monkeypatch) + monkeypatch.setattr(server, "_hermes_home", tmp_path) + server._sessions["abx3"] = _session() + + resp = server.handle_request( + { + "id": "1", + "method": "image.attach_bytes", + "params": {"session_id": "abx3", "data": _PNG_1X1_B64}, + } + ) + res = resp["result"] + assert res["attached"] is True + assert Path(res["path"]).suffix == ".png" # sniffed from magic bytes + + +def test_image_attach_bytes_rejects_invalid_base64(monkeypatch, tmp_path): + _attach_bytes_cli(monkeypatch) + monkeypatch.setattr(server, "_hermes_home", tmp_path) + server._sessions["abx4"] = _session() + + resp = server.handle_request( + { + "id": "1", + "method": "image.attach_bytes", + "params": {"session_id": "abx4", "content_base64": "!!!not base64!!!"}, + } + ) + assert "error" in resp + assert resp["error"]["code"] == 4017 + + +def test_image_attach_bytes_rejects_oversize(monkeypatch, tmp_path): + import base64 as _b64 + + _attach_bytes_cli(monkeypatch) + monkeypatch.setattr(server, "_hermes_home", tmp_path) + monkeypatch.setattr(server, "_ATTACH_BYTES_MAX_BYTES", 10) + server._sessions["abx5"] = _session() + + big = _b64.b64encode(b"\x89PNG\r\n\x1a\n" + b"0" * 100).decode("ascii") + resp = server.handle_request( + { + "id": "1", + "method": "image.attach_bytes", + "params": {"session_id": "abx5", "content_base64": big}, + } + ) + assert "error" in resp + assert resp["error"]["code"] == 4018 + + +def test_image_attach_bytes_rejects_unsupported_extension(monkeypatch, tmp_path): + _attach_bytes_cli(monkeypatch) + monkeypatch.setattr(server, "_hermes_home", tmp_path) + server._sessions["abx6"] = _session() + + # filename hint forces a non-image extension; magic sniff is bypassed by hint + resp = server.handle_request( + { + "id": "1", + "method": "image.attach_bytes", + "params": { + "session_id": "abx6", + "content_base64": _PNG_1X1_B64, + "filename": "evil.exe", + }, + } + ) + assert "error" in resp + assert 
resp["error"]["code"] == 4016 + + +def test_pdf_attach_requires_poppler(monkeypatch, tmp_path): + """Without pdftoppm on PATH, pdf.attach returns a clear 5028.""" + _attach_bytes_cli(monkeypatch) + monkeypatch.setattr(server, "_hermes_home", tmp_path) + monkeypatch.setattr("shutil.which", lambda _name: None) + server._sessions["pdf1"] = _session() + + resp = server.handle_request( + { + "id": "1", + "method": "pdf.attach", + "params": {"session_id": "pdf1", "content_base64": "JVBERi0xLjQK"}, + } + ) + assert "error" in resp + assert resp["error"]["code"] == 5028 + + +def test_pdf_attach_rejects_non_pdf_bytes(monkeypatch, tmp_path): + import base64 as _b64 + + _attach_bytes_cli(monkeypatch) + monkeypatch.setattr(server, "_hermes_home", tmp_path) + monkeypatch.setattr("shutil.which", lambda _name: "/usr/bin/pdftoppm") + server._sessions["pdf2"] = _session() + + not_pdf = _b64.b64encode(b"this is not a pdf").decode("ascii") + resp = server.handle_request( + { + "id": "1", + "method": "pdf.attach", + "params": {"session_id": "pdf2", "content_base64": not_pdf}, + } + ) + assert "error" in resp + assert resp["error"]["code"] == 4017 + + +def test_pdf_attach_requires_path_or_bytes(monkeypatch, tmp_path): + _attach_bytes_cli(monkeypatch) + monkeypatch.setattr(server, "_hermes_home", tmp_path) + monkeypatch.setattr("shutil.which", lambda _name: "/usr/bin/pdftoppm") + server._sessions["pdf3"] = _session() + + resp = server.handle_request( + {"id": "1", "method": "pdf.attach", "params": {"session_id": "pdf3"}} + ) + assert "error" in resp + assert resp["error"]["code"] == 4015 + + +def test_decode_attach_base64_helper(): + import base64 as _b64 + + raw = _b64.b64encode(b"hello").decode("ascii") + assert server._decode_attach_base64(raw, mime_prefix="image/") == b"hello" + assert ( + server._decode_attach_base64(f"data:image/png;base64,{raw}", mime_prefix="image/") + == b"hello" + ) + # whitespace inside payload is tolerated + assert server._decode_attach_base64(raw[:4] + "\n" 
+ raw[4:], mime_prefix="image/") == b"hello" + assert server._decode_attach_base64("@@@", mime_prefix="image/") is None + + +def test_sniff_image_ext_magic_and_filename(): + assert server._sniff_image_ext(b"\x89PNG\r\n\x1a\n") == ".png" + assert server._sniff_image_ext(b"\xff\xd8\xff\xe0") == ".jpg" + assert server._sniff_image_ext(b"GIF89a....") == ".gif" + assert server._sniff_image_ext(b"RIFF1234WEBPxxxx") == ".webp" + assert server._sniff_image_ext(b"BM......") == ".bmp" + assert server._sniff_image_ext(b"unknown") == ".png" # fallback + # filename hint wins over magic bytes + assert server._sniff_image_ext(b"\x89PNG", "photo.jpeg") == ".jpeg" diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 888ce009ec6..0e55905428f 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -5097,6 +5097,274 @@ def _(rid, params: dict) -> dict: return _err(rid, 5027, str(e)) +# Byte-upload attach caps. 25 MB matches Anthropic's per-image limit; 50 MB / 25 +# pages bounds a single PDF drop so it can't blow the context budget. +_ATTACH_BYTES_MAX_BYTES = 25 * 1024 * 1024 +_PDF_ATTACH_MAX_BYTES = 50 * 1024 * 1024 +_PDF_ATTACH_MAX_PAGES = 25 + +# Leading magic bytes → file extension, for filename-less uploads. +_IMAGE_MAGIC: tuple[tuple[bytes, str], ...] = ( + (b"\x89PNG\r\n\x1a\n", ".png"), + (b"\xff\xd8\xff", ".jpg"), + (b"GIF87a", ".gif"), + (b"GIF89a", ".gif"), + (b"BM", ".bmp"), +) + + +def _decode_attach_base64(raw: str, *, mime_prefix: str) -> bytes | None: + """Decode a base64 (optionally data-URL-wrapped) payload. + + Accepts ``data:...;base64,`` plus embedded whitespace. + Returns the decoded bytes, or ``None`` when the input isn't valid base64. 
+ """ + import base64 as _base64 + import re as _re + + cleaned = raw.strip() + m = _re.match( + rf"^data:{_re.escape(mime_prefix)}[a-zA-Z0-9.+-]*;base64,(.*)$", + cleaned, + _re.DOTALL, + ) + if m: + cleaned = m.group(1) + cleaned = _re.sub(r"\s+", "", cleaned) + try: + return _base64.b64decode(cleaned, validate=True) + except Exception: + return None + + +def _sniff_image_ext(img_bytes: bytes, filename: str = "") -> str: + """Resolve an image extension from a filename hint, else magic bytes. + + Falls back to ``.png``. WebP needs the RIFF/WEBP container check, handled + before the generic table. + """ + if filename: + suffix = Path(filename).suffix.lower() + if suffix: + return suffix + head = img_bytes[:16] + if head.startswith(b"RIFF") and head[8:12] == b"WEBP": + return ".webp" + for sig, ext in _IMAGE_MAGIC: + if head.startswith(sig): + return ext + return ".png" + + +def _allowed_image_extensions() -> frozenset[str]: + try: + from cli import _IMAGE_EXTENSIONS + + return frozenset(_IMAGE_EXTENSIONS) + except Exception: + return frozenset({".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"}) + + +def _queue_attached_image(session: dict, img_bytes: bytes, ext: str, *, prefix: str) -> Path: + """Write image bytes into the gateway's images dir and queue them. + + Mirrors what ``image.attach`` does for a local path: appends to + ``session["attached_images"]`` so the next ``prompt.submit`` picks it up via + the existing native-image-attach pipeline. Returns the written path. 
def _queue_attached_image(session: dict, img_bytes: bytes, ext: str, *, prefix: str) -> Path:
    """Write image bytes into the gateway's images dir and queue them.

    Appends the written path to ``session["attached_images"]`` and bumps
    ``session["image_counter"]``, which is also used to make the filename
    unique within the same second.

    Args:
        session: The session dict to queue the image on (mutated in place).
        img_bytes: Raw image content to write.
        ext: Extension including the leading dot.
        prefix: Filename prefix, e.g. ``"upload"``.

    Returns:
        The path of the file that was written.

    Raises:
        Exception: Whatever the directory creation or write raised; the counter
            bump is undone first so a failed attach leaves no gap.
    """
    session["image_counter"] = session.get("image_counter", 0) + 1
    img_dir = _hermes_home / "images"
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    img_path = img_dir / f"{prefix}_{ts}_{session['image_counter']}{ext}"
    try:
        img_dir.mkdir(parents=True, exist_ok=True)
        img_path.write_bytes(img_bytes)
    except Exception:
        session["image_counter"] = max(0, session["image_counter"] - 1)
        raise
    session.setdefault("attached_images", []).append(str(img_path))
    return img_path


@method("image.attach_bytes")
def _(rid, params: dict) -> dict:
    """Attach an image to the session from base64 bytes (remote-client path).

    A desktop app or web dashboard running on a DIFFERENT machine than the
    gateway can't hand us a local path — that file only exists on the client's
    disk. So it uploads the raw image bytes (base64) and we write them into the
    gateway's own images dir.

    Params:
        content_base64 / data (str, required): base64 image bytes. Accepts a
            ``data:image/...;base64,`` prefix and embedded whitespace. ``data``
            is an accepted alias for older desktop builds.
        filename / ext (str, optional): extension hint. The filename's suffix
            wins; ``ext`` is used when the filename has none. Without either,
            magic bytes identify PNG/JPEG/GIF/WebP/BMP, falling back to ``.png``.

    Errors: 4015 missing payload, 4017 invalid/empty base64, 4018 over the size
    cap, 4016 extension not allowed, 5027 write failure.
    """
    session, err = _sess(params, rid)
    if err:
        return err

    raw_b64 = str(params.get("content_base64") or params.get("data") or "").strip()
    if not raw_b64:
        return _err(rid, 4015, "content_base64 required")

    img_bytes = _decode_attach_base64(raw_b64, mime_prefix="image/")
    if img_bytes is None:
        return _err(rid, 4017, "data is not valid base64")
    if not img_bytes:
        return _err(rid, 4017, "image is empty")
    if len(img_bytes) > _ATTACH_BYTES_MAX_BYTES:
        mb = _ATTACH_BYTES_MAX_BYTES // (1024 * 1024)
        return _err(rid, 4018, f"image too large ({len(img_bytes)} bytes; cap is {mb} MB)")

    filename = str(params.get("filename", "") or "")
    ext_hint = str(params.get("ext", "") or "").strip().lower()
    if ext_hint and not ext_hint.startswith("."):
        ext_hint = "." + ext_hint
    # A filename without a suffix carries no extension information, so it must
    # not shadow an explicit ``ext`` hint.
    if Path(filename).suffix:
        hint = filename
    else:
        hint = f"x{ext_hint}" if ext_hint else ""
    ext = _sniff_image_ext(img_bytes, hint)
    if ext not in _allowed_image_extensions():
        return _err(rid, 4016, f"unsupported image extension: {ext}")

    try:
        img_path = _queue_attached_image(session, img_bytes, ext, prefix="upload")
    except Exception as e:
        return _err(rid, 5027, f"write failed: {e}")

    return _ok(
        rid,
        {
            "attached": True,
            "path": str(img_path),
            "count": len(session["attached_images"]),
            "remainder": "",
            "text": f"[User attached image: {img_path.name}]",
            "bytes": len(img_bytes),
            **_image_meta(img_path),
        },
    )


@method("pdf.attach")
def _(rid, params: dict) -> dict:
    """Attach a PDF by rendering each page to PNG and queuing the pages.

    Runs ``pdftoppm`` (poppler-utils) at 150 DPI and queues each rendered page
    as an attached image. Accepts either a host ``path`` or base64
    ``content_base64`` / ``data`` (remote upload); the base64 form wins when
    both are given. Caps at 50 MB / 25 pages per call.

    Params:
        path (str): PDF path on the gateway host.
        content_base64 / data (str): base64 PDF bytes, optionally data-URL wrapped.
        filename (str, optional): display name for uploaded bytes.
        first_page / last_page (int, optional): 1-based inclusive page range;
            defaults to the first ``_PDF_ATTACH_MAX_PAGES`` pages.

    Errors: 5028 pdftoppm missing/failed/timed out, 4015 missing or malformed
    params, 4016 path not found or not a PDF, 4017 invalid payload, 4018 too
    large, 4019 page range over the cap, 5027 failure writing a rendered page
    (pages queued earlier in the same call are un-queued).
    """
    import shutil
    import subprocess
    import tempfile

    session, err = _sess(params, rid)
    if err:
        return err

    if shutil.which("pdftoppm") is None:
        return _err(rid, 5028, "pdftoppm not installed (poppler-utils package required)")

    raw_path = str(params.get("path", "") or "").strip()
    raw_b64 = str(params.get("content_base64") or params.get("data") or "").strip()
    if not raw_path and not raw_b64:
        return _err(rid, 4015, "path or content_base64 required")

    # Everything below runs inside the temp dir's lifetime: it holds the
    # uploaded PDF (if any) and pdftoppm's rendered pages.
    with tempfile.TemporaryDirectory(prefix="pdf_attach_") as td:
        td_path = Path(td)
        if raw_b64:
            pdf_bytes = _decode_attach_base64(raw_b64, mime_prefix="application/pdf")
            if pdf_bytes is None:
                return _err(rid, 4017, "data is not valid base64")
            if not pdf_bytes:
                return _err(rid, 4017, "decoded PDF is empty")
            if len(pdf_bytes) > _PDF_ATTACH_MAX_BYTES:
                mb = _PDF_ATTACH_MAX_BYTES // (1024 * 1024)
                return _err(rid, 4018, f"PDF too large ({len(pdf_bytes)} bytes; cap is {mb} MB)")
            if pdf_bytes[:5] != b"%PDF-":
                return _err(rid, 4017, "payload is not a PDF (missing %PDF- magic bytes)")
            pdf_path = td_path / "input.pdf"
            pdf_path.write_bytes(pdf_bytes)
            display_name = str(params.get("filename", "") or "uploaded.pdf")
        else:
            try:
                from cli import _resolve_attachment_path

                resolved = _resolve_attachment_path(raw_path)
            except Exception:
                resolved = None
            if resolved is None or not Path(resolved).is_file():
                return _err(rid, 4016, f"PDF not found: {raw_path}")
            if Path(resolved).suffix.lower() != ".pdf":
                return _err(rid, 4016, f"not a PDF: {Path(resolved).name}")
            if Path(resolved).stat().st_size > _PDF_ATTACH_MAX_BYTES:
                mb = _PDF_ATTACH_MAX_BYTES // (1024 * 1024)
                return _err(rid, 4018, f"PDF too large; cap is {mb} MB")
            pdf_path = Path(resolved)
            display_name = pdf_path.name

        try:
            first_page = int(params.get("first_page") or 1)
            last_page_param = params.get("last_page")
            last_page = int(last_page_param) if last_page_param is not None else None
        except (TypeError, ValueError):
            return _err(rid, 4015, "first_page/last_page must be integers")

        if first_page < 1:
            return _err(rid, 4015, "first_page must be >= 1")
        if last_page is None:
            last_page = first_page + _PDF_ATTACH_MAX_PAGES - 1
        if last_page < first_page:
            return _err(rid, 4015, "last_page must be >= first_page")
        if last_page - first_page + 1 > _PDF_ATTACH_MAX_PAGES:
            return _err(rid, 4019, f"page range exceeds cap of {_PDF_ATTACH_MAX_PAGES} pages per attach call")

        out_prefix = td_path / "page"
        argv = [
            "pdftoppm", "-png", "-r", "150",
            "-f", str(first_page), "-l", str(last_page),
            str(pdf_path), str(out_prefix),
        ]
        try:
            res = subprocess.run(argv, capture_output=True, text=True, timeout=120)
        except subprocess.TimeoutExpired:
            return _err(rid, 5028, "pdftoppm timed out (>120s)")
        if res.returncode != 0:
            tail = (res.stderr or res.stdout or "").strip().splitlines()[-3:]
            return _err(rid, 5028, "pdftoppm failed: " + " | ".join(tail))

        rendered = sorted(td_path.glob("page-*.png"))
        if not rendered:
            return _err(rid, 5028, "pdftoppm produced no pages (corrupt PDF?)")

        attached_pages = []
        for src in rendered:
            page_num = src.stem.split("-", 1)[-1]
            try:
                page_int = int(page_num)
            except ValueError:
                page_int = first_page + len(attached_pages)
            try:
                dst = _queue_attached_image(session, src.read_bytes(), ".png", prefix=f"pdf_p{page_num}")
            except Exception as e:
                # Mirror image.attach_bytes: report the failure instead of
                # letting it escape, and un-queue this call's earlier pages so
                # the error isn't contradicted by a half-attached PDF.
                queued = session.get("attached_images", [])
                for page in attached_pages:
                    if page["path"] in queued:
                        queued.remove(page["path"])
                return _err(rid, 5027, f"write failed: {e}")
            attached_pages.append({"path": str(dst), "page": page_int, **_image_meta(dst)})

        return _ok(
            rid,
            {
                "attached": True,
                "filename": display_name,
                "pages_attached": len(attached_pages),
                "pages": attached_pages,
                "count": len(session["attached_images"]),
                "text": f"[User attached PDF: {display_name} ({len(attached_pages)} page(s))]",
            },
        )