feat(desktop+gateway): remote-gateway file attachments via file.attach

@file: attachments now work when the desktop is connected to a remote
gateway. Previously a referenced file resolved to a client-disk path the
gateway couldn't see, so context_references rejected it with "path is
outside the allowed workspace" and the agent never saw the file.

Adds a file.attach RPC (sibling to the existing image.attach_bytes /
pdf.attach byte-upload pipeline): the desktop uploads the file bytes, the
gateway stages them into <workspace>/.hermes/desktop-attachments/ and
returns a workspace-relative @file: ref that resolves cleanly. Local mode
passes the path directly; a gateway-visible file outside the workspace is
copied in; an in-workspace file is referenced as-is with no copy.

Consolidates the file-sync design from #38615 (LeonSGP43) and the
host-file-staging idea from #33455 (Carry00), rebased onto the
image/PDF remote-media helpers already on main.

Co-authored-by: LeonSGP43 <cine.dreamer.one@gmail.com>
This commit is contained in:
teknium1 2026-06-08 23:01:10 -07:00 committed by Teknium
parent e687292eb4
commit dbbd1d4d05
6 changed files with 603 additions and 53 deletions

View file

@ -4,6 +4,8 @@ import { useEffect } from 'react'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import { $sessions, setSessions } from '@/store/session'
import { $connection } from '@/store/session'
import type { ComposerAttachment } from '@/store/composer'
import type { SessionInfo } from '@/types/hermes'
import { usePromptActions } from './use-prompt-actions'
@ -42,7 +44,10 @@ function sessionInfo(overrides: Partial<SessionInfo> = {}): SessionInfo {
interface HarnessHandle {
steerPrompt: (text: string) => Promise<boolean>
submitText: (text: string, options?: { attachments?: never[]; fromQueue?: boolean }) => Promise<boolean>
submitText: (
text: string,
options?: { attachments?: ComposerAttachment[]; fromQueue?: boolean }
) => Promise<boolean>
}
function Harness({
@ -314,3 +319,92 @@ describe('usePromptActions steerPrompt', () => {
expect(requestGateway).not.toHaveBeenCalled()
})
})
describe('usePromptActions file attachment sync', () => {
// Teardown for the file-attachment tests: unmount rendered harnesses, reset the
// connection store, and undo every global this suite touches.
afterEach(() => {
  cleanup()
  $connection.set(null)
  // The remote-gateway test installs window.hermesDesktop via
  // Object.defineProperty; vi.restoreAllMocks() only restores spies, so the
  // stub must be removed explicitly or it leaks into later tests. The stub is
  // defined as configurable, so the delete succeeds; when the property is
  // absent this is a no-op.
  Reflect.deleteProperty(window, 'hermesDesktop')
  vi.restoreAllMocks()
})
/**
 * Builds the composer attachment fixture used by the file-sync tests: a
 * non-image file whose path and @file: ref both point at a client-disk
 * location.
 */
function fileAttachment(): ComposerAttachment {
  const clientPath = '/Users/alice/Downloads/report.txt'
  const fixture: ComposerAttachment = {
    id: 'file:report.txt',
    kind: 'file',
    label: 'report.txt',
    path: clientPath,
    // The ref wraps the client path in backticks, as the original literal did.
    refText: `@file:\`${clientPath}\``
  }

  return fixture
}
it('uploads file bytes via file.attach on a remote gateway and submits the rewritten ref', async () => {
  // A remote gateway has no access to the client-disk path. The desktop has to
  // send the file bytes and then submit the workspace-relative ref returned by
  // the gateway; submitting the original /Users/... path would be rejected as
  // "outside the allowed workspace".
  $connection.set({ mode: 'remote' } as never)

  const readFileDataUrl = vi.fn(async () => 'data:text/plain;base64,aGVsbG8=')
  Object.defineProperty(window, 'hermesDesktop', {
    configurable: true,
    value: { readFileDataUrl }
  })

  // Every RPC the hook issues is recorded in order so the sequence can be asserted.
  const rpcLog: { method: string; params?: Record<string, unknown> }[] = []
  const requestGateway = vi.fn(async (method: string, params?: Record<string, unknown>) => {
    rpcLog.push({ method, params })

    if (method !== 'file.attach') {
      return {} as never
    }

    return {
      attached: true,
      path: '/remote/work/.hermes/desktop-attachments/report.txt',
      ref_text: '@file:.hermes/desktop-attachments/report.txt',
      uploaded: true
    } as never
  })

  let harness: HarnessHandle | null = null
  render(
    <Harness
      onReady={ready => (harness = ready)}
      refreshSessions={async () => undefined}
      requestGateway={requestGateway}
    />
  )

  const submitted = await harness!.submitText('convert this to epub', { attachments: [fileAttachment()] })
  expect(submitted).toBe(true)

  // Upload first, then the prompt — and nothing else.
  expect(rpcLog.map(entry => entry.method)).toEqual(['file.attach', 'prompt.submit'])

  const [attachCall, submitCall] = rpcLog
  expect(attachCall?.params).toMatchObject({
    session_id: RUNTIME_SESSION_ID,
    path: '/Users/alice/Downloads/report.txt',
    name: 'report.txt',
    data_url: 'data:text/plain;base64,aGVsbG8='
  })
  // The prompt carries the gateway's rewritten ref, not the client path.
  expect(submitCall?.params).toEqual({
    session_id: RUNTIME_SESSION_ID,
    text: '@file:.hermes/desktop-attachments/report.txt\n\nconvert this to epub'
  })
})
it('passes the path directly via file.attach in local mode (no byte upload)', async () => {
  $connection.set({ mode: 'local' } as never)

  // Record each RPC in issue order.
  const rpcLog: { method: string; params?: Record<string, unknown> }[] = []
  const requestGateway = vi.fn(async (method: string, params?: Record<string, unknown>) => {
    rpcLog.push({ method, params })

    if (method !== 'file.attach') {
      return {} as never
    }

    return { attached: true, ref_text: '@file:data/report.txt', uploaded: false } as never
  })

  let harness: HarnessHandle | null = null
  render(
    <Harness
      onReady={ready => (harness = ready)}
      refreshSessions={async () => undefined}
      requestGateway={requestGateway}
    />
  )

  const submitted = await harness!.submitText('summarize', { attachments: [fileAttachment()] })
  expect(submitted).toBe(true)

  const [attachCall, submitCall] = rpcLog
  expect(attachCall?.method).toBe('file.attach')
  // In local mode the gateway shares this disk, so no data_url is sent.
  expect(attachCall?.params).not.toHaveProperty('data_url')
  expect(submitCall).toEqual({
    method: 'prompt.submit',
    params: { session_id: RUNTIME_SESSION_ID, text: '@file:data/report.txt\n\nsummarize' }
  })
})
})

View file

@ -47,6 +47,7 @@ import {
import type {
ClientSessionState,
FileAttachResponse,
ImageAttachResponse,
SessionSteerResponse,
SessionTitleResponse,
@ -103,6 +104,20 @@ async function readImageForRemoteAttach(
return contentBase64 ? { contentBase64, filename: imageFilenameFromPath(filePath) } : null
}
// Fetch a non-image file's contents as a data URL (for upload via file.attach)
// through the desktop bridge. Resolves to null when the bridge exposes no
// readFileDataUrl function or when that function yields an empty result.
async function readFileDataUrlForAttach(filePath: string): Promise<string | null> {
  const bridgeRead = window.hermesDesktop?.readFileDataUrl
  const contents = bridgeRead ? await bridgeRead(filePath) : null

  // Collapse '' / undefined / null into a single null sentinel.
  return contents ? contents : null
}
interface PromptActionsOptions {
activeSessionId: string | null
activeSessionIdRef: MutableRefObject<string | null>
@ -212,62 +227,114 @@ export function usePromptActions({
[selectedStoredSessionIdRef, updateSessionState]
)
const syncImageAttachmentsForSubmit = useCallback(
const syncAttachmentsForSubmit = useCallback(
async (
sessionId: string,
attachments: ComposerAttachment[],
options: { updateComposerAttachments?: boolean } = {}
) => {
): Promise<ComposerAttachment[]> => {
const updateComposerAttachments = options.updateComposerAttachments ?? true
const images = attachments.filter(attachment => attachment.kind === 'image' && attachment.path)
const remote = $connection.get()?.mode === 'remote'
const synced: ComposerAttachment[] = []
for (const attachment of images) {
if (attachment.attachedSessionId === sessionId) {
for (const attachment of attachments) {
// Already-synced or pathless refs (terminal, url, etc.) pass through.
if (!attachment.path || attachment.attachedSessionId === sessionId) {
synced.push(attachment)
continue
}
let result: ImageAttachResponse
if (attachment.kind === 'image') {
let result: ImageAttachResponse
if (remote) {
// The gateway is on another machine — it can't read attachment.path
// (a path on THIS disk). Upload the bytes via image.attach_bytes.
const payload = attachment.path ? await readImageForRemoteAttach(attachment.path) : null
if (remote) {
// The gateway is on another machine — it can't read attachment.path
// (a path on THIS disk). Upload the bytes via image.attach_bytes.
const payload = await readImageForRemoteAttach(attachment.path)
if (!payload) {
const label = attachment.label || (attachment.path ? pathLabel(attachment.path) : 'image')
throw new Error(`Could not read ${label}`)
if (!payload) {
const label = attachment.label || pathLabel(attachment.path)
throw new Error(`Could not read ${label}`)
}
result = await requestGateway<ImageAttachResponse>('image.attach_bytes', {
session_id: sessionId,
content_base64: payload.contentBase64,
filename: payload.filename
})
} else {
result = await requestGateway<ImageAttachResponse>('image.attach', {
session_id: sessionId,
path: attachment.path
})
}
result = await requestGateway<ImageAttachResponse>('image.attach_bytes', {
session_id: sessionId,
content_base64: payload.contentBase64,
filename: payload.filename
})
} else {
result = await requestGateway<ImageAttachResponse>('image.attach', {
session_id: sessionId,
path: attachment.path
})
}
if (!result.attached) {
const label = attachment.label || pathLabel(attachment.path)
throw new Error(result.message || `Could not attach ${label}`)
}
if (!result.attached) {
const label = attachment.label || (attachment.path ? pathLabel(attachment.path) : 'image')
throw new Error(result.message || `Could not attach ${label}`)
}
const attachedPath = result.path || attachment.path
if (updateComposerAttachments) {
addComposerAttachment({
const attachedPath = result.path || attachment.path
const nextAttachment: ComposerAttachment = {
...attachment,
id: attachment.id,
label: attachedPath ? pathLabel(attachedPath) : attachment.label,
path: attachedPath,
attachedSessionId: sessionId
})
}
if (updateComposerAttachments) {
addComposerAttachment(nextAttachment)
}
synced.push(nextAttachment)
continue
}
if (attachment.kind === 'file') {
// Non-image file refs are @file: paths the gateway reads with its file
// tools. On a remote gateway the desktop path doesn't exist there, so
// upload the bytes; the gateway stages them into the session workspace
// and hands back a workspace-relative ref that actually resolves.
// Local mode can pass the path directly (gateway shares this disk).
const dataUrl = remote ? await readFileDataUrlForAttach(attachment.path) : null
if (remote && !dataUrl) {
const label = attachment.label || pathLabel(attachment.path)
throw new Error(`Could not read ${label}`)
}
const result = await requestGateway<FileAttachResponse>('file.attach', {
session_id: sessionId,
path: attachment.path,
name: attachment.label || pathLabel(attachment.path),
...(dataUrl ? { data_url: dataUrl } : {})
})
if (!result.attached || !result.ref_text) {
const label = attachment.label || pathLabel(attachment.path)
throw new Error(result.message || `Could not attach ${label}`)
}
const nextAttachment: ComposerAttachment = {
...attachment,
id: attachment.id,
refText: result.ref_text,
attachedSessionId: sessionId
}
if (updateComposerAttachments) {
addComposerAttachment(nextAttachment)
}
synced.push(nextAttachment)
continue
}
synced.push(attachment)
}
return synced
},
[requestGateway]
)
@ -278,35 +345,42 @@ export function usePromptActions({
const usingComposerAttachments = !options?.attachments
const attachments = options?.attachments ?? $composerAttachments.get()
const contextRefs = attachments
.map(a => a.refText)
.filter(Boolean)
.join('\n')
const terminalContextBlocks = terminalContextBlocksFromDraft(rawText).join('\n\n')
const hasImage = attachments.some(a => a.kind === 'image')
const attachmentRefs = attachments.map(attachmentDisplayText).filter((r): r is string => Boolean(r))
const text =
[contextRefs, terminalContextBlocks, visibleText].filter(Boolean).join('\n\n') ||
(hasImage ? 'What do you see in this image?' : '')
// Refs are recomputed after sync (file.attach rewrites @file: refs to
// workspace-relative paths the remote gateway can resolve). Seed the
// optimistic message with the pre-sync refs, then rewrite once synced.
let attachmentRefs = attachments.map(attachmentDisplayText).filter((r): r is string => Boolean(r))
const buildContextText = (atts: ComposerAttachment[]): string => {
const contextRefs = atts
.map(a => a.refText)
.filter(Boolean)
.join('\n')
return (
[contextRefs, terminalContextBlocks, visibleText].filter(Boolean).join('\n\n') ||
(atts.some(a => a.kind === 'image') ? 'What do you see in this image?' : '')
)
}
// Queue drains fire on the busy→false settle edge, where busyRef (synced
// from $busy by a separate effect) may still read true — honoring it would
// bounce the drained send. The drain lock serializes them; the user path
// keeps the guard so a stray Enter mid-turn can't double-submit.
if (!text || (!options?.fromQueue && busyRef.current)) {
const hasSendable = Boolean(visibleText || terminalContextBlocks || attachments.length || hasImage)
if (!hasSendable || (!options?.fromQueue && busyRef.current)) {
return false
}
const optimisticId = `user-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
const userMessage: ChatMessage = {
const buildUserMessage = (): ChatMessage => ({
id: optimisticId,
role: 'user',
parts: [textPart(visibleText || (attachmentRefs.length ? '' : attachments.map(a => a.label).join(', ')))],
attachmentRefs
}
})
const releaseBusy = () => {
setMutableRef(busyRef, false)
@ -323,7 +397,7 @@ export function usePromptActions({
...state,
messages: state.messages.some(m => m.id === optimisticId)
? state.messages
: [...state.messages, userMessage],
: [...state.messages, buildUserMessage()],
busy: true,
awaitingResponse: true,
pendingBranchGroup: null,
@ -336,6 +410,18 @@ export function usePromptActions({
selectedStoredSessionIdRef.current
)
// After sync rewrites refs, refresh the optimistic message in place so the
// transcript shows the resolved @file: ref rather than the local path.
const rewriteOptimistic = (sid: string) =>
updateSessionState(
sid,
state => ({
...state,
messages: state.messages.map(message => (message.id === optimisticId ? buildUserMessage() : message))
}),
selectedStoredSessionIdRef.current
)
const dropOptimistic = (sid: null | string) => {
if (!sid) {
setMessages(current => current.filter(m => m.id !== optimisticId))
@ -366,7 +452,7 @@ export function usePromptActions({
if (sessionId) {
seedOptimistic(sessionId)
} else {
setMessages(current => [...current, userMessage])
setMessages(current => [...current, buildUserMessage()])
}
if (!sessionId) {
@ -392,9 +478,14 @@ export function usePromptActions({
}
try {
await syncImageAttachmentsForSubmit(sessionId, attachments, {
const syncedAttachments = await syncAttachmentsForSubmit(sessionId, attachments, {
updateComposerAttachments: usingComposerAttachments
})
// Rewrite the optimistic message + prompt text with the synced refs so
// the gateway receives @file: paths that resolve in its workspace.
attachmentRefs = syncedAttachments.map(attachmentDisplayText).filter((r): r is string => Boolean(r))
rewriteOptimistic(sessionId)
const text = buildContextText(syncedAttachments)
await requestGateway('prompt.submit', { session_id: sessionId, text })
if (usingComposerAttachments) {
@ -442,7 +533,7 @@ export function usePromptActions({
createBackendSessionForSend,
requestGateway,
selectedStoredSessionIdRef,
syncImageAttachmentsForSubmit,
syncAttachmentsForSubmit,
updateSessionState
]
)

View file

@ -27,6 +27,20 @@ export interface ImageDetachResponse {
count?: number
}
/** Reply payload of the `file.attach` gateway RPC. Every field is optional. */
export interface FileAttachResponse {
  /** Whether the attach succeeded. */
  attached?: boolean
  /** Rewritten @file: ref that resolves on the gateway (workspace-relative). */
  ref_text?: string
  /** Workspace-relative path used to build ref_text. */
  ref_path?: string
  /** Gateway-side absolute path the file was staged to. */
  path?: string
  /** True when bytes/host file were copied into the session workspace. */
  uploaded?: boolean
  /** NOTE(review): presumably the staged file's name — confirm against the gateway. */
  name?: string
  /** Optional human-readable detail from the gateway (e.g. a failure reason). */
  message?: string
}
export interface SlashExecResponse {
output?: string
warning?: string

View file

@ -88,7 +88,8 @@ function isUpdateToastSnoozed(): boolean {
// Must match tui_gateway's DESKTOP_BACKEND_CONTRACT that this build was written
// against. The backend reports its own value in session runtime info; a lower
// value (or none — a pre-GUI checkout) means GUI<->backend skew.
const REQUIRED_BACKEND_CONTRACT = 1
// v2: requires the file.attach RPC (remote-gateway non-image file upload).
const REQUIRED_BACKEND_CONTRACT = 2
const SKEW_TOAST_ID = 'backend-contract-skew'
/**