feat: add image pasting capability

This commit is contained in:
Brooklyn Nicholson 2026-04-04 13:00:55 -05:00
parent 5a5d90c85a
commit 2893e9df71
3 changed files with 97 additions and 16 deletions

View file

@ -195,7 +195,13 @@ def _make_agent(sid: str, key: str, session_id: str | None = None):
def _init_session(sid: str, key: str, agent, history: list):
_sessions[sid] = {"agent": agent, "session_key": key, "history": history}
_sessions[sid] = {
"agent": agent,
"session_key": key,
"history": history,
"attached_images": [],
"image_counter": 0,
}
try:
from tools.approval import register_gateway_notify, load_permanent_allowlist
register_gateway_notify(key, lambda data: _emit("approval.request", sid, data))
@ -210,6 +216,38 @@ def _with_checkpoints(session, fn):
return fn(session["agent"]._checkpoint_mgr, os.getenv("TERMINAL_CWD", os.getcwd()))
def _enrich_with_attached_images(user_text: str, image_paths: list[str]) -> str:
    """Build the prompt text for a message that has pasted images attached.

    Each path in *image_paths* that exists on disk is passed to
    ``vision_analyze_tool`` and the JSON reply is parsed.  A reply flagged
    ``success`` with a non-empty ``analysis`` is embedded as a bracketed
    description; any other outcome (exception, unsuccessful reply, empty
    analysis) produces a bracketed "analysis failed" note instead.  Every
    note ends with a hint telling the model how to re-inspect the file.
    Paths that do not exist are skipped without a note.

    Args:
        user_text: The text the user typed; may be empty.
        image_paths: Filesystem paths of the attached images.

    Returns:
        The notes joined by blank lines, followed by *user_text* when it is
        non-empty.  When no note was produced, *user_text* itself, or a
        default question if that is empty too.
    """
    import asyncio, json as _json
    from tools.vision_tools import vision_analyze_tool

    vision_prompt = (
        "Describe everything visible in this image in thorough detail. "
        "Include any text, code, data, objects, people, layout, colors, "
        "and any other notable visual information."
    )
    failure_note = "[The user attached an image but analysis failed.]"

    notes: list[str] = []
    for raw_path in image_paths:
        image = Path(raw_path)
        if not image.exists():
            continue

        hint = f"[You can examine it with vision_analyze using image_url: {image}]"

        # Best-effort: any error while analysing or decoding the reply is
        # reported to the model as a failed analysis rather than raised.
        try:
            reply = _json.loads(
                asyncio.run(vision_analyze_tool(image_url=str(image), user_prompt=vision_prompt))
            )
            description = reply.get("analysis", "") if reply.get("success") else None
        except Exception:
            description = None

        if description:
            notes.append(f"[The user attached an image:\n{description}]\n{hint}")
        else:
            notes.append(f"{failure_note}\n{hint}")

    text = user_text or ""
    if not notes:
        return text or "What do you see in this image?"

    preamble = "\n\n".join(notes)
    if text:
        return f"{preamble}\n\n{text}"
    return preamble
# ── Methods: session ─────────────────────────────────────────────────
@method("session.create")
@ -367,8 +405,10 @@ def _(rid, params: dict) -> dict:
def run():
try:
images = session.pop("attached_images", [])
prompt = _enrich_with_attached_images(text, images) if images else text
result = agent.run_conversation(
text, conversation_history=list(history),
prompt, conversation_history=list(history),
stream_callback=lambda delta: _emit("message.delta", sid, {"text": delta}),
)
if isinstance(result, dict):
@ -385,6 +425,32 @@ def _(rid, params: dict) -> dict:
return _ok(rid, {"status": "streaming"})
@method("clipboard.paste")
def _(rid, params: dict) -> dict:
    """Save the clipboard image to disk and queue it on the session.

    Replies with ``attached: False`` and a message when the clipboard holds
    no image or the image could not be saved, and with ``attached: True``,
    the saved path and the number of queued images on success.  Replies with
    error 5027 when the clipboard helpers cannot be imported.
    """
    session, err = _sess(params, rid)
    if err:
        return err

    try:
        from datetime import datetime
        from hermes_cli.clipboard import has_clipboard_image, save_clipboard_image
    except Exception as e:
        return _err(rid, 5027, f"clipboard unavailable: {e}")

    if not has_clipboard_image():
        return _ok(rid, {"attached": False, "message": "No image found in clipboard"})

    target_dir = _hermes_home / "images"
    target_dir.mkdir(parents=True, exist_ok=True)

    # The per-session counter is bumped before saving; it is part of the
    # file name alongside a second-resolution timestamp.
    seq = session.get("image_counter", 0) + 1
    session["image_counter"] = seq
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target = target_dir / f"clip_{stamp}_{seq}.png"

    saved = save_clipboard_image(target)
    if not saved:
        return _ok(rid, {"attached": False, "message": "Clipboard has image but extraction failed"})

    attachments = session.setdefault("attached_images", [])
    attachments.append(str(target))
    return _ok(rid, {"attached": True, "path": str(target), "count": len(attachments)})
@method("prompt.background")
def _(rid, params: dict) -> dict:
text, parent = params.get("text", ""), params.get("session_id", "")

View file

@ -251,6 +251,11 @@ export function App({ gw }: { gw: GatewayClient }) {
})
}
// Ask the gateway to attach the current clipboard image to this session and
// report the outcome as a system line. Used by both Ctrl+V and /paste.
const paste = () =>
  rpc('clipboard.paste', { session_id: sid })
    .then((r: any) =>
      sys(r.attached ? `📎 image #${r.count} attached` : r.message || 'no image in clipboard')
    )
    // Surface RPC failures instead of leaving the rejection unhandled,
    // matching the error reporting used by the other rpc(...) call sites.
    .catch((e: Error) => sys(`error: ${e.message}`))
const interpolate = (text: string, then: (result: string) => void) => {
setStatus('interpolating…')
const matches = [...text.matchAll(new RegExp(INTERPOLATION_RE.source, 'g'))]
@ -387,6 +392,10 @@ export function App({ gw }: { gw: GatewayClient }) {
setMessages([])
}
if (key.ctrl && ch === 'v') {
return paste()
}
if (key.escape) {
clearIn()
}
@ -1091,7 +1100,7 @@ export function App({ gw }: { gw: GatewayClient }) {
return true
case 'paste':
sys("clipboard paste: use your terminal's paste shortcut (images not yet supported in TUI)")
paste()
return true
@ -1211,27 +1220,25 @@ export function App({ gw }: { gw: GatewayClient }) {
return true
case 'update':
sys('update not available in TUI mode — run: pip install -U hermes-agent')
case 'hermes': {
const argv = name === 'update' ? ['update'] : arg.split(/\s+/).filter(Boolean)
return true
case 'hermes':
if (!arg) {
sys(
'usage: /hermes <args…> non-interactive `hermes` CLI (e.g. sessions list, chat -q "hi"). Interactive setup/browse/edit must run in a separate terminal.'
)
if (!argv.length) {
sys('usage: /hermes <args…> (e.g. sessions list, chat -q "hi")')
return true
}
rpc('cli.exec', { argv: arg.split(/\s+/).filter(Boolean) })
if (name === 'update') {
setBusy(true)
setStatus('updating…')
}
rpc('cli.exec', { argv, timeout: name === 'update' ? 600 : 240 })
.then((r: any) => {
if (r.blocked) {
sys(r.hint ?? 'blocked')
return
return sys(r.hint ?? 'blocked')
}
sys(r.output ?? '(no output)')
if (r.code !== 0) {
@ -1239,8 +1246,15 @@ export function App({ gw }: { gw: GatewayClient }) {
}
})
.catch((e: Error) => sys(`error: ${e.message}`))
.finally(() => {
if (name === 'update') {
setStatus('ready')
setBusy(false)
}
})
return true
}
case 'model':
if (!arg) {

View file

@ -23,6 +23,7 @@ export const HOTKEYS: [string, string][] = [
['Ctrl+C', 'interrupt / clear / exit'],
['Ctrl+D', 'exit'],
['Ctrl+L', 'clear screen'],
['Ctrl+V', 'paste clipboard image (same as /paste)'],
['Tab', 'complete /commands (registry-aware)'],
['↑/↓', 'queue edit (if queued) / input history'],
['PgUp/PgDn', 'scroll messages'],