diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index c0e9849ae..84c86a054 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -195,7 +195,13 @@ def _make_agent(sid: str, key: str, session_id: str | None = None):
 
 
 def _init_session(sid: str, key: str, agent, history: list):
-    _sessions[sid] = {"agent": agent, "session_key": key, "history": history}
+    _sessions[sid] = {
+        "agent": agent,
+        "session_key": key,
+        "history": history,
+        "attached_images": [],
+        "image_counter": 0,
+    }
     try:
         from tools.approval import register_gateway_notify, load_permanent_allowlist
         register_gateway_notify(key, lambda data: _emit("approval.request", sid, data))
@@ -210,6 +216,49 @@ def _with_checkpoints(session, fn):
     return fn(session["agent"]._checkpoint_mgr, os.getenv("TERMINAL_CWD", os.getcwd()))
 
 
+def _enrich_with_attached_images(user_text: str, image_paths: list[str]) -> str:
+    """Pre-analyze attached images via vision and prepend descriptions to user text.
+
+    Paths that do not exist are skipped. A failure to import or run the vision
+    tool is recorded as an "analysis failed" note for that image instead of
+    raising, so the caller still gets a prompt to send.
+    """
+    import asyncio
+    import json as _json
+
+    prompt = (
+        "Describe everything visible in this image in thorough detail. "
+        "Include any text, code, data, objects, people, layout, colors, "
+        "and any other notable visual information."
+    )
+
+    parts: list[str] = []
+    for path in image_paths:
+        p = Path(path)
+        if not p.exists():
+            continue
+        hint = f"[You can examine it with vision_analyze using image_url: {p}]"
+        desc = None
+        try:
+            # Imported inside the guard so a missing or broken vision backend
+            # degrades to the failure note rather than aborting the prompt.
+            from tools.vision_tools import vision_analyze_tool
+
+            r = _json.loads(asyncio.run(vision_analyze_tool(image_url=str(p), user_prompt=prompt)))
+            if r.get("success"):
+                desc = r.get("analysis", "")
+        except Exception:
+            desc = None  # best-effort: fall through to the failure note
+        parts.append(f"[The user attached an image:\n{desc}]\n{hint}" if desc
+                     else f"[The user attached an image but analysis failed.]\n{hint}")
+
+    text = user_text or ""
+    prefix = "\n\n".join(parts)
+    if prefix:
+        return f"{prefix}\n\n{text}" if text else prefix
+    return text or "What do you see in this image?"
+
+
 # ── Methods: session ─────────────────────────────────────────────────
 
 @method("session.create")
@@ -367,8 +416,10 @@ def _(rid, params: dict) -> dict:
 
     def run():
         try:
+            images = session.pop("attached_images", [])
+            prompt = _enrich_with_attached_images(text, images) if images else text
             result = agent.run_conversation(
-                text, conversation_history=list(history),
+                prompt, conversation_history=list(history),
                 stream_callback=lambda delta: _emit("message.delta", sid, {"text": delta}),
             )
             if isinstance(result, dict):
@@ -385,6 +436,32 @@ def _(rid, params: dict) -> dict:
     return _ok(rid, {"status": "streaming"})
 
 
+@method("clipboard.paste")
+def _(rid, params: dict) -> dict:
+    session, err = _sess(params, rid)
+    if err:
+        return err
+    try:
+        from datetime import datetime
+        from hermes_cli.clipboard import has_clipboard_image, save_clipboard_image
+    except Exception as e:
+        return _err(rid, 5027, f"clipboard unavailable: {e}")
+
+    if not has_clipboard_image():
+        return _ok(rid, {"attached": False, "message": "No image found in clipboard"})
+
+    img_dir = _hermes_home / "images"
+    img_dir.mkdir(parents=True, exist_ok=True)
+    session["image_counter"] = session.get("image_counter", 0) + 1
+    img_path = img_dir / f"clip_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{session['image_counter']}.png"
+
+    if not save_clipboard_image(img_path):
+        return _ok(rid, {"attached": False, "message": "Clipboard has image but extraction failed"})
+
+    session.setdefault("attached_images", []).append(str(img_path))
+    return _ok(rid, {"attached": True, "path": str(img_path), "count": len(session["attached_images"])})
+
+
 @method("prompt.background")
 def _(rid, params: dict) -> dict:
     text, parent = params.get("text", ""), params.get("session_id", "")
diff --git a/ui-tui/src/app.tsx b/ui-tui/src/app.tsx
index 774e47948..dacfc9518 100644
--- a/ui-tui/src/app.tsx
+++ b/ui-tui/src/app.tsx
@@ -251,6 +251,13 @@ export function App({ gw }: { gw: GatewayClient }) {
     })
   }
 
+  const paste = () =>
+    rpc('clipboard.paste', { session_id: sid })
+      .then((r: any) =>
+        sys(r.attached ? `📎 image #${r.count} attached` : r.message || 'no image in clipboard')
+      )
+      .catch((e: Error) => sys(`error: ${e.message}`))
+
   const interpolate = (text: string, then: (result: string) => void) => {
     setStatus('interpolating…')
     const matches = [...text.matchAll(new RegExp(INTERPOLATION_RE.source, 'g'))]
@@ -387,6 +394,10 @@ export function App({ gw }: { gw: GatewayClient }) {
       setMessages([])
     }
 
+    if (key.ctrl && ch === 'v') {
+      return paste()
+    }
+
     if (key.escape) {
       clearIn()
     }
@@ -1091,7 +1102,7 @@ export function App({ gw }: { gw: GatewayClient }) {
         return true
 
       case 'paste':
-        sys("clipboard paste: use your terminal's paste shortcut (images not yet supported in TUI)")
+        paste()
 
         return true
 
@@ -1211,27 +1222,25 @@ export function App({ gw }: { gw: GatewayClient }) {
         return true
 
       case 'update':
-        sys('update not available in TUI mode — run: pip install -U hermes-agent')
+      case 'hermes': {
+        const argv = name === 'update' ? ['update'] : arg.split(/\s+/).filter(Boolean)
 
-        return true
-
-      case 'hermes':
-        if (!arg) {
-          sys(
-            'usage: /hermes non-interactive `hermes` CLI (e.g. sessions list, chat -q "hi"). Interactive setup/browse/edit must run in a separate terminal.'
-          )
+        if (!argv.length) {
+          sys('usage: /hermes (e.g. sessions list, chat -q "hi")')
 
           return true
         }
 
-        rpc('cli.exec', { argv: arg.split(/\s+/).filter(Boolean) })
+        if (name === 'update') {
+          setBusy(true)
+          setStatus('updating…')
+        }
+
+        rpc('cli.exec', { argv, timeout: name === 'update' ? 600 : 240 })
           .then((r: any) => {
             if (r.blocked) {
-              sys(r.hint ?? 'blocked')
-
-              return
+              return sys(r.hint ?? 'blocked')
             }
-
             sys(r.output ?? '(no output)')
 
             if (r.code !== 0) {
@@ -1239,8 +1248,15 @@ export function App({ gw }: { gw: GatewayClient }) {
             }
           })
           .catch((e: Error) => sys(`error: ${e.message}`))
+          .finally(() => {
+            if (name === 'update') {
+              setStatus('ready')
+              setBusy(false)
+            }
+          })
 
         return true
+      }
 
       case 'model':
         if (!arg) {
diff --git a/ui-tui/src/constants.ts b/ui-tui/src/constants.ts
index f638b3f43..87c1fdac2 100644
--- a/ui-tui/src/constants.ts
+++ b/ui-tui/src/constants.ts
@@ -23,6 +23,7 @@ export const HOTKEYS: [string, string][] = [
   ['Ctrl+C', 'interrupt / clear / exit'],
   ['Ctrl+D', 'exit'],
   ['Ctrl+L', 'clear screen'],
+  ['Ctrl+V', 'paste clipboard image (same as /paste)'],
   ['Tab', 'complete /commands (registry-aware)'],
   ['↑/↓', 'queue edit (if queued) / input history'],
   ['PgUp/PgDn', 'scroll messages'],