mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: add image pasting capability
This commit is contained in:
parent
5a5d90c85a
commit
2893e9df71
3 changed files with 97 additions and 16 deletions
|
|
@ -195,7 +195,13 @@ def _make_agent(sid: str, key: str, session_id: str | None = None):
|
|||
|
||||
|
||||
def _init_session(sid: str, key: str, agent, history: list):
|
||||
_sessions[sid] = {"agent": agent, "session_key": key, "history": history}
|
||||
_sessions[sid] = {
|
||||
"agent": agent,
|
||||
"session_key": key,
|
||||
"history": history,
|
||||
"attached_images": [],
|
||||
"image_counter": 0,
|
||||
}
|
||||
try:
|
||||
from tools.approval import register_gateway_notify, load_permanent_allowlist
|
||||
register_gateway_notify(key, lambda data: _emit("approval.request", sid, data))
|
||||
|
|
@ -210,6 +216,38 @@ def _with_checkpoints(session, fn):
|
|||
return fn(session["agent"]._checkpoint_mgr, os.getenv("TERMINAL_CWD", os.getcwd()))
|
||||
|
||||
|
||||
def _enrich_with_attached_images(user_text: str, image_paths: list[str]) -> str:
    """Prefix the user's text with a vision-generated description of each attached image.

    Every path in ``image_paths`` that exists on disk is sent to
    ``vision_analyze_tool``; the tool's return value is parsed as JSON and its
    ``analysis`` field is used when ``success`` is truthy. Paths that do not
    exist are skipped. When analysis fails (or raises), a failure note is
    emitted instead. Each block is followed by a hint naming the image path.

    Args:
        user_text: The message typed by the user; may be empty.
        image_paths: Filesystem paths of the images attached to the message.

    Returns:
        The image blocks joined by blank lines, followed by ``user_text`` when
        it is non-empty. If no block was produced, ``user_text`` itself, or a
        default question when that is empty too.
    """
    import asyncio, json as _json
    from tools.vision_tools import vision_analyze_tool

    describe_request = (
        "Describe everything visible in this image in thorough detail. "
        "Include any text, code, data, objects, people, layout, colors, "
        "and any other notable visual information."
    )
    failure_note = "[The user attached an image but analysis failed.]"

    blocks: list[str] = []
    for raw_path in image_paths:
        image = Path(raw_path)
        if not image.exists():
            continue

        hint = f"[You can examine it with vision_analyze using image_url: {image}]"

        # Any error while running or decoding the analysis is treated the same
        # as an unsuccessful analysis.
        description = None
        try:
            raw = asyncio.run(vision_analyze_tool(image_url=str(image), user_prompt=describe_request))
            result = _json.loads(raw)
            if result.get("success"):
                description = result.get("analysis", "")
        except Exception:
            description = None

        if description:
            blocks.append(f"[The user attached an image:\n{description}]\n{hint}")
        else:
            blocks.append(f"{failure_note}\n{hint}")

    message = user_text or ""
    if not blocks:
        return message or "What do you see in this image?"

    header = "\n\n".join(blocks)
    if message:
        return f"{header}\n\n{message}"
    return header
|
||||
|
||||
|
||||
# ── Methods: session ─────────────────────────────────────────────────
|
||||
|
||||
@method("session.create")
|
||||
|
|
@ -367,8 +405,10 @@ def _(rid, params: dict) -> dict:
|
|||
|
||||
def run():
|
||||
try:
|
||||
images = session.pop("attached_images", [])
|
||||
prompt = _enrich_with_attached_images(text, images) if images else text
|
||||
result = agent.run_conversation(
|
||||
text, conversation_history=list(history),
|
||||
prompt, conversation_history=list(history),
|
||||
stream_callback=lambda delta: _emit("message.delta", sid, {"text": delta}),
|
||||
)
|
||||
if isinstance(result, dict):
|
||||
|
|
@ -385,6 +425,32 @@ def _(rid, params: dict) -> dict:
|
|||
return _ok(rid, {"status": "streaming"})
|
||||
|
||||
|
||||
@method("clipboard.paste")
def _(rid, params: dict) -> dict:
    """Save the clipboard image to disk and queue it on the session.

    Looks up the session from ``params``, writes the clipboard image to a new
    PNG under ``<hermes home>/images`` and appends its path to the session's
    ``attached_images`` list.

    Returns:
        The session-lookup error if there is one; an error response with code
        5027 when the clipboard helpers cannot be imported; otherwise an OK
        response whose ``attached`` flag tells whether an image was stored
        (with ``path`` and ``count`` on success, ``message`` otherwise).
    """
    session, err = _sess(params, rid)
    if err:
        return err

    # Imported here so that an import failure becomes an RPC error response.
    try:
        from datetime import datetime
        from hermes_cli.clipboard import has_clipboard_image, save_clipboard_image
    except Exception as exc:
        return _err(rid, 5027, f"clipboard unavailable: {exc}")

    if not has_clipboard_image():
        return _ok(rid, {"attached": False, "message": "No image found in clipboard"})

    target_dir = _hermes_home / "images"
    target_dir.mkdir(parents=True, exist_ok=True)

    # The per-session counter is part of the file name, next to the timestamp.
    seq = session.get("image_counter", 0) + 1
    session["image_counter"] = seq
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target = target_dir / f"clip_{stamp}_{seq}.png"

    if not save_clipboard_image(target):
        return _ok(rid, {"attached": False, "message": "Clipboard has image but extraction failed"})

    saved = str(target)
    pending = session.setdefault("attached_images", [])
    pending.append(saved)
    return _ok(rid, {"attached": True, "path": saved, "count": len(pending)})
|
||||
|
||||
|
||||
@method("prompt.background")
|
||||
def _(rid, params: dict) -> dict:
|
||||
text, parent = params.get("text", ""), params.get("session_id", "")
|
||||
|
|
|
|||
|
|
@ -251,6 +251,11 @@ export function App({ gw }: { gw: GatewayClient }) {
|
|||
})
|
||||
}
|
||||
|
||||
/**
 * Ask the gateway to attach the clipboard image to the current session and
 * report the outcome as a system message.
 *
 * @returns A promise that settles once the outcome (or the error) has been
 *   passed to `sys`.
 */
const paste = () =>
  rpc('clipboard.paste', { session_id: sid })
    .then((r: any) =>
      sys(r.attached ? `📎 image #${r.count} attached` : r.message || 'no image in clipboard')
    )
    // The /paste command calls this without handling the returned promise, so
    // a rejected RPC is reported here, in the same way as the cli.exec call.
    .catch((e: Error) => sys(`error: ${e.message}`))
|
||||
|
||||
const interpolate = (text: string, then: (result: string) => void) => {
|
||||
setStatus('interpolating…')
|
||||
const matches = [...text.matchAll(new RegExp(INTERPOLATION_RE.source, 'g'))]
|
||||
|
|
@ -387,6 +392,10 @@ export function App({ gw }: { gw: GatewayClient }) {
|
|||
setMessages([])
|
||||
}
|
||||
|
||||
if (key.ctrl && ch === 'v') {
|
||||
return paste()
|
||||
}
|
||||
|
||||
if (key.escape) {
|
||||
clearIn()
|
||||
}
|
||||
|
|
@ -1091,7 +1100,7 @@ export function App({ gw }: { gw: GatewayClient }) {
|
|||
return true
|
||||
|
||||
case 'paste':
|
||||
sys("clipboard paste: use your terminal's paste shortcut (images not yet supported in TUI)")
|
||||
paste()
|
||||
|
||||
return true
|
||||
|
||||
|
|
@ -1211,27 +1220,25 @@ export function App({ gw }: { gw: GatewayClient }) {
|
|||
return true
|
||||
|
||||
case 'update':
|
||||
sys('update not available in TUI mode — run: pip install -U hermes-agent')
|
||||
case 'hermes': {
|
||||
const argv = name === 'update' ? ['update'] : arg.split(/\s+/).filter(Boolean)
|
||||
|
||||
return true
|
||||
|
||||
case 'hermes':
|
||||
if (!arg) {
|
||||
sys(
|
||||
'usage: /hermes <args…> non-interactive `hermes` CLI (e.g. sessions list, chat -q "hi"). Interactive setup/browse/edit must run in a separate terminal.'
|
||||
)
|
||||
if (!argv.length) {
|
||||
sys('usage: /hermes <args…> (e.g. sessions list, chat -q "hi")')
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
rpc('cli.exec', { argv: arg.split(/\s+/).filter(Boolean) })
|
||||
if (name === 'update') {
|
||||
setBusy(true)
|
||||
setStatus('updating…')
|
||||
}
|
||||
|
||||
rpc('cli.exec', { argv, timeout: name === 'update' ? 600 : 240 })
|
||||
.then((r: any) => {
|
||||
if (r.blocked) {
|
||||
sys(r.hint ?? 'blocked')
|
||||
|
||||
return
|
||||
return sys(r.hint ?? 'blocked')
|
||||
}
|
||||
|
||||
sys(r.output ?? '(no output)')
|
||||
|
||||
if (r.code !== 0) {
|
||||
|
|
@ -1239,8 +1246,15 @@ export function App({ gw }: { gw: GatewayClient }) {
|
|||
}
|
||||
})
|
||||
.catch((e: Error) => sys(`error: ${e.message}`))
|
||||
.finally(() => {
|
||||
if (name === 'update') {
|
||||
setStatus('ready')
|
||||
setBusy(false)
|
||||
}
|
||||
})
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
case 'model':
|
||||
if (!arg) {
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ export const HOTKEYS: [string, string][] = [
|
|||
['Ctrl+C', 'interrupt / clear / exit'],
|
||||
['Ctrl+D', 'exit'],
|
||||
['Ctrl+L', 'clear screen'],
|
||||
['Ctrl+V', 'paste clipboard image (same as /paste)'],
|
||||
['Tab', 'complete /commands (registry-aware)'],
|
||||
['↑/↓', 'queue edit (if queued) / input history'],
|
||||
['PgUp/PgDn', 'scroll messages'],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue