fix(termux): add local image chat route

This commit is contained in:
adybag14-cyber 2026-04-09 12:09:11 +02:00 committed by Teknium
parent a3aed1bd26
commit 096b3f9f12
6 changed files with 380 additions and 40 deletions

277
cli.py
View file

@ -1008,7 +1008,7 @@ def _cprint(text: str):
# ---------------------------------------------------------------------------
# File-drop detection — extracted as a pure function for testability.
# File-drop / local attachment detection — extracted as pure helpers for tests.
# ---------------------------------------------------------------------------
_IMAGE_EXTENSIONS = frozenset({
@ -1017,12 +1017,91 @@ _IMAGE_EXTENSIONS = frozenset({
})
def _detect_file_drop(user_input: str) -> "dict | None":
"""Detect if *user_input* is a dragged/pasted file path, not a slash command.
def _is_termux_environment() -> bool:
prefix = os.getenv("PREFIX", "")
return bool(os.getenv("TERMUX_VERSION") or "com.termux/files/usr" in prefix)
When a user drags a file into the terminal, macOS pastes the absolute path
(e.g. ``/Users/roland/Desktop/file.png``) which starts with ``/`` and would
otherwise be mistaken for a slash command.
def _split_path_input(raw: str) -> tuple[str, str]:
"""Split a leading file path token from trailing free-form text.
Supports quoted paths and backslash-escaped spaces so callers can accept
inputs like:
/tmp/pic.png describe this
~/storage/shared/My\ Photos/cat.png what is this?
"/storage/emulated/0/DCIM/Camera/cat 1.png" summarize
"""
raw = str(raw or "").strip()
if not raw:
return "", ""
if raw[0] in {'"', "'"}:
quote = raw[0]
pos = 1
while pos < len(raw):
ch = raw[pos]
if ch == '\\' and pos + 1 < len(raw):
pos += 2
continue
if ch == quote:
token = raw[1:pos]
remainder = raw[pos + 1 :].strip()
return token, remainder
pos += 1
return raw[1:], ""
pos = 0
while pos < len(raw):
ch = raw[pos]
if ch == '\\' and pos + 1 < len(raw) and raw[pos + 1] == ' ':
pos += 2
elif ch == ' ':
break
else:
pos += 1
token = raw[:pos].replace('\\ ', ' ')
remainder = raw[pos:].strip()
return token, remainder
def _resolve_attachment_path(raw_path: str) -> Path | None:
"""Resolve a user-supplied local attachment path.
Accepts quoted or unquoted paths, expands ``~`` and env vars, and resolves
relative paths from ``TERMINAL_CWD`` when set (matching terminal tool cwd).
Returns ``None`` when the path does not resolve to an existing file.
"""
token = str(raw_path or "").strip()
if not token:
return None
if (token.startswith('"') and token.endswith('"')) or (token.startswith("'") and token.endswith("'")):
token = token[1:-1].strip()
if not token:
return None
expanded = os.path.expandvars(os.path.expanduser(token))
path = Path(expanded)
if not path.is_absolute():
base_dir = Path(os.getenv("TERMINAL_CWD", os.getcwd()))
path = base_dir / path
try:
resolved = path.resolve()
except Exception:
resolved = path
if not resolved.exists() or not resolved.is_file():
return None
return resolved
def _detect_file_drop(user_input: str) -> "dict | None":
"""Detect if *user_input* starts with a real local file path.
This catches dragged/pasted paths before they are mistaken for slash
commands, and also supports Termux-friendly paths like ``~/storage/...``.
Returns a dict on match::
@ -1034,29 +1113,31 @@ def _detect_file_drop(user_input: str) -> "dict | None":
Returns ``None`` when the input is not a real file path.
"""
if not isinstance(user_input, str) or not user_input.startswith("/"):
if not isinstance(user_input, str):
return None
# Walk the string absorbing backslash-escaped spaces ("\ ").
raw = user_input
pos = 0
while pos < len(raw):
ch = raw[pos]
if ch == '\\' and pos + 1 < len(raw) and raw[pos + 1] == ' ':
pos += 2 # skip escaped space
elif ch == ' ':
break
else:
pos += 1
first_token_raw = raw[:pos]
first_token = first_token_raw.replace('\\ ', ' ')
drop_path = Path(first_token)
if not drop_path.exists() or not drop_path.is_file():
stripped = user_input.strip()
if not stripped:
return None
starts_like_path = (
stripped.startswith("/")
or stripped.startswith("~")
or stripped.startswith("./")
or stripped.startswith("../")
or stripped.startswith('"/')
or stripped.startswith('"~')
or stripped.startswith("'/")
or stripped.startswith("'~")
)
if not starts_like_path:
return None
first_token, remainder = _split_path_input(stripped)
drop_path = _resolve_attachment_path(first_token)
if drop_path is None:
return None
remainder = raw[pos:].strip()
return {
"path": drop_path,
"is_image": drop_path.suffix.lower() in _IMAGE_EXTENSIONS,
@ -1064,6 +1145,69 @@ def _detect_file_drop(user_input: str) -> "dict | None":
}
def _format_image_attachment_badges(attached_images: list[Path], image_counter: int, width: int | None = None) -> str:
"""Format the attached-image badge row for the interactive CLI.
Narrow terminals such as Termux should get a compact summary that fits on a
single row, while wider terminals can show the classic per-image badges.
"""
if not attached_images:
return ""
width = width or shutil.get_terminal_size((80, 24)).columns
def _trunc(name: str, limit: int) -> str:
return name if len(name) <= limit else name[: max(1, limit - 3)] + "..."
if width < 52:
if len(attached_images) == 1:
return f"[📎 {_trunc(attached_images[0].name, 20)}]"
return f"[📎 {len(attached_images)} images attached]"
if width < 80:
if len(attached_images) == 1:
return f"[📎 {_trunc(attached_images[0].name, 32)}]"
first = _trunc(attached_images[0].name, 20)
extra = len(attached_images) - 1
return f"[📎 {first}] [+{extra}]"
base = image_counter - len(attached_images) + 1
return " ".join(
f"[📎 Image #{base + i}]"
for i in range(len(attached_images))
)
def _collect_query_images(query: str | None, image_arg: str | None = None) -> tuple[str, list[Path]]:
    """Gather local image attachments for single-query CLI flows.

    Args:
        query: The query text; may begin with a local image path.
        image_arg: Optional explicit image path to attach.

    Returns:
        ``(message, images)``. When the query starts with an image path, that
        path is attached and ``message`` becomes the trailing text (or a
        placeholder naming the image if nothing follows). ``images`` keeps
        first-seen order with duplicate paths removed.

    Raises:
        ValueError: If ``image_arg`` does not resolve to an existing file or
            does not have a supported image extension.
    """
    message = query or ""
    found: list[Path] = []

    dropped = _detect_file_drop(message) if isinstance(message, str) else None
    if dropped and dropped.get("is_image"):
        found.append(dropped["path"])
        message = dropped["remainder"] or f"[User attached image: {dropped['path'].name}]"

    if image_arg:
        explicit_path = _resolve_attachment_path(image_arg)
        if explicit_path is None:
            raise ValueError(f"Image file not found: {image_arg}")
        if explicit_path.suffix.lower() not in _IMAGE_EXTENSIONS:
            raise ValueError(f"Not a supported image file: {explicit_path}")
        found.append(explicit_path)

    # Keyed on the string form so the first occurrence of each path wins.
    unique: dict[str, Path] = {}
    for candidate in found:
        unique.setdefault(str(candidate), candidate)
    return message, list(unique.values())
class ChatConsole:
"""Rich Console adapter for prompt_toolkit's patch_stdout context.
@ -2946,6 +3090,14 @@ class HermesCLI:
doesn't fire for image-only clipboard content (e.g., VSCode terminal,
Windows Terminal with WSL2).
"""
if _is_termux_environment():
_cprint(
f" {_DIM}Clipboard image paste is not available on Termux — "
f"use /image <path> or paste a local image path like "
f"~/storage/shared/Pictures/cat.png{_RST}"
)
return
from hermes_cli.clipboard import has_clipboard_image
if has_clipboard_image():
if self._try_attach_clipboard_image():
@ -2956,7 +3108,31 @@ class HermesCLI:
else:
_cprint(f" {_DIM}(._.) No image found in clipboard{_RST}")
def _preprocess_images_with_vision(self, text: str, images: list) -> str:
def _handle_image_command(self, cmd_original: str):
    """Handle /image <path> — attach a local image file for the next prompt.

    Prints a usage hint when no path is given, and an error when the path
    does not resolve to an existing file or is not a supported image type.
    On success the resolved path is appended to ``self._attached_images``.
    Any text after the path is not sent; it is only echoed back in a hint.

    Args:
        cmd_original: The full command line as typed, e.g.
            ``/image ~/pic.png``.
    """
    # split(None, 1) discards trailing whitespace, so "/image " yields a
    # single part. Check the part count instead of indexing [1] blindly.
    parts = cmd_original.split(None, 1)
    raw_args = parts[1].strip() if len(parts) > 1 else ""
    if not raw_args:
        hint = "~/storage/shared/Pictures/cat.png" if _is_termux_environment() else "/path/to/image.png"
        _cprint(f" {_DIM}Usage: /image <path> e.g. /image {hint}{_RST}")
        return

    path_token, _remainder = _split_path_input(raw_args)
    image_path = _resolve_attachment_path(path_token)
    if image_path is None:
        _cprint(f" {_DIM}(>_<) File not found: {path_token}{_RST}")
        return
    if image_path.suffix.lower() not in _IMAGE_EXTENSIONS:
        _cprint(f" {_DIM}(._.) Not a supported image file: {image_path.name}{_RST}")
        return

    self._attached_images.append(image_path)
    _cprint(f" 📎 Attached image: {image_path.name}")
    if _remainder:
        _cprint(f" {_DIM}Now type your prompt (or use --image in single-query mode): {_remainder}{_RST}")
    elif _is_termux_environment():
        _cprint(f" {_DIM}Tip: type your next message, or run hermes chat -q --image {image_path} \"What do you see?\"{_RST}")
def _preprocess_images_with_vision(self, text: str, images: list, *, announce: bool = True) -> str:
"""Analyze attached images via the vision tool and return enriched text.
Instead of embedding raw base64 ``image_url`` content parts in the
@ -2983,7 +3159,8 @@ class HermesCLI:
if not img_path.exists():
continue
size_kb = img_path.stat().st_size // 1024
_cprint(f" {_DIM}👁️ analyzing {img_path.name} ({size_kb}KB)...{_RST}")
if announce:
_cprint(f" {_DIM}👁️ analyzing {img_path.name} ({size_kb}KB)...{_RST}")
try:
result_json = _asyncio.run(
vision_analyze_tool(image_url=str(img_path), user_prompt=analysis_prompt)
@ -2996,21 +3173,24 @@ class HermesCLI:
f"[If you need a closer look, use vision_analyze with "
f"image_url: {img_path}]"
)
_cprint(f" {_DIM}✓ image analyzed{_RST}")
if announce:
_cprint(f" {_DIM}✓ image analyzed{_RST}")
else:
enriched_parts.append(
f"[The user attached an image but it couldn't be analyzed. "
f"You can try examining it with vision_analyze using "
f"image_url: {img_path}]"
)
_cprint(f" {_DIM}⚠ vision analysis failed — path included for retry{_RST}")
if announce:
_cprint(f" {_DIM}⚠ vision analysis failed — path included for retry{_RST}")
except Exception as e:
enriched_parts.append(
f"[The user attached an image but analysis failed ({e}). "
f"You can try examining it with vision_analyze using "
f"image_url: {img_path}]"
)
_cprint(f" {_DIM}⚠ vision analysis error — path included for retry{_RST}")
if announce:
_cprint(f" {_DIM}⚠ vision analysis error — path included for retry{_RST}")
# Combine: vision descriptions first, then the user's original text
user_text = text if isinstance(text, str) and text else ""
@ -3104,7 +3284,10 @@ class HermesCLI:
_cprint(f"\n {_DIM}Tip: Just type your message to chat with Hermes!{_RST}")
_cprint(f" {_DIM}Multi-line: Alt+Enter for a new line{_RST}")
_cprint(f" {_DIM}Paste image: Alt+V (or /paste){_RST}\n")
if _is_termux_environment():
_cprint(f" {_DIM}Attach image: /image ~/storage/shared/Pictures/cat.png or start your prompt with a local image path{_RST}\n")
else:
_cprint(f" {_DIM}Paste image: Alt+V (or /paste){_RST}\n")
def show_tools(self):
"""Display available tools with kawaii ASCII art."""
@ -4550,6 +4733,8 @@ class HermesCLI:
self._show_insights(cmd_original)
elif canonical == "paste":
self._handle_paste_command()
elif canonical == "image":
self._handle_image_command(cmd_original)
elif canonical == "reload-mcp":
with self._busy_command(self._slow_command_status(cmd_original)):
self._reload_mcp()
@ -8066,10 +8251,9 @@ class HermesCLI:
def _get_image_bar():
if not cli_ref._attached_images:
return []
base = cli_ref._image_counter - len(cli_ref._attached_images) + 1
badges = " ".join(
f"[📎 Image #{base + i}]"
for i in range(len(cli_ref._attached_images))
badges = _format_image_attachment_badges(
cli_ref._attached_images,
cli_ref._image_counter,
)
return [("class:image-badge", f" {badges} ")]
@ -8542,6 +8726,7 @@ class HermesCLI:
def main(
query: str = None,
q: str = None,
image: str = None,
toolsets: str = None,
skills: str | list[str] | tuple[str, ...] = None,
model: str = None,
@ -8567,6 +8752,7 @@ def main(
Args:
query: Single query to execute (then exit). Alias: -q
q: Shorthand for --query
image: Optional local image path to attach to a single query
toolsets: Comma-separated list of toolsets to enable (e.g., "web,terminal")
skills: Comma-separated or repeated list of skills to preload for the session
model: Model to use (default: anthropic/claude-opus-4-20250514)
@ -8587,6 +8773,7 @@ def main(
python cli.py --toolsets web,terminal # Use specific toolsets
python cli.py --skills hermes-agent-dev,github-auth
python cli.py -q "What is Python?" # Single query mode
python cli.py -q "Describe this" --image ~/storage/shared/Pictures/cat.png
python cli.py --list-tools # List tools and exit
python cli.py --resume 20260225_143052_a1b2c3 # Resume session
python cli.py -w # Start in isolated git worktree
@ -8709,13 +8896,21 @@ def main(
atexit.register(_run_cleanup)
# Handle single query mode
if query:
if query or image:
query, single_query_images = _collect_query_images(query, image)
if quiet:
# Quiet mode: suppress banner, spinner, tool previews.
# Only print the final response and parseable session info.
cli.tool_progress_mode = "off"
if cli._ensure_runtime_credentials():
turn_route = cli._resolve_turn_agent_config(query)
effective_query = query
if single_query_images:
effective_query = cli._preprocess_images_with_vision(
query,
single_query_images,
announce=False,
)
turn_route = cli._resolve_turn_agent_config(effective_query)
if turn_route["signature"] != cli._active_agent_route_signature:
cli.agent = None
if cli._init_agent(
@ -8726,7 +8921,7 @@ def main(
cli.agent.quiet_mode = True
cli.agent.suppress_status_output = True
result = cli.agent.run_conversation(
user_message=query,
user_message=effective_query,
conversation_history=cli.conversation_history,
)
response = result.get("final_response", "") if isinstance(result, dict) else str(result)
@ -8741,8 +8936,10 @@ def main(
sys.exit(1)
else:
cli.show_banner()
cli.console.print(f"[bold blue]Query:[/] {query}")
cli.chat(query)
_query_label = query or ("[image attached]" if single_query_images else "")
if _query_label:
cli.console.print(f"[bold blue]Query:[/] {_query_label}")
cli.chat(query, images=single_query_images or None)
cli._print_exit_summary()
return

View file

@ -135,6 +135,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
cli_only=True, aliases=("gateway",)),
CommandDef("paste", "Check clipboard for an image and attach it", "Info",
cli_only=True),
CommandDef("image", "Attach a local image file for your next prompt", "Info",
cli_only=True, args_hint="<path>"),
CommandDef("update", "Update Hermes Agent to the latest version", "Info",
gateway_only=True),

View file

@ -646,6 +646,7 @@ def cmd_chat(args):
"verbose": args.verbose,
"quiet": getattr(args, "quiet", False),
"query": args.query,
"image": getattr(args, "image", None),
"resume": getattr(args, "resume", None),
"worktree": getattr(args, "worktree", False),
"checkpoints": getattr(args, "checkpoints", False),
@ -4291,6 +4292,10 @@ For more help on a command:
"-q", "--query",
help="Single query (non-interactive mode)"
)
chat_parser.add_argument(
"--image",
help="Optional local image path to attach to a single query"
)
chat_parser.add_argument(
"-m", "--model",
help="Model to use (e.g., anthropic/claude-sonnet-4)"

View file

@ -147,6 +147,20 @@ class TestEscapedSpaces:
assert result["path"] == tmp_image_with_spaces
assert result["remainder"] == "what is this?"
def test_tilde_prefixed_path(self, tmp_path, monkeypatch):
    """A leading ``~/...`` path is expanded against HOME and detected as an image drop."""
    fake_home = tmp_path / "home"
    picture = fake_home / "storage" / "shared" / "Pictures" / "cat.png"
    picture.parent.mkdir(parents=True, exist_ok=True)
    picture.write_bytes(b"\x89PNG\r\n\x1a\n")
    monkeypatch.setenv("HOME", str(fake_home))

    detected = _detect_file_drop("~/storage/shared/Pictures/cat.png what is this?")

    assert detected is not None
    assert detected["path"] == picture
    assert detected["is_image"] is True
    assert detected["remainder"] == "what is this?"
# ---------------------------------------------------------------------------
# Tests: edge cases

View file

@ -0,0 +1,98 @@
from pathlib import Path
from unittest.mock import patch
from cli import (
HermesCLI,
_collect_query_images,
_format_image_attachment_badges,
)
def _make_cli():
    """Build a ``HermesCLI`` without running ``__init__``, with an empty attachment list."""
    bare_cli = HermesCLI.__new__(HermesCLI)
    setattr(bare_cli, "_attached_images", [])
    return bare_cli
def _make_image(path: Path) -> Path:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_bytes(b"\x89PNG\r\n\x1a\n")
return path
class TestImageCommand:
    """Behaviour of ``HermesCLI._handle_image_command`` with console output patched out."""

    def test_handle_image_command_attaches_local_image(self, tmp_path):
        """A plain path to an existing PNG is attached."""
        picture = _make_image(tmp_path / "photo.png")
        cli_under_test = _make_cli()

        with patch("cli._cprint"):
            cli_under_test._handle_image_command(f"/image {picture}")

        assert cli_under_test._attached_images == [picture]

    def test_handle_image_command_supports_quoted_path_with_spaces(self, tmp_path):
        """A double-quoted path containing a space is attached."""
        picture = _make_image(tmp_path / "my photo.png")
        cli_under_test = _make_cli()

        with patch("cli._cprint"):
            cli_under_test._handle_image_command(f'/image "{picture}"')

        assert cli_under_test._attached_images == [picture]

    def test_handle_image_command_rejects_non_image_file(self, tmp_path):
        """An existing non-image file is rejected with an explanatory message."""
        notes = tmp_path / "notes.txt"
        notes.write_text("hello\n", encoding="utf-8")
        cli_under_test = _make_cli()

        with patch("cli._cprint") as printed:
            cli_under_test._handle_image_command(f"/image {notes}")

        assert cli_under_test._attached_images == []
        # Flatten every positional argument of every print call into one string.
        pieces = []
        for recorded_call in printed.call_args_list:
            for positional in recorded_call.args:
                pieces.append(str(positional))
        rendered = " ".join(pieces)
        assert "Not a supported image file" in rendered
class TestCollectQueryImages:
    """Behaviour of ``_collect_query_images`` for single-query flows."""

    def test_collect_query_images_accepts_explicit_image_arg(self, tmp_path):
        """An explicit image argument is attached and the query text is left alone."""
        picture = _make_image(tmp_path / "diagram.png")

        result = _collect_query_images("describe this", str(picture))

        assert result[0] == "describe this"
        assert result[1] == [picture]

    def test_collect_query_images_extracts_leading_path(self, tmp_path):
        """A leading image path is pulled out of the query and the rest becomes the message."""
        picture = _make_image(tmp_path / "camera.png")

        result = _collect_query_images(f"{picture} what do you see?")

        assert result[0] == "what do you see?"
        assert result[1] == [picture]

    def test_collect_query_images_supports_tilde_paths(self, tmp_path, monkeypatch):
        """``~`` in the explicit image argument is expanded against HOME."""
        fake_home = tmp_path / "home"
        picture = _make_image(fake_home / "storage" / "shared" / "Pictures" / "cat.png")
        monkeypatch.setenv("HOME", str(fake_home))

        result = _collect_query_images("describe this", "~/storage/shared/Pictures/cat.png")

        assert result[0] == "describe this"
        assert result[1] == [picture]
class TestImageBadgeFormatting:
    """Badge rendering of ``_format_image_attachment_badges`` at narrow widths."""

    def test_compact_badges_use_filename_on_narrow_terminals(self, tmp_path):
        """A single image on a 40-column terminal shows a filename badge, not a numbered one."""
        screenshot = _make_image(tmp_path / "Screenshot 2026-04-09 at 11.22.33 AM.png")

        rendered = _format_image_attachment_badges([screenshot], image_counter=1, width=40)

        assert rendered.startswith("[📎 ")
        assert "Image #1" not in rendered

    def test_compact_badges_summarize_multiple_images(self, tmp_path):
        """Two images on a 45-column terminal collapse into a single count badge."""
        attachments = [
            _make_image(tmp_path / "one.png"),
            _make_image(tmp_path / "two.png"),
        ]

        rendered = _format_image_attachment_badges(attachments, image_counter=2, width=45)

        assert rendered == "[📎 2 images attached]"

View file

@ -49,6 +49,30 @@ def test_chat_subcommand_accepts_skills_flag(monkeypatch):
}
def test_chat_subcommand_accepts_image_flag(monkeypatch):
    """``hermes chat -q ... --image ...`` parses both values and hands them to ``cmd_chat`` unchanged."""
    import hermes_cli.main as main_mod

    captured = {}

    def fake_cmd_chat(args):
        # Record only the two fields under test, in a fixed order.
        captured.update(query=args.query, image=args.image)

    command_line = ["hermes", "chat", "-q", "hello", "--image", "~/storage/shared/Pictures/cat.png"]
    monkeypatch.setattr(main_mod, "cmd_chat", fake_cmd_chat)
    monkeypatch.setattr(sys, "argv", command_line)

    main_mod.main()

    expected = {
        "query": "hello",
        "image": "~/storage/shared/Pictures/cat.png",
    }
    assert captured == expected
def test_continue_worktree_and_skills_flags_work_together(monkeypatch):
import hermes_cli.main as main_mod