From 975e13091e9562011254fba1dbc3b4245589bb74 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 20 May 2026 23:05:46 -0700
Subject: [PATCH] fix(cli): honour image-routing decision in quiet-mode -q
 --image path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The interactive CLI input path consults decide_image_input_mode() to pick
between native image_url attachment and the vision_analyze text pipeline,
but the non-interactive 'hermes chat -Q -q ... --image FOO' path
unconditionally called _preprocess_images_with_vision() — so even with
`model.supports_vision: true` set, --image always went through the
text-pipeline. Symptom: vision_analyze runs 4-5s per image and the model
sees a lossy text summary instead of the actual pixels.

Mirror the interactive path: load config, call decide_image_input_mode,
branch on native vs text. Falls back to the text-pipeline on any import
or build error (Pyright-clean: _build_parts guarded with `is not None`).

Live E2E (provider=custom, base_url=openrouter, anthropic/claude-haiku-4.5,
red 64x64 PNG):
  baseline (no override): vision_analyze called (8 log lines), 5.8s
  with supports_vision:   vision_analyze NOT called (0 log lines),  3.9s
Same model, same image, single knob flips text→native routing.
---
 cli.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 47 insertions(+), 6 deletions(-)

diff --git a/cli.py b/cli.py
index 9e1b0a628e5..2783ca31bf2 100644
--- a/cli.py
+++ b/cli.py
@@ -14423,13 +14423,54 @@ def main(
             # Only print the final response and parseable session info.
             cli.tool_progress_mode = "off"
             if cli._ensure_runtime_credentials():
-                effective_query = query
+                effective_query: Any = query
                 if single_query_images:
-                    effective_query = cli._preprocess_images_with_vision(
-                        query,
-                        single_query_images,
-                        announce=False,
-                    )
+                    # Honour the same image-routing decision used by the
+                    # interactive path. With a vision-capable model (incl.
+                    # custom-provider models declared via
+                    # `model.supports_vision: true`), attach images natively
+                    # as image_url content parts. Otherwise fall back to the
+                    # text-pipeline (vision_analyze pre-description).
+                    _img_mode = "text"
+                    _build_parts = None
+                    try:
+                        from agent.image_routing import (
+                            build_native_content_parts as _build_parts,  # noqa: F811
+                        )
+                        from agent.image_routing import decide_image_input_mode
+                        from hermes_cli.config import load_config
+
+                        _img_mode = decide_image_input_mode(
+                            (cli.provider or "").strip(),
+                            (cli.model or "").strip(),
+                            load_config(),
+                        )
+                    except Exception:
+                        _img_mode = "text"
+
+                    if _img_mode == "native" and _build_parts is not None:
+                        try:
+                            _parts, _skipped = _build_parts(
+                                query if isinstance(query, str) else "",
+                                [str(p) for p in single_query_images],
+                            )
+                            if any(p.get("type") == "image_url" for p in _parts):
+                                effective_query = _parts
+                            else:
+                                # All images unreadable — text fallback.
+                                effective_query = cli._preprocess_images_with_vision(
+                                    query, single_query_images, announce=False,
+                                )
+                        except Exception:
+                            effective_query = cli._preprocess_images_with_vision(
+                                query, single_query_images, announce=False,
+                            )
+                    else:
+                        effective_query = cli._preprocess_images_with_vision(
+                            query,
+                            single_query_images,
+                            announce=False,
+                        )
                 turn_route = cli._resolve_turn_agent_config(effective_query)
                 if turn_route["signature"] != cli._active_agent_route_signature:
                     cli.agent = None