fix(cli): honour image-routing decision in quiet-mode -q --image path

The interactive CLI input path consults decide_image_input_mode() to pick between native image_url attachment and the vision_analyze text pipeline, but the non-interactive 'hermes chat -Q -q ... --image FOO' path unconditionally called _preprocess_images_with_vision() — so even with `model.supports_vision: true` set, --image always went through the text pipeline. Symptom: vision_analyze runs 4-5s per image and the model sees a lossy text summary instead of the actual pixels. Mirror the interactive path: load config, call decide_image_input_mode, branch on native vs text. Falls back to the text pipeline on any import or build error, or when none of the images can be attached natively (Pyright-clean: _build_parts guarded with `is not None`). Live E2E (provider=custom, base_url=openrouter, anthropic/claude-haiku-4.5, red 64x64 PNG): baseline (no override): vision_analyze called (8 log lines), 5.8s; with supports_vision: vision_analyze NOT called (0 log lines), 3.9s. Same model, same image; a single knob flips text→native routing.
2026-07-16 14:32:34 +00:00 · 2026-05-20 23:05:46 -07:00 · 2026-05-20 23:05:46 -07:00 · 975e13091e
commit 975e13091e
parent 32aea113f0
1 changed files with 47 additions and 6 deletions
--- a/cli.py
+++ b/cli.py
@@ -14423,13 +14423,54 @@ def main(
            # Only print the final response and parseable session info.
            cli.tool_progress_mode = "off"
            if cli._ensure_runtime_credentials():
-                effective_query = query
+                effective_query: Any = query
                if single_query_images:
-                    effective_query = cli._preprocess_images_with_vision(
-                        query,
-                        single_query_images,
-                        announce=False,
-                    )
+                    # Honour the same image-routing decision used by the
+                    # interactive path. With a vision-capable model (incl.
+                    # custom-provider models declared via
+                    # `model.supports_vision: true`), attach images natively
+                    # as image_url content parts. Otherwise fall back to the
+                    # text-pipeline (vision_analyze pre-description).
+                    _img_mode = "text"
+                    _build_parts = None
+                    try:
+                        from agent.image_routing import (
+                            build_native_content_parts as _build_parts,  # noqa: F811
+                        )
+                        from agent.image_routing import decide_image_input_mode
+                        from hermes_cli.config import load_config
+
+                        _img_mode = decide_image_input_mode(
+                            (cli.provider or "").strip(),
+                            (cli.model or "").strip(),
+                            load_config(),
+                        )
+                    except Exception:
+                        _img_mode = "text"
+
+                    if _img_mode == "native" and _build_parts is not None:
+                        try:
+                            _parts, _skipped = _build_parts(
+                                query if isinstance(query, str) else "",
+                                [str(p) for p in single_query_images],
+                            )
+                            if any(p.get("type") == "image_url" for p in _parts):
+                                effective_query = _parts
+                            else:
+                                # All images unreadable — text fallback.
+                                effective_query = cli._preprocess_images_with_vision(
+                                    query, single_query_images, announce=False,
+                                )
+                        except Exception:
+                            effective_query = cli._preprocess_images_with_vision(
+                                query, single_query_images, announce=False,
+                            )
+                    else:
+                        effective_query = cli._preprocess_images_with_vision(
+                            query,
+                            single_query_images,
+                            announce=False,
+                        )
                turn_route = cli._resolve_turn_agent_config(effective_query)
                if turn_route["signature"] != cli._active_agent_route_signature:
                    cli.agent = None