mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-01 07:01:41 +00:00
fix(cli): honour image-routing decision in quiet-mode -q --image path
The interactive CLI input path consults decide_image_input_mode() to pick between native image_url attachment and the vision_analyze text pipeline, but the non-interactive 'hermes chat -Q -q ... --image FOO' path unconditionally called _preprocess_images_with_vision(). As a result, even with `model.supports_vision: true` set, --image always went through the text pipeline.

Symptom: vision_analyze runs 4-5s per image, and the model sees a lossy text summary instead of the actual pixels.

Fix: mirror the interactive path — load the config, call decide_image_input_mode(), and branch on native vs. text. The code falls back to the text pipeline on any import or build error (Pyright-clean: _build_parts is guarded with `is not None`).

Live E2E (provider=custom, base_url=openrouter, anthropic/claude-haiku-4.5, red 64x64 PNG):
- baseline (no override): vision_analyze called (8 log lines), 5.8s
- with supports_vision: vision_analyze NOT called (0 log lines), 3.9s

Same model, same image; a single knob flips text→native routing.
This commit is contained in:
parent
32aea113f0
commit
975e13091e
1 changed file with 47 additions and 6 deletions
53
cli.py
53
cli.py
|
|
@@ -14423,13 +14423,54 @@ def main(
|
|||
# Only print the final response and parseable session info.
|
||||
cli.tool_progress_mode = "off"
|
||||
if cli._ensure_runtime_credentials():
|
||||
effective_query = query
|
||||
effective_query: Any = query
|
||||
if single_query_images:
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query,
|
||||
single_query_images,
|
||||
announce=False,
|
||||
)
|
||||
# Honour the same image-routing decision used by the
|
||||
# interactive path. With a vision-capable model (incl.
|
||||
# custom-provider models declared via
|
||||
# `model.supports_vision: true`), attach images natively
|
||||
# as image_url content parts. Otherwise fall back to the
|
||||
# text-pipeline (vision_analyze pre-description).
|
||||
_img_mode = "text"
|
||||
_build_parts = None
|
||||
try:
|
||||
from agent.image_routing import (
|
||||
build_native_content_parts as _build_parts, # noqa: F811
|
||||
)
|
||||
from agent.image_routing import decide_image_input_mode
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
_img_mode = decide_image_input_mode(
|
||||
(cli.provider or "").strip(),
|
||||
(cli.model or "").strip(),
|
||||
load_config(),
|
||||
)
|
||||
except Exception:
|
||||
_img_mode = "text"
|
||||
|
||||
if _img_mode == "native" and _build_parts is not None:
|
||||
try:
|
||||
_parts, _skipped = _build_parts(
|
||||
query if isinstance(query, str) else "",
|
||||
[str(p) for p in single_query_images],
|
||||
)
|
||||
if any(p.get("type") == "image_url" for p in _parts):
|
||||
effective_query = _parts
|
||||
else:
|
||||
# All images unreadable — text fallback.
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query, single_query_images, announce=False,
|
||||
)
|
||||
except Exception:
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query, single_query_images, announce=False,
|
||||
)
|
||||
else:
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query,
|
||||
single_query_images,
|
||||
announce=False,
|
||||
)
|
||||
turn_route = cli._resolve_turn_agent_config(effective_query)
|
||||
if turn_route["signature"] != cli._active_agent_route_signature:
|
||||
cli.agent = None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue