mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-30 11:52:04 +00:00
fix(vision): detect Ollama vision models via /api/show (#54511)
When local Ollama models are absent from models.dev, probe the Ollama server's /api/show capabilities so attached images are routed natively instead of being stripped as non-vision input.
This commit is contained in:
parent
b481348fbc
commit
d7e573e54d
2 changed files with 144 additions and 4 deletions
|
|
@ -251,6 +251,78 @@ def _supports_vision_override(
|
|||
return None
|
||||
|
||||
|
||||
def _resolve_inference_base_url(
|
||||
cfg: Optional[Dict[str, Any]],
|
||||
provider: str,
|
||||
) -> str:
|
||||
"""Best-effort base URL for the active inference provider."""
|
||||
try:
|
||||
from agent.auxiliary_client import _RUNTIME_MAIN_BASE_URL
|
||||
|
||||
runtime = str(_RUNTIME_MAIN_BASE_URL or "").strip()
|
||||
if runtime:
|
||||
return runtime
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not isinstance(cfg, dict):
|
||||
return ""
|
||||
|
||||
model_cfg_raw = cfg.get("model")
|
||||
model_cfg: Dict[str, Any] = model_cfg_raw if isinstance(model_cfg_raw, dict) else {}
|
||||
base_url = str(model_cfg.get("base_url") or "").strip()
|
||||
if base_url:
|
||||
return base_url
|
||||
|
||||
config_provider = str(model_cfg.get("provider") or "").strip()
|
||||
candidate_names: set[str] = set()
|
||||
for p in filter(None, (provider, config_provider)):
|
||||
candidate_names.add(p)
|
||||
if p.lower().startswith("custom:"):
|
||||
candidate_names.add(p.split(":", 1)[1])
|
||||
else:
|
||||
candidate_names.add(f"custom:{p}")
|
||||
|
||||
providers_cfg = cfg.get("providers")
|
||||
if isinstance(providers_cfg, dict):
|
||||
for name in candidate_names:
|
||||
entry = providers_cfg.get(name)
|
||||
if isinstance(entry, dict):
|
||||
bu = str(entry.get("base_url") or "").strip()
|
||||
if bu:
|
||||
return bu
|
||||
|
||||
custom_providers = cfg.get("custom_providers")
|
||||
if isinstance(custom_providers, list):
|
||||
lowered = {n.lower() for n in candidate_names}
|
||||
for entry_raw in custom_providers:
|
||||
if not isinstance(entry_raw, dict):
|
||||
continue
|
||||
entry_name = str(entry_raw.get("name") or "").strip()
|
||||
if entry_name not in candidate_names and entry_name.lower() not in lowered:
|
||||
continue
|
||||
bu = str(entry_raw.get("base_url") or "").strip()
|
||||
if bu:
|
||||
return bu
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def _should_probe_ollama_vision(provider: str, base_url: str) -> bool:
|
||||
"""True when the active provider likely fronts a local Ollama server."""
|
||||
p = (provider or "").strip().lower()
|
||||
if p == "ollama":
|
||||
return True
|
||||
if not base_url:
|
||||
return False
|
||||
try:
|
||||
from agent.model_metadata import detect_local_server_type
|
||||
|
||||
return detect_local_server_type(base_url) == "ollama"
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _coerce_mode(raw: Any) -> str:
|
||||
"""Normalize a config value into one of the valid modes."""
|
||||
if not isinstance(raw, str):
|
||||
|
|
@ -302,15 +374,33 @@ def _lookup_supports_vision(
|
|||
return override
|
||||
if not provider or not model:
|
||||
return None
|
||||
caps = None
|
||||
try:
|
||||
from agent.models_dev import get_model_capabilities
|
||||
caps = get_model_capabilities(provider, model)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.debug("image_routing: caps lookup failed for %s:%s — %s", provider, model, exc)
|
||||
return None
|
||||
if caps is None:
|
||||
return None
|
||||
return bool(caps.supports_vision)
|
||||
if caps is not None:
|
||||
return bool(caps.supports_vision)
|
||||
|
||||
base_url = _resolve_inference_base_url(cfg, provider)
|
||||
if not base_url and (provider or "").strip().lower() == "ollama":
|
||||
base_url = "http://localhost:11434/v1"
|
||||
if _should_probe_ollama_vision(provider, base_url):
|
||||
try:
|
||||
from agent.model_metadata import query_ollama_supports_vision
|
||||
|
||||
ollama_vision = query_ollama_supports_vision(model, base_url)
|
||||
if ollama_vision is not None:
|
||||
return ollama_vision
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.debug(
|
||||
"image_routing: ollama vision probe failed for %s:%s — %s",
|
||||
provider,
|
||||
model,
|
||||
exc,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def decide_image_input_mode(
|
||||
|
|
|
|||
|
|
@ -1199,6 +1199,56 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
|
|||
return None
|
||||
|
||||
|
||||
def query_ollama_supports_vision(model: str, base_url: str, api_key: str = "") -> Optional[bool]:
    """Return True/False when Ollama ``/api/show`` reports vision support.

    Uses the ``capabilities`` field on Ollama 0.6.0+ and falls back to
    ``model_info.*.vision.block_count`` on older servers.

    Args:
        model: Model identifier; any provider prefix is removed before the
            request is sent.
        base_url: Base URL of the server. A trailing ``/v1`` (the
            OpenAI-compatible path) is removed to reach the native API.
        api_key: Optional credential used for server detection and for the
            request headers.

    Returns:
        True when the response lists a ``vision`` capability or has a
        ``model_info`` key containing ``vision.block_count``; False when a
        non-empty ``capabilities`` list lacks ``vision``; None when the
        server is unreachable, not Ollama, the model is unknown, or the
        response is not a JSON object.
    """
    import httpx

    bare_model = _strip_provider_prefix(model)
    if not bare_model or not base_url:
        return None

    try:
        if detect_local_server_type(base_url, api_key=api_key) != "ollama":
            return None
    except Exception:
        return None

    # /api/show lives at the server root, not under the OpenAI-style /v1.
    server_url = base_url.rstrip("/")
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]

    headers = _auth_headers(api_key)

    try:
        with httpx.Client(timeout=3.0, headers=headers) as client:
            resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
            if resp.status_code != 200:
                return None
            data = resp.json()
    except Exception:
        return None

    # A proxy or non-Ollama endpoint may answer 200 with a JSON array,
    # string or null; treat that as "unknown" instead of raising on .get().
    if not isinstance(data, dict):
        return None

    caps = data.get("capabilities")
    if isinstance(caps, list):
        if any(str(cap).lower() == "vision" for cap in caps):
            return True
        # A non-empty list without "vision" is an authoritative "no"; an
        # empty list carries no information, so fall through to model_info.
        if caps:
            return False

    model_info = data.get("model_info")
    if isinstance(model_info, dict):
        if any("vision.block_count" in str(key).lower() for key in model_info):
            return True

    return None
|
||||
|
||||
|
||||
def _query_ollama_api_show(model: str, base_url: str, api_key: str = "") -> Optional[int]:
|
||||
"""Query an Ollama server's native ``/api/show`` for context length.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue