feat(agent): allow declaring supports_vision via user config

Custom/local provider models absent from models.dev get classified as
non-vision and have their image content stripped before reaching the
upstream API. Surface a user-facing override:

  model:
    supports_vision: true

  providers:
    my-vllm:
      models:
        my-llava:
          supports_vision: true

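The override short-circuits the models.dev lookup in
_model_supports_vision(), which is the single gate guarding image-strip
preprocessing on every transport path. The top-level model.supports_vision
is checked first, then the per-provider/per-model entry; an explicit false
is honored too, disabling vision for models that models.dev reports as
vision-capable.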

Refs #8731.
This commit is contained in:
Muspi Merol 2026-04-30 20:06:19 +08:00 committed by Teknium
parent b4afc6546e
commit 24c7ce0fb8
2 changed files with 41 additions and 4 deletions

View file

@ -3200,17 +3200,29 @@ class AIAgent:
Used to decide whether to strip image content parts from API-bound
messages (for non-vision models) or let the provider adapter handle
them natively (for vision-capable models).
Resolution order:
1. ``model.supports_vision`` (top-level, single-model shortcut)
2. ``providers.<provider>.models.<model>.supports_vision``
3. models.dev capability lookup
Custom/local models absent from models.dev would otherwise be
misclassified as non-vision and have their images stripped.
"""
try:
from agent.models_dev import get_model_capabilities
from hermes_cli.config import cfg_get, load_config
cfg = load_config()
provider = (getattr(self, "provider", "") or "").strip()
model = (getattr(self, "model", "") or "").strip()
for keys in (("model", "supports_vision"),
("providers", provider, "models", model, "supports_vision")):
override = cfg_get(cfg, *keys)
if override is not None:
return bool(override)
from agent.models_dev import get_model_capabilities
if not provider or not model:
return False
caps = get_model_capabilities(provider, model)
if caps is None:
return False
return bool(caps.supports_vision)
return bool(caps and caps.supports_vision)
except Exception:
return False

View file

@ -168,3 +168,28 @@ class TestModelSupportsVision:
agent = _make_agent()
with patch("agent.models_dev.get_model_capabilities", side_effect=RuntimeError("boom")):
assert agent._model_supports_vision() is False
def test_top_level_model_override_wins(self):
    """Top-level ``model.supports_vision: true`` yields True.

    The models.dev lookup is patched to return ``None`` so the only
    source of a positive answer is the user config override.
    """
    vision_agent = _make_agent()
    vision_agent.provider = "custom"
    vision_agent.model = "my-llava"

    user_config = {"model": {"supports_vision": True}}
    config_patch = patch("hermes_cli.config.load_config", return_value=user_config)
    caps_patch = patch("agent.models_dev.get_model_capabilities", return_value=None)

    with config_patch, caps_patch:
        result = vision_agent._model_supports_vision()

    assert result is True
def test_per_provider_per_model_override_wins(self):
    """``providers.<provider>.models.<model>.supports_vision: true`` yields True.

    The models.dev lookup is patched to return ``None`` so the only
    source of a positive answer is the nested per-model override.
    """
    provider_name = "custom"
    model_name = "my-llava"

    vision_agent = _make_agent()
    vision_agent.provider = provider_name
    vision_agent.model = model_name

    user_config = {
        "providers": {
            provider_name: {
                "models": {
                    model_name: {"supports_vision": True},
                },
            },
        },
    }
    config_patch = patch("hermes_cli.config.load_config", return_value=user_config)
    caps_patch = patch("agent.models_dev.get_model_capabilities", return_value=None)

    with config_patch, caps_patch:
        result = vision_agent._model_supports_vision()

    assert result is True
def test_override_false_disables_vision_for_models_dev_models(self):
    """An explicit ``model.supports_vision: false`` yields False.

    The models.dev lookup is patched to report a vision-capable model,
    so a False result shows the config override took precedence.
    """
    vision_agent = _make_agent()

    # Capabilities stub that claims vision support.
    vision_caps = MagicMock(supports_vision=True)

    user_config = {"model": {"supports_vision": False}}
    config_patch = patch("hermes_cli.config.load_config", return_value=user_config)
    caps_patch = patch(
        "agent.models_dev.get_model_capabilities",
        return_value=vision_caps,
    )

    with config_patch, caps_patch:
        result = vision_agent._model_supports_vision()

    assert result is False