From 24c7ce0fb86d14846c6e67ac96285e0662eb26e9 Mon Sep 17 00:00:00 2001 From: Muspi Merol Date: Thu, 30 Apr 2026 20:06:19 +0800 Subject: [PATCH] feat(agent): allow declaring supports_vision via user config Custom/local provider models absent from models.dev get classified as non-vision and have their image content stripped before reaching the upstream API. Surface a user-facing override: model: supports_vision: true providers: my-vllm: models: my-llava: supports_vision: true The override short-circuits the models.dev lookup in _model_supports_vision(), which is the single gate guarding image-strip preprocessing on every transport path. Refs #8731. --- run_agent.py | 20 ++++++++++++--- .../test_vision_aware_preprocessing.py | 25 +++++++++++++++++++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/run_agent.py b/run_agent.py index 6c4d54d7581..d76894cf05b 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3200,17 +3200,29 @@ class AIAgent: Used to decide whether to strip image content parts from API-bound messages (for non-vision models) or let the provider adapter handle them natively (for vision-capable models). + + Resolution order: + 1. ``model.supports_vision`` (top-level, single-model shortcut) + 2. ``providers.<provider>.models.<model>.supports_vision`` + 3. models.dev capability lookup + Custom/local models absent from models.dev would otherwise be + misclassified as non-vision and have their images stripped. 
""" try: - from agent.models_dev import get_model_capabilities + from hermes_cli.config import cfg_get, load_config + cfg = load_config() provider = (getattr(self, "provider", "") or "").strip() model = (getattr(self, "model", "") or "").strip() + for keys in (("model", "supports_vision"), + ("providers", provider, "models", model, "supports_vision")): + override = cfg_get(cfg, *keys) + if override is not None: + return bool(override) + from agent.models_dev import get_model_capabilities if not provider or not model: return False caps = get_model_capabilities(provider, model) - if caps is None: - return False - return bool(caps.supports_vision) + return bool(caps and caps.supports_vision) except Exception: return False diff --git a/tests/run_agent/test_vision_aware_preprocessing.py b/tests/run_agent/test_vision_aware_preprocessing.py index 5211ead2a47..08fe6502e96 100644 --- a/tests/run_agent/test_vision_aware_preprocessing.py +++ b/tests/run_agent/test_vision_aware_preprocessing.py @@ -168,3 +168,28 @@ class TestModelSupportsVision: agent = _make_agent() with patch("agent.models_dev.get_model_capabilities", side_effect=RuntimeError("boom")): assert agent._model_supports_vision() is False + + def test_top_level_model_override_wins(self): + agent = _make_agent() + agent.provider = "custom" + agent.model = "my-llava" + with patch("hermes_cli.config.load_config", return_value={"model": {"supports_vision": True}}), \ + patch("agent.models_dev.get_model_capabilities", return_value=None): + assert agent._model_supports_vision() is True + + def test_per_provider_per_model_override_wins(self): + agent = _make_agent() + agent.provider = "custom" + agent.model = "my-llava" + cfg = {"providers": {"custom": {"models": {"my-llava": {"supports_vision": True}}}}} + with patch("hermes_cli.config.load_config", return_value=cfg), \ + patch("agent.models_dev.get_model_capabilities", return_value=None): + assert agent._model_supports_vision() is True + + def 
test_override_false_disables_vision_for_models_dev_models(self): + agent = _make_agent() + fake_caps = MagicMock() + fake_caps.supports_vision = True + with patch("hermes_cli.config.load_config", return_value={"model": {"supports_vision": False}}), \ + patch("agent.models_dev.get_model_capabilities", return_value=fake_caps): + assert agent._model_supports_vision() is False