# hermes-agent/tests/agent/test_vision_non_vision_fallthrough.py

"""Verify that vision auto-detection skips non-vision main models.

Regression test for #14744: when the user's main provider (e.g. ollama-cloud)
is not in _PROVIDER_VISION_MODELS and the main model is not vision-capable,
auto-detection should skip directly to the aggregator fallbacks instead of
sending an image payload to a text-only model.
"""
from types import SimpleNamespace
from unittest.mock import patch, MagicMock
import pytest
from agent.auxiliary_client import _is_likely_vision_model, call_llm
# -- _is_likely_vision_model heuristic --
class TestIsLikelyVisionModel:
    """Tests for the vision model name heuristic."""

    # Model names the heuristic is expected to flag as vision-capable.
    _EXPECTED_VISION = (
        "gpt-4o",
        "gpt-4o-mini",
        "claude-sonnet-4.6",
        "gemini-3-flash-preview",
        "mimo-v2-omni",
        "mimo-v2.5",
        "llava-v1.6",
        "qwen-vl-plus",
        "qwen2-vl-7b",
        "glm-5v-turbo",
        "glm-4v",
        "pixtral-12b",
        "internvl2-8b",
        "cogvlm-17b",
        "idefics2-8b",
        "some-vision-model",
        "my-multimodal-v3",
        "deepseek-vl-7b",
    )

    # Model names the heuristic is expected to reject; the empty string is
    # included as the "no model configured" edge case.
    _EXPECTED_TEXT_ONLY = (
        "llama3",
        "llama3:70b",
        "mistral-7b",
        "qwen3:14b",
        "deepseek-coder-v2",
        "codestral-latest",
        "nemotron-3-nano:30b",
        "phi-3-mini",
        "mixtral-8x7b",
        "",
    )

    @pytest.mark.parametrize("model", _EXPECTED_VISION)
    def test_vision_models_detected(self, model):
        """Each listed name must make the heuristic return exactly ``True``."""
        verdict = _is_likely_vision_model(model)
        assert verdict is True

    @pytest.mark.parametrize("model", _EXPECTED_TEXT_ONLY)
    def test_non_vision_models_rejected(self, model):
        """Each listed name must make the heuristic return exactly ``False``."""
        verdict = _is_likely_vision_model(model)
        assert verdict is False

    def test_none_returns_false(self):
        """An empty model name must be rejected.

        NOTE(review): despite the test name, the argument is ``""`` and not
        ``None``, which duplicates the last parametrized case above. Confirm
        whether ``_is_likely_vision_model`` is meant to accept ``None`` and,
        if so, pass ``None`` here instead.
        """
        verdict = _is_likely_vision_model("")
        assert verdict is False
# -- Vision auto-detection with non-vision main model --
class TestVisionNonVisionFallthrough:
    """Vision auto-detect must skip non-vision main models (#14744)."""

    @staticmethod
    def _resolve_vision(main_provider, main_model, **extra_patches):
        """Run ``resolve_vision_provider_client`` with the main config patched.

        Args:
            main_provider: Value the patched ``_read_main_provider`` returns.
            main_model: Value the patched ``_read_main_model`` returns.
            **extra_patches: Maps an ``agent.auxiliary_client`` attribute name
                to the keyword arguments handed to ``patch`` for it. Pass
                ``{}`` to install a bare ``MagicMock``.

        Returns:
            A pair ``((provider, client, model), mocks)``. ``mocks`` maps every
            patched attribute name to the mock that replaced it, so callers
            can still inspect recorded calls after the patches are undone.
        """
        from contextlib import ExitStack

        from agent.auxiliary_client import resolve_vision_provider_client

        patch_kwargs = {
            "_read_main_provider": {"return_value": main_provider},
            "_read_main_model": {"return_value": main_model},
            # Same tuple for every test in this class; the leading "auto"
            # presumably selects auto-detection -- confirm against
            # _resolve_task_provider_model.
            "_resolve_task_provider_model": {
                "return_value": ("auto", None, None, None, None),
            },
        }
        patch_kwargs.update(extra_patches)

        with ExitStack() as stack:
            mocks = {
                name: stack.enter_context(
                    patch(f"agent.auxiliary_client.{name}", **kwargs)
                )
                for name, kwargs in patch_kwargs.items()
            }
            resolved = resolve_vision_provider_client()
        return resolved, mocks

    def test_ollama_cloud_non_vision_skips_to_aggregator(self):
        """ollama-cloud with llama3 must skip to aggregator, not try llama3."""
        mock_aggregator_client = MagicMock()
        (_, client, model), mocks = self._resolve_vision(
            "ollama-cloud",
            "llama3",
            resolve_provider_client={},
            _resolve_strict_vision_backend={
                "return_value": (
                    mock_aggregator_client,
                    "google/gemini-3-flash-preview",
                ),
            },
        )

        # Should have fallen through to the aggregator
        assert client is mock_aggregator_client
        assert model == "google/gemini-3-flash-preview"

        # resolve_provider_client must NOT have been asked for llama3 on
        # ollama-cloud. Positional and keyword values are both inspected so
        # the check neither raises IndexError on short calls nor misses a
        # keyword-style call.
        for recorded in mocks["resolve_provider_client"].call_args_list:
            passed = [*recorded.args, *recorded.kwargs.values()]
            assert not ("ollama-cloud" in passed and "llama3" in passed), (
                "Should not have tried llama3 on ollama-cloud for vision"
            )

    def test_ollama_with_llava_uses_main_provider(self):
        """ollama with llava (vision model) must use main provider directly."""
        mock_client = MagicMock()
        (provider, client, model), _ = self._resolve_vision(
            "ollama",
            "llava-v1.6",
            resolve_provider_client={
                "return_value": (mock_client, "llava-v1.6"),
            },
        )

        assert provider == "ollama"
        assert client is mock_client
        assert model == "llava-v1.6"

    def test_provider_with_vision_override_still_works(self):
        """xiaomi with explicit vision override must still use the override."""
        mock_client = MagicMock()
        (provider, client, model), _ = self._resolve_vision(
            "xiaomi",
            "mimo-v2-pro",
            resolve_provider_client={
                "return_value": (mock_client, "mimo-v2.5"),
            },
        )

        assert provider == "xiaomi"
        assert client is mock_client
        assert model == "mimo-v2.5"

    def test_named_custom_provider_unknown_model_is_trusted(self):
        """Named custom providers should not be skipped by the name heuristic."""
        mock_client = MagicMock()
        (provider, client, model), _ = self._resolve_vision(
            "beans",
            "my-company-vlm",
            _get_named_custom_provider_entry={
                "return_value": {
                    "name": "beans",
                    "base_url": "http://vlm.test/v1",
                },
            },
            resolve_provider_client={
                "return_value": (mock_client, "my-company-vlm"),
            },
        )

        assert provider == "beans"
        assert client is mock_client
        assert model == "my-company-vlm"

    def test_named_custom_provider_can_declare_vision_model_override(self):
        """Named custom providers can route vision to a dedicated model."""
        mock_client = MagicMock()
        (provider, client, model), mocks = self._resolve_vision(
            "beans",
            "chat-model",
            _get_named_custom_provider_entry={
                "return_value": {
                    "name": "beans",
                    "base_url": "http://vlm.test/v1",
                    "models": {"chat-model": {"vision_model": "vision-model"}},
                },
            },
            resolve_provider_client={
                "return_value": (mock_client, "vision-model"),
            },
        )

        assert provider == "beans"
        assert client is mock_client
        assert model == "vision-model"
        # The override model, not the chat model, must be what was requested.
        requested = mocks["resolve_provider_client"].call_args.args[:2]
        assert requested == ("beans", "vision-model")

    def test_non_vision_model_all_aggregators_fail(self):
        """Non-vision main + no aggregators available must return None."""
        (_, client, model), _ = self._resolve_vision(
            "ollama-cloud",
            "qwen3:14b",
            _resolve_strict_vision_backend={"return_value": (None, None)},
        )

        assert client is None
        assert model is None
class VisionUnsupportedError(Exception):
    """Test-only error that carries an HTTP-style ``status_code`` attribute.

    Args:
        message: Text forwarded to ``Exception`` (becomes ``str(error)``).
        status_code: Stored unchanged on the instance; defaults to 400.
    """

    def __init__(self, message, status_code=400):
        Exception.__init__(self, message)
        self.status_code = status_code
class TestVisionCapabilityFallback:
    """``call_llm`` behaviour when the resolved vision client rejects the call."""

    def test_call_llm_retries_auto_vision_on_capability_error(self):
        """A text-only main provider should fall through to strict vision backends."""
        expected = SimpleNamespace(
            choices=[SimpleNamespace(message=SimpleNamespace(content="ok"))]
        )
        # First client: every completion attempt raises the capability error.
        rejecting_client = MagicMock(
            **{
                "chat.completions.create.side_effect": VisionUnsupportedError(
                    "This model does not support image input"
                ),
            }
        )
        # Second client: answers with the canned response.
        recovering_client = MagicMock(
            **{"chat.completions.create.return_value": expected}
        )

        task_patch = patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        )
        resolver_patch = patch(
            "agent.auxiliary_client.resolve_vision_provider_client",
            return_value=("ollama-cloud", rejecting_client, "llama3"),
        )
        kwargs_patch = patch(
            "agent.auxiliary_client._build_call_kwargs",
            return_value={
                "model": "llama3",
                "messages": [{"role": "user", "content": "analyze"}],
            },
        )
        fallback_patch = patch(
            "agent.auxiliary_client._try_vision_fallback",
            return_value=(
                recovering_client,
                "google/gemini-3-flash-preview",
                "openrouter",
            ),
        )

        with task_patch, resolver_patch, kwargs_patch, fallback_patch:
            result = call_llm(
                task="vision",
                messages=[{"role": "user", "content": "analyze"}],
            )

        # The caller gets the fallback's response, and each client was tried
        # exactly once.
        assert result is expected
        assert rejecting_client.chat.completions.create.call_count == 1
        assert recovering_client.chat.completions.create.call_count == 1