mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
280 lines
10 KiB
Python
280 lines
10 KiB
Python
"""Verify vision auto-detection skips non-vision models to aggregator fallback.
|
|
|
|
Regression test for #14744 -- when the user's main provider (e.g. ollama-cloud)
|
|
is not in _PROVIDER_VISION_MODELS and the main model is not vision-capable,
|
|
the auto-detection should skip directly to aggregator fallbacks instead of
|
|
sending an image payload to a text-only model.
|
|
"""
|
|
|
|
from types import SimpleNamespace
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
import pytest
|
|
|
|
from agent.auxiliary_client import _is_likely_vision_model, call_llm
|
|
|
|
|
|
# -- _is_likely_vision_model heuristic --
|
|
|
|
|
|
class TestIsLikelyVisionModel:
    """Tests for the vision model name heuristic."""

    @pytest.mark.parametrize("model", [
        "gpt-4o",
        "gpt-4o-mini",
        "claude-sonnet-4.6",
        "gemini-3-flash-preview",
        "mimo-v2-omni",
        "mimo-v2.5",
        "llava-v1.6",
        "qwen-vl-plus",
        "qwen2-vl-7b",
        "glm-5v-turbo",
        "glm-4v",
        "pixtral-12b",
        "internvl2-8b",
        "cogvlm-17b",
        "idefics2-8b",
        "some-vision-model",
        "my-multimodal-v3",
        "deepseek-vl-7b",
    ])
    def test_vision_models_detected(self, model):
        """Names carrying a vision marker (-vl, llava, 4o, ...) are accepted."""
        assert _is_likely_vision_model(model) is True

    @pytest.mark.parametrize("model", [
        "llama3",
        "llama3:70b",
        "mistral-7b",
        "qwen3:14b",
        "deepseek-coder-v2",
        "codestral-latest",
        "nemotron-3-nano:30b",
        "phi-3-mini",
        "mixtral-8x7b",
        "",
    ])
    def test_non_vision_models_rejected(self, model):
        """Plain text-model names (and the empty string) are rejected."""
        assert _is_likely_vision_model(model) is False

    def test_none_returns_false(self):
        # Bug fix: this test previously passed "" (already covered by the
        # parametrized rejection cases above) instead of None, so the
        # None-handling promised by the test name was never exercised.
        assert _is_likely_vision_model(None) is False
|
|
|
|
|
|
# -- Vision auto-detection with non-vision main model --
|
|
|
|
|
|
class TestVisionNonVisionFallthrough:
    """Vision auto-detect must skip non-vision main models (#14744)."""

    def test_ollama_cloud_non_vision_skips_to_aggregator(self):
        """ollama-cloud with llama3 must skip to aggregator, not try llama3."""
        aggregator = MagicMock()

        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="ollama-cloud",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="llama3",
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        ), patch(
            "agent.auxiliary_client.resolve_provider_client",
        ) as resolve_spy, patch(
            "agent.auxiliary_client._resolve_strict_vision_backend",
            return_value=(aggregator, "google/gemini-3-flash-preview"),
        ):
            from agent.auxiliary_client import resolve_vision_provider_client

            provider, client, model = resolve_vision_provider_client()

            # The aggregator fallback must be what came back.
            assert client is aggregator
            assert model == "google/gemini-3-flash-preview"
            # resolve_provider_client must NOT have been called with ollama-cloud
            for recorded in resolve_spy.call_args_list:
                assert not (
                    recorded.args[0] == "ollama-cloud"
                    and recorded.args[1] == "llama3"
                ), (
                    "Should not have tried llama3 on ollama-cloud for vision"
                )

    def test_ollama_with_llava_uses_main_provider(self):
        """ollama with llava (vision model) must use main provider directly."""
        main_client = MagicMock()

        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="ollama",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="llava-v1.6",
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        ), patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(main_client, "llava-v1.6"),
        ):
            from agent.auxiliary_client import resolve_vision_provider_client

            provider, client, model = resolve_vision_provider_client()

            # A vision-capable main model is used as-is.
            assert provider == "ollama"
            assert client is main_client
            assert model == "llava-v1.6"

    def test_provider_with_vision_override_still_works(self):
        """xiaomi with explicit vision override must still use the override."""
        main_client = MagicMock()

        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="xiaomi",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="mimo-v2-pro",
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        ), patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(main_client, "mimo-v2.5"),
        ):
            from agent.auxiliary_client import resolve_vision_provider_client

            provider, client, model = resolve_vision_provider_client()

            # The configured override model wins over the main model.
            assert provider == "xiaomi"
            assert client is main_client
            assert model == "mimo-v2.5"

    def test_named_custom_provider_unknown_model_is_trusted(self):
        """Named custom providers should not be skipped by the name heuristic."""
        main_client = MagicMock()

        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="beans",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="my-company-vlm",
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        ), patch(
            "agent.auxiliary_client._get_named_custom_provider_entry",
            return_value={"name": "beans", "base_url": "http://vlm.test/v1"},
        ), patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(main_client, "my-company-vlm"),
        ):
            from agent.auxiliary_client import resolve_vision_provider_client

            provider, client, model = resolve_vision_provider_client()

            # A named custom provider's model is trusted even if the name
            # heuristic would not flag it as vision-capable.
            assert provider == "beans"
            assert client is main_client
            assert model == "my-company-vlm"

    def test_named_custom_provider_can_declare_vision_model_override(self):
        """Named custom providers can route vision to a dedicated model."""
        main_client = MagicMock()

        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="beans",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="chat-model",
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        ), patch(
            "agent.auxiliary_client._get_named_custom_provider_entry",
            return_value={
                "name": "beans",
                "base_url": "http://vlm.test/v1",
                "models": {"chat-model": {"vision_model": "vision-model"}},
            },
        ), patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(main_client, "vision-model"),
        ) as resolve_spy:
            from agent.auxiliary_client import resolve_vision_provider_client

            provider, client, model = resolve_vision_provider_client()

            # The declared vision_model override is resolved on the same
            # provider entry, not the chat model.
            assert provider == "beans"
            assert client is main_client
            assert model == "vision-model"
            assert resolve_spy.call_args.args[:2] == ("beans", "vision-model")

    def test_non_vision_model_all_aggregators_fail(self):
        """Non-vision main + no aggregators available must return None."""
        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="ollama-cloud",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="qwen3:14b",
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        ), patch(
            "agent.auxiliary_client._resolve_strict_vision_backend",
            return_value=(None, None),
        ):
            from agent.auxiliary_client import resolve_vision_provider_client

            provider, client, model = resolve_vision_provider_client()

            # No usable backend anywhere -> explicit (None, None) result.
            assert client is None
            assert model is None
|
|
|
|
|
|
class VisionUnsupportedError(Exception):
    """Test double for a provider error raised on unsupported image input.

    Carries an HTTP-like ``status_code`` attribute (default 400) alongside
    the standard exception message.
    """

    def __init__(self, message, status_code=400):
        self.status_code = status_code
        super().__init__(message)
|
|
|
|
|
|
class TestVisionCapabilityFallback:
    """call_llm must retry on a vision-capability error from the primary."""

    def test_call_llm_retries_auto_vision_on_capability_error(self):
        """A text-only main provider should fall through to strict vision backends."""
        primary_client = MagicMock()
        fallback_client = MagicMock()
        ok_response = SimpleNamespace(
            choices=[SimpleNamespace(message=SimpleNamespace(content="ok"))]
        )

        # Primary rejects the image payload; fallback answers normally.
        primary_client.chat.completions.create.side_effect = VisionUnsupportedError(
            "This model does not support image input"
        )
        fallback_client.chat.completions.create.return_value = ok_response

        with patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        ), patch(
            "agent.auxiliary_client.resolve_vision_provider_client",
            return_value=("ollama-cloud", primary_client, "llama3"),
        ), patch(
            "agent.auxiliary_client._build_call_kwargs",
            return_value={
                "model": "llama3",
                "messages": [{"role": "user", "content": "analyze"}],
            },
        ), patch(
            "agent.auxiliary_client._try_vision_fallback",
            return_value=(fallback_client, "google/gemini-3-flash-preview", "openrouter"),
        ):
            result = call_llm(
                task="vision",
                messages=[{"role": "user", "content": "analyze"}],
            )

        # The fallback's response is returned, and each client was hit once.
        assert result is ok_response
        assert primary_client.chat.completions.create.call_count == 1
        assert fallback_client.chat.completions.create.call_count == 1
|