mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
fix(models): prefer image modalities for vision routing
This commit is contained in:
parent
6e46f99e7e
commit
14f38822fa
3 changed files with 39 additions and 6 deletions
|
|
@ -381,14 +381,18 @@ def get_model_capabilities(provider: str, model: str) -> Optional[ModelCapabilit
|
|||
|
||||
# Extract capability flags (default to False if missing)
|
||||
supports_tools = bool(entry.get("tool_call", False))
|
||||
# Vision: check both the `attachment` flag and `modalities.input` for "image".
|
||||
# Some models (e.g. gemma-4) list image in input modalities but not attachment.
|
||||
# Vision: prefer explicit `modalities.input` when models.dev provides it.
|
||||
# The older `attachment` flag can be stale or too broad for image routing;
|
||||
# fall back to it only when the input modalities are absent/invalid.
|
||||
input_mods = entry.get("modalities", {})
|
||||
if isinstance(input_mods, dict):
|
||||
input_mods = input_mods.get("input", [])
|
||||
input_mods = input_mods.get("input")
|
||||
else:
|
||||
input_mods = []
|
||||
supports_vision = bool(entry.get("attachment", False)) or "image" in input_mods
|
||||
input_mods = None
|
||||
if isinstance(input_mods, list):
|
||||
supports_vision = "image" in input_mods
|
||||
else:
|
||||
supports_vision = bool(entry.get("attachment", False))
|
||||
supports_reasoning = bool(entry.get("reasoning", False))
|
||||
|
||||
# Extract limits
|
||||
|
|
|
|||
|
|
@ -109,6 +109,21 @@ class TestDecideImageInputMode:
|
|||
with patch("agent.image_routing._lookup_supports_vision", return_value=True):
|
||||
assert decide_image_input_mode("anthropic", "claude-sonnet-4", cfg) == "native"
|
||||
|
||||
def test_auto_uses_text_for_text_only_modalities_even_with_attachment_flag(self):
|
||||
registry = {
|
||||
"xiaomi": {
|
||||
"models": {
|
||||
"mimo-v2.5-pro": {
|
||||
"attachment": True,
|
||||
"modalities": {"input": ["text"]},
|
||||
"tool_call": True,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
with patch("agent.models_dev.fetch_models_dev", return_value=registry):
|
||||
assert decide_image_input_mode("xiaomi", "mimo-v2.5-pro", {}) == "text"
|
||||
|
||||
|
||||
# ─── build_native_content_parts ──────────────────────────────────────────────
|
||||
|
||||
|
|
|
|||
|
|
@ -223,6 +223,13 @@ CAPS_REGISTRY = {
|
|||
"tool_call": True,
|
||||
"limit": {"context": 32000, "output": 8192},
|
||||
},
|
||||
"text-only-with-stale-attachment": {
|
||||
"id": "text-only-with-stale-attachment",
|
||||
"attachment": True,
|
||||
"tool_call": True,
|
||||
"modalities": {"input": ["text"]},
|
||||
"limit": {"context": 128000, "output": 8192},
|
||||
},
|
||||
},
|
||||
},
|
||||
"anthropic": {
|
||||
|
|
@ -243,7 +250,7 @@ class TestGetModelCapabilities:
|
|||
"""Tests for get_model_capabilities vision detection."""
|
||||
|
||||
def test_vision_from_attachment_flag(self):
|
||||
"""Models with attachment=True should report supports_vision=True."""
|
||||
"""Models with attachment=True and no modalities should report supports_vision=True."""
|
||||
with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
|
||||
caps = get_model_capabilities("anthropic", "claude-sonnet-4")
|
||||
assert caps is not None
|
||||
|
|
@ -257,6 +264,13 @@ class TestGetModelCapabilities:
|
|||
assert caps is not None
|
||||
assert caps.supports_vision is True
|
||||
|
||||
def test_text_only_modalities_override_stale_attachment_flag(self):
|
||||
"""Text-only modalities must win over stale attachment=True metadata."""
|
||||
with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
|
||||
caps = get_model_capabilities("google", "text-only-with-stale-attachment")
|
||||
assert caps is not None
|
||||
assert caps.supports_vision is False
|
||||
|
||||
def test_no_vision_without_attachment_or_modalities(self):
|
||||
"""Models with neither attachment nor image modality should be non-vision."""
|
||||
with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue