mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-14 04:02:26 +00:00
fix(models): prefer image modalities for vision routing
This commit is contained in:
parent
6e46f99e7e
commit
14f38822fa
3 changed files with 39 additions and 6 deletions
|
|
@@ -381,14 +381,18 @@ def get_model_capabilities(provider: str, model: str) -> Optional[ModelCapabilit
|
||||||
|
|
||||||
# Extract capability flags (default to False if missing)
|
# Extract capability flags (default to False if missing)
|
||||||
supports_tools = bool(entry.get("tool_call", False))
|
supports_tools = bool(entry.get("tool_call", False))
|
||||||
# Vision: check both the `attachment` flag and `modalities.input` for "image".
|
# Vision: prefer explicit `modalities.input` when models.dev provides it.
|
||||||
# Some models (e.g. gemma-4) list image in input modalities but not attachment.
|
# The older `attachment` flag can be stale or too broad for image routing;
|
||||||
|
# fall back to it only when the input modalities are absent/invalid.
|
||||||
input_mods = entry.get("modalities", {})
|
input_mods = entry.get("modalities", {})
|
||||||
if isinstance(input_mods, dict):
|
if isinstance(input_mods, dict):
|
||||||
input_mods = input_mods.get("input", [])
|
input_mods = input_mods.get("input")
|
||||||
else:
|
else:
|
||||||
input_mods = []
|
input_mods = None
|
||||||
supports_vision = bool(entry.get("attachment", False)) or "image" in input_mods
|
if isinstance(input_mods, list):
|
||||||
|
supports_vision = "image" in input_mods
|
||||||
|
else:
|
||||||
|
supports_vision = bool(entry.get("attachment", False))
|
||||||
supports_reasoning = bool(entry.get("reasoning", False))
|
supports_reasoning = bool(entry.get("reasoning", False))
|
||||||
|
|
||||||
# Extract limits
|
# Extract limits
|
||||||
|
|
|
||||||
|
|
@@ -109,6 +109,21 @@ class TestDecideImageInputMode:
|
||||||
with patch("agent.image_routing._lookup_supports_vision", return_value=True):
|
with patch("agent.image_routing._lookup_supports_vision", return_value=True):
|
||||||
assert decide_image_input_mode("anthropic", "claude-sonnet-4", cfg) == "native"
|
assert decide_image_input_mode("anthropic", "claude-sonnet-4", cfg) == "native"
|
||||||
|
|
||||||
|
def test_auto_uses_text_for_text_only_modalities_even_with_attachment_flag(self):
|
||||||
|
registry = {
|
||||||
|
"xiaomi": {
|
||||||
|
"models": {
|
||||||
|
"mimo-v2.5-pro": {
|
||||||
|
"attachment": True,
|
||||||
|
"modalities": {"input": ["text"]},
|
||||||
|
"tool_call": True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
with patch("agent.models_dev.fetch_models_dev", return_value=registry):
|
||||||
|
assert decide_image_input_mode("xiaomi", "mimo-v2.5-pro", {}) == "text"
|
||||||
|
|
||||||
|
|
||||||
# ─── build_native_content_parts ──────────────────────────────────────────────
|
# ─── build_native_content_parts ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -223,6 +223,13 @@ CAPS_REGISTRY = {
|
||||||
"tool_call": True,
|
"tool_call": True,
|
||||||
"limit": {"context": 32000, "output": 8192},
|
"limit": {"context": 32000, "output": 8192},
|
||||||
},
|
},
|
||||||
|
"text-only-with-stale-attachment": {
|
||||||
|
"id": "text-only-with-stale-attachment",
|
||||||
|
"attachment": True,
|
||||||
|
"tool_call": True,
|
||||||
|
"modalities": {"input": ["text"]},
|
||||||
|
"limit": {"context": 128000, "output": 8192},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"anthropic": {
|
"anthropic": {
|
||||||
|
|
@@ -243,7 +250,7 @@ class TestGetModelCapabilities:
|
||||||
"""Tests for get_model_capabilities vision detection."""
|
"""Tests for get_model_capabilities vision detection."""
|
||||||
|
|
||||||
def test_vision_from_attachment_flag(self):
|
def test_vision_from_attachment_flag(self):
|
||||||
"""Models with attachment=True should report supports_vision=True."""
|
"""Models with attachment=True and no modalities should report supports_vision=True."""
|
||||||
with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
|
with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
|
||||||
caps = get_model_capabilities("anthropic", "claude-sonnet-4")
|
caps = get_model_capabilities("anthropic", "claude-sonnet-4")
|
||||||
assert caps is not None
|
assert caps is not None
|
||||||
|
|
@@ -257,6 +264,13 @@ class TestGetModelCapabilities:
|
||||||
assert caps is not None
|
assert caps is not None
|
||||||
assert caps.supports_vision is True
|
assert caps.supports_vision is True
|
||||||
|
|
||||||
|
def test_text_only_modalities_override_stale_attachment_flag(self):
|
||||||
|
"""Text-only modalities must win over stale attachment=True metadata."""
|
||||||
|
with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
|
||||||
|
caps = get_model_capabilities("google", "text-only-with-stale-attachment")
|
||||||
|
assert caps is not None
|
||||||
|
assert caps.supports_vision is False
|
||||||
|
|
||||||
def test_no_vision_without_attachment_or_modalities(self):
|
def test_no_vision_without_attachment_or_modalities(self):
|
||||||
"""Models with neither attachment nor image modality should be non-vision."""
|
"""Models with neither attachment nor image modality should be non-vision."""
|
||||||
with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
|
with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue