mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(vision): auto-resize oversized images, increase default timeout, fix vision capability detection
Cherry-picked from PR #7749 by kshitijk4poor with modifications:
- Raise hard image limit from 5 MB to 20 MB (matches most restrictive provider)
- Send images at full resolution first; only auto-resize to 5 MB on API failure
- Add _is_image_size_error() helper to detect size-related API rejections
- Auto-resize uses Pillow (soft dep) with progressive downscale + JPEG quality reduction
- Fix get_model_capabilities() to check modalities.input for vision support
- Increase default vision timeout from 30s to 120s (matches hardcoded fallback intent)
- Applied retry-with-resize to both vision_analyze_tool and browser_vision

Closes #7740
This commit is contained in:
parent
06e1d9cdd4
commit
50bb4fe010
6 changed files with 399 additions and 25 deletions
|
|
@ -7,6 +7,7 @@ from agent.models_dev import (
|
|||
PROVIDER_TO_MODELS_DEV,
|
||||
_extract_context,
|
||||
fetch_models_dev,
|
||||
get_model_capabilities,
|
||||
lookup_models_dev_context,
|
||||
)
|
||||
|
||||
|
|
@ -195,3 +196,88 @@ class TestFetchModelsDev:
|
|||
result = fetch_models_dev()
|
||||
mock_get.assert_not_called()
|
||||
assert result == SAMPLE_REGISTRY
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_model_capabilities — vision via modalities.input
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Minimal registry fixture used by the capability tests below.
# Each model entry exercises a different combination of the "attachment"
# flag and the "modalities" field.
CAPS_REGISTRY = {
    "google": {
        "id": "google",
        "models": {
            # attachment is False, but "image" is listed in modalities.input.
            "gemma-4-31b-it": {
                "id": "gemma-4-31b-it",
                "attachment": False,
                "tool_call": True,
                "modalities": {"input": ["text", "image"]},
                "limit": {"context": 128000, "output": 8192},
            },
            # Neither an "attachment" key nor a "modalities" key.
            "gemma-3-1b": {
                "id": "gemma-3-1b",
                "tool_call": True,
                "limit": {"context": 32000, "output": 8192},
            },
        },
    },
    "anthropic": {
        "id": "anthropic",
        "models": {
            # attachment is True and there is no "modalities" key.
            "claude-sonnet-4": {
                "id": "claude-sonnet-4",
                "attachment": True,
                "tool_call": True,
                "limit": {"context": 200000, "output": 64000},
            },
        },
    },
}
|
||||
|
||||
|
||||
class TestGetModelCapabilities:
    """Tests for get_model_capabilities vision detection.

    Every test patches ``agent.models_dev.fetch_models_dev`` so the lookup
    runs against an in-memory registry instead of a fetched one.
    """

    def test_vision_from_attachment_flag(self):
        """Models with attachment=True should report supports_vision=True."""
        with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
            caps = get_model_capabilities("anthropic", "claude-sonnet-4")
        assert caps is not None
        assert caps.supports_vision is True

    def test_vision_from_modalities_input_image(self):
        """Models with 'image' in modalities.input but attachment=False should
        still report supports_vision=True (the core fix in this PR)."""
        with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
            caps = get_model_capabilities("google", "gemma-4-31b-it")
        assert caps is not None
        assert caps.supports_vision is True

    def test_no_vision_without_attachment_or_modalities(self):
        """Models with neither attachment nor image modality should be non-vision."""
        with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
            caps = get_model_capabilities("google", "gemma-3-1b")
        assert caps is not None
        assert caps.supports_vision is False

    def test_modalities_non_dict_handled(self):
        """Non-dict modalities field should not crash."""
        registry = {
            "google": {"id": "google", "models": {
                "weird-model": {
                    "id": "weird-model",
                    "modalities": "text",  # not a dict
                    "limit": {"context": 200000, "output": 8192},
                },
            }},
        }
        with patch("agent.models_dev.fetch_models_dev", return_value=registry):
            # NOTE(review): the lookup uses provider "gemini" while the registry
            # key is "google" -- presumably this relies on a provider alias
            # mapping (e.g. PROVIDER_TO_MODELS_DEV); confirm against
            # agent.models_dev.
            caps = get_model_capabilities("gemini", "weird-model")
        assert caps is not None
        assert caps.supports_vision is False

    def test_model_not_found_returns_none(self):
        """Unknown model should return None."""
        with patch("agent.models_dev.fetch_models_dev", return_value=CAPS_REGISTRY):
            caps = get_model_capabilities("anthropic", "nonexistent-model")
        assert caps is None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue