mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-26 01:01:40 +00:00
fix(vision): auto-resize oversized images, increase default timeout, fix vision capability detection
Cherry-picked from PR #7749 by kshitijk4poor with modifications:
- Raise hard image limit from 5 MB to 20 MB (matches most restrictive provider)
- Send images at full resolution first; only auto-resize to 5 MB on API failure
- Add _is_image_size_error() helper to detect size-related API rejections
- Auto-resize uses Pillow (soft dep) with progressive downscale + JPEG quality reduction
- Fix get_model_capabilities() to check modalities.input for vision support
- Increase default vision timeout from 30s to 120s (matches hardcoded fallback intent)
- Applied retry-with-resize to both vision_analyze_tool and browser_vision

Closes #7740
This commit is contained in:
parent
06e1d9cdd4
commit
50bb4fe010
6 changed files with 399 additions and 25 deletions
|
|
@ -15,6 +15,10 @@ from tools.vision_tools import (
|
|||
_handle_vision_analyze,
|
||||
_determine_mime_type,
|
||||
_image_to_base64_data_url,
|
||||
_resize_image_for_vision,
|
||||
_is_image_size_error,
|
||||
_MAX_BASE64_BYTES,
|
||||
_RESIZE_TARGET_BYTES,
|
||||
vision_analyze_tool,
|
||||
check_vision_requirements,
|
||||
get_debug_session_info,
|
||||
|
|
@ -590,11 +594,13 @@ class TestBase64SizeLimit:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_oversized_image_rejected_before_api_call(self, tmp_path):
|
||||
"""Images exceeding 5 MB base64 should fail with a clear size error."""
|
||||
"""Images exceeding the 20 MB hard limit should fail with a clear error."""
|
||||
img = tmp_path / "huge.png"
|
||||
img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * (4 * 1024 * 1024))
|
||||
|
||||
with patch("tools.vision_tools.async_call_llm", new_callable=AsyncMock) as mock_llm:
|
||||
# Patch the hard limit to a small value so the test runs fast.
|
||||
with patch("tools.vision_tools._MAX_BASE64_BYTES", 1000), \
|
||||
patch("tools.vision_tools.async_call_llm", new_callable=AsyncMock) as mock_llm:
|
||||
result = json.loads(await vision_analyze_tool(str(img), "describe this"))
|
||||
|
||||
assert result["success"] is False
|
||||
|
|
@ -686,3 +692,124 @@ class TestVisionRegistration:
|
|||
|
||||
entry = registry._tools.get("vision_analyze")
|
||||
assert callable(entry.handler)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _resize_image_for_vision — auto-resize oversized images
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestResizeImageForVision:
    """Tests for the auto-resize function."""

    @staticmethod
    def _pillow_image():
        """Return the ``PIL.Image`` module, skipping the calling test without Pillow.

        Pillow is an optional dependency, so every test that has to build a
        real image goes through this single guard instead of repeating the
        import/skip boilerplate.
        """
        try:
            from PIL import Image
        except ImportError:
            pytest.skip("Pillow not installed")
        return Image

    def test_small_image_returned_as_is(self, tmp_path):
        """Images under the limit should be returned unchanged."""
        Image = self._pillow_image()
        # Create a small 10x10 red PNG
        img = Image.new("RGB", (10, 10), (255, 0, 0))
        path = tmp_path / "small.png"
        img.save(path, "PNG")

        result = _resize_image_for_vision(path, mime_type="image/png")
        assert result.startswith("data:image/png;base64,")
        # The default target is _RESIZE_TARGET_BYTES, which is the tighter
        # (and therefore more meaningful) bound than the hard limit.
        assert len(result) < _RESIZE_TARGET_BYTES

    def test_large_image_is_resized(self, tmp_path):
        """Images over the default target should be auto-resized to fit."""
        import os

        Image = self._pillow_image()
        # Pillow's PNG writer deflate-compresses pixel data, so a solid-colour
        # canvas stays tiny on disk no matter its dimensions and would never
        # reach the resize path.  Random noise is incompressible, which keeps
        # the encoded file genuinely above the target.
        side = 1200
        img = Image.frombytes("RGB", (side, side), os.urandom(side * side * 3))
        path = tmp_path / "large.png"
        img.save(path, "PNG")
        # Base64 inflates payloads by 4/3; confirm the fixture is oversized so
        # the final assertion cannot pass trivially.
        assert path.stat().st_size * 4 // 3 > _RESIZE_TARGET_BYTES

        result = _resize_image_for_vision(path, mime_type="image/png")
        assert result.startswith("data:image/png;base64,")
        # Default target is _RESIZE_TARGET_BYTES (5 MB), not _MAX_BASE64_BYTES (20 MB)
        assert len(result) <= _RESIZE_TARGET_BYTES

    def test_custom_max_bytes(self, tmp_path):
        """The max_base64_bytes parameter should be respected."""
        Image = self._pillow_image()
        img = Image.new("RGB", (200, 200), (0, 128, 255))
        path = tmp_path / "medium.png"
        img.save(path, "PNG")

        # Set a very low limit to force resizing
        result = _resize_image_for_vision(path, max_base64_bytes=500)
        # Should still return a valid data URL
        assert result.startswith("data:image/")

    def test_jpeg_output_for_non_png(self, tmp_path):
        """Non-PNG images should be resized as JPEG."""
        Image = self._pillow_image()
        img = Image.new("RGB", (2000, 2000), (255, 128, 0))
        path = tmp_path / "photo.jpg"
        img.save(path, "JPEG", quality=95)

        result = _resize_image_for_vision(path, mime_type="image/jpeg",
                                          max_base64_bytes=50_000)
        assert result.startswith("data:image/jpeg;base64,")

    def test_constants_sane(self):
        """Hard limit should be larger than resize target."""
        assert _MAX_BASE64_BYTES == 20 * 1024 * 1024
        assert _RESIZE_TARGET_BYTES == 5 * 1024 * 1024
        assert _MAX_BASE64_BYTES > _RESIZE_TARGET_BYTES

    def test_no_pillow_returns_original(self, tmp_path):
        """Without Pillow, oversized images should be returned as-is."""
        # Create a dummy file
        path = tmp_path / "test.png"
        # Write enough bytes to exceed a tiny limit
        path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 1000)

        with patch("tools.vision_tools._image_to_base64_data_url") as mock_b64:
            # Simulate a large base64 result
            mock_b64.return_value = "data:image/png;base64," + "A" * 200
            # A None entry in sys.modules makes ``import PIL`` raise
            # ImportError, simulating an environment without Pillow.
            with patch.dict("sys.modules", {"PIL": None, "PIL.Image": None}):
                result = _resize_image_for_vision(path, max_base64_bytes=100)
        # Should return the original (oversized) data url
        assert len(result) > 100
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _is_image_size_error — detect size-related API errors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestIsImageSizeError:
    """Tests for the size-error detection helper."""

    def test_too_large_message(self):
        """A "payload too large" message is treated as a size error."""
        err = Exception("Request payload too large")
        assert _is_image_size_error(err)

    def test_413_status(self):
        """An HTTP 413 message is treated as a size error."""
        err = Exception("HTTP 413 Payload Too Large")
        assert _is_image_size_error(err)

    def test_invalid_request(self):
        """An invalid_request_error mentioning the image is treated as a size error."""
        err = Exception("invalid_request_error: image too big")
        assert _is_image_size_error(err)

    def test_exceeds_limit(self):
        """An "exceeds maximum size" message is treated as a size error."""
        err = Exception("Image exceeds maximum size")
        assert _is_image_size_error(err)

    def test_unrelated_error(self):
        """A connection failure is not treated as a size error."""
        err = Exception("Connection refused")
        assert not _is_image_size_error(err)

    def test_auth_error(self):
        """An authentication failure is not treated as a size error."""
        err = Exception("401 Unauthorized")
        assert not _is_image_size_error(err)

    def test_empty_message(self):
        """An exception with an empty message is not treated as a size error."""
        err = Exception("")
        assert not _is_image_size_error(err)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue