mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(vision): preserve aspect ratio during auto-resize
Independent halving of width and height caused aspect ratio distortion for extreme dimensions (e.g. 8000x200 panoramas). When one axis hit the 64px floor, the other kept shrinking — collapsing the ratio toward 1:1. Use proportional scaling instead: when either dimension hits the floor, derive the effective scale factor and apply it to both axes. Add tests for extreme panorama (8000x200) and tall narrow (200x6000) images to verify aspect ratio preservation.
This commit is contained in:
parent
4e3e87b677
commit
f751a0bd18
2 changed files with 69 additions and 2 deletions
|
|
@ -769,6 +769,62 @@ class TestResizeImageForVision:
|
|||
assert _RESIZE_TARGET_BYTES == 5 * 1024 * 1024
|
||||
assert _MAX_BASE64_BYTES > _RESIZE_TARGET_BYTES
|
||||
|
||||
def test_extreme_aspect_ratio_preserved(self, tmp_path):
    """Extreme aspect ratios should be preserved during resize.

    Saves an 8000x200 (40:1) PNG under ``tmp_path``, passes it to
    ``_resize_image_for_vision`` with ``max_base64_bytes=50_000``, decodes
    the base64 payload of the returned ``data:image/...`` string, and
    asserts that the width/height ratio of the decoded image is still
    above 10:1.

    Skipped when Pillow is not installed.
    """
    try:
        from PIL import Image
    except ImportError:
        pytest.skip("Pillow not installed")
    import base64
    from io import BytesIO

    # Very wide panorama: 8000x200 (40:1)
    img = Image.new("RGB", (8000, 200), (100, 150, 200))
    path = tmp_path / "panorama.png"
    img.save(path, "PNG")

    result = _resize_image_for_vision(path, mime_type="image/png",
                                      max_base64_bytes=50_000)
    assert result.startswith("data:image/")

    # Decode and check aspect ratio is roughly preserved.  Only the part
    # after the first comma (the base64 payload) is needed.
    _, b64data = result.split(",", 1)
    resized = Image.open(BytesIO(base64.b64decode(b64data)))
    resized_ratio = resized.width / resized.height if resized.height > 0 else 0

    # Allow some tolerance (floor clamping), but ratio should stay above 10:1.
    # With independent halving, ratio would collapse to ~1:1. Proportional
    # scaling should keep it well above 10.
    assert resized_ratio > 10, (
        f"Aspect ratio collapsed: {resized.width}x{resized.height} "
        f"(ratio {resized_ratio:.1f}, expected >10)"
    )
|
||||
|
||||
def test_tall_narrow_image_preserved(self, tmp_path):
    """Tall narrow images should also preserve aspect ratio.

    Saves a 200x6000 (30:1 height/width) PNG under ``tmp_path``, passes it
    to ``_resize_image_for_vision`` with ``max_base64_bytes=50_000``,
    decodes the base64 payload of the returned ``data:image/...`` string,
    and asserts that the height/width ratio of the decoded image is still
    above 5:1.

    Skipped when Pillow is not installed.
    """
    try:
        from PIL import Image
    except ImportError:
        pytest.skip("Pillow not installed")
    import base64
    from io import BytesIO

    # Very tall: 200x6000 (30:1 h/w)
    img = Image.new("RGB", (200, 6000), (200, 100, 50))
    path = tmp_path / "tall.png"
    img.save(path, "PNG")

    result = _resize_image_for_vision(path, mime_type="image/png",
                                      max_base64_bytes=50_000)
    assert result.startswith("data:image/")

    # Only the part after the first comma (the base64 payload) is needed.
    _, b64data = result.split(",", 1)
    resized = Image.open(BytesIO(base64.b64decode(b64data)))

    # Ratio is measured as height/width because the long axis is vertical.
    resized_ratio = resized.height / resized.width if resized.width > 0 else 0
    assert resized_ratio > 5, (
        f"Aspect ratio collapsed: {resized.width}x{resized.height} "
        f"(h/w ratio {resized_ratio:.1f}, expected >5)"
    )
|
||||
|
||||
def test_no_pillow_returns_original(self, tmp_path):
|
||||
"""Without Pillow, oversized images should be returned as-is."""
|
||||
# Create a dummy file
|
||||
|
|
|
|||
|
|
@ -357,8 +357,19 @@ def _resize_image_for_vision(image_path: Path, mime_type: Optional[str] = None,
|
|||
|
||||
for attempt in range(5):
|
||||
if attempt > 0:
|
||||
new_w = max(img.width // 2, 64)
|
||||
new_h = max(img.height // 2, 64)
|
||||
# Proportional scaling: halve the longer side and scale the
|
||||
# shorter side to preserve aspect ratio (min dimension 64).
|
||||
scale = 0.5
|
||||
new_w = max(int(img.width * scale), 64)
|
||||
new_h = max(int(img.height * scale), 64)
|
||||
# Re-derive the scale from whichever dimension hit the floor
|
||||
# so both axes shrink by the same factor.
|
||||
if new_w == 64 and img.width > 0:
|
||||
effective_scale = 64 / img.width
|
||||
new_h = max(int(img.height * effective_scale), 64)
|
||||
elif new_h == 64 and img.height > 0:
|
||||
effective_scale = 64 / img.height
|
||||
new_w = max(int(img.width * effective_scale), 64)
|
||||
# Stop if dimensions can't shrink further
|
||||
if (new_w, new_h) == prev_dims:
|
||||
break
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue