mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-21 10:22:18 +00:00
fix(agent): accept pixel-correct image downscale when bytes grow (#48013)
The image-too-large reactive shrink (try_shrink_image_parts_in_messages)
conflated two independent constraints: it always rejected a resize whose
re-encoded bytes were >= the original, even when the shrink was driven by a
PIXEL-DIMENSION cap (Anthropic many-image 2000px) rather than the byte budget.
Downscaled screenshot PNGs routinely re-encode LARGER in bytes, so the
dimension-correct result was discarded and the image left oversized -> the
provider re-rejected on retry and the session wedged forever.
Fix: track which constraint triggered the shrink (bytes vs dimension) and gate
the accept on the SAME axis.
* dimension path: accept the result as long as it is now within max_dimension,
regardless of byte size (verify via Pillow; fall back to the byte gate only
when the re-encode can't be decoded).
* bytes path: still require bytes to shrink, but ALSO re-check the per-side cap
when it's active — _resize_image_for_vision returns a best-effort, possibly
over-cap blob when it exhausts its halving budget on a very-high-aspect
image, so a byte-shrink alone can leave it over the dimension cap and
re-brick on retry.
Extend the unshrinkable-oversized guard to the pixel axis so a partial shrink
doesn't burn the one-shot retry.
Single shared agent path -> fixes CLI, TUI, and gateway alike.
Adds a real-Pillow runnable proof (repro_48013_image_shrink_brick.py) that
reproduces the issue's per-image table (bricks 3/5 before, passes 5/5 after)
plus unit invariants for the dimension and bytes accept/reject paths,
partial-progress accounting, and the bytes-path still-over-cap regression
surfaced by adversarial review.
Closes #48013
This commit is contained in:
parent
ac00e73688
commit
990273d90a
3 changed files with 496 additions and 39 deletions
|
|
@ -712,33 +712,58 @@ def try_shrink_image_parts_in_messages(
|
|||
# actually brought under the target.
|
||||
unshrinkable_oversized = 0
|
||||
|
||||
def _shrink_data_url(url: str) -> Optional[str]:
|
||||
"""Return a smaller data URL, or None if shrink can't help."""
|
||||
if not isinstance(url, str) or not url.startswith("data:"):
|
||||
def _decode_pixels(data_url: str) -> Optional[tuple]:
|
||||
"""Return ``(width, height)`` of a base64 data URL, or None on failure.
|
||||
|
||||
Soft-depends on Pillow; returns None (caller falls back to a
|
||||
bytes-only check) if Pillow is missing or the payload is corrupt.
|
||||
"""
|
||||
try:
|
||||
import base64 as _b64_dim
|
||||
import io as _io_dim
|
||||
header_d, _, data_d = data_url.partition(",")
|
||||
if not data_d or not data_url.startswith("data:"):
|
||||
return None
|
||||
from PIL import Image as _PILImage
|
||||
with _PILImage.open(_io_dim.BytesIO(_b64_dim.b64decode(data_d))) as _img:
|
||||
return _img.size
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
# Check both byte size AND pixel dimensions.
|
||||
def _shrink_data_url(url: str) -> tuple:
|
||||
"""Return ``(resized_url, unshrinkable)`` for a data URL.
|
||||
|
||||
``resized_url`` is a smaller/dimension-correct data URL, or None when
|
||||
no rewrite was applied. ``unshrinkable`` is True only when the image
|
||||
exceeded a constraint (byte-size or dimensions) and the resize failed
|
||||
to satisfy *that same* constraint — so the caller knows retrying is
|
||||
pointless even if a different image in the request shrank.
|
||||
"""
|
||||
if not isinstance(url, str) or not url.startswith("data:"):
|
||||
return None, False
|
||||
|
||||
# Determine which constraint is binding. The accept/reject gate below
|
||||
# MUST be checked against the same axis that triggered the shrink: a
|
||||
# downscaled screenshot PNG routinely re-encodes to *more* bytes than
|
||||
# the original (PNG compression is non-monotonic in image size — a
|
||||
# smaller raster with LANCZOS resampling noise compresses worse than a
|
||||
# larger smooth one). Rejecting a pixel-correct downscale purely
|
||||
# because its bytes grew permanently wedges sessions on the Anthropic
|
||||
# many-image 2000px path (#48013).
|
||||
needs_shrink = len(url) > target_bytes # over byte budget
|
||||
triggered_by = "bytes" if needs_shrink else None
|
||||
if not needs_shrink:
|
||||
# Even if bytes are fine, check pixel dimensions against the
|
||||
# provider's reported per-side cap. A screenshot can be tiny in
|
||||
# bytes yet too large in pixels.
|
||||
try:
|
||||
import base64 as _b64_dim
|
||||
header_d, _, data_d = url.partition(",")
|
||||
if not data_d:
|
||||
return None
|
||||
raw_d = _b64_dim.b64decode(data_d)
|
||||
from PIL import Image as _PILImage
|
||||
import io as _io_dim
|
||||
with _PILImage.open(_io_dim.BytesIO(raw_d)) as _img:
|
||||
if max(_img.size) <= max_dimension:
|
||||
return None # both bytes and pixels are fine
|
||||
needs_shrink = True # pixels exceed limit, force shrink
|
||||
except Exception:
|
||||
# If we can't check dimensions (Pillow unavailable, corrupt
|
||||
# image, etc.), fall back to byte-only check.
|
||||
return None
|
||||
# Bytes are fine — check pixel dimensions against the provider's
|
||||
# reported per-side cap. A screenshot can be tiny in bytes yet
|
||||
# too large in pixels.
|
||||
dims = _decode_pixels(url)
|
||||
if dims is None:
|
||||
# Pillow missing or corrupt data — fall back to byte-only.
|
||||
return None, False
|
||||
if max(dims) <= max_dimension:
|
||||
return None, False # both bytes and pixels are within limits
|
||||
needs_shrink = True
|
||||
triggered_by = "dimension"
|
||||
|
||||
try:
|
||||
header, _, data = url.partition(",")
|
||||
|
|
@ -770,13 +795,45 @@ def try_shrink_image_parts_in_messages(
|
|||
Path(tmp.name).unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
if not resized or len(resized) >= len(url):
|
||||
# Shrink didn't help (or made it bigger — corrupt input?).
|
||||
return None
|
||||
return resized
|
||||
if not resized:
|
||||
# Resize returned nothing — Pillow couldn't help.
|
||||
return None, True
|
||||
if triggered_by == "bytes":
|
||||
# Byte budget is the binding constraint — bytes must shrink.
|
||||
if len(resized) >= len(url):
|
||||
return None, True # re-encode made it bigger
|
||||
# The per-side dimension cap is ALSO an active provider
|
||||
# constraint on this request (the caller passes the parsed cap
|
||||
# to both this helper and the resizer). _resize_image_for_vision
|
||||
# returns a best-effort, possibly-over-cap blob when it
|
||||
# exhausts its halving budget — it freezes the long side once
|
||||
# the short side hits its 64px floor, so a very-high-aspect
|
||||
# image can stay over the cap even after bytes shrank. If the
|
||||
# output is still over the cap, retrying would re-400 on
|
||||
# dimensions; treat it as unshrinkable. (Skip when dims can't
|
||||
# be decoded — preserves historical byte-only behaviour.)
|
||||
new_dims = _decode_pixels(resized)
|
||||
if new_dims is not None and max(new_dims) > max_dimension:
|
||||
return None, True
|
||||
return resized, False
|
||||
# triggered_by == "dimension": the per-side cap is binding. The
|
||||
# re-encode may have grown in bytes; accept it as long as it is now
|
||||
# within the dimension cap. Verify the new dimensions when we can.
|
||||
new_dims = _decode_pixels(resized)
|
||||
if new_dims is not None:
|
||||
if max(new_dims) <= max_dimension:
|
||||
return resized, False
|
||||
# Still over the per-side cap — the resize didn't satisfy it.
|
||||
return None, True
|
||||
# Couldn't verify the re-encode's dimensions (corrupt output or
|
||||
# Pillow gone mid-call). Fall back to the historical "bytes must
|
||||
# shrink" gate so we never accept an unverifiable, byte-larger blob.
|
||||
if len(resized) >= len(url):
|
||||
return None, True
|
||||
return resized, False
|
||||
except Exception as exc:
|
||||
logger.warning("image-shrink recovery: re-encode failed — %s", exc)
|
||||
return None
|
||||
return None, triggered_by is not None
|
||||
|
||||
for msg in api_messages:
|
||||
if not isinstance(msg, dict):
|
||||
|
|
@ -795,20 +852,18 @@ def try_shrink_image_parts_in_messages(
|
|||
# OpenAI Responses: {"image_url": "data:..."}
|
||||
if isinstance(image_value, dict):
|
||||
url = image_value.get("url", "")
|
||||
resized = _shrink_data_url(url)
|
||||
resized, unshrinkable = _shrink_data_url(url)
|
||||
if resized:
|
||||
image_value["url"] = resized
|
||||
changed_count += 1
|
||||
elif isinstance(url, str) and url.startswith("data:") \
|
||||
and len(url) > target_bytes:
|
||||
elif unshrinkable:
|
||||
unshrinkable_oversized += 1
|
||||
elif isinstance(image_value, str):
|
||||
resized = _shrink_data_url(image_value)
|
||||
resized, unshrinkable = _shrink_data_url(image_value)
|
||||
if resized:
|
||||
part["image_url"] = resized
|
||||
changed_count += 1
|
||||
elif image_value.startswith("data:") \
|
||||
and len(image_value) > target_bytes:
|
||||
elif unshrinkable:
|
||||
unshrinkable_oversized += 1
|
||||
|
||||
if changed_count:
|
||||
|
|
|
|||
179
tests/run_agent/repro_48013_image_shrink_brick.py
Normal file
179
tests/run_agent/repro_48013_image_shrink_brick.py
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Runnable proof for issue #48013 — image-dimension 400 session brick.
|
||||
|
||||
Before the fix, ``agent.conversation_compression.try_shrink_image_parts_in_messages``
|
||||
silently discarded a *pixel-correct* downscale whenever the re-encoded PNG was
|
||||
larger in bytes than the original (the common case for downscaled Retina
|
||||
screenshots). The image was left at its original oversized dimensions, the
|
||||
provider re-rejected it on retry, and the session wedged forever on the
|
||||
Anthropic many-image 2000px path.
|
||||
|
||||
This script reproduces the exact scenario with REAL Pillow (no mocks): it
|
||||
synthesizes screenshot-like PNGs at the dimensions from the issue's table —
|
||||
images that are small in bytes (under the 4 MB budget) but over the 2000px
|
||||
per-side cap — and runs the real recovery helper. It asserts every image is
|
||||
brought under the cap and that the helper reports success.
|
||||
|
||||
Run directly to see a human-readable report:
|
||||
|
||||
python tests/run_agent/repro_48013_image_shrink_brick.py
|
||||
|
||||
Or as a pytest smoke test (skipped automatically when Pillow is absent):
|
||||
|
||||
scripts/run_tests.sh tests/run_agent/repro_48013_image_shrink_brick.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import io
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# Make the repo root importable when run as a plain script.
|
||||
_REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
if str(_REPO_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_REPO_ROOT))
|
||||
|
||||
PIL = pytest.importorskip("PIL", reason="Pillow required for the real-resize proof")
|
||||
from PIL import Image, ImageDraw # noqa: E402
|
||||
|
||||
from agent.conversation_compression import ( # noqa: E402
|
||||
try_shrink_image_parts_in_messages,
|
||||
)
|
||||
|
||||
# The many-image per-side cap Anthropic reported in the wild (issue #48013).
|
||||
MANY_IMAGE_CAP = 2000
|
||||
BYTE_BUDGET = 4 * 1024 * 1024
|
||||
|
||||
# Dimensions straight from the issue's per-image table. The "REJECTED" rows
|
||||
# are the ones that bricked: tall/large screenshots whose downscale re-encodes
|
||||
# to MORE PNG bytes than the original.
|
||||
CASES = [
|
||||
(2344, 778), # wide — shrank even before the fix
|
||||
(2374, 1144), # wide — shrank even before the fix
|
||||
(2097, 1476), # REJECTED before fix
|
||||
(2247, 1544), # REJECTED before fix
|
||||
(2263, 1644), # REJECTED before fix
|
||||
]
|
||||
|
||||
|
||||
def _make_screenshot_png(width: int, height: int) -> bytes:
|
||||
"""A screenshot-like PNG: mostly flat UI regions so it compresses small.
|
||||
|
||||
Flat regions keep the byte size well under the 4 MB budget, forcing the
|
||||
DIMENSION path (not the byte path) — exactly the code that bricked. The
|
||||
downscale of such an image re-encodes to a comparable-or-larger PNG, which
|
||||
is what the old byte gate wrongly rejected.
|
||||
"""
|
||||
img = Image.new("RGB", (width, height), (245, 245, 247))
|
||||
draw = ImageDraw.Draw(img)
|
||||
for y in range(0, height, 40):
|
||||
shade = 255 - (y // 40) % 6 * 4
|
||||
draw.rectangle([20, y + 5, width - 20, y + 30], fill=(shade, 250, 250))
|
||||
for x in range(0, width, 160):
|
||||
draw.rectangle([x, 0, x + 2, height], fill=(220, 220, 225))
|
||||
draw.text((40, 40), "Some UI text " * 30, fill=(20, 20, 20))
|
||||
buf = io.BytesIO()
|
||||
img.save(buf, format="PNG", optimize=False)
|
||||
return buf.getvalue()
|
||||
|
||||
|
||||
def _data_url(raw: bytes) -> str:
|
||||
return "data:image/png;base64," + base64.b64encode(raw).decode("ascii")
|
||||
|
||||
|
||||
def _decode_dims(data_url: str) -> tuple[int, int]:
|
||||
payload = data_url.partition(",")[2]
|
||||
with Image.open(io.BytesIO(base64.b64decode(payload))) as img:
|
||||
return img.size
|
||||
|
||||
|
||||
def run_proof(verbose: bool = False) -> list[dict]:
|
||||
"""Run the recovery against every case; return per-case results."""
|
||||
results: list[dict] = []
|
||||
for width, height in CASES:
|
||||
raw = _make_screenshot_png(width, height)
|
||||
url = _data_url(raw)
|
||||
# Sanity: this case must be UNDER the byte budget and OVER the pixel cap,
|
||||
# i.e. it exercises the dimension path that bricked.
|
||||
under_byte_budget = len(url) <= BYTE_BUDGET
|
||||
over_pixel_cap = max(width, height) > MANY_IMAGE_CAP
|
||||
|
||||
msgs = [{
|
||||
"role": "user",
|
||||
"content": [{"type": "image_url", "image_url": {"url": url}}],
|
||||
}]
|
||||
changed = try_shrink_image_parts_in_messages(
|
||||
msgs, max_dimension=MANY_IMAGE_CAP,
|
||||
)
|
||||
out_url = msgs[0]["content"][0]["image_url"]["url"]
|
||||
out_dims = _decode_dims(out_url)
|
||||
|
||||
result = {
|
||||
"orig": (width, height),
|
||||
"orig_bytes": len(raw),
|
||||
"under_byte_budget": under_byte_budget,
|
||||
"over_pixel_cap": over_pixel_cap,
|
||||
"changed": changed,
|
||||
"result_dims": out_dims,
|
||||
"under_cap_after": max(out_dims) <= MANY_IMAGE_CAP,
|
||||
}
|
||||
results.append(result)
|
||||
if verbose:
|
||||
status = "OK" if result["under_cap_after"] else "BRICK"
|
||||
print(
|
||||
f" {width}x{height} ({len(raw)//1024:>3} KB)"
|
||||
f" -> changed={changed!s:>5}"
|
||||
f" result={out_dims[0]}x{out_dims[1]}"
|
||||
f" [{status}]"
|
||||
)
|
||||
return results
|
||||
|
||||
|
||||
def test_issue_48013_dimension_shrink_does_not_brick():
|
||||
"""Every dimension-oversized screenshot must be brought under the cap."""
|
||||
results = run_proof()
|
||||
assert results, "no cases ran"
|
||||
for r in results:
|
||||
# Precondition: we really are on the dimension path.
|
||||
assert r["under_byte_budget"], (
|
||||
f"{r['orig']} must be under the byte budget to exercise the bug"
|
||||
)
|
||||
assert r["over_pixel_cap"], f"{r['orig']} must exceed the pixel cap"
|
||||
# The fix: image lands under the cap and the helper reports success.
|
||||
assert r["under_cap_after"], (
|
||||
f"BRICK: {r['orig']} left at {r['result_dims']} "
|
||||
f"(> {MANY_IMAGE_CAP}px) — the shrink recovery discarded a "
|
||||
f"pixel-correct downscale (#48013)"
|
||||
)
|
||||
assert r["changed"] is True, (
|
||||
f"{r['orig']} shrank but helper reported no progress — caller "
|
||||
f"would surface the original error and burn the one-shot retry"
|
||||
)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
print("Issue #48013 proof — image-dimension shrink must not brick sessions")
|
||||
print(f"(many-image per-side cap = {MANY_IMAGE_CAP}px, byte budget = "
|
||||
f"{BYTE_BUDGET // (1024 * 1024)} MB)\n")
|
||||
results = run_proof(verbose=True)
|
||||
bricked = [r for r in results if not r["under_cap_after"]]
|
||||
no_progress = [r for r in results if r["under_cap_after"] and not r["changed"]]
|
||||
print()
|
||||
if bricked:
|
||||
print(f"FAIL: {len(bricked)} image(s) still over the pixel cap (BRICK).")
|
||||
return 1
|
||||
if no_progress:
|
||||
print(f"FAIL: {len(no_progress)} image(s) shrank but helper reported "
|
||||
f"no progress (would burn the retry).")
|
||||
return 1
|
||||
print(f"PASS: all {len(results)} dimension-oversized screenshots brought "
|
||||
f"under {MANY_IMAGE_CAP}px and reported as progress.")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
|
|
@ -108,11 +108,36 @@ def _big_png_data_url(size_kb: int) -> str:
|
|||
return "data:image/png;base64," + base64.b64encode(raw).decode("ascii")
|
||||
|
||||
|
||||
def _install_fake_pillow(monkeypatch, size: tuple[int, int]) -> None:
|
||||
"""Install the tiny subset of Pillow used by the shrink preflight."""
|
||||
def _install_fake_pillow(
|
||||
monkeypatch,
|
||||
size: tuple[int, int],
|
||||
*,
|
||||
shrunk_size: tuple[int, int] | None = None,
|
||||
sizes: list[tuple[int, int]] | None = None,
|
||||
) -> None:
|
||||
"""Install the tiny subset of Pillow used by the shrink preflight.
|
||||
|
||||
The shrink helper decodes pixel dimensions twice for the dimension path:
|
||||
once on the *original* data URL (to decide it's oversized) and once on the
|
||||
*re-encoded* result (to confirm the downscale landed under the cap). To
|
||||
model that honestly, ``_FakeImage`` can return a sequence of sizes across
|
||||
successive ``open()`` calls:
|
||||
|
||||
* ``sizes=[...]`` — explicit per-call size list (clamped to last).
|
||||
* ``shrunk_size=(w, h)`` — shorthand for ``[size, shrunk_size]``: first
|
||||
decode is the oversized original, second is the in-cap re-encode.
|
||||
* neither — every decode returns ``size`` (legacy behaviour).
|
||||
"""
|
||||
call_count = {"n": 0}
|
||||
target_sizes = sizes or [
|
||||
size,
|
||||
shrunk_size if shrunk_size is not None else size,
|
||||
]
|
||||
|
||||
class _FakeImage:
|
||||
def __init__(self):
|
||||
self.size = size
|
||||
self.size = target_sizes[min(call_count["n"], len(target_sizes) - 1)]
|
||||
call_count["n"] += 1
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
|
@ -203,9 +228,10 @@ class TestShrinkImagePartsHelper:
|
|||
assert msgs[0]["content"][1]["image_url"]["url"] == shrunk
|
||||
|
||||
def test_many_image_dimension_limit_rewritten(self, monkeypatch):
|
||||
"""A 2000px many-image rejection must shrink images below 8000px."""
|
||||
"""A 2000px many-image rejection must shrink images below the cap."""
|
||||
agent = _make_agent()
|
||||
_install_fake_pillow(monkeypatch, (2501, 100))
|
||||
# Original decodes oversized (2501px); the re-encode decodes in-cap.
|
||||
_install_fake_pillow(monkeypatch, (2501, 100), shrunk_size=(1500, 60))
|
||||
oversized_for_many = _big_png_data_url(100)
|
||||
shrunk = "data:image/jpeg;base64," + "M" * 1000
|
||||
seen = {}
|
||||
|
|
@ -392,3 +418,200 @@ class TestShrinkImagePartsHelper:
|
|||
assert msgs[0]["content"][0]["image_url"]["url"] == small
|
||||
# The unshrinkable one is left as-is (caller surfaces original error).
|
||||
assert msgs[0]["content"][1]["image_url"]["url"] == unshrinkable
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# #48013: the dimension path must accept a pixel-correct downscale even
|
||||
# when the re-encoded PNG grew in bytes. Before the fix, the byte gate
|
||||
# (`len(resized) >= len(url)`) discarded the dimension-correct result and
|
||||
# left the image oversized, bricking the session on the Anthropic
|
||||
# many-image 2000px path.
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def test_dimension_shrink_with_byte_growth_accepted(self, monkeypatch):
|
||||
"""A dimension-driven shrink is accepted even if its bytes grow.
|
||||
|
||||
Regression for #48013. The original (2501px, under the 4 MB byte
|
||||
budget) is oversized on pixels only. The re-encode lands at 1500px
|
||||
(in-cap) but is *larger in bytes* — the historical byte gate would
|
||||
reject it. The fix keys the accept gate on the binding constraint
|
||||
(dimensions), so the pixel-correct result is kept.
|
||||
"""
|
||||
agent = _make_agent()
|
||||
_install_fake_pillow(monkeypatch, (2501, 100), shrunk_size=(1500, 60))
|
||||
original_url = _big_png_data_url(100) # ~100 KB → well under 4 MB
|
||||
# A *byte-larger* re-encode (the brick trigger): 200 KB payload.
|
||||
dimensionally_shrunk = "data:image/png;base64," + "G" * 200 * 1024
|
||||
seen = {}
|
||||
|
||||
def _fake_resize(path, mime_type=None, max_base64_bytes=None, max_dimension=None):
|
||||
seen["max_dimension"] = max_dimension
|
||||
return dimensionally_shrunk
|
||||
|
||||
monkeypatch.setattr(
|
||||
"tools.vision_tools._resize_image_for_vision",
|
||||
_fake_resize,
|
||||
raising=False,
|
||||
)
|
||||
|
||||
msgs = [{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "image_url", "image_url": {"url": original_url}},
|
||||
],
|
||||
}]
|
||||
# The re-encode is byte-LARGER than the original — proves the byte gate
|
||||
# is no longer the rejection driver on the dimension path.
|
||||
assert len(dimensionally_shrunk) > len(original_url)
|
||||
assert agent._try_shrink_image_parts_in_messages(
|
||||
msgs, max_dimension=2000,
|
||||
) is True
|
||||
assert seen["max_dimension"] == 2000
|
||||
assert msgs[0]["content"][0]["image_url"]["url"] == dimensionally_shrunk
|
||||
|
||||
def test_dimension_shrink_failure_still_blocks_retry(self, monkeypatch):
|
||||
"""A dimension-oversized image that stays oversized is unshrinkable.
|
||||
|
||||
If the re-encode is *still* over the per-side cap, the helper must
|
||||
report no progress (return False) so the one-shot retry isn't burned
|
||||
re-sending a payload the provider already rejected.
|
||||
"""
|
||||
agent = _make_agent()
|
||||
# Both decodes report oversized: original and re-encode are 2501px.
|
||||
_install_fake_pillow(monkeypatch, (2501, 100))
|
||||
original_url = _big_png_data_url(100)
|
||||
still_oversized = "data:image/png;base64," + "H" * 120 * 1024
|
||||
|
||||
monkeypatch.setattr(
|
||||
"tools.vision_tools._resize_image_for_vision",
|
||||
lambda *a, **kw: still_oversized,
|
||||
raising=False,
|
||||
)
|
||||
|
||||
msgs = [{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "image_url", "image_url": {"url": original_url}},
|
||||
],
|
||||
}]
|
||||
assert agent._try_shrink_image_parts_in_messages(
|
||||
msgs, max_dimension=2000,
|
||||
) is False
|
||||
# Original left untouched — caller surfaces the provider's 400.
|
||||
assert msgs[0]["content"][0]["image_url"]["url"] == original_url
|
||||
|
||||
def test_mixed_dimension_partial_progress_returns_false(self, monkeypatch):
|
||||
"""Partial dimension-path progress must not falsely burn the retry.
|
||||
|
||||
Two dimension-oversized images: the first re-encodes in-cap, the
|
||||
second stays oversized. Even though one part changed, an oversized
|
||||
image survives, so retrying would 400 again — the helper must report
|
||||
False. (Mirrors the byte-path
|
||||
``test_mixed_one_shrinkable_one_not_returns_false`` invariant for the
|
||||
pixel axis.)
|
||||
"""
|
||||
agent = _make_agent()
|
||||
# Decode order: img1 orig (2501) -> img1 re-encode (1500, in-cap) ->
|
||||
# img2 orig (2501) -> img2 re-encode (2501, still over).
|
||||
_install_fake_pillow(
|
||||
monkeypatch,
|
||||
(2501, 100),
|
||||
sizes=[(2501, 100), (1500, 60), (2501, 100), (2501, 100)],
|
||||
)
|
||||
first = _big_png_data_url(100)
|
||||
second = _big_png_data_url(90)
|
||||
calls = {"n": 0}
|
||||
|
||||
def _fake_resize(path, mime_type=None, max_base64_bytes=None, max_dimension=None):
|
||||
calls["n"] += 1
|
||||
if calls["n"] == 1:
|
||||
return "data:image/png;base64," + "G" * 200 * 1024 # in-cap
|
||||
return "data:image/png;base64," + "H" * 120 * 1024 # still over
|
||||
|
||||
monkeypatch.setattr(
|
||||
"tools.vision_tools._resize_image_for_vision",
|
||||
_fake_resize,
|
||||
raising=False,
|
||||
)
|
||||
|
||||
msgs = [{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "image_url", "image_url": {"url": first}},
|
||||
{"type": "image_url", "image_url": {"url": second}},
|
||||
],
|
||||
}]
|
||||
assert agent._try_shrink_image_parts_in_messages(
|
||||
msgs, max_dimension=2000,
|
||||
) is False
|
||||
|
||||
def test_byte_oversized_but_pixel_oversized_after_shrink_blocks_retry(self, monkeypatch):
|
||||
"""Bytes-triggered shrink must ALSO honour the active per-side cap.
|
||||
|
||||
Adversarial-review regression (#48013, round 2): an image over BOTH the
|
||||
4 MB byte budget AND the per-side pixel cap can be byte-shrunk yet stay
|
||||
over the cap (``_resize_image_for_vision`` returns a best-effort blob
|
||||
when it exhausts its halving budget on a very-high-aspect image). The
|
||||
byte-path accept gate originally checked only ``len(resized) < len(url)``
|
||||
and reported success, so the caller retried and the provider re-rejected
|
||||
on dimensions — re-bricking the session. The fix re-checks the pixel
|
||||
cap on the byte path too; a still-over-cap result must be unshrinkable.
|
||||
"""
|
||||
agent = _make_agent()
|
||||
# On the BYTE path, _decode_pixels is called once — on the RESIZED blob.
|
||||
# Script that single decode to report still-over-cap dims (2560 > 2000).
|
||||
_install_fake_pillow(monkeypatch, (2560, 64), sizes=[(2560, 64)])
|
||||
# Over the 4 MB byte budget so the BYTE path is taken (triggered_by="bytes").
|
||||
oversized_url = _big_png_data_url(5000) # ~5 MB raw → ~6.7 MB b64
|
||||
# Byte-SMALLER re-encode, but its decoded dims are still over the cap.
|
||||
byte_smaller_still_over = "data:image/png;base64," + "K" * 1000
|
||||
|
||||
monkeypatch.setattr(
|
||||
"tools.vision_tools._resize_image_for_vision",
|
||||
lambda *a, **kw: byte_smaller_still_over,
|
||||
raising=False,
|
||||
)
|
||||
|
||||
msgs = [{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "image_url", "image_url": {"url": oversized_url}},
|
||||
],
|
||||
}]
|
||||
# Bytes shrank, but the per-side cap is still violated → no real
|
||||
# progress; the helper must NOT report success (would burn the retry).
|
||||
assert len(byte_smaller_still_over) < len(oversized_url)
|
||||
assert agent._try_shrink_image_parts_in_messages(
|
||||
msgs, max_dimension=2000,
|
||||
) is False
|
||||
# Original left in place — caller surfaces the provider's 400.
|
||||
assert msgs[0]["content"][0]["image_url"]["url"] == oversized_url
|
||||
|
||||
def test_byte_oversized_with_no_dim_cap_accepts_byte_shrink(self, monkeypatch):
|
||||
"""Bytes path with the default 8000px cap still accepts a byte shrink.
|
||||
|
||||
Guards the fix above against over-reach: when no tight dimension cap is
|
||||
active (default 8000px) and the byte-shrunk re-encode is comfortably
|
||||
within it, the byte path must keep accepting on byte-shrinkage alone.
|
||||
"""
|
||||
agent = _make_agent()
|
||||
# Byte path → single _decode_pixels call on the resized blob; report
|
||||
# in-cap dims so the byte-shrink is accepted under the default 8000 cap.
|
||||
_install_fake_pillow(monkeypatch, (1250, 50), sizes=[(1250, 50)])
|
||||
oversized_url = _big_png_data_url(5000)
|
||||
shrunk = "data:image/jpeg;base64," + "L" * 1000
|
||||
|
||||
monkeypatch.setattr(
|
||||
"tools.vision_tools._resize_image_for_vision",
|
||||
lambda *a, **kw: shrunk,
|
||||
raising=False,
|
||||
)
|
||||
|
||||
msgs = [{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "image_url", "image_url": {"url": oversized_url}},
|
||||
],
|
||||
}]
|
||||
# Default cap (8000) — no explicit max_dimension passed.
|
||||
assert agent._try_shrink_image_parts_in_messages(msgs) is True
|
||||
assert msgs[0]["content"][0]["image_url"]["url"] == shrunk
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue