fix(agent): accept pixel-correct image downscale when bytes grow (#48013)

The image-too-large reactive shrink (try_shrink_image_parts_in_messages)
conflated two independent constraints: it always rejected a resize whose
re-encoded bytes were >= the original, even when the shrink was driven by a
PIXEL-DIMENSION cap (Anthropic many-image 2000px) rather than the byte budget.
Downscaled screenshot PNGs routinely re-encode LARGER in bytes, so the
dimension-correct result was discarded and the image left oversized -> the
provider re-rejected on retry and the session wedged forever.

Fix: track which constraint triggered the shrink (bytes vs dimension) and gate
the accept on the SAME axis.
  * dimension path: accept the result as long as it is now within max_dimension,
    regardless of byte size (verify via Pillow; fall back to the byte gate only
    when the re-encode can't be decoded).
  * bytes path: still require bytes to shrink, but ALSO re-check the per-side cap
    when it's active — _resize_image_for_vision returns a best-effort, possibly
    over-cap blob when it exhausts its halving budget on a very-high-aspect
    image, so a byte-shrink alone can leave it over the dimension cap and
    re-brick on retry.
Extend the unshrinkable-oversized guard to the pixel axis so a partial shrink
doesn't burn the one-shot retry.

Single shared agent path -> fixes CLI, TUI, and gateway alike.

Adds a real-Pillow runnable proof (repro_48013_image_shrink_brick.py) that
reproduces the issue's per-image table (bricks 3/5 before, passes 5/5 after)
plus unit invariants for the dimension and bytes accept/reject paths,
partial-progress accounting, and the bytes-path still-over-cap regression
surfaced by adversarial review.

Closes #48013
This commit is contained in:
alt-glitch 2026-06-19 20:11:12 +05:30 committed by Teknium
parent ac00e73688
commit 990273d90a
3 changed files with 496 additions and 39 deletions

View file

@ -712,33 +712,58 @@ def try_shrink_image_parts_in_messages(
# actually brought under the target.
unshrinkable_oversized = 0
def _shrink_data_url(url: str) -> Optional[str]:
"""Return a smaller data URL, or None if shrink can't help."""
if not isinstance(url, str) or not url.startswith("data:"):
def _decode_pixels(data_url: str) -> Optional[tuple]:
"""Return ``(width, height)`` of a base64 data URL, or None on failure.
Soft-depends on Pillow; returns None (caller falls back to a
bytes-only check) if Pillow is missing or the payload is corrupt.
"""
try:
import base64 as _b64_dim
import io as _io_dim
header_d, _, data_d = data_url.partition(",")
if not data_d or not data_url.startswith("data:"):
return None
from PIL import Image as _PILImage
with _PILImage.open(_io_dim.BytesIO(_b64_dim.b64decode(data_d))) as _img:
return _img.size
except Exception:
return None
# Check both byte size AND pixel dimensions.
def _shrink_data_url(url: str) -> tuple:
"""Return ``(resized_url, unshrinkable)`` for a data URL.
``resized_url`` is a smaller/dimension-correct data URL, or None when
no rewrite was applied. ``unshrinkable`` is True only when the image
exceeded a constraint (byte-size or dimensions) and the resize failed
to satisfy *that same* constraint so the caller knows retrying is
pointless even if a different image in the request shrank.
"""
if not isinstance(url, str) or not url.startswith("data:"):
return None, False
# Determine which constraint is binding. The accept/reject gate below
# MUST be checked against the same axis that triggered the shrink: a
# downscaled screenshot PNG routinely re-encodes to *more* bytes than
# the original (PNG compression is non-monotonic in image size — a
# smaller raster with LANCZOS resampling noise compresses worse than a
# larger smooth one). Rejecting a pixel-correct downscale purely
# because its bytes grew permanently wedges sessions on the Anthropic
# many-image 2000px path (#48013).
needs_shrink = len(url) > target_bytes # over byte budget
triggered_by = "bytes" if needs_shrink else None
if not needs_shrink:
# Even if bytes are fine, check pixel dimensions against the
# provider's reported per-side cap. A screenshot can be tiny in
# bytes yet too large in pixels.
try:
import base64 as _b64_dim
header_d, _, data_d = url.partition(",")
if not data_d:
return None
raw_d = _b64_dim.b64decode(data_d)
from PIL import Image as _PILImage
import io as _io_dim
with _PILImage.open(_io_dim.BytesIO(raw_d)) as _img:
if max(_img.size) <= max_dimension:
return None # both bytes and pixels are fine
needs_shrink = True # pixels exceed limit, force shrink
except Exception:
# If we can't check dimensions (Pillow unavailable, corrupt
# image, etc.), fall back to byte-only check.
return None
# Bytes are fine — check pixel dimensions against the provider's
# reported per-side cap. A screenshot can be tiny in bytes yet
# too large in pixels.
dims = _decode_pixels(url)
if dims is None:
# Pillow missing or corrupt data — fall back to byte-only.
return None, False
if max(dims) <= max_dimension:
return None, False # both bytes and pixels are within limits
needs_shrink = True
triggered_by = "dimension"
try:
header, _, data = url.partition(",")
@ -770,13 +795,45 @@ def try_shrink_image_parts_in_messages(
Path(tmp.name).unlink(missing_ok=True)
except Exception:
pass
if not resized or len(resized) >= len(url):
# Shrink didn't help (or made it bigger — corrupt input?).
return None
return resized
if not resized:
# Resize returned nothing — Pillow couldn't help.
return None, True
if triggered_by == "bytes":
# Byte budget is the binding constraint — bytes must shrink.
if len(resized) >= len(url):
return None, True # re-encode made it bigger
# The per-side dimension cap is ALSO an active provider
# constraint on this request (the caller passes the parsed cap
# to both this helper and the resizer). _resize_image_for_vision
# returns a best-effort, possibly-over-cap blob when it
# exhausts its halving budget — it freezes the long side once
# the short side hits its 64px floor, so a very-high-aspect
# image can stay over the cap even after bytes shrank. If the
# output is still over the cap, retrying would re-400 on
# dimensions; treat it as unshrinkable. (Skip when dims can't
# be decoded — preserves historical byte-only behaviour.)
new_dims = _decode_pixels(resized)
if new_dims is not None and max(new_dims) > max_dimension:
return None, True
return resized, False
# triggered_by == "dimension": the per-side cap is binding. The
# re-encode may have grown in bytes; accept it as long as it is now
# within the dimension cap. Verify the new dimensions when we can.
new_dims = _decode_pixels(resized)
if new_dims is not None:
if max(new_dims) <= max_dimension:
return resized, False
# Still over the per-side cap — the resize didn't satisfy it.
return None, True
# Couldn't verify the re-encode's dimensions (corrupt output or
# Pillow gone mid-call). Fall back to the historical "bytes must
# shrink" gate so we never accept an unverifiable, byte-larger blob.
if len(resized) >= len(url):
return None, True
return resized, False
except Exception as exc:
logger.warning("image-shrink recovery: re-encode failed — %s", exc)
return None
return None, triggered_by is not None
for msg in api_messages:
if not isinstance(msg, dict):
@ -795,20 +852,18 @@ def try_shrink_image_parts_in_messages(
# OpenAI Responses: {"image_url": "data:..."}
if isinstance(image_value, dict):
url = image_value.get("url", "")
resized = _shrink_data_url(url)
resized, unshrinkable = _shrink_data_url(url)
if resized:
image_value["url"] = resized
changed_count += 1
elif isinstance(url, str) and url.startswith("data:") \
and len(url) > target_bytes:
elif unshrinkable:
unshrinkable_oversized += 1
elif isinstance(image_value, str):
resized = _shrink_data_url(image_value)
resized, unshrinkable = _shrink_data_url(image_value)
if resized:
part["image_url"] = resized
changed_count += 1
elif image_value.startswith("data:") \
and len(image_value) > target_bytes:
elif unshrinkable:
unshrinkable_oversized += 1
if changed_count:

View file

@ -0,0 +1,179 @@
#!/usr/bin/env python3
"""Runnable proof for issue #48013 — image-dimension 400 session brick.
Before the fix, ``agent.conversation_compression.try_shrink_image_parts_in_messages``
silently discarded a *pixel-correct* downscale whenever the re-encoded PNG was
larger in bytes than the original (the common case for downscaled Retina
screenshots). The image was left at its original oversized dimensions, the
provider re-rejected it on retry, and the session wedged forever on the
Anthropic many-image 2000px path.
This script reproduces the exact scenario with REAL Pillow (no mocks): it
synthesizes screenshot-like PNGs at the dimensions from the issue's table —
images that are small in bytes (under the 4 MB budget) but over the 2000px
per-side cap and runs the real recovery helper. It asserts every image is
brought under the cap and that the helper reports success.
Run directly to see a human-readable report:
python tests/run_agent/repro_48013_image_shrink_brick.py
Or as a pytest smoke test (skipped automatically when Pillow is absent):
scripts/run_tests.sh tests/run_agent/repro_48013_image_shrink_brick.py
"""
from __future__ import annotations
import base64
import io
import sys
from pathlib import Path
import pytest
# Make the repo root importable when run as a plain script.
_REPO_ROOT = Path(__file__).resolve().parents[2]
if str(_REPO_ROOT) not in sys.path:
sys.path.insert(0, str(_REPO_ROOT))
PIL = pytest.importorskip("PIL", reason="Pillow required for the real-resize proof")
from PIL import Image, ImageDraw # noqa: E402
from agent.conversation_compression import ( # noqa: E402
try_shrink_image_parts_in_messages,
)
# The many-image per-side cap Anthropic reported in the wild (issue #48013).
MANY_IMAGE_CAP = 2000
BYTE_BUDGET = 4 * 1024 * 1024
# Dimensions straight from the issue's per-image table. The "REJECTED" rows
# are the ones that bricked: tall/large screenshots whose downscale re-encodes
# to MORE PNG bytes than the original.
CASES = [
(2344, 778), # wide — shrank even before the fix
(2374, 1144), # wide — shrank even before the fix
(2097, 1476), # REJECTED before fix
(2247, 1544), # REJECTED before fix
(2263, 1644), # REJECTED before fix
]
def _make_screenshot_png(width: int, height: int) -> bytes:
"""A screenshot-like PNG: mostly flat UI regions so it compresses small.
Flat regions keep the byte size well under the 4 MB budget, forcing the
DIMENSION path (not the byte path) exactly the code that bricked. The
downscale of such an image re-encodes to a comparable-or-larger PNG, which
is what the old byte gate wrongly rejected.
"""
img = Image.new("RGB", (width, height), (245, 245, 247))
draw = ImageDraw.Draw(img)
for y in range(0, height, 40):
shade = 255 - (y // 40) % 6 * 4
draw.rectangle([20, y + 5, width - 20, y + 30], fill=(shade, 250, 250))
for x in range(0, width, 160):
draw.rectangle([x, 0, x + 2, height], fill=(220, 220, 225))
draw.text((40, 40), "Some UI text " * 30, fill=(20, 20, 20))
buf = io.BytesIO()
img.save(buf, format="PNG", optimize=False)
return buf.getvalue()
def _data_url(raw: bytes) -> str:
return "data:image/png;base64," + base64.b64encode(raw).decode("ascii")
def _decode_dims(data_url: str) -> tuple[int, int]:
payload = data_url.partition(",")[2]
with Image.open(io.BytesIO(base64.b64decode(payload))) as img:
return img.size
def run_proof(verbose: bool = False) -> list[dict]:
"""Run the recovery against every case; return per-case results."""
results: list[dict] = []
for width, height in CASES:
raw = _make_screenshot_png(width, height)
url = _data_url(raw)
# Sanity: this case must be UNDER the byte budget and OVER the pixel cap,
# i.e. it exercises the dimension path that bricked.
under_byte_budget = len(url) <= BYTE_BUDGET
over_pixel_cap = max(width, height) > MANY_IMAGE_CAP
msgs = [{
"role": "user",
"content": [{"type": "image_url", "image_url": {"url": url}}],
}]
changed = try_shrink_image_parts_in_messages(
msgs, max_dimension=MANY_IMAGE_CAP,
)
out_url = msgs[0]["content"][0]["image_url"]["url"]
out_dims = _decode_dims(out_url)
result = {
"orig": (width, height),
"orig_bytes": len(raw),
"under_byte_budget": under_byte_budget,
"over_pixel_cap": over_pixel_cap,
"changed": changed,
"result_dims": out_dims,
"under_cap_after": max(out_dims) <= MANY_IMAGE_CAP,
}
results.append(result)
if verbose:
status = "OK" if result["under_cap_after"] else "BRICK"
print(
f" {width}x{height} ({len(raw)//1024:>3} KB)"
f" -> changed={changed!s:>5}"
f" result={out_dims[0]}x{out_dims[1]}"
f" [{status}]"
)
return results
def test_issue_48013_dimension_shrink_does_not_brick():
"""Every dimension-oversized screenshot must be brought under the cap."""
results = run_proof()
assert results, "no cases ran"
for r in results:
# Precondition: we really are on the dimension path.
assert r["under_byte_budget"], (
f"{r['orig']} must be under the byte budget to exercise the bug"
)
assert r["over_pixel_cap"], f"{r['orig']} must exceed the pixel cap"
# The fix: image lands under the cap and the helper reports success.
assert r["under_cap_after"], (
f"BRICK: {r['orig']} left at {r['result_dims']} "
f"(> {MANY_IMAGE_CAP}px) — the shrink recovery discarded a "
f"pixel-correct downscale (#48013)"
)
assert r["changed"] is True, (
f"{r['orig']} shrank but helper reported no progress — caller "
f"would surface the original error and burn the one-shot retry"
)
def main() -> int:
print("Issue #48013 proof — image-dimension shrink must not brick sessions")
print(f"(many-image per-side cap = {MANY_IMAGE_CAP}px, byte budget = "
f"{BYTE_BUDGET // (1024 * 1024)} MB)\n")
results = run_proof(verbose=True)
bricked = [r for r in results if not r["under_cap_after"]]
no_progress = [r for r in results if r["under_cap_after"] and not r["changed"]]
print()
if bricked:
print(f"FAIL: {len(bricked)} image(s) still over the pixel cap (BRICK).")
return 1
if no_progress:
print(f"FAIL: {len(no_progress)} image(s) shrank but helper reported "
f"no progress (would burn the retry).")
return 1
print(f"PASS: all {len(results)} dimension-oversized screenshots brought "
f"under {MANY_IMAGE_CAP}px and reported as progress.")
return 0
if __name__ == "__main__":
raise SystemExit(main())

View file

@ -108,11 +108,36 @@ def _big_png_data_url(size_kb: int) -> str:
return "data:image/png;base64," + base64.b64encode(raw).decode("ascii")
def _install_fake_pillow(monkeypatch, size: tuple[int, int]) -> None:
"""Install the tiny subset of Pillow used by the shrink preflight."""
def _install_fake_pillow(
monkeypatch,
size: tuple[int, int],
*,
shrunk_size: tuple[int, int] | None = None,
sizes: list[tuple[int, int]] | None = None,
) -> None:
"""Install the tiny subset of Pillow used by the shrink preflight.
The shrink helper decodes pixel dimensions twice for the dimension path:
once on the *original* data URL (to decide it's oversized) and once on the
*re-encoded* result (to confirm the downscale landed under the cap). To
model that honestly, ``_FakeImage`` can return a sequence of sizes across
successive ``open()`` calls:
* ``sizes=[...]`` explicit per-call size list (clamped to last).
* ``shrunk_size=(w, h)`` shorthand for ``[size, shrunk_size]``: first
decode is the oversized original, second is the in-cap re-encode.
* neither every decode returns ``size`` (legacy behaviour).
"""
call_count = {"n": 0}
target_sizes = sizes or [
size,
shrunk_size if shrunk_size is not None else size,
]
class _FakeImage:
def __init__(self):
self.size = size
self.size = target_sizes[min(call_count["n"], len(target_sizes) - 1)]
call_count["n"] += 1
def __enter__(self):
return self
@ -203,9 +228,10 @@ class TestShrinkImagePartsHelper:
assert msgs[0]["content"][1]["image_url"]["url"] == shrunk
def test_many_image_dimension_limit_rewritten(self, monkeypatch):
"""A 2000px many-image rejection must shrink images below 8000px."""
"""A 2000px many-image rejection must shrink images below the cap."""
agent = _make_agent()
_install_fake_pillow(monkeypatch, (2501, 100))
# Original decodes oversized (2501px); the re-encode decodes in-cap.
_install_fake_pillow(monkeypatch, (2501, 100), shrunk_size=(1500, 60))
oversized_for_many = _big_png_data_url(100)
shrunk = "data:image/jpeg;base64," + "M" * 1000
seen = {}
@ -392,3 +418,200 @@ class TestShrinkImagePartsHelper:
assert msgs[0]["content"][0]["image_url"]["url"] == small
# The unshrinkable one is left as-is (caller surfaces original error).
assert msgs[0]["content"][1]["image_url"]["url"] == unshrinkable
# ------------------------------------------------------------------
# #48013: the dimension path must accept a pixel-correct downscale even
# when the re-encoded PNG grew in bytes. Before the fix, the byte gate
# (`len(resized) >= len(url)`) discarded the dimension-correct result and
# left the image oversized, bricking the session on the Anthropic
# many-image 2000px path.
# ------------------------------------------------------------------
def test_dimension_shrink_with_byte_growth_accepted(self, monkeypatch):
"""A dimension-driven shrink is accepted even if its bytes grow.
Regression for #48013. The original (2501px, under the 4 MB byte
budget) is oversized on pixels only. The re-encode lands at 1500px
(in-cap) but is *larger in bytes* the historical byte gate would
reject it. The fix keys the accept gate on the binding constraint
(dimensions), so the pixel-correct result is kept.
"""
agent = _make_agent()
_install_fake_pillow(monkeypatch, (2501, 100), shrunk_size=(1500, 60))
original_url = _big_png_data_url(100) # ~100 KB → well under 4 MB
# A *byte-larger* re-encode (the brick trigger): 200 KB payload.
dimensionally_shrunk = "data:image/png;base64," + "G" * 200 * 1024
seen = {}
def _fake_resize(path, mime_type=None, max_base64_bytes=None, max_dimension=None):
seen["max_dimension"] = max_dimension
return dimensionally_shrunk
monkeypatch.setattr(
"tools.vision_tools._resize_image_for_vision",
_fake_resize,
raising=False,
)
msgs = [{
"role": "user",
"content": [
{"type": "image_url", "image_url": {"url": original_url}},
],
}]
# The re-encode is byte-LARGER than the original — proves the byte gate
# is no longer the rejection driver on the dimension path.
assert len(dimensionally_shrunk) > len(original_url)
assert agent._try_shrink_image_parts_in_messages(
msgs, max_dimension=2000,
) is True
assert seen["max_dimension"] == 2000
assert msgs[0]["content"][0]["image_url"]["url"] == dimensionally_shrunk
def test_dimension_shrink_failure_still_blocks_retry(self, monkeypatch):
"""A dimension-oversized image that stays oversized is unshrinkable.
If the re-encode is *still* over the per-side cap, the helper must
report no progress (return False) so the one-shot retry isn't burned
re-sending a payload the provider already rejected.
"""
agent = _make_agent()
# Both decodes report oversized: original and re-encode are 2501px.
_install_fake_pillow(monkeypatch, (2501, 100))
original_url = _big_png_data_url(100)
still_oversized = "data:image/png;base64," + "H" * 120 * 1024
monkeypatch.setattr(
"tools.vision_tools._resize_image_for_vision",
lambda *a, **kw: still_oversized,
raising=False,
)
msgs = [{
"role": "user",
"content": [
{"type": "image_url", "image_url": {"url": original_url}},
],
}]
assert agent._try_shrink_image_parts_in_messages(
msgs, max_dimension=2000,
) is False
# Original left untouched — caller surfaces the provider's 400.
assert msgs[0]["content"][0]["image_url"]["url"] == original_url
def test_mixed_dimension_partial_progress_returns_false(self, monkeypatch):
"""Partial dimension-path progress must not falsely burn the retry.
Two dimension-oversized images: the first re-encodes in-cap, the
second stays oversized. Even though one part changed, an oversized
image survives, so retrying would 400 again the helper must report
False. (Mirrors the byte-path
``test_mixed_one_shrinkable_one_not_returns_false`` invariant for the
pixel axis.)
"""
agent = _make_agent()
# Decode order: img1 orig (2501) -> img1 re-encode (1500, in-cap) ->
# img2 orig (2501) -> img2 re-encode (2501, still over).
_install_fake_pillow(
monkeypatch,
(2501, 100),
sizes=[(2501, 100), (1500, 60), (2501, 100), (2501, 100)],
)
first = _big_png_data_url(100)
second = _big_png_data_url(90)
calls = {"n": 0}
def _fake_resize(path, mime_type=None, max_base64_bytes=None, max_dimension=None):
calls["n"] += 1
if calls["n"] == 1:
return "data:image/png;base64," + "G" * 200 * 1024 # in-cap
return "data:image/png;base64," + "H" * 120 * 1024 # still over
monkeypatch.setattr(
"tools.vision_tools._resize_image_for_vision",
_fake_resize,
raising=False,
)
msgs = [{
"role": "user",
"content": [
{"type": "image_url", "image_url": {"url": first}},
{"type": "image_url", "image_url": {"url": second}},
],
}]
assert agent._try_shrink_image_parts_in_messages(
msgs, max_dimension=2000,
) is False
def test_byte_oversized_but_pixel_oversized_after_shrink_blocks_retry(self, monkeypatch):
"""Bytes-triggered shrink must ALSO honour the active per-side cap.
Adversarial-review regression (#48013, round 2): an image over BOTH the
4 MB byte budget AND the per-side pixel cap can be byte-shrunk yet stay
over the cap (``_resize_image_for_vision`` returns a best-effort blob
when it exhausts its halving budget on a very-high-aspect image). The
byte-path accept gate originally checked only ``len(resized) < len(url)``
and reported success, so the caller retried and the provider re-rejected
on dimensions re-bricking the session. The fix re-checks the pixel
cap on the byte path too; a still-over-cap result must be unshrinkable.
"""
agent = _make_agent()
# On the BYTE path, _decode_pixels is called once — on the RESIZED blob.
# Script that single decode to report still-over-cap dims (2560 > 2000).
_install_fake_pillow(monkeypatch, (2560, 64), sizes=[(2560, 64)])
# Over the 4 MB byte budget so the BYTE path is taken (triggered_by="bytes").
oversized_url = _big_png_data_url(5000) # ~5 MB raw → ~6.7 MB b64
# Byte-SMALLER re-encode, but its decoded dims are still over the cap.
byte_smaller_still_over = "data:image/png;base64," + "K" * 1000
monkeypatch.setattr(
"tools.vision_tools._resize_image_for_vision",
lambda *a, **kw: byte_smaller_still_over,
raising=False,
)
msgs = [{
"role": "user",
"content": [
{"type": "image_url", "image_url": {"url": oversized_url}},
],
}]
# Bytes shrank, but the per-side cap is still violated → no real
# progress; the helper must NOT report success (would burn the retry).
assert len(byte_smaller_still_over) < len(oversized_url)
assert agent._try_shrink_image_parts_in_messages(
msgs, max_dimension=2000,
) is False
# Original left in place — caller surfaces the provider's 400.
assert msgs[0]["content"][0]["image_url"]["url"] == oversized_url
def test_byte_oversized_with_no_dim_cap_accepts_byte_shrink(self, monkeypatch):
"""Bytes path with the default 8000px cap still accepts a byte shrink.
Guards the fix above against over-reach: when no tight dimension cap is
active (default 8000px) and the byte-shrunk re-encode is comfortably
within it, the byte path must keep accepting on byte-shrinkage alone.
"""
agent = _make_agent()
# Byte path → single _decode_pixels call on the resized blob; report
# in-cap dims so the byte-shrink is accepted under the default 8000 cap.
_install_fake_pillow(monkeypatch, (1250, 50), sizes=[(1250, 50)])
oversized_url = _big_png_data_url(5000)
shrunk = "data:image/jpeg;base64," + "L" * 1000
monkeypatch.setattr(
"tools.vision_tools._resize_image_for_vision",
lambda *a, **kw: shrunk,
raising=False,
)
msgs = [{
"role": "user",
"content": [
{"type": "image_url", "image_url": {"url": oversized_url}},
],
}]
# Default cap (8000) — no explicit max_dimension passed.
assert agent._try_shrink_image_parts_in_messages(msgs) is True
assert msgs[0]["content"][0]["image_url"]["url"] == shrunk