fix(agent): accept pixel-correct image downscale when bytes grow (#48013)

The image-too-large reactive shrink (try_shrink_image_parts_in_messages) conflated two independent constraints: it always rejected a resize whose re-encoded bytes were >= the original, even when the shrink was driven by a PIXEL-DIMENSION cap (Anthropic many-image 2000px) rather than the byte budget. Downscaled screenshot PNGs routinely re-encode LARGER in bytes, so the dimension-correct result was discarded and the image left oversized -> the provider re-rejected on retry and the session wedged forever. Fix: track which constraint triggered the shrink (bytes vs dimension) and gate the accept on the SAME axis. * dimension path: accept the result as long as it is now within max_dimension, regardless of byte size (verify via Pillow; fall back to the byte gate only when the re-encode can't be decoded). * bytes path: still require bytes to shrink, but ALSO re-check the per-side cap when it's active — _resize_image_for_vision returns a best-effort, possibly over-cap blob when it exhausts its halving budget on a very-high-aspect image, so a byte-shrink alone can leave it over the dimension cap and re-brick on retry. Extend the unshrinkable-oversized guard to the pixel axis so a partial shrink doesn't burn the one-shot retry. Single shared agent path -> fixes CLI, TUI, and gateway alike. Adds a real-Pillow runnable proof (repro_48013_image_shrink_brick.py) that reproduces the issue's per-image table (bricks 3/5 before, passes 5/5 after) plus unit invariants for the dimension and bytes accept/reject paths, partial-progress accounting, and the bytes-path still-over-cap regression surfaced by adversarial review. Closes #48013
2026-06-21 10:22:18 +00:00 · 2026-06-19 20:11:12 +05:30 · 2026-06-19 20:11:12 +05:30 · 990273d90a
commit 990273d90a
parent ac00e73688
3 changed files with 496 additions and 39 deletions
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@ -712,33 +712,58 @@ def try_shrink_image_parts_in_messages(
    # actually brought under the target.
    unshrinkable_oversized = 0

-    def _shrink_data_url(url: str) -> Optional[str]:
-        """Return a smaller data URL, or None if shrink can't help."""
-        if not isinstance(url, str) or not url.startswith("data:"):
+    def _decode_pixels(data_url: str) -> Optional[tuple]:
+        """Return ``(width, height)`` of a base64 data URL, or None on failure.
+
+        Soft-depends on Pillow; returns None (caller falls back to a
+        bytes-only check) if Pillow is missing or the payload is corrupt.
+        """
+        try:
+            import base64 as _b64_dim
+            import io as _io_dim
+            header_d, _, data_d = data_url.partition(",")
+            if not data_d or not data_url.startswith("data:"):
+                return None
+            from PIL import Image as _PILImage
+            with _PILImage.open(_io_dim.BytesIO(_b64_dim.b64decode(data_d))) as _img:
+                return _img.size
+        except Exception:
            return None

-        # Check both byte size AND pixel dimensions.
+    def _shrink_data_url(url: str) -> tuple:
+        """Return ``(resized_url, unshrinkable)`` for a data URL.
+
+        ``resized_url`` is a smaller/dimension-correct data URL, or None when
+        no rewrite was applied.  ``unshrinkable`` is True only when the image
+        exceeded a constraint (byte-size or dimensions) and the resize failed
+        to satisfy *that same* constraint — so the caller knows retrying is
+        pointless even if a different image in the request shrank.
+        """
+        if not isinstance(url, str) or not url.startswith("data:"):
+            return None, False
+
+        # Determine which constraint is binding.  The accept/reject gate below
+        # MUST be checked against the same axis that triggered the shrink: a
+        # downscaled screenshot PNG routinely re-encodes to *more* bytes than
+        # the original (PNG compression is non-monotonic in image size — a
+        # smaller raster with LANCZOS resampling noise compresses worse than a
+        # larger smooth one).  Rejecting a pixel-correct downscale purely
+        # because its bytes grew permanently wedges sessions on the Anthropic
+        # many-image 2000px path (#48013).
        needs_shrink = len(url) > target_bytes  # over byte budget
+        triggered_by = "bytes" if needs_shrink else None
        if not needs_shrink:
-            # Even if bytes are fine, check pixel dimensions against the
-            # provider's reported per-side cap.  A screenshot can be tiny in
-            # bytes yet too large in pixels.
-            try:
-                import base64 as _b64_dim
-                header_d, _, data_d = url.partition(",")
-                if not data_d:
-                    return None
-                raw_d = _b64_dim.b64decode(data_d)
-                from PIL import Image as _PILImage
-                import io as _io_dim
-                with _PILImage.open(_io_dim.BytesIO(raw_d)) as _img:
-                    if max(_img.size) <= max_dimension:
-                        return None  # both bytes and pixels are fine
-                needs_shrink = True  # pixels exceed limit, force shrink
-            except Exception:
-                # If we can't check dimensions (Pillow unavailable, corrupt
-                # image, etc.), fall back to byte-only check.
-                return None
+            # Bytes are fine — check pixel dimensions against the provider's
+            # reported per-side cap.  A screenshot can be tiny in bytes yet
+            # too large in pixels.
+            dims = _decode_pixels(url)
+            if dims is None:
+                # Pillow missing or corrupt data — fall back to byte-only.
+                return None, False
+            if max(dims) <= max_dimension:
+                return None, False  # both bytes and pixels are within limits
+            needs_shrink = True
+            triggered_by = "dimension"

        try:
            header, _, data = url.partition(",")
@ -770,13 +795,45 @@ def try_shrink_image_parts_in_messages(
                    Path(tmp.name).unlink(missing_ok=True)
                except Exception:
                    pass
-            if not resized or len(resized) >= len(url):
-                # Shrink didn't help (or made it bigger — corrupt input?).
-                return None
-            return resized
+            if not resized:
+                # Resize returned nothing — Pillow couldn't help.
+                return None, True
+            if triggered_by == "bytes":
+                # Byte budget is the binding constraint — bytes must shrink.
+                if len(resized) >= len(url):
+                    return None, True  # re-encode made it bigger
+                # The per-side dimension cap is ALSO an active provider
+                # constraint on this request (the caller passes the parsed cap
+                # to both this helper and the resizer).  _resize_image_for_vision
+                # returns a best-effort, possibly-over-cap blob when it
+                # exhausts its halving budget — it freezes the long side once
+                # the short side hits its 64px floor, so a very-high-aspect
+                # image can stay over the cap even after bytes shrank.  If the
+                # output is still over the cap, retrying would re-400 on
+                # dimensions; treat it as unshrinkable.  (Skip when dims can't
+                # be decoded — preserves historical byte-only behaviour.)
+                new_dims = _decode_pixels(resized)
+                if new_dims is not None and max(new_dims) > max_dimension:
+                    return None, True
+                return resized, False
+            # triggered_by == "dimension": the per-side cap is binding.  The
+            # re-encode may have grown in bytes; accept it as long as it is now
+            # within the dimension cap.  Verify the new dimensions when we can.
+            new_dims = _decode_pixels(resized)
+            if new_dims is not None:
+                if max(new_dims) <= max_dimension:
+                    return resized, False
+                # Still over the per-side cap — the resize didn't satisfy it.
+                return None, True
+            # Couldn't verify the re-encode's dimensions (corrupt output or
+            # Pillow gone mid-call).  Fall back to the historical "bytes must
+            # shrink" gate so we never accept an unverifiable, byte-larger blob.
+            if len(resized) >= len(url):
+                return None, True
+            return resized, False
        except Exception as exc:
            logger.warning("image-shrink recovery: re-encode failed — %s", exc)
-            return None
+            return None, triggered_by is not None

    for msg in api_messages:
        if not isinstance(msg, dict):
@ -795,20 +852,18 @@ def try_shrink_image_parts_in_messages(
            # OpenAI Responses: {"image_url": "data:..."}
            if isinstance(image_value, dict):
                url = image_value.get("url", "")
-                resized = _shrink_data_url(url)
+                resized, unshrinkable = _shrink_data_url(url)
                if resized:
                    image_value["url"] = resized
                    changed_count += 1
-                elif isinstance(url, str) and url.startswith("data:") \
-                        and len(url) > target_bytes:
+                elif unshrinkable:
                    unshrinkable_oversized += 1
            elif isinstance(image_value, str):
-                resized = _shrink_data_url(image_value)
+                resized, unshrinkable = _shrink_data_url(image_value)
                if resized:
                    part["image_url"] = resized
                    changed_count += 1
-                elif image_value.startswith("data:") \
-                        and len(image_value) > target_bytes:
+                elif unshrinkable:
                    unshrinkable_oversized += 1

    if changed_count:
--- a/tests/run_agent/repro_48013_image_shrink_brick.py
+++ b/tests/run_agent/repro_48013_image_shrink_brick.py
@ -0,0 +1,179 @@
+#!/usr/bin/env python3
+"""Runnable proof for issue #48013 — image-dimension 400 session brick.
+
+Before the fix, ``agent.conversation_compression.try_shrink_image_parts_in_messages``
+silently discarded a *pixel-correct* downscale whenever the re-encoded PNG was
+larger in bytes than the original (the common case for downscaled Retina
+screenshots). The image was left at its original oversized dimensions, the
+provider re-rejected it on retry, and the session wedged forever on the
+Anthropic many-image 2000px path.
+
+This script reproduces the exact scenario with REAL Pillow (no mocks): it
+synthesizes screenshot-like PNGs at the dimensions from the issue's table —
+images that are small in bytes (under the 4 MB budget) but over the 2000px
+per-side cap — and runs the real recovery helper. It asserts every image is
+brought under the cap and that the helper reports success.
+
+Run directly to see a human-readable report:
+
+    python tests/run_agent/repro_48013_image_shrink_brick.py
+
+Or as a pytest smoke test (skipped automatically when Pillow is absent):
+
+    scripts/run_tests.sh tests/run_agent/repro_48013_image_shrink_brick.py
+"""
+
+from __future__ import annotations
+
+import base64
+import io
+import sys
+from pathlib import Path
+
+import pytest
+
+# Make the repo root importable when run as a plain script.
+_REPO_ROOT = Path(__file__).resolve().parents[2]
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))
+
+PIL = pytest.importorskip("PIL", reason="Pillow required for the real-resize proof")
+from PIL import Image, ImageDraw  # noqa: E402
+
+from agent.conversation_compression import (  # noqa: E402
+    try_shrink_image_parts_in_messages,
+)
+
+# The many-image per-side cap Anthropic reported in the wild (issue #48013).
+MANY_IMAGE_CAP = 2000
+BYTE_BUDGET = 4 * 1024 * 1024
+
+# Dimensions straight from the issue's per-image table. The "REJECTED" rows
+# are the ones that bricked: tall/large screenshots whose downscale re-encodes
+# to MORE PNG bytes than the original.
+CASES = [
+    (2344, 778),   # wide — shrank even before the fix
+    (2374, 1144),  # wide — shrank even before the fix
+    (2097, 1476),  # REJECTED before fix
+    (2247, 1544),  # REJECTED before fix
+    (2263, 1644),  # REJECTED before fix
+]
+
+
+def _make_screenshot_png(width: int, height: int) -> bytes:
+    """A screenshot-like PNG: mostly flat UI regions so it compresses small.
+
+    Flat regions keep the byte size well under the 4 MB budget, forcing the
+    DIMENSION path (not the byte path) — exactly the code that bricked. The
+    downscale of such an image re-encodes to a comparable-or-larger PNG, which
+    is what the old byte gate wrongly rejected.
+    """
+    img = Image.new("RGB", (width, height), (245, 245, 247))
+    draw = ImageDraw.Draw(img)
+    for y in range(0, height, 40):
+        shade = 255 - (y // 40) % 6 * 4
+        draw.rectangle([20, y + 5, width - 20, y + 30], fill=(shade, 250, 250))
+    for x in range(0, width, 160):
+        draw.rectangle([x, 0, x + 2, height], fill=(220, 220, 225))
+    draw.text((40, 40), "Some UI text " * 30, fill=(20, 20, 20))
+    buf = io.BytesIO()
+    img.save(buf, format="PNG", optimize=False)
+    return buf.getvalue()
+
+
+def _data_url(raw: bytes) -> str:
+    return "data:image/png;base64," + base64.b64encode(raw).decode("ascii")
+
+
+def _decode_dims(data_url: str) -> tuple[int, int]:
+    payload = data_url.partition(",")[2]
+    with Image.open(io.BytesIO(base64.b64decode(payload))) as img:
+        return img.size
+
+
+def run_proof(verbose: bool = False) -> list[dict]:
+    """Run the recovery against every case; return per-case results."""
+    results: list[dict] = []
+    for width, height in CASES:
+        raw = _make_screenshot_png(width, height)
+        url = _data_url(raw)
+        # Sanity: this case must be UNDER the byte budget and OVER the pixel cap,
+        # i.e. it exercises the dimension path that bricked.
+        under_byte_budget = len(url) <= BYTE_BUDGET
+        over_pixel_cap = max(width, height) > MANY_IMAGE_CAP
+
+        msgs = [{
+            "role": "user",
+            "content": [{"type": "image_url", "image_url": {"url": url}}],
+        }]
+        changed = try_shrink_image_parts_in_messages(
+            msgs, max_dimension=MANY_IMAGE_CAP,
+        )
+        out_url = msgs[0]["content"][0]["image_url"]["url"]
+        out_dims = _decode_dims(out_url)
+
+        result = {
+            "orig": (width, height),
+            "orig_bytes": len(raw),
+            "under_byte_budget": under_byte_budget,
+            "over_pixel_cap": over_pixel_cap,
+            "changed": changed,
+            "result_dims": out_dims,
+            "under_cap_after": max(out_dims) <= MANY_IMAGE_CAP,
+        }
+        results.append(result)
+        if verbose:
+            status = "OK" if result["under_cap_after"] else "BRICK"
+            print(
+                f"  {width}x{height} ({len(raw)//1024:>3} KB)"
+                f" -> changed={changed!s:>5}"
+                f"  result={out_dims[0]}x{out_dims[1]}"
+                f"  [{status}]"
+            )
+    return results
+
+
+def test_issue_48013_dimension_shrink_does_not_brick():
+    """Every dimension-oversized screenshot must be brought under the cap."""
+    results = run_proof()
+    assert results, "no cases ran"
+    for r in results:
+        # Precondition: we really are on the dimension path.
+        assert r["under_byte_budget"], (
+            f"{r['orig']} must be under the byte budget to exercise the bug"
+        )
+        assert r["over_pixel_cap"], f"{r['orig']} must exceed the pixel cap"
+        # The fix: image lands under the cap and the helper reports success.
+        assert r["under_cap_after"], (
+            f"BRICK: {r['orig']} left at {r['result_dims']} "
+            f"(> {MANY_IMAGE_CAP}px) — the shrink recovery discarded a "
+            f"pixel-correct downscale (#48013)"
+        )
+        assert r["changed"] is True, (
+            f"{r['orig']} shrank but helper reported no progress — caller "
+            f"would surface the original error and burn the one-shot retry"
+        )
+
+
+def main() -> int:
+    print("Issue #48013 proof — image-dimension shrink must not brick sessions")
+    print(f"(many-image per-side cap = {MANY_IMAGE_CAP}px, byte budget = "
+          f"{BYTE_BUDGET // (1024 * 1024)} MB)\n")
+    results = run_proof(verbose=True)
+    bricked = [r for r in results if not r["under_cap_after"]]
+    no_progress = [r for r in results if r["under_cap_after"] and not r["changed"]]
+    print()
+    if bricked:
+        print(f"FAIL: {len(bricked)} image(s) still over the pixel cap (BRICK).")
+        return 1
+    if no_progress:
+        print(f"FAIL: {len(no_progress)} image(s) shrank but helper reported "
+              f"no progress (would burn the retry).")
+        return 1
+    print(f"PASS: all {len(results)} dimension-oversized screenshots brought "
+          f"under {MANY_IMAGE_CAP}px and reported as progress.")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/tests/run_agent/test_image_shrink_recovery.py
+++ b/tests/run_agent/test_image_shrink_recovery.py
@ -108,11 +108,36 @@ def _big_png_data_url(size_kb: int) -> str:
    return "data:image/png;base64," + base64.b64encode(raw).decode("ascii")


-def _install_fake_pillow(monkeypatch, size: tuple[int, int]) -> None:
-    """Install the tiny subset of Pillow used by the shrink preflight."""
+def _install_fake_pillow(
+    monkeypatch,
+    size: tuple[int, int],
+    *,
+    shrunk_size: tuple[int, int] | None = None,
+    sizes: list[tuple[int, int]] | None = None,
+) -> None:
+    """Install the tiny subset of Pillow used by the shrink preflight.
+
+    The shrink helper decodes pixel dimensions twice for the dimension path:
+    once on the *original* data URL (to decide it's oversized) and once on the
+    *re-encoded* result (to confirm the downscale landed under the cap).  To
+    model that honestly, ``_FakeImage`` can return a sequence of sizes across
+    successive ``open()`` calls:
+
+    * ``sizes=[...]``        — explicit per-call size list (clamped to last).
+    * ``shrunk_size=(w, h)`` — shorthand for ``[size, shrunk_size]``: first
+      decode is the oversized original, second is the in-cap re-encode.
+    * neither                — every decode returns ``size`` (legacy behaviour).
+    """
+    call_count = {"n": 0}
+    target_sizes = sizes or [
+        size,
+        shrunk_size if shrunk_size is not None else size,
+    ]
+
    class _FakeImage:
        def __init__(self):
-            self.size = size
+            self.size = target_sizes[min(call_count["n"], len(target_sizes) - 1)]
+            call_count["n"] += 1

        def __enter__(self):
            return self
@ -203,9 +228,10 @@ class TestShrinkImagePartsHelper:
        assert msgs[0]["content"][1]["image_url"]["url"] == shrunk

    def test_many_image_dimension_limit_rewritten(self, monkeypatch):
-        """A 2000px many-image rejection must shrink images below 8000px."""
+        """A 2000px many-image rejection must shrink images below the cap."""
        agent = _make_agent()
-        _install_fake_pillow(monkeypatch, (2501, 100))
+        # Original decodes oversized (2501px); the re-encode decodes in-cap.
+        _install_fake_pillow(monkeypatch, (2501, 100), shrunk_size=(1500, 60))
        oversized_for_many = _big_png_data_url(100)
        shrunk = "data:image/jpeg;base64," + "M" * 1000
        seen = {}
@ -392,3 +418,200 @@ class TestShrinkImagePartsHelper:
        assert msgs[0]["content"][0]["image_url"]["url"] == small
        # The unshrinkable one is left as-is (caller surfaces original error).
        assert msgs[0]["content"][1]["image_url"]["url"] == unshrinkable
+
+    # ------------------------------------------------------------------
+    # #48013: the dimension path must accept a pixel-correct downscale even
+    # when the re-encoded PNG grew in bytes.  Before the fix, the byte gate
+    # (`len(resized) >= len(url)`) discarded the dimension-correct result and
+    # left the image oversized, bricking the session on the Anthropic
+    # many-image 2000px path.
+    # ------------------------------------------------------------------
+
+    def test_dimension_shrink_with_byte_growth_accepted(self, monkeypatch):
+        """A dimension-driven shrink is accepted even if its bytes grow.
+
+        Regression for #48013.  The original (2501px, under the 4 MB byte
+        budget) is oversized on pixels only.  The re-encode lands at 1500px
+        (in-cap) but is *larger in bytes* — the historical byte gate would
+        reject it.  The fix keys the accept gate on the binding constraint
+        (dimensions), so the pixel-correct result is kept.
+        """
+        agent = _make_agent()
+        _install_fake_pillow(monkeypatch, (2501, 100), shrunk_size=(1500, 60))
+        original_url = _big_png_data_url(100)  # ~100 KB → well under 4 MB
+        # A *byte-larger* re-encode (the brick trigger): 200 KB payload.
+        dimensionally_shrunk = "data:image/png;base64," + "G" * 200 * 1024
+        seen = {}
+
+        def _fake_resize(path, mime_type=None, max_base64_bytes=None, max_dimension=None):
+            seen["max_dimension"] = max_dimension
+            return dimensionally_shrunk
+
+        monkeypatch.setattr(
+            "tools.vision_tools._resize_image_for_vision",
+            _fake_resize,
+            raising=False,
+        )
+
+        msgs = [{
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": original_url}},
+            ],
+        }]
+        # The re-encode is byte-LARGER than the original — proves the byte gate
+        # is no longer the rejection driver on the dimension path.
+        assert len(dimensionally_shrunk) > len(original_url)
+        assert agent._try_shrink_image_parts_in_messages(
+            msgs, max_dimension=2000,
+        ) is True
+        assert seen["max_dimension"] == 2000
+        assert msgs[0]["content"][0]["image_url"]["url"] == dimensionally_shrunk
+
+    def test_dimension_shrink_failure_still_blocks_retry(self, monkeypatch):
+        """A dimension-oversized image that stays oversized is unshrinkable.
+
+        If the re-encode is *still* over the per-side cap, the helper must
+        report no progress (return False) so the one-shot retry isn't burned
+        re-sending a payload the provider already rejected.
+        """
+        agent = _make_agent()
+        # Both decodes report oversized: original and re-encode are 2501px.
+        _install_fake_pillow(monkeypatch, (2501, 100))
+        original_url = _big_png_data_url(100)
+        still_oversized = "data:image/png;base64," + "H" * 120 * 1024
+
+        monkeypatch.setattr(
+            "tools.vision_tools._resize_image_for_vision",
+            lambda *a, **kw: still_oversized,
+            raising=False,
+        )
+
+        msgs = [{
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": original_url}},
+            ],
+        }]
+        assert agent._try_shrink_image_parts_in_messages(
+            msgs, max_dimension=2000,
+        ) is False
+        # Original left untouched — caller surfaces the provider's 400.
+        assert msgs[0]["content"][0]["image_url"]["url"] == original_url
+
+    def test_mixed_dimension_partial_progress_returns_false(self, monkeypatch):
+        """Partial dimension-path progress must not falsely burn the retry.
+
+        Two dimension-oversized images: the first re-encodes in-cap, the
+        second stays oversized.  Even though one part changed, an oversized
+        image survives, so retrying would 400 again — the helper must report
+        False.  (Mirrors the byte-path
+        ``test_mixed_one_shrinkable_one_not_returns_false`` invariant for the
+        pixel axis.)
+        """
+        agent = _make_agent()
+        # Decode order: img1 orig (2501) -> img1 re-encode (1500, in-cap) ->
+        #               img2 orig (2501) -> img2 re-encode (2501, still over).
+        _install_fake_pillow(
+            monkeypatch,
+            (2501, 100),
+            sizes=[(2501, 100), (1500, 60), (2501, 100), (2501, 100)],
+        )
+        first = _big_png_data_url(100)
+        second = _big_png_data_url(90)
+        calls = {"n": 0}
+
+        def _fake_resize(path, mime_type=None, max_base64_bytes=None, max_dimension=None):
+            calls["n"] += 1
+            if calls["n"] == 1:
+                return "data:image/png;base64," + "G" * 200 * 1024  # in-cap
+            return "data:image/png;base64," + "H" * 120 * 1024      # still over
+
+        monkeypatch.setattr(
+            "tools.vision_tools._resize_image_for_vision",
+            _fake_resize,
+            raising=False,
+        )
+
+        msgs = [{
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": first}},
+                {"type": "image_url", "image_url": {"url": second}},
+            ],
+        }]
+        assert agent._try_shrink_image_parts_in_messages(
+            msgs, max_dimension=2000,
+        ) is False
+
+    def test_byte_oversized_but_pixel_oversized_after_shrink_blocks_retry(self, monkeypatch):
+        """Bytes-triggered shrink must ALSO honour the active per-side cap.
+
+        Adversarial-review regression (#48013, round 2): an image over BOTH the
+        4 MB byte budget AND the per-side pixel cap can be byte-shrunk yet stay
+        over the cap (``_resize_image_for_vision`` returns a best-effort blob
+        when it exhausts its halving budget on a very-high-aspect image).  The
+        byte-path accept gate originally checked only ``len(resized) < len(url)``
+        and reported success, so the caller retried and the provider re-rejected
+        on dimensions — re-bricking the session.  The fix re-checks the pixel
+        cap on the byte path too; a still-over-cap result must be unshrinkable.
+        """
+        agent = _make_agent()
+        # On the BYTE path, _decode_pixels is called once — on the RESIZED blob.
+        # Script that single decode to report still-over-cap dims (2560 > 2000).
+        _install_fake_pillow(monkeypatch, (2560, 64), sizes=[(2560, 64)])
+        # Over the 4 MB byte budget so the BYTE path is taken (triggered_by="bytes").
+        oversized_url = _big_png_data_url(5000)  # ~5 MB raw → ~6.7 MB b64
+        # Byte-SMALLER re-encode, but its decoded dims are still over the cap.
+        byte_smaller_still_over = "data:image/png;base64," + "K" * 1000
+
+        monkeypatch.setattr(
+            "tools.vision_tools._resize_image_for_vision",
+            lambda *a, **kw: byte_smaller_still_over,
+            raising=False,
+        )
+
+        msgs = [{
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": oversized_url}},
+            ],
+        }]
+        # Bytes shrank, but the per-side cap is still violated → no real
+        # progress; the helper must NOT report success (would burn the retry).
+        assert len(byte_smaller_still_over) < len(oversized_url)
+        assert agent._try_shrink_image_parts_in_messages(
+            msgs, max_dimension=2000,
+        ) is False
+        # Original left in place — caller surfaces the provider's 400.
+        assert msgs[0]["content"][0]["image_url"]["url"] == oversized_url
+
+    def test_byte_oversized_with_no_dim_cap_accepts_byte_shrink(self, monkeypatch):
+        """Bytes path with the default 8000px cap still accepts a byte shrink.
+
+        Guards the fix above against over-reach: when no tight dimension cap is
+        active (default 8000px) and the byte-shrunk re-encode is comfortably
+        within it, the byte path must keep accepting on byte-shrinkage alone.
+        """
+        agent = _make_agent()
+        # Byte path → single _decode_pixels call on the resized blob; report
+        # in-cap dims so the byte-shrink is accepted under the default 8000 cap.
+        _install_fake_pillow(monkeypatch, (1250, 50), sizes=[(1250, 50)])
+        oversized_url = _big_png_data_url(5000)
+        shrunk = "data:image/jpeg;base64," + "L" * 1000
+
+        monkeypatch.setattr(
+            "tools.vision_tools._resize_image_for_vision",
+            lambda *a, **kw: shrunk,
+            raising=False,
+        )
+
+        msgs = [{
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": oversized_url}},
+            ],
+        }]
+        # Default cap (8000) — no explicit max_dimension passed.
+        assert agent._try_shrink_image_parts_in_messages(msgs) is True
+        assert msgs[0]["content"][0]["image_url"]["url"] == shrunk