From 88e136448d0820186d1f56b5093c40e71b3d71f5 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Mon, 22 Jun 2026 18:23:21 -0500 Subject: [PATCH] fix(agent): shrink anthropic-native image history Retry image-size rejections by rewriting Anthropic base64 image source blocks, not just OpenAI-style image_url parts. --- agent/conversation_compression.py | 41 +++++++++++++++-- tests/run_agent/test_image_shrink_recovery.py | 46 +++++++++++++++++++ 2 files changed, 83 insertions(+), 4 deletions(-) diff --git a/agent/conversation_compression.py b/agent/conversation_compression.py index 94fff283893..ba67f036954 100644 --- a/agent/conversation_compression.py +++ b/agent/conversation_compression.py @@ -805,10 +805,11 @@ def try_shrink_image_parts_in_messages( Pillow couldn't help (caller should surface the original error). Strategy: look for ``image_url`` / ``input_image`` parts carrying a - ``data:image/...;base64,...`` payload. For each one whose encoded - size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB - ceiling with header overhead) or whose longest side exceeds - ``max_dimension``, write the base64 to a tempfile, call + ``data:image/...;base64,...`` payload, plus Anthropic-native + ``{"type": "image", "source": {"type": "base64", ...}}`` blocks. + For each one whose encoded size exceeds 4 MB (a safe target that slides + under Anthropic's 5 MB ceiling with header overhead) or whose longest side + exceeds ``max_dimension``, write the base64 to a tempfile, call ``vision_tools._resize_image_for_vision`` to produce a smaller data URL, and substitute it in place. @@ -964,6 +965,28 @@ def try_shrink_image_parts_in_messages( logger.warning("image-shrink recovery: re-encode failed — %s", exc) return None, triggered_by is not None + def _source_to_data_url(source: Any) -> Optional[str]: + if not isinstance(source, dict) or source.get("type") != "base64": + return None + data = source.get("data") + if not isinstance(data, str) or not data: + return None + media_type = str(source.get("media_type") or "image/jpeg").strip() + if not media_type.startswith("image/"): + media_type = "image/jpeg" + return f"data:{media_type};base64,{data}" + + def _write_data_url_to_source(source: dict, data_url: str) -> None: + header, _, data = data_url.partition(",") + media_type = "image/jpeg" + if header.startswith("data:"): + candidate = header[len("data:"):].split(";", 1)[0].strip() + if candidate.startswith("image/"): + media_type = candidate + source["type"] = "base64" + source["media_type"] = media_type + source["data"] = data + for msg in api_messages: if not isinstance(msg, dict): continue @@ -974,6 +997,16 @@ def try_shrink_image_parts_in_messages( if not isinstance(part, dict): continue ptype = part.get("type") + if ptype == "image": + source = part.get("source") + url = _source_to_data_url(source) + resized, unshrinkable = _shrink_data_url(url or "") + if resized and isinstance(source, dict): + _write_data_url_to_source(source, resized) + changed_count += 1 + elif unshrinkable: + unshrinkable_oversized += 1 + continue if ptype not in {"image_url", "input_image"}: continue image_value = part.get("image_url") diff --git a/tests/run_agent/test_image_shrink_recovery.py b/tests/run_agent/test_image_shrink_recovery.py index 24f8b7e242d..bdbb905d66e 100644 --- a/tests/run_agent/test_image_shrink_recovery.py +++ b/tests/run_agent/test_image_shrink_recovery.py @@ -260,6 +260,52 @@ class TestShrinkImagePartsHelper: assert seen["max_dimension"] == 2000 assert msgs[0]["content"][0]["image_url"]["url"] == shrunk + def test_anthropic_base64_image_source_rewritten(self, monkeypatch): + """Anthropic-native image blocks are shrinkable after adapter conversion.""" + agent = _make_agent() + _install_fake_pillow(monkeypatch, (2501, 100), shrunk_size=(1500, 60)) + original = _big_png_data_url(100) + _, _, original_data = original.partition(",") + shrunk = "data:image/jpeg;base64," + "N" * 1000 + seen = {} + + def _fake_resize(path, mime_type=None, max_base64_bytes=None, max_dimension=None): + seen["mime_type"] = mime_type + seen["max_dimension"] = max_dimension + return shrunk + + monkeypatch.setattr( + "tools.vision_tools._resize_image_for_vision", + _fake_resize, + raising=False, + ) + + msgs = [{ + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": original_data, + }, + }, + ], + }] + changed = agent._try_shrink_image_parts_in_messages( + msgs, + max_dimension=2000, + ) + source = msgs[0]["content"][0]["source"] + + assert changed is True + assert seen["mime_type"] == "image/png" + assert seen["max_dimension"] == 2000 + assert source["type"] == "base64" + assert source["media_type"] == "image/jpeg" + assert source["data"] == "N" * 1000 + def test_oversized_input_image_string_shape_rewritten(self, monkeypatch): """OpenAI Responses shape: {type: input_image, image_url: "data:..."}.""" agent = _make_agent()