mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-12 03:42:08 +00:00
fix(image-routing): sniff magic bytes for image MIME, ignore misleading suffix
Discord (and similar platforms) can serve a PNG image cached as discord_xxx.webp, because the CDN reports content_type=image/webp for proxied stickers, custom emoji, and certain bot-uploaded images even when the actual bytes are PNG.

Hermes' agent.image_routing._guess_mime trusted the file suffix and declared media_type=image/webp to Anthropic, which strictly validates the declared type and returns:

    HTTP 400 messages.N.content.M.image.source.base64: The image was specified using the image/webp media type, but the image appears to be a image/png image

The Discord image attachment never reaches the model; the whole turn fails with no salvage path.

Fix: sniff magic bytes in _file_to_data_url before declaring MIME. Suffix-based detection is kept as a fallback when bytes aren't available or aren't recognised. New helper _sniff_mime_from_bytes covers PNG, JPEG, GIF, WEBP, BMP, and HEIC/HEIF.

Tests:
- Two existing tests asserted the old broken behaviour (PNG bytes in a .jpg/.webp file should report jpeg/webp); they are rewritten with real jpeg/webp magic bytes so they still cover suffix-aligned cases.
- New regression test test_mime_sniff_overrides_misleading_extension reproduces the exact Discord scenario (PNG bytes, .webp suffix) and asserts the data URL comes back as image/png.

All 28 tests in tests/agent/test_image_routing.py pass.
This commit is contained in:
parent
5ead126709
commit
5cf703245b
2 changed files with 63 additions and 4 deletions
|
|
@ -144,7 +144,51 @@ def decide_image_input_mode(
|
||||||
# it fires, which is cheaper than permanent quality loss.
|
# it fires, which is cheaper than permanent quality loss.
|
||||||
|
|
||||||
|
|
||||||
def _guess_mime(path: Path) -> str:
|
def _sniff_mime_from_bytes(raw: bytes) -> Optional[str]:
|
||||||
|
"""Detect image MIME from magic bytes. Returns None if unrecognised.
|
||||||
|
|
||||||
|
Filename-based detection (``mimetypes.guess_type``) is unreliable when
|
||||||
|
upstream platforms lie about content-type. Discord, for example, can
|
||||||
|
serve a PNG with ``content_type=image/webp`` for proxied/animated
|
||||||
|
stickers, custom emoji previews, or images uploaded via certain bots.
|
||||||
|
Anthropic strictly validates that declared media_type matches the
|
||||||
|
actual bytes and returns HTTP 400 on mismatch, so we sniff to be safe.
|
||||||
|
"""
|
||||||
|
if not raw:
|
||||||
|
return None
|
||||||
|
# PNG: 89 50 4E 47 0D 0A 1A 0A
|
||||||
|
if raw.startswith(b"\x89PNG\r\n\x1a\n"):
|
||||||
|
return "image/png"
|
||||||
|
# JPEG: FF D8 FF
|
||||||
|
if raw.startswith(b"\xff\xd8\xff"):
|
||||||
|
return "image/jpeg"
|
||||||
|
# GIF87a / GIF89a
|
||||||
|
if raw[:6] in (b"GIF87a", b"GIF89a"):
|
||||||
|
return "image/gif"
|
||||||
|
# WEBP: "RIFF" .... "WEBP"
|
||||||
|
if len(raw) >= 12 and raw[:4] == b"RIFF" and raw[8:12] == b"WEBP":
|
||||||
|
return "image/webp"
|
||||||
|
# BMP: "BM"
|
||||||
|
if raw.startswith(b"BM"):
|
||||||
|
return "image/bmp"
|
||||||
|
# HEIC/HEIF: ftypheic / ftypheix / ftypmif1 / ftypmsf1 etc.
|
||||||
|
if len(raw) >= 12 and raw[4:8] == b"ftyp" and raw[8:12] in (
|
||||||
|
b"heic", b"heix", b"hevc", b"hevx", b"mif1", b"msf1", b"heim", b"heis",
|
||||||
|
):
|
||||||
|
return "image/heic"
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _guess_mime(path: Path, raw: Optional[bytes] = None) -> str:
|
||||||
|
"""Return image MIME type for *path*.
|
||||||
|
|
||||||
|
If *raw* bytes are provided, magic-byte sniffing wins (authoritative).
|
||||||
|
Otherwise we fall back to ``mimetypes`` then suffix-based defaults.
|
||||||
|
"""
|
||||||
|
if raw is not None:
|
||||||
|
sniffed = _sniff_mime_from_bytes(raw)
|
||||||
|
if sniffed:
|
||||||
|
return sniffed
|
||||||
mime, _ = mimetypes.guess_type(str(path))
|
mime, _ = mimetypes.guess_type(str(path))
|
||||||
if mime and mime.startswith("image/"):
|
if mime and mime.startswith("image/"):
|
||||||
return mime
|
return mime
|
||||||
|
|
@ -178,7 +222,7 @@ def _file_to_data_url(path: Path) -> Optional[str]:
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("image_routing: failed to read %s — %s", path, exc)
|
logger.warning("image_routing: failed to read %s — %s", path, exc)
|
||||||
return None
|
return None
|
||||||
mime = _guess_mime(path)
|
mime = _guess_mime(path, raw=raw)
|
||||||
b64 = base64.b64encode(raw).decode("ascii")
|
b64 = base64.b64encode(raw).decode("ascii")
|
||||||
return f"data:{mime};base64,{b64}"
|
return f"data:{mime};base64,{b64}"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -217,19 +217,34 @@ class TestBuildNativeContentParts:
|
||||||
assert str(img2) in text_part["text"]
|
assert str(img2) in text_part["text"]
|
||||||
|
|
||||||
def test_mime_inference_jpg(self, tmp_path: Path):
|
def test_mime_inference_jpg(self, tmp_path: Path):
|
||||||
|
# Real JPEG bytes (SOI marker FF D8 FF): sniffing now wins over suffix.
|
||||||
img = tmp_path / "photo.jpg"
|
img = tmp_path / "photo.jpg"
|
||||||
img.write_bytes(_png_bytes()) # bytes are PNG but extension is jpg
|
img.write_bytes(b"\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01" + b"\x00" * 32)
|
||||||
parts, _ = build_native_content_parts("x", [str(img)])
|
parts, _ = build_native_content_parts("x", [str(img)])
|
||||||
url = parts[1]["image_url"]["url"]
|
url = parts[1]["image_url"]["url"]
|
||||||
assert url.startswith("data:image/jpeg;base64,")
|
assert url.startswith("data:image/jpeg;base64,")
|
||||||
|
|
||||||
def test_mime_inference_webp(self, tmp_path: Path):
|
def test_mime_inference_webp(self, tmp_path: Path):
|
||||||
|
# Real WEBP bytes (RIFF....WEBP): sniffing now wins over suffix.
|
||||||
img = tmp_path / "pic.webp"
|
img = tmp_path / "pic.webp"
|
||||||
img.write_bytes(_png_bytes())
|
img.write_bytes(b"RIFF\x24\x00\x00\x00WEBPVP8 " + b"\x00" * 32)
|
||||||
parts, _ = build_native_content_parts("", [str(img)])
|
parts, _ = build_native_content_parts("", [str(img)])
|
||||||
url = parts[1]["image_url"]["url"]
|
url = parts[1]["image_url"]["url"]
|
||||||
assert url.startswith("data:image/webp;base64,")
|
assert url.startswith("data:image/webp;base64,")
|
||||||
|
|
||||||
|
def test_mime_sniff_overrides_misleading_extension(self, tmp_path: Path):
    """A ``.webp`` file holding PNG bytes must be declared as ``image/png``.

    Mirrors the Discord-style failure where a cached attachment carries a
    ``.webp`` suffix but PNG content. Anthropic answers a MIME mismatch
    with HTTP 400, so the data URL has to follow the bytes rather than
    the filename.
    """
    # The suffix claims WEBP; the payload written below is PNG.
    misnamed = tmp_path / "discord_cached.webp"
    misnamed.write_bytes(_png_bytes())

    content, _ = build_native_content_parts("", [str(misnamed)])
    image_part = content[1]
    url = image_part["image_url"]["url"]

    assert url.startswith("data:image/png;base64,"), (
        f"Expected MIME sniffing to detect PNG bytes regardless of .webp suffix, got: {url[:60]}"
    )
|
||||||
|
|
||||||
|
|
||||||
# ─── Oversize handling ───────────────────────────────────────────────────────
|
# ─── Oversize handling ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue