mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
feat(acp): pass image file attachments through as image_url parts
Extends PR #21400's resource inlining with image-specific handling: ACP resource_link and embedded blob resources with an image/* mime (or image file suffix when mime is missing) now emit an OpenAI image_url part with a base64 data URL, so vision models actually see the image instead of a [Binary file omitted] note. Non-image resources keep the existing text-inlining behavior. Adds 3 tests: local PNG via resource_link, JPEG mime inferred from suffix when client omits mimeType, and embedded blob PNG.
This commit is contained in:
parent
733e297b8a
commit
7e2af0c2e8
2 changed files with 205 additions and 35 deletions
|
|
@ -1,5 +1,8 @@
|
|||
import base64
|
||||
|
||||
import pytest
|
||||
from acp.schema import (
|
||||
BlobResourceContents,
|
||||
EmbeddedResourceContentBlock,
|
||||
ImageContentBlock,
|
||||
ResourceContentBlock,
|
||||
|
|
@ -82,3 +85,75 @@ async def test_initialize_advertises_image_prompt_capability():
|
|||
assert response.agent_capabilities is not None
|
||||
assert response.agent_capabilities.prompt_capabilities is not None
|
||||
assert response.agent_capabilities.prompt_capabilities.image is True
|
||||
|
||||
|
||||
# 1x1 transparent PNG — smallest valid image payload for inlining tests.
|
||||
_ONE_PX_PNG = bytes.fromhex(
|
||||
"89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4"
|
||||
"890000000a49444154789c6300010000000500010d0a2db40000000049454e44ae426082"
|
||||
)
|
||||
|
||||
|
||||
def test_acp_resource_link_image_file_is_inlined_as_image_url(tmp_path):
    """A resource_link to a local PNG with an image/* mime must surface as an
    image_url part (base64 data URL), preceded by a text header part."""
    png_path = tmp_path / "shot.png"
    png_path.write_bytes(_ONE_PX_PNG)

    blocks = [
        TextContentBlock(type="text", text="Look at this screenshot"),
        ResourceContentBlock(
            type="resource_link",
            name="shot.png",
            uri=png_path.as_uri(),
            mimeType="image/png",
        ),
    ]
    content = _content_blocks_to_openai_user_content(blocks)

    assert isinstance(content, list)
    # Expected part order: [user text, image header, image_url].
    assert content[0] == {"type": "text", "text": "Look at this screenshot"}
    assert content[1]["type"] == "text"
    assert "[Attached image: shot.png]" in content[1]["text"]
    assert content[2]["type"] == "image_url"
    encoded = base64.b64encode(_ONE_PX_PNG).decode("ascii")
    assert content[2]["image_url"]["url"] == "data:image/png;base64," + encoded
|
||||
|
||||
|
||||
def test_acp_resource_link_image_mime_inferred_from_suffix(tmp_path):
    """No mimeType sent — should still be recognised as image by file suffix."""
    jpg_path = tmp_path / "pic.jpg"
    jpg_path.write_bytes(_ONE_PX_PNG)  # content doesn't matter for the code path

    link = ResourceContentBlock(
        type="resource_link",
        name="pic.jpg",
        uri=jpg_path.as_uri(),
    )
    content = _content_blocks_to_openai_user_content([link])

    assert isinstance(content, list)
    image_parts = [part for part in content if part.get("type") == "image_url"]
    assert len(image_parts) == 1
    # Suffix .jpg must map to the image/jpeg mime in the data URL.
    assert image_parts[0]["image_url"]["url"].startswith("data:image/jpeg;base64,")
|
||||
|
||||
|
||||
def test_acp_embedded_blob_image_is_inlined_as_image_url():
    """An embedded blob resource with an image mime must be passed through
    as an image_url part with a base64 data URL, after a text header part."""
    encoded = base64.b64encode(_ONE_PX_PNG).decode("ascii")
    embedded = EmbeddedResourceContentBlock(
        type="resource",
        resource=BlobResourceContents(
            uri="file:///tmp/embed.png",
            mimeType="image/png",
            blob=encoded,
        ),
    )

    content = _content_blocks_to_openai_user_content([embedded])

    assert isinstance(content, list)
    header = content[0]
    assert header["type"] == "text"
    assert "[Attached image: embed.png]" in header["text"]
    assert content[1] == {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{encoded}"},
    }
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue