mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(gateway): auto-deliver image_generate output as native media (#42616)
image_generate returns its artifact as JSON ({"image": "/abs/path.png"})
with no MEDIA: tag, so the gateway auto-append path (which only recognized
text_to_speech MEDIA: tags) never delivered it — image delivery silently
depended on the model restating the path in its reply. Add image_generate to
the producer allowlist and extract the local path from its JSON result
(in priority order: host_image > image > agent_visible_image), reusing the existing
extension-anchored matcher and history-dedupe so remote URLs, unknown
extensions, failures, and already-sent paths are rejected.
Closes the remaining unfixed path from #19105.
This commit is contained in:
parent
18ead88273
commit
9351cbafab
2 changed files with 131 additions and 3 deletions
|
|
@ -688,7 +688,18 @@ def _last_transcript_timestamp(history: Optional[List[Dict[str, Any]]]) -> Any:
|
|||
# ordinary outputs. Only tools that intentionally create deliverable media
|
||||
# artifacts should be eligible for automatic append when the model omits them
|
||||
# from the final gateway reply.
|
||||
_AUTO_APPEND_MEDIA_TOOL_NAMES = {"text_to_speech", "text_to_speech_tool"}
|
||||
_AUTO_APPEND_MEDIA_TOOL_NAMES = {
|
||||
"text_to_speech",
|
||||
"text_to_speech_tool",
|
||||
"image_generate",
|
||||
}
|
||||
|
||||
# Some media tools return their deliverable artifact as a JSON payload with a
|
||||
# local-file path field rather than a literal ``MEDIA:`` tag (e.g. image_generate
|
||||
# returns ``{"success": true, "image": "/abs/path.png"}``). The auto-append path
|
||||
# extracts the path from these fields so delivery is deterministic and does not
|
||||
# depend on the model restating the path in its final reply.
|
||||
_JSON_MEDIA_TOOL_PATH_FIELDS = ("host_image", "image", "agent_visible_image")
|
||||
|
||||
|
||||
# Extension-anchored MEDIA: matcher for tool results. Mirrors the dispatch-site
|
||||
|
|
@ -755,10 +766,28 @@ def _collect_auto_append_media_tags(
|
|||
if tool_name_by_call_id.get(call_id) not in _AUTO_APPEND_MEDIA_TOOL_NAMES:
|
||||
continue
|
||||
content = str(msg.get("content") or "")
|
||||
tool_name = tool_name_by_call_id.get(call_id)
|
||||
# JSON-payload tools (image_generate) return a local-file path in a
|
||||
# known field rather than a MEDIA: tag. Extract it so delivery is
|
||||
# deterministic even when the model omits the path from its reply.
|
||||
if tool_name == "image_generate" and "MEDIA:" not in content:
|
||||
try:
|
||||
payload = json.loads(content)
|
||||
except Exception:
|
||||
payload = None
|
||||
if isinstance(payload, dict) and payload.get("success"):
|
||||
for field in _JSON_MEDIA_TOOL_PATH_FIELDS:
|
||||
path = payload.get(field)
|
||||
if (isinstance(path, str)
|
||||
and _TOOL_MEDIA_RE.fullmatch(f"MEDIA:{path}")
|
||||
and path not in history_media_paths):
|
||||
media_tags.append(f"MEDIA:{path}")
|
||||
break
|
||||
continue
|
||||
if "MEDIA:" not in content:
|
||||
continue
|
||||
for match in _TOOL_MEDIA_RE.finditer(content):
|
||||
path = match.group(1).strip().rstrip('\",}')
|
||||
path = match.group(1).strip().rstrip('",}')
|
||||
if path and path not in history_media_paths:
|
||||
media_tags.append(f"MEDIA:{path}")
|
||||
if "[[audio_as_voice]]" in content:
|
||||
|
|
|
|||
|
|
@ -159,7 +159,106 @@ caption
|
|||
tags, voice = _collect_auto_append_media_tags(messages, history_offset=0)
|
||||
assert tags == ["MEDIA:/tmp/voice.ogg"]
|
||||
assert voice is True
|
||||
|
||||
|
||||
def test_gateway_auto_append_image_generate_json_path(self):
|
||||
"""image_generate returns a local path in JSON (no MEDIA: tag); it is
|
||||
auto-appended so delivery doesn't depend on the model restating it."""
|
||||
from gateway.run import _collect_auto_append_media_tags
|
||||
|
||||
messages = [
|
||||
{"role": "user", "content": "Make me a cat"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"tool_calls": [
|
||||
{"id": "call_img", "function": {"name": "image_generate"}}
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "call_img",
|
||||
"content": '{"success": true, "image": "/tmp/gen/cat.png", "agent_visible_image": "/tmp/gen/cat.png"}',
|
||||
},
|
||||
{"role": "assistant", "content": "Here's your cat."},
|
||||
]
|
||||
|
||||
tags, voice = _collect_auto_append_media_tags(messages, history_offset=0)
|
||||
assert tags == ["MEDIA:/tmp/gen/cat.png"]
|
||||
assert voice is False
|
||||
|
||||
def test_gateway_auto_append_image_generate_prefers_host_path(self):
|
||||
"""When host and sandbox paths differ, the host-deliverable path wins."""
|
||||
from gateway.run import _collect_auto_append_media_tags
|
||||
|
||||
messages = [
|
||||
{"role": "user", "content": "Make me a dog"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"tool_calls": [
|
||||
{"id": "call_img", "function": {"name": "image_generate"}}
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "call_img",
|
||||
"content": '{"success": true, "host_image": "/host/dog.jpg", "image": "/host/dog.jpg", "agent_visible_image": "/sandbox/dog.jpg"}',
|
||||
},
|
||||
]
|
||||
|
||||
tags, _ = _collect_auto_append_media_tags(messages, history_offset=0)
|
||||
assert tags == ["MEDIA:/host/dog.jpg"]
|
||||
|
||||
def test_gateway_auto_append_image_generate_failure_and_url_ignored(self):
|
||||
"""Failed generations and remote URLs are not auto-delivered."""
|
||||
from gateway.run import _collect_auto_append_media_tags
|
||||
|
||||
def _img_msgs(content):
|
||||
return [
|
||||
{
|
||||
"role": "assistant",
|
||||
"tool_calls": [
|
||||
{"id": "c", "function": {"name": "image_generate"}}
|
||||
],
|
||||
},
|
||||
{"role": "tool", "tool_call_id": "c", "content": content},
|
||||
]
|
||||
|
||||
# Failed generation
|
||||
tags, _ = _collect_auto_append_media_tags(
|
||||
_img_msgs('{"success": false, "image": null, "error": "boom"}'),
|
||||
history_offset=0,
|
||||
)
|
||||
assert tags == []
|
||||
|
||||
# Remote URL is not a local file path
|
||||
tags, _ = _collect_auto_append_media_tags(
|
||||
_img_msgs('{"success": true, "image": "https://fal.media/x/cat.png"}'),
|
||||
history_offset=0,
|
||||
)
|
||||
assert tags == []
|
||||
|
||||
def test_gateway_auto_append_image_generate_dedupes_history(self):
|
||||
"""A generated image path already in history is not re-sent."""
|
||||
from gateway.run import _collect_auto_append_media_tags
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "assistant",
|
||||
"tool_calls": [
|
||||
{"id": "c", "function": {"name": "image_generate"}}
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "c",
|
||||
"content": '{"success": true, "image": "/tmp/gen/cat.png"}',
|
||||
},
|
||||
]
|
||||
|
||||
tags, _ = _collect_auto_append_media_tags(
|
||||
messages, history_offset=0, history_media_paths={"/tmp/gen/cat.png"}
|
||||
)
|
||||
assert tags == []
|
||||
|
||||
def test_media_tags_not_extracted_from_history(self):
|
||||
"""MEDIA tags from previous turns should NOT be extracted again."""
|
||||
# Simulate conversation history with a TTS call from a previous turn
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue