fix(gateway): auto-deliver image_generate output as native media (#42616)

image_generate returns its artifact as JSON ({"image": "/abs/path.png"})
with no MEDIA: tag, so the gateway auto-append path (which only recognized
text_to_speech MEDIA: tags) never delivered it — image delivery silently
depended on the model restating the path in its reply. Add image_generate to
the producer allowlist and extract the local path from its JSON result
(host_image > image > agent_visible_image), reusing the existing
extension-anchored matcher and history-dedupe so remote URLs, unknown
extensions, failures, and already-sent paths are rejected.

Closes the remaining unfixed path from #19105.
This commit is contained in:
Teknium 2026-06-08 22:51:03 -07:00 committed by GitHub
parent 18ead88273
commit 9351cbafab
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 131 additions and 3 deletions

View file

@ -688,7 +688,18 @@ def _last_transcript_timestamp(history: Optional[List[Dict[str, Any]]]) -> Any:
# ordinary outputs. Only tools that intentionally create deliverable media
# artifacts should be eligible for automatic append when the model omits them
# from the final gateway reply.
_AUTO_APPEND_MEDIA_TOOL_NAMES = {"text_to_speech", "text_to_speech_tool"}
# Allowlist of tool names whose results may be auto-appended to the gateway
# reply as media when the model's final message omits them.
_AUTO_APPEND_MEDIA_TOOL_NAMES = {
"text_to_speech",  # result carries a literal MEDIA: tag
"text_to_speech_tool",  # NOTE(review): presumably an alias of text_to_speech — confirm
"image_generate",  # result is a JSON payload with a local path field, no MEDIA: tag
}
# Field names, in priority order, that may hold the deliverable local-file
# path in a JSON tool result. image_generate returns its artifact as a JSON
# payload (e.g. ``{"success": true, "image": "/abs/path.png"}``) rather than a
# literal ``MEDIA:`` tag, so the auto-append path takes the first usable field
# from this tuple. This keeps delivery deterministic: it does not depend on
# the model restating the path in its final reply.
_JSON_MEDIA_TOOL_PATH_FIELDS = ("host_image", "image", "agent_visible_image")
# Extension-anchored MEDIA: matcher for tool results. Mirrors the dispatch-site
@ -755,10 +766,28 @@ def _collect_auto_append_media_tags(
if tool_name_by_call_id.get(call_id) not in _AUTO_APPEND_MEDIA_TOOL_NAMES:
continue
content = str(msg.get("content") or "")
tool_name = tool_name_by_call_id.get(call_id)
# JSON-payload tools (image_generate) return a local-file path in a
# known field rather than a MEDIA: tag. Extract it so delivery is
# deterministic even when the model omits the path from its reply.
if tool_name == "image_generate" and "MEDIA:" not in content:
try:
payload = json.loads(content)
except Exception:
payload = None
if isinstance(payload, dict) and payload.get("success"):
for field in _JSON_MEDIA_TOOL_PATH_FIELDS:
path = payload.get(field)
if (isinstance(path, str)
and _TOOL_MEDIA_RE.fullmatch(f"MEDIA:{path}")
and path not in history_media_paths):
media_tags.append(f"MEDIA:{path}")
break
continue
if "MEDIA:" not in content:
continue
for match in _TOOL_MEDIA_RE.finditer(content):
path = match.group(1).strip().rstrip('\",}')
path = match.group(1).strip().rstrip('",}')
if path and path not in history_media_paths:
media_tags.append(f"MEDIA:{path}")
if "[[audio_as_voice]]" in content:

View file

@ -159,7 +159,106 @@ caption
tags, voice = _collect_auto_append_media_tags(messages, history_offset=0)
assert tags == ["MEDIA:/tmp/voice.ogg"]
assert voice is True
def test_gateway_auto_append_image_generate_json_path(self):
    """A JSON-only image_generate result (no MEDIA: tag) is auto-appended.

    The final assistant reply never mentions the file path, so the tag must
    come from the tool result's JSON payload alone.
    """
    from gateway.run import _collect_auto_append_media_tags

    image_call = {"id": "call_img", "function": {"name": "image_generate"}}
    image_result = {
        "role": "tool",
        "tool_call_id": "call_img",
        "content": '{"success": true, "image": "/tmp/gen/cat.png", "agent_visible_image": "/tmp/gen/cat.png"}',
    }
    conversation = [
        {"role": "user", "content": "Make me a cat"},
        {"role": "assistant", "tool_calls": [image_call]},
        image_result,
        {"role": "assistant", "content": "Here's your cat."},
    ]

    collected, as_voice = _collect_auto_append_media_tags(
        conversation, history_offset=0
    )

    assert collected == ["MEDIA:/tmp/gen/cat.png"]
    # No [[audio_as_voice]] marker is involved for image results.
    assert as_voice is False
def test_gateway_auto_append_image_generate_prefers_host_path(self):
    """The host path is chosen over a differing agent-visible (sandbox) path."""
    from gateway.run import _collect_auto_append_media_tags

    image_call = {"id": "call_img", "function": {"name": "image_generate"}}
    image_result = {
        "role": "tool",
        "tool_call_id": "call_img",
        "content": '{"success": true, "host_image": "/host/dog.jpg", "image": "/host/dog.jpg", "agent_visible_image": "/sandbox/dog.jpg"}',
    }
    conversation = [
        {"role": "user", "content": "Make me a dog"},
        {"role": "assistant", "tool_calls": [image_call]},
        image_result,
    ]

    collected = _collect_auto_append_media_tags(conversation, history_offset=0)[0]

    # Exactly one tag: the host-deliverable path, never the sandbox one.
    assert collected == ["MEDIA:/host/dog.jpg"]
def test_gateway_auto_append_image_generate_failure_and_url_ignored(self):
    """Neither a failed generation nor a remote URL yields an auto-appended tag."""
    from gateway.run import _collect_auto_append_media_tags

    def _conversation_for(tool_output):
        # Minimal exchange: one image_generate call plus its tool result.
        call = {"id": "c", "function": {"name": "image_generate"}}
        return [
            {"role": "assistant", "tool_calls": [call]},
            {"role": "tool", "tool_call_id": "c", "content": tool_output},
        ]

    rejected_outputs = (
        # Failed generation
        '{"success": false, "image": null, "error": "boom"}',
        # Remote URL is not a local file path
        '{"success": true, "image": "https://fal.media/x/cat.png"}',
    )
    for tool_output in rejected_outputs:
        collected, _ = _collect_auto_append_media_tags(
            _conversation_for(tool_output),
            history_offset=0,
        )
        assert collected == []
def test_gateway_auto_append_image_generate_dedupes_history(self):
    """An image path listed in ``history_media_paths`` is not emitted again."""
    from gateway.run import _collect_auto_append_media_tags

    image_call = {"id": "c", "function": {"name": "image_generate"}}
    conversation = [
        {"role": "assistant", "tool_calls": [image_call]},
        {
            "role": "tool",
            "tool_call_id": "c",
            "content": '{"success": true, "image": "/tmp/gen/cat.png"}',
        },
    ]
    # Same path the tool result reports, marked as already delivered.
    already_sent = {"/tmp/gen/cat.png"}

    collected, _ = _collect_auto_append_media_tags(
        conversation, history_offset=0, history_media_paths=already_sent
    )

    assert collected == []
def test_media_tags_not_extracted_from_history(self):
"""MEDIA tags from previous turns should NOT be extracted again."""
# Simulate conversation history with a TTS call from a previous turn