fix(gateway): dedup image_generate media across the compression boundary

After context compression, the agent re-sent an already-delivered
generated image on every subsequent turn (#46627). The auto-append
fallback rescans full history when the message list shrinks (compression-
safe path), deduping against _history_media_paths — but that set was built
by scanning ONLY MEDIA: text tags in tool results. image_generate returns
its path in a JSON payload field (host_image/image/agent_visible_image),
never a MEDIA: tag, so generated-image paths never entered the dedup set
and were re-emitted after the boundary.

Extract the history-path collection into _collect_history_media_paths(),
which now covers BOTH delivery shapes: MEDIA: text tags AND image_generate
JSON-payload paths (mirroring what _collect_auto_append_media_tags
extracts). The inline block in _handle_message is replaced with a call to
the helper.

Co-authored-by: liuhao1024 <sunsky.lau@gmail.com>
This commit is contained in:
teknium1 2026-06-20 23:05:55 -07:00 committed by Teknium
parent 1f874dfe44
commit 8ac5e90ec2
2 changed files with 113 additions and 16 deletions

View file

@ -1115,6 +1115,55 @@ def _collect_auto_append_media_tags(
return media_tags, has_voice_directive
def _collect_history_media_paths(agent_history: List[Dict[str, Any]]) -> set:
    """Collect every media path already delivered in prior tool results.

    Used to dedup auto-appended MEDIA tags so the same file is not re-sent on
    later turns. Must cover BOTH delivery shapes:

    * ``MEDIA:<path>`` text tags in tool results, and
    * ``image_generate`` JSON-payload paths (``host_image`` / ``image`` /
      ``agent_visible_image``), which carry no MEDIA: tag.

    Missing the JSON-payload shape caused #46627: after a compression
    boundary the auto-append fallback rescans full history, re-discovers an
    earlier ``image_generate`` result whose path was never in the dedup set,
    and re-emits the MEDIA tag every turn.

    The two shapes are checked independently of each other, so an
    ``image_generate`` payload that merely contains the text ``MEDIA:``
    somewhere still contributes its JSON path to the result.

    Args:
        agent_history: Prior conversation messages as dicts. Only the
            ``role``, ``tool_calls``, ``content`` and ``tool_call_id`` /
            ``call_id`` keys are read.

    Returns:
        The set of path strings found in ``tool`` / ``function`` messages.
    """
    # First pass: remember which tool each call id belongs to, so a tool
    # result can be attributed to ``image_generate`` by its call id.
    tool_name_by_call_id: Dict[str, str] = {}
    for msg in agent_history:
        if msg.get("role") != "assistant":
            continue
        for call in msg.get("tool_calls") or []:
            call_id = call.get("id") or call.get("call_id")
            function = call.get("function") or {}
            name = str(function.get("name") or call.get("name") or "")
            if call_id and name:
                tool_name_by_call_id[str(call_id)] = name

    # Second pass: harvest paths from every tool/function result.
    paths: set = set()
    for msg in agent_history:
        if msg.get("role") not in {"tool", "function"}:
            continue
        # str() keeps the substring test and regex safe for non-string content.
        content = str(msg.get("content", "") or "")

        # Shape 1: MEDIA:<path> text tags.
        if "MEDIA:" in content:
            for match in _TOOL_MEDIA_RE.finditer(content):
                # Drop JSON punctuation that may trail a tag embedded in a
                # serialized payload.
                path = match.group(1).strip().rstrip('",}')
                if path:
                    paths.add(path)
            # Deliberately fall through: the same result may also be an
            # image_generate payload whose path is not behind a MEDIA: tag.

        # Shape 2: image_generate JSON payloads.
        call_id = str(msg.get("tool_call_id") or msg.get("call_id") or "")
        if tool_name_by_call_id.get(call_id) != "image_generate":
            continue
        try:
            payload = json.loads(content)
        except (ValueError, RecursionError):
            # Not JSON (or pathologically nested): nothing to extract.
            continue
        if not (isinstance(payload, dict) and payload.get("success")):
            continue
        # Record only the first populated field, in the order given by
        # _JSON_MEDIA_TOOL_PATH_FIELDS.
        for field in _JSON_MEDIA_TOOL_PATH_FIELDS:
            candidate = payload.get(field)
            if isinstance(candidate, str) and candidate:
                paths.add(candidate)
                break
    return paths
# ---------------------------------------------------------------------------
# SSL certificate auto-detection for NixOS and other non-standard systems.
# Must run BEFORE any HTTP library (discord, aiohttp, etc.) is imported.
@ -15537,22 +15586,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
# Collect MEDIA paths already in history so we can exclude them
# from the current turn's extraction. This is compression-safe:
# even if the message list shrinks, we know which paths are old.
_history_media_paths: set = set()
for _hm in agent_history:
if _hm.get("role") in {"tool", "function"}:
_hc = _hm.get("content", "")
if "MEDIA:" in _hc:
_TOOL_MEDIA_RE = re.compile(
r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
r'txt|csv|apk|ipa))',
re.IGNORECASE
)
for _match in _TOOL_MEDIA_RE.finditer(_hc):
_p = _match.group(1).strip().rstrip('",}')
if _p:
_history_media_paths.add(_p)
_history_media_paths: set = _collect_history_media_paths(agent_history)
# Register per-session gateway approval callback so dangerous
# command approval blocks the agent thread (mirrors CLI input()).

View file

@ -259,6 +259,69 @@ caption
)
assert tags == []
def test_collect_history_media_paths_includes_image_generate_json(self):
    """Regression test for #46627.

    The history media-path collector has to report ``image_generate``
    JSON-payload paths (which carry no MEDIA: tag) in addition to paths
    taken from MEDIA: text tags. If it does not, the auto-append fallback
    that rescans full history after a compression boundary finds the
    generated path missing from the dedup set and re-emits the same MEDIA
    tag on every turn.
    """
    from gateway.run import _collect_history_media_paths

    user_request = {"role": "user", "content": "make a cat"}
    image_call = {
        "role": "assistant",
        "tool_calls": [{"id": "c", "function": {"name": "image_generate"}}],
    }
    image_result = {
        "role": "tool",
        "tool_call_id": "c",
        "content": '{"success": true, "image": "/tmp/gen/cat.png"}',
    }
    # Result of a different tool that reports its file through a MEDIA:
    # text tag, so both delivery shapes are exercised in one history.
    voice_result = {
        "role": "tool",
        "tool_call_id": "d",
        "content": "Saved MEDIA:/tmp/voice/note.ogg done",
    }

    collected = _collect_history_media_paths(
        [user_request, image_call, image_result, voice_result]
    )

    # JSON-payload path: the shape the bug was about.
    assert "/tmp/gen/cat.png" in collected
    # MEDIA: text path: the shape that already worked.
    assert "/tmp/voice/note.ogg" in collected
def test_image_generate_not_reemitted_after_compression(self):
    """End-to-end check of the #46627 fix.

    Collect the history paths first, then run the compression-fallback
    rescan (stale ``history_offset``): the generated image must be deduped
    against the collected paths, i.e. no re-emission.
    """
    from gateway.run import (
        _collect_auto_append_media_tags,
        _collect_history_media_paths,
    )

    image_call = {
        "role": "assistant",
        "tool_calls": [{"id": "c", "function": {"name": "image_generate"}}],
    }
    image_result = {
        "role": "tool",
        "tool_call_id": "c",
        "content": '{"success": true, "image": "/tmp/gen/dog.png"}',
    }
    history = [image_call, image_result]
    already_delivered = _collect_history_media_paths(history)

    # Post-compression fallback: the offset is stale (larger than the
    # shrunken message list), so the collector rescans the whole list.
    # Because the dedup set is populated, the image that was already
    # delivered must NOT come back.
    tags, _voice_directive = _collect_auto_append_media_tags(
        history,
        history_offset=9999,
        history_media_paths=already_delivered,
    )
    assert tags == [], f"generated image re-emitted after compression: {tags}"
def test_media_tags_not_extracted_from_history(self):
"""MEDIA tags from previous turns should NOT be extracted again."""
# Simulate conversation history with a TTS call from a previous turn