def _collect_history_media_paths(agent_history: List[Dict[str, Any]]) -> set:
    """Collect every media path already delivered in prior tool results.

    Used to dedup auto-appended MEDIA tags so the same file is not re-sent on
    later turns. Must cover BOTH delivery shapes:
      * ``MEDIA:`` text tags in tool results, and
      * ``image_generate`` JSON-payload paths (``host_image`` / ``image`` /
        ``agent_visible_image``), which carry no MEDIA: tag.

    Missing the JSON-payload shape caused #46627: after a compression
    boundary the auto-append fallback rescans full history, re-discovers an
    earlier ``image_generate`` result whose path was never in the dedup set,
    and re-emits the MEDIA tag every turn.

    Args:
        agent_history: Chat messages as dicts. ``None``/empty history and
            entries that are not dicts are tolerated and ignored.

    Returns:
        A set of path strings found in ``tool``/``function`` role messages.
    """
    paths: set = set()
    history = agent_history or []

    # Pass 1: map each tool-call id to the tool name that produced it, so a
    # tool result can be recognised as an ``image_generate`` payload below.
    tool_name_by_call_id: Dict[str, str] = {}
    for msg in history:
        if not isinstance(msg, dict) or msg.get("role") != "assistant":
            continue
        for call in msg.get("tool_calls") or []:
            if not isinstance(call, dict):
                continue
            cid = call.get("id") or call.get("call_id")
            fn = call.get("function")
            fn_name = fn.get("name") if isinstance(fn, dict) else None
            name = str(fn_name or call.get("name") or "")
            if cid and name:
                tool_name_by_call_id[str(cid)] = name

    # Pass 2: harvest paths from tool results.
    for msg in history:
        if not isinstance(msg, dict):
            continue
        if msg.get("role") not in ("tool", "function"):
            continue
        content = str(msg.get("content", "") or "")

        if "MEDIA:" in content:
            for match in _TOOL_MEDIA_RE.finditer(content):
                # Drop JSON punctuation that may trail an embedded tag.
                tagged = match.group(1).strip().rstrip('",}')
                if tagged:
                    paths.add(tagged)
        # Deliberately no early ``continue`` here: an image_generate payload
        # may mention "MEDIA:" somewhere in its text and must still have its
        # JSON path recorded, otherwise the dedup set misses it.

        cid = str(msg.get("tool_call_id") or msg.get("call_id") or "")
        if tool_name_by_call_id.get(cid) != "image_generate":
            continue
        try:
            payload = json.loads(content)
        except (ValueError, RecursionError):
            # Not a JSON payload (or pathologically nested): nothing to add.
            continue
        if not isinstance(payload, dict) or not payload.get("success"):
            continue
        # Only the first populated path field is recorded.
        for field in _JSON_MEDIA_TOOL_PATH_FIELDS:
            candidate = payload.get(field)
            if isinstance(candidate, str) and candidate:
                paths.add(candidate)
                break
    return paths
def test_collect_history_media_paths_includes_image_generate_json(self):
    """Regression guard for #46627.

    The history collector has to report paths that arrive as
    ``image_generate`` JSON payloads (which carry no MEDIA: tag) alongside
    paths that arrive as MEDIA: text tags. If the JSON shape is missed, the
    generated path is absent from the dedup set and the same MEDIA tag is
    emitted again on later turns.
    """
    from gateway.run import _collect_history_media_paths

    image_call = {
        "role": "assistant",
        "tool_calls": [{"id": "c", "function": {"name": "image_generate"}}],
    }
    image_result = {
        "role": "tool",
        "tool_call_id": "c",
        "content": '{"success": true, "image": "/tmp/gen/cat.png"}',
    }
    # Result of some other tool that reports its file through a MEDIA: tag,
    # so both delivery shapes are exercised in one history.
    voice_result = {
        "role": "tool",
        "tool_call_id": "d",
        "content": "Saved MEDIA:/tmp/voice/note.ogg done",
    }
    history = [
        {"role": "user", "content": "make a cat"},
        image_call,
        image_result,
        voice_result,
    ]

    collected = _collect_history_media_paths(history)

    # JSON-payload path: the shape the regression is about.
    assert "/tmp/gen/cat.png" in collected
    # MEDIA: text path: the shape that was already handled.
    assert "/tmp/voice/note.ogg" in collected


def test_image_generate_not_reemitted_after_compression(self):
    """End-to-end check of the #46627 fix.

    Paths are first collected from history and then handed to the
    auto-append collector together with a stale ``history_offset``; the
    already-delivered generated image must come back deduplicated.
    """
    from gateway.run import (
        _collect_auto_append_media_tags,
        _collect_history_media_paths,
    )

    image_call = {
        "role": "assistant",
        "tool_calls": [{"id": "c", "function": {"name": "image_generate"}}],
    }
    image_result = {
        "role": "tool",
        "tool_call_id": "c",
        "content": '{"success": true, "image": "/tmp/gen/dog.png"}',
    }
    history = [image_call, image_result]

    history_paths = _collect_history_media_paths(history)

    # Mimic the post-compression situation: the offset is far larger than
    # the (shrunken) message list, which is meant to push the collector
    # onto its full-history rescan. With the dedup set populated, nothing
    # may be emitted for the image that was already delivered.
    tags, _ = _collect_auto_append_media_tags(
        history,
        history_offset=9999,
        history_media_paths=history_paths,
    )
    assert tags == [], f"generated image re-emitted after compression: {tags}"