mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-24 10:52:21 +00:00
fix(gateway): dedup image_generate media across the compression boundary
After context compression, the agent re-sent an already-delivered generated image on every subsequent turn (#46627). The auto-append fallback rescans full history when the message list shrinks (compression-safe path), deduping against _history_media_paths — but that set was built by scanning ONLY MEDIA: text tags in tool results. image_generate returns its path in a JSON payload field (host_image/image/agent_visible_image), never a MEDIA: tag, so generated-image paths never entered the dedup set and were re-emitted after the boundary. Extract the history-path collection into _collect_history_media_paths(), which now covers BOTH delivery shapes: MEDIA: text tags AND image_generate JSON-payload paths (mirroring what _collect_auto_append_media_tags extracts). The inline block in _handle_message is replaced with a call to the helper. Co-authored-by: liuhao1024 <sunsky.lau@gmail.com>
This commit is contained in:
parent
1f874dfe44
commit
8ac5e90ec2
2 changed files with 113 additions and 16 deletions
|
|
@ -1115,6 +1115,55 @@ def _collect_auto_append_media_tags(
|
|||
|
||||
return media_tags, has_voice_directive
|
||||
|
||||
|
||||
def _collect_history_media_paths(agent_history: List[Dict[str, Any]]) -> set:
    """Collect every media path already delivered in prior tool results.

    Used to dedup auto-appended MEDIA tags so the same file is not re-sent on
    later turns. Must cover BOTH delivery shapes:

    * ``MEDIA:<path>`` text tags in tool results, and
    * ``image_generate`` JSON-payload paths (``host_image`` / ``image`` /
      ``agent_visible_image``), which carry no MEDIA: tag.

    Missing the JSON-payload shape caused #46627: after a compression
    boundary the auto-append fallback rescans full history, re-discovers an
    earlier ``image_generate`` result whose path was never in the dedup set,
    and re-emits the MEDIA tag every turn.

    The two shapes are checked independently: an ``image_generate`` result
    that merely contains the literal text ``MEDIA:`` (without a matching
    tag) still has its JSON payload inspected, so its path cannot slip out
    of the dedup set.

    Args:
        agent_history: Conversation messages as dicts. Only ``assistant``
            messages (for their ``tool_calls``) and ``tool`` / ``function``
            messages (for their ``content``) are inspected.

    Returns:
        A set of path strings found in the history's tool results.
    """
    paths: set = set()

    # Pass 1: map each tool-call id to the tool name that was invoked, so a
    # tool result can later be attributed to ``image_generate``.
    tool_name_by_call_id: Dict[str, str] = {}
    for msg in agent_history:
        if msg.get("role") != "assistant":
            continue
        for call in msg.get("tool_calls") or []:
            cid = call.get("id") or call.get("call_id")
            fn = call.get("function") or {}
            # The name may sit under "function" or directly on the call.
            name = str(fn.get("name") or call.get("name") or "")
            if cid and name:
                tool_name_by_call_id[str(cid)] = name

    # Pass 2: harvest paths from tool results.
    for msg in agent_history:
        if msg.get("role") not in {"tool", "function"}:
            continue
        content = str(msg.get("content", "") or "")

        # Shape 1: MEDIA:<path> text tags.
        if "MEDIA:" in content:
            for match in _TOOL_MEDIA_RE.finditer(content):
                # Trim whitespace and any trailing JSON punctuation.
                p = match.group(1).strip().rstrip('",}')
                if p:
                    paths.add(p)
            # Deliberately no ``continue`` here: fall through so that an
            # image_generate payload containing "MEDIA:" is still parsed.

        # Shape 2: image_generate JSON payloads.
        cid = str(msg.get("tool_call_id") or msg.get("call_id") or "")
        if tool_name_by_call_id.get(cid) != "image_generate":
            continue
        try:
            payload = json.loads(content)
        except (ValueError, RecursionError):
            # Not valid JSON (JSONDecodeError is a ValueError) or nested too
            # deeply to parse; best-effort dedup simply skips this result.
            continue
        if not (isinstance(payload, dict) and payload.get("success")):
            continue
        for field in _JSON_MEDIA_TOOL_PATH_FIELDS:
            jp = payload.get(field)
            if isinstance(jp, str) and jp:
                # Only the first populated path field is recorded.
                paths.add(jp)
                break

    return paths
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SSL certificate auto-detection for NixOS and other non-standard systems.
|
||||
# Must run BEFORE any HTTP library (discord, aiohttp, etc.) is imported.
|
||||
|
|
@ -15537,22 +15586,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
# Collect MEDIA paths already in history so we can exclude them
|
||||
# from the current turn's extraction. This is compression-safe:
|
||||
# even if the message list shrinks, we know which paths are old.
|
||||
_history_media_paths: set = set()
|
||||
for _hm in agent_history:
|
||||
if _hm.get("role") in {"tool", "function"}:
|
||||
_hc = _hm.get("content", "")
|
||||
if "MEDIA:" in _hc:
|
||||
_TOOL_MEDIA_RE = re.compile(
|
||||
r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
|
||||
r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
|
||||
r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
|
||||
r'txt|csv|apk|ipa))',
|
||||
re.IGNORECASE
|
||||
)
|
||||
for _match in _TOOL_MEDIA_RE.finditer(_hc):
|
||||
_p = _match.group(1).strip().rstrip('",}')
|
||||
if _p:
|
||||
_history_media_paths.add(_p)
|
||||
_history_media_paths: set = _collect_history_media_paths(agent_history)
|
||||
|
||||
# Register per-session gateway approval callback so dangerous
|
||||
# command approval blocks the agent thread (mirrors CLI input()).
|
||||
|
|
|
|||
|
|
@ -259,6 +259,69 @@ caption
|
|||
)
|
||||
assert tags == []
|
||||
|
||||
def test_collect_history_media_paths_includes_image_generate_json(self):
    """Regression for #46627: both delivery shapes must reach the dedup set.

    The history media-path collector has to pick up ``image_generate``
    JSON-payload paths (which carry no MEDIA: tag) as well as MEDIA: text
    tags. Otherwise, after a compression boundary the auto-append fallback
    rescans full history, finds the generated path absent from the dedup
    set, and re-emits the same MEDIA tag every turn.
    """
    from gateway.run import _collect_history_media_paths

    user_turn = {"role": "user", "content": "make a cat"}
    generate_call = {
        "role": "assistant",
        "tool_calls": [{"id": "c", "function": {"name": "image_generate"}}],
    }
    generate_result = {
        "role": "tool",
        "tool_call_id": "c",
        "content": '{"success": true, "image": "/tmp/gen/cat.png"}',
    }
    # A separate MEDIA: text tag from another tool, to confirm both shapes.
    voice_result = {
        "role": "tool",
        "tool_call_id": "d",
        "content": "Saved MEDIA:/tmp/voice/note.ogg done",
    }

    collected = _collect_history_media_paths(
        [user_turn, generate_call, generate_result, voice_result]
    )

    assert "/tmp/gen/cat.png" in collected  # JSON-payload path (the bug)
    assert "/tmp/voice/note.ogg" in collected  # MEDIA: text path (already worked)
|
||||
|
||||
def test_image_generate_not_reemitted_after_compression(self):
    """End-to-end check of the #46627 fix.

    Collect the history paths first, then run the compression-fallback
    rescan (stale ``history_offset``) and confirm the generated image is
    deduped against them, i.e. nothing is re-emitted.
    """
    from gateway.run import (
        _collect_auto_append_media_tags,
        _collect_history_media_paths,
    )

    generate_call = {
        "role": "assistant",
        "tool_calls": [{"id": "c", "function": {"name": "image_generate"}}],
    }
    generate_result = {
        "role": "tool",
        "tool_call_id": "c",
        "content": '{"success": true, "image": "/tmp/gen/dog.png"}',
    }
    messages = [generate_call, generate_result]

    already_delivered = _collect_history_media_paths(messages)

    # Simulate the post-compression fallback: history_offset is stale
    # (larger than the shrunken message list), so the collector rescans
    # the full list. With the dedup set populated, the already-delivered
    # image must NOT be re-emitted.
    emitted, _voice_flag = _collect_auto_append_media_tags(
        messages,
        history_offset=9999,
        history_media_paths=already_delivered,
    )

    assert emitted == [], f"generated image re-emitted after compression: {emitted}"
|
||||
|
||||
|
||||
def test_media_tags_not_extracted_from_history(self):
|
||||
"""MEDIA tags from previous turns should NOT be extracted again."""
|
||||
# Simulate conversation history with a TTS call from a previous turn
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue