mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-17 09:41:58 +00:00
fix(gateway): scope tool-result MEDIA scan to current turn
The post-run scan that appends tool-emitted MEDIA: tags to the final response iterated every tool/function message in the full conversation and relied solely on path-based dedup against paths reconstructed from the replayable transcript. When that reconstruction does not byte-match the in-memory tool content (timestamp stripping, observed-context withholding, compression rewrites), a stale path emitted several turns earlier is absent from the dedup set and leaks onto a later text-only reply (Telegram 'Sending media group of 1 photo(s)' with no MEDIA directive present). Scope the scan to this turn's new messages by slicing result['messages'] at len(agent_history) (agent_history is passed as conversation_history into run_conversation, so the returned list is history + this turn). Retain path-based dedup as a secondary guard and as the sole guard on the compression-shrink fallback, preserving the #160 behaviour. Closes #34608
This commit is contained in:
parent
38c4f8c371
commit
08c0b22417
2 changed files with 158 additions and 4 deletions
|
|
@ -17478,13 +17478,33 @@ class GatewayRunner:
|
|||
# append any that aren't already present in the final response, so the
|
||||
# adapter's extract_media() can find and deliver the files exactly once.
|
||||
#
|
||||
# Uses path-based deduplication against _history_media_paths (collected
|
||||
# before run_conversation) instead of index slicing. This is safe even
|
||||
# when context compression shrinks the message list. (Fixes #160)
|
||||
# Scope the scan to THIS turn's tool results only. ``agent_history``
|
||||
# was passed into run_conversation as ``conversation_history``, so the
|
||||
# agent's returned ``messages`` list is ``agent_history`` followed by
|
||||
# the messages produced this turn. Slicing at ``len(agent_history)``
|
||||
# isolates the current turn precisely, so a stale MEDIA: path emitted
|
||||
# by a tool several turns earlier (still present in the full message
|
||||
# list) can never leak onto a later text-only reply. (Fixes #34608)
|
||||
#
|
||||
# Path-based deduplication against _history_media_paths (collected
|
||||
# before run_conversation) is retained as a secondary guard. It is
|
||||
# also the sole guard on the fallback branch taken when mid-run
|
||||
# context compression shrinks the message list below the original
|
||||
# history length, preserving the compression-safe behaviour of #160.
|
||||
if "MEDIA:" not in final_response:
|
||||
media_tags = []
|
||||
has_voice_directive = False
|
||||
for msg in result.get("messages", []):
|
||||
_all_msgs = result.get("messages", [])
|
||||
_history_len = len(agent_history)
|
||||
# Only trust the slice boundary when the message list still
|
||||
# contains the full history prefix. Mid-run compression can
|
||||
# rewrite/shrink the list; in that case fall back to scanning
|
||||
# everything and rely on _history_media_paths for dedup.
|
||||
if _history_len and len(_all_msgs) >= _history_len:
|
||||
_scan_msgs = _all_msgs[_history_len:]
|
||||
else:
|
||||
_scan_msgs = _all_msgs
|
||||
for msg in _scan_msgs:
|
||||
if msg.get("role") in {"tool", "function"}:
|
||||
content = msg.get("content", "")
|
||||
if "MEDIA:" in content:
|
||||
|
|
|
|||
|
|
@ -5,6 +5,10 @@ Verifies that MEDIA tags (e.g., from TTS tool) are only extracted from
|
|||
messages in the CURRENT turn, not from the full conversation history.
|
||||
This prevents voice messages from accumulating and being sent multiple
|
||||
times per reply. (Regression test for #160)
|
||||
|
||||
Also covers #34608: a stale MEDIA: path emitted by an execute_code /
|
||||
make_image tool several turns earlier must not leak onto a later
|
||||
text-only reply, even when the path-based dedup set fails to capture it.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
|
@ -43,6 +47,37 @@ def extract_media_tags_fixed(result_messages, history_len):
|
|||
return media_tags, has_voice_directive
|
||||
|
||||
|
||||
def extract_media_tags_production(result_messages, history_len, history_media_paths):
|
||||
"""Mirror of the production scan in gateway/run.py after the #34608 fix.
|
||||
|
||||
Primary guard: scope the scan to the current turn via ``history_len``
|
||||
slicing (matching how ``agent_history`` is passed as
|
||||
``conversation_history`` into ``run_conversation``). Secondary guard:
|
||||
path-based dedup against ``history_media_paths`` (the #160 compression-safe
|
||||
fallback, also used when compression shrinks the list below history_len).
|
||||
"""
|
||||
media_tags = []
|
||||
has_voice_directive = False
|
||||
|
||||
if len(result_messages) >= history_len and history_len:
|
||||
scan_msgs = result_messages[history_len:]
|
||||
else:
|
||||
scan_msgs = result_messages
|
||||
|
||||
for msg in scan_msgs:
|
||||
if msg.get("role") == "tool" or msg.get("role") == "function":
|
||||
content = msg.get("content", "")
|
||||
if "MEDIA:" in content:
|
||||
for match in re.finditer(r'MEDIA:(\S+)', content):
|
||||
path = match.group(1).strip().rstrip('",}')
|
||||
if path and path not in history_media_paths:
|
||||
media_tags.append(f"MEDIA:{path}")
|
||||
if "[[audio_as_voice]]" in content:
|
||||
has_voice_directive = True
|
||||
|
||||
return media_tags, has_voice_directive
|
||||
|
||||
|
||||
def extract_media_tags_broken(result_messages):
|
||||
"""
|
||||
The BROKEN behavior: extract MEDIA tags from ALL messages including history.
|
||||
|
|
@ -180,5 +215,104 @@ class TestMediaExtraction:
|
|||
assert len(unique) == 2 # After dedup: same.ogg and different.ogg
|
||||
|
||||
|
||||
class TestStaleToolMediaLeak:
|
||||
"""Regression tests for #34608.
|
||||
|
||||
A MEDIA: path emitted by an execute_code / make_image tool several turns
|
||||
earlier remains in the full conversation message list. A later text-only
|
||||
reply (zero MEDIA directives) must NOT attach that stale image.
|
||||
|
||||
The production code previously relied solely on path-based dedup against
|
||||
paths reconstructed from the replayable transcript. When that
|
||||
reconstruction does not byte-match the in-memory tool content (timestamp
|
||||
stripping, observed-context withholding, compression rewrites), the stale
|
||||
path is absent from the dedup set and leaks. Turn-scoped slicing closes
|
||||
this class of bug deterministically.
|
||||
"""
|
||||
|
||||
def test_stale_execute_code_media_not_attached_to_text_only_reply(self):
|
||||
"""The exact #34608 scenario: make_image cover from an earlier turn."""
|
||||
# Prior turn generated an image via execute_code stdout.
|
||||
history = [
|
||||
{"role": "user", "content": "Make a cover image"},
|
||||
{"role": "assistant", "content": None,
|
||||
"tool_calls": [{"id": "1", "function": {"name": "execute_code"}}]},
|
||||
{"role": "tool", "tool_call_id": "1",
|
||||
"content": "Generating cover...\nMEDIA:/tmp/seosmi_cover.png\nDone."},
|
||||
{"role": "assistant", "content": "Here is your cover."},
|
||||
]
|
||||
# Current turn: plain text status update, zero MEDIA directives.
|
||||
new_messages = [
|
||||
{"role": "user", "content": "What skill version am I on?"},
|
||||
{"role": "assistant", "content": "You're on v0.15.1."},
|
||||
]
|
||||
all_messages = history + new_messages
|
||||
history_len = len(history)
|
||||
|
||||
# Simulate the dedup set FAILING to capture the stale path (the real
|
||||
# #34608 condition: replayable-history reconstruction diverged from
|
||||
# the in-memory tool content, so the path is not in the set).
|
||||
history_media_paths = set()
|
||||
|
||||
tags, voice = extract_media_tags_production(
|
||||
all_messages, history_len, history_media_paths
|
||||
)
|
||||
assert tags == [], (
|
||||
"Stale tool MEDIA from a prior turn must not leak onto a "
|
||||
f"later text-only reply, got {tags}"
|
||||
)
|
||||
assert voice is False
|
||||
|
||||
# The pre-fix production behaviour (scan everything, dedup only) would
|
||||
# have leaked the stale path when the dedup set missed it.
|
||||
broken_tags, _ = extract_media_tags_broken(all_messages)
|
||||
assert any("seosmi_cover.png" in t for t in broken_tags), (
|
||||
"Sanity: the unscoped scan does surface the stale path"
|
||||
)
|
||||
|
||||
def test_current_turn_media_still_attached_when_dedup_set_empty(self):
|
||||
"""Turn-scoping must not suppress genuinely new media."""
|
||||
history = [
|
||||
{"role": "user", "content": "hi"},
|
||||
{"role": "assistant", "content": "hello"},
|
||||
]
|
||||
new_messages = [
|
||||
{"role": "user", "content": "Make me a cover image"},
|
||||
{"role": "assistant", "content": None,
|
||||
"tool_calls": [{"id": "9", "function": {"name": "execute_code"}}]},
|
||||
{"role": "tool", "tool_call_id": "9",
|
||||
"content": "MEDIA:/tmp/fresh_cover.png"},
|
||||
{"role": "assistant", "content": "Here it is."},
|
||||
]
|
||||
all_messages = history + new_messages
|
||||
tags, _ = extract_media_tags_production(
|
||||
all_messages, len(history), set()
|
||||
)
|
||||
assert len(tags) == 1 and "fresh_cover.png" in tags[0]
|
||||
|
||||
def test_compression_shrink_falls_back_to_path_dedup(self):
|
||||
"""When the list is shorter than history_len (mid-run compression),
|
||||
fall back to scanning everything with path-based dedup so the #160
|
||||
compression-safe guarantee is preserved."""
|
||||
# Post-compression list is shorter than the original history length.
|
||||
compressed_messages = [
|
||||
{"role": "user", "content": "summary so far..."},
|
||||
{"role": "tool", "tool_call_id": "7",
|
||||
"content": "MEDIA:/tmp/old_from_history.png"},
|
||||
{"role": "assistant", "content": "ok"},
|
||||
]
|
||||
original_history_len = 12 # larger than the compressed list
|
||||
# The old path IS captured in the dedup set here (history scan ran
|
||||
# before compression), so it must still be excluded.
|
||||
history_media_paths = {"/tmp/old_from_history.png"}
|
||||
tags, _ = extract_media_tags_production(
|
||||
compressed_messages, original_history_len, history_media_paths
|
||||
)
|
||||
assert tags == [], (
|
||||
"On the compression fallback path, path-dedup must still exclude "
|
||||
f"known-old media, got {tags}"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue