mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
fix(agent): exclude prior-history tool messages from background review summary
The background memory/skill review forks a new AIAgent with conversation_history=messages_snapshot. The forked agent's _session_messages therefore contains tool messages copied from the prior conversation. The post-review scan that builds the user-visible 💾 summary walked the entire _session_messages list, so historical successes (e.g. 'Cron job '...' created.') were re-surfaced as if they had just happened — sometimes multiple times across unrelated background-review runs. Extract the scan into a staticmethod and skip any tool message whose tool_call_id was already present in messages_snapshot, with a content-equality fallback for tool messages that lack one. Fixes #14944
This commit is contained in:
parent
6051fba9dc
commit
27b6a217b5
2 changed files with 202 additions and 26 deletions
98
run_agent.py
98
run_agent.py
|
|
@ -2876,6 +2876,69 @@ class AIAgent:
|
||||||
"If nothing stands out, just say 'Nothing to save.' and stop."
|
"If nothing stands out, just say 'Nothing to save.' and stop."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _summarize_background_review_actions(
|
||||||
|
review_messages: List[Dict],
|
||||||
|
prior_snapshot: List[Dict],
|
||||||
|
) -> List[str]:
|
||||||
|
"""Build the human-facing action summary for a background review pass.
|
||||||
|
|
||||||
|
Walks the review agent's session messages and collects "successful tool
|
||||||
|
action" descriptions to surface to the user (e.g. "Memory updated").
|
||||||
|
Tool messages already present in ``prior_snapshot`` are skipped so we
|
||||||
|
don't re-surface stale results from the prior conversation that the
|
||||||
|
review agent inherited via ``conversation_history`` (issue #14944).
|
||||||
|
|
||||||
|
Matching is by ``tool_call_id`` when available, with a content-equality
|
||||||
|
fallback for tool messages that lack one.
|
||||||
|
"""
|
||||||
|
existing_tool_call_ids = set()
|
||||||
|
existing_tool_contents = set()
|
||||||
|
for prior in prior_snapshot or []:
|
||||||
|
if not isinstance(prior, dict) or prior.get("role") != "tool":
|
||||||
|
continue
|
||||||
|
tcid = prior.get("tool_call_id")
|
||||||
|
if tcid:
|
||||||
|
existing_tool_call_ids.add(tcid)
|
||||||
|
else:
|
||||||
|
content = prior.get("content")
|
||||||
|
if isinstance(content, str):
|
||||||
|
existing_tool_contents.add(content)
|
||||||
|
|
||||||
|
actions: List[str] = []
|
||||||
|
for msg in review_messages or []:
|
||||||
|
if not isinstance(msg, dict) or msg.get("role") != "tool":
|
||||||
|
continue
|
||||||
|
tcid = msg.get("tool_call_id")
|
||||||
|
if tcid and tcid in existing_tool_call_ids:
|
||||||
|
continue
|
||||||
|
if not tcid:
|
||||||
|
content_str = msg.get("content")
|
||||||
|
if isinstance(content_str, str) and content_str in existing_tool_contents:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
data = json.loads(msg.get("content", "{}"))
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
continue
|
||||||
|
if not isinstance(data, dict) or not data.get("success"):
|
||||||
|
continue
|
||||||
|
message = data.get("message", "")
|
||||||
|
target = data.get("target", "")
|
||||||
|
if "created" in message.lower():
|
||||||
|
actions.append(message)
|
||||||
|
elif "updated" in message.lower():
|
||||||
|
actions.append(message)
|
||||||
|
elif "added" in message.lower() or (target and "add" in message.lower()):
|
||||||
|
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
||||||
|
actions.append(f"{label} updated")
|
||||||
|
elif "Entry added" in message:
|
||||||
|
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
||||||
|
actions.append(f"{label} updated")
|
||||||
|
elif "removed" in message.lower() or "replaced" in message.lower():
|
||||||
|
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
||||||
|
actions.append(f"{label} updated")
|
||||||
|
return actions
|
||||||
|
|
||||||
def _spawn_background_review(
|
def _spawn_background_review(
|
||||||
self,
|
self,
|
||||||
messages_snapshot: List[Dict],
|
messages_snapshot: List[Dict],
|
||||||
|
|
@ -2925,32 +2988,15 @@ class AIAgent:
|
||||||
)
|
)
|
||||||
|
|
||||||
# Scan the review agent's messages for successful tool actions
|
# Scan the review agent's messages for successful tool actions
|
||||||
# and surface a compact summary to the user.
|
# and surface a compact summary to the user. Tool messages
|
||||||
actions = []
|
# already present in messages_snapshot must be skipped, since
|
||||||
for msg in getattr(review_agent, "_session_messages", []):
|
# the review agent inherits that history and would otherwise
|
||||||
if not isinstance(msg, dict) or msg.get("role") != "tool":
|
# re-surface stale "created"/"updated" messages from the prior
|
||||||
continue
|
# conversation as if they just happened (issue #14944).
|
||||||
try:
|
actions = self._summarize_background_review_actions(
|
||||||
data = json.loads(msg.get("content", "{}"))
|
getattr(review_agent, "_session_messages", []),
|
||||||
except (json.JSONDecodeError, TypeError):
|
messages_snapshot,
|
||||||
continue
|
)
|
||||||
if not data.get("success"):
|
|
||||||
continue
|
|
||||||
message = data.get("message", "")
|
|
||||||
target = data.get("target", "")
|
|
||||||
if "created" in message.lower():
|
|
||||||
actions.append(message)
|
|
||||||
elif "updated" in message.lower():
|
|
||||||
actions.append(message)
|
|
||||||
elif "added" in message.lower() or (target and "add" in message.lower()):
|
|
||||||
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
|
||||||
actions.append(f"{label} updated")
|
|
||||||
elif "Entry added" in message:
|
|
||||||
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
|
||||||
actions.append(f"{label} updated")
|
|
||||||
elif "removed" in message.lower() or "replaced" in message.lower():
|
|
||||||
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
|
||||||
actions.append(f"{label} updated")
|
|
||||||
|
|
||||||
if actions:
|
if actions:
|
||||||
summary = " · ".join(dict.fromkeys(actions))
|
summary = " · ".join(dict.fromkeys(actions))
|
||||||
|
|
|
||||||
130
tests/run_agent/test_background_review_summary.py
Normal file
130
tests/run_agent/test_background_review_summary.py
Normal file
|
|
@ -0,0 +1,130 @@
|
||||||
|
"""Tests for AIAgent._summarize_background_review_actions.
|
||||||
|
|
||||||
|
Regression coverage for issue #14944: the background memory/skill review used
|
||||||
|
to re-surface tool results that were already present in the conversation
|
||||||
|
history before the review started (e.g. an earlier "Cron job '...' created.").
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
from run_agent import AIAgent
|
||||||
|
|
||||||
|
|
||||||
|
_summarize = AIAgent._summarize_background_review_actions
|
||||||
|
|
||||||
|
|
||||||
|
def _tool_msg(tool_call_id, payload):
|
||||||
|
return {
|
||||||
|
"role": "tool",
|
||||||
|
"tool_call_id": tool_call_id,
|
||||||
|
"content": json.dumps(payload),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_skips_prior_tool_messages_by_tool_call_id():
|
||||||
|
"""Stale 'created' tool result from prior history must not be re-surfaced."""
|
||||||
|
prior_payload = {"success": True, "message": "Cron job 'remind-me' created."}
|
||||||
|
new_payload = {
|
||||||
|
"success": True,
|
||||||
|
"message": "Entry added",
|
||||||
|
"target": "user",
|
||||||
|
}
|
||||||
|
|
||||||
|
snapshot = [
|
||||||
|
{"role": "user", "content": "create a reminder"},
|
||||||
|
_tool_msg("call_old", prior_payload),
|
||||||
|
{"role": "assistant", "content": "done"},
|
||||||
|
]
|
||||||
|
review_messages = list(snapshot) + [
|
||||||
|
{"role": "user", "content": "<review prompt>"},
|
||||||
|
_tool_msg("call_new", new_payload),
|
||||||
|
]
|
||||||
|
|
||||||
|
actions = _summarize(review_messages, snapshot)
|
||||||
|
|
||||||
|
assert "Cron job 'remind-me' created." not in actions
|
||||||
|
assert "User profile updated" in actions
|
||||||
|
|
||||||
|
|
||||||
|
def test_includes_genuinely_new_actions():
|
||||||
|
new_payload = {
|
||||||
|
"success": True,
|
||||||
|
"message": "Memory entry created.",
|
||||||
|
}
|
||||||
|
review_messages = [_tool_msg("call_new", new_payload)]
|
||||||
|
|
||||||
|
actions = _summarize(review_messages, prior_snapshot=[])
|
||||||
|
|
||||||
|
assert actions == ["Memory entry created."]
|
||||||
|
|
||||||
|
|
||||||
|
def test_falls_back_to_content_equality_when_tool_call_id_missing():
|
||||||
|
"""If a tool message has no tool_call_id, match prior entries by content."""
|
||||||
|
payload = {"success": True, "message": "Cron job 'X' created."}
|
||||||
|
raw = json.dumps(payload)
|
||||||
|
prior_msg = {"role": "tool", "content": raw} # no tool_call_id
|
||||||
|
review_messages = [
|
||||||
|
{"role": "tool", "content": raw}, # same content -> stale, skip
|
||||||
|
_tool_msg("call_new", {"success": True, "message": "Skill created."}),
|
||||||
|
]
|
||||||
|
|
||||||
|
actions = _summarize(review_messages, [prior_msg])
|
||||||
|
|
||||||
|
assert "Cron job 'X' created." not in actions
|
||||||
|
assert "Skill created." in actions
|
||||||
|
|
||||||
|
|
||||||
|
def test_ignores_failed_tool_results():
|
||||||
|
bad = {"success": False, "message": "something created but failed"}
|
||||||
|
review_messages = [_tool_msg("call_new", bad)]
|
||||||
|
|
||||||
|
actions = _summarize(review_messages, [])
|
||||||
|
|
||||||
|
assert actions == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_handles_non_json_tool_content_gracefully():
|
||||||
|
review_messages = [
|
||||||
|
{"role": "tool", "tool_call_id": "x", "content": "not-json"},
|
||||||
|
_tool_msg("call_y", {"success": True, "message": "Memory updated."}),
|
||||||
|
]
|
||||||
|
|
||||||
|
actions = _summarize(review_messages, [])
|
||||||
|
|
||||||
|
assert actions == ["Memory updated."]
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_inputs():
|
||||||
|
assert _summarize([], []) == []
|
||||||
|
assert _summarize(None, None) == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_added_message_relabels_by_target():
|
||||||
|
review_messages = [
|
||||||
|
_tool_msg(
|
||||||
|
"c1",
|
||||||
|
{"success": True, "message": "Entry added to store.", "target": "memory"},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
actions = _summarize(review_messages, [])
|
||||||
|
|
||||||
|
assert actions == ["Memory updated"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_removed_or_replaced_relabels_by_target():
|
||||||
|
review_messages = [
|
||||||
|
_tool_msg(
|
||||||
|
"c1",
|
||||||
|
{"success": True, "message": "Entry removed.", "target": "user"},
|
||||||
|
),
|
||||||
|
_tool_msg(
|
||||||
|
"c2",
|
||||||
|
{"success": True, "message": "Entry replaced.", "target": "memory"},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
actions = _summarize(review_messages, [])
|
||||||
|
|
||||||
|
assert "User profile updated" in actions
|
||||||
|
assert "Memory updated" in actions
|
||||||
Loading…
Add table
Add a link
Reference in a new issue