hermes-agent/tests/agent/test_replay_cleanup.py
Teknium a8c862900b
fix(tui): sanitize replay history on WebUI/TUI session resume (#29086) (#53939)
A WebUI/TUI session whose last turn died mid-tool-loop (stale-timeout kill,
interrupt, or process restart before the tool result was written) persists a
dangling assistant(tool_calls) or interrupted assistant->tool tail. The
messaging gateway already strips these tails before replay (the #49201 fix),
but the TUI/WebUI resume path fed db.get_messages_as_conversation() straight
in as the agent's conversation_history with no cleanup. The model re-issued
the unanswered call on every resume -- including after a full WebUI + Gateway
restart, since the poison lives in the SessionDB, not memory -- leaving the
session permanently 'thinking'. Only deleting the session recovered it.

- Extract the two strippers + helper from gateway/run.py into a shared
  agent/replay_cleanup.py (sanitize_replay_history wraps both).
- gateway/run.py re-exports under the historical private names; messaging
  behavior unchanged.
- Both TUI cold-resume sites now sanitize the model-fed history while leaving
  the display transcript untouched, so the user still sees their full history.

Verified E2E against a real SessionDB: dangling and interrupted tails are
stripped from the model feed, healthy mid-progress tool sequences are
preserved, and the display transcript is always the full raw history.
2026-06-27 20:56:49 -07:00

92 lines
3.1 KiB
Python

"""Tests for agent.replay_cleanup — shared replay-tail sanitizers.
These functions were extracted from gateway/run.py so every resume surface
(messaging gateway AND TUI/WebUI gateway) strips poisoned tool-call tails the
same way. Regression coverage for #29086 (WebUI session permanently stuck
because the dangling tool-call tail was replayed on every resume).
"""
from agent.replay_cleanup import (
is_interrupted_tool_result,
strip_dangling_tool_call_tail,
strip_interrupted_tool_tails,
sanitize_replay_history,
)
def _user(text):
    """Build a user-role chat message whose content is ``text``."""
    message = dict(role="user", content=text)
    return message
def _assistant_tc(name, call_id="c1"):
    """Build an assistant message carrying a single function tool call.

    Args:
        name: Function name recorded in the tool call.
        call_id: Identifier stored under the tool call's ``"id"`` key.
            Defaults to ``"c1"``, the id this helper has always produced,
            so existing callers are unaffected; pass another value to
            build histories containing several distinct tool calls.

    Returns:
        A dict with role ``"assistant"``, empty content, and a one-element
        ``tool_calls`` list whose arguments are the empty JSON object.
    """
    return {
        "role": "assistant",
        "content": "",
        "tool_calls": [
            {
                "id": call_id,
                "type": "function",
                "function": {"name": name, "arguments": "{}"},
            }
        ],
    }
def _tool(content, call_id="c1"):
    """Build a tool-role message holding the result of a tool call.

    Args:
        content: Tool output stored under ``"content"``.
        call_id: Value stored under ``"tool_call_id"``. Defaults to
            ``"c1"``, the id this helper has always produced, so existing
            callers are unaffected; pass another value to answer a tool
            call that uses a different id.

    Returns:
        A dict with role ``"tool"``, the given ``tool_call_id`` and content.
    """
    return {"role": "tool", "tool_call_id": call_id, "content": content}
def test_is_interrupted_tool_result_markers():
    """Interrupt markers are detected; clean output and ``None`` are not."""
    interrupted_samples = (
        "[Command interrupted]",
        "foo\nexit_code: 130 (interrupt)\nbar",
    )
    benign_samples = (
        "exit_code: 0\nclean output",
        "ordinary tool output",
        None,
    )
    for sample in interrupted_samples:
        assert is_interrupted_tool_result(sample)
    for sample in benign_samples:
        assert not is_interrupted_tool_result(sample)
def test_strip_dangling_tool_call_tail_removes_unanswered_tail():
    """A trailing assistant tool call with no tool result is dropped."""
    unanswered = _assistant_tc("write_file")
    stripped = strip_dangling_tool_call_tail([_user("hi"), unanswered])
    assert stripped == [_user("hi")]
def test_strip_dangling_tool_call_tail_preserves_answered_pair():
    """An assistant tool call followed by its tool result is left intact.

    The expectation is built independently of the list handed to the
    function. Comparing the result with the input list itself would pass
    vacuously if the function stripped the pair by mutating its argument
    in place and returned that same list.
    """
    history = [_user("hi"), _assistant_tc("read_file"), _tool("contents")]
    expected = [_user("hi"), _assistant_tc("read_file"), _tool("contents")]
    out = strip_dangling_tool_call_tail(history)
    assert out == expected  # answered -> untouched
def test_strip_interrupted_tool_tails_removes_interrupted_block():
    """A tool call whose result is the interrupt marker is removed with it."""
    interrupted = [_assistant_tc("terminal"), _tool("[Command interrupted]")]
    result = strip_interrupted_tool_tails([_user("hi"), *interrupted])
    assert result == [_user("hi")]
def test_strip_interrupted_tool_tails_preserves_successful_block():
    """A completed tool call followed by a final assistant reply is kept.

    The expectation is built independently of the list handed to the
    function, so the check still fails if the function removes messages
    by mutating its argument in place and returns that same list.
    """
    history = [
        _user("hi"),
        _assistant_tc("read_file"),
        _tool("ok"),
        {"role": "assistant", "content": "done"},
    ]
    expected = [
        _user("hi"),
        _assistant_tc("read_file"),
        _tool("ok"),
        {"role": "assistant", "content": "done"},
    ]
    out = strip_interrupted_tool_tails(history)
    assert out == expected
def test_strip_interrupted_tool_tails_removes_orphan_interrupted_tool():
    """An interrupted tool message with no preceding assistant call is dropped."""
    orphan = _tool("[Command interrupted] exit_code: 130 interrupt")
    remaining = strip_interrupted_tool_tails([_user("hi"), orphan])
    assert remaining == [_user("hi")]
def test_sanitize_replay_history_combines_both():
    """One call removes a mid-history interrupted block and a dangling tail."""
    interrupted_block = [_assistant_tc("terminal"), _tool("[Command interrupted]")]
    dangling_tail = [_assistant_tc("write_file")]  # tool call never answered
    history = [
        _user("first"),
        *interrupted_block,
        _user("second"),
        *dangling_tail,
    ]
    cleaned = sanitize_replay_history(history)
    assert cleaned == [_user("first"), _user("second")]
def test_sanitize_replay_history_noop_on_clean_history():
    """A history with no tool calls comes back unchanged.

    The expectation is built independently of the list handed to the
    function, so the check still fails if the function drops messages by
    mutating its argument in place and returns that same list.
    """
    history = [_user("hi"), {"role": "assistant", "content": "hello"}]
    expected = [_user("hi"), {"role": "assistant", "content": "hello"}]
    assert sanitize_replay_history(history) == expected
def test_sanitize_replay_history_empty():
    """An empty history sanitizes to an empty list."""
    result = sanitize_replay_history([])
    assert result == []