mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
A WebUI/TUI session whose last turn died mid-tool-loop (stale-timeout kill, interrupt, or process restart before the tool result was written) persists a dangling assistant(tool_calls) or interrupted assistant->tool tail. The messaging gateway already strips these tails before replay (the #49201 fix), but the TUI/WebUI resume path fed db.get_messages_as_conversation() straight in as the agent's conversation_history with no cleanup. The model re-issued the unanswered call on every resume -- including after a full WebUI + Gateway restart, since the poison lives in the SessionDB, not memory -- leaving the session permanently 'thinking'. Only deleting the session recovered it.

- Extract the two strippers + helper from gateway/run.py into a shared agent/replay_cleanup.py (sanitize_replay_history wraps both).
- gateway/run.py re-exports under the historical private names; messaging behavior unchanged.
- Both TUI cold-resume sites now sanitize the model-fed history while leaving the display transcript untouched, so the user still sees their full history.

Verified E2E against a real SessionDB: dangling and interrupted tails are stripped from the model feed, healthy mid-progress tool sequences are preserved, and the display transcript is always the full raw history.
92 lines
3.1 KiB
Python
92 lines
3.1 KiB
Python
"""Tests for agent.replay_cleanup — shared replay-tail sanitizers.

These functions were extracted from gateway/run.py so every resume surface
(messaging gateway AND TUI/WebUI gateway) strips poisoned tool-call tails the
same way. Regression coverage for #29086 (WebUI session permanently stuck
because the dangling tool-call tail was replayed on every resume).
"""
|
|
|
|
from agent.replay_cleanup import (
|
|
is_interrupted_tool_result,
|
|
strip_dangling_tool_call_tail,
|
|
strip_interrupted_tool_tails,
|
|
sanitize_replay_history,
|
|
)
|
|
|
|
|
|
def _user(text):
|
|
return {"role": "user", "content": text}
|
|
|
|
|
|
def _assistant_tc(name):
|
|
return {
|
|
"role": "assistant",
|
|
"content": "",
|
|
"tool_calls": [
|
|
{"id": "c1", "type": "function", "function": {"name": name, "arguments": "{}"}}
|
|
],
|
|
}
|
|
|
|
|
|
def _tool(content):
|
|
return {"role": "tool", "tool_call_id": "c1", "content": content}
|
|
|
|
|
|
def test_is_interrupted_tool_result_markers():
    """Check which tool-result strings are classified as interrupted.

    The bare ``[Command interrupted]`` marker and an embedded
    ``exit_code: 130 (interrupt)`` line must be detected; clean output,
    ordinary text and ``None`` must not.
    """
    # Positive cases: explicit interrupt markers.
    assert is_interrupted_tool_result("[Command interrupted]")
    assert is_interrupted_tool_result("foo\nexit_code: 130 (interrupt)\nbar")
    # Negative cases: successful exit, plain output, and missing content.
    assert not is_interrupted_tool_result("exit_code: 0\nclean output")
    assert not is_interrupted_tool_result("ordinary tool output")
    assert not is_interrupted_tool_result(None)
|
|
|
|
|
|
def test_strip_dangling_tool_call_tail_removes_unanswered_tail():
    """A trailing assistant tool call with no tool result is dropped."""
    history = [_user("hi"), _assistant_tc("write_file")]
    out = strip_dangling_tool_call_tail(history)
    assert out == [_user("hi")]
|
|
|
|
|
|
def test_strip_dangling_tool_call_tail_preserves_answered_pair():
    """A tool call that is followed by its tool result is left in place."""
    history = [_user("hi"), _assistant_tc("read_file"), _tool("contents")]
    out = strip_dangling_tool_call_tail(history)
    assert out == history  # answered -> untouched
|
|
|
|
|
|
def test_strip_interrupted_tool_tails_removes_interrupted_block():
    """An assistant call plus its interrupted tool result are both removed."""
    history = [_user("hi"), _assistant_tc("terminal"), _tool("[Command interrupted]")]
    out = strip_interrupted_tool_tails(history)
    assert out == [_user("hi")]
|
|
|
|
|
|
def test_strip_interrupted_tool_tails_preserves_successful_block():
    """A successful tool call followed by a final assistant reply is kept."""
    history = [
        _user("hi"),
        _assistant_tc("read_file"),
        _tool("ok"),
        {"role": "assistant", "content": "done"},
    ]
    out = strip_interrupted_tool_tails(history)
    assert out == history
|
|
|
|
|
|
def test_strip_interrupted_tool_tails_removes_orphan_interrupted_tool():
    """An interrupted tool result with no preceding assistant call is removed."""
    history = [_user("hi"), _tool("[Command interrupted] exit_code: 130 interrupt")]
    out = strip_interrupted_tool_tails(history)
    assert out == [_user("hi")]
|
|
|
|
|
|
def test_sanitize_replay_history_combines_both():
    """One pass removes an interrupted block and a dangling tail together.

    Only the two user messages are expected to survive.
    """
    # interrupted block in the middle + dangling tail at the end
    history = [
        _user("first"),
        _assistant_tc("terminal"), _tool("[Command interrupted]"),
        _user("second"),
        _assistant_tc("write_file"),  # dangling
    ]
    out = sanitize_replay_history(history)
    assert out == [_user("first"), _user("second")]
|
|
|
|
|
|
def test_sanitize_replay_history_noop_on_clean_history():
    """A history with no tool calls comes back equal to the input."""
    history = [_user("hi"), {"role": "assistant", "content": "hello"}]
    assert sanitize_replay_history(history) == history
|
|
|
|
|
|
def test_sanitize_replay_history_empty():
    """An empty history is returned as an empty list."""
    assert sanitize_replay_history([]) == []
|