hermes-agent/tests/run_agent/test_empty_response_recovery_persistence.py
Teknium 812ce0b987
fix(run_agent): break permanent empty-response loop from orphan tool-tail (#21385)
When empty-response terminal scaffolding fires on a tool-result turn,
_drop_trailing_empty_response_scaffolding left the live history ending at
a bare 'tool' message. The next user input then landed as [...tool, user],
a protocol-invalid sequence that OpenRouter/Opus and other providers
silently fail on (they return empty content). That retriggered the empty-retry
recovery every turn, and recovery flags never hit SQLite (no column for
them), so history kept looking broken on every reload.

Two fixes:

1. Scaffolding strip rewinds the orphan assistant(tool_calls)+tool pair
   after popping sentinels. Only fires when scaffolding flags were
   actually present, so mid-iteration tool loops are untouched.

2. _repair_message_sequence runs right before every API call as a
   defensive belt: drops stray tool messages with unknown tool_call_ids,
   merges consecutive user messages so no user input is lost. Does NOT
   rewind assistant(tool_calls)+tool+user — that pattern is valid when
   the user redirected before the model got its continuation turn.

Repro: session 20260507_044111_fa7e65. Opus-4.7/OpenRouter returned a
content-less response after a 42KB execute_code output, nudge+retry
chain exhausted (no fallback configured), terminal sentinel appended,
scaffolding stripped leaving bare tool tail, user typed 'wtf happened..'
and landed as tool→user violation. Every subsequent turn collapsed in
<50ms with the same 3-retry empty chain because the API request itself
was malformed.

Verified live via HTTP mock: pre-fix reproduced 5 api_calls/0.15s exit
'empty_response_exhausted'; post-fix 1 api_call/0.10s exit
'text_response(finish_reason=stop)'. Three-turn session flows cleanly
through the scenario. Full run_agent suite: 1242 passed (0 regressions,
2 pre-existing concurrent_interrupt failures unrelated).
2026-05-07 08:35:10 -07:00

98 lines
3.5 KiB
Python

"""Regression tests for empty-response recovery transcript persistence."""
from run_agent import AIAgent
def _agent_with_stubbed_persistence():
    """Build a bare ``AIAgent`` whose persistence hooks record into lists.

    The instance is created through ``AIAgent.__new__`` so ``__init__`` never
    runs; only the attributes assigned below are set. ``_save_session_log``
    and ``_flush_messages_to_session_db`` are replaced with recorders that
    append a snapshot of the messages they receive to ``saved_session_logs``
    and ``flushed_session_db_messages`` respectively.

    Returns:
        The stubbed agent instance.
    """
    stub = AIAgent.__new__(AIAgent)

    # State assigned up front for the persistence path under test.
    stub._persist_user_message_idx = None
    stub._persist_user_message_override = None
    stub._session_db = None
    stub._session_messages = []

    # Capture buffers inspected by the tests.
    stub.saved_session_logs = []
    stub.flushed_session_db_messages = []

    def _snapshot(messages):
        # Shallow-copy each message so later key changes on the live dicts
        # do not alter what was recorded.
        return [entry.copy() for entry in messages]

    def _record_session_log(messages):
        stub.saved_session_logs.append(_snapshot(messages))

    def _record_session_db_flush(messages, conversation_history=None):
        stub.flushed_session_db_messages.append(_snapshot(messages))

    stub._save_session_log = _record_session_log
    stub._flush_messages_to_session_db = _record_session_db_flush
    return stub
def test_persist_session_strips_trailing_empty_recovery_scaffolding():
    """Persisting removes recovery scaffolding and the orphaned tool pair.

    The transcript ends with two messages flagged
    ``_empty_recovery_synthetic`` that follow an assistant(tool_calls) + tool
    exchange. Once the flagged messages are stripped, the tool exchange must
    be rewound too: leaving a bare ``tool`` message at the tail means the
    next user message produces a protocol-invalid sequence that most
    providers silently fail on, which retriggers the empty-retry loop
    indefinitely.
    """
    agent = _agent_with_stubbed_persistence()

    nudge_text = (
        "You just executed tool calls but returned an empty response. "
        "Please process the tool results above and continue with the task."
    )
    tool_call = {
        "id": "call_1",
        "type": "function",
        "function": {"name": "x", "arguments": "{}"},
    }
    messages = [
        {"role": "user", "content": "run the task"},
        {"role": "assistant", "content": "", "tool_calls": [tool_call]},
        {"role": "tool", "content": "{}", "tool_call_id": "call_1"},
        {
            "role": "assistant",
            "content": "(empty)",
            "_empty_recovery_synthetic": True,
        },
        {
            "role": "user",
            "content": nudge_text,
            "_empty_recovery_synthetic": True,
        },
    ]

    AIAgent._persist_session(agent, messages, conversation_history=[])

    # Strip + rewind leaves only the original request: the
    # assistant(tool_calls) + tool pair goes away because its iteration
    # never produced a real response.
    expected_transcript = [{"role": "user", "content": "run the task"}]
    assert messages == expected_transcript
    assert agent.saved_session_logs[-1] == messages
    assert not any(msg.get("_empty_recovery_synthetic") for msg in messages)
def test_persist_session_keeps_unmarked_terminal_empty_response():
    """An ``(empty)`` assistant reply without marker flags is kept as-is.

    The trailing assistant message carries neither scaffolding flag, so the
    transcript must come out of ``_persist_session`` unchanged and be handed
    to the session log in the same form.
    """
    agent = _agent_with_stubbed_persistence()

    def _build_transcript():
        # Fresh dicts on every call, so the expected value never shares
        # objects with the list handed to the agent.
        return [
            {"role": "user", "content": "run the task"},
            {"role": "assistant", "content": "(empty)"},
        ]

    messages = _build_transcript()

    AIAgent._persist_session(agent, messages, conversation_history=[])

    assert messages == _build_transcript()
    assert agent.saved_session_logs[-1] == messages
def test_persist_session_strips_marked_terminal_empty_sentinel():
    """A trailing message flagged ``_empty_terminal_sentinel`` is removed.

    After ``_persist_session`` only the user message may remain, the session
    log must receive that same transcript, and no surviving message may still
    carry the sentinel flag.
    """
    agent = _agent_with_stubbed_persistence()

    terminal_sentinel = {
        "role": "assistant",
        "content": "(empty)",
        "_empty_terminal_sentinel": True,
    }
    messages = [{"role": "user", "content": "continue"}, terminal_sentinel]

    AIAgent._persist_session(agent, messages, conversation_history=[])

    expected_transcript = [{"role": "user", "content": "continue"}]
    assert messages == expected_transcript
    assert agent.saved_session_logs[-1] == messages
    assert not any(msg.get("_empty_terminal_sentinel") for msg in messages)