mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
When empty-response terminal scaffolding fires on a tool-result turn, _drop_trailing_empty_response_scaffolding left the live history ending at a bare 'tool' message. The next user input then landed as [...tool, user], a protocol-invalid sequence that OpenRouter/Opus and other providers silently fail on (they return empty content). That retriggered the empty-retry recovery every turn, and recovery flags never hit SQLite (no column for them), so history kept looking broken on every reload.

Two fixes:

1. Scaffolding strip rewinds the orphan assistant(tool_calls)+tool pair after popping sentinels. Only fires when scaffolding flags were actually present, so mid-iteration tool loops are untouched.
2. _repair_message_sequence runs right before every API call as a defensive belt: drops stray tool messages with unknown tool_call_ids, merges consecutive user messages so no user input is lost. Does NOT rewind assistant(tool_calls)+tool+user — that pattern is valid when the user redirected before the model got its continuation turn.

Repro: session 20260507_044111_fa7e65. Opus-4.7/OpenRouter returned a content-less response after a 42KB execute_code output, the nudge+retry chain was exhausted (no fallback configured), the terminal sentinel was appended, and scaffolding was stripped, leaving a bare tool tail; the user then typed 'wtf happened..', which landed as a tool→user violation. Every subsequent turn collapsed in <50ms with the same 3-retry empty chain because the API request itself was malformed.

Verified live via HTTP mock: pre-fix reproduced 5 api_calls/0.15s exit 'empty_response_exhausted'; post-fix 1 api_call/0.10s exit 'text_response(finish_reason=stop)'. Three-turn session flows cleanly through the scenario. Full run_agent suite: 1242 passed (0 regressions, 2 pre-existing concurrent_interrupt failures unrelated).
201 lines
6.9 KiB
Python
201 lines
6.9 KiB
Python
"""Tests for pre-API-call message-sequence repair.
|
|
|
|
Covers ``_repair_message_sequence`` and the extended
``_drop_trailing_empty_response_scaffolding`` behavior that rewinds past
orphan tool-result tails. Together these prevent the self-reinforcing
empty-response loop observed in session 20260507_044111_fa7e65, where a
tool-result followed directly by a user message produced silent empty
responses from providers (violating role alternation), which retriggered the
empty-retry recovery every turn.
|
|
"""
|
|
|
|
import copy

from run_agent import AIAgent
|
|
|
|
|
|
def _bare_agent():
    """Return an ``AIAgent`` allocated via ``__new__`` so ``__init__`` never runs.

    The tests below call methods through the class with this object passed
    explicitly as the first argument.
    """
    agent_cls = AIAgent
    return agent_cls.__new__(agent_cls)
|
|
|
|
|
|
# ── _drop_trailing_empty_response_scaffolding ──────────────────────────────
|
|
|
|
def test_drop_scaffolding_rewinds_orphan_tool_tail():
    """Stripping a flagged sentinel must also remove the assistant+tool pair before it."""
    # A single tool call, its result, then the flagged "(empty)" sentinel.
    tool_call = {
        "id": "t1",
        "type": "function",
        "function": {"name": "f", "arguments": "{}"},
    }
    history = [
        {"role": "user", "content": "task"},
        {"role": "assistant", "content": "", "tool_calls": [tool_call]},
        {"role": "tool", "tool_call_id": "t1", "content": "out"},
        {"role": "assistant", "content": "(empty)",
         "_empty_terminal_sentinel": True},
    ]
    agent = _bare_agent()

    AIAgent._drop_trailing_empty_response_scaffolding(agent, history)

    # Only the opening user turn is expected to remain.
    expected = [{"role": "user", "content": "task"}]
    assert history == expected
|
|
|
|
|
|
def test_drop_scaffolding_keeps_tail_when_no_scaffolding():
    """Mid-iteration tool results must NOT be rewound — only if scaffolding fires.

    The history ends in an ordinary assistant(tool_calls) + tool pair and no
    message carries a scaffolding flag, so the call is expected to leave the
    list exactly as it was.
    """
    agent = _bare_agent()
    messages = [
        {"role": "user", "content": "task"},
        {"role": "assistant", "content": "",
         "tool_calls": [{"id": "t1", "type": "function",
                         "function": {"name": "f", "arguments": "{}"}}]},
        {"role": "tool", "tool_call_id": "t1", "content": "out"},
    ]
    # Deep copy: the assistant message holds a nested ``tool_calls`` list. A
    # shallow ``dict(m)`` snapshot would share that list with ``messages``, so
    # an in-place edit of a tool call would change both sides of the
    # comparison and go unnoticed.
    original = copy.deepcopy(messages)

    AIAgent._drop_trailing_empty_response_scaffolding(agent, messages)

    assert messages == original
|
|
|
|
|
|
def test_drop_scaffolding_handles_multiple_parallel_tool_results():
    """One assistant turn with two tool calls: both results are rewound with it."""
    first_call = {
        "id": "t1",
        "type": "function",
        "function": {"name": "f", "arguments": "{}"},
    }
    second_call = {
        "id": "t2",
        "type": "function",
        "function": {"name": "g", "arguments": "{}"},
    }
    history = [
        {"role": "user", "content": "task"},
        {"role": "assistant", "content": "",
         "tool_calls": [first_call, second_call]},
        {"role": "tool", "tool_call_id": "t1", "content": "out1"},
        {"role": "tool", "tool_call_id": "t2", "content": "out2"},
        {"role": "assistant", "content": "(empty)",
         "_empty_terminal_sentinel": True},
    ]
    agent = _bare_agent()

    AIAgent._drop_trailing_empty_response_scaffolding(agent, history)

    # Sentinel, both tool results and the assistant turn are all expected gone.
    expected = [{"role": "user", "content": "task"}]
    assert history == expected
|
|
|
|
|
|
# ── _repair_message_sequence ───────────────────────────────────────────────
|
|
|
|
def test_repair_merges_consecutive_user_messages():
    """Two back-to-back user messages become one, joined by a blank line."""
    history = [
        {"role": "user", "content": "first"},
        {"role": "user", "content": "second"},
    ]
    agent = _bare_agent()

    repair_count = AIAgent._repair_message_sequence(agent, history)

    assert repair_count == 1
    assert len(history) == 1
    merged = history[0]
    assert merged["role"] == "user"
    assert merged["content"] == "first\n\nsecond"
|
|
|
|
|
|
def test_repair_preserves_user_content_when_one_side_empty():
    """An empty user message followed by a real one leaves just the real text."""
    blank_turn = {"role": "user", "content": ""}
    real_turn = {"role": "user", "content": "real message"}
    history = [blank_turn, real_turn]
    agent = _bare_agent()

    AIAgent._repair_message_sequence(agent, history)

    # Compared against a fresh literal rather than ``real_turn`` so the check
    # does not depend on which of the two dicts the repair keeps.
    assert history == [{"role": "user", "content": "real message"}]
|
|
|
|
|
|
def test_repair_does_not_rewind_ongoing_dialog_tool_pair():
    """assistant(tool_calls) + tool + user is a VALID pattern (user redirect
    before the model gets its continuation turn). Repair must not touch it —
    only the flag-gated scaffolding strip rewinds, and only when the
    empty-recovery scaffolding was actually present.
    """
    agent = _bare_agent()
    messages = [
        {"role": "user", "content": "Q1"},
        {"role": "assistant", "content": "",
         "tool_calls": [{"id": "t1", "type": "function",
                         "function": {"name": "f", "arguments": "{}"}}]},
        {"role": "tool", "tool_call_id": "t1", "content": "out"},
        {"role": "user", "content": "Q2"},
    ]
    # Deep copy so the nested ``tool_calls`` list is snapshotted too; a
    # shallow per-message ``dict(m)`` would alias it and hide in-place edits.
    original = copy.deepcopy(messages)

    repairs = AIAgent._repair_message_sequence(agent, messages)

    assert repairs == 0
    assert messages == original
|
|
|
|
|
|
def test_repair_drops_stray_tool_with_unknown_tool_call_id():
    """A tool result whose id matches no assistant tool call must be dropped.

    The assistant message here carries no ``tool_calls``, so the ``tool``
    message with id ``"orphan"`` has nothing to pair with.
    """
    agent = _bare_agent()
    messages = [
        {"role": "user", "content": "hi"},
        {"role": "assistant", "content": "hello"},
        {"role": "tool", "tool_call_id": "orphan", "content": "stray"},
        {"role": "user", "content": "real"},
    ]

    repairs = AIAgent._repair_message_sequence(agent, messages)

    assert repairs >= 1
    assert all(m.get("role") != "tool" for m in messages)
    # ``all`` over an empty list is vacuously true, so also pin that the
    # surrounding turns survive in order; otherwise a repair that wiped the
    # whole history would still pass. Only role/content are compared so any
    # extra bookkeeping keys on the messages do not matter.
    assert [(m.get("role"), m.get("content")) for m in messages] == [
        ("user", "hi"),
        ("assistant", "hello"),
        ("user", "real"),
    ]
|
|
|
|
|
|
def test_repair_leaves_valid_conversation_unchanged():
    """A well-formed user/assistant/tool/assistant/user history needs no repair.

    Expects a repair count of zero and a history equal to its pre-call
    snapshot.
    """
    agent = _bare_agent()
    messages = [
        {"role": "user", "content": "list files"},
        {"role": "assistant", "content": "",
         "tool_calls": [{"id": "t1", "type": "function",
                         "function": {"name": "ls", "arguments": "{}"}}]},
        {"role": "tool", "tool_call_id": "t1", "content": "a.txt b.txt"},
        {"role": "assistant", "content": "Found 2 files"},
        {"role": "user", "content": "more"},
    ]
    # Deep copy so the nested ``tool_calls`` list is part of the snapshot; a
    # shallow per-message ``dict(m)`` would share it with ``messages`` and
    # miss in-place changes to a tool call.
    original = copy.deepcopy(messages)

    repairs = AIAgent._repair_message_sequence(agent, messages)

    assert repairs == 0
    assert messages == original
|
|
|
|
|
|
def test_repair_preserves_multimodal_user_content():
    """Multimodal (list) content must NOT be merged — risks mangling attachments."""
    agent = _bare_agent()
    messages = [
        {"role": "user", "content": [{"type": "text", "text": "hi"},
                                     {"type": "image_url", "image_url": {"url": "..."}}]},
        {"role": "user", "content": "follow-up"},
    ]

    AIAgent._repair_message_sequence(agent, messages)

    # The multimodal user message stays as a distinct message — no merge
    assert len(messages) == 2
    assert isinstance(messages[0]["content"], list)
    # Being a list is not enough: the parts themselves must be intact, and
    # the follow-up text must still be its own message.
    assert messages[0]["content"] == [
        {"type": "text", "text": "hi"},
        {"type": "image_url", "image_url": {"url": "..."}},
    ]
    assert messages[1]["content"] == "follow-up"
|
|
|
|
|
|
def test_repair_empty_messages_returns_zero():
    """Repairing an empty history reports zero repairs and leaves it empty."""
    history = []
    agent = _bare_agent()

    repair_count = AIAgent._repair_message_sequence(agent, history)

    assert repair_count == 0
    assert history == []
|
|
|
|
|
|
def test_repair_preserves_system_messages():
    """A system message followed by a user message must pass through unmodified."""
    history = [
        {"role": "system", "content": "You are..."},
        {"role": "user", "content": "hi"},
    ]
    # Per-message copies taken before the call, for the comparison afterwards.
    snapshot = [entry.copy() for entry in history]
    agent = _bare_agent()

    AIAgent._repair_message_sequence(agent, history)

    assert history == snapshot
|