hermes-agent/tests/run_agent/test_thinking_only_sanitizer.py
Teknium 2b728e1274
fix(agent): drop thinking-only assistant turns before provider call (#16959)
Adds a pre-call sanitizer that detects assistant messages containing only
reasoning (reasoning / reasoning_content, no visible content, no
tool_calls) and drops them from the API copy. Adjacent user messages
left behind are merged so role alternation is preserved for the
provider.

Mirrors Claude Code's approach in src/utils/messages.ts
(filterOrphanedThinkingOnlyMessages + mergeAdjacentUserMessages). We
drop the whole turn rather than fabricate stub text (the '.' /
'(continued)' pattern from contributor PRs #11098, #13010, #16842 that
were rejected because they put words in the model's mouth).

The stored conversation history (self.messages) is never mutated — only
the per-call api_messages copy. Users still see the reasoning block in
the CLI/gateway transcript; only the wire copy is cleaned. Session
persistence keeps the full trace.

Two call sites covered:
- Main agent loop, after _sanitize_api_messages (catches every turn).
- Iteration-limit-summary fallback path.

Tests: tests/run_agent/test_thinking_only_sanitizer.py — 25 cases
covering detection (string/list content, whitespace-only, tool_calls,
reasoning_details list form), drop behavior, adjacent-user merge
(string+string, list+list, mixed), non-mutation of input dicts, and
system-message handling.

E2E live-tested against 5 providers with a poisoned history (empty
assistant message + reasoning_content): OpenRouter→Anthropic/OpenAI/
DeepSeek-R1/Qwen, native Gemini. All 5 accepted the cleaned request.
Happy-path regression (5/5) confirms the sanitizer is a noop when no
thinking-only turn exists.

Related: #16823 (wontfix — stub-text approach rejected).

Co-authored-by: teknium1 <teknium@users.noreply.github.com>
2026-04-28 03:50:51 -07:00

249 lines
10 KiB
Python

"""Tests for the thinking-only assistant message sanitizer.
Covers _is_thinking_only_assistant() + _drop_thinking_only_and_merge_users()
in run_agent.py. The sanitizer runs on the per-call api_messages copy and
drops assistant turns that contain only reasoning (no visible content, no
tool_calls). Adjacent user messages left behind are merged so role
alternation is preserved for the provider.
Claude Code uses this exact pattern (filterOrphanedThinkingOnlyMessages +
mergeAdjacentUserMessages in src/utils/messages.ts). See #16823 for the
backstory on why the alternative — fabricating "." stub text — was rejected.
"""
from run_agent import AIAgent
# ---------------------------------------------------------------------------
# _is_thinking_only_assistant — detection
# ---------------------------------------------------------------------------
class TestIsThinkingOnlyAssistant:
def test_plain_assistant_reply_is_not_thinking_only(self):
msg = {"role": "assistant", "content": "Hello there"}
assert not AIAgent._is_thinking_only_assistant(msg)
def test_assistant_with_tool_calls_is_not_thinking_only(self):
msg = {
"role": "assistant",
"content": "",
"reasoning": "let me use a tool",
"tool_calls": [{"id": "c1", "function": {"name": "terminal", "arguments": "{}"}}],
}
assert not AIAgent._is_thinking_only_assistant(msg)
def test_empty_content_plus_reasoning_is_thinking_only(self):
msg = {"role": "assistant", "content": "", "reasoning": "thinking..."}
assert AIAgent._is_thinking_only_assistant(msg)
def test_none_content_plus_reasoning_content_is_thinking_only(self):
msg = {"role": "assistant", "content": None, "reasoning_content": "thinking..."}
assert AIAgent._is_thinking_only_assistant(msg)
def test_whitespace_only_content_plus_reasoning_is_thinking_only(self):
msg = {"role": "assistant", "content": " \n\n ", "reasoning": "r"}
assert AIAgent._is_thinking_only_assistant(msg)
def test_empty_content_no_reasoning_is_not_thinking_only(self):
# If there's no reasoning either, this is just an empty turn — let
# other sanitizers handle it (orphan-tool-pair, etc.). We only care
# about the specific thinking-only case.
msg = {"role": "assistant", "content": ""}
assert not AIAgent._is_thinking_only_assistant(msg)
def test_list_content_all_thinking_blocks_is_thinking_only(self):
# Anthropic-native shape
msg = {
"role": "assistant",
"content": [
{"type": "thinking", "thinking": "...", "signature": "sig"},
],
"reasoning": "...",
}
assert AIAgent._is_thinking_only_assistant(msg)
def test_list_content_with_real_text_is_not_thinking_only(self):
msg = {
"role": "assistant",
"content": [
{"type": "thinking", "thinking": "..."},
{"type": "text", "text": "Hi there"},
],
"reasoning": "...",
}
assert not AIAgent._is_thinking_only_assistant(msg)
def test_list_content_with_tool_use_block_is_not_thinking_only(self):
msg = {
"role": "assistant",
"content": [
{"type": "thinking", "thinking": "..."},
{"type": "tool_use", "id": "tu1", "name": "terminal", "input": {}},
],
}
assert not AIAgent._is_thinking_only_assistant(msg)
def test_list_content_thinking_plus_whitespace_text_is_thinking_only(self):
msg = {
"role": "assistant",
"content": [
{"type": "thinking", "thinking": "..."},
{"type": "text", "text": " "},
],
"reasoning": "...",
}
assert AIAgent._is_thinking_only_assistant(msg)
def test_reasoning_details_list_form_detected(self):
msg = {
"role": "assistant",
"content": "",
"reasoning_details": [{"type": "thinking", "text": "..."}],
}
assert AIAgent._is_thinking_only_assistant(msg)
def test_user_message_never_thinking_only(self):
assert not AIAgent._is_thinking_only_assistant({"role": "user", "content": ""})
def test_tool_message_never_thinking_only(self):
assert not AIAgent._is_thinking_only_assistant(
{"role": "tool", "content": "", "tool_call_id": "x"}
)
def test_non_dict_returns_false(self):
assert not AIAgent._is_thinking_only_assistant(None)
assert not AIAgent._is_thinking_only_assistant("hello")
# ---------------------------------------------------------------------------
# _drop_thinking_only_and_merge_users — the full pass
# ---------------------------------------------------------------------------
class TestDropThinkingOnlyAndMergeUsers:
def test_empty_list_passthrough(self):
assert AIAgent._drop_thinking_only_and_merge_users([]) == []
def test_no_thinking_only_messages_is_noop_identity(self):
msgs = [
{"role": "user", "content": "hi"},
{"role": "assistant", "content": "hello"},
]
out = AIAgent._drop_thinking_only_and_merge_users(msgs)
# Should return the original list untouched (identity) when no changes.
assert out is msgs
def test_drops_thinking_only_between_user_messages_and_merges(self):
msgs = [
{"role": "user", "content": "help me with X"},
{"role": "assistant", "content": "", "reasoning": "let me think"},
{"role": "user", "content": "ok continue"},
]
out = AIAgent._drop_thinking_only_and_merge_users(msgs)
assert len(out) == 1
assert out[0]["role"] == "user"
assert out[0]["content"] == "help me with X\n\nok continue"
def test_preserves_alternation_after_drop(self):
msgs = [
{"role": "user", "content": "u1"},
{"role": "assistant", "content": "", "reasoning": "..."},
{"role": "user", "content": "u2"},
{"role": "assistant", "content": "real reply"},
]
out = AIAgent._drop_thinking_only_and_merge_users(msgs)
roles = [m["role"] for m in out]
assert roles == ["user", "assistant"]
assert out[0]["content"] == "u1\n\nu2"
assert out[1]["content"] == "real reply"
def test_does_not_merge_when_drop_leaves_non_adjacent_users(self):
# Thinking-only at end of conversation — no trailing user to merge
msgs = [
{"role": "user", "content": "u1"},
{"role": "assistant", "content": "reply"},
{"role": "user", "content": "u2"},
{"role": "assistant", "content": "", "reasoning": "..."},
]
out = AIAgent._drop_thinking_only_and_merge_users(msgs)
assert [m["role"] for m in out] == ["user", "assistant", "user"]
def test_multiple_thinking_only_in_sequence_collapses(self):
msgs = [
{"role": "user", "content": "u1"},
{"role": "assistant", "content": "", "reasoning": "r1"},
{"role": "assistant", "content": "", "reasoning": "r2"},
{"role": "user", "content": "u2"},
]
out = AIAgent._drop_thinking_only_and_merge_users(msgs)
assert len(out) == 1
assert out[0]["content"] == "u1\n\nu2"
def test_does_not_touch_stored_messages_original_list_unmutated(self):
original_first_user = {"role": "user", "content": "u1"}
original_assistant = {"role": "assistant", "content": "", "reasoning": "..."}
original_second_user = {"role": "user", "content": "u2"}
msgs = [original_first_user, original_assistant, original_second_user]
AIAgent._drop_thinking_only_and_merge_users(msgs)
# Caller passes in a per-call copy already, but the sanitizer itself
# must not rewrite the dicts it was handed on the drop path.
# (It CAN mutate merged dicts — those come from the caller's copy.)
assert original_first_user["content"] == "u1"
assert original_second_user["content"] == "u2"
def test_tool_result_between_user_and_thinking_preserved(self):
# Tool results shouldn't block a drop — but they do block the merge
# (user/tool are different roles). This scenario shouldn't happen in
# practice because a thinking-only turn won't have tool_calls, but if
# it did somehow, the surrounding tool result stays put.
msgs = [
{"role": "user", "content": "u1"},
{"role": "assistant", "tool_calls": [{"id": "c1", "function": {"name": "t", "arguments": "{}"}}]},
{"role": "tool", "tool_call_id": "c1", "content": "ok"},
{"role": "assistant", "content": "", "reasoning": "..."},
{"role": "user", "content": "u2"},
]
out = AIAgent._drop_thinking_only_and_merge_users(msgs)
assert [m["role"] for m in out] == ["user", "assistant", "tool", "user"]
def test_merge_concatenates_list_content_user_messages(self):
msgs = [
{"role": "user", "content": [{"type": "text", "text": "first"}]},
{"role": "assistant", "content": "", "reasoning": "..."},
{"role": "user", "content": [{"type": "text", "text": "second"}]},
]
out = AIAgent._drop_thinking_only_and_merge_users(msgs)
assert len(out) == 1
assert out[0]["content"] == [
{"type": "text", "text": "first"},
{"type": "text", "text": "second"},
]
def test_merge_mixed_string_and_list_content(self):
msgs = [
{"role": "user", "content": "plain text"},
{"role": "assistant", "content": "", "reasoning": "..."},
{"role": "user", "content": [{"type": "text", "text": "block text"}]},
]
out = AIAgent._drop_thinking_only_and_merge_users(msgs)
assert len(out) == 1
assert out[0]["content"] == [
{"type": "text", "text": "plain text"},
{"type": "text", "text": "block text"},
]
def test_system_messages_ignored_by_pass(self):
msgs = [
{"role": "system", "content": "sys prompt"},
{"role": "user", "content": "u1"},
{"role": "assistant", "content": "", "reasoning": "..."},
{"role": "user", "content": "u2"},
]
out = AIAgent._drop_thinking_only_and_merge_users(msgs)
assert len(out) == 2
assert out[0]["role"] == "system"
assert out[1]["role"] == "user"
assert out[1]["content"] == "u1\n\nu2"