mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
test(agent): cover interrupt tool-tail alternation close (#48879)
Regression coverage for the synthetic-assistant close: interrupt after a successful tool must persist an assistant tail (placeholder when no delivered text), real delivered text is preserved, and non-interrupted or non-tool tails are left untouched.
This commit is contained in:
parent
81d2dc5d0f
commit
0957d77187
1 changed files with 198 additions and 0 deletions
198
tests/agent/test_turn_finalizer_interrupt_alternation.py
Normal file
198
tests/agent/test_turn_finalizer_interrupt_alternation.py
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
"""Regression test for #48879.
|
||||
|
||||
When a turn is interrupted via ``/stop`` right after a tool completes — but
|
||||
before the assistant streams any final text — the transcript tail is a raw
|
||||
``tool`` message. Persisting that tail unmodified means the next user message
|
||||
lands as ``... tool → user``, a role-alternation violation that strict
|
||||
providers (Gemini, Claude) react to by hallucinating a continuation of the
|
||||
user's message before transitioning into the assistant persona.
|
||||
|
||||
``finalize_turn`` closes the tool-call sequence on interrupt by appending a
|
||||
synthetic ``assistant`` message before persistence. ``final_response`` is
|
||||
typically empty on an interrupt, so the placeholder text is used rather than
|
||||
an empty-content assistant turn.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.turn_finalizer import finalize_turn
|
||||
|
||||
|
||||
class _StubBudget:
|
||||
used = 1
|
||||
max_total = 90
|
||||
remaining = 89
|
||||
|
||||
|
||||
class _StubCompressor:
|
||||
last_prompt_tokens = 0
|
||||
|
||||
|
||||
class _StubAgent:
|
||||
"""Minimal agent surface that ``finalize_turn`` reads from."""
|
||||
|
||||
def __init__(self):
|
||||
self.max_iterations = 90
|
||||
self.iteration_budget = _StubBudget()
|
||||
self.context_compressor = _StubCompressor()
|
||||
self.model = "stub/model"
|
||||
self.provider = "stub"
|
||||
self.base_url = "http://stub"
|
||||
self.session_id = "sess-1"
|
||||
self.quiet_mode = True
|
||||
self.platform = "cli"
|
||||
self._interrupt_requested = False
|
||||
self._interrupt_message = None
|
||||
self._tool_guardrail_halt_decision = None
|
||||
self._response_was_previewed = False
|
||||
self._skill_nudge_interval = 0
|
||||
self._iters_since_skill = 0
|
||||
for attr in (
|
||||
"session_input_tokens",
|
||||
"session_output_tokens",
|
||||
"session_cache_read_tokens",
|
||||
"session_cache_write_tokens",
|
||||
"session_reasoning_tokens",
|
||||
"session_prompt_tokens",
|
||||
"session_completion_tokens",
|
||||
"session_total_tokens",
|
||||
"session_estimated_cost_usd",
|
||||
):
|
||||
setattr(self, attr, 0)
|
||||
self.session_cost_status = "ok"
|
||||
self.session_cost_source = "stub"
|
||||
self.persisted_messages = None
|
||||
|
||||
# --- fallible cleanup surfaces (all succeed here) ------------------
|
||||
def _save_trajectory(self, *a, **k):
|
||||
pass
|
||||
|
||||
def _cleanup_task_resources(self, *a, **k):
|
||||
pass
|
||||
|
||||
def _drop_trailing_empty_response_scaffolding(self, messages):
|
||||
# A clean interrupt sets no empty-response scaffolding flags, so
|
||||
# the real method returns early and leaves the tool tail in place.
|
||||
# Model that here as a no-op.
|
||||
pass
|
||||
|
||||
def _persist_session(self, messages, conversation_history):
|
||||
# Snapshot the role sequence at the moment of persistence.
|
||||
self.persisted_messages = [dict(m) for m in messages]
|
||||
|
||||
# --- harmless no-ops ------------------------------------------------
|
||||
def _emit_status(self, *a, **k):
|
||||
pass
|
||||
|
||||
def _safe_print(self, *a, **k):
|
||||
pass
|
||||
|
||||
def _file_mutation_verifier_enabled(self):
|
||||
return False
|
||||
|
||||
def _turn_completion_explainer_enabled(self):
|
||||
return False
|
||||
|
||||
def _drain_pending_steer(self):
|
||||
return None
|
||||
|
||||
def clear_interrupt(self):
|
||||
pass
|
||||
|
||||
def _sync_external_memory_for_turn(self, **k):
|
||||
pass
|
||||
|
||||
|
||||
def _interrupted_tool_tail():
|
||||
"""A transcript interrupted after a successful tool, before any
|
||||
assistant text — the exact #48879 shape."""
|
||||
return [
|
||||
{"role": "user", "content": "edit the file"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"tool_calls": [
|
||||
{"id": "c1", "function": {"name": "patch", "arguments": "{}"}}
|
||||
],
|
||||
},
|
||||
{"role": "tool", "tool_call_id": "c1", "content": "ok edited"},
|
||||
]
|
||||
|
||||
|
||||
def _finalize(agent, messages, *, interrupted, final_response=None):
|
||||
return finalize_turn(
|
||||
agent,
|
||||
final_response=final_response,
|
||||
api_call_count=1,
|
||||
interrupted=interrupted,
|
||||
failed=False,
|
||||
messages=messages,
|
||||
conversation_history=None,
|
||||
effective_task_id="task-1",
|
||||
turn_id="turn-1",
|
||||
user_message="edit the file",
|
||||
original_user_message="edit the file",
|
||||
_should_review_memory=False,
|
||||
_turn_exit_reason="interrupted_by_user",
|
||||
)
|
||||
|
||||
|
||||
def _assert_no_tool_then_user(messages):
|
||||
for i in range(len(messages) - 1):
|
||||
if messages[i].get("role") == "tool":
|
||||
assert messages[i + 1].get("role") != "user", (
|
||||
f"role-alternation violation: tool → user at index {i}"
|
||||
)
|
||||
|
||||
|
||||
def test_interrupt_after_tool_closes_sequence_with_placeholder():
|
||||
agent = _StubAgent()
|
||||
messages = _interrupted_tool_tail()
|
||||
_finalize(agent, messages, interrupted=True, final_response=None)
|
||||
|
||||
# Tail must now be an assistant message, not a raw tool result.
|
||||
assert messages[-1]["role"] == "assistant"
|
||||
# Empty final_response falls back to the explicit placeholder rather
|
||||
# than persisting an empty-content assistant turn.
|
||||
assert messages[-1]["content"] == "Operation interrupted."
|
||||
|
||||
# The persisted snapshot is alternation-safe: appending a new user
|
||||
# message would follow an assistant, not an orphan tool.
|
||||
assert agent.persisted_messages is not None
|
||||
assert agent.persisted_messages[-1]["role"] == "assistant"
|
||||
follow_on = agent.persisted_messages + [{"role": "user", "content": "forget it"}]
|
||||
_assert_no_tool_then_user(follow_on)
|
||||
|
||||
|
||||
def test_interrupt_after_tool_keeps_delivered_text_when_present():
|
||||
agent = _StubAgent()
|
||||
messages = _interrupted_tool_tail()
|
||||
_finalize(agent, messages, interrupted=True, final_response="Partial answer so far")
|
||||
|
||||
assert messages[-1]["role"] == "assistant"
|
||||
# Real delivered text is preserved, not clobbered by the placeholder.
|
||||
assert messages[-1]["content"] == "Partial answer so far"
|
||||
|
||||
|
||||
def test_non_interrupted_tool_tail_is_left_untouched():
|
||||
# A turn that ends on a tool tail WITHOUT an interrupt (mid-progress
|
||||
# tool loop) must not get a synthetic close — that is normal dialog
|
||||
# state handled elsewhere.
|
||||
agent = _StubAgent()
|
||||
messages = _interrupted_tool_tail()
|
||||
_finalize(agent, messages, interrupted=False, final_response=None)
|
||||
assert messages[-1]["role"] == "tool"
|
||||
|
||||
|
||||
def test_interrupt_without_tool_tail_adds_nothing():
|
||||
# Interrupt while the tail is already an assistant/user message: no
|
||||
# synthetic close needed.
|
||||
agent = _StubAgent()
|
||||
messages = [
|
||||
{"role": "user", "content": "hi"},
|
||||
{"role": "assistant", "content": "partial reply"},
|
||||
]
|
||||
before = len(messages)
|
||||
_finalize(agent, messages, interrupted=True, final_response="partial reply")
|
||||
assert len(messages) == before
|
||||
assert messages[-1]["role"] == "assistant"
|
||||
Loading…
Add table
Add a link
Reference in a new issue