mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
test: pin per-turn reasoning extraction semantics
Covers six scenarios for the reasoning-box extraction loop: - simple turn with reasoning - simple turn with no reasoning - tool-calling turn where reasoning lives on the tool-call step - prior turn had reasoning, current turn does not (the stale-display bug the fix exists for) - tool-calling turn where reasoning lives on BOTH steps (latest wins) - empty-string reasoning treated as missing. Also updates the five inline replica loops in tests/cli/test_reasoning_command.py to match the new turn-boundary shape so the test file reflects production semantics.
This commit is contained in:
parent
efe1cb00c8
commit
9e0ef2a1bc
2 changed files with 117 additions and 0 deletions
|
|
@ -178,6 +178,8 @@ class TestLastReasoningInResult(unittest.TestCase):
|
||||||
messages = self._build_messages(reasoning="Let me think...")
|
messages = self._build_messages(reasoning="Let me think...")
|
||||||
last_reasoning = None
|
last_reasoning = None
|
||||||
for msg in reversed(messages):
|
for msg in reversed(messages):
|
||||||
|
if msg.get("role") == "user":
|
||||||
|
break
|
||||||
if msg.get("role") == "assistant" and msg.get("reasoning"):
|
if msg.get("role") == "assistant" and msg.get("reasoning"):
|
||||||
last_reasoning = msg["reasoning"]
|
last_reasoning = msg["reasoning"]
|
||||||
break
|
break
|
||||||
|
|
@ -187,6 +189,8 @@ class TestLastReasoningInResult(unittest.TestCase):
|
||||||
messages = self._build_messages(reasoning=None)
|
messages = self._build_messages(reasoning=None)
|
||||||
last_reasoning = None
|
last_reasoning = None
|
||||||
for msg in reversed(messages):
|
for msg in reversed(messages):
|
||||||
|
if msg.get("role") == "user":
|
||||||
|
break
|
||||||
if msg.get("role") == "assistant" and msg.get("reasoning"):
|
if msg.get("role") == "assistant" and msg.get("reasoning"):
|
||||||
last_reasoning = msg["reasoning"]
|
last_reasoning = msg["reasoning"]
|
||||||
break
|
break
|
||||||
|
|
@ -201,6 +205,8 @@ class TestLastReasoningInResult(unittest.TestCase):
|
||||||
]
|
]
|
||||||
last_reasoning = None
|
last_reasoning = None
|
||||||
for msg in reversed(messages):
|
for msg in reversed(messages):
|
||||||
|
if msg.get("role") == "user":
|
||||||
|
break
|
||||||
if msg.get("role") == "assistant" and msg.get("reasoning"):
|
if msg.get("role") == "assistant" and msg.get("reasoning"):
|
||||||
last_reasoning = msg["reasoning"]
|
last_reasoning = msg["reasoning"]
|
||||||
break
|
break
|
||||||
|
|
@ -210,6 +216,8 @@ class TestLastReasoningInResult(unittest.TestCase):
|
||||||
messages = self._build_messages(reasoning="")
|
messages = self._build_messages(reasoning="")
|
||||||
last_reasoning = None
|
last_reasoning = None
|
||||||
for msg in reversed(messages):
|
for msg in reversed(messages):
|
||||||
|
if msg.get("role") == "user":
|
||||||
|
break
|
||||||
if msg.get("role") == "assistant" and msg.get("reasoning"):
|
if msg.get("role") == "assistant" and msg.get("reasoning"):
|
||||||
last_reasoning = msg["reasoning"]
|
last_reasoning = msg["reasoning"]
|
||||||
break
|
break
|
||||||
|
|
@ -584,6 +592,8 @@ class TestEndToEndPipeline(unittest.TestCase):
|
||||||
|
|
||||||
last_reasoning = None
|
last_reasoning = None
|
||||||
for msg in reversed(messages):
|
for msg in reversed(messages):
|
||||||
|
if msg.get("role") == "user":
|
||||||
|
break
|
||||||
if msg.get("role") == "assistant" and msg.get("reasoning"):
|
if msg.get("role") == "assistant" and msg.get("reasoning"):
|
||||||
last_reasoning = msg["reasoning"]
|
last_reasoning = msg["reasoning"]
|
||||||
break
|
break
|
||||||
|
|
|
||||||
107
tests/run_agent/test_last_reasoning_per_turn.py
Normal file
107
tests/run_agent/test_last_reasoning_per_turn.py
Normal file
|
|
@ -0,0 +1,107 @@
|
||||||
|
"""Tests for per-turn reasoning extraction in AIAgent.run_conversation.
|
||||||
|
|
||||||
|
Verifies the reasoning field returned to display layers (CLI reasoning box,
|
||||||
|
gateway reasoning footer, TUI reasoning event) only reflects the CURRENT
|
||||||
|
turn's reasoning — never leaks from a prior turn — and is picked up
|
||||||
|
correctly when reasoning is attached to a tool-calling assistant step
|
||||||
|
rather than the final-answer assistant step.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_last_reasoning(messages):
|
||||||
|
"""Replica of the extraction loop in run_agent.py (~line 13867).
|
||||||
|
|
||||||
|
Tests pin the loop's behaviour so that refactors can't silently
|
||||||
|
regress the per-turn semantic.
|
||||||
|
"""
|
||||||
|
last_reasoning = None
|
||||||
|
for msg in reversed(messages):
|
||||||
|
if msg.get("role") == "user":
|
||||||
|
break
|
||||||
|
if msg.get("role") == "assistant" and msg.get("reasoning"):
|
||||||
|
last_reasoning = msg["reasoning"]
|
||||||
|
break
|
||||||
|
return last_reasoning
|
||||||
|
|
||||||
|
|
||||||
|
def test_simple_turn_reasoning_present():
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "hello"},
|
||||||
|
{"role": "assistant", "content": "hi", "reasoning": "greeting the user"},
|
||||||
|
]
|
||||||
|
assert _extract_last_reasoning(messages) == "greeting the user"
|
||||||
|
|
||||||
|
|
||||||
|
def test_simple_turn_no_reasoning():
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "hello"},
|
||||||
|
{"role": "assistant", "content": "hi", "reasoning": None},
|
||||||
|
]
|
||||||
|
assert _extract_last_reasoning(messages) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_tool_call_turn_reasoning_on_tool_call_step():
|
||||||
|
"""When the model reasons on the tool-call step and the final-answer
|
||||||
|
step has no reasoning (Claude thinking / DeepSeek v4 / Codex Responses
|
||||||
|
pattern), the box must show the tool-call-step reasoning, not empty.
|
||||||
|
"""
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "search the repo for X"},
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "",
|
||||||
|
"reasoning": "I should use search_files",
|
||||||
|
"tool_calls": [{"id": "c1", "type": "function",
|
||||||
|
"function": {"name": "search_files", "arguments": "{}"}}],
|
||||||
|
},
|
||||||
|
{"role": "tool", "tool_call_id": "c1", "content": "3 matches"},
|
||||||
|
{"role": "assistant", "content": "Found 3 matches", "reasoning": None},
|
||||||
|
]
|
||||||
|
assert _extract_last_reasoning(messages) == "I should use search_files"
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_stale_reasoning_across_turns():
|
||||||
|
"""The regression the whole change exists for. Prior turn had
|
||||||
|
reasoning; current turn has none. The reasoning box must NOT show
|
||||||
|
the prior turn's text.
|
||||||
|
"""
|
||||||
|
messages = [
|
||||||
|
# prior turn
|
||||||
|
{"role": "user", "content": "explain quantum tunneling"},
|
||||||
|
{"role": "assistant", "content": "It's when...",
|
||||||
|
"reasoning": "tunneling happens when particles..."},
|
||||||
|
# current turn
|
||||||
|
{"role": "user", "content": "thanks"},
|
||||||
|
{"role": "assistant", "content": "You're welcome!", "reasoning": None},
|
||||||
|
]
|
||||||
|
assert _extract_last_reasoning(messages) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_tool_call_turn_picks_latest_reasoning_within_turn():
|
||||||
|
"""If BOTH the tool-call step and the final step have reasoning
|
||||||
|
(uncommon but possible), the final-step reasoning wins — it's the
|
||||||
|
most recent thought within the current turn.
|
||||||
|
"""
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "search and summarize"},
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "",
|
||||||
|
"reasoning": "initial plan",
|
||||||
|
"tool_calls": [{"id": "c1", "type": "function",
|
||||||
|
"function": {"name": "search_files", "arguments": "{}"}}],
|
||||||
|
},
|
||||||
|
{"role": "tool", "tool_call_id": "c1", "content": "results"},
|
||||||
|
{"role": "assistant", "content": "Here's the summary",
|
||||||
|
"reasoning": "synthesized view of results"},
|
||||||
|
]
|
||||||
|
assert _extract_last_reasoning(messages) == "synthesized view of results"
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_string_reasoning_treated_as_missing():
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "hi"},
|
||||||
|
{"role": "assistant", "content": "hello", "reasoning": ""},
|
||||||
|
]
|
||||||
|
assert _extract_last_reasoning(messages) is None
|
||||||
Loading…
Add table
Add a link
Reference in a new issue