mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: show full last assistant response when resuming a session (#8724)
When resuming a session with `--resume` or `-c`, the last assistant response was truncated to 200 chars / 3 lines, just like older messages in the recap. This forced users to waste tokens re-asking for the response.

Now the last assistant message in the recap is shown in full with non-dim styling, so users can see exactly where they left off. Earlier messages remain truncated for compact display.

Changes:
- Track un-truncated text for the last assistant entry during collection
- Replace last entry with full text after history trimming
- Render last assistant entry with bold (non-dim) styling
- Update existing truncation tests to use multi-message histories
- Add new tests for full last response display (char + multiline)
This commit is contained in:
parent
9e992df8ae
commit
5fae356a85
2 changed files with 64 additions and 4 deletions
24
cli.py
24
cli.py
|
|
@@ -3114,6 +3114,8 @@ class HermesCLI:
|
|||
|
||||
# Collect displayable entries (skip system, tool-result messages)
|
||||
entries = [] # list of (role, display_text)
|
||||
_last_asst_idx = None # index of last assistant entry
|
||||
_last_asst_full = None # un-truncated display text for last assistant
|
||||
for msg in self.conversation_history:
|
||||
role = msg.get("role", "")
|
||||
content = msg.get("content")
|
||||
|
|
@@ -3143,7 +3145,9 @@ class HermesCLI:
|
|||
text = "" if content is None else str(content)
|
||||
text = _strip_reasoning(text)
|
||||
parts = []
|
||||
full_parts = [] # un-truncated version
|
||||
if text:
|
||||
full_parts.append(text)
|
||||
lines = text.splitlines()
|
||||
if len(lines) > MAX_ASST_LINES:
|
||||
text = "\n".join(lines[:MAX_ASST_LINES]) + " ..."
|
||||
|
|
@@ -3163,11 +3167,15 @@ class HermesCLI:
|
|||
if len(names) > 4:
|
||||
names_str += ", ..."
|
||||
noun = "call" if tc_count == 1 else "calls"
|
||||
parts.append(f"[{tc_count} tool {noun}: {names_str}]")
|
||||
tc_summary = f"[{tc_count} tool {noun}: {names_str}]"
|
||||
parts.append(tc_summary)
|
||||
full_parts.append(tc_summary)
|
||||
if not parts:
|
||||
# Skip pure-reasoning messages that have no visible output
|
||||
continue
|
||||
entries.append(("assistant", " ".join(parts)))
|
||||
_last_asst_idx = len(entries) - 1
|
||||
_last_asst_full = " ".join(full_parts)
|
||||
|
||||
if not entries:
|
||||
return
|
||||
|
|
@@ -3178,6 +3186,13 @@ class HermesCLI:
|
|||
skipped = len(entries) - MAX_DISPLAY_EXCHANGES * 2
|
||||
entries = entries[skipped:]
|
||||
|
||||
# Replace last assistant entry with full (un-truncated) text
|
||||
# so the user can see where they left off without wasting tokens.
|
||||
if _last_asst_idx is not None and _last_asst_full:
|
||||
adj_idx = _last_asst_idx - skipped
|
||||
if 0 <= adj_idx < len(entries):
|
||||
entries[adj_idx] = ("assistant_last", _last_asst_full)
|
||||
|
||||
# Build the display using Rich
|
||||
from rich.panel import Panel
|
||||
from rich.text import Text
|
||||
|
|
@@ -3210,6 +3225,13 @@ class HermesCLI:
|
|||
lines.append(msg_lines[0] + "\n", style="dim")
|
||||
for ml in msg_lines[1:]:
|
||||
lines.append(f" {ml}\n", style="dim")
|
||||
elif role == "assistant_last":
|
||||
# Last assistant response shown in full, non-dim
|
||||
lines.append(" ◆ Hermes: ", style=f"bold {_assistant_label_c}")
|
||||
msg_lines = text.splitlines()
|
||||
lines.append(msg_lines[0] + "\n", style="")
|
||||
for ml in msg_lines[1:]:
|
||||
lines.append(f" {ml}\n", style="")
|
||||
else:
|
||||
lines.append(" ◆ Hermes: ", style=f"dim bold {_assistant_label_c}")
|
||||
msg_lines = text.splitlines()
|
||||
|
|
|
|||
|
|
@@ -180,33 +180,71 @@ class TestDisplayResumedHistory:
|
|||
assert 200 <= a_count <= 310 # roughly 300 chars (±panel padding)
|
||||
|
||||
def test_long_assistant_message_truncated(self):
|
||||
"""Non-last assistant messages are still truncated."""
|
||||
cli = _make_cli()
|
||||
long_text = "B" * 400
|
||||
cli.conversation_history = [
|
||||
{"role": "user", "content": "Tell me a lot."},
|
||||
{"role": "assistant", "content": long_text},
|
||||
{"role": "user", "content": "And more?"},
|
||||
{"role": "assistant", "content": "Short final reply."},
|
||||
]
|
||||
output = self._capture_display(cli)
|
||||
|
||||
assert "..." in output
|
||||
# The non-last assistant message should be truncated
|
||||
assert "B" * 400 not in output
|
||||
# The last assistant message shown in full
|
||||
assert "Short final reply." in output
|
||||
|
||||
def test_multiline_assistant_truncated(self):
|
||||
"""Non-last multiline assistant messages are truncated to 3 lines."""
|
||||
cli = _make_cli()
|
||||
multi = "\n".join([f"Line {i}" for i in range(20)])
|
||||
cli.conversation_history = [
|
||||
{"role": "user", "content": "Show me lines."},
|
||||
{"role": "assistant", "content": multi},
|
||||
{"role": "user", "content": "What else?"},
|
||||
{"role": "assistant", "content": "Done."},
|
||||
]
|
||||
output = self._capture_display(cli)
|
||||
|
||||
# First 3 lines should be there
|
||||
# First 3 lines of non-last assistant should be there
|
||||
assert "Line 0" in output
|
||||
assert "Line 1" in output
|
||||
assert "Line 2" in output
|
||||
# Line 19 should NOT be there (truncated after 3 lines)
|
||||
# Line 19 should NOT be in the truncated message
|
||||
assert "Line 19" not in output
|
||||
|
||||
def test_last_assistant_response_shown_in_full(self):
|
||||
"""The last assistant response is shown un-truncated so the user
|
||||
knows where they left off without wasting tokens re-asking."""
|
||||
cli = _make_cli()
|
||||
long_text = "X" * 500
|
||||
cli.conversation_history = [
|
||||
{"role": "user", "content": "Tell me everything."},
|
||||
{"role": "assistant", "content": long_text},
|
||||
]
|
||||
output = self._capture_display(cli)
|
||||
|
||||
# Full 500-char text should be present (may be line-wrapped by Rich)
|
||||
x_count = output.count("X")
|
||||
assert x_count >= 490 # allow small Rich formatting variance
|
||||
|
||||
def test_last_assistant_multiline_shown_in_full(self):
|
||||
"""The last assistant response shows all lines, not just 3."""
|
||||
cli = _make_cli()
|
||||
multi = "\n".join([f"Line {i}" for i in range(20)])
|
||||
cli.conversation_history = [
|
||||
{"role": "user", "content": "Show me everything."},
|
||||
{"role": "assistant", "content": multi},
|
||||
]
|
||||
output = self._capture_display(cli)
|
||||
|
||||
# All 20 lines should be present since it's the last response
|
||||
assert "Line 0" in output
|
||||
assert "Line 10" in output
|
||||
assert "Line 19" in output
|
||||
|
||||
def test_large_history_shows_truncation_indicator(self):
|
||||
cli = _make_cli()
|
||||
cli.conversation_history = _large_history(n_exchanges=15)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue