fix: show full last assistant response when resuming a session (#8724)

When resuming a session with --resume or -c, the last assistant response was truncated to 200 chars / 3 lines just like older messages in the recap. This forced users to waste tokens re-asking for the response. Now the last assistant message in the recap is shown in full with non-dim styling, so users can see exactly where they left off. Earlier messages remain truncated for compact display. Changes: - Track un-truncated text for the last assistant entry during collection - Replace last entry with full text after history trimming - Render last assistant entry with bold (non-dim) styling - Update existing truncation tests to use multi-message histories - Add new tests for full last response display (char + multiline)
2026-06-14 09:11:54 +00:00 · 2026-04-12 19:07:14 -07:00 · 2026-04-12 19:07:14 -07:00 · 5fae356a85
commit 5fae356a85
parent 9e992df8ae
2 changed files with 64 additions and 4 deletions
--- a/cli.py
+++ b/cli.py
@ -3114,6 +3114,8 @@ class HermesCLI:

        # Collect displayable entries (skip system, tool-result messages)
        entries = []  # list of (role, display_text)
+        _last_asst_idx = None       # index of last assistant entry
+        _last_asst_full = None      # un-truncated display text for last assistant
        for msg in self.conversation_history:
            role = msg.get("role", "")
            content = msg.get("content")
@ -3143,7 +3145,9 @@ class HermesCLI:
                text = "" if content is None else str(content)
                text = _strip_reasoning(text)
                parts = []
+                full_parts = []  # un-truncated version
                if text:
+                    full_parts.append(text)
                    lines = text.splitlines()
                    if len(lines) > MAX_ASST_LINES:
                        text = "\n".join(lines[:MAX_ASST_LINES]) + " ..."
@ -3163,11 +3167,15 @@ class HermesCLI:
                    if len(names) > 4:
                        names_str += ", ..."
                    noun = "call" if tc_count == 1 else "calls"
-                    parts.append(f"[{tc_count} tool {noun}: {names_str}]")
+                    tc_summary = f"[{tc_count} tool {noun}: {names_str}]"
+                    parts.append(tc_summary)
+                    full_parts.append(tc_summary)
                if not parts:
                    # Skip pure-reasoning messages that have no visible output
                    continue
                entries.append(("assistant", " ".join(parts)))
+                _last_asst_idx = len(entries) - 1
+                _last_asst_full = " ".join(full_parts)

        if not entries:
            return
@ -3178,6 +3186,13 @@ class HermesCLI:
            skipped = len(entries) - MAX_DISPLAY_EXCHANGES * 2
            entries = entries[skipped:]

+        # Replace last assistant entry with full (un-truncated) text
+        # so the user can see where they left off without wasting tokens.
+        if _last_asst_idx is not None and _last_asst_full:
+            adj_idx = _last_asst_idx - skipped
+            if 0 <= adj_idx < len(entries):
+                entries[adj_idx] = ("assistant_last", _last_asst_full)
+
        # Build the display using Rich
        from rich.panel import Panel
        from rich.text import Text
@ -3210,6 +3225,13 @@ class HermesCLI:
                lines.append(msg_lines[0] + "\n", style="dim")
                for ml in msg_lines[1:]:
                    lines.append(f"         {ml}\n", style="dim")
+            elif role == "assistant_last":
+                # Last assistant response shown in full, non-dim
+                lines.append("  ◆ Hermes: ", style=f"bold {_assistant_label_c}")
+                msg_lines = text.splitlines()
+                lines.append(msg_lines[0] + "\n", style="")
+                for ml in msg_lines[1:]:
+                    lines.append(f"            {ml}\n", style="")
            else:
                lines.append("  ◆ Hermes: ", style=f"dim bold {_assistant_label_c}")
                msg_lines = text.splitlines()
--- a/tests/cli/test_resume_display.py
+++ b/tests/cli/test_resume_display.py
@ -180,33 +180,71 @@ class TestDisplayResumedHistory:
        assert 200 <= a_count <= 310  # roughly 300 chars (±panel padding)

    def test_long_assistant_message_truncated(self):
+        """Non-last assistant messages are still truncated."""
        cli = _make_cli()
        long_text = "B" * 400
        cli.conversation_history = [
            {"role": "user", "content": "Tell me a lot."},
            {"role": "assistant", "content": long_text},
+            {"role": "user", "content": "And more?"},
+            {"role": "assistant", "content": "Short final reply."},
        ]
        output = self._capture_display(cli)

-        assert "..." in output
+        # The non-last assistant message should be truncated
        assert "B" * 400 not in output
+        # The last assistant message shown in full
+        assert "Short final reply." in output

    def test_multiline_assistant_truncated(self):
+        """Non-last multiline assistant messages are truncated to 3 lines."""
        cli = _make_cli()
        multi = "\n".join([f"Line {i}" for i in range(20)])
        cli.conversation_history = [
            {"role": "user", "content": "Show me lines."},
            {"role": "assistant", "content": multi},
+            {"role": "user", "content": "What else?"},
+            {"role": "assistant", "content": "Done."},
        ]
        output = self._capture_display(cli)

-        # First 3 lines should be there
+        # First 3 lines of non-last assistant should be there
        assert "Line 0" in output
        assert "Line 1" in output
        assert "Line 2" in output
-        # Line 19 should NOT be there (truncated after 3 lines)
+        # Line 19 should NOT be in the truncated message
        assert "Line 19" not in output

+    def test_last_assistant_response_shown_in_full(self):
+        """The last assistant response is shown un-truncated so the user
+        knows where they left off without wasting tokens re-asking."""
+        cli = _make_cli()
+        long_text = "X" * 500
+        cli.conversation_history = [
+            {"role": "user", "content": "Tell me everything."},
+            {"role": "assistant", "content": long_text},
+        ]
+        output = self._capture_display(cli)
+
+        # Full 500-char text should be present (may be line-wrapped by Rich)
+        x_count = output.count("X")
+        assert x_count >= 490  # allow small Rich formatting variance
+
+    def test_last_assistant_multiline_shown_in_full(self):
+        """The last assistant response shows all lines, not just 3."""
+        cli = _make_cli()
+        multi = "\n".join([f"Line {i}" for i in range(20)])
+        cli.conversation_history = [
+            {"role": "user", "content": "Show me everything."},
+            {"role": "assistant", "content": multi},
+        ]
+        output = self._capture_display(cli)
+
+        # All 20 lines should be present since it's the last response
+        assert "Line 0" in output
+        assert "Line 10" in output
+        assert "Line 19" in output
+
    def test_large_history_shows_truncation_indicator(self):
        cli = _make_cli()
        cli.conversation_history = _large_history(n_exchanges=15)