From d3687d3e817eaf98f983e8bd6358e825074340c9 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Mon, 16 Mar 2026 10:22:44 -0700 Subject: [PATCH] docs: document planned live reasoning token display as future enhancement The streaming infrastructure already fires reasoning deltas via _fire_reasoning_delta() during streaming. The remaining work is the CLI display layer: a dim reasoning box that opens on first reasoning token, streams live, then transitions to the response box. Reference: PR #1214 (raulvidis) for gateway reasoning visibility. --- cli.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cli.py b/cli.py index 4256ac20b3..b02221ec81 100755 --- a/cli.py +++ b/cli.py @@ -1413,6 +1413,15 @@ class HermesCLI: self._invalidate() # ── Streaming display ──────────────────────────────────────────────── + # + # Future: When display.show_reasoning is also enabled, stream reasoning + # tokens into a dim box above the response (like the existing static + # reasoning display, but live). The infrastructure exists — reasoning + # deltas fire via _fire_reasoning_delta() during streaming. The display + # layer needs: a dim reasoning box that opens on first reasoning token, + # accumulates live, then transitions to the response box when content + # tokens start arriving. See PR #1214 (raulvidis) for gateway-side + # reasoning visibility modes as a reference implementation. def _stream_delta(self, text: str) -> None: """Line-buffered streaming callback for real-time token rendering.