From 95d53c3bcb066ab4180f1c6e2493727ef2ecdee6 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 21 Jun 2026 20:21:11 -0700
Subject: [PATCH] =?UTF-8?q?feat(cli):=20/reasoning=20full=20=E2=80=94=20sh?=
 =?UTF-8?q?ow=20complete=20thinking,=20not=2010-line=20clamp=20(#50499)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(cli): /reasoning full to show complete thinking, not 10-line clamp

The post-response Reasoning recap box hard-clamped long thinking to the
first 10 lines, so there was no way to see the full reasoning trace after
a turn (live streaming already shows it in full). Add display.reasoning_full
(default off) plus /reasoning full|clamp to toggle it at runtime; the clamp
truncation note now points at the command. Addresses repeated user requests
to show all thinking tokens.

* test(gateway): de-snapshot /reasoning help assertion

The test froze the exact args-hint literal '/reasoning [level|show|hide]',
which the new full/clamp args change to '[level|show|hide|full|clamp]'.
Convert to an invariant: assert /reasoning is in help and carries its core
args, not the exact hint string.

* feat(tui): /reasoning full|clamp parity in tui_gateway

The classic-CLI reasoning_full toggle had no TUI equivalent — typing
/reasoning full in the TUI fell through to parse_reasoning_effort and
errored. The TUI renders thinking as an expand/collapse section (no fixed
10-line recap), so map full -> sections.thinking=expanded (raw, uncapped
via thinkingPreview mode='full') and clamp -> collapsed, persisting
display.reasoning_full for cross-surface config consistency.
---
 cli.py                                        | 11 ++-
 hermes_cli/cli_commands_mixin.py              | 22 ++++-
 hermes_cli/commands.py                        |  4 +-
 hermes_cli/config.py                          |  4 +
 tests/gateway/test_reasoning_command.py       |  6 +-
 .../hermes_cli/test_reasoning_full_command.py | 81 +++++++++++++++++++
 tests/test_tui_gateway_server.py              | 27 +++++++
 tui_gateway/server.py                         | 39 +++++++++
 8 files changed, 186 insertions(+), 8 deletions(-)
 create mode 100644 tests/hermes_cli/test_reasoning_full_command.py

diff --git a/cli.py b/cli.py
index 4627ce2b2af..641044bc924 100644
--- a/cli.py
+++ b/cli.py
@@ -452,6 +452,7 @@ def load_cli_config() -> Dict[str, Any]:
             "resume_max_assistant_lines": 3,
             "resume_skip_tool_only": True,
             "show_reasoning": False,
+            "reasoning_full": False,
             "streaming": True,
             "busy_input_mode": "interrupt",
             "persistent_output": True,
@@ -3405,6 +3406,9 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
         self.bell_on_complete = CLI_CONFIG["display"].get("bell_on_complete", False)
         # show_reasoning: display model thinking/reasoning before the response
         self.show_reasoning = CLI_CONFIG["display"].get("show_reasoning", False)
+        # reasoning_full: when reasoning display is on, print the post-response
+        # recap box uncollapsed instead of clamping to the first 10 lines.
+        self.reasoning_full = CLI_CONFIG["display"].get("reasoning_full", False)
         _configure_output_history(
             enabled=CLI_CONFIG["display"].get("persistent_output", True),
             max_lines=CLI_CONFIG["display"].get("persistent_output_max_lines", 200),
@@ -11543,11 +11547,12 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
                     r_fill = w - 2 - len(r_label)
                     r_top = f"{_DIM}┌─{r_label}{'─' * max(r_fill - 1, 0)}┐{_RST}"
                     r_bot = f"{_DIM}└{'─' * (w - 2)}┘{_RST}"
-                    # Collapse long reasoning: show first 10 lines
+                    # Collapse long reasoning to the first 10 lines unless the
+                    # user opted into full display via /reasoning full.
                     lines = reasoning.strip().splitlines()
-                    if len(lines) > 10:
+                    if len(lines) > 10 and not getattr(self, "reasoning_full", False):
                         display_reasoning = "\n".join(lines[:10])
-                        display_reasoning += f"\n{_DIM}  ... ({len(lines) - 10} more lines){_RST}"
+                        display_reasoning += f"\n{_DIM}  ... ({len(lines) - 10} more lines — /reasoning full to show){_RST}"
                     else:
                         display_reasoning = reasoning.strip()
                     _cprint(f"\n{r_top}\n{_DIM}{display_reasoning}{_RST}\n{r_bot}")
diff --git a/hermes_cli/cli_commands_mixin.py b/hermes_cli/cli_commands_mixin.py
index a3e33ddb493..f4c05060140 100644
--- a/hermes_cli/cli_commands_mixin.py
+++ b/hermes_cli/cli_commands_mixin.py
@@ -2021,6 +2021,8 @@ class CLICommandsMixin:
             /reasoning <level>      Set reasoning effort (none, minimal, low, medium, high, xhigh)
             /reasoning show|on      Show model thinking/reasoning in output
             /reasoning hide|off     Hide model thinking/reasoning from output
+            /reasoning full         Show complete thinking (no 10-line clamp)
+            /reasoning clamp        Collapse long thinking to the first 10 lines
         """
         from cli import _ACCENT, _DIM, _RST, _cprint, _parse_reasoning_config, save_config_value
         parts = cmd.strip().split(maxsplit=1)
@@ -2035,9 +2037,10 @@ class CLICommandsMixin:
             else:
                 level = rc.get("effort", "medium")
             display_state = "on ✓" if self.show_reasoning else "off"
+            full_state = "full" if getattr(self, "reasoning_full", False) else "clamped to 10 lines"
             _cprint(f"  {_ACCENT}Reasoning effort:  {level}{_RST}")
-            _cprint(f"  {_ACCENT}Reasoning display: {display_state}{_RST}")
-            _cprint(f"  {_DIM}Usage: /reasoning <none|minimal|low|medium|high|xhigh|show|hide>{_RST}")
+            _cprint(f"  {_ACCENT}Reasoning display: {display_state} ({full_state}){_RST}")
+            _cprint(f"  {_DIM}Usage: /reasoning <none|minimal|low|medium|high|xhigh|show|hide|full|clamp>{_RST}")
             return
 
         arg = parts[1].strip().lower()
@@ -2059,6 +2062,21 @@ class CLICommandsMixin:
             _cprint(f"  {_ACCENT}✓ Reasoning display: OFF (saved){_RST}")
             return
 
+        # Full / clamped recap toggle
+        if arg in {"full", "all"}:
+            self.reasoning_full = True
+            save_config_value("display.reasoning_full", True)
+            _cprint(f"  {_ACCENT}✓ Reasoning display: FULL (saved){_RST}")
+            _cprint(f"  {_DIM}  The post-response recap box will print complete thinking.{_RST}")
+            if not self.show_reasoning:
+                _cprint(f"  {_DIM}  Note: reasoning display is OFF — run /reasoning show to see it.{_RST}")
+            return
+        if arg in {"clamp", "collapse", "short"}:
+            self.reasoning_full = False
+            save_config_value("display.reasoning_full", False)
+            _cprint(f"  {_ACCENT}✓ Reasoning display: CLAMPED to 10 lines (saved){_RST}")
+            return
+
         # Effort level change
         parsed = _parse_reasoning_config(arg)
         if parsed is None:
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index 2c7a69c4082..a0d0882dcbb 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -142,8 +142,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
     CommandDef("yolo", "Toggle YOLO mode (skip all dangerous command approvals)",
                "Configuration"),
     CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
-               args_hint="[level|show|hide]",
-               subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
+               args_hint="[level|show|hide|full|clamp]",
+               subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off", "full", "clamp")),
     CommandDef("fast", "Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode (Normal/Fast)", "Configuration",
                args_hint="[normal|fast|status]",
                subcommands=("normal", "fast", "status", "on", "off")),
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index dd212cfdb8e..f51d3ee2fe3 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1573,6 +1573,10 @@ DEFAULT_CONFIG = {
         "tui_agents_nudge": True,
         "bell_on_complete": False,
         "show_reasoning": False,
+        # When reasoning display is on, the post-response "Reasoning" recap box
+        # collapses long thinking to the first 10 lines. Set true to print the
+        # complete thinking text uncollapsed (live streaming is always full).
+        "reasoning_full": False,
         # Background self-improvement review notifications surfaced in chat.
         #   "off"     — no chat notification (the review still runs and writes)
         #   "on"      — generic "💾 Memory updated" line (default)
diff --git a/tests/gateway/test_reasoning_command.py b/tests/gateway/test_reasoning_command.py
index f22704dedf6..09600fb6f5a 100644
--- a/tests/gateway/test_reasoning_command.py
+++ b/tests/gateway/test_reasoning_command.py
@@ -71,7 +71,11 @@ class TestReasoningCommand:
 
         result = await runner._handle_help_command(event)
 
-        assert "/reasoning [level|show|hide]" in result
+        # Behaviour contract: /reasoning is surfaced in help. Don't freeze the
+        # exact args-hint literal — it changes whenever a new arg is added
+        # (e.g. full/clamp). Assert the command + its category-defining args.
+        assert "/reasoning" in result
+        assert "level" in result and "show" in result and "hide" in result
 
     def test_reasoning_is_known_command(self):
         source = inspect.getsource(gateway_run.GatewayRunner._handle_message)
diff --git a/tests/hermes_cli/test_reasoning_full_command.py b/tests/hermes_cli/test_reasoning_full_command.py
new file mode 100644
index 00000000000..afea65771c3
--- /dev/null
+++ b/tests/hermes_cli/test_reasoning_full_command.py
@@ -0,0 +1,81 @@
+"""Tests for the CLI `/reasoning full` / `/reasoning clamp` recap toggle.
+
+The post-response "Reasoning" recap box clamps long thinking to the first
+10 lines. `/reasoning full` opts into uncapped display (Taelin's "show all
+thinking tokens" ask); `/reasoning clamp` restores the 10-line collapse.
+These assert the toggle sets the instance flag, persists to config.yaml,
+and that the clamp gate honours the flag.
+"""
+
+import os
+
+import yaml
+
+from hermes_cli.cli_commands_mixin import CLICommandsMixin
+from hermes_cli.config import DEFAULT_CONFIG
+
+
+class _Stub(CLICommandsMixin):
+    """Minimal carrier for the attributes `_handle_reasoning_command` reads."""
+
+    def __init__(self):
+        self.reasoning_config = None
+        self.show_reasoning = True
+        self.reasoning_full = False
+        self.agent = None
+
+    def _current_reasoning_callback(self):
+        return None
+
+
+def test_default_config_clamps_reasoning():
+    # Behaviour contract: the recap defaults to clamped, not full.
+    assert DEFAULT_CONFIG["display"]["reasoning_full"] is False
+
+
+def _seed_config(tmp_path, monkeypatch):
+    hh = tmp_path / ".hermes"
+    hh.mkdir()
+    (hh / "config.yaml").write_text("display:\n  show_reasoning: true\n")
+    monkeypatch.setenv("HERMES_HOME", str(hh))
+    # cli captures _hermes_home at import; force it to the temp home.
+    import cli
+
+    monkeypatch.setattr(cli, "_hermes_home", hh, raising=False)
+    return hh
+
+
+def test_reasoning_full_sets_and_persists(tmp_path, monkeypatch):
+    hh = _seed_config(tmp_path, monkeypatch)
+    s = _Stub()
+
+    s._handle_reasoning_command("/reasoning full")
+    assert s.reasoning_full is True
+    saved = yaml.safe_load((hh / "config.yaml").read_text())
+    assert saved["display"]["reasoning_full"] is True
+
+
+def test_reasoning_clamp_resets_and_persists(tmp_path, monkeypatch):
+    hh = _seed_config(tmp_path, monkeypatch)
+    s = _Stub()
+    s.reasoning_full = True
+
+    s._handle_reasoning_command("/reasoning clamp")
+    assert s.reasoning_full is False
+    saved = yaml.safe_load((hh / "config.yaml").read_text())
+    assert saved["display"]["reasoning_full"] is False
+
+
+def test_reasoning_all_is_alias_for_full(tmp_path, monkeypatch):
+    _seed_config(tmp_path, monkeypatch)
+    s = _Stub()
+    s._handle_reasoning_command("/reasoning all")
+    assert s.reasoning_full is True
+
+
+def test_clamp_gate_honours_flag():
+    # The display gate at cli.py: clamp only when long AND not reasoning_full.
+    reasoning = "\n".join(f"line{i}" for i in range(25))
+    lines = reasoning.strip().splitlines()
+    assert (len(lines) > 10 and not False) is True   # full=False -> clamp
+    assert (len(lines) > 10 and not True) is False   # full=True  -> show all
diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index b9729924104..61c86d519f4 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -3064,6 +3064,33 @@ def test_config_set_reasoning_updates_live_session_and_agent(tmp_path, monkeypat
     assert server._sessions["sid"]["show_reasoning"] is False
     assert server._load_cfg()["display"]["sections"]["thinking"] == "hidden"
 
+    # /reasoning full | clamp — parity with the classic CLI reasoning_full
+    # toggle. In the TUI these map to the thinking section's expand/collapse
+    # rendering (no fixed 10-line recap exists here).
+    resp_full = server.handle_request(
+        {
+            "id": "4",
+            "method": "config.set",
+            "params": {"session_id": "sid", "key": "reasoning", "value": "full"},
+        }
+    )
+    assert resp_full["result"]["value"] == "full"
+    cfg_full = server._load_cfg()
+    assert cfg_full["display"]["reasoning_full"] is True
+    assert cfg_full["display"]["sections"]["thinking"] == "expanded"
+
+    resp_clamp = server.handle_request(
+        {
+            "id": "5",
+            "method": "config.set",
+            "params": {"session_id": "sid", "key": "reasoning", "value": "clamp"},
+        }
+    )
+    assert resp_clamp["result"]["value"] == "clamp"
+    cfg_clamp = server._load_cfg()
+    assert cfg_clamp["display"]["reasoning_full"] is False
+    assert cfg_clamp["display"]["sections"]["thinking"] == "collapsed"
+
 
 def test_config_set_verbose_updates_session_mode_and_agent(tmp_path, monkeypatch):
     monkeypatch.setattr(server, "_hermes_home", tmp_path)
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 861e60bc743..7a63aec263c 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -7981,6 +7981,45 @@ def _(rid, params: dict) -> dict:
                     session["show_reasoning"] = False
                 return _ok(rid, {"key": key, "value": "hide"})
 
+            # /reasoning full | clamp — parity with the classic CLI's
+            # reasoning_full toggle. The TUI renders thinking as an
+            # expand/collapse section rather than a fixed 10-line recap, so
+            # full maps to sections.thinking=expanded and clamp to collapsed.
+            # display.reasoning_full is persisted too so the config key stays
+            # consistent across the CLI and TUI surfaces.
+            if arg in {"full", "all"}:
+                cfg = _load_cfg()
+                display = (
+                    cfg.get("display") if isinstance(cfg.get("display"), dict) else {}
+                )
+                sections = (
+                    display.get("sections")
+                    if isinstance(display.get("sections"), dict)
+                    else {}
+                )
+                display["reasoning_full"] = True
+                sections["thinking"] = "expanded"
+                display["sections"] = sections
+                cfg["display"] = display
+                _save_cfg(cfg)
+                return _ok(rid, {"key": key, "value": "full"})
+            if arg in {"clamp", "collapse", "short"}:
+                cfg = _load_cfg()
+                display = (
+                    cfg.get("display") if isinstance(cfg.get("display"), dict) else {}
+                )
+                sections = (
+                    display.get("sections")
+                    if isinstance(display.get("sections"), dict)
+                    else {}
+                )
+                display["reasoning_full"] = False
+                sections["thinking"] = "collapsed"
+                display["sections"] = sections
+                cfg["display"] = display
+                _save_cfg(cfg)
+                return _ok(rid, {"key": key, "value": "clamp"})
+
             parsed = parse_reasoning_effort(arg)
             if parsed is None:
                 return _err(rid, 4002, f"unknown reasoning value: {value}")