feat(cli): /reasoning full — show complete thinking, not 10-line clamp (#50499)

* feat(cli): /reasoning full to show complete thinking, not 10-line clamp

  The post-response Reasoning recap box hard-clamped long thinking to the first 10 lines, so there was no way to see the full reasoning trace after a turn (live streaming already shows it in full). Add display.reasoning_full (default off) plus /reasoning full|clamp to toggle it at runtime; the clamp truncation note now points at the command. Addresses repeated user requests to show all thinking tokens.

* test(gateway): de-snapshot /reasoning help assertion

  The test froze the exact args-hint literal '/reasoning [level|show|hide]', which the new full/clamp args change to '[level|show|hide|full|clamp]'. Convert to an invariant: assert /reasoning is in help and carries its core args, not the exact hint string.

* feat(tui): /reasoning full|clamp parity in tui_gateway

  The classic-CLI reasoning_full toggle had no TUI equivalent — typing /reasoning full in the TUI fell through to parse_reasoning_effort and errored. The TUI renders thinking as an expand/collapse section (no fixed 10-line recap), so map full -> sections.thinking=expanded (raw, uncapped via thinkingPreview mode='full') and clamp -> collapsed, persisting display.reasoning_full for cross-surface config consistency.
2026-06-23 10:42:00 +00:00 · 2026-06-21 20:21:11 -07:00 · 2026-06-21 20:21:11 -07:00 · 95d53c3bcb
commit 95d53c3bcb
parent b0a25980f8
8 changed files with 186 additions and 8 deletions
--- a/cli.py
+++ b/cli.py
@ -452,6 +452,7 @@ def load_cli_config() -> Dict[str, Any]:
            "resume_max_assistant_lines": 3,
            "resume_skip_tool_only": True,
            "show_reasoning": False,
+            "reasoning_full": False,
            "streaming": True,
            "busy_input_mode": "interrupt",
            "persistent_output": True,
@ -3405,6 +3406,9 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
        self.bell_on_complete = CLI_CONFIG["display"].get("bell_on_complete", False)
        # show_reasoning: display model thinking/reasoning before the response
        self.show_reasoning = CLI_CONFIG["display"].get("show_reasoning", False)
+        # reasoning_full: when reasoning display is on, print the post-response
+        # recap box uncollapsed instead of clamping to the first 10 lines.
+        self.reasoning_full = CLI_CONFIG["display"].get("reasoning_full", False)
        _configure_output_history(
            enabled=CLI_CONFIG["display"].get("persistent_output", True),
            max_lines=CLI_CONFIG["display"].get("persistent_output_max_lines", 200),
@ -11543,11 +11547,12 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
                    r_fill = w - 2 - len(r_label)
                    r_top = f"{_DIM}┌─{r_label}{'─' * max(r_fill - 1, 0)}┐{_RST}"
                    r_bot = f"{_DIM}└{'─' * (w - 2)}┘{_RST}"
-                    # Collapse long reasoning: show first 10 lines
+                    # Collapse long reasoning to the first 10 lines unless the
+                    # user opted into full display via /reasoning full.
                    lines = reasoning.strip().splitlines()
-                    if len(lines) > 10:
+                    if len(lines) > 10 and not getattr(self, "reasoning_full", False):
                        display_reasoning = "\n".join(lines[:10])
-                        display_reasoning += f"\n{_DIM}  ... ({len(lines) - 10} more lines){_RST}"
+                        display_reasoning += f"\n{_DIM}  ... ({len(lines) - 10} more lines — /reasoning full to show){_RST}"
                    else:
                        display_reasoning = reasoning.strip()
                    _cprint(f"\n{r_top}\n{_DIM}{display_reasoning}{_RST}\n{r_bot}")
--- a/hermes_cli/cli_commands_mixin.py
+++ b/hermes_cli/cli_commands_mixin.py
@ -2021,6 +2021,8 @@ class CLICommandsMixin:
            /reasoning <level>      Set reasoning effort (none, minimal, low, medium, high, xhigh)
            /reasoning show|on      Show model thinking/reasoning in output
            /reasoning hide|off     Hide model thinking/reasoning from output
+            /reasoning full         Show complete thinking (no 10-line clamp)
+            /reasoning clamp        Collapse long thinking to the first 10 lines
        """
        from cli import _ACCENT, _DIM, _RST, _cprint, _parse_reasoning_config, save_config_value
        parts = cmd.strip().split(maxsplit=1)
@ -2035,9 +2037,10 @@ class CLICommandsMixin:
            else:
                level = rc.get("effort", "medium")
            display_state = "on ✓" if self.show_reasoning else "off"
+            full_state = "full" if getattr(self, "reasoning_full", False) else "clamped to 10 lines"
            _cprint(f"  {_ACCENT}Reasoning effort:  {level}{_RST}")
-            _cprint(f"  {_ACCENT}Reasoning display: {display_state}{_RST}")
-            _cprint(f"  {_DIM}Usage: /reasoning <none|minimal|low|medium|high|xhigh|show|hide>{_RST}")
+            _cprint(f"  {_ACCENT}Reasoning display: {display_state} ({full_state}){_RST}")
+            _cprint(f"  {_DIM}Usage: /reasoning <none|minimal|low|medium|high|xhigh|show|hide|full|clamp>{_RST}")
            return

        arg = parts[1].strip().lower()
@ -2059,6 +2062,21 @@ class CLICommandsMixin:
            _cprint(f"  {_ACCENT}✓ Reasoning display: OFF (saved){_RST}")
            return

+        # Full / clamped recap toggle
+        if arg in {"full", "all"}:
+            self.reasoning_full = True
+            save_config_value("display.reasoning_full", True)
+            _cprint(f"  {_ACCENT}✓ Reasoning display: FULL (saved){_RST}")
+            _cprint(f"  {_DIM}  The post-response recap box will print complete thinking.{_RST}")
+            if not self.show_reasoning:
+                _cprint(f"  {_DIM}  Note: reasoning display is OFF — run /reasoning show to see it.{_RST}")
+            return
+        if arg in {"clamp", "collapse", "short"}:
+            self.reasoning_full = False
+            save_config_value("display.reasoning_full", False)
+            _cprint(f"  {_ACCENT}✓ Reasoning display: CLAMPED to 10 lines (saved){_RST}")
+            return
+
        # Effort level change
        parsed = _parse_reasoning_config(arg)
        if parsed is None:
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@ -142,8 +142,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("yolo", "Toggle YOLO mode (skip all dangerous command approvals)",
               "Configuration"),
    CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
-               args_hint="[level|show|hide]",
-               subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
+               args_hint="[level|show|hide|full|clamp]",
+               subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off", "full", "clamp")),
    CommandDef("fast", "Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode (Normal/Fast)", "Configuration",
               args_hint="[normal|fast|status]",
               subcommands=("normal", "fast", "status", "on", "off")),
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -1573,6 +1573,10 @@ DEFAULT_CONFIG = {
        "tui_agents_nudge": True,
        "bell_on_complete": False,
        "show_reasoning": False,
+        # When reasoning display is on, the post-response "Reasoning" recap box
+        # collapses long thinking to the first 10 lines. Set true to print the
+        # complete thinking text uncollapsed (live streaming is always full).
+        "reasoning_full": False,
        # Background self-improvement review notifications surfaced in chat.
        #   "off"     — no chat notification (the review still runs and writes)
        #   "on"      — generic "💾 Memory updated" line (default)
--- a/tests/gateway/test_reasoning_command.py
+++ b/tests/gateway/test_reasoning_command.py
@ -71,7 +71,11 @@ class TestReasoningCommand:

        result = await runner._handle_help_command(event)

-        assert "/reasoning [level|show|hide]" in result
+        # Behaviour contract: /reasoning is surfaced in help. Don't freeze the
+        # exact args-hint literal — it changes whenever a new arg is added
+        # (e.g. full/clamp). Assert the command + its category-defining args.
+        assert "/reasoning" in result
+        assert "level" in result and "show" in result and "hide" in result

    def test_reasoning_is_known_command(self):
        source = inspect.getsource(gateway_run.GatewayRunner._handle_message)
--- a/tests/hermes_cli/test_reasoning_full_command.py
+++ b/tests/hermes_cli/test_reasoning_full_command.py
@ -0,0 +1,81 @@
+"""Tests for the CLI `/reasoning full` / `/reasoning clamp` recap toggle.
+
+The post-response "Reasoning" recap box clamps long thinking to the first
+10 lines. `/reasoning full` opts into uncapped display (Taelin's "show all
+thinking tokens" ask); `/reasoning clamp` restores the 10-line collapse.
+These assert the toggle sets the instance flag, persists to config.yaml,
+and that the clamp gate honours the flag.
+"""
+
+import os
+
+import yaml
+
+from hermes_cli.cli_commands_mixin import CLICommandsMixin
+from hermes_cli.config import DEFAULT_CONFIG
+
+
+class _Stub(CLICommandsMixin):
+    """Minimal carrier for the attributes `_handle_reasoning_command` reads."""
+
+    def __init__(self):
+        self.reasoning_config = None
+        self.show_reasoning = True
+        self.reasoning_full = False
+        self.agent = None
+
+    def _current_reasoning_callback(self):
+        return None
+
+
+def test_default_config_clamps_reasoning():
+    # Behaviour contract: the recap defaults to clamped, not full.
+    assert DEFAULT_CONFIG["display"]["reasoning_full"] is False
+
+
+def _seed_config(tmp_path, monkeypatch):
+    hh = tmp_path / ".hermes"
+    hh.mkdir()
+    (hh / "config.yaml").write_text("display:\n  show_reasoning: true\n")
+    monkeypatch.setenv("HERMES_HOME", str(hh))
+    # cli captures _hermes_home at import; force it to the temp home.
+    import cli
+
+    monkeypatch.setattr(cli, "_hermes_home", hh, raising=False)
+    return hh
+
+
+def test_reasoning_full_sets_and_persists(tmp_path, monkeypatch):
+    hh = _seed_config(tmp_path, monkeypatch)
+    s = _Stub()
+
+    s._handle_reasoning_command("/reasoning full")
+    assert s.reasoning_full is True
+    saved = yaml.safe_load((hh / "config.yaml").read_text())
+    assert saved["display"]["reasoning_full"] is True
+
+
+def test_reasoning_clamp_resets_and_persists(tmp_path, monkeypatch):
+    hh = _seed_config(tmp_path, monkeypatch)
+    s = _Stub()
+    s.reasoning_full = True
+
+    s._handle_reasoning_command("/reasoning clamp")
+    assert s.reasoning_full is False
+    saved = yaml.safe_load((hh / "config.yaml").read_text())
+    assert saved["display"]["reasoning_full"] is False
+
+
+def test_reasoning_all_is_alias_for_full(tmp_path, monkeypatch):
+    _seed_config(tmp_path, monkeypatch)
+    s = _Stub()
+    s._handle_reasoning_command("/reasoning all")
+    assert s.reasoning_full is True
+
+
+def test_clamp_gate_honours_flag():
+    # Mirrors the cli.py display gate (clamp only when long AND not reasoning_full) using literal flags; cli.py itself is not called.
+    reasoning = "\n".join(f"line{i}" for i in range(25))
+    lines = reasoning.strip().splitlines()
+    assert (len(lines) > 10 and not False) is True   # full=False -> clamp
+    assert (len(lines) > 10 and not True) is False   # full=True  -> show all
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@ -3064,6 +3064,33 @@ def test_config_set_reasoning_updates_live_session_and_agent(tmp_path, monkeypat
    assert server._sessions["sid"]["show_reasoning"] is False
    assert server._load_cfg()["display"]["sections"]["thinking"] == "hidden"

+    # /reasoning full | clamp — parity with the classic CLI reasoning_full
+    # toggle. In the TUI these map to the thinking section's expand/collapse
+    # rendering (no fixed 10-line recap exists here).
+    resp_full = server.handle_request(
+        {
+            "id": "4",
+            "method": "config.set",
+            "params": {"session_id": "sid", "key": "reasoning", "value": "full"},
+        }
+    )
+    assert resp_full["result"]["value"] == "full"
+    cfg_full = server._load_cfg()
+    assert cfg_full["display"]["reasoning_full"] is True
+    assert cfg_full["display"]["sections"]["thinking"] == "expanded"
+
+    resp_clamp = server.handle_request(
+        {
+            "id": "5",
+            "method": "config.set",
+            "params": {"session_id": "sid", "key": "reasoning", "value": "clamp"},
+        }
+    )
+    assert resp_clamp["result"]["value"] == "clamp"
+    cfg_clamp = server._load_cfg()
+    assert cfg_clamp["display"]["reasoning_full"] is False
+    assert cfg_clamp["display"]["sections"]["thinking"] == "collapsed"
+

 def test_config_set_verbose_updates_session_mode_and_agent(tmp_path, monkeypatch):
    monkeypatch.setattr(server, "_hermes_home", tmp_path)
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -7981,6 +7981,45 @@ def _(rid, params: dict) -> dict:
                    session["show_reasoning"] = False
                return _ok(rid, {"key": key, "value": "hide"})

+            # /reasoning full | clamp — parity with the classic CLI's
+            # reasoning_full toggle. The TUI renders thinking as an
+            # expand/collapse section rather than a fixed 10-line recap, so
+            # full maps to sections.thinking=expanded and clamp to collapsed.
+            # display.reasoning_full is persisted too so the config key stays
+            # consistent across the CLI and TUI surfaces.
+            if arg in {"full", "all"}:
+                cfg = _load_cfg()
+                display = (
+                    cfg.get("display") if isinstance(cfg.get("display"), dict) else {}
+                )
+                sections = (
+                    display.get("sections")
+                    if isinstance(display.get("sections"), dict)
+                    else {}
+                )
+                display["reasoning_full"] = True
+                sections["thinking"] = "expanded"
+                display["sections"] = sections
+                cfg["display"] = display
+                _save_cfg(cfg)
+                return _ok(rid, {"key": key, "value": "full"})
+            if arg in {"clamp", "collapse", "short"}:
+                cfg = _load_cfg()
+                display = (
+                    cfg.get("display") if isinstance(cfg.get("display"), dict) else {}
+                )
+                sections = (
+                    display.get("sections")
+                    if isinstance(display.get("sections"), dict)
+                    else {}
+                )
+                display["reasoning_full"] = False
+                sections["thinking"] = "collapsed"
+                display["sections"] = sections
+                cfg["display"] = display
+                _save_cfg(cfg)
+                return _ok(rid, {"key": key, "value": "clamp"})
+
            parsed = parse_reasoning_effort(arg)
            if parsed is None:
                return _err(rid, 4002, f"unknown reasoning value: {value}")