feat(cli): /reasoning full — show complete thinking, not 10-line clamp (#50499)

* feat(cli): /reasoning full to show complete thinking, not 10-line clamp

The post-response Reasoning recap box hard-clamped long thinking to the
first 10 lines, so there was no way to see the full reasoning trace after
a turn (live streaming already shows it in full). Add display.reasoning_full
(default off) plus /reasoning full|clamp to toggle it at runtime; the clamp
truncation note now points at the command. Addresses repeated user requests
to show all thinking tokens.

* test(gateway): de-snapshot /reasoning help assertion

The test froze the exact args-hint literal '/reasoning [level|show|hide]',
which the new full/clamp args change to '[level|show|hide|full|clamp]'.
Convert to an invariant: assert /reasoning is in help and carries its core
args, not the exact hint string.

* feat(tui): /reasoning full|clamp parity in tui_gateway

The classic-CLI reasoning_full toggle had no TUI equivalent — typing
/reasoning full in the TUI fell through to parse_reasoning_effort and
errored. The TUI renders thinking as an expand/collapse section (no fixed
10-line recap), so map full -> sections.thinking=expanded (raw, uncapped
via thinkingPreview mode='full') and clamp -> collapsed, persisting
display.reasoning_full for cross-surface config consistency.
This commit is contained in:
Teknium 2026-06-21 20:21:11 -07:00 committed by GitHub
parent b0a25980f8
commit 95d53c3bcb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 186 additions and 8 deletions

11
cli.py
View file

@ -452,6 +452,7 @@ def load_cli_config() -> Dict[str, Any]:
"resume_max_assistant_lines": 3,
"resume_skip_tool_only": True,
"show_reasoning": False,
"reasoning_full": False,
"streaming": True,
"busy_input_mode": "interrupt",
"persistent_output": True,
@ -3405,6 +3406,9 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
self.bell_on_complete = CLI_CONFIG["display"].get("bell_on_complete", False)
# show_reasoning: display model thinking/reasoning before the response
self.show_reasoning = CLI_CONFIG["display"].get("show_reasoning", False)
# reasoning_full: when reasoning display is on, print the post-response
# recap box uncollapsed instead of clamping to the first 10 lines.
self.reasoning_full = CLI_CONFIG["display"].get("reasoning_full", False)
_configure_output_history(
enabled=CLI_CONFIG["display"].get("persistent_output", True),
max_lines=CLI_CONFIG["display"].get("persistent_output_max_lines", 200),
@ -11543,11 +11547,12 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
r_fill = w - 2 - len(r_label)
r_top = f"{_DIM}┌─{r_label}{'' * max(r_fill - 1, 0)}{_RST}"
r_bot = f"{_DIM}{'' * (w - 2)}{_RST}"
# Collapse long reasoning: show first 10 lines
# Collapse long reasoning to the first 10 lines unless the
# user opted into full display via /reasoning full.
lines = reasoning.strip().splitlines()
if len(lines) > 10:
if len(lines) > 10 and not getattr(self, "reasoning_full", False):
display_reasoning = "\n".join(lines[:10])
display_reasoning += f"\n{_DIM} ... ({len(lines) - 10} more lines){_RST}"
display_reasoning += f"\n{_DIM} ... ({len(lines) - 10} more lines — /reasoning full to show){_RST}"
else:
display_reasoning = reasoning.strip()
_cprint(f"\n{r_top}\n{_DIM}{display_reasoning}{_RST}\n{r_bot}")

View file

@ -2021,6 +2021,8 @@ class CLICommandsMixin:
/reasoning <level> Set reasoning effort (none, minimal, low, medium, high, xhigh)
/reasoning show|on Show model thinking/reasoning in output
/reasoning hide|off Hide model thinking/reasoning from output
/reasoning full Show complete thinking (no 10-line clamp)
/reasoning clamp Collapse long thinking to the first 10 lines
"""
from cli import _ACCENT, _DIM, _RST, _cprint, _parse_reasoning_config, save_config_value
parts = cmd.strip().split(maxsplit=1)
@ -2035,9 +2037,10 @@ class CLICommandsMixin:
else:
level = rc.get("effort", "medium")
display_state = "on ✓" if self.show_reasoning else "off"
full_state = "full" if getattr(self, "reasoning_full", False) else "clamped to 10 lines"
_cprint(f" {_ACCENT}Reasoning effort: {level}{_RST}")
_cprint(f" {_ACCENT}Reasoning display: {display_state}{_RST}")
_cprint(f" {_DIM}Usage: /reasoning <none|minimal|low|medium|high|xhigh|show|hide>{_RST}")
_cprint(f" {_ACCENT}Reasoning display: {display_state} ({full_state}){_RST}")
_cprint(f" {_DIM}Usage: /reasoning <none|minimal|low|medium|high|xhigh|show|hide|full|clamp>{_RST}")
return
arg = parts[1].strip().lower()
@ -2059,6 +2062,21 @@ class CLICommandsMixin:
_cprint(f" {_ACCENT}✓ Reasoning display: OFF (saved){_RST}")
return
# Full / clamped recap toggle
if arg in {"full", "all"}:
self.reasoning_full = True
save_config_value("display.reasoning_full", True)
_cprint(f" {_ACCENT}✓ Reasoning display: FULL (saved){_RST}")
_cprint(f" {_DIM} The post-response recap box will print complete thinking.{_RST}")
if not self.show_reasoning:
_cprint(f" {_DIM} Note: reasoning display is OFF — run /reasoning show to see it.{_RST}")
return
if arg in {"clamp", "collapse", "short"}:
self.reasoning_full = False
save_config_value("display.reasoning_full", False)
_cprint(f" {_ACCENT}✓ Reasoning display: CLAMPED to 10 lines (saved){_RST}")
return
# Effort level change
parsed = _parse_reasoning_config(arg)
if parsed is None:

View file

@ -142,8 +142,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
CommandDef("yolo", "Toggle YOLO mode (skip all dangerous command approvals)",
"Configuration"),
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
args_hint="[level|show|hide]",
subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
args_hint="[level|show|hide|full|clamp]",
subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off", "full", "clamp")),
CommandDef("fast", "Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode (Normal/Fast)", "Configuration",
args_hint="[normal|fast|status]",
subcommands=("normal", "fast", "status", "on", "off")),

View file

@ -1573,6 +1573,10 @@ DEFAULT_CONFIG = {
"tui_agents_nudge": True,
"bell_on_complete": False,
"show_reasoning": False,
# When reasoning display is on, the post-response "Reasoning" recap box
# collapses long thinking to the first 10 lines. Set true to print the
# complete thinking text uncollapsed (live streaming is always full).
"reasoning_full": False,
# Background self-improvement review notifications surfaced in chat.
# "off" — no chat notification (the review still runs and writes)
# "on" — generic "💾 Memory updated" line (default)

View file

@ -71,7 +71,11 @@ class TestReasoningCommand:
result = await runner._handle_help_command(event)
assert "/reasoning [level|show|hide]" in result
# Behaviour contract: /reasoning is surfaced in help. Don't freeze the
# exact args-hint literal — it changes whenever a new arg is added
# (e.g. full/clamp). Assert the command + its category-defining args.
assert "/reasoning" in result
assert "level" in result and "show" in result and "hide" in result
def test_reasoning_is_known_command(self):
source = inspect.getsource(gateway_run.GatewayRunner._handle_message)

View file

@ -0,0 +1,81 @@
"""Tests for the CLI `/reasoning full` / `/reasoning clamp` recap toggle.
The post-response "Reasoning" recap box clamps long thinking to the first
10 lines. `/reasoning full` opts into uncapped display (Taelin's "show all
thinking tokens" ask); `/reasoning clamp` restores the 10-line collapse.
These assert the toggle sets the instance flag, persists to config.yaml,
and that the clamp gate honours the flag.
"""
import os
import yaml
from hermes_cli.cli_commands_mixin import CLICommandsMixin
from hermes_cli.config import DEFAULT_CONFIG
class _Stub(CLICommandsMixin):
    """Bare-bones host object for driving `_handle_reasoning_command`.

    Supplies only the instance attributes and the callback hook that the
    tests in this module need; it deliberately skips the real CLI setup.
    """

    def __init__(self):
        # Start with reasoning display on and the recap clamped, so each
        # test begins from the default (non-full) state.
        self.agent = self.reasoning_config = None
        self.reasoning_full = False
        self.show_reasoning = True

    def _current_reasoning_callback(self):
        # No live agent in these tests, hence no callback to hand back.
        return None
def test_default_config_clamps_reasoning():
    """The shipped default keeps the recap clamped (reasoning_full is off)."""
    display_defaults = DEFAULT_CONFIG["display"]
    assert display_defaults["reasoning_full"] is False
def _seed_config(tmp_path, monkeypatch):
    """Create a throwaway Hermes home with a minimal config.yaml.

    Writes a config that only enables `display.show_reasoning`, points
    `HERMES_HOME` at the new directory, and patches `cli._hermes_home` to
    match. Returns the path of the created home directory.
    """
    import cli

    home_dir = tmp_path / ".hermes"
    home_dir.mkdir()
    config_file = home_dir / "config.yaml"
    config_file.write_text("display:\n show_reasoning: true\n")

    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    # cli captures _hermes_home at import; force it to the temp home.
    monkeypatch.setattr(cli, "_hermes_home", home_dir, raising=False)
    return home_dir
def test_reasoning_full_sets_and_persists(tmp_path, monkeypatch):
    """`/reasoning full` turns the flag on in memory and in config.yaml."""
    home_dir = _seed_config(tmp_path, monkeypatch)
    stub = _Stub()

    stub._handle_reasoning_command("/reasoning full")

    assert stub.reasoning_full is True
    config_text = (home_dir / "config.yaml").read_text()
    persisted = yaml.safe_load(config_text)
    assert persisted["display"]["reasoning_full"] is True
def test_reasoning_clamp_resets_and_persists(tmp_path, monkeypatch):
    """`/reasoning clamp` turns the flag off in memory and in config.yaml."""
    home_dir = _seed_config(tmp_path, monkeypatch)
    stub = _Stub()
    # Begin from the "full" state so the command has something to undo.
    stub.reasoning_full = True

    stub._handle_reasoning_command("/reasoning clamp")

    assert stub.reasoning_full is False
    config_text = (home_dir / "config.yaml").read_text()
    persisted = yaml.safe_load(config_text)
    assert persisted["display"]["reasoning_full"] is False
def test_reasoning_all_is_alias_for_full(tmp_path, monkeypatch):
    """`/reasoning all` sets the same in-memory flag as `/reasoning full`."""
    _seed_config(tmp_path, monkeypatch)
    stub = _Stub()

    stub._handle_reasoning_command("/reasoning all")

    assert stub.reasoning_full is True
def test_clamp_gate_honours_flag():
    """Check the clamp condition: clamp only when long AND not reasoning_full.

    NOTE(review): this mirrors the gate expression rather than calling the
    renderer in cli.py, so it will not catch a regression there — confirm
    whether a test against the real recap output is wanted.
    """
    thinking = "\n".join(f"line{i}" for i in range(25))
    is_long = len(thinking.strip().splitlines()) > 10

    # (reasoning_full flag, whether the recap should be clamped)
    expectations = ((False, True), (True, False))
    for reasoning_full, should_clamp in expectations:
        assert (is_long and not reasoning_full) is should_clamp

View file

@ -3064,6 +3064,33 @@ def test_config_set_reasoning_updates_live_session_and_agent(tmp_path, monkeypat
assert server._sessions["sid"]["show_reasoning"] is False
assert server._load_cfg()["display"]["sections"]["thinking"] == "hidden"
# /reasoning full | clamp — parity with the classic CLI reasoning_full
# toggle. In the TUI these map to the thinking section's expand/collapse
# rendering (no fixed 10-line recap exists here).
resp_full = server.handle_request(
{
"id": "4",
"method": "config.set",
"params": {"session_id": "sid", "key": "reasoning", "value": "full"},
}
)
assert resp_full["result"]["value"] == "full"
cfg_full = server._load_cfg()
assert cfg_full["display"]["reasoning_full"] is True
assert cfg_full["display"]["sections"]["thinking"] == "expanded"
resp_clamp = server.handle_request(
{
"id": "5",
"method": "config.set",
"params": {"session_id": "sid", "key": "reasoning", "value": "clamp"},
}
)
assert resp_clamp["result"]["value"] == "clamp"
cfg_clamp = server._load_cfg()
assert cfg_clamp["display"]["reasoning_full"] is False
assert cfg_clamp["display"]["sections"]["thinking"] == "collapsed"
def test_config_set_verbose_updates_session_mode_and_agent(tmp_path, monkeypatch):
monkeypatch.setattr(server, "_hermes_home", tmp_path)

View file

@ -7981,6 +7981,45 @@ def _(rid, params: dict) -> dict:
session["show_reasoning"] = False
return _ok(rid, {"key": key, "value": "hide"})
# /reasoning full | clamp — parity with the classic CLI's
# reasoning_full toggle. The TUI renders thinking as an
# expand/collapse section rather than a fixed 10-line recap, so
# full maps to sections.thinking=expanded and clamp to collapsed.
# display.reasoning_full is persisted too so the config key stays
# consistent across the CLI and TUI surfaces.
if arg in {"full", "all"}:
cfg = _load_cfg()
display = (
cfg.get("display") if isinstance(cfg.get("display"), dict) else {}
)
sections = (
display.get("sections")
if isinstance(display.get("sections"), dict)
else {}
)
display["reasoning_full"] = True
sections["thinking"] = "expanded"
display["sections"] = sections
cfg["display"] = display
_save_cfg(cfg)
return _ok(rid, {"key": key, "value": "full"})
if arg in {"clamp", "collapse", "short"}:
cfg = _load_cfg()
display = (
cfg.get("display") if isinstance(cfg.get("display"), dict) else {}
)
sections = (
display.get("sections")
if isinstance(display.get("sections"), dict)
else {}
)
display["reasoning_full"] = False
sections["thinking"] = "collapsed"
display["sections"] = sections
cfg["display"] = display
_save_cfg(cfg)
return _ok(rid, {"key": key, "value": "clamp"})
parsed = parse_reasoning_effort(arg)
if parsed is None:
return _err(rid, 4002, f"unknown reasoning value: {value}")