diff --git a/cli.py b/cli.py index b540f13b1c..57ec69e606 100755 --- a/cli.py +++ b/cli.py @@ -3120,8 +3120,8 @@ class HermesCLI: level = "none (disabled)" else: level = rc.get("effort", "medium") - display_state = "on" if self.show_reasoning else "off" - _cprint(f" {_GOLD}Reasoning effort: {level}{_RST}") + display_state = "on ✓" if self.show_reasoning else "off" + _cprint(f" {_GOLD}Reasoning effort: {level}{_RST}") _cprint(f" {_GOLD}Reasoning display: {display_state}{_RST}") _cprint(f" {_DIM}Usage: /reasoning {_RST}") return @@ -3133,14 +3133,16 @@ class HermesCLI: self.show_reasoning = True if self.agent: self.agent.reasoning_callback = self._on_reasoning - _cprint(f" {_GOLD}Reasoning display: ON{_RST}") - _cprint(f" {_DIM}Model thinking will be shown during and after each response.{_RST}") + save_config_value("display.show_reasoning", True) + _cprint(f" {_GOLD}✓ Reasoning display: ON (saved){_RST}") + _cprint(f" {_DIM} Model thinking will be shown during and after each response.{_RST}") return if arg in ("hide", "off"): self.show_reasoning = False if self.agent: self.agent.reasoning_callback = None - _cprint(f" {_GOLD}Reasoning display: OFF{_RST}") + save_config_value("display.show_reasoning", False) + _cprint(f" {_GOLD}✓ Reasoning display: OFF (saved){_RST}") return # Effort level change @@ -3155,9 +3157,9 @@ class HermesCLI: self.agent = None # Force agent re-init with new reasoning config if save_config_value("agent.reasoning_effort", arg): - _cprint(f" {_GOLD}Reasoning effort set to '{arg}' (saved to config){_RST}") + _cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (saved to config){_RST}") else: - _cprint(f" {_GOLD}Reasoning effort set to '{arg}' (session only){_RST}") + _cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}") def _on_reasoning(self, reasoning_text: str): """Callback for intermediate reasoning display during tool-call loops.""" @@ -4544,7 +4546,7 @@ class HermesCLI: # Check for commands if isinstance(user_input, str) and user_input.startswith("/"): - print(f"\n⚙️ {user_input}") + _cprint(f"\n⚙️ {user_input}") if not self.process_command(user_input): self._should_exit = True # Schedule app exit diff --git a/gateway/run.py b/gateway/run.py index dfd1e4c200..6f4e43e981 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -228,6 +228,7 @@ class GatewayRunner: self._prefill_messages = self._load_prefill_messages() self._ephemeral_system_prompt = self._load_ephemeral_system_prompt() self._reasoning_config = self._load_reasoning_config() + self._show_reasoning = self._load_show_reasoning() self._provider_routing = self._load_provider_routing() self._fallback_model = self._load_fallback_model() @@ -421,6 +422,20 @@ class GatewayRunner: logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort) return None + @staticmethod + def _load_show_reasoning() -> bool: + """Load show_reasoning toggle from config.yaml display section.""" + try: + import yaml as _y + cfg_path = _hermes_home / "config.yaml" + if cfg_path.exists(): + with open(cfg_path, encoding="utf-8") as _f: + cfg = _y.safe_load(_f) or {} + return bool(cfg.get("display", {}).get("show_reasoning", False)) + except Exception: + pass + return False + @staticmethod def _load_background_notifications_mode() -> str: """Load background process notification mode from config or env var. @@ -846,7 +861,7 @@ class GatewayRunner: "personality", "retry", "undo", "sethome", "set-home", "compress", "usage", "insights", "reload-mcp", "reload_mcp", "update", "title", "resume", "provider", "rollback", - "background"} + "background", "reasoning"} if command and command in _known_commands: await self.hooks.emit(f"command:{command}", { "platform": source.platform.value if source.platform else "", @@ -911,6 +926,9 @@ class GatewayRunner: if command == "background": return await self._handle_background_command(event) + + if command == "reasoning": + return await self._handle_reasoning_command(event) # User-defined quick commands (bypass agent loop, no LLM call) if command: @@ -1352,7 +1370,20 @@ class GatewayRunner: response = agent_result.get("final_response", "") agent_messages = agent_result.get("messages", []) - + + # Prepend reasoning/thinking if display is enabled + if getattr(self, "_show_reasoning", False) and response: + last_reasoning = agent_result.get("last_reasoning") + if last_reasoning: + # Collapse long reasoning to keep messages readable + lines = last_reasoning.strip().splitlines() + if len(lines) > 15: + display_reasoning = "\n".join(lines[:15]) + display_reasoning += f"\n_... ({len(lines) - 15} more lines)_" + else: + display_reasoning = last_reasoning.strip() + response = f"💭 **Reasoning:**\n```\n{display_reasoning}\n```\n\n{response}" + # Emit agent:end hook await self.hooks.emit("agent:end", { **hook_ctx, @@ -1543,6 +1574,7 @@ class GatewayRunner: "`/resume [name]` — Resume a previously-named session", "`/usage` — Show token usage for this session", "`/insights [days]` — Show usage insights and analytics", + "`/reasoning [level|show|hide]` — Set reasoning effort or toggle display", "`/rollback [number]` — List or restore filesystem checkpoints", "`/background ` — Run a prompt in a separate background session", "`/reload-mcp` — Reload MCP servers from config", @@ -2170,6 +2202,88 @@ class GatewayRunner: except Exception: pass + async def _handle_reasoning_command(self, event: MessageEvent) -> str: + """Handle /reasoning command — manage reasoning effort and display toggle. + + Usage: + /reasoning Show current effort level and display state + /reasoning Set reasoning effort (none, low, medium, high, xhigh) + /reasoning show|on Show model reasoning in responses + /reasoning hide|off Hide model reasoning from responses + """ + import yaml + + args = event.get_command_args().strip().lower() + config_path = _hermes_home / "config.yaml" + + def _save_config_key(key_path: str, value): + """Save a dot-separated key to config.yaml.""" + try: + user_config = {} + if config_path.exists(): + with open(config_path, encoding="utf-8") as f: + user_config = yaml.safe_load(f) or {} + keys = key_path.split(".") + current = user_config + for k in keys[:-1]: + if k not in current or not isinstance(current[k], dict): + current[k] = {} + current = current[k] + current[keys[-1]] = value + with open(config_path, "w", encoding="utf-8") as f: + yaml.dump(user_config, f, default_flow_style=False, sort_keys=False) + return True + except Exception as e: + logger.error("Failed to save config key %s: %s", key_path, e) + return False + + if not args: + # Show current state + rc = self._reasoning_config + if rc is None: + level = "medium (default)" + elif rc.get("enabled") is False: + level = "none (disabled)" + else: + level = rc.get("effort", "medium") + display_state = "on ✓" if self._show_reasoning else "off" + return ( + "🧠 **Reasoning Settings**\n\n" + f"**Effort:** `{level}`\n" + f"**Display:** {display_state}\n\n" + "_Usage:_ `/reasoning `" + ) + + # Display toggle + if args in ("show", "on"): + self._show_reasoning = True + _save_config_key("display.show_reasoning", True) + return "🧠 ✓ Reasoning display: **ON**\nModel thinking will be shown before each response." + + if args in ("hide", "off"): + self._show_reasoning = False + _save_config_key("display.show_reasoning", False) + return "🧠 ✓ Reasoning display: **OFF**" + + # Effort level change + effort = args.strip() + if effort == "none": + parsed = {"enabled": False} + elif effort in ("xhigh", "high", "medium", "low", "minimal"): + parsed = {"enabled": True, "effort": effort} + else: + return ( + f"⚠️ Unknown argument: `{effort}`\n\n" + "**Valid levels:** none, low, minimal, medium, high, xhigh\n" + "**Display:** show, hide" + ) + + self._reasoning_config = parsed + if _save_config_key("agent.reasoning_effort", effort): + return f"🧠 ✓ Reasoning effort set to `{effort}` (saved to config)\n_(takes effect on next message)_" + else: + return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)" + async def _handle_compress_command(self, event: MessageEvent) -> str: """Handle /compress command -- manually compress conversation context.""" source = event.source @@ -3273,6 +3387,7 @@ class GatewayRunner: return { "final_response": final_response, + "last_reasoning": result.get("last_reasoning"), "messages": result_holder[0].get("messages", []) if result_holder[0] else [], "api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0, "tools": tools_holder[0] or [], diff --git a/run_agent.py b/run_agent.py index cce83f6b6b..608dde94cd 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2442,6 +2442,16 @@ class AIAgent: """ reasoning_text = self._extract_reasoning(assistant_message) + # Fallback: extract inline blocks from content when no structured + # reasoning fields are present (some models/providers embed thinking + # directly in the content rather than returning separate API fields). + if not reasoning_text: + content = assistant_message.content or "" + think_blocks = re.findall(r'(.*?)', content, flags=re.DOTALL) + if think_blocks: + combined = "\n\n".join(b.strip() for b in think_blocks if b.strip()) + reasoning_text = combined or None + if reasoning_text and self.verbose_logging: preview = reasoning_text[:100] + "..." if len(reasoning_text) > 100 else reasoning_text logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {preview}") diff --git a/tests/test_reasoning_command.py b/tests/test_reasoning_command.py index 2cca80f303..425e28a58c 100644 --- a/tests/test_reasoning_command.py +++ b/tests/test_reasoning_command.py @@ -342,6 +342,90 @@ class TestExtractReasoningFormats(unittest.TestCase): self.assertIsNone(result) +# --------------------------------------------------------------------------- +# Inline block extraction fallback +# --------------------------------------------------------------------------- + +class TestInlineThinkBlockExtraction(unittest.TestCase): + """Test _build_assistant_message extracts inline blocks as reasoning + when no structured API-level reasoning fields are present.""" + + def _build_msg(self, content, reasoning=None, reasoning_content=None, reasoning_details=None, tool_calls=None): + """Create a mock API response message.""" + msg = SimpleNamespace(content=content, tool_calls=tool_calls) + if reasoning is not None: + msg.reasoning = reasoning + if reasoning_content is not None: + msg.reasoning_content = reasoning_content + if reasoning_details is not None: + msg.reasoning_details = reasoning_details + return msg + + def _make_agent(self): + """Create a minimal agent with _build_assistant_message.""" + from run_agent import AIAgent + agent = MagicMock(spec=AIAgent) + agent._build_assistant_message = AIAgent._build_assistant_message.__get__(agent) + agent._extract_reasoning = AIAgent._extract_reasoning.__get__(agent) + agent.verbose_logging = False + agent.reasoning_callback = None + return agent + + def test_single_think_block_extracted(self): + agent = self._make_agent() + api_msg = self._build_msg("Let me calculate 2+2=4.The answer is 4.") + result = agent._build_assistant_message(api_msg, "stop") + self.assertEqual(result["reasoning"], "Let me calculate 2+2=4.") + + def test_multiple_think_blocks_extracted(self): + agent = self._make_agent() + api_msg = self._build_msg("First thought.Some textSecond thought.More text") + result = agent._build_assistant_message(api_msg, "stop") + self.assertIn("First thought.", result["reasoning"]) + self.assertIn("Second thought.", result["reasoning"]) + + def test_no_think_blocks_no_reasoning(self): + agent = self._make_agent() + api_msg = self._build_msg("Just a plain response.") + result = agent._build_assistant_message(api_msg, "stop") + # No structured reasoning AND no inline think blocks → None + self.assertIsNone(result["reasoning"]) + + def test_structured_reasoning_takes_priority(self): + """When structured API reasoning exists, inline think blocks should NOT override.""" + agent = self._make_agent() + api_msg = self._build_msg( + "Inline thought.Response text.", + reasoning="Structured reasoning from API.", + ) + result = agent._build_assistant_message(api_msg, "stop") + self.assertEqual(result["reasoning"], "Structured reasoning from API.") + + def test_empty_think_block_ignored(self): + agent = self._make_agent() + api_msg = self._build_msg("Hello!") + result = agent._build_assistant_message(api_msg, "stop") + # Empty think block should not produce reasoning + self.assertIsNone(result["reasoning"]) + + def test_multiline_think_block(self): + agent = self._make_agent() + api_msg = self._build_msg("\nStep 1: Analyze.\nStep 2: Solve.\nDone.") + result = agent._build_assistant_message(api_msg, "stop") + self.assertIn("Step 1: Analyze.", result["reasoning"]) + self.assertIn("Step 2: Solve.", result["reasoning"]) + + def test_callback_fires_for_inline_think(self): + """Reasoning callback should fire when reasoning is extracted from inline think blocks.""" + agent = self._make_agent() + captured = [] + agent.reasoning_callback = lambda t: captured.append(t) + api_msg = self._build_msg("Deep analysis here.Answer.") + agent._build_assistant_message(api_msg, "stop") + self.assertEqual(len(captured), 1) + self.assertIn("Deep analysis", captured[0]) + + # --------------------------------------------------------------------------- # Config defaults # ---------------------------------------------------------------------------