diff --git a/cli.py b/cli.py
index b540f13b1c..57ec69e606 100755
--- a/cli.py
+++ b/cli.py
@@ -3120,8 +3120,8 @@ class HermesCLI:
                 level = "none (disabled)"
             else:
                 level = rc.get("effort", "medium")
-            display_state = "on" if self.show_reasoning else "off"
-            _cprint(f"  {_GOLD}Reasoning effort: {level}{_RST}")
+            display_state = "on ✓" if self.show_reasoning else "off"
+            _cprint(f"  {_GOLD}Reasoning effort:  {level}{_RST}")
             _cprint(f"  {_GOLD}Reasoning display: {display_state}{_RST}")
             _cprint(f"  {_DIM}Usage: /reasoning <none|low|medium|high|xhigh|show|hide>{_RST}")
             return
@@ -3133,14 +3133,16 @@ class HermesCLI:
             self.show_reasoning = True
             if self.agent:
                 self.agent.reasoning_callback = self._on_reasoning
-            _cprint(f"  {_GOLD}Reasoning display: ON{_RST}")
-            _cprint(f"  {_DIM}Model thinking will be shown during and after each response.{_RST}")
+            save_config_value("display.show_reasoning", True)
+            _cprint(f"  {_GOLD}✓ Reasoning display: ON (saved){_RST}")
+            _cprint(f"  {_DIM}  Model thinking will be shown during and after each response.{_RST}")
             return
         if arg in ("hide", "off"):
             self.show_reasoning = False
             if self.agent:
                 self.agent.reasoning_callback = None
-            _cprint(f"  {_GOLD}Reasoning display: OFF{_RST}")
+            save_config_value("display.show_reasoning", False)
+            _cprint(f"  {_GOLD}✓ Reasoning display: OFF (saved){_RST}")
             return
 
         # Effort level change
@@ -3155,9 +3157,9 @@ class HermesCLI:
         self.agent = None  # Force agent re-init with new reasoning config
 
         if save_config_value("agent.reasoning_effort", arg):
-            _cprint(f"  {_GOLD}Reasoning effort set to '{arg}' (saved to config){_RST}")
+            _cprint(f"  {_GOLD}✓ Reasoning effort set to '{arg}' (saved to config){_RST}")
         else:
-            _cprint(f"  {_GOLD}Reasoning effort set to '{arg}' (session only){_RST}")
+            _cprint(f"  {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}")
 
     def _on_reasoning(self, reasoning_text: str):
         """Callback for intermediate reasoning display during tool-call loops."""
@@ -4544,7 +4546,7 @@ class HermesCLI:
                     
                     # Check for commands
                     if isinstance(user_input, str) and user_input.startswith("/"):
-                        print(f"\n⚙️  {user_input}")
+                        _cprint(f"\n⚙️  {user_input}")
                         if not self.process_command(user_input):
                             self._should_exit = True
                             # Schedule app exit
diff --git a/gateway/run.py b/gateway/run.py
index dfd1e4c200..6f4e43e981 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -228,6 +228,7 @@ class GatewayRunner:
         self._prefill_messages = self._load_prefill_messages()
         self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
         self._reasoning_config = self._load_reasoning_config()
+        self._show_reasoning = self._load_show_reasoning()
         self._provider_routing = self._load_provider_routing()
         self._fallback_model = self._load_fallback_model()
 
@@ -421,6 +422,20 @@ class GatewayRunner:
         logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
         return None
 
+    @staticmethod
+    def _load_show_reasoning() -> bool:
+        """Load show_reasoning toggle from config.yaml display section (False if unset or unreadable)."""
+        try:
+            import yaml as _y
+            cfg_path = _hermes_home / "config.yaml"
+            if cfg_path.exists():
+                with open(cfg_path, encoding="utf-8") as _f:
+                    cfg = _y.safe_load(_f) or {}
+                return bool((cfg.get("display") or {}).get("show_reasoning", False))  # "display:" may be null
+        except Exception as e:
+            logger.warning("Could not load display.show_reasoning from config.yaml: %s", e)
+        return False
+
     @staticmethod
     def _load_background_notifications_mode() -> str:
         """Load background process notification mode from config or env var.
@@ -846,7 +861,7 @@ class GatewayRunner:
                           "personality", "retry", "undo", "sethome", "set-home",
                           "compress", "usage", "insights", "reload-mcp", "reload_mcp",
                           "update", "title", "resume", "provider", "rollback",
-                          "background"}
+                          "background", "reasoning"}
         if command and command in _known_commands:
             await self.hooks.emit(f"command:{command}", {
                 "platform": source.platform.value if source.platform else "",
@@ -911,6 +926,9 @@ class GatewayRunner:
 
         if command == "background":
             return await self._handle_background_command(event)
+
+        if command == "reasoning":
+            return await self._handle_reasoning_command(event)
         
         # User-defined quick commands (bypass agent loop, no LLM call)
         if command:
@@ -1352,7 +1370,20 @@ class GatewayRunner:
             
             response = agent_result.get("final_response", "")
             agent_messages = agent_result.get("messages", [])
-            
+
+            # Prepend reasoning/thinking if display is enabled
+            if getattr(self, "_show_reasoning", False) and response:
+                last_reasoning = agent_result.get("last_reasoning")
+                if last_reasoning:
+                    # Collapse long reasoning to keep messages readable
+                    lines = last_reasoning.strip().splitlines()
+                    if len(lines) > 15:
+                        display_reasoning = "\n".join(lines[:15])
+                        display_reasoning += f"\n_... ({len(lines) - 15} more lines)_"
+                    else:
+                        display_reasoning = last_reasoning.strip()
+                    response = f"💭 **Reasoning:**\n```\n{display_reasoning}\n```\n\n{response}"
+
             # Emit agent:end hook
             await self.hooks.emit("agent:end", {
                 **hook_ctx,
@@ -1543,6 +1574,7 @@ class GatewayRunner:
             "`/resume [name]` — Resume a previously-named session",
             "`/usage` — Show token usage for this session",
             "`/insights [days]` — Show usage insights and analytics",
+            "`/reasoning [level|show|hide]` — Set reasoning effort or toggle display",
             "`/rollback [number]` — List or restore filesystem checkpoints",
             "`/background <prompt>` — Run a prompt in a separate background session",
             "`/reload-mcp` — Reload MCP servers from config",
@@ -2170,6 +2202,88 @@ class GatewayRunner:
             except Exception:
                 pass
 
+    async def _handle_reasoning_command(self, event: MessageEvent) -> str:
+        """Handle /reasoning command — manage reasoning effort and display toggle.
+
+        Usage:
+            /reasoning              Show current effort level and display state
+            /reasoning <level>      Set effort (none, low, minimal, medium, high, xhigh)
+            /reasoning show|on      Show model reasoning in responses
+            /reasoning hide|off     Hide model reasoning from responses
+
+        Returns the Markdown reply text. Settings are changed on this runner and
+        written to config.yaml best-effort; a failed write is logged, not raised.
+        """
+        import yaml
+
+        args = event.get_command_args().strip().lower()
+        config_path = _hermes_home / "config.yaml"
+
+        def _save_config_key(key_path: str, value) -> bool:
+            """Set a dot-separated key in config.yaml; return True on success."""
+            try:
+                user_config = {}
+                if config_path.exists():
+                    with open(config_path, encoding="utf-8") as f:
+                        user_config = yaml.safe_load(f) or {}
+                *parents, leaf = key_path.split(".")
+                current = user_config
+                for k in parents:
+                    # Create (or overwrite a non-mapping) intermediate section.
+                    if k not in current or not isinstance(current[k], dict):
+                        current[k] = {}
+                    current = current[k]
+                current[leaf] = value
+                with open(config_path, "w", encoding="utf-8") as f:
+                    yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
+                return True
+            except Exception as e:
+                logger.error("Failed to save config key %s: %s", key_path, e)
+                return False
+
+        if not args:
+            # Show current state
+            rc = self._reasoning_config
+            if rc is None:
+                level = "medium (default)"
+            elif rc.get("enabled") is False:
+                level = "none (disabled)"
+            else:
+                level = rc.get("effort", "medium")
+            display_state = "on ✓" if self._show_reasoning else "off"
+            return (
+                "🧠 **Reasoning Settings**\n\n"
+                f"**Effort:** `{level}`\n"
+                f"**Display:** {display_state}\n\n"
+                "_Usage:_ `/reasoning <none|low|minimal|medium|high|xhigh|show|hide>`"
+            )
+
+        # Display toggle
+        if args in ("show", "on"):
+            self._show_reasoning = True
+            _save_config_key("display.show_reasoning", True)
+            return "🧠 ✓ Reasoning display: **ON**\nModel thinking will be shown before each response."
+
+        if args in ("hide", "off"):
+            self._show_reasoning = False
+            _save_config_key("display.show_reasoning", False)
+            return "🧠 ✓ Reasoning display: **OFF**"
+
+        # Effort level change (args is already stripped and lower-cased)
+        effort = args
+        if effort not in ("none", "xhigh", "high", "medium", "low", "minimal"):
+            return (
+                f"⚠️ Unknown argument: `{effort}`\n\n"
+                "**Valid levels:** none, low, minimal, medium, high, xhigh\n"
+                "**Display:** show, hide"
+            )
+        parsed = {"enabled": False} if effort == "none" else {"enabled": True, "effort": effort}
+
+        self._reasoning_config = parsed
+        if _save_config_key("agent.reasoning_effort", effort):
+            return f"🧠 ✓ Reasoning effort set to `{effort}` (saved to config)\n_(takes effect on next message)_"
+        return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)"
+
     async def _handle_compress_command(self, event: MessageEvent) -> str:
         """Handle /compress command -- manually compress conversation context."""
         source = event.source
@@ -3273,6 +3387,7 @@ class GatewayRunner:
             
             return {
                 "final_response": final_response,
+                "last_reasoning": result.get("last_reasoning"),
                 "messages": result_holder[0].get("messages", []) if result_holder[0] else [],
                 "api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0,
                 "tools": tools_holder[0] or [],
diff --git a/run_agent.py b/run_agent.py
index cce83f6b6b..608dde94cd 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2442,6 +2442,16 @@ class AIAgent:
         """
         reasoning_text = self._extract_reasoning(assistant_message)
 
+        # Fallback: extract inline <think> blocks from content when no structured
+        # reasoning fields are present (some models/providers embed thinking
+        # directly in the content rather than returning separate API fields).
+        if not reasoning_text:
+            content = assistant_message.content or ""
+            think_blocks = re.findall(r'<think>(.*?)</think>', content, flags=re.DOTALL)
+            if think_blocks:
+                combined = "\n\n".join(b.strip() for b in think_blocks if b.strip())
+                reasoning_text = combined or None
+
         if reasoning_text and self.verbose_logging:
             preview = reasoning_text[:100] + "..." if len(reasoning_text) > 100 else reasoning_text
             logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {preview}")
diff --git a/tests/test_reasoning_command.py b/tests/test_reasoning_command.py
index 2cca80f303..425e28a58c 100644
--- a/tests/test_reasoning_command.py
+++ b/tests/test_reasoning_command.py
@@ -342,6 +342,90 @@ class TestExtractReasoningFormats(unittest.TestCase):
         self.assertIsNone(result)
 
 
+# ---------------------------------------------------------------------------
+# Inline <think> block extraction fallback
+# ---------------------------------------------------------------------------
+
+class TestInlineThinkBlockExtraction(unittest.TestCase):
+    """Inline <think>...</think> content is surfaced as reasoning by
+    _build_assistant_message only when the API supplies no structured reasoning."""
+
+    def _build_msg(self, content, reasoning=None, reasoning_content=None, reasoning_details=None, tool_calls=None):
+        """Build a stand-in API message carrying only the reasoning attributes given."""
+        # Attributes left as None are omitted entirely rather than set to None.
+        optional = {
+            "reasoning": reasoning,
+            "reasoning_content": reasoning_content,
+            "reasoning_details": reasoning_details,
+        }
+        present = {name: value for name, value in optional.items() if value is not None}
+        return SimpleNamespace(content=content, tool_calls=tool_calls, **present)
+
+    def _make_agent(self):
+        """Return an AIAgent-spec mock with the two real methods under test bound to it."""
+        from run_agent import AIAgent
+        stub = MagicMock(spec=AIAgent)
+        stub.verbose_logging = False
+        stub.reasoning_callback = None
+        for method_name in ("_build_assistant_message", "_extract_reasoning"):
+            setattr(stub, method_name, getattr(AIAgent, method_name).__get__(stub))
+        return stub
+
+    def test_single_think_block_extracted(self):
+        message = self._build_msg("<think>Let me calculate 2+2=4.</think>The answer is 4.")
+        built = self._make_agent()._build_assistant_message(message, "stop")
+        # The tags are stripped; only the inner text becomes the reasoning.
+        self.assertEqual(built["reasoning"], "Let me calculate 2+2=4.")
+
+    def test_multiple_think_blocks_extracted(self):
+        message = self._build_msg("<think>First thought.</think>Some text<think>Second thought.</think>More text")
+        built = self._make_agent()._build_assistant_message(message, "stop")
+        # Every block must contribute, not just the first one found.
+        for thought in ("First thought.", "Second thought."):
+            self.assertIn(thought, built["reasoning"])
+
+    def test_no_think_blocks_no_reasoning(self):
+        message = self._build_msg("Just a plain response.")
+        built = self._make_agent()._build_assistant_message(message, "stop")
+        # Neither structured reasoning nor inline think blocks are present,
+        # so the reasoning slot must be None.
+        self.assertIsNone(built["reasoning"])
+
+    def test_structured_reasoning_takes_priority(self):
+        """Structured API reasoning wins; inline think blocks must not replace it."""
+        structured = "Structured reasoning from API."
+        message = self._build_msg(
+            "<think>Inline thought.</think>Response text.",
+            reasoning=structured,
+        )
+        built = self._make_agent()._build_assistant_message(message, "stop")
+        self.assertEqual(built["reasoning"], structured)
+
+    def test_empty_think_block_ignored(self):
+        message = self._build_msg("<think></think>Hello!")
+        built = self._make_agent()._build_assistant_message(message, "stop")
+        # A block with nothing inside it carries no reasoning, so the
+        # result is None rather than an empty string.
+        self.assertIsNone(built["reasoning"])
+
+    def test_multiline_think_block(self):
+        message = self._build_msg("<think>\nStep 1: Analyze.\nStep 2: Solve.\n</think>Done.")
+        built = self._make_agent()._build_assistant_message(message, "stop")
+        # Newlines inside the block must not stop the match.
+        for step in ("Step 1: Analyze.", "Step 2: Solve."):
+            self.assertIn(step, built["reasoning"])
+
+    def test_callback_fires_for_inline_think(self):
+        """The reasoning callback receives the text pulled from an inline think block."""
+        received = []
+        agent = self._make_agent()
+        agent.reasoning_callback = lambda text: received.append(text)
+        message = self._build_msg("<think>Deep analysis here.</think>Answer.")
+        agent._build_assistant_message(message, "stop")
+        self.assertEqual(len(received), 1)
+        self.assertIn("Deep analysis", received[0])
+
+
 # ---------------------------------------------------------------------------
 # Config defaults
 # ---------------------------------------------------------------------------