feat(gateway,cli): confirm /reload-mcp to warn about prompt cache invalidation

Reloading MCP servers rebuilds the tool set for the active session, which invalidates the provider prompt cache (tool schemas are baked into the system prompt). The next message re-sends full input tokens — which can be expensive on long-context or high-reasoning models.

To surface that cost, /reload-mcp now routes through a new slash-confirm primitive with three options: Approve Once / Always Approve / Cancel. 'Always Approve' persists approvals.mcp_reload_confirm: false so future reloads run silently.

Coverage:
* Classic CLI (cli.py) — interactive numbered prompt.
* TUI (tui_gateway + Ink ops.ts) — text warning on first call; `now` / `always` args skip the gate; `always` also persists the opt-out.
* Messenger gateway — button UI on Telegram (inline keyboard), Discord (discord.ui.View), Slack (Block Kit actions); text fallback on every other platform via /approve /always /cancel replies intercepted in gateway/run.py _handle_message.
* Config key: approvals.mcp_reload_confirm (default true).
* Auto-reload paths (CLI file watcher, TUI config-sync mtime poll) pass confirm=true so they do NOT prompt.

Implementation:
* tools/slash_confirm.py — module-level pending-state store used by all adapters and by the CLI prompt. Thread-safe register/resolve/clear.
* gateway/platforms/base.py — send_slash_confirm hook (default 'Not supported' → text fallback).
* gateway/run.py — _request_slash_confirm helper + text intercept in _handle_message (yields to in-progress tool-exec approvals so dangerous-command /approve still unblocks the tool thread first).

Tests:
* tests/tools/test_slash_confirm.py — primitive lifecycle + async resolution + double-click atomicity (16 tests).
* tests/hermes_cli/test_mcp_reload_confirm_gate.py — default-config shape + deep-merge preserves user opt-out (5 tests).

Targeted runs (hermetic): 89 passed (slash-confirm, config gate, existing agent cache, existing telegram approval buttons).
2026-05-02 02:01:47 +00:00 · 2026-04-29 21:20:53 -07:00 · 2026-04-29 21:20:53 -07:00 · 4d7fc0f37c
commit 4d7fc0f37c
parent 7fae87bc00
14 changed files with 1287 additions and 9 deletions
--- a/cli.py
+++ b/cli.py
@ -6286,8 +6286,10 @@ class HermesCLI:
            count = reload_env()
            print(f"  Reloaded .env ({count} var(s) updated)")
        elif canonical == "reload-mcp":
-            with self._busy_command(self._slow_command_status(cmd_original)):
-                self._reload_mcp()
+            # Interactive reload: confirm first (unless the user has opted out).
+            # The auto-reload path (file watcher) calls _reload_mcp directly
+            # without this confirmation.
+            self._confirm_and_reload_mcp(cmd_original)
        elif canonical == "reload-skills":
            with self._busy_command(self._slow_command_status(cmd_original)):
                self._reload_skills()
@ -7417,6 +7419,77 @@ class HermesCLI:
        if _reload_thread.is_alive():
            print("  ⚠️  MCP reload timed out (30s). Some servers may not have reconnected.")

+    def _confirm_and_reload_mcp(self, cmd_original: str = "") -> None:
+        """Interactive /reload-mcp — confirm with the user, then reload.
+
+        Reloading MCP tools invalidates the provider prompt cache for the
+        active session (tool schemas are baked into the system prompt).
+        The next message re-sends full input tokens — can be expensive on
+        long-context or high-reasoning models.
+
+        Three options: Approve Once, Always Approve (persists
+        ``approvals.mcp_reload_confirm: false`` so future reloads run
+        without this prompt), Cancel.  Gated by
+        ``approvals.mcp_reload_confirm`` — default on.
+        """
+        # Gate check — respects prior "Always Approve" clicks.
+        try:
+            cfg = load_cli_config()
+            approvals = cfg.get("approvals") if isinstance(cfg, dict) else None
+            confirm_required = True
+            if isinstance(approvals, dict):
+                confirm_required = bool(approvals.get("mcp_reload_confirm", True))
+        except Exception:
+            confirm_required = True
+
+        if not confirm_required:
+            with self._busy_command(self._slow_command_status(cmd_original)):
+                self._reload_mcp()
+            return
+
+        # Render warning + prompt.  Use a single-line prompt so the user
+        # sees the warning as output and types a response into the composer.
+        print()
+        print("⚠️  /reload-mcp — Prompt cache invalidation warning")
+        print()
+        print("  Reloading MCP servers rebuilds the tool set for this session and")
+        print("  invalidates the provider prompt cache.  The next message will")
+        print("  re-send full input tokens (can be expensive on long-context or")
+        print("  high-reasoning models).")
+        print()
+        print("  [1] Approve Once   — reload now")
+        print("  [2] Always Approve — reload now and silence this prompt permanently")
+        print("  [3] Cancel         — leave MCP tools unchanged")
+        print()
+        raw = self._prompt_text_input("Choice [1/2/3]: ")
+        if raw is None:
+            print("🟡 /reload-mcp cancelled (no input).")
+            return
+        choice_raw = raw.strip().lower()
+        if choice_raw in ("1", "once", "approve", "yes", "y", "ok"):
+            choice = "once"
+        elif choice_raw in ("2", "always", "remember"):
+            choice = "always"
+        elif choice_raw in ("3", "cancel", "nevermind", "no", "n", ""):
+            choice = "cancel"
+        else:
+            print(f"🟡 Unrecognized choice '{raw}'. /reload-mcp cancelled.")
+            return
+
+        if choice == "cancel":
+            print("🟡 /reload-mcp cancelled. MCP tools unchanged.")
+            return
+
+        if choice == "always":
+            if save_config_value("approvals.mcp_reload_confirm", False):
+                print("🔒 Future /reload-mcp calls will run without confirmation.")
+                print("   Re-enable via `approvals.mcp_reload_confirm: true` in config.yaml.")
+            else:
+                print("⚠️  Couldn't persist opt-out — reloading once.")
+
+        with self._busy_command(self._slow_command_status(cmd_original)):
+            self._reload_mcp()
+
    def _reload_mcp(self):
        """Reload MCP servers: disconnect all, re-read config.yaml, reconnect.

--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@ -1415,6 +1415,41 @@ class BasePlatformAdapter(ABC):
        """
        return False

+    async def send_slash_confirm(
+        self,
+        chat_id: str,
+        title: str,
+        message: str,
+        session_key: str,
+        confirm_id: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a three-option slash-command confirmation prompt.
+
+        Used by the gateway's generic slash-confirm primitive (see
+        ``GatewayRunner._request_slash_confirm``) for commands that have a
+        non-destructive but expensive side effect the user should explicitly
+        acknowledge — the current caller is ``/reload-mcp``, which
+        invalidates the provider prompt cache.
+
+        Platforms with inline-button support (Telegram, Discord, Slack,
+        Matrix, Feishu) should override this to render three buttons:
+        Approve Once / Always Approve / Cancel.  Button callbacks MUST
+        resolve the prompt by awaiting the module-level
+        ``tools.slash_confirm.resolve(session_key, confirm_id, choice)``
+        where ``choice`` is ``"once"`` / ``"always"`` / ``"cancel"``.
+
+        Platforms without button UIs leave this as the default and fall
+        through to the gateway's text fallback (which sends ``message`` as
+        plain text and intercepts the next ``/approve`` / ``/always`` /
+        ``/cancel`` reply).
+
+        ``confirm_id`` is a short string generated by the gateway; the
+        adapter stores it alongside any platform-specific state needed to
+        route the callback (e.g. Telegram's ``_slash_confirm_state`` dict).
+        """
+        return SendResult(success=False, error="Not supported")
+
    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """
        Send a typing indicator.
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@ -2910,6 +2910,43 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            return SendResult(success=False, error=str(e))

+    async def send_slash_confirm(
+        self, chat_id: str, title: str, message: str, session_key: str,
+        confirm_id: str, metadata: Optional[dict] = None,
+    ) -> SendResult:
+        """Send a three-button slash-command confirmation prompt."""
+        if not self._client or not DISCORD_AVAILABLE:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            target_id = chat_id
+            if metadata and metadata.get("thread_id"):
+                target_id = metadata["thread_id"]
+
+            channel = self._client.get_channel(int(target_id))
+            if not channel:
+                channel = await self._client.fetch_channel(int(target_id))
+
+            # Embed description limit is 4096; message usually fits easily.
+            max_desc = 4088
+            body = message if len(message) <= max_desc else message[: max_desc - 3] + "..."
+            embed = discord.Embed(
+                title=title or "Confirm",
+                description=body,
+                color=discord.Color.orange(),
+            )
+
+            view = SlashConfirmView(
+                session_key=session_key,
+                confirm_id=confirm_id,
+                allowed_user_ids=self._allowed_user_ids,
+            )
+
+            msg = await channel.send(embed=embed, view=view)
+            return SendResult(success=True, message_id=str(msg.id))
+        except Exception as e:
+            return SendResult(success=False, error=str(e))
+
    async def send_update_prompt(
        self, chat_id: str, prompt: str, default: str = "",
        session_key: str = "",
@ -3643,6 +3680,103 @@ if DISCORD_AVAILABLE:
            for child in self.children:
                child.disabled = True

+    class SlashConfirmView(discord.ui.View):
+        """Three-button view for generic slash-command confirmations.
+
+        Used by ``/reload-mcp`` and any future slash command routed through
+        ``GatewayRunner._request_slash_confirm``.  Buttons map to the
+        gateway's three choices:
+
+          * "Approve Once"   → ``choice="once"``
+          * "Always Approve" → ``choice="always"``
+          * "Cancel"         → ``choice="cancel"``
+
+        Clicking calls the module-level
+        ``tools.slash_confirm.resolve(session_key, confirm_id, choice)``
+        which runs the handler the runner stored for this ``session_key``.
+        Only allowlisted users can click (an empty allowlist permits anyone).
+        Times out after 5 minutes (matches the gateway primitive's timeout).
+        """
+
+        def __init__(self, session_key: str, confirm_id: str, allowed_user_ids: set):
+            super().__init__(timeout=300)
+            self.session_key = session_key
+            self.confirm_id = confirm_id
+            self.allowed_user_ids = allowed_user_ids
+            self.resolved = False
+
+        def _check_auth(self, interaction: discord.Interaction) -> bool:
+            if not self.allowed_user_ids:
+                return True
+            return str(interaction.user.id) in self.allowed_user_ids
+
+        async def _resolve(
+            self, interaction: discord.Interaction, choice: str,
+            color: discord.Color, label: str,
+        ):
+            if self.resolved:
+                await interaction.response.send_message(
+                    "This prompt has already been resolved~", ephemeral=True,
+                )
+                return
+            if not self._check_auth(interaction):
+                await interaction.response.send_message(
+                    "You're not authorized to answer this prompt~", ephemeral=True,
+                )
+                return
+
+            self.resolved = True
+
+            embed = interaction.message.embeds[0] if interaction.message.embeds else None
+            if embed:
+                embed.color = color
+                embed.set_footer(text=f"{label} by {interaction.user.display_name}")
+
+            for child in self.children:
+                child.disabled = True
+
+            await interaction.response.edit_message(embed=embed, view=self)
+
+            # Resolve via the module-level primitive.  If the handler
+            # returns a follow-up message, post it in the same channel.
+            try:
+                from tools import slash_confirm as _slash_confirm_mod
+                result_text = await _slash_confirm_mod.resolve(
+                    self.session_key, self.confirm_id, choice,
+                )
+                if result_text:
+                    await interaction.followup.send(result_text)
+                logger.info(
+                    "Discord button resolved slash-confirm for session %s "
+                    "(choice=%s, user=%s)",
+                    self.session_key, choice, interaction.user.display_name,
+                )
+            except Exception as exc:
+                logger.error("Discord slash-confirm resolve failed: %s", exc, exc_info=True)
+
+        @discord.ui.button(label="Approve Once", style=discord.ButtonStyle.green)
+        async def approve_once(
+            self, interaction: discord.Interaction, button: discord.ui.Button,
+        ):
+            await self._resolve(interaction, "once", discord.Color.green(), "Approved once")
+
+        @discord.ui.button(label="Always Approve", style=discord.ButtonStyle.blurple)
+        async def approve_always(
+            self, interaction: discord.Interaction, button: discord.ui.Button,
+        ):
+            await self._resolve(interaction, "always", discord.Color.purple(), "Always approved")
+
+        @discord.ui.button(label="Cancel", style=discord.ButtonStyle.red)
+        async def cancel(
+            self, interaction: discord.Interaction, button: discord.ui.Button,
+        ):
+            await self._resolve(interaction, "cancel", discord.Color.greyple(), "Cancelled")
+
+        async def on_timeout(self):
+            self.resolved = True
+            for child in self.children:
+                child.disabled = True
+
    class UpdatePromptView(discord.ui.View):
        """Interactive Yes/No buttons for ``hermes update`` prompts.

--- a/gateway/platforms/slack.py
+++ b/gateway/platforms/slack.py
@ -514,6 +514,15 @@ class SlackAdapter(BasePlatformAdapter):
            ):
                self._app.action(_action_id)(self._handle_approval_action)

+            # Register Block Kit action handlers for slash-confirm buttons
+            # (generic three-option prompts; see tools/slash_confirm.py).
+            for _action_id in (
+                "hermes_confirm_once",
+                "hermes_confirm_always",
+                "hermes_confirm_cancel",
+            ):
+                self._app.action(_action_id)(self._handle_slash_confirm_action)
+
            # Start Socket Mode handler in background
            self._handler = AsyncSocketModeHandler(self._app, app_token, proxy=proxy_url)
            _apply_slack_proxy(self._handler.client, proxy_url)
@ -1931,6 +1940,168 @@ class SlackAdapter(BasePlatformAdapter):
            logger.error("[Slack] send_exec_approval failed: %s", e, exc_info=True)
            return SendResult(success=False, error=str(e))

+    async def send_slash_confirm(
+        self, chat_id: str, title: str, message: str, session_key: str,
+        confirm_id: str, metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a Block Kit three-option slash-command confirmation prompt."""
+        if not self._app:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            body = message[:2900] + "..." if len(message) > 2900 else message
+            thread_ts = self._resolve_thread_ts(None, metadata)
+            # Encode session_key and confirm_id into the button value so the
+            # callback handler can resolve without extra bookkeeping.
+            value = f"{session_key}|{confirm_id}"
+
+            blocks = [
+                {
+                    "type": "section",
+                    "text": {
+                        "type": "mrkdwn",
+                        "text": f"*{title or 'Confirm'}*\n\n{body}",
+                    },
+                },
+                {
+                    "type": "actions",
+                    "elements": [
+                        {
+                            "type": "button",
+                            "text": {"type": "plain_text", "text": "Approve Once"},
+                            "style": "primary",
+                            "action_id": "hermes_confirm_once",
+                            "value": value,
+                        },
+                        {
+                            "type": "button",
+                            "text": {"type": "plain_text", "text": "Always Approve"},
+                            "action_id": "hermes_confirm_always",
+                            "value": value,
+                        },
+                        {
+                            "type": "button",
+                            "text": {"type": "plain_text", "text": "Cancel"},
+                            "style": "danger",
+                            "action_id": "hermes_confirm_cancel",
+                            "value": value,
+                        },
+                    ],
+                },
+            ]
+
+            kwargs: Dict[str, Any] = {
+                "channel": chat_id,
+                "text": f"{title or 'Confirm'}: {body[:100]}",
+                "blocks": blocks,
+            }
+            if thread_ts:
+                kwargs["thread_ts"] = thread_ts
+
+            result = await self._get_client(chat_id).chat_postMessage(**kwargs)
+            return SendResult(success=True, message_id=result.get("ts", ""), raw_response=result)
+        except Exception as e:
+            logger.error("[Slack] send_slash_confirm failed: %s", e, exc_info=True)
+            return SendResult(success=False, error=str(e))
+
+    async def _handle_slash_confirm_action(self, ack, body, action) -> None:
+        """Handle a slash-confirm button click from Block Kit."""
+        await ack()
+
+        action_id = action.get("action_id", "")
+        value = action.get("value", "")
+        message = body.get("message", {})
+        msg_ts = message.get("ts", "")
+        channel_id = body.get("channel", {}).get("id", "")
+        user_name = body.get("user", {}).get("name", "unknown")
+        user_id = body.get("user", {}).get("id", "")
+
+        # Authorization — reuse the exec-approval allowlist.
+        allowed_csv = os.getenv("SLACK_ALLOWED_USERS", "").strip()
+        if allowed_csv:
+            allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
+            if "*" not in allowed_ids and user_id not in allowed_ids:
+                logger.warning(
+                    "[Slack] Unauthorized slash-confirm click by %s (%s) — ignoring",
+                    user_name, user_id,
+                )
+                return
+
+        # Parse session_key|confirm_id back out
+        if "|" not in value:
+            logger.warning("[Slack] Malformed slash-confirm value: %s", value)
+            return
+        session_key, confirm_id = value.split("|", 1)
+
+        choice_map = {
+            "hermes_confirm_once": "once",
+            "hermes_confirm_always": "always",
+            "hermes_confirm_cancel": "cancel",
+        }
+        choice = choice_map.get(action_id, "cancel")
+
+        label_map = {
+            "once": f"✅ Approved once by {user_name}",
+            "always": f"🔒 Always approved by {user_name}",
+            "cancel": f"❌ Cancelled by {user_name}",
+        }
+        decision_text = label_map.get(choice, f"Resolved by {user_name}")
+
+        # Pull original prompt body out of the section block so we can show
+        # the decision inline without losing context.
+        original_text = ""
+        for block in message.get("blocks", []):
+            if block.get("type") == "section":
+                original_text = block.get("text", {}).get("text", "")
+                break
+
+        updated_blocks = [
+            {
+                "type": "section",
+                "text": {
+                    "type": "mrkdwn",
+                    "text": original_text or "Confirmation prompt",
+                },
+            },
+            {
+                "type": "context",
+                "elements": [
+                    {"type": "mrkdwn", "text": decision_text},
+                ],
+            },
+        ]
+
+        try:
+            await self._get_client(channel_id).chat_update(
+                channel=channel_id,
+                ts=msg_ts,
+                text=decision_text,
+                blocks=updated_blocks,
+            )
+        except Exception as e:
+            logger.warning("[Slack] Failed to update slash-confirm message: %s", e)
+
+        # Resolve via the module-level primitive and post any follow-up.
+        try:
+            from tools import slash_confirm as _slash_confirm_mod
+            result_text = await _slash_confirm_mod.resolve(session_key, confirm_id, choice)
+            if result_text:
+                post_kwargs: Dict[str, Any] = {
+                    "channel": channel_id,
+                    "text": result_text,
+                }
+                # Inherit the thread so the reply stays in the same place.
+                thread_ts = message.get("thread_ts") or msg_ts
+                if thread_ts:
+                    post_kwargs["thread_ts"] = thread_ts
+                await self._get_client(channel_id).chat_postMessage(**post_kwargs)
+            logger.info(
+                "Slack button resolved slash-confirm for session %s (choice=%s, user=%s)",
+                session_key, choice, user_name,
+            )
+        except Exception as exc:
+            logger.error("Failed to resolve slash-confirm from Slack button: %s", exc, exc_info=True)
+
    async def _handle_approval_action(self, ack, body, action) -> None:
        """Handle an approval button click from Block Kit."""
        await ack()
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@ -286,6 +286,9 @@ class TelegramAdapter(BasePlatformAdapter):
        self._model_picker_state: Dict[str, dict] = {}
        # Approval button state: message_id → session_key
        self._approval_state: Dict[int, str] = {}
+        # Slash-confirm button state: confirm_id → session_key (for /reload-mcp
+        # and any other slash-confirm prompts; see GatewayRunner._request_slash_confirm).
+        self._slash_confirm_state: Dict[str, str] = {}

    @staticmethod
    def _is_callback_user_authorized(user_id: str) -> bool:
@ -1411,6 +1414,48 @@ class TelegramAdapter(BasePlatformAdapter):
            logger.warning("[%s] send_exec_approval failed: %s", self.name, e)
            return SendResult(success=False, error=str(e))

+    async def send_slash_confirm(
+        self, chat_id: str, title: str, message: str, session_key: str,
+        confirm_id: str, metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Render a three-button slash-command confirmation prompt."""
+        if not self._bot:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            # Message body: sent with Markdown parse mode (the message already
+            # contains markdown formatting from the gateway primitive).
+            preview = message if len(message) <= 3800 else message[:3800] + "..."
+
+            keyboard = InlineKeyboardMarkup([
+                [
+                    InlineKeyboardButton("✅ Approve Once", callback_data=f"sc:once:{confirm_id}"),
+                    InlineKeyboardButton("🔒 Always Approve", callback_data=f"sc:always:{confirm_id}"),
+                ],
+                [
+                    InlineKeyboardButton("❌ Cancel", callback_data=f"sc:cancel:{confirm_id}"),
+                ],
+            ])
+
+            thread_id = self._metadata_thread_id(metadata)
+            kwargs: Dict[str, Any] = {
+                "chat_id": int(chat_id),
+                "text": preview,
+                "parse_mode": ParseMode.MARKDOWN,
+                "reply_markup": keyboard,
+                **self._link_preview_kwargs(),
+            }
+            message_thread_id = self._message_thread_id_for_send(thread_id)
+            if message_thread_id is not None:
+                kwargs["message_thread_id"] = message_thread_id
+
+            msg = await self._bot.send_message(**kwargs)
+            self._slash_confirm_state[confirm_id] = session_key
+            return SendResult(success=True, message_id=str(msg.message_id))
+        except Exception as e:
+            logger.warning("[%s] send_slash_confirm failed: %s", self.name, e)
+            return SendResult(success=False, error=str(e))
+
    async def send_model_picker(
        self,
        chat_id: str,
@ -1779,6 +1824,68 @@ class TelegramAdapter(BasePlatformAdapter):
                    logger.error("Failed to resolve gateway approval from Telegram button: %s", exc)
            return

+        # --- Slash-confirm callbacks (sc:choice:confirm_id) ---
+        if data.startswith("sc:"):
+            parts = data.split(":", 2)
+            if len(parts) == 3:
+                choice = parts[1]  # once, always, cancel
+                confirm_id = parts[2]
+
+                caller_id = str(getattr(query.from_user, "id", "")) 
+                if not self._is_callback_user_authorized(caller_id):
+                    await query.answer(text="⛔ You are not authorized to answer this prompt.")
+                    return
+
+                session_key = self._slash_confirm_state.pop(confirm_id, None)
+                if not session_key:
+                    await query.answer(text="This prompt has already been resolved.")
+                    return
+
+                label_map = {
+                    "once": "✅ Approved once",
+                    "always": "🔒 Always approve",
+                    "cancel": "❌ Cancelled",
+                }
+                user_display = getattr(query.from_user, "first_name", "User")
+                label = label_map.get(choice, "Resolved")
+
+                await query.answer(text=label)
+
+                try:
+                    await query.edit_message_text(
+                        text=f"{label} by {user_display}",
+                        parse_mode=ParseMode.MARKDOWN,
+                        reply_markup=None,
+                    )
+                except Exception:
+                    pass
+
+                # Resolve via the module-level primitive.  The runner stored
+                # a handler keyed by session_key; we run it on the event
+                # loop and (if it returns a string) send it as a follow-up
+                # message in the same chat.
+                try:
+                    from tools import slash_confirm as _slash_confirm_mod
+                    result_text = await _slash_confirm_mod.resolve(
+                        session_key, confirm_id, choice,
+                    )
+                    if result_text and query.message:
+                        # Inherit the prompt message's thread so the reply
+                        # lands in the same supergroup topic / reply chain.
+                        thread_id = getattr(query.message, "message_thread_id", None)
+                        send_kwargs: Dict[str, Any] = {
+                            "chat_id": int(query.message.chat_id),
+                            "text": result_text,
+                            "parse_mode": ParseMode.MARKDOWN,
+                            **self._link_preview_kwargs(),
+                        }
+                        if thread_id is not None:
+                            send_kwargs["message_thread_id"] = thread_id
+                        await self._bot.send_message(**send_kwargs)
+                except Exception as exc:
+                    logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True)
+            return
+
        # --- Update prompt callbacks ---
        if not data.startswith("update_prompt:"):
            return
--- a/gateway/run.py
+++ b/gateway/run.py
@ -892,6 +892,14 @@ class GatewayRunner:
        # Key: session_key, Value: True when a prompt is waiting for user input.
        self._update_prompt_pending: Dict[str, bool] = {}

+        # Slash-confirm state lives in tools.slash_confirm (module-level),
+        # so platform adapters can resolve callbacks without a backref to
+        # this runner.  Keep a local counter for confirm_id generation so
+        # IDs stay compact (button callback_data has a 64-byte cap on
+        # some platforms).
+        import itertools as _itertools
+        self._slash_confirm_counter = _itertools.count(1)
+
        # Persistent Honcho managers keyed by gateway session key.
        # This preserves write_frequency="session" semantics across short-lived
        # per-message AIAgent instances.
@ -3805,6 +3813,50 @@ class GatewayRunner:
                    )
                _update_prompts.pop(_quick_key, None)

+        # Intercept messages that are responses to a pending /reload-mcp
+        # (or future) slash-confirm prompt.  Recognized confirm replies are
+        # /approve, /always, /cancel (plus short aliases).  Anything else
+        # falls through to normal dispatch — a stale pending confirm does
+        # NOT block other commands.
+        #
+        # Important: if a dangerous-command approval is ALSO pending (agent
+        # blocked inside tools/approval.py), the tool approval takes
+        # precedence — /approve there unblocks the waiting tool thread.
+        # Slash-confirm only catches /approve when no tool approval is live.
+        from tools import slash_confirm as _slash_confirm_mod
+        _pending_confirm = _slash_confirm_mod.get_pending(_quick_key)
+        _tool_approval_live = False
+        try:
+            from tools.approval import has_blocking_approval
+            _tool_approval_live = has_blocking_approval(_quick_key)
+        except Exception:
+            _tool_approval_live = False
+        if _pending_confirm and not _tool_approval_live:
+            _raw_reply = (event.text or "").strip()
+            _cmd_reply = event.get_command()
+            _confirm_choice = None
+            if _cmd_reply in ("approve", "yes", "ok", "confirm"):
+                _confirm_choice = "once"
+            elif _cmd_reply in ("always", "remember"):
+                _confirm_choice = "always"
+            elif _cmd_reply in ("cancel", "no", "deny", "nevermind"):
+                _confirm_choice = "cancel"
+            elif _raw_reply.lower() in ("approve", "approve once", "once"):
+                _confirm_choice = "once"
+            elif _raw_reply.lower() in ("always", "always approve"):
+                _confirm_choice = "always"
+            elif _raw_reply.lower() in ("cancel", "nevermind", "no"):
+                _confirm_choice = "cancel"
+            if _confirm_choice is not None:
+                _resolved = await _slash_confirm_mod.resolve(
+                    _quick_key, _pending_confirm.get("confirm_id"), _confirm_choice,
+                )
+                return _resolved or ""
+            # Stale pending + unrelated command: drop the pending state so
+            # the confirm doesn't block normal usage indefinitely.  The user
+            # clearly moved on.
+            _slash_confirm_mod.clear_if_stale(_quick_key)
+
        # PRIORITY handling when an agent is already running for this session.
        # Default behavior is to interrupt immediately so user text/stop messages
        # are handled with minimal latency.
@ -8200,8 +8252,91 @@ class GatewayRunner:
            logger.error("Insights command error: %s", e, exc_info=True)
            return f"Error generating insights: {e}"

-    async def _handle_reload_mcp_command(self, event: MessageEvent) -> str:
-        """Handle /reload-mcp command -- disconnect and reconnect all MCP servers."""
+    async def _handle_reload_mcp_command(self, event: MessageEvent) -> Optional[str]:
+        """Handle /reload-mcp — reconnect MCP servers and rebuild the cached agent.
+
+        Reloading MCP tools invalidates the provider prompt cache for the
+        active session (tool schemas are baked into the system prompt).  The
+        next message re-sends full input tokens, which is expensive on
+        long-context or high-reasoning models.
+
+        To surface that cost, the command routes through the slash-confirm
+        primitive: users get an Approve Once / Always Approve / Cancel
+        prompt before the reload actually runs.  "Always Approve" persists
+        ``approvals.mcp_reload_confirm: false`` so the prompt is silenced
+        for subsequent reloads in any session.
+
+        Users can also skip the confirm by flipping the config key directly.
+        """
+        session_key = self._session_key_for_source(event.source)
+
+        # Read the gate fresh from disk so a prior "always" click takes
+        # effect on the next invocation without restarting the gateway.
+        user_config = self._read_user_config()
+        approvals = user_config.get("approvals") if isinstance(user_config, dict) else None
+        confirm_required = True
+        if isinstance(approvals, dict):
+            confirm_required = bool(approvals.get("mcp_reload_confirm", True))
+
+        if not confirm_required:
+            return await self._execute_mcp_reload(event)
+
+        # Route through slash-confirm: the primitive sends the prompt and
+        # stores the resume handler; the button/text reply reaches
+        # ``tools.slash_confirm.resolve``, which runs the handler with the choice.
+        async def _on_confirm(choice: str) -> Optional[str]:
+            if choice == "cancel":
+                return "🟡 /reload-mcp cancelled. MCP tools unchanged."
+            persisted = False
+            if choice == "always":
+                # Persist the opt-out first; the reload below runs even if saving fails.
+                try:
+                    from cli import save_config_value
+                    save_config_value("approvals.mcp_reload_confirm", False)
+                    persisted = True
+                    logger.info(
+                        "User opted out of /reload-mcp confirmation (session=%s)", session_key,
+                    )
+                except Exception as exc:
+                    logger.warning("Failed to persist mcp_reload_confirm=false: %s", exc)
+            result = await self._execute_mcp_reload(event)
+            if choice != "always":
+                return result
+            note = (
+                "ℹ️ Future `/reload-mcp` calls will run without confirmation. "
+                "Re-enable via `approvals.mcp_reload_confirm: true` in config.yaml."
+            ) if persisted else (
+                "⚠️ Could not save the opt-out; `/reload-mcp` will ask again next time."
+            )
+            return f"{result}\n\n{note}"
+
+        prompt_message = (
+            "⚠️ **Confirm /reload-mcp**\n\n"
+            "Reloading MCP servers rebuilds the tool set for this session "
+            "and **invalidates the provider prompt cache** — the next "
+            "message will re-send full input tokens.  On long-context or "
+            "high-reasoning models this can be expensive.\n\n"
+            "Choose:\n"
+            "• **Approve Once** — reload now\n"
+            "• **Always Approve** — reload now and silence this prompt permanently\n"
+            "• **Cancel** — leave MCP tools unchanged\n\n"
+            "_Text fallback: reply `/approve`, `/always`, or `/cancel`._"
+        )
+        return await self._request_slash_confirm(
+            event=event,
+            command="reload-mcp",
+            title="/reload-mcp",
+            message=prompt_message,
+            handler=_on_confirm,
+        )
+
+    async def _execute_mcp_reload(self, event: MessageEvent) -> str:
+        """Actually disconnect, reconnect, and notify MCP tool changes.
+
+        Split out from ``_handle_reload_mcp_command`` so the confirmation
+        wrapper can invoke the same path whether the user confirmed via
+        button, text reply, or has the confirm gate disabled.
+        """
        loop = asyncio.get_running_loop()
        try:
            from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _servers, _lock
@ -8343,6 +8478,102 @@ class GatewayRunner:
            logger.warning("Skills reload failed: %s", e)
            return f"❌ Skills reload failed: {e}"

+    # ------------------------------------------------------------------
+    # Slash-command confirmation primitive (generic)
+    # ------------------------------------------------------------------
+    # Used by slash commands that have a non-destructive but expensive
+    # side effect worth an explicit user confirmation (currently only
+    # /reload-mcp, which invalidates the prompt cache).  Two delivery
+    # paths:
+    #   1. Button UI — adapters that override ``send_slash_confirm``
+    #      (currently Telegram, Discord, Slack) render three
+    #      inline buttons.  The adapter routes the button click back via
+    #      ``tools.slash_confirm.resolve(session_key, confirm_id, choice)``.
+    #   2. Text fallback — adapters that don't override the hook get a
+    #      plain text prompt.  Users reply with /approve, /always, or
+    #      /cancel; the early intercept in ``_handle_message`` matches
+    #      those replies against ``tools.slash_confirm.get_pending()``.
+
+    async def _request_slash_confirm(
+        self,
+        *,
+        event: MessageEvent,
+        command: str,
+        title: str,
+        message: str,
+        handler,
+    ) -> Optional[str]:
+        """Register a pending slash-command confirmation and prompt the user.
+
+        ``handler`` is an async callable taking the user's choice
+        (``"once"``, ``"always"``, or ``"cancel"``); it is stored via
+        ``tools.slash_confirm.register`` before any prompt is sent.
+
+        The platform adapter's ``send_slash_confirm`` is tried first.  If
+        its result reports ``success``, returns ``None`` (nothing further
+        to send).  If there is no adapter, the result is not successful, or
+        the call raises, returns ``message`` so it can be delivered as a
+        plain-text prompt.
+        """
+        from tools import slash_confirm as _confirm_store
+
+        src = event.source
+        key = self._session_key_for_source(src)
+        cid = str(next(self._slash_confirm_counter))
+
+        # The pending entry must exist before the prompt goes out, so an
+        # immediate button click always finds something to resolve instead
+        # of racing the return of send_slash_confirm.
+        _confirm_store.register(key, cid, command, handler)
+
+        platform_adapter = self.adapters.get(src.platform)
+        thread_meta = self._thread_metadata_for_source(src)
+
+        # Prefer the adapter's button UI.  A missing adapter, an
+        # unsuccessful send, or an exception all fall through to text.
+        buttons_sent = False
+        if platform_adapter is not None:
+            try:
+                outcome = await platform_adapter.send_slash_confirm(
+                    chat_id=src.chat_id,
+                    title=title,
+                    message=message,
+                    session_key=key,
+                    confirm_id=cid,
+                    metadata=thread_meta,
+                )
+                buttons_sent = bool(outcome and getattr(outcome, "success", False))
+            except Exception as exc:
+                logger.debug(
+                    "send_slash_confirm failed for %s on %s: %s",
+                    command, src.platform, exc,
+                )
+
+        # With buttons on screen a text ack would be redundant (None);
+        # otherwise the prompt itself is returned as the direct reply.
+        return None if buttons_sent else message
+
+    def _read_user_config(self) -> Dict[str, Any]:
+        """Load the user config via ``load_config()`` for gate lookups.
+
+        Best-effort: a load failure is logged and ``{}`` is returned.
+        """
+        try:
+            from hermes_cli.config import load_config
+            cfg = load_config()
+        except Exception as exc:
+            logger.warning("Could not load user config for gate lookup: %s", exc)
+            return {}
+        return cfg if isinstance(cfg, dict) else {}
+
+    def _thread_metadata_for_source(self, source) -> Optional[Dict[str, Any]]:
+        """Return ``{"thread_id": ...}`` for a threaded source, else ``None``."""
+        # A missing attribute and an explicit ``None`` both mean "no thread";
+        # falsy-but-present ids such as ``0`` are kept.
+        tid = getattr(source, "thread_id", None)
+        return None if tid is None else {"thread_id": tid}
+
+
    # ------------------------------------------------------------------
    # /approve & /deny — explicit dangerous-command approval
    # ------------------------------------------------------------------
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -1018,6 +1018,14 @@ DEFAULT_CONFIG = {
        "mode": "manual",
        "timeout": 60,
        "cron_mode": "deny",
+        # When true, /reload-mcp asks the user to confirm before rebuilding
+        # the MCP tool set for the active session.  Reloading invalidates
+        # the provider prompt cache (tool schemas are baked into the system
+        # prompt), so the next message re-sends full input tokens — this can
+        # be expensive on long-context or high-reasoning models.  Users click
+        # "Always Approve" to silence the prompt permanently; that flips
+        # this key to false.
+        "mcp_reload_confirm": True,
    },

    # Permanently allowed dangerous command patterns (added via "always" approval)
--- a/tests/hermes_cli/test_mcp_reload_confirm_gate.py
+++ b/tests/hermes_cli/test_mcp_reload_confirm_gate.py
@ -0,0 +1,91 @@
+"""Tests for the approvals.mcp_reload_confirm config gate.
+
+When the user runs /reload-mcp, the MCP tool set is rebuilt which
+invalidates the provider prompt cache for the active session.  That's
+expensive on long-context / high-reasoning models.  The config gate
+adds a three-option confirmation (Approve Once / Always Approve /
+Cancel); "Always Approve" flips this key to false so subsequent reloads
+run silently.
+"""
+
+from __future__ import annotations
+
+from copy import deepcopy
+
+from hermes_cli.config import DEFAULT_CONFIG
+
+
+class TestMcpReloadConfirmDefault:
+    def test_default_config_has_the_key(self):
+        section = DEFAULT_CONFIG.get("approvals")
+        assert isinstance(section, dict)
+        assert "mcp_reload_confirm" in section
+
+    def test_default_is_true(self):
+        # Fresh installs must prompt: confirming is the safe default.
+        gate = DEFAULT_CONFIG["approvals"]["mcp_reload_confirm"]
+        assert gate is True
+
+    def test_shape_matches_other_approval_keys(self):
+        # The gate sits beside `mode` / `timeout` / `cron_mode`, not nested.
+        section = DEFAULT_CONFIG["approvals"]
+        expected = {"mode": str, "timeout": int, "cron_mode": str, "mcp_reload_confirm": bool}
+        for key, kind in expected.items():
+            assert isinstance(section.get(key), kind)
+
+
+class TestUserConfigMerge:
+    """``load_config`` deep-merges the user's config.yaml over
+    DEFAULT_CONFIG: a legacy file without the key inherits the default,
+    while an explicit user value is preserved.
+    """
+
+    @staticmethod
+    def _load_with_approvals(tmp_path, monkeypatch, approvals):
+        """Write ``approvals`` into a temp HERMES_HOME; return load_config().
+
+        ``hermes_cli.config`` is reloaded so it honors the patched
+        HERMES_HOME, then reloaded once more after the env var is restored
+        so the temp path cannot leak into later tests.
+        """
+        import importlib
+
+        import yaml
+
+        import hermes_cli.config as cfg_mod
+
+        home = tmp_path / ".hermes"
+        home.mkdir()
+        (home / "config.yaml").write_text(yaml.safe_dump({"approvals": approvals}))
+
+        try:
+            with monkeypatch.context() as patched:
+                patched.setenv("HERMES_HOME", str(home))
+                importlib.reload(cfg_mod)
+                return cfg_mod.load_config()
+        finally:
+            # The env var is back to its prior value here; re-sync the module.
+            importlib.reload(cfg_mod)
+
+    def test_existing_user_config_without_key_gets_default(self, tmp_path, monkeypatch):
+        # Simulate a legacy user config written before the key existed.
+        legacy = {"mode": "manual", "timeout": 60, "cron_mode": "deny"}
+
+        cfg = self._load_with_approvals(tmp_path, monkeypatch, legacy)
+        assert cfg["approvals"]["mcp_reload_confirm"] is True
+
+    def test_existing_user_config_with_false_key_survives_merge(
+        self, tmp_path, monkeypatch,
+    ):
+        """A user who has clicked "Always Approve" (key=false) must keep
+        that setting across reloads — the default_true value must not win.
+        """
+        opted_out = {
+            "mode": "manual",
+            "timeout": 60,
+            "cron_mode": "deny",
+            "mcp_reload_confirm": False,
+        }
+
+        cfg = self._load_with_approvals(tmp_path, monkeypatch, opted_out)
+        assert cfg["approvals"]["mcp_reload_confirm"] is False
--- a/tests/tools/test_slash_confirm.py
+++ b/tests/tools/test_slash_confirm.py
@ -0,0 +1,197 @@
+"""Tests for tools/slash_confirm.py — the generic slash-command confirmation primitive.
+
+Covers register/resolve/clear lifecycle, stale-entry behavior, confirm_id
+mismatch, handler exceptions, and async resolution.
+"""
+
+import asyncio
+import time
+
+import pytest
+
+from tools import slash_confirm
+
+
+@pytest.fixture(autouse=True)
+def _clean_pending():
+    """Empty the module-level ``_pending`` store before and after each test."""
+    slash_confirm._pending.clear()
+    yield
+    slash_confirm._pending.clear()
+
+
+class TestRegisterAndGetPending:
+    def test_register_stores_entry(self):
+        async def on_choice(choice):
+            return f"got {choice}"
+
+        slash_confirm.register("sess1", "cid1", "reload-mcp", on_choice)
+
+        entry = slash_confirm.get_pending("sess1")
+        assert entry is not None
+        assert entry["confirm_id"] == "cid1"
+        assert entry["command"] == "reload-mcp"
+        assert entry["handler"] is on_choice
+        assert "created_at" in entry
+
+    def test_get_pending_missing_returns_none(self):
+        assert slash_confirm.get_pending("nobody") is None
+
+    def test_register_supersedes_prior_entry(self):
+        async def older(choice):
+            return "first"
+
+        async def newer(choice):
+            return "second"
+
+        slash_confirm.register("sess1", "cid1", "reload-mcp", older)
+        slash_confirm.register("sess1", "cid2", "reload-mcp", newer)
+
+        entry = slash_confirm.get_pending("sess1")
+        assert entry["confirm_id"] == "cid2"
+        assert entry["handler"] is newer
+
+    def test_get_pending_returns_copy_not_reference(self):
+        async def noop(choice):
+            return "x"
+
+        slash_confirm.register("sess1", "cid1", "cmd", noop)
+
+        snapshot = slash_confirm.get_pending("sess1")
+        snapshot["command"] = "mutated"
+
+        fresh = slash_confirm.get_pending("sess1")
+        assert fresh["command"] == "cmd"
+
+
+class TestResolve:
+    @pytest.mark.asyncio
+    async def test_resolve_runs_handler_and_pops_entry(self):
+        seen = []
+
+        async def on_choice(choice):
+            seen.append(choice)
+            return f"resolved {choice}"
+
+        slash_confirm.register("sess1", "cid1", "reload-mcp", on_choice)
+
+        outcome = await slash_confirm.resolve("sess1", "cid1", "once")
+        assert outcome == "resolved once"
+        assert seen == ["once"]
+
+        # Resolving consumes the pending entry.
+        assert slash_confirm.get_pending("sess1") is None
+
+    @pytest.mark.asyncio
+    async def test_resolve_no_pending_returns_none(self):
+        outcome = await slash_confirm.resolve("sess1", "cid1", "once")
+        assert outcome is None
+
+    @pytest.mark.asyncio
+    async def test_resolve_confirm_id_mismatch_returns_none(self):
+        async def must_not_run(choice):
+            return "should not run"
+
+        slash_confirm.register("sess1", "cid_real", "cmd", must_not_run)
+
+        outcome = await slash_confirm.resolve("sess1", "cid_wrong", "once")
+        assert outcome is None
+
+        # A mismatched id leaves the registered entry untouched.
+        assert slash_confirm.get_pending("sess1") is not None
+
+    @pytest.mark.asyncio
+    async def test_resolve_stale_entry_returns_none(self):
+        async def must_not_run(choice):
+            return "should not run"
+
+        slash_confirm.register("sess1", "cid1", "cmd", must_not_run)
+        # Backdate the entry well past the default timeout.
+        slash_confirm._pending["sess1"]["created_at"] = time.time() - 10000
+
+        outcome = await slash_confirm.resolve("sess1", "cid1", "once")
+        assert outcome is None
+
+    @pytest.mark.asyncio
+    async def test_resolve_handler_exception_returns_error_string(self):
+        async def exploding(choice):
+            raise RuntimeError("boom")
+
+        slash_confirm.register("sess1", "cid1", "cmd", exploding)
+
+        outcome = await slash_confirm.resolve("sess1", "cid1", "once")
+        assert outcome is not None
+        assert "boom" in outcome
+        # A raising handler still consumes the entry.
+        assert slash_confirm.get_pending("sess1") is None
+
+    @pytest.mark.asyncio
+    async def test_resolve_non_string_return_becomes_none(self):
+        async def returns_dict(choice):
+            return {"not": "a string"}
+
+        slash_confirm.register("sess1", "cid1", "cmd", returns_dict)
+        outcome = await slash_confirm.resolve("sess1", "cid1", "once")
+        assert outcome is None
+
+    @pytest.mark.asyncio
+    async def test_resolve_double_click_only_runs_handler_once(self):
+        seen = []
+
+        async def on_choice(choice):
+            seen.append(choice)
+            return "ran"
+
+        slash_confirm.register("sess1", "cid1", "cmd", on_choice)
+
+        # Two resolve() calls for the same confirm, scheduled together.
+        outcomes = await asyncio.gather(
+            slash_confirm.resolve("sess1", "cid1", "once"),
+            slash_confirm.resolve("sess1", "cid1", "once"),
+        )
+        # Only one of them may reach the handler.
+        assert seen == ["once"]
+        assert list(outcomes).count("ran") == 1
+
+
+class TestClear:
+    def test_clear_removes_entry(self):
+        async def noop(choice):
+            return "x"
+
+        slash_confirm.register("sess1", "cid1", "cmd", noop)
+        assert slash_confirm.get_pending("sess1") is not None
+
+        slash_confirm.clear("sess1")
+        assert slash_confirm.get_pending("sess1") is None
+
+    def test_clear_missing_is_noop(self):
+        # Clearing an unknown session must simply return.
+        slash_confirm.clear("nobody")
+
+
+class TestClearIfStale:
+    def test_clears_stale_entry(self):
+        async def noop(choice):
+            return "x"
+
+        slash_confirm.register("sess1", "cid1", "cmd", noop)
+        slash_confirm._pending["sess1"]["created_at"] = time.time() - 10000
+
+        dropped = slash_confirm.clear_if_stale("sess1", timeout=300)
+        assert dropped is True
+        assert slash_confirm.get_pending("sess1") is None
+
+    def test_preserves_fresh_entry(self):
+        async def noop(choice):
+            return "x"
+
+        slash_confirm.register("sess1", "cid1", "cmd", noop)
+
+        dropped = slash_confirm.clear_if_stale("sess1", timeout=300)
+        assert dropped is False
+        assert slash_confirm.get_pending("sess1") is not None
+
+    def test_returns_false_for_missing_entry(self):
+        dropped = slash_confirm.clear_if_stale("nobody")
+        assert dropped is False
--- a/tools/slash_confirm.py
+++ b/tools/slash_confirm.py
@ -0,0 +1,162 @@
+"""Generic slash-command confirmation primitive (gateway-side).
+
+Slash commands that have a non-destructive but expensive side effect worth
+surfacing to the user (currently only ``/reload-mcp``, which invalidates
+the provider prompt cache) route through this module.
+
+Two delivery paths:
+
+  1. Button UI — adapters that override ``send_slash_confirm`` render
+     three inline buttons (Approve Once / Always Approve / Cancel).  The
+     button callback calls ``resolve(session_key, confirm_id, choice)``.
+
+  2. Text fallback — adapters without button UIs get a plain text prompt.
+     Users reply with ``/approve``, ``/always``, or ``/cancel``; the
+     gateway's ``_handle_message`` intercepts those replies and calls
+     ``resolve()`` directly.
+
+State is stored module-level (like ``tools.approval``) so platform
+adapters can resolve callbacks without needing a backreference to the
+``GatewayRunner`` instance.  The CLI path (``cli.py``) uses a local
+synchronous variant — see ``_prompt_slash_confirm`` there.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import threading
+import time
+from typing import Any, Awaitable, Callable, Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+# Pending confirmations keyed by gateway session_key.  Each entry:
+#   {
+#       "confirm_id": str,
+#       "command":    str,                       # e.g. "reload-mcp"
+#       "handler":    Callable[[str], Awaitable[Optional[str]]],
+#       "created_at": float,                     # time.time()
+#   }
+_pending: Dict[str, Dict[str, Any]] = {}
+_lock = threading.RLock()
+
+# Default timeout — ``resolve()`` rejects, and ``clear_if_stale()`` drops,
+# a pending confirm older than this.  The limit applies to button clicks
+# too, however long the platform keeps the button itself alive
+# (Telegram: ~48h; Discord: ephemeral; Slack: 3s ack + long-lived actions).
+DEFAULT_TIMEOUT_SECONDS = 300
+
+
+def register(
+    session_key: str,
+    confirm_id: str,
+    command: str,
+    handler: Callable[[str], Awaitable[Optional[str]]],
+) -> None:
+    """Store a pending confirmation for ``session_key``.
+
+    Any earlier pending confirm for that session is replaced (newest wins).
+    """
+    with _lock:
+        entry: Dict[str, Any] = {
+            "confirm_id": confirm_id,
+            "command": command,
+            "handler": handler,
+            "created_at": time.time(),
+        }
+        _pending[session_key] = entry
+
+
+def get_pending(session_key: str) -> Optional[Dict[str, Any]]:
+    """Return a shallow copy of the session's pending confirm, or None."""
+    with _lock:
+        found = _pending.get(session_key)
+        return None if not found else dict(found)
+
+
+def clear(session_key: str) -> None:
+    """Remove the session's pending confirm, if any, without calling its handler."""
+    with _lock:
+        _pending.pop(session_key, None)
+
+
+def clear_if_stale(session_key: str, timeout: float = DEFAULT_TIMEOUT_SECONDS) -> bool:
+    """Evict the session's pending confirm once it has outlived ``timeout``.
+
+    Returns True only when an entry existed and was evicted.
+    """
+    with _lock:
+        entry = _pending.get(session_key)
+        if not entry:
+            return False
+        expired = time.time() - float(entry.get("created_at", 0) or 0) > timeout
+        if expired:
+            del _pending[session_key]
+        return expired
+
+
+async def resolve(
+    session_key: str,
+    confirm_id: str,
+    choice: str,
+    timeout: float = DEFAULT_TIMEOUT_SECONDS,
+) -> Optional[str]:
+    """Resolve a pending confirm by running its handler with ``choice``.
+
+    ``choice`` is passed to the handler unvalidated; callers are expected
+    to send ``"once"``, ``"always"``, or ``"cancel"``.  Returns the
+    handler's string result, an error string if the handler raises, or
+    ``None`` when nothing is pending, ``confirm_id`` does not match, the
+    entry is older than ``timeout`` (it is discarded), or the handler
+    returns a non-string.  The entry is removed before the handler runs,
+    so a duplicate callback for the same confirm is a no-op.
+    """
+    with _lock:
+        entry = _pending.get(session_key)
+        if not entry:
+            return None
+        if entry.get("confirm_id") != confirm_id:
+            # Stale confirm_id — superseded by a newer prompt on the same session.
+            return None
+        # Pop before we run the handler to prevent duplicate callbacks
+        # (e.g. button double-click) from running it twice.
+        _pending.pop(session_key, None)
+        if time.time() - float(entry.get("created_at", 0) or 0) > timeout:
+            return None
+        handler = entry.get("handler")
+        command = entry.get("command", "?")
+
+    if not handler:
+        return None
+    try:
+        result = await handler(choice)
+    except Exception as exc:
+        logger.error(
+            "Slash-confirm handler for /%s raised: %s",
+            command, exc, exc_info=True,
+        )
+        return f"❌ Error handling confirmation: {exc}"
+    return result if isinstance(result, str) else None
+
+
+def resolve_sync_compat(
+    loop: asyncio.AbstractEventLoop,
+    session_key: str,
+    confirm_id: str,
+    choice: str,
+) -> Optional[str]:
+    """Blocking wrapper: run ``resolve()`` on ``loop`` from another thread.
+
+    Waits up to 30 seconds; returns ``None`` on timeout or other failure.
+    Never call this on the loop's own thread — ``await resolve()`` there.
+    """
+    try:
+        fut = asyncio.run_coroutine_threadsafe(
+            resolve(session_key, confirm_id, choice), loop,
+        )
+        return fut.result(timeout=30)
+    except Exception as exc:
+        # %r: a timeout's str() is empty and would log a blank reason.
+        logger.error("resolve_sync_compat failed: %r", exc)
+        return None
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -3744,6 +3744,40 @@ def _(rid, params: dict) -> dict:
 def _(rid, params: dict) -> dict:
    session = _sessions.get(params.get("session_id", ""))
    try:
+        # Gate: /reload-mcp invalidates the prompt cache for this session.
+        # Respect the ``approvals.mcp_reload_confirm`` config toggle — if
+        # set (default true) AND the caller did not pass ``confirm=true``
+        # in params, surface a warning to the transcript instead of just
+        # reloading silently.  Users pass confirm=true either by
+        # re-invoking after reading the warning, or by setting the
+        # config key to false permanently.
+        user_confirm = bool(params.get("confirm", False))
+        if not user_confirm:
+            try:
+                from hermes_cli.config import load_config as _load_config
+                _cfg = _load_config()
+                _approvals = _cfg.get("approvals") if isinstance(_cfg, dict) else None
+                _confirm_required = True
+                if isinstance(_approvals, dict):
+                    _confirm_required = bool(_approvals.get("mcp_reload_confirm", True))
+            except Exception:
+                _confirm_required = True
+            if _confirm_required:
+                # Return a structured response the Ink client can surface
+                # as a warning/confirmation without actually reloading yet.
+                # Ink's ops.ts reads ``status`` and prints ``message`` to
+                # the transcript; a follow-up invocation with confirm=true
+                # (or an `always` choice that flips the config) proceeds.
+                return _ok(rid, {
+                    "status": "confirm_required",
+                    "message": (
+                        "⚠️  /reload-mcp invalidates the prompt cache (next "
+                        "message re-sends full input tokens). Reply `/reload-mcp "
+                        "now` to proceed, or `/reload-mcp always` to proceed and "
+                        "silence this prompt permanently."
+                    ),
+                })
+
        from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools

        shutdown_mcp_servers()
@ -3753,6 +3787,15 @@ def _(rid, params: dict) -> dict:
            if hasattr(agent, "refresh_tools"):
                agent.refresh_tools()
            _emit("session.info", params.get("session_id", ""), _session_info(agent))
+
+        # Honor `always=true` by persisting the opt-out to config.
+        if bool(params.get("always", False)):
+            try:
+                from cli import save_config_value as _save_cfg
+                _save_cfg("approvals.mcp_reload_confirm", False)
+            except Exception as _exc:
+                logger.warning("Failed to persist mcp_reload_confirm=false: %s", _exc)
+
        return _ok(rid, {"status": "reloaded"})
    except Exception as e:
        return _err(rid, 5015, str(e))
--- a/ui-tui/src/app/slash/commands/ops.ts
+++ b/ui-tui/src/app/slash/commands/ops.ts
@ -76,14 +76,39 @@ export const opsCommands: SlashCommand[] = [

  {
    aliases: ['reload_mcp'],
-    help: 'reload MCP servers in the live session',
+    help: 'reload MCP servers in the live session (warns about prompt cache invalidation)',
    name: 'reload-mcp',
-    run: (_arg, ctx) => {
+    run: (arg, ctx) => {
+      // Parse arg: `now` / `always` skip the confirmation gate.
+      // `always` additionally persists approvals.mcp_reload_confirm=false.
+      const a = (arg || '').trim().toLowerCase()
+      const params: { session_id: string; confirm?: boolean; always?: boolean } = {
+        session_id: ctx.sid
+      }
+      if (a === 'now' || a === 'approve' || a === 'once' || a === 'yes') {
+        params.confirm = true
+      } else if (a === 'always') {
+        params.confirm = true
+        params.always = true
+      }
+
      ctx.gateway
-        .rpc<ReloadMcpResponse>('reload.mcp', { session_id: ctx.sid })
+        .rpc<ReloadMcpResponse>('reload.mcp', params)
        .then(
          ctx.guarded<ReloadMcpResponse>(r => {
-            ctx.transcript.sys(r.status === 'reloaded' ? 'MCP servers reloaded' : 'reload complete')
+            if (r.status === 'confirm_required') {
+              ctx.transcript.sys(r.message || '/reload-mcp requires confirmation')
+              return
+            }
+            if (r.status === 'reloaded') {
+              ctx.transcript.sys(
+                params.always
+                  ? 'MCP servers reloaded · future /reload-mcp will run without confirmation'
+                  : 'MCP servers reloaded'
+              )
+              return
+            }
+            ctx.transcript.sys('reload complete')
          })
        )
        .catch(ctx.guardedErr)
--- a/ui-tui/src/app/useConfigSync.ts
+++ b/ui-tui/src/app/useConfigSync.ts
@ -151,7 +151,7 @@ export function useConfigSync({ gw, setBellOnComplete, setVoiceEnabled, sid }: U

        mtimeRef.current = next

-        quietRpc<ReloadMcpResponse>(gw, 'reload.mcp', { session_id: sid }).then(
+        quietRpc<ReloadMcpResponse>(gw, 'reload.mcp', { session_id: sid, confirm: true }).then(
          r => r && turnController.pushActivity('MCP reloaded after config change')
        )
        quietRpc<ConfigFullResponse>(gw, 'config.get', { key: 'full' }).then(r => applyDisplay(r, setBellOnComplete))
--- a/ui-tui/src/gatewayTypes.ts
+++ b/ui-tui/src/gatewayTypes.ts
@ -320,6 +320,7 @@ export interface ModelOptionsResponse {

 export interface ReloadMcpResponse {
   status?: string
+  message?: string // warning text sent with status 'confirm_required'
 }

 export interface ReloadEnvResponse {