feat(gateway,cli): confirm /reload-mcp to warn about prompt cache invalidation

Reloading MCP servers rebuilds the tool set for the active session, which
invalidates the provider prompt cache (tool schemas are baked into the
system prompt). The next message re-sends the full input tokens, which
can be expensive on long-context or high-reasoning models.

To surface that cost, /reload-mcp now routes through a new slash-confirm
primitive with three options: Approve Once / Always Approve / Cancel.
'Always Approve' persists approvals.mcp_reload_confirm: false so future
reloads run silently.

Coverage:

* Classic CLI (cli.py) — interactive numbered prompt.
* TUI (tui_gateway + Ink ops.ts) — text warning on first call; `now` /
  `always` args skip the gate; `always` also persists the opt-out.
* Messenger gateway — button UI on Telegram (inline keyboard), Discord
  (discord.ui.View), Slack (Block Kit actions); text fallback on every
  other platform via /approve /always /cancel replies intercepted in
  gateway/run.py _handle_message.
* Config key: approvals.mcp_reload_confirm (default true).
* Auto-reload paths (CLI file watcher, TUI config-sync mtime poll) pass
  confirm=true so they do NOT prompt.

Implementation:

* tools/slash_confirm.py — module-level pending-state store used by all
  adapters and by the CLI prompt. Thread-safe register/resolve/clear.
* gateway/platforms/base.py — send_slash_confirm hook (default 'Not
  supported' → text fallback).
* gateway/run.py — _request_slash_confirm helper + text intercept in
  _handle_message (yields to in-progress tool-exec approvals so
  dangerous-command /approve still unblocks the tool thread first).

Tests:

* tests/tools/test_slash_confirm.py — primitive lifecycle + async
  resolution + double-click atomicity (16 tests).
* tests/hermes_cli/test_mcp_reload_confirm_gate.py — default-config
  shape + deep-merge preserves user opt-out (5 tests).

Targeted runs (hermetic): 89 passed (slash-confirm, config gate,
existing agent cache, existing telegram approval buttons).
This commit is contained in:
Teknium 2026-04-29 21:20:53 -07:00
parent 7fae87bc00
commit 4d7fc0f37c
14 changed files with 1287 additions and 9 deletions

View file

@ -0,0 +1,91 @@
"""Tests for the approvals.mcp_reload_confirm config gate.
When the user runs /reload-mcp, the MCP tool set is rebuilt which
invalidates the provider prompt cache for the active session. That's
expensive on long-context / high-reasoning models. The config gate
adds a three-option confirmation (Approve Once / Always Approve /
Cancel); "Always Approve" flips this key to false so subsequent reloads
run silently.
"""
from __future__ import annotations
from copy import deepcopy
from hermes_cli.config import DEFAULT_CONFIG
class TestMcpReloadConfirmDefault:
    """Checks on the ``approvals.mcp_reload_confirm`` entry of DEFAULT_CONFIG."""

    def test_default_config_has_the_key(self):
        """``approvals`` is a dict and it carries the ``mcp_reload_confirm`` key."""
        section = DEFAULT_CONFIG.get("approvals")
        assert isinstance(section, dict)
        assert "mcp_reload_confirm" in section

    def test_default_is_true(self):
        """The shipped default is exactly ``True`` (identity, not truthiness)."""
        # New installs confirm by default — this is the safe behavior.
        shipped_default = DEFAULT_CONFIG["approvals"]["mcp_reload_confirm"]
        assert shipped_default is True

    def test_shape_matches_other_approval_keys(self):
        """The key sits at the same flat dict level as `mode` / `timeout` / `cron_mode`."""
        section = DEFAULT_CONFIG["approvals"]
        # Checked in the same order as the sibling keys are listed above.
        expected_types = (
            ("mode", str),
            ("timeout", int),
            ("cron_mode", str),
            ("mcp_reload_confirm", bool),
        )
        for key, expected_type in expected_types:
            assert isinstance(section.get(key), expected_type)
class TestUserConfigMerge:
    """Behavior of ``load_config`` when a user config file already exists.

    A pre-existing config without the new key is expected to pick it up
    from DEFAULT_CONFIG, while an explicit user value is expected to be
    kept as written.
    """

    @staticmethod
    def _load_with_user_config(tmp_path, monkeypatch, user_cfg):
        """Write *user_cfg* into a temporary HERMES_HOME and return ``load_config()``.

        The config module is reloaded after HERMES_HOME is set so that the
        new home directory is honored.
        """
        import importlib

        import yaml

        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        config_file = hermes_home / "config.yaml"
        config_file.write_text(yaml.safe_dump(user_cfg))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        # Imported only after the environment is patched, then reloaded.
        import hermes_cli.config as cfg_mod

        importlib.reload(cfg_mod)
        return cfg_mod.load_config()

    def test_existing_user_config_without_key_gets_default(self, tmp_path, monkeypatch):
        """A legacy config lacking the key ends up with the default ``True``."""
        legacy_cfg = {
            "approvals": {"mode": "manual", "timeout": 60, "cron_mode": "deny"},
        }
        loaded = self._load_with_user_config(tmp_path, monkeypatch, legacy_cfg)
        assert loaded["approvals"]["mcp_reload_confirm"] is True

    def test_existing_user_config_with_false_key_survives_merge(
        self, tmp_path, monkeypatch,
    ):
        """A user who has clicked "Always Approve" (key=false) must keep
        that setting across reloads; the default ``True`` value must not win.
        """
        opted_out_cfg = {
            "approvals": {
                "mode": "manual",
                "timeout": 60,
                "cron_mode": "deny",
                "mcp_reload_confirm": False,
            },
        }
        loaded = self._load_with_user_config(tmp_path, monkeypatch, opted_out_cfg)
        assert loaded["approvals"]["mcp_reload_confirm"] is False

View file

@ -0,0 +1,197 @@
"""Tests for tools/slash_confirm.py — the generic slash-command confirmation primitive.
Covers register/resolve/clear lifecycle, stale-entry behavior, confirm_id
mismatch, handler exceptions, and async resolution.
"""
import asyncio
import time
import pytest
from tools import slash_confirm
@pytest.fixture(autouse=True)
def _clean_pending():
    """Empty ``slash_confirm._pending`` before and after every test."""

    def _wipe():
        slash_confirm._pending.clear()

    _wipe()
    yield
    _wipe()
class TestRegisterAndGetPending:
    """``register`` / ``get_pending`` round-trips on the pending store."""

    def test_register_stores_entry(self):
        """A registered entry exposes its id, command, handler and a timestamp."""

        async def handler(choice):
            return f"got {choice}"

        slash_confirm.register("sess1", "cid1", "reload-mcp", handler)

        entry = slash_confirm.get_pending("sess1")
        assert entry is not None
        assert entry["confirm_id"] == "cid1"
        assert entry["command"] == "reload-mcp"
        assert entry["handler"] is handler
        assert "created_at" in entry

    def test_get_pending_missing_returns_none(self):
        """An unknown session key yields ``None``."""
        missing = slash_confirm.get_pending("nobody")
        assert missing is None

    def test_register_supersedes_prior_entry(self):
        """Registering twice for one session keeps only the latest entry."""

        async def h1(choice):
            return "first"

        async def h2(choice):
            return "second"

        for confirm_id, handler in (("cid1", h1), ("cid2", h2)):
            slash_confirm.register("sess1", confirm_id, "reload-mcp", handler)

        entry = slash_confirm.get_pending("sess1")
        assert entry["confirm_id"] == "cid2"
        assert entry["handler"] is h2

    def test_get_pending_returns_copy_not_reference(self):
        """Mutating a returned entry does not affect the stored one."""

        async def h(choice):
            return "x"

        slash_confirm.register("sess1", "cid1", "cmd", h)

        first_view = slash_confirm.get_pending("sess1")
        first_view["command"] = "mutated"

        second_view = slash_confirm.get_pending("sess1")
        assert second_view["command"] == "cmd"
class TestResolve:
    """Outcomes of ``slash_confirm.resolve`` for matching, missing, stale and failing entries."""

    @pytest.mark.asyncio
    async def test_resolve_runs_handler_and_pops_entry(self):
        """A matching resolve calls the handler once and removes the entry."""
        seen = []

        async def handler(choice):
            seen.append(choice)
            return f"resolved {choice}"

        slash_confirm.register("sess1", "cid1", "reload-mcp", handler)
        outcome = await slash_confirm.resolve("sess1", "cid1", "once")

        assert outcome == "resolved once"
        assert seen == ["once"]
        # Entry should be popped.
        assert slash_confirm.get_pending("sess1") is None

    @pytest.mark.asyncio
    async def test_resolve_no_pending_returns_none(self):
        """Resolving with nothing registered yields ``None``."""
        outcome = await slash_confirm.resolve("sess1", "cid1", "once")
        assert outcome is None

    @pytest.mark.asyncio
    async def test_resolve_confirm_id_mismatch_returns_none(self):
        """A wrong confirm_id yields ``None`` and leaves the entry in place."""

        async def handler(choice):
            return "should not run"

        slash_confirm.register("sess1", "cid_real", "cmd", handler)
        outcome = await slash_confirm.resolve("sess1", "cid_wrong", "once")

        assert outcome is None
        # Stale entry should still be present (mismatch doesn't pop).
        assert slash_confirm.get_pending("sess1") is not None

    @pytest.mark.asyncio
    async def test_resolve_stale_entry_returns_none(self):
        """An entry whose timestamp is far in the past resolves to ``None``."""

        async def handler(choice):
            return "should not run"

        slash_confirm.register("sess1", "cid1", "cmd", handler)
        # Force entry age past timeout
        backdated = time.time() - 10000
        slash_confirm._pending["sess1"]["created_at"] = backdated

        outcome = await slash_confirm.resolve("sess1", "cid1", "once")
        assert outcome is None

    @pytest.mark.asyncio
    async def test_resolve_handler_exception_returns_error_string(self):
        """A raising handler produces a result mentioning the error, and the entry is removed."""

        async def handler(choice):
            raise RuntimeError("boom")

        slash_confirm.register("sess1", "cid1", "cmd", handler)
        outcome = await slash_confirm.resolve("sess1", "cid1", "once")

        assert outcome is not None
        assert "boom" in outcome
        # Entry should still be popped even when handler raises.
        assert slash_confirm.get_pending("sess1") is None

    @pytest.mark.asyncio
    async def test_resolve_non_string_return_becomes_none(self):
        """A handler returning a non-string value resolves to ``None``."""

        async def handler(choice):
            return {"not": "a string"}

        slash_confirm.register("sess1", "cid1", "cmd", handler)
        outcome = await slash_confirm.resolve("sess1", "cid1", "once")
        assert outcome is None

    @pytest.mark.asyncio
    async def test_resolve_double_click_only_runs_handler_once(self):
        """Two concurrent resolves for the same entry run the handler exactly once."""
        seen = []

        async def handler(choice):
            seen.append(choice)
            return "ran"

        slash_confirm.register("sess1", "cid1", "cmd", handler)

        # Simulate two near-simultaneous button clicks.
        first_click = slash_confirm.resolve("sess1", "cid1", "once")
        second_click = slash_confirm.resolve("sess1", "cid1", "once")
        r1, r2 = await asyncio.gather(first_click, second_click)

        # Exactly one should have run the handler.
        assert seen == ["once"]
        assert (r1 == "ran") != (r2 == "ran")
class TestClear:
    """``slash_confirm.clear`` on present and absent sessions."""

    def test_clear_removes_entry(self):
        """Clearing a registered session makes ``get_pending`` return ``None``."""

        async def h(c):
            return "x"

        session = "sess1"
        slash_confirm.register(session, "cid1", "cmd", h)
        assert slash_confirm.get_pending(session) is not None

        slash_confirm.clear(session)
        assert slash_confirm.get_pending(session) is None

    def test_clear_missing_is_noop(self):
        """Clearing an unknown session completes without raising."""
        # Should not raise.
        slash_confirm.clear("nobody")
class TestClearIfStale:
    """``slash_confirm.clear_if_stale`` for stale, fresh and absent entries."""

    def test_clears_stale_entry(self):
        """A backdated entry is removed and the call reports ``True``."""

        async def h(c):
            return "x"

        slash_confirm.register("sess1", "cid1", "cmd", h)
        # Push the timestamp well beyond the 300-second timeout passed below.
        backdated = time.time() - 10000
        slash_confirm._pending["sess1"]["created_at"] = backdated

        was_cleared = slash_confirm.clear_if_stale("sess1", timeout=300)
        assert was_cleared is True
        assert slash_confirm.get_pending("sess1") is None

    def test_preserves_fresh_entry(self):
        """A just-registered entry is kept and the call reports ``False``."""

        async def h(c):
            return "x"

        slash_confirm.register("sess1", "cid1", "cmd", h)

        was_cleared = slash_confirm.clear_if_stale("sess1", timeout=300)
        assert was_cleared is False
        assert slash_confirm.get_pending("sess1") is not None

    def test_returns_false_for_missing_entry(self):
        """An unknown session reports ``False``."""
        was_cleared = slash_confirm.clear_if_stale("nobody")
        assert was_cleared is False