From b6e2a54a94f58f9ebafa79f45d45b0ccb2b17043 Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 19 Jun 2026 22:41:15 +0530
Subject: [PATCH] fix(mcp): address adversarial review round 1 (cache parity,
 gates, races)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Consolidated findings from three independent reviewers (Codex, Claude Code, a
Hermes subagent w/ the hermes-agent-dev skill):

- BLOCKING: refresh_agent_mcp_tools rebuilt only the registry subset, silently
  dropping post-build-injected memory-provider (mem0/honcho/…) and context-
  engine (lcm_*) tools on every refresh. Now additive-preserving: re-applies
  the same injectors agent_init uses, staged on locals and published atomically.
- Re-injection now honors the #5544 enabled_toolsets gate for context-engine
  tools, so a restricted-toolset platform can't get lcm_* leaked back in.
- Atomic read-diff-publish under one lock: the returned `added` set and the
  (tools, valid_tool_names) pair are consistent even under concurrent callers
  (no half-swap, no TOCTOU).
- background_review fork opts out (_skip_mcp_refresh) so its byte-identical
  tools[] cache parity with the parent is preserved.
- CLI /reload-mcp routed through the shared helper (was a 4th divergent copy
  with the same clobber bug + missing disabled_toolsets).
- Explicit reloads (TUI RPC + CLI) pass enabled_override so a server the user
  just enabled in config this session is picked up; automatic paths reuse the
  agent's build-time selection.
- mcp_discovery_timeout default 5.0 -> 1.5s: correctness now comes from the
  between-turns refresh, so the startup wait is only a small turn-1 UX bump
  rather than a heavy dead-server latency penalty.
- has_registered_mcp_tools checks registered TOOLS (not connected servers) so a
  zero-tool/prompt-only server doesn't make the per-turn hook fire forever.
- Tests: rewrote the thread-safety test to actually exercise the write path
  (alternating tool sets), added the #5544-gate regression, the memory/context
  preservation regression, and a "callable next turn via valid_tool_names"
  contract; removed a dead monkeypatch line.
---
 agent/agent_init.py                         |   5 +-
 agent/background_review.py                  |   7 +
 agent/turn_context.py                       |   7 +-
 cli.py                                      |  18 ++-
 hermes_cli/config.py                        |  12 +-
 tests/tools/test_refresh_agent_mcp_tools.py | 120 ++++++++++++++--
 tools/mcp_tool.py                           | 152 ++++++++++++++++----
 tui_gateway/server.py                       |   8 +-
 8 files changed, 278 insertions(+), 51 deletions(-)

diff --git a/agent/agent_init.py b/agent/agent_init.py
index 555f930f559..7131a93c3b7 100644
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -531,7 +531,10 @@ def init_agent(
     agent._last_activity_desc: str = "initializing"
     agent._current_tool: str | None = None
     agent._api_call_count: int = 0
-
+    # Opt-out flag for the between-turns MCP tool refresh (build_turn_context).
+    # Set on internal forks (e.g. background_review) that must keep ``tools[]``
+    # byte-identical to a parent for provider cache parity.
+    agent._skip_mcp_refresh = False
     # Rate limit tracking — updated from x-ratelimit-* response headers
     # after each API call.  Accessed by /usage slash command.
     agent._rate_limit_state: Optional["RateLimitState"] = None
diff --git a/agent/background_review.py b/agent/background_review.py
index ee4791d98d3..c809b496065 100644
--- a/agent/background_review.py
+++ b/agent/background_review.py
@@ -535,6 +535,13 @@ def _run_review_in_thread(
             )
             review_agent._memory_write_origin = "background_review"
             review_agent._memory_write_context = "background_review"
+            # The review fork pins the parent's cached system prompt and keeps
+            # ``tools[]`` byte-identical to the parent so its outbound request
+            # hits the same provider cache prefix (see the toolset-parity note
+            # above). The between-turns MCP refresh in build_turn_context would
+            # add late-connecting MCP tools to this fork and break that parity,
+            # so opt the review fork out of it.
+            review_agent._skip_mcp_refresh = True
             review_agent._memory_store = agent._memory_store
             review_agent._memory_enabled = agent._memory_enabled
             review_agent._user_profile_enabled = agent._user_profile_enabled
diff --git a/agent/turn_context.py b/agent/turn_context.py
index 3e107e0900c..0bbdf73764e 100644
--- a/agent/turn_context.py
+++ b/agent/turn_context.py
@@ -123,9 +123,10 @@ def build_turn_context(
     # or when the tool set is unchanged (``refresh_agent_mcp_tools`` diffs by
     # name and leaves the snapshot untouched on no-change).
     try:
-        from tools.mcp_tool import has_registered_mcp_tools, refresh_agent_mcp_tools
-        if has_registered_mcp_tools():
-            refresh_agent_mcp_tools(agent, quiet_mode=True)
+        if not getattr(agent, "_skip_mcp_refresh", False):
+            from tools.mcp_tool import has_registered_mcp_tools, refresh_agent_mcp_tools
+            if has_registered_mcp_tools():
+                refresh_agent_mcp_tools(agent, quiet_mode=True)
     except Exception:
         logger.debug("between-turns MCP tool refresh skipped", exc_info=True)
 
diff --git a/cli.py b/cli.py
index 2ca1af6faef..eb8b15a93f8 100644
--- a/cli.py
+++ b/cli.py
@@ -9661,16 +9661,20 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
             else:
                 print(f"  🔧 {len(new_tools)} tool(s) available from {len(connected_servers)} server(s)")
 
-            # Refresh the agent's tool list so the model can call new tools
+            # Refresh the agent's tool list so the model can call new tools.
+            # Route through the shared helper so this CLI /reload-mcp path stays
+            # in lockstep with the TUI RPC / gateway reload / late-binding paths
+            # (name-diff, thread-safe, and — critically — additive-preserving so
+            # memory-provider and context-engine tools survive the rebuild).
             if self.agent is not None:
-                self.agent.tools = get_tool_definitions(
-                    enabled_toolsets=self.agent.enabled_toolsets
-                    if hasattr(self.agent, "enabled_toolsets") else None,
+                from tools.mcp_tool import refresh_agent_mcp_tools
+                # Explicit reload: re-resolve enabled toolsets so a server the
+                # user just enabled in config this session is picked up.
+                refresh_agent_mcp_tools(
+                    self.agent,
+                    enabled_override=self.enabled_toolsets,
                     quiet_mode=True,
                 )
-                self.agent.valid_tool_names = {
-                    tool["function"]["name"] for tool in self.agent.tools
-                } if self.agent.tools else set()
 
             # Inject a message at the END of conversation history so the
             # model knows tools changed.  Appended after all existing
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 6b3389b406b..d36f7e8a9c9 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1208,11 +1208,13 @@ DEFAULT_CONFIG = {
     # the INSTANT discovery completes, so users with no MCP servers (the common
     # case) or fast servers pay ~0s regardless of this value — the bound is
     # only reached when a server is genuinely still connecting.  The old 0.75s
-    # default was too short for HTTP/OAuth servers (which can take 2–6s on a
-    # cold connect), so their tools were invisible for the whole session.
-    # Slow servers that miss this window are still picked up by the automatic
-    # late-binding refresh, so this is a UX/latency knob, not a correctness one.
-    "mcp_discovery_timeout": 5.0,
+    # default was a touch short for HTTP/OAuth servers on a cold connect; a
+    # modest bump lets more of them land in the FIRST turn's snapshot.  This is
+    # only a turn-1 latency/UX knob: a server that misses this window is still
+    # picked up automatically on the next turn by the between-turns refresh
+    # (see agent/turn_context.py), so correctness never depends on it.  Keep it
+    # small so a slow/dead server adds little to first-response latency.
+    "mcp_discovery_timeout": 1.5,
 
     # Tool-output truncation thresholds. When terminal output or a
     # single read_file page exceeds these limits, Hermes truncates the
diff --git a/tests/tools/test_refresh_agent_mcp_tools.py b/tests/tools/test_refresh_agent_mcp_tools.py
index 13e5cbb286e..3a347371c02 100644
--- a/tests/tools/test_refresh_agent_mcp_tools.py
+++ b/tests/tools/test_refresh_agent_mcp_tools.py
@@ -98,13 +98,114 @@ def test_refresh_passes_agent_toolset_filters(monkeypatch):
     assert seen["disabled_toolsets"] == ["messaging"]
 
 
-def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch):
-    """Concurrent refreshes never leave tools / valid_tool_names inconsistent."""
-    agent = _agent(["a"])
+def test_refresh_preserves_memory_provider_and_context_engine_tools(monkeypatch):
+    """B1 regression: a rebuild must NOT drop post-build-injected tools.
+
+    get_tool_definitions() returns only the registry-derived tools. agent_init
+    appends memory-provider tools (mem0/honcho/…) and context-engine tools
+    (lcm_*) directly onto agent.tools AFTER that. A naive
+    `agent.tools = get_tool_definitions()` would silently delete them on every
+    refresh. The helper must re-inject them.
+    """
+    # Agent already carries: a built-in, a memory-provider tool, a context tool.
+    agent = _agent(["read_file", "memory_search", "lcm_grep"])
+
+    # Provider exposes its schemas; context compressor exposes lcm_*.
+    agent._memory_manager = types.SimpleNamespace(
+        get_all_tool_schemas=lambda: [
+            {"name": "memory_search", "description": "", "parameters": {}}
+        ]
+    )
+    agent.context_compressor = types.SimpleNamespace(
+        get_tool_schemas=lambda: [
+            {"name": "lcm_grep", "description": "", "parameters": {}}
+        ]
+    )
+    agent._context_engine_tool_names = {"lcm_grep"}
 
     import model_tools
-    defs = [_tool("a"), _tool("b"), _tool("c")]
-    monkeypatch.setattr(model_tools, "get_tool_definitions", lambda **kw: defs)
+    # The registry now ALSO has a newly-connected MCP tool, but does NOT contain
+    # the memory/context tools (they're never in get_tool_definitions output).
+    monkeypatch.setattr(
+        model_tools, "get_tool_definitions",
+        lambda **kw: [_tool("read_file"), _tool("mcp_new_server_tool")],
+    )
+
+    added = mcp_tool.refresh_agent_mcp_tools(agent)
+
+    # The new MCP tool landed AND the injected families survived.
+    assert "mcp_new_server_tool" in agent.valid_tool_names
+    assert "memory_search" in agent.valid_tool_names   # not clobbered
+    assert "lcm_grep" in agent.valid_tool_names         # not clobbered
+    assert added == {"mcp_new_server_tool"}
+
+
+def test_refresh_respects_context_engine_toolset_gate(monkeypatch):
+    """#5544: context-engine tools must NOT be re-injected on a restricted
+    toolset. A platform with enabled_toolsets that excludes context_engine
+    must not get lcm_* leaked back in by a refresh."""
+    agent = _agent(["read_file"], enabled=["coding"])  # context_engine NOT enabled
+    agent.context_compressor = types.SimpleNamespace(
+        get_tool_schemas=lambda: [{"name": "lcm_grep", "description": "", "parameters": {}}]
+    )
+    agent._context_engine_tool_names = set()
+
+    import model_tools
+    monkeypatch.setattr(
+        model_tools, "get_tool_definitions",
+        lambda **kw: [_tool("read_file"), _tool("mcp_new_tool")],
+    )
+
+    mcp_tool.refresh_agent_mcp_tools(agent)
+
+    assert "mcp_new_tool" in agent.valid_tool_names  # MCP tool still lands
+    assert "lcm_grep" not in agent.valid_tool_names   # gated out (#5544)
+
+
+def test_refreshed_tool_is_callable_through_valid_tool_names_guard(monkeypatch):
+    """The whole point: a late tool, once refreshed, passes the name guard the
+    run loop uses to accept/reject tool calls (agent.valid_tool_names)."""
+    agent = _agent(["read_file"])
+
+    import model_tools
+    monkeypatch.setattr(
+        model_tools, "get_tool_definitions",
+        lambda **kw: [_tool("read_file"), _tool("mcp_granola_list_meetings")],
+    )
+
+    # Before refresh the run loop would reject the call ("Tool does not exist").
+    assert "mcp_granola_list_meetings" not in agent.valid_tool_names
+
+    mcp_tool.refresh_agent_mcp_tools(agent)
+
+    # After refresh the same guard accepts it AND it's in the tools= payload.
+    assert "mcp_granola_list_meetings" in agent.valid_tool_names
+    assert any(t["function"]["name"] == "mcp_granola_list_meetings" for t in agent.tools)
+
+
+def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch):
+    """Concurrent refreshes keep tools / valid_tool_names coherent.
+
+    The registry alternates between two DIFFERENT tool sets every call, so the
+    write path (publish) runs repeatedly rather than short-circuiting on the
+    no-change early return — this actually exercises the lock. The invariant:
+    a reader of ``valid_tool_names`` must always match ``agent.tools``, and the
+    final published pair must be one of the two valid sets (never a mix).
+    """
+    agent = _agent(["a"])
+
+    import itertools
+    set_a = [_tool("a"), _tool("b")]
+    set_b = [_tool("a"), _tool("c")]
+    flip = itertools.cycle([set_a, set_b])
+    flip_lock = threading.Lock()
+
+    def _gtd(**kw):
+        with flip_lock:
+            return list(next(flip))
+
+    import model_tools
+    monkeypatch.setattr(model_tools, "get_tool_definitions", _gtd)
 
     errors = []
 
@@ -112,9 +213,11 @@ def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch):
         try:
             for _ in range(50):
                 mcp_tool.refresh_agent_mcp_tools(agent)
-                # Invariant: valid_tool_names must always match agent.tools.
+                # Coherence invariant: the name set must match the tool list
+                # at every observation, never a torn cross-attribute state.
                 names = {t["function"]["name"] for t in agent.tools}
                 assert agent.valid_tool_names == names
+                assert names in ({"a", "b"}, {"a", "c"})
         except Exception as exc:  # pragma: no cover - failure path
             errors.append(exc)
 
@@ -125,7 +228,7 @@ def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch):
         t.join(timeout=10)
 
     assert not errors
-    assert agent.valid_tool_names == {"a", "b", "c"}
+    assert agent.valid_tool_names in ({"a", "b"}, {"a", "c"})
 
 
 # ── discovery-wait bound (mcp_discovery_timeout config) ──────────────────────
@@ -139,9 +242,8 @@ def test_resolve_discovery_timeout_explicit_wins(monkeypatch):
 
 def test_resolve_discovery_timeout_reads_config(monkeypatch):
     from hermes_cli import mcp_startup
-
-    monkeypatch.setattr(mcp_startup, "load_config", None, raising=False)
     import hermes_cli.config as cfg
+
     monkeypatch.setattr(cfg, "load_config", lambda: {"mcp_discovery_timeout": 8.0})
 
     assert mcp_startup._resolve_discovery_timeout(None) == 8.0
diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py
index 1611d8394f6..ffb9fe0f1ab 100644
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -4237,17 +4237,26 @@ _agent_tools_lock = threading.Lock()
 
 
 def has_registered_mcp_tools() -> bool:
-    """True if any MCP server currently has tools registered in the registry.
+    """True if any MCP server has actually registered tools into the registry.
 
-    Cheap — checks the live server map under ``_lock``, no registry walk.  Used
-    by the per-turn refresh hook so a session with no MCP servers configured
-    (the common case) skips the ``get_tool_definitions`` rebuild entirely.
+    Cheap — checks the global MCP-tool→server name map under ``_lock``, no
+    registry walk.  Used by the per-turn refresh hook so a session with no MCP
+    tools (the common case, and also a connected-but-zero-tool/prompt-only
+    server) skips the ``get_tool_definitions`` rebuild entirely.  Checks
+    registered TOOLS, not connected servers, so a server that registers no tools
+    doesn't keep the hook firing every turn.
     """
     with _lock:
-        return bool(_servers)
+        return bool(_mcp_tool_server_names)
 
 
-def refresh_agent_mcp_tools(agent, *, quiet_mode: bool = True) -> set:
+def refresh_agent_mcp_tools(
+    agent,
+    *,
+    enabled_override=None,
+    disabled_override=None,
+    quiet_mode: bool = True,
+) -> set:
     """Re-derive an already-built agent's tool snapshot from the live registry.
 
     The agent snapshots ``agent.tools`` once at build time and never re-reads
@@ -4255,43 +4264,136 @@ def refresh_agent_mcp_tools(agent, *, quiet_mode: bool = True) -> set:
     *after* that snapshot — a slow HTTP/OAuth server that misses the bounded
     startup wait, or a ``/reload-mcp`` — their tools are invisible until the
     snapshot is rebuilt.  This is the single shared rebuild used by every such
-    caller (the TUI ``reload.mcp`` RPC, the gateway reload, and the late-binding
-    refresh thread) so they can't drift apart again.
+    caller (the TUI ``reload.mcp`` RPC, the gateway reload, the late-binding
+    refresh thread, and the per-turn between-turns refresh) so they can't drift
+    apart again.
 
     The rebuild respects the agent's own ``enabled_toolsets`` /
-    ``disabled_toolsets`` (the same filtering it was built with), diffs by tool
-    **name** (not count — a count compare misses an equal-size add/remove swap),
-    and mutates the agent under ``_agent_tools_lock``.
+    ``disabled_toolsets`` (the same filtering it was built with) and diffs by
+    tool **name** (not count — a count compare misses an equal-size add/remove
+    swap).
+
+    Crucially it is **additive-preserving**: ``get_tool_definitions`` returns
+    only the registry-derived tools, but ``agent_init`` appends two further
+    families directly onto ``agent.tools`` *after* that — external
+    memory-provider tools (mem0/honcho/…) and context-engine tools
+    (``lcm_*``).  A naive ``agent.tools = get_tool_definitions(...)`` would
+    silently DELETE those.  So after rebuilding the registry set we re-run the
+    same post-build injectors ``agent_init`` used, reconstructing the full
+    surface.  The new ``(tools, valid_tool_names)`` pair is published together
+    under ``_agent_tools_lock`` so a concurrent reader never sees a
+    cross-attribute half-swap.
 
     Returns the set of newly-added tool names (empty when nothing changed), so
     callers can decide whether to notify the user / re-emit session info.  The
     caller owns the prompt-cache contract: this helper does NOT check turn state,
     because each caller has a different policy (``/reload-mcp`` rebuilds after
-    explicit user consent; the late-binding thread only rebuilds pre-first-turn).
+    explicit user consent; the late-binding and between-turns paths only rebuild
+    at a turn boundary, before that turn's ``tools=`` prefix is assembled).
     """
     from model_tools import get_tool_definitions
 
+    # Explicit reloads (/reload-mcp) pass freshly-resolved toolsets so a server
+    # the user just ENABLED in config is picked up; the agent's stored selection
+    # is then updated to match. The automatic paths (between-turns, late-binding)
+    # pass nothing and reuse the agent's build-time selection unchanged.
+    if enabled_override is not None or disabled_override is not None:
+        enabled = enabled_override if enabled_override is not None else getattr(agent, "enabled_toolsets", None)
+        disabled = disabled_override if disabled_override is not None else getattr(agent, "disabled_toolsets", None)
+        agent.enabled_toolsets = enabled
+        agent.disabled_toolsets = disabled
+    else:
+        enabled = getattr(agent, "enabled_toolsets", None)
+        disabled = getattr(agent, "disabled_toolsets", None)
+
+    # Registry-derived tools (built-ins + MCP), filtered to the agent's toolsets.
+    # Computed OUTSIDE the lock (get_tool_definitions can be slow); the diff and
+    # publish below happen together in ONE critical section so two concurrent
+    # callers can't compute overlapping ``added`` sets or torn-publish.
+    new_defs = list(
+        get_tool_definitions(
+            enabled_toolsets=enabled,
+            disabled_toolsets=disabled,
+            quiet_mode=quiet_mode,
+        )
+        or []
+    )
+    new_names = {t["function"]["name"] for t in new_defs}
+
+    # Re-append the post-build injected families that get_tool_definitions does
+    # NOT reproduce, so a refresh never strips them (memory-provider + context-
+    # engine tools). Staged entirely on LOCALS — the live ``agent.tools`` /
+    # ``valid_tool_names`` are never touched until the single atomic publish
+    # below, so a concurrent reader (``build_api_kwargs``) can't see a partial
+    # rebuild or a cross-attribute half-swap.
+    _reinject_post_build_tools(agent, new_defs, new_names)
+
+    # Single atomic read-diff-publish so the returned ``added`` is consistent
+    # with what was actually published, even under concurrent callers.
     with _agent_tools_lock:
         current = {
             t["function"]["name"]
             for t in (getattr(agent, "tools", None) or [])
         }
-
-    new_defs = get_tool_definitions(
-        enabled_toolsets=getattr(agent, "enabled_toolsets", None),
-        disabled_toolsets=getattr(agent, "disabled_toolsets", None),
-        quiet_mode=quiet_mode,
-    )
-    new_names = {t["function"]["name"] for t in new_defs} if new_defs else set()
-
-    if new_names == current:
-        return set()
-
-    with _agent_tools_lock:
+        if new_names == current:
+            return set()  # no change → leave the live snapshot untouched (no churn)
         agent.tools = new_defs
         agent.valid_tool_names = new_names
+        return new_names - current
 
-    return new_names - current
+
+def _reinject_post_build_tools(agent, tools_list: list, name_set: set) -> None:
+    """Append memory-provider and context-engine tools onto staged locals.
+
+    Mirrors the post-``get_tool_definitions`` injection in ``agent_init`` so a
+    snapshot rebuild reconstructs the FULL tool surface, not just the
+    registry-derived subset. Operates on the caller's staged ``tools_list`` /
+    ``name_set`` (NOT the live agent attributes) so the rebuild stays atomic.
+    Idempotent (skips names already present) and fail-soft.
+    """
+    def _add(schema: dict) -> None:
+        name = schema.get("name", "")
+        if not name or name in name_set:
+            return
+        tools_list.append({"type": "function", "function": schema})
+        name_set.add(name)
+
+    # Memory-provider tools (mem0/honcho/byterover/supermemory/…).
+    try:
+        memory_manager = getattr(agent, "_memory_manager", None)
+        get_mem_schemas = getattr(memory_manager, "get_all_tool_schemas", None) if memory_manager else None
+        if callable(get_mem_schemas):
+            # Honor the same enablement gate inject_memory_provider_tools uses.
+            from agent.memory_manager import memory_provider_tools_enabled
+            if "memory" in name_set or memory_provider_tools_enabled(getattr(agent, "enabled_toolsets", None)):
+                for schema in get_mem_schemas():
+                    if isinstance(schema, dict):
+                        _add(schema)
+    except Exception:
+        logger.debug("Memory-provider tool re-injection skipped", exc_info=True)
+
+    # Context-engine tools (lcm_grep/lcm_describe/…) — the `context_engine`
+    # toolset is intentionally empty, so these only exist via this append.
+    # Honor the same enabled_toolsets gate agent_init uses (#5544): without it a
+    # restricted-toolset platform (e.g. platform_toolsets: telegram: []) would
+    # re-leak lcm_* tools the build deliberately excluded, and pay the local-
+    # model latency penalty.
+    try:
+        enabled = getattr(agent, "enabled_toolsets", None)
+        context_engine_allowed = enabled is None or "context_engine" in enabled
+        compressor = getattr(agent, "context_compressor", None)
+        get_schemas = getattr(compressor, "get_tool_schemas", None) if compressor else None
+        if context_engine_allowed and callable(get_schemas):
+            engine_names = getattr(agent, "_context_engine_tool_names", None)
+            for schema in get_schemas():
+                if not isinstance(schema, dict):
+                    continue
+                name = schema.get("name", "")
+                _add(schema)
+                if name and isinstance(engine_names, set):
+                    engine_names.add(name)
+    except Exception:
+        logger.debug("Context-engine tool re-injection skipped", exc_info=True)
 
 
 def shutdown_mcp_servers():
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 8b13e7352b9..f43ea707c81 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -8409,7 +8409,13 @@ def _(rid, params: dict) -> dict:
             try:
                 from tools.mcp_tool import refresh_agent_mcp_tools
 
-                refresh_agent_mcp_tools(agent, quiet_mode=True)
+                # Explicit reload: re-resolve enabled toolsets so a server the
+                # user just enabled in config this session is picked up.
+                refresh_agent_mcp_tools(
+                    agent,
+                    enabled_override=_load_enabled_toolsets(),
+                    quiet_mode=True,
+                )
             except Exception as _exc:
                 logger.warning(
                     "Failed to refresh cached agent tools after /reload-mcp: %s",