From f05a47309ec8842387e88eab856df55c6910b57b Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Tue, 26 May 2026 14:16:29 -0700
Subject: [PATCH] fix(gateway): refresh cached agent tools on /reload-mcp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the gateway processes /reload-mcp, it reconnects MCP servers and
updates the global _servers registry, but cached AIAgent instances in
_agent_cache keep the tools list they were built with. The user had to
also run /new (discarding conversation history) before the agent could
see the new tools — even though /reload-mcp had succeeded.

This patch refreshes each cached agent's .tools and .valid_tool_names
in _execute_mcp_reload after discovery returns, so existing sessions
pick up new MCP tools on their next turn. The slash-confirm gate in
_handle_reload_mcp_command already obtains user consent for the
implied prompt-cache invalidation before this code runs.

Mirrors the equivalent behaviour the CLI already does in cli.py
_reload_mcp. Per-agent enabled_toolsets and disabled_toolsets are
preserved so an agent that was scoped to a subset of toolsets does
not silently gain disabled tools after the reload.

Original diagnosis + initial implementation in #23812 from @fujinice.
The auto-reload watcher half of that PR is intentionally dropped —
users want /reload-mcp to remain explicit.

Co-authored-by: fujinice <45688690+fujinice@users.noreply.github.com>
---
 gateway/run.py                                |  34 ++++
 scripts/release.py                            |   1 +
 ...test_mcp_reload_refreshes_cached_agents.py | 176 ++++++++++++++++++
 3 files changed, 211 insertions(+)
 create mode 100644 tests/gateway/test_mcp_reload_refreshes_cached_agents.py

diff --git a/gateway/run.py b/gateway/run.py
index 7b5ace07067..a2e41c6090f 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -13339,6 +13339,40 @@ class GatewayRunner:
             else:
                 lines.append(t("gateway.reload_mcp.tools_available", tools=len(new_tools), servers=len(connected_servers)))
 
+            # Refresh cached agents so existing sessions see new MCP tools on
+            # their next turn — without this, the user has to `/new` (which
+            # discards conversation history) to pick up tools from a server
+            # that was just added or reconnected. The user has already
+            # consented to the prompt-cache invalidation via the slash-confirm
+            # gate in _handle_reload_mcp_command before we reach this point.
+            try:
+                from model_tools import get_tool_definitions
+                _cache = getattr(self, "_agent_cache", None)
+                _cache_lock = getattr(self, "_agent_cache_lock", None)
+                if _cache_lock is not None and _cache:
+                    with _cache_lock:
+                        for _sess_key, _entry in list(_cache.items()):
+                            try:
+                                _agent = _entry[0] if isinstance(_entry, tuple) else _entry
+                            except Exception:
+                                continue
+                            if _agent is None:
+                                continue
+                            new_defs = get_tool_definitions(
+                                enabled_toolsets=getattr(_agent, "enabled_toolsets", None),
+                                disabled_toolsets=getattr(_agent, "disabled_toolsets", None),
+                                quiet_mode=True,
+                            )
+                            _agent.tools = new_defs
+                            _agent.valid_tool_names = {
+                                t["function"]["name"] for t in new_defs
+                            } if new_defs else set()
+            except Exception as _exc:
+                logger.debug(
+                    "Failed to update cached agent tools after MCP reload: %s",
+                    _exc,
+                )
+
             # Inject a message at the END of the session history so the
             # model knows tools changed on its next turn.  Appended after
             # all existing messages to preserve prompt-cache for the prefix.
diff --git a/scripts/release.py b/scripts/release.py
index ea1ed052d82..d46e36ce50b 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -46,6 +46,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
 # Auto-extracted from noreply emails + manual overrides
 AUTHOR_MAP = {
     "9592417+adam91holt@users.noreply.github.com": "adam91holt",
+    "45688690+fujinice@users.noreply.github.com": "fujinice",
     # teknium (multiple emails)
     "teknium1@gmail.com": "teknium1",
     "kenyon1977@gmail.com": "kenyonxu",
diff --git a/tests/gateway/test_mcp_reload_refreshes_cached_agents.py b/tests/gateway/test_mcp_reload_refreshes_cached_agents.py
new file mode 100644
index 00000000000..4d945f03c59
--- /dev/null
+++ b/tests/gateway/test_mcp_reload_refreshes_cached_agents.py
@@ -0,0 +1,176 @@
+"""Regression test for /reload-mcp refreshing cached agent tool lists.
+
+Before this fix, the gateway's _execute_mcp_reload reconnected MCP servers
+and updated the global _servers registry, but cached AIAgent instances kept
+their original tools list. Users had to run /new (discarding conversation
+history) for the agent to pick up the new tools.
+
+This test exercises _execute_mcp_reload directly with mocked MCP discovery
+and asserts that every cached agent's `tools` and `valid_tool_names`
+attributes are overwritten with the freshly-discovered tool set.
+"""
+
+from __future__ import annotations
+
+from collections import OrderedDict
+from datetime import datetime
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionEntry, SessionSource, build_session_key
+
+
+def _make_source() -> SessionSource:
+    return SessionSource(
+        platform=Platform.TELEGRAM,
+        user_id="u1",
+        chat_id="c1",
+        user_name="tester",
+        chat_type="dm",
+    )
+
+
+def _make_event() -> MessageEvent:
+    return MessageEvent(text="/reload-mcp", source=_make_source(), message_id="m1")
+
+
+def _make_runner_with_cached_agents(num_agents: int = 2):
+    """Build a bare GatewayRunner with `num_agents` fake cached agents."""
+    import threading
+
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(
+        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
+    )
+
+    # Session store stub — _execute_mcp_reload writes a transcript message
+    # at the end; tests don't care about that side effect.
+    session_entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+    )
+    runner.session_store = MagicMock()
+    runner.session_store.get_or_create_session.return_value = session_entry
+    runner.session_store.append_to_transcript = MagicMock()
+
+    # Build N fake cached agents with stale `tools` + `valid_tool_names`.
+    runner._agent_cache = OrderedDict()
+    runner._agent_cache_lock = threading.Lock()
+    for i in range(num_agents):
+        stale_tool = {
+            "type": "function",
+            "function": {"name": f"stale_tool_{i}", "description": "old"},
+        }
+        agent = SimpleNamespace(
+            tools=[stale_tool],
+            valid_tool_names={f"stale_tool_{i}"},
+            enabled_toolsets=None,
+            disabled_toolsets=None,
+        )
+        runner._agent_cache[f"session-{i}"] = (agent, f"sig-{i}")
+
+    return runner
+
+
+@pytest.mark.asyncio
+async def test_reload_mcp_refreshes_cached_agent_tools():
+    """After /reload-mcp succeeds, every cached agent gets its tool list
+    replaced with the freshly-discovered set."""
+    runner = _make_runner_with_cached_agents(num_agents=3)
+
+    # Snapshot the stale state so we can assert it changed.
+    pre_reload_tools = {
+        key: list(entry[0].tools) for key, entry in runner._agent_cache.items()
+    }
+
+    # Fresh tools that get_tool_definitions() will return after the reload.
+    fresh_tool_defs = [
+        {
+            "type": "function",
+            "function": {"name": "HassTurnOn", "description": "Turns on a device"},
+        },
+        {
+            "type": "function",
+            "function": {"name": "HassTurnOff", "description": "Turns off a device"},
+        },
+    ]
+
+    with (
+        patch("tools.mcp_tool.shutdown_mcp_servers"),
+        patch("tools.mcp_tool.discover_mcp_tools", return_value=["HassTurnOn", "HassTurnOff"]),
+        patch.dict("tools.mcp_tool._servers", {"homeassistant": object()}, clear=True),
+        patch("model_tools.get_tool_definitions", return_value=fresh_tool_defs),
+    ):
+        result = await runner._execute_mcp_reload(_make_event())
+
+    # The reload itself returned a status string (not an exception).
+    assert isinstance(result, str)
+
+    # Every cached agent has fresh tools and the matching valid_tool_names.
+    expected_names = {"HassTurnOn", "HassTurnOff"}
+    for key, (agent, _sig) in runner._agent_cache.items():
+        assert agent.tools == fresh_tool_defs, (
+            f"Agent {key} kept stale tools: {agent.tools} != {fresh_tool_defs}"
+        )
+        assert agent.valid_tool_names == expected_names, (
+            f"Agent {key} kept stale valid_tool_names: {agent.valid_tool_names}"
+        )
+        # Sanity check that the swap actually changed something.
+        assert agent.tools != pre_reload_tools[key]
+
+
+@pytest.mark.asyncio
+async def test_reload_mcp_handles_empty_agent_cache():
+    """Reload with no cached agents (e.g. fresh gateway) must not raise."""
+    runner = _make_runner_with_cached_agents(num_agents=0)
+    assert len(runner._agent_cache) == 0
+
+    with (
+        patch("tools.mcp_tool.shutdown_mcp_servers"),
+        patch("tools.mcp_tool.discover_mcp_tools", return_value=[]),
+        patch.dict("tools.mcp_tool._servers", {}, clear=True),
+        patch("model_tools.get_tool_definitions", return_value=[]),
+    ):
+        result = await runner._execute_mcp_reload(_make_event())
+
+    assert isinstance(result, str)
+
+
+@pytest.mark.asyncio
+async def test_reload_mcp_preserves_per_agent_toolset_overrides():
+    """If a cached agent was built with enabled_toolsets=["safe"], the
+    refresh must pass that same list to get_tool_definitions so the agent
+    doesn't silently gain disabled tools after a reload."""
+    runner = _make_runner_with_cached_agents(num_agents=1)
+    # Override the toolsets on the cached agent.
+    agent, _sig = runner._agent_cache["session-0"]
+    agent.enabled_toolsets = ["safe"]
+    agent.disabled_toolsets = ["terminal"]
+
+    captured_calls = []
+
+    def _capture_get_tool_definitions(**kwargs):
+        captured_calls.append(kwargs)
+        return [{"type": "function", "function": {"name": "refreshed"}}]
+
+    with (
+        patch("tools.mcp_tool.shutdown_mcp_servers"),
+        patch("tools.mcp_tool.discover_mcp_tools", return_value=["refreshed"]),
+        patch.dict("tools.mcp_tool._servers", {"homeassistant": object()}, clear=True),
+        patch("model_tools.get_tool_definitions", side_effect=_capture_get_tool_definitions),
+    ):
+        await runner._execute_mcp_reload(_make_event())
+
+    assert captured_calls, "get_tool_definitions was never called to refresh the cache"
+    assert captured_calls[0]["enabled_toolsets"] == ["safe"]
+    assert captured_calls[0]["disabled_toolsets"] == ["terminal"]