From f05a47309ec8842387e88eab856df55c6910b57b Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Tue, 26 May 2026 14:16:29 -0700 Subject: [PATCH] fix(gateway): refresh cached agent tools on /reload-mcp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the gateway processes /reload-mcp, it reconnects MCP servers and updates the global _servers registry, but cached AIAgent instances in _agent_cache keep the tools list they were built with. The user had to also run /new (discarding conversation history) before the agent could see the new tools — even though /reload-mcp had succeeded. This patch refreshes each cached agent's .tools and .valid_tool_names in _execute_mcp_reload after discovery returns, so existing sessions pick up new MCP tools on their next turn. The slash-confirm gate in _handle_reload_mcp_command already obtains user consent for the implied prompt-cache invalidation before this code runs. Mirrors the equivalent behaviour the CLI already does in cli.py _reload_mcp. Per-agent enabled_toolsets and disabled_toolsets are preserved so an agent that was scoped to a subset of toolsets does not silently gain disabled tools after the reload. Original diagnosis + initial implementation in #23812 from @fujinice. The auto-reload watcher half of that PR is intentionally dropped — users want /reload-mcp to remain explicit. Co-authored-by: fujinice <45688690+fujinice@users.noreply.github.com> --- gateway/run.py | 34 ++++ scripts/release.py | 1 + ...test_mcp_reload_refreshes_cached_agents.py | 176 ++++++++++++++++++ 3 files changed, 211 insertions(+) create mode 100644 tests/gateway/test_mcp_reload_refreshes_cached_agents.py diff --git a/gateway/run.py b/gateway/run.py index 7b5ace07067..a2e41c6090f 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -13339,6 +13339,40 @@ class GatewayRunner: else: lines.append(t("gateway.reload_mcp.tools_available", tools=len(new_tools), servers=len(connected_servers))) + # Refresh cached agents so existing sessions see new MCP tools on + # their next turn — without this, the user has to `/new` (which + # discards conversation history) to pick up tools from a server + # that was just added or reconnected. The user has already + # consented to the prompt-cache invalidation via the slash-confirm + # gate in _handle_reload_mcp_command before we reach this point. + try: + from model_tools import get_tool_definitions + _cache = getattr(self, "_agent_cache", None) + _cache_lock = getattr(self, "_agent_cache_lock", None) + if _cache_lock is not None and _cache: + with _cache_lock: + for _sess_key, _entry in list(_cache.items()): + try: + _agent = _entry[0] if isinstance(_entry, tuple) else _entry + except Exception: + continue + if _agent is None: + continue + new_defs = get_tool_definitions( + enabled_toolsets=getattr(_agent, "enabled_toolsets", None), + disabled_toolsets=getattr(_agent, "disabled_toolsets", None), + quiet_mode=True, + ) + _agent.tools = new_defs + _agent.valid_tool_names = { + t["function"]["name"] for t in new_defs + } if new_defs else set() + except Exception as _exc: + logger.debug( + "Failed to update cached agent tools after MCP reload: %s", + _exc, + ) + # Inject a message at the END of the session history so the # model knows tools changed on its next turn. Appended after # all existing messages to preserve prompt-cache for the prefix. diff --git a/scripts/release.py b/scripts/release.py index ea1ed052d82..d46e36ce50b 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -46,6 +46,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json" # Auto-extracted from noreply emails + manual overrides AUTHOR_MAP = { "9592417+adam91holt@users.noreply.github.com": "adam91holt", + "45688690+fujinice@users.noreply.github.com": "fujinice", # teknium (multiple emails) "teknium1@gmail.com": "teknium1", "kenyon1977@gmail.com": "kenyonxu", diff --git a/tests/gateway/test_mcp_reload_refreshes_cached_agents.py b/tests/gateway/test_mcp_reload_refreshes_cached_agents.py new file mode 100644 index 00000000000..4d945f03c59 --- /dev/null +++ b/tests/gateway/test_mcp_reload_refreshes_cached_agents.py @@ -0,0 +1,176 @@ +"""Regression test for /reload-mcp refreshing cached agent tool lists. + +Before this fix, the gateway's _execute_mcp_reload reconnected MCP servers +and updated the global _servers registry, but cached AIAgent instances kept +their original tools list. Users had to run /new (discarding conversation +history) for the agent to pick up the new tools. + +This test exercises _execute_mcp_reload directly with mocked MCP discovery +and asserts that every cached agent's `tools` and `valid_tool_names` +attributes are overwritten with the freshly-discovered tool set. +""" + +from __future__ import annotations + +from collections import OrderedDict +from datetime import datetime +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +from gateway.config import GatewayConfig, Platform, PlatformConfig +from gateway.platforms.base import MessageEvent +from gateway.session import SessionEntry, SessionSource, build_session_key + + +def _make_source() -> SessionSource: + return SessionSource( + platform=Platform.TELEGRAM, + user_id="u1", + chat_id="c1", + user_name="tester", + chat_type="dm", + ) + + +def _make_event() -> MessageEvent: + return MessageEvent(text="/reload-mcp", source=_make_source(), message_id="m1") + + +def _make_runner_with_cached_agents(num_agents: int = 2): + """Build a bare GatewayRunner with `num_agents` fake cached agents.""" + import threading + + from gateway.run import GatewayRunner + + runner = object.__new__(GatewayRunner) + runner.config = GatewayConfig( + platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")} + ) + + # Session store stub — _execute_mcp_reload writes a transcript message + # at the end; tests don't care about that side effect. + session_entry = SessionEntry( + session_key=build_session_key(_make_source()), + session_id="sess-1", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + ) + runner.session_store = MagicMock() + runner.session_store.get_or_create_session.return_value = session_entry + runner.session_store.append_to_transcript = MagicMock() + + # Build N fake cached agents with stale `tools` + `valid_tool_names`. + runner._agent_cache = OrderedDict() + runner._agent_cache_lock = threading.Lock() + for i in range(num_agents): + stale_tool = { + "type": "function", + "function": {"name": f"stale_tool_{i}", "description": "old"}, + } + agent = SimpleNamespace( + tools=[stale_tool], + valid_tool_names={f"stale_tool_{i}"}, + enabled_toolsets=None, + disabled_toolsets=None, + ) + runner._agent_cache[f"session-{i}"] = (agent, f"sig-{i}") + + return runner + + +@pytest.mark.asyncio +async def test_reload_mcp_refreshes_cached_agent_tools(): + """After /reload-mcp succeeds, every cached agent gets its tool list + replaced with the freshly-discovered set.""" + runner = _make_runner_with_cached_agents(num_agents=3) + + # Snapshot the stale state so we can assert it changed. + pre_reload_tools = { + key: list(entry[0].tools) for key, entry in runner._agent_cache.items() + } + + # Fresh tools that get_tool_definitions() will return after the reload. + fresh_tool_defs = [ + { + "type": "function", + "function": {"name": "HassTurnOn", "description": "Turns on a device"}, + }, + { + "type": "function", + "function": {"name": "HassTurnOff", "description": "Turns off a device"}, + }, + ] + + with ( + patch("tools.mcp_tool.shutdown_mcp_servers"), + patch("tools.mcp_tool.discover_mcp_tools", return_value=["HassTurnOn", "HassTurnOff"]), + patch.dict("tools.mcp_tool._servers", {"homeassistant": object()}, clear=True), + patch("model_tools.get_tool_definitions", return_value=fresh_tool_defs), + ): + result = await runner._execute_mcp_reload(_make_event()) + + # The reload itself returned a status string (not an exception). + assert isinstance(result, str) + + # Every cached agent has fresh tools and the matching valid_tool_names. + expected_names = {"HassTurnOn", "HassTurnOff"} + for key, (agent, _sig) in runner._agent_cache.items(): + assert agent.tools == fresh_tool_defs, ( + f"Agent {key} kept stale tools: {agent.tools} != {fresh_tool_defs}" + ) + assert agent.valid_tool_names == expected_names, ( + f"Agent {key} kept stale valid_tool_names: {agent.valid_tool_names}" + ) + # Sanity check that the swap actually changed something. + assert agent.tools != pre_reload_tools[key] + + +@pytest.mark.asyncio +async def test_reload_mcp_handles_empty_agent_cache(): + """Reload with no cached agents (e.g. fresh gateway) must not raise.""" + runner = _make_runner_with_cached_agents(num_agents=0) + assert len(runner._agent_cache) == 0 + + with ( + patch("tools.mcp_tool.shutdown_mcp_servers"), + patch("tools.mcp_tool.discover_mcp_tools", return_value=[]), + patch.dict("tools.mcp_tool._servers", {}, clear=True), + patch("model_tools.get_tool_definitions", return_value=[]), + ): + result = await runner._execute_mcp_reload(_make_event()) + + assert isinstance(result, str) + + +@pytest.mark.asyncio +async def test_reload_mcp_preserves_per_agent_toolset_overrides(): + """If a cached agent was built with enabled_toolsets=["safe"], the + refresh must pass that same list to get_tool_definitions so the agent + doesn't silently gain disabled tools after a reload.""" + runner = _make_runner_with_cached_agents(num_agents=1) + # Override the toolsets on the cached agent. + agent, _sig = runner._agent_cache["session-0"] + agent.enabled_toolsets = ["safe"] + agent.disabled_toolsets = ["terminal"] + + captured_calls = [] + + def _capture_get_tool_definitions(**kwargs): + captured_calls.append(kwargs) + return [{"type": "function", "function": {"name": "refreshed"}}] + + with ( + patch("tools.mcp_tool.shutdown_mcp_servers"), + patch("tools.mcp_tool.discover_mcp_tools", return_value=["refreshed"]), + patch.dict("tools.mcp_tool._servers", {"homeassistant": object()}, clear=True), + patch("model_tools.get_tool_definitions", side_effect=_capture_get_tool_definitions), + ): + await runner._execute_mcp_reload(_make_event()) + + assert captured_calls, "get_tool_definitions was never called to refresh the cache" + assert captured_calls[0]["enabled_toolsets"] == ["safe"] + assert captured_calls[0]["disabled_toolsets"] == ["terminal"]