fix(gateway): refresh cached agent tools on /reload-mcp

When the gateway processes /reload-mcp, it reconnects MCP servers and
updates the global _servers registry, but cached AIAgent instances in
_agent_cache keep the tools list they were built with. The user had to
also run /new (discarding conversation history) before the agent could
see the new tools — even though /reload-mcp had succeeded.

This patch refreshes each cached agent's .tools and .valid_tool_names
in _execute_mcp_reload after discovery returns, so existing sessions
pick up new MCP tools on their next turn. The slash-confirm gate in
_handle_reload_mcp_command already obtains user consent for the
implied prompt-cache invalidation before this code runs.

Mirrors the equivalent behaviour the CLI already does in cli.py
_reload_mcp. Per-agent enabled_toolsets and disabled_toolsets are
preserved so an agent that was scoped to a subset of toolsets does
not silently gain disabled tools after the reload.

Original diagnosis + initial implementation in #23812 from @fujinice.
The auto-reload watcher half of that PR is intentionally dropped —
users want /reload-mcp to remain explicit.

Co-authored-by: fujinice <45688690+fujinice@users.noreply.github.com>
This commit is contained in:
teknium1 2026-05-26 14:16:29 -07:00 committed by Teknium
parent 556bf7c5c1
commit f05a47309e
3 changed files with 211 additions and 0 deletions

View file

@ -13339,6 +13339,40 @@ class GatewayRunner:
else:
lines.append(t("gateway.reload_mcp.tools_available", tools=len(new_tools), servers=len(connected_servers)))
# Refresh cached agents so existing sessions see new MCP tools on
# their next turn — without this, the user has to `/new` (which
# discards conversation history) to pick up tools from a server
# that was just added or reconnected. The user has already
# consented to the prompt-cache invalidation via the slash-confirm
# gate in _handle_reload_mcp_command before we reach this point.
try:
from model_tools import get_tool_definitions
_cache = getattr(self, "_agent_cache", None)
_cache_lock = getattr(self, "_agent_cache_lock", None)
if _cache_lock is not None and _cache:
with _cache_lock:
for _sess_key, _entry in list(_cache.items()):
try:
_agent = _entry[0] if isinstance(_entry, tuple) else _entry
except Exception:
continue
if _agent is None:
continue
new_defs = get_tool_definitions(
enabled_toolsets=getattr(_agent, "enabled_toolsets", None),
disabled_toolsets=getattr(_agent, "disabled_toolsets", None),
quiet_mode=True,
)
_agent.tools = new_defs
_agent.valid_tool_names = {
t["function"]["name"] for t in new_defs
} if new_defs else set()
except Exception as _exc:
logger.debug(
"Failed to update cached agent tools after MCP reload: %s",
_exc,
)
# Inject a message at the END of the session history so the
# model knows tools changed on its next turn. Appended after
# all existing messages to preserve prompt-cache for the prefix.

View file

@ -46,6 +46,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
# Auto-extracted from noreply emails + manual overrides
AUTHOR_MAP = {
"9592417+adam91holt@users.noreply.github.com": "adam91holt",
"45688690+fujinice@users.noreply.github.com": "fujinice",
# teknium (multiple emails)
"teknium1@gmail.com": "teknium1",
"kenyon1977@gmail.com": "kenyonxu",

View file

@ -0,0 +1,176 @@
"""Regression test for /reload-mcp refreshing cached agent tool lists.
Before this fix, the gateway's _execute_mcp_reload reconnected MCP servers
and updated the global _servers registry, but cached AIAgent instances kept
their original tools list. Users had to run /new (discarding conversation
history) for the agent to pick up the new tools.
This test exercises _execute_mcp_reload directly with mocked MCP discovery
and asserts that every cached agent's `tools` and `valid_tool_names`
attributes are overwritten with the freshly-discovered tool set.
"""
from __future__ import annotations
from collections import OrderedDict
from datetime import datetime
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import MessageEvent
from gateway.session import SessionEntry, SessionSource, build_session_key
def _make_source() -> SessionSource:
return SessionSource(
platform=Platform.TELEGRAM,
user_id="u1",
chat_id="c1",
user_name="tester",
chat_type="dm",
)
def _make_event() -> MessageEvent:
return MessageEvent(text="/reload-mcp", source=_make_source(), message_id="m1")
def _make_runner_with_cached_agents(num_agents: int = 2):
"""Build a bare GatewayRunner with `num_agents` fake cached agents."""
import threading
from gateway.run import GatewayRunner
runner = object.__new__(GatewayRunner)
runner.config = GatewayConfig(
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
)
# Session store stub — _execute_mcp_reload writes a transcript message
# at the end; tests don't care about that side effect.
session_entry = SessionEntry(
session_key=build_session_key(_make_source()),
session_id="sess-1",
created_at=datetime.now(),
updated_at=datetime.now(),
platform=Platform.TELEGRAM,
chat_type="dm",
)
runner.session_store = MagicMock()
runner.session_store.get_or_create_session.return_value = session_entry
runner.session_store.append_to_transcript = MagicMock()
# Build N fake cached agents with stale `tools` + `valid_tool_names`.
runner._agent_cache = OrderedDict()
runner._agent_cache_lock = threading.Lock()
for i in range(num_agents):
stale_tool = {
"type": "function",
"function": {"name": f"stale_tool_{i}", "description": "old"},
}
agent = SimpleNamespace(
tools=[stale_tool],
valid_tool_names={f"stale_tool_{i}"},
enabled_toolsets=None,
disabled_toolsets=None,
)
runner._agent_cache[f"session-{i}"] = (agent, f"sig-{i}")
return runner
@pytest.mark.asyncio
async def test_reload_mcp_refreshes_cached_agent_tools():
"""After /reload-mcp succeeds, every cached agent gets its tool list
replaced with the freshly-discovered set."""
runner = _make_runner_with_cached_agents(num_agents=3)
# Snapshot the stale state so we can assert it changed.
pre_reload_tools = {
key: list(entry[0].tools) for key, entry in runner._agent_cache.items()
}
# Fresh tools that get_tool_definitions() will return after the reload.
fresh_tool_defs = [
{
"type": "function",
"function": {"name": "HassTurnOn", "description": "Turns on a device"},
},
{
"type": "function",
"function": {"name": "HassTurnOff", "description": "Turns off a device"},
},
]
with (
patch("tools.mcp_tool.shutdown_mcp_servers"),
patch("tools.mcp_tool.discover_mcp_tools", return_value=["HassTurnOn", "HassTurnOff"]),
patch.dict("tools.mcp_tool._servers", {"homeassistant": object()}, clear=True),
patch("model_tools.get_tool_definitions", return_value=fresh_tool_defs),
):
result = await runner._execute_mcp_reload(_make_event())
# The reload itself returned a status string (not an exception).
assert isinstance(result, str)
# Every cached agent has fresh tools and the matching valid_tool_names.
expected_names = {"HassTurnOn", "HassTurnOff"}
for key, (agent, _sig) in runner._agent_cache.items():
assert agent.tools == fresh_tool_defs, (
f"Agent {key} kept stale tools: {agent.tools} != {fresh_tool_defs}"
)
assert agent.valid_tool_names == expected_names, (
f"Agent {key} kept stale valid_tool_names: {agent.valid_tool_names}"
)
# Sanity check that the swap actually changed something.
assert agent.tools != pre_reload_tools[key]
@pytest.mark.asyncio
async def test_reload_mcp_handles_empty_agent_cache():
"""Reload with no cached agents (e.g. fresh gateway) must not raise."""
runner = _make_runner_with_cached_agents(num_agents=0)
assert len(runner._agent_cache) == 0
with (
patch("tools.mcp_tool.shutdown_mcp_servers"),
patch("tools.mcp_tool.discover_mcp_tools", return_value=[]),
patch.dict("tools.mcp_tool._servers", {}, clear=True),
patch("model_tools.get_tool_definitions", return_value=[]),
):
result = await runner._execute_mcp_reload(_make_event())
assert isinstance(result, str)
@pytest.mark.asyncio
async def test_reload_mcp_preserves_per_agent_toolset_overrides():
"""If a cached agent was built with enabled_toolsets=["safe"], the
refresh must pass that same list to get_tool_definitions so the agent
doesn't silently gain disabled tools after a reload."""
runner = _make_runner_with_cached_agents(num_agents=1)
# Override the toolsets on the cached agent.
agent, _sig = runner._agent_cache["session-0"]
agent.enabled_toolsets = ["safe"]
agent.disabled_toolsets = ["terminal"]
captured_calls = []
def _capture_get_tool_definitions(**kwargs):
captured_calls.append(kwargs)
return [{"type": "function", "function": {"name": "refreshed"}}]
with (
patch("tools.mcp_tool.shutdown_mcp_servers"),
patch("tools.mcp_tool.discover_mcp_tools", return_value=["refreshed"]),
patch.dict("tools.mcp_tool._servers", {"homeassistant": object()}, clear=True),
patch("model_tools.get_tool_definitions", side_effect=_capture_get_tool_definitions),
):
await runner._execute_mcp_reload(_make_event())
assert captured_calls, "get_tool_definitions was never called to refresh the cache"
assert captured_calls[0]["enabled_toolsets"] == ["safe"]
assert captured_calls[0]["disabled_toolsets"] == ["terminal"]