diff --git a/tests/tools/test_mcp_capability_gating.py b/tests/tools/test_mcp_capability_gating.py new file mode 100644 index 00000000000..b4f91d16bb2 --- /dev/null +++ b/tests/tools/test_mcp_capability_gating.py @@ -0,0 +1,158 @@ +"""Tests for capability-gated MCP tool discovery and keepalive. + +Prompt-only / resource-only MCP servers do not implement the ``tools/*`` +request family. Per the MCP spec, ``InitializeResult.capabilities.tools`` +is non-None iff the server supports it. Before this fix, Hermes always +called ``tools/list`` during discovery and as the keepalive probe — both +raised ``McpError(-32601 Method not found)`` against such servers, so a +prompt-only server could never stay connected. + +Ported from anomalyco/opencode#31271. +""" +import asyncio +from types import SimpleNamespace +from unittest.mock import AsyncMock + +import pytest + +from tools.mcp_tool import MCPServerTask + + +def _caps(tools=None, prompts=None, resources=None): + """Build a fake InitializeResult with the given capability sub-objects.""" + return SimpleNamespace( + capabilities=SimpleNamespace(tools=tools, prompts=prompts, resources=resources) + ) + + +class TestAdvertisesTools: + def test_true_when_tools_capability_present(self): + task = MCPServerTask("test") + task.initialize_result = _caps(tools=SimpleNamespace(listChanged=True)) + assert task._advertises_tools() is True + + def test_false_for_prompt_only_server(self): + task = MCPServerTask("test") + task.initialize_result = _caps(prompts=SimpleNamespace(listChanged=None)) + assert task._advertises_tools() is False + + def test_false_for_resource_only_server(self): + task = MCPServerTask("test") + task.initialize_result = _caps(resources=SimpleNamespace()) + assert task._advertises_tools() is False + + def test_legacy_fallback_no_initialize_result(self): + """No captured capabilities → preserve old always-list_tools behavior.""" + task = MCPServerTask("test") + assert task.initialize_result is None + assert task._advertises_tools() is True + + def test_legacy_fallback_no_capabilities_attr(self): + task = MCPServerTask("test") + task.initialize_result = SimpleNamespace() # no .capabilities + assert task._advertises_tools() is True + + +@pytest.mark.asyncio +class TestDiscoverToolsGating: + async def test_skips_list_tools_for_prompt_only_server(self): + task = MCPServerTask("test") + task.initialize_result = _caps(prompts=SimpleNamespace()) + task.session = SimpleNamespace(list_tools=AsyncMock()) + task._tools = ["stale"] + + await task._discover_tools() + + task.session.list_tools.assert_not_called() + assert task._tools == [] + + async def test_calls_list_tools_for_tool_capable_server(self): + task = MCPServerTask("test") + task.initialize_result = _caps(tools=SimpleNamespace()) + fake_tool = SimpleNamespace(name="echo") + task.session = SimpleNamespace( + list_tools=AsyncMock(return_value=SimpleNamespace(tools=[fake_tool])) + ) + + await task._discover_tools() + + task.session.list_tools.assert_awaited_once() + assert task._tools == [fake_tool] + + async def test_legacy_fallback_still_calls_list_tools(self): + task = MCPServerTask("test") + task.session = SimpleNamespace( + list_tools=AsyncMock(return_value=SimpleNamespace(tools=[])) + ) + + await task._discover_tools() + + task.session.list_tools.assert_awaited_once() + + +@pytest.mark.asyncio +class TestRefreshToolsGating: + async def test_refresh_noop_for_prompt_only_server(self): + task = MCPServerTask("test") + task.initialize_result = _caps(prompts=SimpleNamespace()) + task.session = SimpleNamespace(list_tools=AsyncMock()) + + await task._refresh_tools() + + task.session.list_tools.assert_not_called() + + +@pytest.mark.asyncio +class TestKeepaliveProbe: + async def _run_one_keepalive_cycle(self, task): + """Drive _wait_for_lifecycle_event through exactly one keepalive + timeout, then fire shutdown so it returns.""" + real_wait = asyncio.wait + cycles = {"n": 0} + + async def fake_wait(tasks, timeout=None, return_when=None): + cycles["n"] += 1 + if cycles["n"] == 1: + # Simulate keepalive timeout: nothing completed. + return set(), set(tasks) + # Second cycle: let shutdown win. + task._shutdown_event.set() + return await real_wait( + tasks, timeout=0.5, return_when=return_when or asyncio.FIRST_COMPLETED + ) + + import tools.mcp_tool as mcp_mod + orig = mcp_mod.asyncio.wait + mcp_mod.asyncio.wait = fake_wait + try: + return await task._wait_for_lifecycle_event() + finally: + mcp_mod.asyncio.wait = orig + + async def test_keepalive_uses_ping_for_prompt_only_server(self): + task = MCPServerTask("test") + task.initialize_result = _caps(prompts=SimpleNamespace()) + task.session = SimpleNamespace( + list_tools=AsyncMock(), + send_ping=AsyncMock(), + ) + + reason = await self._run_one_keepalive_cycle(task) + + assert reason == "shutdown" + task.session.send_ping.assert_awaited_once() + task.session.list_tools.assert_not_called() + + async def test_keepalive_uses_list_tools_for_tool_capable_server(self): + task = MCPServerTask("test") + task.initialize_result = _caps(tools=SimpleNamespace()) + task.session = SimpleNamespace( + list_tools=AsyncMock(return_value=SimpleNamespace(tools=[])), + send_ping=AsyncMock(), + ) + + reason = await self._run_one_keepalive_cycle(task) + + assert reason == "shutdown" + task.session.list_tools.assert_awaited_once() + task.session.send_ping.assert_not_called() diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index 9fe58eff818..0cc4907ea72 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -1202,6 +1202,26 @@ class MCPServerTask: """Check if this server uses HTTP transport.""" return "url" in self._config + def _advertises_tools(self) -> bool: + """Whether the server advertises the ``tools`` capability. + + Per the MCP spec, ``InitializeResult.capabilities.tools`` is non-None + iff the server implements the ``tools/*`` request family. Prompt-only + or resource-only servers omit it, and calling ``tools/list`` against + them raises ``McpError(-32601 Method not found)`` — which previously + killed the connection during discovery and made every keepalive fail. + (Ported from anomalyco/opencode#31271.) + + Returns True when no capability info was captured (legacy fallback: + preserve the old always-call-list_tools behavior rather than regress + any server that was working before this gate). + """ + init_result = self.initialize_result + caps = getattr(init_result, "capabilities", None) if init_result is not None else None + if caps is None: + return True + return getattr(caps, "tools", None) is not None + # ----- Dynamic tool discovery (notifications/tools/list_changed) ----- async def _refresh_tools_task(self): @@ -1273,6 +1293,12 @@ class MCPServerTask: """ from tools.registry import registry + if not self._advertises_tools(): + # A server that doesn't implement tools/* should never send + # tools/list_changed, but guard anyway — calling tools/list + # would raise McpError(-32601). + return + async with self._refresh_lock: # Capture old tool names for change diff old_tool_names = set(self._registered_tool_names) @@ -1360,12 +1386,22 @@ class MCPServerTask: # Timeout — no lifecycle event fired. Send a keepalive # to exercise the connection and detect stale sockets. + # Prompt-only / resource-only servers don't implement + # ``tools/list`` (McpError -32601), so use the universal + # ``ping`` request for them instead — otherwise every + # keepalive cycle would trigger a spurious reconnect. if self.session: try: - await asyncio.wait_for( - self.session.list_tools(), - timeout=30.0, - ) + if self._advertises_tools(): + await asyncio.wait_for( + self.session.list_tools(), + timeout=30.0, + ) + else: + await asyncio.wait_for( + self.session.send_ping(), + timeout=30.0, + ) except Exception as exc: logger.warning( "MCP server '%s' keepalive failed, " @@ -1778,9 +1814,25 @@ class MCPServerTask: ) async def _discover_tools(self): - """Discover tools from the connected session.""" + """Discover tools from the connected session. + + Capability-gated: prompt-only / resource-only MCP servers don't + implement ``tools/list``, and calling it raises ``McpError(-32601)``, + which previously aborted the connection — those servers could never + stay connected for their prompts/resources. Skip the call when the + server doesn't advertise the ``tools`` capability. + (Ported from anomalyco/opencode#31271.) + """ if self.session is None: return + if not self._advertises_tools(): + logger.info( + "MCP server '%s': does not advertise 'tools' capability — " + "skipping tools/list (prompts/resources remain available)", + self.name, + ) + self._tools = [] + return async with self._rpc_lock: tools_result = await self.session.list_tools() self._tools = (