mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
fix(mcp): gate utility stubs on server-advertised capabilities (#21347)
For every connected MCP server we register four "utility" tool schemas (mcp_<server>_list_resources, read_resource, list_prompts, get_prompt). The existing gate was `hasattr(server.session, method)` — but `mcp.ClientSession` defines all four methods on the class regardless of what the remote server supports, so the gate never filtered anything. Tools-only servers (e.g. @upstash/context7-mcp which advertises only `tools`) ended up with 4 dead stubs; every model call to them returned JSON-RPC -32601 Method not found, which made the model conclude the server was broken even when the real tools worked. Capture the `InitializeResult` returned by `await session.initialize()` on the `MCPServerTask`, then gate each utility schema on the corresponding `capabilities` sub-object (resources / prompts). A legacy `hasattr` fallback runs when `initialize_result` is missing (older test fixtures / not-yet-captured code paths) so pre-existing behavior is preserved. Verified against real `mcp.types.InitializeResult` pydantic models: - Context7 shape (tools only) → 0 utility stubs registered (was 4) - Resources-only server → 2 stubs (list_resources, read_resource) - Prompts-only server → 2 stubs (list_prompts, get_prompt) - Fully capable server → all 4 stubs Closes #18051. Co-authored-by: nikolay-bratanov <nikolay-bratanov@users.noreply.github.com>
This commit is contained in:
parent
898b6d7d55
commit
74c9c0eec9
2 changed files with 240 additions and 13 deletions
175
tests/tools/test_mcp_utility_capability_gating.py
Normal file
175
tests/tools/test_mcp_utility_capability_gating.py
Normal file
|
|
@ -0,0 +1,175 @@
|
||||||
|
"""Regression tests for capability-gated MCP utility schema registration.
|
||||||
|
|
||||||
|
Background
|
||||||
|
==========
|
||||||
|
For every connected MCP server, hermes-agent used to register four "utility"
|
||||||
|
tool schemas (``mcp_<server>_list_resources``, ``read_resource``,
|
||||||
|
``list_prompts``, ``get_prompt``) regardless of whether the server actually
|
||||||
|
advertises those capabilities. The old gate used ``hasattr(server.session,
|
||||||
|
method)`` which always returned True because ``mcp.ClientSession`` defines
|
||||||
|
all four methods on the class — independent of what the remote server
|
||||||
|
supports.
|
||||||
|
|
||||||
|
Tools-only servers like ``@upstash/context7-mcp`` advertise
|
||||||
|
``{\"tools\": {\"listChanged\": true}}`` in their ``initialize`` response —
|
||||||
|
no ``prompts`` or ``resources`` keys — and they return JSON-RPC
|
||||||
|
``-32601 Method not found`` for ``prompts/list``, ``prompts/get``,
|
||||||
|
``resources/list``, ``resources/read``. The model would try the stubs,
|
||||||
|
get the error, and incorrectly conclude the MCP server was broken.
|
||||||
|
|
||||||
|
The fix captures the ``InitializeResult`` from
|
||||||
|
``await session.initialize()`` into ``MCPServerTask.initialize_result``
|
||||||
|
and gates utility schema registration on the advertised
|
||||||
|
``capabilities.resources`` / ``capabilities.prompts`` sub-objects. See
|
||||||
|
#18051 for the reporter's repro (Context7) and analysis.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _make_init_result(*, resources: bool, prompts: bool):
|
||||||
|
"""Build a fake ``InitializeResult`` whose ``capabilities`` sub-object
|
||||||
|
matches a server that advertises exactly the given capability set.
|
||||||
|
|
||||||
|
MCP spec shape: ``capabilities.resources`` / ``capabilities.prompts``
|
||||||
|
are non-None iff the server implements the corresponding request
|
||||||
|
family. We mirror that with ``SimpleNamespace`` because the real SDK
|
||||||
|
models are pydantic and we don't want the test to couple to pydantic
|
||||||
|
versioning.
|
||||||
|
"""
|
||||||
|
caps_attrs: dict = {"tools": SimpleNamespace(listChanged=True)}
|
||||||
|
caps_attrs["resources"] = SimpleNamespace(listChanged=True) if resources else None
|
||||||
|
caps_attrs["prompts"] = SimpleNamespace(listChanged=True) if prompts else None
|
||||||
|
return SimpleNamespace(capabilities=SimpleNamespace(**caps_attrs))
|
||||||
|
|
||||||
|
|
||||||
|
def _make_fake_server(*, initialize_result):
|
||||||
|
"""Build a stand-in ``MCPServerTask`` that exposes just the fields
|
||||||
|
``_select_utility_schemas`` inspects: ``name``, ``session``,
|
||||||
|
``initialize_result``.
|
||||||
|
|
||||||
|
A plain ``MCPServerTask`` uses ``__slots__`` and needs an asyncio
|
||||||
|
loop for the ``Event``/``Lock`` init — overkill for unit scope.
|
||||||
|
"""
|
||||||
|
server = MagicMock()
|
||||||
|
server.name = "test-server"
|
||||||
|
# session must satisfy the legacy ``hasattr`` fallback too
|
||||||
|
server.session = MagicMock(
|
||||||
|
spec=["list_resources", "read_resource", "list_prompts", "get_prompt"]
|
||||||
|
)
|
||||||
|
server.initialize_result = initialize_result
|
||||||
|
return server
|
||||||
|
|
||||||
|
|
||||||
|
def _handler_keys(selected):
|
||||||
|
return {entry["handler_key"] for entry in selected}
|
||||||
|
|
||||||
|
|
||||||
|
class TestCapabilityGatedRegistration:
|
||||||
|
def test_tools_only_server_gets_no_utility_schemas(self):
|
||||||
|
"""Context7-shaped server (tools only, no prompts / resources) should
|
||||||
|
get zero utility stubs registered — this is the exact scenario
|
||||||
|
from the #18051 bug report."""
|
||||||
|
from tools.mcp_tool import _select_utility_schemas
|
||||||
|
|
||||||
|
server = _make_fake_server(
|
||||||
|
initialize_result=_make_init_result(resources=False, prompts=False)
|
||||||
|
)
|
||||||
|
selected = _select_utility_schemas("context7", server, {})
|
||||||
|
assert _handler_keys(selected) == set(), (
|
||||||
|
f"tools-only server should have zero utility stubs, got "
|
||||||
|
f"{_handler_keys(selected)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_resources_only_server_gets_resource_stubs_only(self):
|
||||||
|
from tools.mcp_tool import _select_utility_schemas
|
||||||
|
|
||||||
|
server = _make_fake_server(
|
||||||
|
initialize_result=_make_init_result(resources=True, prompts=False)
|
||||||
|
)
|
||||||
|
selected = _select_utility_schemas("res-only", server, {})
|
||||||
|
assert _handler_keys(selected) == {"list_resources", "read_resource"}
|
||||||
|
|
||||||
|
def test_prompts_only_server_gets_prompt_stubs_only(self):
|
||||||
|
from tools.mcp_tool import _select_utility_schemas
|
||||||
|
|
||||||
|
server = _make_fake_server(
|
||||||
|
initialize_result=_make_init_result(resources=False, prompts=True)
|
||||||
|
)
|
||||||
|
selected = _select_utility_schemas("prompt-only", server, {})
|
||||||
|
assert _handler_keys(selected) == {"list_prompts", "get_prompt"}
|
||||||
|
|
||||||
|
def test_fully_capable_server_gets_all_four_stubs(self):
|
||||||
|
from tools.mcp_tool import _select_utility_schemas
|
||||||
|
|
||||||
|
server = _make_fake_server(
|
||||||
|
initialize_result=_make_init_result(resources=True, prompts=True)
|
||||||
|
)
|
||||||
|
selected = _select_utility_schemas("full", server, {})
|
||||||
|
assert _handler_keys(selected) == {
|
||||||
|
"list_resources", "read_resource", "list_prompts", "get_prompt",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TestConfigFilterStillApplies:
|
||||||
|
"""Per-server config flags ``tools.resources: false`` / ``tools.prompts: false``
|
||||||
|
must continue to override even when the server DOES advertise the capability."""
|
||||||
|
|
||||||
|
def test_config_disables_resources_even_when_advertised(self):
|
||||||
|
from tools.mcp_tool import _select_utility_schemas
|
||||||
|
|
||||||
|
server = _make_fake_server(
|
||||||
|
initialize_result=_make_init_result(resources=True, prompts=True)
|
||||||
|
)
|
||||||
|
selected = _select_utility_schemas(
|
||||||
|
"full-but-filtered",
|
||||||
|
server,
|
||||||
|
{"tools": {"resources": False}},
|
||||||
|
)
|
||||||
|
assert _handler_keys(selected) == {"list_prompts", "get_prompt"}
|
||||||
|
|
||||||
|
def test_config_disables_prompts_even_when_advertised(self):
|
||||||
|
from tools.mcp_tool import _select_utility_schemas
|
||||||
|
|
||||||
|
server = _make_fake_server(
|
||||||
|
initialize_result=_make_init_result(resources=True, prompts=True)
|
||||||
|
)
|
||||||
|
selected = _select_utility_schemas(
|
||||||
|
"full-but-filtered",
|
||||||
|
server,
|
||||||
|
{"tools": {"prompts": False}},
|
||||||
|
)
|
||||||
|
assert _handler_keys(selected) == {"list_resources", "read_resource"}
|
||||||
|
|
||||||
|
|
||||||
|
class TestLegacyFallback:
|
||||||
|
"""When ``initialize_result`` is missing (older test fixtures or code
|
||||||
|
paths that haven't captured it yet), fall back to the legacy hasattr
|
||||||
|
check so pre-existing tests and servers keep working."""
|
||||||
|
|
||||||
|
def test_no_initialize_result_falls_back_to_hasattr_check(self):
|
||||||
|
from tools.mcp_tool import _select_utility_schemas
|
||||||
|
|
||||||
|
server = _make_fake_server(initialize_result=None)
|
||||||
|
# With the legacy fallback, session.spec includes all four methods,
|
||||||
|
# so all four stubs should register (old behavior).
|
||||||
|
selected = _select_utility_schemas("legacy", server, {})
|
||||||
|
assert _handler_keys(selected) == {
|
||||||
|
"list_resources", "read_resource", "list_prompts", "get_prompt",
|
||||||
|
}
|
||||||
|
|
||||||
|
def test_no_initialize_result_respects_session_spec(self):
|
||||||
|
"""Legacy fallback still filters by ``hasattr(session, method)``, so
|
||||||
|
a session whose spec lacks a method is correctly skipped."""
|
||||||
|
from tools.mcp_tool import _select_utility_schemas
|
||||||
|
|
||||||
|
server = _make_fake_server(initialize_result=None)
|
||||||
|
# Override session to a spec that only has list_resources
|
||||||
|
server.session = MagicMock(spec=["list_resources"])
|
||||||
|
selected = _select_utility_schemas("legacy-partial", server, {})
|
||||||
|
assert _handler_keys(selected) == {"list_resources"}
|
||||||
|
|
@ -950,6 +950,7 @@ class MCPServerTask:
|
||||||
"_tools", "_error", "_config",
|
"_tools", "_error", "_config",
|
||||||
"_sampling", "_registered_tool_names", "_auth_type", "_refresh_lock",
|
"_sampling", "_registered_tool_names", "_auth_type", "_refresh_lock",
|
||||||
"_rpc_lock", "_pending_refresh_tasks",
|
"_rpc_lock", "_pending_refresh_tasks",
|
||||||
|
"initialize_result",
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, name: str):
|
def __init__(self, name: str):
|
||||||
|
|
@ -980,6 +981,12 @@ class MCPServerTask:
|
||||||
# transports for conservative per-server ordering.
|
# transports for conservative per-server ordering.
|
||||||
self._rpc_lock = asyncio.Lock()
|
self._rpc_lock = asyncio.Lock()
|
||||||
self._pending_refresh_tasks: set[asyncio.Task] = set()
|
self._pending_refresh_tasks: set[asyncio.Task] = set()
|
||||||
|
# Captures the ``InitializeResult`` returned by
|
||||||
|
# ``await session.initialize()`` so downstream code can inspect the
|
||||||
|
# server's real advertised capabilities (``.capabilities.resources``,
|
||||||
|
# ``.capabilities.prompts``) instead of assuming every ``ClientSession``
|
||||||
|
# method attribute corresponds to a supported server method. See #18051.
|
||||||
|
self.initialize_result: Optional[Any] = None
|
||||||
|
|
||||||
def _is_http(self) -> bool:
|
def _is_http(self) -> bool:
|
||||||
"""Check if this server uses HTTP transport."""
|
"""Check if this server uses HTTP transport."""
|
||||||
|
|
@ -1225,7 +1232,7 @@ class MCPServerTask:
|
||||||
async with ClientSession(
|
async with ClientSession(
|
||||||
read_stream, write_stream, **sampling_kwargs
|
read_stream, write_stream, **sampling_kwargs
|
||||||
) as session:
|
) as session:
|
||||||
await session.initialize()
|
self.initialize_result = await session.initialize()
|
||||||
self.session = session
|
self.session = session
|
||||||
await self._discover_tools()
|
await self._discover_tools()
|
||||||
self._ready.set()
|
self._ready.set()
|
||||||
|
|
@ -1324,7 +1331,7 @@ class MCPServerTask:
|
||||||
async with ClientSession(
|
async with ClientSession(
|
||||||
read_stream, write_stream, **sampling_kwargs
|
read_stream, write_stream, **sampling_kwargs
|
||||||
) as session:
|
) as session:
|
||||||
await session.initialize()
|
self.initialize_result = await session.initialize()
|
||||||
self.session = session
|
self.session = session
|
||||||
await self._discover_tools()
|
await self._discover_tools()
|
||||||
self._ready.set()
|
self._ready.set()
|
||||||
|
|
@ -1371,7 +1378,7 @@ class MCPServerTask:
|
||||||
read_stream, write_stream, _get_session_id,
|
read_stream, write_stream, _get_session_id,
|
||||||
):
|
):
|
||||||
async with ClientSession(read_stream, write_stream, **sampling_kwargs) as session:
|
async with ClientSession(read_stream, write_stream, **sampling_kwargs) as session:
|
||||||
await session.initialize()
|
self.initialize_result = await session.initialize()
|
||||||
self.session = session
|
self.session = session
|
||||||
await self._discover_tools()
|
await self._discover_tools()
|
||||||
self._ready.set()
|
self._ready.set()
|
||||||
|
|
@ -1394,7 +1401,7 @@ class MCPServerTask:
|
||||||
read_stream, write_stream, _get_session_id,
|
read_stream, write_stream, _get_session_id,
|
||||||
):
|
):
|
||||||
async with ClientSession(read_stream, write_stream, **sampling_kwargs) as session:
|
async with ClientSession(read_stream, write_stream, **sampling_kwargs) as session:
|
||||||
await session.initialize()
|
self.initialize_result = await session.initialize()
|
||||||
self.session = session
|
self.session = session
|
||||||
await self._discover_tools()
|
await self._discover_tools()
|
||||||
self._ready.set()
|
self._ready.set()
|
||||||
|
|
@ -2806,6 +2813,23 @@ _UTILITY_CAPABILITY_METHODS = {
|
||||||
"get_prompt": "get_prompt",
|
"get_prompt": "get_prompt",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Maps each utility handler to the MCP capability key that must be non-None
|
||||||
|
# on the server's ``initialize`` response for the handler to be registered.
|
||||||
|
# Source of truth: MCP spec — capabilities.resources / capabilities.prompts
|
||||||
|
# are present on the response only when the server actually implements
|
||||||
|
# those request families. Without this gate, tools-only servers (e.g.
|
||||||
|
# Context7 @upstash/context7-mcp, which advertises only ``tools``) had
|
||||||
|
# all four utility stubs registered and every model call to them came
|
||||||
|
# back with JSON-RPC ``-32601 Method not found``, which made the model
|
||||||
|
# conclude the server was broken even when the real tools worked. See
|
||||||
|
# #18051.
|
||||||
|
_UTILITY_CAPABILITY_ATTRS = {
|
||||||
|
"list_resources": "resources",
|
||||||
|
"read_resource": "resources",
|
||||||
|
"list_prompts": "prompts",
|
||||||
|
"get_prompt": "prompts",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def _select_utility_schemas(server_name: str, server: MCPServerTask, config: dict) -> List[dict]:
|
def _select_utility_schemas(server_name: str, server: MCPServerTask, config: dict) -> List[dict]:
|
||||||
"""Select utility schemas based on config and server capabilities."""
|
"""Select utility schemas based on config and server capabilities."""
|
||||||
|
|
@ -2813,6 +2837,16 @@ def _select_utility_schemas(server_name: str, server: MCPServerTask, config: dic
|
||||||
resources_enabled = _parse_boolish(tools_filter.get("resources"), default=True)
|
resources_enabled = _parse_boolish(tools_filter.get("resources"), default=True)
|
||||||
prompts_enabled = _parse_boolish(tools_filter.get("prompts"), default=True)
|
prompts_enabled = _parse_boolish(tools_filter.get("prompts"), default=True)
|
||||||
|
|
||||||
|
# ``initialize_result.capabilities`` is the source of truth: its sub-objects
|
||||||
|
# (``resources``, ``prompts``) are non-None iff the server advertises that
|
||||||
|
# request family. ``hasattr(server.session, ...)`` was the old gate but
|
||||||
|
# ClientSession always has the four method attributes defined on the class,
|
||||||
|
# so it never filtered anything.
|
||||||
|
advertised_caps = None
|
||||||
|
init_result = getattr(server, "initialize_result", None)
|
||||||
|
if init_result is not None:
|
||||||
|
advertised_caps = getattr(init_result, "capabilities", None)
|
||||||
|
|
||||||
selected: List[dict] = []
|
selected: List[dict] = []
|
||||||
for entry in _build_utility_schemas(server_name):
|
for entry in _build_utility_schemas(server_name):
|
||||||
handler_key = entry["handler_key"]
|
handler_key = entry["handler_key"]
|
||||||
|
|
@ -2823,15 +2857,33 @@ def _select_utility_schemas(server_name: str, server: MCPServerTask, config: dic
|
||||||
logger.debug("MCP server '%s': skipping utility '%s' (prompts disabled)", server_name, handler_key)
|
logger.debug("MCP server '%s': skipping utility '%s' (prompts disabled)", server_name, handler_key)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
required_method = _UTILITY_CAPABILITY_METHODS[handler_key]
|
# Preferred gate: check the server's advertised capabilities. Skip
|
||||||
if not hasattr(server.session, required_method):
|
# if the capability is explicitly not advertised.
|
||||||
logger.debug(
|
if advertised_caps is not None:
|
||||||
"MCP server '%s': skipping utility '%s' (session lacks %s)",
|
cap_attr = _UTILITY_CAPABILITY_ATTRS[handler_key]
|
||||||
server_name,
|
if getattr(advertised_caps, cap_attr, None) is None:
|
||||||
handler_key,
|
logger.debug(
|
||||||
required_method,
|
"MCP server '%s': skipping utility '%s' "
|
||||||
)
|
"(server does not advertise '%s' capability)",
|
||||||
continue
|
server_name,
|
||||||
|
handler_key,
|
||||||
|
cap_attr,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# Legacy fallback for test fixtures or older code paths where
|
||||||
|
# initialize_result wasn't captured. Preserves the old behavior
|
||||||
|
# of registering every stub in that case rather than regressing
|
||||||
|
# any server that was working before this fix.
|
||||||
|
required_method = _UTILITY_CAPABILITY_METHODS[handler_key]
|
||||||
|
if not hasattr(server.session, required_method):
|
||||||
|
logger.debug(
|
||||||
|
"MCP server '%s': skipping utility '%s' (session lacks %s)",
|
||||||
|
server_name,
|
||||||
|
handler_key,
|
||||||
|
required_method,
|
||||||
|
)
|
||||||
|
continue
|
||||||
selected.append(entry)
|
selected.append(entry)
|
||||||
return selected
|
return selected
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue