fix(anthropic): also normalize MCP-server tool names to mcp__ on OAuth wire

The double-underscore prefix swap fixed bare native tools but SKIPPED tools
already named mcp_<server>_<tool> (real MCP servers, e.g. mcp_linear_get_issue):
they went on the OAuth wire single-underscore and still tripped Anthropic's
third-party billing classifier -> HTTP 400 'extra usage, not plan limits'.
Verified empirically against a live Max subscription: a single mcp_ tool flips
the whole request to the extra-usage lane; mcp__ is accepted.

- build_anthropic_kwargs: promote ANY leading single-underscore mcp_ to mcp__
  (bare names -> mcp__name; mcp_<server>_<tool> -> mcp__<server>_<tool>),
  never double-prefixing an already-mcp__ name. Same for tool_use blocks in
  history.
- normalize_response: reverse the mcp__ wire name back to whichever original
  the registry knows — the single-underscore mcp_<server>_<tool> form for MCP
  server tools, or the bare name for native tools — preferring a name that
  already resolves natively.
- Tests rewritten to assert the invariant: ZERO single-underscore mcp_ names
  reach the OAuth wire, and the mcp__ round-trip resolves back to the
  registered name for both native and MCP-server tools.

Builds on liuhao1024's mcp__ prefix commit (cherry-picked). Closes the
MCP-server gap that left any session with an MCP server configured still
billing to extra usage.
This commit is contained in:
kshitijk4poor 2026-06-17 13:20:29 +05:30
parent 3d37869295
commit b70a4e7533
3 changed files with 131 additions and 114 deletions

View file

@ -2349,28 +2349,46 @@ def build_anthropic_kwargs(
text = text.replace("Nous Research", "Anthropic")
block["text"] = text
# 3. Prefix tool names with mcp__ (Claude Code convention)
# Skip names that already begin with the marker — native MCP server
# tools (from mcp_servers: in config.yaml) are registered under their
# full mcp_<server>_<tool> name and would double-prefix otherwise,
# breaking round-trip registry lookup in normalize_response. GH-25255.
# Check both single-underscore (legacy MCP) and double-underscore
# (new prefix) to avoid double-prefixing either format.
_MCP_SKIP = ("mcp__", "mcp_")
# 3. Normalize tool names so NOTHING goes on the OAuth wire with a
# single-underscore ``mcp_`` prefix. Anthropic's subscription/OAuth
# billing classifier treats a single-underscore ``mcp_`` tool name as
# a third-party-app fingerprint and rejects the request with HTTP 400
# "Third-party apps now draw from extra usage, not plan limits"
# (verified empirically: a single ``mcp_foo`` tool flips a request
# from plan-billing to the extra-usage lane; ``mcp__foo`` is accepted).
#
# Two cases, both must land on the double-underscore ``mcp__`` form:
# a) bare Hermes-native tools (``read_file``) -> ``mcp__read_file``
# b) native MCP server tools registered under their full
# single-underscore ``mcp_<server>_<tool>`` name
# (``mcp_linear_get_issue``) -> ``mcp__linear_get_issue``
# Case (b) is the gap that the bare ``mcp_``->``mcp__`` constant swap
# left open: those tools were *skipped* and stayed single-underscore,
# so any session with an MCP server configured still tripped the
# classifier. normalize_response reverses both forms via registry
# lookup so the dispatcher still sees the original name. GH-25255.
def _to_oauth_wire_name(name: str) -> str:
if name.startswith("mcp__"):
return name # already correct, don't double-prefix
if name.startswith("mcp_"):
# single-underscore native MCP tool -> promote to double
return "mcp__" + name[len("mcp_"):]
return _MCP_TOOL_PREFIX + name # bare name -> mcp__<name>
if anthropic_tools:
for tool in anthropic_tools:
if "name" in tool and not tool["name"].startswith(_MCP_SKIP):
tool["name"] = _MCP_TOOL_PREFIX + tool["name"]
if "name" in tool:
tool["name"] = _to_oauth_wire_name(tool["name"])
# 4. Prefix tool names in message history (tool_use and tool_result blocks)
# 4. Apply the same normalization to tool names in message history
# (tool_use blocks) so replayed turns match the wire names above.
for msg in anthropic_messages:
content = msg.get("content")
if isinstance(content, list):
for block in content:
if isinstance(block, dict):
if block.get("type") == "tool_use" and "name" in block:
if not block["name"].startswith(_MCP_SKIP):
block["name"] = _MCP_TOOL_PREFIX + block["name"]
block["name"] = _to_oauth_wire_name(block["name"])
elif block.get("type") == "tool_result" and "tool_use_id" in block:
pass # tool_result uses ID, not name

View file

@ -132,17 +132,25 @@ class AnthropicTransport(ProviderTransport):
elif block.type == "tool_use":
name = block.name
if strip_tool_prefix and name.startswith(_MCP_PREFIX):
stripped = name[len(_MCP_PREFIX):]
# Only strip the mcp_ prefix for OAuth-injected tools
# (where Hermes adds the prefix when sending to Anthropic
# and must remove it on the way back). Native MCP server
# tools (from mcp_servers: in config.yaml) are registered
# in the tool registry under their FULL mcp_<server>_<tool>
# name and must NOT be stripped. GH-25255.
# On the OAuth wire every tool carries a double-underscore
# ``mcp__`` prefix (added in build_anthropic_kwargs to avoid
# Anthropic's single-underscore third-party classifier).
# Reverse it back to the name the registry/dispatcher knows.
# Two original forms map onto the same ``mcp__`` wire name:
# ``mcp__read_file`` <- bare native tool ``read_file``
# ``mcp__linear_get_issue`` <- MCP server tool
# ``mcp_linear_get_issue``
# Resolve by registry lookup, preferring whichever original
# is actually registered; never rewrite a name the LLM used
# that already resolves natively. GH-25255.
from tools.registry import registry as _tool_registry
if (_tool_registry.get_entry(stripped)
and not _tool_registry.get_entry(name)):
name = stripped
if not _tool_registry.get_entry(name):
bare = name[len(_MCP_PREFIX):] # read_file
single = "mcp_" + bare # mcp_read_file / mcp_linear_get_issue
if _tool_registry.get_entry(single):
name = single
elif _tool_registry.get_entry(bare):
name = bare
tool_calls.append(
ToolCall(
id=block.id,

View file

@ -1,9 +1,16 @@
"""Tests for GH-25255: Anthropic OAuth mcp__ prefix stripping.
"""Tests for GH-25255: Anthropic OAuth ``mcp__`` tool-name round-trip.
When strip_tool_prefix=True (Anthropic OAuth path), the transport must only
strip the ``mcp__`` prefix from OAuth-injected tools, NOT from Hermes-native
MCP server tools that are registered under their full ``mcp_<server>_<tool>``
name in the tool registry.
Anthropic's subscription/OAuth billing classifier treats a **single-underscore**
``mcp_`` tool name as a third-party-app fingerprint and rejects the request with
HTTP 400 "Third-party apps now draw from extra usage, not plan limits". So on
the OAuth wire NOTHING may carry a single-underscore ``mcp_`` prefix:
* bare native tools ``read_file`` -> ``mcp__read_file``
* native MCP server tools ``mcp_linear_get_issue`` -> ``mcp__linear_get_issue``
``normalize_response`` reverses the ``mcp__`` wire name back to whatever the tool
registry knows (the single-underscore ``mcp_<server>_<tool>`` form for MCP server
tools, or the bare name for native tools) so the dispatcher is unaffected.
"""
from __future__ import annotations
@ -12,7 +19,6 @@ from types import SimpleNamespace
from unittest.mock import patch
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
@ -38,7 +44,7 @@ def _make_response(*blocks, stop_reason="end_turn"):
class _FakeRegistry:
"""Minimal fake tool registry for testing prefix stripping logic."""
"""Minimal fake tool registry for testing prefix round-trip logic."""
def __init__(self, registered_names: set[str]):
self._names = registered_names
@ -50,23 +56,18 @@ class _FakeRegistry:
# ---------------------------------------------------------------------------
# Tests
# Response side: mcp__ wire name -> registry name
# ---------------------------------------------------------------------------
class TestAnthropicMcpPrefixStrip:
"""Verify that strip_tool_prefix only strips OAuth-injected prefixes."""
"""Verify strip_tool_prefix reverses the ``mcp__`` wire prefix correctly."""
def _get_transport(self):
from agent.transports.anthropic import AnthropicTransport
return AnthropicTransport()
def test_strips_prefix_for_oauth_injected_tool(self):
"""OAuth tools: mcp__read_file -> read_file (stripped).
The tool was registered as 'read_file' in the registry.
Anthropic sees 'mcp__read_file' because Hermes adds the prefix.
On response, we must strip it back to 'read_file'.
"""
def test_strips_prefix_for_oauth_injected_native_tool(self):
"""``mcp__read_file`` -> ``read_file`` (bare native tool)."""
transport = self._get_transport()
block = _make_tool_use_block("mcp__read_file")
response = _make_response(block)
@ -78,26 +79,24 @@ class TestAnthropicMcpPrefixStrip:
assert len(result.tool_calls) == 1
assert result.tool_calls[0].name == "read_file"
def test_preserves_native_mcp_server_tool_name(self):
"""Native MCP tools: mcp_composio_SEARCH -> mcp_composio_SEARCH (kept).
def test_restores_single_underscore_mcp_server_tool(self):
"""``mcp__linear_get_issue`` -> ``mcp_linear_get_issue`` (MCP server tool).
The tool is registered with the full mcp_ prefix in the registry.
Stripping would break registry lookup.
MCP server tools are registered under their full single-underscore
``mcp_<server>_<tool>`` name, but they MUST go on the OAuth wire as
double-underscore to dodge the classifier. The response side restores
the single-underscore registry name so dispatch still resolves.
"""
transport = self._get_transport()
block = _make_tool_use_block("mcp_composio_COMPOSIO_SEARCH_TOOLS")
block = _make_tool_use_block("mcp__linear_get_issue")
response = _make_response(block)
registry = _FakeRegistry({
"mcp_composio_COMPOSIO_SEARCH_TOOLS",
"mcp_composio_COMPOSIO_GET_TOOL_SCHEMAS",
"read_file",
})
registry = _FakeRegistry({"mcp_linear_get_issue", "read_file"})
with patch("tools.registry.registry", registry):
result = transport.normalize_response(response, strip_tool_prefix=True)
assert len(result.tool_calls) == 1
assert result.tool_calls[0].name == "mcp_composio_COMPOSIO_SEARCH_TOOLS"
assert result.tool_calls[0].name == "mcp_linear_get_issue"
def test_no_strip_when_flag_false(self):
"""When strip_tool_prefix=False, names are never modified."""
@ -113,7 +112,7 @@ class TestAnthropicMcpPrefixStrip:
assert result.tool_calls[0].name == "mcp__read_file"
def test_no_strip_when_not_mcp_prefixed(self):
"""Non-mcp__ names are untouched regardless of strip flag."""
"""Non-``mcp__`` names are untouched regardless of strip flag."""
transport = self._get_transport()
block = _make_tool_use_block("web_search")
response = _make_response(block)
@ -125,49 +124,41 @@ class TestAnthropicMcpPrefixStrip:
assert len(result.tool_calls) == 1
assert result.tool_calls[0].name == "web_search"
def test_preserves_name_when_neither_in_registry(self):
"""When neither stripped nor full name is in registry, keep full name.
def test_preserves_name_when_no_original_in_registry(self):
"""Neither the single-underscore nor bare original is registered.
Safety fallback: if we can't determine the type, prefer the full name
since it's what the LLM was told about.
Safety fallback: keep the full ``mcp__`` name the LLM was told about.
"""
transport = self._get_transport()
block = _make_tool_use_block("mcp__unknown_tool")
response = _make_response(block)
registry = _FakeRegistry({"read_file"}) # neither name registered
registry = _FakeRegistry({"read_file"}) # no matching original
with patch("tools.registry.registry", registry):
result = transport.normalize_response(response, strip_tool_prefix=True)
assert len(result.tool_calls) == 1
assert result.tool_calls[0].name == "mcp__unknown_tool"
def test_mixed_tools_same_response(self):
"""Both OAuth and native MCP tools in the same response."""
def test_mixed_native_and_mcp_server_tools_same_response(self):
"""A bare native tool and an MCP server tool, both wired as ``mcp__``."""
transport = self._get_transport()
block1 = _make_tool_use_block("mcp__read_file", block_id="tc_1")
block2 = _make_tool_use_block("mcp_composio_SEARCH", block_id="tc_2")
block3 = _make_tool_use_block("mcp_composio_SEARCH", block_id="tc_3") # also registered natively
response = _make_response(block1, block2, block3)
block2 = _make_tool_use_block("mcp__linear_get_issue", block_id="tc_2")
response = _make_response(block1, block2)
registry = _FakeRegistry({
"read_file", # OAuth-injected
"mcp_composio_SEARCH", # native MCP
})
registry = _FakeRegistry({"read_file", "mcp_linear_get_issue"})
with patch("tools.registry.registry", registry):
result = transport.normalize_response(response, strip_tool_prefix=True)
assert len(result.tool_calls) == 3
# OAuth tool: stripped
assert len(result.tool_calls) == 2
assert result.tool_calls[0].name == "read_file"
# Native MCP: preserved (doesn't start with mcp__ so never enters strip path)
assert result.tool_calls[1].name == "mcp_composio_SEARCH"
assert result.tool_calls[2].name == "mcp_composio_SEARCH"
assert result.tool_calls[1].name == "mcp_linear_get_issue"
def test_both_stripped_and_full_registered_prefers_full(self):
"""Edge case: both 'foo' and 'mcp__foo' exist in registry.
def test_prefers_full_wire_name_when_it_resolves_directly(self):
"""If the ``mcp__`` wire name itself is registered, keep it as-is.
Keep 'mcp__foo' (the original name) since it's what the LLM requested.
Defensive: never rewrite a name that already resolves natively.
"""
transport = self._get_transport()
block = _make_tool_use_block("mcp__foo")
@ -178,31 +169,16 @@ class TestAnthropicMcpPrefixStrip:
result = transport.normalize_response(response, strip_tool_prefix=True)
assert len(result.tool_calls) == 1
# Both exist — the condition `get_entry(stripped) and not get_entry(name)`
# is False because get_entry(name) IS truthy, so we keep the full name.
assert result.tool_calls[0].name == "mcp__foo"
def test_legacy_single_underscore_native_mcp_not_stripped(self):
"""Legacy mcp_ (single underscore) native MCP tools are NOT stripped.
They don't start with mcp__ so the strip path is never entered.
"""
transport = self._get_transport()
block = _make_tool_use_block("mcp_github_create_issue")
response = _make_response(block)
registry = _FakeRegistry({"mcp_github_create_issue"})
with patch("tools.registry.registry", registry):
result = transport.normalize_response(response, strip_tool_prefix=True)
assert len(result.tool_calls) == 1
assert result.tool_calls[0].name == "mcp_github_create_issue"
# ---------------------------------------------------------------------------
# Request side: registry name -> mcp__ wire name (no single-underscore leaks)
# ---------------------------------------------------------------------------
class TestAnthropicOAuthOutgoingPrefix:
"""Verify the outgoing-side companion fix: build_anthropic_kwargs must not
double-prefix tool names that already start with ``mcp_`` or ``mcp__``
(native MCP server tools registered as ``mcp_<server>_<tool>``). GH-25255."""
"""build_anthropic_kwargs must emit ZERO single-underscore ``mcp_`` names on
the OAuth wire bare names and MCP server names both land on ``mcp__``."""
def _build(self, tools, is_oauth=True):
from agent.anthropic_adapter import build_anthropic_kwargs
@ -215,50 +191,65 @@ class TestAnthropicOAuthOutgoingPrefix:
is_oauth=is_oauth,
)
def test_oauth_adds_prefix_to_bare_tool_name(self):
"""OAuth + bare name → mcp__ prefix added (Claude Code convention)."""
def test_oauth_adds_double_prefix_to_bare_tool_name(self):
"""OAuth + bare name -> ``mcp__`` prefix added."""
kwargs = self._build([{
"type": "function",
"function": {"name": "read_file", "description": "x", "parameters": {}},
}])
names = [t["name"] for t in kwargs["tools"]]
assert names == ["mcp__read_file"]
assert [t["name"] for t in kwargs["tools"]] == ["mcp__read_file"]
def test_oauth_does_not_double_prefix_native_mcp_tool(self):
"""OAuth + already-prefixed native MCP name → left alone."""
def test_oauth_promotes_single_underscore_mcp_server_tool(self):
"""OAuth + ``mcp_<server>_<tool>`` -> promoted to double underscore.
This is the gap left by the bare constant swap: MCP server tools used
to be *skipped* and went on the wire single-underscore, still tripping
the classifier. They must become ``mcp__`` and NOT be double-prefixed.
"""
kwargs = self._build([{
"type": "function",
"function": {
"name": "mcp_composio_COMPOSIO_SEARCH_TOOLS",
"name": "mcp_linear_get_issue",
"description": "x",
"parameters": {},
},
}])
names = [t["name"] for t in kwargs["tools"]]
# Must NOT become "mcp__mcp_composio_..." — that breaks the round-trip
# because normalize_response only strips ONE mcp__ prefix.
assert names == ["mcp_composio_COMPOSIO_SEARCH_TOOLS"]
assert names == ["mcp__linear_get_issue"]
# never double-prefixed
assert not any(n.startswith("mcp__mcp_") for n in names)
def test_oauth_mixed_native_and_bare_tools(self):
"""Mixed: native MCP preserved, bare names prefixed with mcp__."""
def test_oauth_already_double_prefixed_left_alone(self):
"""OAuth + already-``mcp__`` name -> unchanged (no triple underscore)."""
kwargs = self._build([{
"type": "function",
"function": {"name": "mcp__already", "description": "x", "parameters": {}},
}])
assert [t["name"] for t in kwargs["tools"]] == ["mcp__already"]
def test_oauth_no_single_underscore_mcp_on_wire(self):
"""Mixed set: every wire name is bare-free of single-underscore mcp_."""
kwargs = self._build([
{"type": "function", "function": {"name": "read_file",
"description": "x", "parameters": {}}},
{"type": "function", "function": {"name": "mcp_composio_SEARCH",
"description": "y", "parameters": {}}},
"description": "x", "parameters": {}}},
{"type": "function", "function": {"name": "mcp_linear_get_issue",
"description": "y", "parameters": {}}},
{"type": "function", "function": {"name": "terminal",
"description": "z", "parameters": {}}},
"description": "z", "parameters": {}}},
])
names = sorted(t["name"] for t in kwargs["tools"])
assert names == ["mcp__read_file", "mcp__terminal", "mcp_composio_SEARCH"]
assert names == ["mcp__linear_get_issue", "mcp__read_file", "mcp__terminal"]
# The core invariant: NOTHING single-underscore reaches the wire.
for n in names:
assert not (n.startswith("mcp_") and not n.startswith("mcp__"))
def test_non_oauth_path_untouched(self):
"""Non-OAuth requests never get the prefix — schemas pass through as-is."""
kwargs = self._build([
{"type": "function", "function": {"name": "read_file",
"description": "x", "parameters": {}}},
{"type": "function", "function": {"name": "mcp_composio_SEARCH",
"description": "y", "parameters": {}}},
"description": "x", "parameters": {}}},
{"type": "function", "function": {"name": "mcp_linear_get_issue",
"description": "y", "parameters": {}}},
], is_oauth=False)
names = sorted(t["name"] for t in kwargs["tools"])
assert names == ["mcp_composio_SEARCH", "read_file"]
assert names == ["mcp_linear_get_issue", "read_file"]