Merge pull request #47723 from NousResearch/salvage/oauth-mcp-prefix
Some checks are pending
Deploy Site / deploy-vercel (push) Waiting to run
Deploy Site / deploy-docs (push) Waiting to run
Docker Build and Publish / build-amd64 (push) Waiting to run
Docker Build and Publish / build-arm64 (push) Waiting to run
Docker Build and Publish / merge (push) Blocked by required conditions
Lint (ruff + ty) / ruff + ty diff (push) Waiting to run
Lint (ruff + ty) / ruff enforcement (blocking) (push) Waiting to run
Lint (ruff + ty) / Windows footguns (blocking) (push) Waiting to run
Tests / test (1) (push) Waiting to run
Tests / test (2) (push) Waiting to run
Tests / test (3) (push) Waiting to run
Tests / test (4) (push) Waiting to run
Tests / test (5) (push) Waiting to run
Tests / test (6) (push) Waiting to run
Tests / save-durations (push) Blocked by required conditions
Tests / e2e (push) Waiting to run
Typecheck / typecheck (apps/bootstrap-installer) (push) Waiting to run
Typecheck / typecheck (apps/desktop) (push) Waiting to run
Typecheck / typecheck (apps/shared) (push) Waiting to run
Typecheck / typecheck (ui-tui) (push) Waiting to run
Typecheck / typecheck (web) (push) Waiting to run
Typecheck / desktop-build (push) Waiting to run

fix(anthropic): no single-underscore mcp_ tool names on the OAuth wire (plan-limit billing)
This commit is contained in:
kshitij 2026-06-17 13:26:02 +05:30 committed by GitHub
commit f9c8d95e43
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 143 additions and 107 deletions

View file

@ -372,7 +372,7 @@ def _detect_claude_code_version() -> str:
_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
_MCP_TOOL_PREFIX = "mcp_"
_MCP_TOOL_PREFIX = "mcp__"
def _get_claude_code_version() -> str:
@ -2349,25 +2349,46 @@ def build_anthropic_kwargs(
text = text.replace("Nous Research", "Anthropic")
block["text"] = text
# 3. Prefix tool names with mcp_ (Claude Code convention)
# Skip names that already begin with the marker — native MCP server
# tools (from mcp_servers: in config.yaml) are registered under their
# full mcp_<server>_<tool> name and would double-prefix otherwise,
# breaking round-trip registry lookup in normalize_response. GH-25255.
# 3. Normalize tool names so NOTHING goes on the OAuth wire with a
# single-underscore ``mcp_`` prefix. Anthropic's subscription/OAuth
# billing classifier treats a single-underscore ``mcp_`` tool name as
# a third-party-app fingerprint and rejects the request with HTTP 400
# "Third-party apps now draw from extra usage, not plan limits"
# (verified empirically: a single ``mcp_foo`` tool flips a request
# from plan-billing to the extra-usage lane; ``mcp__foo`` is accepted).
#
# Two cases, both must land on the double-underscore ``mcp__`` form:
# a) bare Hermes-native tools (``read_file``) -> ``mcp__read_file``
# b) native MCP server tools registered under their full
# single-underscore ``mcp_<server>_<tool>`` name
# (``mcp_linear_get_issue``) -> ``mcp__linear_get_issue``
# Case (b) is the gap that the bare ``mcp_``->``mcp__`` constant swap
# left open: those tools were *skipped* and stayed single-underscore,
# so any session with an MCP server configured still tripped the
# classifier. normalize_response reverses both forms via registry
# lookup so the dispatcher still sees the original name. GH-25255.
def _to_oauth_wire_name(name: str) -> str:
if name.startswith("mcp__"):
return name # already correct, don't double-prefix
if name.startswith("mcp_"):
# single-underscore native MCP tool -> promote to double
return "mcp__" + name[len("mcp_"):]
return _MCP_TOOL_PREFIX + name # bare name -> mcp__<name>
if anthropic_tools:
for tool in anthropic_tools:
if "name" in tool and not tool["name"].startswith(_MCP_TOOL_PREFIX):
tool["name"] = _MCP_TOOL_PREFIX + tool["name"]
if "name" in tool:
tool["name"] = _to_oauth_wire_name(tool["name"])
# 4. Prefix tool names in message history (tool_use and tool_result blocks)
# 4. Apply the same normalization to tool names in message history
# (tool_use blocks) so replayed turns match the wire names above.
for msg in anthropic_messages:
content = msg.get("content")
if isinstance(content, list):
for block in content:
if isinstance(block, dict):
if block.get("type") == "tool_use" and "name" in block:
if not block["name"].startswith(_MCP_TOOL_PREFIX):
block["name"] = _MCP_TOOL_PREFIX + block["name"]
block["name"] = _to_oauth_wire_name(block["name"])
elif block.get("type") == "tool_result" and "tool_use_id" in block:
pass # tool_result uses ID, not name

View file

@ -88,7 +88,7 @@ class AnthropicTransport(ProviderTransport):
from agent.transports.types import ToolCall
strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
_MCP_PREFIX = "mcp_"
_MCP_PREFIX = "mcp__"
text_parts = []
reasoning_parts = []
@ -132,17 +132,25 @@ class AnthropicTransport(ProviderTransport):
elif block.type == "tool_use":
name = block.name
if strip_tool_prefix and name.startswith(_MCP_PREFIX):
stripped = name[len(_MCP_PREFIX):]
# Only strip the mcp_ prefix for OAuth-injected tools
# (where Hermes adds the prefix when sending to Anthropic
# and must remove it on the way back). Native MCP server
# tools (from mcp_servers: in config.yaml) are registered
# in the tool registry under their FULL mcp_<server>_<tool>
# name and must NOT be stripped. GH-25255.
# On the OAuth wire every tool carries a double-underscore
# ``mcp__`` prefix (added in build_anthropic_kwargs to avoid
# Anthropic's single-underscore third-party classifier).
# Reverse it back to the name the registry/dispatcher knows.
# Two original forms map onto the same ``mcp__`` wire name:
# ``mcp__read_file`` <- bare native tool ``read_file``
# ``mcp__linear_get_issue`` <- MCP server tool
# ``mcp_linear_get_issue``
# Resolve by registry lookup, preferring whichever original
# is actually registered; never rewrite a name the LLM used
# that already resolves natively. GH-25255.
from tools.registry import registry as _tool_registry
if (_tool_registry.get_entry(stripped)
and not _tool_registry.get_entry(name)):
name = stripped
if not _tool_registry.get_entry(name):
bare = name[len(_MCP_PREFIX):] # read_file
single = "mcp_" + bare # mcp_read_file / mcp_linear_get_issue
if _tool_registry.get_entry(single):
name = single
elif _tool_registry.get_entry(bare):
name = bare
tool_calls.append(
ToolCall(
id=block.id,

View file

@ -1,9 +1,16 @@
"""Tests for GH-25255: Anthropic OAuth mcp_ prefix stripping.
"""Tests for GH-25255: Anthropic OAuth ``mcp__`` tool-name round-trip.
When strip_tool_prefix=True (Anthropic OAuth path), the transport must only
strip the ``mcp_`` prefix from OAuth-injected tools, NOT from Hermes-native
MCP server tools that are registered under their full ``mcp_<server>_<tool>``
name in the tool registry.
Anthropic's subscription/OAuth billing classifier treats a **single-underscore**
``mcp_`` tool name as a third-party-app fingerprint and rejects the request with
HTTP 400 "Third-party apps now draw from extra usage, not plan limits". So on
the OAuth wire NOTHING may carry a single-underscore ``mcp_`` prefix:
* bare native tools ``read_file`` -> ``mcp__read_file``
* native MCP server tools ``mcp_linear_get_issue`` -> ``mcp__linear_get_issue``
``normalize_response`` reverses the ``mcp__`` wire name back to whatever the tool
registry knows (the single-underscore ``mcp_<server>_<tool>`` form for MCP server
tools, or the bare name for native tools) so the dispatcher is unaffected.
"""
from __future__ import annotations
@ -12,7 +19,6 @@ from types import SimpleNamespace
from unittest.mock import patch
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
@ -38,7 +44,7 @@ def _make_response(*blocks, stop_reason="end_turn"):
class _FakeRegistry:
"""Minimal fake tool registry for testing prefix stripping logic."""
"""Minimal fake tool registry for testing prefix round-trip logic."""
def __init__(self, registered_names: set[str]):
self._names = registered_names
@ -50,25 +56,20 @@ class _FakeRegistry:
# ---------------------------------------------------------------------------
# Tests
# Response side: mcp__ wire name -> registry name
# ---------------------------------------------------------------------------
class TestAnthropicMcpPrefixStrip:
"""Verify that strip_tool_prefix only strips OAuth-injected prefixes."""
"""Verify strip_tool_prefix reverses the ``mcp__`` wire prefix correctly."""
def _get_transport(self):
from agent.transports.anthropic import AnthropicTransport
return AnthropicTransport()
def test_strips_prefix_for_oauth_injected_tool(self):
"""OAuth tools: mcp_read_file -> read_file (stripped).
The tool was registered as 'read_file' in the registry.
Anthropic sees 'mcp_read_file' because Hermes adds the prefix.
On response, we must strip it back to 'read_file'.
"""
def test_strips_prefix_for_oauth_injected_native_tool(self):
"""``mcp__read_file`` -> ``read_file`` (bare native tool)."""
transport = self._get_transport()
block = _make_tool_use_block("mcp_read_file")
block = _make_tool_use_block("mcp__read_file")
response = _make_response(block)
registry = _FakeRegistry({"read_file", "terminal", "web_search"})
@ -78,31 +79,29 @@ class TestAnthropicMcpPrefixStrip:
assert len(result.tool_calls) == 1
assert result.tool_calls[0].name == "read_file"
def test_preserves_native_mcp_server_tool_name(self):
"""Native MCP tools: mcp_composio_SEARCH -> mcp_composio_SEARCH (kept).
def test_restores_single_underscore_mcp_server_tool(self):
"""``mcp__linear_get_issue`` -> ``mcp_linear_get_issue`` (MCP server tool).
The tool is registered with the full mcp_ prefix in the registry.
Stripping would break registry lookup.
MCP server tools are registered under their full single-underscore
``mcp_<server>_<tool>`` name, but they MUST go on the OAuth wire as
double-underscore to dodge the classifier. The response side restores
the single-underscore registry name so dispatch still resolves.
"""
transport = self._get_transport()
block = _make_tool_use_block("mcp_composio_COMPOSIO_SEARCH_TOOLS")
block = _make_tool_use_block("mcp__linear_get_issue")
response = _make_response(block)
registry = _FakeRegistry({
"mcp_composio_COMPOSIO_SEARCH_TOOLS",
"mcp_composio_COMPOSIO_GET_TOOL_SCHEMAS",
"read_file",
})
registry = _FakeRegistry({"mcp_linear_get_issue", "read_file"})
with patch("tools.registry.registry", registry):
result = transport.normalize_response(response, strip_tool_prefix=True)
assert len(result.tool_calls) == 1
assert result.tool_calls[0].name == "mcp_composio_COMPOSIO_SEARCH_TOOLS"
assert result.tool_calls[0].name == "mcp_linear_get_issue"
def test_no_strip_when_flag_false(self):
"""When strip_tool_prefix=False, names are never modified."""
transport = self._get_transport()
block = _make_tool_use_block("mcp_read_file")
block = _make_tool_use_block("mcp__read_file")
response = _make_response(block)
registry = _FakeRegistry({"read_file"})
@ -110,10 +109,10 @@ class TestAnthropicMcpPrefixStrip:
result = transport.normalize_response(response, strip_tool_prefix=False)
assert len(result.tool_calls) == 1
assert result.tool_calls[0].name == "mcp_read_file"
assert result.tool_calls[0].name == "mcp__read_file"
def test_no_strip_when_not_mcp_prefixed(self):
"""Non-mcp_ names are untouched regardless of strip flag."""
"""Non-``mcp__`` names are untouched regardless of strip flag."""
transport = self._get_transport()
block = _make_tool_use_block("web_search")
response = _make_response(block)
@ -125,68 +124,61 @@ class TestAnthropicMcpPrefixStrip:
assert len(result.tool_calls) == 1
assert result.tool_calls[0].name == "web_search"
def test_preserves_name_when_neither_in_registry(self):
"""When neither stripped nor full name is in registry, keep full name.
def test_preserves_name_when_no_original_in_registry(self):
"""Neither the single-underscore nor bare original is registered.
Safety fallback: if we can't determine the type, prefer the full name
since it's what the LLM was told about.
Safety fallback: keep the full ``mcp__`` name the LLM was told about.
"""
transport = self._get_transport()
block = _make_tool_use_block("mcp_unknown_tool")
block = _make_tool_use_block("mcp__unknown_tool")
response = _make_response(block)
registry = _FakeRegistry({"read_file"}) # neither name registered
registry = _FakeRegistry({"read_file"}) # no matching original
with patch("tools.registry.registry", registry):
result = transport.normalize_response(response, strip_tool_prefix=True)
assert len(result.tool_calls) == 1
assert result.tool_calls[0].name == "mcp_unknown_tool"
assert result.tool_calls[0].name == "mcp__unknown_tool"
def test_mixed_tools_same_response(self):
"""Both OAuth and native MCP tools in the same response."""
def test_mixed_native_and_mcp_server_tools_same_response(self):
"""A bare native tool and an MCP server tool, both wired as ``mcp__``."""
transport = self._get_transport()
block1 = _make_tool_use_block("mcp_read_file", block_id="tc_1")
block2 = _make_tool_use_block("mcp_composio_SEARCH", block_id="tc_2")
block3 = _make_tool_use_block("mcp_composio_SEARCH", block_id="tc_3") # also registered natively
response = _make_response(block1, block2, block3)
block1 = _make_tool_use_block("mcp__read_file", block_id="tc_1")
block2 = _make_tool_use_block("mcp__linear_get_issue", block_id="tc_2")
response = _make_response(block1, block2)
registry = _FakeRegistry({
"read_file", # OAuth-injected
"mcp_composio_SEARCH", # native MCP
})
registry = _FakeRegistry({"read_file", "mcp_linear_get_issue"})
with patch("tools.registry.registry", registry):
result = transport.normalize_response(response, strip_tool_prefix=True)
assert len(result.tool_calls) == 3
# OAuth tool: stripped
assert len(result.tool_calls) == 2
assert result.tool_calls[0].name == "read_file"
# Native MCP: preserved (both stripped and full are registered, full wins)
assert result.tool_calls[1].name == "mcp_composio_SEARCH"
assert result.tool_calls[2].name == "mcp_composio_SEARCH"
assert result.tool_calls[1].name == "mcp_linear_get_issue"
def test_both_stripped_and_full_registered_prefers_full(self):
"""Edge case: both 'foo' and 'mcp_foo' exist in registry.
def test_prefers_full_wire_name_when_it_resolves_directly(self):
"""If the ``mcp__`` wire name itself is registered, keep it as-is.
Keep 'mcp_foo' (the original name) since it's what the LLM requested.
Defensive: never rewrite a name that already resolves natively.
"""
transport = self._get_transport()
block = _make_tool_use_block("mcp_foo")
block = _make_tool_use_block("mcp__foo")
response = _make_response(block)
registry = _FakeRegistry({"foo", "mcp_foo"})
registry = _FakeRegistry({"foo", "mcp__foo"})
with patch("tools.registry.registry", registry):
result = transport.normalize_response(response, strip_tool_prefix=True)
assert len(result.tool_calls) == 1
# Both exist — the condition `get_entry(stripped) and not get_entry(name)`
# is False because get_entry(name) IS truthy, so we keep the full name.
assert result.tool_calls[0].name == "mcp_foo"
assert result.tool_calls[0].name == "mcp__foo"
# ---------------------------------------------------------------------------
# Request side: registry name -> mcp__ wire name (no single-underscore leaks)
# ---------------------------------------------------------------------------
class TestAnthropicOAuthOutgoingPrefix:
"""Verify the outgoing-side companion fix: build_anthropic_kwargs must not
double-prefix tool names that already start with ``mcp_`` (native MCP server
tools registered as ``mcp_<server>_<tool>``). GH-25255."""
"""build_anthropic_kwargs must emit ZERO single-underscore ``mcp_`` names on
the OAuth wire bare names and MCP server names both land on ``mcp__``."""
def _build(self, tools, is_oauth=True):
from agent.anthropic_adapter import build_anthropic_kwargs
@ -199,50 +191,65 @@ class TestAnthropicOAuthOutgoingPrefix:
is_oauth=is_oauth,
)
def test_oauth_adds_prefix_to_bare_tool_name(self):
"""OAuth + bare name → prefix added (existing Claude Code convention)."""
def test_oauth_adds_double_prefix_to_bare_tool_name(self):
"""OAuth + bare name -> ``mcp__`` prefix added."""
kwargs = self._build([{
"type": "function",
"function": {"name": "read_file", "description": "x", "parameters": {}},
}])
names = [t["name"] for t in kwargs["tools"]]
assert names == ["mcp_read_file"]
assert [t["name"] for t in kwargs["tools"]] == ["mcp__read_file"]
def test_oauth_does_not_double_prefix_native_mcp_tool(self):
"""OAuth + already-prefixed native MCP name → left alone."""
def test_oauth_promotes_single_underscore_mcp_server_tool(self):
"""OAuth + ``mcp_<server>_<tool>`` -> promoted to double underscore.
This is the gap left by the bare constant swap: MCP server tools used
to be *skipped* and went on the wire single-underscore, still tripping
the classifier. They must become ``mcp__`` and NOT be double-prefixed.
"""
kwargs = self._build([{
"type": "function",
"function": {
"name": "mcp_composio_COMPOSIO_SEARCH_TOOLS",
"name": "mcp_linear_get_issue",
"description": "x",
"parameters": {},
},
}])
names = [t["name"] for t in kwargs["tools"]]
# Must NOT become "mcp_mcp_composio_..." — that breaks the round-trip
# because normalize_response only strips ONE mcp_ prefix.
assert names == ["mcp_composio_COMPOSIO_SEARCH_TOOLS"]
assert names == ["mcp__linear_get_issue"]
# never double-prefixed
assert not any(n.startswith("mcp__mcp_") for n in names)
def test_oauth_mixed_native_and_bare_tools(self):
"""Mixed: native MCP preserved, bare names prefixed."""
def test_oauth_already_double_prefixed_left_alone(self):
"""OAuth + already-``mcp__`` name -> unchanged (no triple underscore)."""
kwargs = self._build([{
"type": "function",
"function": {"name": "mcp__already", "description": "x", "parameters": {}},
}])
assert [t["name"] for t in kwargs["tools"]] == ["mcp__already"]
def test_oauth_no_single_underscore_mcp_on_wire(self):
"""Mixed set: every wire name is bare-free of single-underscore mcp_."""
kwargs = self._build([
{"type": "function", "function": {"name": "read_file",
"description": "x", "parameters": {}}},
{"type": "function", "function": {"name": "mcp_composio_SEARCH",
"description": "y", "parameters": {}}},
"description": "x", "parameters": {}}},
{"type": "function", "function": {"name": "mcp_linear_get_issue",
"description": "y", "parameters": {}}},
{"type": "function", "function": {"name": "terminal",
"description": "z", "parameters": {}}},
"description": "z", "parameters": {}}},
])
names = sorted(t["name"] for t in kwargs["tools"])
assert names == ["mcp_composio_SEARCH", "mcp_read_file", "mcp_terminal"]
assert names == ["mcp__linear_get_issue", "mcp__read_file", "mcp__terminal"]
# The core invariant: NOTHING single-underscore reaches the wire.
for n in names:
assert not (n.startswith("mcp_") and not n.startswith("mcp__"))
def test_non_oauth_path_untouched(self):
"""Non-OAuth requests never get the prefix — schemas pass through as-is."""
kwargs = self._build([
{"type": "function", "function": {"name": "read_file",
"description": "x", "parameters": {}}},
{"type": "function", "function": {"name": "mcp_composio_SEARCH",
"description": "y", "parameters": {}}},
"description": "x", "parameters": {}}},
{"type": "function", "function": {"name": "mcp_linear_get_issue",
"description": "y", "parameters": {}}},
], is_oauth=False)
names = sorted(t["name"] for t in kwargs["tools"])
assert names == ["mcp_composio_SEARCH", "read_file"]
assert names == ["mcp_linear_get_issue", "read_file"]