From fb05f5d4b58d4fb20c3a4a98c2c150de3f729f3c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 16 May 2026 13:06:56 -0700 Subject: [PATCH] fix(mcp): validate remote URLs up-front with a clear error (#27105) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port from anomalyco/opencode#25019 ("fix: handle invalid mcp urls"). Previously: a typo in `config.yaml` (missing scheme, wrong scheme, empty string, non-string value) slipped past `_is_http()` and hit `httpx.URL(url)` or `streamablehttp_client(url, ...)` deep in the transport layer. That raised a generic exception which went through the reconnect-backoff loop, so a bad URL caused _MAX_INITIAL_CONNECT_RETRIES attempts with doubling backoff — about a minute of pointless retries plus an opaque error — before the server was marked failed. Now: we validate the URL once, at the top of `run()`, before entering the retry loop. A malformed URL raises `InvalidMcpUrlError` (a `ValueError` subclass) with a message that names the offending server and explains exactly what was wrong. `_ready` is set and `_error` is populated, so `start()` re-raises and the server shows up as failed in `hermes mcp list` without any backoff burn. Validation rules: - Must be a string (rejects None, dict, int) - Must be non-empty (rejects '' and whitespace-only) - Scheme must be http or https (rejects file://, ws://, stdio://) - Must have a non-empty host (rejects http:///, http://:8080) Tests (21 new cases in tests/tools/test_mcp_invalid_url.py): - TestValidUrlsAccepted: http, https, IPv6, ports, paths, query strings - TestInvalidUrlsRejected: every rejection path above + clear error text - TestErrorIsValueError: downstream code catching ValueError still works E2E verified: a misconfigured server with `url: not-a-valid-url` now fails in <0.001s with the clear error, instead of minutes of retries. Doesn't touch stdio servers (they use `command`, not `url`) — the validator only fires when `_is_http()` returns True. --- tests/tools/test_mcp_invalid_url.py | 125 ++++++++++++++++++++++++++++ tools/mcp_tool.py | 82 ++++++++++++++++++ 2 files changed, 207 insertions(+) create mode 100644 tests/tools/test_mcp_invalid_url.py diff --git a/tests/tools/test_mcp_invalid_url.py b/tests/tools/test_mcp_invalid_url.py new file mode 100644 index 00000000000..539696292ad --- /dev/null +++ b/tests/tools/test_mcp_invalid_url.py @@ -0,0 +1,125 @@ +"""Tests for the MCP remote-URL validator. + +Ported from anomalyco/opencode#25019 (``fix: handle invalid mcp urls``). + +Previously, a typo in ``config.yaml`` (missing scheme, wrong scheme, empty +string, dict where a URL was expected) caused the MCP server startup code +to enter httpx's URL-parsing path and crash inside the transport layer. +The reconnect-backoff loop would then retry +``_MAX_INITIAL_CONNECT_RETRIES`` times with doubling backoff — a minute or +more of pointless retries plus a confusing opaque error message — before +eventually giving up. + +The fix validates the URL once, up front, and fails fast with a specific +error message identifying the offending server. +""" + +from __future__ import annotations + +import pytest + +from tools.mcp_tool import ( + InvalidMcpUrlError, + _validate_remote_mcp_url, +) + + +class TestValidUrlsAccepted: + """Every valid http(s) URL must pass through untouched (stripped of whitespace).""" + + @pytest.mark.parametrize( + "url", + [ + "http://localhost:3000/mcp", + "https://example.com/mcp", + "https://context7.liam.com/mcp", + "http://127.0.0.1:8080", + "https://api.example.com:443/v1/mcp?session=abc", + "http://[::1]:9000/mcp", # IPv6 + "https://host.example.com", # no port, no path + ], + ) + def test_accepts_valid_http_url(self, url): + assert _validate_remote_mcp_url("test", url) == url + + def test_strips_surrounding_whitespace(self): + assert ( + _validate_remote_mcp_url("test", " https://example.com/mcp ") + == "https://example.com/mcp" + ) + + +class TestInvalidUrlsRejected: + """Every broken shape must raise ``InvalidMcpUrlError`` with a clear message.""" + + def test_none_rejected(self): + with pytest.raises(InvalidMcpUrlError, match="context7.*expected a string"): + _validate_remote_mcp_url("context7", None) + + def test_dict_rejected(self): + with pytest.raises(InvalidMcpUrlError, match="expected a string, got dict"): + _validate_remote_mcp_url("ctx", {"url": "nested"}) + + def test_int_rejected(self): + with pytest.raises(InvalidMcpUrlError, match="expected a string, got int"): + _validate_remote_mcp_url("ctx", 8080) + + def test_empty_string_rejected(self): + with pytest.raises(InvalidMcpUrlError, match="empty url"): + _validate_remote_mcp_url("ctx", "") + + def test_whitespace_only_rejected(self): + with pytest.raises(InvalidMcpUrlError, match="empty url"): + _validate_remote_mcp_url("ctx", " \t\n") + + def test_missing_scheme_rejected(self): + # The most common typo — users copy a host from a web page. + with pytest.raises( + InvalidMcpUrlError, match="scheme must be http or https" + ): + _validate_remote_mcp_url("ctx", "example.com/mcp") + + def test_file_scheme_rejected(self): + with pytest.raises( + InvalidMcpUrlError, match="scheme must be http or https" + ): + _validate_remote_mcp_url("ctx", "file:///etc/passwd") + + def test_ws_scheme_rejected(self): + # WebSocket is not MCP's remote transport. + with pytest.raises( + InvalidMcpUrlError, match="scheme must be http or https" + ): + _validate_remote_mcp_url("ctx", "ws://example.com/mcp") + + def test_stdio_scheme_rejected(self): + # stdio servers use the ``command`` key, not ``url``. + with pytest.raises( + InvalidMcpUrlError, match="scheme must be http or https" + ): + _validate_remote_mcp_url("ctx", "stdio:///node server.js") + + def test_empty_host_rejected(self): + with pytest.raises(InvalidMcpUrlError, match="missing host"): + _validate_remote_mcp_url("ctx", "http:///") + + def test_empty_host_with_path_rejected(self): + with pytest.raises(InvalidMcpUrlError, match="missing host"): + _validate_remote_mcp_url("ctx", "https:///path/only") + + def test_error_mentions_server_name(self): + # So users can find the bad entry when there are multiple configured. + with pytest.raises(InvalidMcpUrlError, match="my-weird-server"): + _validate_remote_mcp_url("my-weird-server", "not a url at all") + + +class TestErrorIsValueError: + """InvalidMcpUrlError must be a ValueError for broad downstream catch blocks.""" + + def test_is_value_error(self): + try: + _validate_remote_mcp_url("ctx", "garbage") + except ValueError: + pass # expected + else: + pytest.fail("expected ValueError") diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index b24bb9705ad..a46496ef59c 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -91,6 +91,7 @@ import threading import time from datetime import datetime from typing import Any, Dict, List, Optional +from urllib.parse import urlparse logger = logging.getLogger(__name__) @@ -492,6 +493,72 @@ def _cache_mcp_image_block(block) -> str: return f"MEDIA:{image_path}" +# --------------------------------------------------------------------------- +# Remote MCP URL validation +# --------------------------------------------------------------------------- + + +class InvalidMcpUrlError(ValueError): + """Raised when a remote MCP server's ``url`` cannot be parsed as http(s)://. + + Validated once at startup so we fail fast with a clear message instead of + burning through the reconnect-backoff loop on every attempt. (Ported from + anomalyco/opencode#25019.) + """ + + +def _validate_remote_mcp_url(server_name: str, url: Any) -> str: + """Return the URL as a string if it's a valid http(s) remote MCP URL. + + Raises :class:`InvalidMcpUrlError` otherwise with a message naming the + offending server, so users can spot the bad entry in their config. + + Accepts: + - ``http://host`` / ``https://host`` with optional port, path, query + - IPv4, IPv6 (bracketed), DNS hostnames + + Rejects: + - Non-string values (``None``, dicts, ints) + - Missing scheme (``example.com/mcp``) + - Non-http(s) schemes (``file://``, ``ws://``, ``stdio:`` — stdio servers + use the ``command`` key, not ``url``) + - Empty host (``http://``, ``https:///path``) + """ + if not isinstance(url, str): + raise InvalidMcpUrlError( + f"Invalid MCP URL for '{server_name}': expected a string, got " + f"{type(url).__name__}" + ) + stripped = url.strip() + if not stripped: + raise InvalidMcpUrlError( + f"Invalid MCP URL for '{server_name}': empty url" + ) + try: + parsed = urlparse(stripped) + except Exception as exc: # urlparse is very permissive — belt and braces + raise InvalidMcpUrlError( + f"Invalid MCP URL for '{server_name}': {stripped!r} ({exc})" + ) from exc + if parsed.scheme.lower() not in ("http", "https"): + raise InvalidMcpUrlError( + f"Invalid MCP URL for '{server_name}': scheme must be http or " + f"https, got {parsed.scheme!r} ({stripped!r})" + ) + if not parsed.netloc: + raise InvalidMcpUrlError( + f"Invalid MCP URL for '{server_name}': missing host ({stripped!r})" + ) + # ``urlparse`` accepts ``http://:8080`` (empty host, explicit port). + # Reject that — we need a real host. + if not parsed.hostname: + raise InvalidMcpUrlError( + f"Invalid MCP URL for '{server_name}': missing hostname " + f"({stripped!r})" + ) + return stripped + + def _format_connect_error(exc: BaseException) -> str: """Render nested MCP connection errors into an actionable short message.""" @@ -1458,6 +1525,21 @@ class MCPServerTask: "this warning.", self.name, ) + + # Validate remote URL once, up front. Raising here (rather than + # letting it blow up inside the SDK's httpx layer on every retry) + # means a typo in config.yaml fails fast with a clear error — and + # critically, no reconnect-backoff burn. (Ported from + # anomalyco/opencode#25019.) + if self._is_http(): + try: + _validate_remote_mcp_url(self.name, config.get("url")) + except InvalidMcpUrlError as exc: + logger.warning("%s", exc) + self._error = exc + self._ready.set() + return + retries = 0 initial_retries = 0 backoff = 1.0