mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(mcp): validate remote URLs up-front with a clear error (#27105)
Port from anomalyco/opencode#25019 ("fix: handle invalid mcp urls"). Previously: a typo in `config.yaml` (missing scheme, wrong scheme, empty string, non-string value) slipped past `_is_http()` and hit `httpx.URL(url)` or `streamablehttp_client(url, ...)` deep in the transport layer. That raised a generic exception which went through the reconnect-backoff loop, so a bad URL caused _MAX_INITIAL_CONNECT_RETRIES attempts with doubling backoff — about a minute of pointless retries plus an opaque error — before the server was marked failed. Now: we validate the URL once, at the top of `run()`, before entering the retry loop. A malformed URL raises `InvalidMcpUrlError` (a `ValueError` subclass) with a message that names the offending server and explains exactly what was wrong. `_ready` is set and `_error` is populated, so `start()` re-raises and the server shows up as failed in `hermes mcp list` without any backoff burn. Validation rules: - Must be a string (rejects None, dict, int) - Must be non-empty (rejects '' and whitespace-only) - Scheme must be http or https (rejects file://, ws://, stdio://) - Must have a non-empty host (rejects http:///, http://:8080) Tests (21 new cases in tests/tools/test_mcp_invalid_url.py): - TestValidUrlsAccepted: http, https, IPv6, ports, paths, query strings - TestInvalidUrlsRejected: every rejection path above + clear error text - TestErrorIsValueError: downstream code catching ValueError still works E2E verified: a misconfigured server with `url: not-a-valid-url` now fails in <0.001s with the clear error, instead of minutes of retries. Doesn't touch stdio servers (they use `command`, not `url`) — the validator only fires when `_is_http()` returns True.
This commit is contained in:
parent
93e109a1d5
commit
fb05f5d4b5
2 changed files with 207 additions and 0 deletions
125
tests/tools/test_mcp_invalid_url.py
Normal file
125
tests/tools/test_mcp_invalid_url.py
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
"""Tests for the MCP remote-URL validator.
|
||||||
|
|
||||||
|
Ported from anomalyco/opencode#25019 (``fix: handle invalid mcp urls``).
|
||||||
|
|
||||||
|
Previously, a typo in ``config.yaml`` (missing scheme, wrong scheme, empty
|
||||||
|
string, dict where a URL was expected) caused the MCP server startup code
|
||||||
|
to enter httpx's URL-parsing path and crash inside the transport layer.
|
||||||
|
The reconnect-backoff loop would then retry
|
||||||
|
``_MAX_INITIAL_CONNECT_RETRIES`` times with doubling backoff — a minute or
|
||||||
|
more of pointless retries plus a confusing opaque error message — before
|
||||||
|
eventually giving up.
|
||||||
|
|
||||||
|
The fix validates the URL once, up front, and fails fast with a specific
|
||||||
|
error message identifying the offending server.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from tools.mcp_tool import (
|
||||||
|
InvalidMcpUrlError,
|
||||||
|
_validate_remote_mcp_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestValidUrlsAccepted:
|
||||||
|
"""Every valid http(s) URL must pass through untouched (stripped of whitespace)."""
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"url",
|
||||||
|
[
|
||||||
|
"http://localhost:3000/mcp",
|
||||||
|
"https://example.com/mcp",
|
||||||
|
"https://context7.liam.com/mcp",
|
||||||
|
"http://127.0.0.1:8080",
|
||||||
|
"https://api.example.com:443/v1/mcp?session=abc",
|
||||||
|
"http://[::1]:9000/mcp", # IPv6
|
||||||
|
"https://host.example.com", # no port, no path
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_accepts_valid_http_url(self, url):
|
||||||
|
assert _validate_remote_mcp_url("test", url) == url
|
||||||
|
|
||||||
|
def test_strips_surrounding_whitespace(self):
|
||||||
|
assert (
|
||||||
|
_validate_remote_mcp_url("test", " https://example.com/mcp ")
|
||||||
|
== "https://example.com/mcp"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestInvalidUrlsRejected:
|
||||||
|
"""Every broken shape must raise ``InvalidMcpUrlError`` with a clear message."""
|
||||||
|
|
||||||
|
def test_none_rejected(self):
|
||||||
|
with pytest.raises(InvalidMcpUrlError, match="context7.*expected a string"):
|
||||||
|
_validate_remote_mcp_url("context7", None)
|
||||||
|
|
||||||
|
def test_dict_rejected(self):
|
||||||
|
with pytest.raises(InvalidMcpUrlError, match="expected a string, got dict"):
|
||||||
|
_validate_remote_mcp_url("ctx", {"url": "nested"})
|
||||||
|
|
||||||
|
def test_int_rejected(self):
|
||||||
|
with pytest.raises(InvalidMcpUrlError, match="expected a string, got int"):
|
||||||
|
_validate_remote_mcp_url("ctx", 8080)
|
||||||
|
|
||||||
|
def test_empty_string_rejected(self):
|
||||||
|
with pytest.raises(InvalidMcpUrlError, match="empty url"):
|
||||||
|
_validate_remote_mcp_url("ctx", "")
|
||||||
|
|
||||||
|
def test_whitespace_only_rejected(self):
|
||||||
|
with pytest.raises(InvalidMcpUrlError, match="empty url"):
|
||||||
|
_validate_remote_mcp_url("ctx", " \t\n")
|
||||||
|
|
||||||
|
def test_missing_scheme_rejected(self):
|
||||||
|
# The most common typo — users copy a host from a web page.
|
||||||
|
with pytest.raises(
|
||||||
|
InvalidMcpUrlError, match="scheme must be http or https"
|
||||||
|
):
|
||||||
|
_validate_remote_mcp_url("ctx", "example.com/mcp")
|
||||||
|
|
||||||
|
def test_file_scheme_rejected(self):
|
||||||
|
with pytest.raises(
|
||||||
|
InvalidMcpUrlError, match="scheme must be http or https"
|
||||||
|
):
|
||||||
|
_validate_remote_mcp_url("ctx", "file:///etc/passwd")
|
||||||
|
|
||||||
|
def test_ws_scheme_rejected(self):
|
||||||
|
# WebSocket is not MCP's remote transport.
|
||||||
|
with pytest.raises(
|
||||||
|
InvalidMcpUrlError, match="scheme must be http or https"
|
||||||
|
):
|
||||||
|
_validate_remote_mcp_url("ctx", "ws://example.com/mcp")
|
||||||
|
|
||||||
|
def test_stdio_scheme_rejected(self):
|
||||||
|
# stdio servers use the ``command`` key, not ``url``.
|
||||||
|
with pytest.raises(
|
||||||
|
InvalidMcpUrlError, match="scheme must be http or https"
|
||||||
|
):
|
||||||
|
_validate_remote_mcp_url("ctx", "stdio:///node server.js")
|
||||||
|
|
||||||
|
def test_empty_host_rejected(self):
|
||||||
|
with pytest.raises(InvalidMcpUrlError, match="missing host"):
|
||||||
|
_validate_remote_mcp_url("ctx", "http:///")
|
||||||
|
|
||||||
|
def test_empty_host_with_path_rejected(self):
|
||||||
|
with pytest.raises(InvalidMcpUrlError, match="missing host"):
|
||||||
|
_validate_remote_mcp_url("ctx", "https:///path/only")
|
||||||
|
|
||||||
|
def test_error_mentions_server_name(self):
|
||||||
|
# So users can find the bad entry when there are multiple configured.
|
||||||
|
with pytest.raises(InvalidMcpUrlError, match="my-weird-server"):
|
||||||
|
_validate_remote_mcp_url("my-weird-server", "not a url at all")
|
||||||
|
|
||||||
|
|
||||||
|
class TestErrorIsValueError:
|
||||||
|
"""InvalidMcpUrlError must be a ValueError for broad downstream catch blocks."""
|
||||||
|
|
||||||
|
def test_is_value_error(self):
|
||||||
|
try:
|
||||||
|
_validate_remote_mcp_url("ctx", "garbage")
|
||||||
|
except ValueError:
|
||||||
|
pass # expected
|
||||||
|
else:
|
||||||
|
pytest.fail("expected ValueError")
|
||||||
|
|
@ -91,6 +91,7 @@ import threading
|
||||||
import time
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -492,6 +493,72 @@ def _cache_mcp_image_block(block) -> str:
|
||||||
return f"MEDIA:{image_path}"
|
return f"MEDIA:{image_path}"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Remote MCP URL validation
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidMcpUrlError(ValueError):
|
||||||
|
"""Raised when a remote MCP server's ``url`` cannot be parsed as http(s)://.
|
||||||
|
|
||||||
|
Validated once at startup so we fail fast with a clear message instead of
|
||||||
|
burning through the reconnect-backoff loop on every attempt. (Ported from
|
||||||
|
anomalyco/opencode#25019.)
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_remote_mcp_url(server_name: str, url: Any) -> str:
|
||||||
|
"""Return the URL as a string if it's a valid http(s) remote MCP URL.
|
||||||
|
|
||||||
|
Raises :class:`InvalidMcpUrlError` otherwise with a message naming the
|
||||||
|
offending server, so users can spot the bad entry in their config.
|
||||||
|
|
||||||
|
Accepts:
|
||||||
|
- ``http://host`` / ``https://host`` with optional port, path, query
|
||||||
|
- IPv4, IPv6 (bracketed), DNS hostnames
|
||||||
|
|
||||||
|
Rejects:
|
||||||
|
- Non-string values (``None``, dicts, ints)
|
||||||
|
- Missing scheme (``example.com/mcp``)
|
||||||
|
- Non-http(s) schemes (``file://``, ``ws://``, ``stdio:`` — stdio servers
|
||||||
|
use the ``command`` key, not ``url``)
|
||||||
|
- Empty host (``http://``, ``https:///path``)
|
||||||
|
"""
|
||||||
|
if not isinstance(url, str):
|
||||||
|
raise InvalidMcpUrlError(
|
||||||
|
f"Invalid MCP URL for '{server_name}': expected a string, got "
|
||||||
|
f"{type(url).__name__}"
|
||||||
|
)
|
||||||
|
stripped = url.strip()
|
||||||
|
if not stripped:
|
||||||
|
raise InvalidMcpUrlError(
|
||||||
|
f"Invalid MCP URL for '{server_name}': empty url"
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
parsed = urlparse(stripped)
|
||||||
|
except Exception as exc: # urlparse is very permissive — belt and braces
|
||||||
|
raise InvalidMcpUrlError(
|
||||||
|
f"Invalid MCP URL for '{server_name}': {stripped!r} ({exc})"
|
||||||
|
) from exc
|
||||||
|
if parsed.scheme.lower() not in ("http", "https"):
|
||||||
|
raise InvalidMcpUrlError(
|
||||||
|
f"Invalid MCP URL for '{server_name}': scheme must be http or "
|
||||||
|
f"https, got {parsed.scheme!r} ({stripped!r})"
|
||||||
|
)
|
||||||
|
if not parsed.netloc:
|
||||||
|
raise InvalidMcpUrlError(
|
||||||
|
f"Invalid MCP URL for '{server_name}': missing host ({stripped!r})"
|
||||||
|
)
|
||||||
|
# ``urlparse`` accepts ``http://:8080`` (empty host, explicit port).
|
||||||
|
# Reject that — we need a real host.
|
||||||
|
if not parsed.hostname:
|
||||||
|
raise InvalidMcpUrlError(
|
||||||
|
f"Invalid MCP URL for '{server_name}': missing hostname "
|
||||||
|
f"({stripped!r})"
|
||||||
|
)
|
||||||
|
return stripped
|
||||||
|
|
||||||
|
|
||||||
def _format_connect_error(exc: BaseException) -> str:
|
def _format_connect_error(exc: BaseException) -> str:
|
||||||
"""Render nested MCP connection errors into an actionable short message."""
|
"""Render nested MCP connection errors into an actionable short message."""
|
||||||
|
|
||||||
|
|
@ -1458,6 +1525,21 @@ class MCPServerTask:
|
||||||
"this warning.",
|
"this warning.",
|
||||||
self.name,
|
self.name,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Validate remote URL once, up front. Raising here (rather than
|
||||||
|
# letting it blow up inside the SDK's httpx layer on every retry)
|
||||||
|
# means a typo in config.yaml fails fast with a clear error — and
|
||||||
|
# critically, no reconnect-backoff burn. (Ported from
|
||||||
|
# anomalyco/opencode#25019.)
|
||||||
|
if self._is_http():
|
||||||
|
try:
|
||||||
|
_validate_remote_mcp_url(self.name, config.get("url"))
|
||||||
|
except InvalidMcpUrlError as exc:
|
||||||
|
logger.warning("%s", exc)
|
||||||
|
self._error = exc
|
||||||
|
self._ready.set()
|
||||||
|
return
|
||||||
|
|
||||||
retries = 0
|
retries = 0
|
||||||
initial_retries = 0
|
initial_retries = 0
|
||||||
backoff = 1.0
|
backoff = 1.0
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue