mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
Merge pull request #52147 from NousResearch/salvage/29184-mcp-osv-nonblocking
fix(mcp): run OSV malware preflight off the event loop with a bounded timeout (#29184)
This commit is contained in:
commit
d682f320b3
3 changed files with 110 additions and 2 deletions
|
|
@ -66,6 +66,7 @@ AUTHOR_MAP = {
|
|||
"96944678+sweetcornna@users.noreply.github.com": "sweetcornna", # cron ticker-liveness salvage co-author (#33849)
|
||||
"izumi0uu@gmail.com": "izumi0uu", # PR #49544 salvage (native rich reply echo; #49534)
|
||||
"dev@pixlmedia.no": "texhy", # PR #27435 salvage (few-but-huge preflight compression gate; #27405)
|
||||
"qdaszx@naver.com": "qdaszx", # PR #29190 salvage (non-blocking OSV malware preflight; #29184)
|
||||
"w31rdm4ch1n3z@protonmail.com": "w31rdm4ch1nZ",
|
||||
"xtpeeps@gmail.com": "x7peeps",
|
||||
"ahmad@madsgency.com": "ahmadashfq",
|
||||
|
|
|
|||
|
|
@ -126,3 +126,85 @@ def test_run_stdio_uses_resolved_command_and_prepended_path(tmp_path):
|
|||
await server.shutdown()
|
||||
|
||||
asyncio.run(_test())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# #29184: OSV malware preflight must not block the asyncio event loop, and a
|
||||
# stalled check must time out fail-open rather than freezing MCP startup.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _stdio_mocks():
|
||||
mock_session = MagicMock()
|
||||
mock_session.initialize = AsyncMock()
|
||||
mock_session.list_tools = AsyncMock(return_value=SimpleNamespace(tools=[]))
|
||||
mock_stdio_cm = MagicMock()
|
||||
mock_stdio_cm.__aenter__ = AsyncMock(return_value=(object(), object()))
|
||||
mock_stdio_cm.__aexit__ = AsyncMock(return_value=False)
|
||||
mock_session_cm = MagicMock()
|
||||
mock_session_cm.__aenter__ = AsyncMock(return_value=mock_session)
|
||||
mock_session_cm.__aexit__ = AsyncMock(return_value=False)
|
||||
return mock_stdio_cm, mock_session_cm
|
||||
|
||||
|
||||
def test_run_stdio_malware_check_does_not_block_event_loop():
|
||||
"""The blocking OSV check runs off the loop (asyncio.to_thread), so a
|
||||
concurrent coroutine keeps making progress while it runs."""
|
||||
import time
|
||||
mock_stdio_cm, mock_session_cm = _stdio_mocks()
|
||||
|
||||
def slow_check(_command, _args):
|
||||
time.sleep(0.3) # simulate a slow OSV HTTPS call
|
||||
return None
|
||||
|
||||
ticks = {"n": 0}
|
||||
|
||||
async def _ticker():
|
||||
# If the loop were blocked, these ticks would not advance during the
|
||||
# 0.3s check.
|
||||
for _ in range(20):
|
||||
await asyncio.sleep(0.01)
|
||||
ticks["n"] += 1
|
||||
|
||||
async def _test():
|
||||
with patch("tools.osv_check.check_package_for_malware", side_effect=slow_check), \
|
||||
patch("tools.mcp_tool.StdioServerParameters"), \
|
||||
patch("tools.mcp_tool.stdio_client", return_value=mock_stdio_cm), \
|
||||
patch("tools.mcp_tool.ClientSession", return_value=mock_session_cm):
|
||||
server = MCPServerTask("srv")
|
||||
ticker = asyncio.create_task(_ticker())
|
||||
await server.start({"command": "npx", "args": ["-y", "pkg"]})
|
||||
ticks_during = ticks["n"]
|
||||
await ticker
|
||||
await server.shutdown()
|
||||
# The loop kept ticking DURING the 0.3s blocking check -> not blocked.
|
||||
assert ticks_during >= 3, f"event loop appeared blocked (ticks={ticks_during})"
|
||||
|
||||
asyncio.run(_test())
|
||||
|
||||
|
||||
def test_run_stdio_malware_check_times_out_fail_open():
|
||||
"""A check that hangs past the timeout must NOT freeze startup: it times
|
||||
out, logs, and proceeds (fail-open) so the server still starts."""
|
||||
import time
|
||||
mock_stdio_cm, mock_session_cm = _stdio_mocks()
|
||||
|
||||
def hung_check(_command, _args):
|
||||
time.sleep(0.5) # outlasts the 0.2s timeout 2.5x; short enough not to stall teardown
|
||||
return "MALWARE" # would block startup if awaited to completion
|
||||
|
||||
async def _test():
|
||||
with patch("tools.osv_check.check_package_for_malware", side_effect=hung_check), \
|
||||
patch("tools.mcp_tool._OSV_MALWARE_CHECK_TIMEOUT_S", 0.2), \
|
||||
patch("tools.mcp_tool.StdioServerParameters"), \
|
||||
patch("tools.mcp_tool.stdio_client", return_value=mock_stdio_cm), \
|
||||
patch("tools.mcp_tool.ClientSession", return_value=mock_session_cm):
|
||||
server = MCPServerTask("srv")
|
||||
start = time.monotonic()
|
||||
await server.start({"command": "npx", "args": ["-y", "pkg"]})
|
||||
elapsed = time.monotonic() - start
|
||||
await server.shutdown()
|
||||
# Returned shortly after the 0.2s timeout (fail-open), not the 0.5s hang.
|
||||
assert elapsed < 1.0, f"startup did not fail-open promptly ({elapsed:.1f}s)"
|
||||
|
||||
asyncio.run(_test())
|
||||
|
|
|
|||
|
|
@ -101,6 +101,16 @@ from urllib.parse import urlparse
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Upper bound for the OSV malware preflight during stdio MCP startup. The
|
||||
# check makes a blocking urllib HTTPS call whose own timeout can fail to
|
||||
# interrupt a stalled SSL handshake, which froze the asyncio event loop and
|
||||
# blew past the gateway's 15s startup budget (#29184). We run it off the loop
|
||||
# AND bound it here; the check is fail-open, so a timeout lets startup proceed.
|
||||
# Set just ABOVE osv_check._TIMEOUT (10s) so the inner socket timeout fires
|
||||
# first in the normal case; this outer bound only bites when a stalled SSL
|
||||
# handshake defeats the inner timeout (the #29184 failure mode).
|
||||
_OSV_MALWARE_CHECK_TIMEOUT_S = 12.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stdio subprocess stderr redirection
|
||||
|
|
@ -1772,9 +1782,24 @@ class MCPServerTask:
|
|||
safe_env = _build_safe_env(user_env)
|
||||
command, safe_env = _resolve_stdio_command(command, safe_env)
|
||||
|
||||
# Check package against OSV malware database before spawning
|
||||
# Check package against OSV malware database before spawning.
|
||||
# Run off the event loop (the urllib HTTPS call is blocking) and bound
|
||||
# it with a wall-clock timeout so a stalled SSL handshake can't freeze
|
||||
# MCP discovery / gateway startup (#29184). The check is fail-open, so
|
||||
# on timeout we log and proceed rather than blocking indefinitely.
|
||||
from tools.osv_check import check_package_for_malware
|
||||
malware_error = check_package_for_malware(command, args)
|
||||
try:
|
||||
malware_error = await asyncio.wait_for(
|
||||
asyncio.to_thread(check_package_for_malware, command, args),
|
||||
timeout=_OSV_MALWARE_CHECK_TIMEOUT_S,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(
|
||||
"MCP server '%s': OSV malware preflight timed out after %.0fs "
|
||||
"(network slow/unreachable) — proceeding without the check.",
|
||||
self.name, _OSV_MALWARE_CHECK_TIMEOUT_S,
|
||||
)
|
||||
malware_error = None
|
||||
if malware_error:
|
||||
raise ValueError(
|
||||
f"MCP server '{self.name}': {malware_error}"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue