mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-15 09:21:36 +00:00
Some checks failed
Deploy Site / deploy-vercel (push) Waiting to run
Deploy Site / deploy-docs (push) Waiting to run
Docker Build and Publish / build-amd64 (push) Waiting to run
Docker Build and Publish / build-arm64 (push) Waiting to run
Docker Build and Publish / merge (push) Blocked by required conditions
Lint (ruff + ty) / ruff + ty diff (push) Waiting to run
Lint (ruff + ty) / ruff enforcement (blocking) (push) Waiting to run
Lint (ruff + ty) / Windows footguns (blocking) (push) Waiting to run
Nix / nix (macos-latest) (push) Waiting to run
Nix / nix (ubuntu-latest) (push) Waiting to run
OSV-Scanner / Scan lockfiles (push) Waiting to run
Tests / test (1) (push) Waiting to run
Tests / test (2) (push) Waiting to run
Tests / test (3) (push) Waiting to run
Tests / test (4) (push) Waiting to run
Tests / test (5) (push) Waiting to run
Tests / test (6) (push) Waiting to run
Tests / save-durations (push) Blocked by required conditions
Tests / e2e (push) Waiting to run
Typecheck / typecheck (apps/bootstrap-installer) (push) Waiting to run
Typecheck / typecheck (apps/desktop) (push) Waiting to run
Typecheck / typecheck (apps/shared) (push) Waiting to run
Typecheck / typecheck (ui-tui) (push) Waiting to run
Typecheck / typecheck (web) (push) Waiting to run
uv.lock check / uv lock --check (push) Waiting to run
Nix Lockfile Fix / auto-fix-main (push) Has been cancelled
Nix Lockfile Fix / fix (push) Has been cancelled
Build Skills Index / build-index (push) Has been cancelled
Build Skills Index / trigger-deploy (push) Has been cancelled
570 lines
22 KiB
Python
570 lines
22 KiB
Python
"""Tests for MCP stability fixes — event loop handler, PID tracking, shutdown robustness."""
|
|
|
|
import asyncio
|
|
import os
|
|
import signal
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fix 1: MCP event loop exception handler
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestMCPLoopExceptionHandler:
    """_mcp_loop_exception_handler suppresses benign 'Event loop is closed'."""

    @staticmethod
    def _dispatch(context):
        """Run the handler against a fresh mock loop and return that loop."""
        from tools.mcp_tool import _mcp_loop_exception_handler

        fake_loop = MagicMock()
        _mcp_loop_exception_handler(fake_loop, context)
        return fake_loop

    @staticmethod
    def _clear_server_registry(mcp_mod):
        """Empty the module-level server tables while holding the module lock."""
        with mcp_mod._lock:
            mcp_mod._servers.clear()
            mcp_mod._server_connecting.clear()

    def test_suppresses_event_loop_closed(self):
        ctx = {"exception": RuntimeError("Event loop is closed")}
        fake_loop = self._dispatch(ctx)
        # The benign shutdown error must be swallowed, not forwarded.
        fake_loop.default_exception_handler.assert_not_called()

    def test_forwards_other_runtime_errors(self):
        ctx = {"exception": RuntimeError("some other error")}
        fake_loop = self._dispatch(ctx)
        fake_loop.default_exception_handler.assert_called_once_with(ctx)

    def test_forwards_non_runtime_errors(self):
        ctx = {"exception": ValueError("bad value")}
        fake_loop = self._dispatch(ctx)
        fake_loop.default_exception_handler.assert_called_once_with(ctx)

    def test_forwards_contexts_without_exception(self):
        ctx = {"message": "just a message"}
        fake_loop = self._dispatch(ctx)
        fake_loop.default_exception_handler.assert_called_once_with(ctx)

    def test_handler_installed_on_mcp_loop(self):
        """_ensure_mcp_loop installs the exception handler on the new loop."""
        import tools.mcp_tool as mcp_mod

        try:
            mcp_mod._ensure_mcp_loop()
            # Snapshot the shared loop under the lock, inspect it afterwards.
            with mcp_mod._lock:
                shared_loop = mcp_mod._mcp_loop
            assert shared_loop is not None
            installed = shared_loop.get_exception_handler()
            assert installed is mcp_mod._mcp_loop_exception_handler
        finally:
            mcp_mod._stop_mcp_loop()

    def test_probe_cleanup_does_not_stop_loop_with_registered_servers(self):
        """Probe cleanup must not kill the shared loop used by live MCP tools."""
        import tools.mcp_tool as mcp_mod

        self._clear_server_registry(mcp_mod)
        try:
            mcp_mod._ensure_mcp_loop()
            with mcp_mod._lock:
                shared_loop = mcp_mod._mcp_loop
                # A registered server with a session marks the loop as in use.
                mcp_mod._servers["live"] = MagicMock(session=object())

            stopped = mcp_mod._stop_mcp_loop_if_idle()
            assert stopped is False

            with mcp_mod._lock:
                loop_after = mcp_mod._mcp_loop
                thread_after = mcp_mod._mcp_thread
            assert loop_after is shared_loop
            assert thread_after is not None
            assert shared_loop is not None
            assert shared_loop.is_running()
        finally:
            self._clear_server_registry(mcp_mod)
            mcp_mod._stop_mcp_loop()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fix 2: stdio PID tracking
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestStdioPidTracking:
    """_snapshot_child_pids and _stdio_pids track subprocess PIDs."""

    def test_snapshot_returns_set(self):
        """The snapshot is a set whose members are all ints."""
        from tools.mcp_tool import _snapshot_child_pids

        result = _snapshot_child_pids()
        assert isinstance(result, set)
        # All elements should be ints
        for pid in result:
            assert isinstance(pid, int)

    def test_stdio_pids_starts_empty(self):
        """_stdio_pids is a dict (contents are not checked, see below)."""
        from tools.mcp_tool import _stdio_pids, _lock

        with _lock:
            # Might have residual state from other tests, just check type
            assert isinstance(_stdio_pids, dict)

    def test_kill_orphaned_noop_when_empty(self):
        """_kill_orphaned_mcp_children does nothing when no PIDs tracked."""
        from tools.mcp_tool import (
            _kill_orphaned_mcp_children,
            _orphan_stdio_pids,
            _stdio_pids,
            _lock,
        )

        with _lock:
            _stdio_pids.clear()
            _orphan_stdio_pids.clear()

        # Should not raise
        _kill_orphaned_mcp_children()

    def test_kill_orphaned_handles_dead_pids(self):
        """_kill_orphaned_mcp_children gracefully handles already-dead PIDs."""
        from tools.mcp_tool import (
            _kill_orphaned_mcp_children,
            _orphan_stdio_pids,
            _lock,
        )

        # Use a PID that definitely doesn't exist
        fake_pid = 999999999
        with _lock:
            _orphan_stdio_pids.add(fake_pid)

        # Should not raise (ProcessLookupError is caught)
        _kill_orphaned_mcp_children()

        with _lock:
            assert fake_pid not in _orphan_stdio_pids

    def test_kill_orphaned_uses_sigkill_when_available(self, monkeypatch):
        """SIGTERM-first then SIGKILL after 2s for orphan cleanup."""
        from tools.mcp_tool import (
            _kill_orphaned_mcp_children,
            _orphan_stdio_pids,
            _lock,
        )

        fake_pid = 424242
        with _lock:
            _orphan_stdio_pids.clear()
            _orphan_stdio_pids.add(fake_pid)

        fake_sigkill = 9
        monkeypatch.setattr(signal, "SIGKILL", fake_sigkill, raising=False)

        # Post-#21561 the alive check routes through
        # ``gateway.status._pid_exists`` (so it's safe on Windows — see
        # bpo-14484).  Return True so the SIGKILL escalation fires.
        with patch("tools.mcp_tool.os.kill") as mock_kill, \
             patch("gateway.status._pid_exists", return_value=True), \
             patch("tools.mcp_tool.time.sleep") as mock_sleep:
            _kill_orphaned_mcp_children()

        # SIGTERM then SIGKILL; the alive check no longer touches os.kill.
        mock_kill.assert_any_call(fake_pid, signal.SIGTERM)
        mock_kill.assert_any_call(fake_pid, fake_sigkill)
        assert mock_kill.call_count == 2
        mock_sleep.assert_called_once_with(2)

        with _lock:
            assert fake_pid not in _orphan_stdio_pids

    def test_kill_orphaned_falls_back_without_sigkill(self, monkeypatch):
        """Without SIGKILL, SIGTERM is used for both phases."""
        from tools.mcp_tool import (
            _kill_orphaned_mcp_children,
            _orphan_stdio_pids,
            _lock,
        )

        fake_pid = 434343
        with _lock:
            _orphan_stdio_pids.clear()
            _orphan_stdio_pids.add(fake_pid)

        monkeypatch.delattr(signal, "SIGKILL", raising=False)

        # Pin the alive check instead of consulting the host's real process
        # table: the outcome must not depend on whether some unrelated
        # process happens to own ``fake_pid``.  Reporting the pid as alive
        # drives the escalation phase, which is the phase that needs the
        # missing-SIGKILL fallback.
        # NOTE(review): assumes the reaper falls back to SIGTERM when
        # signal.SIGKILL is absent, as this test's docstring states.
        with patch("tools.mcp_tool.os.kill") as mock_kill, \
             patch("gateway.status._pid_exists", return_value=True), \
             patch("tools.mcp_tool.time.sleep") as mock_sleep:
            _kill_orphaned_mcp_children()

        # SIGTERM was delivered, and no other signal was ever sent.
        mock_kill.assert_any_call(fake_pid, signal.SIGTERM)
        for args, _kwargs in mock_kill.call_args_list:
            assert args == (fake_pid, signal.SIGTERM)
        assert mock_sleep.called

        with _lock:
            assert fake_pid not in _orphan_stdio_pids
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fix 2b: stdio descendant reaping via process group (issue #23799)
|
|
# ---------------------------------------------------------------------------
|
|
#
|
|
# When a stdio MCP wrapper (e.g. ``openclaw mcp serve``) itself spawns a
|
|
# helper subprocess (``claude mcp serve``) and then exits, the helper
|
|
# reparents to systemd-user and is invisible to the per-pid orphan reaper.
|
|
# The fix captures the wrapper's pgid at spawn time and reaps via killpg,
|
|
# which reaches same-group descendants whether or not the direct pid is alive.
|
|
|
|
class TestStdioPgroupReaping:
    """_kill_orphaned_mcp_children reaps via killpg when a pgid is tracked."""

    def _reset_state(self):
        """Clear every module-level PID/PGID table under the module lock."""
        from tools.mcp_tool import _stdio_pids, _orphan_stdio_pids, _stdio_pgids, _lock
        with _lock:
            _stdio_pids.clear()
            _orphan_stdio_pids.clear()
            _stdio_pgids.clear()

    # Skip via decorator so nothing is registered in the module-global tables
    # on platforms without os.killpg (an in-body skip after the setup would
    # leave the fake pid behind for later tests).  The production fallback
    # path is covered by the per-pid tests.
    @pytest.mark.skipif(
        not hasattr(os, "killpg"),
        reason="os.killpg not available on this platform",
    )
    def test_killpg_used_when_pgid_tracked(self, monkeypatch):
        """SIGTERM and SIGKILL route through killpg when pgid is known."""
        from tools.mcp_tool import (
            _kill_orphaned_mcp_children,
            _orphan_stdio_pids,
            _stdio_pgids,
            _lock,
        )

        self._reset_state()
        fake_pid = 525252
        fake_pgid = 525252  # session leader: pgid == pid
        with _lock:
            _orphan_stdio_pids.add(fake_pid)
            _stdio_pgids[fake_pid] = fake_pgid

        fake_sigkill = 9
        monkeypatch.setattr(signal, "SIGKILL", fake_sigkill, raising=False)

        with patch("tools.mcp_tool.os.killpg") as mock_killpg, \
             patch("tools.mcp_tool.os.kill") as mock_kill, \
             patch("gateway.status._pid_exists", return_value=True), \
             patch("time.sleep"):
            _kill_orphaned_mcp_children()

        # Both phases should have used killpg (pgroup reach), not per-pid kill.
        mock_killpg.assert_any_call(fake_pgid, signal.SIGTERM)
        mock_killpg.assert_any_call(fake_pgid, fake_sigkill)
        assert mock_killpg.call_count == 2
        mock_kill.assert_not_called()

        with _lock:
            assert fake_pid not in _orphan_stdio_pids
            assert fake_pid not in _stdio_pgids

    @pytest.mark.skipif(
        not hasattr(os, "killpg"),
        reason="os.killpg not available on this platform",
    )
    def test_killpg_failure_falls_back_to_kill(self, monkeypatch):
        """If killpg raises ProcessLookupError (pgroup gone), try os.kill."""
        from tools.mcp_tool import (
            _kill_orphaned_mcp_children,
            _orphan_stdio_pids,
            _stdio_pgids,
            _lock,
        )

        self._reset_state()
        fake_pid = 636363
        fake_pgid = 636363
        with _lock:
            _orphan_stdio_pids.add(fake_pid)
            _stdio_pgids[fake_pid] = fake_pgid

        with patch(
            "tools.mcp_tool.os.killpg",
            side_effect=ProcessLookupError("no such process group"),
        ) as mock_killpg, \
             patch("tools.mcp_tool.os.kill") as mock_kill, \
             patch("gateway.status._pid_exists", return_value=False), \
             patch("time.sleep"):
            _kill_orphaned_mcp_children()

        # killpg was attempted (phase 1 SIGTERM) and fell back to os.kill.
        # Phase 3 skips because _pid_exists returns False (direct pid gone).
        mock_killpg.assert_called()
        mock_kill.assert_any_call(fake_pid, signal.SIGTERM)

        with _lock:
            assert fake_pid not in _orphan_stdio_pids
            assert fake_pid not in _stdio_pgids

    def test_no_pgid_uses_per_pid_kill(self, monkeypatch):
        """When no pgid is recorded (e.g. Windows), fall back to os.kill."""
        from tools.mcp_tool import (
            _kill_orphaned_mcp_children,
            _orphan_stdio_pids,
            _stdio_pgids,
            _lock,
        )

        self._reset_state()
        fake_pid = 747474
        with _lock:
            _orphan_stdio_pids.add(fake_pid)
            # No entry in _stdio_pgids.

        with patch("tools.mcp_tool.os.kill") as mock_kill, \
             patch("gateway.status._pid_exists", return_value=False), \
             patch("time.sleep"):
            # killpg may or may not exist; either way the no-pgid path skips it.
            _kill_orphaned_mcp_children()

        mock_kill.assert_any_call(fake_pid, signal.SIGTERM)

        with _lock:
            assert fake_pid not in _orphan_stdio_pids

    @pytest.mark.live_system_guard_bypass
    @pytest.mark.skipif(
        not hasattr(os, "killpg") or not hasattr(os, "setsid"),
        reason="POSIX-only: requires os.killpg and os.setsid",
    )
    def test_grandchild_reaped_via_pgroup(self, tmp_path):
        """End-to-end: parent spawns grandchild, parent exits, killpg reaps grandchild.

        Mirrors issue #23799: a stdio MCP wrapper (parent) launches a long-lived
        helper subprocess (grandchild) in the same process group, then the
        wrapper exits while the grandchild keeps running.  killpg on the pgid
        captured at spawn time must still deliver the signal to the grandchild.

        Marked ``live_system_guard_bypass`` because this test genuinely needs
        real signal delivery to its own subprocess tree (the conftest guard
        only knows the test's *initial* children; the spawned tree here is
        outside that allowlist).
        """
        import subprocess
        import sys
        import time as _time

        psutil = pytest.importorskip("psutil")

        from tools.mcp_tool import (
            _kill_orphaned_mcp_children,
            _orphan_stdio_pids,
            _stdio_pgids,
            _stdio_pids,
            _lock,
        )

        # Grandchild: sleep forever, write its pid then wait.  The pid file
        # is written to a temp path and os.replace()d into place so the
        # polling reader below can never observe a created-but-empty file
        # (CI flake: int('') ValueError when the reader won the race between
        # open('w') creating the file and write() filling it).
        grandchild_pid_file = tmp_path / "grandchild.pid"
        grandchild_script = tmp_path / "grandchild.py"
        grandchild_script.write_text(
            "import os, sys, time\n"
            f"tmp = {str(grandchild_pid_file)!r} + '.tmp'\n"
            "with open(tmp, 'w') as f:\n"
            "    f.write(str(os.getpid()))\n"
            f"os.replace(tmp, {str(grandchild_pid_file)!r})\n"
            "while True:\n"
            "    time.sleep(0.5)\n"
        )

        # Parent: spawn grandchild, exit immediately (without killing it).
        parent_script = tmp_path / "parent.py"
        parent_script.write_text(
            "import subprocess, sys\n"
            f"subprocess.Popen([sys.executable, {str(grandchild_script)!r}])\n"
            # Parent exits — grandchild reparents to init.
        )

        # Spawn parent in its own session (mirrors stdio_client behaviour).
        parent = subprocess.Popen(
            [sys.executable, str(parent_script)],
            start_new_session=True,
        )
        parent_pgid = os.getpgid(parent.pid)
        # Wait for parent to exit and grandchild to spin up.
        parent.wait(timeout=5)
        deadline = _time.monotonic() + 5
        while _time.monotonic() < deadline and not grandchild_pid_file.exists():
            _time.sleep(0.05)
        assert grandchild_pid_file.exists(), "grandchild did not start"
        grandchild_pid = int(grandchild_pid_file.read_text().strip())

        try:
            # Sanity: grandchild is alive and shares the parent's pgid.
            assert psutil.pid_exists(grandchild_pid)
            assert os.getpgid(grandchild_pid) == parent_pgid

            # Drive the reaper: register the parent pid + pgid as an orphan.
            with _lock:
                _stdio_pids.clear()
                _orphan_stdio_pids.clear()
                _stdio_pgids.clear()
                _orphan_stdio_pids.add(parent.pid)
                _stdio_pgids[parent.pid] = parent_pgid

            _kill_orphaned_mcp_children()

            # Grandchild should be gone — SIGTERM via killpg in phase 1
            # reached it.  This check MUST run before the cleanup kill in
            # ``finally``; otherwise the SIGKILL there would make the
            # assertion pass even when the reaper did nothing.
            deadline = _time.monotonic() + 3
            while _time.monotonic() < deadline and psutil.pid_exists(grandchild_pid):
                _time.sleep(0.05)
            assert not psutil.pid_exists(grandchild_pid), (
                "grandchild survived killpg-based reaping (issue #23799 regression)"
            )
        finally:
            # Belt-and-suspenders: ensure grandchild is dead even if test fails.
            try:
                os.kill(grandchild_pid, signal.SIGKILL)
            except ProcessLookupError:
                pass
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fix 3: MCP reload timeout (cli.py)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestMCPReloadTimeout:
    """_check_config_mcp_changes uses a timeout on _reload_mcp."""

    def test_reload_timeout_does_not_block_forever(self, tmp_path, monkeypatch):
        """_check_config_mcp_changes routes _reload_mcp through a thread.

        This is a source-level check only: it inspects the text of
        ``HermesCLI._check_config_mcp_changes`` and passes when the word
        "thread" appears in it (case-insensitively).  It does not execute
        the watcher or simulate a hanging reload.  The ``tmp_path`` and
        ``monkeypatch`` fixtures are unused and kept only so the signature
        stays stable.
        """
        import inspect

        from cli import HermesCLI

        source = inspect.getsource(HermesCLI._check_config_mcp_changes)
        # The fix adds threading.Thread for _reload_mcp.  A case-insensitive
        # search covers both "Thread" and "thread".
        assert "thread" in source.lower(), \
            "_check_config_mcp_changes should use a thread for _reload_mcp"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fix 4: MCP initial connection retry with backoff
|
|
# (Ported from Kilo Code's MCP resilience fix)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestMCPInitialConnectionRetry:
    """MCPServerTask.run() retries initial connection failures instead of giving up."""

    @staticmethod
    def _run_to_completion(coro):
        """Run *coro* on a private event loop and close the loop afterwards.

        ``asyncio.get_event_loop()`` outside a running loop depends on state
        left behind by earlier tests (it raises once the current loop has
        been cleared, and returns an unusable loop once it has been closed),
        and implicit loop creation is deprecated.  A dedicated loop keeps
        these tests order-independent and leaves the global current loop
        untouched.
        """
        loop = asyncio.new_event_loop()
        try:
            return loop.run_until_complete(coro)
        finally:
            loop.close()

    def test_initial_connect_retries_constant_exists(self):
        """_MAX_INITIAL_CONNECT_RETRIES should be defined."""
        from tools.mcp_tool import _MAX_INITIAL_CONNECT_RETRIES
        assert _MAX_INITIAL_CONNECT_RETRIES >= 1

    def test_initial_connect_retry_succeeds_on_second_attempt(self):
        """Server succeeds after one transient initial failure."""
        from tools.mcp_tool import MCPServerTask

        call_count = 0

        async def _run():
            server = MCPServerTask("test-retry")

            # Track calls via patching the method on the class
            async def fake_run_stdio(self_inner, config):
                nonlocal call_count
                call_count += 1
                if call_count == 1:
                    raise ConnectionError("DNS resolution failed")
                # Second attempt: success — set ready and "run" until shutdown
                self_inner._ready.set()
                await self_inner._shutdown_event.wait()

            with patch.object(MCPServerTask, '_run_stdio', fake_run_stdio):
                task = asyncio.ensure_future(server.run({"command": "fake"}))
                await server._ready.wait()

                # It should have succeeded (no error) after retrying
                assert server._error is None, f"Expected no error, got: {server._error}"
                assert call_count == 2, f"Expected 2 attempts, got {call_count}"

                # Clean shutdown
                server._shutdown_event.set()
                await task

        self._run_to_completion(_run())

    def test_initial_connect_gives_up_after_max_retries(self):
        """Server gives up after _MAX_INITIAL_CONNECT_RETRIES failures."""
        from tools.mcp_tool import MCPServerTask, _MAX_INITIAL_CONNECT_RETRIES

        call_count = 0

        async def _run():
            server = MCPServerTask("test-exhaust")

            async def fake_run_stdio(self_inner, config):
                nonlocal call_count
                call_count += 1
                raise ConnectionError("DNS resolution failed")

            with patch.object(MCPServerTask, '_run_stdio', fake_run_stdio):
                task = asyncio.ensure_future(server.run({"command": "fake"}))
                await server._ready.wait()

                # Should have an error after exhausting retries
                assert server._error is not None
                assert "DNS resolution failed" in str(server._error)
                # 1 initial + N retries = _MAX_INITIAL_CONNECT_RETRIES + 1 total attempts
                assert call_count == _MAX_INITIAL_CONNECT_RETRIES + 1

                await task

        self._run_to_completion(_run())

    def test_initial_connect_retry_respects_shutdown(self):
        """Shutdown during initial retry backoff aborts cleanly."""
        from tools.mcp_tool import MCPServerTask

        async def _run():
            server = MCPServerTask("test-shutdown")
            attempt = 0

            async def fake_run_stdio(self_inner, config):
                nonlocal attempt
                attempt += 1
                if attempt == 1:
                    raise ConnectionError("transient failure")
                # Should not reach here because shutdown fires during sleep
                raise AssertionError("Should not attempt after shutdown")

            with patch.object(MCPServerTask, '_run_stdio', fake_run_stdio):
                task = asyncio.ensure_future(server.run({"command": "fake"}))

                # Give the first attempt time to fail, then set shutdown
                # during the backoff sleep
                await asyncio.sleep(0.1)
                server._shutdown_event.set()
                await server._ready.wait()

                # Should have the error set and be done
                assert server._error is not None
                await task

        self._run_to_completion(_run())
|