fix(mcp): avoid false failed startup status

This commit is contained in:
helix4u 2026-06-10 16:01:12 -06:00 committed by Teknium
parent 5508f4bc54
commit e71d746820
6 changed files with 159 additions and 8 deletions

View file

@ -693,16 +693,27 @@ def build_welcome_banner(console: "Console", model: str, cwd: str,
right_lines.append("")
right_lines.append(f"[bold {accent}]MCP Servers[/]")
for srv in mcp_status:
status = srv.get("status")
if srv["connected"]:
right_lines.append(
f"[dim {dim}]{srv['name']}[/] [{text}]({srv['transport']})[/] "
f"[dim {dim}]—[/] [{text}]{srv['tools']} tool(s)[/]"
)
elif srv.get("disabled"):
elif srv.get("disabled") or status == "disabled":
right_lines.append(
f"[dim {dim}]{srv['name']}[/] [dim]({srv['transport']})[/] "
f"[dim {dim}]— disabled[/]"
)
elif status == "connecting":
right_lines.append(
f"[dim {dim}]{srv['name']}[/] [dim]({srv['transport']})[/] "
f"[yellow]— connecting[/]"
)
elif status == "configured":
right_lines.append(
f"[dim {dim}]{srv['name']}[/] [dim]({srv['transport']})[/] "
f"[dim {dim}]— configured[/]"
)
else:
right_lines.append(
f"[red]{srv['name']}[/] [dim]({srv['transport']})[/] "

View file

@ -167,3 +167,36 @@ def test_build_welcome_banner_disabled_mcp_shows_disabled_not_failed():
assert "broken" in output
assert "failed" in output
def test_build_welcome_banner_configured_mcp_is_not_failed():
"""A configured MCP server with no connection attempt yet is not a failure."""
with (
patch.object(model_tools, "check_tool_availability", return_value=(["web"], [])),
patch.object(banner, "get_available_skills", return_value={}),
patch.object(banner, "get_update_result", return_value=None),
patch.object(
tools.mcp_tool,
"get_mcp_status",
return_value=[
{
"name": "docker-profile",
"transport": "stdio",
"tools": 0,
"connected": False,
"disabled": False,
"status": "configured",
},
],
),
):
console = Console(record=True, force_terminal=False, color_system=None, width=160)
banner.build_welcome_banner(
console=console, model="anthropic/test-model", cwd="/tmp/project",
tools=[{"function": {"name": "read_file"}}],
get_toolset_for_tool=lambda n: "file",
)
output = console.export_text()
assert "docker-profile" in output
assert "configured" in output
assert "failed" not in output

View file

@ -82,6 +82,56 @@ class TestLoadMCPConfig:
assert result == {}
class TestMCPStatus:
def test_status_distinguishes_configured_connecting_failed_and_disabled(
self, monkeypatch
):
import tools.mcp_tool as mcp_tool
monkeypatch.setattr(
mcp_tool,
"_load_mcp_config",
lambda: {
"configured": {"command": "docker", "args": ["mcp", "gateway", "run"]},
"connecting": {"command": "slow-mcp"},
"failed": {"command": "bad-mcp"},
"disabled": {"command": "off-mcp", "enabled": False},
},
)
with mcp_tool._lock:
saved_servers = dict(mcp_tool._servers)
saved_connecting = set(mcp_tool._server_connecting)
saved_errors = dict(mcp_tool._server_connect_errors)
mcp_tool._servers.clear()
mcp_tool._server_connecting.clear()
mcp_tool._server_connect_errors.clear()
mcp_tool._server_connecting.add("connecting")
mcp_tool._server_connect_errors["failed"] = "Connection closed"
try:
statuses = {
entry["name"]: entry
for entry in mcp_tool.get_mcp_status()
}
finally:
with mcp_tool._lock:
mcp_tool._servers.clear()
mcp_tool._servers.update(saved_servers)
mcp_tool._server_connecting.clear()
mcp_tool._server_connecting.update(saved_connecting)
mcp_tool._server_connect_errors.clear()
mcp_tool._server_connect_errors.update(saved_errors)
assert statuses["configured"]["status"] == "configured"
assert statuses["configured"]["connected"] is False
assert statuses["configured"]["disabled"] is False
assert statuses["connecting"]["status"] == "connecting"
assert statuses["failed"]["status"] == "failed"
assert statuses["failed"]["error"] == "Connection closed"
assert statuses["disabled"]["status"] == "disabled"
assert statuses["disabled"]["disabled"] is True
# ---------------------------------------------------------------------------
# Schema conversion
# ---------------------------------------------------------------------------

View file

@ -2022,6 +2022,8 @@ class MCPServerTask:
# ---------------------------------------------------------------------------
_servers: Dict[str, MCPServerTask] = {}
_server_connecting: set[str] = set()
_server_connect_errors: Dict[str, str] = {}
# Circuit breaker: consecutive error counts per server. After
# _CIRCUIT_BREAKER_THRESHOLD consecutive failures, the handler returns
@ -2408,8 +2410,8 @@ _mcp_tool_server_names: Dict[str, str] = {}
_mcp_loop: Optional[asyncio.AbstractEventLoop] = None
_mcp_thread: Optional[threading.Thread] = None
# Protects _mcp_loop, _mcp_thread, _servers, _parallel_safe_servers,
# _mcp_tool_server_names, and _stdio_pids.
# Protects _mcp_loop, _mcp_thread, _servers, MCP connection status maps,
# _parallel_safe_servers, _mcp_tool_server_names, and _stdio_pids.
_lock = threading.Lock()
# PIDs of stdio MCP server subprocesses. Tracked so we can force-kill
@ -3553,6 +3555,8 @@ async def _discover_and_register_server(name: str, config: dict) -> List[str]:
timeout=connect_timeout,
)
with _lock:
_server_connecting.discard(name)
_server_connect_errors.pop(name, None)
_servers[name] = server
registered_names = _register_server_tools(name, server, config)
@ -3599,6 +3603,9 @@ def register_mcp_servers(servers: Dict[str, dict]) -> List[str]:
for k, v in servers.items()
if k not in _servers and _parse_boolish(v.get("enabled", True), default=True)
}
_server_connecting.update(new_servers)
for srv_name in new_servers:
_server_connect_errors.pop(srv_name, None)
# Track which servers opt-in to parallel tool calls (idempotent).
for srv_name, srv_cfg in servers.items():
if _parse_boolish(srv_cfg.get("supports_parallel_tool_calls", False), default=False):
@ -3626,12 +3633,20 @@ def register_mcp_servers(servers: Dict[str, dict]) -> List[str]:
for name, result in zip(server_names, results):
if isinstance(result, BaseException):
command = new_servers.get(name, {}).get("command")
message = _format_connect_error(result)
with _lock:
_server_connecting.discard(name)
_server_connect_errors[name] = message
logger.warning(
"Failed to connect to MCP server '%s'%s: %s",
name,
f" (command={command})" if command else "",
_format_connect_error(result),
message,
)
else:
with _lock:
_server_connecting.discard(name)
_server_connect_errors.pop(name, None)
# Per-server timeouts are handled inside _discover_and_register_server.
# The outer timeout is generous: 120s total for parallel discovery.
@ -3736,8 +3751,10 @@ def is_mcp_tool_parallel_safe(tool_name: str) -> bool:
def get_mcp_status() -> List[dict]:
"""Return status of all configured MCP servers for banner display.
Returns a list of dicts with keys: name, transport, tools, connected.
Includes both successfully connected servers and configured-but-failed ones.
Returns a list of dicts with keys: name, transport, tools, connected,
disabled, and status. Includes connected servers, disabled servers,
in-flight connection attempts, recorded failures, and servers that are
configured but have not been started in this process yet.
"""
result: List[dict] = []
@ -3748,6 +3765,8 @@ def get_mcp_status() -> List[dict]:
with _lock:
active_servers = dict(_servers)
connecting = set(_server_connecting)
connect_errors = dict(_server_connect_errors)
for name, cfg in configured.items():
transport = cfg.get("transport", "http") if "url" in cfg else "stdio"
@ -3760,11 +3779,12 @@ def get_mcp_status() -> List[dict]:
"tools": len(server._registered_tool_names) if hasattr(server, "_registered_tool_names") else len(server._tools),
"connected": True,
"disabled": False,
"status": "connected",
}
if server._sampling:
entry["sampling"] = dict(server._sampling.metrics)
result.append(entry)
else:
elif not enabled:
# A server with enabled: false is intentionally not connected — it is
# disabled, not failed. Surface that distinction so consumers (banner,
# TUI) can render "disabled" rather than an alarming "failed".
@ -3773,7 +3793,36 @@ def get_mcp_status() -> List[dict]:
"transport": transport,
"tools": 0,
"connected": False,
"disabled": not enabled,
"disabled": True,
"status": "disabled",
})
elif name in connecting:
result.append({
"name": name,
"transport": transport,
"tools": 0,
"connected": False,
"disabled": False,
"status": "connecting",
})
elif name in connect_errors:
result.append({
"name": name,
"transport": transport,
"tools": 0,
"connected": False,
"disabled": False,
"status": "failed",
"error": connect_errors[name],
})
else:
result.append({
"name": name,
"transport": transport,
"tools": 0,
"connected": False,
"disabled": False,
"status": "configured",
})
return result

View file

@ -254,6 +254,12 @@ export function SessionPanel({ info, maxWidth, sid, t }: SessionPanelProps) {
<Text color={t.color.text}>
{s.tools} tool{s.tools === 1 ? '' : 's'}
</Text>
) : s.disabled || s.status === 'disabled' ? (
<Text color={t.color.muted}>disabled</Text>
) : s.status === 'connecting' ? (
<Text color={t.color.warn}>connecting</Text>
) : s.status === 'configured' ? (
<Text color={t.color.muted}>configured</Text>
) : (
<Text color={t.color.error}>failed</Text>
)}

View file

@ -138,6 +138,8 @@ export type SectionVisibility = Partial<Record<SectionName, DetailsMode>>
export interface McpServerStatus {
connected: boolean
disabled?: boolean
status?: 'configured' | 'connecting' | 'connected' | 'disabled' | 'failed'
name: string
tools: number
transport: string