mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
MCP tools were connected and enabled but never surfaced into the agent's session toolset on the desktop app and dashboard WebUI (#51587). There are two independent owners of the background MCP discovery thread, one per surface: tui_gateway.entry (stdio 'hermes --tui') and hermes_cli.mcp_startup (desktop app + dashboard WS sidecar via tui_gateway/ws.py, and 'hermes dashboard'). The late-refresh scheduler gates on tui_gateway.entry.mcp_discovery_in_flight(), which read ONLY the entry module's thread global. On the desktop/dashboard surfaces that global is None, so a server slower than the bounded build-time wait never triggered a late refresh and its tools stayed invisible for the whole session. Fix: make mcp_discovery_in_flight() / join_mcp_discovery() consult BOTH thread owners. This adds the matching in-flight/join helpers to hermes_cli.mcp_startup and has tui_gateway.entry delegate to them as a second owner.
130 lines
4.8 KiB
Python
130 lines
4.8 KiB
Python
"""Shared CLI/TUI-safe helpers for background MCP discovery."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import threading
|
|
from contextlib import nullcontext
|
|
from typing import Optional
|
|
|
|
# Held by start_background_mcp_discovery() while it checks and sets the
# one-shot "started" flag below.
_mcp_discovery_lock = threading.Lock()
# Flipped to True by the first start_background_mcp_discovery() call; every
# later call sees it and returns without doing anything.
_mcp_discovery_started = False
# Handle of the background discovery thread; stays None when no thread was
# created. The wait/in-flight/join helpers read it.
_mcp_discovery_thread: Optional[threading.Thread] = None
|
|
|
|
|
|
def _has_configured_mcp_servers() -> bool:
|
|
"""Cheap config probe so non-MCP users avoid importing the MCP stack."""
|
|
try:
|
|
from hermes_cli.config import read_raw_config
|
|
|
|
mcp_servers = (read_raw_config() or {}).get("mcp_servers")
|
|
return isinstance(mcp_servers, dict) and len(mcp_servers) > 0
|
|
except Exception:
|
|
# Be conservative: if config probing fails, try discovery in the
|
|
# background so startup still can't block.
|
|
return True
|
|
|
|
|
|
def start_background_mcp_discovery(*, logger, thread_name: str) -> None:
    """Launch the shared background MCP discovery thread, at most once.

    Only the first call in a process can start a thread; every later call
    returns immediately. The first call also returns without starting
    anything when the config probe reports no MCP servers.

    Args:
        logger: Receives a debug record (with traceback) if discovery raises
            inside the background thread.
        thread_name: Name given to the daemon thread.
    """
    global _mcp_discovery_started, _mcp_discovery_thread

    def _run_discovery() -> None:
        # Thread body: failures are logged at debug level, never propagated.
        try:
            _discover_mcp_tools_without_interactive_oauth()
        except Exception:
            logger.debug("Background MCP tool discovery failed", exc_info=True)

    with _mcp_discovery_lock:
        already_started = _mcp_discovery_started
        # The flag is set even when no thread ends up being spawned, so the
        # config probe runs at most once per process.
        _mcp_discovery_started = True
        if already_started or not _has_configured_mcp_servers():
            return

        worker = threading.Thread(
            target=_run_discovery,
            name=thread_name,
            daemon=True,
        )
        _mcp_discovery_thread = worker
        worker.start()
|
|
|
|
|
|
def _resolve_discovery_timeout(explicit: "float | None") -> float:
|
|
"""Resolve the MCP discovery wait bound: explicit arg > config > default.
|
|
|
|
Reads ``mcp_discovery_timeout`` from config.yaml, defaulting to the value in
|
|
``DEFAULT_CONFIG`` (single source of truth) when the key is absent. Kept lazy
|
|
and fail-safe — a missing/invalid value or a broken config falls back to a
|
|
short safe bound so startup can never hang or crash.
|
|
"""
|
|
if explicit is not None:
|
|
return explicit
|
|
try:
|
|
from hermes_cli.config import load_config, DEFAULT_CONFIG
|
|
|
|
default = float(DEFAULT_CONFIG.get("mcp_discovery_timeout", 1.5))
|
|
raw = (load_config() or {}).get("mcp_discovery_timeout", default)
|
|
val = float(raw)
|
|
return val if val > 0 else default
|
|
except Exception:
|
|
return 1.5
|
|
|
|
|
|
def _discover_mcp_tools_without_interactive_oauth() -> None:
|
|
"""Run MCP discovery without letting OAuth read from the user's stdin."""
|
|
try:
|
|
from tools.mcp_oauth import suppress_interactive_oauth
|
|
except Exception:
|
|
suppress_interactive_oauth = nullcontext
|
|
|
|
with suppress_interactive_oauth():
|
|
from tools.mcp_tool import discover_mcp_tools
|
|
|
|
discover_mcp_tools()
|
|
|
|
|
|
def wait_for_mcp_discovery(timeout: "float | None" = None) -> None:
    """Give background MCP discovery a bounded chance to finish.

    Intended to run before the first tool snapshot. ``Thread.join`` returns
    as soon as the thread ends, so the call only blocks for as long as
    discovery is really still running: with no discovery thread, or one that
    has already finished, it returns immediately. The bound only caps the
    wait so a dead server cannot freeze startup; servers that miss it are
    left to the automatic late-binding refresh.

    Args:
        timeout: Maximum wait in seconds. ``None`` defers to
            ``_resolve_discovery_timeout``, which reads
            ``mcp_discovery_timeout`` from config.
    """
    # Snapshot the module global once so the check and the join use the same
    # thread object.
    worker = _mcp_discovery_thread
    if worker is not None and worker.is_alive():
        worker.join(timeout=_resolve_discovery_timeout(timeout))
|
|
|
|
|
|
def mcp_discovery_in_flight() -> bool:
    """Tell whether THIS module's background discovery thread is still alive.

    Counterpart of ``tui_gateway.entry.mcp_discovery_in_flight`` for the
    surfaces that start discovery through ``start_background_mcp_discovery``
    in this module (the desktop app + dashboard WebSocket sidecar via
    ``tui_gateway/ws.py``, and ``hermes dashboard``). Those processes
    populate this module's ``_mcp_discovery_thread`` rather than
    ``tui_gateway.entry``'s, so the late-refresh scheduler has to consult
    both to know whether a slow server's tools are still pending
    (see #51587).

    Returns:
        True only when a discovery thread exists and is alive.
    """
    worker = _mcp_discovery_thread
    if worker is None:
        return False
    return worker.is_alive()
|
|
|
|
|
|
def join_mcp_discovery(timeout: "float | None" = None) -> bool:
    """Block until THIS module's background discovery finishes, up to ``timeout``.

    Unlike ``wait_for_mcp_discovery`` this accepts an unbounded/long wait and
    reports the outcome, for the off-critical-path late-refresh waiter.

    Args:
        timeout: Maximum seconds to wait; ``None`` waits until the thread
            ends.

    Returns:
        True if discovery is not running when the call returns (no thread,
        a thread that is not alive, or one that ended within ``timeout``);
        False if it is still running after the timeout.
    """
    thread = _mcp_discovery_thread
    # ``Thread.join`` raises RuntimeError on a thread that was never started.
    # The handle is published before ``start()`` is called, so a handle that
    # is not alive (not started yet, or already finished) is reported as
    # "not running" instead of being joined.
    if thread is None or not thread.is_alive():
        return True
    thread.join(timeout=timeout)
    return not thread.is_alive()
|