mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(mcp): route stdio subprocess stderr to log file, not user TTY (#14901)
MCP stdio servers' stderr was being dumped directly onto the user's terminal during hermes launch. Servers like FastMCP-based ones print a large ASCII banner at startup; slack-mcp-server emits JSON logs; etc. With prompt_toolkit / Rich rendering the TUI concurrently, these unsolicited writes corrupt the terminal state — hanging the session ~80% of the time for one user with Google Ads Tools + slack-mcp configured, forcing Ctrl+C and restart loops. Root cause: `stdio_client(server_params)` in tools/mcp_tool.py was called without `errlog=`, and the SDK's default is `sys.stderr` — i.e. the real parent-process stderr, which is the TTY. Fix: open a shared, append-mode log at $HERMES_HOME/logs/mcp-stderr.log (created once per process, line-buffered, real fd required by asyncio's subprocess machinery) and pass it as `errlog` to every stdio_client. Each server's spawn writes a timestamped header so the shared log stays readable when multiple servers are running. Falls back to /dev/null if the log file cannot be opened. Verified by E2E spawning a subprocess with the log fd as its stderr: banner lines land in the log file, nothing reaches the calling TTY.
This commit is contained in:
parent
7db2703b33
commit
379b2273d9
1 changed files with 81 additions and 1 deletions
|
|
@ -78,12 +78,86 @@ import math
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
import sys
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
from datetime import datetime
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Stdio subprocess stderr redirection
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# The MCP SDK's ``stdio_client(server, errlog=sys.stderr)`` defaults the
|
||||||
|
# subprocess stderr stream to the parent process's real stderr, i.e. the
|
||||||
|
# user's TTY. That means any MCP server we spawn at startup (FastMCP
|
||||||
|
# banners, slack-mcp-server JSON startup logs, etc.) writes directly onto
|
||||||
|
# the terminal while prompt_toolkit / Rich is rendering the TUI — which
|
||||||
|
# corrupts the display and can hang the session.
|
||||||
|
#
|
||||||
|
# Instead we redirect every stdio MCP subprocess's stderr into a shared
|
||||||
|
# per-profile log file (~/.hermes/logs/mcp-stderr.log), tagged with the
|
||||||
|
# server name so individual servers remain debuggable.
|
||||||
|
#
|
||||||
|
# Fallback is os.devnull if opening the log file fails for any reason.
|
||||||
|
|
||||||
|
_mcp_stderr_log_fh: Optional[Any] = None
|
||||||
|
_mcp_stderr_log_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_mcp_stderr_log() -> Any:
|
||||||
|
"""Return a shared append-mode file handle for MCP subprocess stderr.
|
||||||
|
|
||||||
|
Opened once per process and reused for every stdio server. Must have a
|
||||||
|
real OS-level file descriptor (``fileno()``) because asyncio's subprocess
|
||||||
|
machinery wires the child's stderr directly to that fd. Falls back to
|
||||||
|
``/dev/null`` if opening the log file fails.
|
||||||
|
"""
|
||||||
|
global _mcp_stderr_log_fh
|
||||||
|
with _mcp_stderr_log_lock:
|
||||||
|
if _mcp_stderr_log_fh is not None:
|
||||||
|
return _mcp_stderr_log_fh
|
||||||
|
try:
|
||||||
|
from hermes_constants import get_hermes_home
|
||||||
|
log_dir = get_hermes_home() / "logs"
|
||||||
|
log_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
log_path = log_dir / "mcp-stderr.log"
|
||||||
|
# Line-buffered so server output lands on disk promptly; errors=
|
||||||
|
# "replace" tolerates garbled binary output from misbehaving
|
||||||
|
# servers.
|
||||||
|
fh = open(log_path, "a", encoding="utf-8", errors="replace", buffering=1)
|
||||||
|
# Sanity-check: confirm a real fd is available before we commit.
|
||||||
|
fh.fileno()
|
||||||
|
_mcp_stderr_log_fh = fh
|
||||||
|
except Exception as exc: # pragma: no cover — best-effort fallback
|
||||||
|
logger.debug("Failed to open MCP stderr log, using devnull: %s", exc)
|
||||||
|
try:
|
||||||
|
_mcp_stderr_log_fh = open(os.devnull, "w", encoding="utf-8")
|
||||||
|
except Exception:
|
||||||
|
# Last resort: the real stderr. Not ideal for TUI users but
|
||||||
|
# it matches pre-fix behavior.
|
||||||
|
_mcp_stderr_log_fh = sys.stderr
|
||||||
|
return _mcp_stderr_log_fh
|
||||||
|
|
||||||
|
|
||||||
|
def _write_stderr_log_header(server_name: str) -> None:
|
||||||
|
"""Write a human-readable session marker before launching a server.
|
||||||
|
|
||||||
|
Gives operators a way to find each server's output in the shared
|
||||||
|
``mcp-stderr.log`` file without needing per-line prefixes (which would
|
||||||
|
require a pipe + reader thread and complicate shutdown).
|
||||||
|
"""
|
||||||
|
fh = _get_mcp_stderr_log()
|
||||||
|
try:
|
||||||
|
ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
fh.write(f"\n===== [{ts}] starting MCP server '{server_name}' =====\n")
|
||||||
|
fh.flush()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Graceful import -- MCP SDK is an optional dependency
|
# Graceful import -- MCP SDK is an optional dependency
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -962,7 +1036,13 @@ class MCPServerTask:
|
||||||
|
|
||||||
# Snapshot child PIDs before spawning so we can track the new one.
|
# Snapshot child PIDs before spawning so we can track the new one.
|
||||||
pids_before = _snapshot_child_pids()
|
pids_before = _snapshot_child_pids()
|
||||||
async with stdio_client(server_params) as (read_stream, write_stream):
|
# Redirect subprocess stderr into a shared log file so MCP servers
|
||||||
|
# (FastMCP banners, slack-mcp startup JSON, etc.) don't dump onto
|
||||||
|
# the user's TTY and corrupt the TUI. Preserves debuggability via
|
||||||
|
# ~/.hermes/logs/mcp-stderr.log.
|
||||||
|
_write_stderr_log_header(self.name)
|
||||||
|
_errlog = _get_mcp_stderr_log()
|
||||||
|
async with stdio_client(server_params, errlog=_errlog) as (read_stream, write_stream):
|
||||||
# Capture the newly spawned subprocess PID for force-kill cleanup.
|
# Capture the newly spawned subprocess PID for force-kill cleanup.
|
||||||
new_pids = _snapshot_child_pids() - pids_before
|
new_pids = _snapshot_child_pids() - pids_before
|
||||||
if new_pids:
|
if new_pids:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue