mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-12 08:51:53 +00:00
fix(mcp): propagate HERMES_HOME override onto the MCP event loop (#44220)
* fix(mcp): propagate HERMES_HOME override onto the MCP event loop Closes the known limit documented in #44007: tasks scheduled via run_coroutine_threadsafe are created INSIDE the MCP loop thread, so they copy that thread's context — a per-request profile scope (dashboard ?profile= endpoints, e.g. the MCP 'Test server' probe) silently vanished for anything resolving get_hermes_home() inside the coroutine. Most visible symptom: OAuth token-store paths (HERMES_HOME/mcp-tokens/) resolved against the process home instead of the selected profile, so testing an OAuth MCP cross-profile read the wrong tokens. _run_on_mcp_loop now wraps scheduled coroutines with the caller's context-local override (_wrap_with_home_override): set inside the task's own context on the loop, reset on completion — task-local, so concurrent calls carrying different scopes don't interfere, and the loop thread's default context stays untouched. No-op (coroutine passes through unwrapped) when no override is active, i.e. every non-dashboard caller. web_server's probe comment updated from 'known limit' to 'covered'. Tests: override propagation (direct + factory form), OAuth token-path resolution on the loop, loop-context cleanliness after scoped calls, no-op passthrough. 225 green across mcp_tool + unification suites. * test(mcp): concurrent different-scope calls don't interfere
This commit is contained in:
parent
3edd09a46f
commit
73dd584995
3 changed files with 188 additions and 4 deletions
|
|
@ -6577,9 +6577,10 @@ async def test_mcp_server(name: str, profile: Optional[str] = None):
|
|||
# selected profile, matching the config the server was saved into.
|
||||
# (asyncio.to_thread copies contextvars, but entering explicitly
|
||||
# keeps the lock-protected SKILLS_DIR swap balanced per-thread.)
|
||||
# Known limit: the dedicated MCP event-loop thread spawned by the
|
||||
# probe doesn't inherit the contextvar, so OAuth token-store paths
|
||||
# resolve against the process HERMES_HOME.
|
||||
# The probe's dedicated MCP event-loop thread is covered too:
|
||||
# _run_on_mcp_loop wraps scheduled coroutines with the caller's
|
||||
# HERMES_HOME override (see mcp_tool._wrap_with_home_override), so
|
||||
# OAuth token stores resolve against the selected profile as well.
|
||||
with _profile_scope(profile):
|
||||
return _probe_single_server(name, servers[name])
|
||||
|
||||
|
|
|
|||
139
tests/tools/test_mcp_loop_profile_override.py
Normal file
139
tests/tools/test_mcp_loop_profile_override.py
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
"""Regression tests for HERMES_HOME override propagation onto the MCP loop.
|
||||
|
||||
Tasks scheduled via run_coroutine_threadsafe are created inside the MCP
|
||||
event-loop thread, so they copy THAT thread's context — not the scheduling
|
||||
thread's. A per-request profile scope (dashboard ?profile= endpoints, e.g.
|
||||
the MCP "Test server" probe) would silently vanish for anything resolving
|
||||
get_hermes_home() inside the coroutine, most visibly OAuth token-store
|
||||
paths. _run_on_mcp_loop now wraps scheduled coroutines with the caller's
|
||||
override (mcp_tool._wrap_with_home_override).
|
||||
"""
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mcp_loop():
|
||||
import tools.mcp_tool as mcp_tool
|
||||
|
||||
mcp_tool._ensure_mcp_loop()
|
||||
yield mcp_tool
|
||||
mcp_tool._stop_mcp_loop()
|
||||
|
||||
|
||||
def test_override_propagates_to_mcp_loop(tmp_path, monkeypatch, mcp_loop):
|
||||
from hermes_constants import (
|
||||
get_hermes_home,
|
||||
reset_hermes_home_override,
|
||||
set_hermes_home_override,
|
||||
)
|
||||
|
||||
process_home = tmp_path / "proc-home"
|
||||
profile_home = tmp_path / "profile-home"
|
||||
process_home.mkdir()
|
||||
profile_home.mkdir()
|
||||
monkeypatch.setenv("HERMES_HOME", str(process_home))
|
||||
|
||||
async def read_home():
|
||||
return str(get_hermes_home())
|
||||
|
||||
# Unscoped: the loop task sees the process home.
|
||||
assert mcp_loop._run_on_mcp_loop(read_home(), timeout=10) == str(process_home)
|
||||
|
||||
# Scoped: the caller's override must reach the loop task.
|
||||
token = set_hermes_home_override(str(profile_home))
|
||||
try:
|
||||
assert mcp_loop._run_on_mcp_loop(read_home(), timeout=10) == str(profile_home)
|
||||
# Factory form must be wrapped too.
|
||||
assert mcp_loop._run_on_mcp_loop(lambda: read_home(), timeout=10) == str(
|
||||
profile_home
|
||||
)
|
||||
finally:
|
||||
reset_hermes_home_override(token)
|
||||
|
||||
# The loop thread's default context is untouched afterwards.
|
||||
assert mcp_loop._run_on_mcp_loop(read_home(), timeout=10) == str(process_home)
|
||||
|
||||
|
||||
def test_oauth_token_paths_follow_override(tmp_path, monkeypatch, mcp_loop):
|
||||
"""The actual symptom path: HermesTokenStorage resolving inside the
|
||||
probe's MCP-loop coroutine must land in the selected profile's
|
||||
mcp-tokens dir, not the process home's."""
|
||||
from hermes_constants import (
|
||||
reset_hermes_home_override,
|
||||
set_hermes_home_override,
|
||||
)
|
||||
|
||||
process_home = tmp_path / "proc-home"
|
||||
profile_home = tmp_path / "profile-home"
|
||||
process_home.mkdir()
|
||||
profile_home.mkdir()
|
||||
monkeypatch.setenv("HERMES_HOME", str(process_home))
|
||||
|
||||
async def token_path():
|
||||
from tools.mcp_oauth import HermesTokenStorage
|
||||
|
||||
return str(HermesTokenStorage("probe-srv")._tokens_path())
|
||||
|
||||
token = set_hermes_home_override(str(profile_home))
|
||||
try:
|
||||
path = mcp_loop._run_on_mcp_loop(token_path(), timeout=10)
|
||||
finally:
|
||||
reset_hermes_home_override(token)
|
||||
assert path.startswith(str(profile_home))
|
||||
assert os.path.join("mcp-tokens", "probe-srv.json") in path
|
||||
|
||||
|
||||
def test_concurrent_scopes_do_not_interfere(tmp_path, monkeypatch, mcp_loop):
|
||||
"""Two threads carrying DIFFERENT overrides scheduling onto the same
|
||||
loop must each see their own home — the wrapper is task-local."""
|
||||
import threading
|
||||
|
||||
from hermes_constants import (
|
||||
get_hermes_home,
|
||||
reset_hermes_home_override,
|
||||
set_hermes_home_override,
|
||||
)
|
||||
|
||||
process_home = tmp_path / "proc-home"
|
||||
home_a = tmp_path / "profile-a"
|
||||
home_b = tmp_path / "profile-b"
|
||||
for h in (process_home, home_a, home_b):
|
||||
h.mkdir()
|
||||
monkeypatch.setenv("HERMES_HOME", str(process_home))
|
||||
|
||||
async def read_home():
|
||||
return str(get_hermes_home())
|
||||
|
||||
results: dict = {}
|
||||
|
||||
def scoped_call(key, home):
|
||||
token = set_hermes_home_override(str(home))
|
||||
try:
|
||||
results[key] = mcp_loop._run_on_mcp_loop(read_home(), timeout=10)
|
||||
finally:
|
||||
reset_hermes_home_override(token)
|
||||
|
||||
threads = [
|
||||
threading.Thread(target=scoped_call, args=("a", home_a)),
|
||||
threading.Thread(target=scoped_call, args=("b", home_b)),
|
||||
]
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join(timeout=15)
|
||||
|
||||
assert results == {"a": str(home_a), "b": str(home_b)}
|
||||
|
||||
|
||||
def test_wrap_is_noop_without_override(mcp_loop):
|
||||
"""No active override → the coroutine passes through unwrapped."""
|
||||
|
||||
async def trivial():
|
||||
return 42
|
||||
|
||||
coro = trivial()
|
||||
wrapped = mcp_loop._wrap_with_home_override(coro)
|
||||
assert wrapped is coro
|
||||
coro.close()
|
||||
|
|
@ -90,7 +90,7 @@ import sys
|
|||
import threading
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Coroutine, Dict, List, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -2460,6 +2460,37 @@ def _ensure_mcp_loop():
|
|||
_mcp_thread.start()
|
||||
|
||||
|
||||
def _wrap_with_home_override(coro: "Coroutine") -> "Coroutine":
|
||||
"""Carry the caller's context-local HERMES_HOME override into ``coro``.
|
||||
|
||||
Returns ``coro`` unchanged when no override is active. Otherwise wraps
|
||||
it so the override is set inside the coroutine's own (task-local)
|
||||
context on the MCP loop and reset when it completes — concurrent calls
|
||||
carrying different scopes don't interfere.
|
||||
"""
|
||||
try:
|
||||
from hermes_constants import (
|
||||
get_hermes_home_override,
|
||||
reset_hermes_home_override,
|
||||
set_hermes_home_override,
|
||||
)
|
||||
|
||||
home_override = get_hermes_home_override()
|
||||
except Exception:
|
||||
return coro
|
||||
if not home_override:
|
||||
return coro
|
||||
|
||||
async def _scoped():
|
||||
token = set_hermes_home_override(home_override)
|
||||
try:
|
||||
return await coro
|
||||
finally:
|
||||
reset_hermes_home_override(token)
|
||||
|
||||
return _scoped()
|
||||
|
||||
|
||||
def _run_on_mcp_loop(coro_or_factory, timeout: float = 30):
|
||||
"""Schedule a coroutine on the MCP event loop and block until done.
|
||||
|
||||
|
|
@ -2482,6 +2513,19 @@ def _run_on_mcp_loop(coro_or_factory, timeout: float = 30):
|
|||
raise RuntimeError("MCP event loop is not running")
|
||||
|
||||
coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
|
||||
|
||||
# Propagate the context-local HERMES_HOME override onto the MCP loop.
|
||||
# Tasks scheduled via run_coroutine_threadsafe are created INSIDE the
|
||||
# loop thread, so they copy the loop thread's context — not the
|
||||
# scheduling thread's. A per-request profile scope (the dashboard's
|
||||
# ?profile= endpoints, e.g. the MCP "Test server" probe) would silently
|
||||
# vanish here: OAuth token stores and any other get_hermes_home()
|
||||
# resolution inside the coroutine would read the process home instead
|
||||
# of the selected profile's. Re-establish the override inside the
|
||||
# task's own context (task-local — concurrent calls carrying different
|
||||
# scopes don't interfere). No-op when no override is active.
|
||||
coro = _wrap_with_home_override(coro)
|
||||
|
||||
future = safe_schedule_threadsafe(
|
||||
coro, loop,
|
||||
logger=logger,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue