fix(mcp): address adversarial review round 1 (cache parity, gates, races)

Consolidated findings from three independent reviewers (Codex, Claude Code, a
Hermes subagent w/ the hermes-agent-dev skill):

- BLOCKING: refresh_agent_mcp_tools rebuilt only the registry subset, silently
  dropping post-build-injected memory-provider (mem0/honcho/…) and context-
  engine (lcm_*) tools on every refresh. Now additive-preserving: re-applies
  the same injectors agent_init uses, staged on locals and published atomically.
- Re-injection now honors the #5544 enabled_toolsets gate for context-engine
  tools, so a restricted-toolset platform can't get lcm_* leaked back in.
- Atomic read-diff-publish under one lock: the returned `added` set and the
  (tools, valid_tool_names) pair are consistent even under concurrent callers
  (no half-swap, no TOCTOU).
- background_review fork opts out (_skip_mcp_refresh) so its byte-identical
  tools[] cache parity with the parent is preserved.
- CLI /reload-mcp routed through the shared helper (was a 4th divergent copy
  with the same clobber bug + missing disabled_toolsets).
- Explicit reloads (TUI RPC + CLI) pass enabled_override so a server the user
  just enabled in config this session is picked up; automatic paths reuse the
  agent's build-time selection.
- mcp_discovery_timeout default 5.0 -> 1.5s: correctness now comes from the
  between-turns refresh, so the startup wait is only a small turn-1 UX bump
  rather than a heavy dead-server latency penalty.
- has_registered_mcp_tools checks registered TOOLS (not connected servers) so a
  zero-tool/prompt-only server doesn't make the per-turn hook fire forever.
- Tests: rewrote the thread-safety test to actually exercise the write path
  (alternating tool sets), added the #5544-gate regression, the memory/context
  preservation regression, and a "callable next turn via valid_tool_names"
  contract; removed a dead monkeypatch line.
This commit is contained in:
alt-glitch 2026-06-19 22:41:15 +05:30 committed by Teknium
parent 3713483874
commit b6e2a54a94
8 changed files with 278 additions and 51 deletions

View file

@ -531,7 +531,10 @@ def init_agent(
agent._last_activity_desc: str = "initializing"
agent._current_tool: str | None = None
agent._api_call_count: int = 0
# Opt-out flag for the between-turns MCP tool refresh (build_turn_context).
# Set on internal forks (e.g. background_review) that must keep ``tools[]``
# byte-identical to a parent for provider cache parity.
agent._skip_mcp_refresh = False
# Rate limit tracking — updated from x-ratelimit-* response headers
# after each API call. Accessed by /usage slash command.
agent._rate_limit_state: Optional["RateLimitState"] = None

View file

@ -535,6 +535,13 @@ def _run_review_in_thread(
)
review_agent._memory_write_origin = "background_review"
review_agent._memory_write_context = "background_review"
# The review fork pins the parent's cached system prompt and keeps
# ``tools[]`` byte-identical to the parent so its outbound request
# hits the same provider cache prefix (see the toolset-parity note
# above). The between-turns MCP refresh in build_turn_context would
# add late-connecting MCP tools to this fork and break that parity,
# so opt the review fork out of it.
review_agent._skip_mcp_refresh = True
review_agent._memory_store = agent._memory_store
review_agent._memory_enabled = agent._memory_enabled
review_agent._user_profile_enabled = agent._user_profile_enabled

View file

@ -123,9 +123,10 @@ def build_turn_context(
# or when the tool set is unchanged (``refresh_agent_mcp_tools`` diffs by
# name and leaves the snapshot untouched on no-change).
try:
from tools.mcp_tool import has_registered_mcp_tools, refresh_agent_mcp_tools
if has_registered_mcp_tools():
refresh_agent_mcp_tools(agent, quiet_mode=True)
if not getattr(agent, "_skip_mcp_refresh", False):
from tools.mcp_tool import has_registered_mcp_tools, refresh_agent_mcp_tools
if has_registered_mcp_tools():
refresh_agent_mcp_tools(agent, quiet_mode=True)
except Exception:
logger.debug("between-turns MCP tool refresh skipped", exc_info=True)

18
cli.py
View file

@ -9661,16 +9661,20 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
else:
print(f" 🔧 {len(new_tools)} tool(s) available from {len(connected_servers)} server(s)")
# Refresh the agent's tool list so the model can call new tools
# Refresh the agent's tool list so the model can call new tools.
# Route through the shared helper so this CLI /reload-mcp path stays
# in lockstep with the TUI RPC / gateway reload / late-binding paths
# (name-diff, thread-safe, and — critically — additive-preserving so
# memory-provider and context-engine tools survive the rebuild).
if self.agent is not None:
self.agent.tools = get_tool_definitions(
enabled_toolsets=self.agent.enabled_toolsets
if hasattr(self.agent, "enabled_toolsets") else None,
from tools.mcp_tool import refresh_agent_mcp_tools
# Explicit reload: re-resolve enabled toolsets so a server the
# user just enabled in config this session is picked up.
refresh_agent_mcp_tools(
self.agent,
enabled_override=self.enabled_toolsets,
quiet_mode=True,
)
self.agent.valid_tool_names = {
tool["function"]["name"] for tool in self.agent.tools
} if self.agent.tools else set()
# Inject a message at the END of conversation history so the
# model knows tools changed. Appended after all existing

View file

@ -1208,11 +1208,13 @@ DEFAULT_CONFIG = {
# the INSTANT discovery completes, so users with no MCP servers (the common
# case) or fast servers pay ~0s regardless of this value — the bound is
# only reached when a server is genuinely still connecting. The old 0.75s
# default was too short for HTTP/OAuth servers (which can take 26s on a
# cold connect), so their tools were invisible for the whole session.
# Slow servers that miss this window are still picked up by the automatic
# late-binding refresh, so this is a UX/latency knob, not a correctness one.
"mcp_discovery_timeout": 5.0,
# default was a touch short for HTTP/OAuth servers on a cold connect; a
# modest bump lets more of them land in the FIRST turn's snapshot. This is
# only a turn-1 latency/UX knob: a server that misses this window is still
# picked up automatically on the next turn by the between-turns refresh
# (see agent/turn_context.py), so correctness never depends on it. Keep it
# small so a slow/dead server adds little to first-response latency.
"mcp_discovery_timeout": 1.5,
# Tool-output truncation thresholds. When terminal output or a
# single read_file page exceeds these limits, Hermes truncates the

View file

@ -98,13 +98,114 @@ def test_refresh_passes_agent_toolset_filters(monkeypatch):
assert seen["disabled_toolsets"] == ["messaging"]
def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch):
"""Concurrent refreshes never leave tools / valid_tool_names inconsistent."""
agent = _agent(["a"])
def test_refresh_preserves_memory_provider_and_context_engine_tools(monkeypatch):
"""B1 regression: a rebuild must NOT drop post-build-injected tools.
get_tool_definitions() returns only the registry-derived tools. agent_init
appends memory-provider tools (mem0/honcho/) and context-engine tools
(lcm_*) directly onto agent.tools AFTER that. A naive
`agent.tools = get_tool_definitions()` would silently delete them on every
refresh. The helper must re-inject them.
"""
# Agent already carries: a built-in, a memory-provider tool, a context tool.
agent = _agent(["read_file", "memory_search", "lcm_grep"])
# Provider exposes its schemas; context compressor exposes lcm_*.
agent._memory_manager = types.SimpleNamespace(
get_all_tool_schemas=lambda: [
{"name": "memory_search", "description": "", "parameters": {}}
]
)
agent.context_compressor = types.SimpleNamespace(
get_tool_schemas=lambda: [
{"name": "lcm_grep", "description": "", "parameters": {}}
]
)
agent._context_engine_tool_names = {"lcm_grep"}
import model_tools
defs = [_tool("a"), _tool("b"), _tool("c")]
monkeypatch.setattr(model_tools, "get_tool_definitions", lambda **kw: defs)
# The registry now ALSO has a newly-connected MCP tool, but does NOT contain
# the memory/context tools (they're never in get_tool_definitions output).
monkeypatch.setattr(
model_tools, "get_tool_definitions",
lambda **kw: [_tool("read_file"), _tool("mcp_new_server_tool")],
)
added = mcp_tool.refresh_agent_mcp_tools(agent)
# The new MCP tool landed AND the injected families survived.
assert "mcp_new_server_tool" in agent.valid_tool_names
assert "memory_search" in agent.valid_tool_names # not clobbered
assert "lcm_grep" in agent.valid_tool_names # not clobbered
assert added == {"mcp_new_server_tool"}
def test_refresh_respects_context_engine_toolset_gate(monkeypatch):
"""#5544: context-engine tools must NOT be re-injected on a restricted
toolset. A platform with enabled_toolsets that excludes context_engine
must not get lcm_* leaked back in by a refresh."""
agent = _agent(["read_file"], enabled=["coding"]) # context_engine NOT enabled
agent.context_compressor = types.SimpleNamespace(
get_tool_schemas=lambda: [{"name": "lcm_grep", "description": "", "parameters": {}}]
)
agent._context_engine_tool_names = set()
import model_tools
monkeypatch.setattr(
model_tools, "get_tool_definitions",
lambda **kw: [_tool("read_file"), _tool("mcp_new_tool")],
)
mcp_tool.refresh_agent_mcp_tools(agent)
assert "mcp_new_tool" in agent.valid_tool_names # MCP tool still lands
assert "lcm_grep" not in agent.valid_tool_names # gated out (#5544)
def test_refreshed_tool_is_callable_through_valid_tool_names_guard(monkeypatch):
"""The whole point: a late tool, once refreshed, passes the name guard the
run loop uses to accept/reject tool calls (agent.valid_tool_names)."""
agent = _agent(["read_file"])
import model_tools
monkeypatch.setattr(
model_tools, "get_tool_definitions",
lambda **kw: [_tool("read_file"), _tool("mcp_granola_list_meetings")],
)
# Before refresh the run loop would reject the call ("Tool does not exist").
assert "mcp_granola_list_meetings" not in agent.valid_tool_names
mcp_tool.refresh_agent_mcp_tools(agent)
# After refresh the same guard accepts it AND it's in the tools= payload.
assert "mcp_granola_list_meetings" in agent.valid_tool_names
assert any(t["function"]["name"] == "mcp_granola_list_meetings" for t in agent.tools)
def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch):
"""Concurrent refreshes keep tools / valid_tool_names coherent.
The registry alternates between two DIFFERENT tool sets every call, so the
write path (publish) runs repeatedly rather than short-circuiting on the
no-change early return this actually exercises the lock. The invariant:
a reader of ``valid_tool_names`` must always match ``agent.tools``, and the
final published pair must be one of the two valid sets (never a mix).
"""
agent = _agent(["a"])
import itertools
set_a = [_tool("a"), _tool("b")]
set_b = [_tool("a"), _tool("c")]
flip = itertools.cycle([set_a, set_b])
flip_lock = threading.Lock()
def _gtd(**kw):
with flip_lock:
return list(next(flip))
import model_tools
monkeypatch.setattr(model_tools, "get_tool_definitions", _gtd)
errors = []
@ -112,9 +213,11 @@ def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch):
try:
for _ in range(50):
mcp_tool.refresh_agent_mcp_tools(agent)
# Invariant: valid_tool_names must always match agent.tools.
# Coherence invariant: the name set must match the tool list
# at every observation, never a torn cross-attribute state.
names = {t["function"]["name"] for t in agent.tools}
assert agent.valid_tool_names == names
assert names in ({"a", "b"}, {"a", "c"})
except Exception as exc: # pragma: no cover - failure path
errors.append(exc)
@ -125,7 +228,7 @@ def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch):
t.join(timeout=10)
assert not errors
assert agent.valid_tool_names == {"a", "b", "c"}
assert agent.valid_tool_names in ({"a", "b"}, {"a", "c"})
# ── discovery-wait bound (mcp_discovery_timeout config) ──────────────────────
@ -139,9 +242,8 @@ def test_resolve_discovery_timeout_explicit_wins(monkeypatch):
def test_resolve_discovery_timeout_reads_config(monkeypatch):
from hermes_cli import mcp_startup
monkeypatch.setattr(mcp_startup, "load_config", None, raising=False)
import hermes_cli.config as cfg
monkeypatch.setattr(cfg, "load_config", lambda: {"mcp_discovery_timeout": 8.0})
assert mcp_startup._resolve_discovery_timeout(None) == 8.0

View file

@ -4237,17 +4237,26 @@ _agent_tools_lock = threading.Lock()
def has_registered_mcp_tools() -> bool:
"""True if any MCP server currently has tools registered in the registry.
"""True if any MCP server has actually registered tools into the registry.
Cheap checks the live server map under ``_lock``, no registry walk. Used
by the per-turn refresh hook so a session with no MCP servers configured
(the common case) skips the ``get_tool_definitions`` rebuild entirely.
Cheap checks the global MCP-toolserver name map under ``_lock``, no
registry walk. Used by the per-turn refresh hook so a session with no MCP
tools (the common case, and also a connected-but-zero-tool/prompt-only
server) skips the ``get_tool_definitions`` rebuild entirely. Checks
registered TOOLS, not connected servers, so a server that registers no tools
doesn't keep the hook firing every turn.
"""
with _lock:
return bool(_servers)
return bool(_mcp_tool_server_names)
def refresh_agent_mcp_tools(agent, *, quiet_mode: bool = True) -> set:
def refresh_agent_mcp_tools(
agent,
*,
enabled_override=None,
disabled_override=None,
quiet_mode: bool = True,
) -> set:
"""Re-derive an already-built agent's tool snapshot from the live registry.
The agent snapshots ``agent.tools`` once at build time and never re-reads
@ -4255,43 +4264,136 @@ def refresh_agent_mcp_tools(agent, *, quiet_mode: bool = True) -> set:
*after* that snapshot a slow HTTP/OAuth server that misses the bounded
startup wait, or a ``/reload-mcp`` their tools are invisible until the
snapshot is rebuilt. This is the single shared rebuild used by every such
caller (the TUI ``reload.mcp`` RPC, the gateway reload, and the late-binding
refresh thread) so they can't drift apart again.
caller (the TUI ``reload.mcp`` RPC, the gateway reload, the late-binding
refresh thread, and the per-turn between-turns refresh) so they can't drift
apart again.
The rebuild respects the agent's own ``enabled_toolsets`` /
``disabled_toolsets`` (the same filtering it was built with), diffs by tool
**name** (not count a count compare misses an equal-size add/remove swap),
and mutates the agent under ``_agent_tools_lock``.
``disabled_toolsets`` (the same filtering it was built with) and diffs by
tool **name** (not count a count compare misses an equal-size add/remove
swap).
Crucially it is **additive-preserving**: ``get_tool_definitions`` returns
only the registry-derived tools, but ``agent_init`` appends two further
families directly onto ``agent.tools`` *after* that external
memory-provider tools (mem0/honcho/) and context-engine tools
(``lcm_*``). A naive ``agent.tools = get_tool_definitions(...)`` would
silently DELETE those. So after rebuilding the registry set we re-run the
same post-build injectors ``agent_init`` used, reconstructing the full
surface. The new ``(tools, valid_tool_names)`` pair is published together
under ``_agent_tools_lock`` so a concurrent reader never sees a
cross-attribute half-swap.
Returns the set of newly-added tool names (empty when nothing changed), so
callers can decide whether to notify the user / re-emit session info. The
caller owns the prompt-cache contract: this helper does NOT check turn state,
because each caller has a different policy (``/reload-mcp`` rebuilds after
explicit user consent; the late-binding thread only rebuilds pre-first-turn).
explicit user consent; the late-binding and between-turns paths only rebuild
at a turn boundary, before that turn's ``tools=`` prefix is assembled).
"""
from model_tools import get_tool_definitions
# Explicit reloads (/reload-mcp) pass freshly-resolved toolsets so a server
# the user just ENABLED in config is picked up; the agent's stored selection
# is then updated to match. The automatic paths (between-turns, late-binding)
# pass nothing and reuse the agent's build-time selection unchanged.
if enabled_override is not None or disabled_override is not None:
enabled = enabled_override if enabled_override is not None else getattr(agent, "enabled_toolsets", None)
disabled = disabled_override if disabled_override is not None else getattr(agent, "disabled_toolsets", None)
agent.enabled_toolsets = enabled
agent.disabled_toolsets = disabled
else:
enabled = getattr(agent, "enabled_toolsets", None)
disabled = getattr(agent, "disabled_toolsets", None)
# Registry-derived tools (built-ins + MCP), filtered to the agent's toolsets.
# Computed OUTSIDE the lock (get_tool_definitions can be slow); the diff and
# publish below happen together in ONE critical section so two concurrent
# callers can't compute overlapping ``added`` sets or torn-publish.
new_defs = list(
get_tool_definitions(
enabled_toolsets=enabled,
disabled_toolsets=disabled,
quiet_mode=quiet_mode,
)
or []
)
new_names = {t["function"]["name"] for t in new_defs}
# Re-append the post-build injected families that get_tool_definitions does
# NOT reproduce, so a refresh never strips them (memory-provider + context-
# engine tools). Staged entirely on LOCALS — the live ``agent.tools`` /
# ``valid_tool_names`` are never touched until the single atomic publish
# below, so a concurrent reader (``build_api_kwargs``) can't see a partial
# rebuild or a cross-attribute half-swap.
_reinject_post_build_tools(agent, new_defs, new_names)
# Single atomic read-diff-publish so the returned ``added`` is consistent
# with what was actually published, even under concurrent callers.
with _agent_tools_lock:
current = {
t["function"]["name"]
for t in (getattr(agent, "tools", None) or [])
}
new_defs = get_tool_definitions(
enabled_toolsets=getattr(agent, "enabled_toolsets", None),
disabled_toolsets=getattr(agent, "disabled_toolsets", None),
quiet_mode=quiet_mode,
)
new_names = {t["function"]["name"] for t in new_defs} if new_defs else set()
if new_names == current:
return set()
with _agent_tools_lock:
if new_names == current:
return set() # no change → leave the live snapshot untouched (no churn)
agent.tools = new_defs
agent.valid_tool_names = new_names
return new_names - current
return new_names - current
def _reinject_post_build_tools(agent, tools_list: list, name_set: set) -> None:
"""Append memory-provider and context-engine tools onto staged locals.
Mirrors the post-``get_tool_definitions`` injection in ``agent_init`` so a
snapshot rebuild reconstructs the FULL tool surface, not just the
registry-derived subset. Operates on the caller's staged ``tools_list`` /
``name_set`` (NOT the live agent attributes) so the rebuild stays atomic.
Idempotent (skips names already present) and fail-soft.
"""
def _add(schema: dict) -> None:
name = schema.get("name", "")
if not name or name in name_set:
return
tools_list.append({"type": "function", "function": schema})
name_set.add(name)
# Memory-provider tools (mem0/honcho/byterover/supermemory/…).
try:
memory_manager = getattr(agent, "_memory_manager", None)
get_mem_schemas = getattr(memory_manager, "get_all_tool_schemas", None) if memory_manager else None
if callable(get_mem_schemas):
# Honor the same enablement gate inject_memory_provider_tools uses.
from agent.memory_manager import memory_provider_tools_enabled
if "memory" in name_set or memory_provider_tools_enabled(getattr(agent, "enabled_toolsets", None)):
for schema in get_mem_schemas():
if isinstance(schema, dict):
_add(schema)
except Exception:
logger.debug("Memory-provider tool re-injection skipped", exc_info=True)
# Context-engine tools (lcm_grep/lcm_describe/…) — the `context_engine`
# toolset is intentionally empty, so these only exist via this append.
# Honor the same enabled_toolsets gate agent_init uses (#5544): without it a
# restricted-toolset platform (e.g. platform_toolsets: telegram: []) would
# re-leak lcm_* tools the build deliberately excluded, and pay the local-
# model latency penalty.
try:
enabled = getattr(agent, "enabled_toolsets", None)
context_engine_allowed = enabled is None or "context_engine" in enabled
compressor = getattr(agent, "context_compressor", None)
get_schemas = getattr(compressor, "get_tool_schemas", None) if compressor else None
if context_engine_allowed and callable(get_schemas):
engine_names = getattr(agent, "_context_engine_tool_names", None)
for schema in get_schemas():
if not isinstance(schema, dict):
continue
name = schema.get("name", "")
_add(schema)
if name and isinstance(engine_names, set):
engine_names.add(name)
except Exception:
logger.debug("Context-engine tool re-injection skipped", exc_info=True)
def shutdown_mcp_servers():

View file

@ -8409,7 +8409,13 @@ def _(rid, params: dict) -> dict:
try:
from tools.mcp_tool import refresh_agent_mcp_tools
refresh_agent_mcp_tools(agent, quiet_mode=True)
# Explicit reload: re-resolve enabled toolsets so a server the
# user just enabled in config this session is picked up.
refresh_agent_mcp_tools(
agent,
enabled_override=_load_enabled_toolsets(),
quiet_mode=True,
)
except Exception as _exc:
logger.warning(
"Failed to refresh cached agent tools after /reload-mcp: %s",