From b6e2a54a94f58f9ebafa79f45d45b0ccb2b17043 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 19 Jun 2026 22:41:15 +0530 Subject: [PATCH] fix(mcp): address adversarial review round 1 (cache parity, gates, races) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidated findings from three independent reviewers (Codex, Claude Code, a Hermes subagent w/ the hermes-agent-dev skill): - BLOCKING: refresh_agent_mcp_tools rebuilt only the registry subset, silently dropping post-build-injected memory-provider (mem0/honcho/…) and context- engine (lcm_*) tools on every refresh. Now additive-preserving: re-applies the same injectors agent_init uses, staged on locals and published atomically. - Re-injection now honors the #5544 enabled_toolsets gate for context-engine tools, so a restricted-toolset platform can't get lcm_* leaked back in. - Atomic read-diff-publish under one lock: the returned `added` set and the (tools, valid_tool_names) pair are consistent even under concurrent callers (no half-swap, no TOCTOU). - background_review fork opts out (_skip_mcp_refresh) so its byte-identical tools[] cache parity with the parent is preserved. - CLI /reload-mcp routed through the shared helper (was a 4th divergent copy with the same clobber bug + missing disabled_toolsets). - Explicit reloads (TUI RPC + CLI) pass enabled_override so a server the user just enabled in config this session is picked up; automatic paths reuse the agent's build-time selection. - mcp_discovery_timeout default 5.0 -> 1.5s: correctness now comes from the between-turns refresh, so the startup wait is only a small turn-1 UX bump rather than a heavy dead-server latency penalty. - has_registered_mcp_tools checks registered TOOLS (not connected servers) so a zero-tool/prompt-only server doesn't make the per-turn hook fire forever. - Tests: rewrote the thread-safety test to actually exercise the write path (alternating tool sets), added the #5544-gate regression, the memory/context preservation regression, and a "callable next turn via valid_tool_names" contract; removed a dead monkeypatch line. --- agent/agent_init.py | 5 +- agent/background_review.py | 7 + agent/turn_context.py | 7 +- cli.py | 18 ++- hermes_cli/config.py | 12 +- tests/tools/test_refresh_agent_mcp_tools.py | 120 ++++++++++++++-- tools/mcp_tool.py | 152 ++++++++++++++++---- tui_gateway/server.py | 8 +- 8 files changed, 278 insertions(+), 51 deletions(-) diff --git a/agent/agent_init.py b/agent/agent_init.py index 555f930f559..7131a93c3b7 100644 --- a/agent/agent_init.py +++ b/agent/agent_init.py @@ -531,7 +531,10 @@ def init_agent( agent._last_activity_desc: str = "initializing" agent._current_tool: str | None = None agent._api_call_count: int = 0 - + # Opt-out flag for the between-turns MCP tool refresh (build_turn_context). + # Set on internal forks (e.g. background_review) that must keep ``tools[]`` + # byte-identical to a parent for provider cache parity. + agent._skip_mcp_refresh = False # Rate limit tracking β€” updated from x-ratelimit-* response headers # after each API call. Accessed by /usage slash command. agent._rate_limit_state: Optional["RateLimitState"] = None diff --git a/agent/background_review.py b/agent/background_review.py index ee4791d98d3..c809b496065 100644 --- a/agent/background_review.py +++ b/agent/background_review.py @@ -535,6 +535,13 @@ def _run_review_in_thread( ) review_agent._memory_write_origin = "background_review" review_agent._memory_write_context = "background_review" + # The review fork pins the parent's cached system prompt and keeps + # ``tools[]`` byte-identical to the parent so its outbound request + # hits the same provider cache prefix (see the toolset-parity note + # above). The between-turns MCP refresh in build_turn_context would + # add late-connecting MCP tools to this fork and break that parity, + # so opt the review fork out of it. + review_agent._skip_mcp_refresh = True review_agent._memory_store = agent._memory_store review_agent._memory_enabled = agent._memory_enabled review_agent._user_profile_enabled = agent._user_profile_enabled diff --git a/agent/turn_context.py b/agent/turn_context.py index 3e107e0900c..0bbdf73764e 100644 --- a/agent/turn_context.py +++ b/agent/turn_context.py @@ -123,9 +123,10 @@ def build_turn_context( # or when the tool set is unchanged (``refresh_agent_mcp_tools`` diffs by # name and leaves the snapshot untouched on no-change). try: - from tools.mcp_tool import has_registered_mcp_tools, refresh_agent_mcp_tools - if has_registered_mcp_tools(): - refresh_agent_mcp_tools(agent, quiet_mode=True) + if not getattr(agent, "_skip_mcp_refresh", False): + from tools.mcp_tool import has_registered_mcp_tools, refresh_agent_mcp_tools + if has_registered_mcp_tools(): + refresh_agent_mcp_tools(agent, quiet_mode=True) except Exception: logger.debug("between-turns MCP tool refresh skipped", exc_info=True) diff --git a/cli.py b/cli.py index 2ca1af6faef..eb8b15a93f8 100644 --- a/cli.py +++ b/cli.py @@ -9661,16 +9661,20 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): else: print(f" πŸ”§ {len(new_tools)} tool(s) available from {len(connected_servers)} server(s)") - # Refresh the agent's tool list so the model can call new tools + # Refresh the agent's tool list so the model can call new tools. + # Route through the shared helper so this CLI /reload-mcp path stays + # in lockstep with the TUI RPC / gateway reload / late-binding paths + # (name-diff, thread-safe, and β€” critically β€” additive-preserving so + # memory-provider and context-engine tools survive the rebuild). if self.agent is not None: - self.agent.tools = get_tool_definitions( - enabled_toolsets=self.agent.enabled_toolsets - if hasattr(self.agent, "enabled_toolsets") else None, + from tools.mcp_tool import refresh_agent_mcp_tools + # Explicit reload: re-resolve enabled toolsets so a server the + # user just enabled in config this session is picked up. + refresh_agent_mcp_tools( + self.agent, + enabled_override=self.enabled_toolsets, quiet_mode=True, ) - self.agent.valid_tool_names = { - tool["function"]["name"] for tool in self.agent.tools - } if self.agent.tools else set() # Inject a message at the END of conversation history so the # model knows tools changed. Appended after all existing diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 6b3389b406b..d36f7e8a9c9 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1208,11 +1208,13 @@ DEFAULT_CONFIG = { # the INSTANT discovery completes, so users with no MCP servers (the common # case) or fast servers pay ~0s regardless of this value β€” the bound is # only reached when a server is genuinely still connecting. The old 0.75s - # default was too short for HTTP/OAuth servers (which can take 2–6s on a - # cold connect), so their tools were invisible for the whole session. - # Slow servers that miss this window are still picked up by the automatic - # late-binding refresh, so this is a UX/latency knob, not a correctness one. - "mcp_discovery_timeout": 5.0, + # default was a touch short for HTTP/OAuth servers on a cold connect; a + # modest bump lets more of them land in the FIRST turn's snapshot. This is + # only a turn-1 latency/UX knob: a server that misses this window is still + # picked up automatically on the next turn by the between-turns refresh + # (see agent/turn_context.py), so correctness never depends on it. Keep it + # small so a slow/dead server adds little to first-response latency. + "mcp_discovery_timeout": 1.5, # Tool-output truncation thresholds. When terminal output or a # single read_file page exceeds these limits, Hermes truncates the diff --git a/tests/tools/test_refresh_agent_mcp_tools.py b/tests/tools/test_refresh_agent_mcp_tools.py index 13e5cbb286e..3a347371c02 100644 --- a/tests/tools/test_refresh_agent_mcp_tools.py +++ b/tests/tools/test_refresh_agent_mcp_tools.py @@ -98,13 +98,114 @@ def test_refresh_passes_agent_toolset_filters(monkeypatch): assert seen["disabled_toolsets"] == ["messaging"] -def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch): - """Concurrent refreshes never leave tools / valid_tool_names inconsistent.""" - agent = _agent(["a"]) +def test_refresh_preserves_memory_provider_and_context_engine_tools(monkeypatch): + """B1 regression: a rebuild must NOT drop post-build-injected tools. + + get_tool_definitions() returns only the registry-derived tools. agent_init + appends memory-provider tools (mem0/honcho/…) and context-engine tools + (lcm_*) directly onto agent.tools AFTER that. A naive + `agent.tools = get_tool_definitions()` would silently delete them on every + refresh. The helper must re-inject them. + """ + # Agent already carries: a built-in, a memory-provider tool, a context tool. + agent = _agent(["read_file", "memory_search", "lcm_grep"]) + + # Provider exposes its schemas; context compressor exposes lcm_*. + agent._memory_manager = types.SimpleNamespace( + get_all_tool_schemas=lambda: [ + {"name": "memory_search", "description": "", "parameters": {}} + ] + ) + agent.context_compressor = types.SimpleNamespace( + get_tool_schemas=lambda: [ + {"name": "lcm_grep", "description": "", "parameters": {}} + ] + ) + agent._context_engine_tool_names = {"lcm_grep"} import model_tools - defs = [_tool("a"), _tool("b"), _tool("c")] - monkeypatch.setattr(model_tools, "get_tool_definitions", lambda **kw: defs) + # The registry now ALSO has a newly-connected MCP tool, but does NOT contain + # the memory/context tools (they're never in get_tool_definitions output). + monkeypatch.setattr( + model_tools, "get_tool_definitions", + lambda **kw: [_tool("read_file"), _tool("mcp_new_server_tool")], + ) + + added = mcp_tool.refresh_agent_mcp_tools(agent) + + # The new MCP tool landed AND the injected families survived. + assert "mcp_new_server_tool" in agent.valid_tool_names + assert "memory_search" in agent.valid_tool_names # not clobbered + assert "lcm_grep" in agent.valid_tool_names # not clobbered + assert added == {"mcp_new_server_tool"} + + +def test_refresh_respects_context_engine_toolset_gate(monkeypatch): + """#5544: context-engine tools must NOT be re-injected on a restricted + toolset. A platform with enabled_toolsets that excludes context_engine + must not get lcm_* leaked back in by a refresh.""" + agent = _agent(["read_file"], enabled=["coding"]) # context_engine NOT enabled + agent.context_compressor = types.SimpleNamespace( + get_tool_schemas=lambda: [{"name": "lcm_grep", "description": "", "parameters": {}}] + ) + agent._context_engine_tool_names = set() + + import model_tools + monkeypatch.setattr( + model_tools, "get_tool_definitions", + lambda **kw: [_tool("read_file"), _tool("mcp_new_tool")], + ) + + mcp_tool.refresh_agent_mcp_tools(agent) + + assert "mcp_new_tool" in agent.valid_tool_names # MCP tool still lands + assert "lcm_grep" not in agent.valid_tool_names # gated out (#5544) + + +def test_refreshed_tool_is_callable_through_valid_tool_names_guard(monkeypatch): + """The whole point: a late tool, once refreshed, passes the name guard the + run loop uses to accept/reject tool calls (agent.valid_tool_names).""" + agent = _agent(["read_file"]) + + import model_tools + monkeypatch.setattr( + model_tools, "get_tool_definitions", + lambda **kw: [_tool("read_file"), _tool("mcp_granola_list_meetings")], + ) + + # Before refresh the run loop would reject the call ("Tool does not exist"). + assert "mcp_granola_list_meetings" not in agent.valid_tool_names + + mcp_tool.refresh_agent_mcp_tools(agent) + + # After refresh the same guard accepts it AND it's in the tools= payload. + assert "mcp_granola_list_meetings" in agent.valid_tool_names + assert any(t["function"]["name"] == "mcp_granola_list_meetings" for t in agent.tools) + + +def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch): + """Concurrent refreshes keep tools / valid_tool_names coherent. + + The registry alternates between two DIFFERENT tool sets every call, so the + write path (publish) runs repeatedly rather than short-circuiting on the + no-change early return β€” this actually exercises the lock. The invariant: + a reader of ``valid_tool_names`` must always match ``agent.tools``, and the + final published pair must be one of the two valid sets (never a mix). + """ + agent = _agent(["a"]) + + import itertools + set_a = [_tool("a"), _tool("b")] + set_b = [_tool("a"), _tool("c")] + flip = itertools.cycle([set_a, set_b]) + flip_lock = threading.Lock() + + def _gtd(**kw): + with flip_lock: + return list(next(flip)) + + import model_tools + monkeypatch.setattr(model_tools, "get_tool_definitions", _gtd) errors = [] @@ -112,9 +213,11 @@ def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch): try: for _ in range(50): mcp_tool.refresh_agent_mcp_tools(agent) - # Invariant: valid_tool_names must always match agent.tools. + # Coherence invariant: the name set must match the tool list + # at every observation, never a torn cross-attribute state. names = {t["function"]["name"] for t in agent.tools} assert agent.valid_tool_names == names + assert names in ({"a", "b"}, {"a", "c"}) except Exception as exc: # pragma: no cover - failure path errors.append(exc) @@ -125,7 +228,7 @@ def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch): t.join(timeout=10) assert not errors - assert agent.valid_tool_names == {"a", "b", "c"} + assert agent.valid_tool_names in ({"a", "b"}, {"a", "c"}) # ── discovery-wait bound (mcp_discovery_timeout config) ────────────────────── @@ -139,9 +242,8 @@ def test_resolve_discovery_timeout_explicit_wins(monkeypatch): def test_resolve_discovery_timeout_reads_config(monkeypatch): from hermes_cli import mcp_startup - - monkeypatch.setattr(mcp_startup, "load_config", None, raising=False) import hermes_cli.config as cfg + monkeypatch.setattr(cfg, "load_config", lambda: {"mcp_discovery_timeout": 8.0}) assert mcp_startup._resolve_discovery_timeout(None) == 8.0 diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index 1611d8394f6..ffb9fe0f1ab 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -4237,17 +4237,26 @@ _agent_tools_lock = threading.Lock() def has_registered_mcp_tools() -> bool: - """True if any MCP server currently has tools registered in the registry. + """True if any MCP server has actually registered tools into the registry. - Cheap β€” checks the live server map under ``_lock``, no registry walk. Used - by the per-turn refresh hook so a session with no MCP servers configured - (the common case) skips the ``get_tool_definitions`` rebuild entirely. + Cheap β€” checks the global MCP-toolβ†’server name map under ``_lock``, no + registry walk. Used by the per-turn refresh hook so a session with no MCP + tools (the common case, and also a connected-but-zero-tool/prompt-only + server) skips the ``get_tool_definitions`` rebuild entirely. Checks + registered TOOLS, not connected servers, so a server that registers no tools + doesn't keep the hook firing every turn. """ with _lock: - return bool(_servers) + return bool(_mcp_tool_server_names) -def refresh_agent_mcp_tools(agent, *, quiet_mode: bool = True) -> set: +def refresh_agent_mcp_tools( + agent, + *, + enabled_override=None, + disabled_override=None, + quiet_mode: bool = True, +) -> set: """Re-derive an already-built agent's tool snapshot from the live registry. The agent snapshots ``agent.tools`` once at build time and never re-reads @@ -4255,43 +4264,136 @@ def refresh_agent_mcp_tools(agent, *, quiet_mode: bool = True) -> set: *after* that snapshot β€” a slow HTTP/OAuth server that misses the bounded startup wait, or a ``/reload-mcp`` β€” their tools are invisible until the snapshot is rebuilt. This is the single shared rebuild used by every such - caller (the TUI ``reload.mcp`` RPC, the gateway reload, and the late-binding - refresh thread) so they can't drift apart again. + caller (the TUI ``reload.mcp`` RPC, the gateway reload, the late-binding + refresh thread, and the per-turn between-turns refresh) so they can't drift + apart again. The rebuild respects the agent's own ``enabled_toolsets`` / - ``disabled_toolsets`` (the same filtering it was built with), diffs by tool - **name** (not count β€” a count compare misses an equal-size add/remove swap), - and mutates the agent under ``_agent_tools_lock``. + ``disabled_toolsets`` (the same filtering it was built with) and diffs by + tool **name** (not count β€” a count compare misses an equal-size add/remove + swap). + + Crucially it is **additive-preserving**: ``get_tool_definitions`` returns + only the registry-derived tools, but ``agent_init`` appends two further + families directly onto ``agent.tools`` *after* that β€” external + memory-provider tools (mem0/honcho/…) and context-engine tools + (``lcm_*``). A naive ``agent.tools = get_tool_definitions(...)`` would + silently DELETE those. So after rebuilding the registry set we re-run the + same post-build injectors ``agent_init`` used, reconstructing the full + surface. The new ``(tools, valid_tool_names)`` pair is published together + under ``_agent_tools_lock`` so a concurrent reader never sees a + cross-attribute half-swap. Returns the set of newly-added tool names (empty when nothing changed), so callers can decide whether to notify the user / re-emit session info. The caller owns the prompt-cache contract: this helper does NOT check turn state, because each caller has a different policy (``/reload-mcp`` rebuilds after - explicit user consent; the late-binding thread only rebuilds pre-first-turn). + explicit user consent; the late-binding and between-turns paths only rebuild + at a turn boundary, before that turn's ``tools=`` prefix is assembled). """ from model_tools import get_tool_definitions + # Explicit reloads (/reload-mcp) pass freshly-resolved toolsets so a server + # the user just ENABLED in config is picked up; the agent's stored selection + # is then updated to match. The automatic paths (between-turns, late-binding) + # pass nothing and reuse the agent's build-time selection unchanged. + if enabled_override is not None or disabled_override is not None: + enabled = enabled_override if enabled_override is not None else getattr(agent, "enabled_toolsets", None) + disabled = disabled_override if disabled_override is not None else getattr(agent, "disabled_toolsets", None) + agent.enabled_toolsets = enabled + agent.disabled_toolsets = disabled + else: + enabled = getattr(agent, "enabled_toolsets", None) + disabled = getattr(agent, "disabled_toolsets", None) + + # Registry-derived tools (built-ins + MCP), filtered to the agent's toolsets. + # Computed OUTSIDE the lock (get_tool_definitions can be slow); the diff and + # publish below happen together in ONE critical section so two concurrent + # callers can't compute overlapping ``added`` sets or torn-publish. + new_defs = list( + get_tool_definitions( + enabled_toolsets=enabled, + disabled_toolsets=disabled, + quiet_mode=quiet_mode, + ) + or [] + ) + new_names = {t["function"]["name"] for t in new_defs} + + # Re-append the post-build injected families that get_tool_definitions does + # NOT reproduce, so a refresh never strips them (memory-provider + context- + # engine tools). Staged entirely on LOCALS β€” the live ``agent.tools`` / + # ``valid_tool_names`` are never touched until the single atomic publish + # below, so a concurrent reader (``build_api_kwargs``) can't see a partial + # rebuild or a cross-attribute half-swap. + _reinject_post_build_tools(agent, new_defs, new_names) + + # Single atomic read-diff-publish so the returned ``added`` is consistent + # with what was actually published, even under concurrent callers. with _agent_tools_lock: current = { t["function"]["name"] for t in (getattr(agent, "tools", None) or []) } - - new_defs = get_tool_definitions( - enabled_toolsets=getattr(agent, "enabled_toolsets", None), - disabled_toolsets=getattr(agent, "disabled_toolsets", None), - quiet_mode=quiet_mode, - ) - new_names = {t["function"]["name"] for t in new_defs} if new_defs else set() - - if new_names == current: - return set() - - with _agent_tools_lock: + if new_names == current: + return set() # no change β†’ leave the live snapshot untouched (no churn) agent.tools = new_defs agent.valid_tool_names = new_names + return new_names - current - return new_names - current + +def _reinject_post_build_tools(agent, tools_list: list, name_set: set) -> None: + """Append memory-provider and context-engine tools onto staged locals. + + Mirrors the post-``get_tool_definitions`` injection in ``agent_init`` so a + snapshot rebuild reconstructs the FULL tool surface, not just the + registry-derived subset. Operates on the caller's staged ``tools_list`` / + ``name_set`` (NOT the live agent attributes) so the rebuild stays atomic. + Idempotent (skips names already present) and fail-soft. + """ + def _add(schema: dict) -> None: + name = schema.get("name", "") + if not name or name in name_set: + return + tools_list.append({"type": "function", "function": schema}) + name_set.add(name) + + # Memory-provider tools (mem0/honcho/byterover/supermemory/…). + try: + memory_manager = getattr(agent, "_memory_manager", None) + get_mem_schemas = getattr(memory_manager, "get_all_tool_schemas", None) if memory_manager else None + if callable(get_mem_schemas): + # Honor the same enablement gate inject_memory_provider_tools uses. + from agent.memory_manager import memory_provider_tools_enabled + if "memory" in name_set or memory_provider_tools_enabled(getattr(agent, "enabled_toolsets", None)): + for schema in get_mem_schemas(): + if isinstance(schema, dict): + _add(schema) + except Exception: + logger.debug("Memory-provider tool re-injection skipped", exc_info=True) + + # Context-engine tools (lcm_grep/lcm_describe/…) β€” the `context_engine` + # toolset is intentionally empty, so these only exist via this append. + # Honor the same enabled_toolsets gate agent_init uses (#5544): without it a + # restricted-toolset platform (e.g. platform_toolsets: telegram: []) would + # re-leak lcm_* tools the build deliberately excluded, and pay the local- + # model latency penalty. + try: + enabled = getattr(agent, "enabled_toolsets", None) + context_engine_allowed = enabled is None or "context_engine" in enabled + compressor = getattr(agent, "context_compressor", None) + get_schemas = getattr(compressor, "get_tool_schemas", None) if compressor else None + if context_engine_allowed and callable(get_schemas): + engine_names = getattr(agent, "_context_engine_tool_names", None) + for schema in get_schemas(): + if not isinstance(schema, dict): + continue + name = schema.get("name", "") + _add(schema) + if name and isinstance(engine_names, set): + engine_names.add(name) + except Exception: + logger.debug("Context-engine tool re-injection skipped", exc_info=True) def shutdown_mcp_servers(): diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 8b13e7352b9..f43ea707c81 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -8409,7 +8409,13 @@ def _(rid, params: dict) -> dict: try: from tools.mcp_tool import refresh_agent_mcp_tools - refresh_agent_mcp_tools(agent, quiet_mode=True) + # Explicit reload: re-resolve enabled toolsets so a server the + # user just enabled in config this session is picked up. + refresh_agent_mcp_tools( + agent, + enabled_override=_load_enabled_toolsets(), + quiet_mode=True, + ) except Exception as _exc: logger.warning( "Failed to refresh cached agent tools after /reload-mcp: %s",