fix(mcp): address adversarial review round 1 (cache parity, gates, races)

Consolidated findings from three independent reviewers (Codex, Claude Code, a Hermes subagent w/ the hermes-agent-dev skill): - BLOCKING: refresh_agent_mcp_tools rebuilt only the registry subset, silently dropping post-build-injected memory-provider (mem0/honcho/…) and context- engine (lcm_*) tools on every refresh. Now additive-preserving: re-applies the same injectors agent_init uses, staged on locals and published atomically. - Re-injection now honors the #5544 enabled_toolsets gate for context-engine tools, so a restricted-toolset platform can't get lcm_* leaked back in. - Atomic read-diff-publish under one lock: the returned `added` set and the (tools, valid_tool_names) pair are consistent even under concurrent callers (no half-swap, no TOCTOU). - background_review fork opts out (_skip_mcp_refresh) so its byte-identical tools[] cache parity with the parent is preserved. - CLI /reload-mcp routed through the shared helper (was a 4th divergent copy with the same clobber bug + missing disabled_toolsets). - Explicit reloads (TUI RPC + CLI) pass enabled_override so a server the user just enabled in config this session is picked up; automatic paths reuse the agent's build-time selection. - mcp_discovery_timeout default 5.0 -> 1.5s: correctness now comes from the between-turns refresh, so the startup wait is only a small turn-1 UX bump rather than a heavy dead-server latency penalty. - has_registered_mcp_tools checks registered TOOLS (not connected servers) so a zero-tool/prompt-only server doesn't make the per-turn hook fire forever. - Tests: rewrote the thread-safety test to actually exercise the write path (alternating tool sets), added the #5544-gate regression, the memory/context preservation regression, and a "callable next turn via valid_tool_names" contract; removed a dead monkeypatch line.
2026-06-21 10:22:18 +00:00 · 2026-06-19 22:41:15 +05:30 · 2026-06-19 22:41:15 +05:30 · b6e2a54a94
commit b6e2a54a94
parent 3713483874
8 changed files with 278 additions and 51 deletions
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@ -531,7 +531,10 @@ def init_agent(
    agent._last_activity_desc: str = "initializing"
    agent._current_tool: str | None = None
    agent._api_call_count: int = 0
-
+    # Opt-out flag for the between-turns MCP tool refresh (build_turn_context).
+    # Set on internal forks (e.g. background_review) that must keep ``tools[]``
+    # byte-identical to a parent for provider cache parity.
+    agent._skip_mcp_refresh = False
    # Rate limit tracking — updated from x-ratelimit-* response headers
    # after each API call.  Accessed by /usage slash command.
    agent._rate_limit_state: Optional["RateLimitState"] = None
--- a/agent/background_review.py
+++ b/agent/background_review.py
@ -535,6 +535,13 @@ def _run_review_in_thread(
            )
            review_agent._memory_write_origin = "background_review"
            review_agent._memory_write_context = "background_review"
+            # The review fork pins the parent's cached system prompt and keeps
+            # ``tools[]`` byte-identical to the parent so its outbound request
+            # hits the same provider cache prefix (see the toolset-parity note
+            # above). The between-turns MCP refresh in build_turn_context would
+            # add late-connecting MCP tools to this fork and break that parity,
+            # so opt the review fork out of it.
+            review_agent._skip_mcp_refresh = True
            review_agent._memory_store = agent._memory_store
            review_agent._memory_enabled = agent._memory_enabled
            review_agent._user_profile_enabled = agent._user_profile_enabled
--- a/agent/turn_context.py
+++ b/agent/turn_context.py
@ -123,9 +123,10 @@ def build_turn_context(
    # or when the tool set is unchanged (``refresh_agent_mcp_tools`` diffs by
    # name and leaves the snapshot untouched on no-change).
    try:
-        from tools.mcp_tool import has_registered_mcp_tools, refresh_agent_mcp_tools
-        if has_registered_mcp_tools():
-            refresh_agent_mcp_tools(agent, quiet_mode=True)
+        if not getattr(agent, "_skip_mcp_refresh", False):
+            from tools.mcp_tool import has_registered_mcp_tools, refresh_agent_mcp_tools
+            if has_registered_mcp_tools():
+                refresh_agent_mcp_tools(agent, quiet_mode=True)
    except Exception:
        logger.debug("between-turns MCP tool refresh skipped", exc_info=True)

--- a/cli.py
+++ b/cli.py
@ -9661,16 +9661,20 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
            else:
                print(f"  🔧 {len(new_tools)} tool(s) available from {len(connected_servers)} server(s)")

-            # Refresh the agent's tool list so the model can call new tools
+            # Refresh the agent's tool list so the model can call new tools.
+            # Route through the shared helper so this CLI /reload-mcp path stays
+            # in lockstep with the TUI RPC / gateway reload / late-binding paths
+            # (name-diff, thread-safe, and — critically — additive-preserving so
+            # memory-provider and context-engine tools survive the rebuild).
            if self.agent is not None:
-                self.agent.tools = get_tool_definitions(
-                    enabled_toolsets=self.agent.enabled_toolsets
-                    if hasattr(self.agent, "enabled_toolsets") else None,
+                from tools.mcp_tool import refresh_agent_mcp_tools
+                # Explicit reload: re-resolve enabled toolsets so a server the
+                # user just enabled in config this session is picked up.
+                refresh_agent_mcp_tools(
+                    self.agent,
+                    enabled_override=self.enabled_toolsets,
                    quiet_mode=True,
                )
-                self.agent.valid_tool_names = {
-                    tool["function"]["name"] for tool in self.agent.tools
-                } if self.agent.tools else set()

            # Inject a message at the END of conversation history so the
            # model knows tools changed.  Appended after all existing
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -1208,11 +1208,13 @@ DEFAULT_CONFIG = {
    # the INSTANT discovery completes, so users with no MCP servers (the common
    # case) or fast servers pay ~0s regardless of this value — the bound is
    # only reached when a server is genuinely still connecting.  The old 0.75s
-    # default was too short for HTTP/OAuth servers (which can take 2–6s on a
-    # cold connect), so their tools were invisible for the whole session.
-    # Slow servers that miss this window are still picked up by the automatic
-    # late-binding refresh, so this is a UX/latency knob, not a correctness one.
-    "mcp_discovery_timeout": 5.0,
+    # default was a touch short for HTTP/OAuth servers on a cold connect; a
+    # modest bump lets more of them land in the FIRST turn's snapshot.  This is
+    # only a turn-1 latency/UX knob: a server that misses this window is still
+    # picked up automatically on the next turn by the between-turns refresh
+    # (see agent/turn_context.py), so correctness never depends on it.  Keep it
+    # small so a slow/dead server adds little to first-response latency.
+    "mcp_discovery_timeout": 1.5,

    # Tool-output truncation thresholds. When terminal output or a
    # single read_file page exceeds these limits, Hermes truncates the
--- a/tests/tools/test_refresh_agent_mcp_tools.py
+++ b/tests/tools/test_refresh_agent_mcp_tools.py
@ -98,13 +98,114 @@ def test_refresh_passes_agent_toolset_filters(monkeypatch):
    assert seen["disabled_toolsets"] == ["messaging"]


-def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch):
-    """Concurrent refreshes never leave tools / valid_tool_names inconsistent."""
-    agent = _agent(["a"])
+def test_refresh_preserves_memory_provider_and_context_engine_tools(monkeypatch):
+    """B1 regression: a rebuild must NOT drop post-build-injected tools.
+
+    get_tool_definitions() returns only the registry-derived tools. agent_init
+    appends memory-provider tools (mem0/honcho/…) and context-engine tools
+    (lcm_*) directly onto agent.tools AFTER that. A naive
+    `agent.tools = get_tool_definitions()` would silently delete them on every
+    refresh. The helper must re-inject them.
+    """
+    # Agent already carries: a built-in, a memory-provider tool, a context tool.
+    agent = _agent(["read_file", "memory_search", "lcm_grep"])
+
+    # Provider exposes its schemas; context compressor exposes lcm_*.
+    agent._memory_manager = types.SimpleNamespace(
+        get_all_tool_schemas=lambda: [
+            {"name": "memory_search", "description": "", "parameters": {}}
+        ]
+    )
+    agent.context_compressor = types.SimpleNamespace(
+        get_tool_schemas=lambda: [
+            {"name": "lcm_grep", "description": "", "parameters": {}}
+        ]
+    )
+    agent._context_engine_tool_names = {"lcm_grep"}

    import model_tools
-    defs = [_tool("a"), _tool("b"), _tool("c")]
-    monkeypatch.setattr(model_tools, "get_tool_definitions", lambda **kw: defs)
+    # The registry now ALSO has a newly-connected MCP tool, but does NOT contain
+    # the memory/context tools (they're never in get_tool_definitions output).
+    monkeypatch.setattr(
+        model_tools, "get_tool_definitions",
+        lambda **kw: [_tool("read_file"), _tool("mcp_new_server_tool")],
+    )
+
+    added = mcp_tool.refresh_agent_mcp_tools(agent)
+
+    # The new MCP tool landed AND the injected families survived.
+    assert "mcp_new_server_tool" in agent.valid_tool_names
+    assert "memory_search" in agent.valid_tool_names   # not clobbered
+    assert "lcm_grep" in agent.valid_tool_names         # not clobbered
+    assert added == {"mcp_new_server_tool"}
+
+
+def test_refresh_respects_context_engine_toolset_gate(monkeypatch):
+    """#5544: context-engine tools must NOT be re-injected on a restricted
+    toolset. A platform with enabled_toolsets that excludes context_engine
+    must not get lcm_* leaked back in by a refresh."""
+    agent = _agent(["read_file"], enabled=["coding"])  # context_engine NOT enabled
+    agent.context_compressor = types.SimpleNamespace(
+        get_tool_schemas=lambda: [{"name": "lcm_grep", "description": "", "parameters": {}}]
+    )
+    agent._context_engine_tool_names = set()
+
+    import model_tools
+    monkeypatch.setattr(
+        model_tools, "get_tool_definitions",
+        lambda **kw: [_tool("read_file"), _tool("mcp_new_tool")],
+    )
+
+    mcp_tool.refresh_agent_mcp_tools(agent)
+
+    assert "mcp_new_tool" in agent.valid_tool_names  # MCP tool still lands
+    assert "lcm_grep" not in agent.valid_tool_names   # gated out (#5544)
+
+
+def test_refreshed_tool_is_callable_through_valid_tool_names_guard(monkeypatch):
+    """The whole point: a late tool, once refreshed, passes the name guard the
+    run loop uses to accept/reject tool calls (agent.valid_tool_names)."""
+    agent = _agent(["read_file"])
+
+    import model_tools
+    monkeypatch.setattr(
+        model_tools, "get_tool_definitions",
+        lambda **kw: [_tool("read_file"), _tool("mcp_granola_list_meetings")],
+    )
+
+    # Before refresh the run loop would reject the call ("Tool does not exist").
+    assert "mcp_granola_list_meetings" not in agent.valid_tool_names
+
+    mcp_tool.refresh_agent_mcp_tools(agent)
+
+    # After refresh the same guard accepts it AND it's in the tools= payload.
+    assert "mcp_granola_list_meetings" in agent.valid_tool_names
+    assert any(t["function"]["name"] == "mcp_granola_list_meetings" for t in agent.tools)
+
+
+def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch):
+    """Concurrent refreshes keep tools / valid_tool_names coherent.
+
+    The registry alternates between two DIFFERENT tool sets every call, so the
+    write path (publish) runs repeatedly rather than short-circuiting on the
+    no-change early return — this actually exercises the lock. The invariant:
+    a reader of ``valid_tool_names`` must always match ``agent.tools``, and the
+    final published pair must be one of the two valid sets (never a mix).
+    """
+    agent = _agent(["a"])
+
+    import itertools
+    set_a = [_tool("a"), _tool("b")]
+    set_b = [_tool("a"), _tool("c")]
+    flip = itertools.cycle([set_a, set_b])
+    flip_lock = threading.Lock()
+
+    def _gtd(**kw):
+        with flip_lock:
+            return list(next(flip))
+
+    import model_tools
+    monkeypatch.setattr(model_tools, "get_tool_definitions", _gtd)

    errors = []

@ -112,9 +213,11 @@ def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch):
        try:
            for _ in range(50):
                mcp_tool.refresh_agent_mcp_tools(agent)
-                # Invariant: valid_tool_names must always match agent.tools.
+                # Coherence invariant: the name set must match the tool list
+                # at every observation, never a torn cross-attribute state.
                names = {t["function"]["name"] for t in agent.tools}
                assert agent.valid_tool_names == names
+                assert names in ({"a", "b"}, {"a", "c"})
        except Exception as exc:  # pragma: no cover - failure path
            errors.append(exc)

@ -125,7 +228,7 @@ def test_refresh_is_thread_safe_under_concurrent_calls(monkeypatch):
        t.join(timeout=10)

    assert not errors
-    assert agent.valid_tool_names == {"a", "b", "c"}
+    assert agent.valid_tool_names in ({"a", "b"}, {"a", "c"})


 # ── discovery-wait bound (mcp_discovery_timeout config) ──────────────────────
@ -139,9 +242,8 @@ def test_resolve_discovery_timeout_explicit_wins(monkeypatch):

 def test_resolve_discovery_timeout_reads_config(monkeypatch):
    from hermes_cli import mcp_startup
-
-    monkeypatch.setattr(mcp_startup, "load_config", None, raising=False)
    import hermes_cli.config as cfg
+
    monkeypatch.setattr(cfg, "load_config", lambda: {"mcp_discovery_timeout": 8.0})

    assert mcp_startup._resolve_discovery_timeout(None) == 8.0
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@ -4237,17 +4237,26 @@ _agent_tools_lock = threading.Lock()


 def has_registered_mcp_tools() -> bool:
-    """True if any MCP server currently has tools registered in the registry.
+    """True if any MCP server has actually registered tools into the registry.

-    Cheap — checks the live server map under ``_lock``, no registry walk.  Used
-    by the per-turn refresh hook so a session with no MCP servers configured
-    (the common case) skips the ``get_tool_definitions`` rebuild entirely.
+    Cheap — checks the global MCP-tool→server name map under ``_lock``, no
+    registry walk.  Used by the per-turn refresh hook so a session with no MCP
+    tools (the common case, and also a connected-but-zero-tool/prompt-only
+    server) skips the ``get_tool_definitions`` rebuild entirely.  Checks
+    registered TOOLS, not connected servers, so a server that registers no tools
+    doesn't keep the hook firing every turn.
    """
    with _lock:
-        return bool(_servers)
+        return bool(_mcp_tool_server_names)


-def refresh_agent_mcp_tools(agent, *, quiet_mode: bool = True) -> set:
+def refresh_agent_mcp_tools(
+    agent,
+    *,
+    enabled_override=None,
+    disabled_override=None,
+    quiet_mode: bool = True,
+) -> set:
    """Re-derive an already-built agent's tool snapshot from the live registry.

    The agent snapshots ``agent.tools`` once at build time and never re-reads
@ -4255,43 +4264,136 @@ def refresh_agent_mcp_tools(agent, *, quiet_mode: bool = True) -> set:
    *after* that snapshot — a slow HTTP/OAuth server that misses the bounded
    startup wait, or a ``/reload-mcp`` — their tools are invisible until the
    snapshot is rebuilt.  This is the single shared rebuild used by every such
-    caller (the TUI ``reload.mcp`` RPC, the gateway reload, and the late-binding
-    refresh thread) so they can't drift apart again.
+    caller (the TUI ``reload.mcp`` RPC, the gateway reload, the late-binding
+    refresh thread, and the per-turn between-turns refresh) so they can't drift
+    apart again.

    The rebuild respects the agent's own ``enabled_toolsets`` /
-    ``disabled_toolsets`` (the same filtering it was built with), diffs by tool
-    **name** (not count — a count compare misses an equal-size add/remove swap),
-    and mutates the agent under ``_agent_tools_lock``.
+    ``disabled_toolsets`` (the same filtering it was built with) and diffs by
+    tool **name** (not count — a count compare misses an equal-size add/remove
+    swap).
+
+    Crucially it is **additive-preserving**: ``get_tool_definitions`` returns
+    only the registry-derived tools, but ``agent_init`` appends two further
+    families directly onto ``agent.tools`` *after* that — external
+    memory-provider tools (mem0/honcho/…) and context-engine tools
+    (``lcm_*``).  A naive ``agent.tools = get_tool_definitions(...)`` would
+    silently DELETE those.  So after rebuilding the registry set we re-run the
+    same post-build injectors ``agent_init`` used, reconstructing the full
+    surface.  The new ``(tools, valid_tool_names)`` pair is published together
+    under ``_agent_tools_lock`` so a concurrent reader never sees a
+    cross-attribute half-swap.

    Returns the set of newly-added tool names (empty when nothing changed), so
    callers can decide whether to notify the user / re-emit session info.  The
    caller owns the prompt-cache contract: this helper does NOT check turn state,
    because each caller has a different policy (``/reload-mcp`` rebuilds after
-    explicit user consent; the late-binding thread only rebuilds pre-first-turn).
+    explicit user consent; the late-binding and between-turns paths only rebuild
+    at a turn boundary, before that turn's ``tools=`` prefix is assembled).
    """
    from model_tools import get_tool_definitions

+    # Explicit reloads (/reload-mcp) pass freshly-resolved toolsets so a server
+    # the user just ENABLED in config is picked up; the agent's stored selection
+    # is then updated to match. The automatic paths (between-turns, late-binding)
+    # pass nothing and reuse the agent's build-time selection unchanged.
+    if enabled_override is not None or disabled_override is not None:
+        enabled = enabled_override if enabled_override is not None else getattr(agent, "enabled_toolsets", None)
+        disabled = disabled_override if disabled_override is not None else getattr(agent, "disabled_toolsets", None)
+        agent.enabled_toolsets = enabled
+        agent.disabled_toolsets = disabled
+    else:
+        enabled = getattr(agent, "enabled_toolsets", None)
+        disabled = getattr(agent, "disabled_toolsets", None)
+
+    # Registry-derived tools (built-ins + MCP), filtered to the agent's toolsets.
+    # Computed OUTSIDE the lock (get_tool_definitions can be slow); the diff and
+    # publish below happen together in ONE critical section so two concurrent
+    # callers can't compute overlapping ``added`` sets or torn-publish.
+    new_defs = list(
+        get_tool_definitions(
+            enabled_toolsets=enabled,
+            disabled_toolsets=disabled,
+            quiet_mode=quiet_mode,
+        )
+        or []
+    )
+    new_names = {t["function"]["name"] for t in new_defs}
+
+    # Re-append the post-build injected families that get_tool_definitions does
+    # NOT reproduce, so a refresh never strips them (memory-provider + context-
+    # engine tools). Staged entirely on LOCALS — the live ``agent.tools`` /
+    # ``valid_tool_names`` are never touched until the single atomic publish
+    # below, so a concurrent reader (``build_api_kwargs``) can't see a partial
+    # rebuild or a cross-attribute half-swap.
+    _reinject_post_build_tools(agent, new_defs, new_names)
+
+    # Single atomic read-diff-publish so the returned ``added`` is consistent
+    # with what was actually published, even under concurrent callers.
    with _agent_tools_lock:
        current = {
            t["function"]["name"]
            for t in (getattr(agent, "tools", None) or [])
        }
-
-    new_defs = get_tool_definitions(
-        enabled_toolsets=getattr(agent, "enabled_toolsets", None),
-        disabled_toolsets=getattr(agent, "disabled_toolsets", None),
-        quiet_mode=quiet_mode,
-    )
-    new_names = {t["function"]["name"] for t in new_defs} if new_defs else set()
-
-    if new_names == current:
-        return set()
-
-    with _agent_tools_lock:
+        if new_names == current:
+            return set()  # no change → leave the live snapshot untouched (no churn)
        agent.tools = new_defs
        agent.valid_tool_names = new_names
+        return new_names - current

-    return new_names - current
+
+def _reinject_post_build_tools(agent, tools_list: list, name_set: set) -> None:
+    """Append memory-provider and context-engine tools onto staged locals.
+
+    Mirrors the post-``get_tool_definitions`` injection in ``agent_init`` so a
+    snapshot rebuild reconstructs the FULL tool surface, not just the
+    registry-derived subset. Operates on the caller's staged ``tools_list`` /
+    ``name_set`` (NOT the live agent attributes) so the rebuild stays atomic.
+    Idempotent (skips names already present) and fail-soft.
+    """
+    def _add(schema: dict) -> None:
+        name = schema.get("name", "")
+        if not name or name in name_set:
+            return
+        tools_list.append({"type": "function", "function": schema})
+        name_set.add(name)
+
+    # Memory-provider tools (mem0/honcho/byterover/supermemory/…).
+    try:
+        memory_manager = getattr(agent, "_memory_manager", None)
+        get_mem_schemas = getattr(memory_manager, "get_all_tool_schemas", None) if memory_manager else None
+        if callable(get_mem_schemas):
+            # Honor the same enablement gate inject_memory_provider_tools uses.
+            from agent.memory_manager import memory_provider_tools_enabled
+            if "memory" in name_set or memory_provider_tools_enabled(getattr(agent, "enabled_toolsets", None)):
+                for schema in get_mem_schemas():
+                    if isinstance(schema, dict):
+                        _add(schema)
+    except Exception:
+        logger.debug("Memory-provider tool re-injection skipped", exc_info=True)
+
+    # Context-engine tools (lcm_grep/lcm_describe/…) — the `context_engine`
+    # toolset is intentionally empty, so these only exist via this append.
+    # Honor the same enabled_toolsets gate agent_init uses (#5544): without it a
+    # restricted-toolset platform (e.g. platform_toolsets: telegram: []) would
+    # re-leak lcm_* tools the build deliberately excluded, and pay the local-
+    # model latency penalty.
+    try:
+        enabled = getattr(agent, "enabled_toolsets", None)
+        context_engine_allowed = enabled is None or "context_engine" in enabled
+        compressor = getattr(agent, "context_compressor", None)
+        get_schemas = getattr(compressor, "get_tool_schemas", None) if compressor else None
+        if context_engine_allowed and callable(get_schemas):
+            engine_names = getattr(agent, "_context_engine_tool_names", None)
+            for schema in get_schemas():
+                if not isinstance(schema, dict):
+                    continue
+                name = schema.get("name", "")
+                _add(schema)
+                if name and isinstance(engine_names, set):
+                    engine_names.add(name)
+    except Exception:
+        logger.debug("Context-engine tool re-injection skipped", exc_info=True)


 def shutdown_mcp_servers():
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -8409,7 +8409,13 @@ def _(rid, params: dict) -> dict:
            try:
                from tools.mcp_tool import refresh_agent_mcp_tools

-                refresh_agent_mcp_tools(agent, quiet_mode=True)
+                # Explicit reload: re-resolve enabled toolsets so a server the
+                # user just enabled in config this session is picked up.
+                refresh_agent_mcp_tools(
+                    agent,
+                    enabled_override=_load_enabled_toolsets(),
+                    quiet_mode=True,
+                )
            except Exception as _exc:
                logger.warning(
                    "Failed to refresh cached agent tools after /reload-mcp: %s",