hermes-agent/tests/test_tui_mcp_late_refresh.py
Teknium 2f7c4858a7
fix(tui): refresh tool snapshot when MCP discovery lands after agent build (#48403)
The TUI banner reported fewer tools than the classic CLI for the same
config (e.g. 32 vs 38) when an MCP server connected slowly. Root cause:
the agent snapshots `agent.tools` once at build time and never re-reads
the registry. `_make_agent` briefly joins the background MCP discovery
thread (`wait_for_mcp_discovery`, ~0.75s) so fast servers land in that
snapshot, but a server slower than the bound — common for an HTTP MCP
server on first connect — lands *after* the agent is built. Its tools are
then absent from both the agent (uncallable until `/reload-mcp`) and the
banner for the whole session.

The classic CLI doesn't hit this because it re-derives
`get_tool_definitions()` at banner render time (which re-waits for
discovery), so it picks the late tools up.

Fix: after a fresh agent is built and its first `session.info` emitted,
if discovery is still in flight, schedule an off-critical-path daemon that
waits for it to finish, then rebuilds the tool snapshot and re-emits
`session.info` — the same rebuild `/reload-mcp` performs, but automatic.
Both the agent's callable tools and the banner count catch up.

Cache safety: the rebuild runs only while the session is still
pre-first-turn (`_user_turn_count`/`_api_call_count` both 0 → nothing
cached to invalidate). Once the user has sent a message we leave the
snapshot frozen rather than break the cached prompt prefix mid-conversation;
late tools then require an explicit `/reload-mcp` (user-consented), exactly
as today. No-op when discovery finished before the agent build, when the
join times out, when the registry was unchanged, or when the session was
swapped/closed while waiting.

Adds entry.mcp_discovery_in_flight() / join_mcp_discovery() accessors and
covers the matrix (added/none/post-turn/timeout/unchanged/replaced) with
unit tests.
2026-06-18 05:41:23 -07:00

167 lines
6.2 KiB
Python

"""Tests for the TUI gateway's late MCP tool-snapshot refresh.
When an MCP server connects slower than the bounded wait in ``_make_agent``,
the agent is built without its tools and the banner/tool count is stale for the
session. ``_schedule_mcp_late_refresh`` waits for discovery to land, then
rebuilds the snapshot and re-emits ``session.info`` — but only while the
session is still pre-first-turn, so it never invalidates a cached prompt.
"""
import threading
import time
import types
import model_tools
from tui_gateway import server
from tui_gateway import entry
def _make_fake_agent(initial_tools, *, user_turns=0, api_calls=0):
agent = types.SimpleNamespace()
agent.tools = list(initial_tools)
agent.valid_tool_names = {t["function"]["name"] for t in initial_tools}
agent._user_turn_count = user_turns
agent._api_call_count = api_calls
return agent
def _tool(name):
return {"type": "function", "function": {"name": name, "description": "", "parameters": {}}}
def _drain_refresh_threads(timeout=5.0):
deadline = time.time() + timeout
for th in list(threading.enumerate()):
if th.name.startswith("tui-mcp-late-refresh-"):
th.join(timeout=max(0.0, deadline - time.time()))
def _install(monkeypatch, *, in_flight, join_result, new_defs):
"""Wire entry discovery accessors + get_tool_definitions, capture emits."""
monkeypatch.setattr(entry, "mcp_discovery_in_flight", lambda: in_flight)
monkeypatch.setattr(entry, "join_mcp_discovery", lambda timeout=None: join_result)
monkeypatch.setattr(model_tools, "get_tool_definitions", lambda **kw: list(new_defs))
monkeypatch.setattr(server, "_load_enabled_toolsets", lambda: None)
monkeypatch.setattr(server, "_session_info", lambda agent, session: {"tools_len": len(agent.tools)})
emitted = []
monkeypatch.setattr(server, "_emit", lambda event, sid, payload=None: emitted.append((event, sid, payload)))
return emitted
def test_late_refresh_adds_tools_and_reemits_when_pre_first_turn(monkeypatch):
base = [_tool("read_file"), _tool("write_file")]
full = base + [_tool("mcp__nous_support__a")] # discovery added one tool
agent = _make_fake_agent(base)
sid = "sess-late-1"
server._sessions[sid] = {"agent": agent}
try:
emitted = _install(monkeypatch, in_flight=True, join_result=True, new_defs=full)
server._schedule_mcp_late_refresh(sid, agent)
_drain_refresh_threads()
assert len(agent.tools) == 3
assert "mcp__nous_support__a" in agent.valid_tool_names
assert ("session.info", sid, {"tools_len": 3}) in emitted
finally:
server._sessions.pop(sid, None)
def test_no_refresh_when_discovery_not_in_flight(monkeypatch):
base = [_tool("read_file")]
agent = _make_fake_agent(base)
sid = "sess-late-2"
server._sessions[sid] = {"agent": agent}
try:
# in_flight=False → helper returns immediately, no thread, no rebuild.
emitted = _install(monkeypatch, in_flight=False, join_result=True, new_defs=base + [_tool("x")])
server._schedule_mcp_late_refresh(sid, agent)
_drain_refresh_threads()
assert len(agent.tools) == 1
assert emitted == []
finally:
server._sessions.pop(sid, None)
def test_no_refresh_once_conversation_started(monkeypatch):
"""Cache safety: never rebuild the tool list after the first turn."""
base = [_tool("read_file")]
full = base + [_tool("mcp__late__b")]
agent = _make_fake_agent(base, user_turns=1) # a turn already happened
sid = "sess-late-3"
server._sessions[sid] = {"agent": agent}
try:
emitted = _install(monkeypatch, in_flight=True, join_result=True, new_defs=full)
server._schedule_mcp_late_refresh(sid, agent)
_drain_refresh_threads()
# Snapshot frozen; no re-emit that would invalidate the prompt cache.
assert len(agent.tools) == 1
assert emitted == []
finally:
server._sessions.pop(sid, None)
def test_no_reemit_when_discovery_added_nothing(monkeypatch):
base = [_tool("read_file"), _tool("write_file")]
agent = _make_fake_agent(base)
sid = "sess-late-4"
server._sessions[sid] = {"agent": agent}
try:
# Discovery finished but the registry is unchanged (same count) →
# don't churn the client with a redundant session.info.
emitted = _install(monkeypatch, in_flight=True, join_result=True, new_defs=list(base))
server._schedule_mcp_late_refresh(sid, agent)
_drain_refresh_threads()
assert len(agent.tools) == 2
assert emitted == []
finally:
server._sessions.pop(sid, None)
def test_no_refresh_when_join_times_out(monkeypatch):
base = [_tool("read_file")]
full = base + [_tool("mcp__slow__c")]
agent = _make_fake_agent(base)
sid = "sess-late-5"
server._sessions[sid] = {"agent": agent}
try:
# Server never connected within the bound → join returns False, no rebuild.
emitted = _install(monkeypatch, in_flight=True, join_result=False, new_defs=full)
server._schedule_mcp_late_refresh(sid, agent)
_drain_refresh_threads()
assert len(agent.tools) == 1
assert emitted == []
finally:
server._sessions.pop(sid, None)
def test_no_refresh_when_session_replaced(monkeypatch):
"""If the session's agent was swapped (e.g. /new) while we waited, bail."""
base = [_tool("read_file")]
full = base + [_tool("mcp__late__d")]
agent = _make_fake_agent(base)
other_agent = _make_fake_agent(base)
sid = "sess-late-6"
server._sessions[sid] = {"agent": agent}
try:
emitted = _install(monkeypatch, in_flight=True, join_result=True, new_defs=full)
# Swap the stored agent out the moment join is awaited.
def _swap_join(timeout=None):
server._sessions[sid]["agent"] = other_agent
return True
monkeypatch.setattr(entry, "join_mcp_discovery", _swap_join)
server._schedule_mcp_late_refresh(sid, agent)
_drain_refresh_threads()
# Neither agent's snapshot was rebuilt; no emit.
assert len(agent.tools) == 1
assert len(other_agent.tools) == 1
assert emitted == []
finally:
server._sessions.pop(sid, None)