fix(cron): initialize MCP servers before constructing the cron AIAgent (#21354)

cron/scheduler.py:run_job() constructed AIAgent(...) without ever calling
discover_mcp_tools(). The CLI and gateway paths do this at startup; cron
jobs inherited none of it and the user's configured mcp_servers were
invisible inside every cron run.

Insert discover_mcp_tools() right before AIAgent(), wrapped in try/except
so a broken MCP server can't kill an otherwise-working cron job. The call
is idempotent: register_mcp_servers() short-circuits on already-connected
servers, so subsequent ticks in the same scheduler process pay ~0ms.
Scoped to the LLM path only; no_agent script jobs skip it entirely.

Closes #4219.
This commit is contained in:
Teknium 2026-05-07 07:53:03 -07:00 committed by GitHub
parent 4de3ef38b1
commit 04918345ea
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 161 additions and 0 deletions

View file

@ -1323,6 +1323,27 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
except Exception as e: except Exception as e:
logger.debug("Job '%s': failed to load credential pool for %s: %s", job_id, runtime_provider, e) logger.debug("Job '%s': failed to load credential pool for %s: %s", job_id, runtime_provider, e)
# Initialize MCP servers so configured mcp_servers are available to
# the agent's tool registry before AIAgent is constructed. Without
# this, cron jobs never saw any MCP tools — only the gateway / CLI
# paths called discover_mcp_tools() at startup. Idempotent: subsequent
# ticks short-circuit on already-connected servers inside
# register_mcp_servers(). Non-fatal on failure: a broken MCP server
# shouldn't kill an otherwise-working cron job. See #4219.
try:
from tools.mcp_tool import discover_mcp_tools
_mcp_tools = discover_mcp_tools()
if _mcp_tools:
logger.info(
"Job '%s': %d MCP tool(s) available",
job_id, len(_mcp_tools),
)
except Exception as _mcp_exc:
logger.warning(
"Job '%s': MCP initialization failed (non-fatal): %s",
job_id, _mcp_exc,
)
agent = AIAgent( agent = AIAgent(
model=model, model=model,
api_key=runtime.get("api_key"), api_key=runtime.get("api_key"),

View file

@ -0,0 +1,140 @@
"""Regression tests for MCP server availability in cron jobs.
Background
==========
``cron/scheduler.py:run_job()`` constructs ``AIAgent(...)`` directly without
calling ``discover_mcp_tools()`` the initialization that CLI and gateway
paths do at startup. Cron jobs therefore never saw any MCP tools from
``mcp_servers`` in config.yaml. See #4219.
The fix inserts ``discover_mcp_tools()`` before the ``AIAgent(...)`` call,
wrapped in try/except so a broken MCP server can't kill an otherwise
working cron job. ``discover_mcp_tools`` is idempotent subsequent ticks
short-circuit on already-connected servers.
"""
from __future__ import annotations
from unittest.mock import patch, MagicMock
import pytest
def test_run_job_calls_discover_mcp_tools_before_agent_construction():
"""The LLM-path branch of run_job must call discover_mcp_tools() before
the AIAgent construction, so MCP tools are in the registry by the time
the agent asks for its tool schema."""
from cron import scheduler
job = {
"id": "mcp-cron-test",
"name": "mcp-cron-test",
"prompt": "test",
}
call_order = []
def fake_discover():
call_order.append("discover_mcp_tools")
return ["mcp_server1_tool"]
# AIAgent is a class; replace with a recording stub
class _FakeAgent:
def __init__(self, *args, **kwargs):
call_order.append("AIAgent.__init__")
self._kwargs = kwargs
self._interrupt_requested = False
self.quiet_mode = True
def run_conversation(self, *args, **kwargs):
return {
"final_response": "ok",
"messages": [],
}
with patch("tools.mcp_tool.discover_mcp_tools", side_effect=fake_discover), \
patch("run_agent.AIAgent", _FakeAgent), \
patch("cron.scheduler._resolve_cron_enabled_toolsets", return_value=None):
scheduler.run_job(job)
# Discovery must be called, and must be called BEFORE agent construction.
assert "discover_mcp_tools" in call_order, (
"run_job did not call discover_mcp_tools — MCP tools unavailable in cron"
)
d_idx = call_order.index("discover_mcp_tools")
a_idx = call_order.index("AIAgent.__init__")
assert d_idx < a_idx, (
f"discover_mcp_tools was called AFTER AIAgent construction "
f"(indices discover={d_idx}, agent={a_idx}); MCP tools missed the "
f"registry window. Full order: {call_order}"
)
def test_run_job_tolerates_discover_mcp_tools_failure():
"""A broken MCP server must not kill an otherwise working cron job.
discover_mcp_tools() raising should be caught and logged, and the agent
should still run."""
from cron import scheduler
job = {
"id": "mcp-cron-fail",
"name": "mcp-cron-fail",
"prompt": "test",
}
agent_was_constructed = []
class _FakeAgent:
def __init__(self, *args, **kwargs):
agent_was_constructed.append(True)
self._interrupt_requested = False
self.quiet_mode = True
def run_conversation(self, *args, **kwargs):
return {"final_response": "ok", "messages": []}
def fake_discover_that_raises():
raise RuntimeError("MCP server unreachable")
with patch(
"tools.mcp_tool.discover_mcp_tools",
side_effect=fake_discover_that_raises,
), patch("run_agent.AIAgent", _FakeAgent), \
patch("cron.scheduler._resolve_cron_enabled_toolsets", return_value=None):
# Should NOT raise
success, doc, final_response, error = scheduler.run_job(job)
assert agent_was_constructed, (
"AIAgent was not constructed after discover_mcp_tools raised — "
"MCP failure incorrectly killed the cron job"
)
def test_no_agent_cron_job_does_not_initialize_mcp():
"""Cron jobs with no_agent=True are script-only — no AIAgent, no MCP
tools needed. We must NOT pay the MCP init cost for those."""
from cron import scheduler
job = {
"id": "noagent-job",
"name": "noagent-job",
"no_agent": True,
"script": "/nonexistent/script.sh",
}
discover_called = []
def fake_discover():
discover_called.append(True)
return []
# _run_job_script returns (ok, output); make it fail cleanly so we
# don't need a real script file.
with patch("tools.mcp_tool.discover_mcp_tools", side_effect=fake_discover), \
patch("cron.scheduler._run_job_script", return_value=(False, "no such file")):
scheduler.run_job(job)
assert not discover_called, (
"discover_mcp_tools was called for a no_agent job — wasted MCP init "
"for a script-only cron tick"
)