diff --git a/agent/agent_runtime_helpers.py b/agent/agent_runtime_helpers.py index 72db4477e4b..5ab2c3d2d2e 100644 --- a/agent/agent_runtime_helpers.py +++ b/agent/agent_runtime_helpers.py @@ -1625,6 +1625,18 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo if (new_provider or "").strip().lower() == "moa": from agent.moa_loop import MoAClient + # The MoA virtual provider speaks only chat.completions via the + # MoAClient facade — the aggregator's real transport + # (codex_responses / anthropic_messages) is resolved and applied + # *inside* the reference/aggregator fan-out, never on the outer + # primary call. determine_api_mode("moa", ...) above may have left + # api_mode set to the aggregator's transport; if the conversation + # loop sees that, it dispatches client.responses.create (which the + # facade has no .responses for) and the call falls through to the + # moa://local placeholder → HTTP 404 → fallback to a reference + # model. Pin chat_completions here so the primary call always goes + # through MoAClient.chat.completions, matching agent_init.py. + agent.api_mode = "chat_completions" agent.api_key = api_key or "moa-virtual-provider" agent.base_url = "moa://local" agent._client_kwargs = {} diff --git a/tests/agent/test_moa_switch_api_mode.py b/tests/agent/test_moa_switch_api_mode.py new file mode 100644 index 00000000000..3abf48dae30 --- /dev/null +++ b/tests/agent/test_moa_switch_api_mode.py @@ -0,0 +1,84 @@ +"""Regression test for MoA primary-call routing on persisted preset switches. + +Issue #54259 / #54669: switching a live agent to a MoA preset (the gateway +``/model <preset>`` path) built the MoAClient facade but left ``agent.api_mode`` +set to whatever ``determine_api_mode`` / the resolved aggregator transport +produced (e.g. ``codex_responses`` or ``anthropic_messages``). 
def _make_fake_agent():
    """Build a minimal stand-in carrying only the attributes switch_model touches.

    The namespace starts on a non-MoA provider with ``api_mode`` set to
    ``anthropic_messages`` and a placeholder ``client`` object, so the
    assertions in the test can tell a real swap from a no-op.
    """
    return types.SimpleNamespace(
        model="minimax-m3",
        provider="opencode-go",
        api_mode="anthropic_messages",
        api_key="old-key",
        base_url="https://old.example/v1",
        client=object(),
        _client_kwargs={"base_url": "https://old.example/v1"},
        _config_context_length=123456,
        _transport_cache={},
        quiet_mode=True,
    )


@pytest.mark.parametrize(
    "incoming_api_mode",
    ["codex_responses", "anthropic_messages", "chat_completions", ""],
)
def test_switch_to_moa_pins_chat_completions(monkeypatch, incoming_api_mode):
    """Switching to provider=moa must force api_mode=chat_completions.

    No matter what transport the resolver/aggregator implies for the preset,
    the outer agent.api_mode must end up chat_completions so the conversation
    loop dispatches through the MoAClient chat.completions facade rather than
    .responses.create against the moa://local placeholder.

    Args:
        monkeypatch: pytest fixture used to stub ``load_pool`` on the helper
            module for the duration of the test.
        incoming_api_mode: the ``api_mode`` value handed to ``switch_model``;
            every parametrized value must be overridden by the MoA pin.
    """
    from agent import agent_runtime_helpers as arh

    # Neutralize the post-swap machinery that needs a real AIAgent (credential
    # pool reload, context-compressor refresh, primary-runtime bookkeeping).
    # We only assert the api_mode invariant set in the moa client-build branch.
    # raising=False: do not fail if the module has no ``load_pool`` attribute.
    monkeypatch.setattr(arh, "load_pool", lambda *a, **k: None, raising=False)

    agent = _make_fake_agent()
    post_swap_error = None
    try:
        arh.switch_model(
            agent,
            new_model="frontier",
            new_provider="moa",
            api_key="moa-virtual-provider",
            base_url="moa://local",
            api_mode=incoming_api_mode,
        )
    except Exception as exc:
        # Deliberate best-effort: switch_model does post-swap work (compressor,
        # pool, runtime) that may raise against a fake agent. Per the original
        # test's note, the runtime-field swap — including the api_mode pin in
        # the moa branch — happens before any of that. Keep the exception
        # instead of discarding it, so a failing assertion below shows why
        # switch_model stopped early rather than only blaming the pin.
        post_swap_error = exc

    if post_swap_error is None:
        context = ""
    else:
        context = f" [switch_model raised {post_swap_error!r}]"

    assert agent.provider == "moa", (
        f"provider is {agent.provider!r}, expected 'moa'{context}"
    )
    assert agent.base_url == "moa://local", (
        f"base_url is {agent.base_url!r}, expected 'moa://local'{context}"
    )
    assert agent.api_mode == "chat_completions", (
        f"MoA switch left api_mode={agent.api_mode!r}; the primary call would "
        "dispatch .responses.create / anthropic_messages against moa://local "
        "instead of MoAClient.chat.completions (issue #54259)."
        f"{context}"
    )
    # The MoAClient facade should be installed as the client.
    assert type(agent.client).__name__ == "MoAClient", (
        f"client is a {type(agent.client).__name__}, expected MoAClient{context}"
    )