fix(moa): pin chat_completions on live switch to a MoA preset

The gateway/CLI /model switch path (switch_model in agent_runtime_helpers) built the MoAClient facade but left agent.api_mode at the value determine_api_mode / the resolved aggregator transport produced (e.g. codex_responses or anthropic_messages). The conversation loop dispatches on agent.api_mode, so a non-chat_completions value made the primary/acting call go through client.responses.create — which the MoAClient facade has no .responses for — and fall through to the moa://local placeholder, 404 three times, then fall back to a reference model (issues #54259, #54669). agent_init.py already pins api_mode=chat_completions for provider==moa; mirror that in the live switch so the primary call always routes through MoAClient.chat.completions. The aggregator's real transport is resolved and applied inside the reference/aggregator fan-out, not on the outer call.
2026-07-01 12:02:05 +00:00 · 2026-06-30 03:22:04 -07:00 · 2026-06-30 03:22:04 -07:00 · 1366f376d6
commit 1366f376d6
parent d76ca3a7f2
2 changed files with 96 additions and 0 deletions
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@ -1625,6 +1625,18 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
        if (new_provider or "").strip().lower() == "moa":
            from agent.moa_loop import MoAClient

+            # The MoA virtual provider speaks only chat.completions via the
+            # MoAClient facade — the aggregator's real transport
+            # (codex_responses / anthropic_messages) is resolved and applied
+            # *inside* the reference/aggregator fan-out, never on the outer
+            # primary call. determine_api_mode("moa", ...) above may have left
+            # api_mode set to the aggregator's transport; if the conversation
+            # loop sees that, it dispatches client.responses.create (which the
+            # facade has no .responses for) and the call falls through to the
+            # moa://local placeholder → HTTP 404 → fallback to a reference
+            # model. Pin chat_completions here so the primary call always goes
+            # through MoAClient.chat.completions, matching agent_init.py.
+            agent.api_mode = "chat_completions"
            agent.api_key = api_key or "moa-virtual-provider"
            agent.base_url = "moa://local"
            agent._client_kwargs = {}
--- a/tests/agent/test_moa_switch_api_mode.py
+++ b/tests/agent/test_moa_switch_api_mode.py
@ -0,0 +1,84 @@
+"""Regression test for MoA primary-call routing on persisted preset switches.
+
+Issue #54259 / #54669: switching a live agent to a MoA preset (the gateway
+``/model <preset>`` path) built the MoAClient facade but left ``agent.api_mode``
+set to whatever ``determine_api_mode`` / the resolved aggregator transport
+produced (e.g. ``codex_responses`` or ``anthropic_messages``). The conversation
+loop dispatches on ``agent.api_mode``, so a non-chat_completions value made it
+call ``client.responses.create`` — which the MoAClient facade has no
+``.responses`` for — and the call fell through to the ``moa://local``
+placeholder, 404'd three times, then fell back to a reference model.
+
+``agent_init.py`` already pins ``api_mode = "chat_completions"`` for
+``provider == "moa"``; ``switch_model`` (the live in-place swap) must do the
+same so the primary call always routes through ``MoAClient.chat.completions``.
+"""
+
+from __future__ import annotations
+
+import types
+
+import pytest
+
+
+def _make_fake_agent():
+    """A minimal stand-in carrying only the attributes switch_model touches."""
+    agent = types.SimpleNamespace()
+    agent.model = "minimax-m3"
+    agent.provider = "opencode-go"
+    agent.api_mode = "anthropic_messages"
+    agent.api_key = "old-key"
+    agent.base_url = "https://old.example/v1"
+    agent.client = object()
+    agent._client_kwargs = {"base_url": "https://old.example/v1"}
+    agent._config_context_length = 123456
+    agent._transport_cache = {}
+    agent.quiet_mode = True
+    return agent
+
+
+@pytest.mark.parametrize(
+    "incoming_api_mode",
+    ["codex_responses", "anthropic_messages", "chat_completions", ""],
+)
+def test_switch_to_moa_pins_chat_completions(monkeypatch, incoming_api_mode):
+    """Switching to provider=moa must force api_mode=chat_completions.
+
+    No matter what transport the resolver/aggregator implies for the preset,
+    the outer agent.api_mode must end up chat_completions so the conversation
+    loop dispatches through the MoAClient chat.completions facade rather than
+    .responses.create against the moa://local placeholder.
+    """
+    from agent import agent_runtime_helpers as arh
+
+    # Neutralize the post-swap machinery that needs a real AIAgent (credential
+    # pool reload, context-compressor refresh, primary-runtime bookkeeping).
+    # We only assert the api_mode invariant set in the moa client-build branch.
+    monkeypatch.setattr(arh, "load_pool", lambda *a, **k: None, raising=False)
+
+    agent = _make_fake_agent()
+    try:
+        arh.switch_model(
+            agent,
+            new_model="frontier",
+            new_provider="moa",
+            api_key="moa-virtual-provider",
+            base_url="moa://local",
+            api_mode=incoming_api_mode,
+        )
+    except Exception:
+        # switch_model does post-swap work (compressor, pool, runtime) that may
+        # raise against a fake agent. The runtime-field swap — including the
+        # api_mode pin in the moa branch — happens before any of that, so the
+        # invariant we care about is already set even if a later step blew up.
+        pass
+
+    assert agent.provider == "moa"
+    assert agent.base_url == "moa://local"
+    assert agent.api_mode == "chat_completions", (
+        f"MoA switch left api_mode={agent.api_mode!r}; the primary call would "
+        "dispatch .responses.create / anthropic_messages against moa://local "
+        "instead of MoAClient.chat.completions (issue #54259)."
+    )
+    # The MoAClient facade should be installed as the client.
+    assert type(agent.client).__name__ == "MoAClient"