fix(moa): pin chat_completions on live switch to a MoA preset

The gateway/CLI /model switch path (switch_model in agent_runtime_helpers)
built the MoAClient facade but left agent.api_mode at the value
determine_api_mode / the resolved aggregator transport produced (e.g.
codex_responses or anthropic_messages). The conversation loop dispatches on
agent.api_mode, so a non-chat_completions value sent the primary/acting call
through client.responses.create. The MoAClient facade has no .responses
attribute, so the call fell through to the moa://local placeholder, returned
404 three times, and then fell back to a reference model (issues #54259,
#54669).

agent_init.py already pins api_mode=chat_completions for provider==moa; mirror
that in the live switch so the primary call always routes through
MoAClient.chat.completions. The aggregator's real transport is resolved and
applied inside the reference/aggregator fan-out, not on the outer call.
This commit is contained in:
teknium1 2026-06-30 03:22:04 -07:00 committed by Teknium
parent d76ca3a7f2
commit 1366f376d6
2 changed files with 96 additions and 0 deletions

View file

@ -1625,6 +1625,18 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
if (new_provider or "").strip().lower() == "moa":
from agent.moa_loop import MoAClient
# The MoA virtual provider speaks only chat.completions via the
# MoAClient facade — the aggregator's real transport
# (codex_responses / anthropic_messages) is resolved and applied
# *inside* the reference/aggregator fan-out, never on the outer
# primary call. determine_api_mode("moa", ...) above may have left
# api_mode set to the aggregator's transport; if the conversation
# loop sees that, it dispatches client.responses.create (which the
# facade has no .responses for) and the call falls through to the
# moa://local placeholder → HTTP 404 → fallback to a reference
# model. Pin chat_completions here so the primary call always goes
# through MoAClient.chat.completions, matching agent_init.py.
agent.api_mode = "chat_completions"
agent.api_key = api_key or "moa-virtual-provider"
agent.base_url = "moa://local"
agent._client_kwargs = {}

View file

@ -0,0 +1,84 @@
"""Regression test for MoA primary-call routing on persisted preset switches.
Issue #54259 / #54669: switching a live agent to a MoA preset (the gateway
``/model <preset>`` path) built the MoAClient facade but left ``agent.api_mode``
set to whatever ``determine_api_mode`` / the resolved aggregator transport
produced (e.g. ``codex_responses`` or ``anthropic_messages``). The conversation
loop dispatches on ``agent.api_mode``, so a non-chat_completions value made it
call ``client.responses.create``. The MoAClient facade has no ``.responses``
attribute, so the call fell through to the ``moa://local`` placeholder, 404'd
three times, then fell back to a reference model.
``agent_init.py`` already pins ``api_mode = "chat_completions"`` for
``provider == "moa"``; ``switch_model`` (the live in-place swap) must do the
same so the primary call always routes through ``MoAClient.chat.completions``.
"""
from __future__ import annotations
import types
import pytest
def _make_fake_agent():
    """Return a bare-bones agent stub with only the fields switch_model uses.

    The stub starts out configured for a non-MoA provider with an
    ``anthropic_messages`` transport, so a later switch to ``moa`` has
    something observable to overwrite.
    """
    stale_endpoint = "https://old.example/v1"
    return types.SimpleNamespace(
        model="minimax-m3",
        provider="opencode-go",
        api_mode="anthropic_messages",
        api_key="old-key",
        base_url=stale_endpoint,
        # Opaque placeholder; the test expects it to be replaced on switch.
        client=object(),
        _client_kwargs={"base_url": stale_endpoint},
        _config_context_length=123456,
        _transport_cache={},
        quiet_mode=True,
    )
@pytest.mark.parametrize(
    "incoming_api_mode",
    ["codex_responses", "anthropic_messages", "chat_completions", ""],
)
def test_switch_to_moa_pins_chat_completions(monkeypatch, incoming_api_mode):
    """Switching to provider=moa must force api_mode=chat_completions.

    No matter what transport the resolver/aggregator implies for the preset,
    the outer agent.api_mode must end up chat_completions so the conversation
    loop dispatches through the MoAClient chat.completions facade rather than
    .responses.create against the moa://local placeholder.

    Args:
        monkeypatch: pytest fixture used to stub out ``load_pool``.
        incoming_api_mode: The ``api_mode`` value handed to ``switch_model``;
            parametrized over every transport plus the empty string.
    """
    from agent import agent_runtime_helpers as arh

    # Neutralize the post-swap machinery that needs a real AIAgent (credential
    # pool reload, context-compressor refresh, primary-runtime bookkeeping).
    # We only assert the api_mode invariant set in the moa client-build branch.
    monkeypatch.setattr(arh, "load_pool", lambda *a, **k: None, raising=False)

    agent = _make_fake_agent()
    post_swap_error = None
    try:
        arh.switch_model(
            agent,
            new_model="frontier",
            new_provider="moa",
            api_key="moa-virtual-provider",
            base_url="moa://local",
            api_mode=incoming_api_mode,
        )
    except Exception as exc:
        # switch_model does post-swap work (compressor, pool, runtime) that may
        # raise against a fake agent. The runtime-field swap — including the
        # api_mode pin in the moa branch — is expected to happen before any of
        # that, so the invariant we care about should already be set even if a
        # later step blew up. Keep the error instead of discarding it so a
        # failing assertion below can report what actually went wrong.
        post_swap_error = exc

    # Suffix for failure messages; empty when switch_model returned normally.
    raised_note = (
        f" [switch_model raised {post_swap_error!r}]"
        if post_swap_error is not None
        else ""
    )

    assert agent.provider == "moa", (
        f"provider is {agent.provider!r}, expected 'moa'.{raised_note}"
    )
    assert agent.base_url == "moa://local", (
        f"base_url is {agent.base_url!r}, expected 'moa://local'.{raised_note}"
    )
    assert agent.api_mode == "chat_completions", (
        f"MoA switch left api_mode={agent.api_mode!r}; the primary call would "
        "dispatch .responses.create / anthropic_messages against moa://local "
        "instead of MoAClient.chat.completions (issue #54259)."
        f"{raised_note}"
    )
    # The MoAClient facade should be installed as the client. Compared by class
    # name so the test does not have to import the facade module itself.
    assert type(agent.client).__name__ == "MoAClient", (
        f"client is a {type(agent.client).__name__}, expected MoAClient."
        f"{raised_note}"
    )