mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
fix(moa): pin chat_completions on live switch to a MoA preset
The gateway/CLI /model switch path (switch_model in agent_runtime_helpers) built the MoAClient facade but left agent.api_mode at the value produced by determine_api_mode / the resolved aggregator transport (e.g. codex_responses or anthropic_messages). The conversation loop dispatches on agent.api_mode, so a non-chat_completions value sent the primary/acting call through client.responses.create. The MoAClient facade has no .responses attribute, so the call fell through to the moa://local placeholder, returned HTTP 404 three times, and then fell back to a reference model (issues #54259, #54669). agent_init.py already pins api_mode=chat_completions for provider==moa; mirror that in the live switch so the primary call always routes through MoAClient.chat.completions. The aggregator's real transport is resolved and applied inside the reference/aggregator fan-out, not on the outer call.
This commit is contained in:
parent
d76ca3a7f2
commit
1366f376d6
2 changed files with 96 additions and 0 deletions
|
|
@ -1625,6 +1625,18 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
|
|||
if (new_provider or "").strip().lower() == "moa":
|
||||
from agent.moa_loop import MoAClient
|
||||
|
||||
# The MoA virtual provider speaks only chat.completions via the
|
||||
# MoAClient facade — the aggregator's real transport
|
||||
# (codex_responses / anthropic_messages) is resolved and applied
|
||||
# *inside* the reference/aggregator fan-out, never on the outer
|
||||
# primary call. determine_api_mode("moa", ...) above may have left
|
||||
# api_mode set to the aggregator's transport; if the conversation
|
||||
# loop sees that, it dispatches client.responses.create (which the
|
||||
# facade has no .responses for) and the call falls through to the
|
||||
# moa://local placeholder → HTTP 404 → fallback to a reference
|
||||
# model. Pin chat_completions here so the primary call always goes
|
||||
# through MoAClient.chat.completions, matching agent_init.py.
|
||||
agent.api_mode = "chat_completions"
|
||||
agent.api_key = api_key or "moa-virtual-provider"
|
||||
agent.base_url = "moa://local"
|
||||
agent._client_kwargs = {}
|
||||
|
|
|
|||
84
tests/agent/test_moa_switch_api_mode.py
Normal file
84
tests/agent/test_moa_switch_api_mode.py
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
"""Regression test for MoA primary-call routing on persisted preset switches.
|
||||
|
||||
Issue #54259 / #54669: switching a live agent to a MoA preset (the gateway
|
||||
``/model <preset>`` path) built the MoAClient facade but left ``agent.api_mode``
|
||||
set to whatever ``determine_api_mode`` / the resolved aggregator transport
|
||||
produced (e.g. ``codex_responses`` or ``anthropic_messages``). The conversation
|
||||
loop dispatches on ``agent.api_mode``, so a non-chat_completions value made it
|
||||
call ``client.responses.create`` — which the MoAClient facade has no
|
||||
``.responses`` for — and the call fell through to the ``moa://local``
|
||||
placeholder, 404'd three times, then fell back to a reference model.
|
||||
|
||||
``agent_init.py`` already pins ``api_mode = "chat_completions"`` for
|
||||
``provider == "moa"``; ``switch_model`` (the live in-place swap) must do the
|
||||
same so the primary call always routes through ``MoAClient.chat.completions``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import types
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _make_fake_agent():
|
||||
"""A minimal stand-in carrying only the attributes switch_model touches."""
|
||||
agent = types.SimpleNamespace()
|
||||
agent.model = "minimax-m3"
|
||||
agent.provider = "opencode-go"
|
||||
agent.api_mode = "anthropic_messages"
|
||||
agent.api_key = "old-key"
|
||||
agent.base_url = "https://old.example/v1"
|
||||
agent.client = object()
|
||||
agent._client_kwargs = {"base_url": "https://old.example/v1"}
|
||||
agent._config_context_length = 123456
|
||||
agent._transport_cache = {}
|
||||
agent.quiet_mode = True
|
||||
return agent
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "incoming_api_mode",
    ["codex_responses", "anthropic_messages", "chat_completions", ""],
)
def test_switch_to_moa_pins_chat_completions(monkeypatch, incoming_api_mode):
    """A switch to ``provider="moa"`` must leave ``api_mode`` at chat_completions.

    Whatever ``api_mode`` the caller passes in (an aggregator transport, the
    already-correct value, or an empty string), the agent must end up with
    ``api_mode == "chat_completions"`` so the conversation loop goes through
    the MoAClient ``chat.completions`` facade instead of dispatching
    ``.responses.create`` against the ``moa://local`` placeholder.
    """
    from agent import agent_runtime_helpers as arh

    # Stub out the credential-pool reload; it is part of the post-swap
    # machinery (pool reload, context-compressor refresh, primary-runtime
    # bookkeeping) that expects a real AIAgent. Only the api_mode invariant
    # set in the moa client-build branch is under test here.
    monkeypatch.setattr(
        arh,
        "load_pool",
        lambda *_args, **_kwargs: None,
        raising=False,
    )

    agent = _make_fake_agent()
    switch_kwargs = {
        "new_model": "frontier",
        "new_provider": "moa",
        "api_key": "moa-virtual-provider",
        "base_url": "moa://local",
        "api_mode": incoming_api_mode,
    }
    try:
        arh.switch_model(agent, **switch_kwargs)
    except Exception:
        # Deliberately tolerated: the post-swap steps (compressor, pool,
        # runtime) may raise against a fake agent. The runtime-field swap,
        # including the api_mode pin in the moa branch, runs before them, so
        # the assertions below still see the state we care about.
        pass

    assert agent.provider == "moa"
    assert agent.base_url == "moa://local"
    assert agent.api_mode == "chat_completions", (
        f"MoA switch left api_mode={agent.api_mode!r}; the primary call would "
        "dispatch .responses.create / anthropic_messages against moa://local "
        "instead of MoAClient.chat.completions (issue #54259)."
    )
    # The swap must also have installed the MoAClient facade as the client.
    assert type(agent.client).__name__ == "MoAClient"
|
||||
Loading…
Add table
Add a link
Reference in a new issue