fix(moa): keep virtual provider on MoA client

This commit is contained in:
Gille 2026-06-27 14:03:43 -06:00 committed by Teknium
parent 5db1430af9
commit 66aeda3550
5 changed files with 34 additions and 5 deletions

View file

@ -765,7 +765,7 @@ def init_agent(
)
agent._client_kwargs = {}
agent.api_key = api_key or "moa-virtual-provider"
agent.base_url = base_url or "moa://local"
agent.base_url = "moa://local"
if not agent.quiet_mode:
print(f"🤖 AI Agent initialized with MoA preset: {agent.model}")
elif agent.api_mode == "bedrock_converse":

View file

@ -1549,7 +1549,14 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
)
# ── Build new client ──
if api_mode == "anthropic_messages":
if (new_provider or "").strip().lower() == "moa":
from agent.moa_loop import MoAClient
agent.api_key = api_key or "moa-virtual-provider"
agent.base_url = "moa://local"
agent._client_kwargs = {}
agent.client = MoAClient(agent.model or "default")
elif api_mode == "anthropic_messages":
from agent.anthropic_adapter import (
build_anthropic_client,
resolve_anthropic_token,

View file

@ -247,6 +247,11 @@ def interruptible_api_call(agent, api_kwargs: dict):
invalidate_runtime_client(region)
raise
result["response"] = normalize_converse_response(raw_response)
elif agent.provider == "moa":
# MoA is a virtual chat-completions provider backed by the
# in-process MoAClient facade. Do not rebuild a request-local
# OpenAI client from the virtual runtime metadata.
result["response"] = agent.client.chat.completions.create(**api_kwargs)
else:
request_client = _set_request_client(
agent._create_request_openai_client(

View file

@ -1404,7 +1404,7 @@ def resolve_runtime_provider(
return {
"provider": "moa",
"api_mode": "chat_completions",
"base_url": "http://127.0.0.1/v1",
"base_url": "moa://local",
"api_key": "moa-virtual-provider",
"source": "moa-virtual-provider",
"requested_provider": requested_provider,

View file

@ -41,7 +41,7 @@ moa:
agent = AIAgent(
api_key="moa-virtual-provider",
base_url="moa://local",
base_url="http://127.0.0.1/v1",
model="review",
provider="moa",
quiet_mode=True,
@ -50,10 +50,18 @@ moa:
enabled_toolsets=["file"],
max_iterations=1,
)
monkeypatch.setattr(
agent,
"_create_request_openai_client",
lambda *_args, **_kwargs: (_ for _ in ()).throw(
AssertionError("MoA calls must use MoAClient, not a request OpenAI client")
),
)
result = agent.run_conversation("solve this")
assert result["final_response"] == "aggregator acted"
assert agent.base_url == "moa://local"
assert [(c["task"], c["provider"], c["model"]) for c in calls] == [
("moa_reference", "openai-codex", "gpt-5.5"),
("moa_aggregator", "openrouter", "anthropic/claude-opus-4.8"),
@ -61,6 +69,16 @@ moa:
assert calls[1]["tools"] is not None
def test_moa_runtime_provider_uses_virtual_endpoint():
    """Resolving the ``moa`` provider must report the virtual endpoint.

    The resolved runtime mapping is expected to carry the ``moa`` provider
    name, the ``moa://local`` base URL and the ``moa-virtual-provider``
    placeholder API key.
    """
    from hermes_cli.runtime_provider import resolve_runtime_provider

    resolved = resolve_runtime_provider(requested="moa", target_model="review")

    # Checked in the same order as the individual fields are listed here, so
    # the first mismatching field is the one that fails the test.
    expected_fields = {
        "provider": "moa",
        "base_url": "moa://local",
        "api_key": "moa-virtual-provider",
    }
    for field_name, expected_value in expected_fields.items():
        assert resolved[field_name] == expected_value
def test_moa_does_not_cap_output_tokens(monkeypatch, tmp_path):
"""MoA must not inject an output cap on reference or aggregator calls.
@ -459,4 +477,3 @@ def test_moa_facade_reruns_references_on_new_turn(monkeypatch, tmp_path):
# 2 references × 2 distinct turns = 4 reference runs.
assert len(ref_runs) == 4