From 66aeda35501d4998fddbc87fde92b3c25d1b4486 Mon Sep 17 00:00:00 2001 From: Gille <4317663+helix4u@users.noreply.github.com> Date: Sat, 27 Jun 2026 14:03:43 -0600 Subject: [PATCH] fix(moa): keep virtual provider on MoA client --- agent/agent_init.py | 2 +- agent/agent_runtime_helpers.py | 9 ++++++++- agent/chat_completion_helpers.py | 5 +++++ hermes_cli/runtime_provider.py | 2 +- tests/run_agent/test_moa_loop_mode.py | 21 +++++++++++++++++++-- 5 files changed, 34 insertions(+), 5 deletions(-) diff --git a/agent/agent_init.py b/agent/agent_init.py index 807c44b78e8..afa3d3aa62a 100644 --- a/agent/agent_init.py +++ b/agent/agent_init.py @@ -765,7 +765,7 @@ def init_agent( ) agent._client_kwargs = {} agent.api_key = api_key or "moa-virtual-provider" - agent.base_url = base_url or "moa://local" + agent.base_url = "moa://local" if not agent.quiet_mode: print(f"🤖 AI Agent initialized with MoA preset: {agent.model}") elif agent.api_mode == "bedrock_converse": diff --git a/agent/agent_runtime_helpers.py b/agent/agent_runtime_helpers.py index 1f7a3b0b60f..85c0388a1df 100644 --- a/agent/agent_runtime_helpers.py +++ b/agent/agent_runtime_helpers.py @@ -1549,7 +1549,14 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo ) # ── Build new client ── - if api_mode == "anthropic_messages": + if (new_provider or "").strip().lower() == "moa": + from agent.moa_loop import MoAClient + + agent.api_key = api_key or "moa-virtual-provider" + agent.base_url = "moa://local" + agent._client_kwargs = {} + agent.client = MoAClient(agent.model or "default") + elif api_mode == "anthropic_messages": from agent.anthropic_adapter import ( build_anthropic_client, resolve_anthropic_token, diff --git a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py index 499d9411d48..680125c7611 100644 --- a/agent/chat_completion_helpers.py +++ b/agent/chat_completion_helpers.py @@ -247,6 +247,11 @@ def interruptible_api_call(agent, api_kwargs: dict): invalidate_runtime_client(region) raise result["response"] = normalize_converse_response(raw_response) + elif agent.provider == "moa": + # MoA is a virtual chat-completions provider backed by the + # in-process MoAClient facade. Do not rebuild a request-local + # OpenAI client from the virtual runtime metadata. + result["response"] = agent.client.chat.completions.create(**api_kwargs) else: request_client = _set_request_client( agent._create_request_openai_client( diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index c6f6db9fa75..c2e5e4b3d13 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -1404,7 +1404,7 @@ def resolve_runtime_provider( return { "provider": "moa", "api_mode": "chat_completions", - "base_url": "http://127.0.0.1/v1", + "base_url": "moa://local", "api_key": "moa-virtual-provider", "source": "moa-virtual-provider", "requested_provider": requested_provider, diff --git a/tests/run_agent/test_moa_loop_mode.py b/tests/run_agent/test_moa_loop_mode.py index af02cc1ff59..2bf5624df69 100644 --- a/tests/run_agent/test_moa_loop_mode.py +++ b/tests/run_agent/test_moa_loop_mode.py @@ -41,7 +41,7 @@ moa: agent = AIAgent( api_key="moa-virtual-provider", - base_url="moa://local", + base_url="http://127.0.0.1/v1", model="review", provider="moa", quiet_mode=True, @@ -50,10 +50,18 @@ moa: enabled_toolsets=["file"], max_iterations=1, ) + monkeypatch.setattr( + agent, + "_create_request_openai_client", + lambda *_args, **_kwargs: (_ for _ in ()).throw( + AssertionError("MoA calls must use MoAClient, not a request OpenAI client") + ), + ) result = agent.run_conversation("solve this") assert result["final_response"] == "aggregator acted" + assert agent.base_url == "moa://local" assert [(c["task"], c["provider"], c["model"]) for c in calls] == [ ("moa_reference", "openai-codex", "gpt-5.5"), ("moa_aggregator", "openrouter", "anthropic/claude-opus-4.8"), @@ -61,6 +69,16 @@ moa: assert calls[1]["tools"] is not None +def test_moa_runtime_provider_uses_virtual_endpoint(): + from hermes_cli.runtime_provider import resolve_runtime_provider + + runtime = resolve_runtime_provider(requested="moa", target_model="review") + + assert runtime["provider"] == "moa" + assert runtime["base_url"] == "moa://local" + assert runtime["api_key"] == "moa-virtual-provider" + + def test_moa_does_not_cap_output_tokens(monkeypatch, tmp_path): """MoA must not inject an output cap on reference or aggregator calls. @@ -459,4 +477,3 @@ def test_moa_facade_reruns_references_on_new_turn(monkeypatch, tmp_path): # 2 references × 2 distinct turns = 4 reference runs. assert len(ref_runs) == 4 -