fix(moa): keep virtual provider on MoA client

2026-07-01 12:02:05 +00:00 · 2026-06-27 14:03:43 -06:00 · 2026-06-27 14:03:43 -06:00 · 66aeda3550
commit 66aeda3550
parent 5db1430af9
5 changed files with 34 additions and 5 deletions
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -765,7 +765,7 @@ def init_agent(
        )
        agent._client_kwargs = {}
        agent.api_key = api_key or "moa-virtual-provider"
-        agent.base_url = base_url or "moa://local"
+        agent.base_url = "moa://local"
        if not agent.quiet_mode:
            print(f"🤖 AI Agent initialized with MoA preset: {agent.model}")
    elif agent.api_mode == "bedrock_converse":
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -1549,7 +1549,14 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
                )

        # ── Build new client ──
-        if api_mode == "anthropic_messages":
+        if (new_provider or "").strip().lower() == "moa":
+            from agent.moa_loop import MoAClient
+
+            agent.api_key = api_key or "moa-virtual-provider"
+            agent.base_url = "moa://local"
+            agent._client_kwargs = {}
+            agent.client = MoAClient(agent.model or "default")
+        elif api_mode == "anthropic_messages":
            from agent.anthropic_adapter import (
                build_anthropic_client,
                resolve_anthropic_token,
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@@ -247,6 +247,11 @@ def interruptible_api_call(agent, api_kwargs: dict):
                        invalidate_runtime_client(region)
                    raise
                result["response"] = normalize_converse_response(raw_response)
+            elif agent.provider == "moa":
+                # MoA is a virtual chat-completions provider backed by the
+                # in-process MoAClient facade. Do not rebuild a request-local
+                # OpenAI client from the virtual runtime metadata.
+                result["response"] = agent.client.chat.completions.create(**api_kwargs)
            else:
                request_client = _set_request_client(
                    agent._create_request_openai_client(
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -1404,7 +1404,7 @@ def resolve_runtime_provider(
        return {
            "provider": "moa",
            "api_mode": "chat_completions",
-            "base_url": "http://127.0.0.1/v1",
+            "base_url": "moa://local",
            "api_key": "moa-virtual-provider",
            "source": "moa-virtual-provider",
            "requested_provider": requested_provider,
--- a/tests/run_agent/test_moa_loop_mode.py
+++ b/tests/run_agent/test_moa_loop_mode.py
@@ -41,7 +41,7 @@ moa:

    agent = AIAgent(
        api_key="moa-virtual-provider",
-        base_url="moa://local",
+        base_url="http://127.0.0.1/v1",
        model="review",
        provider="moa",
        quiet_mode=True,
@@ -50,10 +50,18 @@ moa:
        enabled_toolsets=["file"],
        max_iterations=1,
    )
+    monkeypatch.setattr(
+        agent,
+        "_create_request_openai_client",
+        lambda *_args, **_kwargs: (_ for _ in ()).throw(
+            AssertionError("MoA calls must use MoAClient, not a request OpenAI client")
+        ),
+    )

    result = agent.run_conversation("solve this")

    assert result["final_response"] == "aggregator acted"
+    assert agent.base_url == "moa://local"
    assert [(c["task"], c["provider"], c["model"]) for c in calls] == [
        ("moa_reference", "openai-codex", "gpt-5.5"),
        ("moa_aggregator", "openrouter", "anthropic/claude-opus-4.8"),
@@ -61,6 +69,16 @@ moa:
    assert calls[1]["tools"] is not None


+def test_moa_runtime_provider_uses_virtual_endpoint():  # pins the runtime dict resolved for provider "moa"
+    from hermes_cli.runtime_provider import resolve_runtime_provider
+
+    runtime = resolve_runtime_provider(requested="moa", target_model="review")
+
+    assert runtime["provider"] == "moa"
+    assert runtime["base_url"] == "moa://local"  # virtual scheme, not the former http://127.0.0.1/v1
+    assert runtime["api_key"] == "moa-virtual-provider"  # fixed literal returned alongside the virtual base_url
+
+
 def test_moa_does_not_cap_output_tokens(monkeypatch, tmp_path):
    """MoA must not inject an output cap on reference or aggregator calls.

@@ -459,4 +477,3 @@ def test_moa_facade_reruns_references_on_new_turn(monkeypatch, tmp_path):

    # 2 references × 2 distinct turns = 4 reference runs.
    assert len(ref_runs) == 4
-