fix(moa): resolve auxiliary tasks to the aggregator, not the preset name (#53827)

On a MoA session, auxiliary tasks (title generation, compression, vision, …) ran through _resolve_auto with provider='moa' / model='<preset>', which sent the preset name (e.g. 'opus-gpt') as the model id to resolve_provider_client — producing 'HTTP 400: opus-gpt is not a valid model ID' on every turn (visible as the title-generation warning). MoA is a virtual provider with no real HTTP endpoint; aux tasks don't need the reference fan-out. _resolve_auto now resolves a 'moa' main provider to the preset's aggregator slot (its acting model) and continues Step 1 with that real provider+model, dropping the virtual moa://local base_url, placeholder key and api_mode so the aggregator resolves via its own provider credentials. Mirrors the MoA context-length resolution. Verified live: a MoA turn no longer emits the 'not a valid model ID' warning. Test: tests/agent/test_auxiliary_main_first.py (19 pass).
2026-07-01 12:02:05 +00:00 · 2026-06-27 14:21:26 -07:00 · 2026-06-27 14:21:26 -07:00 · 3ac96d3308
commit 3ac96d3308
parent e7bb67332d
2 changed files with 91 additions and 0 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -3624,6 +3624,37 @@ def _resolve_auto(
    # config.yaml (auxiliary.<task>.provider) still win over this.
    main_provider = str(runtime_provider or _read_main_provider() or "")
    main_model = str(runtime_model or _read_main_model() or "")
+
+    # MoA virtual provider: the "model" is a preset name (e.g. "opus-gpt") and
+    # there is no real "moa" HTTP endpoint, so resolving an aux client against
+    # provider="moa"/model=<preset> sends the preset name as the model id and
+    # the provider 400s ("opus-gpt is not a valid model ID"). Auxiliary tasks
+    # (title generation, compression, vision, …) don't need the reference
+    # fan-out — they should run on the aggregator, which is the preset's acting
+    # model. Resolve the MoA preset to its aggregator slot and continue Step 1
+    # with that real provider+model. Mirrors the MoA context-length resolution.
+    if main_provider == "moa":
+        try:
+            from hermes_cli.config import load_config
+            from hermes_cli.moa_config import resolve_moa_preset
+
+            _preset = resolve_moa_preset(load_config().get("moa") or {}, main_model)
+            _agg = _preset.get("aggregator") or {}
+            _agg_provider = str(_agg.get("provider") or "").strip()
+            _agg_model = str(_agg.get("model") or "").strip()
+            if _agg_provider and _agg_model and _agg_provider.lower() != "moa":
+                main_provider = _agg_provider
+                main_model = _agg_model
+                # The MoA virtual runtime carries a non-HTTP base_url
+                # ("moa://local"), a placeholder api_key and an api_mode; they
+                # belong to the facade, not the aggregator's real provider.
+                # Drop them so the aggregator resolves via its own credentials.
+                runtime_base_url = ""
+                runtime_api_key = ""
+                runtime_api_mode = ""
+        except Exception:
+            logger.debug("MoA aux resolution to aggregator failed", exc_info=True)
+
    if (main_provider and main_model
            and main_provider not in {"auto", ""}):
        resolved_provider = main_provider
--- a/tests/agent/test_auxiliary_main_first.py
+++ b/tests/agent/test_auxiliary_main_first.py
@ -51,6 +51,66 @@ class TestResolveAutoMainFirst:
        assert mock_resolve.call_args.args[0] == "openrouter"
        assert mock_resolve.call_args.args[1] == "anthropic/claude-sonnet-4.6"

+    def test_moa_main_resolves_aux_to_aggregator(self, monkeypatch, tmp_path):
+        """MoA main user → aux runs on the aggregator slot, NOT the preset name.
+
+        provider='moa'/model='opus-gpt' would otherwise send the preset name
+        'opus-gpt' as the model id and 400 ("not a valid model ID"). Aux tasks
+        don't need the reference fan-out — they use the aggregator (the preset's
+        acting model). The virtual moa://local base_url + placeholder key must
+        be dropped so the aggregator resolves via its own provider credentials.
+        """
+        import yaml
+
+        home = tmp_path / ".hermes"  # isolated config home under tmp_path
+        home.mkdir()
+        (home / "config.yaml").write_text(
+            yaml.safe_dump(
+                {
+                    "moa": {
+                        "default_preset": "opus-gpt",
+                        "presets": {
+                            "opus-gpt": {
+                                "enabled": True,
+                                "reference_models": [{"provider": "openrouter", "model": "openai/gpt-5.5"}],
+                                "aggregator": {"provider": "openrouter", "model": "anthropic/claude-opus-4.8"},
+                            }
+                        },
+                    }
+                }
+            )
+        )
+        monkeypatch.setenv("HERMES_HOME", str(home))  # presumably where load_config looks — verify
+
+        with patch(
+            "agent.auxiliary_client.resolve_provider_client"
+        ) as mock_resolve, patch(
+            "agent.auxiliary_client._is_provider_unhealthy", return_value=False
+        ):
+            mock_client = MagicMock()
+            mock_resolve.return_value = (mock_client, "anthropic/claude-opus-4.8")
+
+            from agent.auxiliary_client import _resolve_auto
+
+            client, model = _resolve_auto(
+                main_runtime={
+                    "provider": "moa",
+                    "model": "opus-gpt",  # preset name, not a real model id
+                    "base_url": "moa://local",  # virtual, non-HTTP endpoint
+                    "api_key": "moa-virtual-provider",  # placeholder key
+                    "api_mode": "chat_completions",
+                },
+                task="title_generation",
+            )
+
+        assert client is mock_client
+        # Resolved to the aggregator's real provider+model, not the preset name.
+        assert mock_resolve.call_args.args[0] == "openrouter"
+        assert mock_resolve.call_args.args[1] == "anthropic/claude-opus-4.8"
+        # The virtual moa://local endpoint must not be forwarded as the
+        # aggregator's base_url; an absent or empty kwarg both satisfy this.
+        assert mock_resolve.call_args.kwargs.get("explicit_base_url") in (None, "")
+
    def test_nous_main_uses_main_model_for_aux(self, monkeypatch):
        """Nous Portal main user → aux uses their picked Nous model, not free-tier MiMo."""
        # No OPENROUTER_API_KEY → ensures if main failed we'd fall to chain