From 3ac96d330892cf0e7d9ad0def1d23b9aa7d50c0f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 27 Jun 2026 14:21:26 -0700 Subject: [PATCH] fix(moa): resolve auxiliary tasks to the aggregator, not the preset name (#53827) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a MoA session, auxiliary tasks (title generation, compression, vision, …) ran through _resolve_auto with provider='moa' / model='<preset>', which sent the preset name (e.g. 'opus-gpt') as the model id to resolve_provider_client — producing 'HTTP 400: opus-gpt is not a valid model ID' on every turn (visible as the title-generation warning). MoA is a virtual provider with no real HTTP endpoint; aux tasks don't need the reference fan-out. _resolve_auto now resolves a 'moa' main provider to the preset's aggregator slot (its acting model) and continues Step 1 with that real provider+model, dropping the virtual moa://local base_url + placeholder key so the aggregator resolves via its own provider credentials. Mirrors the MoA context-length resolution. Verified live: a MoA turn no longer emits the 'not a valid model ID' warning. Test: tests/agent/test_auxiliary_main_first.py (19 pass). --- agent/auxiliary_client.py | 31 ++++++++++++ tests/agent/test_auxiliary_main_first.py | 60 ++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index a4fe065b6d3..9605f4e6cc5 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -3624,6 +3624,37 @@ def _resolve_auto( # config.yaml (auxiliary.<task>.provider) still win over this. main_provider = str(runtime_provider or _read_main_provider() or "") main_model = str(runtime_model or _read_main_model() or "") + + # MoA virtual provider: the "model" is a preset name (e.g. 
"opus-gpt") and + # there is no real "moa" HTTP endpoint, so resolving an aux client against + # provider="moa"/model=<preset> sends the preset name as the model id and + # the provider 400s ("opus-gpt is not a valid model ID"). Auxiliary tasks + # (title generation, compression, vision, …) don't need the reference + # fan-out — they should run on the aggregator, which is the preset's acting + # model. Resolve the MoA preset to its aggregator slot and continue Step 1 + # with that real provider+model. Mirrors the MoA context-length resolution. + if main_provider == "moa": + try: + from hermes_cli.config import load_config + from hermes_cli.moa_config import resolve_moa_preset + + _preset = resolve_moa_preset(load_config().get("moa") or {}, main_model) + _agg = _preset.get("aggregator") or {} + _agg_provider = str(_agg.get("provider") or "").strip() + _agg_model = str(_agg.get("model") or "").strip() + if _agg_provider and _agg_model and _agg_provider.lower() != "moa": + main_provider = _agg_provider + main_model = _agg_model + # The MoA virtual runtime carries a non-HTTP base_url + # ("moa://local") and a placeholder api_key; they belong to the + # facade, not the aggregator's real provider. Drop them so the + # aggregator resolves through its own provider credentials. 
+ runtime_base_url = "" + runtime_api_key = "" + runtime_api_mode = "" + except Exception: + logger.debug("MoA aux resolution to aggregator failed", exc_info=True) + if (main_provider and main_model and main_provider not in {"auto", ""}): resolved_provider = main_provider diff --git a/tests/agent/test_auxiliary_main_first.py b/tests/agent/test_auxiliary_main_first.py index f8a681ebfa9..94181d468d4 100644 --- a/tests/agent/test_auxiliary_main_first.py +++ b/tests/agent/test_auxiliary_main_first.py @@ -51,6 +51,66 @@ class TestResolveAutoMainFirst: assert mock_resolve.call_args.args[0] == "openrouter" assert mock_resolve.call_args.args[1] == "anthropic/claude-sonnet-4.6" + def test_moa_main_resolves_aux_to_aggregator(self, monkeypatch, tmp_path): + """MoA main user → aux runs on the aggregator slot, NOT the preset name. + + provider='moa'/model='opus-gpt' would otherwise send the preset name + 'opus-gpt' as the model id and 400 ("not a valid model ID"). Aux tasks + don't need the reference fan-out — they use the aggregator (the preset's + acting model). The virtual moa://local base_url + placeholder key must + be dropped so the aggregator resolves via its own provider credentials. 
+ """ + import yaml + + home = tmp_path / ".hermes" + home.mkdir() + (home / "config.yaml").write_text( + yaml.safe_dump( + { + "moa": { + "default_preset": "opus-gpt", + "presets": { + "opus-gpt": { + "enabled": True, + "reference_models": [{"provider": "openrouter", "model": "openai/gpt-5.5"}], + "aggregator": {"provider": "openrouter", "model": "anthropic/claude-opus-4.8"}, + } + }, + } + } + ) + ) + monkeypatch.setenv("HERMES_HOME", str(home)) + + with patch( + "agent.auxiliary_client.resolve_provider_client" + ) as mock_resolve, patch( + "agent.auxiliary_client._is_provider_unhealthy", return_value=False + ): + mock_client = MagicMock() + mock_resolve.return_value = (mock_client, "anthropic/claude-opus-4.8") + + from agent.auxiliary_client import _resolve_auto + + client, model = _resolve_auto( + main_runtime={ + "provider": "moa", + "model": "opus-gpt", + "base_url": "moa://local", + "api_key": "moa-virtual-provider", + "api_mode": "chat_completions", + }, + task="title_generation", + ) + + assert client is mock_client + # Resolved to the aggregator's real provider+model, not the preset name. + assert mock_resolve.call_args.args[0] == "openrouter" + assert mock_resolve.call_args.args[1] == "anthropic/claude-opus-4.8" + # The virtual moa://local endpoint must not be forwarded as the + # aggregator's base_url. + assert mock_resolve.call_args.kwargs.get("explicit_base_url") in (None, "") + def test_nous_main_uses_main_model_for_aux(self, monkeypatch): """Nous Portal main user → aux uses their picked Nous model, not free-tier MiMo.""" # No OPENROUTER_API_KEY → ensures if main failed we'd fall to chain