fix(moa): resolve auxiliary tasks to the aggregator, not the preset name (#53827)

On a MoA session, auxiliary tasks (title generation, compression, vision, …)
ran through _resolve_auto with provider='moa' / model='<preset>', which sent
the preset name (e.g. 'opus-gpt') as the model id to resolve_provider_client —
producing 'HTTP 400: opus-gpt is not a valid model ID' on every turn (visible
as the title-generation warning).

MoA is a virtual provider with no real HTTP endpoint; aux tasks don't need the
reference fan-out. _resolve_auto now resolves a 'moa' main provider to the
preset's aggregator slot (its acting model) and continues Step 1 with that real
provider+model, dropping the virtual moa://local base_url + placeholder key so
the aggregator resolves via its own provider credentials. Mirrors the MoA
context-length resolution.

Verified live: a MoA turn no longer emits the 'not a valid model ID' warning.
Test: tests/agent/test_auxiliary_main_first.py (19 pass).
This commit is contained in:
Teknium 2026-06-27 14:21:26 -07:00 committed by GitHub
parent e7bb67332d
commit 3ac96d3308
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 91 additions and 0 deletions

View file

@ -3624,6 +3624,37 @@ def _resolve_auto(
# config.yaml (auxiliary.<task>.provider) still win over this.
main_provider = str(runtime_provider or _read_main_provider() or "")
main_model = str(runtime_model or _read_main_model() or "")
# MoA virtual provider: the "model" is a preset name (e.g. "opus-gpt") and
# there is no real "moa" HTTP endpoint, so resolving an aux client against
# provider="moa"/model=<preset> sends the preset name as the model id and
# the provider 400s ("opus-gpt is not a valid model ID"). Auxiliary tasks
# (title generation, compression, vision, …) don't need the reference
# fan-out — they should run on the aggregator, which is the preset's acting
# model. Resolve the MoA preset to its aggregator slot and continue Step 1
# with that real provider+model. Mirrors the MoA context-length resolution.
if main_provider == "moa":
try:
from hermes_cli.config import load_config
from hermes_cli.moa_config import resolve_moa_preset
_preset = resolve_moa_preset(load_config().get("moa") or {}, main_model)
_agg = _preset.get("aggregator") or {}
_agg_provider = str(_agg.get("provider") or "").strip()
_agg_model = str(_agg.get("model") or "").strip()
if _agg_provider and _agg_model and _agg_provider.lower() != "moa":
main_provider = _agg_provider
main_model = _agg_model
# The MoA virtual runtime carries a non-HTTP base_url
# ("moa://local") and a placeholder api_key; they belong to the
# facade, not the aggregator's real provider. Drop them so the
# aggregator resolves through its own provider credentials.
runtime_base_url = ""
runtime_api_key = ""
runtime_api_mode = ""
except Exception:
logger.debug("MoA aux resolution to aggregator failed", exc_info=True)
if (main_provider and main_model
and main_provider not in {"auto", ""}):
resolved_provider = main_provider

View file

@ -51,6 +51,66 @@ class TestResolveAutoMainFirst:
assert mock_resolve.call_args.args[0] == "openrouter"
assert mock_resolve.call_args.args[1] == "anthropic/claude-sonnet-4.6"
def test_moa_main_resolves_aux_to_aggregator(self, monkeypatch, tmp_path):
    """MoA main user → aux runs on the aggregator slot, NOT the preset name.

    provider='moa'/model='opus-gpt' would otherwise send the preset name
    'opus-gpt' as the model id and 400 ("not a valid model ID"). Aux tasks
    don't need the reference fan-out — they use the aggregator (the preset's
    acting model).

    The virtual moa://local base_url and the placeholder api_key belong to
    the MoA facade, so neither may reach resolve_provider_client; both are
    asserted below.
    """
    import yaml

    # Values carried by the virtual MoA runtime; they must never be
    # forwarded to the aggregator's real provider.
    virtual_base_url = "moa://local"
    placeholder_key = "moa-virtual-provider"

    aggregator_provider = "openrouter"
    aggregator_model = "anthropic/claude-opus-4.8"

    # Write a config.yaml with a single enabled preset whose aggregator is
    # the provider/model pair the aux client is expected to resolve to.
    home = tmp_path / ".hermes"
    home.mkdir()
    (home / "config.yaml").write_text(
        yaml.safe_dump(
            {
                "moa": {
                    "default_preset": "opus-gpt",
                    "presets": {
                        "opus-gpt": {
                            "enabled": True,
                            "reference_models": [
                                {"provider": "openrouter", "model": "openai/gpt-5.5"},
                            ],
                            "aggregator": {
                                "provider": aggregator_provider,
                                "model": aggregator_model,
                            },
                        }
                    },
                }
            }
        )
    )
    monkeypatch.setenv("HERMES_HOME", str(home))

    with patch(
        "agent.auxiliary_client.resolve_provider_client"
    ) as mock_resolve, patch(
        "agent.auxiliary_client._is_provider_unhealthy", return_value=False
    ):
        mock_client = MagicMock()
        mock_resolve.return_value = (mock_client, aggregator_model)

        from agent.auxiliary_client import _resolve_auto

        # The returned model is not asserted here; only the client identity
        # and the arguments handed to resolve_provider_client are pinned.
        client, _model = _resolve_auto(
            main_runtime={
                "provider": "moa",
                "model": "opus-gpt",
                "base_url": virtual_base_url,
                "api_key": placeholder_key,
                "api_mode": "chat_completions",
            },
            task="title_generation",
        )

    assert client is mock_client

    call = mock_resolve.call_args
    # Resolved to the aggregator's real provider+model, not the preset name.
    assert call.args[0] == aggregator_provider
    assert call.args[1] == aggregator_model
    # The virtual moa://local endpoint must not be forwarded as the
    # aggregator's base_url.
    assert call.kwargs.get("explicit_base_url") in (None, "")
    # Keyword-name-agnostic guard: neither facade value may be passed to
    # resolve_provider_client in any positional or keyword slot.
    forwarded = list(call.args) + list(call.kwargs.values())
    assert virtual_base_url not in forwarded
    assert placeholder_key not in forwarded
def test_nous_main_uses_main_model_for_aux(self, monkeypatch):
"""Nous Portal main user → aux uses their picked Nous model, not free-tier MiMo."""
# No OPENROUTER_API_KEY → ensures if main failed we'd fall to chain