feat(auxiliary): default 'auto' routing to main model for all users (#11900)

Before: aggregator users (OpenRouter / Nous Portal) running 'auto'
routing for auxiliary tasks — compression, vision, web extraction,
session search, etc. — got routed to a cheap provider-side default
model (Gemini Flash).  Non-aggregator users already got their main
model.  Behavior was inconsistent and surprising — users picked
Claude / GPT / their preferred model, but side tasks ran on
Gemini Flash.

After: 'auto' means "use my main chat model" for every user,
regardless of provider type.  Only when the main provider has no
working client does the fallback chain run (OpenRouter → Nous →
custom → Codex → API-key providers).  Explicit per-task overrides
in config.yaml (auxiliary.<task>.provider / .model) still win —
they are a hard constraint, not subject to the auto policy.

Vision auto-detection follows the same policy: try main provider +
main model first (with _PROVIDER_VISION_MODELS overrides preserved
for providers like xiaomi and zai that ship a dedicated multimodal
model distinct from their chat model).  Aggregator strict vision
backends are fallbacks, not the primary path.

Changes:
  - agent/auxiliary_client.py: _resolve_auto() drops the
    `_AGGREGATOR_PROVIDERS` guard.  resolve_vision_provider_client()
    auto branch unifies aggregator and exotic-provider paths —
    everyone goes through resolve_provider_client() with main_model.
    Dead _AGGREGATOR_PROVIDERS constant removed (was only used by
    the guard we just removed).
  - hermes_cli/main.py: aux config menu copy updated to reflect
    the new semantics ("'auto' means 'use my main model'").
  - tests/agent/test_auxiliary_main_first.py: 12 regression tests
    covering OpenRouter/Nous/DeepSeek main paths, runtime-override
    wins, explicit-config wins, vision override preservation for
    exotic providers, and fallback-chain activation when the main
    provider has no working client.

Co-authored-by: teknium1 <teknium@nousresearch.com>
This commit is contained in:
Teknium 2026-04-17 19:13:23 -07:00 committed by GitHub
parent b449a0e049
commit a155b4a159
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 352 additions and 37 deletions

View file

@ -1075,8 +1075,6 @@ _AUTO_PROVIDER_LABELS = {
"_resolve_api_key_provider": "api-key",
}
_AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})
_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode")
@ -1207,11 +1205,15 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
"""Full auto-detection chain.
Priority:
1. If the user's main provider is NOT an aggregator (OpenRouter / Nous),
use their main provider + main model directly. This ensures users on
Alibaba, DeepSeek, ZAI, etc. get auxiliary tasks handled by the same
provider they already have credentials for — no OpenRouter key needed.
2. OpenRouter → Nous → custom → Codex → API-key providers (original chain).
1. User's main provider + main model, regardless of provider type.
This means auxiliary tasks (compression, vision, web extraction,
session search, etc.) use the same model the user configured for
chat. Users on OpenRouter/Nous get their chosen chat model; users
on DeepSeek/ZAI/Alibaba get theirs; etc. Running aux tasks on the
user's picked model keeps behavior predictable — no surprise
switches to a cheap fallback model for side tasks.
2. OpenRouter → Nous → custom → Codex → API-key providers (fallback
chain, only used when the main provider has no working client).
"""
global auxiliary_is_nous, _stale_base_url_warned
auxiliary_is_nous = False # Reset — _try_nous() will set True if it wins
@ -1241,11 +1243,16 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
)
_stale_base_url_warned = True
# ── Step 1: non-aggregator main provider → use main model directly ──
# ── Step 1: main provider + main model → use them directly ──
#
# This is the primary aux backend for every user. "auto" means
# "use my main chat model for side tasks as well" — including users
# on aggregators (OpenRouter, Nous) who previously got routed to a
# cheap provider-side default. Explicit per-task overrides set via
# config.yaml (auxiliary.<task>.provider) still win over this.
main_provider = runtime_provider or _read_main_provider()
main_model = runtime_model or _read_main_model()
if (main_provider and main_model
and main_provider not in _AGGREGATOR_PROVIDERS
and main_provider not in ("auto", "")):
resolved_provider = main_provider
explicit_base_url = None
@ -1828,34 +1835,31 @@ def resolve_vision_provider_client(
if requested == "auto":
# Vision auto-detection order:
# 1. Active provider + model (user's main chat config)
# 2. OpenRouter (known vision-capable default model)
# 3. Nous Portal (known vision-capable default model)
# 1. User's main provider + main model (including aggregators).
# _PROVIDER_VISION_MODELS provides per-provider vision model
# overrides when the provider has a dedicated multimodal model
# that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
# zai → glm-5v-turbo).
# 2. OpenRouter (vision-capable aggregator fallback)
# 3. Nous Portal (vision-capable aggregator fallback)
# 4. Stop
main_provider = _read_main_provider()
main_model = _read_main_model()
if main_provider and main_provider not in ("auto", ""):
if main_provider in _VISION_AUTO_PROVIDER_ORDER:
# Known strict backend — use its defaults.
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
if sync_client is not None:
return _finalize(main_provider, sync_client, default_model)
else:
# Exotic provider (DeepSeek, Alibaba, Xiaomi, named custom, etc.)
# Use provider-specific vision model if available, otherwise main model.
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using active provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
# Fall back through aggregators.
# Fall back through aggregators (uses their dedicated vision model,
# not the user's main model) when main provider has no client.
for candidate in _VISION_AUTO_PROVIDER_ORDER:
if candidate == main_provider:
continue # already tried above

View file

@ -1705,11 +1705,11 @@ def _aux_config_menu() -> None:
print()
print(" Auxiliary models — side-task routing")
print()
print(" Hermes uses small, fast models for vision, compression, web")
print(" extraction, and other side tasks. \"auto\" lets Hermes pick the")
print(" best available backend automatically (OpenRouter → Nous Portal")
print(" → your main provider). You rarely need to change these —")
print(" override only if you want a specific model for a task.")
print(" Side tasks (vision, compression, web extraction, etc.) default")
print(" to your main chat model. \"auto\" means \"use my main model\"")
print(" Hermes only falls back to a lightweight backend (OpenRouter,")
print(" Nous Portal) if the main model is unavailable. Override a")
print(" task below if you want it pinned to a specific provider/model.")
print()
# Build the task menu with current settings inline

View file

@ -0,0 +1,311 @@
"""Regression tests for the ``auto`` → main-model-first policy.
Prior to this change, aggregator users (OpenRouter / Nous Portal) had aux
tasks routed through a cheap provider-side default (Gemini Flash) while
non-aggregator users got their main model. This made behavior inconsistent
and surprising — users picked Claude but got Gemini Flash summaries.
The current policy: ``auto`` means "use my main chat model" for every user,
regardless of provider type. Explicit per-task overrides in ``config.yaml``
(``auxiliary.<task>.provider``) still win. The cheap fallback chain only
runs when the main provider has no working client.
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
# ── Text aux tasks — _resolve_auto ──────────────────────────────────────────
class TestResolveAutoMainFirst:
    """_resolve_auto() must prefer main provider + main model for every user.

    Every test patches the config readers (``_read_main_provider`` and
    ``_read_main_model``) so the test controls which main provider/model
    ``_resolve_auto`` sees, and patches ``resolve_provider_client`` (or the
    chain entry ``_try_openrouter``) to decide whether step 1 succeeds.
    """

    def test_openrouter_main_uses_main_model_for_aux(self, monkeypatch):
        """OpenRouter main user → aux uses their picked OR model, not Gemini Flash."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-test-key")
        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="openrouter",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="anthropic/claude-sonnet-4.6",
        ), patch(
            "agent.auxiliary_client.resolve_provider_client"
        ) as mock_resolve:
            mock_client = MagicMock()
            mock_resolve.return_value = (mock_client, "anthropic/claude-sonnet-4.6")

            from agent.auxiliary_client import _resolve_auto

            client, model = _resolve_auto()

        assert client is mock_client
        assert model == "anthropic/claude-sonnet-4.6"
        # Verify it asked resolve_provider_client for the MAIN provider+model,
        # not a fallback-chain provider
        mock_resolve.assert_called_once()
        assert mock_resolve.call_args.args[0] == "openrouter"
        assert mock_resolve.call_args.args[1] == "anthropic/claude-sonnet-4.6"

    def test_nous_main_uses_main_model_for_aux(self, monkeypatch):
        """Nous Portal main user → aux uses their picked Nous model, not free-tier MiMo."""
        # No OPENROUTER_API_KEY → ensures if main failed we'd fall to chain.
        # Clear it explicitly so the precondition holds even when the
        # developer's shell (or CI) exports a real key.
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        with patch(
            "agent.auxiliary_client._read_main_provider", return_value="nous",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="anthropic/claude-opus-4.6",
        ), patch(
            "agent.auxiliary_client.resolve_provider_client"
        ) as mock_resolve:
            mock_client = MagicMock()
            mock_resolve.return_value = (mock_client, "anthropic/claude-opus-4.6")

            from agent.auxiliary_client import _resolve_auto

            client, model = _resolve_auto()

        assert client is mock_client
        assert model == "anthropic/claude-opus-4.6"
        assert mock_resolve.call_args.args[0] == "nous"
        # The main MODEL must be forwarded too, not just the provider.
        assert mock_resolve.call_args.args[1] == "anthropic/claude-opus-4.6"

    def test_non_aggregator_main_still_uses_main(self, monkeypatch):
        """Non-aggregator main (DeepSeek) → unchanged behavior, main model used."""
        monkeypatch.setenv("DEEPSEEK_API_KEY", "ds-test")
        with patch(
            "agent.auxiliary_client._read_main_provider", return_value="deepseek",
        ), patch(
            "agent.auxiliary_client._read_main_model", return_value="deepseek-chat",
        ), patch(
            "agent.auxiliary_client.resolve_provider_client"
        ) as mock_resolve:
            mock_client = MagicMock()
            mock_resolve.return_value = (mock_client, "deepseek-chat")

            from agent.auxiliary_client import _resolve_auto

            client, model = _resolve_auto()

        assert client is mock_client
        assert model == "deepseek-chat"
        assert mock_resolve.call_args.args[0] == "deepseek"
        assert mock_resolve.call_args.args[1] == "deepseek-chat"

    def test_main_unavailable_falls_through_to_chain(self, monkeypatch):
        """Main provider with no working client → fall back to aux chain."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        chain_client = MagicMock()
        with patch(
            "agent.auxiliary_client._read_main_provider", return_value="anthropic",
        ), patch(
            "agent.auxiliary_client._read_main_model", return_value="claude-opus",
        ), patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(None, None),  # main provider has no client
        ), patch(
            "agent.auxiliary_client._try_openrouter",
            return_value=(chain_client, "google/gemini-3-flash-preview"),
        ):
            from agent.auxiliary_client import _resolve_auto

            client, model = _resolve_auto()

        assert client is chain_client
        assert model == "google/gemini-3-flash-preview"

    def test_no_main_config_uses_chain_directly(self):
        """No main provider configured → skip step 1, use chain (no regression)."""
        chain_client = MagicMock()
        with patch(
            "agent.auxiliary_client._read_main_provider", return_value="",
        ), patch(
            "agent.auxiliary_client._read_main_model", return_value="",
        ), patch(
            "agent.auxiliary_client._try_openrouter",
            return_value=(chain_client, "google/gemini-3-flash-preview"),
        ):
            from agent.auxiliary_client import _resolve_auto

            client, model = _resolve_auto()

        assert client is chain_client
        # The chain's own default model must come back alongside its client.
        assert model == "google/gemini-3-flash-preview"

    def test_runtime_override_wins_over_config(self, monkeypatch):
        """main_runtime kwarg overrides config-read main provider/model."""
        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="openrouter",
        ), patch(
            "agent.auxiliary_client._read_main_model", return_value="config-model",
        ), patch(
            "agent.auxiliary_client.resolve_provider_client"
        ) as mock_resolve:
            mock_resolve.return_value = (MagicMock(), "runtime-model")

            from agent.auxiliary_client import _resolve_auto

            _resolve_auto(main_runtime={
                "provider": "anthropic",
                "model": "runtime-model",
                "base_url": "",
                "api_key": "",
                "api_mode": "",
            })

        # Runtime override wins
        assert mock_resolve.call_args.args[0] == "anthropic"
        assert mock_resolve.call_args.args[1] == "runtime-model"
# ── Vision — resolve_vision_provider_client ─────────────────────────────────
class TestResolveVisionMainFirst:
    """Vision auto-detection prefers main provider + main model first.

    ``_resolve_task_provider_model`` is patched in every test to pin the
    requested provider ("auto" or an explicit override), and the config
    readers are patched to pin the user's main provider/model.
    """

    def test_openrouter_main_vision_uses_main_model(self, monkeypatch):
        """With OpenRouter as main, aux vision is served by the main model."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        fake_client = MagicMock()
        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="openrouter",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="anthropic/claude-sonnet-4.6",
        ), patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(fake_client, "anthropic/claude-sonnet-4.6"),
        ) as resolve_spy, patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        ):
            from agent.auxiliary_client import resolve_vision_provider_client

            picked_provider, picked_client, picked_model = (
                resolve_vision_provider_client()
            )

        assert picked_provider == "openrouter"
        assert picked_client is fake_client
        assert picked_model == "anthropic/claude-sonnet-4.6"
        # A single resolve_provider_client call with the main provider+model
        # shows the strict OpenRouter vision backend (cheap
        # gemini-flash-preview default) was not the path taken.
        resolve_spy.assert_called_once()
        called_with = resolve_spy.call_args.args
        assert called_with[0] == "openrouter"
        assert called_with[1] == "anthropic/claude-sonnet-4.6"

    def test_nous_main_vision_uses_main_model(self):
        """With Nous Portal as main, aux vision reports the main model, not MiMo-V2-Omni."""
        fake_client = MagicMock()
        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="nous",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="openai/gpt-5",
        ), patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(fake_client, "openai/gpt-5"),
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        ):
            from agent.auxiliary_client import resolve_vision_provider_client

            picked_provider, _picked_client, picked_model = (
                resolve_vision_provider_client()
            )

        assert picked_provider == "nous"
        assert picked_model == "openai/gpt-5"

    def test_exotic_provider_with_vision_override_preserved(self):
        """For xiaomi, the mimo-v2-omni vision override beats the main text model."""
        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="xiaomi",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="mimo-v2-pro",  # text-only main model
        ), patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(MagicMock(), "mimo-v2-omni"),
        ) as resolve_spy, patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        ):
            from agent.auxiliary_client import resolve_vision_provider_client

            picked_provider, _picked_client, _picked_model = (
                resolve_vision_provider_client()
            )

        assert picked_provider == "xiaomi"
        # The model requested from resolve_provider_client must be the
        # vision override (mimo-v2-omni), not the text main (mimo-v2-pro).
        assert resolve_spy.call_args.args[1] == "mimo-v2-omni"

    def test_main_unavailable_vision_falls_through_to_aggregators(self):
        """When the main provider yields no client, a strict aggregator backend is used."""
        aggregator_client = MagicMock()
        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="deepseek",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="deepseek-chat",
        ), patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(None, None),
        ), patch(
            "agent.auxiliary_client._resolve_strict_vision_backend",
            return_value=(aggregator_client, "google/gemini-3-flash-preview"),
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        ):
            from agent.auxiliary_client import resolve_vision_provider_client

            picked_provider, picked_client, _picked_model = (
                resolve_vision_provider_client()
            )

        assert picked_client is aggregator_client
        assert picked_provider in ("openrouter", "nous")

    def test_explicit_provider_override_still_wins(self):
        """An explicit per-task provider bypasses the main-first policy."""
        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="openrouter",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="anthropic/claude-opus-4.6",
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("nous", None, None, None, None),  # explicit override
        ), patch(
            "agent.auxiliary_client._resolve_strict_vision_backend",
            return_value=(MagicMock(), "nous-default-model"),
        ) as strict_spy:
            from agent.auxiliary_client import resolve_vision_provider_client

            picked_provider, _picked_client, _picked_model = (
                resolve_vision_provider_client()
            )

        # The explicit "nous" request goes through the strict backend rather
        # than the main-model path.
        assert picked_provider == "nous"
        strict_spy.assert_called_once_with("nous")
# ── Constant cleanup ────────────────────────────────────────────────────────
def test_aggregator_providers_constant_removed():
    """Guard against ``_AGGREGATOR_PROVIDERS`` creeping back into the module.

    The constant was deleted once the main-first policy made the
    aggregator-skip guard obsolete, so finding it again on the module hints
    that the old special-casing has been reintroduced.
    """
    import agent.auxiliary_client as module_under_test

    failure_hint = (
        "_AGGREGATOR_PROVIDERS was removed when _resolve_auto stopped "
        "treating aggregators specially. If you re-added it, the main-first "
        "policy may have regressed."
    )
    constant_present = hasattr(module_under_test, "_AGGREGATOR_PROVIDERS")
    assert not constant_present, failure_hint