mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(auxiliary): default 'auto' routing to main model for all users (#11900)
Before: aggregator users (OpenRouter / Nous Portal) running 'auto'
routing for auxiliary tasks — compression, vision, web extraction,
session search, etc. — got routed to a cheap provider-side default
model (Gemini Flash). Non-aggregator users already got their main
model. Behavior was inconsistent and surprising — users picked
Claude / GPT / their preferred model, but side tasks ran on
Gemini Flash.
After: 'auto' means "use my main chat model" for every user,
regardless of provider type. Only when the main provider has no
working client does the fallback chain run (OpenRouter → Nous →
custom → Codex → API-key providers). Explicit per-task overrides
in config.yaml (auxiliary.<task>.provider / .model) still win —
they are a hard constraint, not subject to the auto policy.
Vision auto-detection follows the same policy: try main provider +
main model first (with _PROVIDER_VISION_MODELS overrides preserved
for providers like xiaomi and zai that ship a dedicated multimodal
model distinct from their chat model). Aggregator strict vision
backends are fallbacks, not the primary path.
Changes:
- agent/auxiliary_client.py: _resolve_auto() drops the
`_AGGREGATOR_PROVIDERS` guard. resolve_vision_provider_client()
auto branch unifies aggregator and exotic-provider paths —
everyone goes through resolve_provider_client() with main_model.
Dead _AGGREGATOR_PROVIDERS constant removed (was only used by
the guard we just removed).
- hermes_cli/main.py: aux config menu copy updated to reflect
the new semantics ("'auto' means 'use my main model'").
- tests/agent/test_auxiliary_main_first.py: 12 regression tests
covering OpenRouter/Nous/DeepSeek main paths, runtime-override
wins, explicit-config wins, vision override preservation for
exotic providers, and fallback-chain activation when the main
provider has no working client.
Co-authored-by: teknium1 <teknium@nousresearch.com>
This commit is contained in:
parent
b449a0e049
commit
a155b4a159
3 changed files with 352 additions and 37 deletions
|
|
@ -1075,8 +1075,6 @@ _AUTO_PROVIDER_LABELS = {
|
|||
"_resolve_api_key_provider": "api-key",
|
||||
}
|
||||
|
||||
_AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})
|
||||
|
||||
_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode")
|
||||
|
||||
|
||||
|
|
@ -1207,11 +1205,15 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
|
|||
"""Full auto-detection chain.
|
||||
|
||||
Priority:
|
||||
1. If the user's main provider is NOT an aggregator (OpenRouter / Nous),
|
||||
use their main provider + main model directly. This ensures users on
|
||||
Alibaba, DeepSeek, ZAI, etc. get auxiliary tasks handled by the same
|
||||
provider they already have credentials for — no OpenRouter key needed.
|
||||
2. OpenRouter → Nous → custom → Codex → API-key providers (original chain).
|
||||
1. User's main provider + main model, regardless of provider type.
|
||||
This means auxiliary tasks (compression, vision, web extraction,
|
||||
session search, etc.) use the same model the user configured for
|
||||
chat. Users on OpenRouter/Nous get their chosen chat model; users
|
||||
on DeepSeek/ZAI/Alibaba get theirs; etc. Running aux tasks on the
|
||||
user's picked model keeps behavior predictable — no surprise
|
||||
switches to a cheap fallback model for side tasks.
|
||||
2. OpenRouter → Nous → custom → Codex → API-key providers (fallback
|
||||
chain, only used when the main provider has no working client).
|
||||
"""
|
||||
global auxiliary_is_nous, _stale_base_url_warned
|
||||
auxiliary_is_nous = False # Reset — _try_nous() will set True if it wins
|
||||
|
|
@ -1241,11 +1243,16 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
|
|||
)
|
||||
_stale_base_url_warned = True
|
||||
|
||||
# ── Step 1: non-aggregator main provider → use main model directly ──
|
||||
# ── Step 1: main provider + main model → use them directly ──
|
||||
#
|
||||
# This is the primary aux backend for every user. "auto" means
|
||||
# "use my main chat model for side tasks as well" — including users
|
||||
# on aggregators (OpenRouter, Nous) who previously got routed to a
|
||||
# cheap provider-side default. Explicit per-task overrides set via
|
||||
# config.yaml (auxiliary.<task>.provider) still win over this.
|
||||
main_provider = runtime_provider or _read_main_provider()
|
||||
main_model = runtime_model or _read_main_model()
|
||||
if (main_provider and main_model
|
||||
and main_provider not in _AGGREGATOR_PROVIDERS
|
||||
and main_provider not in ("auto", "")):
|
||||
resolved_provider = main_provider
|
||||
explicit_base_url = None
|
||||
|
|
@ -1828,34 +1835,31 @@ def resolve_vision_provider_client(
|
|||
|
||||
if requested == "auto":
|
||||
# Vision auto-detection order:
|
||||
# 1. Active provider + model (user's main chat config)
|
||||
# 2. OpenRouter (known vision-capable default model)
|
||||
# 3. Nous Portal (known vision-capable default model)
|
||||
# 1. User's main provider + main model (including aggregators).
|
||||
# _PROVIDER_VISION_MODELS provides per-provider vision model
|
||||
# overrides when the provider has a dedicated multimodal model
|
||||
# that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
|
||||
# zai → glm-5v-turbo).
|
||||
# 2. OpenRouter (vision-capable aggregator fallback)
|
||||
# 3. Nous Portal (vision-capable aggregator fallback)
|
||||
# 4. Stop
|
||||
main_provider = _read_main_provider()
|
||||
main_model = _read_main_model()
|
||||
if main_provider and main_provider not in ("auto", ""):
|
||||
if main_provider in _VISION_AUTO_PROVIDER_ORDER:
|
||||
# Known strict backend — use its defaults.
|
||||
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
|
||||
if sync_client is not None:
|
||||
return _finalize(main_provider, sync_client, default_model)
|
||||
else:
|
||||
# Exotic provider (DeepSeek, Alibaba, Xiaomi, named custom, etc.)
|
||||
# Use provider-specific vision model if available, otherwise main model.
|
||||
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
|
||||
rpc_client, rpc_model = resolve_provider_client(
|
||||
main_provider, vision_model,
|
||||
api_mode=resolved_api_mode)
|
||||
if rpc_client is not None:
|
||||
logger.info(
|
||||
"Vision auto-detect: using active provider %s (%s)",
|
||||
main_provider, rpc_model or vision_model,
|
||||
)
|
||||
return _finalize(
|
||||
main_provider, rpc_client, rpc_model or vision_model)
|
||||
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
|
||||
rpc_client, rpc_model = resolve_provider_client(
|
||||
main_provider, vision_model,
|
||||
api_mode=resolved_api_mode)
|
||||
if rpc_client is not None:
|
||||
logger.info(
|
||||
"Vision auto-detect: using main provider %s (%s)",
|
||||
main_provider, rpc_model or vision_model,
|
||||
)
|
||||
return _finalize(
|
||||
main_provider, rpc_client, rpc_model or vision_model)
|
||||
|
||||
# Fall back through aggregators.
|
||||
# Fall back through aggregators (uses their dedicated vision model,
|
||||
# not the user's main model) when main provider has no client.
|
||||
for candidate in _VISION_AUTO_PROVIDER_ORDER:
|
||||
if candidate == main_provider:
|
||||
continue # already tried above
|
||||
|
|
|
|||
|
|
@ -1705,11 +1705,11 @@ def _aux_config_menu() -> None:
|
|||
print()
|
||||
print(" Auxiliary models — side-task routing")
|
||||
print()
|
||||
print(" Hermes uses small, fast models for vision, compression, web")
|
||||
print(" extraction, and other side tasks. \"auto\" lets Hermes pick the")
|
||||
print(" best available backend automatically (OpenRouter → Nous Portal")
|
||||
print(" → your main provider). You rarely need to change these —")
|
||||
print(" override only if you want a specific model for a task.")
|
||||
print(" Side tasks (vision, compression, web extraction, etc.) default")
|
||||
print(" to your main chat model. \"auto\" means \"use my main model\" —")
|
||||
print(" Hermes only falls back to a lightweight backend (OpenRouter,")
|
||||
print(" Nous Portal) if the main model is unavailable. Override a")
|
||||
print(" task below if you want it pinned to a specific provider/model.")
|
||||
print()
|
||||
|
||||
# Build the task menu with current settings inline
|
||||
|
|
|
|||
311
tests/agent/test_auxiliary_main_first.py
Normal file
311
tests/agent/test_auxiliary_main_first.py
Normal file
|
|
@ -0,0 +1,311 @@
|
|||
"""Regression tests for the ``auto`` → main-model-first policy.
|
||||
|
||||
Prior to this change, aggregator users (OpenRouter / Nous Portal) had aux
|
||||
tasks routed through a cheap provider-side default (Gemini Flash) while
|
||||
non-aggregator users got their main model. This made behavior inconsistent
|
||||
and surprising — users picked Claude but got Gemini Flash summaries.
|
||||
|
||||
The current policy: ``auto`` means "use my main chat model" for every user,
|
||||
regardless of provider type. Explicit per-task overrides in ``config.yaml``
|
||||
(``auxiliary.<task>.provider``) still win. The cheap fallback chain only
|
||||
runs when the main provider has no working client.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ── Text aux tasks — _resolve_auto ──────────────────────────────────────────
|
||||
|
||||
|
||||
class TestResolveAutoMainFirst:
    """_resolve_auto() must prefer main provider + main model for every user.

    Each test patches ``_read_main_provider`` / ``_read_main_model`` to
    simulate the user's chat configuration and replaces
    ``resolve_provider_client`` with a mock, so no real client is built.
    """

    def test_openrouter_main_uses_main_model_for_aux(self, monkeypatch):
        """OpenRouter main user → aux uses their picked OR model, not Gemini Flash."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-test-key")

        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="openrouter",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="anthropic/claude-sonnet-4.6",
        ), patch(
            "agent.auxiliary_client.resolve_provider_client"
        ) as mock_resolve:
            mock_client = MagicMock()
            mock_resolve.return_value = (mock_client, "anthropic/claude-sonnet-4.6")

            from agent.auxiliary_client import _resolve_auto

            client, model = _resolve_auto()

        assert client is mock_client
        assert model == "anthropic/claude-sonnet-4.6"
        # Verify it asked resolve_provider_client for the MAIN provider+model,
        # not a fallback-chain provider
        mock_resolve.assert_called_once()
        assert mock_resolve.call_args.args[0] == "openrouter"
        assert mock_resolve.call_args.args[1] == "anthropic/claude-sonnet-4.6"

    def test_nous_main_uses_main_model_for_aux(self, monkeypatch):
        """Nous Portal main user → aux uses their picked Nous model, not free-tier MiMo."""
        # Explicitly clear OPENROUTER_API_KEY so the scenario does not depend
        # on the developer/CI environment: if the main provider were skipped,
        # the OpenRouter step of the fallback chain could not mask it.
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)

        with patch(
            "agent.auxiliary_client._read_main_provider", return_value="nous",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="anthropic/claude-opus-4.6",
        ), patch(
            "agent.auxiliary_client.resolve_provider_client"
        ) as mock_resolve:
            mock_client = MagicMock()
            mock_resolve.return_value = (mock_client, "anthropic/claude-opus-4.6")

            from agent.auxiliary_client import _resolve_auto

            client, model = _resolve_auto()

        assert client is mock_client
        assert model == "anthropic/claude-opus-4.6"
        assert mock_resolve.call_args.args[0] == "nous"

    def test_non_aggregator_main_still_uses_main(self, monkeypatch):
        """Non-aggregator main (DeepSeek) → unchanged behavior, main model used."""
        monkeypatch.setenv("DEEPSEEK_API_KEY", "ds-test")

        with patch(
            "agent.auxiliary_client._read_main_provider", return_value="deepseek",
        ), patch(
            "agent.auxiliary_client._read_main_model", return_value="deepseek-chat",
        ), patch(
            "agent.auxiliary_client.resolve_provider_client"
        ) as mock_resolve:
            mock_client = MagicMock()
            mock_resolve.return_value = (mock_client, "deepseek-chat")

            from agent.auxiliary_client import _resolve_auto

            client, model = _resolve_auto()

        assert client is mock_client
        assert model == "deepseek-chat"
        assert mock_resolve.call_args.args[0] == "deepseek"

    def test_main_unavailable_falls_through_to_chain(self, monkeypatch):
        """Main provider with no working client → fall back to aux chain."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

        chain_client = MagicMock()
        with patch(
            "agent.auxiliary_client._read_main_provider", return_value="anthropic",
        ), patch(
            "agent.auxiliary_client._read_main_model", return_value="claude-opus",
        ), patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(None, None),  # main provider has no client
        ), patch(
            "agent.auxiliary_client._try_openrouter",
            return_value=(chain_client, "google/gemini-3-flash-preview"),
        ):
            from agent.auxiliary_client import _resolve_auto

            client, model = _resolve_auto()

        assert client is chain_client
        assert model == "google/gemini-3-flash-preview"

    def test_no_main_config_uses_chain_directly(self):
        """No main provider configured → skip step 1, use chain (no regression)."""
        chain_client = MagicMock()
        with patch(
            "agent.auxiliary_client._read_main_provider", return_value="",
        ), patch(
            "agent.auxiliary_client._read_main_model", return_value="",
        ), patch(
            "agent.auxiliary_client._try_openrouter",
            return_value=(chain_client, "google/gemini-3-flash-preview"),
        ):
            from agent.auxiliary_client import _resolve_auto

            client, model = _resolve_auto()

        assert client is chain_client

    def test_runtime_override_wins_over_config(self, monkeypatch):
        """main_runtime kwarg overrides config-read main provider/model."""
        with patch(
            "agent.auxiliary_client._read_main_provider",
            return_value="openrouter",
        ), patch(
            "agent.auxiliary_client._read_main_model", return_value="config-model",
        ), patch(
            "agent.auxiliary_client.resolve_provider_client"
        ) as mock_resolve:
            mock_resolve.return_value = (MagicMock(), "runtime-model")

            from agent.auxiliary_client import _resolve_auto

            _resolve_auto(main_runtime={
                "provider": "anthropic",
                "model": "runtime-model",
                "base_url": "",
                "api_key": "",
                "api_mode": "",
            })

        # Runtime override wins
        assert mock_resolve.call_args.args[0] == "anthropic"
        assert mock_resolve.call_args.args[1] == "runtime-model"
|
||||
|
||||
|
||||
# ── Vision — resolve_vision_provider_client ─────────────────────────────────
|
||||
|
||||
|
||||
class TestResolveVisionMainFirst:
    """Vision auto-detection prefers main provider + main model first.

    Every scenario stubs ``_resolve_task_provider_model`` to control the
    requested vision provider and stubs the main-provider readers to
    simulate the user's chat configuration.
    """

    def test_openrouter_main_vision_uses_main_model(self, monkeypatch):
        """OpenRouter main with vision-capable model → aux vision uses main model."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

        stub_client = MagicMock()
        provider_patch = patch(
            "agent.auxiliary_client._read_main_provider", return_value="openrouter",
        )
        model_patch = patch(
            "agent.auxiliary_client._read_main_model",
            return_value="anthropic/claude-sonnet-4.6",
        )
        resolver_patch = patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(stub_client, "anthropic/claude-sonnet-4.6"),
        )
        task_patch = patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        )

        with provider_patch, model_patch, resolver_patch as resolver, task_patch:
            from agent.auxiliary_client import resolve_vision_provider_client

            provider, client, model = resolve_vision_provider_client()

        assert provider == "openrouter"
        assert client is stub_client
        assert model == "anthropic/claude-sonnet-4.6"
        # The generic resolver must have been used exactly once with the
        # main provider + model, i.e. the strict OpenRouter vision backend
        # (and its cheap gemini-flash-preview default) was not the source.
        resolver.assert_called_once()
        positional = resolver.call_args.args
        assert positional[0] == "openrouter"
        assert positional[1] == "anthropic/claude-sonnet-4.6"

    def test_nous_main_vision_uses_main_model(self):
        """Nous Portal main → aux vision uses main model, not free-tier MiMo-V2-Omni."""
        stub_client = MagicMock()
        provider_patch = patch(
            "agent.auxiliary_client._read_main_provider", return_value="nous",
        )
        model_patch = patch(
            "agent.auxiliary_client._read_main_model",
            return_value="openai/gpt-5",
        )
        resolver_patch = patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(stub_client, "openai/gpt-5"),
        )
        task_patch = patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        )

        with provider_patch, model_patch, resolver_patch, task_patch:
            from agent.auxiliary_client import resolve_vision_provider_client

            provider, client, model = resolve_vision_provider_client()

        assert provider == "nous"
        assert model == "openai/gpt-5"

    def test_exotic_provider_with_vision_override_preserved(self):
        """xiaomi → mimo-v2-omni override still wins over main_model."""
        provider_patch = patch(
            "agent.auxiliary_client._read_main_provider", return_value="xiaomi",
        )
        model_patch = patch(
            "agent.auxiliary_client._read_main_model",
            return_value="mimo-v2-pro",  # text model
        )
        resolver_patch = patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(MagicMock(), "mimo-v2-omni"),
        )
        task_patch = patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        )

        with provider_patch, model_patch, resolver_patch as resolver, task_patch:
            from agent.auxiliary_client import resolve_vision_provider_client

            provider, client, model = resolve_vision_provider_client()

        assert provider == "xiaomi"
        # The model handed to the resolver must be the vision override
        # (mimo-v2-omni), not the text main model (mimo-v2-pro).
        requested_model = resolver.call_args.args[1]
        assert requested_model == "mimo-v2-omni"

    def test_main_unavailable_vision_falls_through_to_aggregators(self):
        """Main provider fails → fall back to OpenRouter/Nous strict backends."""
        fallback_client = MagicMock()
        provider_patch = patch(
            "agent.auxiliary_client._read_main_provider", return_value="deepseek",
        )
        model_patch = patch(
            "agent.auxiliary_client._read_main_model", return_value="deepseek-chat",
        )
        # Main provider yields no client, forcing the aggregator fallback.
        resolver_patch = patch(
            "agent.auxiliary_client.resolve_provider_client",
            return_value=(None, None),
        )
        strict_patch = patch(
            "agent.auxiliary_client._resolve_strict_vision_backend",
            return_value=(fallback_client, "google/gemini-3-flash-preview"),
        )
        task_patch = patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        )

        with provider_patch, model_patch, resolver_patch, strict_patch, task_patch:
            from agent.auxiliary_client import resolve_vision_provider_client

            provider, client, model = resolve_vision_provider_client()

        assert client is fallback_client
        assert provider in ("openrouter", "nous")

    def test_explicit_provider_override_still_wins(self):
        """Explicit config override bypasses main-first policy."""
        provider_patch = patch(
            "agent.auxiliary_client._read_main_provider", return_value="openrouter",
        )
        model_patch = patch(
            "agent.auxiliary_client._read_main_model",
            return_value="anthropic/claude-opus-4.6",
        )
        task_patch = patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("nous", None, None, None, None),  # explicit override
        )
        strict_patch = patch(
            "agent.auxiliary_client._resolve_strict_vision_backend",
            return_value=(MagicMock(), "nous-default-model"),
        )

        with provider_patch, model_patch, task_patch, strict_patch as strict_backend:
            from agent.auxiliary_client import resolve_vision_provider_client

            provider, client, model = resolve_vision_provider_client()

        # An explicit "nous" request goes to the strict backend, not the
        # main-model path.
        assert provider == "nous"
        strict_backend.assert_called_once_with("nous")
|
||||
|
||||
|
||||
# ── Constant cleanup ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_aggregator_providers_constant_removed():
    """Guard against the dead ``_AGGREGATOR_PROVIDERS`` constant coming back.

    The constant was dropped together with the aggregator-skip guard once the
    main-first policy made that guard obsolete.
    """
    import agent.auxiliary_client as aux_mod

    constant_present = hasattr(aux_mod, "_AGGREGATOR_PROVIDERS")
    failure_note = (
        "_AGGREGATOR_PROVIDERS was removed when _resolve_auto stopped "
        "treating aggregators specially. If you re-added it, the main-first "
        "policy may have regressed."
    )
    assert not constant_present, failure_note
|
||||
Loading…
Add table
Add a link
Reference in a new issue