mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-20 10:11:58 +00:00
fix(auxiliary): honor main fallback chain for auto tasks (#47235)
This commit is contained in:
parent
4d470b3dbb
commit
4858942c55
7 changed files with 290 additions and 38 deletions
|
|
@ -3079,23 +3079,20 @@ def _try_configured_fallback_chain(
|
|||
if not fb_provider or fb_provider.lower() == skip:
|
||||
continue
|
||||
fb_model = str(entry.get("model", "")).strip() or None
|
||||
fb_base_url = str(entry.get("base_url", "")).strip() or None
|
||||
fb_api_key = str(entry.get("api_key", "")).strip() or None
|
||||
|
||||
label = f"fallback_chain[{i}]({fb_provider})"
|
||||
|
||||
try:
|
||||
fb_client = _resolve_single_provider(
|
||||
fb_provider, fb_model, fb_base_url, fb_api_key)
|
||||
fb_client, resolved_model = _resolve_fallback_entry(entry)
|
||||
except Exception:
|
||||
fb_client = None
|
||||
fb_client, resolved_model = None, None
|
||||
|
||||
if fb_client is not None:
|
||||
logger.info(
|
||||
"Auxiliary %s: %s on %s — configured fallback to %s (%s)",
|
||||
task, reason, failed_provider, label, fb_model or "default",
|
||||
task, reason, failed_provider, label, resolved_model or fb_model or "default",
|
||||
)
|
||||
return fb_client, fb_model, label
|
||||
return fb_client, resolved_model or fb_model, label
|
||||
tried.append(label)
|
||||
|
||||
if tried:
|
||||
|
|
@ -3106,6 +3103,103 @@ def _try_configured_fallback_chain(
|
|||
return None, None, ""
|
||||
|
||||
|
||||
def _fallback_entry_api_key(entry: Dict[str, Any]) -> Optional[str]:
    """Return the API key declared by a fallback-chain entry, if any.

    An inline ``api_key`` value takes precedence. Otherwise the entry may
    name an environment variable through ``key_env`` (or ``api_key_env``),
    which is read at call time. All values are whitespace-stripped.

    Args:
        entry: One fallback-chain entry (a mapping of config fields).

    Returns:
        The stripped key, or ``None`` when no non-empty key is available.
    """
    inline_key = str(entry.get("api_key") or "").strip()
    if inline_key:
        return inline_key

    env_name = str(entry.get("key_env") or entry.get("api_key_env") or "").strip()
    if not env_name:
        return None

    # An unset or blank environment variable counts as "no key".
    env_value = os.getenv(env_name, "").strip()
    return env_value if env_value else None
|
||||
|
||||
|
||||
def _resolve_fallback_entry(entry: Dict[str, Any]) -> Tuple[Optional[Any], Optional[str]]:
    """Turn one fallback-chain entry into a client via ``resolve_provider_client``.

    The entry must carry a non-blank ``provider`` and ``model``; otherwise
    nothing is resolved. Optional ``base_url``, API key (inline or
    env-backed) and ``api_mode`` (alias: ``transport``) are forwarded as
    explicit overrides.

    Args:
        entry: One fallback-chain entry (a mapping of config fields).

    Returns:
        Whatever ``resolve_provider_client`` returns for the entry, or
        ``(None, None)`` when the provider or model is missing.
    """
    provider_name = str(entry.get("provider") or "").strip()
    model_name = str(entry.get("model") or "").strip()
    if not (provider_name and model_name):
        return None, None

    # ``transport`` is only consulted when ``api_mode`` is falsy.
    mode_raw = entry.get("api_mode") or entry.get("transport") or ""

    return resolve_provider_client(
        provider_name,
        model=model_name,
        explicit_base_url=str(entry.get("base_url") or "").strip() or None,
        explicit_api_key=_fallback_entry_api_key(entry),
        api_mode=str(mode_raw).strip() or None,
    )
|
||||
|
||||
|
||||
def _try_main_fallback_chain(
    task: Optional[str],
    failed_provider: str = "",
    reason: str = "error",
) -> Tuple[Optional[Any], Optional[str], str]:
    """Walk the main agent's top-level fallback chain for an auxiliary call.

    Auxiliary tasks on ``provider: auto`` should follow the fallback policy
    the user declared for the main agent before Hermes' built-in discovery
    chain is consulted. The chain is obtained via ``get_fallback_chain`` so
    that modern ``fallback_providers`` and legacy ``fallback_model`` entries
    are visited in the same order the main agent uses.

    Args:
        task: Auxiliary task name; only used for logging and the
            unhealthy-skip log helper. Logged as ``"call"`` when falsy.
        failed_provider: Provider that just failed; entries for it are skipped.
        reason: Short human-readable failure reason for the info log.

    Returns:
        ``(client, model, provider)`` for the first entry that resolves to a
        client, or ``(None, None, "")`` when the chain cannot be loaded, is
        empty, or is exhausted.
    """
    task_name = task or "call"
    nothing = (None, None, "")

    # Imported lazily; any failure to load config is treated as "no chain".
    try:
        from hermes_cli.config import load_config
        from hermes_cli.fallback_config import get_fallback_chain

        chain = get_fallback_chain(load_config())
    except Exception as exc:
        logger.debug("Auxiliary %s: could not load main fallback chain: %s", task_name, exc)
        return nothing

    if not chain:
        return nothing

    # Providers never worth retrying here: the one that failed, the main
    # provider itself, and the "auto" pseudo-provider.
    failed_key = (failed_provider or "").strip().lower()
    main_key = (_read_main_provider() or "").strip().lower()
    excluded = {failed_key, main_key, "auto"} - {""}

    attempts: List[str] = []
    for index, entry in enumerate(chain):
        if not isinstance(entry, dict):
            continue

        entry_provider = str(entry.get("provider") or "").strip()
        entry_model = str(entry.get("model") or "").strip()
        if not (entry_provider and entry_model):
            continue

        provider_key = entry_provider.lower()
        label = f"fallback_providers[{index}]({entry_provider})"

        if provider_key in excluded:
            attempts.append(f"{label} (skipped)")
            continue

        if _is_provider_unhealthy(provider_key):
            _log_skip_unhealthy(provider_key, task)
            attempts.append(f"{label} (unhealthy)")
            continue

        try:
            candidate, resolved_model = _resolve_fallback_entry(entry)
        except Exception as exc:
            logger.debug(
                "Auxiliary %s: main fallback %s failed to resolve: %s",
                task_name, label, exc,
            )
            attempts.append(label)
            continue

        if candidate is None:
            attempts.append(label)
            continue

        chosen_model = resolved_model or entry_model
        logger.info(
            "Auxiliary %s: %s on %s — main fallback chain to %s (%s)",
            task_name, reason, failed_provider or "auto", label, chosen_model,
        )
        return candidate, chosen_model, entry_provider

    if attempts:
        logger.debug(
            "Auxiliary %s: main fallback chain exhausted (tried: %s)",
            task_name, ", ".join(attempts),
        )
    return nothing
|
||||
|
||||
|
||||
def _resolve_single_provider(
|
||||
provider: str,
|
||||
model: Optional[str] = None,
|
||||
|
|
@ -3116,16 +3210,19 @@ def _resolve_single_provider(
|
|||
|
||||
Uses the existing provider resolution infrastructure where possible.
|
||||
"""
|
||||
# Reuse resolve_provider_client which handles provider→client mapping
|
||||
# Reuse resolve_provider_client which handles provider→client mapping.
|
||||
client, resolved_model = resolve_provider_client(
|
||||
provider=provider,
|
||||
model=model,
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
explicit_base_url=base_url,
|
||||
explicit_api_key=api_key,
|
||||
)
|
||||
return client
|
||||
|
||||
def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
def _resolve_auto(
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
task: Optional[str] = None,
|
||||
) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Full auto-detection chain.
|
||||
|
||||
Priority:
|
||||
|
|
@ -3223,7 +3320,22 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
|
|||
main_provider, resolved or main_model)
|
||||
return client, resolved or main_model
|
||||
|
||||
# ── Step 2: aggregator / fallback chain ──────────────────────────────
|
||||
# ── Step 2: user-configured fallback policy ─────────────────────────
|
||||
# In auto mode, respect the task-specific fallback chain first, then the
|
||||
# main agent's top-level fallback_providers/fallback_model chain. The
|
||||
# hardcoded provider discovery chain below is only the convenience default
|
||||
# for users who have not declared a fallback policy.
|
||||
if task:
|
||||
fb_client, fb_model, _fb_label = _try_configured_fallback_chain(
|
||||
task, main_provider or "auto", reason="main provider unavailable")
|
||||
if fb_client is not None:
|
||||
return fb_client, fb_model
|
||||
fb_client, fb_model, _fb_label = _try_main_fallback_chain(
|
||||
task, main_provider or "auto", reason="main provider unavailable")
|
||||
if fb_client is not None:
|
||||
return fb_client, fb_model
|
||||
|
||||
# ── Step 3: aggregator / fallback chain ──────────────────────────────
|
||||
tried = []
|
||||
for label, try_fn in _get_provider_chain():
|
||||
if _is_provider_unhealthy(label):
|
||||
|
|
@ -3344,6 +3456,7 @@ def resolve_provider_client(
|
|||
api_mode: str = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
is_vision: bool = False,
|
||||
task: Optional[str] = None,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Central router: given a provider name and optional model, return a
|
||||
configured client with the correct auth, base URL, and API format.
|
||||
|
|
@ -3464,7 +3577,7 @@ def resolve_provider_client(
|
|||
|
||||
# ── Auto: try all providers in priority order ────────────────────
|
||||
if provider == "auto":
|
||||
client, resolved = _resolve_auto(main_runtime=main_runtime)
|
||||
client, resolved = _resolve_auto(main_runtime=main_runtime, task=task)
|
||||
if client is None:
|
||||
return None, None
|
||||
# When auto-detection lands on a non-OpenRouter provider (e.g. a
|
||||
|
|
@ -4357,11 +4470,16 @@ def _client_cache_key(
|
|||
api_mode: Optional[str] = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
is_vision: bool = False,
|
||||
task: Optional[str] = None,
|
||||
) -> tuple:
|
||||
runtime = _normalize_main_runtime(main_runtime)
|
||||
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
|
||||
# `auto` can now resolve through task-specific or main fallback policy,
|
||||
# so the task participates in the cache key. Non-auto providers keep the
|
||||
# old cache shape because the explicit provider/model tuple is sufficient.
|
||||
task_key = (task or "") if provider == "auto" else ""
|
||||
pool_hint = _pool_cache_hint(provider, main_runtime=main_runtime)
|
||||
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, pool_hint)
|
||||
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, task_key, pool_hint)
|
||||
|
||||
|
||||
def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
|
||||
|
|
@ -4554,6 +4672,7 @@ def _get_cached_client(
|
|||
api_mode: str = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
is_vision: bool = False,
|
||||
task: Optional[str] = None,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Get or create a cached client for the given provider.
|
||||
|
||||
|
|
@ -4591,6 +4710,7 @@ def _get_cached_client(
|
|||
api_mode=api_mode,
|
||||
main_runtime=main_runtime,
|
||||
is_vision=is_vision,
|
||||
task=task,
|
||||
)
|
||||
with _client_cache_lock:
|
||||
if cache_key in _client_cache:
|
||||
|
|
@ -4635,6 +4755,7 @@ def _get_cached_client(
|
|||
api_mode=api_mode,
|
||||
main_runtime=runtime,
|
||||
is_vision=is_vision,
|
||||
task=task,
|
||||
)
|
||||
if client is not None:
|
||||
# For async clients, remember which loop they were created on so we
|
||||
|
|
@ -5140,7 +5261,7 @@ def call_llm(
|
|||
if not resolved_base_url:
|
||||
logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
|
||||
task or "call", resolved_provider)
|
||||
client, final_model = _get_cached_client("auto", main_runtime=main_runtime)
|
||||
client, final_model = _get_cached_client("auto", main_runtime=main_runtime, task=task)
|
||||
if client is None:
|
||||
raise RuntimeError(
|
||||
f"No LLM provider configured for task={task} provider={resolved_provider}. "
|
||||
|
|
@ -5466,14 +5587,19 @@ def call_llm(
|
|||
|
||||
# Fallback order (#26882, #26803):
|
||||
# 1. User-configured fallback_chain (per-task) if set
|
||||
# 2. Main agent model (last-resort safety net)
|
||||
# For auto users (no explicit aux provider), use the full
|
||||
# auto-detection chain instead — its Step 1 IS the main agent
|
||||
# model, so users on `auto` already get main-model fallback.
|
||||
# 2. For auto: top-level main fallback_providers/fallback_model
|
||||
# 3. For auto: built-in auxiliary discovery chain
|
||||
# 4. For explicit aux providers: main agent model safety net
|
||||
fb_client, fb_model, fb_label = (None, None, "")
|
||||
if is_auto:
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
if fb_client is None:
|
||||
fb_client, fb_model, fb_label = _try_main_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
if fb_client is None:
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
else:
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
|
|
@ -5636,7 +5762,7 @@ async def async_call_llm(
|
|||
if not resolved_base_url:
|
||||
logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
|
||||
task or "call", resolved_provider)
|
||||
client, final_model = _get_cached_client("auto", async_mode=True)
|
||||
client, final_model = _get_cached_client("auto", async_mode=True, main_runtime=main_runtime, task=task)
|
||||
if client is None:
|
||||
raise RuntimeError(
|
||||
f"No LLM provider configured for task={task} provider={resolved_provider}. "
|
||||
|
|
@ -5904,13 +6030,19 @@ async def async_call_llm(
|
|||
|
||||
# Fallback order (#26882, #26803):
|
||||
# 1. User-configured fallback_chain (per-task) if set
|
||||
# 2. Main agent model (last-resort safety net)
|
||||
# Auto users get the full auto-detection chain instead — its
|
||||
# Step 1 IS the main agent model.
|
||||
# 2. For auto: top-level main fallback_providers/fallback_model
|
||||
# 3. For auto: built-in auxiliary discovery chain
|
||||
# 4. For explicit aux providers: main agent model safety net
|
||||
fb_client, fb_model, fb_label = (None, None, "")
|
||||
if is_auto:
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
if fb_client is None:
|
||||
fb_client, fb_model, fb_label = _try_main_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
if fb_client is None:
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
else:
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
|
|
|
|||
|
|
@ -1653,6 +1653,37 @@ class TestAuxiliaryFallbackLayering:
|
|||
exc.status_code = 402
|
||||
return exc
|
||||
|
||||
def test_auto_provider_uses_task_then_main_chain_before_builtin_chain(self, monkeypatch):
    """An ``auto`` aux call that fails tries the per-task chain, then the
    top-level main chain, and does not reach the built-in chain."""
    failing_client = MagicMock()
    failing_client.chat.completions.create.side_effect = self._make_payment_err()

    reply = MagicMock(message=MagicMock(content="from main fallback chain"))
    rescue_client = MagicMock()
    rescue_client.chat.completions.create.return_value = MagicMock(choices=[reply])

    with patch(
        "agent.auxiliary_client._get_cached_client",
        return_value=(failing_client, "qwen/qwen3.5-122b-a10b"),
    ), patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=("auto", None, None, None, None),
    ), patch(
        "agent.auxiliary_client._try_configured_fallback_chain",
        return_value=(None, None, ""),  # per-task chain yields nothing
    ) as task_chain_mock, patch(
        "agent.auxiliary_client._try_main_fallback_chain",
        return_value=(rescue_client, "inclusionai/ring-2.6-1t:free", "openrouter"),
    ) as main_chain_mock, patch(
        "agent.auxiliary_client._try_payment_fallback",
    ) as builtin_chain_mock:
        call_llm(
            task="title_generation",
            messages=[{"role": "user", "content": "hello"}],
        )

    # The request was ultimately served by the main-fallback client.
    assert rescue_client.chat.completions.create.called
    task_chain_mock.assert_called_once_with(
        "title_generation", "auto", reason="payment error")
    main_chain_mock.assert_called_once_with(
        "title_generation", "auto", reason="payment error")
    builtin_chain_mock.assert_not_called()
|
||||
|
||||
def test_explicit_provider_uses_configured_chain_first(self, monkeypatch, caplog):
|
||||
"""When a user has fallback_chain configured, it's tried BEFORE the main agent model."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
|
|
|
|||
|
|
@ -118,6 +118,64 @@ class TestResolveAutoMainFirst:
|
|||
assert client is chain_client
|
||||
assert model == "google/gemini-3-flash-preview"
|
||||
|
||||
def test_main_unavailable_uses_task_fallback_chain_before_builtin_chain(self):
    """With no client for the main provider, ``_resolve_auto`` returns the
    per-task fallback_chain result without touching the later stages."""
    chain_client = MagicMock()
    chain_result = (chain_client, "task-free-model", "fallback_chain[0](openrouter)")

    with patch("agent.auxiliary_client._read_main_provider",
               return_value="nvidia"), \
         patch("agent.auxiliary_client._read_main_model",
               return_value="qwen/qwen3.5-122b-a10b"), \
         patch("agent.auxiliary_client.resolve_provider_client",
               return_value=(None, None)), \
         patch("agent.auxiliary_client._try_configured_fallback_chain",
               return_value=chain_result) as task_chain_mock, \
         patch("agent.auxiliary_client._try_main_fallback_chain") as main_chain_mock, \
         patch("agent.auxiliary_client._try_openrouter") as openrouter_mock:
        from agent.auxiliary_client import _resolve_auto

        resolved_client, resolved_model = _resolve_auto(task="title_generation")

    assert resolved_client is chain_client
    assert resolved_model == "task-free-model"
    task_chain_mock.assert_called_once_with(
        "title_generation", "nvidia", reason="main provider unavailable")
    # Neither the main fallback chain nor the built-in chain is consulted.
    main_chain_mock.assert_not_called()
    openrouter_mock.assert_not_called()
|
||||
|
||||
def test_main_unavailable_uses_main_fallback_chain_before_builtin_chain(self):
    """With no main client and an empty per-task chain, ``_resolve_auto``
    returns the top-level fallback chain result before any built-in step."""
    chain_client = MagicMock()
    chain_result = (chain_client, "inclusionai/ring-2.6-1t:free", "openrouter")

    with patch("agent.auxiliary_client._read_main_provider",
               return_value="nvidia"), \
         patch("agent.auxiliary_client._read_main_model",
               return_value="qwen/qwen3.5-122b-a10b"), \
         patch("agent.auxiliary_client.resolve_provider_client",
               return_value=(None, None)), \
         patch("agent.auxiliary_client._try_configured_fallback_chain",
               return_value=(None, None, "")), \
         patch("agent.auxiliary_client._try_main_fallback_chain",
               return_value=chain_result) as main_chain_mock, \
         patch("agent.auxiliary_client._try_openrouter") as openrouter_mock:
        from agent.auxiliary_client import _resolve_auto

        resolved_client, resolved_model = _resolve_auto(task="title_generation")

    assert resolved_client is chain_client
    assert resolved_model == "inclusionai/ring-2.6-1t:free"
    main_chain_mock.assert_called_once_with(
        "title_generation", "nvidia", reason="main provider unavailable")
    # The built-in discovery chain is never reached.
    openrouter_mock.assert_not_called()
|
||||
|
||||
def test_no_main_config_uses_chain_directly(self):
|
||||
"""No main provider configured → skip step 1, use chain (no regression)."""
|
||||
chain_client = MagicMock()
|
||||
|
|
|
|||
|
|
@ -176,11 +176,16 @@ class TestClientCacheBoundedGrowth:
|
|||
"""When the loop changes, the old entry should be replaced, not duplicated."""
|
||||
from agent.auxiliary_client import (
|
||||
_client_cache,
|
||||
_client_cache_key,
|
||||
_client_cache_lock,
|
||||
_get_cached_client,
|
||||
)
|
||||
|
||||
key = ("test_replace", True, "", "", "", (), False, "")
|
||||
key = _client_cache_key(
|
||||
"test_replace",
|
||||
async_mode=True,
|
||||
task="",
|
||||
)
|
||||
|
||||
# Simulate a stale entry from a closed loop
|
||||
old_loop = asyncio.new_event_loop()
|
||||
|
|
|
|||
|
|
@ -687,7 +687,7 @@ For task-specific direct endpoints, Hermes uses the task's configured API key or
|
|||
|
||||
## Fallback Providers (config.yaml only)
|
||||
|
||||
The primary model fallback chain is configured exclusively through `config.yaml` — there are no environment variables for it. Add a top-level `fallback_providers` list with `provider` and `model` keys to enable automatic failover when your main model encounters errors.
|
||||
The primary model fallback chain is configured exclusively through `config.yaml` — there are no environment variables for it. Add a top-level `fallback_providers` list with `provider` and `model` keys to enable automatic failover when your main model encounters errors. Auxiliary tasks whose provider is `auto` also consult this chain before Hermes' built-in auxiliary discovery chain.
|
||||
|
||||
```yaml
|
||||
fallback_providers:
|
||||
|
|
@ -695,7 +695,7 @@ fallback_providers:
|
|||
model: anthropic/claude-sonnet-4
|
||||
```
|
||||
|
||||
The older top-level `fallback_model` single-provider shape is still read for backward compatibility, but new configuration should use `fallback_providers`.
|
||||
The older top-level `fallback_model` single-provider shape is still read for backward compatibility, but new configuration should use `fallback_providers`. For task-specific auxiliary policy, use `auxiliary.<task>.fallback_chain` in `config.yaml`; there is no environment variable equivalent.
|
||||
|
||||
See [Fallback Providers](/user-guide/features/fallback-providers) for full details.
|
||||
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ Click **Show auxiliary** to reveal the 11 task slots:
|
|||
|
||||

|
||||
|
||||
Every auxiliary task defaults to `auto` — meaning Hermes uses your main model for that job too. Override a specific task when you want a cheaper or faster model for a side-job.
|
||||
Every auxiliary task defaults to `auto` — meaning Hermes tries your main model for that job too. If that route is unavailable or hits a capacity-style failure, `auto` follows any task-specific `auxiliary.<task>.fallback_chain`, then the main `fallback_providers` / `fallback_model` chain, then Hermes' built-in auxiliary discovery chain. Override a specific task when you want a cheaper or faster model for a side-job.
|
||||
|
||||
### Common override patterns
|
||||
|
||||
|
|
@ -129,7 +129,21 @@ auxiliary:
|
|||
# ... other fields unchanged
|
||||
```
|
||||
|
||||
`provider: auto` with `model: ''` tells Hermes to use the main model for that task.
|
||||
`provider: auto` with `model: ''` tells Hermes to use the main model for that task, while still honoring fallback policy if the main route cannot serve the auxiliary call.
|
||||
|
||||
Optional task-specific fallback chains live under the same auxiliary task:
|
||||
|
||||
```yaml
|
||||
auxiliary:
|
||||
title_generation:
|
||||
provider: auto
|
||||
model: ''
|
||||
fallback_chain:
|
||||
- provider: openrouter
|
||||
model: inclusionai/ring-2.6-1t:free
|
||||
```
|
||||
|
||||
When `fallback_chain` is absent, or none of its entries can be resolved, `auto` falls through to the top-level `fallback_providers` chain before the built-in auxiliary discovery chain.
|
||||
|
||||
## When does it take effect?
|
||||
|
||||
|
|
|
|||
|
|
@ -168,7 +168,7 @@ fallback_providers:
|
|||
| Messaging gateway (Telegram, Discord, etc.) | ✔ |
|
||||
| Subagent delegation | ✔ (subagents inherit the parent fallback chain) |
|
||||
| Cron jobs | ✔ (cron agents inherit configured fallback providers) |
|
||||
| Auxiliary tasks (vision, compression) | ✘ (use their own provider chain — see below) |
|
||||
| Auxiliary tasks on `provider: auto` | ✔ (try per-task fallback, then the main fallback chain before built-in aux discovery) |
|
||||
|
||||
:::tip
|
||||
There are no environment variables for the primary fallback chain — configure it exclusively through `config.yaml` or `hermes fallback`. This is intentional: fallback configuration is a deliberate choice, not something a stale shell export should override.
|
||||
|
|
@ -195,23 +195,30 @@ Hermes uses separate lightweight models for side tasks. Each task has its own pr
|
|||
|
||||
### Auto-Detection Chain
|
||||
|
||||
When a task's provider is set to `"auto"` (the default), Hermes tries providers in order until one works:
|
||||
When a task's provider is set to `"auto"` (the default), Hermes first tries the main provider + main model for that auxiliary task. If that route is unavailable or later fails with a capacity-style error, Hermes honors any user-configured fallback policy before using the built-in discovery chain:
|
||||
|
||||
**For text tasks (compression, web extract, etc.):**
|
||||
```text
|
||||
Main provider + main model → auxiliary.<task>.fallback_chain →
|
||||
fallback_providers / fallback_model → built-in auxiliary discovery chain
|
||||
```
|
||||
|
||||
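The task-specific chain is most precise and wins when present. The top-level `fallback_providers` chain is the same policy the main agent uses, so restrictions such as a free-only list carry over to auxiliary tasks on `auto` as well. For auxiliary calls, entries in the top-level chain whose provider matches the main provider (or the provider that just failed) are skipped, and entries without a `model` are ignored.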
|
||||
|
||||
**Built-in text discovery chain (compression, web extract, title generation, etc.):**
|
||||
|
||||
```text
|
||||
OpenRouter → Nous Portal → Custom endpoint → Codex OAuth →
|
||||
API-key providers (z.ai, Kimi, MiniMax, Xiaomi MiMo, Hugging Face, Anthropic) → give up
|
||||
```
|
||||
|
||||
**For vision tasks:**
|
||||
**Built-in vision discovery chain:**
|
||||
|
||||
```text
|
||||
Main provider (if vision-capable) → OpenRouter → Nous Portal →
|
||||
Codex OAuth → Anthropic → Custom endpoint → give up
|
||||
```
|
||||
|
||||
If the resolved provider fails at call time, Hermes also has an internal retry: if the provider is not OpenRouter and no explicit `base_url` is set, it tries OpenRouter as a last-resort fallback.
|
||||
Those built-in chains are a convenience fallback for users who have not declared a task-specific or main fallback policy.
|
||||
|
||||
### Configuring Auxiliary Providers
|
||||
|
||||
|
|
@ -232,6 +239,9 @@ auxiliary:
|
|||
compression:
|
||||
provider: "auto"
|
||||
model: ""
|
||||
fallback_chain: # optional, task-specific fallback policy
|
||||
- provider: openrouter
|
||||
model: inclusionai/ring-2.6-1t:free
|
||||
|
||||
skills_hub:
|
||||
provider: "auto"
|
||||
|
|
@ -242,7 +252,9 @@ auxiliary:
|
|||
model: ""
|
||||
```
|
||||
|
||||
Every task above follows the same **provider / model / base_url** pattern. Context compression is configured under `auxiliary.compression`:
|
||||
Every task above follows the same **provider / model / base_url** pattern. Each task can also declare its own `fallback_chain`; if omitted, `provider: auto` uses the top-level `fallback_providers` chain before Hermes' built-in auxiliary discovery chain.
|
||||
|
||||
Context compression is configured under `auxiliary.compression`:
|
||||
|
||||
```yaml
|
||||
auxiliary:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue