From 4858942c552733f72de5b2d0dfdfcc7a3a1dc248 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:23:24 -0700 Subject: [PATCH] fix(auxiliary): honor main fallback chain for auto tasks (#47235) --- agent/auxiliary_client.py | 186 +++++++++++++++--- tests/agent/test_auxiliary_client.py | 31 +++ tests/agent/test_auxiliary_main_first.py | 58 ++++++ .../run_agent/test_async_httpx_del_neuter.py | 7 +- .../docs/reference/environment-variables.md | 4 +- website/docs/user-guide/configuring-models.md | 18 +- .../user-guide/features/fallback-providers.md | 24 ++- 7 files changed, 290 insertions(+), 38 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 01ea45d7be2..86a1c765a78 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -3079,23 +3079,20 @@ def _try_configured_fallback_chain( if not fb_provider or fb_provider.lower() == skip: continue fb_model = str(entry.get("model", "")).strip() or None - fb_base_url = str(entry.get("base_url", "")).strip() or None - fb_api_key = str(entry.get("api_key", "")).strip() or None label = f"fallback_chain[{i}]({fb_provider})" try: - fb_client = _resolve_single_provider( - fb_provider, fb_model, fb_base_url, fb_api_key) + fb_client, resolved_model = _resolve_fallback_entry(entry) except Exception: - fb_client = None + fb_client, resolved_model = None, None if fb_client is not None: logger.info( "Auxiliary %s: %s on %s — configured fallback to %s (%s)", - task, reason, failed_provider, label, fb_model or "default", + task, reason, failed_provider, label, resolved_model or fb_model or "default", ) - return fb_client, fb_model, label + return fb_client, resolved_model or fb_model, label tried.append(label) if tried: @@ -3106,6 +3103,103 @@ def _try_configured_fallback_chain( return None, None, "" +def _fallback_entry_api_key(entry: Dict[str, Any]) -> Optional[str]: + """Resolve inline or env-backed API key from a fallback-chain 
entry.""" + explicit = str(entry.get("api_key") or "").strip() + if explicit: + return explicit + key_env = str(entry.get("key_env") or entry.get("api_key_env") or "").strip() + if key_env: + return os.getenv(key_env, "").strip() or None + return None + + +def _resolve_fallback_entry(entry: Dict[str, Any]) -> Tuple[Optional[Any], Optional[str]]: + """Resolve one fallback entry through the central provider router.""" + provider = str(entry.get("provider") or "").strip() + model = str(entry.get("model") or "").strip() or None + if not provider or not model: + return None, None + base_url = str(entry.get("base_url") or "").strip() or None + api_key = _fallback_entry_api_key(entry) + api_mode = str(entry.get("api_mode") or entry.get("transport") or "").strip() or None + return resolve_provider_client( + provider, + model=model, + explicit_base_url=base_url, + explicit_api_key=api_key, + api_mode=api_mode, + ) + + +def _try_main_fallback_chain( + task: Optional[str], + failed_provider: str = "", + reason: str = "error", +) -> Tuple[Optional[Any], Optional[str], str]: + """Try the top-level main-agent fallback chain for an auxiliary call. + + ``provider: auto`` auxiliary tasks should respect the user's declared + main fallback policy before dropping into Hermes' built-in discovery + chain. The top-level chain is read through ``get_fallback_chain`` so + both modern ``fallback_providers`` and legacy ``fallback_model`` entries + participate in the same order as the main agent. 
+ """ + try: + from hermes_cli.config import load_config + from hermes_cli.fallback_config import get_fallback_chain + + chain = get_fallback_chain(load_config()) + except Exception as exc: + logger.debug("Auxiliary %s: could not load main fallback chain: %s", task or "call", exc) + return None, None, "" + + if not chain: + return None, None, "" + + failed_norm = (failed_provider or "").strip().lower() + main_norm = (_read_main_provider() or "").strip().lower() + skip = {p for p in (failed_norm, main_norm, "auto") if p} + tried: List[str] = [] + + for i, entry in enumerate(chain): + if not isinstance(entry, dict): + continue + fb_provider = str(entry.get("provider") or "").strip() + fb_model = str(entry.get("model") or "").strip() + if not fb_provider or not fb_model: + continue + fb_norm = fb_provider.lower() + label = f"fallback_providers[{i}]({fb_provider})" + if fb_norm in skip: + tried.append(f"{label} (skipped)") + continue + if _is_provider_unhealthy(fb_norm): + _log_skip_unhealthy(fb_norm, task) + tried.append(f"{label} (unhealthy)") + continue + try: + fb_client, resolved_model = _resolve_fallback_entry(entry) + except Exception as exc: + logger.debug("Auxiliary %s: main fallback %s failed to resolve: %s", task or "call", label, exc) + fb_client, resolved_model = None, None + if fb_client is not None: + logger.info( + "Auxiliary %s: %s on %s — main fallback chain to %s (%s)", + task or "call", reason, failed_provider or "auto", label, + resolved_model or fb_model, + ) + return fb_client, resolved_model or fb_model, fb_provider + tried.append(label) + + if tried: + logger.debug( + "Auxiliary %s: main fallback chain exhausted (tried: %s)", + task or "call", ", ".join(tried), + ) + return None, None, "" + + def _resolve_single_provider( provider: str, model: Optional[str] = None, @@ -3116,16 +3210,19 @@ def _resolve_single_provider( Uses the existing provider resolution infrastructure where possible. 
""" - # Reuse resolve_provider_client which handles provider→client mapping + # Reuse resolve_provider_client which handles provider→client mapping. client, resolved_model = resolve_provider_client( provider=provider, model=model, - base_url=base_url, - api_key=api_key, + explicit_base_url=base_url, + explicit_api_key=api_key, ) return client -def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]: +def _resolve_auto( + main_runtime: Optional[Dict[str, Any]] = None, + task: Optional[str] = None, +) -> Tuple[Optional[OpenAI], Optional[str]]: """Full auto-detection chain. Priority: @@ -3223,7 +3320,22 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option main_provider, resolved or main_model) return client, resolved or main_model - # ── Step 2: aggregator / fallback chain ────────────────────────────── + # ── Step 2: user-configured fallback policy ───────────────────────── + # In auto mode, respect the task-specific fallback chain first, then the + # main agent's top-level fallback_providers/fallback_model chain. The + # hardcoded provider discovery chain below is only the convenience default + # for users who have not declared a fallback policy. 
+ if task: + fb_client, fb_model, _fb_label = _try_configured_fallback_chain( + task, main_provider or "auto", reason="main provider unavailable") + if fb_client is not None: + return fb_client, fb_model + fb_client, fb_model, _fb_label = _try_main_fallback_chain( + task, main_provider or "auto", reason="main provider unavailable") + if fb_client is not None: + return fb_client, fb_model + + # ── Step 3: aggregator / fallback chain ────────────────────────────── tried = [] for label, try_fn in _get_provider_chain(): if _is_provider_unhealthy(label): @@ -3344,6 +3456,7 @@ def resolve_provider_client( api_mode: str = None, main_runtime: Optional[Dict[str, Any]] = None, is_vision: bool = False, + task: Optional[str] = None, ) -> Tuple[Optional[Any], Optional[str]]: """Central router: given a provider name and optional model, return a configured client with the correct auth, base URL, and API format. @@ -3464,7 +3577,7 @@ def resolve_provider_client( # ── Auto: try all providers in priority order ──────────────────── if provider == "auto": - client, resolved = _resolve_auto(main_runtime=main_runtime) + client, resolved = _resolve_auto(main_runtime=main_runtime, task=task) if client is None: return None, None # When auto-detection lands on a non-OpenRouter provider (e.g. a @@ -4357,11 +4470,16 @@ def _client_cache_key( api_mode: Optional[str] = None, main_runtime: Optional[Dict[str, Any]] = None, is_vision: bool = False, + task: Optional[str] = None, ) -> tuple: runtime = _normalize_main_runtime(main_runtime) runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else () + # `auto` can now resolve through task-specific or main fallback policy, + # so the task participates in the cache key. Non-auto providers keep the + # old cache shape because the explicit provider/model tuple is sufficient. 
+ task_key = (task or "") if provider == "auto" else "" pool_hint = _pool_cache_hint(provider, main_runtime=main_runtime) - return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, pool_hint) + return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, task_key, pool_hint) def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None: @@ -4554,6 +4672,7 @@ def _get_cached_client( api_mode: str = None, main_runtime: Optional[Dict[str, Any]] = None, is_vision: bool = False, + task: Optional[str] = None, ) -> Tuple[Optional[Any], Optional[str]]: """Get or create a cached client for the given provider. @@ -4591,6 +4710,7 @@ def _get_cached_client( api_mode=api_mode, main_runtime=main_runtime, is_vision=is_vision, + task=task, ) with _client_cache_lock: if cache_key in _client_cache: @@ -4635,6 +4755,7 @@ def _get_cached_client( api_mode=api_mode, main_runtime=runtime, is_vision=is_vision, + task=task, ) if client is not None: # For async clients, remember which loop they were created on so we @@ -5140,7 +5261,7 @@ def call_llm( if not resolved_base_url: logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain", task or "call", resolved_provider) - client, final_model = _get_cached_client("auto", main_runtime=main_runtime) + client, final_model = _get_cached_client("auto", main_runtime=main_runtime, task=task) if client is None: raise RuntimeError( f"No LLM provider configured for task={task} provider={resolved_provider}. " @@ -5466,14 +5587,19 @@ def call_llm( # Fallback order (#26882, #26803): # 1. User-configured fallback_chain (per-task) if set - # 2. Main agent model (last-resort safety net) - # For auto users (no explicit aux provider), use the full - # auto-detection chain instead — its Step 1 IS the main agent - # model, so users on `auto` already get main-model fallback. + # 2. 
For auto: top-level main fallback_providers/fallback_model + # 3. For auto: built-in auxiliary discovery chain + # 4. For explicit aux providers: main agent model safety net fb_client, fb_model, fb_label = (None, None, "") if is_auto: - fb_client, fb_model, fb_label = _try_payment_fallback( - resolved_provider, task, reason=reason) + fb_client, fb_model, fb_label = _try_configured_fallback_chain( + task, resolved_provider or "auto", reason=reason) + if fb_client is None: + fb_client, fb_model, fb_label = _try_main_fallback_chain( + task, resolved_provider or "auto", reason=reason) + if fb_client is None: + fb_client, fb_model, fb_label = _try_payment_fallback( + resolved_provider, task, reason=reason) else: fb_client, fb_model, fb_label = _try_configured_fallback_chain( task, resolved_provider or "auto", reason=reason) @@ -5636,7 +5762,7 @@ async def async_call_llm( if not resolved_base_url: logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain", task or "call", resolved_provider) - client, final_model = _get_cached_client("auto", async_mode=True) + client, final_model = _get_cached_client("auto", async_mode=True, main_runtime=main_runtime, task=task) if client is None: raise RuntimeError( f"No LLM provider configured for task={task} provider={resolved_provider}. " @@ -5904,13 +6030,19 @@ async def async_call_llm( # Fallback order (#26882, #26803): # 1. User-configured fallback_chain (per-task) if set - # 2. Main agent model (last-resort safety net) - # Auto users get the full auto-detection chain instead — its - # Step 1 IS the main agent model. + # 2. For auto: top-level main fallback_providers/fallback_model + # 3. For auto: built-in auxiliary discovery chain + # 4. 
For explicit aux providers: main agent model safety net fb_client, fb_model, fb_label = (None, None, "") if is_auto: - fb_client, fb_model, fb_label = _try_payment_fallback( - resolved_provider, task, reason=reason) + fb_client, fb_model, fb_label = _try_configured_fallback_chain( + task, resolved_provider or "auto", reason=reason) + if fb_client is None: + fb_client, fb_model, fb_label = _try_main_fallback_chain( + task, resolved_provider or "auto", reason=reason) + if fb_client is None: + fb_client, fb_model, fb_label = _try_payment_fallback( + resolved_provider, task, reason=reason) else: fb_client, fb_model, fb_label = _try_configured_fallback_chain( task, resolved_provider or "auto", reason=reason) diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 7770b2e8c88..b2960b703c7 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -1653,6 +1653,37 @@ class TestAuxiliaryFallbackLayering: exc.status_code = 402 return exc + def test_auto_provider_uses_task_then_main_chain_before_builtin_chain(self, monkeypatch): + """Auto aux call failures try per-task then top-level fallback before built-ins.""" + primary_client = MagicMock() + primary_client.chat.completions.create.side_effect = self._make_payment_err() + + main_chain_client = MagicMock() + main_chain_client.chat.completions.create.return_value = MagicMock(choices=[ + MagicMock(message=MagicMock(content="from main fallback chain")) + ]) + + with patch("agent.auxiliary_client._get_cached_client", + return_value=(primary_client, "qwen/qwen3.5-122b-a10b")), \ + patch("agent.auxiliary_client._resolve_task_provider_model", + return_value=("auto", None, None, None, None)), \ + patch("agent.auxiliary_client._try_configured_fallback_chain", + return_value=(None, None, "")) as mock_task_chain, \ + patch("agent.auxiliary_client._try_main_fallback_chain", + return_value=(main_chain_client, "inclusionai/ring-2.6-1t:free", "openrouter")) as 
mock_main_chain, \ + patch("agent.auxiliary_client._try_payment_fallback") as mock_builtin_chain: + result = call_llm( + task="title_generation", + messages=[{"role": "user", "content": "hello"}], + ) + + assert main_chain_client.chat.completions.create.called + mock_task_chain.assert_called_once_with( + "title_generation", "auto", reason="payment error") + mock_main_chain.assert_called_once_with( + "title_generation", "auto", reason="payment error") + mock_builtin_chain.assert_not_called() + def test_explicit_provider_uses_configured_chain_first(self, monkeypatch, caplog): """When a user has fallback_chain configured, it's tried BEFORE the main agent model.""" monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") diff --git a/tests/agent/test_auxiliary_main_first.py b/tests/agent/test_auxiliary_main_first.py index 8913aad537f..f8a681ebfa9 100644 --- a/tests/agent/test_auxiliary_main_first.py +++ b/tests/agent/test_auxiliary_main_first.py @@ -118,6 +118,64 @@ class TestResolveAutoMainFirst: assert client is chain_client assert model == "google/gemini-3-flash-preview" + def test_main_unavailable_uses_task_fallback_chain_before_builtin_chain(self): + """Auto aux resolution honors auxiliary..fallback_chain before built-ins.""" + task_client = MagicMock() + with patch( + "agent.auxiliary_client._read_main_provider", return_value="nvidia", + ), patch( + "agent.auxiliary_client._read_main_model", return_value="qwen/qwen3.5-122b-a10b", + ), patch( + "agent.auxiliary_client.resolve_provider_client", + return_value=(None, None), # main provider has no client + ), patch( + "agent.auxiliary_client._try_configured_fallback_chain", + return_value=(task_client, "task-free-model", "fallback_chain[0](openrouter)"), + ) as mock_task_chain, patch( + "agent.auxiliary_client._try_main_fallback_chain", + ) as mock_main_chain, patch( + "agent.auxiliary_client._try_openrouter", + ) as mock_openrouter: + from agent.auxiliary_client import _resolve_auto + + client, model = 
_resolve_auto(task="title_generation") + + assert client is task_client + assert model == "task-free-model" + mock_task_chain.assert_called_once_with( + "title_generation", "nvidia", reason="main provider unavailable") + mock_main_chain.assert_not_called() + mock_openrouter.assert_not_called() + + def test_main_unavailable_uses_main_fallback_chain_before_builtin_chain(self): + """Auto aux resolution honors top-level fallback_providers before built-ins.""" + main_fallback_client = MagicMock() + with patch( + "agent.auxiliary_client._read_main_provider", return_value="nvidia", + ), patch( + "agent.auxiliary_client._read_main_model", return_value="qwen/qwen3.5-122b-a10b", + ), patch( + "agent.auxiliary_client.resolve_provider_client", + return_value=(None, None), # main provider has no client + ), patch( + "agent.auxiliary_client._try_configured_fallback_chain", + return_value=(None, None, ""), + ), patch( + "agent.auxiliary_client._try_main_fallback_chain", + return_value=(main_fallback_client, "inclusionai/ring-2.6-1t:free", "openrouter"), + ) as mock_main_chain, patch( + "agent.auxiliary_client._try_openrouter", + ) as mock_openrouter: + from agent.auxiliary_client import _resolve_auto + + client, model = _resolve_auto(task="title_generation") + + assert client is main_fallback_client + assert model == "inclusionai/ring-2.6-1t:free" + mock_main_chain.assert_called_once_with( + "title_generation", "nvidia", reason="main provider unavailable") + mock_openrouter.assert_not_called() + def test_no_main_config_uses_chain_directly(self): """No main provider configured → skip step 1, use chain (no regression).""" chain_client = MagicMock() diff --git a/tests/run_agent/test_async_httpx_del_neuter.py b/tests/run_agent/test_async_httpx_del_neuter.py index 946d73dbdf1..090e6998269 100644 --- a/tests/run_agent/test_async_httpx_del_neuter.py +++ b/tests/run_agent/test_async_httpx_del_neuter.py @@ -176,11 +176,16 @@ class TestClientCacheBoundedGrowth: """When the loop changes, 
the old entry should be replaced, not duplicated.""" from agent.auxiliary_client import ( _client_cache, + _client_cache_key, _client_cache_lock, _get_cached_client, ) - key = ("test_replace", True, "", "", "", (), False, "") + key = _client_cache_key( + "test_replace", + async_mode=True, + task="", + ) # Simulate a stale entry from a closed loop old_loop = asyncio.new_event_loop() diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 2419846a10f..76ce863e661 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -687,7 +687,7 @@ For task-specific direct endpoints, Hermes uses the task's configured API key or ## Fallback Providers (config.yaml only) -The primary model fallback chain is configured exclusively through `config.yaml` — there are no environment variables for it. Add a top-level `fallback_providers` list with `provider` and `model` keys to enable automatic failover when your main model encounters errors. +The primary model fallback chain is configured exclusively through `config.yaml` — there are no environment variables for it. Add a top-level `fallback_providers` list with `provider` and `model` keys to enable automatic failover when your main model encounters errors. Auxiliary tasks whose provider is `auto` also consult this chain before Hermes' built-in auxiliary discovery chain. ```yaml fallback_providers: @@ -695,7 +695,7 @@ fallback_providers: model: anthropic/claude-sonnet-4 ``` -The older top-level `fallback_model` single-provider shape is still read for backward compatibility, but new configuration should use `fallback_providers`. +The older top-level `fallback_model` single-provider shape is still read for backward compatibility, but new configuration should use `fallback_providers`. For task-specific auxiliary policy, use `auxiliary.<task>.fallback_chain` in `config.yaml`; there is no environment variable equivalent. 
See [Fallback Providers](/user-guide/features/fallback-providers) for full details. diff --git a/website/docs/user-guide/configuring-models.md b/website/docs/user-guide/configuring-models.md index 3368d5201d8..8d749e15143 100644 --- a/website/docs/user-guide/configuring-models.md +++ b/website/docs/user-guide/configuring-models.md @@ -53,7 +53,7 @@ Click **Show auxiliary** to reveal the 11 task slots: ![Auxiliary panel expanded](/img/docs/dashboard-models/auxiliary-expanded.png) -Every auxiliary task defaults to `auto` — meaning Hermes uses your main model for that job too. Override a specific task when you want a cheaper or faster model for a side-job. +Every auxiliary task defaults to `auto` — meaning Hermes tries your main model for that job too. If that route is unavailable or hits a capacity-style failure, `auto` follows any task-specific `auxiliary.<task>.fallback_chain`, then the main `fallback_providers` / `fallback_model` chain, then Hermes' built-in auxiliary discovery chain. Override a specific task when you want a cheaper or faster model for a side-job. ### Common override patterns @@ -129,7 +129,21 @@ auxiliary: # ... other fields unchanged ``` -`provider: auto` with `model: ''` tells Hermes to use the main model for that task. +`provider: auto` with `model: ''` tells Hermes to use the main model for that task, while still honoring fallback policy if the main route cannot serve the auxiliary call. + +Optional task-specific fallback chains live under the same auxiliary task: + +```yaml +auxiliary: + title_generation: + provider: auto + model: '' + fallback_chain: + - provider: openrouter + model: inclusionai/ring-2.6-1t:free +``` + +When `fallback_chain` is absent, `auto` uses the top-level `fallback_providers` chain before the built-in auxiliary discovery chain. ## When does it take effect? 
diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index 7eaab0ea8af..dbe431fc1ea 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -168,7 +168,7 @@ fallback_providers: | Messaging gateway (Telegram, Discord, etc.) | ✔ | | Subagent delegation | ✔ (subagents inherit the parent fallback chain) | | Cron jobs | ✔ (cron agents inherit configured fallback providers) | -| Auxiliary tasks (vision, compression) | ✘ (use their own provider chain — see below) | +| Auxiliary tasks on `provider: auto` | ✔ (try per-task fallback, then the main fallback chain before built-in aux discovery) | :::tip There are no environment variables for the primary fallback chain — configure it exclusively through `config.yaml` or `hermes fallback`. This is intentional: fallback configuration is a deliberate choice, not something a stale shell export should override. @@ -195,23 +195,30 @@ Hermes uses separate lightweight models for side tasks. Each task has its own pr ### Auto-Detection Chain -When a task's provider is set to `"auto"` (the default), Hermes tries providers in order until one works: +When a task's provider is set to `"auto"` (the default), Hermes first tries the main provider + main model for that auxiliary task. If that route is unavailable or later fails with a capacity-style error, Hermes now honors user-configured fallback policy before using the built-in discovery chain: -**For text tasks (compression, web extract, etc.):** +```text +Main provider + main model → auxiliary.<task>.fallback_chain → +fallback_providers / fallback_model → built-in auxiliary discovery chain +``` + +The task-specific chain is most precise and wins when present. The top-level `fallback_providers` chain is the same policy the main agent uses, so free-only or same-provider fallback rules apply to auxiliary tasks on `auto` as well. 
+ +**Built-in text discovery chain (compression, web extract, title generation, etc.):** ```text OpenRouter → Nous Portal → Custom endpoint → Codex OAuth → API-key providers (z.ai, Kimi, MiniMax, Xiaomi MiMo, Hugging Face, Anthropic) → give up ``` -**For vision tasks:** +**Built-in vision discovery chain:** ```text Main provider (if vision-capable) → OpenRouter → Nous Portal → Codex OAuth → Anthropic → Custom endpoint → give up ``` -If the resolved provider fails at call time, Hermes also has an internal retry: if the provider is not OpenRouter and no explicit `base_url` is set, it tries OpenRouter as a last-resort fallback. +Those built-in chains are a convenience fallback for users who have not declared a task-specific or main fallback policy. ### Configuring Auxiliary Providers @@ -232,6 +239,9 @@ auxiliary: compression: provider: "auto" model: "" + fallback_chain: # optional, task-specific fallback policy + - provider: openrouter + model: inclusionai/ring-2.6-1t:free skills_hub: provider: "auto" @@ -242,7 +252,9 @@ auxiliary: model: "" ``` -Every task above follows the same **provider / model / base_url** pattern. Context compression is configured under `auxiliary.compression`: +Every task above follows the same **provider / model / base_url** pattern. Each task can also declare its own `fallback_chain`; if omitted, `provider: auto` uses the top-level `fallback_providers` chain before Hermes' built-in auxiliary discovery chain. + +Context compression is configured under `auxiliary.compression`: ```yaml auxiliary: