mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
feat(auxiliary): add configurable fallback chains + main-agent safety net
Layered fallback for auxiliary tasks (compression, vision, tts, web_extract, session_search, etc.): 1. Primary aux provider (existing) 2. User-configured auxiliary.<task>.fallback_chain (new) 3. Main agent provider + model (new — last-resort safety net) 4. Warn user + re-raise original error (new) For users on 'auto' (no explicit aux provider), the existing _try_payment_fallback auto-detection chain runs instead — its Step 1 already IS the main agent model, so they get the same behaviour without configuration. The configured fallback_chain config schema comes from #26882 / @zccyman; the main-agent safety net + exhaustion warning were added on top. Closes #26882. Builds on the capacity-error gate fix in the previous commit (#26803 / @Bartok9).
This commit is contained in:
parent
ec096cfbd8
commit
a574246837
1 changed files with 176 additions and 4 deletions
|
|
@ -2606,6 +2606,133 @@ def _try_payment_fallback(
|
|||
return None, None, ""
|
||||
|
||||
|
||||
def _try_main_agent_model_fallback(
|
||||
failed_provider: str,
|
||||
task: str = None,
|
||||
reason: str = "error",
|
||||
) -> Tuple[Optional[Any], Optional[str], str]:
|
||||
"""Last-resort fallback to the user's main agent provider + model.
|
||||
|
||||
Used after the configured fallback_chain is exhausted (or empty) for
|
||||
users with an explicit auxiliary provider. This is the "safety net"
|
||||
layer: if nothing the user asked for can serve the request, try the
|
||||
main chat model before giving up.
|
||||
|
||||
Skips when the failed provider already IS the main provider (no point
|
||||
retrying the same backend that just failed).
|
||||
|
||||
Returns:
|
||||
(client, model, provider_label) or (None, None, "") if no fallback.
|
||||
"""
|
||||
main_provider = (_read_main_provider() or "").strip()
|
||||
main_model = (_read_main_model() or "").strip()
|
||||
if not main_provider or not main_model or main_provider.lower() in {"auto", ""}:
|
||||
return None, None, ""
|
||||
|
||||
skip = (failed_provider or "").lower().strip()
|
||||
if main_provider.lower() == skip:
|
||||
# The thing that failed IS the main model — nothing to fall back to.
|
||||
return None, None, ""
|
||||
if _is_provider_unhealthy(main_provider):
|
||||
_log_skip_unhealthy(main_provider, task)
|
||||
return None, None, ""
|
||||
|
||||
try:
|
||||
client, resolved_model = resolve_provider_client(
|
||||
provider=main_provider, model=main_model,
|
||||
)
|
||||
except Exception:
|
||||
client, resolved_model = None, None
|
||||
|
||||
if client is None:
|
||||
return None, None, ""
|
||||
|
||||
label = f"main-agent({main_provider})"
|
||||
logger.info(
|
||||
"Auxiliary %s: %s on %s — falling back to main agent model %s (%s)",
|
||||
task or "call", reason, failed_provider, label, resolved_model or main_model,
|
||||
)
|
||||
return client, resolved_model or main_model, label
|
||||
|
||||
|
||||
def _try_configured_fallback_chain(
|
||||
task: str,
|
||||
failed_provider: str,
|
||||
reason: str = "error",
|
||||
) -> Tuple[Optional[Any], Optional[str], str]:
|
||||
"""Try user-configured fallback_chain for a specific auxiliary task.
|
||||
|
||||
Reads auxiliary.<task>.fallback_chain from config.yaml and tries each
|
||||
entry in order. Each entry must have at least ``provider``; ``model``,
|
||||
``base_url``, and ``api_key`` are optional.
|
||||
|
||||
Returns:
|
||||
(client, model, provider_label) or (None, None, "") if no fallback.
|
||||
"""
|
||||
if not task:
|
||||
return None, None, ""
|
||||
|
||||
task_config = _get_auxiliary_task_config(task)
|
||||
chain = task_config.get("fallback_chain")
|
||||
if not chain or not isinstance(chain, list):
|
||||
return None, None, ""
|
||||
|
||||
skip = failed_provider.lower().strip()
|
||||
tried = []
|
||||
|
||||
for i, entry in enumerate(chain):
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
fb_provider = str(entry.get("provider", "")).strip()
|
||||
if not fb_provider or fb_provider.lower() == skip:
|
||||
continue
|
||||
fb_model = str(entry.get("model", "")).strip() or None
|
||||
fb_base_url = str(entry.get("base_url", "")).strip() or None
|
||||
fb_api_key = str(entry.get("api_key", "")).strip() or None
|
||||
|
||||
label = f"fallback_chain[{i}]({fb_provider})"
|
||||
|
||||
try:
|
||||
fb_client = _resolve_single_provider(
|
||||
fb_provider, fb_model, fb_base_url, fb_api_key)
|
||||
except Exception:
|
||||
fb_client = None
|
||||
|
||||
if fb_client is not None:
|
||||
logger.info(
|
||||
"Auxiliary %s: %s on %s — configured fallback to %s (%s)",
|
||||
task, reason, failed_provider, label, fb_model or "default",
|
||||
)
|
||||
return fb_client, fb_model, label
|
||||
tried.append(label)
|
||||
|
||||
if tried:
|
||||
logger.debug(
|
||||
"Auxiliary %s: configured fallback_chain exhausted (tried: %s)",
|
||||
task, ", ".join(tried),
|
||||
)
|
||||
return None, None, ""
|
||||
|
||||
|
||||
def _resolve_single_provider(
|
||||
provider: str,
|
||||
model: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
) -> Optional[Any]:
|
||||
"""Resolve a single provider entry from fallback_chain to an OpenAI client.
|
||||
|
||||
Uses the existing provider resolution infrastructure where possible.
|
||||
"""
|
||||
# Reuse resolve_provider_client which handles provider→client mapping
|
||||
client, resolved_model = resolve_provider_client(
|
||||
provider=provider,
|
||||
model=model,
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
)
|
||||
return client
|
||||
|
||||
def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Full auto-detection chain.
|
||||
|
||||
|
|
@ -4579,8 +4706,24 @@ def call_llm(
|
|||
reason = "connection error"
|
||||
logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
|
||||
task or "call", reason, resolved_provider, first_err)
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
|
||||
# Fallback order (#26882, #26803):
|
||||
# 1. User-configured fallback_chain (per-task) if set
|
||||
# 2. Main agent model (last-resort safety net)
|
||||
# For auto users (no explicit aux provider), use the full
|
||||
# auto-detection chain instead — its Step 1 IS the main agent
|
||||
# model, so users on `auto` already get main-model fallback.
|
||||
fb_client, fb_model, fb_label = (None, None, "")
|
||||
if is_auto:
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
else:
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
if fb_client is None:
|
||||
fb_client, fb_model, fb_label = _try_main_agent_model_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
|
||||
if fb_client is not None:
|
||||
fb_kwargs = _build_call_kwargs(
|
||||
fb_label, fb_model, messages,
|
||||
|
|
@ -4590,6 +4733,14 @@ def call_llm(
|
|||
base_url=str(getattr(fb_client, "base_url", "") or ""))
|
||||
return _validate_llm_response(
|
||||
fb_client.chat.completions.create(**fb_kwargs), task)
|
||||
# All fallback layers exhausted — emit a single user-visible
|
||||
# warning so the operator knows aux task is about to fail.
|
||||
# (#26882) The error itself is re-raised below.
|
||||
logger.warning(
|
||||
"Auxiliary %s: %s on %s and all fallbacks exhausted "
|
||||
"(fallback_chain + main agent model). Raising original error.",
|
||||
task or "call", reason, resolved_provider,
|
||||
)
|
||||
# Connection/timeout errors leave the cached client poisoned (closed
|
||||
# httpx transport, half-read stream, dead async loop). Drop it from
|
||||
# the cache regardless of whether we found a fallback above so the
|
||||
|
|
@ -4908,8 +5059,23 @@ async def async_call_llm(
|
|||
reason = "connection error"
|
||||
logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
|
||||
task or "call", reason, resolved_provider, first_err)
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
|
||||
# Fallback order (#26882, #26803):
|
||||
# 1. User-configured fallback_chain (per-task) if set
|
||||
# 2. Main agent model (last-resort safety net)
|
||||
# Auto users get the full auto-detection chain instead — its
|
||||
# Step 1 IS the main agent model.
|
||||
fb_client, fb_model, fb_label = (None, None, "")
|
||||
if is_auto:
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
else:
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
if fb_client is None:
|
||||
fb_client, fb_model, fb_label = _try_main_agent_model_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
|
||||
if fb_client is not None:
|
||||
fb_kwargs = _build_call_kwargs(
|
||||
fb_label, fb_model, messages,
|
||||
|
|
@ -4925,6 +5091,12 @@ async def async_call_llm(
|
|||
fb_kwargs["model"] = async_fb_model
|
||||
return _validate_llm_response(
|
||||
await async_fb.chat.completions.create(**fb_kwargs), task)
|
||||
# All fallback layers exhausted — warn before re-raising. (#26882)
|
||||
logger.warning(
|
||||
"Auxiliary %s (async): %s on %s and all fallbacks exhausted "
|
||||
"(fallback_chain + main agent model). Raising original error.",
|
||||
task or "call", reason, resolved_provider,
|
||||
)
|
||||
# Mirror the sync path: drop poisoned clients on connection/timeout
|
||||
# so the next aux call rebuilds. See issue #23432.
|
||||
if _is_connection_error(first_err):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue