feat(auxiliary): add configurable fallback chains + main-agent safety net

Layered fallback for auxiliary tasks (compression, vision, tts, web_extract,
session_search, etc.):

  1. Primary aux provider (existing)
  2. User-configured auxiliary.<task>.fallback_chain (new)
  3. Main agent provider + model (new — last-resort safety net)
  4. Warn user + re-raise original error (new)

For users on 'auto' (no explicit aux provider), the existing
_try_payment_fallback auto-detection chain runs instead — its Step 1
already IS the main agent model, so they get the same behaviour without
configuration.

The configured fallback_chain config schema comes from #26882 / @zccyman;
the main-agent safety net + exhaustion warning were added on top.

Closes #26882. Builds on the capacity-error gate fix in the previous
commit (#26803 / @Bartok9).
This commit is contained in:
zccyman 2026-05-16 16:07:41 +00:00 committed by Teknium
parent ec096cfbd8
commit a574246837

View file

@ -2606,6 +2606,133 @@ def _try_payment_fallback(
return None, None, ""
def _try_main_agent_model_fallback(
failed_provider: str,
task: str = None,
reason: str = "error",
) -> Tuple[Optional[Any], Optional[str], str]:
"""Last-resort fallback to the user's main agent provider + model.
Used after the configured fallback_chain is exhausted (or empty) for
users with an explicit auxiliary provider. This is the "safety net"
layer: if nothing the user asked for can serve the request, try the
main chat model before giving up.
Skips when the failed provider already IS the main provider (no point
retrying the same backend that just failed).
Returns:
(client, model, provider_label) or (None, None, "") if no fallback.
"""
main_provider = (_read_main_provider() or "").strip()
main_model = (_read_main_model() or "").strip()
if not main_provider or not main_model or main_provider.lower() in {"auto", ""}:
return None, None, ""
skip = (failed_provider or "").lower().strip()
if main_provider.lower() == skip:
# The thing that failed IS the main model — nothing to fall back to.
return None, None, ""
if _is_provider_unhealthy(main_provider):
_log_skip_unhealthy(main_provider, task)
return None, None, ""
try:
client, resolved_model = resolve_provider_client(
provider=main_provider, model=main_model,
)
except Exception:
client, resolved_model = None, None
if client is None:
return None, None, ""
label = f"main-agent({main_provider})"
logger.info(
"Auxiliary %s: %s on %s — falling back to main agent model %s (%s)",
task or "call", reason, failed_provider, label, resolved_model or main_model,
)
return client, resolved_model or main_model, label
def _try_configured_fallback_chain(
task: str,
failed_provider: str,
reason: str = "error",
) -> Tuple[Optional[Any], Optional[str], str]:
"""Try user-configured fallback_chain for a specific auxiliary task.
Reads auxiliary.<task>.fallback_chain from config.yaml and tries each
entry in order. Each entry must have at least ``provider``; ``model``,
``base_url``, and ``api_key`` are optional.
Returns:
(client, model, provider_label) or (None, None, "") if no fallback.
"""
if not task:
return None, None, ""
task_config = _get_auxiliary_task_config(task)
chain = task_config.get("fallback_chain")
if not chain or not isinstance(chain, list):
return None, None, ""
skip = failed_provider.lower().strip()
tried = []
for i, entry in enumerate(chain):
if not isinstance(entry, dict):
continue
fb_provider = str(entry.get("provider", "")).strip()
if not fb_provider or fb_provider.lower() == skip:
continue
fb_model = str(entry.get("model", "")).strip() or None
fb_base_url = str(entry.get("base_url", "")).strip() or None
fb_api_key = str(entry.get("api_key", "")).strip() or None
label = f"fallback_chain[{i}]({fb_provider})"
try:
fb_client = _resolve_single_provider(
fb_provider, fb_model, fb_base_url, fb_api_key)
except Exception:
fb_client = None
if fb_client is not None:
logger.info(
"Auxiliary %s: %s on %s — configured fallback to %s (%s)",
task, reason, failed_provider, label, fb_model or "default",
)
return fb_client, fb_model, label
tried.append(label)
if tried:
logger.debug(
"Auxiliary %s: configured fallback_chain exhausted (tried: %s)",
task, ", ".join(tried),
)
return None, None, ""
def _resolve_single_provider(
provider: str,
model: Optional[str] = None,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
) -> Optional[Any]:
"""Resolve a single provider entry from fallback_chain to an OpenAI client.
Uses the existing provider resolution infrastructure where possible.
"""
# Reuse resolve_provider_client which handles provider→client mapping
client, resolved_model = resolve_provider_client(
provider=provider,
model=model,
base_url=base_url,
api_key=api_key,
)
return client
def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
"""Full auto-detection chain.
@ -4579,8 +4706,24 @@ def call_llm(
reason = "connection error"
logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
task or "call", reason, resolved_provider, first_err)
fb_client, fb_model, fb_label = _try_payment_fallback(
resolved_provider, task, reason=reason)
# Fallback order (#26882, #26803):
# 1. User-configured fallback_chain (per-task) if set
# 2. Main agent model (last-resort safety net)
# For auto users (no explicit aux provider), use the full
# auto-detection chain instead — its Step 1 IS the main agent
# model, so users on `auto` already get main-model fallback.
fb_client, fb_model, fb_label = (None, None, "")
if is_auto:
fb_client, fb_model, fb_label = _try_payment_fallback(
resolved_provider, task, reason=reason)
else:
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
task, resolved_provider or "auto", reason=reason)
if fb_client is None:
fb_client, fb_model, fb_label = _try_main_agent_model_fallback(
resolved_provider, task, reason=reason)
if fb_client is not None:
fb_kwargs = _build_call_kwargs(
fb_label, fb_model, messages,
@ -4590,6 +4733,14 @@ def call_llm(
base_url=str(getattr(fb_client, "base_url", "") or ""))
return _validate_llm_response(
fb_client.chat.completions.create(**fb_kwargs), task)
# All fallback layers exhausted — emit a single user-visible
# warning so the operator knows aux task is about to fail.
# (#26882) The error itself is re-raised below.
logger.warning(
"Auxiliary %s: %s on %s and all fallbacks exhausted "
"(fallback_chain + main agent model). Raising original error.",
task or "call", reason, resolved_provider,
)
# Connection/timeout errors leave the cached client poisoned (closed
# httpx transport, half-read stream, dead async loop). Drop it from
# the cache regardless of whether we found a fallback above so the
@ -4908,8 +5059,23 @@ async def async_call_llm(
reason = "connection error"
logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
task or "call", reason, resolved_provider, first_err)
fb_client, fb_model, fb_label = _try_payment_fallback(
resolved_provider, task, reason=reason)
# Fallback order (#26882, #26803):
# 1. User-configured fallback_chain (per-task) if set
# 2. Main agent model (last-resort safety net)
# Auto users get the full auto-detection chain instead — its
# Step 1 IS the main agent model.
fb_client, fb_model, fb_label = (None, None, "")
if is_auto:
fb_client, fb_model, fb_label = _try_payment_fallback(
resolved_provider, task, reason=reason)
else:
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
task, resolved_provider or "auto", reason=reason)
if fb_client is None:
fb_client, fb_model, fb_label = _try_main_agent_model_fallback(
resolved_provider, task, reason=reason)
if fb_client is not None:
fb_kwargs = _build_call_kwargs(
fb_label, fb_model, messages,
@ -4925,6 +5091,12 @@ async def async_call_llm(
fb_kwargs["model"] = async_fb_model
return _validate_llm_response(
await async_fb.chat.completions.create(**fb_kwargs), task)
# All fallback layers exhausted — warn before re-raising. (#26882)
logger.warning(
"Auxiliary %s (async): %s on %s and all fallbacks exhausted "
"(fallback_chain + main agent model). Raising original error.",
task or "call", reason, resolved_provider,
)
# Mirror the sync path: drop poisoned clients on connection/timeout
# so the next aux call rebuilds. See issue #23432.
if _is_connection_error(first_err):