diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 39fa378a914..ba78833248e 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -2606,6 +2606,133 @@ def _try_payment_fallback( return None, None, "" +def _try_main_agent_model_fallback( + failed_provider: str, + task: str = None, + reason: str = "error", +) -> Tuple[Optional[Any], Optional[str], str]: + """Last-resort fallback to the user's main agent provider + model. + + Used after the configured fallback_chain is exhausted (or empty) for + users with an explicit auxiliary provider. This is the "safety net" + layer: if nothing the user asked for can serve the request, try the + main chat model before giving up. + + Skips when the failed provider already IS the main provider (no point + retrying the same backend that just failed). + + Returns: + (client, model, provider_label) or (None, None, "") if no fallback. + """ + main_provider = (_read_main_provider() or "").strip() + main_model = (_read_main_model() or "").strip() + if not main_provider or not main_model or main_provider.lower() in {"auto", ""}: + return None, None, "" + + skip = (failed_provider or "").lower().strip() + if main_provider.lower() == skip: + # The thing that failed IS the main model — nothing to fall back to. + return None, None, "" + if _is_provider_unhealthy(main_provider): + _log_skip_unhealthy(main_provider, task) + return None, None, "" + + try: + client, resolved_model = resolve_provider_client( + provider=main_provider, model=main_model, + ) + except Exception: + client, resolved_model = None, None + + if client is None: + return None, None, "" + + label = f"main-agent({main_provider})" + logger.info( + "Auxiliary %s: %s on %s — falling back to main agent model %s (%s)", + task or "call", reason, failed_provider, label, resolved_model or main_model, + ) + return client, resolved_model or main_model, label + + +def _try_configured_fallback_chain( + task: str, + failed_provider: str, + reason: str = "error", +) -> Tuple[Optional[Any], Optional[str], str]: + """Try user-configured fallback_chain for a specific auxiliary task. + + Reads auxiliary..fallback_chain from config.yaml and tries each + entry in order. Each entry must have at least ``provider``; ``model``, + ``base_url``, and ``api_key`` are optional. + + Returns: + (client, model, provider_label) or (None, None, "") if no fallback. + """ + if not task: + return None, None, "" + + task_config = _get_auxiliary_task_config(task) + chain = task_config.get("fallback_chain") + if not chain or not isinstance(chain, list): + return None, None, "" + + skip = failed_provider.lower().strip() + tried = [] + + for i, entry in enumerate(chain): + if not isinstance(entry, dict): + continue + fb_provider = str(entry.get("provider", "")).strip() + if not fb_provider or fb_provider.lower() == skip: + continue + fb_model = str(entry.get("model", "")).strip() or None + fb_base_url = str(entry.get("base_url", "")).strip() or None + fb_api_key = str(entry.get("api_key", "")).strip() or None + + label = f"fallback_chain[{i}]({fb_provider})" + + try: + fb_client = _resolve_single_provider( + fb_provider, fb_model, fb_base_url, fb_api_key) + except Exception: + fb_client = None + + if fb_client is not None: + logger.info( + "Auxiliary %s: %s on %s — configured fallback to %s (%s)", + task, reason, failed_provider, label, fb_model or "default", + ) + return fb_client, fb_model, label + tried.append(label) + + if tried: + logger.debug( + "Auxiliary %s: configured fallback_chain exhausted (tried: %s)", + task, ", ".join(tried), + ) + return None, None, "" + + +def _resolve_single_provider( + provider: str, + model: Optional[str] = None, + base_url: Optional[str] = None, + api_key: Optional[str] = None, +) -> Optional[Any]: + """Resolve a single provider entry from fallback_chain to an OpenAI client. + + Uses the existing provider resolution infrastructure where possible. + """ + # Reuse resolve_provider_client which handles provider→client mapping + client, resolved_model = resolve_provider_client( + provider=provider, + model=model, + base_url=base_url, + api_key=api_key, + ) + return client + def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]: """Full auto-detection chain. @@ -4579,8 +4706,24 @@ def call_llm( reason = "connection error" logger.info("Auxiliary %s: %s on %s (%s), trying fallback", task or "call", reason, resolved_provider, first_err) - fb_client, fb_model, fb_label = _try_payment_fallback( - resolved_provider, task, reason=reason) + + # Fallback order (#26882, #26803): + # 1. User-configured fallback_chain (per-task) if set + # 2. Main agent model (last-resort safety net) + # For auto users (no explicit aux provider), use the full + # auto-detection chain instead — its Step 1 IS the main agent + # model, so users on `auto` already get main-model fallback. + fb_client, fb_model, fb_label = (None, None, "") + if is_auto: + fb_client, fb_model, fb_label = _try_payment_fallback( + resolved_provider, task, reason=reason) + else: + fb_client, fb_model, fb_label = _try_configured_fallback_chain( + task, resolved_provider or "auto", reason=reason) + if fb_client is None: + fb_client, fb_model, fb_label = _try_main_agent_model_fallback( + resolved_provider, task, reason=reason) + if fb_client is not None: fb_kwargs = _build_call_kwargs( fb_label, fb_model, messages, @@ -4590,6 +4733,14 @@ def call_llm( base_url=str(getattr(fb_client, "base_url", "") or "")) return _validate_llm_response( fb_client.chat.completions.create(**fb_kwargs), task) + # All fallback layers exhausted — emit a single user-visible + # warning so the operator knows aux task is about to fail. + # (#26882) The error itself is re-raised below. + logger.warning( + "Auxiliary %s: %s on %s and all fallbacks exhausted " + "(fallback_chain + main agent model). Raising original error.", + task or "call", reason, resolved_provider, + ) # Connection/timeout errors leave the cached client poisoned (closed # httpx transport, half-read stream, dead async loop). Drop it from # the cache regardless of whether we found a fallback above so the @@ -4908,8 +5059,23 @@ async def async_call_llm( reason = "connection error" logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback", task or "call", reason, resolved_provider, first_err) - fb_client, fb_model, fb_label = _try_payment_fallback( - resolved_provider, task, reason=reason) + + # Fallback order (#26882, #26803): + # 1. User-configured fallback_chain (per-task) if set + # 2. Main agent model (last-resort safety net) + # Auto users get the full auto-detection chain instead — its + # Step 1 IS the main agent model. + fb_client, fb_model, fb_label = (None, None, "") + if is_auto: + fb_client, fb_model, fb_label = _try_payment_fallback( + resolved_provider, task, reason=reason) + else: + fb_client, fb_model, fb_label = _try_configured_fallback_chain( + task, resolved_provider or "auto", reason=reason) + if fb_client is None: + fb_client, fb_model, fb_label = _try_main_agent_model_fallback( + resolved_provider, task, reason=reason) + if fb_client is not None: fb_kwargs = _build_call_kwargs( fb_label, fb_model, messages, @@ -4925,6 +5091,12 @@ async def async_call_llm( fb_kwargs["model"] = async_fb_model return _validate_llm_response( await async_fb.chat.completions.create(**fb_kwargs), task) + # All fallback layers exhausted — warn before re-raising. (#26882) + logger.warning( + "Auxiliary %s (async): %s on %s and all fallbacks exhausted " + "(fallback_chain + main agent model). Raising original error.", + task or "call", reason, resolved_provider, + ) # Mirror the sync path: drop poisoned clients on connection/timeout # so the next aux call rebuilds. See issue #23432. if _is_connection_error(first_err):