From c946e6709fa68b8a85f8e71709dd6642016b46a8 Mon Sep 17 00:00:00 2001 From: linyubin Date: Sat, 27 Jun 2026 18:55:43 -0700 Subject: [PATCH] fix(agent): activate fallback on persistent transport failures (#22277) Eager fallback previously fired only on rate_limit/billing. A stale-detector-killed hung stream classifies as FailoverReason.timeout (retryable=True) and the retry loop re-hit the same dead primary until the budget was exhausted -- 3 x ~180-300s stale kills compounding into a 15+ min silent hang while the configured fallback chain sat idle. Extend the existing eager-fallback gate to also cover timeout and overloaded, but only after one real retry (retry_count >= 2) so genuine transient hiccups still recover on the primary. Reuses the same pool-recovery guard and state-reset as the rate_limit branch -- no new config flag, no change to the rate-limit intent. Salvaged from PR #50228 by @linyubin. Closes #22277. Co-authored-by: Hermes Agent <127238744+teknium1@users.noreply.github.com> --- agent/conversation_loop.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index 959a55dc0e9..6dba9e502a9 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -2853,15 +2853,25 @@ def run_conversation( # Fall through to normal error handling if compression # is exhausted or didn't help. - # Eager fallback for rate-limit errors (429 or quota exhaustion). - # When a fallback model is configured, switch immediately instead - # of burning through retries with exponential backoff -- the - # primary provider won't recover within the retry window. + # Eager fallback for rate-limit errors (429 or quota exhaustion) + # and transport errors (connection failure / timeout / provider + # overloaded). Rate limits and billing: switch immediately — + # the primary provider won't recover within the retry window. 
+ # Transport errors: allow 1 retry first (transient hiccups + # recover), then fall back if the provider is truly unreachable. is_rate_limited = classified.reason in { FailoverReason.rate_limit, FailoverReason.billing, } - if is_rate_limited and agent._fallback_index < len(agent._fallback_chain): + _is_transport_failure = classified.reason in { + FailoverReason.timeout, + FailoverReason.overloaded, + } + _should_fallback = ( + is_rate_limited + or (_is_transport_failure and retry_count >= 2) + ) + if _should_fallback and agent._fallback_index < len(agent._fallback_chain): # Don't eagerly fallback if credential pool rotation may # still recover. See _pool_may_recover_from_rate_limit # for the single-credential-pool and CloudCode-quota @@ -2876,6 +2886,10 @@ def run_conversation( agent._buffer_status( "⚠️ Billing or credits exhausted — switching to fallback provider..." ) + elif _is_transport_failure: + agent._buffer_status( + "⚠️ Provider unreachable — switching to fallback provider..." + ) else: agent._buffer_status("⚠️ Rate limited — switching to fallback provider...") if agent._try_activate_fallback(reason=classified.reason):