diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index 8850b7fd565..bcd84a373bb 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -2631,10 +2631,13 @@ def run_conversation( except Exception: pass if _genuine_nous_rate_limit: - # Skip straight to max_retries -- the - # top-of-loop guard will handle fallback or - # bail cleanly. - retry_count = max_retries + # Re-enter the loop exactly once so the + # top-of-loop Nous guard handles fallback or + # bails cleanly. (Setting retry_count to + # max_retries would make the while condition + # false immediately and the guard would never + # run -- no fallback, generic exhaustion error.) + retry_count = max(0, max_retries - 1) continue # Upstream capacity 429: fall through to normal # retry logic. A different model (or the same