From 8065e70274aa03397b9e69d8729c24b0de68020e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 24 May 2026 15:14:13 -0700 Subject: [PATCH] fix(agent): abort on HTTP 402 after pool rotation and fallback fail (#31443) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #31273. HTTP 402 (insufficient credits) was retried up to agent.api_max_retries times (default 3), burning paid requests against an exhausted balance. Real-world impact: ~$40 in 48h on a 24/7 Telegram+Discord gateway. Root cause: FailoverReason.billing was in the is_client_error exclusion set in agent/conversation_loop.py, which prevents the non-retryable-abort branch from firing. By the time control reaches that predicate: * credential-pool rotation has already run for billing and either continued the loop or returned False (pool exhausted/absent) * the eager-fallback branch has also fired on billing and either continued the loop or fell through (no fallback configured) Falling through to the backoff retry from here has no recovery mechanism left — it just burns more paid requests. Removing billing from the exclusion set makes 402 abort cleanly once pool+fallback recovery has failed, mirroring how 401/403 (also should_fallback=True) already behave. Added tests/run_agent/test_31273_402_not_retried.py which mirrors the is_client_error predicate shape from the source and asserts the invariant (plus a source-inspection guard against accidental re-introduction). --- agent/conversation_loop.py | 16 +- tests/run_agent/test_31273_402_not_retried.py | 147 ++++++++++++++++++ 2 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 tests/run_agent/test_31273_402_not_retried.py diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index eed3cfa956b..90b45c814d3 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -2806,6 +2806,21 @@ def run_conversation( # retryable=True mapping takes effect instead. and not isinstance(api_error, ssl.SSLError) ) + # ``FailoverReason.billing`` (HTTP 402) is NOT in this + # exclusion set. By the time we reach this block: + # • credential-pool rotation (line ~2031) has already + # fired for billing and either ``continue``d or + # returned (False, ...) — pool is exhausted or absent. + # • the eager-fallback branch above (line ~2422) also + # fires on billing and ``continue``s if a fallback + # provider is configured. + # Falling through to here means BOTH recovery paths + # gave up. Treating 402 as retryable from this point + # just burns more paid requests against a depleted + # balance with no recovery mechanism left — see #31273 + # (real-world: ~$40 in 48h on a 24/7 gateway). Aborting + # mirrors how 401/403 (also ``should_fallback=True``) + # already behave once their recovery paths have failed. is_client_error = ( is_local_validation_error or ( @@ -2813,7 +2828,6 @@ def run_conversation( and not classified.should_compress and classified.reason not in { FailoverReason.rate_limit, - FailoverReason.billing, FailoverReason.overloaded, FailoverReason.context_overflow, FailoverReason.payload_too_large, diff --git a/tests/run_agent/test_31273_402_not_retried.py b/tests/run_agent/test_31273_402_not_retried.py new file mode 100644 index 00000000000..bae4af45733 --- /dev/null +++ b/tests/run_agent/test_31273_402_not_retried.py @@ -0,0 +1,147 @@ +"""Regression guard for #31273: HTTP 402 (billing exhaustion) must abort +after credential-pool rotation and provider fallback have failed. + +Before the fix, ``FailoverReason.billing`` was in the exclusion set that +prevents the loop's ``is_client_error`` branch from firing. When a user +ran a pay-per-token provider (OpenRouter, etc.) with no credential pool +and no fallback configured, a single 402 cascaded into +``agent.api_max_retries`` paid requests against an exhausted balance. +Real-world impact: ~$40 burned in 48h on a 24/7 gateway routing Telegram ++ Discord traffic. + +The fix removes ``FailoverReason.billing`` from the exclusion set. By +the time control reaches the ``is_client_error`` check: + * credential-pool rotation has already run (and either ``continue``d + on rotation, or returned False because the pool is exhausted/absent). + * the eager-fallback branch for billing has also run (and either + ``continue``d on fallback activation, or fell through because no + fallback is configured). +Falling through to the retry-backoff path from here just burns paid +requests with no recovery mechanism left. Aborting mirrors how 401/403 +(also ``should_fallback=True``) already behave once their recovery paths +have failed. +""" +from __future__ import annotations + + +class TestBillingTriggersClientErrorAbort: + """Mirror the ``is_client_error`` predicate shape used in + ``agent/conversation_loop.py`` and verify ``FailoverReason.billing`` + now resolves to True (i.e. aborts the loop). + """ + + def _mirror_is_client_error( + self, + *, + classified_retryable: bool, + classified_reason, + classified_should_compress: bool = False, + is_local_validation_error: bool = False, + is_context_length_error: bool = False, + ) -> bool: + """Exact shape of conversation_loop.py's is_client_error check. + + Kept in lock-step with the source. If you change one, change + both — or, better, refactor the predicate into a shared helper + and have both sites import it. + """ + from agent.error_classifier import FailoverReason + + return ( + is_local_validation_error + or ( + not classified_retryable + and not classified_should_compress + and classified_reason not in { + FailoverReason.rate_limit, + FailoverReason.overloaded, + FailoverReason.context_overflow, + FailoverReason.payload_too_large, + FailoverReason.long_context_tier, + FailoverReason.thinking_signature, + } + ) + ) and not is_context_length_error + + def test_billing_now_aborts_the_loop(self): + """402 with no fallback / no pool entry → ``is_client_error`` True.""" + from agent.error_classifier import FailoverReason + + # This is what classify_api_error() returns for a plain 402: + # reason=billing, retryable=False, should_compress=False + assert self._mirror_is_client_error( + classified_retryable=False, + classified_reason=FailoverReason.billing, + ), ( + "FailoverReason.billing must trigger is_client_error abort after " + "credential-pool rotation and provider fallback have failed — see #31273." + ) + + def test_rate_limit_still_retries(self): + """Sanity check: rate_limit must still fall through to backoff retry.""" + from agent.error_classifier import FailoverReason + + # 429 / transient 402 / rate-limited usage: must NOT abort, + # because Retry-After backoff and pool rotation are the right + # recovery paths. + assert not self._mirror_is_client_error( + classified_retryable=True, + classified_reason=FailoverReason.rate_limit, + ) + + def test_local_validation_error_still_aborts(self): + """Sanity check: bare ValueError/TypeError still abort.""" + from agent.error_classifier import FailoverReason + + assert self._mirror_is_client_error( + classified_retryable=True, + classified_reason=FailoverReason.unknown, + is_local_validation_error=True, + ) + + def test_context_overflow_still_falls_through_to_compression(self): + """Sanity check: context-overflow must NOT be classified as + client error — compression is the recovery path.""" + from agent.error_classifier import FailoverReason + + assert not self._mirror_is_client_error( + classified_retryable=True, + classified_reason=FailoverReason.context_overflow, + classified_should_compress=True, + ) + + +class TestSourceStillHasBillingExclusionRemoved: + """Belt-and-suspenders: the production source must actually omit + ``FailoverReason.billing`` from the ``is_client_error`` exclusion + set. Protects against an accidental re-introduction. + """ + + def test_conversation_loop_omits_billing_from_client_error_exclusion(self): + import inspect + from agent import conversation_loop + + src = inspect.getsource(conversation_loop) + + # Locate the is_client_error block and inspect its exclusion set. + marker = "is_client_error = (" + assert marker in src, ( + "agent/conversation_loop.py must define is_client_error — " + "the bug-fix anchor for #31273 has moved or been renamed." + ) + idx = src.index(marker) + # Window large enough to span the full predicate (~30 lines). + window = src[idx:idx + 2000] + + assert "FailoverReason.rate_limit" in window, ( + "is_client_error exclusion set has changed shape — re-verify " + "that FailoverReason.billing is still NOT in it (#31273)." + ) + assert "FailoverReason.billing" not in window, ( + "FailoverReason.billing must NOT appear in the is_client_error " + "exclusion set — see #31273. Billing (HTTP 402) is non-retryable " + "by the time control reaches this block: credential-pool rotation " + "and provider fallback have both already had their chance to " + "continue the loop. Re-adding it causes runaway token spend on " + "depleted balances." + )