fix: stop billing fallback retry spam in gateways

2026-04-25 00:51:20 +00:00 · 2026-04-18 10:35:53 -04:00 · 2026-04-18 10:35:53 -04:00 · 79bfad0adc
commit 79bfad0adc
parent 00c3d848d8
6 changed files with 195 additions and 7 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -1303,13 +1303,47 @@ def _is_payment_error(exc: Exception) -> bool:
    status = getattr(exc, "status_code", None)
    if status == 402:
        return True
-    err_lower = str(exc).lower()
+
+    parts = [str(exc).lower()]
+    body = getattr(exc, "body", None)
+    if isinstance(body, dict):
+        try:
+            parts.append(json.dumps(body, ensure_ascii=False).lower())
+        except Exception:
+            parts.append(str(body).lower())
+        err_obj = body.get("error", {}) if isinstance(body.get("error"), dict) else {}
+        body_msg = (err_obj.get("message") or body.get("message") or "").lower()
+        if body_msg:
+            parts.append(body_msg)
+        error_code = (err_obj.get("code") or err_obj.get("type") or body.get("code") or body.get("type") or "")
+        if error_code:
+            parts.append(str(error_code).lower())
+        metadata = err_obj.get("metadata", {}) if isinstance(err_obj, dict) else {}
+        raw_json = metadata.get("raw") if isinstance(metadata, dict) else None
+        if isinstance(raw_json, str) and raw_json.strip():
+            try:
+                inner = json.loads(raw_json)
+                if isinstance(inner, dict):
+                    inner_err = inner.get("error", {}) if isinstance(inner.get("error"), dict) else {}
+                    inner_msg = (inner_err.get("message") or inner.get("message") or "").lower()
+                    if inner_msg:
+                        parts.append(inner_msg)
+            except Exception:
+                parts.append(raw_json.lower())
+    elif body is not None:
+        parts.append(str(body).lower())
+
+    err_lower = " ".join(p for p in parts if p)
    # OpenRouter and other providers include "credits" or "afford" in 402 bodies,
    # but sometimes wrap them in 429 or other codes.
    if status in (402, 429, None):
-        if any(kw in err_lower for kw in ("credits", "insufficient funds",
-                                           "can only afford", "billing",
-                                           "payment required")):
+        if any(kw in err_lower for kw in (
+            "credits", "insufficient funds", "insufficient balance",
+            "insufficient_balance", "insufficient_quota",
+            "can only afford", "billing",
+            "payment required", "payment_required",
+            "top up your credits",
+        )):
            return True
    return False

--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@ -89,10 +89,13 @@ class ClassifiedError:
 # Patterns that indicate billing exhaustion (not transient rate limit)
 _BILLING_PATTERNS = [
    "insufficient credits",
+    "insufficient balance",
+    "insufficient_balance",
    "insufficient_quota",
    "credit balance",
    "credits have been exhausted",
    "top up your credits",
+    "can only afford",
    "payment required",
    "billing hard limit",
    "exceeded your current quota",
@ -589,6 +592,20 @@ def _classify_by_status(
        )

    if status_code == 429:
+        # Some providers surface billing exhaustion as 429 instead of 402 and may
+        # only expose the billing signal through a structured error code.
+        if error_code:
+            classified = _classify_by_error_code(error_code, error_msg, result_fn)
+            if classified is not None:
+                return classified
+        # Others embed the billing signal only in free-text messages.
+        if any(p in error_msg for p in _BILLING_PATTERNS):
+            return result_fn(
+                FailoverReason.billing,
+                retryable=False,
+                should_rotate_credential=True,
+                should_fallback=True,
+            )
        # Already checked long_context_tier above; this is a normal rate limit
        return result_fn(
            FailoverReason.rate_limit,
--- a/run_agent.py
+++ b/run_agent.py
@ -2292,6 +2292,36 @@ class AIAgent:
            and getattr(self, "platform", "") == "cli"
        )

+    def _should_suppress_gateway_lifecycle_status(self, message: str) -> bool:
+        """Return True when a lifecycle status is too noisy for chat gateways.
+
+        CLI users still see every lifecycle event through ``_vprint``. Messaging
+        platforms, however, should not be flooded with internal retry/fallback
+        chatter when a final assistant response will summarize the failure.
+        """
+        raw_platform = getattr(self, "platform", "") or ""
+        platform = getattr(raw_platform, "value", raw_platform)
+        platform = str(platform).strip().lower()
+        if not platform or platform == "cli":
+            return False
+        text = (message or "").strip()
+        if not text:
+            return False
+
+        noisy_prefixes = (
+            "⚠️ Rate limited — switching to fallback provider...",
+            "💸 Provider credits/balance exhausted — switching to fallback provider...",
+            "⚠️ Empty/malformed response — switching to fallback...",
+            "🔄 Primary model failed — switching to fallback:",
+            "⏱️ Rate limit reached. Waiting",
+            "❌ Rate limited after ",
+        )
+        if any(text.startswith(prefix) for prefix in noisy_prefixes):
+            return True
+        if text.startswith("⚠️ Max retries (") and "trying fallback" in text.lower():
+            return True
+        return False
+
    def _emit_status(self, message: str) -> None:
        """Emit a lifecycle status message to both CLI and gateway channels.

@ -2307,6 +2337,8 @@ class AIAgent:
        except Exception:
            pass
        if self.status_callback:
+            if self._should_suppress_gateway_lifecycle_status(message):
+                return
            try:
                self.status_callback("lifecycle", message)
            except Exception:
@ -11081,7 +11113,10 @@ class AIAgent:
                            self._credential_pool
                        )
                        if not pool_may_recover:
-                            self._emit_status("⚠️ Rate limited — switching to fallback provider...")
+                            if classified.reason == FailoverReason.billing:
+                                self._emit_status("💸 Provider credits/balance exhausted — switching to fallback provider...")
+                            else:
+                                self._emit_status("⚠️ Rate limited — switching to fallback provider...")
                            if self._try_activate_fallback(reason=classified.reason):
                                retry_count = 0
                                compression_attempts = 0
@ -11363,7 +11398,6 @@ class AIAgent:
                            and not classified.should_compress
                            and classified.reason not in (
                                FailoverReason.rate_limit,
-                                FailoverReason.billing,
                                FailoverReason.overloaded,
                                FailoverReason.context_overflow,
                                FailoverReason.payload_too_large,
@ -11394,7 +11428,7 @@ class AIAgent:
                        self._vprint(f"{self.log_prefix}   🔌 Provider: {_provider}  Model: {_model}", force=True)
                        self._vprint(f"{self.log_prefix}   🌐 Endpoint: {_base}", force=True)
                        # Actionable guidance for common auth errors
-                        if classified.is_auth or classified.reason == FailoverReason.billing:
+                        if classified.is_auth:
                            if _provider == "openai-codex" and status_code == 401:
                                self._vprint(f"{self.log_prefix}   💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True)
                                self._vprint(f"{self.log_prefix}      refreshed by another client (Codex CLI, VS Code). To fix:", force=True)
@ -11406,6 +11440,12 @@ class AIAgent:
                                self._vprint(f"{self.log_prefix}      • Does your account have access to {_model}?", force=True)
                                if base_url_host_matches(str(_base), "openrouter.ai"):
                                    self._vprint(f"{self.log_prefix}      • Check credits: https://openrouter.ai/settings/credits", force=True)
+                        elif classified.reason == FailoverReason.billing:
+                            self._vprint(f"{self.log_prefix}   💡 Provider balance/credits appear exhausted for this request.", force=True)
+                            if "openrouter" in str(_base).lower():
+                                self._vprint(f"{self.log_prefix}      • Top up credits: https://openrouter.ai/settings/credits", force=True)
+                            elif _provider == "minimax":
+                                self._vprint(f"{self.log_prefix}      • Check MiniMax account balance / billing before retrying.", force=True)
                        else:
                            self._vprint(f"{self.log_prefix}   💡 This type of error won't be fixed by retrying.", force=True)
                        logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@ -664,6 +664,23 @@ class TestIsPaymentError:
        exc.status_code = 429
        assert _is_payment_error(exc) is True

+    def test_429_with_insufficient_balance_message(self):
+        exc = Exception("HTTP 429: insufficient balance (1008)")
+        exc.status_code = 429
+        assert _is_payment_error(exc) is True
+
+    def test_429_with_billing_message_in_structured_body(self):
+        exc = Exception("provider error")
+        exc.status_code = 429
+        exc.body = {"error": {"message": "insufficient balance (1008)"}}
+        assert _is_payment_error(exc) is True
+
+    def test_429_with_billing_error_code_in_structured_body(self):
+        exc = Exception("provider error")
+        exc.status_code = 429
+        exc.body = {"error": {"code": "payment_required", "message": "provider error"}}
+        assert _is_payment_error(exc) is True
+
    def test_429_without_credits_message_is_not_payment(self):
        """Normal rate limits should NOT be treated as payment errors."""
        exc = Exception("Rate limit exceeded, try again in 2 seconds")
--- a/tests/agent/test_error_classifier.py
+++ b/tests/agent/test_error_classifier.py
@ -250,6 +250,24 @@ class TestClassifyApiError:
        assert result.reason == FailoverReason.rate_limit
        assert result.should_fallback is True

+    def test_429_insufficient_balance_classified_as_billing(self):
+        e = MockAPIError("HTTP 429: insufficient balance (1008)", status_code=429)
+        result = classify_api_error(e, provider="minimax")
+        assert result.reason == FailoverReason.billing
+        assert result.retryable is False
+        assert result.should_rotate_credential is True
+        assert result.should_fallback is True
+
+    def test_429_payment_required_error_code_classified_as_billing(self):
+        e = MockAPIError(
+            "provider error",
+            status_code=429,
+            body={"error": {"code": "payment_required", "message": "provider error"}},
+        )
+        result = classify_api_error(e, provider="openrouter")
+        assert result.reason == FailoverReason.billing
+        assert result.retryable is False
+
    def test_alibaba_rate_increased_too_quickly(self):
        """Alibaba/DashScope returns a unique throttling message.

--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@ -22,6 +22,7 @@ import run_agent
 from run_agent import AIAgent
 from agent.error_classifier import FailoverReason
 from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
+from gateway.config import Platform


 # ---------------------------------------------------------------------------
@ -1629,6 +1630,67 @@ class TestExecuteToolCalls:
        assert "API call failed" not in output
        assert "Rate limit reached" not in output

+    def test_emit_status_suppresses_noisy_gateway_fallback_messages(self, agent):
+        agent.platform = "telegram"
+        agent.status_callback = MagicMock()
+
+        with patch.object(agent, "_vprint") as mock_vprint:
+            agent._emit_status("⚠️ Rate limited — switching to fallback provider...")
+
+        mock_vprint.assert_called_once()
+        agent.status_callback.assert_not_called()
+
+    def test_emit_status_forwards_non_noisy_gateway_messages(self, agent):
+        agent.platform = "telegram"
+        agent.status_callback = MagicMock()
+
+        with patch.object(agent, "_vprint") as mock_vprint:
+            agent._emit_status("🗜️ Context reduced to 120,000 tokens (was 240,000), retrying...")
+
+        mock_vprint.assert_called_once()
+        agent.status_callback.assert_called_once_with("lifecycle", "🗜️ Context reduced to 120,000 tokens (was 240,000), retrying...")
+
+    def test_emit_status_handles_platform_enum_for_gateway_suppression(self, agent):
+        agent.platform = Platform.TELEGRAM
+        agent.status_callback = MagicMock()
+
+        with patch.object(agent, "_vprint") as mock_vprint:
+            agent._emit_status("⚠️ Rate limited — switching to fallback provider...")
+
+        mock_vprint.assert_called_once()
+        agent.status_callback.assert_not_called()
+
+    def test_billing_429_does_not_emit_rate_limit_backoff_status(self, agent):
+        """A billing-flavoured 429 must not produce the rate-limit wait status.
+
+        Every API call is made to raise a 429 whose text says
+        "insufficient balance (1008)". The run is expected to end incomplete
+        after exactly one API call, surface that text in ``result["error"]``,
+        and never emit a status containing "Rate limit reached. Waiting".
+        """
+        # Exception stand-in: only ``status_code`` and ``str()`` are provided.
+        class _Billing429Error(Exception):
+            status_code = 429
+
+            def __str__(self):
+                return "HTTP 429: insufficient balance (1008)"
+
+        agent._cached_system_prompt = "You are helpful."
+        agent._use_prompt_caching = False
+        agent.tool_delay = 0
+        agent.compression_enabled = False
+        agent.save_trajectories = False
+        agent.base_url = "https://api.minimax.io/v1/"
+        # Collects every message passed to the patched ``_emit_status``.
+        status_messages = []
+
+        with (
+            patch.object(agent, "_interruptible_api_call", side_effect=_Billing429Error()),
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+            patch.object(agent, "_emit_status", side_effect=status_messages.append),
+            # Neutralise sleeping so any backoff path cannot slow the test.
+            patch("run_agent.time.sleep", return_value=None),
+        ):
+            result = agent.run_conversation("hello")
+
+        assert result["completed"] is False
+        assert result["final_response"] is None
+        assert "insufficient balance (1008)" in result["error"]
+        # Exactly one call: the billing failure was not retried.
+        assert result["api_calls"] == 1
+        assert not any("Rate limit reached. Waiting" in msg for msg in status_messages)
+

 class TestConcurrentToolExecution:
    """Tests for _execute_tool_calls_concurrent and dispatch logic."""