fix: stop billing fallback retry spam in gateways

This commit is contained in:
Remotework 2026-04-18 10:35:53 -04:00
parent 00c3d848d8
commit 79bfad0adc
6 changed files with 195 additions and 7 deletions

View file

def _payment_error_body_text(body) -> list:
    """Collect lowercase text fragments from a provider error *body*.

    Args:
        body: The ``body`` attribute of a provider exception. May be ``None``,
            a dict, or any other object.

    Returns:
        A list of lowercase strings worth scanning for billing keywords.
        Empty when *body* is ``None``.
    """
    if body is None:
        return []
    if not isinstance(body, dict):
        return [str(body).lower()]

    parts = []
    try:
        parts.append(json.dumps(body, ensure_ascii=False).lower())
    except (TypeError, ValueError, RecursionError):
        # The body holds something JSON cannot encode; use its str() instead.
        parts.append(str(body).lower())

    err_obj = body.get("error")
    if not isinstance(err_obj, dict):
        err_obj = {}

    # Coerce through str(): a message that is a list or number must not make
    # the classifier itself raise.
    message = err_obj.get("message") or body.get("message")
    if message:
        parts.append(str(message).lower())

    error_code = (
        err_obj.get("code")
        or err_obj.get("type")
        or body.get("code")
        or body.get("type")
    )
    if error_code:
        parts.append(str(error_code).lower())

    # A nested JSON document may be carried as a string under
    # ``error.metadata.raw``; look inside it for a message as well.
    metadata = err_obj.get("metadata")
    raw_json = metadata.get("raw") if isinstance(metadata, dict) else None
    if isinstance(raw_json, str) and raw_json.strip():
        try:
            inner = json.loads(raw_json)
        except (ValueError, RecursionError):
            # Not valid JSON: scan the raw text as-is.
            parts.append(raw_json.lower())
        else:
            if isinstance(inner, dict):
                inner_err = inner.get("error")
                if not isinstance(inner_err, dict):
                    inner_err = {}
                inner_msg = inner_err.get("message") or inner.get("message")
                if inner_msg:
                    parts.append(str(inner_msg).lower())
    return parts


def _is_payment_error(exc: Exception) -> bool:
    """Return True when *exc* looks like a billing/credit exhaustion error.

    HTTP 402 is always treated as a payment error. For HTTP 429, or when the
    exception carries no ``status_code``, the exception text and any
    structured ``body`` are searched for billing keywords. Any other status
    code yields False.

    Args:
        exc: Exception raised by a provider call. The optional attributes
            ``status_code`` and ``body`` are read when present.

    Returns:
        True if the error is classified as a payment error, otherwise False.
    """
    status = getattr(exc, "status_code", None)
    if status == 402:
        return True
    if status not in (429, None):
        return False

    parts = [str(exc).lower()]
    parts.extend(_payment_error_body_text(getattr(exc, "body", None)))
    err_lower = " ".join(p for p in parts if p)

    # OpenRouter and other providers include "credits" or "afford" in 402 bodies,
    # but sometimes wrap them in 429 or other codes.
    billing_keywords = (
        "credits", "insufficient funds", "insufficient balance",
        "insufficient_balance", "insufficient_quota",
        "can only afford", "billing",
        "payment required", "payment_required",
        "top up your credits",
    )
    return any(kw in err_lower for kw in billing_keywords)

View file

@ -89,10 +89,13 @@ class ClassifiedError:
# Patterns that indicate billing exhaustion (not transient rate limit)
_BILLING_PATTERNS = [
"insufficient credits",
"insufficient balance",
"insufficient_balance",
"insufficient_quota",
"credit balance",
"credits have been exhausted",
"top up your credits",
"can only afford",
"payment required",
"billing hard limit",
"exceeded your current quota",
@ -589,6 +592,20 @@ def _classify_by_status(
)
if status_code == 429:
# Some providers surface billing exhaustion as 429 instead of 402 and may
# only expose the billing signal through a structured error code.
if error_code:
classified = _classify_by_error_code(error_code, error_msg, result_fn)
if classified is not None:
return classified
# Others embed the billing signal only in free-text messages.
if any(p in error_msg for p in _BILLING_PATTERNS):
return result_fn(
FailoverReason.billing,
retryable=False,
should_rotate_credential=True,
should_fallback=True,
)
# Already checked long_context_tier above; this is a normal rate limit
return result_fn(
FailoverReason.rate_limit,

View file

@ -2292,6 +2292,36 @@ class AIAgent:
and getattr(self, "platform", "") == "cli"
)
def _should_suppress_gateway_lifecycle_status(self, message: str) -> bool:
    """Return True when a lifecycle status is too noisy for chat gateways.

    CLI users still see every lifecycle event through ``_vprint``. Messaging
    platforms, however, should not be flooded with internal retry/fallback
    chatter when a final assistant response will summarize the failure.

    Args:
        message: The lifecycle status text about to be forwarded.

    Returns:
        True if the message should be withheld from the gateway callback.
        Always False when the platform is unset or ``"cli"``, or when the
        message is empty.
    """
    raw_platform = getattr(self, "platform", "") or ""
    # ``platform`` may be a plain string or an enum-like object exposing
    # ``.value``; normalise both to a lowercase string.
    platform = str(getattr(raw_platform, "value", raw_platform)).strip().lower()
    if not platform or platform == "cli":
        return False

    text = (message or "").strip()
    if not text:
        return False

    noisy_prefixes = (
        "⚠️ Rate limited — switching to fallback provider...",
        "💸 Provider credits/balance exhausted — switching to fallback provider...",
        "⚠️ Empty/malformed response — switching to fallback...",
        "🔄 Primary model failed — switching to fallback:",
        "⏱️ Rate limit reached. Waiting",
        "❌ Rate limited after ",
    )
    # str.startswith accepts a tuple, so one call covers every prefix.
    if text.startswith(noisy_prefixes):
        return True
    return text.startswith("⚠️ Max retries (") and "trying fallback" in text.lower()
def _emit_status(self, message: str) -> None:
"""Emit a lifecycle status message to both CLI and gateway channels.
@ -2307,6 +2337,8 @@ class AIAgent:
except Exception:
pass
if self.status_callback:
if self._should_suppress_gateway_lifecycle_status(message):
return
try:
self.status_callback("lifecycle", message)
except Exception:
@ -11081,7 +11113,10 @@ class AIAgent:
self._credential_pool
)
if not pool_may_recover:
self._emit_status("⚠️ Rate limited — switching to fallback provider...")
if classified.reason == FailoverReason.billing:
self._emit_status("💸 Provider credits/balance exhausted — switching to fallback provider...")
else:
self._emit_status("⚠️ Rate limited — switching to fallback provider...")
if self._try_activate_fallback(reason=classified.reason):
retry_count = 0
compression_attempts = 0
@ -11363,7 +11398,6 @@ class AIAgent:
and not classified.should_compress
and classified.reason not in (
FailoverReason.rate_limit,
FailoverReason.billing,
FailoverReason.overloaded,
FailoverReason.context_overflow,
FailoverReason.payload_too_large,
@ -11394,7 +11428,7 @@ class AIAgent:
self._vprint(f"{self.log_prefix} 🔌 Provider: {_provider} Model: {_model}", force=True)
self._vprint(f"{self.log_prefix} 🌐 Endpoint: {_base}", force=True)
# Actionable guidance for common auth errors
if classified.is_auth or classified.reason == FailoverReason.billing:
if classified.is_auth:
if _provider == "openai-codex" and status_code == 401:
self._vprint(f"{self.log_prefix} 💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True)
self._vprint(f"{self.log_prefix} refreshed by another client (Codex CLI, VS Code). To fix:", force=True)
@ -11406,6 +11440,12 @@ class AIAgent:
self._vprint(f"{self.log_prefix} • Does your account have access to {_model}?", force=True)
if base_url_host_matches(str(_base), "openrouter.ai"):
self._vprint(f"{self.log_prefix} • Check credits: https://openrouter.ai/settings/credits", force=True)
elif classified.reason == FailoverReason.billing:
self._vprint(f"{self.log_prefix} 💡 Provider balance/credits appear exhausted for this request.", force=True)
if "openrouter" in str(_base).lower():
self._vprint(f"{self.log_prefix} • Top up credits: https://openrouter.ai/settings/credits", force=True)
elif _provider == "minimax":
self._vprint(f"{self.log_prefix} • Check MiniMax account balance / billing before retrying.", force=True)
else:
self._vprint(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.", force=True)
logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")

View file

@ -664,6 +664,23 @@ class TestIsPaymentError:
exc.status_code = 429
assert _is_payment_error(exc) is True
def test_429_with_insufficient_balance_message(self):
    """A 429 whose text reports an insufficient balance is a payment error."""
    error = Exception("HTTP 429: insufficient balance (1008)")
    setattr(error, "status_code", 429)
    verdict = _is_payment_error(error)
    assert verdict is True
def test_429_with_billing_message_in_structured_body(self):
    """A billing message found only in the structured body is still detected."""
    error = Exception("provider error")
    error.body = {"error": {"message": "insufficient balance (1008)"}}
    error.status_code = 429
    verdict = _is_payment_error(error)
    assert verdict is True
def test_429_with_billing_error_code_in_structured_body(self):
    """A billing error code in the body is enough, even with a generic message."""
    error = Exception("provider error")
    error.body = {"error": {"code": "payment_required", "message": "provider error"}}
    error.status_code = 429
    verdict = _is_payment_error(error)
    assert verdict is True
def test_429_without_credits_message_is_not_payment(self):
"""Normal rate limits should NOT be treated as payment errors."""
exc = Exception("Rate limit exceeded, try again in 2 seconds")

View file

@ -250,6 +250,24 @@ class TestClassifyApiError:
assert result.reason == FailoverReason.rate_limit
assert result.should_fallback is True
def test_429_insufficient_balance_classified_as_billing(self):
    """A 429 with an insufficient-balance message is classified as billing."""
    error = MockAPIError(
        "HTTP 429: insufficient balance (1008)",
        status_code=429,
    )
    classified = classify_api_error(error, provider="minimax")
    # Billing failures are not retried, but do rotate credentials and fall back.
    assert classified.reason == FailoverReason.billing
    assert classified.retryable is False
    assert classified.should_rotate_credential is True
    assert classified.should_fallback is True
def test_429_payment_required_error_code_classified_as_billing(self):
    """A 429 carrying a ``payment_required`` error code is classified as billing."""
    payload = {"error": {"code": "payment_required", "message": "provider error"}}
    error = MockAPIError("provider error", status_code=429, body=payload)
    classified = classify_api_error(error, provider="openrouter")
    assert classified.reason == FailoverReason.billing
    assert classified.retryable is False
def test_alibaba_rate_increased_too_quickly(self):
"""Alibaba/DashScope returns a unique throttling message.

View file

@ -22,6 +22,7 @@ import run_agent
from run_agent import AIAgent
from agent.error_classifier import FailoverReason
from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
from gateway.config import Platform
# ---------------------------------------------------------------------------
@ -1629,6 +1630,67 @@ class TestExecuteToolCalls:
assert "API call failed" not in output
assert "Rate limit reached" not in output
def test_emit_status_suppresses_noisy_gateway_fallback_messages(self, agent):
    """Fallback chatter is printed via ``_vprint`` but not sent to the gateway callback."""
    gateway_callback = MagicMock()
    agent.platform = "telegram"
    agent.status_callback = gateway_callback
    with patch.object(agent, "_vprint") as vprint_spy:
        agent._emit_status("⚠️ Rate limited — switching to fallback provider...")
    vprint_spy.assert_called_once()
    agent.status_callback.assert_not_called()
def test_emit_status_forwards_non_noisy_gateway_messages(self, agent):
    """A status that is not on the noisy list still reaches the gateway callback."""
    status_text = "🗜️ Context reduced to 120,000 tokens (was 240,000), retrying..."
    agent.platform = "telegram"
    agent.status_callback = MagicMock()
    with patch.object(agent, "_vprint") as vprint_spy:
        agent._emit_status(status_text)
    vprint_spy.assert_called_once()
    agent.status_callback.assert_called_once_with("lifecycle", status_text)
def test_emit_status_handles_platform_enum_for_gateway_suppression(self, agent):
    """Suppression also applies when ``platform`` is a ``Platform`` enum member."""
    gateway_callback = MagicMock()
    agent.platform = Platform.TELEGRAM
    agent.status_callback = gateway_callback
    with patch.object(agent, "_vprint") as vprint_spy:
        agent._emit_status("⚠️ Rate limited — switching to fallback provider...")
    vprint_spy.assert_called_once()
    agent.status_callback.assert_not_called()
def test_billing_429_does_not_emit_rate_limit_backoff_status(self, agent):
    """A billing-flavoured 429 ends the run after one call with no backoff status."""

    class _Billing429Error(Exception):
        # Stand-in for a provider error that reports billing exhaustion as a 429.
        status_code = 429

        def __str__(self):
            return "HTTP 429: insufficient balance (1008)"

    agent.base_url = "https://api.minimax.io/v1/"
    agent._cached_system_prompt = "You are helpful."
    agent._use_prompt_caching = False
    agent.compression_enabled = False
    agent.save_trajectories = False
    agent.tool_delay = 0

    # Capture every lifecycle status instead of emitting it.
    status_messages = []

    with (
        patch.object(agent, "_interruptible_api_call", side_effect=_Billing429Error()),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
        patch.object(agent, "_emit_status", side_effect=status_messages.append),
        patch("run_agent.time.sleep", return_value=None),
    ):
        outcome = agent.run_conversation("hello")

    assert outcome["completed"] is False
    assert outcome["final_response"] is None
    assert "insufficient balance (1008)" in outcome["error"]
    assert outcome["api_calls"] == 1
    assert all("Rate limit reached. Waiting" not in msg for msg in status_messages)
class TestConcurrentToolExecution:
"""Tests for _execute_tool_calls_concurrent and dispatch logic."""