diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 5e8a60e76..8faa21785 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1303,13 +1303,47 @@ def _is_payment_error(exc: Exception) -> bool: status = getattr(exc, "status_code", None) if status == 402: return True - err_lower = str(exc).lower() + + parts = [str(exc).lower()] + body = getattr(exc, "body", None) + if isinstance(body, dict): + try: + parts.append(json.dumps(body, ensure_ascii=False).lower()) + except Exception: + parts.append(str(body).lower()) + err_obj = body.get("error", {}) if isinstance(body.get("error"), dict) else {} + body_msg = (err_obj.get("message") or body.get("message") or "").lower() + if body_msg: + parts.append(body_msg) + error_code = (err_obj.get("code") or err_obj.get("type") or body.get("code") or body.get("type") or "") + if error_code: + parts.append(str(error_code).lower()) + metadata = err_obj.get("metadata", {}) if isinstance(err_obj, dict) else {} + raw_json = metadata.get("raw") if isinstance(metadata, dict) else None + if isinstance(raw_json, str) and raw_json.strip(): + try: + inner = json.loads(raw_json) + if isinstance(inner, dict): + inner_err = inner.get("error", {}) if isinstance(inner.get("error"), dict) else {} + inner_msg = (inner_err.get("message") or inner.get("message") or "").lower() + if inner_msg: + parts.append(inner_msg) + except Exception: + parts.append(raw_json.lower()) + elif body is not None: + parts.append(str(body).lower()) + + err_lower = " ".join(p for p in parts if p) # OpenRouter and other providers include "credits" or "afford" in 402 bodies, # but sometimes wrap them in 429 or other codes. if status in (402, 429, None): - if any(kw in err_lower for kw in ("credits", "insufficient funds", - "can only afford", "billing", - "payment required")): + if any(kw in err_lower for kw in ( + "credits", "insufficient funds", "insufficient balance", + "insufficient_balance", "insufficient_quota", + "can only afford", "billing", + "payment required", "payment_required", + "top up your credits", + )): return True return False diff --git a/agent/error_classifier.py b/agent/error_classifier.py index 87324d676..285b2c532 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -89,10 +89,13 @@ class ClassifiedError: # Patterns that indicate billing exhaustion (not transient rate limit) _BILLING_PATTERNS = [ "insufficient credits", + "insufficient balance", + "insufficient_balance", "insufficient_quota", "credit balance", "credits have been exhausted", "top up your credits", + "can only afford", "payment required", "billing hard limit", "exceeded your current quota", @@ -589,6 +592,20 @@ def _classify_by_status( ) if status_code == 429: + # Some providers surface billing exhaustion as 429 instead of 402 and may + # only expose the billing signal through a structured error code. + if error_code: + classified = _classify_by_error_code(error_code, error_msg, result_fn) + if classified is not None: + return classified + # Others embed the billing signal only in free-text messages. + if any(p in error_msg for p in _BILLING_PATTERNS): + return result_fn( + FailoverReason.billing, + retryable=False, + should_rotate_credential=True, + should_fallback=True, + ) # Already checked long_context_tier above; this is a normal rate limit return result_fn( FailoverReason.rate_limit, diff --git a/run_agent.py b/run_agent.py index 6770f568c..ecc1e9da3 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2292,6 +2292,36 @@ class AIAgent: and getattr(self, "platform", "") == "cli" ) + def _should_suppress_gateway_lifecycle_status(self, message: str) -> bool: + """Return True when a lifecycle status is too noisy for chat gateways. + + CLI users still see every lifecycle event through ``_vprint``. Messaging + platforms, however, should not be flooded with internal retry/fallback + chatter when a final assistant response will summarize the failure. + """ + raw_platform = getattr(self, "platform", "") or "" + platform = getattr(raw_platform, "value", raw_platform) + platform = str(platform).strip().lower() + if not platform or platform == "cli": + return False + text = (message or "").strip() + if not text: + return False + + noisy_prefixes = ( + "⚠️ Rate limited — switching to fallback provider...", + "💸 Provider credits/balance exhausted — switching to fallback provider...", + "⚠️ Empty/malformed response — switching to fallback...", + "🔄 Primary model failed — switching to fallback:", + "⏱️ Rate limit reached. Waiting", + "❌ Rate limited after ", + ) + if any(text.startswith(prefix) for prefix in noisy_prefixes): + return True + if text.startswith("⚠️ Max retries (") and "trying fallback" in text.lower(): + return True + return False + def _emit_status(self, message: str) -> None: """Emit a lifecycle status message to both CLI and gateway channels. @@ -2307,6 +2337,8 @@ class AIAgent: except Exception: pass if self.status_callback: + if self._should_suppress_gateway_lifecycle_status(message): + return try: self.status_callback("lifecycle", message) except Exception: @@ -11081,7 +11113,10 @@ class AIAgent: self._credential_pool ) if not pool_may_recover: - self._emit_status("⚠️ Rate limited — switching to fallback provider...") + if classified.reason == FailoverReason.billing: + self._emit_status("💸 Provider credits/balance exhausted — switching to fallback provider...") + else: + self._emit_status("⚠️ Rate limited — switching to fallback provider...") if self._try_activate_fallback(reason=classified.reason): retry_count = 0 compression_attempts = 0 @@ -11363,7 +11398,6 @@ class AIAgent: and not classified.should_compress and classified.reason not in ( FailoverReason.rate_limit, - FailoverReason.billing, FailoverReason.overloaded, FailoverReason.context_overflow, FailoverReason.payload_too_large, @@ -11394,7 +11428,7 @@ class AIAgent: self._vprint(f"{self.log_prefix} 🔌 Provider: {_provider} Model: {_model}", force=True) self._vprint(f"{self.log_prefix} 🌐 Endpoint: {_base}", force=True) # Actionable guidance for common auth errors - if classified.is_auth or classified.reason == FailoverReason.billing: + if classified.is_auth: if _provider == "openai-codex" and status_code == 401: self._vprint(f"{self.log_prefix} 💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True) self._vprint(f"{self.log_prefix} refreshed by another client (Codex CLI, VS Code). To fix:", force=True) @@ -11406,6 +11440,12 @@ class AIAgent: self._vprint(f"{self.log_prefix} • Does your account have access to {_model}?", force=True) if base_url_host_matches(str(_base), "openrouter.ai"): self._vprint(f"{self.log_prefix} • Check credits: https://openrouter.ai/settings/credits", force=True) + elif classified.reason == FailoverReason.billing: + self._vprint(f"{self.log_prefix} 💡 Provider balance/credits appear exhausted for this request.", force=True) + if "openrouter" in str(_base).lower(): + self._vprint(f"{self.log_prefix} • Top up credits: https://openrouter.ai/settings/credits", force=True) + elif _provider == "minimax": + self._vprint(f"{self.log_prefix} • Check MiniMax account balance / billing before retrying.", force=True) else: self._vprint(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.", force=True) logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}") diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 5ee0f1265..26b0f419d 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -664,6 +664,23 @@ class TestIsPaymentError: exc.status_code = 429 assert _is_payment_error(exc) is True + def test_429_with_insufficient_balance_message(self): + exc = Exception("HTTP 429: insufficient balance (1008)") + exc.status_code = 429 + assert _is_payment_error(exc) is True + + def test_429_with_billing_message_in_structured_body(self): + exc = Exception("provider error") + exc.status_code = 429 + exc.body = {"error": {"message": "insufficient balance (1008)"}} + assert _is_payment_error(exc) is True + + def test_429_with_billing_error_code_in_structured_body(self): + exc = Exception("provider error") + exc.status_code = 429 + exc.body = {"error": {"code": "payment_required", "message": "provider error"}} + assert _is_payment_error(exc) is True + def test_429_without_credits_message_is_not_payment(self): """Normal rate limits should NOT be treated as payment errors.""" exc = Exception("Rate limit exceeded, try again in 2 seconds") diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py index e8a92774b..869944cbe 100644 --- a/tests/agent/test_error_classifier.py +++ b/tests/agent/test_error_classifier.py @@ -250,6 +250,24 @@ class TestClassifyApiError: assert result.reason == FailoverReason.rate_limit assert result.should_fallback is True + def test_429_insufficient_balance_classified_as_billing(self): + e = MockAPIError("HTTP 429: insufficient balance (1008)", status_code=429) + result = classify_api_error(e, provider="minimax") + assert result.reason == FailoverReason.billing + assert result.retryable is False + assert result.should_rotate_credential is True + assert result.should_fallback is True + + def test_429_payment_required_error_code_classified_as_billing(self): + e = MockAPIError( + "provider error", + status_code=429, + body={"error": {"code": "payment_required", "message": "provider error"}}, + ) + result = classify_api_error(e, provider="openrouter") + assert result.reason == FailoverReason.billing + assert result.retryable is False + def test_alibaba_rate_increased_too_quickly(self): """Alibaba/DashScope returns a unique throttling message. diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 9c54daffe..d43ad88f9 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -22,6 +22,7 @@ import run_agent from run_agent import AIAgent from agent.error_classifier import FailoverReason from agent.prompt_builder import DEFAULT_AGENT_IDENTITY +from gateway.config import Platform # --------------------------------------------------------------------------- @@ -1629,6 +1630,67 @@ class TestExecuteToolCalls: assert "API call failed" not in output assert "Rate limit reached" not in output + def test_emit_status_suppresses_noisy_gateway_fallback_messages(self, agent): + agent.platform = "telegram" + agent.status_callback = MagicMock() + + with patch.object(agent, "_vprint") as mock_vprint: + agent._emit_status("⚠️ Rate limited — switching to fallback provider...") + + mock_vprint.assert_called_once() + agent.status_callback.assert_not_called() + + def test_emit_status_forwards_non_noisy_gateway_messages(self, agent): + agent.platform = "telegram" + agent.status_callback = MagicMock() + + with patch.object(agent, "_vprint") as mock_vprint: + agent._emit_status("🗜️ Context reduced to 120,000 tokens (was 240,000), retrying...") + + mock_vprint.assert_called_once() + agent.status_callback.assert_called_once_with("lifecycle", "🗜️ Context reduced to 120,000 tokens (was 240,000), retrying...") + + def test_emit_status_handles_platform_enum_for_gateway_suppression(self, agent): + agent.platform = Platform.TELEGRAM + agent.status_callback = MagicMock() + + with patch.object(agent, "_vprint") as mock_vprint: + agent._emit_status("⚠️ Rate limited — switching to fallback provider...") + + mock_vprint.assert_called_once() + agent.status_callback.assert_not_called() + + def test_billing_429_does_not_emit_rate_limit_backoff_status(self, agent): + class _Billing429Error(Exception): + status_code = 429 + + def __str__(self): + return "HTTP 429: insufficient balance (1008)" + + agent._cached_system_prompt = "You are helpful." + agent._use_prompt_caching = False + agent.tool_delay = 0 + agent.compression_enabled = False + agent.save_trajectories = False + agent.base_url = "https://api.minimax.io/v1/" + status_messages = [] + + with ( + patch.object(agent, "_interruptible_api_call", side_effect=_Billing429Error()), + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + patch.object(agent, "_emit_status", side_effect=status_messages.append), + patch("run_agent.time.sleep", return_value=None), + ): + result = agent.run_conversation("hello") + + assert result["completed"] is False + assert result["final_response"] is None + assert "insufficient balance (1008)" in result["error"] + assert result["api_calls"] == 1 + assert not any("Rate limit reached. Waiting" in msg for msg in status_messages) + class TestConcurrentToolExecution: """Tests for _execute_tool_calls_concurrent and dispatch logic."""