mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: stop billing fallback retry spam in gateways
This commit is contained in:
parent
00c3d848d8
commit
79bfad0adc
6 changed files with 195 additions and 7 deletions
|
|
@ -1303,13 +1303,47 @@ def _is_payment_error(exc: Exception) -> bool:
|
|||
status = getattr(exc, "status_code", None)
|
||||
if status == 402:
|
||||
return True
|
||||
err_lower = str(exc).lower()
|
||||
|
||||
parts = [str(exc).lower()]
|
||||
body = getattr(exc, "body", None)
|
||||
if isinstance(body, dict):
|
||||
try:
|
||||
parts.append(json.dumps(body, ensure_ascii=False).lower())
|
||||
except Exception:
|
||||
parts.append(str(body).lower())
|
||||
err_obj = body.get("error", {}) if isinstance(body.get("error"), dict) else {}
|
||||
body_msg = (err_obj.get("message") or body.get("message") or "").lower()
|
||||
if body_msg:
|
||||
parts.append(body_msg)
|
||||
error_code = (err_obj.get("code") or err_obj.get("type") or body.get("code") or body.get("type") or "")
|
||||
if error_code:
|
||||
parts.append(str(error_code).lower())
|
||||
metadata = err_obj.get("metadata", {}) if isinstance(err_obj, dict) else {}
|
||||
raw_json = metadata.get("raw") if isinstance(metadata, dict) else None
|
||||
if isinstance(raw_json, str) and raw_json.strip():
|
||||
try:
|
||||
inner = json.loads(raw_json)
|
||||
if isinstance(inner, dict):
|
||||
inner_err = inner.get("error", {}) if isinstance(inner.get("error"), dict) else {}
|
||||
inner_msg = (inner_err.get("message") or inner.get("message") or "").lower()
|
||||
if inner_msg:
|
||||
parts.append(inner_msg)
|
||||
except Exception:
|
||||
parts.append(raw_json.lower())
|
||||
elif body is not None:
|
||||
parts.append(str(body).lower())
|
||||
|
||||
err_lower = " ".join(p for p in parts if p)
|
||||
# OpenRouter and other providers include "credits" or "afford" in 402 bodies,
|
||||
# but sometimes wrap them in 429 or other codes.
|
||||
if status in (402, 429, None):
|
||||
if any(kw in err_lower for kw in ("credits", "insufficient funds",
|
||||
"can only afford", "billing",
|
||||
"payment required")):
|
||||
if any(kw in err_lower for kw in (
|
||||
"credits", "insufficient funds", "insufficient balance",
|
||||
"insufficient_balance", "insufficient_quota",
|
||||
"can only afford", "billing",
|
||||
"payment required", "payment_required",
|
||||
"top up your credits",
|
||||
)):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
|
|
|||
|
|
@ -89,10 +89,13 @@ class ClassifiedError:
|
|||
# Patterns that indicate billing exhaustion (not transient rate limit)
|
||||
_BILLING_PATTERNS = [
|
||||
"insufficient credits",
|
||||
"insufficient balance",
|
||||
"insufficient_balance",
|
||||
"insufficient_quota",
|
||||
"credit balance",
|
||||
"credits have been exhausted",
|
||||
"top up your credits",
|
||||
"can only afford",
|
||||
"payment required",
|
||||
"billing hard limit",
|
||||
"exceeded your current quota",
|
||||
|
|
@ -589,6 +592,20 @@ def _classify_by_status(
|
|||
)
|
||||
|
||||
if status_code == 429:
|
||||
# Some providers surface billing exhaustion as 429 instead of 402 and may
|
||||
# only expose the billing signal through a structured error code.
|
||||
if error_code:
|
||||
classified = _classify_by_error_code(error_code, error_msg, result_fn)
|
||||
if classified is not None:
|
||||
return classified
|
||||
# Others embed the billing signal only in free-text messages.
|
||||
if any(p in error_msg for p in _BILLING_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.billing,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
# Already checked long_context_tier above; this is a normal rate limit
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
|
|
|
|||
46
run_agent.py
46
run_agent.py
|
|
@ -2292,6 +2292,36 @@ class AIAgent:
|
|||
and getattr(self, "platform", "") == "cli"
|
||||
)
|
||||
|
||||
def _should_suppress_gateway_lifecycle_status(self, message: str) -> bool:
|
||||
"""Return True when a lifecycle status is too noisy for chat gateways.
|
||||
|
||||
CLI users still see every lifecycle event through ``_vprint``. Messaging
|
||||
platforms, however, should not be flooded with internal retry/fallback
|
||||
chatter when a final assistant response will summarize the failure.
|
||||
"""
|
||||
raw_platform = getattr(self, "platform", "") or ""
|
||||
platform = getattr(raw_platform, "value", raw_platform)
|
||||
platform = str(platform).strip().lower()
|
||||
if not platform or platform == "cli":
|
||||
return False
|
||||
text = (message or "").strip()
|
||||
if not text:
|
||||
return False
|
||||
|
||||
noisy_prefixes = (
|
||||
"⚠️ Rate limited — switching to fallback provider...",
|
||||
"💸 Provider credits/balance exhausted — switching to fallback provider...",
|
||||
"⚠️ Empty/malformed response — switching to fallback...",
|
||||
"🔄 Primary model failed — switching to fallback:",
|
||||
"⏱️ Rate limit reached. Waiting",
|
||||
"❌ Rate limited after ",
|
||||
)
|
||||
if any(text.startswith(prefix) for prefix in noisy_prefixes):
|
||||
return True
|
||||
if text.startswith("⚠️ Max retries (") and "trying fallback" in text.lower():
|
||||
return True
|
||||
return False
|
||||
|
||||
def _emit_status(self, message: str) -> None:
|
||||
"""Emit a lifecycle status message to both CLI and gateway channels.
|
||||
|
||||
|
|
@ -2307,6 +2337,8 @@ class AIAgent:
|
|||
except Exception:
|
||||
pass
|
||||
if self.status_callback:
|
||||
if self._should_suppress_gateway_lifecycle_status(message):
|
||||
return
|
||||
try:
|
||||
self.status_callback("lifecycle", message)
|
||||
except Exception:
|
||||
|
|
@ -11081,7 +11113,10 @@ class AIAgent:
|
|||
self._credential_pool
|
||||
)
|
||||
if not pool_may_recover:
|
||||
self._emit_status("⚠️ Rate limited — switching to fallback provider...")
|
||||
if classified.reason == FailoverReason.billing:
|
||||
self._emit_status("💸 Provider credits/balance exhausted — switching to fallback provider...")
|
||||
else:
|
||||
self._emit_status("⚠️ Rate limited — switching to fallback provider...")
|
||||
if self._try_activate_fallback(reason=classified.reason):
|
||||
retry_count = 0
|
||||
compression_attempts = 0
|
||||
|
|
@ -11363,7 +11398,6 @@ class AIAgent:
|
|||
and not classified.should_compress
|
||||
and classified.reason not in (
|
||||
FailoverReason.rate_limit,
|
||||
FailoverReason.billing,
|
||||
FailoverReason.overloaded,
|
||||
FailoverReason.context_overflow,
|
||||
FailoverReason.payload_too_large,
|
||||
|
|
@ -11394,7 +11428,7 @@ class AIAgent:
|
|||
self._vprint(f"{self.log_prefix} 🔌 Provider: {_provider} Model: {_model}", force=True)
|
||||
self._vprint(f"{self.log_prefix} 🌐 Endpoint: {_base}", force=True)
|
||||
# Actionable guidance for common auth errors
|
||||
if classified.is_auth or classified.reason == FailoverReason.billing:
|
||||
if classified.is_auth:
|
||||
if _provider == "openai-codex" and status_code == 401:
|
||||
self._vprint(f"{self.log_prefix} 💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True)
|
||||
self._vprint(f"{self.log_prefix} refreshed by another client (Codex CLI, VS Code). To fix:", force=True)
|
||||
|
|
@ -11406,6 +11440,12 @@ class AIAgent:
|
|||
self._vprint(f"{self.log_prefix} • Does your account have access to {_model}?", force=True)
|
||||
if base_url_host_matches(str(_base), "openrouter.ai"):
|
||||
self._vprint(f"{self.log_prefix} • Check credits: https://openrouter.ai/settings/credits", force=True)
|
||||
elif classified.reason == FailoverReason.billing:
|
||||
self._vprint(f"{self.log_prefix} 💡 Provider balance/credits appear exhausted for this request.", force=True)
|
||||
if "openrouter" in str(_base).lower():
|
||||
self._vprint(f"{self.log_prefix} • Top up credits: https://openrouter.ai/settings/credits", force=True)
|
||||
elif _provider == "minimax":
|
||||
self._vprint(f"{self.log_prefix} • Check MiniMax account balance / billing before retrying.", force=True)
|
||||
else:
|
||||
self._vprint(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.", force=True)
|
||||
logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
|
||||
|
|
|
|||
|
|
@ -664,6 +664,23 @@ class TestIsPaymentError:
|
|||
exc.status_code = 429
|
||||
assert _is_payment_error(exc) is True
|
||||
|
||||
def test_429_with_insufficient_balance_message(self):
    """A 429 whose text mentions an insufficient balance is a payment error."""
    error = Exception("HTTP 429: insufficient balance (1008)")
    setattr(error, "status_code", 429)

    outcome = _is_payment_error(error)

    assert outcome is True
|
||||
|
||||
def test_429_with_billing_message_in_structured_body(self):
    """The billing signal may live only in ``body["error"]["message"]``."""
    structured_body = {"error": {"message": "insufficient balance (1008)"}}
    error = Exception("provider error")
    error.body = structured_body
    error.status_code = 429

    outcome = _is_payment_error(error)

    assert outcome is True
|
||||
|
||||
def test_429_with_billing_error_code_in_structured_body(self):
    """The billing signal may live only in ``body["error"]["code"]``."""
    structured_body = {
        "error": {"code": "payment_required", "message": "provider error"},
    }
    error = Exception("provider error")
    error.body = structured_body
    error.status_code = 429

    outcome = _is_payment_error(error)

    assert outcome is True
|
||||
|
||||
def test_429_without_credits_message_is_not_payment(self):
|
||||
"""Normal rate limits should NOT be treated as payment errors."""
|
||||
exc = Exception("Rate limit exceeded, try again in 2 seconds")
|
||||
|
|
|
|||
|
|
@ -250,6 +250,24 @@ class TestClassifyApiError:
|
|||
assert result.reason == FailoverReason.rate_limit
|
||||
assert result.should_fallback is True
|
||||
|
||||
def test_429_insufficient_balance_classified_as_billing(self):
    """A 429 with an "insufficient balance" message classifies as billing."""
    api_error = MockAPIError(
        "HTTP 429: insufficient balance (1008)",
        status_code=429,
    )

    classified = classify_api_error(api_error, provider="minimax")

    assert classified.reason == FailoverReason.billing
    # Billing exhaustion is not retried; it rotates credentials and falls back.
    assert classified.retryable is False
    assert classified.should_rotate_credential is True
    assert classified.should_fallback is True
|
||||
|
||||
def test_429_payment_required_error_code_classified_as_billing(self):
    """A 429 carrying a ``payment_required`` error code classifies as billing."""
    structured_body = {
        "error": {"code": "payment_required", "message": "provider error"},
    }
    api_error = MockAPIError("provider error", status_code=429, body=structured_body)

    classified = classify_api_error(api_error, provider="openrouter")

    assert classified.reason == FailoverReason.billing
    assert classified.retryable is False
|
||||
|
||||
def test_alibaba_rate_increased_too_quickly(self):
|
||||
"""Alibaba/DashScope returns a unique throttling message.
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ import run_agent
|
|||
from run_agent import AIAgent
|
||||
from agent.error_classifier import FailoverReason
|
||||
from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
|
||||
from gateway.config import Platform
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -1629,6 +1630,67 @@ class TestExecuteToolCalls:
|
|||
assert "API call failed" not in output
|
||||
assert "Rate limit reached" not in output
|
||||
|
||||
def test_emit_status_suppresses_noisy_gateway_fallback_messages(self, agent):
    """Fallback chatter reaches ``_vprint`` but not the gateway callback."""
    noisy_status = "⚠️ Rate limited — switching to fallback provider..."
    callback = MagicMock()
    agent.status_callback = callback
    agent.platform = "telegram"

    with patch.object(agent, "_vprint") as vprint_spy:
        agent._emit_status(noisy_status)

    vprint_spy.assert_called_once()
    callback.assert_not_called()
|
||||
|
||||
def test_emit_status_forwards_non_noisy_gateway_messages(self, agent):
    """A status outside the noisy set is still forwarded to the gateway callback."""
    quiet_status = "🗜️ Context reduced to 120,000 tokens (was 240,000), retrying..."
    callback = MagicMock()
    agent.status_callback = callback
    agent.platform = "telegram"

    with patch.object(agent, "_vprint") as vprint_spy:
        agent._emit_status(quiet_status)

    vprint_spy.assert_called_once()
    callback.assert_called_once_with("lifecycle", quiet_status)
|
||||
|
||||
def test_emit_status_handles_platform_enum_for_gateway_suppression(self, agent):
    """Suppression also applies when ``platform`` is a ``Platform`` enum member."""
    noisy_status = "⚠️ Rate limited — switching to fallback provider..."
    callback = MagicMock()
    agent.status_callback = callback
    agent.platform = Platform.TELEGRAM

    with patch.object(agent, "_vprint") as vprint_spy:
        agent._emit_status(noisy_status)

    vprint_spy.assert_called_once()
    callback.assert_not_called()
|
||||
|
||||
def test_billing_429_does_not_emit_rate_limit_backoff_status(self, agent):
    """A billing-flavoured 429 ends the run after one call with no backoff notice."""

    class _Billing429Error(Exception):
        # Stand-in provider error: HTTP 429 status with a balance message.
        status_code = 429

        def __str__(self):
            return "HTTP 429: insufficient balance (1008)"

    # Agent configuration for the run under test.
    agent.base_url = "https://api.minimax.io/v1/"
    agent._cached_system_prompt = "You are helpful."
    agent._use_prompt_caching = False
    agent.compression_enabled = False
    agent.save_trajectories = False
    agent.tool_delay = 0

    # Every status the agent tries to emit is recorded here for inspection.
    emitted = []

    with (
        patch.object(agent, "_interruptible_api_call", side_effect=_Billing429Error()),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
        patch.object(agent, "_emit_status", side_effect=emitted.append),
        patch("run_agent.time.sleep", return_value=None),
    ):
        outcome = agent.run_conversation("hello")

    assert outcome["completed"] is False
    assert outcome["final_response"] is None
    assert "insufficient balance (1008)" in outcome["error"]
    assert outcome["api_calls"] == 1
    assert not any("Rate limit reached. Waiting" in status for status in emitted)
|
||||
|
||||
|
||||
class TestConcurrentToolExecution:
|
||||
"""Tests for _execute_tool_calls_concurrent and dispatch logic."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue