mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(auxiliary): cache 402'd providers as unhealthy with TTL to stop per-call retry storms (#23597)
When an auxiliary provider returns HTTP 402 (credit / payment), every subsequent compression / title-gen / session-search / vision call still re-tried it as the FIRST entry in the chain — burning ~1 RTT to hit 402 again, then falling back. On a long Discord/LCM session that meant dozens of doomed 402s per minute (issue #23570). Add a per-process unhealthy-provider cache with a 10 min TTL. When any caller observes a payment error against a provider, the label is marked unhealthy and skipped by: * _resolve_auto Step-1 (main provider use-as-aux path) * _resolve_auto Step-2 (aggregator/fallback chain) * _try_payment_fallback (used by call_llm/acall_llm on first 402) Skip-logs are throttled to once per minute per label so a bursty session doesn't spam agent.log. Entries auto-expire so a topped-up account recovers without manual intervention. The cache is in-process only by design — multi-profile users with different keys per profile must each hit the 402 once. Refs #23570
This commit is contained in:
parent
ace1c4ea8c
commit
228b7d27bd
2 changed files with 307 additions and 11 deletions
|
|
@ -1830,6 +1830,113 @@ def _get_provider_chain() -> List[tuple]:
|
|||
]
|
||||
|
||||
|
||||
# ── Auxiliary "recently 402'd" unhealthy-provider cache ────────────────────
|
||||
#
|
||||
# When an auxiliary provider returns HTTP 402 (Payment Required / credit
|
||||
# exhaustion), retrying it on every subsequent aux call is wasteful — the
|
||||
# provider stays depleted for hours or days, but the chain re-tries it as
|
||||
# the FIRST entry on every compression/title-gen/session-search call,
|
||||
# burns ~1 RTT, gets 402 again, then falls back. On a long Discord/LCM
|
||||
# session that adds up to dozens of doomed 402s.
|
||||
#
|
||||
# Solution: when ANY caller observes a payment error against a provider,
|
||||
# mark it unhealthy for ``_AUX_UNHEALTHY_TTL_SECONDS``. ``_resolve_auto``
|
||||
# Step-2 and ``_try_payment_fallback`` both consult this cache and skip
|
||||
# unhealthy entries (logging once per skip-reason so the user sees what
|
||||
# happened). Entries auto-expire so a topped-up account recovers without
|
||||
# manual intervention.
|
||||
#
|
||||
# Failure isolation: the cache is in-process only. A second hermes
|
||||
# process won't inherit the unhealthy mark — that's intentional, since
|
||||
# the user might be running two profiles with different OpenRouter keys.
|
||||
|
||||
_AUX_UNHEALTHY_TTL_SECONDS = 600 # 10 minutes
|
||||
_aux_unhealthy_until: Dict[str, float] = {}
|
||||
_aux_unhealthy_logged_at: Dict[str, float] = {}
|
||||
|
||||
# Map provider names that show up in resolved_provider / explicit-config
|
||||
# back to the chain labels used by _get_provider_chain(). Keep in sync
|
||||
# with the alias map in _try_payment_fallback below.
|
||||
_AUX_UNHEALTHY_LABEL_ALIASES = {
|
||||
"openrouter": "openrouter",
|
||||
"nous": "nous",
|
||||
"custom": "local/custom",
|
||||
"local/custom": "local/custom",
|
||||
"openai-codex": "openai-codex",
|
||||
"codex": "openai-codex",
|
||||
}
|
||||
|
||||
|
||||
def _normalize_chain_label(provider: str) -> str:
|
||||
"""Normalize a resolved_provider value to a chain label used by
|
||||
``_get_provider_chain()``. Falls back to the lowercased input for
|
||||
direct API-key providers (deepseek, alibaba, minimax, etc.) which
|
||||
each report their own provider name from the api-key chain.
|
||||
"""
|
||||
if not provider:
|
||||
return ""
|
||||
p = str(provider).strip().lower()
|
||||
return _AUX_UNHEALTHY_LABEL_ALIASES.get(p, p)
|
||||
|
||||
|
||||
def _mark_provider_unhealthy(provider: str, ttl: Optional[float] = None) -> None:
|
||||
"""Mark ``provider`` as recently-402'd, hidden from chain iteration
|
||||
until the TTL expires. Called from the payment-fallback branches in
|
||||
``call_llm`` and ``acall_llm`` after a confirmed payment error.
|
||||
"""
|
||||
label = _normalize_chain_label(provider)
|
||||
if not label:
|
||||
return
|
||||
expires_at = time.time() + (ttl if ttl is not None else _AUX_UNHEALTHY_TTL_SECONDS)
|
||||
_aux_unhealthy_until[label] = expires_at
|
||||
logger.warning(
|
||||
"Auxiliary: marking %s unhealthy for %ds (payment / credit error). "
|
||||
"Subsequent auxiliary calls will skip it until %s.",
|
||||
label,
|
||||
int(ttl if ttl is not None else _AUX_UNHEALTHY_TTL_SECONDS),
|
||||
time.strftime("%H:%M:%S", time.localtime(expires_at)),
|
||||
)
|
||||
|
||||
|
||||
def _is_provider_unhealthy(label: str) -> bool:
|
||||
"""True iff ``label`` is in the unhealthy cache and the TTL hasn't expired.
|
||||
Lazily evicts expired entries so the cache stays small.
|
||||
"""
|
||||
if not label:
|
||||
return False
|
||||
expires_at = _aux_unhealthy_until.get(label)
|
||||
if expires_at is None:
|
||||
return False
|
||||
if time.time() >= expires_at:
|
||||
_aux_unhealthy_until.pop(label, None)
|
||||
_aux_unhealthy_logged_at.pop(label, None)
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _log_skip_unhealthy(label: str, task: Optional[str] = None) -> None:
|
||||
"""Emit a single info-level log per minute when we skip an unhealthy
|
||||
provider. Avoids spamming the log on bursty sessions while still
|
||||
giving the user a trail.
|
||||
"""
|
||||
now = time.time()
|
||||
last = _aux_unhealthy_logged_at.get(label, 0.0)
|
||||
if now - last >= 60:
|
||||
_aux_unhealthy_logged_at[label] = now
|
||||
expires_at = _aux_unhealthy_until.get(label, now)
|
||||
logger.info(
|
||||
"Auxiliary %s: skipping %s (recently returned payment error, retry in %ds)",
|
||||
task or "call", label, max(0, int(expires_at - now)),
|
||||
)
|
||||
|
||||
|
||||
def _reset_aux_unhealthy_cache() -> None:
|
||||
"""Clear the unhealthy cache. Used by tests and by a future explicit
|
||||
user trigger (e.g. ``hermes config aux reset``)."""
|
||||
_aux_unhealthy_until.clear()
|
||||
_aux_unhealthy_logged_at.clear()
|
||||
|
||||
|
||||
def _is_payment_error(exc: Exception) -> bool:
|
||||
"""Detect payment/credit/quota exhaustion errors.
|
||||
|
||||
|
|
@ -2302,6 +2409,10 @@ def _try_payment_fallback(
|
|||
for label, try_fn in _get_provider_chain():
|
||||
if label in skip_chain_labels:
|
||||
continue
|
||||
if _is_provider_unhealthy(label):
|
||||
_log_skip_unhealthy(label, task)
|
||||
tried.append(f"{label} (unhealthy)")
|
||||
continue
|
||||
client, model = try_fn()
|
||||
if client is not None:
|
||||
logger.info(
|
||||
|
|
@ -2378,21 +2489,34 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
|
|||
resolved_provider = "custom"
|
||||
explicit_base_url = runtime_base_url
|
||||
explicit_api_key = runtime_api_key or None
|
||||
client, resolved = resolve_provider_client(
|
||||
resolved_provider,
|
||||
main_model,
|
||||
explicit_base_url=explicit_base_url,
|
||||
explicit_api_key=explicit_api_key,
|
||||
api_mode=runtime_api_mode or None,
|
||||
)
|
||||
if client is not None:
|
||||
logger.info("Auxiliary auto-detect: using main provider %s (%s)",
|
||||
main_provider, resolved or main_model)
|
||||
return client, resolved or main_model
|
||||
# Skip Step-1 if the main provider was recently 402'd. The unhealthy
|
||||
# cache TTL bounds how long we bypass it, so a topped-up account
|
||||
# recovers automatically. If we tried Step-1 anyway, every aux call
|
||||
# on a depleted main provider would pay one doomed 402 RTT before
|
||||
# falling to Step-2.
|
||||
main_chain_label = _normalize_chain_label(resolved_provider)
|
||||
if main_chain_label and _is_provider_unhealthy(main_chain_label):
|
||||
_log_skip_unhealthy(main_chain_label)
|
||||
else:
|
||||
client, resolved = resolve_provider_client(
|
||||
resolved_provider,
|
||||
main_model,
|
||||
explicit_base_url=explicit_base_url,
|
||||
explicit_api_key=explicit_api_key,
|
||||
api_mode=runtime_api_mode or None,
|
||||
)
|
||||
if client is not None:
|
||||
logger.info("Auxiliary auto-detect: using main provider %s (%s)",
|
||||
main_provider, resolved or main_model)
|
||||
return client, resolved or main_model
|
||||
|
||||
# ── Step 2: aggregator / fallback chain ──────────────────────────────
|
||||
tried = []
|
||||
for label, try_fn in _get_provider_chain():
|
||||
if _is_provider_unhealthy(label):
|
||||
_log_skip_unhealthy(label)
|
||||
tried.append(f"{label} (unhealthy)")
|
||||
continue
|
||||
client, model = try_fn()
|
||||
if client is not None:
|
||||
if tried:
|
||||
|
|
@ -4224,6 +4348,13 @@ def call_llm(
|
|||
if should_fallback and is_auto:
|
||||
if _is_payment_error(first_err):
|
||||
reason = "payment error"
|
||||
# Resolve the actual provider label (resolved_provider may be
|
||||
# "auto"; the client's base_url tells us which backend got the
|
||||
# 402). Mark THAT label unhealthy so subsequent aux calls
|
||||
# skip it instead of paying another doomed RTT.
|
||||
_mark_provider_unhealthy(
|
||||
_recoverable_pool_provider(resolved_provider, client) or resolved_provider
|
||||
)
|
||||
elif _is_rate_limit_error(first_err):
|
||||
reason = "rate limit"
|
||||
else:
|
||||
|
|
@ -4546,6 +4677,9 @@ async def async_call_llm(
|
|||
if should_fallback and is_auto:
|
||||
if _is_payment_error(first_err):
|
||||
reason = "payment error"
|
||||
_mark_provider_unhealthy(
|
||||
_recoverable_pool_provider(resolved_provider, client) or resolved_provider
|
||||
)
|
||||
elif _is_rate_limit_error(first_err):
|
||||
reason = "rate limit"
|
||||
else:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue