mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-19 04:52:06 +00:00
fix(auxiliary): cache 402'd providers as unhealthy with TTL to stop per-call retry storms (#23597)
When an auxiliary provider returns HTTP 402 (credit / payment), every subsequent compression / title-gen / session-search / vision call still re-tried it as the FIRST entry in the chain — burning ~1 RTT to hit 402 again, then falling back. On a long Discord/LCM session that meant dozens of doomed 402s per minute (issue #23570). Add a per-process unhealthy-provider cache with a 10 min TTL. When any caller observes a payment error against a provider, the label is marked unhealthy and skipped by: * _resolve_auto Step-1 (main provider use-as-aux path) * _resolve_auto Step-2 (aggregator/fallback chain) * _try_payment_fallback (used by call_llm/acall_llm on first 402) Skip-logs are throttled to once per minute per label so a bursty session doesn't spam agent.log. Entries auto-expire so a topped-up account recovers without manual intervention. The cache is in-process only by design — multi-profile users with different keys per profile must each hit the 402 once. Refs #23570
This commit is contained in:
parent
ace1c4ea8c
commit
228b7d27bd
2 changed files with 307 additions and 11 deletions
|
|
@ -1830,6 +1830,113 @@ def _get_provider_chain() -> List[tuple]:
|
|||
]
|
||||
|
||||
|
||||
# ── Auxiliary "recently 402'd" unhealthy-provider cache ────────────────────
|
||||
#
|
||||
# When an auxiliary provider returns HTTP 402 (Payment Required / credit
|
||||
# exhaustion), retrying it on every subsequent aux call is wasteful — the
|
||||
# provider stays depleted for hours or days, but the chain re-tries it as
|
||||
# the FIRST entry on every compression/title-gen/session-search call,
|
||||
# burns ~1 RTT, gets 402 again, then falls back. On a long Discord/LCM
|
||||
# session that adds up to dozens of doomed 402s.
|
||||
#
|
||||
# Solution: when ANY caller observes a payment error against a provider,
|
||||
# mark it unhealthy for ``_AUX_UNHEALTHY_TTL_SECONDS``. ``_resolve_auto``
|
||||
# Step-2 and ``_try_payment_fallback`` both consult this cache and skip
|
||||
# unhealthy entries (logging once per skip-reason so the user sees what
|
||||
# happened). Entries auto-expire so a topped-up account recovers without
|
||||
# manual intervention.
|
||||
#
|
||||
# Failure isolation: the cache is in-process only. A second hermes
|
||||
# process won't inherit the unhealthy mark — that's intentional, since
|
||||
# the user might be running two profiles with different OpenRouter keys.
|
||||
|
||||
_AUX_UNHEALTHY_TTL_SECONDS = 600 # 10 minutes
|
||||
_aux_unhealthy_until: Dict[str, float] = {}
|
||||
_aux_unhealthy_logged_at: Dict[str, float] = {}
|
||||
|
||||
# Map provider names that show up in resolved_provider / explicit-config
|
||||
# back to the chain labels used by _get_provider_chain(). Keep in sync
|
||||
# with the alias map in _try_payment_fallback below.
|
||||
_AUX_UNHEALTHY_LABEL_ALIASES = {
|
||||
"openrouter": "openrouter",
|
||||
"nous": "nous",
|
||||
"custom": "local/custom",
|
||||
"local/custom": "local/custom",
|
||||
"openai-codex": "openai-codex",
|
||||
"codex": "openai-codex",
|
||||
}
|
||||
|
||||
|
||||
def _normalize_chain_label(provider: str) -> str:
|
||||
"""Normalize a resolved_provider value to a chain label used by
|
||||
``_get_provider_chain()``. Falls back to the lowercased input for
|
||||
direct API-key providers (deepseek, alibaba, minimax, etc.) which
|
||||
each report their own provider name from the api-key chain.
|
||||
"""
|
||||
if not provider:
|
||||
return ""
|
||||
p = str(provider).strip().lower()
|
||||
return _AUX_UNHEALTHY_LABEL_ALIASES.get(p, p)
|
||||
|
||||
|
||||
def _mark_provider_unhealthy(provider: str, ttl: Optional[float] = None) -> None:
|
||||
"""Mark ``provider`` as recently-402'd, hidden from chain iteration
|
||||
until the TTL expires. Called from the payment-fallback branches in
|
||||
``call_llm`` and ``acall_llm`` after a confirmed payment error.
|
||||
"""
|
||||
label = _normalize_chain_label(provider)
|
||||
if not label:
|
||||
return
|
||||
expires_at = time.time() + (ttl if ttl is not None else _AUX_UNHEALTHY_TTL_SECONDS)
|
||||
_aux_unhealthy_until[label] = expires_at
|
||||
logger.warning(
|
||||
"Auxiliary: marking %s unhealthy for %ds (payment / credit error). "
|
||||
"Subsequent auxiliary calls will skip it until %s.",
|
||||
label,
|
||||
int(ttl if ttl is not None else _AUX_UNHEALTHY_TTL_SECONDS),
|
||||
time.strftime("%H:%M:%S", time.localtime(expires_at)),
|
||||
)
|
||||
|
||||
|
||||
def _is_provider_unhealthy(label: str) -> bool:
|
||||
"""True iff ``label`` is in the unhealthy cache and the TTL hasn't expired.
|
||||
Lazily evicts expired entries so the cache stays small.
|
||||
"""
|
||||
if not label:
|
||||
return False
|
||||
expires_at = _aux_unhealthy_until.get(label)
|
||||
if expires_at is None:
|
||||
return False
|
||||
if time.time() >= expires_at:
|
||||
_aux_unhealthy_until.pop(label, None)
|
||||
_aux_unhealthy_logged_at.pop(label, None)
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _log_skip_unhealthy(label: str, task: Optional[str] = None) -> None:
|
||||
"""Emit a single info-level log per minute when we skip an unhealthy
|
||||
provider. Avoids spamming the log on bursty sessions while still
|
||||
giving the user a trail.
|
||||
"""
|
||||
now = time.time()
|
||||
last = _aux_unhealthy_logged_at.get(label, 0.0)
|
||||
if now - last >= 60:
|
||||
_aux_unhealthy_logged_at[label] = now
|
||||
expires_at = _aux_unhealthy_until.get(label, now)
|
||||
logger.info(
|
||||
"Auxiliary %s: skipping %s (recently returned payment error, retry in %ds)",
|
||||
task or "call", label, max(0, int(expires_at - now)),
|
||||
)
|
||||
|
||||
|
||||
def _reset_aux_unhealthy_cache() -> None:
|
||||
"""Clear the unhealthy cache. Used by tests and by a future explicit
|
||||
user trigger (e.g. ``hermes config aux reset``)."""
|
||||
_aux_unhealthy_until.clear()
|
||||
_aux_unhealthy_logged_at.clear()
|
||||
|
||||
|
||||
def _is_payment_error(exc: Exception) -> bool:
|
||||
"""Detect payment/credit/quota exhaustion errors.
|
||||
|
||||
|
|
@ -2302,6 +2409,10 @@ def _try_payment_fallback(
|
|||
for label, try_fn in _get_provider_chain():
|
||||
if label in skip_chain_labels:
|
||||
continue
|
||||
if _is_provider_unhealthy(label):
|
||||
_log_skip_unhealthy(label, task)
|
||||
tried.append(f"{label} (unhealthy)")
|
||||
continue
|
||||
client, model = try_fn()
|
||||
if client is not None:
|
||||
logger.info(
|
||||
|
|
@ -2378,21 +2489,34 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
|
|||
resolved_provider = "custom"
|
||||
explicit_base_url = runtime_base_url
|
||||
explicit_api_key = runtime_api_key or None
|
||||
client, resolved = resolve_provider_client(
|
||||
resolved_provider,
|
||||
main_model,
|
||||
explicit_base_url=explicit_base_url,
|
||||
explicit_api_key=explicit_api_key,
|
||||
api_mode=runtime_api_mode or None,
|
||||
)
|
||||
if client is not None:
|
||||
logger.info("Auxiliary auto-detect: using main provider %s (%s)",
|
||||
main_provider, resolved or main_model)
|
||||
return client, resolved or main_model
|
||||
# Skip Step-1 if the main provider was recently 402'd. The unhealthy
|
||||
# cache TTL bounds how long we bypass it, so a topped-up account
|
||||
# recovers automatically. If we tried Step-1 anyway, every aux call
|
||||
# on a depleted main provider would pay one doomed 402 RTT before
|
||||
# falling to Step-2.
|
||||
main_chain_label = _normalize_chain_label(resolved_provider)
|
||||
if main_chain_label and _is_provider_unhealthy(main_chain_label):
|
||||
_log_skip_unhealthy(main_chain_label)
|
||||
else:
|
||||
client, resolved = resolve_provider_client(
|
||||
resolved_provider,
|
||||
main_model,
|
||||
explicit_base_url=explicit_base_url,
|
||||
explicit_api_key=explicit_api_key,
|
||||
api_mode=runtime_api_mode or None,
|
||||
)
|
||||
if client is not None:
|
||||
logger.info("Auxiliary auto-detect: using main provider %s (%s)",
|
||||
main_provider, resolved or main_model)
|
||||
return client, resolved or main_model
|
||||
|
||||
# ── Step 2: aggregator / fallback chain ──────────────────────────────
|
||||
tried = []
|
||||
for label, try_fn in _get_provider_chain():
|
||||
if _is_provider_unhealthy(label):
|
||||
_log_skip_unhealthy(label)
|
||||
tried.append(f"{label} (unhealthy)")
|
||||
continue
|
||||
client, model = try_fn()
|
||||
if client is not None:
|
||||
if tried:
|
||||
|
|
@ -4224,6 +4348,13 @@ def call_llm(
|
|||
if should_fallback and is_auto:
|
||||
if _is_payment_error(first_err):
|
||||
reason = "payment error"
|
||||
# Resolve the actual provider label (resolved_provider may be
|
||||
# "auto"; the client's base_url tells us which backend got the
|
||||
# 402). Mark THAT label unhealthy so subsequent aux calls
|
||||
# skip it instead of paying another doomed RTT.
|
||||
_mark_provider_unhealthy(
|
||||
_recoverable_pool_provider(resolved_provider, client) or resolved_provider
|
||||
)
|
||||
elif _is_rate_limit_error(first_err):
|
||||
reason = "rate limit"
|
||||
else:
|
||||
|
|
@ -4546,6 +4677,9 @@ async def async_call_llm(
|
|||
if should_fallback and is_auto:
|
||||
if _is_payment_error(first_err):
|
||||
reason = "payment error"
|
||||
_mark_provider_unhealthy(
|
||||
_recoverable_pool_provider(resolved_provider, client) or resolved_provider
|
||||
)
|
||||
elif _is_rate_limit_error(first_err):
|
||||
reason = "rate limit"
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -2496,3 +2496,165 @@ class TestAnthropicExplicitApiKey:
|
|||
assert mock_build.call_args.args[0] == "explicit-fallback-key", (
|
||||
"resolve_provider_client must forward explicit_api_key to _try_anthropic()"
|
||||
)
|
||||
|
||||
|
||||
# ── Auxiliary unhealthy-provider TTL cache (issue #23570) ────────────────
|
||||
|
||||
|
||||
class TestAuxUnhealthyCache:
|
||||
"""Recently-402'd providers are skipped on subsequent aux calls.
|
||||
|
||||
Without this, every compression / title-gen / session-search call on a
|
||||
long session retries a depleted OpenRouter (~1 RTT to 402) before
|
||||
falling back to the next provider. The TTL cache hides the unhealthy
|
||||
provider for ``_AUX_UNHEALTHY_TTL_SECONDS`` so the chain skips it.
|
||||
"""
|
||||
|
||||
def setup_method(self):
|
||||
from agent.auxiliary_client import _reset_aux_unhealthy_cache
|
||||
_reset_aux_unhealthy_cache()
|
||||
|
||||
def teardown_method(self):
|
||||
from agent.auxiliary_client import _reset_aux_unhealthy_cache
|
||||
_reset_aux_unhealthy_cache()
|
||||
|
||||
def test_mark_then_skip(self):
|
||||
from agent.auxiliary_client import (
|
||||
_mark_provider_unhealthy,
|
||||
_is_provider_unhealthy,
|
||||
)
|
||||
assert _is_provider_unhealthy("openrouter") is False
|
||||
_mark_provider_unhealthy("openrouter")
|
||||
assert _is_provider_unhealthy("openrouter") is True
|
||||
|
||||
def test_ttl_expiry_evicts(self):
|
||||
from agent.auxiliary_client import (
|
||||
_mark_provider_unhealthy,
|
||||
_is_provider_unhealthy,
|
||||
_aux_unhealthy_until,
|
||||
)
|
||||
_mark_provider_unhealthy("openrouter", ttl=0.01)
|
||||
assert _is_provider_unhealthy("openrouter") is True
|
||||
import time
|
||||
time.sleep(0.02)
|
||||
# Lazy eviction: first lookup after expiry returns False AND removes the entry.
|
||||
assert _is_provider_unhealthy("openrouter") is False
|
||||
assert "openrouter" not in _aux_unhealthy_until
|
||||
|
||||
def test_alias_normalization(self):
|
||||
"""'codex' should normalize to 'openai-codex' so the cache lookup
|
||||
matches the chain label."""
|
||||
from agent.auxiliary_client import (
|
||||
_mark_provider_unhealthy,
|
||||
_is_provider_unhealthy,
|
||||
)
|
||||
_mark_provider_unhealthy("codex")
|
||||
assert _is_provider_unhealthy("openai-codex") is True
|
||||
|
||||
def test_resolve_auto_skips_unhealthy_step2(self):
|
||||
"""_resolve_auto Step-2 chain skips unhealthy providers."""
|
||||
from agent.auxiliary_client import (
|
||||
_resolve_auto,
|
||||
_mark_provider_unhealthy,
|
||||
)
|
||||
nous_client = MagicMock()
|
||||
# Mark OpenRouter unhealthy → chain should skip it and pick nous.
|
||||
_mark_provider_unhealthy("openrouter")
|
||||
with patch("agent.auxiliary_client._read_main_provider", return_value=""), \
|
||||
patch("agent.auxiliary_client._read_main_model", return_value=""), \
|
||||
patch("agent.auxiliary_client._try_openrouter") as or_try, \
|
||||
patch("agent.auxiliary_client._try_nous", return_value=(nous_client, "nous-model")), \
|
||||
patch("agent.auxiliary_client._try_custom_endpoint", return_value=(None, None)), \
|
||||
patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
|
||||
client, model = _resolve_auto()
|
||||
assert client is nous_client
|
||||
assert model == "nous-model"
|
||||
# The skipped provider's _try_* should NOT have been called at all.
|
||||
or_try.assert_not_called()
|
||||
|
||||
def test_resolve_auto_skips_unhealthy_main_in_step1(self):
|
||||
"""Step-1 also consults the unhealthy cache so a depleted main
|
||||
provider doesn't burn a 402 RTT every aux call. Falls through to
|
||||
Step-2 chain (which also respects the cache)."""
|
||||
from agent.auxiliary_client import (
|
||||
_resolve_auto,
|
||||
_mark_provider_unhealthy,
|
||||
)
|
||||
nous_client = MagicMock()
|
||||
_mark_provider_unhealthy("openrouter")
|
||||
with patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
|
||||
patch("agent.auxiliary_client._read_main_model", return_value="anthropic/claude-sonnet-4.6"), \
|
||||
patch("agent.auxiliary_client.resolve_provider_client") as step1, \
|
||||
patch("agent.auxiliary_client._try_openrouter") as or_try, \
|
||||
patch("agent.auxiliary_client._try_nous", return_value=(nous_client, "n-model")), \
|
||||
patch("agent.auxiliary_client._try_custom_endpoint", return_value=(None, None)), \
|
||||
patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
|
||||
client, model = _resolve_auto()
|
||||
# Step-1 was bypassed — resolve_provider_client never invoked
|
||||
step1.assert_not_called()
|
||||
# Step-2 also skipped openrouter and landed on nous
|
||||
or_try.assert_not_called()
|
||||
assert client is nous_client
|
||||
|
||||
def test_payment_fallback_skips_unhealthy(self):
|
||||
"""_try_payment_fallback also consults the unhealthy cache so a 402
|
||||
on OpenRouter doesn't cause a second OR call within the same chain
|
||||
iteration if it gets re-entered."""
|
||||
from agent.auxiliary_client import (
|
||||
_try_payment_fallback,
|
||||
_mark_provider_unhealthy,
|
||||
)
|
||||
nous_client = MagicMock()
|
||||
# Mark BOTH the failed provider (openrouter) and a sibling (custom)
|
||||
# unhealthy. The chain should still find nous.
|
||||
_mark_provider_unhealthy("local/custom")
|
||||
with patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
|
||||
patch("agent.auxiliary_client._try_openrouter") as or_try, \
|
||||
patch("agent.auxiliary_client._try_nous", return_value=(nous_client, "n-model")), \
|
||||
patch("agent.auxiliary_client._try_custom_endpoint") as custom_try, \
|
||||
patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
|
||||
client, model, label = _try_payment_fallback("openrouter", task="compression")
|
||||
assert client is nous_client
|
||||
assert label == "nous"
|
||||
# OR is skipped via skip_chain_labels (failed provider), custom via unhealthy cache.
|
||||
or_try.assert_not_called()
|
||||
custom_try.assert_not_called()
|
||||
|
||||
def test_call_llm_marks_provider_unhealthy_on_402(self, monkeypatch):
|
||||
"""A 402 from call_llm causes the provider to be marked unhealthy
|
||||
so the next call skips it instead of re-trying the same depleted
|
||||
endpoint."""
|
||||
from agent.auxiliary_client import (
|
||||
call_llm,
|
||||
_is_provider_unhealthy,
|
||||
)
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
|
||||
primary_client = MagicMock()
|
||||
# base_url tells _recoverable_pool_provider() that this is OpenRouter
|
||||
# (resolved_provider="auto" doesn't carry that information by itself).
|
||||
primary_client.base_url = "https://openrouter.ai/api/v1/"
|
||||
err = Exception("Payment Required: insufficient credits")
|
||||
err.status_code = 402
|
||||
primary_client.chat.completions.create.side_effect = err
|
||||
|
||||
nous_client = MagicMock()
|
||||
nous_resp = MagicMock()
|
||||
nous_resp.choices = [MagicMock(message=MagicMock(content="ok"))]
|
||||
nous_client.chat.completions.create.return_value = nous_resp
|
||||
|
||||
with patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(primary_client, "google/gemini-3-flash-preview")), \
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("auto", "google/gemini-3-flash-preview", None, None, None)), \
|
||||
patch("agent.auxiliary_client._try_payment_fallback",
|
||||
return_value=(nous_client, "n-model", "nous")), \
|
||||
patch("agent.auxiliary_client._build_call_kwargs",
|
||||
return_value={"model": "n-model", "messages": [{"role": "user", "content": "hi"}]}):
|
||||
assert _is_provider_unhealthy("openrouter") is False
|
||||
call_llm(
|
||||
task="compression",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
)
|
||||
# After the 402, OpenRouter is in the unhealthy cache.
|
||||
assert _is_provider_unhealthy("openrouter") is True
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue