mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(agent): only set rate-limit cooldown when leaving primary; add tests
This commit is contained in:
parent
a9fd8d7c88
commit
f7f7588893
4 changed files with 135 additions and 2 deletions
|
|
@ -343,6 +343,11 @@ def classify_api_error(
|
|||
"""
|
||||
status_code = _extract_status_code(error)
|
||||
error_type = type(error).__name__
|
||||
# Copilot/GitHub Models RateLimitError may not set .status_code; force 429
|
||||
# so downstream rate-limit handling (classifier reason, pool rotation,
|
||||
# fallback gating) fires correctly instead of misclassifying as generic.
|
||||
if status_code is None and error_type == "RateLimitError":
|
||||
status_code = 429
|
||||
body = _extract_error_body(error)
|
||||
error_code = _extract_error_code(body)
|
||||
|
||||
|
|
|
|||
16
run_agent.py
16
run_agent.py
|
|
@ -6494,7 +6494,7 @@ class AIAgent:
|
|||
|
||||
# ── Provider fallback ──────────────────────────────────────────────────
|
||||
|
||||
def _try_activate_fallback(self) -> bool:
|
||||
def _try_activate_fallback(self, reason: "FailoverReason | None" = None) -> bool:
|
||||
"""Switch to the next fallback model/provider in the chain.
|
||||
|
||||
Called when the current model is failing after retries. Swaps the
|
||||
|
|
@ -6506,6 +6506,15 @@ class AIAgent:
|
|||
auth resolution and client construction — no duplicated provider→key
|
||||
mappings.
|
||||
"""
|
||||
if reason in (FailoverReason.rate_limit, FailoverReason.billing):
|
||||
# Only start cooldown when leaving the primary provider. If we're
|
||||
# already on a fallback and chain-switching, the primary wasn't the
|
||||
# source of the 429 so the cooldown should not be reset/extended.
|
||||
fallback_already_active = bool(getattr(self, "_fallback_activated", False))
|
||||
current_provider = (getattr(self, "provider", "") or "").strip().lower()
|
||||
primary_provider = ((self._primary_runtime or {}).get("provider") or "").strip().lower()
|
||||
if (not fallback_already_active) or (primary_provider and current_provider == primary_provider):
|
||||
self._rate_limited_until = time.monotonic() + 60
|
||||
if self._fallback_index >= len(self._fallback_chain):
|
||||
return False
|
||||
|
||||
|
|
@ -6689,6 +6698,9 @@ class AIAgent:
|
|||
if not self._fallback_activated:
|
||||
return False
|
||||
|
||||
if getattr(self, "_rate_limited_until", 0) > time.monotonic():
|
||||
return False # primary still in rate-limit cooldown, stay on fallback
|
||||
|
||||
rt = self._primary_runtime
|
||||
try:
|
||||
# ── Core runtime state ──
|
||||
|
|
@ -10665,7 +10677,7 @@ class AIAgent:
|
|||
)
|
||||
if not pool_may_recover:
|
||||
self._emit_status("⚠️ Rate limited — switching to fallback provider...")
|
||||
if self._try_activate_fallback():
|
||||
if self._try_activate_fallback(reason=classified.reason):
|
||||
retry_count = 0
|
||||
compression_attempts = 0
|
||||
primary_recovery_attempted = False
|
||||
|
|
|
|||
|
|
@ -1094,3 +1094,37 @@ class TestSSLTransientPatterns:
|
|||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.timeout
|
||||
assert result.retryable is True
|
||||
|
||||
# ── Test: RateLimitError without status_code (Copilot/GitHub Models) ──────────
|
||||
|
||||
class TestRateLimitErrorWithoutStatusCode:
|
||||
"""Regression tests for the Copilot/GitHub Models edge case where the
|
||||
OpenAI SDK raises RateLimitError but does not populate .status_code."""
|
||||
|
||||
def _make_rate_limit_error(self, status_code=None):
|
||||
"""Create an exception whose class name is 'RateLimitError' with
|
||||
an optionally missing status_code, mirroring the OpenAI SDK shape."""
|
||||
cls = type("RateLimitError", (Exception,), {})
|
||||
e = cls("You have exceeded your rate limit.")
|
||||
e.status_code = status_code # None simulates the Copilot case
|
||||
return e
|
||||
|
||||
def test_rate_limit_error_without_status_code_classified_as_rate_limit(self):
|
||||
"""RateLimitError with status_code=None must classify as rate_limit."""
|
||||
e = self._make_rate_limit_error(status_code=None)
|
||||
result = classify_api_error(e, provider="copilot", model="gpt-4o")
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
|
||||
def test_rate_limit_error_with_status_code_429_classified_as_rate_limit(self):
|
||||
"""RateLimitError that does set status_code=429 still classifies correctly."""
|
||||
e = self._make_rate_limit_error(status_code=429)
|
||||
result = classify_api_error(e, provider="copilot", model="gpt-4o")
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
|
||||
def test_other_error_without_status_code_not_forced_to_rate_limit(self):
|
||||
"""A non-RateLimitError with missing status_code must NOT be forced to 429."""
|
||||
cls = type("APIError", (Exception,), {})
|
||||
e = cls("something went wrong")
|
||||
e.status_code = None
|
||||
result = classify_api_error(e, provider="copilot", model="gpt-4o")
|
||||
assert result.reason != FailoverReason.rate_limit
|
||||
|
|
|
|||
|
|
@ -446,3 +446,85 @@ class TestRestoreInRunConversation:
|
|||
assert agent._fallback_index == 0
|
||||
assert agent.provider == "custom"
|
||||
assert agent.base_url == "https://my-llm.example.com/v1"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Rate-limit cooldown gate
|
||||
# =============================================================================
|
||||
|
||||
class TestRateLimitCooldown:
|
||||
"""Verify _restore_primary_runtime() respects the 60s rate-limit cooldown."""
|
||||
|
||||
def test_restore_blocked_during_cooldown(self):
|
||||
"""While _rate_limited_until is in the future, restore returns False."""
|
||||
agent = _make_agent(
|
||||
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
|
||||
)
|
||||
mock_client = _mock_resolve()
|
||||
with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
|
||||
agent._try_activate_fallback()
|
||||
|
||||
assert agent._fallback_activated is True
|
||||
|
||||
# Manually set cooldown well into the future
|
||||
agent._rate_limited_until = time.monotonic() + 60
|
||||
|
||||
result = agent._restore_primary_runtime()
|
||||
assert result is False
|
||||
assert agent._fallback_activated is True # still on fallback
|
||||
|
||||
def test_restore_allowed_after_cooldown_expires(self):
|
||||
"""Once the cooldown window passes, restore proceeds normally."""
|
||||
agent = _make_agent(
|
||||
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
|
||||
)
|
||||
mock_client = _mock_resolve()
|
||||
with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
|
||||
agent._try_activate_fallback()
|
||||
|
||||
assert agent._fallback_activated is True
|
||||
|
||||
# Cooldown already expired
|
||||
agent._rate_limited_until = time.monotonic() - 1
|
||||
|
||||
with patch("run_agent.OpenAI", return_value=MagicMock()):
|
||||
result = agent._restore_primary_runtime()
|
||||
|
||||
assert result is True
|
||||
assert agent._fallback_activated is False
|
||||
|
||||
def test_cooldown_set_on_rate_limit_reason(self):
|
||||
"""_try_activate_fallback with rate_limit reason sets _rate_limited_until."""
|
||||
from run_agent import FailoverReason
|
||||
agent = _make_agent(
|
||||
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
|
||||
)
|
||||
before = time.monotonic()
|
||||
mock_client = _mock_resolve()
|
||||
with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
|
||||
agent._try_activate_fallback(reason=FailoverReason.rate_limit)
|
||||
|
||||
assert hasattr(agent, "_rate_limited_until")
|
||||
assert agent._rate_limited_until > before + 50 # ~60s from now
|
||||
|
||||
def test_cooldown_not_set_when_already_on_fallback(self):
|
||||
"""Chain-switching while already on fallback must not reset cooldown."""
|
||||
from run_agent import FailoverReason
|
||||
agent = _make_agent(
|
||||
fallback_model=[
|
||||
{"provider": "openrouter", "model": "model-a"},
|
||||
{"provider": "anthropic", "model": "model-b"},
|
||||
],
|
||||
)
|
||||
mock_client = _mock_resolve()
|
||||
with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
|
||||
# First call: leaving primary → cooldown should be set
|
||||
agent._try_activate_fallback(reason=FailoverReason.rate_limit)
|
||||
first_cooldown = getattr(agent, "_rate_limited_until", 0)
|
||||
|
||||
# Second call: already on fallback (provider != primary) → cooldown must not advance
|
||||
agent._try_activate_fallback(reason=FailoverReason.rate_limit)
|
||||
second_cooldown = getattr(agent, "_rate_limited_until", 0)
|
||||
|
||||
# second call should not have extended the cooldown
|
||||
assert second_cooldown == first_cooldown
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue