mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
When Nous returns a 429, the retry amplification chain burns up to 9 API requests per conversation turn (3 SDK retries × 3 Hermes retries), each counting against RPH and deepening the rate limit. With multiple concurrent sessions (cron + gateway + auxiliary), this creates a spiral where retries keep the limit tapped indefinitely. New module: agent/nous_rate_guard.py - Shared file-based rate limit state (~/.hermes/rate_limits/nous.json) - Parses reset time from x-ratelimit-reset-requests-1h, x-ratelimit-reset-requests, retry-after headers, or error context - Falls back to 5-minute default cooldown if no header data - Atomic writes (tempfile + rename) for cross-process safety - Auto-cleanup of expired state files run_agent.py changes: - Top-of-retry-loop guard: when another session already recorded Nous as rate-limited, skip the API call entirely. Try fallback provider first, then return a clear message with the reset time. - On 429 from Nous: record rate limit state and skip further retries (sets retry_count = max_retries to trigger fallback path) - On success from Nous: clear the rate limit state so other sessions know they can resume auxiliary_client.py changes: - _try_nous() checks rate guard before attempting Nous in the auxiliary fallback chain. When rate-limited, returns (None, None) so the chain skips to the next provider instead of piling more requests onto Nous. This eliminates three sources of amplification: 1. Hermes-level retries (saves 6 of 9 calls per turn) 2. Cross-session retries (cron + gateway all skip Nous) 3. Auxiliary fallback to Nous (compression/session_search skip too) Includes 24 tests covering the rate guard module, header parsing, state lifecycle, and auxiliary client integration.
253 lines
8.7 KiB
Python
253 lines
8.7 KiB
Python
"""Tests for agent/nous_rate_guard.py — cross-session Nous Portal rate limit guard."""
|
|
|
|
import json
|
|
import os
|
|
import time
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture
def rate_guard_env(tmp_path, monkeypatch):
    """Point ``HERMES_HOME`` at a throwaway directory for a single test.

    Creates ``<tmp_path>/.hermes``, exports it through ``HERMES_HOME`` with
    ``monkeypatch.setenv``, and returns the directory path as a string.
    """
    sandbox_home = os.path.join(str(tmp_path), ".hermes")
    os.makedirs(sandbox_home, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", sandbox_home)
    # NOTE(review): no import cache is actually cleared here — presumably the
    # guard module resolves HERMES_HOME at call time; confirm.
    return sandbox_home
|
|
|
|
|
|
class TestRecordNousRateLimit:
    """Check what ``record_nous_rate_limit`` writes to the state file."""

    @staticmethod
    def _saved_state():
        """Load and return the JSON document found at ``_state_path()``."""
        from agent.nous_rate_guard import _state_path

        with open(_state_path()) as fh:
            return json.load(fh)

    def test_records_with_header_reset(self, rate_guard_env):
        """The hourly reset header yields a state file with a future reset."""
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path

        record_nous_rate_limit(headers={"x-ratelimit-reset-requests-1h": "1800"})

        assert os.path.exists(_state_path())
        saved = self._saved_state()
        assert saved["reset_seconds"] == pytest.approx(1800, abs=2)
        assert saved["reset_at"] > time.time()

    def test_records_with_per_minute_header(self, rate_guard_env):
        """The plain ``x-ratelimit-reset-requests`` header is honoured."""
        from agent.nous_rate_guard import record_nous_rate_limit

        record_nous_rate_limit(headers={"x-ratelimit-reset-requests": "45"})

        saved = self._saved_state()
        assert saved["reset_seconds"] == pytest.approx(45, abs=2)

    def test_records_with_retry_after_header(self, rate_guard_env):
        """A lone ``retry-after`` header is honoured."""
        from agent.nous_rate_guard import record_nous_rate_limit

        record_nous_rate_limit(headers={"retry-after": "60"})

        saved = self._saved_state()
        assert saved["reset_seconds"] == pytest.approx(60, abs=2)

    def test_prefers_hourly_over_per_minute(self, rate_guard_env):
        """When both reset headers are present, the hourly value is recorded."""
        from agent.nous_rate_guard import record_nous_rate_limit

        both_headers = {
            "x-ratelimit-reset-requests-1h": "1800",
            "x-ratelimit-reset-requests": "45",
        }
        record_nous_rate_limit(headers=both_headers)

        # 1800 (hourly) is expected, not 45 (per-minute).
        saved = self._saved_state()
        assert saved["reset_seconds"] == pytest.approx(1800, abs=2)

    def test_falls_back_to_error_context_reset_at(self, rate_guard_env):
        """Without headers, ``error_context["reset_at"]`` is stored as-is."""
        from agent.nous_rate_guard import record_nous_rate_limit

        expected_reset_at = time.time() + 900
        record_nous_rate_limit(
            headers=None,
            error_context={"reset_at": expected_reset_at},
        )

        saved = self._saved_state()
        assert saved["reset_at"] == pytest.approx(expected_reset_at, abs=1)

    def test_falls_back_to_default_cooldown(self, rate_guard_env):
        """With no headers and no context, a 300 second cooldown is recorded."""
        from agent.nous_rate_guard import record_nous_rate_limit

        record_nous_rate_limit(headers=None)

        # 300 seconds == the 5 minute default.
        saved = self._saved_state()
        assert saved["reset_seconds"] == pytest.approx(300, abs=2)

    def test_custom_default_cooldown(self, rate_guard_env):
        """An explicit ``default_cooldown`` replaces the built-in default."""
        from agent.nous_rate_guard import record_nous_rate_limit

        record_nous_rate_limit(headers=None, default_cooldown=120.0)

        saved = self._saved_state()
        assert saved["reset_seconds"] == pytest.approx(120, abs=2)

    def test_creates_directory_if_missing(self, rate_guard_env):
        """Recording succeeds and leaves a state file on a fresh home dir."""
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path

        record_nous_rate_limit(headers={"retry-after": "10"})

        state_file = _state_path()
        assert os.path.exists(state_file)
|
|
|
|
|
|
class TestNousRateLimitRemaining:
    """Check what ``nous_rate_limit_remaining`` reports for each file state."""

    def test_returns_none_when_no_file(self, rate_guard_env):
        """No state file at all means no active limit."""
        from agent.nous_rate_guard import nous_rate_limit_remaining

        remaining = nous_rate_limit_remaining()
        assert remaining is None

    def test_returns_remaining_seconds_when_active(self, rate_guard_env):
        """A freshly recorded 600 s limit reports roughly 600 s remaining."""
        from agent.nous_rate_guard import record_nous_rate_limit, nous_rate_limit_remaining

        record_nous_rate_limit(headers={"x-ratelimit-reset-requests-1h": "600"})

        remaining = nous_rate_limit_remaining()
        assert remaining is not None
        # Tolerance window around 600 s absorbs test execution time.
        assert remaining > 595 and remaining <= 605

    def test_returns_none_when_expired(self, rate_guard_env):
        """An already-expired state reports None and the file is removed."""
        from agent.nous_rate_guard import nous_rate_limit_remaining, _state_path

        # Hand-write a state whose reset time is already in the past.
        stale_path = _state_path()
        os.makedirs(os.path.dirname(stale_path), exist_ok=True)
        with open(stale_path, "w") as fh:
            stale_state = {"reset_at": time.time() - 10, "recorded_at": time.time() - 100}
            json.dump(stale_state, fh)

        assert nous_rate_limit_remaining() is None
        # The expired file must no longer exist after the check.
        assert not os.path.exists(_state_path())

    def test_handles_corrupt_file(self, rate_guard_env):
        """Unparseable file contents are treated as 'no limit'."""
        from agent.nous_rate_guard import nous_rate_limit_remaining, _state_path

        corrupt_path = _state_path()
        os.makedirs(os.path.dirname(corrupt_path), exist_ok=True)
        with open(corrupt_path, "w") as fh:
            fh.write("not valid json{{{")

        assert nous_rate_limit_remaining() is None
|
|
|
|
|
|
class TestClearNousRateLimit:
    """Check that ``clear_nous_rate_limit`` removes recorded state."""

    def test_clears_existing_file(self, rate_guard_env):
        """Clearing an active limit removes both the limit and its file."""
        from agent.nous_rate_guard import (
            _state_path,
            clear_nous_rate_limit,
            nous_rate_limit_remaining,
            record_nous_rate_limit,
        )

        record_nous_rate_limit(headers={"retry-after": "600"})
        before_clear = nous_rate_limit_remaining()
        assert before_clear is not None

        clear_nous_rate_limit()

        after_clear = nous_rate_limit_remaining()
        assert after_clear is None
        assert not os.path.exists(_state_path())

    def test_clear_when_no_file(self, rate_guard_env):
        """Clearing with nothing recorded must complete without raising."""
        from agent.nous_rate_guard import clear_nous_rate_limit

        # The call itself is the assertion: any exception fails the test.
        clear_nous_rate_limit()
|
|
|
|
|
|
class TestFormatRemaining:
    """Check the strings ``format_remaining`` produces for sample durations."""

    def test_seconds(self):
        """30 seconds renders as ``30s``."""
        from agent.nous_rate_guard import format_remaining

        rendered = format_remaining(30)
        assert rendered == "30s"

    def test_minutes(self):
        """125 seconds renders as ``2m 5s``."""
        from agent.nous_rate_guard import format_remaining

        rendered = format_remaining(125)
        assert rendered == "2m 5s"

    def test_exact_minutes(self):
        """120 seconds renders as ``2m`` with no seconds part."""
        from agent.nous_rate_guard import format_remaining

        rendered = format_remaining(120)
        assert rendered == "2m"

    def test_hours(self):
        """3720 seconds renders as ``1h 2m``."""
        from agent.nous_rate_guard import format_remaining

        rendered = format_remaining(3720)
        assert rendered == "1h 2m"
|
|
|
|
|
|
class TestParseResetSeconds:
    """Check how ``_parse_reset_seconds`` reads reset times from headers."""

    def test_case_insensitive_headers(self, rate_guard_env):
        """A mixed-case header name is still recognised."""
        from agent.nous_rate_guard import _parse_reset_seconds

        mixed_case = {"X-Ratelimit-Reset-Requests-1h": "1200"}
        parsed = _parse_reset_seconds(mixed_case)
        assert parsed == 1200.0

    def test_returns_none_for_empty_headers(self):
        """Both ``None`` and an empty mapping yield ``None``."""
        from agent.nous_rate_guard import _parse_reset_seconds

        for empty_headers in (None, {}):
            assert _parse_reset_seconds(empty_headers) is None

    def test_ignores_zero_values(self):
        """A reset value of ``"0"`` yields ``None``."""
        from agent.nous_rate_guard import _parse_reset_seconds

        zero_reset = {"x-ratelimit-reset-requests-1h": "0"}
        parsed = _parse_reset_seconds(zero_reset)
        assert parsed is None

    def test_ignores_invalid_values(self):
        """A non-numeric reset value yields ``None``."""
        from agent.nous_rate_guard import _parse_reset_seconds

        garbage_reset = {"x-ratelimit-reset-requests-1h": "not-a-number"}
        parsed = _parse_reset_seconds(garbage_reset)
        assert parsed is None
|
|
|
|
|
|
class TestAuxiliaryClientIntegration:
    """Test that the auxiliary client respects the rate guard."""

    def test_try_nous_skips_when_rate_limited(self, rate_guard_env, monkeypatch):
        """With a limit recorded, ``_try_nous`` returns ``(None, None)``.

        ``_read_nous_auth`` is patched to hand back credentials, so the empty
        result cannot be explained by missing credentials.
        """
        from agent.nous_rate_guard import record_nous_rate_limit

        # Record a rate limit
        record_nous_rate_limit(headers={"retry-after": "600"})

        # Mock _read_nous_auth to return valid creds (would normally succeed)
        import agent.auxiliary_client as aux
        monkeypatch.setattr(aux, "_read_nous_auth", lambda: {
            "access_token": "test-token",
            "inference_base_url": "https://api.nous.test/v1",
        })

        result = aux._try_nous()
        assert result == (None, None)

    def test_try_nous_works_when_not_rate_limited(self, rate_guard_env, monkeypatch):
        """With no limit recorded, ``_try_nous`` reaches the credential lookup.

        The patched ``_read_nous_auth`` returns ``None``, so the result is
        still ``(None, None)``. That is the same value the rate-limited path
        returns, so the test also records whether the credential lookup was
        called; that is what shows the guard did not block the call.
        """
        import agent.auxiliary_client as aux

        auth_lookups = []

        def fake_read_nous_auth():
            # Record the call, then report "no credentials".
            auth_lookups.append(True)
            return None

        # No rate limit recorded — _try_nous should proceed normally
        # (will return None because no real creds, but won't be blocked
        # by the rate guard)
        monkeypatch.setattr(aux, "_read_nous_auth", fake_read_nous_auth)
        result = aux._try_nous()
        assert result == (None, None)
        assert auth_lookups, "_try_nous returned before consulting _read_nous_auth"
|