fix: Nous Portal rate limit guard — prevent retry amplification (#10568)

When Nous returns a 429, the retry amplification chain burns up to 9 API requests per conversation turn (3 SDK retries × 3 Hermes retries), each counting against RPH and deepening the rate limit. With multiple concurrent sessions (cron + gateway + auxiliary), this creates a spiral where retries keep the limit tapped indefinitely. New module: agent/nous_rate_guard.py - Shared file-based rate limit state (~/.hermes/rate_limits/nous.json) - Parses reset time from x-ratelimit-reset-requests-1h, x-ratelimit- reset-requests, retry-after headers, or error context - Falls back to 5-minute default cooldown if no header data - Atomic writes (tempfile + rename) for cross-process safety - Auto-cleanup of expired state files run_agent.py changes: - Top-of-retry-loop guard: when another session already recorded Nous as rate-limited, skip the API call entirely. Try fallback provider first, then return a clear message with the reset time. - On 429 from Nous: record rate limit state and skip further retries (sets retry_count = max_retries to trigger fallback path) - On success from Nous: clear the rate limit state so other sessions know they can resume auxiliary_client.py changes: - _try_nous() checks rate guard before attempting Nous in the auxiliary fallback chain. When rate-limited, returns (None, None) so the chain skips to the next provider instead of piling more requests onto Nous. This eliminates three sources of amplification: 1. Hermes-level retries (saves 6 of 9 calls per turn) 2. Cross-session retries (cron + gateway all skip Nous) 3. Auxiliary fallback to Nous (compression/session_search skip too) Includes 24 tests covering the rate guard module, header parsing, state lifecycle, and auxiliary client integration.
2026-04-25 00:51:20 +00:00 · 2026-04-15 16:31:48 -07:00 · 2026-04-15 16:31:48 -07:00 · 9d9b424390
commit 9d9b424390
parent 0d05bd34f8
4 changed files with 538 additions and 0 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -775,6 +775,21 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
 def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
    # Check cross-session rate limit guard before attempting Nous —
    # if another session already recorded a 429, skip Nous entirely
    # to avoid piling more requests onto the tapped RPH bucket.
    try:
        from agent.nous_rate_guard import nous_rate_limit_remaining
        _remaining = nous_rate_limit_remaining()
        if _remaining is not None and _remaining > 0:
            logger.debug(
                "Auxiliary: skipping Nous Portal (rate-limited, resets in %.0fs)",
                _remaining,
            )
            return None, None
    except Exception:
        pass
    nous = _read_nous_auth()
    if not nous:
        return None, None
--- a/agent/nous_rate_guard.py
+++ b/agent/nous_rate_guard.py
@ -0,0 +1,182 @@
 """Cross-session rate limit guard for Nous Portal.
 Writes rate limit state to a shared file so all sessions (CLI, gateway,
 cron, auxiliary) can check whether Nous Portal is currently rate-limited
 before making requests.  Prevents retry amplification when RPH is tapped.
 Each 429 from Nous triggers up to 9 API calls per conversation turn
 (3 SDK retries x 3 Hermes retries), and every one of those calls counts
 against RPH.  By recording the rate limit state on first 429 and checking
 it before subsequent attempts, we eliminate the amplification effect.
 """
 from __future__ import annotations
 import json
 import logging
 import os
 import tempfile
 import time
 from typing import Any, Mapping, Optional
 logger = logging.getLogger(__name__)
 _STATE_SUBDIR = "rate_limits"
 _STATE_FILENAME = "nous.json"
 def _state_path() -> str:
    """Return the path to the Nous rate limit state file."""
    try:
        from hermes_constants import get_hermes_home
        base = get_hermes_home()
    except ImportError:
        base = os.path.join(os.path.expanduser("~"), ".hermes")
    return os.path.join(base, _STATE_SUBDIR, _STATE_FILENAME)
 def _parse_reset_seconds(headers: Optional[Mapping[str, str]]) -> Optional[float]:
    """Extract the best available reset-time estimate from response headers.
    Priority:
      1. x-ratelimit-reset-requests-1h  (hourly RPH window — most useful)
      2. x-ratelimit-reset-requests     (per-minute RPM window)
      3. retry-after                     (generic HTTP header)
    Returns seconds-from-now, or None if no usable header found.
    """
    if not headers:
        return None
    lowered = {k.lower(): v for k, v in headers.items()}
    for key in (
        "x-ratelimit-reset-requests-1h",
        "x-ratelimit-reset-requests",
        "retry-after",
    ):
        raw = lowered.get(key)
        if raw is not None:
            try:
                val = float(raw)
                if val > 0:
                    return val
            except (TypeError, ValueError):
                pass
    return None
 def record_nous_rate_limit(
    *,
    headers: Optional[Mapping[str, str]] = None,
    error_context: Optional[dict[str, Any]] = None,
    default_cooldown: float = 300.0,
 ) -> None:
    """Record that Nous Portal is rate-limited.
    Parses the reset time from response headers or error context.
    Falls back to ``default_cooldown`` (5 minutes) if no reset info
    is available.  Writes to a shared file that all sessions can read.
    Args:
        headers: HTTP response headers from the 429 error.
        error_context: Structured error context from _extract_api_error_context().
        default_cooldown: Fallback cooldown in seconds when no header data.
    """
    now = time.time()
    reset_at = None
    # Try headers first (most accurate)
    header_seconds = _parse_reset_seconds(headers)
    if header_seconds is not None:
        reset_at = now + header_seconds
    # Try error_context reset_at (from body parsing)
    if reset_at is None and isinstance(error_context, dict):
        ctx_reset = error_context.get("reset_at")
        if isinstance(ctx_reset, (int, float)) and ctx_reset > now:
            reset_at = float(ctx_reset)
    # Default cooldown
    if reset_at is None:
        reset_at = now + default_cooldown
    path = _state_path()
    try:
        state_dir = os.path.dirname(path)
        os.makedirs(state_dir, exist_ok=True)
        state = {
            "reset_at": reset_at,
            "recorded_at": now,
            "reset_seconds": reset_at - now,
        }
        # Atomic write: write to temp file + rename
        fd, tmp_path = tempfile.mkstemp(dir=state_dir, suffix=".tmp")
        try:
            with os.fdopen(fd, "w") as f:
                json.dump(state, f)
            os.replace(tmp_path, path)
        except Exception:
            # Clean up temp file on failure
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
            raise
        logger.info(
            "Nous rate limit recorded: resets in %.0fs (at %.0f)",
            reset_at - now, reset_at,
        )
    except Exception as exc:
        logger.debug("Failed to write Nous rate limit state: %s", exc)
 def nous_rate_limit_remaining() -> Optional[float]:
    """Check if Nous Portal is currently rate-limited.
    Returns:
        Seconds remaining until reset, or None if not rate-limited.
    """
    path = _state_path()
    try:
        with open(path) as f:
            state = json.load(f)
        reset_at = state.get("reset_at", 0)
        remaining = reset_at - time.time()
        if remaining > 0:
            return remaining
        # Expired — clean up
        try:
            os.unlink(path)
        except OSError:
            pass
        return None
    except (FileNotFoundError, json.JSONDecodeError, KeyError, TypeError):
        return None
 def clear_nous_rate_limit() -> None:
    """Clear the rate limit state (e.g., after a successful Nous request)."""
    try:
        os.unlink(_state_path())
    except FileNotFoundError:
        pass
    except OSError as exc:
        logger.debug("Failed to clear Nous rate limit state: %s", exc)
 def format_remaining(seconds: float) -> str:
    """Format seconds remaining into human-readable duration."""
    s = max(0, int(seconds))
    if s < 60:
        return f"{s}s"
    if s < 3600:
        m, sec = divmod(s, 60)
        return f"{m}m {sec}s" if sec else f"{m}m"
    h, remainder = divmod(s, 3600)
    m = remainder // 60
    return f"{h}h {m}m" if m else f"{h}h"
--- a/run_agent.py
+++ b/run_agent.py
@ -8660,6 +8660,53 @@ class AIAgent:
            api_kwargs = None  # Guard against UnboundLocalError in except handler
            while retry_count < max_retries:
                # ── Nous Portal rate limit guard ──────────────────────
                # If another session already recorded that Nous is rate-
                # limited, skip the API call entirely.  Each attempt
                # (including SDK-level retries) counts against RPH and
                # deepens the rate limit hole.
                if self.provider == "nous":
                    try:
                        from agent.nous_rate_guard import (
                            nous_rate_limit_remaining,
                            format_remaining as _fmt_nous_remaining,
                        )
                        _nous_remaining = nous_rate_limit_remaining()
                        if _nous_remaining is not None and _nous_remaining > 0:
                            _nous_msg = (
                                f"Nous Portal rate limit active — "
                                f"resets in {_fmt_nous_remaining(_nous_remaining)}."
                            )
                            self._vprint(
                                f"{self.log_prefix}⏳ {_nous_msg} Trying fallback...",
                                force=True,
                            )
                            self._emit_status(f"⏳ {_nous_msg}")
                            if self._try_activate_fallback():
                                retry_count = 0
                                compression_attempts = 0
                                primary_recovery_attempted = False
                                continue
                            # No fallback available — return with clear message
                            self._persist_session(messages, conversation_history)
                            return {
                                "final_response": (
                                    f"⏳ {_nous_msg}\n\n"
                                    "No fallback provider available. "
                                    "Try again after the reset, or add a "
                                    "fallback provider in config.yaml."
                                ),
                                "messages": messages,
                                "api_calls": api_call_count,
                                "completed": False,
                                "failed": True,
                                "error": _nous_msg,
                            }
                    except ImportError:
                        pass
                    except Exception:
                        pass  # Never let rate guard break the agent loop
                try:
                    self._reset_stream_delivery_tracking()
                    api_kwargs = self._build_api_kwargs(api_messages)
@ -9248,6 +9295,15 @@ class AIAgent:
                                self._vprint(f"{self.log_prefix}   💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
                    has_retried_429 = False  # Reset on success
                    # Clear Nous rate limit state on successful request —
                    # proves the limit has reset and other sessions can
                    # resume hitting Nous.
                    if self.provider == "nous":
                        try:
                            from agent.nous_rate_guard import clear_nous_rate_limit
                            clear_nous_rate_limit()
                        except Exception:
                            pass
                    self._touch_activity(f"API call #{api_call_count} completed")
                    break  # Success, exit retry loop
@ -9659,6 +9715,38 @@ class AIAgent:
                                primary_recovery_attempted = False
                                continue
                    # ── Nous Portal: record rate limit & skip retries ─────
                    # When Nous returns a 429, record the reset time to a
                    # shared file so ALL sessions (cron, gateway, auxiliary)
                    # know not to pile on.  Then skip further retries —
                    # each one burns another RPH request and deepens the
                    # rate limit hole.  The retry loop's top-of-iteration
                    # guard will catch this on the next pass and try
                    # fallback or bail with a clear message.
                    if (
                        is_rate_limited
                        and self.provider == "nous"
                        and classified.reason == FailoverReason.rate_limit
                        and not recovered_with_pool
                    ):
                        try:
                            from agent.nous_rate_guard import record_nous_rate_limit
                            _err_resp = getattr(api_error, "response", None)
                            _err_hdrs = (
                                getattr(_err_resp, "headers", None)
                                if _err_resp else None
                            )
                            record_nous_rate_limit(
                                headers=_err_hdrs,
                                error_context=error_context,
                            )
                        except Exception:
                            pass
                        # Skip straight to max_retries — the top-of-loop
                        # guard will handle fallback or bail cleanly.
                        retry_count = max_retries
                        continue
                    is_payload_too_large = (
                        classified.reason == FailoverReason.payload_too_large
                    )
--- a/tests/agent/test_nous_rate_guard.py
+++ b/tests/agent/test_nous_rate_guard.py
@ -0,0 +1,253 @@
 """Tests for agent/nous_rate_guard.py — cross-session Nous Portal rate limit guard."""
 import json
 import os
 import time
 import pytest
@pytest.fixture
 def rate_guard_env(tmp_path, monkeypatch):
    """Isolate rate guard state to a temp directory."""
    hermes_home = str(tmp_path / ".hermes")
    os.makedirs(hermes_home, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", hermes_home)
    # Clear any cached module-level imports
    return hermes_home
 class TestRecordNousRateLimit:
    """Test recording rate limit state."""
    def test_records_with_header_reset(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path
        headers = {"x-ratelimit-reset-requests-1h": "1800"}
        record_nous_rate_limit(headers=headers)
        path = _state_path()
        assert os.path.exists(path)
        with open(path) as f:
            state = json.load(f)
        assert state["reset_seconds"] == pytest.approx(1800, abs=2)
        assert state["reset_at"] > time.time()
    def test_records_with_per_minute_header(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path
        headers = {"x-ratelimit-reset-requests": "45"}
        record_nous_rate_limit(headers=headers)
        with open(_state_path()) as f:
            state = json.load(f)
        assert state["reset_seconds"] == pytest.approx(45, abs=2)
    def test_records_with_retry_after_header(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path
        headers = {"retry-after": "60"}
        record_nous_rate_limit(headers=headers)
        with open(_state_path()) as f:
            state = json.load(f)
        assert state["reset_seconds"] == pytest.approx(60, abs=2)
    def test_prefers_hourly_over_per_minute(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path
        headers = {
            "x-ratelimit-reset-requests-1h": "1800",
            "x-ratelimit-reset-requests": "45",
        }
        record_nous_rate_limit(headers=headers)
        with open(_state_path()) as f:
            state = json.load(f)
        # Should use the hourly value, not the per-minute one
        assert state["reset_seconds"] == pytest.approx(1800, abs=2)
    def test_falls_back_to_error_context_reset_at(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path
        future_reset = time.time() + 900
        record_nous_rate_limit(
            headers=None,
            error_context={"reset_at": future_reset},
        )
        with open(_state_path()) as f:
            state = json.load(f)
        assert state["reset_at"] == pytest.approx(future_reset, abs=1)
    def test_falls_back_to_default_cooldown(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path
        record_nous_rate_limit(headers=None)
        with open(_state_path()) as f:
            state = json.load(f)
        # Default is 300 seconds (5 minutes)
        assert state["reset_seconds"] == pytest.approx(300, abs=2)
    def test_custom_default_cooldown(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path
        record_nous_rate_limit(headers=None, default_cooldown=120.0)
        with open(_state_path()) as f:
            state = json.load(f)
        assert state["reset_seconds"] == pytest.approx(120, abs=2)
    def test_creates_directory_if_missing(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path
        record_nous_rate_limit(headers={"retry-after": "10"})
        assert os.path.exists(_state_path())
 class TestNousRateLimitRemaining:
    """Test checking remaining rate limit time."""
    def test_returns_none_when_no_file(self, rate_guard_env):
        from agent.nous_rate_guard import nous_rate_limit_remaining
        assert nous_rate_limit_remaining() is None
    def test_returns_remaining_seconds_when_active(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, nous_rate_limit_remaining
        record_nous_rate_limit(headers={"x-ratelimit-reset-requests-1h": "600"})
        remaining = nous_rate_limit_remaining()
        assert remaining is not None
        assert 595 < remaining <= 605  # ~600 seconds, allowing for test execution time
    def test_returns_none_when_expired(self, rate_guard_env):
        from agent.nous_rate_guard import nous_rate_limit_remaining, _state_path
        # Write an already-expired state
        state_dir = os.path.dirname(_state_path())
        os.makedirs(state_dir, exist_ok=True)
        with open(_state_path(), "w") as f:
            json.dump({"reset_at": time.time() - 10, "recorded_at": time.time() - 100}, f)
        assert nous_rate_limit_remaining() is None
        # File should be cleaned up
        assert not os.path.exists(_state_path())
    def test_handles_corrupt_file(self, rate_guard_env):
        from agent.nous_rate_guard import nous_rate_limit_remaining, _state_path
        state_dir = os.path.dirname(_state_path())
        os.makedirs(state_dir, exist_ok=True)
        with open(_state_path(), "w") as f:
            f.write("not valid json{{{")
        assert nous_rate_limit_remaining() is None
 class TestClearNousRateLimit:
    """Test clearing rate limit state."""
    def test_clears_existing_file(self, rate_guard_env):
        from agent.nous_rate_guard import (
            record_nous_rate_limit,
            clear_nous_rate_limit,
            nous_rate_limit_remaining,
            _state_path,
        )
        record_nous_rate_limit(headers={"retry-after": "600"})
        assert nous_rate_limit_remaining() is not None
        clear_nous_rate_limit()
        assert nous_rate_limit_remaining() is None
        assert not os.path.exists(_state_path())
    def test_clear_when_no_file(self, rate_guard_env):
        from agent.nous_rate_guard import clear_nous_rate_limit
        # Should not raise
        clear_nous_rate_limit()
 class TestFormatRemaining:
    """Test human-readable duration formatting."""
    def test_seconds(self):
        from agent.nous_rate_guard import format_remaining
        assert format_remaining(30) == "30s"
    def test_minutes(self):
        from agent.nous_rate_guard import format_remaining
        assert format_remaining(125) == "2m 5s"
    def test_exact_minutes(self):
        from agent.nous_rate_guard import format_remaining
        assert format_remaining(120) == "2m"
    def test_hours(self):
        from agent.nous_rate_guard import format_remaining
        assert format_remaining(3720) == "1h 2m"
 class TestParseResetSeconds:
    """Test header parsing for reset times."""
    def test_case_insensitive_headers(self, rate_guard_env):
        from agent.nous_rate_guard import _parse_reset_seconds
        headers = {"X-Ratelimit-Reset-Requests-1h": "1200"}
        assert _parse_reset_seconds(headers) == 1200.0
    def test_returns_none_for_empty_headers(self):
        from agent.nous_rate_guard import _parse_reset_seconds
        assert _parse_reset_seconds(None) is None
        assert _parse_reset_seconds({}) is None
    def test_ignores_zero_values(self):
        from agent.nous_rate_guard import _parse_reset_seconds
        headers = {"x-ratelimit-reset-requests-1h": "0"}
        assert _parse_reset_seconds(headers) is None
    def test_ignores_invalid_values(self):
        from agent.nous_rate_guard import _parse_reset_seconds
        headers = {"x-ratelimit-reset-requests-1h": "not-a-number"}
        assert _parse_reset_seconds(headers) is None
 class TestAuxiliaryClientIntegration:
    """Test that the auxiliary client respects the rate guard."""
    def test_try_nous_skips_when_rate_limited(self, rate_guard_env, monkeypatch):
        from agent.nous_rate_guard import record_nous_rate_limit
        # Record a rate limit
        record_nous_rate_limit(headers={"retry-after": "600"})
        # Mock _read_nous_auth to return valid creds (would normally succeed)
        import agent.auxiliary_client as aux
        monkeypatch.setattr(aux, "_read_nous_auth", lambda: {
            "access_token": "test-token",
            "inference_base_url": "https://api.nous.test/v1",
        })
        result = aux._try_nous()
        assert result == (None, None)
    def test_try_nous_works_when_not_rate_limited(self, rate_guard_env, monkeypatch):
        import agent.auxiliary_client as aux
        # No rate limit recorded — _try_nous should proceed normally
        # (will return None because no real creds, but won't be blocked
        # by the rate guard)
        monkeypatch.setattr(aux, "_read_nous_auth", lambda: None)
        result = aux._try_nous()
        assert result == (None, None)