mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(credential-pool): rotate immediately when credential already exhausted
Closes #26145. When the user interrupts the retry loop between two 429s (Ctrl-C in interactive mode, /new, gateway disconnect), the local has_retried_429 flag dies with the recovery function. On the next user prompt the agent restarts with has_retried_429=False, hits 429 on the exhausted credential, sets the flag, returns 'retry once'. Repeat forever — the second 429 that would trigger rotation is never reached, and healthy entries (priority>0 free/paid accounts) are never tried. Fix: in recover_with_credential_pool's rate_limit branch, pre-check pool.current().last_status before running the retry-once dance. If the current entry is already STATUS_EXHAUSTED, rotate immediately. Uses getattr() for the attribute read so existing tests with SimpleNamespace mocks (which only set 'label') keep working. Co-authored-by: zccyman <16263913+zccyman@users.noreply.github.com>
This commit is contained in:
parent
0a6a0ba527
commit
973bb124a4
2 changed files with 124 additions and 0 deletions
|
|
@ -41,6 +41,7 @@ from agent.message_sanitization import (
|
|||
)
|
||||
from agent.tool_dispatch_helpers import _trajectory_normalize_msg, make_tool_result_message
|
||||
from agent.trajectory import convert_scratchpad_to_think
|
||||
from agent.credential_pool import STATUS_EXHAUSTED
|
||||
from agent.error_classifier import classify_api_error, FailoverReason
|
||||
from utils import base_url_host_matches, base_url_hostname, env_var_enabled, atomic_json_write
|
||||
|
||||
|
|
@ -582,6 +583,29 @@ def recover_with_credential_pool(
|
|||
return False, has_retried_429
|
||||
|
||||
if effective_reason == FailoverReason.rate_limit:
|
||||
# If current credential is already marked exhausted, skip retry and
|
||||
# rotate immediately. This prevents the "cancel-between-429s" trap
|
||||
# where has_retried_429 (a local var) gets reset on each new prompt,
|
||||
# causing the pool to retry the same exhausted credential forever.
|
||||
current_entry = pool.current()
|
||||
current_last_status = getattr(current_entry, "last_status", None) if current_entry else None
|
||||
if current_last_status == STATUS_EXHAUSTED:
|
||||
_ra().logger.info(
|
||||
"Credential already exhausted (last_status=%s) — rotating immediately instead of retrying",
|
||||
current_last_status,
|
||||
)
|
||||
rotate_status = status_code if status_code is not None else 429
|
||||
next_entry = pool.mark_exhausted_and_rotate(status_code=rotate_status, error_context=error_context)
|
||||
if next_entry is not None:
|
||||
_ra().logger.info(
|
||||
"Credential %s (rate limit, pre-exhausted) — rotated to pool entry %s",
|
||||
rotate_status,
|
||||
getattr(next_entry, "id", "?"),
|
||||
)
|
||||
agent._swap_credential(next_entry)
|
||||
return True, False
|
||||
return False, True
|
||||
|
||||
usage_limit_reached = False
|
||||
if error_context:
|
||||
context_reason = str(error_context.get("reason") or "").lower()
|
||||
|
|
|
|||
100
tests/run_agent/test_credential_pool_interrupt.py
Normal file
100
tests/run_agent/test_credential_pool_interrupt.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
"""Regression test for #26145: credential pool rotation after interrupt-resume.
|
||||
|
||||
When has_retried_429 is lost (user cancels between 429s), the pool should
|
||||
still rotate if the current credential is already marked exhausted.
|
||||
"""
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from agent.credential_pool import PooledCredential, STATUS_EXHAUSTED
|
||||
from agent.error_classifier import FailoverReason
|
||||
|
||||
|
||||
def _make_entry(idx, **overrides):
|
||||
defaults = dict(
|
||||
provider="test-provider",
|
||||
id=f"cred-{idx}",
|
||||
label=f"Credential {idx}",
|
||||
auth_type="api_key",
|
||||
priority=idx,
|
||||
source="manual",
|
||||
access_token=f"key-{idx}",
|
||||
)
|
||||
defaults.update(overrides)
|
||||
return PooledCredential(**defaults)
|
||||
|
||||
|
||||
def _make_pool(entries):
|
||||
pool = MagicMock()
|
||||
pool.entries = entries
|
||||
pool.current.return_value = entries[0]
|
||||
return pool
|
||||
|
||||
|
||||
def test_rotate_immediately_when_credential_already_exhausted():
|
||||
"""If current credential has last_status='exhausted', rotate on first 429
|
||||
instead of retrying (Option A fix for #26145)."""
|
||||
entries = [_make_entry(0, last_status=STATUS_EXHAUSTED, last_error_code=429), _make_entry(1)]
|
||||
pool = _make_pool(entries)
|
||||
pool.mark_exhausted_and_rotate.return_value = entries[1]
|
||||
|
||||
from run_agent import AIAgent
|
||||
with patch("run_agent.get_tool_definitions", return_value=[]), patch("run_agent.check_toolset_requirements", return_value={}), patch("run_agent.OpenAI"):
|
||||
agent = MagicMock(spec=AIAgent)
|
||||
agent._credential_pool = pool
|
||||
agent._swap_credential = MagicMock()
|
||||
recovered, retried = AIAgent._recover_with_credential_pool(
|
||||
agent,
|
||||
status_code=429,
|
||||
has_retried_429=False, # Key: False on first 429 after interrupt
|
||||
classified_reason=FailoverReason.rate_limit,
|
||||
)
|
||||
|
||||
assert recovered is True
|
||||
assert retried is False
|
||||
pool.mark_exhausted_and_rotate.assert_called_once()
|
||||
agent._swap_credential.assert_called_once_with(entries[1])
|
||||
|
||||
|
||||
def test_normal_retry_when_credential_not_exhausted():
|
||||
"""When credential is active, first 429 should still retry (existing behavior)."""
|
||||
entries = [_make_entry(0, last_status=None), _make_entry(1)]
|
||||
pool = _make_pool(entries)
|
||||
|
||||
from run_agent import AIAgent
|
||||
with patch("run_agent.get_tool_definitions", return_value=[]), patch("run_agent.check_toolset_requirements", return_value={}), patch("run_agent.OpenAI"):
|
||||
agent = MagicMock(spec=AIAgent)
|
||||
agent._credential_pool = pool
|
||||
recovered, retried = AIAgent._recover_with_credential_pool(
|
||||
agent,
|
||||
status_code=429,
|
||||
has_retried_429=False,
|
||||
classified_reason=FailoverReason.rate_limit,
|
||||
)
|
||||
|
||||
assert recovered is False
|
||||
assert retried is True
|
||||
pool.mark_exhausted_and_rotate.assert_not_called()
|
||||
|
||||
|
||||
def test_rotate_on_second_429_when_not_exhausted():
|
||||
"""When credential is active and this is the second 429, rotate (existing behavior)."""
|
||||
entries = [_make_entry(0, last_status=None), _make_entry(1)]
|
||||
pool = _make_pool(entries)
|
||||
pool.mark_exhausted_and_rotate.return_value = entries[1]
|
||||
|
||||
from run_agent import AIAgent
|
||||
with patch("run_agent.get_tool_definitions", return_value=[]), patch("run_agent.check_toolset_requirements", return_value={}), patch("run_agent.OpenAI"):
|
||||
agent = MagicMock(spec=AIAgent)
|
||||
agent._credential_pool = pool
|
||||
agent._swap_credential = MagicMock()
|
||||
recovered, retried = AIAgent._recover_with_credential_pool(
|
||||
agent,
|
||||
status_code=429,
|
||||
has_retried_429=True, # Second 429
|
||||
classified_reason=FailoverReason.rate_limit,
|
||||
)
|
||||
|
||||
assert recovered is True
|
||||
assert retried is False
|
||||
pool.mark_exhausted_and_rotate.assert_called_once()
|
||||
Loading…
Add table
Add a link
Reference in a new issue