fix(agent): disable OpenAI SDK auto-retry that double-fires inside the rate-limit loop

Same bug class as the Anthropic fix (#26293): the OpenAI/aggregator client is
built without max_retries, so the SDK default of 2 applies. The SDK's own 1-2s
backoff ignores Retry-After and retries inside hermes's outer conversation loop,
burning request slots against a rate-limited bucket. Set max_retries=0 at the
single create_openai_client chokepoint (covers init, switch_model, recovery,
restore, request-scoped). auxiliary_client builds its own clients and is not
wrapped by the loop, so it keeps SDK retries.
This commit is contained in:
teknium1 2026-06-27 19:05:48 -07:00 committed by Teknium
parent 1ab35ba25d
commit 0c372274cd
2 changed files with 87 additions and 0 deletions

View file

@ -1452,6 +1452,15 @@ def create_openai_client(agent, client_kwargs: dict, *, reason: str, shared: boo
keepalive_http = agent._build_keepalive_http_client(client_kwargs.get("base_url", ""))
if keepalive_http is not None:
client_kwargs["http_client"] = keepalive_http
# Delegate all rate-limit / 5xx retry to hermes's outer conversation loop,
# which honors Retry-After and applies adaptive/jittered backoff. The OpenAI
# SDK default (max_retries=2) uses its own 1-2s backoff that ignores
# Retry-After and double-retries inside our loop — the same deadlock the
# Anthropic clients hit (#26293). This is the single chokepoint every primary
# OpenAI/aggregator client passes through (init, switch_model, recovery,
# restore, request-scoped); auxiliary_client builds its own clients and keeps
# SDK retries because it is NOT wrapped by the conversation loop.
client_kwargs.setdefault("max_retries", 0)
# Uses the module-level `OpenAI` name, resolved lazily on first
# access via __getattr__ below. Tests patch via `run_agent.OpenAI`.
client = _ra().OpenAI(**client_kwargs)

View file

@ -0,0 +1,78 @@
"""Regression guard: _create_openai_client must disable SDK-level retries.
#26293: the OpenAI SDK default ``max_retries=2`` uses its own 1-2s backoff that
ignores ``Retry-After`` and double-retries *inside* hermes's outer conversation
loop burning request slots against a rate-limited bucket that won't refill for
minutes. The outer loop already owns rate-limit backoff (honors Retry-After,
adaptive + jittered), so every primary OpenAI/aggregator client must be built
with ``max_retries=0``. This is the OpenAI-path twin of the Anthropic adapter
fix in tests/agent/test_anthropic_adapter.py.
"""
from unittest.mock import MagicMock, patch
from run_agent import AIAgent
@patch("run_agent.OpenAI")
def test_create_openai_client_disables_sdk_retries(mock_openai):
mock_openai.return_value = MagicMock()
agent = AIAgent(
api_key="test-key",
base_url="https://openrouter.ai/api/v1",
model="test/model",
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
)
agent._create_openai_client(
{"api_key": "test-key", "base_url": "https://openrouter.ai/api/v1"},
reason="test",
shared=False,
)
# Find the construction call that carries our base_url (AIAgent() also
# builds a client during init; the assertion targets the explicit call).
matching = [
c for c in mock_openai.call_args_list
if c.kwargs.get("base_url") == "https://openrouter.ai/api/v1"
]
assert matching, "OpenAI was never constructed with the expected base_url"
for call in matching:
assert call.kwargs.get("max_retries") == 0, (
"_create_openai_client must set max_retries=0 so the SDK does not "
"double-retry inside the outer rate-limit loop (#26293); got "
f"{call.kwargs.get('max_retries')!r}"
)
@patch("run_agent.OpenAI")
def test_create_openai_client_honors_explicit_max_retries(mock_openai):
"""An explicit max_retries in client_kwargs is respected (setdefault, not
clobber) future callers can opt back into SDK retries if needed."""
mock_openai.return_value = MagicMock()
agent = AIAgent(
api_key="test-key",
base_url="https://openrouter.ai/api/v1",
model="test/model",
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
)
agent._create_openai_client(
{
"api_key": "test-key",
"base_url": "https://explicit.example.com/v1",
"max_retries": 5,
},
reason="test",
shared=False,
)
matching = [
c for c in mock_openai.call_args_list
if c.kwargs.get("base_url") == "https://explicit.example.com/v1"
]
assert matching, "OpenAI was never constructed with the explicit base_url"
assert matching[-1].kwargs.get("max_retries") == 5