fix(xai-proxy): handle 429 rate-limit responses in proxy retry path

get_retry_credential was only triggered on 401; a 429 Too Many Requests from
xAI was silently streamed back with no key rotation or back-off signal.

- server.py: widen retry gate from == 401 to in {401, 429}
- xai.py: on 429, skip token refresh and call mark_exhausted_and_rotate
  to stamp the 1-hour cooldown on the rate-limited key and return the
  next available credential. Returns None if the pool has no other key
  to offer, in which case the 429 flows back to the client.
This commit is contained in:
sprmn24 2026-05-20 00:07:15 +03:00 committed by Teknium
parent aa3466063b
commit 4ed482549f
2 changed files with 14 additions and 5 deletions

View file

@ -79,7 +79,7 @@ class XAIGrokAdapter(UpstreamAdapter):
failed_credential: UpstreamCredential,
status_code: int,
) -> Optional[UpstreamCredential]:
if status_code != 401:
if status_code not in {401, 429}:
return None
with self._lock:
@ -87,16 +87,25 @@ class XAIGrokAdapter(UpstreamAdapter):
if pool is None:
return None
refreshed = pool.try_refresh_current()
if refreshed is None:
if status_code == 429:
# Mark the rate-limited key with its 1-hour cooldown and rotate
# to the next available credential. Returns None when the pool
# has no other key to offer — the 429 will flow back to the client.
refreshed = pool.mark_exhausted_and_rotate(status_code=status_code)
else:
refreshed = pool.try_refresh_current()
if refreshed is None:
refreshed = pool.mark_exhausted_and_rotate(status_code=status_code)
if refreshed is None:
return None
retry_cred = self._credential_from_entry(refreshed)
if retry_cred.bearer == failed_credential.bearer:
return None
logger.info("proxy: xAI upstream rejected bearer; retrying with refreshed pool credential")
logger.info(
"proxy: xAI upstream returned %s; retrying with rotated pool credential",
status_code,
)
return retry_cred
def _load_pool(self) -> Optional[CredentialPool]:

View file

@ -206,7 +206,7 @@ def create_app(adapter: UpstreamAdapter) -> "web.Application":
return session_or_response
session = session_or_response
if upstream_resp.status == 401:
if upstream_resp.status in {401, 429}:
try:
retry_cred = adapter.get_retry_credential(
failed_credential=cred,