From 4ed482549f2652bb9ba0ee72380de5b4e632970b Mon Sep 17 00:00:00 2001 From: sprmn24 Date: Wed, 20 May 2026 00:07:15 +0300 Subject: [PATCH] fix(xai-proxy): handle 429 rate-limit responses in proxy retry path get_retry_credential only triggered on 401; a 429 Too Many Requests from xAI was silently streamed back with no key rotation or back-off signal. - server.py: widen retry gate from == 401 to in {401, 429} - xai.py: on 429, skip token refresh and call mark_exhausted_and_rotate to stamp the 1-hour cooldown on the rate-limited key and return the next available credential. Returns None if pool is exhausted. --- hermes_cli/proxy/adapters/xai.py | 17 +++++++++++++---- hermes_cli/proxy/server.py | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/hermes_cli/proxy/adapters/xai.py b/hermes_cli/proxy/adapters/xai.py index 30a640df750..d85db8630ab 100644 --- a/hermes_cli/proxy/adapters/xai.py +++ b/hermes_cli/proxy/adapters/xai.py @@ -79,7 +79,7 @@ class XAIGrokAdapter(UpstreamAdapter): failed_credential: UpstreamCredential, status_code: int, ) -> Optional[UpstreamCredential]: - if status_code != 401: + if status_code not in {401, 429}: return None with self._lock: @@ -87,16 +87,25 @@ class XAIGrokAdapter(UpstreamAdapter): if pool is None: return None - refreshed = pool.try_refresh_current() - if refreshed is None: + if status_code == 429: + # Mark the rate-limited key with its 1-hour cooldown and rotate + # to the next available credential. Returns None when the pool + # has no other key to offer — the 429 will flow back to the client. refreshed = pool.mark_exhausted_and_rotate(status_code=status_code) + else: + refreshed = pool.try_refresh_current() + if refreshed is None: + refreshed = pool.mark_exhausted_and_rotate(status_code=status_code) if refreshed is None: return None retry_cred = self._credential_from_entry(refreshed) if retry_cred.bearer == failed_credential.bearer: return None - logger.info("proxy: xAI upstream rejected bearer; retrying with refreshed pool credential") + logger.info( + "proxy: xAI upstream returned %s; retrying with rotated pool credential", + status_code, + ) return retry_cred def _load_pool(self) -> Optional[CredentialPool]: diff --git a/hermes_cli/proxy/server.py b/hermes_cli/proxy/server.py index a72f75d67ee..620f6bbb077 100644 --- a/hermes_cli/proxy/server.py +++ b/hermes_cli/proxy/server.py @@ -206,7 +206,7 @@ def create_app(adapter: UpstreamAdapter) -> "web.Application": return session_or_response session = session_or_response - if upstream_resp.status == 401: + if upstream_resp.status in {401, 429}: try: retry_cred = adapter.get_retry_credential( failed_credential=cred,