diff --git a/hermes_cli/proxy/adapters/xai.py b/hermes_cli/proxy/adapters/xai.py index 30a640df750..d85db8630ab 100644 --- a/hermes_cli/proxy/adapters/xai.py +++ b/hermes_cli/proxy/adapters/xai.py @@ -79,7 +79,7 @@ class XAIGrokAdapter(UpstreamAdapter): failed_credential: UpstreamCredential, status_code: int, ) -> Optional[UpstreamCredential]: - if status_code != 401: + if status_code not in {401, 429}: return None with self._lock: @@ -87,16 +87,25 @@ class XAIGrokAdapter(UpstreamAdapter): if pool is None: return None - refreshed = pool.try_refresh_current() - if refreshed is None: + if status_code == 429: + # Mark the rate-limited key with its 1-hour cooldown and rotate + # to the next available credential. Returns None when the pool + # has no other key to offer — the 429 will flow back to the client. refreshed = pool.mark_exhausted_and_rotate(status_code=status_code) + else: + refreshed = pool.try_refresh_current() + if refreshed is None: + refreshed = pool.mark_exhausted_and_rotate(status_code=status_code) if refreshed is None: return None retry_cred = self._credential_from_entry(refreshed) if retry_cred.bearer == failed_credential.bearer: return None - logger.info("proxy: xAI upstream rejected bearer; retrying with refreshed pool credential") + logger.info( + "proxy: xAI upstream returned %s; retrying with rotated pool credential", + status_code, + ) return retry_cred def _load_pool(self) -> Optional[CredentialPool]: diff --git a/hermes_cli/proxy/server.py b/hermes_cli/proxy/server.py index a72f75d67ee..620f6bbb077 100644 --- a/hermes_cli/proxy/server.py +++ b/hermes_cli/proxy/server.py @@ -206,7 +206,7 @@ def create_app(adapter: UpstreamAdapter) -> "web.Application": return session_or_response session = session_or_response - if upstream_resp.status == 401: + if upstream_resp.status in {401, 429}: try: retry_cred = adapter.get_retry_credential( failed_credential=cred,