mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(xai-proxy): handle 429 rate-limit responses in proxy retry path
`get_retry_credential` was only triggered on 401; a 429 Too Many Requests from
xAI was silently streamed back with no key rotation or back-off signal.
- server.py: widen the retry gate from `== 401` to `in {401, 429}`
- xai.py: on 429, skip the token refresh and call `mark_exhausted_and_rotate`
to stamp the 1-hour cooldown on the rate-limited key and return the
next available credential. Returns None if the pool is exhausted.
This commit is contained in:
parent
aa3466063b
commit
4ed482549f
2 changed files with 14 additions and 5 deletions
|
|
@ -79,7 +79,7 @@ class XAIGrokAdapter(UpstreamAdapter):
|
|||
failed_credential: UpstreamCredential,
|
||||
status_code: int,
|
||||
) -> Optional[UpstreamCredential]:
|
||||
if status_code != 401:
|
||||
if status_code not in {401, 429}:
|
||||
return None
|
||||
|
||||
with self._lock:
|
||||
|
|
@ -87,16 +87,25 @@ class XAIGrokAdapter(UpstreamAdapter):
|
|||
if pool is None:
|
||||
return None
|
||||
|
||||
refreshed = pool.try_refresh_current()
|
||||
if refreshed is None:
|
||||
if status_code == 429:
|
||||
# Mark the rate-limited key with its 1-hour cooldown and rotate
|
||||
# to the next available credential. Returns None when the pool
|
||||
# has no other key to offer — the 429 will flow back to the client.
|
||||
refreshed = pool.mark_exhausted_and_rotate(status_code=status_code)
|
||||
else:
|
||||
refreshed = pool.try_refresh_current()
|
||||
if refreshed is None:
|
||||
refreshed = pool.mark_exhausted_and_rotate(status_code=status_code)
|
||||
if refreshed is None:
|
||||
return None
|
||||
|
||||
retry_cred = self._credential_from_entry(refreshed)
|
||||
if retry_cred.bearer == failed_credential.bearer:
|
||||
return None
|
||||
logger.info("proxy: xAI upstream rejected bearer; retrying with refreshed pool credential")
|
||||
logger.info(
|
||||
"proxy: xAI upstream returned %s; retrying with rotated pool credential",
|
||||
status_code,
|
||||
)
|
||||
return retry_cred
|
||||
|
||||
def _load_pool(self) -> Optional[CredentialPool]:
|
||||
|
|
|
|||
|
|
@ -206,7 +206,7 @@ def create_app(adapter: UpstreamAdapter) -> "web.Application":
|
|||
return session_or_response
|
||||
session = session_or_response
|
||||
|
||||
if upstream_resp.status == 401:
|
||||
if upstream_resp.status in {401, 429}:
|
||||
try:
|
||||
retry_cred = adapter.get_retry_credential(
|
||||
failed_credential=cred,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue