From c7f7783e5ca56175049fe8fa2e42bdc86a464013 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Thu, 28 May 2026 01:28:55 -0700 Subject: [PATCH] test(xai-proxy): regression coverage for #28932 429 handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three new tests in tests/hermes_cli/test_proxy.py: - xai_adapter_retry_rotates_pool_entry_on_429 — headline #28932 case. Two-entry pool, 429 on first entry, must rotate to second entry AND must NOT call refresh_xai_oauth_pure (refresh is irrelevant for rate limits). - xai_adapter_retry_returns_none_on_429_when_pool_exhausted — single-entry pool: 429 returns None so the rate-limit response flows back to the client unchanged (existing behavior preserved). - xai_adapter_retry_returns_none_for_unrelated_status — non-{401, 429} statuses must not trigger any retry path at all; guards against the gate becoming too broad in future changes. Each test asserts that refresh_xai_oauth_pure is never called on the 429 path — refresh is a 401-specific concern. 39/39 in tests/hermes_cli/test_proxy.py. --- tests/hermes_cli/test_proxy.py | 116 +++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/tests/hermes_cli/test_proxy.py b/tests/hermes_cli/test_proxy.py index 255610ae390..878efb6469d 100644 --- a/tests/hermes_cli/test_proxy.py +++ b/tests/hermes_cli/test_proxy.py @@ -450,6 +450,122 @@ def test_xai_adapter_retry_refreshes_current_pool_entry(tmp_path, monkeypatch): assert retry.bearer == "new-access-token" +def test_xai_adapter_retry_rotates_pool_entry_on_429(tmp_path, monkeypatch): + """429 from xAI must rotate to the next pool entry, not attempt refresh. + + Pre-fix (#28932) ``get_retry_credential`` only fired on 401, so a 429 + rate-limit response flowed back to the client unchanged AND the + rate-limited bearer stayed active for the next request — defeating + the whole point of pool rotation. + + Post-fix: 429 lands on ``mark_exhausted_and_rotate`` (no refresh — + that's irrelevant for rate limits), stamps the 1-hour cooldown + via ``EXHAUSTED_TTL_429_SECONDS`` on the offending key, and + returns the next available credential. + """ + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + # Two pool entries so rotation has somewhere to go. + auth_path = tmp_path / "auth.json" + auth_path.write_text(json.dumps({ + "version": 1, + "providers": {}, + "credential_pool": { + "xai-oauth": [ + { + "id": "xai-first", + "label": "xai-first", + "auth_type": "oauth", + "priority": 0, + "source": "manual:xai_pkce", + "access_token": "first-access-token", + "refresh_token": "first-refresh-token", + "base_url": "https://api.x.ai/v1", + }, + { + "id": "xai-second", + "label": "xai-second", + "auth_type": "oauth", + "priority": 1, + "source": "manual:xai_pkce", + "access_token": "second-access-token", + "refresh_token": "second-refresh-token", + "base_url": "https://api.x.ai/v1", + }, + ] + }, + })) + + # Refresh must NOT be called on the 429 path — guard against + # the fix accidentally trying to refresh-on-rate-limit. + def _refresh_must_not_run(*args, **kwargs): + raise AssertionError("refresh_xai_oauth_pure must not run on 429") + + monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _refresh_must_not_run) + + adapter = XAIGrokAdapter() + failed = adapter.get_credential() + assert failed.bearer == "first-access-token", "starting bearer should be the first entry" + + retry = adapter.get_retry_credential( + failed_credential=failed, + status_code=429, + ) + + assert retry is not None, "429 must rotate to next pool entry" + assert retry.bearer == "second-access-token", ( + f"expected rotation to second entry, got {retry.bearer!r}" + ) + + +def test_xai_adapter_retry_returns_none_on_429_when_pool_exhausted(tmp_path, monkeypatch): + """Single-entry pool: 429 has nowhere to rotate to → return None + so the 429 flows back to the client unchanged (existing behavior + preserved).""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _write_xai_pool_entry(tmp_path) # single entry + + def _refresh_must_not_run(*args, **kwargs): + raise AssertionError("refresh_xai_oauth_pure must not run on 429") + + monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _refresh_must_not_run) + + adapter = XAIGrokAdapter() + failed = adapter.get_credential() + retry = adapter.get_retry_credential( + failed_credential=failed, + status_code=429, + ) + + assert retry is None, ( + "single-entry pool: 429 must return None so the response " + "flows back to the client unchanged" + ) + + +def test_xai_adapter_retry_returns_none_for_unrelated_status(tmp_path, monkeypatch): + """Non-{401, 429} statuses must NOT trigger any retry — pool + untouched, no refresh attempted, return None immediately.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _write_xai_pool_entry(tmp_path) + + def _refresh_must_not_run(*args, **kwargs): + raise AssertionError("refresh_xai_oauth_pure must not run on non-retry status") + + monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _refresh_must_not_run) + + adapter = XAIGrokAdapter() + failed = adapter.get_credential() + for status in (200, 400, 403, 500, 502, 503): + retry = adapter.get_retry_credential( + failed_credential=failed, + status_code=status, + ) + assert retry is None, ( + f"status {status} must not trigger retry, got {retry!r}" + ) + + # --------------------------------------------------------------------------- # Server: path filtering + forwarding #