From cbfa018aeff5620d893c9c7d3ea307c818183c1f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 17 Jun 2026 05:48:35 -0700 Subject: [PATCH] fix(auth): retry Codex device-code login on 429 with clear rate-limit message (#47860) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OpenAI device-code login (POST auth.openai.com/.../deviceauth/usercode) had no retry or 429 handling — a transient throttle from OpenAI surfaced as a bare "Device code request returned status 429" with no guidance, reading as a hard login failure. - Retry the device-code request with capped exponential backoff (honoring Retry-After), up to 4 attempts. - On persistent 429, raise a clear AuthError tagged CODEX_RATE_LIMITED_CODE (classified transient, not a credential problem) with a wait hint. - Apply the same 429 classification to the token-exchange step (same bug class). Unrelated to PR #47399 (Responses-API cache headers); this is the OAuth device-code path in hermes_cli/auth.py. --- hermes_cli/auth.py | 78 +++++++++++++++--- tests/hermes_cli/test_auth_codex_provider.py | 84 ++++++++++++++++++++ 2 files changed, 150 insertions(+), 12 deletions(-) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 590b6794d2a..61c2bbed786 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -7231,23 +7231,61 @@ def _codex_device_code_login() -> Dict[str, Any]: issuer = "https://auth.openai.com" client_id = CODEX_OAUTH_CLIENT_ID - # Step 1: Request device code - try: - with httpx.Client(timeout=httpx.Timeout(15.0)) as client: - resp = client.post( - f"{issuer}/api/accounts/deviceauth/usercode", - json={"client_id": client_id}, - headers={"Content-Type": "application/json"}, + # Step 1: Request device code. OpenAI's auth endpoint rate-limits this + # request (HTTP 429) when login is attempted too often from the same + # IP/account — retry with capped backoff (honoring ``Retry-After``) + # before surfacing a clear, actionable message instead of a bare status. + resp = None + max_attempts = 4 + for attempt in range(1, max_attempts + 1): + try: + with httpx.Client(timeout=httpx.Timeout(15.0)) as client: + resp = client.post( + f"{issuer}/api/accounts/deviceauth/usercode", + json={"client_id": client_id}, + headers={"Content-Type": "application/json"}, + ) + except Exception as exc: + raise AuthError( + f"Failed to request device code: {exc}", + provider="openai-codex", code="device_code_request_failed", ) - except Exception as exc: + + if resp.status_code != 429: + break + + if attempt < max_attempts: + retry_after = _parse_retry_after_seconds( + getattr(resp, "headers", None) + ) + # Exponential backoff (2s, 4s, 8s) capped, preferring the + # server-provided Retry-After when present. + delay = retry_after if retry_after is not None else 2 ** attempt + delay = max(1, min(int(delay), 60)) + print( + "OpenAI is rate-limiting login requests " + f"(429); retrying in {delay}s..." + ) + _time.sleep(delay) + + if resp is not None and resp.status_code == 429: + retry_after = _parse_retry_after_seconds(getattr(resp, "headers", None)) + wait_hint = ( + f" Try again in about {retry_after}s." + if retry_after is not None + else " Wait a minute and run the login again." + ) raise AuthError( - f"Failed to request device code: {exc}", - provider="openai-codex", code="device_code_request_failed", + "OpenAI is rate-limiting Codex login requests (HTTP 429). " + "This is a temporary throttle on OpenAI's side, not a credential " + f"problem.{wait_hint}", + provider="openai-codex", code=CODEX_RATE_LIMITED_CODE, ) - if resp.status_code != 200: + if resp is None or resp.status_code != 200: + status = resp.status_code if resp is not None else "unknown" raise AuthError( - f"Device code request returned status {resp.status_code}.", + f"Device code request returned status {status}.", provider="openai-codex", code="device_code_request_error", ) @@ -7335,6 +7373,22 @@ def _codex_device_code_login() -> Dict[str, Any]: provider="openai-codex", code="token_exchange_failed", ) + if token_resp.status_code == 429: + retry_after = _parse_retry_after_seconds( + getattr(token_resp, "headers", None) + ) + wait_hint = ( + f" Try again in about {retry_after}s." + if retry_after is not None + else " Wait a minute and run the login again." + ) + raise AuthError( + "OpenAI is rate-limiting Codex login requests (HTTP 429) during " + "token exchange. This is a temporary throttle on OpenAI's side, " + f"not a credential problem.{wait_hint}", + provider="openai-codex", code=CODEX_RATE_LIMITED_CODE, + ) + if token_resp.status_code != 200: raise AuthError( f"Token exchange returned status {token_resp.status_code}.", diff --git a/tests/hermes_cli/test_auth_codex_provider.py b/tests/hermes_cli/test_auth_codex_provider.py index 2ce2907650d..0c97e127748 100644 --- a/tests/hermes_cli/test_auth_codex_provider.py +++ b/tests/hermes_cli/test_auth_codex_provider.py @@ -1009,3 +1009,87 @@ def test_login_openai_codex_force_new_login_skips_existing_reuse_prompt(monkeypa assert called["device_login"] == 1 assert called["tokens"]["access_token"] == "fresh-at" + + +class _FakeResp: + def __init__(self, status_code, json_data=None, headers=None): + self.status_code = status_code + self._json = json_data or {} + self.headers = headers or {} + + def json(self): + return self._json + + +def _patch_httpx_post(monkeypatch, responses): + """Patch hermes_cli.auth.httpx.Client so .post() returns queued responses.""" + seq = iter(responses) + + class _FakeClient: + def __enter__(self): + return self + + def __exit__(self, *a): + return False + + def post(self, *args, **kwargs): + return next(seq) + + monkeypatch.setattr("hermes_cli.auth.httpx.Client", lambda *a, **k: _FakeClient()) + + +def test_device_code_login_retries_on_429_then_succeeds(monkeypatch): + """A transient 429 on the device-code request is retried, not surfaced.""" + from hermes_cli import auth as auth_mod + + sleeps = [] + monkeypatch.setattr("time.sleep", lambda s: sleeps.append(s)) + + # First call 429 (with Retry-After), second call succeeds. The polling + # loop then returns the authorization code, and token exchange succeeds. + _patch_httpx_post( + monkeypatch, + [ + _FakeResp(429, headers={"retry-after": "1"}), + _FakeResp(200, {"user_code": "ABCD", "device_auth_id": "dev-1", "interval": "5"}), + _FakeResp(200, {"authorization_code": "auth-code", "code_verifier": "verifier"}), + _FakeResp(200, {"access_token": "at", "refresh_token": "rt", "expires_in": 3600}), + ], + ) + # Skip the polling sleep too (shares time.sleep, already patched). + + creds = auth_mod._codex_device_code_login() + + assert creds["tokens"]["access_token"] == "at" + # The 429 caused exactly one backoff sleep before the retry succeeded. + assert 1 in sleeps + + +def test_device_code_login_persistent_429_raises_rate_limited(monkeypatch): + """A persistent 429 surfaces a clear rate-limit error, not a bare status.""" + from hermes_cli import auth as auth_mod + + monkeypatch.setattr("time.sleep", lambda s: None) + _patch_httpx_post(monkeypatch, [_FakeResp(429, headers={"retry-after": "30"})] * 4) + + with pytest.raises(AuthError) as exc_info: + auth_mod._codex_device_code_login() + + err = exc_info.value + assert err.code == auth_mod.CODEX_RATE_LIMITED_CODE + assert "rate-limiting" in str(err) + assert "30s" in str(err) + assert auth_mod.is_rate_limited_auth_error(err) + + +def test_device_code_login_non_429_error_unchanged(monkeypatch): + """Non-429 failures keep the generic device_code_request_error code.""" + from hermes_cli import auth as auth_mod + + monkeypatch.setattr("time.sleep", lambda s: None) + _patch_httpx_post(monkeypatch, [_FakeResp(500)]) + + with pytest.raises(AuthError) as exc_info: + auth_mod._codex_device_code_login() + + assert exc_info.value.code == "device_code_request_error"