From cbfa018aeff5620d893c9c7d3ea307c818183c1f Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 17 Jun 2026 05:48:35 -0700
Subject: [PATCH] fix(auth): retry Codex device-code login on 429 with clear
 rate-limit message (#47860)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The OpenAI device-code login (POST auth.openai.com/.../deviceauth/usercode)
had no retry or 429 handling — a transient throttle from OpenAI surfaced as
a bare "Device code request returned status 429" with no guidance, reading
as a hard login failure.

- Retry the device-code request with capped exponential backoff (honoring
  Retry-After), up to 4 attempts.
- On persistent 429, raise a clear AuthError tagged CODEX_RATE_LIMITED_CODE
  (classified transient, not a credential problem) with a wait hint.
- Apply the same 429 classification (without retry) to the token-exchange
  step, which had the same bug.

Unrelated to PR #47399 (Responses-API cache headers); this is the OAuth
device-code path in hermes_cli/auth.py.
---
 hermes_cli/auth.py                           | 78 +++++++++++++++---
 tests/hermes_cli/test_auth_codex_provider.py | 84 ++++++++++++++++++++
 2 files changed, 150 insertions(+), 12 deletions(-)

diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 590b6794d2a..61c2bbed786 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -7231,23 +7231,61 @@ def _codex_device_code_login() -> Dict[str, Any]:
     issuer = "https://auth.openai.com"
     client_id = CODEX_OAUTH_CLIENT_ID
 
-    # Step 1: Request device code
-    try:
-        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
-            resp = client.post(
-                f"{issuer}/api/accounts/deviceauth/usercode",
-                json={"client_id": client_id},
-                headers={"Content-Type": "application/json"},
+    # Step 1: Request device code. OpenAI's auth endpoint rate-limits this
+    # request (HTTP 429) when login is attempted too often from the same
+    # IP/account — retry with capped backoff (honoring ``Retry-After``)
+    # before surfacing a clear, actionable message instead of a bare status.
+    resp = None
+    max_attempts = 4
+    for attempt in range(1, max_attempts + 1):
+        try:
+            with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
+                resp = client.post(
+                    f"{issuer}/api/accounts/deviceauth/usercode",
+                    json={"client_id": client_id},
+                    headers={"Content-Type": "application/json"},
+                )
+        except Exception as exc:
+            raise AuthError(
+                f"Failed to request device code: {exc}",
+                provider="openai-codex", code="device_code_request_failed",
             )
-    except Exception as exc:
+
+        if resp.status_code != 429:
+            break
+
+        if attempt < max_attempts:
+            retry_after = _parse_retry_after_seconds(
+                getattr(resp, "headers", None)
+            )
+            # Exponential backoff (2s, 4s, 8s), or the server-provided
+            # Retry-After when present; either way clamped to 1-60s.
+            delay = retry_after if retry_after is not None else 2 ** attempt
+            delay = max(1, min(int(delay), 60))
+            print(
+                "OpenAI is rate-limiting login requests "
+                f"(429); retrying in {delay}s..."
+            )
+            _time.sleep(delay)
+
+    if resp is not None and resp.status_code == 429:
+        retry_after = _parse_retry_after_seconds(getattr(resp, "headers", None))
+        wait_hint = (
+            f" Try again in about {retry_after}s."
+            if retry_after is not None
+            else " Wait a minute and run the login again."
+        )
         raise AuthError(
-            f"Failed to request device code: {exc}",
-            provider="openai-codex", code="device_code_request_failed",
+            "OpenAI is rate-limiting Codex login requests (HTTP 429). "
+            "This is a temporary throttle on OpenAI's side, not a credential "
+            f"problem.{wait_hint}",
+            provider="openai-codex", code=CODEX_RATE_LIMITED_CODE,
         )
 
-    if resp.status_code != 200:
+    if resp is None or resp.status_code != 200:
+        status = resp.status_code if resp is not None else "unknown"
         raise AuthError(
-            f"Device code request returned status {resp.status_code}.",
+            f"Device code request returned status {status}.",
             provider="openai-codex", code="device_code_request_error",
         )
 
@@ -7335,6 +7373,22 @@ def _codex_device_code_login() -> Dict[str, Any]:
             provider="openai-codex", code="token_exchange_failed",
         )
 
+    if token_resp.status_code == 429:
+        retry_after = _parse_retry_after_seconds(
+            getattr(token_resp, "headers", None)
+        )
+        wait_hint = (
+            f" Try again in about {retry_after}s."
+            if retry_after is not None
+            else " Wait a minute and run the login again."
+        )
+        raise AuthError(
+            "OpenAI is rate-limiting Codex login requests (HTTP 429) during "
+            "token exchange. This is a temporary throttle on OpenAI's side, "
+            f"not a credential problem.{wait_hint}",
+            provider="openai-codex", code=CODEX_RATE_LIMITED_CODE,
+        )
+
     if token_resp.status_code != 200:
         raise AuthError(
             f"Token exchange returned status {token_resp.status_code}.",
diff --git a/tests/hermes_cli/test_auth_codex_provider.py b/tests/hermes_cli/test_auth_codex_provider.py
index 2ce2907650d..0c97e127748 100644
--- a/tests/hermes_cli/test_auth_codex_provider.py
+++ b/tests/hermes_cli/test_auth_codex_provider.py
@@ -1009,3 +1009,87 @@ def test_login_openai_codex_force_new_login_skips_existing_reuse_prompt(monkeypa
 
     assert called["device_login"] == 1
     assert called["tokens"]["access_token"] == "fresh-at"
+
+
+class _FakeResp:
+    def __init__(self, status_code, json_data=None, headers=None):
+        self.status_code = status_code
+        self._json = json_data or {}
+        self.headers = headers or {}
+
+    def json(self):
+        return self._json
+
+
+def _patch_httpx_post(monkeypatch, responses):
+    """Patch hermes_cli.auth.httpx.Client so .post() returns queued responses."""
+    seq = iter(responses)
+
+    class _FakeClient:
+        def __enter__(self):
+            return self
+
+        def __exit__(self, *a):
+            return False
+
+        def post(self, *args, **kwargs):
+            return next(seq)
+
+    monkeypatch.setattr("hermes_cli.auth.httpx.Client", lambda *a, **k: _FakeClient())
+
+
+def test_device_code_login_retries_on_429_then_succeeds(monkeypatch):
+    """A transient 429 on the device-code request is retried, not surfaced."""
+    from hermes_cli import auth as auth_mod
+
+    sleeps = []
+    monkeypatch.setattr("time.sleep", lambda s: sleeps.append(s))
+
+    # First call 429 (with Retry-After), second call succeeds. The polling
+    # loop then returns the authorization code, and token exchange succeeds.
+    _patch_httpx_post(
+        monkeypatch,
+        [
+            _FakeResp(429, headers={"retry-after": "1"}),
+            _FakeResp(200, {"user_code": "ABCD", "device_auth_id": "dev-1", "interval": "5"}),
+            _FakeResp(200, {"authorization_code": "auth-code", "code_verifier": "verifier"}),
+            _FakeResp(200, {"access_token": "at", "refresh_token": "rt", "expires_in": 3600}),
+        ],
+    )
+    # No extra patch for the polling sleep: it shares time.sleep, patched above.
+
+    creds = auth_mod._codex_device_code_login()
+
+    assert creds["tokens"]["access_token"] == "at"
+    # The 429 triggered a 1s backoff sleep (the Retry-After value) before the retry.
+    assert 1 in sleeps
+
+
+def test_device_code_login_persistent_429_raises_rate_limited(monkeypatch):
+    """A persistent 429 surfaces a clear rate-limit error, not a bare status."""
+    from hermes_cli import auth as auth_mod
+
+    monkeypatch.setattr("time.sleep", lambda s: None)
+    _patch_httpx_post(monkeypatch, [_FakeResp(429, headers={"retry-after": "30"})] * 4)
+
+    with pytest.raises(AuthError) as exc_info:
+        auth_mod._codex_device_code_login()
+
+    err = exc_info.value
+    assert err.code == auth_mod.CODEX_RATE_LIMITED_CODE
+    assert "rate-limiting" in str(err)
+    assert "30s" in str(err)
+    assert auth_mod.is_rate_limited_auth_error(err)
+
+
+def test_device_code_login_non_429_error_unchanged(monkeypatch):
+    """Non-429 failures keep the generic device_code_request_error code."""
+    from hermes_cli import auth as auth_mod
+
+    monkeypatch.setattr("time.sleep", lambda s: None)
+    _patch_httpx_post(monkeypatch, [_FakeResp(500)])
+
+    with pytest.raises(AuthError) as exc_info:
+        auth_mod._codex_device_code_login()
+
+    assert exc_info.value.code == "device_code_request_error"