fix(gateway): classify Codex 429 quota as rate-limit, not missing credentials

When the Codex OAuth token endpoint returns 429 (usage-limit / quota
exhaustion), refresh_codex_oauth_pure raised a generic auth error that the
gateway surfaced as 'Primary provider auth failed: No Codex credentials
stored. Run hermes auth', prompting re-auth that cannot lift a quota cap.

Classify 429 distinctly (codex_rate_limited, relogin_required=False) with a
non-alarming quota message that honors Retry-After, log it as
'Primary provider rate-limited (429)', and stop format_auth_error from
appending the re-authenticate remediation. Also log the fallback provider's
literal config key instead of the resolved runtime category.

Refs #32790
This commit is contained in:
konsisumer 2026-05-27 09:07:21 +02:00 committed by Teknium
parent 2bbd53493d
commit f1422ffd77
3 changed files with 155 additions and 6 deletions

View file

@ -263,9 +263,10 @@ def test_resolve_returns_hermes_auth_store_source(tmp_path, monkeypatch):
class _StubHTTPResponse:
def __init__(self, status_code: int, payload):
def __init__(self, status_code: int, payload, headers=None):
self.status_code = status_code
self._payload = payload
self.headers = headers or {}
self.text = json.dumps(payload) if isinstance(payload, (dict, list)) else str(payload)
def json(self):
@ -382,6 +383,74 @@ def test_refresh_falls_back_to_generic_message_on_unparseable_body(monkeypatch):
assert "status 401" in str(err)
def test_refresh_429_classified_as_quota_not_auth_failure(monkeypatch):
    """A 429 from the token endpoint signals a usage-quota cap, not broken auth.

    Regression test for #32790. The raised error must carry the dedicated
    rate-limit code and must not demand a relogin, so that callers show a
    "retry later" notice instead of a misleading "run hermes auth" prompt.
    """
    from hermes_cli.auth import (
        CODEX_RATE_LIMITED_CODE,
        format_auth_error,
        is_rate_limited_auth_error,
    )

    quota_body = {
        "error": {"message": "You hit your usage limit.", "code": "usage_limit_reached"},
    }
    stub = _StubHTTPResponse(429, quota_body, headers={"retry-after": "120"})
    _patch_httpx(monkeypatch, stub)

    with pytest.raises(AuthError) as caught:
        refresh_codex_oauth_pure("a-tok", "r-tok")

    raised = caught.value
    assert raised.code == CODEX_RATE_LIMITED_CODE
    assert raised.relogin_required is False
    assert is_rate_limited_auth_error(raised) is True
    # The Retry-After header value is expected to appear in the message.
    assert "retry after 120s" in str(raised)

    # The rendered, user-facing text must not steer the operator toward
    # re-authentication.
    shown = format_auth_error(raised)
    for forbidden in ("re-authenticate", "hermes auth"):
        assert forbidden not in shown
def test_refresh_429_without_retry_after_header(monkeypatch):
    """A 429 lacking Retry-After is still classified as quota, with no relogin."""
    from hermes_cli.auth import CODEX_RATE_LIMITED_CODE

    # No headers argument: the stub response carries no Retry-After hint.
    _patch_httpx(monkeypatch, _StubHTTPResponse(429, {"error": "rate_limited"}))

    with pytest.raises(AuthError) as caught:
        refresh_codex_oauth_pure("a-tok", "r-tok")

    raised = caught.value
    assert raised.code == CODEX_RATE_LIMITED_CODE
    assert raised.relogin_required is False
    assert "quota exhausted" in str(raised).lower()
def test_is_rate_limited_auth_error_distinguishes_credential_errors():
    """Only the rate-limit code counts; credential errors and other exceptions do not."""
    from hermes_cli.auth import CODEX_RATE_LIMITED_CODE, is_rate_limited_auth_error

    provider_name = "openai-codex"
    quota_error = AuthError(
        "quota",
        provider=provider_name,
        code=CODEX_RATE_LIMITED_CODE,
        relogin_required=False,
    )
    credential_error = AuthError(
        "No Codex credentials stored.",
        provider=provider_name,
        code="codex_auth_missing",
        relogin_required=True,
    )

    assert is_rate_limited_auth_error(quota_error) is True
    assert is_rate_limited_auth_error(credential_error) is False
    # An exception that is not an AuthError must also be rejected.
    assert is_rate_limited_auth_error(ValueError("nope")) is False
def test_login_openai_codex_force_new_login_skips_existing_reuse_prompt(monkeypatch):
called = {"device_login": 0}