mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-31 06:51:29 +00:00
fix(gateway): classify Codex 429 quota as rate-limit, not missing credentials
When the Codex OAuth token endpoint returns 429 (usage-limit / quota exhaustion), refresh_codex_oauth_pure raised a generic auth error that the gateway surfaced as 'Primary provider auth failed: No Codex credentials stored. Run hermes auth', prompting re-auth that cannot lift a quota cap. Classify 429 distinctly (codex_rate_limited, relogin_required=False) with a non-alarming quota message that honors Retry-After, log it as 'Primary provider rate-limited (429)', and stop format_auth_error from appending the re-authenticate remediation. Also log the fallback provider's literal config key instead of the resolved runtime category. Refs #32790
This commit is contained in:
parent
2bbd53493d
commit
f1422ffd77
3 changed files with 155 additions and 6 deletions
|
|
@ -1078,14 +1078,19 @@ def _resolve_runtime_agent_kwargs() -> dict:
|
|||
resolve_runtime_provider,
|
||||
format_runtime_provider_error,
|
||||
)
|
||||
from hermes_cli.auth import AuthError
|
||||
from hermes_cli.auth import AuthError, is_rate_limited_auth_error
|
||||
|
||||
try:
|
||||
runtime = resolve_runtime_provider()
|
||||
except AuthError as auth_exc:
|
||||
# Primary provider auth failed (expired token, revoked key, etc.).
|
||||
# Try the fallback provider chain before raising.
|
||||
logger.warning("Primary provider auth failed: %s — trying fallback", auth_exc)
|
||||
# Distinguish a transient rate-limit/quota cap (credentials are fine,
|
||||
# re-auth cannot help) from a genuine auth failure (expired/revoked
|
||||
# token). Both fall through to the fallback chain, but the log message
|
||||
# must not mislabel a quota exhaustion as an auth failure (#32790).
|
||||
if is_rate_limited_auth_error(auth_exc):
|
||||
logger.warning("Primary provider rate-limited (429): %s — trying fallback", auth_exc)
|
||||
else:
|
||||
logger.warning("Primary provider auth failed: %s — trying fallback", auth_exc)
|
||||
fb_config = _try_resolve_fallback_provider()
|
||||
if fb_config is not None:
|
||||
return fb_config
|
||||
|
|
@ -1131,9 +1136,13 @@ def _try_resolve_fallback_provider() -> dict | None:
|
|||
explicit_base_url=entry.get("base_url"),
|
||||
explicit_api_key=explicit_api_key,
|
||||
)
|
||||
# Log the literal `provider` key from config, not the resolved
|
||||
# runtime category — an Ollama fallback resolves through the
|
||||
# OpenAI-compatible path and would otherwise be logged as
|
||||
# "openrouter", contradicting the operator's config (#32790).
|
||||
logger.info(
|
||||
"Fallback provider resolved: %s model=%s",
|
||||
runtime.get("provider"),
|
||||
entry.get("provider") or runtime.get("provider"),
|
||||
entry.get("model"),
|
||||
)
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -729,6 +729,12 @@ def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) ->
|
|||
# Error Types
|
||||
# =============================================================================
|
||||
|
||||
# Error code marking upstream rate-limit / usage-quota exhaustion (HTTP 429).
|
||||
# Such failures are transient and re-authenticating cannot resolve them, so
|
||||
# they must be kept distinct from missing/expired-credential errors.
|
||||
CODEX_RATE_LIMITED_CODE = "codex_rate_limited"
|
||||
|
||||
|
||||
class AuthError(RuntimeError):
|
||||
"""Structured auth error with UX mapping hints."""
|
||||
|
||||
|
|
@ -746,11 +752,52 @@ class AuthError(RuntimeError):
|
|||
self.relogin_required = relogin_required
|
||||
|
||||
|
||||
def is_rate_limited_auth_error(error: Exception) -> bool:
|
||||
"""True when an :class:`AuthError` represents upstream rate-limiting / quota
|
||||
exhaustion rather than missing or invalid credentials.
|
||||
|
||||
These failures are transient — re-authenticating cannot resolve them — so
|
||||
callers should surface a "retry later" notice and prefer a fallback chain
|
||||
instead of prompting the operator to run ``hermes auth``.
|
||||
"""
|
||||
return (
|
||||
isinstance(error, AuthError)
|
||||
and not error.relogin_required
|
||||
and error.code == CODEX_RATE_LIMITED_CODE
|
||||
)
|
||||
|
||||
|
||||
def _parse_retry_after_seconds(headers: Any) -> Optional[int]:
|
||||
"""Best-effort parse of a ``Retry-After`` header into whole seconds.
|
||||
|
||||
Supports the delta-seconds form (e.g. ``"120"``). HTTP-date forms and
|
||||
missing/unparseable values return ``None`` rather than guessing.
|
||||
"""
|
||||
if headers is None:
|
||||
return None
|
||||
try:
|
||||
raw = headers.get("retry-after")
|
||||
except Exception:
|
||||
return None
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
seconds = int(str(raw).strip())
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
return seconds if seconds >= 0 else None
|
||||
|
||||
|
||||
def format_auth_error(error: Exception) -> str:
|
||||
"""Map auth failures to concise user-facing guidance."""
|
||||
if not isinstance(error, AuthError):
|
||||
return str(error)
|
||||
|
||||
# Rate-limit / quota errors are not credential problems — never append the
|
||||
# "re-authenticate" remediation, which would mislead the operator.
|
||||
if is_rate_limited_auth_error(error):
|
||||
return str(error)
|
||||
|
||||
if error.relogin_required:
|
||||
return f"{error} Run `hermes model` to re-authenticate."
|
||||
|
||||
|
|
@ -3308,6 +3355,30 @@ def refresh_codex_oauth_pure(
|
|||
},
|
||||
)
|
||||
|
||||
if response.status_code == 429:
|
||||
# Upstream rate-limit / usage-quota exhaustion on the token endpoint.
|
||||
# The stored refresh token is still valid here — re-authenticating
|
||||
# cannot lift a quota cap. Classify distinctly from auth failures so
|
||||
# callers surface a "retry later" notice instead of a misleading
|
||||
# "run hermes auth" prompt (see issue #32790).
|
||||
retry_after = _parse_retry_after_seconds(getattr(response, "headers", None))
|
||||
if retry_after is not None:
|
||||
message = (
|
||||
f"Codex provider quota exhausted (429); retry after {retry_after}s. "
|
||||
"Credentials are still valid."
|
||||
)
|
||||
else:
|
||||
message = (
|
||||
"Codex provider quota exhausted (429). Credentials are still valid; "
|
||||
"retry after the usage limit resets."
|
||||
)
|
||||
raise AuthError(
|
||||
message,
|
||||
provider="openai-codex",
|
||||
code=CODEX_RATE_LIMITED_CODE,
|
||||
relogin_required=False,
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
code = "codex_refresh_failed"
|
||||
message = f"Codex token refresh failed with status {response.status_code}."
|
||||
|
|
|
|||
|
|
@ -263,9 +263,10 @@ def test_resolve_returns_hermes_auth_store_source(tmp_path, monkeypatch):
|
|||
|
||||
|
||||
class _StubHTTPResponse:
|
||||
def __init__(self, status_code: int, payload):
|
||||
def __init__(self, status_code: int, payload, headers=None):
|
||||
self.status_code = status_code
|
||||
self._payload = payload
|
||||
self.headers = headers or {}
|
||||
self.text = json.dumps(payload) if isinstance(payload, (dict, list)) else str(payload)
|
||||
|
||||
def json(self):
|
||||
|
|
@ -382,6 +383,74 @@ def test_refresh_falls_back_to_generic_message_on_unparseable_body(monkeypatch):
|
|||
assert "status 401" in str(err)
|
||||
|
||||
|
||||
def test_refresh_429_classified_as_quota_not_auth_failure(monkeypatch):
|
||||
"""429 from the token endpoint is a usage-quota cap, not an auth failure.
|
||||
|
||||
Regression test for #32790: must NOT force relogin and must carry the
|
||||
dedicated rate-limit code so callers surface a "retry later" notice rather
|
||||
than a misleading "run hermes auth".
|
||||
"""
|
||||
from hermes_cli.auth import (
|
||||
CODEX_RATE_LIMITED_CODE,
|
||||
format_auth_error,
|
||||
is_rate_limited_auth_error,
|
||||
)
|
||||
|
||||
response = _StubHTTPResponse(
|
||||
429,
|
||||
{"error": {"message": "You hit your usage limit.", "code": "usage_limit_reached"}},
|
||||
headers={"retry-after": "120"},
|
||||
)
|
||||
_patch_httpx(monkeypatch, response)
|
||||
|
||||
with pytest.raises(AuthError) as exc_info:
|
||||
refresh_codex_oauth_pure("a-tok", "r-tok")
|
||||
|
||||
err = exc_info.value
|
||||
assert err.code == CODEX_RATE_LIMITED_CODE
|
||||
assert err.relogin_required is False
|
||||
assert is_rate_limited_auth_error(err) is True
|
||||
assert "retry after 120s" in str(err)
|
||||
# User-facing copy must not tell the operator to re-authenticate.
|
||||
rendered = format_auth_error(err)
|
||||
assert "re-authenticate" not in rendered
|
||||
assert "hermes auth" not in rendered
|
||||
|
||||
|
||||
def test_refresh_429_without_retry_after_header(monkeypatch):
|
||||
"""429 without a Retry-After header still classifies as quota, no relogin."""
|
||||
from hermes_cli.auth import CODEX_RATE_LIMITED_CODE
|
||||
|
||||
response = _StubHTTPResponse(429, {"error": "rate_limited"})
|
||||
_patch_httpx(monkeypatch, response)
|
||||
|
||||
with pytest.raises(AuthError) as exc_info:
|
||||
refresh_codex_oauth_pure("a-tok", "r-tok")
|
||||
|
||||
err = exc_info.value
|
||||
assert err.code == CODEX_RATE_LIMITED_CODE
|
||||
assert err.relogin_required is False
|
||||
assert "quota exhausted" in str(err).lower()
|
||||
|
||||
|
||||
def test_is_rate_limited_auth_error_distinguishes_credential_errors():
|
||||
"""Missing/expired credentials must NOT be treated as rate-limit errors."""
|
||||
from hermes_cli.auth import CODEX_RATE_LIMITED_CODE, is_rate_limited_auth_error
|
||||
|
||||
rate_limited = AuthError(
|
||||
"quota", provider="openai-codex", code=CODEX_RATE_LIMITED_CODE, relogin_required=False
|
||||
)
|
||||
missing_creds = AuthError(
|
||||
"No Codex credentials stored.",
|
||||
provider="openai-codex",
|
||||
code="codex_auth_missing",
|
||||
relogin_required=True,
|
||||
)
|
||||
assert is_rate_limited_auth_error(rate_limited) is True
|
||||
assert is_rate_limited_auth_error(missing_creds) is False
|
||||
assert is_rate_limited_auth_error(ValueError("nope")) is False
|
||||
|
||||
|
||||
def test_login_openai_codex_force_new_login_skips_existing_reuse_prompt(monkeypatch):
|
||||
called = {"device_login": 0}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue