feat(auth): normalise how we check whether a user has free or paid access to Nous Portal, so that behaviour and error messages can be exposed accordingly.

This commit is contained in:
Robin Fernandes 2026-05-25 15:10:14 +10:00 committed by Teknium
parent 0bf9b867cf
commit 406901b27d
32 changed files with 2470 additions and 181 deletions

View file

@ -992,6 +992,47 @@ class TestAuxiliaryPoolAwareness:
assert stale_client.chat.completions.create.call_count == 1
assert fresh_client.chat.completions.create.call_count == 1
def test_call_llm_refreshes_nous_after_free_tier_block_when_account_paid(self):
    """Free-tier 404 on the cached Nous client is retried once on a fresh client.

    The account lookup is patched to report paid service access, and the test
    expects ``call_llm`` to hand back the fresh client's response after exactly
    one completion call on each client.
    """
    from hermes_cli.nous_account import NousPortalAccountInfo

    class _Payment404(Exception):
        status_code = 404

    def _passthrough(resp, _task):
        # Stand-in for response validation: return the response untouched.
        return resp

    nous_base_url = "https://inference-api.nousresearch.com/v1"

    # Client served from the cache; its completion call raises the 404 block.
    blocked_client = MagicMock()
    blocked_client.base_url = nous_base_url
    blocked_client.chat.completions.create.side_effect = _Payment404(
        "model_not_supported_on_free_tier: model is not available on the free tier"
    )

    # Client returned by the patched OpenAI constructor.
    refreshed_client = MagicMock()
    refreshed_client.base_url = nous_base_url
    refreshed_client.chat.completions.create.return_value = {"ok": True}

    paid_account = NousPortalAccountInfo(
        logged_in=True,
        source="account_api",
        fresh=True,
        paid_service_access=True,
    )

    with (
        patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("nous", "nous-model", None, None, None),
        ),
        patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(blocked_client, "nous-model"),
        ),
        patch("agent.auxiliary_client.OpenAI", return_value=refreshed_client),
        patch(
            "agent.auxiliary_client._validate_llm_response",
            side_effect=_passthrough,
        ),
        patch(
            "agent.auxiliary_client._resolve_nous_runtime_api",
            return_value=("fresh-agent-key", nous_base_url),
        ),
        patch(
            "hermes_cli.nous_account.get_nous_portal_account_info",
            return_value=paid_account,
        ),
    ):
        outcome = call_llm(
            task="compression",
            messages=[{"role": "user", "content": "hi"}],
        )

    assert outcome == {"ok": True}
    assert blocked_client.chat.completions.create.call_count == 1
    assert refreshed_client.chat.completions.create.call_count == 1
@pytest.mark.asyncio
async def test_async_call_llm_retries_nous_after_401(self):
class _Auth401(Exception):
@ -1021,6 +1062,48 @@ class TestAuxiliaryPoolAwareness:
assert stale_client.chat.completions.create.await_count == 1
assert fresh_async_client.chat.completions.create.await_count == 1
@pytest.mark.asyncio
async def test_async_call_llm_refreshes_nous_after_free_tier_block_when_account_paid(self):
    """Async variant: a free-tier 404 on the cached client is retried on a fresh one.

    The account lookup is patched to report paid service access, and the test
    expects ``async_call_llm`` to return the fresh async client's response
    after exactly one awaited completion call on each client.
    """
    from hermes_cli.nous_account import NousPortalAccountInfo

    class _Payment404(Exception):
        status_code = 404

    def _passthrough(resp, _task):
        # Stand-in for response validation: return the response untouched.
        return resp

    nous_base_url = "https://inference-api.nousresearch.com/v1"

    # Client served from the cache; awaiting its completion call raises the 404.
    blocked_client = MagicMock()
    blocked_client.base_url = nous_base_url
    free_tier_block = _Payment404(
        "model_not_supported_on_free_tier: model is not available on the free tier"
    )
    blocked_client.chat.completions.create = AsyncMock(side_effect=free_tier_block)

    # Client returned by the patched async-client conversion helper.
    refreshed_client = MagicMock()
    refreshed_client.base_url = nous_base_url
    refreshed_client.chat.completions.create = AsyncMock(return_value={"ok": True})

    paid_account = NousPortalAccountInfo(
        logged_in=True,
        source="account_api",
        fresh=True,
        paid_service_access=True,
    )

    with (
        patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("nous", "nous-model", None, None, None),
        ),
        patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(blocked_client, "nous-model"),
        ),
        patch(
            "agent.auxiliary_client._to_async_client",
            return_value=(refreshed_client, "nous-model"),
        ),
        patch(
            "agent.auxiliary_client._validate_llm_response",
            side_effect=_passthrough,
        ),
        patch(
            "agent.auxiliary_client._resolve_nous_runtime_api",
            return_value=("fresh-agent-key", nous_base_url),
        ),
        patch(
            "hermes_cli.nous_account.get_nous_portal_account_info",
            return_value=paid_account,
        ),
    ):
        outcome = await async_call_llm(
            task="session_search",
            messages=[{"role": "user", "content": "hi"}],
        )

    assert outcome == {"ok": True}
    assert blocked_client.chat.completions.create.await_count == 1
    assert refreshed_client.chat.completions.create.await_count == 1
def test_cached_gmi_client_keeps_explicit_slash_model_override(self):
import agent.auxiliary_client as aux
@ -1076,6 +1159,19 @@ class TestIsPaymentError:
exc.status_code = 429
assert _is_payment_error(exc) is True
def test_404_free_tier_model_block_is_payment(self):
    """A 404 carrying the Free Tier model-block message counts as a payment error."""
    free_tier_message = (
        "Model 'gpt-5' is not available on the Free Tier. "
        "Upgrade at https://portal.nousresearch.com or pick a free model."
    )
    blocked = Exception(free_tier_message)
    blocked.status_code = 404

    assert _is_payment_error(blocked) is True
def test_404_generic_not_found_is_not_payment(self):
    """A plain "Not Found" 404 must NOT be treated as a payment error."""
    not_found = Exception("Not Found")
    not_found.status_code = 404

    assert _is_payment_error(not_found) is False
def test_429_without_credits_message_is_not_payment(self):
"""Normal rate limits should NOT be treated as payment errors."""
exc = Exception("Rate limit exceeded, try again in 2 seconds")

View file

@ -254,12 +254,51 @@ class TestClassifyApiError:
assert result.reason == FailoverReason.billing
assert result.retryable is False
def test_402_out_of_funds_billing(self):
    """A 402 whose body says the API key ran out of funds is non-retryable billing."""
    out_of_funds_body = {
        "status": 402,
        "message": (
            "Your API key has run out of funds. Please go visit the "
            "portal to sort that out: https://portal.nousresearch.com"
        ),
    }
    err = MockAPIError("Payment Required", status_code=402, body=out_of_funds_body)

    classified = classify_api_error(err)

    assert classified.reason == FailoverReason.billing
    assert classified.retryable is False
def test_402_transient_usage_limit(self):
    """A 402 with a "try again later" usage-limit message is a retryable rate limit."""
    err = MockAPIError(
        "usage limit exceeded, try again later",
        status_code=402,
    )

    classified = classify_api_error(err)

    assert classified.reason == FailoverReason.rate_limit
    assert classified.retryable is True
def test_403_plan_entitlement_billing(self):
    """A 403 saying the plan excludes the requested model is non-retryable billing."""
    err = MockAPIError(
        "This plan does not include the requested model",
        status_code=403,
    )

    classified = classify_api_error(err)

    assert classified.reason == FailoverReason.billing
    assert classified.retryable is False
def test_404_free_tier_model_block_is_billing(self):
    """A 404 whose body carries the Free Tier block message classifies as billing.

    The result is expected to be non-retryable but eligible for fallback.
    """
    free_tier_body = {
        "status": 404,
        "message": (
            "Model 'gpt-5' is not available on the Free Tier. "
            "Upgrade at https://portal.nousresearch.com or pick a free model."
        ),
    }
    err = MockAPIError("Not Found", status_code=404, body=free_tier_body)

    classified = classify_api_error(err, provider="nous", model="gpt-5")

    assert classified.reason == FailoverReason.billing
    assert classified.retryable is False
    assert classified.should_fallback is True
# ── Rate limit ──
def test_429_rate_limit(self):
@ -753,6 +792,19 @@ class TestClassifyApiError:
result = classify_api_error(e)
assert result.reason == FailoverReason.context_overflow
def test_error_code_model_not_supported_on_free_tier_is_billing(self):
    """An error body with code ``model_not_supported_on_free_tier`` classifies as billing."""
    error_payload = {
        "code": "model_not_supported_on_free_tier",
        "message": "Model 'gpt-5' is not available on the Free Tier.",
    }
    # No status code is supplied; only the message and structured body.
    err = MockAPIError("Model unavailable", body={"error": error_payload})

    classified = classify_api_error(err, provider="nous", model="gpt-5")

    assert classified.reason == FailoverReason.billing
# ── Message-only patterns (no status code) ──
def test_message_billing_pattern(self):
@ -760,6 +812,11 @@ class TestClassifyApiError:
result = classify_api_error(e)
assert result.reason == FailoverReason.billing
def test_message_free_tier_model_block_is_billing(self):
    """A bare exception with only the Free Tier block message classifies as billing."""
    err = Exception("Model 'gpt-5' is not available on the Free Tier.")

    classified = classify_api_error(err, provider="nous", model="gpt-5")

    assert classified.reason == FailoverReason.billing
def test_message_rate_limit_pattern(self):
e = Exception("rate limit reached for this model")
result = classify_api_error(e)