Suppress "Credit access paused" notice on free models (#43669)

* Don't show the "Credit access paused" notice when the active model is free

* Address PR review comments
This commit is contained in:
rob-maron 2026-06-10 14:25:06 -04:00 committed by GitHub
parent 6de3963e37
commit 6110aed9be
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 238 additions and 18 deletions

View file

@ -194,17 +194,71 @@ class AgentNotice:
id: Optional[str] = None
# ── is_free_tier_model (local-data-only free-model check) ────────────────────
def is_free_tier_model(model: str, base_url: str = "") -> bool:
    """Report whether *model* is a Nous free-tier model without any network I/O.

    The answer is derived from two local signals, checked in order:

    1. A ``:free`` suffix on the model id — the canonical Nous free SKU
       marker (e.g. ``nvidia/nemotron-3-ultra:free``). Such ids are free by
       construction on the API side (spend is forced to 0 for ``:free`` ids).
    2. The in-process pricing cache in ``hermes_cli.models``, which the model
       picker fills when it fetches ``/v1/models`` pricing for *base_url*.
       The cache is only peeked at — a miss never triggers a fetch. This is
       best-effort for CLI/TUI sessions; gateway sessions never run the
       picker's pricing fetch, so there the ``:free`` suffix (carried by all
       Nous free SKUs) is the only signal.

    Args:
        model: Model identifier of the active session.
        base_url: Inference base URL used to look up the pricing cache; the
            cache is skipped entirely when this is empty.

    Returns:
        True when the model is recognised as free. Any error fails open to
        False, so the depleted notice still shows: a spurious warning is
        recoverable noise, whereas hiding it on a paid model would mask a
        real billing block.
    """
    if not model:
        return False
    if model.endswith(":free"):
        return True
    if not base_url:
        return False

    try:
        from hermes_cli.models import _is_model_free as model_is_zero_priced
        from hermes_cli.models import _pricing_cache as pricing_by_root

        # Same key normalization as get_pricing_for_provider: the agent holds
        # the /v1-suffixed Nous URL (https://inference-api.nousresearch.com/v1)
        # while the picker keys the cache on the root that precedes /v1.
        cache_key = base_url.rstrip("/").removesuffix("/v1").rstrip("/")
        cached_pricing = pricing_by_root.get(cache_key)
        return model_is_zero_priced(model, cached_pricing) if cached_pricing else False
    except Exception:
        # Deliberate fail-open: never let a cache problem hide the notice.
        return False
# ── evaluate_credits_notices (pure reconciliation function) ──────────────────
def evaluate_credits_notices(
state: CreditsState,
latch: dict,
*,
model_is_free: bool = False,
) -> tuple[list[AgentNotice], list[str]]:
"""Reconcile credits notices against the latch. Mutates ``latch`` IN PLACE.
latch = {"active": set[str], "seen_below_90": bool, "usage_band": Optional[int]}.
``model_is_free``: True when the session's active model is a Nous free-tier
model (see :func:`is_free_tier_model`). Suppresses the ``credits.depleted``
notice — a depleted account on a free model can keep running inference, so the
error banner is noise (and confuses free-tier users who never had credits).
Suppression does NOT emit the "restored" success notice; that fires only on
a genuine ``paid_access`` flip back to True.
Returns ``(to_show: list[AgentNotice], to_clear: list[str])``.
Caller emits to_clear FIRST, then to_show.
@ -284,7 +338,11 @@ def evaluate_credits_notices(
active.discard("credits.grant_spent")
# ── depleted ─────────────────────────────────────────────────────────────
if depleted_cond and "credits.depleted" not in active:
# Suppressed while the active model is free: inference still works there,
# so the error banner would just alarm users (free-tier users especially,
# who never had paid credits to "lose").
show_depleted = depleted_cond and not model_is_free
if show_depleted and "credits.depleted" not in active:
to_show.append(
AgentNotice(
text="✕ Credit access paused · run /usage for balance",
@ -295,20 +353,23 @@ def evaluate_credits_notices(
)
)
active.add("credits.depleted")
elif "credits.depleted" in active and not depleted_cond:
elif "credits.depleted" in active and not show_depleted:
to_clear.append("credits.depleted")
active.discard("credits.depleted")
# Recovery: also emit the success notice
to_show.append(
AgentNotice(
text="✓ Credit access restored",
level="success",
kind="ttl",
ttl_ms=CREDITS_RESTORED_TTL_MS,
key="credits.restored",
id="credits.restored",
if not depleted_cond:
# Genuine recovery (paid_access flipped back True): also emit the
# success notice. A clear caused by switching to a free model while
# still depleted must NOT claim access was restored.
to_show.append(
AgentNotice(
text="✓ Credit access restored",
level="success",
kind="ttl",
ttl_ms=CREDITS_RESTORED_TTL_MS,
key="credits.restored",
id="credits.restored",
)
)
)
return (to_show, to_clear)

View file

@ -2831,11 +2831,18 @@ class AIAgent:
if state is None:
return
try:
from agent.credits_tracker import evaluate_credits_notices
from agent.credits_tracker import evaluate_credits_notices, is_free_tier_model
latch = getattr(self, "_credits_latch", None)
if latch is None:
latch = self._credits_latch = {"active": set(), "seen_below_90": False, "usage_band": None}
to_show, to_clear = evaluate_credits_notices(state, latch)
# Free-model gate: a depleted account on a free model can still
# inference, so the depleted error banner is suppressed. Local-data
# only (":free" suffix + pricing-cache peek) — never a network call.
model_is_free = is_free_tier_model(
getattr(self, "model", "") or "",
getattr(self, "base_url", "") or "",
)
to_show, to_clear = evaluate_credits_notices(state, latch, model_is_free=model_is_free)
for key in to_clear: # clears FIRST …
self._emit_notice_clear(key)
for notice in to_show: # … then shows (depleted lands last in a latest-wins slot)

View file

@ -123,22 +123,29 @@ def test_dev_fixtures_drive_cold_start():
class _FakeAgent:
"""Minimal agent surface for the seed helper: state slots + an emit that runs
the real policy against the latch."""
the real policy against the latch (mirroring run_agent._emit_credits_notices,
including the free-model suppression flag)."""
def __init__(self, provider="nous"):
from agent.credits_tracker import evaluate_credits_notices
def __init__(self, provider="nous", model=""):
from agent.credits_tracker import evaluate_credits_notices, is_free_tier_model
self.provider = provider
self.model = model
self._credits_state = None
self._credits_session_start_micros = None
self._credits_latch = {"active": set(), "seen_below_90": False, "usage_band": None}
self.emitted: list = []
self._eval = evaluate_credits_notices
self._is_free = is_free_tier_model
def _emit_credits_notices(self):
if self._credits_state is None:
return
show, clear = self._eval(self._credits_state, self._credits_latch)
show, clear = self._eval(
self._credits_state,
self._credits_latch,
model_is_free=self._is_free(self.model),
)
self.emitted.append(([n.key for n in show], clear))
@ -169,6 +176,14 @@ def test_seed_fires_depleted_at_session_open():
assert a.emitted == [(["credits.depleted"], [])]
def test_seed_depleted_suppressed_on_free_model():
    """Opening a depleted session on a Nous ``:free`` model must NOT surface
    the depleted banner — inference works fine on the free tier."""
    agent = _FakeAgent(model="nvidia/nemotron-3-ultra:free")
    seeded = _seed(agent, "depleted")
    assert seeded is True
    # Exactly one emit pass ran, and it neither showed nor cleared anything.
    nothing_shown_or_cleared = ([], [])
    assert agent.emitted == [nothing_shown_or_cleared]
def test_seed_healthy_no_notice():
a = _FakeAgent()
assert _seed(a, "healthy") is True

View file

@ -265,6 +265,143 @@ class TestDepleted:
assert "credits.depleted" in keys
# ── Scenario 5b: free-model suppression of the depleted notice ───────────────
class TestDepletedFreeModelSuppression:
    """Free-model gating of the ``credits.depleted`` notice."""

    def test_depleted_suppressed_when_model_is_free(self):
        """A depleted account on a free model shows and latches nothing."""
        latch = fresh_latch()
        depleted_state = CreditsState(paid_access=False)
        shown, cleared = evaluate_credits_notices(depleted_state, latch, model_is_free=True)
        assert "credits.depleted" not in [notice.key for notice in shown]
        assert "credits.depleted" not in latch["active"]
        assert cleared == []

    def test_switch_to_free_model_clears_without_restored(self):
        """Moving to a free model while still depleted clears the banner but
        must not announce that access was restored."""
        latch = fresh_latch()
        # Paid model + depleted account: the notice fires and latches.
        evaluate_credits_notices(CreditsState(paid_access=False), latch)
        assert "credits.depleted" in latch["active"]
        # Same account, now on a free model: clear only, no "restored".
        shown, cleared = evaluate_credits_notices(
            CreditsState(paid_access=False),
            latch,
            model_is_free=True,
        )
        assert "credits.depleted" in cleared
        assert "credits.depleted" not in latch["active"]
        assert "credits.restored" not in [notice.key for notice in shown]

    def test_switch_back_to_paid_model_while_depleted_reshows(self):
        """Returning to a paid model while still depleted re-fires the notice."""
        latch = fresh_latch()
        evaluate_credits_notices(CreditsState(paid_access=False), latch)
        evaluate_credits_notices(CreditsState(paid_access=False), latch, model_is_free=True)
        # Third pass: paid model again, account still depleted.
        shown, cleared = evaluate_credits_notices(CreditsState(paid_access=False), latch)
        shown_keys = [notice.key for notice in shown]
        assert "credits.depleted" in shown_keys
        assert "credits.depleted" in latch["active"]

    def test_genuine_recovery_on_free_model_no_spurious_restored(self):
        """Recovery seen while the notice was suppressed (never shown) leaves
        nothing to clear and emits no 'restored' — there was no visible
        depleted state to restore from."""
        latch = fresh_latch()
        evaluate_credits_notices(CreditsState(paid_access=False), latch, model_is_free=True)
        shown, cleared = evaluate_credits_notices(
            CreditsState(paid_access=True),
            latch,
            model_is_free=True,
        )
        assert cleared == []
        assert "credits.restored" not in [notice.key for notice in shown]

    def test_genuine_recovery_still_emits_restored_when_notice_active(self):
        """A paid_access flip back to True while the notice is showing yields
        clear + restored (unchanged behaviour, whatever the model-free flag)."""
        latch = fresh_latch()
        evaluate_credits_notices(CreditsState(paid_access=False), latch)
        shown, cleared = evaluate_credits_notices(
            CreditsState(paid_access=True),
            latch,
            model_is_free=True,
        )
        assert "credits.depleted" in cleared
        restored_count = sum(1 for notice in shown if notice.key == "credits.restored")
        assert restored_count == 1

    def test_free_flag_does_not_affect_other_notices(self):
        """Usage-band and grant notices do not depend on the model-free gate."""
        latch = fresh_latch()
        evaluate_credits_notices(state_with_fraction(0.10), latch, model_is_free=True)
        shown, _ = evaluate_credits_notices(
            state_with_fraction(0.95, paid_access=False),
            latch,
            model_is_free=True,
        )
        shown_keys = [notice.key for notice in shown]
        assert "credits.usage" in shown_keys
        assert "credits.depleted" not in shown_keys
# ── Scenario 5c: is_free_tier_model (local-data-only check) ──────────────────
class TestIsFreeTierModel:
    """Checks for the local-data-only ``is_free_tier_model`` helper."""

    def test_free_suffix_is_free(self):
        """The ``:free`` suffix decides on its own, with or without a URL."""
        from agent.credits_tracker import is_free_tier_model

        suffix_only = is_free_tier_model("nvidia/nemotron-3-ultra:free")
        suffix_with_url = is_free_tier_model(
            "Hermes-4-70B:free", "https://inference-api.nousresearch.com"
        )
        assert suffix_only is True
        assert suffix_with_url is True

    def test_empty_or_paid_model_is_not_free(self):
        """An empty id or an unsuffixed id with no URL is never free."""
        from agent.credits_tracker import is_free_tier_model

        for model_id in ("", "Hermes-4-405B"):
            assert is_free_tier_model(model_id) is False

    def test_pricing_cache_peek_zero_priced_model(self, monkeypatch):
        """A zero-priced cache entry marks the model free for every URL
        spelling that normalizes to the picker's cache key."""
        from agent.credits_tracker import is_free_tier_model
        import hermes_cli.models as models_mod

        # The picker keys the cache on the pre-/v1 root (get_pricing_for_provider
        # strips a trailing /v1 before fetch_models_with_pricing).
        seeded_cache = {
            "https://inference-api.nousresearch.com": {
                "some/zero-priced": {"prompt": "0", "completion": "0"},
                "some/paid": {"prompt": "0.000001", "completion": "0.000002"},
            }
        }
        monkeypatch.setattr(models_mod, "_pricing_cache", seeded_cache)

        # The agent holds the /v1-suffixed URL (DEFAULT_NOUS_INFERENCE_URL) —
        # the helper must normalize it down to the picker's cache key.
        agent_url = "https://inference-api.nousresearch.com/v1"
        assert is_free_tier_model("some/zero-priced", agent_url) is True
        assert is_free_tier_model("some/paid", agent_url) is False

        # Pre-stripped and trailing-slash variants resolve to the same key.
        equivalent_urls = (
            "https://inference-api.nousresearch.com/",
            "https://inference-api.nousresearch.com/v1/",
        )
        for url in equivalent_urls:
            assert is_free_tier_model("some/zero-priced", url) is True

    def test_cache_miss_is_not_free_and_no_fetch(self, monkeypatch):
        """An empty cache yields False and must not trigger any fetch."""
        from agent.credits_tracker import is_free_tier_model
        import hermes_cli.models as models_mod
        import urllib.request

        def _boom(*args, **kwargs):  # any network attempt fails the test
            raise AssertionError("is_free_tier_model must never hit the network")

        monkeypatch.setattr(models_mod, "_pricing_cache", {})
        monkeypatch.setattr(urllib.request, "urlopen", _boom)

        verdict = is_free_tier_model("some/model", "https://inference-api.nousresearch.com/v1")
        assert verdict is False

    def test_exception_fails_open_to_false(self, monkeypatch):
        """An error raised by the cache lookup is swallowed and reported as
        not free."""
        from agent.credits_tracker import is_free_tier_model
        import hermes_cli.models as models_mod

        class _Exploding:
            def get(self, *_a, **_kw):
                raise RuntimeError("boom")

        monkeypatch.setattr(models_mod, "_pricing_cache", _Exploding())
        verdict = is_free_tier_model("some/model", "https://inference-api.nousresearch.com")
        assert verdict is False
# ── Scenario 6: denominator none (uf is None) ────────────────────────────────