From 6110aed9be2f90c4982ca1022238573ed25d120b Mon Sep 17 00:00:00 2001 From: rob-maron <132852777+rob-maron@users.noreply.github.com> Date: Wed, 10 Jun 2026 14:25:06 -0400 Subject: [PATCH] Suppress "Credit access paused" notice on free models (#43669) * don't show credits message on free model * PR comments --- agent/credits_tracker.py | 85 ++++++++++++--- run_agent.py | 11 +- tests/agent/test_credits_cold_start.py | 23 ++++- tests/agent/test_credits_policy.py | 137 +++++++++++++++++++++++++ 4 files changed, 238 insertions(+), 18 deletions(-) diff --git a/agent/credits_tracker.py b/agent/credits_tracker.py index 79d05dbb196..f84bc9a7c0e 100644 --- a/agent/credits_tracker.py +++ b/agent/credits_tracker.py @@ -194,17 +194,71 @@ class AgentNotice: id: Optional[str] = None +# ── is_free_tier_model (local-data-only free-model check) ──────────────────── + + +def is_free_tier_model(model: str, base_url: str = "") -> bool: + """Return True when *model* is a Nous free-tier model, using ONLY local data. + + Two signals, both zero-network: + + 1. The ``:free`` suffix — the canonical Nous free SKU marker (e.g. + ``nvidia/nemotron-3-ultra:free``). Free by construction on the API side + (spend is forced to 0 for ``:free`` ids). + 2. A peek into the in-process pricing cache in ``hermes_cli.models`` + (populated when the model picker fetched ``/v1/models`` pricing for + *base_url*). PEEK ONLY — a cache miss never triggers a fetch. This is + CLI/TUI-session best-effort: gateway sessions never run the picker's + pricing fetch, so suppression there rests entirely on the ``:free`` + suffix (which all Nous free SKUs carry). + + Fail-open to False (the depleted notice still shows) on any error: wrongly + showing the warning is recoverable noise; wrongly hiding it on a paid model + would mask a real billing block. + """ + if not model: + return False + if model.endswith(":free"): + return True + if not base_url: + return False + try: + from hermes_cli.models import _is_model_free, _pricing_cache + + # Mirror get_pricing_for_provider's key normalization: the agent's + # Nous base_url is /v1-suffixed (https://inference-api.nousresearch.com/v1) + # but the picker keys _pricing_cache on the pre-/v1 root. + key = base_url.rstrip("/") + if key.endswith("/v1"): + key = key[:-3].rstrip("/") + pricing = _pricing_cache.get(key) + if not pricing: + return False + return _is_model_free(model, pricing) + except Exception: + return False + + # ── evaluate_credits_notices (pure reconciliation function) ────────────────── def evaluate_credits_notices( state: CreditsState, latch: dict, + *, + model_is_free: bool = False, ) -> tuple[list[AgentNotice], list[str]]: """Reconcile credits notices against the latch. Mutates ``latch`` IN PLACE. latch = {"active": set[str], "seen_below_90": bool, "usage_band": Optional[int]}. + ``model_is_free``: True when the session's active model is a Nous free-tier + model (see :func:`is_free_tier_model`). Suppresses the ``credits.depleted`` + notice — a depleted account on a free model can keep inferencing, so the + error banner is noise (and confuses free-tier users who never had credits). + Suppression does NOT emit the "restored" success notice; that fires only on + a genuine ``paid_access`` flip back to True. + Returns ``(to_show: list[AgentNotice], to_clear: list[str])``. Caller emits to_clear FIRST, then to_show. @@ -284,7 +338,11 @@ def evaluate_credits_notices( active.discard("credits.grant_spent") # ── depleted ───────────────────────────────────────────────────────────── - if depleted_cond and "credits.depleted" not in active: + # Suppressed while the active model is free: inference still works there, + # so the error banner would just alarm users (free-tier users especially, + # who never had paid credits to "lose"). + show_depleted = depleted_cond and not model_is_free + if show_depleted and "credits.depleted" not in active: to_show.append( AgentNotice( text="✕ Credit access paused · run /usage for balance", @@ -295,20 +353,23 @@ def evaluate_credits_notices( ) ) active.add("credits.depleted") - elif "credits.depleted" in active and not depleted_cond: + elif "credits.depleted" in active and not show_depleted: to_clear.append("credits.depleted") active.discard("credits.depleted") - # Recovery: also emit the success notice - to_show.append( - AgentNotice( - text="✓ Credit access restored", - level="success", - kind="ttl", - ttl_ms=CREDITS_RESTORED_TTL_MS, - key="credits.restored", - id="credits.restored", + if not depleted_cond: + # Genuine recovery (paid_access flipped back True): also emit the + # success notice. A clear caused by switching to a free model while + # still depleted must NOT claim access was restored. + to_show.append( + AgentNotice( + text="✓ Credit access restored", + level="success", + kind="ttl", + ttl_ms=CREDITS_RESTORED_TTL_MS, + key="credits.restored", + id="credits.restored", + ) ) - ) return (to_show, to_clear) diff --git a/run_agent.py b/run_agent.py index 5465bb9ae2d..e81bf3b93e7 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2831,11 +2831,18 @@ class AIAgent: if state is None: return try: - from agent.credits_tracker import evaluate_credits_notices + from agent.credits_tracker import evaluate_credits_notices, is_free_tier_model latch = getattr(self, "_credits_latch", None) if latch is None: latch = self._credits_latch = {"active": set(), "seen_below_90": False, "usage_band": None} - to_show, to_clear = evaluate_credits_notices(state, latch) + # Free-model gate: a depleted account on a free model can still + # inference, so the depleted error banner is suppressed. Local-data + # only (":free" suffix + pricing-cache peek) — never a network call. + model_is_free = is_free_tier_model( + getattr(self, "model", "") or "", + getattr(self, "base_url", "") or "", + ) + to_show, to_clear = evaluate_credits_notices(state, latch, model_is_free=model_is_free) for key in to_clear: # clears FIRST … self._emit_notice_clear(key) for notice in to_show: # … then shows (depleted lands last in a latest-wins slot) diff --git a/tests/agent/test_credits_cold_start.py b/tests/agent/test_credits_cold_start.py index d48b6f972c0..9d3c3410874 100644 --- a/tests/agent/test_credits_cold_start.py +++ b/tests/agent/test_credits_cold_start.py @@ -123,22 +123,29 @@ def test_dev_fixtures_drive_cold_start(): class _FakeAgent: """Minimal agent surface for the seed helper: state slots + an emit that runs - the real policy against the latch.""" + the real policy against the latch (mirroring run_agent._emit_credits_notices, + including the free-model suppression flag).""" - def __init__(self, provider="nous"): - from agent.credits_tracker import evaluate_credits_notices + def __init__(self, provider="nous", model=""): + from agent.credits_tracker import evaluate_credits_notices, is_free_tier_model self.provider = provider + self.model = model self._credits_state = None self._credits_session_start_micros = None self._credits_latch = {"active": set(), "seen_below_90": False, "usage_band": None} self.emitted: list = [] self._eval = evaluate_credits_notices + self._is_free = is_free_tier_model def _emit_credits_notices(self): if self._credits_state is None: return - show, clear = self._eval(self._credits_state, self._credits_latch) + show, clear = self._eval( + self._credits_state, + self._credits_latch, + model_is_free=self._is_free(self.model), + ) self.emitted.append(([n.key for n in show], clear)) @@ -169,6 +176,14 @@ def test_seed_fires_depleted_at_session_open(): assert a.emitted == [(["credits.depleted"], [])] +def test_seed_depleted_suppressed_on_free_model(): + """A session that opens depleted but on a Nous ``:free`` model must NOT show + the depleted banner — inference works fine on the free tier.""" + a = _FakeAgent(model="nvidia/nemotron-3-ultra:free") + assert _seed(a, "depleted") is True + assert a.emitted == [([], [])] + + def test_seed_healthy_no_notice(): a = _FakeAgent() assert _seed(a, "healthy") is True diff --git a/tests/agent/test_credits_policy.py b/tests/agent/test_credits_policy.py index a9686e0450c..1a0104d8b4c 100644 --- a/tests/agent/test_credits_policy.py +++ b/tests/agent/test_credits_policy.py @@ -265,6 +265,143 @@ class TestDepleted: assert "credits.depleted" in keys +# ── Scenario 5b: free-model suppression of the depleted notice ─────────────── + + +class TestDepletedFreeModelSuppression: + def test_depleted_suppressed_when_model_is_free(self): + latch = fresh_latch() + s = CreditsState(paid_access=False) + to_show, to_clear = evaluate_credits_notices(s, latch, model_is_free=True) + assert all(n.key != "credits.depleted" for n in to_show) + assert "credits.depleted" not in latch["active"] + assert to_clear == [] + + def test_switch_to_free_model_clears_without_restored(self): + latch = fresh_latch() + # Depleted on a paid model → notice fires + evaluate_credits_notices(CreditsState(paid_access=False), latch) + assert "credits.depleted" in latch["active"] + # Same depleted account, but now on a free model → clear, NO "restored" + to_show, to_clear = evaluate_credits_notices( + CreditsState(paid_access=False), latch, model_is_free=True + ) + assert "credits.depleted" in to_clear + assert "credits.depleted" not in latch["active"] + assert all(n.key != "credits.restored" for n in to_show) + + def test_switch_back_to_paid_model_while_depleted_reshows(self): + latch = fresh_latch() + evaluate_credits_notices(CreditsState(paid_access=False), latch) + evaluate_credits_notices(CreditsState(paid_access=False), latch, model_is_free=True) + # Back on a paid model, still depleted → notice re-fires + to_show, to_clear = evaluate_credits_notices(CreditsState(paid_access=False), latch) + keys = [n.key for n in to_show] + assert "credits.depleted" in keys + assert "credits.depleted" in latch["active"] + + def test_genuine_recovery_on_free_model_no_spurious_restored(self): + """Recovery observed while suppressed (notice never shown) → nothing to + clear, no 'restored' (there was no visible depleted state to restore).""" + latch = fresh_latch() + evaluate_credits_notices(CreditsState(paid_access=False), latch, model_is_free=True) + to_show, to_clear = evaluate_credits_notices( + CreditsState(paid_access=True), latch, model_is_free=True + ) + assert to_clear == [] + assert all(n.key != "credits.restored" for n in to_show) + + def test_genuine_recovery_still_emits_restored_when_notice_active(self): + """paid_access flip back to True with the notice showing → clear + restored + (unchanged behaviour, regardless of the model-free flag).""" + latch = fresh_latch() + evaluate_credits_notices(CreditsState(paid_access=False), latch) + to_show, to_clear = evaluate_credits_notices( + CreditsState(paid_access=True), latch, model_is_free=True + ) + assert "credits.depleted" in to_clear + restored = [n for n in to_show if n.key == "credits.restored"] + assert len(restored) == 1 + + def test_free_flag_does_not_affect_other_notices(self): + """Usage-band and grant notices are independent of the model-free gate.""" + latch = fresh_latch() + evaluate_credits_notices(state_with_fraction(0.10), latch, model_is_free=True) + to_show, _ = evaluate_credits_notices( + state_with_fraction(0.95, paid_access=False), latch, model_is_free=True + ) + keys = [n.key for n in to_show] + assert "credits.usage" in keys + assert "credits.depleted" not in keys + + +# ── Scenario 5c: is_free_tier_model (local-data-only check) ────────────────── + + +class TestIsFreeTierModel: + def test_free_suffix_is_free(self): + from agent.credits_tracker import is_free_tier_model + + assert is_free_tier_model("nvidia/nemotron-3-ultra:free") is True + assert is_free_tier_model("Hermes-4-70B:free", "https://inference-api.nousresearch.com") is True + + def test_empty_or_paid_model_is_not_free(self): + from agent.credits_tracker import is_free_tier_model + + assert is_free_tier_model("") is False + assert is_free_tier_model("Hermes-4-405B") is False + + def test_pricing_cache_peek_zero_priced_model(self, monkeypatch): + from agent.credits_tracker import is_free_tier_model + import hermes_cli.models as models_mod + + # The picker keys the cache on the pre-/v1 root (get_pricing_for_provider + # strips a trailing /v1 before fetch_models_with_pricing). + monkeypatch.setattr( + models_mod, + "_pricing_cache", + { + "https://inference-api.nousresearch.com": { + "some/zero-priced": {"prompt": "0", "completion": "0"}, + "some/paid": {"prompt": "0.000001", "completion": "0.000002"}, + } + }, + ) + # The agent holds the /v1-suffixed URL (DEFAULT_NOUS_INFERENCE_URL) — + # the helper must normalize it down to the picker's cache key. + base = "https://inference-api.nousresearch.com/v1" + assert is_free_tier_model("some/zero-priced", base) is True + assert is_free_tier_model("some/paid", base) is False + # Pre-stripped and trailing-slash variants resolve to the same key. + assert is_free_tier_model("some/zero-priced", "https://inference-api.nousresearch.com/") is True + assert is_free_tier_model("some/zero-priced", "https://inference-api.nousresearch.com/v1/") is True + + def test_cache_miss_is_not_free_and_no_fetch(self, monkeypatch): + from agent.credits_tracker import is_free_tier_model + import hermes_cli.models as models_mod + + monkeypatch.setattr(models_mod, "_pricing_cache", {}) + + def _boom(*args, **kwargs): # any network attempt fails the test + raise AssertionError("is_free_tier_model must never hit the network") + + import urllib.request + + monkeypatch.setattr(urllib.request, "urlopen", _boom) + assert is_free_tier_model("some/model", "https://inference-api.nousresearch.com/v1") is False + + def test_exception_fails_open_to_false(self, monkeypatch): + from agent.credits_tracker import is_free_tier_model + import hermes_cli.models as models_mod + + class _Exploding: + def get(self, *_a, **_kw): + raise RuntimeError("boom") + + monkeypatch.setattr(models_mod, "_pricing_cache", _Exploding()) + assert is_free_tier_model("some/model", "https://inference-api.nousresearch.com") is False + + # ── Scenario 6: denominator none (uf is None) ────────────────────────────────