From 6110aed9be2f90c4982ca1022238573ed25d120b Mon Sep 17 00:00:00 2001
From: rob-maron <132852777+rob-maron@users.noreply.github.com>
Date: Wed, 10 Jun 2026 14:25:06 -0400
Subject: [PATCH] Suppress "Credit access paused" notice on free models
 (#43669)

* Don't show the "Credit access paused" notice on free models

* Address PR review comments
---
 agent/credits_tracker.py               |  85 ++++++++++++---
 run_agent.py                           |  11 +-
 tests/agent/test_credits_cold_start.py |  23 ++++-
 tests/agent/test_credits_policy.py     | 137 +++++++++++++++++++++++++
 4 files changed, 238 insertions(+), 18 deletions(-)

diff --git a/agent/credits_tracker.py b/agent/credits_tracker.py
index 79d05dbb196..f84bc9a7c0e 100644
--- a/agent/credits_tracker.py
+++ b/agent/credits_tracker.py
@@ -194,17 +194,71 @@ class AgentNotice:
     id: Optional[str] = None
 
 
+# ── is_free_tier_model (local-data-only free-model check) ────────────────────
+
+
+def is_free_tier_model(model: str, base_url: str = "") -> bool:
+    """Return True when *model* is a Nous free-tier model, using ONLY local data.
+
+    Two signals, both zero-network:
+
+    1. The ``:free`` suffix — the canonical Nous free SKU marker (e.g.
+       ``nvidia/nemotron-3-ultra:free``). Free by construction on the API side
+       (spend is forced to 0 for ``:free`` ids).
+    2. A peek into the in-process pricing cache in ``hermes_cli.models``
+       (populated when the model picker fetched ``/v1/models`` pricing for
+       *base_url*). PEEK ONLY — a cache miss never triggers a fetch. This is
+       CLI/TUI-session best-effort: gateway sessions never run the picker's
+       pricing fetch, so suppression there rests entirely on the ``:free``
+       suffix (which all Nous free SKUs carry).
+
+    Fail-open to False (the depleted notice still shows) on any error: wrongly
+    showing the warning is recoverable noise; wrongly hiding it on a paid model
+    would mask a real billing block.
+    """
+    if not model:
+        return False
+    if model.endswith(":free"):
+        return True
+    if not base_url:
+        return False
+    try:
+        from hermes_cli.models import _is_model_free, _pricing_cache
+
+        # Mirror get_pricing_for_provider's key normalization: the agent's
+        # Nous base_url is /v1-suffixed (https://inference-api.nousresearch.com/v1)
+        # but the picker keys _pricing_cache on the pre-/v1 root.
+        key = base_url.rstrip("/")
+        if key.endswith("/v1"):
+            key = key[:-3].rstrip("/")
+        pricing = _pricing_cache.get(key)
+        if not pricing:
+            return False
+        return _is_model_free(model, pricing)
+    except Exception:
+        return False
+
+
 # ── evaluate_credits_notices (pure reconciliation function) ──────────────────
 
 
 def evaluate_credits_notices(
     state: CreditsState,
     latch: dict,
+    *,
+    model_is_free: bool = False,
 ) -> tuple[list[AgentNotice], list[str]]:
     """Reconcile credits notices against the latch. Mutates ``latch`` IN PLACE.
 
     latch = {"active": set[str], "seen_below_90": bool, "usage_band": Optional[int]}.
 
+    ``model_is_free``: True when the session's active model is a Nous free-tier
+    model (see :func:`is_free_tier_model`). Suppresses the ``credits.depleted``
+    notice — a depleted account on a free model can still run inference, so the
+    error banner is noise (and confuses free-tier users who never had credits).
+    Suppression does NOT emit the "restored" success notice; that fires only
+    when an active depleted notice clears on a genuine ``paid_access`` recovery.
+
     Returns ``(to_show: list[AgentNotice], to_clear: list[str])``.
     Caller emits to_clear FIRST, then to_show.
 
@@ -284,7 +338,11 @@ def evaluate_credits_notices(
         active.discard("credits.grant_spent")
 
     # ── depleted ─────────────────────────────────────────────────────────────
-    if depleted_cond and "credits.depleted" not in active:
+    # Suppressed while the active model is free: inference still works there,
+    # so the error banner would just alarm users (free-tier users especially,
+    # who never had paid credits to "lose").
+    show_depleted = depleted_cond and not model_is_free
+    if show_depleted and "credits.depleted" not in active:
         to_show.append(
             AgentNotice(
                 text="✕ Credit access paused · run /usage for balance",
@@ -295,20 +353,23 @@ def evaluate_credits_notices(
             )
         )
         active.add("credits.depleted")
-    elif "credits.depleted" in active and not depleted_cond:
+    elif "credits.depleted" in active and not show_depleted:
         to_clear.append("credits.depleted")
         active.discard("credits.depleted")
-        # Recovery: also emit the success notice
-        to_show.append(
-            AgentNotice(
-                text="✓ Credit access restored",
-                level="success",
-                kind="ttl",
-                ttl_ms=CREDITS_RESTORED_TTL_MS,
-                key="credits.restored",
-                id="credits.restored",
+        if not depleted_cond:
+            # Genuine recovery (paid_access flipped back True): also emit the
+            # success notice. A clear caused by switching to a free model while
+            # still depleted must NOT claim access was restored.
+            to_show.append(
+                AgentNotice(
+                    text="✓ Credit access restored",
+                    level="success",
+                    kind="ttl",
+                    ttl_ms=CREDITS_RESTORED_TTL_MS,
+                    key="credits.restored",
+                    id="credits.restored",
+                )
             )
-        )
 
     return (to_show, to_clear)
 
diff --git a/run_agent.py b/run_agent.py
index 5465bb9ae2d..e81bf3b93e7 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2831,11 +2831,18 @@ class AIAgent:
         if state is None:
             return
         try:
-            from agent.credits_tracker import evaluate_credits_notices
+            from agent.credits_tracker import evaluate_credits_notices, is_free_tier_model
             latch = getattr(self, "_credits_latch", None)
             if latch is None:
                 latch = self._credits_latch = {"active": set(), "seen_below_90": False, "usage_band": None}
-            to_show, to_clear = evaluate_credits_notices(state, latch)
+            # Free-model gate: a depleted account on a free model can still run
+            # inference, so the depleted error banner is suppressed. Local-data
+            # only (":free" suffix + pricing-cache peek) — never a network call.
+            model_is_free = is_free_tier_model(
+                getattr(self, "model", "") or "",
+                getattr(self, "base_url", "") or "",
+            )
+            to_show, to_clear = evaluate_credits_notices(state, latch, model_is_free=model_is_free)
             for key in to_clear:        # clears FIRST …
                 self._emit_notice_clear(key)
             for notice in to_show:      # … then shows (depleted lands last in a latest-wins slot)
diff --git a/tests/agent/test_credits_cold_start.py b/tests/agent/test_credits_cold_start.py
index d48b6f972c0..9d3c3410874 100644
--- a/tests/agent/test_credits_cold_start.py
+++ b/tests/agent/test_credits_cold_start.py
@@ -123,22 +123,29 @@ def test_dev_fixtures_drive_cold_start():
 
 class _FakeAgent:
     """Minimal agent surface for the seed helper: state slots + an emit that runs
-    the real policy against the latch."""
+    the real policy against the latch (mirroring run_agent._emit_credits_notices,
+    including the free-model suppression flag)."""
 
-    def __init__(self, provider="nous"):
-        from agent.credits_tracker import evaluate_credits_notices
+    def __init__(self, provider="nous", model=""):
+        from agent.credits_tracker import evaluate_credits_notices, is_free_tier_model
 
         self.provider = provider
+        self.model = model
         self._credits_state = None
         self._credits_session_start_micros = None
         self._credits_latch = {"active": set(), "seen_below_90": False, "usage_band": None}
         self.emitted: list = []
         self._eval = evaluate_credits_notices
+        self._is_free = is_free_tier_model
 
     def _emit_credits_notices(self):
         if self._credits_state is None:
             return
-        show, clear = self._eval(self._credits_state, self._credits_latch)
+        show, clear = self._eval(
+            self._credits_state,
+            self._credits_latch,
+            model_is_free=self._is_free(self.model),
+        )
         self.emitted.append(([n.key for n in show], clear))
 
 
@@ -169,6 +176,14 @@ def test_seed_fires_depleted_at_session_open():
     assert a.emitted == [(["credits.depleted"], [])]
 
 
+def test_seed_depleted_suppressed_on_free_model():
+    """A session that opens depleted but on a Nous ``:free`` model must NOT show
+    the depleted banner — inference works fine on the free tier."""
+    a = _FakeAgent(model="nvidia/nemotron-3-ultra:free")
+    assert _seed(a, "depleted") is True
+    assert a.emitted == [([], [])]
+
+
 def test_seed_healthy_no_notice():
     a = _FakeAgent()
     assert _seed(a, "healthy") is True
diff --git a/tests/agent/test_credits_policy.py b/tests/agent/test_credits_policy.py
index a9686e0450c..1a0104d8b4c 100644
--- a/tests/agent/test_credits_policy.py
+++ b/tests/agent/test_credits_policy.py
@@ -265,6 +265,143 @@ class TestDepleted:
         assert "credits.depleted" in keys
 
 
+# ── Scenario 5b: free-model suppression of the depleted notice ───────────────
+
+
+class TestDepletedFreeModelSuppression:
+    def test_depleted_suppressed_when_model_is_free(self):
+        latch = fresh_latch()
+        s = CreditsState(paid_access=False)
+        to_show, to_clear = evaluate_credits_notices(s, latch, model_is_free=True)
+        assert all(n.key != "credits.depleted" for n in to_show)
+        assert "credits.depleted" not in latch["active"]
+        assert to_clear == []
+
+    def test_switch_to_free_model_clears_without_restored(self):
+        latch = fresh_latch()
+        # Depleted on a paid model → notice fires
+        evaluate_credits_notices(CreditsState(paid_access=False), latch)
+        assert "credits.depleted" in latch["active"]
+        # Same depleted account, but now on a free model → clear, NO "restored"
+        to_show, to_clear = evaluate_credits_notices(
+            CreditsState(paid_access=False), latch, model_is_free=True
+        )
+        assert "credits.depleted" in to_clear
+        assert "credits.depleted" not in latch["active"]
+        assert all(n.key != "credits.restored" for n in to_show)
+
+    def test_switch_back_to_paid_model_while_depleted_reshows(self):
+        latch = fresh_latch()
+        evaluate_credits_notices(CreditsState(paid_access=False), latch)
+        evaluate_credits_notices(CreditsState(paid_access=False), latch, model_is_free=True)
+        # Back on a paid model, still depleted → notice re-fires
+        to_show, to_clear = evaluate_credits_notices(CreditsState(paid_access=False), latch)
+        keys = [n.key for n in to_show]
+        assert "credits.depleted" in keys
+        assert "credits.depleted" in latch["active"]
+
+    def test_genuine_recovery_on_free_model_no_spurious_restored(self):
+        """Recovery observed while suppressed (notice never shown) → nothing to
+        clear, no 'restored' (there was no visible depleted state to restore)."""
+        latch = fresh_latch()
+        evaluate_credits_notices(CreditsState(paid_access=False), latch, model_is_free=True)
+        to_show, to_clear = evaluate_credits_notices(
+            CreditsState(paid_access=True), latch, model_is_free=True
+        )
+        assert to_clear == []
+        assert all(n.key != "credits.restored" for n in to_show)
+
+    def test_genuine_recovery_still_emits_restored_when_notice_active(self):
+        """paid_access flip back to True with the notice showing → clear + restored
+        (unchanged behaviour, regardless of the model-free flag)."""
+        latch = fresh_latch()
+        evaluate_credits_notices(CreditsState(paid_access=False), latch)
+        to_show, to_clear = evaluate_credits_notices(
+            CreditsState(paid_access=True), latch, model_is_free=True
+        )
+        assert "credits.depleted" in to_clear
+        restored = [n for n in to_show if n.key == "credits.restored"]
+        assert len(restored) == 1
+
+    def test_free_flag_does_not_affect_other_notices(self):
+        """Usage-band and grant notices are independent of the model-free gate."""
+        latch = fresh_latch()
+        evaluate_credits_notices(state_with_fraction(0.10), latch, model_is_free=True)
+        to_show, _ = evaluate_credits_notices(
+            state_with_fraction(0.95, paid_access=False), latch, model_is_free=True
+        )
+        keys = [n.key for n in to_show]
+        assert "credits.usage" in keys
+        assert "credits.depleted" not in keys
+
+
+# ── Scenario 5c: is_free_tier_model (local-data-only check) ──────────────────
+
+
+class TestIsFreeTierModel:
+    def test_free_suffix_is_free(self):
+        from agent.credits_tracker import is_free_tier_model
+
+        assert is_free_tier_model("nvidia/nemotron-3-ultra:free") is True
+        assert is_free_tier_model("Hermes-4-70B:free", "https://inference-api.nousresearch.com") is True
+
+    def test_empty_or_paid_model_is_not_free(self):
+        from agent.credits_tracker import is_free_tier_model
+
+        assert is_free_tier_model("") is False
+        assert is_free_tier_model("Hermes-4-405B") is False
+
+    def test_pricing_cache_peek_zero_priced_model(self, monkeypatch):
+        from agent.credits_tracker import is_free_tier_model
+        import hermes_cli.models as models_mod
+
+        # The picker keys the cache on the pre-/v1 root (get_pricing_for_provider
+        # strips a trailing /v1 before fetch_models_with_pricing).
+        monkeypatch.setattr(
+            models_mod,
+            "_pricing_cache",
+            {
+                "https://inference-api.nousresearch.com": {
+                    "some/zero-priced": {"prompt": "0", "completion": "0"},
+                    "some/paid": {"prompt": "0.000001", "completion": "0.000002"},
+                }
+            },
+        )
+        # The agent holds the /v1-suffixed URL (DEFAULT_NOUS_INFERENCE_URL) —
+        # the helper must normalize it down to the picker's cache key.
+        base = "https://inference-api.nousresearch.com/v1"
+        assert is_free_tier_model("some/zero-priced", base) is True
+        assert is_free_tier_model("some/paid", base) is False
+        # Pre-stripped and trailing-slash variants resolve to the same key.
+        assert is_free_tier_model("some/zero-priced", "https://inference-api.nousresearch.com/") is True
+        assert is_free_tier_model("some/zero-priced", "https://inference-api.nousresearch.com/v1/") is True
+
+    def test_cache_miss_is_not_free_and_no_fetch(self, monkeypatch):
+        from agent.credits_tracker import is_free_tier_model
+        import hermes_cli.models as models_mod
+
+        monkeypatch.setattr(models_mod, "_pricing_cache", {})
+
+        def _boom(*args, **kwargs):  # any network attempt fails the test
+            raise AssertionError("is_free_tier_model must never hit the network")
+
+        import urllib.request
+
+        monkeypatch.setattr(urllib.request, "urlopen", _boom)
+        assert is_free_tier_model("some/model", "https://inference-api.nousresearch.com/v1") is False
+
+    def test_exception_fails_open_to_false(self, monkeypatch):
+        from agent.credits_tracker import is_free_tier_model
+        import hermes_cli.models as models_mod
+
+        class _Exploding:
+            def get(self, *_a, **_kw):
+                raise RuntimeError("boom")
+
+        monkeypatch.setattr(models_mod, "_pricing_cache", _Exploding())
+        assert is_free_tier_model("some/model", "https://inference-api.nousresearch.com") is False
+
+
 # ── Scenario 6: denominator none (uf is None) ────────────────────────────────