fix(credits): suppress usage gauge when top-up funds exist + add display.credits_notices toggle (#44716)

The subscription-cap usage gauge (50/75/90% bands) ignored purchased (top-up) credits. A subscription user holding top-up funds got a sticky warn banner at 90% of their cap (and, at >=100%, a permanent one alongside grant_spent) despite being fully able to keep running inference. The cap is the wrong denominator for an account that can keep spending.

- evaluate_credits_notices: purchased_micros > 0 suppresses the usage band (grant_spent already covers the cap-reached + top-up case and names the remaining balance). A top-up landing mid-session clears any band that is showing; spending the top-up down to 0 resumes the gauge.
- New display.credits_notices config (default true): false silences all credits notices. State capture and /usage are unaffected. Read once per agent (cached) in _emit_credits_notices, fail-open true.
- Docs: configuration.md display block.
2026-07-30 19:09:28 +00:00 · 2026-06-12 01:06:46 -07:00 · 2026-06-12 01:06:46 -07:00 · c196269d8d
commit c196269d8d
parent 906bee9cf7
7 changed files with 224 additions and 11 deletions
--- a/agent/credits_tracker.py
+++ b/agent/credits_tracker.py
@ -286,6 +286,16 @@ def evaluate_credits_notices(
        for band in CREDITS_USAGE_BANDS:  # ascending → last match wins = highest
            if uf >= band[0]:
                current_band = band
+    # Top-up suppression: when the account holds purchased (top-up) credits,
+    # the subscription-cap gauge is the wrong denominator — warning "90% used"
+    # at a user sitting on $50 of top-up is noise (and it previously stuck
+    # PERMANENTLY alongside grant_spent at >=100%). Suppress the usage band
+    # entirely; the cap-reached case is covered by the grant_spent info notice
+    # below, which already names the remaining top-up balance. A top-up landing
+    # mid-session flips current_band → None and the clear path below removes
+    # any showing band line.
+    if state.purchased_micros > 0:
+        current_band = None
    grant_cond = (
        state.denominator_kind == "subscription_cap"
        and uf is not None
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -1438,6 +1438,11 @@ DEFAULT_CONFIG = {
        # class of over-claim that otherwise forces users to run
        # `git status` to verify edits landed.  Set false to suppress.
        "file_mutation_verifier": True,
+        # Nous credits status-bar notices (usage bands, grant-spent, depleted /
+        # restored).  When false, no credits notices are emitted — balance data
+        # is still captured and /usage keeps working.  Off switch for sub +
+        # top-up users who find the gauge noisy.
+        "credits_notices": True,
        # Turn-completion explainer.  When true (default), the agent appends a
        # one-line explanation to its final response whenever a turn ends
        # abnormally with no usable reply — empty content after retries, a
--- a/run_agent.py
+++ b/run_agent.py
@ -2827,6 +2827,8 @@ class AIAgent:
        """
        if getattr(self, "notice_callback", None) is None and getattr(self, "notice_clear_callback", None) is None:
            return
+        if not self._credits_notices_enabled():
+            return
        state = getattr(self, "_credits_state", None)
        if state is None:
            return
@ -2850,6 +2852,29 @@ class AIAgent:
        except Exception:
            logger.warning("credits notice evaluation/emit failed", exc_info=True)

+    def _credits_notices_enabled(self) -> bool:
+        """Whether credits notices are enabled (config display.credits_notices).
+
+        Read once per agent and cached — the policy runs after every API
+        response, and the setting governs UI noise, not correctness, so a
+        config flip applying on the next session is fine.  Fail-open True
+        (preserve current behaviour) on any config error.
+        """
+        cached = getattr(self, "_credits_notices_enabled_cache", None)
+        if cached is not None:
+            return cached
+        enabled = True
+        try:
+            from hermes_cli.config import load_config as _load_config
+            _cfg = _load_config() or {}
+            _display = _cfg.get("display") if isinstance(_cfg, dict) else None
+            if isinstance(_display, dict) and "credits_notices" in _display:
+                enabled = bool(_display.get("credits_notices"))
+        except Exception:
+            enabled = True
+        self._credits_notices_enabled_cache = enabled
+        return enabled
+
    def get_credits_state(self):
        """Return the last captured CreditsState, or None."""
        return self._credits_state
--- a/tests/agent/test_credits_cold_start.py
+++ b/tests/agent/test_credits_cold_start.py
@ -49,7 +49,13 @@ def test_cold_start_opens_already_at_90pct_warns():
    assert "credits.usage" in _cold_start_notices(s)


-def test_cold_start_grant_exhausted_warns_and_grant_spent():
+def test_cold_start_grant_exhausted_grant_spent_only():
+    """Cap reached but top-up funds remain → grant_spent info notice ONLY.
+
+    The usage band is suppressed whenever purchased (top-up) credits exist:
+    the sub-cap gauge is the wrong denominator for an account that can keep
+    spending, and previously the 90/100% warn banner stuck permanently
+    alongside grant_spent."""
    s = _state(
        remaining_micros=12_340_000, subscription_micros=0,
        subscription_limit_micros=20_000_000, subscription_limit_usd="20.00",
@ -57,7 +63,7 @@ def test_cold_start_grant_exhausted_warns_and_grant_spent():
    )
    assert s.used_fraction == 1.0
    keys = _cold_start_notices(s)
-    assert "credits.usage" in keys
+    assert "credits.usage" not in keys
    assert "credits.grant_spent" in keys


--- a/tests/agent/test_credits_policy.py
+++ b/tests/agent/test_credits_policy.py
@ -477,17 +477,17 @@ class TestNoticeCopy:

 class TestSeverityOrder:
    def test_multiple_new_notices_ordered_ascending_severity(self):
-        """warn90 < grant_spent < depleted in to_show when all fire in one call."""
-        # Construct a state where all three conditions fire simultaneously
-        # on first call (no latch state yet):
-        # - warn90: uf >= 0.9 AND seen_below_90 must be True → won't fire fresh latch
-        # So we pre-seed seen_below_90=True to allow warn90 to fire.
+        """grant_spent < depleted in to_show when both fire in one call.
+
+        (usage is suppressed here: purchased>0 — see TestTopUpSuppression.
+        usage + grant_spent are now mutually exclusive by design.)
+        """
        latch = {"active": set(), "seen_below_90": True, "usage_band": None}

        # Build state: subscription_cap, uf >= 1.0, purchased_micros > 0, NOT paid_access
-        # warn90_cond: uf >= 0.9 ✓ (uf=1.0)
        # grant_cond: subscription_cap + uf >= 1.0 + purchased > 0 ✓
        # depleted_cond: not paid_access ✓
+        # usage band: suppressed (purchased > 0)
        s = CreditsState(
            subscription_limit_micros=20_000_000,
            subscription_limit_usd="20.00",
@ -499,13 +499,100 @@ class TestSeverityOrder:
        )
        to_show, _ = evaluate_credits_notices(s, latch)
        keys = [n.key for n in to_show]
-        assert "credits.usage" in keys
+        assert "credits.usage" not in keys
        assert "credits.grant_spent" in keys
        assert "credits.depleted" in keys
-        # Ascending severity: warn90 before grant_spent before depleted
-        assert keys.index("credits.usage") < keys.index("credits.grant_spent")
+        # Ascending severity: grant_spent before depleted
        assert keys.index("credits.grant_spent") < keys.index("credits.depleted")

+    def test_usage_before_depleted_without_topup(self):
+        """With no top-up funds, usage fires and precedes depleted."""
+        latch = {"active": set(), "seen_below_90": True, "usage_band": None}
+        s = CreditsState(
+            subscription_limit_micros=20_000_000,
+            subscription_limit_usd="20.00",
+            subscription_micros=0,  # uf = 1.0
+            denominator_kind="subscription_cap",
+            purchased_micros=0,
+            purchased_usd="0.00",
+            paid_access=False,
+        )
+        to_show, _ = evaluate_credits_notices(s, latch)
+        keys = [n.key for n in to_show]
+        assert "credits.usage" in keys
+        assert "credits.depleted" in keys
+        assert keys.index("credits.usage") < keys.index("credits.depleted")
+
+
+# ── Scenario 8b: top-up suppression of the usage gauge ───────────────────────
+
+
+class TestTopUpSuppression:
+    """purchased_micros > 0 suppresses the sub-cap usage gauge: the cap is the
+    wrong denominator for an account that can keep spending top-up funds."""
+
+    def test_no_usage_band_with_topup_at_90pct(self):
+        latch = fresh_latch()
+        evaluate_credits_notices(
+            state_with_fraction(0.10, purchased_micros=5_000_000, purchased_usd="5.00"),
+            latch,
+        )
+        to_show, to_clear = evaluate_credits_notices(
+            state_with_fraction(0.95, purchased_micros=5_000_000, purchased_usd="5.00"),
+            latch,
+        )
+        assert all(n.key != "credits.usage" for n in to_show)
+        assert latch["usage_band"] is None
+
+    def test_topup_landing_mid_session_clears_active_band(self):
+        """A showing 90% warn must clear when a top-up lands (purchased 0 → >0)."""
+        latch = fresh_latch()
+        evaluate_credits_notices(state_with_fraction(0.10), latch)
+        evaluate_credits_notices(state_with_fraction(0.95), latch)
+        assert latch["usage_band"] == 90
+        to_show, to_clear = evaluate_credits_notices(
+            state_with_fraction(0.95, purchased_micros=10_000_000, purchased_usd="10.00"),
+            latch,
+        )
+        assert "credits.usage" in to_clear
+        assert latch["usage_band"] is None
+        assert all(n.key != "credits.usage" for n in to_show)
+
+    def test_band_resumes_after_topup_spent(self):
+        """purchased back to 0 with usage still in-band → gauge resumes."""
+        latch = fresh_latch()
+        evaluate_credits_notices(state_with_fraction(0.10), latch)
+        evaluate_credits_notices(
+            state_with_fraction(0.95, purchased_micros=10_000_000, purchased_usd="10.00"),
+            latch,
+        )
+        assert latch["usage_band"] is None
+        to_show, _ = evaluate_credits_notices(state_with_fraction(0.95), latch)
+        n = next(n for n in to_show if n.key == "credits.usage")
+        assert "90%" in n.text
+        assert latch["usage_band"] == 90
+
+    def test_grant_spent_still_fires_with_topup(self):
+        """Suppression only affects the gauge — grant_spent (which NEEDS purchased>0)
+        is untouched."""
+        latch = fresh_latch()
+        s = state_with_fraction(
+            1.0,
+            denominator_kind="subscription_cap",
+            purchased_micros=12_340_000,
+            purchased_usd="12.34",
+        )
+        to_show, _ = evaluate_credits_notices(s, latch)
+        keys = [n.key for n in to_show]
+        assert "credits.grant_spent" in keys
+        assert "credits.usage" not in keys
+
+    def test_depleted_unaffected_by_topup_suppression(self):
+        latch = fresh_latch()
+        s = CreditsState(paid_access=False, purchased_micros=5_000_000, purchased_usd="5.00")
+        to_show, _ = evaluate_credits_notices(s, latch)
+        assert any(n.key == "credits.depleted" for n in to_show)
+

 # ── Invariant: never fire + clear same key in one call ────────────────────────

--- a/tests/run_agent/test_credits_notices_toggle.py
+++ b/tests/run_agent/test_credits_notices_toggle.py
@ -0,0 +1,79 @@
+"""Tests for the display.credits_notices config gate on _emit_credits_notices.
+
+The toggle suppresses notice EMISSION only — credits state capture and /usage
+stay live. Uses the bare-AIAgent pattern (object.__new__) from test_notice_spine.py.
+"""
+from __future__ import annotations
+
+from unittest.mock import patch
+
+from agent.credits_tracker import CreditsState
+from run_agent import AIAgent
+
+
+def _agent_with_state(*, paid_access: bool = False) -> AIAgent:
+    """Bare agent with a depleted-shaped state that would normally emit."""
+    agent = object.__new__(AIAgent)
+    agent.notice_callback = None
+    agent.notice_clear_callback = None
+    agent._credits_state = CreditsState(paid_access=paid_access)
+    agent.model = ""
+    agent.base_url = ""
+    return agent
+
+
+def _cfg(enabled):
+    return {"display": {"credits_notices": enabled}}
+
+
+class TestCreditsNoticesToggle:
+    def test_disabled_emits_nothing(self):
+        agent = _agent_with_state()
+        received = []
+        agent.notice_callback = received.append
+        with patch("hermes_cli.config.load_config", return_value=_cfg(False)):
+            agent._emit_credits_notices()
+        assert received == []
+
+    def test_enabled_emits_depleted(self):
+        agent = _agent_with_state()
+        received = []
+        agent.notice_callback = received.append
+        with patch("hermes_cli.config.load_config", return_value=_cfg(True)):
+            agent._emit_credits_notices()
+        assert any(getattr(n, "key", None) == "credits.depleted" for n in received)
+
+    def test_default_missing_key_emits(self):
+        """Key absent from config → fail-open True (current behaviour preserved)."""
+        agent = _agent_with_state()
+        received = []
+        agent.notice_callback = received.append
+        with patch("hermes_cli.config.load_config", return_value={"display": {}}):
+            agent._emit_credits_notices()
+        assert any(getattr(n, "key", None) == "credits.depleted" for n in received)
+
+    def test_config_error_fails_open(self):
+        agent = _agent_with_state()
+        received = []
+        agent.notice_callback = received.append
+        with patch("hermes_cli.config.load_config", side_effect=RuntimeError("boom")):
+            agent._emit_credits_notices()
+        assert any(getattr(n, "key", None) == "credits.depleted" for n in received)
+
+    def test_toggle_cached_per_agent(self):
+        """load_config is consulted once per agent, not once per emission."""
+        agent = _agent_with_state()
+        agent.notice_callback = lambda n: None
+        with patch("hermes_cli.config.load_config", return_value=_cfg(True)) as mock_load:
+            agent._emit_credits_notices()
+            agent._emit_credits_notices()
+        assert mock_load.call_count == 1
+
+    def test_disabled_state_still_cached_for_usage(self):
+        """The gate stops emission only — get_credits_state still returns data."""
+        agent = _agent_with_state()
+        agent.notice_callback = lambda n: None
+        agent._credits_session_start_micros = None
+        with patch("hermes_cli.config.load_config", return_value=_cfg(False)):
+            agent._emit_credits_notices()
+        assert agent.get_credits_state() is not None
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@ -1264,6 +1264,7 @@ display:
    enabled: false
    fields: ["model", "context_pct", "cwd"]
  file_mutation_verifier: true    # Append an advisory footer when write_file/patch calls failed this turn
+  credits_notices: true   # Nous credits status-bar notices (usage bands, grant-spent, depleted). false = silence them; /usage still works
  language: en            # UI language for static messages (approval prompts, some gateway replies). en | zh | zh-hant | ja | de | es | fr | tr | uk | af | ko | it | ga | pt | ru | hu
 ```