mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-14 09:11:54 +00:00
Suppress "Credit access paused" notice on free models (#43669)
* Don't show the credits message on free models * Address PR review comments
This commit is contained in:
parent
6de3963e37
commit
6110aed9be
4 changed files with 238 additions and 18 deletions
|
|
@ -194,17 +194,71 @@ class AgentNotice:
|
||||||
id: Optional[str] = None
|
id: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# ── is_free_tier_model (local-data-only free-model check) ────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def is_free_tier_model(model: str, base_url: str = "") -> bool:
    """Report whether *model* is a Nous free-tier model, from local data only.

    No network request is ever made. Two signals are consulted, in order:

    1. A ``:free`` suffix on the model id, the canonical Nous free SKU marker
       (e.g. ``nvidia/nemotron-3-ultra:free``).
    2. The in-process ``_pricing_cache`` in ``hermes_cli.models``, looked up
       under *base_url* after trailing slashes and a trailing ``/v1`` have been
       removed (the picker keys the cache on the pre-``/v1`` root). The cache
       is only read: a miss returns False and never triggers a fetch. Gateway
       sessions never run the picker's pricing fetch, so there the result
       rests on the ``:free`` suffix alone.

    Args:
        model: Model identifier; an empty value is never free.
        base_url: Inference base URL used to select the pricing-cache entry.
            When empty, only the suffix check applies.

    Returns:
        True if the model is recognised as free, otherwise False. Any error
        during the cache lookup also yields False: showing the depleted notice
        unnecessarily is recoverable noise, whereas hiding it on a paid model
        would mask a real billing block.
    """
    if not model:
        return False

    suffix_says_free = model.endswith(":free")
    if suffix_says_free or not base_url:
        # Either the suffix settles it, or there is no URL to key the cache on.
        return suffix_says_free

    try:
        from hermes_cli.models import _is_model_free, _pricing_cache

        # The agent's Nous base_url carries a /v1 suffix
        # (https://inference-api.nousresearch.com/v1) while the picker stores
        # pricing under the root URL, so normalize to the root before the peek.
        root = base_url.rstrip("/")
        if root.endswith("/v1"):
            root = root[: -len("/v1")].rstrip("/")

        cached = _pricing_cache.get(root)
        return _is_model_free(model, cached) if cached else False
    except Exception:
        return False
|
||||||
|
|
||||||
|
|
||||||
# ── evaluate_credits_notices (pure reconciliation function) ──────────────────
|
# ── evaluate_credits_notices (pure reconciliation function) ──────────────────
|
||||||
|
|
||||||
|
|
||||||
def evaluate_credits_notices(
|
def evaluate_credits_notices(
|
||||||
state: CreditsState,
|
state: CreditsState,
|
||||||
latch: dict,
|
latch: dict,
|
||||||
|
*,
|
||||||
|
model_is_free: bool = False,
|
||||||
) -> tuple[list[AgentNotice], list[str]]:
|
) -> tuple[list[AgentNotice], list[str]]:
|
||||||
"""Reconcile credits notices against the latch. Mutates ``latch`` IN PLACE.
|
"""Reconcile credits notices against the latch. Mutates ``latch`` IN PLACE.
|
||||||
|
|
||||||
latch = {"active": set[str], "seen_below_90": bool, "usage_band": Optional[int]}.
|
latch = {"active": set[str], "seen_below_90": bool, "usage_band": Optional[int]}.
|
||||||
|
|
||||||
|
``model_is_free``: True when the session's active model is a Nous free-tier
|
||||||
|
model (see :func:`is_free_tier_model`). Suppresses the ``credits.depleted``
|
||||||
|
notice — a depleted account on a free model can keep inferencing, so the
|
||||||
|
error banner is noise (and confuses free-tier users who never had credits).
|
||||||
|
Suppression does NOT emit the "restored" success notice; that fires only on
|
||||||
|
a genuine ``paid_access`` flip back to True.
|
||||||
|
|
||||||
Returns ``(to_show: list[AgentNotice], to_clear: list[str])``.
|
Returns ``(to_show: list[AgentNotice], to_clear: list[str])``.
|
||||||
Caller emits to_clear FIRST, then to_show.
|
Caller emits to_clear FIRST, then to_show.
|
||||||
|
|
||||||
|
|
@ -284,7 +338,11 @@ def evaluate_credits_notices(
|
||||||
active.discard("credits.grant_spent")
|
active.discard("credits.grant_spent")
|
||||||
|
|
||||||
# ── depleted ─────────────────────────────────────────────────────────────
|
# ── depleted ─────────────────────────────────────────────────────────────
|
||||||
if depleted_cond and "credits.depleted" not in active:
|
# Suppressed while the active model is free: inference still works there,
|
||||||
|
# so the error banner would just alarm users (free-tier users especially,
|
||||||
|
# who never had paid credits to "lose").
|
||||||
|
show_depleted = depleted_cond and not model_is_free
|
||||||
|
if show_depleted and "credits.depleted" not in active:
|
||||||
to_show.append(
|
to_show.append(
|
||||||
AgentNotice(
|
AgentNotice(
|
||||||
text="✕ Credit access paused · run /usage for balance",
|
text="✕ Credit access paused · run /usage for balance",
|
||||||
|
|
@ -295,20 +353,23 @@ def evaluate_credits_notices(
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
active.add("credits.depleted")
|
active.add("credits.depleted")
|
||||||
elif "credits.depleted" in active and not depleted_cond:
|
elif "credits.depleted" in active and not show_depleted:
|
||||||
to_clear.append("credits.depleted")
|
to_clear.append("credits.depleted")
|
||||||
active.discard("credits.depleted")
|
active.discard("credits.depleted")
|
||||||
# Recovery: also emit the success notice
|
if not depleted_cond:
|
||||||
to_show.append(
|
# Genuine recovery (paid_access flipped back True): also emit the
|
||||||
AgentNotice(
|
# success notice. A clear caused by switching to a free model while
|
||||||
text="✓ Credit access restored",
|
# still depleted must NOT claim access was restored.
|
||||||
level="success",
|
to_show.append(
|
||||||
kind="ttl",
|
AgentNotice(
|
||||||
ttl_ms=CREDITS_RESTORED_TTL_MS,
|
text="✓ Credit access restored",
|
||||||
key="credits.restored",
|
level="success",
|
||||||
id="credits.restored",
|
kind="ttl",
|
||||||
|
ttl_ms=CREDITS_RESTORED_TTL_MS,
|
||||||
|
key="credits.restored",
|
||||||
|
id="credits.restored",
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
|
||||||
|
|
||||||
return (to_show, to_clear)
|
return (to_show, to_clear)
|
||||||
|
|
||||||
|
|
|
||||||
11
run_agent.py
11
run_agent.py
|
|
@ -2831,11 +2831,18 @@ class AIAgent:
|
||||||
if state is None:
|
if state is None:
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
from agent.credits_tracker import evaluate_credits_notices
|
from agent.credits_tracker import evaluate_credits_notices, is_free_tier_model
|
||||||
latch = getattr(self, "_credits_latch", None)
|
latch = getattr(self, "_credits_latch", None)
|
||||||
if latch is None:
|
if latch is None:
|
||||||
latch = self._credits_latch = {"active": set(), "seen_below_90": False, "usage_band": None}
|
latch = self._credits_latch = {"active": set(), "seen_below_90": False, "usage_band": None}
|
||||||
to_show, to_clear = evaluate_credits_notices(state, latch)
|
# Free-model gate: a depleted account on a free model can still
|
||||||
|
# inference, so the depleted error banner is suppressed. Local-data
|
||||||
|
# only (":free" suffix + pricing-cache peek) — never a network call.
|
||||||
|
model_is_free = is_free_tier_model(
|
||||||
|
getattr(self, "model", "") or "",
|
||||||
|
getattr(self, "base_url", "") or "",
|
||||||
|
)
|
||||||
|
to_show, to_clear = evaluate_credits_notices(state, latch, model_is_free=model_is_free)
|
||||||
for key in to_clear: # clears FIRST …
|
for key in to_clear: # clears FIRST …
|
||||||
self._emit_notice_clear(key)
|
self._emit_notice_clear(key)
|
||||||
for notice in to_show: # … then shows (depleted lands last in a latest-wins slot)
|
for notice in to_show: # … then shows (depleted lands last in a latest-wins slot)
|
||||||
|
|
|
||||||
|
|
@ -123,22 +123,29 @@ def test_dev_fixtures_drive_cold_start():
|
||||||
|
|
||||||
class _FakeAgent:
|
class _FakeAgent:
|
||||||
"""Minimal agent surface for the seed helper: state slots + an emit that runs
|
"""Minimal agent surface for the seed helper: state slots + an emit that runs
|
||||||
the real policy against the latch."""
|
the real policy against the latch (mirroring run_agent._emit_credits_notices,
|
||||||
|
including the free-model suppression flag)."""
|
||||||
|
|
||||||
def __init__(self, provider="nous"):
|
def __init__(self, provider="nous", model=""):
|
||||||
from agent.credits_tracker import evaluate_credits_notices
|
from agent.credits_tracker import evaluate_credits_notices, is_free_tier_model
|
||||||
|
|
||||||
self.provider = provider
|
self.provider = provider
|
||||||
|
self.model = model
|
||||||
self._credits_state = None
|
self._credits_state = None
|
||||||
self._credits_session_start_micros = None
|
self._credits_session_start_micros = None
|
||||||
self._credits_latch = {"active": set(), "seen_below_90": False, "usage_band": None}
|
self._credits_latch = {"active": set(), "seen_below_90": False, "usage_band": None}
|
||||||
self.emitted: list = []
|
self.emitted: list = []
|
||||||
self._eval = evaluate_credits_notices
|
self._eval = evaluate_credits_notices
|
||||||
|
self._is_free = is_free_tier_model
|
||||||
|
|
||||||
def _emit_credits_notices(self):
|
def _emit_credits_notices(self):
|
||||||
if self._credits_state is None:
|
if self._credits_state is None:
|
||||||
return
|
return
|
||||||
show, clear = self._eval(self._credits_state, self._credits_latch)
|
show, clear = self._eval(
|
||||||
|
self._credits_state,
|
||||||
|
self._credits_latch,
|
||||||
|
model_is_free=self._is_free(self.model),
|
||||||
|
)
|
||||||
self.emitted.append(([n.key for n in show], clear))
|
self.emitted.append(([n.key for n in show], clear))
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -169,6 +176,14 @@ def test_seed_fires_depleted_at_session_open():
|
||||||
assert a.emitted == [(["credits.depleted"], [])]
|
assert a.emitted == [(["credits.depleted"], [])]
|
||||||
|
|
||||||
|
|
||||||
|
def test_seed_depleted_suppressed_on_free_model():
    """Opening a depleted session on a Nous ``:free`` model must emit no
    depleted banner, because inference keeps working on the free tier."""
    agent = _FakeAgent(model="nvidia/nemotron-3-ultra:free")
    seeded = _seed(agent, "depleted")
    assert seeded is True
    # Exactly one evaluation ran, with nothing to show and nothing to clear.
    assert agent.emitted == [([], [])]
|
||||||
|
|
||||||
|
|
||||||
def test_seed_healthy_no_notice():
|
def test_seed_healthy_no_notice():
|
||||||
a = _FakeAgent()
|
a = _FakeAgent()
|
||||||
assert _seed(a, "healthy") is True
|
assert _seed(a, "healthy") is True
|
||||||
|
|
|
||||||
|
|
@ -265,6 +265,143 @@ class TestDepleted:
|
||||||
assert "credits.depleted" in keys
|
assert "credits.depleted" in keys
|
||||||
|
|
||||||
|
|
||||||
|
# ── Scenario 5b: free-model suppression of the depleted notice ───────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestDepletedFreeModelSuppression:
    """Free-model gating of the ``credits.depleted`` notice."""

    def test_depleted_suppressed_when_model_is_free(self):
        latch = fresh_latch()
        depleted = CreditsState(paid_access=False)
        shown, cleared = evaluate_credits_notices(depleted, latch, model_is_free=True)
        assert "credits.depleted" not in [n.key for n in shown]
        assert "credits.depleted" not in latch["active"]
        assert cleared == []

    def test_switch_to_free_model_clears_without_restored(self):
        latch = fresh_latch()
        # Depleted on a paid model: the notice fires and is latched.
        evaluate_credits_notices(CreditsState(paid_access=False), latch)
        assert "credits.depleted" in latch["active"]
        # Same depleted account, now on a free model: cleared, but no "restored".
        shown, cleared = evaluate_credits_notices(
            CreditsState(paid_access=False),
            latch,
            model_is_free=True,
        )
        assert "credits.depleted" in cleared
        assert "credits.depleted" not in latch["active"]
        assert "credits.restored" not in [n.key for n in shown]

    def test_switch_back_to_paid_model_while_depleted_reshows(self):
        latch = fresh_latch()
        evaluate_credits_notices(CreditsState(paid_access=False), latch)
        evaluate_credits_notices(CreditsState(paid_access=False), latch, model_is_free=True)
        # Back on a paid model and still depleted: the notice fires again.
        shown, _cleared = evaluate_credits_notices(CreditsState(paid_access=False), latch)
        shown_keys = [n.key for n in shown]
        assert "credits.depleted" in shown_keys
        assert "credits.depleted" in latch["active"]

    def test_genuine_recovery_on_free_model_no_spurious_restored(self):
        """Recovery seen while suppressed (the notice was never shown): nothing
        to clear and no 'restored', since no depleted state was ever visible."""
        latch = fresh_latch()
        evaluate_credits_notices(CreditsState(paid_access=False), latch, model_is_free=True)
        shown, cleared = evaluate_credits_notices(
            CreditsState(paid_access=True),
            latch,
            model_is_free=True,
        )
        assert cleared == []
        assert "credits.restored" not in [n.key for n in shown]

    def test_genuine_recovery_still_emits_restored_when_notice_active(self):
        """``paid_access`` flips back to True while the notice is showing: clear
        plus 'restored', whatever the model-free flag says."""
        latch = fresh_latch()
        evaluate_credits_notices(CreditsState(paid_access=False), latch)
        shown, cleared = evaluate_credits_notices(
            CreditsState(paid_access=True),
            latch,
            model_is_free=True,
        )
        assert "credits.depleted" in cleared
        restored_count = sum(1 for n in shown if n.key == "credits.restored")
        assert restored_count == 1

    def test_free_flag_does_not_affect_other_notices(self):
        """Usage-band and grant notices do not depend on the model-free gate."""
        latch = fresh_latch()
        evaluate_credits_notices(state_with_fraction(0.10), latch, model_is_free=True)
        shown, _cleared = evaluate_credits_notices(
            state_with_fraction(0.95, paid_access=False),
            latch,
            model_is_free=True,
        )
        shown_keys = [n.key for n in shown]
        assert "credits.usage" in shown_keys
        assert "credits.depleted" not in shown_keys
|
||||||
|
|
||||||
|
|
||||||
|
# ── Scenario 5c: is_free_tier_model (local-data-only check) ──────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsFreeTierModel:
    """Local-data-only behaviour of ``is_free_tier_model``."""

    def test_free_suffix_is_free(self):
        from agent.credits_tracker import is_free_tier_model

        assert is_free_tier_model("nvidia/nemotron-3-ultra:free") is True
        nous_root = "https://inference-api.nousresearch.com"
        assert is_free_tier_model("Hermes-4-70B:free", nous_root) is True

    def test_empty_or_paid_model_is_not_free(self):
        from agent.credits_tracker import is_free_tier_model

        assert is_free_tier_model("") is False
        assert is_free_tier_model("Hermes-4-405B") is False

    def test_pricing_cache_peek_zero_priced_model(self, monkeypatch):
        from agent.credits_tracker import is_free_tier_model
        import hermes_cli.models as models_mod

        # The picker stores pricing under the pre-/v1 root
        # (get_pricing_for_provider strips a trailing /v1 before
        # fetch_models_with_pricing).
        fake_cache = {
            "https://inference-api.nousresearch.com": {
                "some/zero-priced": {"prompt": "0", "completion": "0"},
                "some/paid": {"prompt": "0.000001", "completion": "0.000002"},
            }
        }
        monkeypatch.setattr(models_mod, "_pricing_cache", fake_cache)

        # The agent holds the /v1-suffixed URL (DEFAULT_NOUS_INFERENCE_URL);
        # the helper has to normalize it down to the picker's cache key.
        v1_url = "https://inference-api.nousresearch.com/v1"
        assert is_free_tier_model("some/zero-priced", v1_url) is True
        assert is_free_tier_model("some/paid", v1_url) is False

        # Pre-stripped and trailing-slash variants resolve to the same key.
        assert is_free_tier_model("some/zero-priced", "https://inference-api.nousresearch.com/") is True
        assert is_free_tier_model("some/zero-priced", "https://inference-api.nousresearch.com/v1/") is True

    def test_cache_miss_is_not_free_and_no_fetch(self, monkeypatch):
        from agent.credits_tracker import is_free_tier_model
        import hermes_cli.models as models_mod
        import urllib.request

        def _boom(*args, **kwargs):  # any network attempt fails the test
            raise AssertionError("is_free_tier_model must never hit the network")

        monkeypatch.setattr(models_mod, "_pricing_cache", {})
        monkeypatch.setattr(urllib.request, "urlopen", _boom)

        result = is_free_tier_model("some/model", "https://inference-api.nousresearch.com/v1")
        assert result is False

    def test_exception_fails_open_to_false(self, monkeypatch):
        from agent.credits_tracker import is_free_tier_model
        import hermes_cli.models as models_mod

        class _Exploding:
            """Stand-in cache whose lookup always raises."""

            def get(self, *_a, **_kw):
                raise RuntimeError("boom")

        monkeypatch.setattr(models_mod, "_pricing_cache", _Exploding())

        result = is_free_tier_model("some/model", "https://inference-api.nousresearch.com")
        assert result is False
|
||||||
|
|
||||||
|
|
||||||
# ── Scenario 6: denominator none (uf is None) ────────────────────────────────
|
# ── Scenario 6: denominator none (uf is None) ────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue