mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-12 08:51:53 +00:00
Suppress "Credit access paused" notice on free models (#43669)
* don't show credits message on free model
* PR comments
This commit is contained in:
parent
6de3963e37
commit
6110aed9be
4 changed files with 238 additions and 18 deletions
|
|
@ -194,17 +194,71 @@ class AgentNotice:
|
|||
id: Optional[str] = None
|
||||
|
||||
|
||||
# ── is_free_tier_model (local-data-only free-model check) ────────────────────
|
||||
|
||||
|
||||
def is_free_tier_model(model: str, base_url: str = "") -> bool:
    """Return True when *model* is a Nous free-tier model, using ONLY local data.

    Two signals, both zero-network:

    1. The ``:free`` suffix — the canonical Nous free SKU marker (e.g.
       ``nvidia/nemotron-3-ultra:free``). Free by construction on the API side
       (spend is forced to 0 for ``:free`` ids).
    2. A peek into the in-process pricing cache in ``hermes_cli.models``
       (populated when the model picker fetched ``/v1/models`` pricing for
       *base_url*). PEEK ONLY — a cache miss never triggers a fetch. This is
       CLI/TUI-session best-effort: gateway sessions never run the picker's
       pricing fetch, so suppression there rests entirely on the ``:free``
       suffix (which all Nous free SKUs carry).

    Fail-open to False (the depleted notice still shows) on any error: wrongly
    showing the warning is recoverable noise; wrongly hiding it on a paid model
    would mask a real billing block.

    Args:
        model: Model id of the active session. An empty value is never free.
        base_url: Inference base URL used to locate the pricing-cache entry.
            A trailing ``/`` and a trailing ``/v1`` segment are stripped before
            the lookup. When empty, only the ``:free`` suffix is consulted.

    Returns:
        True if the suffix or the cached pricing marks the model as free,
        False otherwise (including cache misses and any exception).
    """
    if not model:
        return False
    if model.endswith(":free"):
        return True
    if not base_url:
        return False
    try:
        # Imported lazily and inside the try so that a missing or broken
        # hermes_cli package degrades to "not free" instead of raising.
        from hermes_cli.models import _is_model_free, _pricing_cache

        # Mirror get_pricing_for_provider's key normalization: the agent's
        # Nous base_url is /v1-suffixed (https://inference-api.nousresearch.com/v1)
        # but the picker keys _pricing_cache on the pre-/v1 root.
        key = base_url.rstrip("/")
        if key.endswith("/v1"):
            key = key[:-3].rstrip("/")
        pricing = _pricing_cache.get(key)
        if not pricing:
            # Cache miss (or empty entry): never fetch, just report "not free".
            return False
        return _is_model_free(model, pricing)
    except Exception:
        # Deliberate fail-open: see the docstring for the rationale.
        return False
|
||||
|
||||
|
||||
# ── evaluate_credits_notices (pure reconciliation function) ──────────────────
|
||||
|
||||
|
||||
def evaluate_credits_notices(
|
||||
state: CreditsState,
|
||||
latch: dict,
|
||||
*,
|
||||
model_is_free: bool = False,
|
||||
) -> tuple[list[AgentNotice], list[str]]:
|
||||
"""Reconcile credits notices against the latch. Mutates ``latch`` IN PLACE.
|
||||
|
||||
latch = {"active": set[str], "seen_below_90": bool, "usage_band": Optional[int]}.
|
||||
|
||||
``model_is_free``: True when the session's active model is a Nous free-tier
|
||||
model (see :func:`is_free_tier_model`). Suppresses the ``credits.depleted``
|
||||
notice — a depleted account on a free model can keep inferencing, so the
|
||||
error banner is noise (and confuses free-tier users who never had credits).
|
||||
Suppression does NOT emit the "restored" success notice; that fires only on
|
||||
a genuine ``paid_access`` flip back to True.
|
||||
|
||||
Returns ``(to_show: list[AgentNotice], to_clear: list[str])``.
|
||||
Caller emits to_clear FIRST, then to_show.
|
||||
|
||||
|
|
@ -284,7 +338,11 @@ def evaluate_credits_notices(
|
|||
active.discard("credits.grant_spent")
|
||||
|
||||
# ── depleted ─────────────────────────────────────────────────────────────
|
||||
if depleted_cond and "credits.depleted" not in active:
|
||||
# Suppressed while the active model is free: inference still works there,
|
||||
# so the error banner would just alarm users (free-tier users especially,
|
||||
# who never had paid credits to "lose").
|
||||
show_depleted = depleted_cond and not model_is_free
|
||||
if show_depleted and "credits.depleted" not in active:
|
||||
to_show.append(
|
||||
AgentNotice(
|
||||
text="✕ Credit access paused · run /usage for balance",
|
||||
|
|
@ -295,20 +353,23 @@ def evaluate_credits_notices(
|
|||
)
|
||||
)
|
||||
active.add("credits.depleted")
|
||||
elif "credits.depleted" in active and not depleted_cond:
|
||||
elif "credits.depleted" in active and not show_depleted:
|
||||
to_clear.append("credits.depleted")
|
||||
active.discard("credits.depleted")
|
||||
# Recovery: also emit the success notice
|
||||
to_show.append(
|
||||
AgentNotice(
|
||||
text="✓ Credit access restored",
|
||||
level="success",
|
||||
kind="ttl",
|
||||
ttl_ms=CREDITS_RESTORED_TTL_MS,
|
||||
key="credits.restored",
|
||||
id="credits.restored",
|
||||
if not depleted_cond:
|
||||
# Genuine recovery (paid_access flipped back True): also emit the
|
||||
# success notice. A clear caused by switching to a free model while
|
||||
# still depleted must NOT claim access was restored.
|
||||
to_show.append(
|
||||
AgentNotice(
|
||||
text="✓ Credit access restored",
|
||||
level="success",
|
||||
kind="ttl",
|
||||
ttl_ms=CREDITS_RESTORED_TTL_MS,
|
||||
key="credits.restored",
|
||||
id="credits.restored",
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
return (to_show, to_clear)
|
||||
|
||||
|
|
|
|||
11
run_agent.py
11
run_agent.py
|
|
@ -2831,11 +2831,18 @@ class AIAgent:
|
|||
if state is None:
|
||||
return
|
||||
try:
|
||||
from agent.credits_tracker import evaluate_credits_notices
|
||||
from agent.credits_tracker import evaluate_credits_notices, is_free_tier_model
|
||||
latch = getattr(self, "_credits_latch", None)
|
||||
if latch is None:
|
||||
latch = self._credits_latch = {"active": set(), "seen_below_90": False, "usage_band": None}
|
||||
to_show, to_clear = evaluate_credits_notices(state, latch)
|
||||
# Free-model gate: a depleted account on a free model can still
|
||||
# inference, so the depleted error banner is suppressed. Local-data
|
||||
# only (":free" suffix + pricing-cache peek) — never a network call.
|
||||
model_is_free = is_free_tier_model(
|
||||
getattr(self, "model", "") or "",
|
||||
getattr(self, "base_url", "") or "",
|
||||
)
|
||||
to_show, to_clear = evaluate_credits_notices(state, latch, model_is_free=model_is_free)
|
||||
for key in to_clear: # clears FIRST …
|
||||
self._emit_notice_clear(key)
|
||||
for notice in to_show: # … then shows (depleted lands last in a latest-wins slot)
|
||||
|
|
|
|||
|
|
@ -123,22 +123,29 @@ def test_dev_fixtures_drive_cold_start():
|
|||
|
||||
class _FakeAgent:
|
||||
"""Minimal agent surface for the seed helper: state slots + an emit that runs
|
||||
the real policy against the latch."""
|
||||
the real policy against the latch (mirroring run_agent._emit_credits_notices,
|
||||
including the free-model suppression flag)."""
|
||||
|
||||
def __init__(self, provider="nous"):
|
||||
from agent.credits_tracker import evaluate_credits_notices
|
||||
def __init__(self, provider="nous", model=""):
|
||||
from agent.credits_tracker import evaluate_credits_notices, is_free_tier_model
|
||||
|
||||
self.provider = provider
|
||||
self.model = model
|
||||
self._credits_state = None
|
||||
self._credits_session_start_micros = None
|
||||
self._credits_latch = {"active": set(), "seen_below_90": False, "usage_band": None}
|
||||
self.emitted: list = []
|
||||
self._eval = evaluate_credits_notices
|
||||
self._is_free = is_free_tier_model
|
||||
|
||||
def _emit_credits_notices(self):
|
||||
if self._credits_state is None:
|
||||
return
|
||||
show, clear = self._eval(self._credits_state, self._credits_latch)
|
||||
show, clear = self._eval(
|
||||
self._credits_state,
|
||||
self._credits_latch,
|
||||
model_is_free=self._is_free(self.model),
|
||||
)
|
||||
self.emitted.append(([n.key for n in show], clear))
|
||||
|
||||
|
||||
|
|
@ -169,6 +176,14 @@ def test_seed_fires_depleted_at_session_open():
|
|||
assert a.emitted == [(["credits.depleted"], [])]
|
||||
|
||||
|
||||
def test_seed_depleted_suppressed_on_free_model():
    """A session that opens depleted but on a Nous ``:free`` model must NOT show
    the depleted banner — inference works fine on the free tier."""
    a = _FakeAgent(model="nvidia/nemotron-3-ultra:free")
    assert _seed(a, "depleted") is True
    # The policy ran exactly once and produced nothing to show or clear.
    assert a.emitted == [([], [])]
|
||||
|
||||
|
||||
def test_seed_healthy_no_notice():
|
||||
a = _FakeAgent()
|
||||
assert _seed(a, "healthy") is True
|
||||
|
|
|
|||
|
|
@ -265,6 +265,143 @@ class TestDepleted:
|
|||
assert "credits.depleted" in keys
|
||||
|
||||
|
||||
# ── Scenario 5b: free-model suppression of the depleted notice ───────────────
|
||||
|
||||
|
||||
class TestDepletedFreeModelSuppression:
    """Checks how ``model_is_free`` gates the ``credits.depleted`` notice."""

    def test_depleted_suppressed_when_model_is_free(self):
        """Depleted + free model → no notice shown, nothing latched or cleared."""
        latch = fresh_latch()
        s = CreditsState(paid_access=False)
        to_show, to_clear = evaluate_credits_notices(s, latch, model_is_free=True)
        assert all(n.key != "credits.depleted" for n in to_show)
        assert "credits.depleted" not in latch["active"]
        assert to_clear == []

    def test_switch_to_free_model_clears_without_restored(self):
        """Switching to a free model while depleted clears the notice silently."""
        latch = fresh_latch()
        # Depleted on a paid model → notice fires
        evaluate_credits_notices(CreditsState(paid_access=False), latch)
        assert "credits.depleted" in latch["active"]
        # Same depleted account, but now on a free model → clear, NO "restored"
        to_show, to_clear = evaluate_credits_notices(
            CreditsState(paid_access=False), latch, model_is_free=True
        )
        assert "credits.depleted" in to_clear
        assert "credits.depleted" not in latch["active"]
        assert all(n.key != "credits.restored" for n in to_show)

    def test_switch_back_to_paid_model_while_depleted_reshows(self):
        """Returning to a paid model while still depleted re-fires the notice."""
        latch = fresh_latch()
        evaluate_credits_notices(CreditsState(paid_access=False), latch)
        evaluate_credits_notices(CreditsState(paid_access=False), latch, model_is_free=True)
        # Back on a paid model, still depleted → notice re-fires
        to_show, to_clear = evaluate_credits_notices(CreditsState(paid_access=False), latch)
        keys = [n.key for n in to_show]
        assert "credits.depleted" in keys
        assert "credits.depleted" in latch["active"]

    def test_genuine_recovery_on_free_model_no_spurious_restored(self):
        """Recovery observed while suppressed (notice never shown) → nothing to
        clear, no 'restored' (there was no visible depleted state to restore)."""
        latch = fresh_latch()
        evaluate_credits_notices(CreditsState(paid_access=False), latch, model_is_free=True)
        to_show, to_clear = evaluate_credits_notices(
            CreditsState(paid_access=True), latch, model_is_free=True
        )
        assert to_clear == []
        assert all(n.key != "credits.restored" for n in to_show)

    def test_genuine_recovery_still_emits_restored_when_notice_active(self):
        """paid_access flip back to True with the notice showing → clear + restored
        (unchanged behaviour, regardless of the model-free flag)."""
        latch = fresh_latch()
        evaluate_credits_notices(CreditsState(paid_access=False), latch)
        to_show, to_clear = evaluate_credits_notices(
            CreditsState(paid_access=True), latch, model_is_free=True
        )
        assert "credits.depleted" in to_clear
        restored = [n for n in to_show if n.key == "credits.restored"]
        assert len(restored) == 1

    def test_free_flag_does_not_affect_other_notices(self):
        """Usage-band and grant notices are independent of the model-free gate."""
        latch = fresh_latch()
        evaluate_credits_notices(state_with_fraction(0.10), latch, model_is_free=True)
        to_show, _ = evaluate_credits_notices(
            state_with_fraction(0.95, paid_access=False), latch, model_is_free=True
        )
        keys = [n.key for n in to_show]
        assert "credits.usage" in keys
        assert "credits.depleted" not in keys
|
||||
|
||||
|
||||
# ── Scenario 5c: is_free_tier_model (local-data-only check) ──────────────────
|
||||
|
||||
|
||||
class TestIsFreeTierModel:
    """Checks ``is_free_tier_model``: suffix match, cache peek, and fail-open."""

    def test_free_suffix_is_free(self):
        """A ``:free`` suffix is enough, with or without a base URL."""
        from agent.credits_tracker import is_free_tier_model

        assert is_free_tier_model("nvidia/nemotron-3-ultra:free") is True
        assert is_free_tier_model("Hermes-4-70B:free", "https://inference-api.nousresearch.com") is True

    def test_empty_or_paid_model_is_not_free(self):
        """An empty id, or an unsuffixed id with no base URL, is not free."""
        from agent.credits_tracker import is_free_tier_model

        assert is_free_tier_model("") is False
        assert is_free_tier_model("Hermes-4-405B") is False

    def test_pricing_cache_peek_zero_priced_model(self, monkeypatch):
        """A zero-priced entry in the pricing cache marks the model free."""
        from agent.credits_tracker import is_free_tier_model
        import hermes_cli.models as models_mod

        # The picker keys the cache on the pre-/v1 root (get_pricing_for_provider
        # strips a trailing /v1 before fetch_models_with_pricing).
        monkeypatch.setattr(
            models_mod,
            "_pricing_cache",
            {
                "https://inference-api.nousresearch.com": {
                    "some/zero-priced": {"prompt": "0", "completion": "0"},
                    "some/paid": {"prompt": "0.000001", "completion": "0.000002"},
                }
            },
        )
        # The agent holds the /v1-suffixed URL (DEFAULT_NOUS_INFERENCE_URL) —
        # the helper must normalize it down to the picker's cache key.
        base = "https://inference-api.nousresearch.com/v1"
        assert is_free_tier_model("some/zero-priced", base) is True
        assert is_free_tier_model("some/paid", base) is False
        # Pre-stripped and trailing-slash variants resolve to the same key.
        assert is_free_tier_model("some/zero-priced", "https://inference-api.nousresearch.com/") is True
        assert is_free_tier_model("some/zero-priced", "https://inference-api.nousresearch.com/v1/") is True

    def test_cache_miss_is_not_free_and_no_fetch(self, monkeypatch):
        """An empty cache yields False without calling ``urlopen``."""
        from agent.credits_tracker import is_free_tier_model
        import hermes_cli.models as models_mod

        monkeypatch.setattr(models_mod, "_pricing_cache", {})

        def _boom(*args, **kwargs):  # any network attempt fails the test
            raise AssertionError("is_free_tier_model must never hit the network")

        import urllib.request

        monkeypatch.setattr(urllib.request, "urlopen", _boom)
        assert is_free_tier_model("some/model", "https://inference-api.nousresearch.com/v1") is False

    def test_exception_fails_open_to_false(self, monkeypatch):
        """An exception raised by the cache lookup is reported as 'not free'."""
        from agent.credits_tracker import is_free_tier_model
        import hermes_cli.models as models_mod

        class _Exploding:
            def get(self, *_a, **_kw):
                raise RuntimeError("boom")

        monkeypatch.setattr(models_mod, "_pricing_cache", _Exploding())
        assert is_free_tier_model("some/model", "https://inference-api.nousresearch.com") is False
|
||||
|
||||
|
||||
# ── Scenario 6: denominator none (uf is None) ────────────────────────────────
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue