mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(aux): use Portal /api/nous/recommended-models for auxiliary models
Wire the auxiliary client (compaction, vision, session search, web extract)
to the Nous Portal's curated recommended-models endpoint when running on
Nous Portal, with a TTL-cached fetch that mirrors how we pull /models for
pricing.
hermes_cli/models.py
- fetch_nous_recommended_models(portal_base_url, force_refresh=False)
10-minute TTL cache, keyed per portal URL (staging vs prod don't
collide). Public endpoint, no auth required. Returns {} on any
failure so callers always get a dict.
- get_nous_recommended_aux_model(vision, free_tier=None, ...)
Tier-aware pick from the payload:
- Paid tier → paidRecommended{Vision,Compaction}Model, falling back
to freeRecommended* when the paid field is null (common during
staged rollouts of new paid models).
- Free tier → freeRecommended* only, never leaks paid models.
When free_tier is None, auto-detects via the existing
check_nous_free_tier() helper (already cached 3 min against
/api/oauth/account). Detection errors default to paid so we never
silently downgrade a paying user.
agent/auxiliary_client.py — _try_nous()
- Replaces the hardcoded xiaomi/mimo free-tier branch with a single call
to get_nous_recommended_aux_model(vision=vision).
- Falls back to _NOUS_MODEL (google/gemini-3-flash-preview) when the
Portal is unreachable or returns a null recommendation.
- The Portal is now the source of truth for aux model selection; the
xiaomi allowlist we used to carry is effectively dead.
Tests (15 new)
- tests/hermes_cli/test_models.py::TestNousRecommendedModels
Fetch caching, per-portal keying, network failure, force_refresh;
paid-prefers-paid, paid-falls-to-free, free-never-leaks-paid,
auto-detect, detection-error → paid default, null/blank modelName
handling.
- tests/agent/test_auxiliary_client.py::TestNousAuxiliaryRefresh
_try_nous honors Portal recommendation for text + vision, falls
back to google/gemini-3-flash-preview on None or exception.
Behavior won't visibly change today — both tier recommendations currently
point at google/gemini-3-flash-preview — but the moment the Portal ships
a better paid recommendation, subscribers pick it up within 10 minutes
without a Hermes release.
This commit is contained in:
parent
c22f4a76de
commit
29693f9d8e
4 changed files with 417 additions and 17 deletions
|
|
@@ -483,6 +483,7 @@ class TestNousAuxiliaryRefresh:
|
|||
with (
|
||||
patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
|
||||
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
|
||||
patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai,
|
||||
):
|
||||
from agent.auxiliary_client import _try_nous
|
||||
|
|
@@ -491,10 +492,60 @@ class TestNousAuxiliaryRefresh:
|
|||
client, model = _try_nous()
|
||||
|
||||
assert client is not None
|
||||
# No Portal recommendation → falls back to the hardcoded default.
|
||||
assert model == "google/gemini-3-flash-preview"
|
||||
assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key"
|
||||
assert mock_openai.call_args.kwargs["base_url"] == fresh_base
|
||||
|
||||
def test_try_nous_uses_portal_recommendation_for_text(self):
    """A Portal-recommended compaction model must be what _try_nous returns."""
    runtime_base = "https://inference-api.nousresearch.com/v1"
    recommended = "minimax/minimax-m2.7"
    with (
        patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
        patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", runtime_base)),
        patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=recommended) as rec_mock,
        patch("agent.auxiliary_client.OpenAI") as openai_cls,
    ):
        from agent.auxiliary_client import _try_nous

        openai_cls.return_value = MagicMock()
        client, model = _try_nous(vision=False)

    # The Portal's pick wins over any hardcoded default, and the lookup
    # must have been made for the non-vision (compaction) slot.
    assert client is not None
    assert model == recommended
    assert rec_mock.call_args.kwargs["vision"] is False
|
||||
|
||||
def test_try_nous_uses_portal_recommendation_for_vision(self):
    """Vision tasks must request the vision-specific Portal recommendation."""
    runtime_base = "https://inference-api.nousresearch.com/v1"
    with (
        patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
        patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", runtime_base)),
        patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="google/gemini-3-flash-preview") as rec_mock,
        patch("agent.auxiliary_client.OpenAI"),
    ):
        from agent.auxiliary_client import _try_nous

        client, model = _try_nous(vision=True)

    assert client is not None
    assert model == "google/gemini-3-flash-preview"
    # The recommendation lookup must have been made with vision=True.
    assert rec_mock.call_args.kwargs["vision"] is True
|
||||
|
||||
def test_try_nous_falls_back_when_recommendation_lookup_raises(self):
    """Even if the Portal lookup throws, _try_nous must return a usable model."""
    runtime_base = "https://inference-api.nousresearch.com/v1"
    with (
        patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
        patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", runtime_base)),
        patch("hermes_cli.models.get_nous_recommended_aux_model", side_effect=RuntimeError("portal down")),
        patch("agent.auxiliary_client.OpenAI"),
    ):
        from agent.auxiliary_client import _try_nous

        client, model = _try_nous()

    # A Portal failure must never leave the aux client without a model:
    # we fall back to the hardcoded default.
    assert client is not None
    assert model == "google/gemini-3-flash-preview"
|
||||
|
||||
def test_call_llm_retries_nous_after_401(self):
|
||||
class _Auth401(Exception):
|
||||
status_code = 401
|
||||
|
|
|
|||
|
|
@@ -417,3 +417,190 @@ class TestCheckNousFreeTierCache:
|
|||
def test_cache_ttl_is_short(self):
    """TTL should be short enough to catch upgrades quickly (<=5 min)."""
    five_minutes = 300
    assert _FREE_TIER_CACHE_TTL <= five_minutes
|
||||
|
||||
|
||||
class TestNousRecommendedModels:
    """Tests for fetch_nous_recommended_models + get_nous_recommended_aux_model."""

    # Representative Portal response: only the free-tier aux models are set,
    # both paid fields are null (the staged-rollout shape).
    _SAMPLE_PAYLOAD = {
        "paidRecommendedModels": [],
        "freeRecommendedModels": [],
        "paidRecommendedCompactionModel": None,
        "paidRecommendedVisionModel": None,
        "freeRecommendedCompactionModel": {
            "modelName": "google/gemini-3-flash-preview",
            "displayName": "Google: Gemini 3 Flash Preview",
        },
        "freeRecommendedVisionModel": {
            "modelName": "google/gemini-3-flash-preview",
            "displayName": "Google: Gemini 3 Flash Preview",
        },
    }

    def setup_method(self):
        # Start every test from a cold cache.
        _models_mod._nous_recommended_cache.clear()

    def teardown_method(self):
        # Leave no cached entries behind for other test classes.
        _models_mod._nous_recommended_cache.clear()

    def _mock_urlopen(self, payload):
        """Build a context-manager mock that behaves like urllib.request.urlopen()."""
        import json as _json

        body = _json.dumps(payload).encode()
        response = MagicMock()
        response.read.return_value = body
        ctx = MagicMock()
        ctx.__enter__.return_value = response
        ctx.__exit__.return_value = False
        return ctx

    def test_fetch_caches_per_portal_url(self):
        from hermes_cli.models import fetch_nous_recommended_models

        ctx = self._mock_urlopen(self._SAMPLE_PAYLOAD)
        with patch("urllib.request.urlopen", return_value=ctx) as urlopen_mock:
            first = fetch_nous_recommended_models("https://portal.example.com")
            second = fetch_nous_recommended_models("https://portal.example.com")
        assert first == self._SAMPLE_PAYLOAD
        assert second == self._SAMPLE_PAYLOAD
        # Only one network round-trip: the second call is served from cache.
        assert urlopen_mock.call_count == 1

    def test_fetch_cache_is_keyed_per_portal(self):
        from hermes_cli.models import fetch_nous_recommended_models

        ctx = self._mock_urlopen(self._SAMPLE_PAYLOAD)
        with patch("urllib.request.urlopen", return_value=ctx) as urlopen_mock:
            fetch_nous_recommended_models("https://portal.example.com")
            fetch_nous_recommended_models("https://portal.staging-nousresearch.com")
        # Distinct portals must not share cache entries.
        assert urlopen_mock.call_count == 2

    def test_fetch_returns_empty_on_network_failure(self):
        from hermes_cli.models import fetch_nous_recommended_models

        with patch("urllib.request.urlopen", side_effect=OSError("boom")):
            # Callers always get a dict, never an exception.
            assert fetch_nous_recommended_models("https://portal.example.com") == {}

    def test_fetch_force_refresh_bypasses_cache(self):
        from hermes_cli.models import fetch_nous_recommended_models

        ctx = self._mock_urlopen(self._SAMPLE_PAYLOAD)
        with patch("urllib.request.urlopen", return_value=ctx) as urlopen_mock:
            fetch_nous_recommended_models("https://portal.example.com")
            fetch_nous_recommended_models("https://portal.example.com", force_refresh=True)
        # force_refresh triggers a second fetch despite the warm cache.
        assert urlopen_mock.call_count == 2

    def test_get_aux_model_returns_vision_recommendation(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        with patch(
            "hermes_cli.models.fetch_nous_recommended_models",
            return_value=self._SAMPLE_PAYLOAD,
        ):
            # Free tier → free vision recommendation.
            picked = get_nous_recommended_aux_model(vision=True, free_tier=True)
        assert picked == "google/gemini-3-flash-preview"

    def test_get_aux_model_returns_compaction_recommendation(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            **self._SAMPLE_PAYLOAD,
            "freeRecommendedCompactionModel": {"modelName": "minimax/minimax-m2.7"},
        }
        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
            picked = get_nous_recommended_aux_model(vision=False, free_tier=True)
        assert picked == "minimax/minimax-m2.7"

    def test_get_aux_model_returns_none_when_field_null(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {**self._SAMPLE_PAYLOAD, "freeRecommendedCompactionModel": None}
        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
            assert get_nous_recommended_aux_model(vision=False, free_tier=True) is None

    def test_get_aux_model_returns_none_on_empty_payload(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value={}):
            assert get_nous_recommended_aux_model(vision=False, free_tier=True) is None
            assert get_nous_recommended_aux_model(vision=True, free_tier=False) is None

    def test_get_aux_model_returns_none_when_modelname_blank(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        # A whitespace-only modelName must be treated the same as missing.
        payload = {"freeRecommendedCompactionModel": {"modelName": " "}}
        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
            assert get_nous_recommended_aux_model(vision=False, free_tier=True) is None

    def test_paid_tier_prefers_paid_recommendation(self):
        """Paid-tier users should get the paid model when it's populated."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": {"modelName": "anthropic/claude-opus-4.7"},
            "freeRecommendedCompactionModel": {"modelName": "google/gemini-3-flash-preview"},
            "paidRecommendedVisionModel": {"modelName": "openai/gpt-5.4"},
            "freeRecommendedVisionModel": {"modelName": "google/gemini-3-flash-preview"},
        }
        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
            assert get_nous_recommended_aux_model(vision=False, free_tier=False) == "anthropic/claude-opus-4.7"
            assert get_nous_recommended_aux_model(vision=True, free_tier=False) == "openai/gpt-5.4"

    def test_paid_tier_falls_back_to_free_when_paid_is_null(self):
        """If the Portal returns null for the paid field, fall back to free."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": None,
            "freeRecommendedCompactionModel": {"modelName": "google/gemini-3-flash-preview"},
            "paidRecommendedVisionModel": None,
            "freeRecommendedVisionModel": {"modelName": "google/gemini-3-flash-preview"},
        }
        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
            for wants_vision in (False, True):
                assert (
                    get_nous_recommended_aux_model(vision=wants_vision, free_tier=False)
                    == "google/gemini-3-flash-preview"
                )

    def test_free_tier_never_uses_paid_recommendation(self):
        """Free-tier users must not get paid-only recommendations."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": {"modelName": "anthropic/claude-opus-4.7"},
            "freeRecommendedCompactionModel": None,  # no free recommendation
        }
        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
            # Free tier must return None — never leak the paid model.
            assert get_nous_recommended_aux_model(vision=False, free_tier=True) is None

    def test_auto_detects_tier_when_not_supplied(self):
        """Default behaviour: call check_nous_free_tier() to pick the tier."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": {"modelName": "paid-model"},
            "freeRecommendedCompactionModel": {"modelName": "free-model"},
        }
        for is_free, expected in ((True, "free-model"), (False, "paid-model")):
            with (
                patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload),
                patch("hermes_cli.models.check_nous_free_tier", return_value=is_free),
            ):
                assert get_nous_recommended_aux_model(vision=False) == expected

    def test_tier_detection_error_defaults_to_paid(self):
        """If tier detection raises, assume paid so we don't downgrade silently."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": {"modelName": "paid-model"},
            "freeRecommendedCompactionModel": {"modelName": "free-model"},
        }
        with (
            patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload),
            patch("hermes_cli.models.check_nous_free_tier", side_effect=RuntimeError("boom")),
        ):
            assert get_nous_recommended_aux_model(vision=False) == "paid-model"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue