feat(aux): use Portal /api/nous/recommended-models for auxiliary models

Wire the auxiliary client (compaction, vision, session search, web extract)
to the Nous Portal's curated recommended-models endpoint when running on
Nous Portal, with a TTL-cached fetch that mirrors how we pull /models for
pricing.

hermes_cli/models.py
  - fetch_nous_recommended_models(portal_base_url, force_refresh=False)
    10-minute TTL cache, keyed per portal URL (staging vs prod don't
    collide).  Public endpoint, no auth required.  Returns {} on any
    failure so callers always get a dict.
  - get_nous_recommended_aux_model(vision, free_tier=None, ...)
    Tier-aware pick from the payload:
      - Paid tier → paidRecommended{Vision,Compaction}Model, falling back
        to freeRecommended* when the paid field is null (common during
        staged rollouts of new paid models).
      - Free tier → freeRecommended* only, never leaks paid models.
    When free_tier is None, auto-detects via the existing
    check_nous_free_tier() helper (already cached 3 min against
    /api/oauth/account).  Detection errors default to paid so we never
    silently downgrade a paying user.

agent/auxiliary_client.py — _try_nous()
  - Replaces the hardcoded xiaomi/mimo free-tier branch with a single call
    to get_nous_recommended_aux_model(vision=vision).
  - Falls back to _NOUS_MODEL (google/gemini-3-flash-preview) when the
    Portal is unreachable or returns a null recommendation.
  - The Portal is now the source of truth for aux model selection; the
    xiaomi allowlist we used to carry is effectively dead.

Tests (15 new)
  - tests/hermes_cli/test_models.py::TestNousRecommendedModels
    Fetch caching, per-portal keying, network failure, force_refresh;
    paid-prefers-paid, paid-falls-to-free, free-never-leaks-paid,
    auto-detect, detection-error → paid default, null/blank modelName
    handling.
  - tests/agent/test_auxiliary_client.py::TestNousAuxiliaryRefresh
    _try_nous honors Portal recommendation for text + vision, falls
    back to google/gemini-3-flash-preview on None or exception.

Behavior won't visibly change today — both tier recommendations currently
point at google/gemini-3-flash-preview — but the moment the Portal ships
a better paid recommendation, subscribers pick it up within 10 minutes
without a Hermes release.
This commit is contained in:
emozilla 2026-04-21 22:53:45 -04:00 committed by Teknium
parent c22f4a76de
commit 29693f9d8e
4 changed files with 417 additions and 17 deletions

View file

@ -483,6 +483,7 @@ class TestNousAuxiliaryRefresh:
with (
patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
patch("agent.auxiliary_client.OpenAI") as mock_openai,
):
from agent.auxiliary_client import _try_nous
@ -491,10 +492,60 @@ class TestNousAuxiliaryRefresh:
client, model = _try_nous()
assert client is not None
# No Portal recommendation → falls back to the hardcoded default.
assert model == "google/gemini-3-flash-preview"
assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key"
assert mock_openai.call_args.kwargs["base_url"] == fresh_base
def test_try_nous_uses_portal_recommendation_for_text(self):
"""When the Portal recommends a compaction model, _try_nous honors it."""
fresh_base = "https://inference-api.nousresearch.com/v1"
with (
patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="minimax/minimax-m2.7") as mock_rec,
patch("agent.auxiliary_client.OpenAI") as mock_openai,
):
from agent.auxiliary_client import _try_nous
mock_openai.return_value = MagicMock()
client, model = _try_nous(vision=False)
assert client is not None
assert model == "minimax/minimax-m2.7"
assert mock_rec.call_args.kwargs["vision"] is False
def test_try_nous_uses_portal_recommendation_for_vision(self):
"""Vision tasks should ask for the vision-specific recommendation."""
fresh_base = "https://inference-api.nousresearch.com/v1"
with (
patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="google/gemini-3-flash-preview") as mock_rec,
patch("agent.auxiliary_client.OpenAI"),
):
from agent.auxiliary_client import _try_nous
client, model = _try_nous(vision=True)
assert client is not None
assert model == "google/gemini-3-flash-preview"
assert mock_rec.call_args.kwargs["vision"] is True
def test_try_nous_falls_back_when_recommendation_lookup_raises(self):
"""If the Portal lookup throws, we must still return a usable model."""
fresh_base = "https://inference-api.nousresearch.com/v1"
with (
patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
patch("hermes_cli.models.get_nous_recommended_aux_model", side_effect=RuntimeError("portal down")),
patch("agent.auxiliary_client.OpenAI"),
):
from agent.auxiliary_client import _try_nous
client, model = _try_nous()
assert client is not None
assert model == "google/gemini-3-flash-preview"
def test_call_llm_retries_nous_after_401(self):
class _Auth401(Exception):
status_code = 401

View file

@ -417,3 +417,190 @@ class TestCheckNousFreeTierCache:
def test_cache_ttl_is_short(self):
"""TTL should be short enough to catch upgrades quickly (<=5 min)."""
assert _FREE_TIER_CACHE_TTL <= 300
class TestNousRecommendedModels:
"""Tests for fetch_nous_recommended_models + get_nous_recommended_aux_model."""
_SAMPLE_PAYLOAD = {
"paidRecommendedModels": [],
"freeRecommendedModels": [],
"paidRecommendedCompactionModel": None,
"paidRecommendedVisionModel": None,
"freeRecommendedCompactionModel": {
"modelName": "google/gemini-3-flash-preview",
"displayName": "Google: Gemini 3 Flash Preview",
},
"freeRecommendedVisionModel": {
"modelName": "google/gemini-3-flash-preview",
"displayName": "Google: Gemini 3 Flash Preview",
},
}
def setup_method(self):
_models_mod._nous_recommended_cache.clear()
def teardown_method(self):
_models_mod._nous_recommended_cache.clear()
def _mock_urlopen(self, payload):
"""Return a context-manager mock mimicking urllib.request.urlopen()."""
import json as _json
response = MagicMock()
response.read.return_value = _json.dumps(payload).encode()
cm = MagicMock()
cm.__enter__.return_value = response
cm.__exit__.return_value = False
return cm
def test_fetch_caches_per_portal_url(self):
from hermes_cli.models import fetch_nous_recommended_models
mock_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD)
with patch("urllib.request.urlopen", return_value=mock_cm) as mock_urlopen:
a = fetch_nous_recommended_models("https://portal.example.com")
b = fetch_nous_recommended_models("https://portal.example.com")
assert a == self._SAMPLE_PAYLOAD
assert b == self._SAMPLE_PAYLOAD
assert mock_urlopen.call_count == 1 # second call served from cache
def test_fetch_cache_is_keyed_per_portal(self):
from hermes_cli.models import fetch_nous_recommended_models
mock_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD)
with patch("urllib.request.urlopen", return_value=mock_cm) as mock_urlopen:
fetch_nous_recommended_models("https://portal.example.com")
fetch_nous_recommended_models("https://portal.staging-nousresearch.com")
assert mock_urlopen.call_count == 2 # different portals → separate fetches
def test_fetch_returns_empty_on_network_failure(self):
from hermes_cli.models import fetch_nous_recommended_models
with patch("urllib.request.urlopen", side_effect=OSError("boom")):
result = fetch_nous_recommended_models("https://portal.example.com")
assert result == {}
def test_fetch_force_refresh_bypasses_cache(self):
from hermes_cli.models import fetch_nous_recommended_models
mock_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD)
with patch("urllib.request.urlopen", return_value=mock_cm) as mock_urlopen:
fetch_nous_recommended_models("https://portal.example.com")
fetch_nous_recommended_models("https://portal.example.com", force_refresh=True)
assert mock_urlopen.call_count == 2
def test_get_aux_model_returns_vision_recommendation(self):
from hermes_cli.models import get_nous_recommended_aux_model
with patch(
"hermes_cli.models.fetch_nous_recommended_models",
return_value=self._SAMPLE_PAYLOAD,
):
# Free tier → free vision recommendation.
model = get_nous_recommended_aux_model(vision=True, free_tier=True)
assert model == "google/gemini-3-flash-preview"
def test_get_aux_model_returns_compaction_recommendation(self):
from hermes_cli.models import get_nous_recommended_aux_model
payload = dict(self._SAMPLE_PAYLOAD)
payload["freeRecommendedCompactionModel"] = {"modelName": "minimax/minimax-m2.7"}
with patch(
"hermes_cli.models.fetch_nous_recommended_models",
return_value=payload,
):
model = get_nous_recommended_aux_model(vision=False, free_tier=True)
assert model == "minimax/minimax-m2.7"
def test_get_aux_model_returns_none_when_field_null(self):
from hermes_cli.models import get_nous_recommended_aux_model
payload = dict(self._SAMPLE_PAYLOAD)
payload["freeRecommendedCompactionModel"] = None
with patch(
"hermes_cli.models.fetch_nous_recommended_models",
return_value=payload,
):
model = get_nous_recommended_aux_model(vision=False, free_tier=True)
assert model is None
def test_get_aux_model_returns_none_on_empty_payload(self):
from hermes_cli.models import get_nous_recommended_aux_model
with patch("hermes_cli.models.fetch_nous_recommended_models", return_value={}):
assert get_nous_recommended_aux_model(vision=False, free_tier=True) is None
assert get_nous_recommended_aux_model(vision=True, free_tier=False) is None
def test_get_aux_model_returns_none_when_modelname_blank(self):
from hermes_cli.models import get_nous_recommended_aux_model
payload = {"freeRecommendedCompactionModel": {"modelName": " "}}
with patch(
"hermes_cli.models.fetch_nous_recommended_models",
return_value=payload,
):
assert get_nous_recommended_aux_model(vision=False, free_tier=True) is None
def test_paid_tier_prefers_paid_recommendation(self):
"""Paid-tier users should get the paid model when it's populated."""
from hermes_cli.models import get_nous_recommended_aux_model
payload = {
"paidRecommendedCompactionModel": {"modelName": "anthropic/claude-opus-4.7"},
"freeRecommendedCompactionModel": {"modelName": "google/gemini-3-flash-preview"},
"paidRecommendedVisionModel": {"modelName": "openai/gpt-5.4"},
"freeRecommendedVisionModel": {"modelName": "google/gemini-3-flash-preview"},
}
with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
text = get_nous_recommended_aux_model(vision=False, free_tier=False)
vision = get_nous_recommended_aux_model(vision=True, free_tier=False)
assert text == "anthropic/claude-opus-4.7"
assert vision == "openai/gpt-5.4"
def test_paid_tier_falls_back_to_free_when_paid_is_null(self):
"""If the Portal returns null for the paid field, fall back to free."""
from hermes_cli.models import get_nous_recommended_aux_model
payload = {
"paidRecommendedCompactionModel": None,
"freeRecommendedCompactionModel": {"modelName": "google/gemini-3-flash-preview"},
"paidRecommendedVisionModel": None,
"freeRecommendedVisionModel": {"modelName": "google/gemini-3-flash-preview"},
}
with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
text = get_nous_recommended_aux_model(vision=False, free_tier=False)
vision = get_nous_recommended_aux_model(vision=True, free_tier=False)
assert text == "google/gemini-3-flash-preview"
assert vision == "google/gemini-3-flash-preview"
def test_free_tier_never_uses_paid_recommendation(self):
"""Free-tier users must not get paid-only recommendations."""
from hermes_cli.models import get_nous_recommended_aux_model
payload = {
"paidRecommendedCompactionModel": {"modelName": "anthropic/claude-opus-4.7"},
"freeRecommendedCompactionModel": None, # no free recommendation
}
with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
model = get_nous_recommended_aux_model(vision=False, free_tier=True)
# Free tier must return None — never leak the paid model.
assert model is None
def test_auto_detects_tier_when_not_supplied(self):
"""Default behaviour: call check_nous_free_tier() to pick the tier."""
from hermes_cli.models import get_nous_recommended_aux_model
payload = {
"paidRecommendedCompactionModel": {"modelName": "paid-model"},
"freeRecommendedCompactionModel": {"modelName": "free-model"},
}
with (
patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload),
patch("hermes_cli.models.check_nous_free_tier", return_value=True),
):
assert get_nous_recommended_aux_model(vision=False) == "free-model"
with (
patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload),
patch("hermes_cli.models.check_nous_free_tier", return_value=False),
):
assert get_nous_recommended_aux_model(vision=False) == "paid-model"
def test_tier_detection_error_defaults_to_paid(self):
"""If tier detection raises, assume paid so we don't downgrade silently."""
from hermes_cli.models import get_nous_recommended_aux_model
payload = {
"paidRecommendedCompactionModel": {"modelName": "paid-model"},
"freeRecommendedCompactionModel": {"modelName": "free-model"},
}
with (
patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload),
patch("hermes_cli.models.check_nous_free_tier", side_effect=RuntimeError("boom")),
):
assert get_nous_recommended_aux_model(vision=False) == "paid-model"