From 338b98161ad08fde4423ace88e542625fc834633 Mon Sep 17 00:00:00 2001 From: emozilla Date: Tue, 21 Apr 2026 22:53:45 -0400 Subject: [PATCH] feat(aux): use Portal /api/nous/recommended-models for auxiliary models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire the auxiliary client (compaction, vision, session search, web extract) to the Nous Portal's curated recommended-models endpoint when running on Nous Portal, with a TTL-cached fetch that mirrors how we pull /models for pricing. hermes_cli/models.py - fetch_nous_recommended_models(portal_base_url, force_refresh=False) 10-minute TTL cache, keyed per portal URL (staging vs prod don't collide). Public endpoint, no auth required. Returns {} on any failure so callers always get a dict. - get_nous_recommended_aux_model(vision, free_tier=None, ...) Tier-aware pick from the payload: - Paid tier → paidRecommended{Vision,Compaction}Model, falling back to freeRecommended* when the paid field is null (common during staged rollouts of new paid models). - Free tier → freeRecommended* only, never leaks paid models. When free_tier is None, auto-detects via the existing check_nous_free_tier() helper (already cached 3 min against /api/oauth/account). Detection errors default to paid so we never silently downgrade a paying user. agent/auxiliary_client.py — _try_nous() - Replaces the hardcoded xiaomi/mimo free-tier branch with a single call to get_nous_recommended_aux_model(vision=vision). - Falls back to _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable or returns a null recommendation. - The Portal is now the source of truth for aux model selection; the xiaomi allowlist we used to carry is effectively dead. - Also drops the old nous.get("source") == "pool" special case that picked "gemini-3-flash"; pool-sourced auth now goes through the same Portal recommendation / _NOUS_MODEL fallback as every other source.
Tests (17 new) - tests/hermes_cli/test_models.py::TestNousRecommendedModels Fetch caching, per-portal keying, network failure, force_refresh; paid-prefers-paid, paid-falls-to-free, free-never-leaks-paid, auto-detect, detection-error → paid default, null/blank modelName handling. - tests/agent/test_auxiliary_client.py::TestNousAuxiliaryRefresh _try_nous honors Portal recommendation for text + vision, falls back to google/gemini-3-flash-preview on None or exception. Behavior won't visibly change today — both tier recommendations currently point at google/gemini-3-flash-preview — but the moment the Portal ships a better paid recommendation, subscribers pick it up within 10 minutes without a Hermes release. --- agent/auxiliary_client.py | 45 ++++--- hermes_cli/models.py | 151 +++++++++++++++++++++ tests/agent/test_auxiliary_client.py | 51 ++++++++ tests/hermes_cli/test_models.py | 187 +++++++++++++++++++++++++++ 4 files changed, 417 insertions(+), 17 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 5195b0952..d950f702a 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -182,8 +182,6 @@ auxiliary_is_nous: bool = False # Default auxiliary models per provider _OPENROUTER_MODEL = "google/gemini-3-flash-preview" _NOUS_MODEL = "google/gemini-3-flash-preview" -_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni" -_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro" _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1" _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com" _AUTH_JSON_PATH = get_hermes_home() / "auth.json" @@ -927,22 +925,35 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]: global auxiliary_is_nous auxiliary_is_nous = True logger.debug("Auxiliary client: Nous Portal") - if nous.get("source") == "pool": - model = "gemini-3-flash" - else: - model = _NOUS_MODEL - # Free-tier users can't use paid auxiliary models — use the free - # models instead: mimo-v2-omni
for vision, mimo-v2-pro for text tasks. - # Paid accounts keep their tier-appropriate models: gemini-3-flash-preview - # for both text and vision tasks. + + # Ask the Portal which model it currently recommends for this task type. + # The /api/nous/recommended-models endpoint is the authoritative source: + # it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model + # auto-detects the caller's tier via check_nous_free_tier(). Fall back to + # _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable + # or returns a null recommendation for this task type. + model = _NOUS_MODEL try: - from hermes_cli.models import check_nous_free_tier - if check_nous_free_tier(): - model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL - logger.debug("Free-tier Nous account — using %s for auxiliary/%s", - model, "vision" if vision else "text") - except Exception: - pass + from hermes_cli.models import get_nous_recommended_aux_model + recommended = get_nous_recommended_aux_model(vision=vision) + if recommended: + model = recommended + logger.debug( + "Auxiliary/%s: using Portal-recommended model %s", + "vision" if vision else "text", model, + ) + else: + logger.debug( + "Auxiliary/%s: no Portal recommendation, falling back to %s", + "vision" if vision else "text", model, + ) + except Exception as exc: + logger.debug( + "Auxiliary/%s: recommended-models lookup failed (%s); " + "falling back to %s", + "vision" if vision else "text", exc, model, + ) + if runtime is not None: api_key, base_url = runtime else: diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 67c73ff83..24fcbc7b5 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -513,6 +513,157 @@ def check_nous_free_tier() -> bool: return False # default to paid on error — don't block users +# --------------------------------------------------------------------------- +# Nous Portal recommended models +# +# The Portal publishes a curated list 
of suggested models (separated into +# paid and free tiers) plus dedicated recommendations for compaction (text +# summarisation / auxiliary) and vision tasks. We cache it in-process +# with a TTL (per portal URL) so callers can ask "what's the best aux model right now?" +# without hitting the network on every lookup. +# +# Shape of the response (fields we care about): +# { +# "paidRecommendedModels": [ {modelName, ...}, ... ], +# "freeRecommendedModels": [ {modelName, ...}, ... ], +# "paidRecommendedCompactionModel": {modelName, ...} | null, +# "paidRecommendedVisionModel": {modelName, ...} | null, +# "freeRecommendedCompactionModel": {modelName, ...} | null, +# "freeRecommendedVisionModel": {modelName, ...} | null, +# } +# --------------------------------------------------------------------------- + +NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models" +_NOUS_RECOMMENDED_CACHE_TTL: int = 600 # seconds (10 minutes) +# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide. +_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {} + + +def fetch_nous_recommended_models( + portal_base_url: str = "", + timeout: float = 5.0, + *, + force_refresh: bool = False, +) -> dict[str, Any]: + """Fetch the Nous Portal's curated recommended-models payload. + + Hits ``/api/nous/recommended-models``. The endpoint is public — + no auth is required. Results (including the ``{}`` failure result) are cached per portal URL for + ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to + bypass the cache. + + Returns the parsed JSON dict on success, or ``{}`` on any failure + (network, parse, non-2xx). Callers must treat missing/null fields as + "no recommendation" and fall back to their own default.
+ """ + base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/") + now = time.monotonic() + cached = _nous_recommended_cache.get(base) + if not force_refresh and cached is not None: + payload, cached_at = cached + if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL: + return payload + + url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}" + try: + req = urllib.request.Request( + url, + headers={"Accept": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read().decode()) + if not isinstance(data, dict): + data = {} + except Exception: + data = {} + + _nous_recommended_cache[base] = (data, now) + return data + + +def _resolve_nous_portal_url() -> str: + """Best-effort lookup of the Portal base URL the user is authed against.""" + try: + from hermes_cli.auth import ( + DEFAULT_NOUS_PORTAL_URL, + get_provider_auth_state, + ) + state = get_provider_auth_state("nous") or {} + portal = str(state.get("portal_base_url") or "").strip() + if portal: + return portal.rstrip("/") + return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/") + except Exception: + return "https://portal.nousresearch.com" + + +def _extract_model_name(entry: Any) -> Optional[str]: + """Pull the ``modelName`` field from a recommended-model entry, else None.""" + if not isinstance(entry, dict): + return None + model_name = entry.get("modelName") + if isinstance(model_name, str) and model_name.strip(): + return model_name.strip() + return None + + +def get_nous_recommended_aux_model( + *, + vision: bool = False, + free_tier: Optional[bool] = None, + portal_base_url: str = "", + force_refresh: bool = False, +) -> Optional[str]: + """Return the Portal's recommended model name for an auxiliary task. 
+ + Picks the best field from the Portal's recommended-models payload: + + * ``vision=True`` → ``paidRecommendedVisionModel`` (paid tier) or + ``freeRecommendedVisionModel`` (free tier) + * ``vision=False`` → ``paidRecommendedCompactionModel`` or + ``freeRecommendedCompactionModel`` + + When ``free_tier`` is ``None`` (default) the user's tier is auto-detected + via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the + detection — useful for tests or when the caller already knows the tier. + + For paid-tier users we prefer the paid recommendation but gracefully fall + back to the free recommendation if the Portal returned ``null`` for the + paid field (common during the staged rollout of new paid models). + + Returns ``None`` when every candidate is missing, null, or the fetch + fails — callers should fall back to their own default (currently + ``google/gemini-3-flash-preview``). + """ + base = portal_base_url or _resolve_nous_portal_url() + payload = fetch_nous_recommended_models(base, force_refresh=force_refresh) + if not payload: + return None + + if free_tier is None: + try: + free_tier = check_nous_free_tier() + except Exception: + # On any detection error, assume paid so a paying user is never silently + # downgraded; the paid path still falls back to the free field below. + free_tier = False + + if vision: + paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel" + else: + paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel" + + # Preference order: + # free tier → free only + # paid tier → paid, then free (if paid field is missing, null, or blank) + candidates = [free_key] if free_tier else [paid_key, free_key] + for key in candidates: + name = _extract_model_name(payload.get(key)) + if name: + return name + return None + + # --------------------------------------------------------------------------- # Canonical provider list — single source of truth for provider identity.
# Every code path that lists, displays, or iterates providers derives from diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index b6958b08f..4c775b8a6 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -483,6 +483,7 @@ class TestNousAuxiliaryRefresh: with ( patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}), patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)), + patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None), patch("agent.auxiliary_client.OpenAI") as mock_openai, ): from agent.auxiliary_client import _try_nous @@ -491,10 +492,60 @@ class TestNousAuxiliaryRefresh: client, model = _try_nous() assert client is not None + # No Portal recommendation → falls back to the hardcoded default. assert model == "google/gemini-3-flash-preview" assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key" assert mock_openai.call_args.kwargs["base_url"] == fresh_base + def test_try_nous_uses_portal_recommendation_for_text(self): + """When the Portal recommends a compaction model, _try_nous honors it.""" + fresh_base = "https://inference-api.nousresearch.com/v1" + with ( + patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}), + patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)), + patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="minimax/minimax-m2.7") as mock_rec, + patch("agent.auxiliary_client.OpenAI") as mock_openai, + ): + from agent.auxiliary_client import _try_nous + + mock_openai.return_value = MagicMock() + client, model = _try_nous(vision=False) + + assert client is not None + assert model == "minimax/minimax-m2.7" + assert mock_rec.call_args.kwargs["vision"] is False + + def test_try_nous_uses_portal_recommendation_for_vision(self): + """Vision tasks 
should ask for the vision-specific recommendation.""" + fresh_base = "https://inference-api.nousresearch.com/v1" + with ( + patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}), + patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)), + patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="google/gemini-3-flash-preview") as mock_rec, + patch("agent.auxiliary_client.OpenAI"), + ): + from agent.auxiliary_client import _try_nous + client, model = _try_nous(vision=True) + + assert client is not None + assert model == "google/gemini-3-flash-preview" + assert mock_rec.call_args.kwargs["vision"] is True + + def test_try_nous_falls_back_when_recommendation_lookup_raises(self): + """If the Portal lookup throws, we must still return a usable model.""" + fresh_base = "https://inference-api.nousresearch.com/v1" + with ( + patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}), + patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)), + patch("hermes_cli.models.get_nous_recommended_aux_model", side_effect=RuntimeError("portal down")), + patch("agent.auxiliary_client.OpenAI"), + ): + from agent.auxiliary_client import _try_nous + client, model = _try_nous() + + assert client is not None + assert model == "google/gemini-3-flash-preview" + def test_call_llm_retries_nous_after_401(self): class _Auth401(Exception): status_code = 401 diff --git a/tests/hermes_cli/test_models.py b/tests/hermes_cli/test_models.py index f3b66ed5e..b493fd2b6 100644 --- a/tests/hermes_cli/test_models.py +++ b/tests/hermes_cli/test_models.py @@ -417,3 +417,190 @@ class TestCheckNousFreeTierCache: def test_cache_ttl_is_short(self): """TTL should be short enough to catch upgrades quickly (<=5 min).""" assert _FREE_TIER_CACHE_TTL <= 300 + + +class TestNousRecommendedModels: + """Tests for fetch_nous_recommended_models + 
get_nous_recommended_aux_model.""" + + _SAMPLE_PAYLOAD = { + "paidRecommendedModels": [], + "freeRecommendedModels": [], + "paidRecommendedCompactionModel": None, + "paidRecommendedVisionModel": None, + "freeRecommendedCompactionModel": { + "modelName": "google/gemini-3-flash-preview", + "displayName": "Google: Gemini 3 Flash Preview", + }, + "freeRecommendedVisionModel": { + "modelName": "google/gemini-3-flash-preview", + "displayName": "Google: Gemini 3 Flash Preview", + }, + } + + def setup_method(self): + _models_mod._nous_recommended_cache.clear() + + def teardown_method(self): + _models_mod._nous_recommended_cache.clear() + + def _mock_urlopen(self, payload): + """Return a context-manager mock mimicking urllib.request.urlopen().""" + import json as _json + response = MagicMock() + response.read.return_value = _json.dumps(payload).encode() + cm = MagicMock() + cm.__enter__.return_value = response + cm.__exit__.return_value = False + return cm + + def test_fetch_caches_per_portal_url(self): + from hermes_cli.models import fetch_nous_recommended_models + mock_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD) + with patch("urllib.request.urlopen", return_value=mock_cm) as mock_urlopen: + a = fetch_nous_recommended_models("https://portal.example.com") + b = fetch_nous_recommended_models("https://portal.example.com") + assert a == self._SAMPLE_PAYLOAD + assert b == self._SAMPLE_PAYLOAD + assert mock_urlopen.call_count == 1 # second call served from cache + + def test_fetch_cache_is_keyed_per_portal(self): + from hermes_cli.models import fetch_nous_recommended_models + mock_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD) + with patch("urllib.request.urlopen", return_value=mock_cm) as mock_urlopen: + fetch_nous_recommended_models("https://portal.example.com") + fetch_nous_recommended_models("https://portal.staging-nousresearch.com") + assert mock_urlopen.call_count == 2 # different portals → separate fetches + + def test_fetch_returns_empty_on_network_failure(self): + from 
hermes_cli.models import fetch_nous_recommended_models + with patch("urllib.request.urlopen", side_effect=OSError("boom")): + result = fetch_nous_recommended_models("https://portal.example.com") + assert result == {} + + def test_fetch_force_refresh_bypasses_cache(self): + from hermes_cli.models import fetch_nous_recommended_models + mock_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD) + with patch("urllib.request.urlopen", return_value=mock_cm) as mock_urlopen: + fetch_nous_recommended_models("https://portal.example.com") + fetch_nous_recommended_models("https://portal.example.com", force_refresh=True) + assert mock_urlopen.call_count == 2 + + def test_get_aux_model_returns_vision_recommendation(self): + from hermes_cli.models import get_nous_recommended_aux_model + with patch( + "hermes_cli.models.fetch_nous_recommended_models", + return_value=self._SAMPLE_PAYLOAD, + ): + # Free tier → free vision recommendation. + model = get_nous_recommended_aux_model(vision=True, free_tier=True) + assert model == "google/gemini-3-flash-preview" + + def test_get_aux_model_returns_compaction_recommendation(self): + from hermes_cli.models import get_nous_recommended_aux_model + payload = dict(self._SAMPLE_PAYLOAD) + payload["freeRecommendedCompactionModel"] = {"modelName": "minimax/minimax-m2.7"} + with patch( + "hermes_cli.models.fetch_nous_recommended_models", + return_value=payload, + ): + model = get_nous_recommended_aux_model(vision=False, free_tier=True) + assert model == "minimax/minimax-m2.7" + + def test_get_aux_model_returns_none_when_field_null(self): + from hermes_cli.models import get_nous_recommended_aux_model + payload = dict(self._SAMPLE_PAYLOAD) + payload["freeRecommendedCompactionModel"] = None + with patch( + "hermes_cli.models.fetch_nous_recommended_models", + return_value=payload, + ): + model = get_nous_recommended_aux_model(vision=False, free_tier=True) + assert model is None + + def test_get_aux_model_returns_none_on_empty_payload(self): + from 
hermes_cli.models import get_nous_recommended_aux_model + with patch("hermes_cli.models.fetch_nous_recommended_models", return_value={}): + assert get_nous_recommended_aux_model(vision=False, free_tier=True) is None + assert get_nous_recommended_aux_model(vision=True, free_tier=False) is None + + def test_get_aux_model_returns_none_when_modelname_blank(self): + from hermes_cli.models import get_nous_recommended_aux_model + payload = {"freeRecommendedCompactionModel": {"modelName": " "}} + with patch( + "hermes_cli.models.fetch_nous_recommended_models", + return_value=payload, + ): + assert get_nous_recommended_aux_model(vision=False, free_tier=True) is None + + def test_paid_tier_prefers_paid_recommendation(self): + """Paid-tier users should get the paid model when it's populated.""" + from hermes_cli.models import get_nous_recommended_aux_model + payload = { + "paidRecommendedCompactionModel": {"modelName": "anthropic/claude-opus-4.7"}, + "freeRecommendedCompactionModel": {"modelName": "google/gemini-3-flash-preview"}, + "paidRecommendedVisionModel": {"modelName": "openai/gpt-5.4"}, + "freeRecommendedVisionModel": {"modelName": "google/gemini-3-flash-preview"}, + } + with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload): + text = get_nous_recommended_aux_model(vision=False, free_tier=False) + vision = get_nous_recommended_aux_model(vision=True, free_tier=False) + assert text == "anthropic/claude-opus-4.7" + assert vision == "openai/gpt-5.4" + + def test_paid_tier_falls_back_to_free_when_paid_is_null(self): + """If the Portal returns null for the paid field, fall back to free.""" + from hermes_cli.models import get_nous_recommended_aux_model + payload = { + "paidRecommendedCompactionModel": None, + "freeRecommendedCompactionModel": {"modelName": "google/gemini-3-flash-preview"}, + "paidRecommendedVisionModel": None, + "freeRecommendedVisionModel": {"modelName": "google/gemini-3-flash-preview"}, + } + with 
patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload): + text = get_nous_recommended_aux_model(vision=False, free_tier=False) + vision = get_nous_recommended_aux_model(vision=True, free_tier=False) + assert text == "google/gemini-3-flash-preview" + assert vision == "google/gemini-3-flash-preview" + + def test_free_tier_never_uses_paid_recommendation(self): + """Free-tier users must not get paid-only recommendations.""" + from hermes_cli.models import get_nous_recommended_aux_model + payload = { + "paidRecommendedCompactionModel": {"modelName": "anthropic/claude-opus-4.7"}, + "freeRecommendedCompactionModel": None, # no free recommendation + } + with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload): + model = get_nous_recommended_aux_model(vision=False, free_tier=True) + # Free tier must return None — never leak the paid model. + assert model is None + + def test_auto_detects_tier_when_not_supplied(self): + """Default behaviour: call check_nous_free_tier() to pick the tier.""" + from hermes_cli.models import get_nous_recommended_aux_model + payload = { + "paidRecommendedCompactionModel": {"modelName": "paid-model"}, + "freeRecommendedCompactionModel": {"modelName": "free-model"}, + } + with ( + patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload), + patch("hermes_cli.models.check_nous_free_tier", return_value=True), + ): + assert get_nous_recommended_aux_model(vision=False) == "free-model" + with ( + patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload), + patch("hermes_cli.models.check_nous_free_tier", return_value=False), + ): + assert get_nous_recommended_aux_model(vision=False) == "paid-model" + + def test_tier_detection_error_defaults_to_paid(self): + """If tier detection raises, assume paid so we don't downgrade silently.""" + from hermes_cli.models import get_nous_recommended_aux_model + payload = { + "paidRecommendedCompactionModel": {"modelName": 
"paid-model"}, + "freeRecommendedCompactionModel": {"modelName": "free-model"}, + } + with ( + patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload), + patch("hermes_cli.models.check_nous_free_tier", side_effect=RuntimeError("boom")), + ): + assert get_nous_recommended_aux_model(vision=False) == "paid-model"