feat(aux): use Portal /api/nous/recommended-models for auxiliary models

Wire the auxiliary client (compaction, vision, session search, web extract)
to the Nous Portal's curated recommended-models endpoint when running on
Nous Portal, with a TTL-cached fetch that mirrors how we pull /models for
pricing.

hermes_cli/models.py
  - fetch_nous_recommended_models(portal_base_url, force_refresh=False)
    10-minute TTL cache, keyed per portal URL (staging vs prod don't
    collide).  Public endpoint, no auth required.  Returns {} on any
    failure so callers always get a dict.
  - get_nous_recommended_aux_model(vision, free_tier=None, ...)
    Tier-aware pick from the payload:
      - Paid tier → paidRecommended{Vision,Compaction}Model, falling back
        to freeRecommended* when the paid field is null (common during
        staged rollouts of new paid models).
      - Free tier → freeRecommended* only, never leaks paid models.
    When free_tier is None, auto-detects via the existing
    check_nous_free_tier() helper (already cached 3 min against
    /api/oauth/account).  Detection errors default to paid so we never
    silently downgrade a paying user.

agent/auxiliary_client.py — _try_nous()
  - Replaces the hardcoded xiaomi/mimo free-tier branch with a single call
    to get_nous_recommended_aux_model(vision=vision).
  - Falls back to _NOUS_MODEL (google/gemini-3-flash-preview) when the
    Portal is unreachable or returns a null recommendation.
  - The Portal is now the source of truth for aux model selection; the
    hardcoded xiaomi free-tier model constants we used to carry have been
    removed.

Tests (15 new)
  - tests/hermes_cli/test_models.py::TestNousRecommendedModels
    Fetch caching, per-portal keying, network failure, force_refresh;
    paid-prefers-paid, paid-falls-to-free, free-never-leaks-paid,
    auto-detect, detection-error → paid default, null/blank modelName
    handling.
  - tests/agent/test_auxiliary_client.py::TestNousAuxiliaryRefresh
    _try_nous honors Portal recommendation for text + vision, falls
    back to google/gemini-3-flash-preview on None or exception.

Behavior won't visibly change today — both tier recommendations currently
point at google/gemini-3-flash-preview — but the moment the Portal ships
a better paid recommendation, subscribers pick it up within 10 minutes
without a Hermes release.
This commit is contained in:
emozilla 2026-04-21 22:53:45 -04:00 committed by Teknium
parent c22f4a76de
commit 29693f9d8e
4 changed files with 417 additions and 17 deletions

View file

@ -182,8 +182,6 @@ auxiliary_is_nous: bool = False
# Default auxiliary models per provider # Default auxiliary models per provider
_OPENROUTER_MODEL = "google/gemini-3-flash-preview" _OPENROUTER_MODEL = "google/gemini-3-flash-preview"
_NOUS_MODEL = "google/gemini-3-flash-preview" _NOUS_MODEL = "google/gemini-3-flash-preview"
_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni"
_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro"
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1" _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
_ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com" _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
_AUTH_JSON_PATH = get_hermes_home() / "auth.json" _AUTH_JSON_PATH = get_hermes_home() / "auth.json"
@ -927,22 +925,35 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
global auxiliary_is_nous global auxiliary_is_nous
auxiliary_is_nous = True auxiliary_is_nous = True
logger.debug("Auxiliary client: Nous Portal") logger.debug("Auxiliary client: Nous Portal")
if nous.get("source") == "pool":
model = "gemini-3-flash" # Ask the Portal which model it currently recommends for this task type.
else: # The /api/nous/recommended-models endpoint is the authoritative source:
model = _NOUS_MODEL # it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model
# Free-tier users can't use paid auxiliary models — use the free # auto-detects the caller's tier via check_nous_free_tier(). Fall back to
# models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks. # _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable
# Paid accounts keep their tier-appropriate models: gemini-3-flash-preview # or returns a null recommendation for this task type.
# for both text and vision tasks. model = _NOUS_MODEL
try: try:
from hermes_cli.models import check_nous_free_tier from hermes_cli.models import get_nous_recommended_aux_model
if check_nous_free_tier(): recommended = get_nous_recommended_aux_model(vision=vision)
model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL if recommended:
logger.debug("Free-tier Nous account — using %s for auxiliary/%s", model = recommended
model, "vision" if vision else "text") logger.debug(
except Exception: "Auxiliary/%s: using Portal-recommended model %s",
pass "vision" if vision else "text", model,
)
else:
logger.debug(
"Auxiliary/%s: no Portal recommendation, falling back to %s",
"vision" if vision else "text", model,
)
except Exception as exc:
logger.debug(
"Auxiliary/%s: recommended-models lookup failed (%s); "
"falling back to %s",
"vision" if vision else "text", exc, model,
)
if runtime is not None: if runtime is not None:
api_key, base_url = runtime api_key, base_url = runtime
else: else:

View file

@ -513,6 +513,157 @@ def check_nous_free_tier() -> bool:
return False # default to paid on error — don't block users return False # default to paid on error — don't block users
# ---------------------------------------------------------------------------
# Nous Portal recommended models
#
# The Portal publishes a curated list of suggested models (separated into
# paid and free tiers) plus dedicated recommendations for compaction (text
# summarisation / auxiliary) and vision tasks. We fetch it once per process
# with a TTL cache so callers can ask "what's the best aux model right now?"
# without hitting the network on every lookup.
#
# Shape of the response (fields we care about):
# {
#   "paidRecommendedModels": [ {modelName, ...}, ... ],
#   "freeRecommendedModels": [ {modelName, ...}, ... ],
#   "paidRecommendedCompactionModel": {modelName, ...} | null,
#   "paidRecommendedVisionModel": {modelName, ...} | null,
#   "freeRecommendedCompactionModel": {modelName, ...} | null,
#   "freeRecommendedVisionModel": {modelName, ...} | null,
# }
# ---------------------------------------------------------------------------
NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models"
_NOUS_RECOMMENDED_CACHE_TTL: int = 600  # seconds (10 minutes)
# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide.
_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {}


def fetch_nous_recommended_models(
    portal_base_url: str = "",
    timeout: float = 5.0,
    *,
    force_refresh: bool = False,
) -> dict[str, Any]:
    """Fetch the Nous Portal's curated recommended-models payload.

    Hits ``<portal>/api/nous/recommended-models``. The endpoint is public —
    no auth is required. Results are cached per portal URL for
    ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to
    bypass the cache.

    Returns the parsed JSON dict on success. On failure (network, parse,
    non-2xx), returns the last successfully fetched payload for this portal
    if one exists — even if expired — and ``{}`` otherwise. Either way the
    result is re-cached for a full TTL so a flapping Portal isn't hammered.
    Callers must treat missing/null fields as "no recommendation" and fall
    back to their own default.
    """
    base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
    now = time.monotonic()
    cached = _nous_recommended_cache.get(base)
    if not force_refresh and cached is not None:
        payload, cached_at = cached
        if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL:
            return payload
    url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}"
    try:
        req = urllib.request.Request(
            url,
            headers={"Accept": "application/json"},
        )
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = json.loads(resp.read().decode())
        if not isinstance(data, dict):
            data = {}
    except Exception:
        # Don't let a transient fetch failure clobber known-good data for a
        # whole TTL: serve the stale payload when we have one. An empty dict
        # is only returned when we've never successfully fetched this portal.
        data = cached[0] if cached is not None else {}
    _nous_recommended_cache[base] = (data, now)
    return data
def _resolve_nous_portal_url() -> str:
    """Best-effort lookup of the Portal base URL the user is authed against.

    Prefers the portal recorded in the Nous auth state, then the package
    default, and finally a hardcoded production URL if auth lookup fails.
    """
    try:
        from hermes_cli.auth import (
            DEFAULT_NOUS_PORTAL_URL,
            get_provider_auth_state,
        )

        auth_state = get_provider_auth_state("nous") or {}
        configured = str(auth_state.get("portal_base_url") or "").strip()
        chosen = configured if configured else str(DEFAULT_NOUS_PORTAL_URL)
        return chosen.rstrip("/")
    except Exception:
        # Auth module unavailable or state unreadable — fall back to prod.
        return "https://portal.nousresearch.com"
def _extract_model_name(entry: Any) -> Optional[str]:
    """Pull the ``modelName`` field from a recommended-model entry, else None."""
    if isinstance(entry, dict):
        raw = entry.get("modelName")
        if isinstance(raw, str):
            cleaned = raw.strip()
            if cleaned:
                return cleaned
    # Non-dict entry, missing/non-string field, or blank name.
    return None
def get_nous_recommended_aux_model(
    *,
    vision: bool = False,
    free_tier: Optional[bool] = None,
    portal_base_url: str = "",
    force_refresh: bool = False,
) -> Optional[str]:
    """Return the Portal's recommended model name for an auxiliary task.

    Picks the best field from the Portal's recommended-models payload:

    * ``vision=True`` — ``paidRecommendedVisionModel`` (paid tier) or
      ``freeRecommendedVisionModel`` (free tier)
    * ``vision=False`` — ``paidRecommendedCompactionModel`` or
      ``freeRecommendedCompactionModel``

    When ``free_tier`` is ``None`` (default) the user's tier is auto-detected
    via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the
    detection — useful for tests or when the caller already knows the tier.

    For paid-tier users we prefer the paid recommendation but gracefully fall
    back to the free recommendation if the Portal returned ``null`` for the
    paid field (common during the staged rollout of new paid models).

    Returns ``None`` when every candidate is missing, null, or the fetch
    fails — callers should fall back to their own default (currently
    ``google/gemini-3-flash-preview``).
    """
    payload = fetch_nous_recommended_models(
        portal_base_url or _resolve_nous_portal_url(),
        force_refresh=force_refresh,
    )
    if not payload:
        return None

    if free_tier is None:
        try:
            free_tier = check_nous_free_tier()
        except Exception:
            # On any detection error, assume paid — paid users see both fields
            # anyway so this is a safe default that maximises model quality.
            free_tier = False

    task = "Vision" if vision else "Compaction"
    paid_key = f"paidRecommended{task}Model"
    free_key = f"freeRecommended{task}Model"

    # Preference order:
    #   free tier → free only (never leak paid-only models)
    #   paid tier → paid, then free (if paid field is null)
    lookup_order = (free_key,) if free_tier else (paid_key, free_key)
    for key in lookup_order:
        candidate = _extract_model_name(payload.get(key))
        if candidate:
            return candidate
    return None
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Canonical provider list — single source of truth for provider identity. # Canonical provider list — single source of truth for provider identity.
# Every code path that lists, displays, or iterates providers derives from # Every code path that lists, displays, or iterates providers derives from

View file

@ -483,6 +483,7 @@ class TestNousAuxiliaryRefresh:
with ( with (
patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}), patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)), patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
patch("agent.auxiliary_client.OpenAI") as mock_openai, patch("agent.auxiliary_client.OpenAI") as mock_openai,
): ):
from agent.auxiliary_client import _try_nous from agent.auxiliary_client import _try_nous
@ -491,10 +492,60 @@ class TestNousAuxiliaryRefresh:
client, model = _try_nous() client, model = _try_nous()
assert client is not None assert client is not None
# No Portal recommendation → falls back to the hardcoded default.
assert model == "google/gemini-3-flash-preview" assert model == "google/gemini-3-flash-preview"
assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key" assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key"
assert mock_openai.call_args.kwargs["base_url"] == fresh_base assert mock_openai.call_args.kwargs["base_url"] == fresh_base
    def test_try_nous_uses_portal_recommendation_for_text(self):
        """When the Portal recommends a compaction model, _try_nous honors it."""
        fresh_base = "https://inference-api.nousresearch.com/v1"
        with (
            # Stub auth + runtime resolution so no real credentials are read.
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
            # Portal says: use minimax for text/compaction.
            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="minimax/minimax-m2.7") as mock_rec,
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            from agent.auxiliary_client import _try_nous

            mock_openai.return_value = MagicMock()
            client, model = _try_nous(vision=False)
            assert client is not None
            # Portal recommendation wins over the hardcoded default.
            assert model == "minimax/minimax-m2.7"
            # Text tasks must ask for the compaction (vision=False) model.
            assert mock_rec.call_args.kwargs["vision"] is False
    def test_try_nous_uses_portal_recommendation_for_vision(self):
        """Vision tasks should ask for the vision-specific recommendation."""
        fresh_base = "https://inference-api.nousresearch.com/v1"
        with (
            # Stub auth + runtime resolution so no real credentials are read.
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="google/gemini-3-flash-preview") as mock_rec,
            patch("agent.auxiliary_client.OpenAI"),
        ):
            from agent.auxiliary_client import _try_nous

            client, model = _try_nous(vision=True)
            assert client is not None
            assert model == "google/gemini-3-flash-preview"
            # Vision tasks must request the vision-specific recommendation.
            assert mock_rec.call_args.kwargs["vision"] is True
    def test_try_nous_falls_back_when_recommendation_lookup_raises(self):
        """If the Portal lookup throws, we must still return a usable model."""
        fresh_base = "https://inference-api.nousresearch.com/v1"
        with (
            # Stub auth + runtime resolution so no real credentials are read.
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
            # Simulate the recommended-models lookup blowing up entirely.
            patch("hermes_cli.models.get_nous_recommended_aux_model", side_effect=RuntimeError("portal down")),
            patch("agent.auxiliary_client.OpenAI"),
        ):
            from agent.auxiliary_client import _try_nous

            client, model = _try_nous()
            assert client is not None
            # Lookup failure → hardcoded _NOUS_MODEL default.
            assert model == "google/gemini-3-flash-preview"
def test_call_llm_retries_nous_after_401(self): def test_call_llm_retries_nous_after_401(self):
class _Auth401(Exception): class _Auth401(Exception):
status_code = 401 status_code = 401

View file

@ -417,3 +417,190 @@ class TestCheckNousFreeTierCache:
def test_cache_ttl_is_short(self): def test_cache_ttl_is_short(self):
"""TTL should be short enough to catch upgrades quickly (<=5 min).""" """TTL should be short enough to catch upgrades quickly (<=5 min)."""
assert _FREE_TIER_CACHE_TTL <= 300 assert _FREE_TIER_CACHE_TTL <= 300
class TestNousRecommendedModels:
    """Tests for fetch_nous_recommended_models + get_nous_recommended_aux_model."""

    # Mirrors the Portal response shape: paid fields null, free fields
    # populated — the state described in the commit as current production.
    _SAMPLE_PAYLOAD = {
        "paidRecommendedModels": [],
        "freeRecommendedModels": [],
        "paidRecommendedCompactionModel": None,
        "paidRecommendedVisionModel": None,
        "freeRecommendedCompactionModel": {
            "modelName": "google/gemini-3-flash-preview",
            "displayName": "Google: Gemini 3 Flash Preview",
        },
        "freeRecommendedVisionModel": {
            "modelName": "google/gemini-3-flash-preview",
            "displayName": "Google: Gemini 3 Flash Preview",
        },
    }

    def setup_method(self):
        # Start every test with an empty TTL cache so fetch counts and
        # cache-hit assertions are deterministic.
        _models_mod._nous_recommended_cache.clear()

    def teardown_method(self):
        # Leave no cached payloads behind for other test classes.
        _models_mod._nous_recommended_cache.clear()

    def _mock_urlopen(self, payload):
        """Return a context-manager mock mimicking urllib.request.urlopen()."""
        import json as _json

        response = MagicMock()
        response.read.return_value = _json.dumps(payload).encode()
        cm = MagicMock()
        cm.__enter__.return_value = response
        cm.__exit__.return_value = False
        return cm

    def test_fetch_caches_per_portal_url(self):
        from hermes_cli.models import fetch_nous_recommended_models

        mock_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD)
        with patch("urllib.request.urlopen", return_value=mock_cm) as mock_urlopen:
            a = fetch_nous_recommended_models("https://portal.example.com")
            b = fetch_nous_recommended_models("https://portal.example.com")
        assert a == self._SAMPLE_PAYLOAD
        assert b == self._SAMPLE_PAYLOAD
        assert mock_urlopen.call_count == 1  # second call served from cache

    def test_fetch_cache_is_keyed_per_portal(self):
        from hermes_cli.models import fetch_nous_recommended_models

        mock_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD)
        with patch("urllib.request.urlopen", return_value=mock_cm) as mock_urlopen:
            fetch_nous_recommended_models("https://portal.example.com")
            fetch_nous_recommended_models("https://portal.staging-nousresearch.com")
        assert mock_urlopen.call_count == 2  # different portals → separate fetches

    def test_fetch_returns_empty_on_network_failure(self):
        from hermes_cli.models import fetch_nous_recommended_models

        with patch("urllib.request.urlopen", side_effect=OSError("boom")):
            result = fetch_nous_recommended_models("https://portal.example.com")
        assert result == {}

    def test_fetch_force_refresh_bypasses_cache(self):
        from hermes_cli.models import fetch_nous_recommended_models

        mock_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD)
        with patch("urllib.request.urlopen", return_value=mock_cm) as mock_urlopen:
            fetch_nous_recommended_models("https://portal.example.com")
            fetch_nous_recommended_models("https://portal.example.com", force_refresh=True)
        assert mock_urlopen.call_count == 2

    def test_get_aux_model_returns_vision_recommendation(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        with patch(
            "hermes_cli.models.fetch_nous_recommended_models",
            return_value=self._SAMPLE_PAYLOAD,
        ):
            # Free tier → free vision recommendation.
            model = get_nous_recommended_aux_model(vision=True, free_tier=True)
        assert model == "google/gemini-3-flash-preview"

    def test_get_aux_model_returns_compaction_recommendation(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = dict(self._SAMPLE_PAYLOAD)
        payload["freeRecommendedCompactionModel"] = {"modelName": "minimax/minimax-m2.7"}
        with patch(
            "hermes_cli.models.fetch_nous_recommended_models",
            return_value=payload,
        ):
            model = get_nous_recommended_aux_model(vision=False, free_tier=True)
        assert model == "minimax/minimax-m2.7"

    def test_get_aux_model_returns_none_when_field_null(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = dict(self._SAMPLE_PAYLOAD)
        payload["freeRecommendedCompactionModel"] = None
        with patch(
            "hermes_cli.models.fetch_nous_recommended_models",
            return_value=payload,
        ):
            model = get_nous_recommended_aux_model(vision=False, free_tier=True)
        assert model is None

    def test_get_aux_model_returns_none_on_empty_payload(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        # Empty payload (fetch failure) → no recommendation for any tier/task.
        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value={}):
            assert get_nous_recommended_aux_model(vision=False, free_tier=True) is None
            assert get_nous_recommended_aux_model(vision=True, free_tier=False) is None

    def test_get_aux_model_returns_none_when_modelname_blank(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        # A whitespace-only modelName must not be treated as a recommendation.
        payload = {"freeRecommendedCompactionModel": {"modelName": " "}}
        with patch(
            "hermes_cli.models.fetch_nous_recommended_models",
            return_value=payload,
        ):
            assert get_nous_recommended_aux_model(vision=False, free_tier=True) is None

    def test_paid_tier_prefers_paid_recommendation(self):
        """Paid-tier users should get the paid model when it's populated."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": {"modelName": "anthropic/claude-opus-4.7"},
            "freeRecommendedCompactionModel": {"modelName": "google/gemini-3-flash-preview"},
            "paidRecommendedVisionModel": {"modelName": "openai/gpt-5.4"},
            "freeRecommendedVisionModel": {"modelName": "google/gemini-3-flash-preview"},
        }
        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
            text = get_nous_recommended_aux_model(vision=False, free_tier=False)
            vision = get_nous_recommended_aux_model(vision=True, free_tier=False)
        assert text == "anthropic/claude-opus-4.7"
        assert vision == "openai/gpt-5.4"

    def test_paid_tier_falls_back_to_free_when_paid_is_null(self):
        """If the Portal returns null for the paid field, fall back to free."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": None,
            "freeRecommendedCompactionModel": {"modelName": "google/gemini-3-flash-preview"},
            "paidRecommendedVisionModel": None,
            "freeRecommendedVisionModel": {"modelName": "google/gemini-3-flash-preview"},
        }
        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
            text = get_nous_recommended_aux_model(vision=False, free_tier=False)
            vision = get_nous_recommended_aux_model(vision=True, free_tier=False)
        assert text == "google/gemini-3-flash-preview"
        assert vision == "google/gemini-3-flash-preview"

    def test_free_tier_never_uses_paid_recommendation(self):
        """Free-tier users must not get paid-only recommendations."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": {"modelName": "anthropic/claude-opus-4.7"},
            "freeRecommendedCompactionModel": None,  # no free recommendation
        }
        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
            model = get_nous_recommended_aux_model(vision=False, free_tier=True)
        # Free tier must return None — never leak the paid model.
        assert model is None

    def test_auto_detects_tier_when_not_supplied(self):
        """Default behaviour: call check_nous_free_tier() to pick the tier."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": {"modelName": "paid-model"},
            "freeRecommendedCompactionModel": {"modelName": "free-model"},
        }
        with (
            patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload),
            patch("hermes_cli.models.check_nous_free_tier", return_value=True),
        ):
            assert get_nous_recommended_aux_model(vision=False) == "free-model"
        with (
            patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload),
            patch("hermes_cli.models.check_nous_free_tier", return_value=False),
        ):
            assert get_nous_recommended_aux_model(vision=False) == "paid-model"

    def test_tier_detection_error_defaults_to_paid(self):
        """If tier detection raises, assume paid so we don't downgrade silently."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": {"modelName": "paid-model"},
            "freeRecommendedCompactionModel": {"modelName": "free-model"},
        }
        with (
            patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload),
            patch("hermes_cli.models.check_nous_free_tier", side_effect=RuntimeError("boom")),
        ):
            assert get_nous_recommended_aux_model(vision=False) == "paid-model"