Use nous portal as model metadata authority (#24502)

* nous portal metadata resolver * minor fixes
2026-05-24 05:41:40 +00:00 · 2026-05-12 14:59:31 -04:00 · 2026-05-12 14:59:31 -04:00 · 2863e9484a
commit 2863e9484a
parent c594a23047
2 changed files with 306 additions and 22 deletions
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@ -473,6 +473,240 @@ class TestCodexOAuthContextLength:
        assert ctx == 1_000_000, "Non-codex 1M cache entries must be respected"


+# =========================================================================
+# Nous Portal context-window resolution (provider="nous")
+# =========================================================================
+
+class TestNousPortalContextResolution:
+    """Nous Portal /v1/models is authoritative for what Nous infra enforces
+    and may diverge from the OpenRouter catalog.
+
+    Invariants this class pins down:
+      1. Portal value wins over the OR fallback.
+      2. Portal-derived values are persisted to disk.
+      3. OR-fallback values are NEVER persisted — otherwise a single portal
+         blip would freeze the wrong value in via step-1 cache short-circuit.
+      4. Pre-fix persistent-cache entries (seeded from the OR catalog) are
+         bypassed at step 1 and overwritten once the portal responds.
+      5. Pre-fix persistent-cache entries SURVIVE on disk when the portal
+         is unreachable — no opportunistic invalidation that loses the only
+         value we have.
+    """
+
+    def setup_method(self):
+        import agent.model_metadata as mm
+        mm._endpoint_model_metadata_cache.clear()
+        mm._endpoint_model_metadata_cache_time.clear()
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_portal_value_wins_over_openrouter_catalog(
+        self, mock_or, mock_portal, tmp_path, monkeypatch
+    ):
+        """The motivating case: OR catalog says 1M for qwen3.6-plus, but
+        the Nous portal correctly enforces 262144.  Portal must win."""
+        import agent.model_metadata as mm
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        mock_portal.return_value = {
+            "qwen3.6-plus": {"context_length": 262_144},
+        }
+        mock_or.return_value = {
+            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
+        }
+
+        ctx = mm.get_model_context_length(
+            model="qwen3.6-plus",
+            base_url="https://inference-api.nousresearch.com/v1",
+            api_key="fake-token",
+            provider="nous",
+        )
+        assert ctx == 262_144, (
+            f"Portal must override OR catalog; got {ctx} (OR leak?)"
+        )
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_portal_value_is_persisted_to_disk(
+        self, mock_or, mock_portal, tmp_path, monkeypatch
+    ):
+        """Portal-derived value should land in the persistent cache so
+        cross-process callers (e.g. child agents) see the same value."""
+        import agent.model_metadata as mm
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        mock_portal.return_value = {
+            "qwen3.6-plus": {"context_length": 262_144},
+        }
+        mock_or.return_value = {}
+
+        base_url = "https://inference-api.nousresearch.com/v1"
+        ctx = mm.get_model_context_length(
+            model="qwen3.6-plus",
+            base_url=base_url,
+            api_key="fake",
+            provider="nous",
+        )
+        assert ctx == 262_144
+        persisted = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
+        assert persisted.get(f"qwen3.6-plus@{base_url}") == 262_144, (
+            "Portal-derived value should be persisted to disk"
+        )
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_openrouter_fallback_is_not_persisted(
+        self, mock_or, mock_portal, tmp_path, monkeypatch
+    ):
+        """When the portal can't resolve a model (network blip, auth glitch,
+        model not yet listed) we fall back to the OR catalog so the agent
+        keeps working — but we must NOT write the OR value to disk.  Once
+        cached on disk, step-1 short-circuits forever and the user is stuck
+        with the wrong number until they manually clear the cache."""
+        import agent.model_metadata as mm
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        mock_portal.return_value = {}  # portal unreachable / model unknown
+        mock_or.return_value = {
+            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
+        }
+
+        base_url = "https://inference-api.nousresearch.com/v1"
+        ctx = mm.get_model_context_length(
+            model="qwen3.6-plus",
+            base_url=base_url,
+            api_key="fake",
+            provider="nous",
+        )
+        assert ctx == 1_000_000, "OR fallback should still serve the request"
+        assert not cache_file.exists() or not yaml.safe_load(
+            cache_file.read_text()
+        ).get("context_lengths", {}), (
+            "OR-fallback values must NOT be persisted — a single portal blip "
+            "would otherwise freeze the wrong value in via step-1 cache hit"
+        )
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_stale_cache_is_bypassed_and_overwritten_by_portal(
+        self, mock_or, mock_portal, tmp_path, monkeypatch
+    ):
+        """Users upgrading from pre-fix builds have ``qwen3.6-plus@…nous… =
+        1000000`` (OR-derived) sitting in their cache file.  Step 1 must
+        NOT short-circuit on that entry — step 5b reconciles against the
+        portal and overwrites the persistent value with 262144."""
+        import agent.model_metadata as mm
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        base_url = "https://inference-api.nousresearch.com/v1"
+        stale_key = f"qwen3.6-plus@{base_url}"
+        other_key = "other-model@https://api.openai.com/v1"
+        cache_file.write_text(yaml.dump({"context_lengths": {
+            stale_key: 1_000_000,     # pre-fix OR-derived value
+            other_key: 128_000,       # unrelated, must survive
+        }}))
+
+        mock_portal.return_value = {
+            "qwen3.6-plus": {"context_length": 262_144},
+        }
+        mock_or.return_value = {}
+
+        ctx = mm.get_model_context_length(
+            model="qwen3.6-plus",
+            base_url=base_url,
+            api_key="fake",
+            provider="nous",
+        )
+        assert ctx == 262_144, (
+            f"Stale OR-derived cache entry should not have leaked through; got {ctx}"
+        )
+
+        remaining = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
+        assert remaining.get(stale_key) == 262_144, (
+            "Portal value should have overwritten the stale entry on disk"
+        )
+        assert remaining.get(other_key) == 128_000, (
+            "Unrelated cache entries must not be touched"
+        )
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_stale_cache_survives_when_portal_unreachable(
+        self, mock_or, mock_portal, tmp_path, monkeypatch
+    ):
+        """When the portal is unreachable AND we have a (potentially stale)
+        on-disk cache entry, the entry must survive untouched — we don't
+        want a transient outage to delete the only value we have.  The
+        request itself still gets served via OR fallback for this call."""
+        import agent.model_metadata as mm
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        base_url = "https://inference-api.nousresearch.com/v1"
+        existing_key = f"qwen3.6-plus@{base_url}"
+        cache_file.write_text(yaml.dump({"context_lengths": {
+            existing_key: 1_000_000,
+        }}))
+
+        mock_portal.return_value = {}  # portal unreachable
+        mock_or.return_value = {
+            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
+        }
+
+        mm.get_model_context_length(
+            model="qwen3.6-plus",
+            base_url=base_url,
+            api_key="fake",
+            provider="nous",
+        )
+
+        remaining = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
+        assert remaining.get(existing_key) == 1_000_000, (
+            "Persistent cache entry must survive a transient portal outage"
+        )
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_bypass_keyed_on_url_not_provider_string(
+        self, mock_or, mock_portal, tmp_path, monkeypatch
+    ):
+        """Some call sites pass ``provider=""`` or ``provider="openrouter"``
+        when the user is really on Nous Portal (e.g. cred-pool fallback).
+        The Nous-URL bypass must trigger off the URL host, not the provider
+        string, so the portal-first resolver still runs in that case."""
+        import agent.model_metadata as mm
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        base_url = "https://inference-api.nousresearch.com/v1"
+        cache_file.write_text(yaml.dump({"context_lengths": {
+            f"qwen3.6-plus@{base_url}": 1_000_000,  # stale
+        }}))
+
+        mock_portal.return_value = {
+            "qwen3.6-plus": {"context_length": 262_144},
+        }
+        mock_or.return_value = {}
+
+        for provider_arg in ("", "openrouter", "custom"):
+            mm._endpoint_model_metadata_cache.clear()
+            mm._endpoint_model_metadata_cache_time.clear()
+            ctx = mm.get_model_context_length(
+                model="qwen3.6-plus",
+                base_url=base_url,
+                api_key="fake",
+                provider=provider_arg,
+            )
+            assert ctx == 262_144, (
+                f"URL-based Nous detection must fire for provider={provider_arg!r}; "
+                f"got {ctx}"
+            )
+
+
 # =========================================================================
 # get_model_context_length — resolution order
 # =========================================================================