From 2863e9484a1841d0a17044383c9a32482c01b20e Mon Sep 17 00:00:00 2001 From: rob-maron <132852777+rob-maron@users.noreply.github.com> Date: Tue, 12 May 2026 14:59:31 -0400 Subject: [PATCH] Use nous portal as model metadata authority (#24502) * nous portal metadata resolver * minor fixes --- agent/model_metadata.py | 94 +++++++++--- tests/agent/test_model_metadata.py | 234 +++++++++++++++++++++++++++++ 2 files changed, 306 insertions(+), 22 deletions(-) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 100c33a136c..f5e34fc18c6 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -10,7 +10,7 @@ import os import re import time from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple from urllib.parse import urlparse import requests @@ -1330,21 +1330,40 @@ def _resolve_codex_oauth_context_length( return None -def _resolve_nous_context_length(model: str) -> Optional[int]: - """Resolve Nous Portal model context length via OpenRouter metadata. +def _resolve_nous_context_length( + model: str, + base_url: str = "", + api_key: str = "", +) -> Tuple[Optional[int], str]: + """Resolve Nous Portal model context length. - Nous model IDs are bare (e.g. 'claude-opus-4-6') while OpenRouter uses - prefixed IDs (e.g. 'anthropic/claude-opus-4.6'). Try suffix matching - with version normalization (dot↔dash). + Tries the live Nous inference endpoint first (authoritative), then falls + back to OpenRouter metadata with suffix/version matching. + + Nous model IDs are bare after prefix-stripping (e.g. 'qwen3.6-plus', + 'claude-opus-4-6') while OpenRouter uses prefixed IDs (e.g. + 'qwen/qwen3.6-plus', 'anthropic/claude-opus-4.6'). Version + normalization (dot↔dash) is applied to handle name drifts. + + Returns ``(context_length, source)`` where ``source`` is one of: + - ``"portal"`` — live /v1/models response (authoritative) + - ``"openrouter"`` — OpenRouter cache fallback (non-authoritative; + callers must NOT persist this to the on-disk cache or a single + portal blip will freeze the wrong value in forever) + - ``""`` — could not resolve """ - metadata = fetch_model_metadata() # OpenRouter cache + # Portal first — the Nous /models endpoint is authoritative for what our + # infrastructure enforces and may differ from OR (e.g. OR reports 1M for + # qwen3.6-plus; the portal correctly says 262144). Fall back to the OR + # catalog only if the portal doesn't list the model. + if base_url: + portal_ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key) + if portal_ctx is not None: + return portal_ctx, "portal" + + metadata = fetch_model_metadata() def _safe_ctx(or_id: str, entry: dict) -> Optional[int]: - """Return context length, but reject stale 32k values for Kimi models. - - Apply the same guard used for the generic OpenRouter path (step 6 in - resolve_context_length) so the Nous portal path does not short-circuit it. - """ ctx = entry.get("context_length") if ctx is None: return None @@ -1357,19 +1376,20 @@ def _resolve_nous_context_length(model: str) -> Optional[int]: return None return ctx - # Exact match first if model in metadata: - return _safe_ctx(model, metadata[model]) + ctx = _safe_ctx(model, metadata[model]) + if ctx is not None: + return ctx, "openrouter" normalized = _normalize_model_version(model).lower() for or_id, entry in metadata.items(): bare = or_id.split("/", 1)[1] if "/" in or_id else or_id if bare.lower() == model.lower() or _normalize_model_version(bare).lower() == normalized: - return _safe_ctx(or_id, entry) + ctx = _safe_ctx(or_id, entry) + if ctx is not None: + return ctx, "openrouter" - # Partial prefix match for cases like gemini-3-flash → gemini-3-flash-preview - # Require match to be at a word boundary (followed by -, :, or end of string) model_lower = model.lower() for or_id, entry in metadata.items(): bare = or_id.split("/", 1)[1] if "/" in or_id else or_id @@ -1377,9 +1397,11 @@ def _resolve_nous_context_length(model: str) -> Optional[int]: if candidate.startswith(query) and ( len(candidate) == len(query) or candidate[len(query)] in "-:." ): - return _safe_ctx(or_id, entry) + ctx = _safe_ctx(or_id, entry) + if ctx is not None: + return ctx, "openrouter" - return None + return None, "" def get_model_context_length( @@ -1394,14 +1416,18 @@ def get_model_context_length( Resolution order: 0. Explicit config override (model.context_length or custom_providers per-model) - 1. Persistent cache (previously discovered via probing) + 1. Persistent cache (previously discovered via probing). Nous URLs + bypass the cache here so step 5b can always reconcile against + the authoritative portal /v1/models response. 1b. AWS Bedrock static table (must precede custom-endpoint probe) 2. Active endpoint metadata (/models for explicit custom endpoints) 3. Local server query (for local endpoints) 4. Anthropic /v1/models API (API-key users only, not OAuth) 5. Provider-aware lookups (before generic OpenRouter cache): a. Copilot live /models API - b. Nous suffix-match via OpenRouter cache + b. Nous: live /v1/models probe first (authoritative), then OR + cache fallback with suffix/version normalisation. Only + portal-derived values are persisted to disk. c. Codex OAuth /models probe d. GMI /models endpoint e. Ollama native /api/show probe (any base_url, provider-agnostic) @@ -1464,6 +1490,20 @@ def get_model_context_length( model, base_url, f"{cached:,}", ) _invalidate_cached_context_length(model, base_url) + # Nous Portal: the portal /v1/models endpoint is authoritative. + # Bypass the persistent cache so step 5b can always reconcile + # against it — this corrects pre-fix entries seeded from the + # OR catalog (the same OR underreport class that the Kimi/Qwen + # DEFAULT_CONTEXT_LENGTHS overrides exist to mitigate) without + # touching the on-disk file when the portal is unreachable. + # The in-memory 300s endpoint metadata cache makes the per-call + # cost amortise to ~0 within a process. + elif _infer_provider_from_url(base_url) == "nous": + logger.debug( + "Bypassing persistent cache for %s@%s (Nous portal authoritative)", + model, base_url, + ) + # Fall through; step 5b reconciles and overwrites if portal responds. else: return cached @@ -1555,8 +1595,18 @@ def get_model_context_length( pass # Fall through to models.dev if effective_provider == "nous": - ctx = _resolve_nous_context_length(model) + ctx, source = _resolve_nous_context_length( + model, base_url=base_url or "", api_key=api_key or "" + ) if ctx: + # Persist ONLY portal-derived values. Caching an OR-fallback + # value here would freeze in a wrong number on the first portal + # blip / auth glitch and step-1 would short-circuit it forever. + # OR's catalog is community-maintained and is precisely why the + # Kimi/Qwen DEFAULT_CONTEXT_LENGTHS overrides exist — we don't + # want it leaking into the persistent cache for Nous URLs. + if base_url and source == "portal": + save_context_length(model, base_url, ctx) return ctx if effective_provider == "openai-codex": # Codex OAuth enforces lower context limits than the direct OpenAI diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py index 63422ab5306..7686364dcac 100644 --- a/tests/agent/test_model_metadata.py +++ b/tests/agent/test_model_metadata.py @@ -473,6 +473,240 @@ class TestCodexOAuthContextLength: assert ctx == 1_000_000, "Non-codex 1M cache entries must be respected" +# ========================================================================= +# Nous Portal context-window resolution (provider="nous") +# ========================================================================= + +class TestNousPortalContextResolution: + """Nous Portal /v1/models is authoritative for what Nous infra enforces + and may diverge from the OpenRouter catalog. + + Invariants this class pins down: + 1. Portal value wins over the OR fallback. + 2. Portal-derived values are persisted to disk. + 3. OR-fallback values are NEVER persisted — otherwise a single portal + blip would freeze the wrong value in via step-1 cache short-circuit. + 4. Pre-fix persistent-cache entries (seeded from the OR catalog) are + bypassed at step 1 and overwritten once the portal responds. + 5. Pre-fix persistent-cache entries SURVIVE on disk when the portal + is unreachable — no opportunistic invalidation that loses the only + value we have. + """ + + def setup_method(self): + import agent.model_metadata as mm + mm._endpoint_model_metadata_cache.clear() + mm._endpoint_model_metadata_cache_time.clear() + + @patch("agent.model_metadata.fetch_endpoint_model_metadata") + @patch("agent.model_metadata.fetch_model_metadata") + def test_portal_value_wins_over_openrouter_catalog( + self, mock_or, mock_portal, tmp_path, monkeypatch + ): + """The motivating case: OR catalog says 1M for qwen3.6-plus, but + the Nous portal correctly enforces 262144. Portal must win.""" + import agent.model_metadata as mm + cache_file = tmp_path / "context_length_cache.yaml" + monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file) + + mock_portal.return_value = { + "qwen3.6-plus": {"context_length": 262_144}, + } + mock_or.return_value = { + "qwen/qwen3.6-plus": {"context_length": 1_000_000}, + } + + ctx = mm.get_model_context_length( + model="qwen3.6-plus", + base_url="https://inference-api.nousresearch.com/v1", + api_key="fake-token", + provider="nous", + ) + assert ctx == 262_144, ( + f"Portal must override OR catalog; got {ctx} (OR leak?)" + ) + + @patch("agent.model_metadata.fetch_endpoint_model_metadata") + @patch("agent.model_metadata.fetch_model_metadata") + def test_portal_value_is_persisted_to_disk( + self, mock_or, mock_portal, tmp_path, monkeypatch + ): + """Portal-derived value should land in the persistent cache so + cross-process callers (e.g. child agents) see the same value.""" + import agent.model_metadata as mm + cache_file = tmp_path / "context_length_cache.yaml" + monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file) + + mock_portal.return_value = { + "qwen3.6-plus": {"context_length": 262_144}, + } + mock_or.return_value = {} + + base_url = "https://inference-api.nousresearch.com/v1" + ctx = mm.get_model_context_length( + model="qwen3.6-plus", + base_url=base_url, + api_key="fake", + provider="nous", + ) + assert ctx == 262_144 + persisted = yaml.safe_load(cache_file.read_text()).get("context_lengths", {}) + assert persisted.get(f"qwen3.6-plus@{base_url}") == 262_144, ( + "Portal-derived value should be persisted to disk" + ) + + @patch("agent.model_metadata.fetch_endpoint_model_metadata") + @patch("agent.model_metadata.fetch_model_metadata") + def test_openrouter_fallback_is_not_persisted( + self, mock_or, mock_portal, tmp_path, monkeypatch + ): + """When the portal can't resolve a model (network blip, auth glitch, + model not yet listed) we fall back to the OR catalog so the agent + keeps working — but we must NOT write the OR value to disk. Once + cached on disk, step-1 short-circuits forever and the user is stuck + with the wrong number until they manually clear the cache.""" + import agent.model_metadata as mm + cache_file = tmp_path / "context_length_cache.yaml" + monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file) + + mock_portal.return_value = {} # portal unreachable / model unknown + mock_or.return_value = { + "qwen/qwen3.6-plus": {"context_length": 1_000_000}, + } + + base_url = "https://inference-api.nousresearch.com/v1" + ctx = mm.get_model_context_length( + model="qwen3.6-plus", + base_url=base_url, + api_key="fake", + provider="nous", + ) + assert ctx == 1_000_000, "OR fallback should still serve the request" + assert not cache_file.exists() or not yaml.safe_load( + cache_file.read_text() + ).get("context_lengths", {}), ( + "OR-fallback values must NOT be persisted — a single portal blip " + "would otherwise freeze the wrong value in via step-1 cache hit" + ) + + @patch("agent.model_metadata.fetch_endpoint_model_metadata") + @patch("agent.model_metadata.fetch_model_metadata") + def test_stale_cache_is_bypassed_and_overwritten_by_portal( + self, mock_or, mock_portal, tmp_path, monkeypatch + ): + """Users upgrading from pre-fix builds have ``qwen3.6-plus@…nous… = + 1000000`` (OR-derived) sitting in their cache file. Step 1 must + NOT short-circuit on that entry — step 5b reconciles against the + portal and overwrites the persistent value with 262144.""" + import agent.model_metadata as mm + cache_file = tmp_path / "context_length_cache.yaml" + monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file) + + base_url = "https://inference-api.nousresearch.com/v1" + stale_key = f"qwen3.6-plus@{base_url}" + other_key = "other-model@https://api.openai.com/v1" + cache_file.write_text(yaml.dump({"context_lengths": { + stale_key: 1_000_000, # pre-fix OR-derived value + other_key: 128_000, # unrelated, must survive + }})) + + mock_portal.return_value = { + "qwen3.6-plus": {"context_length": 262_144}, + } + mock_or.return_value = {} + + ctx = mm.get_model_context_length( + model="qwen3.6-plus", + base_url=base_url, + api_key="fake", + provider="nous", + ) + assert ctx == 262_144, ( + f"Stale OR-derived cache entry should not have leaked through; got {ctx}" + ) + + remaining = yaml.safe_load(cache_file.read_text()).get("context_lengths", {}) + assert remaining.get(stale_key) == 262_144, ( + "Portal value should have overwritten the stale entry on disk" + ) + assert remaining.get(other_key) == 128_000, ( + "Unrelated cache entries must not be touched" + ) + + @patch("agent.model_metadata.fetch_endpoint_model_metadata") + @patch("agent.model_metadata.fetch_model_metadata") + def test_stale_cache_survives_when_portal_unreachable( + self, mock_or, mock_portal, tmp_path, monkeypatch + ): + """When the portal is unreachable AND we have a (potentially stale) + on-disk cache entry, the entry must survive untouched — we don't + want a transient outage to delete the only value we have. The + request itself still gets served via OR fallback for this call.""" + import agent.model_metadata as mm + cache_file = tmp_path / "context_length_cache.yaml" + monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file) + + base_url = "https://inference-api.nousresearch.com/v1" + existing_key = f"qwen3.6-plus@{base_url}" + cache_file.write_text(yaml.dump({"context_lengths": { + existing_key: 1_000_000, + }})) + + mock_portal.return_value = {} # portal unreachable + mock_or.return_value = { + "qwen/qwen3.6-plus": {"context_length": 1_000_000}, + } + + mm.get_model_context_length( + model="qwen3.6-plus", + base_url=base_url, + api_key="fake", + provider="nous", + ) + + remaining = yaml.safe_load(cache_file.read_text()).get("context_lengths", {}) + assert remaining.get(existing_key) == 1_000_000, ( + "Persistent cache entry must survive a transient portal outage" + ) + + @patch("agent.model_metadata.fetch_endpoint_model_metadata") + @patch("agent.model_metadata.fetch_model_metadata") + def test_bypass_keyed_on_url_not_provider_string( + self, mock_or, mock_portal, tmp_path, monkeypatch + ): + """Some call sites pass ``provider=""`` or ``provider="openrouter"`` + when the user is really on Nous Portal (e.g. cred-pool fallback). + The Nous-URL bypass must trigger off the URL host, not the provider + string, so the portal-first resolver still runs in that case.""" + import agent.model_metadata as mm + cache_file = tmp_path / "context_length_cache.yaml" + monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file) + + base_url = "https://inference-api.nousresearch.com/v1" + cache_file.write_text(yaml.dump({"context_lengths": { + f"qwen3.6-plus@{base_url}": 1_000_000, # stale + }})) + + mock_portal.return_value = { + "qwen3.6-plus": {"context_length": 262_144}, + } + mock_or.return_value = {} + + for provider_arg in ("", "openrouter", "custom"): + mm._endpoint_model_metadata_cache.clear() + mm._endpoint_model_metadata_cache_time.clear() + ctx = mm.get_model_context_length( + model="qwen3.6-plus", + base_url=base_url, + api_key="fake", + provider=provider_arg, + ) + assert ctx == 262_144, ( + f"URL-based Nous detection must fire for provider={provider_arg!r}; " + f"got {ctx}" + ) + + # ========================================================================= # get_model_context_length — resolution order # =========================================================================