Use nous portal as model metadata authority (#24502)

* nous portal metadata resolver

* minor fixes
This commit is contained in:
rob-maron 2026-05-12 14:59:31 -04:00 committed by GitHub
parent c594a23047
commit 2863e9484a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 306 additions and 22 deletions

View file

@ -10,7 +10,7 @@ import os
import re
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import urlparse
import requests
@ -1330,21 +1330,40 @@ def _resolve_codex_oauth_context_length(
return None
def _resolve_nous_context_length(model: str) -> Optional[int]:
"""Resolve Nous Portal model context length via OpenRouter metadata.
def _resolve_nous_context_length(
model: str,
base_url: str = "",
api_key: str = "",
) -> Tuple[Optional[int], str]:
"""Resolve Nous Portal model context length.
Nous model IDs are bare (e.g. 'claude-opus-4-6') while OpenRouter uses
prefixed IDs (e.g. 'anthropic/claude-opus-4.6'). Try suffix matching
with version normalization (dot↔dash).
Tries the live Nous inference endpoint first (authoritative), then falls
back to OpenRouter metadata with suffix/version matching.
Nous model IDs are bare after prefix-stripping (e.g. 'qwen3.6-plus',
'claude-opus-4-6') while OpenRouter uses prefixed IDs (e.g.
'qwen/qwen3.6-plus', 'anthropic/claude-opus-4.6'). Version
normalization (dot↔dash) is applied to handle name drifts.
Returns ``(context_length, source)`` where ``source`` is one of:
- ``"portal"`` — live /v1/models response (authoritative)
- ``"openrouter"`` — OpenRouter cache fallback (non-authoritative;
callers must NOT persist this to the on-disk cache, or a single
portal blip will freeze the wrong value in forever)
- ``""`` — could not resolve
"""
metadata = fetch_model_metadata() # OpenRouter cache
# Portal first — the Nous /models endpoint is authoritative for what our
# infrastructure enforces and may differ from OR (e.g. OR reports 1M for
# qwen3.6-plus; the portal correctly says 262144). Fall back to the OR
# catalog only if the portal doesn't list the model.
if base_url:
portal_ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
if portal_ctx is not None:
return portal_ctx, "portal"
metadata = fetch_model_metadata()
def _safe_ctx(or_id: str, entry: dict) -> Optional[int]:
"""Return context length, but reject stale 32k values for Kimi models.
Apply the same guard used for the generic OpenRouter path (step 6 in
resolve_context_length) so the Nous portal path does not short-circuit it.
"""
ctx = entry.get("context_length")
if ctx is None:
return None
@ -1357,19 +1376,20 @@ def _resolve_nous_context_length(model: str) -> Optional[int]:
return None
return ctx
# Exact match first
if model in metadata:
return _safe_ctx(model, metadata[model])
ctx = _safe_ctx(model, metadata[model])
if ctx is not None:
return ctx, "openrouter"
normalized = _normalize_model_version(model).lower()
for or_id, entry in metadata.items():
bare = or_id.split("/", 1)[1] if "/" in or_id else or_id
if bare.lower() == model.lower() or _normalize_model_version(bare).lower() == normalized:
return _safe_ctx(or_id, entry)
ctx = _safe_ctx(or_id, entry)
if ctx is not None:
return ctx, "openrouter"
# Partial prefix match for cases like gemini-3-flash → gemini-3-flash-preview
# Require match to be at a word boundary (followed by -, :, or end of string)
model_lower = model.lower()
for or_id, entry in metadata.items():
bare = or_id.split("/", 1)[1] if "/" in or_id else or_id
@ -1377,9 +1397,11 @@ def _resolve_nous_context_length(model: str) -> Optional[int]:
if candidate.startswith(query) and (
len(candidate) == len(query) or candidate[len(query)] in "-:."
):
return _safe_ctx(or_id, entry)
ctx = _safe_ctx(or_id, entry)
if ctx is not None:
return ctx, "openrouter"
return None
return None, ""
def get_model_context_length(
@ -1394,14 +1416,18 @@ def get_model_context_length(
Resolution order:
0. Explicit config override (model.context_length or custom_providers per-model)
1. Persistent cache (previously discovered via probing)
1. Persistent cache (previously discovered via probing). Nous URLs
bypass the cache here so step 5b can always reconcile against
the authoritative portal /v1/models response.
1b. AWS Bedrock static table (must precede custom-endpoint probe)
2. Active endpoint metadata (/models for explicit custom endpoints)
3. Local server query (for local endpoints)
4. Anthropic /v1/models API (API-key users only, not OAuth)
5. Provider-aware lookups (before generic OpenRouter cache):
a. Copilot live /models API
b. Nous suffix-match via OpenRouter cache
b. Nous: live /v1/models probe first (authoritative), then OR
cache fallback with suffix/version normalisation. Only
portal-derived values are persisted to disk.
c. Codex OAuth /models probe
d. GMI /models endpoint
e. Ollama native /api/show probe (any base_url, provider-agnostic)
@ -1464,6 +1490,20 @@ def get_model_context_length(
model, base_url, f"{cached:,}",
)
_invalidate_cached_context_length(model, base_url)
# Nous Portal: the portal /v1/models endpoint is authoritative.
# Bypass the persistent cache so step 5b can always reconcile
# against it — this corrects pre-fix entries seeded from the
# OR catalog (the same OR underreport class that the Kimi/Qwen
# DEFAULT_CONTEXT_LENGTHS overrides exist to mitigate) without
# touching the on-disk file when the portal is unreachable.
# The in-memory 300s endpoint metadata cache makes the per-call
# cost amortise to ~0 within a process.
elif _infer_provider_from_url(base_url) == "nous":
logger.debug(
"Bypassing persistent cache for %s@%s (Nous portal authoritative)",
model, base_url,
)
# Fall through; step 5b reconciles and overwrites if portal responds.
else:
return cached
@ -1555,8 +1595,18 @@ def get_model_context_length(
pass # Fall through to models.dev
if effective_provider == "nous":
ctx = _resolve_nous_context_length(model)
ctx, source = _resolve_nous_context_length(
model, base_url=base_url or "", api_key=api_key or ""
)
if ctx:
# Persist ONLY portal-derived values. Caching an OR-fallback
# value here would freeze in a wrong number on the first portal
# blip / auth glitch and step-1 would short-circuit it forever.
# OR's catalog is community-maintained and is precisely why the
# Kimi/Qwen DEFAULT_CONTEXT_LENGTHS overrides exist — we don't
# want it leaking into the persistent cache for Nous URLs.
if base_url and source == "portal":
save_context_length(model, base_url, ctx)
return ctx
if effective_provider == "openai-codex":
# Codex OAuth enforces lower context limits than the direct OpenAI

View file

@ -473,6 +473,240 @@ class TestCodexOAuthContextLength:
assert ctx == 1_000_000, "Non-codex 1M cache entries must be respected"
# =========================================================================
# Nous Portal context-window resolution (provider="nous")
# =========================================================================
class TestNousPortalContextResolution:
    """Pin down context-window resolution for Nous Portal base URLs.

    Nous Portal /v1/models is authoritative for what Nous infra enforces
    and may diverge from the OpenRouter catalog.

    Invariants this class pins down:
    1. Portal value wins over the OR fallback.
    2. Portal-derived values are persisted to disk.
    3. OR-fallback values are NEVER persisted — otherwise a single portal
       blip would freeze the wrong value in via step-1 cache short-circuit.
    4. Pre-fix persistent-cache entries (seeded from the OR catalog) are
       bypassed at step 1 and overwritten once the portal responds.
    5. Pre-fix persistent-cache entries SURVIVE on disk when the portal
       is unreachable — no opportunistic invalidation that loses the only
       value we have.
    """

    @staticmethod
    def _use_tmp_cache(mm, tmp_path, monkeypatch):
        """Redirect the persistent context-length cache to a file in tmp_path.

        Returns the path of the (not yet created) cache file so tests can
        seed it or inspect it afterwards.
        """
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
        return cache_file

    @staticmethod
    def _persisted(cache_file):
        """Return the on-disk ``context_lengths`` mapping.

        A missing or empty cache file yields ``{}`` so that callers fail on
        their own assertion message rather than on an unrelated
        FileNotFoundError / AttributeError.
        """
        if not cache_file.exists():
            return {}
        data = yaml.safe_load(cache_file.read_text()) or {}
        return data.get("context_lengths", {})

    def setup_method(self):
        """Clear the in-memory endpoint metadata cache before every test."""
        import agent.model_metadata as mm

        mm._endpoint_model_metadata_cache.clear()
        mm._endpoint_model_metadata_cache_time.clear()

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_portal_value_wins_over_openrouter_catalog(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """The motivating case: OR catalog says 1M for qwen3.6-plus, but
        the Nous portal correctly enforces 262144. Portal must win."""
        import agent.model_metadata as mm

        self._use_tmp_cache(mm, tmp_path, monkeypatch)

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }

        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url="https://inference-api.nousresearch.com/v1",
            api_key="fake-token",
            provider="nous",
        )
        assert ctx == 262_144, (
            f"Portal must override OR catalog; got {ctx} (OR leak?)"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_portal_value_is_persisted_to_disk(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """Portal-derived value should land in the persistent cache so
        cross-process callers (e.g. child agents) see the same value."""
        import agent.model_metadata as mm

        cache_file = self._use_tmp_cache(mm, tmp_path, monkeypatch)

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}

        base_url = "https://inference-api.nousresearch.com/v1"
        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        assert ctx == 262_144

        persisted = self._persisted(cache_file)
        assert persisted.get(f"qwen3.6-plus@{base_url}") == 262_144, (
            "Portal-derived value should be persisted to disk"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_openrouter_fallback_is_not_persisted(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """When the portal can't resolve a model (network blip, auth glitch,
        model not yet listed) we fall back to the OR catalog so the agent
        keeps working — but we must NOT write the OR value to disk. Once
        cached on disk, step-1 short-circuits forever and the user is stuck
        with the wrong number until they manually clear the cache."""
        import agent.model_metadata as mm

        cache_file = self._use_tmp_cache(mm, tmp_path, monkeypatch)

        mock_portal.return_value = {}  # portal unreachable / model unknown
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }

        base_url = "https://inference-api.nousresearch.com/v1"
        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        assert ctx == 1_000_000, "OR fallback should still serve the request"

        assert not self._persisted(cache_file), (
            "OR-fallback values must NOT be persisted — a single portal blip "
            "would otherwise freeze the wrong value in via step-1 cache hit"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_stale_cache_is_bypassed_and_overwritten_by_portal(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """Users upgrading from pre-fix builds have ``qwen3.6-plus@…nous… =
        1000000`` (OR-derived) sitting in their cache file. Step 1 must
        NOT short-circuit on that entry — step 5b reconciles against the
        portal and overwrites the persistent value with 262144."""
        import agent.model_metadata as mm

        cache_file = self._use_tmp_cache(mm, tmp_path, monkeypatch)

        base_url = "https://inference-api.nousresearch.com/v1"
        stale_key = f"qwen3.6-plus@{base_url}"
        other_key = "other-model@https://api.openai.com/v1"
        cache_file.write_text(yaml.dump({"context_lengths": {
            stale_key: 1_000_000,  # pre-fix OR-derived value
            other_key: 128_000,    # unrelated, must survive
        }}))

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}

        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        assert ctx == 262_144, (
            f"Stale OR-derived cache entry should not have leaked through; got {ctx}"
        )

        remaining = self._persisted(cache_file)
        assert remaining.get(stale_key) == 262_144, (
            "Portal value should have overwritten the stale entry on disk"
        )
        assert remaining.get(other_key) == 128_000, (
            "Unrelated cache entries must not be touched"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_stale_cache_survives_when_portal_unreachable(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """When the portal is unreachable AND we have a (potentially stale)
        on-disk cache entry, the entry must survive untouched — we don't
        want a transient outage to delete the only value we have. The
        request itself still gets served via OR fallback for this call."""
        import agent.model_metadata as mm

        cache_file = self._use_tmp_cache(mm, tmp_path, monkeypatch)

        base_url = "https://inference-api.nousresearch.com/v1"
        existing_key = f"qwen3.6-plus@{base_url}"
        cache_file.write_text(yaml.dump({"context_lengths": {
            existing_key: 1_000_000,
        }}))

        mock_portal.return_value = {}  # portal unreachable
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }

        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        # The docstring promises the call is still served; pin that down too.
        assert ctx == 1_000_000, "OR fallback should still serve the request"

        remaining = self._persisted(cache_file)
        assert remaining.get(existing_key) == 1_000_000, (
            "Persistent cache entry must survive a transient portal outage"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_bypass_keyed_on_url_not_provider_string(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """Some call sites pass ``provider=""`` or ``provider="openrouter"``
        when the user is really on Nous Portal (e.g. cred-pool fallback).
        The Nous-URL bypass must trigger off the URL host, not the provider
        string, so the portal-first resolver still runs in that case."""
        import agent.model_metadata as mm

        cache_file = self._use_tmp_cache(mm, tmp_path, monkeypatch)

        base_url = "https://inference-api.nousresearch.com/v1"

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}

        for provider_arg in ("", "openrouter", "custom"):
            # Re-seed the stale entry on every iteration: a successful portal
            # lookup overwrites it with 262144, so without this the later
            # provider strings would pass even if their bypass were broken
            # (step 1 would just return the freshly persisted value).
            cache_file.write_text(yaml.dump({"context_lengths": {
                f"qwen3.6-plus@{base_url}": 1_000_000,  # stale
            }}))
            mm._endpoint_model_metadata_cache.clear()
            mm._endpoint_model_metadata_cache_time.clear()

            ctx = mm.get_model_context_length(
                model="qwen3.6-plus",
                base_url=base_url,
                api_key="fake",
                provider=provider_arg,
            )
            assert ctx == 262_144, (
                f"URL-based Nous detection must fire for provider={provider_arg!r}; "
                f"got {ctx}"
            )
# =========================================================================
# get_model_context_length — resolution order
# =========================================================================