Use nous portal as model metadata authority (#24502)

* nous portal metadata resolver

* minor fixes
This commit is contained in:
rob-maron 2026-05-12 14:59:31 -04:00 committed by GitHub
parent c594a23047
commit 2863e9484a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 306 additions and 22 deletions

View file

@@ -473,6 +473,240 @@ class TestCodexOAuthContextLength:
assert ctx == 1_000_000, "Non-codex 1M cache entries must be respected"
# =========================================================================
# Nous Portal context-window resolution (provider="nous")
# =========================================================================
class TestNousPortalContextResolution:
    """Nous Portal ``/v1/models`` is authoritative for what Nous infra enforces
    and may diverge from the OpenRouter (OR) catalog.

    Invariants this class pins down:

    1. Portal value wins over the OR fallback.
    2. Portal-derived values are persisted to disk.
    3. OR-fallback values are NEVER persisted — otherwise a single portal
       blip would freeze the wrong value in via the step-1 cache
       short-circuit.
    4. Pre-fix persistent-cache entries (seeded from the OR catalog) are
       bypassed at step 1 and overwritten once the portal responds.
    5. Pre-fix persistent-cache entries SURVIVE on disk when the portal
       is unreachable — no opportunistic invalidation that loses the only
       value we have.

    Mock wiring used by every test: ``@patch`` decorators apply bottom-up, so
    ``mock_or`` replaces ``fetch_model_metadata`` (the OR catalog) and
    ``mock_portal`` replaces ``fetch_endpoint_model_metadata`` (the portal).
    """

    @staticmethod
    def _persisted_context_lengths(cache_file):
        """Return the ``context_lengths`` mapping stored in *cache_file*.

        A missing or empty cache file yields ``{}`` so that callers fail on
        their own assertion message rather than on ``FileNotFoundError`` or
        on ``None.get`` (``yaml.safe_load`` returns ``None`` for empty input).
        """
        if not cache_file.exists():
            return {}
        document = yaml.safe_load(cache_file.read_text()) or {}
        return document.get("context_lengths") or {}

    def setup_method(self):
        """Reset the in-memory endpoint metadata caches before each test."""
        import agent.model_metadata as mm

        mm._endpoint_model_metadata_cache.clear()
        mm._endpoint_model_metadata_cache_time.clear()

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_portal_value_wins_over_openrouter_catalog(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """The motivating case: the OR catalog says 1M for qwen3.6-plus, but
        the Nous portal correctly enforces 262144. The portal must win."""
        import agent.model_metadata as mm

        # Redirect the persistent cache into the per-test temp directory.
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }

        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url="https://inference-api.nousresearch.com/v1",
            api_key="fake-token",
            provider="nous",
        )

        assert ctx == 262_144, (
            f"Portal must override OR catalog; got {ctx} (OR leak?)"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_portal_value_is_persisted_to_disk(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """A portal-derived value should land in the persistent cache so
        cross-process callers (e.g. child agents) see the same value."""
        import agent.model_metadata as mm

        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}

        base_url = "https://inference-api.nousresearch.com/v1"
        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )

        assert ctx == 262_144
        persisted = self._persisted_context_lengths(cache_file)
        assert persisted.get(f"qwen3.6-plus@{base_url}") == 262_144, (
            "Portal-derived value should be persisted to disk"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_openrouter_fallback_is_not_persisted(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """When the portal can't resolve a model (network blip, auth glitch,
        model not yet listed) we fall back to the OR catalog so the agent
        keeps working — but we must NOT write the OR value to disk. Once
        cached on disk, step 1 short-circuits forever and the user is stuck
        with the wrong number until they manually clear the cache."""
        import agent.model_metadata as mm

        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        mock_portal.return_value = {}  # portal unreachable / model unknown
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }

        base_url = "https://inference-api.nousresearch.com/v1"
        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )

        assert ctx == 1_000_000, "OR fallback should still serve the request"
        # Either no cache file was written at all, or it holds no entries.
        assert not self._persisted_context_lengths(cache_file), (
            "OR-fallback values must NOT be persisted — a single portal blip "
            "would otherwise freeze the wrong value in via step-1 cache hit"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_stale_cache_is_bypassed_and_overwritten_by_portal(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """Users upgrading from pre-fix builds have ``qwen3.6-plus@…nous… =
        1000000`` (OR-derived) sitting in their cache file. Step 1 must
        NOT short-circuit on that entry — step 5b reconciles against the
        portal and overwrites the persistent value with 262144."""
        import agent.model_metadata as mm

        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        base_url = "https://inference-api.nousresearch.com/v1"
        stale_key = f"qwen3.6-plus@{base_url}"
        other_key = "other-model@https://api.openai.com/v1"
        cache_file.write_text(yaml.dump({"context_lengths": {
            stale_key: 1_000_000,  # pre-fix OR-derived value
            other_key: 128_000,  # unrelated, must survive
        }}))

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}

        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )

        assert ctx == 262_144, (
            f"Stale OR-derived cache entry should not have leaked through; got {ctx}"
        )
        remaining = self._persisted_context_lengths(cache_file)
        assert remaining.get(stale_key) == 262_144, (
            "Portal value should have overwritten the stale entry on disk"
        )
        assert remaining.get(other_key) == 128_000, (
            "Unrelated cache entries must not be touched"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_stale_cache_survives_when_portal_unreachable(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """When the portal is unreachable AND we have a (potentially stale)
        on-disk cache entry, the entry must survive untouched — we don't
        want a transient outage to delete the only value we have. The
        request itself is expected to be served via the OR fallback for
        this call; only the on-disk state is asserted here."""
        import agent.model_metadata as mm

        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        base_url = "https://inference-api.nousresearch.com/v1"
        existing_key = f"qwen3.6-plus@{base_url}"
        cache_file.write_text(yaml.dump({"context_lengths": {
            existing_key: 1_000_000,
        }}))

        mock_portal.return_value = {}  # portal unreachable
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }

        mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )

        remaining = self._persisted_context_lengths(cache_file)
        assert remaining.get(existing_key) == 1_000_000, (
            "Persistent cache entry must survive a transient portal outage"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_bypass_keyed_on_url_not_provider_string(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """Some call sites pass ``provider=""`` or ``provider="openrouter"``
        when the user is really on Nous Portal (e.g. cred-pool fallback).
        The Nous-URL bypass must trigger off the URL host, not the provider
        string, so the portal-first resolver still runs in that case."""
        import agent.model_metadata as mm

        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        base_url = "https://inference-api.nousresearch.com/v1"
        stale_key = f"qwen3.6-plus@{base_url}"

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}

        for provider_arg in ("", "openrouter", "custom"):
            # Re-seed the stale entry on every iteration: a successful portal
            # lookup overwrites it on disk, so without this only the first
            # provider value would actually exercise the step-1 bypass.
            cache_file.write_text(yaml.dump({"context_lengths": {
                stale_key: 1_000_000,  # stale
            }}))
            mm._endpoint_model_metadata_cache.clear()
            mm._endpoint_model_metadata_cache_time.clear()

            ctx = mm.get_model_context_length(
                model="qwen3.6-plus",
                base_url=base_url,
                api_key="fake",
                provider=provider_arg,
            )

            assert ctx == 262_144, (
                f"URL-based Nous detection must fire for provider={provider_arg!r}; "
                f"got {ctx}"
            )
# =========================================================================
# get_model_context_length — resolution order
# =========================================================================