fix(models): read OpenRouter live context_length before hardcoded catch-all (#42986)

OpenRouter-routed slugs that are absent from models.dev (e.g. a freshly
shipped anthropic/claude-fable-5) fell through to the generic
DEFAULT_CONTEXT_LENGTHS["claude"]=200K entry and under-reported their real
1M window. The step-6 OpenRouter live-metadata fallback was gated on
`not effective_provider`, but an OpenRouter selection sets
effective_provider="openrouter" (inferred from the base URL), so that
branch was dead code for every OR model.

Add a dedicated step-5 OpenRouter branch that consults the live /models
catalog (authoritative, refreshes as new slugs ship) before models.dev and
the hardcoded family defaults — mirroring the existing Nous/Copilot/GMI
branches. Keeps the Kimi-family 32k underreport guard. Per-model values are
respected (claude-haiku-4.5 stays 200K), so it does not blanket-bump to 1M.

Regression tests cover the fable-5 case, the genuinely-200k case, and the
Kimi guard.
This commit is contained in:
Teknium 2026-06-09 10:49:32 -07:00 committed by GitHub
parent f6f573ebaa
commit 967c325da8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 75 additions and 0 deletions

View file

@ -1810,6 +1810,28 @@ def get_model_context_length(
if ctx is not None:
save_context_length(model, base_url, ctx)
return ctx
# 5f. OpenRouter live /models metadata — authoritative for OpenRouter-routed
# models. OpenRouter's catalog carries per-model context_length (e.g.
# anthropic/claude-fable-5 -> 1M) and refreshes as new slugs ship, so it
# must win over both models.dev (step 5g) and the hardcoded family catch-all
# (step 8). Before this branch, an OpenRouter selection set
# effective_provider="openrouter", which (a) made the models.dev lookup miss
# brand-new slugs and (b) skipped the step-6 OR fallback (gated on `not
# effective_provider`), so a fresh slug like claude-fable-5 fell through to
# the generic "claude": 200K entry and under-reported a 1M window. Mirrors
# the dedicated Nous/Copilot/GMI branches above.
if effective_provider == "openrouter":
metadata = fetch_model_metadata()
entry = metadata.get(model)
if entry:
or_ctx = entry.get("context_length")
# Guard against the known OpenRouter Kimi-family 32k underreport
# (same class the hardcoded overrides exist to mitigate).
if isinstance(or_ctx, int) and or_ctx > 0 and not (
or_ctx == 32768 and _model_name_suggests_kimi(model)
):
return or_ctx
if effective_provider:
from agent.models_dev import lookup_models_dev_context
ctx = lookup_models_dev_context(effective_provider, model)

View file

@ -220,6 +220,59 @@ class TestDefaultContextLengths:
f"{model_id}: expected {expected_ctx}, got {actual}"
)
def test_openrouter_live_metadata_beats_hardcoded_catchall(self):
    """Live OpenRouter catalog values win over the hardcoded family default.

    Regression for the claude-fable-5 under-report. A brand-new Anthropic
    slug that models.dev does not know, but that OpenRouter's live catalog
    lists with a 1M window, used to resolve to the generic
    ``"claude": 200000`` entry: the step-6 OR fallback is gated on
    ``not effective_provider``, and ``effective_provider`` is "openrouter"
    for any OpenRouter selection. The dedicated step-5 OR branch must
    return the live per-model value instead.
    """
    from unittest.mock import patch as mock_patch
    from agent.model_metadata import get_model_context_length

    openrouter_base = "https://openrouter.ai/api/v1"
    # Slug -> window the mocked live catalog advertises. The first entry is
    # the regression case (previously 200_000 via the "claude" catch-all);
    # the second is a genuinely-200k model, proving the branch reads the
    # per-model value rather than blanket-bumping everything to 1M.
    expected_windows = {
        "anthropic/claude-fable-5": 1_000_000,
        "anthropic/claude-haiku-4.5": 200_000,
    }
    catalog = {
        slug: {"context_length": window}
        for slug, window in expected_windows.items()
    }
    with mock_patch("agent.model_metadata.fetch_model_metadata", return_value=catalog), \
            mock_patch("agent.model_metadata._query_ollama_api_show", return_value=None), \
            mock_patch("agent.model_metadata.get_cached_context_length", return_value=None), \
            mock_patch("agent.models_dev.lookup_models_dev_context", return_value=None):
        for slug, window in expected_windows.items():
            resolved = get_model_context_length(
                slug, base_url=openrouter_base, provider="openrouter"
            )
            assert resolved == window
def test_openrouter_kimi_32k_underreport_still_guarded(self):
    """A bogus 32768 reported by OpenRouter for a Kimi slug is rejected.

    The live OR branch keeps the Kimi-family 32k underreport guard: the
    live value must NOT win; resolution falls through to the hardcoded
    default instead.
    """
    from unittest.mock import patch as mock_patch
    from agent.model_metadata import get_model_context_length

    openrouter_base = "https://openrouter.ai/api/v1"
    kimi_slug = "moonshotai/kimi-k2.6"
    bogus_window = 32768
    catalog = {kimi_slug: {"context_length": bogus_window}}
    with mock_patch("agent.model_metadata.fetch_model_metadata", return_value=catalog), \
            mock_patch("agent.model_metadata._query_ollama_api_show", return_value=None), \
            mock_patch("agent.model_metadata.get_cached_context_length", return_value=None), \
            mock_patch("agent.models_dev.lookup_models_dev_context", return_value=None):
        resolved = get_model_context_length(
            kimi_slug, base_url=openrouter_base, provider="openrouter"
        )
    # Only the bogus value is ruled out here; the exact fallback window is
    # owned by the hardcoded defaults and deliberately not pinned.
    assert resolved != bogus_window, "Kimi 32k OR underreport must not be accepted"
# =========================================================================
# Codex OAuth context-window resolution (provider="openai-codex")