fix(models): read OpenRouter live context_length before hardcoded catch-all (#42986)

OpenRouter-routed slugs that are absent from models.dev (e.g. a freshly shipped anthropic/claude-fable-5) fell through to the generic DEFAULT_CONTEXT_LENGTHS["claude"]=200K entry and under-reported their real 1M window. The step-6 OpenRouter (OR) live-metadata fallback was gated on `not effective_provider`, but an OpenRouter selection sets effective_provider="openrouter" (inferred from the base URL), so that branch was dead code for every OR model. This change adds a dedicated step-5 OpenRouter branch that consults the live /models catalog (authoritative, and refreshed as new slugs ship) before models.dev and the hardcoded family defaults, mirroring the existing Nous/Copilot/GMI branches. The new branch keeps the guard against the Kimi-family 32k under-report. It returns each model's own catalog value (claude-haiku-4.5 stays 200K), so it does not blanket-bump every model to 1M. Regression tests cover the fable-5 case, the genuinely-200k case, and the Kimi guard.
2026-06-14 09:11:54 +00:00 · 2026-06-09 10:49:32 -07:00 · 2026-06-09 10:49:32 -07:00 · 967c325da8
commit 967c325da8
parent f6f573ebaa
2 changed files with 75 additions and 0 deletions
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@ -1810,6 +1810,28 @@ def get_model_context_length(
        if ctx is not None:
            save_context_length(model, base_url, ctx)
            return ctx
+    # 5f. OpenRouter live /models metadata — authoritative for OpenRouter-routed
+    # models. OpenRouter's catalog carries per-model context_length (e.g.
+    # anthropic/claude-fable-5 -> 1M) and refreshes as new slugs ship, so it
+    # must win over both models.dev (step 5g) and the hardcoded family catch-all
+    # (step 8). Before this branch, an OpenRouter selection set
+    # effective_provider="openrouter", so (a) models.dev, which lacks
+    # brand-new slugs, returned nothing and (b) the step-6 OR fallback (gated
+    # on `not effective_provider`) never ran; a fresh slug like claude-fable-5
+    # thus fell to the generic "claude": 200K entry and under-reported a 1M
+    # window. Mirrors the dedicated Nous/Copilot/GMI branches above.
+    if effective_provider == "openrouter":
+        metadata = fetch_model_metadata()
+        entry = metadata.get(model)
+        if entry:
+            or_ctx = entry.get("context_length")
+            # Reject the known OpenRouter Kimi-family 32k underreport (the same
+            # class of bad value the hardcoded overrides exist to mitigate).
+            if isinstance(or_ctx, int) and or_ctx > 0 and not (
+                or_ctx == 32768 and _model_name_suggests_kimi(model)
+            ):
+                return or_ctx
+
    if effective_provider:
        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(effective_provider, model)
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@ -220,6 +220,59 @@ class TestDefaultContextLengths:
                    f"{model_id}: expected {expected_ctx}, got {actual}"
                )

+    def test_openrouter_live_metadata_beats_hardcoded_catchall(self):
+        """OpenRouter-routed slugs resolve via the live OR catalog before the
+        hardcoded family catch-all.
+
+        Regression for the claude-fable-5 under-report: a brand-new Anthropic
+        slug that is absent from models.dev but present in OpenRouter's live
+        catalog (with a 1M window) used to fall through to the generic
+        ``"claude": 200000`` entry, because the step-6 OR fallback was gated on
+        ``not effective_provider`` and ``effective_provider`` is "openrouter"
+        for any OpenRouter selection. The dedicated step-5 OR branch must read
+        the live value instead.
+        """
+        from agent.model_metadata import get_model_context_length
+        from unittest.mock import patch as mock_patch
+
+        or_url = "https://openrouter.ai/api/v1"
+        live = {
+            "anthropic/claude-fable-5": {"context_length": 1_000_000},
+            "anthropic/claude-haiku-4.5": {"context_length": 200_000},
+        }
+        with mock_patch("agent.model_metadata.fetch_model_metadata", return_value=live), \
+             mock_patch("agent.model_metadata._query_ollama_api_show", return_value=None), \
+             mock_patch("agent.model_metadata.get_cached_context_length", return_value=None), \
+             mock_patch("agent.models_dev.lookup_models_dev_context", return_value=None):
+            # Before the fix this returned 200_000 via the "claude" catch-all.
+            assert get_model_context_length(
+                "anthropic/claude-fable-5", base_url=or_url, provider="openrouter"
+            ) == 1_000_000
+            # A genuinely-200k model still resolves to its real OR value — the
+            # fix reads per-model context, it does not blanket-bump to 1M.
+            assert get_model_context_length(
+                "anthropic/claude-haiku-4.5", base_url=or_url, provider="openrouter"
+            ) == 200_000
+
+    def test_openrouter_kimi_32k_underreport_still_guarded(self):
+        """The live OR branch keeps the Kimi-family 32k underreport guard:
+        a bogus 32768 from OpenRouter for a Kimi slug must NOT win — it falls
+        through to the hardcoded default instead.
+        """
+        from agent.model_metadata import get_model_context_length
+        from unittest.mock import patch as mock_patch
+
+        or_url = "https://openrouter.ai/api/v1"
+        live = {"moonshotai/kimi-k2.6": {"context_length": 32768}}
+        with mock_patch("agent.model_metadata.fetch_model_metadata", return_value=live), \
+             mock_patch("agent.model_metadata._query_ollama_api_show", return_value=None), \
+             mock_patch("agent.model_metadata.get_cached_context_length", return_value=None), \
+             mock_patch("agent.models_dev.lookup_models_dev_context", return_value=None):
+            ctx = get_model_context_length(
+                "moonshotai/kimi-k2.6", base_url=or_url, provider="openrouter"
+            )
+            assert ctx != 32768, "Kimi 32k OR underreport must not be accepted"
+

 # =========================================================================
 # Codex OAuth context-window resolution (provider="openai-codex")