From 2e61de06388ac0cb184198e1bfddb3d0f41b638a Mon Sep 17 00:00:00 2001 From: islam666 Date: Thu, 4 Jun 2026 16:19:24 +0000 Subject: [PATCH] fix(model_metadata): consult DEFAULT_CONTEXT_LENGTHS before 256K fallback on custom endpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: get_model_context_length() had an early return at the end of the custom-endpoint probe branch (step 3) that returned DEFAULT_FALLBACK_CONTEXT (256K) without ever consulting the hardcoded DEFAULT_CONTEXT_LENGTHS catalog (step 8). Models served through a custom/proxied gateway (e.g. corporate Anthropic proxy) that didn't expose Ollama or local-server endpoints would hit this path and get capped at 256K, even when the model name clearly matched a known entry in the catalog (e.g. claude-opus-4-8 → 1M). Changes: - agent/model_metadata.py: Before returning DEFAULT_FALLBACK_CONTEXT at the end of the custom-endpoint branch, consult DEFAULT_CONTEXT_LENGTHS using the same longest-key-first fuzzy matching as step 8. Only fall through to 256K if no catalog entry matches. - tests/agent/test_model_metadata.py: Updated existing test and added new test covering the custom-endpoint → catalog fallback behavior. Fixes #38865 --- agent/model_metadata.py | 20 +++++++++ tests/agent/test_model_metadata.py | 72 ++++++++++++++++++++++++++++-- 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 1080256e0ac..531e9ae8459 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -1684,6 +1684,26 @@ def get_model_context_length( "in config.yaml to override.", model, base_url, f"{DEFAULT_FALLBACK_CONTEXT:,}", ) + # 3b. Before falling back to the hard 256K default, consult the + # hardcoded catalog as a last resort. A proxied/custom Anthropic + # gateway (e.g. corporate proxy) fails the Ollama/local probes + # above, but the model name may still match an entry in + # DEFAULT_CONTEXT_LENGTHS (e.g. "claude-opus-4-8" → 1M). + # Without this, the early return here short-circuits the catalog + # lookup at step 8 and silently caps context at 256K. + model_lower = model.lower() + for default_model, length in sorted( + DEFAULT_CONTEXT_LENGTHS.items(), + key=lambda x: len(x[0]), + reverse=True, + ): + if default_model in model_lower: + logger.info( + "Using hardcoded context length %s for model %r " + "(custom endpoint, catalog match on %r)", + f"{length:,}", model, default_model, + ) + return length return DEFAULT_FALLBACK_CONTEXT # 4. Anthropic /v1/models API (only for regular API keys, not OAuth) diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py index 0eab4dcff05..35950170a2a 100644 --- a/tests/agent/test_model_metadata.py +++ b/tests/agent/test_model_metadata.py @@ -18,6 +18,7 @@ from unittest.mock import patch, MagicMock from agent.model_metadata import ( CONTEXT_PROBE_TIERS, DEFAULT_CONTEXT_LENGTHS, + DEFAULT_FALLBACK_CONTEXT, _strip_provider_prefix, estimate_tokens_rough, estimate_messages_tokens_rough, @@ -773,17 +774,24 @@ class TestGetModelContextLength: @patch("agent.model_metadata.fetch_model_metadata") @patch("agent.model_metadata.fetch_endpoint_model_metadata") - def test_custom_endpoint_without_metadata_skips_name_based_default(self, mock_endpoint_fetch, mock_fetch): + def test_custom_endpoint_without_metadata_falls_back_to_catalog(self, mock_endpoint_fetch, mock_fetch): + """Custom endpoint with no metadata should fall back to the hardcoded + catalog (not 256K) when the model name matches a known entry. + + Previously this returned CONTEXT_PROBE_TIERS[0] (256K) because the + custom-endpoint branch short-circuited before the catalog lookup. + See #38865. + """ mock_fetch.return_value = {} mock_endpoint_fetch.return_value = {} + # GLM-5-TEE matches the "glm" entry in DEFAULT_CONTEXT_LENGTHS result = get_model_context_length( "zai-org/GLM-5-TEE", base_url="https://llm.chutes.ai/v1", api_key="test-key", ) - - assert result == CONTEXT_PROBE_TIERS[0] + assert result == 202752 # "glm" entry in DEFAULT_CONTEXT_LENGTHS @patch("agent.model_metadata.fetch_model_metadata") @patch("agent.model_metadata.fetch_endpoint_model_metadata") @@ -858,6 +866,64 @@ class TestGetModelContextLength: assert result == 200000 + @patch("agent.model_metadata.fetch_model_metadata") + def test_custom_endpoint_falls_back_to_hardcoded_catalog(self, mock_fetch): + """Custom/proxied endpoint that fails all probes should still resolve + via DEFAULT_CONTEXT_LENGTHS instead of returning 256K. + + Regression test for #38865: a corporate Anthropic proxy (custom + base_url) caused the custom-endpoint branch to short-circuit before + the catalog lookup, capping context at 256K even for models like + claude-opus-4-8 that are in the hardcoded catalog with 1M. + """ + mock_fetch.return_value = {} + + # Patch all the probe functions that the custom-endpoint branch calls + # so they all fail (return None/empty), simulating a proxy that + # doesn't expose Ollama or local-server endpoints. + with ( + patch( + "agent.model_metadata._resolve_endpoint_context_length", + return_value=None, + ), + patch( + "agent.model_metadata._query_ollama_api_show", + return_value=None, + ), + patch( + "agent.model_metadata._query_local_context_length", + return_value=None, + ), + patch( + "agent.model_metadata.is_local_endpoint", + return_value=False, + ), + ): + # A known model behind a custom proxy should resolve to its + # catalog value (1M), NOT the 256K fallback. + ctx = get_model_context_length( + "claude-opus-4-8", + base_url="https://my-gateway.example.com/v1/claude", + ) + assert ctx == 1000000, f"Expected 1000000, got {ctx}" + + # Another known model + ctx2 = get_model_context_length( + "claude-sonnet-4-6", + base_url="https://my-gateway.example.com/v1/claude", + ) + assert ctx2 == 1000000, f"Expected 1000000, got {ctx2}" + + # An unknown model on a custom endpoint should still fall back + # to 256K (no catalog match). + ctx3 = get_model_context_length( + "totally-unknown-model", + base_url="https://my-gateway.example.com/v1/claude", + ) + assert ctx3 == DEFAULT_FALLBACK_CONTEXT, ( + f"Expected {DEFAULT_FALLBACK_CONTEXT}, got {ctx3}" + ) + # ========================================================================= # Bedrock context resolution — must run BEFORE custom-endpoint probe