diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 2916a7fa7..a15409101 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -1047,6 +1047,7 @@ def get_model_context_length( Resolution order: 0. Explicit config override (model.context_length or custom_providers per-model) 1. Persistent cache (previously discovered via probing) + 1b. AWS Bedrock static table (must precede custom-endpoint probe) 2. Active endpoint metadata (/models for explicit custom endpoints) 3. Local server query (for local endpoints) 4. Anthropic /v1/models API (API-key users only, not OAuth) @@ -1071,6 +1072,26 @@ def get_model_context_length( if cached is not None: return cached + # 1b. AWS Bedrock — use static context length table. + # Bedrock's ListFoundationModels API doesn't expose context window sizes, + # so we maintain a curated table in bedrock_adapter.py that reflects + # AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native + # Anthropic API). This must run BEFORE the custom-endpoint probe at + # step 2 — bedrock-runtime.<region>.amazonaws.com is not in + # _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint, + # fail the /models probe (Bedrock doesn't expose that shape), and fall + # back to the 128K default before reaching the original step 4b branch. + if provider == "bedrock" or ( + base_url + and base_url_hostname(base_url).startswith("bedrock-runtime.") + and base_url_host_matches(base_url, "amazonaws.com") + ): + try: + from agent.bedrock_adapter import get_bedrock_context_length + return get_bedrock_context_length(model) + except ImportError: + pass # boto3 not installed — fall through to generic resolution + # 2. Active endpoint metadata for truly custom/unknown endpoints. # Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their # /models endpoint may report a provider-imposed limit (e.g. Copilot @@ -1116,19 +1137,7 @@ def get_model_context_length( if ctx: return ctx - # 4b. 
AWS Bedrock — use static context length table. - # Bedrock's ListFoundationModels doesn't expose context window sizes, - # so we maintain a curated table in bedrock_adapter.py. - if provider == "bedrock" or ( - base_url - and base_url_hostname(base_url).startswith("bedrock-runtime.") - and base_url_host_matches(base_url, "amazonaws.com") - ): - try: - from agent.bedrock_adapter import get_bedrock_context_length - return get_bedrock_context_length(model) - except ImportError: - pass # boto3 not installed — fall through to generic resolution + # 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.) # 5. Provider-aware lookups (before generic OpenRouter cache) # These are provider-specific and take priority over the generic OR cache, diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py index 45e716022..5953694d4 100644 --- a/tests/agent/test_model_metadata.py +++ b/tests/agent/test_model_metadata.py @@ -376,6 +376,57 @@ class TestGetModelContextLength: assert result == 200000 +# ========================================================================= +# Bedrock context resolution — must run BEFORE custom-endpoint probe +# ========================================================================= + +class TestBedrockContextResolution: + """Regression tests for Bedrock context-length resolution order. + + Bug: because ``bedrock-runtime.<region>.amazonaws.com`` is not listed in + ``_URL_TO_PROVIDER``, ``_is_known_provider_base_url`` returned False and + the custom-endpoint probe at step 2 ran first — fetching ``/models`` from + Bedrock (which it doesn't serve), returning the 128K default-fallback + before execution ever reached the Bedrock branch. + + Fix: promote the Bedrock branch ahead of the custom-endpoint probe. 
+ """ + + @patch("agent.model_metadata.fetch_endpoint_model_metadata") + def test_bedrock_provider_returns_static_table_before_probe(self, mock_fetch): + """provider='bedrock' resolves via static table, bypasses /models probe.""" + ctx = get_model_context_length( + "anthropic.claude-opus-4-v1:0", + provider="bedrock", + base_url="https://bedrock-runtime.us-east-1.amazonaws.com", + ) + # Must return the static Bedrock table value (200K for Claude), + # NOT DEFAULT_FALLBACK_CONTEXT (128K). + assert ctx == 200000 + mock_fetch.assert_not_called() + + @patch("agent.model_metadata.fetch_endpoint_model_metadata") + def test_bedrock_url_without_provider_hint(self, mock_fetch): + """bedrock-runtime host infers Bedrock even when provider is omitted.""" + ctx = get_model_context_length( + "anthropic.claude-sonnet-4-v1:0", + base_url="https://bedrock-runtime.us-west-2.amazonaws.com", + ) + assert ctx == 200000 + mock_fetch.assert_not_called() + + @patch("agent.model_metadata.fetch_endpoint_model_metadata") + def test_non_bedrock_url_still_probes(self, mock_fetch): + """Non-Bedrock hosts still reach the custom-endpoint probe.""" + mock_fetch.return_value = {"some-model": {"context_length": 50000}} + ctx = get_model_context_length( + "some-model", + base_url="https://api.example.com/v1", + ) + assert ctx == 50000 + assert mock_fetch.called + + # ========================================================================= # _strip_provider_prefix — Ollama model:tag vs provider:model # =========================================================================