From a8e89cbbf66a563b655480827759a0591561cf05 Mon Sep 17 00:00:00 2001
From: Andre Kurait <andrekurait@gmail.com>
Date: Thu, 23 Apr 2026 20:33:09 +0000
Subject: [PATCH] fix(bedrock): resolve context length via static table before
 custom-endpoint probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Problem

`get_model_context_length()` in `agent/model_metadata.py` had a resolution
order bug that caused every Bedrock model to fall back to the 128K default
context length instead of reaching the static Bedrock table (200K for
Claude, etc.).

The root cause: `bedrock-runtime.<region>.amazonaws.com` is not listed in
`_URL_TO_PROVIDER`, so `_is_known_provider_base_url()` returned False.
The resolution order then ran the custom-endpoint probe (step 2) *before*
the Bedrock branch (step 4b), which:

  1. Treated Bedrock as a custom endpoint (via `_is_custom_endpoint`).
  2. Called `fetch_endpoint_model_metadata()` → `GET /models` on the
     bedrock-runtime URL (Bedrock doesn't serve this shape).
  3. Fell through to `return DEFAULT_FALLBACK_CONTEXT` (128K) at the
     "probe-down" branch — never reaching the Bedrock static table.

Result: users on Bedrock saw 128K context for Claude models that
actually support 200K on Bedrock, causing premature auto-compression.

## Fix

Promote the Bedrock branch from step 4b to step 1b, so it runs *before*
the custom-endpoint probe at step 2. The static table in
`bedrock_adapter.py::get_bedrock_context_length()` is the authoritative
source for Bedrock (the ListFoundationModels API doesn't expose context
window sizes), so there's no reason to probe `/models` first.

The original step 4b is replaced with a one-line breadcrumb comment
pointing to the new location, to make the resolution-order docstring
accurate.

## Changes

- `agent/model_metadata.py`
  - Add step 1b: Bedrock static-table branch (unchanged predicate, moved).
  - Remove dead step 4b block, replace with breadcrumb comment.
  - Update resolution-order docstring to include step 1b.

- `tests/agent/test_model_metadata.py`
  - New `TestBedrockContextResolution` class (3 tests):
    - `test_bedrock_provider_returns_static_table_before_probe`:
      confirms `provider="bedrock"` hits the static table and does NOT
      call `fetch_endpoint_model_metadata` (regression guard).
    - `test_bedrock_url_without_provider_hint`: confirms the
      `bedrock-runtime.*.amazonaws.com` host match works without an
      explicit `provider=` hint.
    - `test_non_bedrock_url_still_probes`: confirms the probe still
      fires for genuinely-custom endpoints (no over-reach).

## Testing

  pytest tests/agent/test_model_metadata.py -q
  # 83 passed in 1.95s (3 new + 80 existing)

## Risk

Very low.

- Predicate is identical to the original step 4b — no behaviour change
  for non-Bedrock paths.
- Original step 4b was dead code for the user-facing case (always hit
  the 128K fallback first), so removing it cannot regress behaviour.
- Bedrock path now short-circuits before any network I/O — faster too.
- `ImportError` fall-through preserved so users without `boto3`
  installed are unaffected.

## Related

- This is a prerequisite for accurate context-window accounting on
  Bedrock — the fix for #14710 (stale-connection client eviction)
  depends on correct context sizing to know when to compress.

Signed-off-by: Andre Kurait <andrekurait@gmail.com>
---
 agent/model_metadata.py            | 35 ++++++++++++--------
 tests/agent/test_model_metadata.py | 51 ++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 13 deletions(-)
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 2916a7fa7..a15409101 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -1047,6 +1047,7 @@ def get_model_context_length(
     Resolution order:
     0. Explicit config override (model.context_length or custom_providers per-model)
     1. Persistent cache (previously discovered via probing)
+    1b. AWS Bedrock static table (must precede custom-endpoint probe)
     2. Active endpoint metadata (/models for explicit custom endpoints)
     3. Local server query (for local endpoints)
     4. Anthropic /v1/models API (API-key users only, not OAuth)
@@ -1071,6 +1072,26 @@ def get_model_context_length(
         if cached is not None:
             return cached
 
+    # 1b. AWS Bedrock — use static context length table.
+    # Bedrock's ListFoundationModels API doesn't expose context window sizes,
+    # so we maintain a curated table in bedrock_adapter.py that reflects
+    # AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native
+    # Anthropic API).  This must run BEFORE the custom-endpoint probe at
+    # step 2 — bedrock-runtime.<region>.amazonaws.com is not in
+    # _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint,
+    # fail the /models probe (Bedrock doesn't expose that shape), and fall
+    # back to the 128K default before reaching the original step 4b branch.
+    if provider == "bedrock" or (
+        base_url
+        and base_url_hostname(base_url).startswith("bedrock-runtime.")
+        and base_url_host_matches(base_url, "amazonaws.com")
+    ):
+        try:
+            from agent.bedrock_adapter import get_bedrock_context_length
+            return get_bedrock_context_length(model)
+        except ImportError:
+            pass  # boto3 not installed — fall through to generic resolution
+
     # 2. Active endpoint metadata for truly custom/unknown endpoints.
     # Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
     # /models endpoint may report a provider-imposed limit (e.g. Copilot
@@ -1116,19 +1137,7 @@ def get_model_context_length(
         if ctx:
             return ctx
 
-    # 4b. AWS Bedrock — use static context length table.
-    # Bedrock's ListFoundationModels doesn't expose context window sizes,
-    # so we maintain a curated table in bedrock_adapter.py.
-    if provider == "bedrock" or (
-        base_url
-        and base_url_hostname(base_url).startswith("bedrock-runtime.")
-        and base_url_host_matches(base_url, "amazonaws.com")
-    ):
-        try:
-            from agent.bedrock_adapter import get_bedrock_context_length
-            return get_bedrock_context_length(model)
-        except ImportError:
-            pass  # boto3 not installed — fall through to generic resolution
+    # 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.)
 
     # 5. Provider-aware lookups (before generic OpenRouter cache)
     # These are provider-specific and take priority over the generic OR cache,
diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py
index 45e716022..5953694d4 100644
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -376,6 +376,57 @@ class TestGetModelContextLength:
         assert result == 200000
 
 
+# =========================================================================
+# Bedrock context resolution — must run BEFORE custom-endpoint probe
+# =========================================================================
+
+class TestBedrockContextResolution:
+    """Regression tests for Bedrock context-length resolution order.
+
+    Bug: because ``bedrock-runtime.<region>.amazonaws.com`` is not listed in
+    ``_URL_TO_PROVIDER``, ``_is_known_provider_base_url`` returned False and
+    the custom-endpoint probe at step 2 ran first — fetching ``/models`` from
+    Bedrock (which it doesn't serve), returning the 128K default-fallback
+    before execution ever reached the Bedrock branch.
+
+    Fix: promote the Bedrock branch ahead of the custom-endpoint probe.
+    """
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    def test_bedrock_provider_returns_static_table_before_probe(self, mock_fetch):
+        """provider='bedrock' resolves via static table, bypasses /models probe."""
+        ctx = get_model_context_length(
+            "anthropic.claude-opus-4-v1:0",
+            provider="bedrock",
+            base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
+        )
+        # Must return the static Bedrock table value (200K for Claude),
+        # NOT DEFAULT_FALLBACK_CONTEXT (128K).
+        assert ctx == 200000
+        mock_fetch.assert_not_called()
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    def test_bedrock_url_without_provider_hint(self, mock_fetch):
+        """bedrock-runtime host infers Bedrock even when provider is omitted."""
+        ctx = get_model_context_length(
+            "anthropic.claude-sonnet-4-v1:0",
+            base_url="https://bedrock-runtime.us-west-2.amazonaws.com",
+        )
+        assert ctx == 200000
+        mock_fetch.assert_not_called()
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    def test_non_bedrock_url_still_probes(self, mock_fetch):
+        """Non-Bedrock hosts still reach the custom-endpoint probe."""
+        mock_fetch.return_value = {"some-model": {"context_length": 50000}}
+        ctx = get_model_context_length(
+            "some-model",
+            base_url="https://api.example.com/v1",
+        )
+        assert ctx == 50000
+        assert mock_fetch.called
+
+
 # =========================================================================
 # _strip_provider_prefix — Ollama model:tag vs provider:model
 # =========================================================================