diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index cbb38daf1..14508745e 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -737,6 +737,22 @@ def get_cached_context_length(model: str, base_url: str) -> Optional[int]:
     return cache.get(key)
 
 
+def _invalidate_cached_context_length(model: str, base_url: str) -> None:
+    """Drop a stale cache entry so it gets re-resolved on the next lookup."""
+    key = f"{model}@{base_url}"
+    cache = _load_context_cache()
+    if key not in cache:
+        return
+    del cache[key]
+    path = _get_context_cache_path()
+    try:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with open(path, "w") as f:
+            yaml.dump({"context_lengths": cache}, f, default_flow_style=False)
+    except Exception as e:
+        logger.debug("Failed to invalidate context length cache entry %s: %s", key, e)
+
+
 def get_next_probe_tier(current_length: int) -> Optional[int]:
     """Return the next lower probe tier, or None if already at minimum."""
     for tier in CONTEXT_PROBE_TIERS:
@@ -1205,7 +1221,21 @@ def get_model_context_length(
     if base_url:
         cached = get_cached_context_length(model, base_url)
         if cached is not None:
-            return cached
+            # Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
+            # resolved gpt-5.x to the direct-API value (e.g. 1.05M) via
+            # models.dev and persisted it. Codex OAuth caps at 272K for every
+            # slug, so any cached Codex entry at or above 400K is a leftover
+            # from the old resolution path. Drop it and fall through to the
+            # live /models probe in step 5 below.
+            if provider == "openai-codex" and cached >= 400_000:
+                logger.info(
+                    "Dropping stale Codex cache entry %s@%s -> %s (pre-fix value); "
+                    "re-resolving via live /models probe",
+                    model, base_url, f"{cached:,}",
+                )
+                _invalidate_cached_context_length(model, base_url)
+            else:
+                return cached
 
     # 2. Active endpoint metadata for truly custom/unknown endpoints.
     # Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py
index ee6019429..a0b9747a8 100644
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -319,6 +319,98 @@ class TestCodexOAuthContextLength:
             "leaked outside openai-codex provider"
         )
 
+    def test_stale_codex_cache_over_400k_is_invalidated(self, tmp_path, monkeypatch):
+        """Pre-PR #14935 builds cached gpt-5.5 at 1.05M (from models.dev)
+        before the Codex-aware branch existed. Upgrading users keep that
+        stale entry on disk and the cache-first lookup returns it forever.
+        Codex OAuth caps at 272k for every slug, so any cached Codex
+        entry >= 400k must be dropped and re-resolved via the live probe.
+        """
+        from agent import model_metadata as mm
+
+        # Isolate the cache file to tmp_path
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        base_url = "https://chatgpt.com/backend-api/codex/"
+        stale_key = f"gpt-5.5@{base_url}"
+        other_key = "other-model@https://api.openai.com/v1/"
+        import yaml as _yaml
+        cache_file.write_text(_yaml.dump({"context_lengths": {
+            stale_key: 1_050_000,  # stale pre-fix value
+            other_key: 128_000,  # unrelated, must survive
+        }}))
+
+        fake_response = MagicMock()
+        fake_response.status_code = 200
+        fake_response.json.return_value = {
+            "models": [{"slug": "gpt-5.5", "context_window": 272_000}]
+        }
+
+        with patch("agent.model_metadata.requests.get", return_value=fake_response), \
+             patch("agent.model_metadata.save_context_length") as mock_save:
+            ctx = mm.get_model_context_length(
+                model="gpt-5.5",
+                base_url=base_url,
+                api_key="fake-token",
+                provider="openai-codex",
+            )
+
+        assert ctx == 272_000, f"Stale entry should have been re-resolved to 272k, got {ctx}"
+        # Live save was called with the fresh value
+        mock_save.assert_called_with("gpt-5.5", base_url, 272_000)
+        # The stale entry was removed from disk; unrelated entries survived
+        remaining = _yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
+        assert stale_key not in remaining, "Stale entry was not invalidated from the cache file"
+        assert remaining.get(other_key) == 128_000, "Unrelated cache entries must not be touched"
+
+    def test_fresh_codex_cache_under_400k_is_respected(self, tmp_path, monkeypatch):
+        """Codex entries at the correct 272k must NOT be invalidated —
+        only stale pre-fix values (>= 400k) get dropped."""
+        from agent import model_metadata as mm
+
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        base_url = "https://chatgpt.com/backend-api/codex/"
+        import yaml as _yaml
+        cache_file.write_text(_yaml.dump({"context_lengths": {
+            f"gpt-5.5@{base_url}": 272_000,
+        }}))
+
+        # If the invalidation incorrectly fired, this would be called; assert it isn't.
+        with patch("agent.model_metadata.requests.get") as mock_get:
+            ctx = mm.get_model_context_length(
+                model="gpt-5.5",
+                base_url=base_url,
+                api_key="fake-token",
+                provider="openai-codex",
+            )
+        assert ctx == 272_000
+        mock_get.assert_not_called()
+
+    def test_stale_invalidation_scoped_to_codex_provider(self, tmp_path, monkeypatch):
+        """A cached 1M entry for a non-Codex provider (e.g. Anthropic opus on
+        OpenRouter, legitimately 1M) must NOT be invalidated by this guard."""
+        from agent import model_metadata as mm
+
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        base_url = "https://openrouter.ai/api/v1"
+        import yaml as _yaml
+        cache_file.write_text(_yaml.dump({"context_lengths": {
+            f"anthropic/claude-opus-4.6@{base_url}": 1_000_000,
+        }}))
+
+        ctx = mm.get_model_context_length(
+            model="anthropic/claude-opus-4.6",
+            base_url=base_url,
+            api_key="fake",
+            provider="openrouter",
+        )
+        assert ctx == 1_000_000, "Non-codex 1M cache entries must be respected"
+
     # =========================================================================
     # get_model_context_length — resolution order