diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 14508745e..42fa5324d 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -124,6 +124,28 @@ DEFAULT_FALLBACK_CONTEXT = CONTEXT_PROBE_TIERS[0]
 # Sessions, model switches, and cron jobs should reject models below this.
 MINIMUM_CONTEXT_LENGTH = 64_000
 
+# GPT-5.5 is currently exposed through ChatGPT/Codex with a smaller effective
+# agent-usable window than the larger native/advertised GPT-5 family windows.
+# Keep compaction and preflight budgeting fail-closed until a larger window is
+# explicitly verified and configured.
+OPENAI_CODEX_GPT55_EFFECTIVE_CONTEXT = 272_000
+
+
+def _is_openai_codex_gpt55(model: str, provider: str = "", base_url: str = "") -> bool:
+    """Report whether *model* is GPT-5.5 reached through ChatGPT/Codex.
+
+    The name matches only ``gpt-5.5`` or ``gpt-5.5-codex`` after stripping
+    whitespace and lower-casing. The backend qualifies when *provider* is
+    ``openai-codex`` or ``base_url_host_matches(base_url, "chatgpt.com")``
+    is truthy.
+    """
+    model_lower = (model or "").strip().lower()
+    if model_lower not in {"gpt-5.5", "gpt-5.5-codex"}:
+        return False
+    provider_lower = (provider or "").strip().lower()
+    return provider_lower == "openai-codex" or base_url_host_matches(base_url, "chatgpt.com")
+
+
 # Thin fallback defaults — only broad model family patterns.
 # These fire only when provider is unknown AND models.dev/OpenRouter/Anthropic
 # all miss. Replaced the previous 80+ entry dict.
@@ -144,8 +166,8 @@ DEFAULT_CONTEXT_LENGTHS = {
     # OpenAI — GPT-5 family (most have 400k; specific overrides first)
     # Source: https://developers.openai.com/api/docs/models
     # GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
-    # can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
-    # Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
+    # can't probe live. ChatGPT/Codex OAuth is guarded separately at 272k until
+    # a larger effective window is explicitly verified.
     "gpt-5.5": 400000,
     "gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
     "gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
@@ -1217,6 +1239,11 @@ def get_model_context_length(
     # local servers actually know about. Ollama "model:tag" colons are preserved.
     model = _strip_provider_prefix(model)
 
+    # GPT-5.5 on ChatGPT/Codex currently has a 272k effective cap for agent
+    # budgeting even if cache/discovery sources advertise a larger native window.
+    if _is_openai_codex_gpt55(model, provider=provider, base_url=base_url):
+        return OPENAI_CODEX_GPT55_EFFECTIVE_CONTEXT
+
     # 1. Check persistent cache (model+provider)
     if base_url:
         cached = get_cached_context_length(model, base_url)
diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py
index a0b9747a8..c7a474260 100644
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -429,6 +429,25 @@ class TestGetModelContextLength:
         mock_fetch.return_value = {}
         assert get_model_context_length("anthropic/claude-sonnet-4") == 200000
 
+    @patch("agent.model_metadata.fetch_model_metadata")
+    @patch("agent.models_dev.lookup_models_dev_context")
+    @patch("agent.model_metadata.get_cached_context_length")
+    def test_gpt55_codex_uses_safe_effective_context_cap(self, mock_cache, mock_models_dev, mock_fetch):
+        """GPT-5.5 Codex must fail closed to the current 272k effective cap.
+
+        Discovery/cache sources may advertise a larger native context window;
+        compaction should not budget against that until explicitly lifted.
+        """
+        mock_cache.return_value = 1_050_000
+        mock_models_dev.return_value = 1_050_000
+        mock_fetch.return_value = {"gpt-5.5": {"context_length": 1_050_000}}
+
+        assert get_model_context_length(
+            "gpt-5.5",
+            provider="openai-codex",
+            base_url="https://chatgpt.com/backend-api/codex",
+        ) == 272_000
+
     @patch("agent.model_metadata.fetch_model_metadata")
     def test_unknown_model_returns_first_probe_tier(self, mock_fetch):
         mock_fetch.return_value = {}