fix: cap codex gpt-5.5 context

This commit is contained in:
Roger Gimbel 2026-04-24 06:05:17 -06:00
parent 0e235947b9
commit 7c86d0f311
2 changed files with 40 additions and 2 deletions

View file

@ -124,6 +124,20 @@ DEFAULT_FALLBACK_CONTEXT = CONTEXT_PROBE_TIERS[0]
# Sessions, model switches, and cron jobs should reject models below this.
MINIMUM_CONTEXT_LENGTH = 64_000
# GPT-5.5 is currently exposed through ChatGPT/Codex with a smaller effective
# agent-usable window than the larger native/advertised GPT-5 family windows.
# Keep compaction and preflight budgeting fail-closed until a larger window is
# explicitly verified and configured.
OPENAI_CODEX_GPT55_EFFECTIVE_CONTEXT = 272_000
def _is_openai_codex_gpt55(model: str, provider: str = "", base_url: str = "") -> bool:
model_lower = (model or "").strip().lower()
if model_lower not in {"gpt-5.5", "gpt-5.5-codex"}:
return False
provider_lower = (provider or "").strip().lower()
return provider_lower == "openai-codex" or base_url_host_matches(base_url, "chatgpt.com")
# Thin fallback defaults — only broad model family patterns.
# These fire only when provider is unknown AND models.dev/OpenRouter/Anthropic
# all miss. Replaced the previous 80+ entry dict.
@ -144,8 +158,8 @@ DEFAULT_CONTEXT_LENGTHS = {
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
# Source: https://developers.openai.com/api/docs/models
# GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
# can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
# Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
# can't probe live. ChatGPT/Codex OAuth is guarded separately at 272k until
# a larger effective window is explicitly verified.
"gpt-5.5": 400000,
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
@ -1217,6 +1231,11 @@ def get_model_context_length(
# local servers actually know about. Ollama "model:tag" colons are preserved.
model = _strip_provider_prefix(model)
# GPT-5.5 on ChatGPT/Codex currently has a 272k effective cap for agent
# budgeting even if cache/discovery sources advertise a larger native window.
if _is_openai_codex_gpt55(model, provider=provider, base_url=base_url):
return OPENAI_CODEX_GPT55_EFFECTIVE_CONTEXT
# 1. Check persistent cache (model+provider)
if base_url:
cached = get_cached_context_length(model, base_url)

View file

@ -429,6 +429,25 @@ class TestGetModelContextLength:
mock_fetch.return_value = {}
assert get_model_context_length("anthropic/claude-sonnet-4") == 200000
@patch("agent.model_metadata.fetch_model_metadata")
@patch("agent.models_dev.lookup_models_dev_context")
@patch("agent.model_metadata.get_cached_context_length")
def test_gpt55_codex_uses_safe_effective_context_cap(self, mock_cache, mock_models_dev, mock_fetch):
    """GPT-5.5 Codex must fail closed to the current 272k effective cap.

    Discovery/cache sources may advertise a larger native context window;
    compaction should not budget against that until explicitly lifted.
    """
    # Make every discovery source report an oversized native window so the
    # test proves the Codex-specific guard wins over all of them.
    advertised_window = 1_050_000
    mock_fetch.return_value = {"gpt-5.5": {"context_length": advertised_window}}
    mock_models_dev.return_value = advertised_window
    mock_cache.return_value = advertised_window
    resolved = get_model_context_length(
        "gpt-5.5",
        provider="openai-codex",
        base_url="https://chatgpt.com/backend-api/codex",
    )
    assert resolved == 272_000
@patch("agent.model_metadata.fetch_model_metadata")
def test_unknown_model_returns_first_probe_tier(self, mock_fetch):
mock_fetch.return_value = {}