fix: cap codex gpt-5.5 context

This commit is contained in:
Roger Gimbel 2026-04-24 06:05:17 -06:00
parent 0e235947b9
commit 7c86d0f311
2 changed files with 40 additions and 2 deletions

View file

@ -124,6 +124,20 @@ DEFAULT_FALLBACK_CONTEXT = CONTEXT_PROBE_TIERS[0]
# Sessions, model switches, and cron jobs should reject models below this.
MINIMUM_CONTEXT_LENGTH = 64_000
# GPT-5.5 is currently exposed through ChatGPT/Codex with a smaller effective
# agent-usable window than the larger native/advertised GPT-5 family windows.
# Keep compaction and preflight budgeting fail-closed until a larger window is
# explicitly verified and configured.
OPENAI_CODEX_GPT55_EFFECTIVE_CONTEXT = 272_000
def _is_openai_codex_gpt55(model: str, provider: str = "", base_url: str = "") -> bool:
model_lower = (model or "").strip().lower()
if model_lower not in {"gpt-5.5", "gpt-5.5-codex"}:
return False
provider_lower = (provider or "").strip().lower()
return provider_lower == "openai-codex" or base_url_host_matches(base_url, "chatgpt.com")
# Thin fallback defaults — only broad model family patterns.
# These fire only when provider is unknown AND models.dev/OpenRouter/Anthropic
# all miss. Replaced the previous 80+ entry dict.
@ -144,8 +158,8 @@ DEFAULT_CONTEXT_LENGTHS = {
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
# Source: https://developers.openai.com/api/docs/models
# GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
# can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
# Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
# can't probe live. ChatGPT/Codex OAuth is guarded separately at 272k until
# a larger effective window is explicitly verified.
"gpt-5.5": 400000,
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
@ -1217,6 +1231,11 @@ def get_model_context_length(
# local servers actually know about. Ollama "model:tag" colons are preserved.
model = _strip_provider_prefix(model)
# GPT-5.5 on ChatGPT/Codex currently has a 272k effective cap for agent
# budgeting even if cache/discovery sources advertise a larger native window.
if _is_openai_codex_gpt55(model, provider=provider, base_url=base_url):
return OPENAI_CODEX_GPT55_EFFECTIVE_CONTEXT
# 1. Check persistent cache (model+provider)
if base_url:
cached = get_cached_context_length(model, base_url)

View file

@ -429,6 +429,25 @@ class TestGetModelContextLength:
mock_fetch.return_value = {}
assert get_model_context_length("anthropic/claude-sonnet-4") == 200000
@patch("agent.model_metadata.fetch_model_metadata")
@patch("agent.models_dev.lookup_models_dev_context")
@patch("agent.model_metadata.get_cached_context_length")
def test_gpt55_codex_uses_safe_effective_context_cap(self, mock_cache, mock_models_dev, mock_fetch):
    """GPT-5.5 Codex must fail closed to the current 272k effective cap.

    Discovery/cache sources may advertise a larger native context window;
    compaction should not budget against that until explicitly lifted.
    """
    # Make every discovery source report an oversized native window so the
    # test proves the Codex-specific guard wins over all of them.
    advertised_window = 1_050_000
    mock_fetch.return_value = {"gpt-5.5": {"context_length": advertised_window}}
    mock_models_dev.return_value = advertised_window
    mock_cache.return_value = advertised_window
    resolved = get_model_context_length(
        "gpt-5.5",
        provider="openai-codex",
        base_url="https://chatgpt.com/backend-api/codex",
    )
    assert resolved == 272_000
@patch("agent.model_metadata.fetch_model_metadata")
def test_unknown_model_returns_first_probe_tier(self, mock_fetch):
mock_fetch.return_value = {}