mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: cap codex gpt-5.5 context
This commit is contained in:
parent
0e235947b9
commit
7c86d0f311
2 changed files with 40 additions and 2 deletions
|
|
@ -124,6 +124,20 @@ DEFAULT_FALLBACK_CONTEXT = CONTEXT_PROBE_TIERS[0]
|
|||
# Sessions, model switches, and cron jobs should reject models below this.
|
||||
MINIMUM_CONTEXT_LENGTH = 64_000
|
||||
|
||||
# GPT-5.5 is currently exposed through ChatGPT/Codex with a smaller effective
|
||||
# agent-usable window than the larger native/advertised GPT-5 family windows.
|
||||
# Keep compaction and preflight budgeting fail-closed until a larger window is
|
||||
# explicitly verified and configured.
|
||||
OPENAI_CODEX_GPT55_EFFECTIVE_CONTEXT = 272_000
|
||||
|
||||
|
||||
def _is_openai_codex_gpt55(model: str, provider: str = "", base_url: str = "") -> bool:
    """Report whether *model* is GPT-5.5 reached through ChatGPT/Codex.

    The model name is compared case-insensitively, ignoring surrounding
    whitespace, against ``gpt-5.5`` and ``gpt-5.5-codex``. Any other name
    (including an empty or ``None`` model) yields ``False`` without
    looking at the provider or URL.

    For a matching model, the route counts as ChatGPT/Codex when either:

    * ``provider`` normalizes (strip + lowercase) to ``openai-codex``, or
    * ``base_url_host_matches(base_url, "chatgpt.com")`` is truthy
      (presumably a host comparison on the URL -- confirm against the
      helper's definition).
    """
    normalized_model = (model or "").strip().lower()
    if normalized_model in {"gpt-5.5", "gpt-5.5-codex"}:
        # An explicit provider match settles it; the URL is only consulted
        # when the provider does not identify the Codex route.
        if (provider or "").strip().lower() == "openai-codex":
            return True
        return base_url_host_matches(base_url, "chatgpt.com")
    return False
|
||||
|
||||
# Thin fallback defaults — only broad model family patterns.
|
||||
# These fire only when provider is unknown AND models.dev/OpenRouter/Anthropic
|
||||
# all miss. Replaced the previous 80+ entry dict.
|
||||
|
|
@ -144,8 +158,8 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
|
||||
# Source: https://developers.openai.com/api/docs/models
|
||||
# GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
|
||||
# can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
|
||||
# Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
|
||||
# can't probe live. ChatGPT/Codex OAuth is guarded separately at 272k until
|
||||
# a larger effective window is explicitly verified.
|
||||
"gpt-5.5": 400000,
|
||||
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
|
||||
|
|
@ -1217,6 +1231,11 @@ def get_model_context_length(
|
|||
# local servers actually know about. Ollama "model:tag" colons are preserved.
|
||||
model = _strip_provider_prefix(model)
|
||||
|
||||
# GPT-5.5 on ChatGPT/Codex currently has a 272k effective cap for agent
|
||||
# budgeting even if cache/discovery sources advertise a larger native window.
|
||||
if _is_openai_codex_gpt55(model, provider=provider, base_url=base_url):
|
||||
return OPENAI_CODEX_GPT55_EFFECTIVE_CONTEXT
|
||||
|
||||
# 1. Check persistent cache (model+provider)
|
||||
if base_url:
|
||||
cached = get_cached_context_length(model, base_url)
|
||||
|
|
|
|||
|
|
@ -429,6 +429,25 @@ class TestGetModelContextLength:
|
|||
mock_fetch.return_value = {}
|
||||
assert get_model_context_length("anthropic/claude-sonnet-4") == 200000
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
@patch("agent.models_dev.lookup_models_dev_context")
@patch("agent.model_metadata.get_cached_context_length")
def test_gpt55_codex_uses_safe_effective_context_cap(self, mock_cache, mock_models_dev, mock_fetch):
    """GPT-5.5 via Codex resolves to the 272k effective cap.

    Every mocked discovery source (persistent cache, models.dev lookup,
    fetched metadata) advertises a much larger window; the resolver is
    expected to return the capped value regardless.
    """
    # All three patched sources report the same oversized window.
    advertised_window = 1_050_000
    mock_cache.return_value = advertised_window
    mock_models_dev.return_value = advertised_window
    mock_fetch.return_value = {"gpt-5.5": {"context_length": advertised_window}}

    resolved = get_model_context_length(
        "gpt-5.5",
        provider="openai-codex",
        base_url="https://chatgpt.com/backend-api/codex",
    )

    assert resolved == 272_000
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
def test_unknown_model_returns_first_probe_tier(self, mock_fetch):
|
||||
mock_fetch.return_value = {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue