mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(context): resolve real Codex OAuth context windows (272k, not 1M) (#14935)
On ChatGPT Codex OAuth every gpt-5.x slug actually caps at 272,000 tokens,
but Hermes was resolving gpt-5.5 / gpt-5.4 to 1,050,000 (from models.dev)
because openai-codex aliases to the openai entry there. At 1.05M the
compressor never fires and requests hard-fail with 'context window
exceeded' around the real 272k boundary.
Verified live against chatgpt.com/backend-api/codex/models:
gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex, gpt-5.2-codex,
gpt-5.2, gpt-5.1-codex-max → context_window = 272000
Changes:
- agent/model_metadata.py:
* _fetch_codex_oauth_context_lengths() — probe the Codex /models
endpoint with the OAuth bearer token and read context_window per
slug (1h in-memory TTL).
* _resolve_codex_oauth_context_length() — prefer the live probe,
fall back to hardcoded _CODEX_OAUTH_CONTEXT_FALLBACK (all 272k).
* Wire into get_model_context_length() when provider=='openai-codex',
running BEFORE the models.dev lookup (which returns 1.05M). Result
persists via save_context_length() so subsequent lookups skip the
probe entirely.
* Fixed the now-wrong comment on the DEFAULT_CONTEXT_LENGTHS gpt-5.5
entry (400k was never right for Codex; it's the catch-all for
providers we can't probe live).
Tests (4 new in TestCodexOAuthContextLength):
- fallback table used when no token is available (no models.dev leakage)
- live probe overrides the fallback
- probe failure (non-200) falls back to hardcoded 272k
- non-codex providers (openrouter, direct openai) unaffected
Non-codex context resolution is unchanged — the Codex branch only fires
when provider=='openai-codex'.
This commit is contained in:
parent
2e78a2b6b2
commit
51f4c9827f
2 changed files with 241 additions and 2 deletions
|
|
@@ -200,6 +200,126 @@ class TestDefaultContextLengths:
|
|||
assert len(DEFAULT_CONTEXT_LENGTHS) >= 10
|
||||
|
||||
|
||||
# =========================================================================
# Codex OAuth context-window resolution (provider="openai-codex")
# =========================================================================


class TestCodexOAuthContextLength:
    """Codex-specific context-window limits for provider="openai-codex".

    ChatGPT Codex OAuth enforces smaller context windows than the direct
    OpenAI API exposes for the same slugs: a live probe of
    chatgpt.com/backend-api/codex/models (Apr 2026) returned 272k for every
    model, whereas models.dev reports 1.05M for gpt-5.5/gpt-5.4 and 400k
    for the rest.
    """

    def setup_method(self):
        # Start every test from a cold probe cache so a value cached by a
        # previous test can never influence the current one.
        import agent.model_metadata as mm

        mm._codex_oauth_context_cache = {}
        mm._codex_oauth_context_cache_time = 0.0

    def test_fallback_table_used_without_token(self):
        """Without an access token the hardcoded Codex fallback table must
        win over models.dev (which reports 1.05M for gpt-5.5; the real
        Codex limit is 272k)."""
        from agent.model_metadata import get_model_context_length

        codex_slugs = [
            "gpt-5.5",
            "gpt-5.4",
            "gpt-5.4-mini",
            "gpt-5.3-codex",
            "gpt-5.2-codex",
            "gpt-5.1-codex-max",
            "gpt-5.1-codex-mini",
        ]
        with patch("agent.model_metadata.get_cached_context_length", return_value=None), \
                patch("agent.model_metadata.save_context_length"):
            for slug in codex_slugs:
                resolved = get_model_context_length(
                    model=slug,
                    base_url="https://chatgpt.com/backend-api/codex",
                    api_key="",
                    provider="openai-codex",
                )
                assert resolved == 272_000, (
                    f"Codex {slug}: expected 272000 fallback, got {resolved} "
                    "(models.dev leakage?)"
                )

    def test_live_probe_overrides_fallback(self):
        """With a token present the live /models probe is authoritative and
        its per-slug context_window values drive the result."""
        from agent.model_metadata import get_model_context_length

        probe = MagicMock()
        probe.status_code = 200
        probe.json.return_value = {
            "models": [
                {"slug": "gpt-5.5", "context_window": 300_000},
                {"slug": "gpt-5.4", "context_window": 400_000},
            ]
        }

        with patch("agent.model_metadata.requests.get", return_value=probe), \
                patch("agent.model_metadata.get_cached_context_length", return_value=None), \
                patch("agent.model_metadata.save_context_length"):
            resolved = {
                slug: get_model_context_length(
                    model=slug,
                    base_url="https://chatgpt.com/backend-api/codex",
                    api_key="fake-token",
                    provider="openai-codex",
                )
                for slug in ("gpt-5.5", "gpt-5.4")
            }
        assert resolved["gpt-5.5"] == 300_000
        assert resolved["gpt-5.4"] == 400_000

    def test_probe_failure_falls_back_to_hardcoded(self):
        """A failed probe (non-200 / network error) must still resolve to
        the hardcoded 272k rather than leaking through to models.dev's
        1.05M."""
        from agent.model_metadata import get_model_context_length

        probe = MagicMock()
        probe.status_code = 401
        probe.json.return_value = {}

        with patch("agent.model_metadata.requests.get", return_value=probe), \
                patch("agent.model_metadata.get_cached_context_length", return_value=None), \
                patch("agent.model_metadata.save_context_length"):
            resolved = get_model_context_length(
                model="gpt-5.5",
                base_url="https://chatgpt.com/backend-api/codex",
                api_key="expired-token",
                provider="openai-codex",
            )
        assert resolved == 272_000

    def test_non_codex_providers_unaffected(self):
        """Resolving gpt-5.5 on non-Codex providers must NOT pick up the
        Codex 272k override — OpenRouter / direct OpenAI API have their own
        limits."""
        from agent.model_metadata import get_model_context_length

        # OpenRouter resolution consults its own catalog first; with the
        # catalog mocked empty it should fall through to the hardcoded
        # DEFAULT_CONTEXT_LENGTHS entry (400k), never the Codex table.
        with patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
                patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
                patch("agent.model_metadata.get_cached_context_length", return_value=None), \
                patch("agent.models_dev.lookup_models_dev_context", return_value=None):
            resolved = get_model_context_length(
                model="openai/gpt-5.5",
                base_url="https://openrouter.ai/api/v1",
                api_key="",
                provider="openrouter",
            )
        assert resolved == 400_000, (
            f"Non-Codex gpt-5.5 resolved to {resolved}; Codex 272k override "
            "leaked outside openai-codex provider"
        )
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# get_model_context_length — resolution order
|
||||
# =========================================================================
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue