diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 653a90619a5..956d6b93095 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -157,6 +157,13 @@ DEFAULT_CONTEXT_LENGTHS = { "gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context) + # gpt-5.3-codex-spark is Codex-OAuth-only (ChatGPT Pro entitlement) and + # uses a smaller 128k window than other gpt-5.x slugs. Listed here as + # a defensive override so the longest-substring fallback doesn't match + # the generic "gpt-5" entry below (400k) and report the wrong limit if + # Spark's context ever needs to be resolved through this path. Real + # usage flows through _CODEX_OAUTH_CONTEXT_FALLBACK further below. + "gpt-5.3-codex-spark": 128000, "gpt-5.1-chat": 128000, # Chat variant has 128k context "gpt-5": 400000, # GPT-5.x base, mini, codex variants (400k) "gpt-4.1": 1047576, diff --git a/tests/hermes_cli/test_openai_codex_model_validation_fallback.py b/tests/hermes_cli/test_openai_codex_model_validation_fallback.py index e33dbe2ba44..2b742b058ef 100644 --- a/tests/hermes_cli/test_openai_codex_model_validation_fallback.py +++ b/tests/hermes_cli/test_openai_codex_model_validation_fallback.py @@ -1,9 +1,18 @@ """Regression tests for OpenAI Codex model validation when the listing lags behind actually usable backend model IDs. -The bug: `/model` and `switch_model()` reject `gpt-5.3-codex-spark` because the -OpenAI Codex listing omits it, even though direct runtime calls with -`--provider openai-codex -m gpt-5.3-codex-spark` succeed. +The bug originally reported in #16172: `/model` and `switch_model()` rejected +`gpt-5.3-codex-spark` because the curated listing omitted it, even though direct +runtime calls succeeded. PR #19729 fixed this by soft-accepting unknown-but- +plausible Codex slugs with a warning, and this test pins the soft-accept +behavior so it doesn't regress. 
+ +Note: gpt-5.3-codex-spark itself is now in the curated catalog (PR #22991), +so a real-world Spark request takes the `recognized=True` fast path. This +test still uses Spark as the example slug but explicitly mocks +`provider_model_ids` to omit it, exercising the soft-accept path generically +for any future entitlement-gated Codex slug that ships before Hermes catalogs +it. """ from unittest.mock import patch