From 44cdf555a83c1d8d605d095442e11efd58089533 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sun, 10 May 2026 11:44:11 +0530
Subject: [PATCH] fix(codex-spark): defensive 128k entry in
 DEFAULT_CONTEXT_LENGTHS + clarify validation test docstring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two follow-ups from self-review:

1. Add gpt-5.3-codex-spark to DEFAULT_CONTEXT_LENGTHS at 128k. The
   primary resolution path for Spark goes through provider='openai-codex'
   → _CODEX_OAUTH_CONTEXT_FALLBACK (already correct). But if any future
   code path resolves Spark's context with a different provider (custom
   proxy, generic fallthrough), the longest-substring-first lookup in
   step 8 would match 'gpt-5' and report 400k, which is wrong by ~3x.
   Adding the explicit override is a cheap defensive correctness fix
   matching how gpt-5.4-mini and gpt-5.4-nano already shadow the broader
   gpt-5.4 entry.

2. Update test_openai_codex_model_validation_fallback.py docstring. The
   bug it was originally written for (gpt-5.3-codex-spark missing from
   listing) is now resolved by this PR's catalog restoration. The test
   still validly exercises the soft-accept code path for any future
   entitlement-gated Codex slug that ships before Hermes catalogs it,
   but the framing was stale — clarified.
---
 agent/model_metadata.py                           |  7 +++++++
 ...test_openai_codex_model_validation_fallback.py | 15 ++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 653a90619a5..956d6b93095 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -157,6 +157,13 @@ DEFAULT_CONTEXT_LENGTHS = {
     "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
     "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
     "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
+    # gpt-5.3-codex-spark is Codex-OAuth-only (ChatGPT Pro entitlement) and
+    # uses a smaller 128k window than most other gpt-5.x slugs. Listed here as
+    # a defensive override so the longest-substring fallback doesn't match
+    # the generic "gpt-5" entry below (400k) and report the wrong limit if
+    # Spark's context ever needs to be resolved through this path. Real
+    # usage (provider='openai-codex') goes via _CODEX_OAUTH_CONTEXT_FALLBACK.
+    "gpt-5.3-codex-spark": 128000,
     "gpt-5.1-chat": 128000,           # Chat variant has 128k context
     "gpt-5": 400000,                  # GPT-5.x base, mini, codex variants (400k)
     "gpt-4.1": 1047576,
diff --git a/tests/hermes_cli/test_openai_codex_model_validation_fallback.py b/tests/hermes_cli/test_openai_codex_model_validation_fallback.py
index e33dbe2ba44..2b742b058ef 100644
--- a/tests/hermes_cli/test_openai_codex_model_validation_fallback.py
+++ b/tests/hermes_cli/test_openai_codex_model_validation_fallback.py
@@ -1,9 +1,18 @@
 """Regression tests for OpenAI Codex model validation when the listing lags behind
 actually usable backend model IDs.
 
-The bug: `/model` and `switch_model()` reject `gpt-5.3-codex-spark` because the
-OpenAI Codex listing omits it, even though direct runtime calls with
-`--provider openai-codex -m gpt-5.3-codex-spark` succeed.
+The bug originally reported in #16172: `/model` and `switch_model()` rejected
+`gpt-5.3-codex-spark` because the curated listing omitted it, even though direct
+runtime calls succeeded. PR #19729 fixed this by soft-accepting
+unknown-but-plausible Codex slugs with a warning, and this test pins the
+soft-accept behavior so it doesn't regress.
+
+Note: gpt-5.3-codex-spark itself is now in the curated catalog (PR #22991),
+so the real-world Spark request takes the `recognized=True` fast path. This
+test still uses Spark as the example slug but explicitly mocks
+``provider_model_ids`` to omit it, exercising the soft-accept path generically
+for any future entitlement-gated Codex slug that ships before Hermes catalogs
+it.
 """
 
 from unittest.mock import patch