From 9756dff5fd83912b0cdd389f7ee1bb0a4c50f16c Mon Sep 17 00:00:00 2001 From: AhmetArif0 <147827411+AhmetArif0@users.noreply.github.com> Date: Tue, 2 Jun 2026 02:43:38 +0300 Subject: [PATCH] =?UTF-8?q?fix(model=5Fmetadata):=20drop=20stale=20?= =?UTF-8?q?=E2=89=A4256,000=20cache=20entries=20for=20Grok-4.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ``grok-4.3`` (1M context) catalog entry was added on 2026-05-15 (ce0e189d3). Between 2026-04-10 (when ``grok-4`` at 256,000 was first added by b57769718) and 2026-05-15, grok-4.3 slugs resolved via the generic ``grok-4`` substring catch-all and that 256,000 value was persisted to context_length_cache.yaml. Users who first queried grok-4.3 in that 35-day window are stuck at 256K forever — the cache is read at step 1 before the hardcoded defaults in step 8, so the correct 1M entry is never reached. Mirror the existing Kimi/Codex/MiniMax-M3 stale-cache guards: add _model_name_suggests_grok_4_3() and an elif branch that drops any cached value ≤ 256,000 for a grok-4.3 slug so the next lookup falls through to the 1M hardcoded default. Adds 4 regression tests: helper unit test, stale-drop-and-re-resolve, correct-cache-preserved, and no-clobber for plain grok-4 (256K correct). --- agent/model_metadata.py | 25 +++++++++++++ tests/agent/test_model_metadata.py | 56 ++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 831f26937d4..724457fd899 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -1140,6 +1140,18 @@ def _model_name_suggests_minimax_m3(model: str) -> bool: return "minimax-m3" in model.lower() +def _model_name_suggests_grok_4_3(model: str) -> bool: + """Return True if the model name looks like a Grok 4.3 variant. + + Catches ``grok-4.3``, ``grok-4.3-latest``, and similar slugs. 
+ Used as a guard against stale cache entries seeded by pre-catalog builds + that resolved grok-4.3 via the generic ``grok-4`` catch-all (256,000) + before the ``grok-4.3`` (1M) entry was added to DEFAULT_CONTEXT_LENGTHS + on 2026-05-15. + """ + return "grok-4.3" in model.lower() + + def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]: """Query a local server for the model's context length.""" import httpx @@ -1564,6 +1576,19 @@ def get_model_context_length( model, base_url, f"{cached:,}", ) _invalidate_cached_context_length(model, base_url) + # Invalidate stale ≤256,000 cache entries for Grok-4.3. The + # ``grok-4.3`` (1M) entry was added to DEFAULT_CONTEXT_LENGTHS on + # 2026-05-15; prior to that, grok-4.3 slugs resolved via the + # ``grok-4`` catch-all (256,000) and that value was persisted. + # grok-4.3 is 1M, so any cached value ≤256,000 is a pre-catalog + # leftover — drop it and fall through to the hardcoded default. + elif cached <= 256_000 and _model_name_suggests_grok_4_3(model): + logger.info( + "Dropping stale Grok-4.3 cache entry %s@%s -> %s (pre-catalog value); " + "re-resolving via hardcoded defaults", + model, base_url, f"{cached:,}", + ) + _invalidate_cached_context_length(model, base_url) # Nous Portal: the portal /v1/models endpoint is authoritative. 
# Bypass the persistent cache so step 5b can always reconcile # against it — this corrects pre-fix entries seeded from the diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py index 5b1abfd32d0..0eab4dcff05 100644 --- a/tests/agent/test_model_metadata.py +++ b/tests/agent/test_model_metadata.py @@ -1247,3 +1247,59 @@ class TestContextLengthCache: with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file): save_context_length(model, url, 200000) assert get_cached_context_length(model, url) == 200000 + + +class TestGrok43StaleCacheGuard: + """Pre-catalog builds resolved grok-4.3 via the generic 'grok-4' catch-all + (256,000) and persisted it before the 'grok-4.3' (1M) catalog entry was + added on 2026-05-15. The step-1 cache guard must drop that stale value + and re-resolve to 1M, while leaving correct grok-4 entries (256,000) + untouched. + """ + + def test_suggests_grok_4_3(self): + from agent.model_metadata import _model_name_suggests_grok_4_3 + assert _model_name_suggests_grok_4_3("grok-4.3") + assert _model_name_suggests_grok_4_3("grok-4.3-latest") + assert _model_name_suggests_grok_4_3("xai/grok-4.3") + assert not _model_name_suggests_grok_4_3("grok-4") + assert not _model_name_suggests_grok_4_3("grok-4-fast") + assert not _model_name_suggests_grok_4_3("grok-4.20") + + def test_stale_grok_4_3_dropped_and_reresolves_to_1m(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + import importlib + import agent.model_metadata as mm + importlib.reload(mm) + base = "https://api.x.ai/v1" + mm.save_context_length("grok-4.3", base, 256_000) + ctx = mm.get_model_context_length( + "grok-4.3", base_url=base, api_key="", provider="xai" + ) + assert ctx == 1_000_000 + + def test_correct_grok_4_3_cache_preserved(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + import importlib + import agent.model_metadata as mm + importlib.reload(mm) + base = 
"https://api.x.ai/v1" + mm.save_context_length("grok-4.3", base, 1_000_000) + ctx = mm.get_model_context_length( + "grok-4.3", base_url=base, api_key="", provider="xai" + ) + assert ctx == 1_000_000 + + def test_grok_4_not_clobbered(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + import importlib + import agent.model_metadata as mm + importlib.reload(mm) + base = "https://api.x.ai/v1" + # 256,000 is the CORRECT value for plain grok-4 — guard must not touch it. + for slug in ("grok-4", "grok-4-0709"): + mm.save_context_length(slug, base, 256_000) + ctx = mm.get_model_context_length( + slug, base_url=base, api_key="", provider="xai" + ) + assert ctx == 256_000, f"{slug} should stay 256000, got {ctx}"