diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 831f26937d4..724457fd899 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -1140,6 +1140,18 @@ def _model_name_suggests_minimax_m3(model: str) -> bool: return "minimax-m3" in model.lower() +def _model_name_suggests_grok_4_3(model: str) -> bool: + """Return True if the model name looks like a Grok 4.3 variant. + + Catches ``grok-4.3``, ``grok-4.3-latest``, and similar slugs. + Used as a guard against stale cache entries seeded by pre-catalog builds + that resolved grok-4.3 via the generic ``grok-4`` catch-all (256,000) + before the ``grok-4.3`` (1M) entry was added to DEFAULT_CONTEXT_LENGTHS + on 2026-05-15. + """ + return "grok-4.3" in model.lower() + + def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]: """Query a local server for the model's context length.""" import httpx @@ -1564,6 +1576,19 @@ def get_model_context_length( model, base_url, f"{cached:,}", ) _invalidate_cached_context_length(model, base_url) + # Invalidate stale ≤256,000 cache entries for Grok-4.3. The + # ``grok-4.3`` (1M) entry was added to DEFAULT_CONTEXT_LENGTHS on + # 2026-05-15; prior to that, grok-4.3 slugs resolved via the + # ``grok-4`` catch-all (256,000) and that value was persisted. + # grok-4.3 is 1M, so any cached value ≤256,000 is a pre-catalog + # leftover — drop it and fall through to the hardcoded default. + elif cached <= 256_000 and _model_name_suggests_grok_4_3(model): + logger.info( + "Dropping stale Grok-4.3 cache entry %s@%s -> %s (pre-catalog value); " + "re-resolving via hardcoded defaults", + model, base_url, f"{cached:,}", + ) + _invalidate_cached_context_length(model, base_url) # Nous Portal: the portal /v1/models endpoint is authoritative. 
# Bypass the persistent cache so step 5b can always reconcile # against it — this corrects pre-fix entries seeded from the diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py index 5b1abfd32d0..0eab4dcff05 100644 --- a/tests/agent/test_model_metadata.py +++ b/tests/agent/test_model_metadata.py @@ -1247,3 +1247,59 @@ class TestContextLengthCache: with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file): save_context_length(model, url, 200000) assert get_cached_context_length(model, url) == 200000 + + +class TestGrok43StaleCacheGuard: + """Pre-catalog builds resolved grok-4.3 via the generic 'grok-4' catch-all + (256,000) and persisted it before the 'grok-4.3' (1M) catalog entry was + added on 2026-05-15. The step-1 cache guard must drop that stale value + and re-resolve to 1M, while leaving correct grok-4 entries (256,000) + untouched. + """ + + def test_suggests_grok_4_3(self): + from agent.model_metadata import _model_name_suggests_grok_4_3 + assert _model_name_suggests_grok_4_3("grok-4.3") + assert _model_name_suggests_grok_4_3("grok-4.3-latest") + assert _model_name_suggests_grok_4_3("xai/grok-4.3") + assert not _model_name_suggests_grok_4_3("grok-4") + assert not _model_name_suggests_grok_4_3("grok-4-fast") + assert not _model_name_suggests_grok_4_3("grok-4.20") + + def test_stale_grok_4_3_dropped_and_reresolves_to_1m(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + import importlib + import agent.model_metadata as mm + importlib.reload(mm) + base = "https://api.x.ai/v1" + mm.save_context_length("grok-4.3", base, 256_000) + ctx = mm.get_model_context_length( + "grok-4.3", base_url=base, api_key="", provider="xai" + ) + assert ctx == 1_000_000 + + def test_correct_grok_4_3_cache_preserved(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + import importlib + import agent.model_metadata as mm + importlib.reload(mm) + base = 
"https://api.x.ai/v1" + mm.save_context_length("grok-4.3", base, 1_000_000) + ctx = mm.get_model_context_length( + "grok-4.3", base_url=base, api_key="", provider="xai" + ) + assert ctx == 1_000_000 + + def test_grok_4_not_clobbered(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + import importlib + import agent.model_metadata as mm + importlib.reload(mm) + base = "https://api.x.ai/v1" + # 256,000 is the CORRECT value for plain grok-4 — guard must not touch it. + for slug in ("grok-4", "grok-4-0709"): + mm.save_context_length(slug, base, 256_000) + ctx = mm.get_model_context_length( + slug, base_url=base, api_key="", provider="xai" + ) + assert ctx == 256_000, f"{slug} should stay 256000, got {ctx}"