mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(model_metadata): drop stale ≤256,000 cache entries for Grok-4.3
The ``grok-4.3`` (1M context) catalog entry was added on 2026-05-15 (ce0e189d3). Between 2026-04-10 (when ``grok-4`` at 256,000 was first added by b57769718) and 2026-05-15, grok-4.3 slugs resolved via the generic ``grok-4`` substring catch-all and that 256,000 value was persisted to context_length_cache.yaml. Users who first queried grok-4.3 in that 35-day window are stuck at 256K forever — the cache is read at step 1 before the hardcoded defaults in step 8, so the correct 1M entry is never reached. Mirror the existing Kimi/Codex/MiniMax-M3 stale-cache guards: add _model_name_suggests_grok_4_3() and an elif branch that drops any cached value ≤ 256,000 for a grok-4.3 slug so the next lookup falls through to the 1M hardcoded default. Adds 4 regression tests: helper unit test, stale-drop-and-re-resolve, correct-cache-preserved, and no-clobber for plain grok-4 (256K correct).
This commit is contained in:
parent
b04c6e95f6
commit
9756dff5fd
2 changed files with 81 additions and 0 deletions
|
|
@ -1140,6 +1140,18 @@ def _model_name_suggests_minimax_m3(model: str) -> bool:
|
|||
return "minimax-m3" in model.lower()
|
||||
|
||||
|
||||
def _model_name_suggests_grok_4_3(model: str) -> bool:
|
||||
"""Return True if the model name looks like a Grok 4.3 variant.
|
||||
|
||||
Catches ``grok-4.3``, ``grok-4.3-latest``, and similar slugs.
|
||||
Used as a guard against stale cache entries seeded by pre-catalog builds
|
||||
that resolved grok-4.3 via the generic ``grok-4`` catch-all (256,000)
|
||||
before the ``grok-4.3`` (1M) entry was added to DEFAULT_CONTEXT_LENGTHS
|
||||
on 2026-05-15.
|
||||
"""
|
||||
return "grok-4.3" in model.lower()
|
||||
|
||||
|
||||
def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]:
|
||||
"""Query a local server for the model's context length."""
|
||||
import httpx
|
||||
|
|
@ -1564,6 +1576,19 @@ def get_model_context_length(
|
|||
model, base_url, f"{cached:,}",
|
||||
)
|
||||
_invalidate_cached_context_length(model, base_url)
|
||||
# Invalidate stale ≤256,000 cache entries for Grok-4.3. The
|
||||
# ``grok-4.3`` (1M) entry was added to DEFAULT_CONTEXT_LENGTHS on
|
||||
# 2026-05-15; prior to that, grok-4.3 slugs resolved via the
|
||||
# ``grok-4`` catch-all (256,000) and that value was persisted.
|
||||
# grok-4.3 is 1M, so any cached value ≤256,000 is a pre-catalog
|
||||
# leftover — drop it and fall through to the hardcoded default.
|
||||
elif cached <= 256_000 and _model_name_suggests_grok_4_3(model):
|
||||
logger.info(
|
||||
"Dropping stale Grok-4.3 cache entry %s@%s -> %s (pre-catalog value); "
|
||||
"re-resolving via hardcoded defaults",
|
||||
model, base_url, f"{cached:,}",
|
||||
)
|
||||
_invalidate_cached_context_length(model, base_url)
|
||||
# Nous Portal: the portal /v1/models endpoint is authoritative.
|
||||
# Bypass the persistent cache so step 5b can always reconcile
|
||||
# against it — this corrects pre-fix entries seeded from the
|
||||
|
|
|
|||
|
|
@ -1247,3 +1247,59 @@ class TestContextLengthCache:
|
|||
with patch("agent.model_metadata._get_context_cache_path", return_value=cache_file):
|
||||
save_context_length(model, url, 200000)
|
||||
assert get_cached_context_length(model, url) == 200000
|
||||
|
||||
|
||||
class TestGrok43StaleCacheGuard:
    """Regression tests for the Grok-4.3 stale-cache guard.

    Pre-catalog builds resolved grok-4.3 via the generic 'grok-4' catch-all
    (256,000) and persisted it before the 'grok-4.3' (1M) catalog entry was
    added on 2026-05-15.  The step-1 cache guard must drop that stale value
    and re-resolve to 1M, while leaving correct grok-4 entries (256,000)
    untouched.
    """

    # Endpoint used as the cache key's base URL in every scenario below.
    BASE_URL = "https://api.x.ai/v1"

    @staticmethod
    def _fresh_metadata_module(tmp_path, monkeypatch):
        """Point HERMES_HOME at *tmp_path* and return a reloaded module.

        The reload happens after the environment variable is set —
        presumably so any module-level state derived from HERMES_HOME is
        recomputed against the temporary directory (confirm against
        ``agent.model_metadata``).
        """
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        import importlib

        import agent.model_metadata as mm

        importlib.reload(mm)
        return mm

    def test_suggests_grok_4_3(self):
        """The name helper matches grok-4.3 slugs and nothing else."""
        from agent.model_metadata import _model_name_suggests_grok_4_3

        for slug in ("grok-4.3", "grok-4.3-latest", "xai/grok-4.3"):
            assert _model_name_suggests_grok_4_3(slug)
        for slug in ("grok-4", "grok-4-fast", "grok-4.20"):
            assert not _model_name_suggests_grok_4_3(slug)

    def test_stale_grok_4_3_dropped_and_reresolves_to_1m(self, tmp_path, monkeypatch):
        """A cached 256,000 for grok-4.3 is discarded and resolves to 1M."""
        mm = self._fresh_metadata_module(tmp_path, monkeypatch)
        base = self.BASE_URL
        mm.save_context_length("grok-4.3", base, 256_000)
        ctx = mm.get_model_context_length(
            "grok-4.3", base_url=base, api_key="", provider="xai"
        )
        assert ctx == 1_000_000

    def test_correct_grok_4_3_cache_preserved(self, tmp_path, monkeypatch):
        """A cached 1M value for grok-4.3 is returned unchanged."""
        mm = self._fresh_metadata_module(tmp_path, monkeypatch)
        base = self.BASE_URL
        mm.save_context_length("grok-4.3", base, 1_000_000)
        ctx = mm.get_model_context_length(
            "grok-4.3", base_url=base, api_key="", provider="xai"
        )
        assert ctx == 1_000_000

    def test_grok_4_not_clobbered(self, tmp_path, monkeypatch):
        """Plain grok-4 slugs keep their cached 256,000 value."""
        mm = self._fresh_metadata_module(tmp_path, monkeypatch)
        base = self.BASE_URL
        # 256,000 is the CORRECT value for plain grok-4 — guard must not touch it.
        for slug in ("grok-4", "grok-4-0709"):
            mm.save_context_length(slug, base, 256_000)
            ctx = mm.get_model_context_length(
                slug, base_url=base, api_key="", provider="xai"
            )
            assert ctx == 256_000, f"{slug} should stay 256000, got {ctx}"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue