diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 791f778c226..0fdf1a52451 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -126,6 +126,21 @@ DEFAULT_CONTEXT_LENGTHS = {
     "minimax": 1048576,
     # GLM
     "glm": 202752,
+    # xAI Grok — xAI /v1/models does not return context_length metadata,
+    # so these hardcoded fallbacks prevent Hermes from probing-down to
+    # the default 128k when the user points at https://api.x.ai/v1
+    # via a custom provider. Values sourced from models.dev (2026-04).
+    # Keys use substring matching (longest-first), so e.g. "grok-4.20"
+    # matches "grok-4.20-0309-reasoning" / "-non-reasoning" / "-multi-agent-0309".
+    "grok-code-fast": 256000,  # grok-code-fast-1
+    "grok-4-1-fast": 2000000,  # grok-4-1-fast-(non-)reasoning
+    "grok-2-vision": 8192,  # grok-2-vision, -1212, -latest
+    "grok-4-fast": 2000000,  # grok-4-fast-(non-)reasoning
+    "grok-4.20": 2000000,  # grok-4.20-0309-(non-)reasoning, -multi-agent-0309
+    "grok-4": 256000,  # grok-4, grok-4-0709
+    "grok-3": 131072,  # grok-3, grok-3-mini, grok-3-fast, grok-3-mini-fast
+    "grok-2": 131072,  # grok-2, grok-2-1212, grok-2-latest
+    "grok": 131072,  # catch-all (grok-beta, unknown grok-*)
     # Kimi
     "kimi": 262144,
     # Arcee
diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py
index 51a4c887393..b95c72e13e1 100644
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -132,6 +132,61 @@ class TestDefaultContextLengths:
             if "gemini" in key:
                 assert value == 1048576, f"{key} should be 1048576"
 
+    def test_grok_models_context_lengths(self):
+        # xAI /v1/models does not return context_length metadata, so
+        # DEFAULT_CONTEXT_LENGTHS must cover the Grok family explicitly.
+        # Values sourced from models.dev (2026-04).
+        expected = {
+            "grok-4.20": 2000000,
+            "grok-4-1-fast": 2000000,
+            "grok-4-fast": 2000000,
+            "grok-4": 256000,
+            "grok-code-fast": 256000,
+            "grok-3": 131072,
+            "grok-2": 131072,
+            "grok-2-vision": 8192,
+            "grok": 131072,
+        }
+        for key, value in expected.items():
+            assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing from DEFAULT_CONTEXT_LENGTHS"
+            assert DEFAULT_CONTEXT_LENGTHS[key] == value, (
+                f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}"
+            )
+
+    def test_grok_substring_matching(self):
+        # Longest-first substring matching must resolve the real xAI model
+        # IDs to the correct fallback entries without 128k probe-down.
+        from agent.model_metadata import get_model_context_length
+        from unittest.mock import patch as mock_patch
+
+        # Fake the provider/API/cache layers so the lookup falls through
+        # to DEFAULT_CONTEXT_LENGTHS.
+        with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}), mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), mock_patch("agent.model_metadata.get_cached_context_length", return_value=None):
+            cases = [
+                ("grok-4.20-0309-reasoning", 2000000),
+                ("grok-4.20-0309-non-reasoning", 2000000),
+                ("grok-4.20-multi-agent-0309", 2000000),
+                ("grok-4-1-fast-reasoning", 2000000),
+                ("grok-4-1-fast-non-reasoning", 2000000),
+                ("grok-4-fast-reasoning", 2000000),
+                ("grok-4-fast-non-reasoning", 2000000),
+                ("grok-4", 256000),
+                ("grok-4-0709", 256000),
+                ("grok-code-fast-1", 256000),
+                ("grok-3", 131072),
+                ("grok-3-mini", 131072),
+                ("grok-3-mini-fast", 131072),
+                ("grok-2", 131072),
+                ("grok-2-vision", 8192),
+                ("grok-2-vision-1212", 8192),
+                ("grok-beta", 131072),
+            ]
+            for model_id, expected_ctx in cases:
+                actual = get_model_context_length(model_id)
+                assert actual == expected_ctx, (
+                    f"{model_id}: expected {expected_ctx}, got {actual}"
+                )
+
     def test_all_values_positive(self):
         for key, value in DEFAULT_CONTEXT_LENGTHS.items():
             assert value > 0, f"{key} has non-positive context length"