fix(model_metadata): add xAI Grok context length fallbacks

xAI /v1/models does not return context_length metadata, so Hermes probes down to the 128k default whenever a user configures a custom provider pointing at https://api.x.ai/v1. This forces every xAI user to manually override model.context_length in config.yaml (2M for Grok 4.20 / 4.1-fast / 4-fast) or lose most of the usable context window. Add DEFAULT_CONTEXT_LENGTHS entries for the Grok family so the fallback lookup returns the correct value via substring matching. Values sourced from models.dev (2026-04) and cross-checked against the xAI /v1/models listing: - grok-4.20-* 2,000,000 (reasoning, non-reasoning, multi-agent) - grok-4-1-fast-* 2,000,000 - grok-4-fast-* 2,000,000 - grok-4 / grok-4-0709 256,000 - grok-code-fast-1 256,000 - grok-3* 131,072 - grok-2 / latest 131,072 - grok-2-vision* 8,192 - grok (catch-all) 131,072 Keys are ordered longest-first so that specific variants match before the catch-all, consistent with the existing Claude/Gemma/MiniMax entries. Add TestDefaultContextLengths.test_grok_models_context_lengths and test_grok_substring_matching to pin the values and verify the full lookup path. All 77 tests in test_model_metadata.py pass.
2026-07-24 16:54:43 +00:00 · 2026-04-10 12:08:16 +04:00 · 2026-04-10 12:08:16 +04:00 · b577697189
commit b577697189
parent 5b22e61cfa
2 changed files with 70 additions and 0 deletions
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@ -126,6 +126,21 @@ DEFAULT_CONTEXT_LENGTHS = {
    "minimax": 1048576,
    # GLM
    "glm": 202752,
+    # xAI Grok — xAI /v1/models does not return context_length metadata,
+    # so these hardcoded fallbacks prevent Hermes from probing-down to
+    # the default 128k when the user points at https://api.x.ai/v1
+    # via a custom provider. Values sourced from models.dev (2026-04).
+    # Keys use substring matching (longest-first), so e.g. "grok-4.20"
+    # matches "grok-4.20-0309-reasoning" / "-non-reasoning" / "-multi-agent-0309".
+    "grok-code-fast": 256000,   # grok-code-fast-1
+    "grok-4-1-fast": 2000000,   # grok-4-1-fast-(non-)reasoning
+    "grok-2-vision": 8192,      # grok-2-vision, -1212, -latest
+    "grok-4-fast": 2000000,     # grok-4-fast-(non-)reasoning
+    "grok-4.20": 2000000,       # grok-4.20-0309-(non-)reasoning, -multi-agent-0309
+    "grok-4": 256000,           # grok-4, grok-4-0709
+    "grok-3": 131072,           # grok-3, grok-3-mini, grok-3-fast, grok-3-mini-fast
+    "grok-2": 131072,           # grok-2, grok-2-1212, grok-2-latest
+    "grok": 131072,             # catch-all (grok-beta, unknown grok-*)
    # Kimi
    "kimi": 262144,
    # Arcee
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@ -132,6 +132,61 @@ class TestDefaultContextLengths:
            if "gemini" in key:
                assert value == 1048576, f"{key} should be 1048576"

+    def test_grok_models_context_lengths(self):
+        # xAI /v1/models does not return context_length metadata, so
+        # DEFAULT_CONTEXT_LENGTHS must cover the Grok family explicitly.
+        # Values sourced from models.dev (2026-04).
+        expected = {
+            "grok-4.20": 2000000,
+            "grok-4-1-fast": 2000000,
+            "grok-4-fast": 2000000,
+            "grok-4": 256000,
+            "grok-code-fast": 256000,
+            "grok-3": 131072,
+            "grok-2": 131072,
+            "grok-2-vision": 8192,
+            "grok": 131072,
+        }
+        for key, value in expected.items():
+            assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing from DEFAULT_CONTEXT_LENGTHS"
+            assert DEFAULT_CONTEXT_LENGTHS[key] == value, (
+                f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}"
+            )
+
+    def test_grok_substring_matching(self):
+        # Longest-first substring matching must resolve the real xAI model
+        # IDs to the correct fallback entries without 128k probe-down.
+        from agent.model_metadata import get_model_context_length
+        from unittest.mock import patch as mock_patch
+
+        # Fake the provider/API/cache layers so the lookup falls through
+        # to DEFAULT_CONTEXT_LENGTHS.
+        with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}),              mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}),              mock_patch("agent.model_metadata.get_cached_context_length", return_value=None):
+            cases = [
+                ("grok-4.20-0309-reasoning", 2000000),
+                ("grok-4.20-0309-non-reasoning", 2000000),
+                ("grok-4.20-multi-agent-0309", 2000000),
+                ("grok-4-1-fast-reasoning", 2000000),
+                ("grok-4-1-fast-non-reasoning", 2000000),
+                ("grok-4-fast-reasoning", 2000000),
+                ("grok-4-fast-non-reasoning", 2000000),
+                ("grok-4", 256000),
+                ("grok-4-0709", 256000),
+                ("grok-code-fast-1", 256000),
+                ("grok-3", 131072),
+                ("grok-3-mini", 131072),
+                ("grok-3-mini-fast", 131072),
+                ("grok-2", 131072),
+                ("grok-2-vision", 8192),
+                ("grok-2-vision-1212", 8192),
+                ("grok-beta", 131072),
+            ]
+            for model_id, expected_ctx in cases:
+                actual = get_model_context_length(model_id)
+                assert actual == expected_ctx, (
+                    f"{model_id}: expected {expected_ctx}, got {actual}"
+                )
+
    def test_all_values_positive(self):
        for key, value in DEFAULT_CONTEXT_LENGTHS.items():
            assert value > 0, f"{key} has non-positive context length"