diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 8cfec23fe1f..e31fcdea48d 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -261,7 +261,13 @@ DEFAULT_CONTEXT_LENGTHS = {
     # https://platform.minimax.io/docs/api-reference/text-chat-openai
     "minimax-m3": 1000000,
     "minimax": 204800,
-    # GLM
+    # GLM — GLM-5.2 ships with a 1M context window (verified empirically:
+    # needle-in-a-haystack retrieval at 789K prompt tokens succeeded with
+    # zero errors on api.z.ai/api/coding/paas/v4). Older GLM models
+    # (5, 5.1, 5-turbo) are ~202K. Longest-key-first substring matching
+    # ensures "glm-5.2" resolves to 1M while older variants still hit the
+    # generic 202K fallback.
+    "glm-5.2": 1_048_576,
     "glm": 202752,
     # xAI Grok — xAI /v1/models does not return context_length metadata,
     # so these hardcoded fallbacks prevent Hermes from probing-down to
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 38bcab92907..0d5887ec9dd 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -616,8 +616,8 @@ ZAI_ENDPOINTS = [
     # (id, base_url, probe_models, label)
     ("global", "https://api.z.ai/api/paas/v4", ["glm-5"], "Global"),
     ("cn", "https://open.bigmodel.cn/api/paas/v4", ["glm-5"], "China"),
-    ("coding-global", "https://api.z.ai/api/coding/paas/v4", ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "Global (Coding Plan)"),
-    ("coding-cn", "https://open.bigmodel.cn/api/coding/paas/v4", ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "China (Coding Plan)"),
+    ("coding-global", "https://api.z.ai/api/coding/paas/v4", ["glm-5.2", "glm-5.1", "glm-5v-turbo", "glm-4.7"], "Global (Coding Plan)"),
+    ("coding-cn", "https://open.bigmodel.cn/api/coding/paas/v4", ["glm-5.2", "glm-5.1", "glm-5v-turbo", "glm-4.7"], "China (Coding Plan)"),
 ]
 
 
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index afab5bac32d..ffdc370cb33 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -257,6 +257,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
         "gemini-3.5-flash",
     ],
     "zai": [
+        "glm-5.2",
         "glm-5.1",
         "glm-5",
         "glm-5v-turbo",
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 75bde2a93c4..b809af6ecf7 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -93,7 +93,7 @@ _DEFAULT_PROVIDER_MODELS = {
         "gemini-3.1-pro-preview", "gemini-3-pro-preview",
         "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
     ],
-    "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
+    "zai": ["glm-5.2", "glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
     "kimi-coding": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
     "kimi-coding-cn": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
     "stepfun": ["step-3.5-flash", "step-3.5-flash-2603"],
diff --git a/plugins/model-providers/zai/__init__.py b/plugins/model-providers/zai/__init__.py
index 70aa8704d14..9fcdb2bec7d 100644
--- a/plugins/model-providers/zai/__init__.py
+++ b/plugins/model-providers/zai/__init__.py
@@ -11,6 +11,7 @@ zai = ProviderProfile(
     description="Z.AI / GLM — Zhipu AI models",
     signup_url="https://z.ai/",
     fallback_models=(
+        "glm-5.2",
         "glm-5",
         "glm-4-9b",
     ),
diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py
index 35651a00b66..b6c926f5a08 100644
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -220,6 +220,31 @@ class TestDefaultContextLengths:
                 f"{model_id}: expected {expected_ctx}, got {actual}"
             )
 
+    def test_glm_52_context_1m(self):
+        """GLM-5.2 must resolve to 1M, not the generic GLM fallback of 202K.
+
+        Context window was verified empirically via needle-in-a-haystack
+        retrieval at 789K prompt tokens on api.z.ai/api/coding/paas/v4
+        (2026-06-13).
+        """
+        from agent.model_metadata import get_model_context_length
+        from unittest.mock import patch as mock_patch
+
+        assert DEFAULT_CONTEXT_LENGTHS["glm-5.2"] == 1_048_576
+        assert DEFAULT_CONTEXT_LENGTHS["glm"] == 202752
+
+        with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
+             mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
+             mock_patch("agent.model_metadata.get_cached_context_length", return_value=None):
+            # GLM-5.2 (1M) must NOT fall through to the generic 202K entry
+            assert get_model_context_length("glm-5.2") == 1_048_576
+            # Vendor-prefixed forms (zai provider, zhipu alias)
+            assert get_model_context_length("zai/glm-5.2") == 1_048_576
+            assert get_model_context_length("zhipu/glm-5.2") == 1_048_576
+            # Older GLM variants still resolve to the generic 202K fallback
+            assert get_model_context_length("glm-5") == 202752
+            assert get_model_context_length("glm-5.1") == 202752
+
     def test_openrouter_live_metadata_beats_hardcoded_catchall(self):
         """OpenRouter-routed slugs resolve via the live OR catalog before
         the hardcoded family catch-all.