feat(zai): add GLM-5.2 with verified 1M context window

GLM-5.2 ships with a 1M (1,048,576) token context window. Without this
entry, Hermes falls through to the generic 'glm' key (202,752 tokens),
so the context bar under-reports the available window and conversations
are compressed prematurely.

The large context window was verified empirically via needle-in-a-haystack
retrieval at up to 789,240 prompt tokens on api.z.ai/api/coding/paas/v4 —
zero errors, zero truncation, correct retrieval at every tested size (25K
through 789K). The largest tested prompt is below the full 1,048,576-token
limit.

Changes:
- agent/model_metadata.py: add 'glm-5.2': 1_048_576 before 'glm' fallback
- hermes_cli/models.py: add glm-5.2 to zai curated models
- hermes_cli/setup.py: add glm-5.2 to setup wizard zai list
- hermes_cli/auth.py: add glm-5.2 to coding plan endpoint probes
- plugins/model-providers/zai/__init__.py: add glm-5.2 to fallback_models
- tests/agent/test_model_metadata.py: context resolution + vendor-prefix tests
This commit is contained in:
mr-r0b0t 2026-06-13 10:08:29 -05:00 committed by Teknium
parent 4e6d05c6a5
commit bff78a34dc
6 changed files with 37 additions and 4 deletions

View file

@ -261,7 +261,13 @@ DEFAULT_CONTEXT_LENGTHS = {
# https://platform.minimax.io/docs/api-reference/text-chat-openai
"minimax-m3": 1000000,
"minimax": 204800,
# GLM
# GLM — GLM-5.2 ships with a 1M context window (verified empirically:
# needle-in-a-haystack retrieval at 789K prompt tokens succeeded with
# zero errors on api.z.ai/api/coding/paas/v4). Older GLM models
# (5, 5.1, 5-turbo) are ~202K. Longest-key-first substring matching
# ensures "glm-5.2" resolves to 1M while older variants still hit the
# generic 202K fallback.
"glm-5.2": 1_048_576,
"glm": 202752,
# xAI Grok — xAI /v1/models does not return context_length metadata,
# so these hardcoded fallbacks prevent Hermes from probing-down to

View file

@ -616,8 +616,8 @@ ZAI_ENDPOINTS = [
# (id, base_url, probe_models, label)
("global", "https://api.z.ai/api/paas/v4", ["glm-5"], "Global"),
("cn", "https://open.bigmodel.cn/api/paas/v4", ["glm-5"], "China"),
("coding-global", "https://api.z.ai/api/coding/paas/v4", ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "Global (Coding Plan)"),
("coding-cn", "https://open.bigmodel.cn/api/coding/paas/v4", ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "China (Coding Plan)"),
("coding-global", "https://api.z.ai/api/coding/paas/v4", ["glm-5.2", "glm-5.1", "glm-5v-turbo", "glm-4.7"], "Global (Coding Plan)"),
("coding-cn", "https://open.bigmodel.cn/api/coding/paas/v4", ["glm-5.2", "glm-5.1", "glm-5v-turbo", "glm-4.7"], "China (Coding Plan)"),
]

View file

@ -257,6 +257,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"gemini-3.5-flash",
],
"zai": [
"glm-5.2",
"glm-5.1",
"glm-5",
"glm-5v-turbo",

View file

@ -93,7 +93,7 @@ _DEFAULT_PROVIDER_MODELS = {
"gemini-3.1-pro-preview", "gemini-3-pro-preview",
"gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
],
"zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
"zai": ["glm-5.2", "glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
"kimi-coding": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
"kimi-coding-cn": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
"stepfun": ["step-3.5-flash", "step-3.5-flash-2603"],

View file

@ -11,6 +11,7 @@ zai = ProviderProfile(
description="Z.AI / GLM — Zhipu AI models",
signup_url="https://z.ai/",
fallback_models=(
"glm-5.2",
"glm-5",
"glm-4-9b",
),

View file

@ -220,6 +220,31 @@ class TestDefaultContextLengths:
f"{model_id}: expected {expected_ctx}, got {actual}"
)
def test_glm_52_context_1m(self):
    """Check that GLM-5.2 resolves to its own 1M entry, not the generic GLM one.

    The 1,048,576 figure is backed by a needle-in-a-haystack retrieval run
    at 789K prompt tokens against api.z.ai/api/coding/paas/v4 (2026-06-13).
    """
    from unittest.mock import patch as mock_patch
    from agent.model_metadata import get_model_context_length

    glm_52_ctx = 1_048_576
    generic_glm_ctx = 202752

    # The defaults table must carry both the dedicated and the generic entry.
    assert DEFAULT_CONTEXT_LENGTHS["glm-5.2"] == glm_52_ctx
    assert DEFAULT_CONTEXT_LENGTHS["glm"] == generic_glm_ctx

    expected_by_model = (
        # GLM-5.2 (1M) must NOT fall through to the generic 202K entry
        ("glm-5.2", glm_52_ctx),
        # Vendor-prefixed forms (zai provider, zhipu alias)
        ("zai/glm-5.2", glm_52_ctx),
        ("zhipu/glm-5.2", glm_52_ctx),
        # Older GLM variants still resolve to the generic 202K fallback
        ("glm-5", generic_glm_ctx),
        ("glm-5.1", generic_glm_ctx),
    )

    # Stub the metadata fetchers and the cache lookup with empty results;
    # presumably this leaves the hardcoded defaults as the only source
    # consulted during resolution (confirm against get_model_context_length).
    no_model_metadata = mock_patch(
        "agent.model_metadata.fetch_model_metadata", return_value={}
    )
    no_endpoint_metadata = mock_patch(
        "agent.model_metadata.fetch_endpoint_model_metadata", return_value={}
    )
    no_cached_length = mock_patch(
        "agent.model_metadata.get_cached_context_length", return_value=None
    )

    with no_model_metadata, no_endpoint_metadata, no_cached_length:
        for model_id, expected_ctx in expected_by_model:
            assert get_model_context_length(model_id) == expected_ctx
def test_openrouter_live_metadata_beats_hardcoded_catchall(self):
"""OpenRouter-routed slugs resolve via the live OR catalog before the
hardcoded family catch-all.