mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-15 09:21:36 +00:00
feat(zai): add GLM-5.2 with verified 1M context window
GLM-5.2 ships with a 1M (1,048,576) token context window. Without this entry, Hermes falls through to the generic 'glm' key (202,752 tokens), under-reporting the context bar and prematurely compressing conversations.

The 1M limit was verified empirically via needle-in-a-haystack retrieval at 789,240 prompt tokens on api.z.ai/api/coding/paas/v4 — zero errors, zero truncation, correct retrieval at every tested size (25K through 789K).

Changes:
- agent/model_metadata.py: add 'glm-5.2': 1_048_576 before 'glm' fallback
- hermes_cli/models.py: add glm-5.2 to zai curated models
- hermes_cli/setup.py: add glm-5.2 to setup wizard zai list
- hermes_cli/auth.py: add glm-5.2 to coding plan endpoint probes
- plugins/model-providers/zai/__init__.py: add glm-5.2 to fallback_models
- tests/agent/test_model_metadata.py: context resolution + vendor-prefix tests
This commit is contained in:
parent
4e6d05c6a5
commit
bff78a34dc
6 changed files with 37 additions and 4 deletions
|
|
@ -261,7 +261,13 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
# https://platform.minimax.io/docs/api-reference/text-chat-openai
|
||||
"minimax-m3": 1000000,
|
||||
"minimax": 204800,
|
||||
# GLM
|
||||
# GLM — GLM-5.2 ships with a 1M context window (verified empirically:
|
||||
# needle-in-a-haystack retrieval at 789K prompt tokens succeeded with
|
||||
# zero errors on api.z.ai/api/coding/paas/v4). Older GLM models
|
||||
# (5, 5.1, 5-turbo) are ~202K. Longest-key-first substring matching
|
||||
# ensures "glm-5.2" resolves to 1M while older variants still hit the
|
||||
# generic 202K fallback.
|
||||
"glm-5.2": 1_048_576,
|
||||
"glm": 202752,
|
||||
# xAI Grok — xAI /v1/models does not return context_length metadata,
|
||||
# so these hardcoded fallbacks prevent Hermes from probing-down to
|
||||
|
|
|
|||
|
|
@ -616,8 +616,8 @@ ZAI_ENDPOINTS = [
|
|||
# (id, base_url, probe_models, label)
|
||||
("global", "https://api.z.ai/api/paas/v4", ["glm-5"], "Global"),
|
||||
("cn", "https://open.bigmodel.cn/api/paas/v4", ["glm-5"], "China"),
|
||||
("coding-global", "https://api.z.ai/api/coding/paas/v4", ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "Global (Coding Plan)"),
|
||||
("coding-cn", "https://open.bigmodel.cn/api/coding/paas/v4", ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "China (Coding Plan)"),
|
||||
("coding-global", "https://api.z.ai/api/coding/paas/v4", ["glm-5.2", "glm-5.1", "glm-5v-turbo", "glm-4.7"], "Global (Coding Plan)"),
|
||||
("coding-cn", "https://open.bigmodel.cn/api/coding/paas/v4", ["glm-5.2", "glm-5.1", "glm-5v-turbo", "glm-4.7"], "China (Coding Plan)"),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -257,6 +257,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"gemini-3.5-flash",
|
||||
],
|
||||
"zai": [
|
||||
"glm-5.2",
|
||||
"glm-5.1",
|
||||
"glm-5",
|
||||
"glm-5v-turbo",
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ _DEFAULT_PROVIDER_MODELS = {
|
|||
"gemini-3.1-pro-preview", "gemini-3-pro-preview",
|
||||
"gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
|
||||
],
|
||||
"zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
|
||||
"zai": ["glm-5.2", "glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
|
||||
"kimi-coding": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
|
||||
"kimi-coding-cn": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
|
||||
"stepfun": ["step-3.5-flash", "step-3.5-flash-2603"],
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ zai = ProviderProfile(
|
|||
description="Z.AI / GLM — Zhipu AI models",
|
||||
signup_url="https://z.ai/",
|
||||
fallback_models=(
|
||||
"glm-5.2",
|
||||
"glm-5",
|
||||
"glm-4-9b",
|
||||
),
|
||||
|
|
|
|||
|
|
@ -220,6 +220,31 @@ class TestDefaultContextLengths:
|
|||
f"{model_id}: expected {expected_ctx}, got {actual}"
|
||||
)
|
||||
|
||||
def test_glm_52_context_1m(self):
|
||||
"""GLM-5.2 must resolve to 1M, not the generic GLM fallback of 202K.
|
||||
|
||||
Context window was verified empirically via needle-in-a-haystack
|
||||
retrieval at 789K prompt tokens on api.z.ai/api/coding/paas/v4
|
||||
(2026-06-13).
|
||||
"""
|
||||
from agent.model_metadata import get_model_context_length
|
||||
from unittest.mock import patch as mock_patch
|
||||
|
||||
assert DEFAULT_CONTEXT_LENGTHS["glm-5.2"] == 1_048_576
|
||||
assert DEFAULT_CONTEXT_LENGTHS["glm"] == 202752
|
||||
|
||||
with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
|
||||
mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
|
||||
mock_patch("agent.model_metadata.get_cached_context_length", return_value=None):
|
||||
# GLM-5.2 (1M) must NOT fall through to the generic 202K entry
|
||||
assert get_model_context_length("glm-5.2") == 1_048_576
|
||||
# Vendor-prefixed forms (zai provider, zhipu alias)
|
||||
assert get_model_context_length("zai/glm-5.2") == 1_048_576
|
||||
assert get_model_context_length("zhipu/glm-5.2") == 1_048_576
|
||||
# Older GLM variants still resolve to the generic 202K fallback
|
||||
assert get_model_context_length("glm-5") == 202752
|
||||
assert get_model_context_length("glm-5.1") == 202752
|
||||
|
||||
def test_openrouter_live_metadata_beats_hardcoded_catchall(self):
|
||||
"""OpenRouter-routed slugs resolve via the live OR catalog before the
|
||||
hardcoded family catch-all.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue