mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-15 09:21:36 +00:00
GLM-5.2 ships with a 1M (1,048,576) token context window. Without this entry, Hermes falls through to the generic 'glm' key (202,752 tokens), under-reporting the context bar and prematurely compressing conversations.

The 1M limit was verified empirically via needle-in-a-haystack retrieval at 789,240 prompt tokens on api.z.ai/api/coding/paas/v4 — zero errors, zero truncation, correct retrieval at every tested size (25K through 789K).

Changes:
- agent/model_metadata.py: add 'glm-5.2': 1_048_576 before 'glm' fallback
- hermes_cli/models.py: add glm-5.2 to zai curated models
- hermes_cli/setup.py: add glm-5.2 to setup wizard zai list
- hermes_cli/auth.py: add glm-5.2 to coding plan endpoint probes
- plugins/model-providers/zai/__init__.py: add glm-5.2 to fallback_models
- tests/agent/test_model_metadata.py: context resolution + vendor-prefix tests
22 lines
573 B
Python
22 lines
573 B
Python
"""ZAI / GLM provider profile."""
|
|
|
|
from providers import register_provider
|
|
from providers.base import ProviderProfile
|
|
|
|
# Declarative profile for the Z.AI (GLM) provider.  Every value below is a
# plain literal handed to ProviderProfile; how each field is consumed is
# decided by ProviderProfile and its callers, not by this module.
zai = ProviderProfile(
    name="zai",
    # Other names for this provider — presumably accepted as lookup
    # aliases; confirm against ProviderProfile.
    aliases=(
        "glm",
        "z-ai",
        "z.ai",
        "zhipu",
    ),
    # Environment variable names — presumably consulted for the API key;
    # verify lookup order against the consumer of `env_vars`.
    env_vars=(
        "GLM_API_KEY",
        "ZAI_API_KEY",
        "Z_AI_API_KEY",
    ),
    display_name="Z.AI (GLM)",
    description="Z.AI / GLM — Zhipu AI models",
    signup_url="https://z.ai/",
    # NOTE(review): looks like the model list used when no other list is
    # available — confirm how `fallback_models` is consumed.
    fallback_models=("glm-5.2", "glm-5", "glm-4-9b"),
    base_url="https://api.z.ai/api/paas/v4",
    default_aux_model="glm-4.5-flash",
)

# Import-time side effect: hand the profile to the provider registry.
register_provider(zai)
|