diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 49dea65f9b..4d23315487 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -112,6 +112,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
 # "exotic provider" branch checks this before falling back to the main model.
 _PROVIDER_VISION_MODELS: Dict[str, str] = {
     "xiaomi": "mimo-v2-omni",
+    "zai": "glm-5v-turbo",
 }
 
 # OpenRouter app attribution headers
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index e63a1ebb6b..636416a974 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -383,13 +383,16 @@ def _resolve_api_key_provider_secret(
 # Z.AI has separate billing for general vs coding plans, and global vs China
 # endpoints. A key that works on one may return "Insufficient balance" on
 # another. We probe at setup time and store the working endpoint.
+# Each entry lists candidate models to try in order — newer coding plan accounts
+# may only have access to recent models (glm-5.1, glm-5v-turbo) while older
+# ones still use glm-4.7.
 ZAI_ENDPOINTS = [
-    # (id, base_url, default_model, label)
-    ("global", "https://api.z.ai/api/paas/v4", "glm-5", "Global"),
-    ("cn", "https://open.bigmodel.cn/api/paas/v4", "glm-5", "China"),
-    ("coding-global", "https://api.z.ai/api/coding/paas/v4", "glm-4.7", "Global (Coding Plan)"),
-    ("coding-cn", "https://open.bigmodel.cn/api/coding/paas/v4", "glm-4.7", "China (Coding Plan)"),
+    # (id, base_url, probe_models, label)
+    ("global", "https://api.z.ai/api/paas/v4", ["glm-5"], "Global"),
+    ("cn", "https://open.bigmodel.cn/api/paas/v4", ["glm-5"], "China"),
+    ("coding-global", "https://api.z.ai/api/coding/paas/v4", ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "Global (Coding Plan)"),
+    ("coding-cn", "https://open.bigmodel.cn/api/coding/paas/v4", ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "China (Coding Plan)"),
 ]
@@ -397,35 +400,37 @@ def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str
     """Probe z.ai endpoints to find one that accepts this API key.
 
     Returns {"id": ..., "base_url": ..., "model": ..., "label": ...} for the
-    first working endpoint, or None if all fail.
+    first working endpoint, or None if all fail. For endpoints with multiple
+    candidate models, tries each in order and returns the first that succeeds.
     """
-    for ep_id, base_url, model, label in ZAI_ENDPOINTS:
-        try:
-            resp = httpx.post(
-                f"{base_url}/chat/completions",
-                headers={
-                    "Authorization": f"Bearer {api_key}",
-                    "Content-Type": "application/json",
-                },
-                json={
-                    "model": model,
-                    "stream": False,
-                    "max_tokens": 1,
-                    "messages": [{"role": "user", "content": "ping"}],
-                },
-                timeout=timeout,
-            )
-            if resp.status_code == 200:
-                logger.debug("Z.AI endpoint probe: %s (%s) OK", ep_id, base_url)
-                return {
-                    "id": ep_id,
-                    "base_url": base_url,
-                    "model": model,
-                    "label": label,
-                }
-            logger.debug("Z.AI endpoint probe: %s returned %s", ep_id, resp.status_code)
-        except Exception as exc:
-            logger.debug("Z.AI endpoint probe: %s failed: %s", ep_id, exc)
+    for ep_id, base_url, probe_models, label in ZAI_ENDPOINTS:
+        for model in probe_models:
+            try:
+                resp = httpx.post(
+                    f"{base_url}/chat/completions",
+                    headers={
+                        "Authorization": f"Bearer {api_key}",
+                        "Content-Type": "application/json",
+                    },
+                    json={
+                        "model": model,
+                        "stream": False,
+                        "max_tokens": 1,
+                        "messages": [{"role": "user", "content": "ping"}],
+                    },
+                    timeout=timeout,
+                )
+                if resp.status_code == 200:
+                    logger.debug("Z.AI endpoint probe: %s (%s) model=%s OK", ep_id, base_url, model)
+                    return {
+                        "id": ep_id,
+                        "base_url": base_url,
+                        "model": model,
+                        "label": label,
+                    }
+                logger.debug("Z.AI endpoint probe: %s model=%s returned %s", ep_id, model, resp.status_code)
+            except Exception as exc:
+                logger.debug("Z.AI endpoint probe: %s model=%s failed: %s", ep_id, model, exc)
     return None
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 852601229e..18f29c6cd3 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -44,6 +44,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
     ("minimax/minimax-m2.7", ""),
     ("minimax/minimax-m2.5", ""),
     ("z-ai/glm-5.1", ""),
+    ("z-ai/glm-5v-turbo", ""),
     ("z-ai/glm-5-turbo", ""),
     ("moonshotai/kimi-k2.5", ""),
     ("x-ai/grok-4.20", ""),
@@ -89,6 +90,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
         "minimax/minimax-m2.7",
         "minimax/minimax-m2.5",
         "z-ai/glm-5.1",
+        "z-ai/glm-5v-turbo",
         "z-ai/glm-5-turbo",
         "moonshotai/kimi-k2.5",
         "x-ai/grok-4.20-beta",
@@ -134,6 +136,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
     "zai": [
         "glm-5.1",
         "glm-5",
+        "glm-5v-turbo",
         "glm-5-turbo",
         "glm-4.7",
         "glm-4.5",