# =============================================================================
# Z.AI Endpoint Detection
# =============================================================================

# Z.AI bills its general and coding plans separately and runs distinct global
# and China endpoints. A key accepted by one endpoint may be rejected with an
# "Insufficient balance" error by another, so we probe once at setup time and
# persist whichever endpoint actually works for this key.

ZAI_ENDPOINTS = [
    # (id, base_url, default_model, label)
    ("global", "https://api.z.ai/api/paas/v4", "glm-5", "Global"),
    ("cn", "https://open.bigmodel.cn/api/paas/v4", "glm-5", "China"),
    ("coding-global", "https://api.z.ai/api/coding/paas/v4", "glm-4.7", "Global (Coding Plan)"),
    ("coding-cn", "https://open.bigmodel.cn/api/coding/paas/v4", "glm-4.7", "China (Coding Plan)"),
]


def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str, str]]:
    """Probe z.ai endpoints to find one that accepts this API key.

    Returns {"id": ..., "base_url": ..., "model": ..., "label": ...} for the
    first working endpoint, or None if all fail.
    """
    for endpoint_id, endpoint_url, default_model, plan_label in ZAI_ENDPOINTS:
        # Cheapest possible authenticated call: a 1-token completion request.
        request_headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        probe_payload = {
            "model": default_model,
            "stream": False,
            "max_tokens": 1,
            "messages": [{"role": "user", "content": "ping"}],
        }
        try:
            response = httpx.post(
                f"{endpoint_url}/chat/completions",
                headers=request_headers,
                json=probe_payload,
                timeout=timeout,
            )
        except Exception as exc:
            # Network/TLS/timeout failures are expected for the "wrong" region;
            # log at debug level and move on to the next candidate.
            logger.debug("Z.AI endpoint probe: %s failed: %s", endpoint_id, exc)
            continue
        if response.status_code != 200:
            logger.debug("Z.AI endpoint probe: %s returned %s", endpoint_id, response.status_code)
            continue
        logger.debug("Z.AI endpoint probe: %s (%s) OK", endpoint_id, endpoint_url)
        return {
            "id": endpoint_id,
            "base_url": endpoint_url,
            "model": default_model,
            "label": plan_label,
        }
    return None
(configured)") if prompt_yes_no("Update API key?", False): - api_key = prompt(" GLM API key", password=True) - if api_key: + new_key = prompt(" GLM API key", password=True) + if new_key: + api_key = new_key save_env_value("GLM_API_KEY", api_key) print_success("GLM API key updated") else: @@ -687,11 +688,32 @@ def setup_model_provider(config: dict): else: print_warning("Skipped - agent won't work without an API key") + # Detect the correct z.ai endpoint for this key. + # Z.AI has separate billing for general vs coding plans and + # global vs China endpoints — we probe to find the right one. + zai_base_url = pconfig.inference_base_url + if api_key: + print() + print_info("Detecting your z.ai endpoint...") + from hermes_cli.auth import detect_zai_endpoint + detected = detect_zai_endpoint(api_key) + if detected: + zai_base_url = detected["base_url"] + print_success(f"Detected: {detected['label']} endpoint") + print_info(f" URL: {detected['base_url']}") + if detected["id"].startswith("coding"): + print_info(f" Note: Coding Plan detected — GLM-5 is not available, using {detected['model']}") + save_env_value("GLM_BASE_URL", zai_base_url) + else: + print_warning("Could not verify any z.ai endpoint with this key.") + print_info(f" Using default: {zai_base_url}") + print_info(" If you get billing errors, check your plan at https://open.bigmodel.cn/") + # Clear custom endpoint vars if switching if existing_custom: save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") - _update_config_for_provider("zai", pconfig.inference_base_url) + _update_config_for_provider("zai", zai_base_url) elif provider_idx == 5: # Kimi / Moonshot selected_provider = "kimi-coding" @@ -859,7 +881,12 @@ def setup_model_provider(config: dict): save_env_value("LLM_MODEL", custom) _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) elif selected_provider == "zai": - zai_models = ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"] + # Coding Plan endpoints don't have GLM-5 
+ is_coding_plan = get_env_value("GLM_BASE_URL") and "coding" in (get_env_value("GLM_BASE_URL") or "") + if is_coding_plan: + zai_models = ["glm-4.7", "glm-4.5", "glm-4.5-flash"] + else: + zai_models = ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"] model_choices = list(zai_models) model_choices.append("Custom model") model_choices.append(f"Keep current ({current_model})")