diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 440fc2b6f..04a0736e4 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -138,6 +138,59 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
 }
 
 
+# =============================================================================
+# Z.AI Endpoint Detection
+# =============================================================================
+
+# Z.AI has separate billing for general vs coding plans, and global vs China
+# endpoints. A key that works on one may return "Insufficient balance" on
+# another. We probe at setup time and store the working endpoint.
+
+ZAI_ENDPOINTS = [
+    # (id, base_url, default_model, label)
+    ("global", "https://api.z.ai/api/paas/v4", "glm-5", "Global"),
+    ("cn", "https://open.bigmodel.cn/api/paas/v4", "glm-5", "China"),
+    ("coding-global", "https://api.z.ai/api/coding/paas/v4", "glm-4.7", "Global (Coding Plan)"),
+    ("coding-cn", "https://open.bigmodel.cn/api/coding/paas/v4", "glm-4.7", "China (Coding Plan)"),
+]
+
+
+def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str, str]]:
+    """Probe z.ai endpoints to find one that accepts this API key.
+
+    Returns {"id": ..., "base_url": ..., "model": ..., "label": ...} for the
+    first working endpoint, or None if all fail.
+    """
+    for ep_id, base_url, model, label in ZAI_ENDPOINTS:
+        try:
+            resp = httpx.post(
+                f"{base_url}/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {api_key}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": model,
+                    "stream": False,
+                    "max_tokens": 1,
+                    "messages": [{"role": "user", "content": "ping"}],
+                },
+                timeout=timeout,
+            )
+            if resp.status_code == 200:
+                logger.debug("Z.AI endpoint probe: %s (%s) OK", ep_id, base_url)
+                return {
+                    "id": ep_id,
+                    "base_url": base_url,
+                    "model": model,
+                    "label": label,
+                }
+            logger.debug("Z.AI endpoint probe: %s returned %s", ep_id, resp.status_code)
+        except Exception as exc:
+            logger.debug("Z.AI endpoint probe: %s failed: %s", ep_id, exc)
+    return None
+
+
 # =============================================================================
 # Error Types
 # =============================================================================
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 8bbc70001..1d07351d5 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -667,16 +667,17 @@ def setup_model_provider(config: dict):
         print_header("Z.AI / GLM API Key")
         pconfig = PROVIDER_REGISTRY["zai"]
         print_info(f"Provider: {pconfig.name}")
-        print_info(f"Base URL: {pconfig.inference_base_url}")
         print_info("Get your API key at: https://open.bigmodel.cn/")
         print()
 
         existing_key = get_env_value("GLM_API_KEY") or get_env_value("ZAI_API_KEY")
+        api_key = existing_key  # will be overwritten if user enters a new one
         if existing_key:
            print_info(f"Current: {existing_key[:8]}... (configured)")
            if prompt_yes_no("Update API key?", False):
-                api_key = prompt("  GLM API key", password=True)
-                if api_key:
+                new_key = prompt("  GLM API key", password=True)
+                if new_key:
+                    api_key = new_key
                    save_env_value("GLM_API_KEY", api_key)
                    print_success("GLM API key updated")
        else:
@@ -687,11 +688,32 @@ def setup_model_provider(config: dict):
            else:
                print_warning("Skipped - agent won't work without an API key")
 
+
+        # Detect the correct z.ai endpoint for this key.
+        # Z.AI has separate billing for general vs coding plans and
+        # global vs China endpoints — we probe to find the right one.
+        zai_base_url = pconfig.inference_base_url
+        if api_key:
+            print()
+            print_info("Detecting your z.ai endpoint...")
+            from hermes_cli.auth import detect_zai_endpoint
+            detected = detect_zai_endpoint(api_key)
+            if detected:
+                zai_base_url = detected["base_url"]
+                print_success(f"Detected: {detected['label']} endpoint")
+                print_info(f"  URL: {detected['base_url']}")
+                if detected["id"].startswith("coding"):
+                    print_info(f"  Note: Coding Plan detected — GLM-5 is not available, using {detected['model']}")
+                save_env_value("GLM_BASE_URL", zai_base_url)
+            else:
+                print_warning("Could not verify any z.ai endpoint with this key.")
+                print_info(f"  Using default: {zai_base_url}")
+                print_info("  If you get billing errors, check your plan at https://open.bigmodel.cn/")
+
         # Clear custom endpoint vars if switching
         if existing_custom:
             save_env_value("OPENAI_BASE_URL", "")
             save_env_value("OPENAI_API_KEY", "")
-        _update_config_for_provider("zai", pconfig.inference_base_url)
+        _update_config_for_provider("zai", zai_base_url)
 
     elif provider_idx == 5:
         selected_provider = "kimi-coding"
@@ -859,7 +881,12 @@ def setup_model_provider(config: dict):
                 save_env_value("LLM_MODEL", custom)
             _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
         elif selected_provider == "zai":
-            zai_models = ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"]
+            # Coding Plan endpoints don't have GLM-5
+            is_coding_plan = get_env_value("GLM_BASE_URL") and "coding" in (get_env_value("GLM_BASE_URL") or "")
+            if is_coding_plan:
+                zai_models = ["glm-4.7", "glm-4.5", "glm-4.5-flash"]
+            else:
+                zai_models = ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"]
             model_choices = list(zai_models)
             model_choices.append("Custom model")
             model_choices.append(f"Keep current ({current_model})")