From 9e844160f9b6e6485abe4294cb1962d7ef7813e3 Mon Sep 17 00:00:00 2001
From: Yang Zhi <yangzhi.see@gmail.com>
Date: Tue, 7 Apr 2026 00:52:29 +0800
Subject: [PATCH] fix(credential_pool): auto-detect Z.AI endpoint via probe and
 cache

The credential pool seeder and runtime credential resolver hardcoded
api.z.ai/api/paas/v4 for all Z.AI keys.  Keys on the Coding Plan (or CN
endpoint) would hit the wrong endpoint, causing 401/429 errors on the
first request even though a working endpoint exists.

Add _resolve_zai_base_url() that:
- Respects GLM_BASE_URL env var (no probe when explicitly set)
- Probes all candidate endpoints (global, cn, coding-global, coding-cn)
  via detect_zai_endpoint() to find one that returns HTTP 200
- Caches the detected endpoint in provider state (auth.json) keyed on a
  truncated SHA-256 hash (first 16 hex chars) of the API key so subsequent
  starts skip the probe
- Falls back to the default URL if all probes fail

Wire into both _seed_from_env() in the credential pool and
resolve_api_key_provider_credentials() in the runtime resolver,
matching the pattern from the kimi-coding fix (PR #5566).

Fixes the same class of bug as #5561 but for the zai provider.
---
 agent/credential_pool.py        |  3 ++
 hermes_cli/auth.py              | 43 +++++++++++++++++++++++++++
 tests/test_api_key_providers.py | 52 ++++++++++++++++++++++++++++++++-
 3 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/agent/credential_pool.py b/agent/credential_pool.py
index f57ae049c..144a91015 100644
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -27,6 +27,7 @@ from hermes_cli.auth import (
     _is_expiring,
     _load_auth_store,
     _load_provider_state,
+    _resolve_zai_base_url,
     read_credential_pool,
     write_credential_pool,
 )
@@ -1086,6 +1087,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
         active_sources.add(source)
         auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
         base_url = env_url or pconfig.inference_base_url
+        if provider == "zai":
+            base_url = _resolve_zai_base_url(token, pconfig.inference_base_url, env_url)
         changed |= _upsert_entry(
             entries,
             provider,
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 588d06d40..23119c661 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -404,6 +404,47 @@ def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str
     return None
 
 
+def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) -> str:
+    """Return the Z.AI base URL to use for *api_key*.
+
+    An explicit GLM_BASE_URL (*env_override*) always wins; with no key
+    there is nothing to probe, so *default_url* is returned.  Otherwise
+    reuse the endpoint cached in provider state for this key's hash, or
+    probe the candidates and cache a hit; fall back to *default_url*.
+    """
+    if env_override:
+        return env_override
+    if not api_key:
+        return default_url
+    key_hash = hashlib.sha256(api_key.encode()).hexdigest()[:16]
+
+    # Check provider-state cache for an endpoint detected with this same key.
+    auth_store = _load_auth_store()
+    state = _load_provider_state(auth_store, "zai") or {}
+    cached = state.get("detected_endpoint")
+    if isinstance(cached, dict) and cached.get("base_url"):
+        if cached.get("key_hash", "") == key_hash:
+            logger.debug("Z.AI: using cached endpoint %s", cached["base_url"])
+            return cached["base_url"]
+
+    # Probe — may take up to ~8s per endpoint.
+    detected = detect_zai_endpoint(api_key)
+    if detected and detected.get("base_url"):
+        state["detected_endpoint"] = {
+            "base_url": detected["base_url"],
+            "endpoint_id": detected.get("id", ""),
+            "model": detected.get("model", ""),
+            "label": detected.get("label", ""),
+            "key_hash": key_hash,
+        }
+        _save_provider_state(auth_store, "zai", state)
+        logger.info("Z.AI: auto-detected endpoint %s (%s)", detected.get("label", ""), detected["base_url"])
+        return detected["base_url"]
+
+    logger.debug("Z.AI: probe failed, falling back to default %s", default_url)
+    return default_url
+
+
 # =============================================================================
 # Error Types
 # =============================================================================
@@ -2063,6 +2104,8 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
 
     if provider_id == "kimi-coding":
         base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
+    elif provider_id == "zai":
+        base_url = _resolve_zai_base_url(api_key, pconfig.inference_base_url, env_url)
     elif env_url:
         base_url = env_url.rstrip("/")
     else:
diff --git a/tests/test_api_key_providers.py b/tests/test_api_key_providers.py
index ddf1d9722..ee86507a1 100644
--- a/tests/test_api_key_providers.py
+++ b/tests/test_api_key_providers.py
@@ -350,6 +350,7 @@ class TestResolveApiKeyProviderCredentials:
 
     def test_resolve_zai_with_key(self, monkeypatch):
         monkeypatch.setenv("GLM_API_KEY", "glm-secret-key")
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
         creds = resolve_api_key_provider_credentials("zai")
         assert creds["provider"] == "zai"
         assert creds["api_key"] == "glm-secret-key"
@@ -471,6 +472,7 @@ class TestResolveApiKeyProviderCredentials:
         """GLM_API_KEY takes priority over ZAI_API_KEY."""
         monkeypatch.setenv("GLM_API_KEY", "primary")
         monkeypatch.setenv("ZAI_API_KEY", "secondary")
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
         creds = resolve_api_key_provider_credentials("zai")
         assert creds["api_key"] == "primary"
         assert creds["source"] == "GLM_API_KEY"
@@ -478,6 +480,7 @@ class TestResolveApiKeyProviderCredentials:
     def test_zai_key_fallback(self, monkeypatch):
         """ZAI_API_KEY used when GLM_API_KEY not set."""
         monkeypatch.setenv("ZAI_API_KEY", "secondary")
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
         creds = resolve_api_key_provider_credentials("zai")
         assert creds["api_key"] == "secondary"
         assert creds["source"] == "ZAI_API_KEY"
@@ -830,11 +833,58 @@ class TestKimiCodeCredentialAutoDetect:
 
     def test_non_kimi_providers_unaffected(self, monkeypatch):
         """Ensure the auto-detect logic doesn't leak to other providers."""
-        monkeypatch.setenv("GLM_API_KEY", "sk-kimi-looks-like-kimi-but-isnt")
+        monkeypatch.setenv("GLM_API_KEY", "sk-kimi-looks-like-kimi-but-isnt")
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
         creds = resolve_api_key_provider_credentials("zai")
         assert creds["base_url"] == "https://api.z.ai/api/paas/v4"
 
 
+class TestZaiEndpointAutoDetect:
+    """Z.AI endpoint resolution in resolve_api_key_provider_credentials()."""
+
+    def test_probe_success_returns_detected_url(self, monkeypatch):
+        """A successful probe result supplies the resolved base URL."""
+        coding_endpoint = {
+            "id": "coding-global",
+            "base_url": "https://api.z.ai/api/coding/paas/v4",
+            "model": "glm-4.7",
+            "label": "Global (Coding Plan)",
+        }
+        monkeypatch.setenv("GLM_API_KEY", "glm-coding-key")
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: dict(coding_endpoint))
+        resolved = resolve_api_key_provider_credentials("zai")
+        assert resolved["base_url"] == "https://api.z.ai/api/coding/paas/v4"
+
+    def test_probe_failure_falls_back_to_default(self, monkeypatch):
+        """When the probe finds nothing, the default endpoint is used."""
+        monkeypatch.setenv("GLM_API_KEY", "glm-key")
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
+        resolved = resolve_api_key_provider_credentials("zai")
+        assert resolved["base_url"] == "https://api.z.ai/api/paas/v4"
+
+    def test_env_override_skips_probe(self, monkeypatch):
+        """An explicit GLM_BASE_URL is returned and the probe never runs."""
+        probe_calls = []
+
+        def _record_probe(*a, **kw):
+            probe_calls.append((a, kw))
+            return None
+
+        monkeypatch.setenv("GLM_API_KEY", "glm-key")
+        monkeypatch.setenv("GLM_BASE_URL", "https://custom.example/v4")
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", _record_probe)
+        resolved = resolve_api_key_provider_credentials("zai")
+        assert resolved["base_url"] == "https://custom.example/v4"
+        assert not probe_calls
+
+    def test_no_key_skips_probe(self, monkeypatch):
+        """With no key set, resolution yields an empty api_key."""
+        # NOTE(review): the stub is not tracked, so a probe attempt would go unnoticed.
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
+        resolved = resolve_api_key_provider_credentials("zai")
+        assert resolved["api_key"] == ""
+
+
 # =============================================================================
 # Kimi / Moonshot model list isolation tests
 # =============================================================================