fix(credential_pool): auto-detect Z.AI endpoint via probe and cache

The credential pool seeder and runtime credential resolver hardcoded
api.z.ai/api/paas/v4 for all Z.AI keys.  Keys on the Coding Plan (or CN
endpoint) would hit the wrong endpoint, causing 401/429 errors on the
first request even though a working endpoint exists.

Add _resolve_zai_base_url() that:
- Respects GLM_BASE_URL env var (no probe when explicitly set)
- Probes all candidate endpoints (global, cn, coding-global, coding-cn)
  via detect_zai_endpoint() to find one that returns HTTP 200
- Caches the detected endpoint in provider state (auth.json) keyed on
  a truncated SHA-256 hash of the API key (its first 16 hex characters)
  so subsequent starts skip the probe
- Falls back to the default URL if all probes fail

Wire into both _seed_from_env() in the credential pool and
resolve_api_key_provider_credentials() in the runtime resolver,
matching the pattern from the kimi-coding fix (PR #5566).

Fixes the same class of bug as #5561 but for the zai provider.
This commit is contained in:
Yang Zhi 2026-04-07 00:52:29 +08:00 committed by Teknium
parent f609bf277d
commit 9e844160f9
3 changed files with 97 additions and 1 deletions

View file

@ -27,6 +27,7 @@ from hermes_cli.auth import (
_is_expiring, _is_expiring,
_load_auth_store, _load_auth_store,
_load_provider_state, _load_provider_state,
_resolve_zai_base_url,
read_credential_pool, read_credential_pool,
write_credential_pool, write_credential_pool,
) )
@ -1086,6 +1087,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
active_sources.add(source) active_sources.add(source)
auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
base_url = env_url or pconfig.inference_base_url base_url = env_url or pconfig.inference_base_url
if provider == "zai":
base_url = _resolve_zai_base_url(token, pconfig.inference_base_url, env_url)
changed |= _upsert_entry( changed |= _upsert_entry(
entries, entries,
provider, provider,

View file

@ -404,6 +404,47 @@ def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str
return None return None
def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) -> str:
"""Return the correct Z.AI base URL by probing endpoints.
If the user has explicitly set GLM_BASE_URL, that always wins.
Otherwise, probe the candidate endpoints to find one that accepts the
key. The detected endpoint is cached in provider state (auth.json) keyed
on a hash of the API key so subsequent starts skip the probe.
"""
if env_override:
return env_override
# Check provider-state cache for a previously-detected endpoint.
auth_store = _load_auth_store()
state = _load_provider_state(auth_store, "zai") or {}
cached = state.get("detected_endpoint")
if isinstance(cached, dict) and cached.get("base_url"):
key_hash = cached.get("key_hash", "")
if key_hash == hashlib.sha256(api_key.encode()).hexdigest()[:16]:
logger.debug("Z.AI: using cached endpoint %s", cached["base_url"])
return cached["base_url"]
# Probe — may take up to ~8s per endpoint.
detected = detect_zai_endpoint(api_key)
if detected and detected.get("base_url"):
# Persist the detection result keyed on the API key hash.
key_hash = hashlib.sha256(api_key.encode()).hexdigest()[:16]
state["detected_endpoint"] = {
"base_url": detected["base_url"],
"endpoint_id": detected.get("id", ""),
"model": detected.get("model", ""),
"label": detected.get("label", ""),
"key_hash": key_hash,
}
_save_provider_state(auth_store, "zai", state)
logger.info("Z.AI: auto-detected endpoint %s (%s)", detected["label"], detected["base_url"])
return detected["base_url"]
logger.debug("Z.AI: probe failed, falling back to default %s", default_url)
return default_url
# ============================================================================= # =============================================================================
# Error Types # Error Types
# ============================================================================= # =============================================================================
@ -2063,6 +2104,8 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
if provider_id == "kimi-coding": if provider_id == "kimi-coding":
base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url) base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
elif provider_id == "zai":
base_url = _resolve_zai_base_url(api_key, pconfig.inference_base_url, env_url)
elif env_url: elif env_url:
base_url = env_url.rstrip("/") base_url = env_url.rstrip("/")
else: else:

View file

@ -350,6 +350,7 @@ class TestResolveApiKeyProviderCredentials:
def test_resolve_zai_with_key(self, monkeypatch): def test_resolve_zai_with_key(self, monkeypatch):
monkeypatch.setenv("GLM_API_KEY", "glm-secret-key") monkeypatch.setenv("GLM_API_KEY", "glm-secret-key")
monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
creds = resolve_api_key_provider_credentials("zai") creds = resolve_api_key_provider_credentials("zai")
assert creds["provider"] == "zai" assert creds["provider"] == "zai"
assert creds["api_key"] == "glm-secret-key" assert creds["api_key"] == "glm-secret-key"
@ -471,6 +472,7 @@ class TestResolveApiKeyProviderCredentials:
"""GLM_API_KEY takes priority over ZAI_API_KEY.""" """GLM_API_KEY takes priority over ZAI_API_KEY."""
monkeypatch.setenv("GLM_API_KEY", "primary") monkeypatch.setenv("GLM_API_KEY", "primary")
monkeypatch.setenv("ZAI_API_KEY", "secondary") monkeypatch.setenv("ZAI_API_KEY", "secondary")
monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
creds = resolve_api_key_provider_credentials("zai") creds = resolve_api_key_provider_credentials("zai")
assert creds["api_key"] == "primary" assert creds["api_key"] == "primary"
assert creds["source"] == "GLM_API_KEY" assert creds["source"] == "GLM_API_KEY"
@ -478,6 +480,7 @@ class TestResolveApiKeyProviderCredentials:
def test_zai_key_fallback(self, monkeypatch): def test_zai_key_fallback(self, monkeypatch):
"""ZAI_API_KEY used when GLM_API_KEY not set.""" """ZAI_API_KEY used when GLM_API_KEY not set."""
monkeypatch.setenv("ZAI_API_KEY", "secondary") monkeypatch.setenv("ZAI_API_KEY", "secondary")
monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
creds = resolve_api_key_provider_credentials("zai") creds = resolve_api_key_provider_credentials("zai")
assert creds["api_key"] == "secondary" assert creds["api_key"] == "secondary"
assert creds["source"] == "ZAI_API_KEY" assert creds["source"] == "ZAI_API_KEY"
@ -830,11 +833,58 @@ class TestKimiCodeCredentialAutoDetect:
def test_non_kimi_providers_unaffected(self, monkeypatch): def test_non_kimi_providers_unaffected(self, monkeypatch):
"""Ensure the auto-detect logic doesn't leak to other providers.""" """Ensure the auto-detect logic doesn't leak to other providers."""
monkeypatch.setenv("GLM_API_KEY", "sk-kimi-looks-like-kimi-but-isnt") monkeypatch.setenv("GLM_API_KEY", "sk-kim...isnt")
monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
creds = resolve_api_key_provider_credentials("zai") creds = resolve_api_key_provider_credentials("zai")
assert creds["base_url"] == "https://api.z.ai/api/paas/v4" assert creds["base_url"] == "https://api.z.ai/api/paas/v4"
class TestZaiEndpointAutoDetect:
    """Exercise Z.AI endpoint resolution in resolve_api_key_provider_credentials.

    Every test replaces ``hermes_cli.auth.detect_zai_endpoint`` with a stub so
    no network probe is performed.
    """

    def test_probe_success_returns_detected_url(self, monkeypatch):
        """The base URL reported by a successful probe is what gets resolved."""
        coding_endpoint = {
            "id": "coding-global",
            "base_url": "https://api.z.ai/api/coding/paas/v4",
            "model": "glm-4.7",
            "label": "Global (Coding Plan)",
        }

        def _probe_hit(*args, **kwargs):
            # Hand back a fresh copy on every call, like a real probe would.
            return dict(coding_endpoint)

        monkeypatch.setenv("GLM_API_KEY", "glm-coding-key")
        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", _probe_hit)

        resolved = resolve_api_key_provider_credentials("zai")

        assert resolved["base_url"] == "https://api.z.ai/api/coding/paas/v4"

    def test_probe_failure_falls_back_to_default(self, monkeypatch):
        """When the probe finds nothing, the default Z.AI URL is used."""

        def _probe_miss(*args, **kwargs):
            return None

        monkeypatch.setenv("GLM_API_KEY", "glm-key")
        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", _probe_miss)

        resolved = resolve_api_key_provider_credentials("zai")

        assert resolved["base_url"] == "https://api.z.ai/api/paas/v4"

    def test_env_override_skips_probe(self, monkeypatch):
        """GLM_BASE_URL should always win without probing."""
        probe_calls = []

        def _recording_probe(*args, **kwargs):
            # Any invocation is recorded so the test can prove none happened.
            probe_calls.append((args, kwargs))
            return None

        monkeypatch.setenv("GLM_API_KEY", "glm-key")
        monkeypatch.setenv("GLM_BASE_URL", "https://custom.example/v4")
        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", _recording_probe)

        resolved = resolve_api_key_provider_credentials("zai")

        assert resolved["base_url"] == "https://custom.example/v4"
        assert not probe_calls

    def test_no_key_skips_probe(self, monkeypatch):
        """Without an API key, the resolved credentials carry an empty key.

        The probe is stubbed to return None; this test only checks the
        resulting ``api_key`` and does not itself assert that the probe was
        left uncalled.
        """

        def _probe_miss(*args, **kwargs):
            return None

        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", _probe_miss)

        resolved = resolve_api_key_provider_credentials("zai")

        assert resolved["api_key"] == ""
# ============================================================================= # =============================================================================
# Kimi / Moonshot model list isolation tests # Kimi / Moonshot model list isolation tests
# ============================================================================= # =============================================================================