mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(zai): add GLM-5V-Turbo support for coding plan (#9907)
- Add glm-5v-turbo to OpenRouter, Nous, and native Z.AI model lists
- Add glm-5v context length entry (200K tokens) to model metadata
- Update Z.AI endpoint probe to try multiple candidate models per endpoint (glm-5.1, glm-5v-turbo, glm-4.7) — fixes detection for newer coding plan accounts that lack older models
- Add zai to _PROVIDER_VISION_MODELS so auxiliary vision tasks (vision_analyze, browser screenshots) route through 5v

Fixes #9888
This commit is contained in:
parent
1e5e1e822b
commit
6448e1da23
3 changed files with 42 additions and 33 deletions
|
|
@ -112,6 +112,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
|||
# "exotic provider" branch checks this before falling back to the main model.
|
||||
_PROVIDER_VISION_MODELS: Dict[str, str] = {
|
||||
"xiaomi": "mimo-v2-omni",
|
||||
"zai": "glm-5v-turbo",
|
||||
}
|
||||
|
||||
# OpenRouter app attribution headers
|
||||
|
|
|
|||
|
|
@ -383,13 +383,16 @@ def _resolve_api_key_provider_secret(
|
|||
# Z.AI has separate billing for general vs coding plans, and global vs China
|
||||
# endpoints. A key that works on one may return "Insufficient balance" on
|
||||
# another. We probe at setup time and store the working endpoint.
|
||||
# Each entry lists candidate models to try in order — newer coding plan accounts
|
||||
# may only have access to recent models (glm-5.1, glm-5v-turbo) while older
|
||||
# ones still use glm-4.7.
|
||||
|
||||
# Z.AI endpoint candidates probed at setup time (see detect_zai_endpoint).
# The third field is now a *list* of probe models, not a single model string:
# coding-plan endpoints list several candidates because newer coding plan
# accounts may only have access to recent models (glm-5.1, glm-5v-turbo)
# while older ones still use glm-4.7.
ZAI_ENDPOINTS = [
    # (id, base_url, probe_models, label)
    ("global", "https://api.z.ai/api/paas/v4", ["glm-5"], "Global"),
    ("cn", "https://open.bigmodel.cn/api/paas/v4", ["glm-5"], "China"),
    ("coding-global", "https://api.z.ai/api/coding/paas/v4", ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "Global (Coding Plan)"),
    ("coding-cn", "https://open.bigmodel.cn/api/coding/paas/v4", ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "China (Coding Plan)"),
]
|
||||
|
||||
|
||||
|
|
# NOTE(review): return annotation reconstructed from the truncated diff hunk
# header (`Optional[Dict[str`); Dict[str, str] matches the returned dict of
# four string values — confirm against the upstream file.
def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str, str]]:
    """Probe z.ai endpoints to find one that accepts this API key.

    Z.AI has separate billing for general vs coding plans, and global vs
    China endpoints; a key that works on one may fail on another, so each
    candidate endpoint is probed with a minimal 1-token chat completion.

    Args:
        api_key: The Z.AI API key to probe.
        timeout: Per-request timeout in seconds for each probe.

    Returns:
        {"id": ..., "base_url": ..., "model": ..., "label": ...} for the
        first working endpoint, or None if all fail. For endpoints with
        multiple candidate models, tries each in order and returns the
        first that succeeds.
    """
    for ep_id, base_url, probe_models, label in ZAI_ENDPOINTS:
        for model in probe_models:
            try:
                resp = httpx.post(
                    f"{base_url}/chat/completions",
                    headers={
                        "Authorization": f"Bearer {api_key}",
                        "Content-Type": "application/json",
                    },
                    json={
                        "model": model,
                        "stream": False,
                        # 1 token is enough to prove the key/model/endpoint
                        # combination is billable and reachable.
                        "max_tokens": 1,
                        "messages": [{"role": "user", "content": "ping"}],
                    },
                    timeout=timeout,
                )
                if resp.status_code == 200:
                    logger.debug("Z.AI endpoint probe: %s (%s) model=%s OK", ep_id, base_url, model)
                    return {
                        "id": ep_id,
                        "base_url": base_url,
                        "model": model,
                        "label": label,
                    }
                logger.debug("Z.AI endpoint probe: %s model=%s returned %s", ep_id, model, resp.status_code)
            except Exception as exc:
                # Best-effort probing: network/DNS/TLS errors on one
                # endpoint must not abort the remaining candidates.
                logger.debug("Z.AI endpoint probe: %s model=%s failed: %s", ep_id, model, exc)
    return None
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
|||
("minimax/minimax-m2.7", ""),
|
||||
("minimax/minimax-m2.5", ""),
|
||||
("z-ai/glm-5.1", ""),
|
||||
("z-ai/glm-5v-turbo", ""),
|
||||
("z-ai/glm-5-turbo", ""),
|
||||
("moonshotai/kimi-k2.5", ""),
|
||||
("x-ai/grok-4.20", ""),
|
||||
|
|
@ -89,6 +90,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"minimax/minimax-m2.7",
|
||||
"minimax/minimax-m2.5",
|
||||
"z-ai/glm-5.1",
|
||||
"z-ai/glm-5v-turbo",
|
||||
"z-ai/glm-5-turbo",
|
||||
"moonshotai/kimi-k2.5",
|
||||
"x-ai/grok-4.20-beta",
|
||||
|
|
@ -134,6 +136,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"zai": [
|
||||
"glm-5.1",
|
||||
"glm-5",
|
||||
"glm-5v-turbo",
|
||||
"glm-5-turbo",
|
||||
"glm-4.7",
|
||||
"glm-4.5",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue