hermes-agent/providers/base.py
teknium1 5360b54244 fix(providers): set User-Agent on ProviderProfile.fetch_models
Some catalog endpoints (OpenCode Zen, etc.) sit behind a WAF that
returns 403 for the default Python-urllib/<ver> User-Agent.  The
generic profile-based live fetch in providers/base.py was silently
failing for any such provider — falling through to the static catalog
and missing newly-launched models.

Set a generic 'hermes-cli/<version>' UA on the catalog probe so every
api_key provider profile benefits.  Verified live against opencode-zen:
before this change, profile.fetch_models() raised HTTP 403; after, it
returns 42 models including gpt-5.5, gpt-5.5-pro, kimi-k2.6, glm-5.1
and the *-free variants the static catalog doesn't list.

Also strip the now-stale comment in validate_requested_model() claiming
opencode-zen's /models returns 404 against the HTML marketing site —
the API endpoint at /zen/v1/models returns 200 with valid JSON.

Surfaced by #2651 (@aashizpoudel) — fixes the same user-facing gap
their PR targeted, applied at the right layer so all api_key provider
profiles get live catalogs through the same code path.

Co-authored-by: Aashish Poudel <mr.aashiz@gmail.com>
2026-05-15 01:42:21 -07:00

184 lines
7.4 KiB
Python

"""Provider profile base class.
A ProviderProfile declares everything about an inference provider in one place:
auth, endpoints, client quirks, request-time quirks. The transport reads this
instead of receiving 20+ boolean flags.
Provider profiles are DECLARATIVE — they describe the provider's behavior.
They do NOT own client construction, credential rotation, or streaming.
Those stay on AIAgent.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from typing import Any
logger = logging.getLogger(__name__)
# Sentinel for "omit temperature entirely" (Kimi: server manages it)
OMIT_TEMPERATURE = object()
def _profile_user_agent() -> str:
"""Return a ``hermes-cli/<version>`` UA string, with a stable fallback.
Used by ``ProviderProfile.fetch_models`` so the catalog probe is not
served the default ``Python-urllib/<ver>`` UA — some providers
(OpenCode Zen, etc.) sit behind a WAF that returns 403 for that.
"""
try:
from hermes_cli import __version__ as _ver # lazy: avoid layer cycle at import time
return f"hermes-cli/{_ver}"
except Exception:
return "hermes-cli"
@dataclass
class ProviderProfile:
"""Base provider profile — subclass or instantiate with overrides."""
# ── Identity ─────────────────────────────────────────────
name: str
api_mode: str = "chat_completions"
aliases: tuple = ()
# ── Human-readable metadata ───────────────────────────────
display_name: str = "" # e.g. "GMI Cloud" — shown in picker/labels
description: str = "" # e.g. "GMI Cloud (multi-model direct API)" — picker subtitle
signup_url: str = "" # e.g. "https://www.gmicloud.ai/" — shown during setup
# ── Auth & endpoints ─────────────────────────────────────
env_vars: tuple = ()
base_url: str = ""
models_url: str = "" # explicit models endpoint; falls back to {base_url}/models
auth_type: str = "api_key" # api_key|oauth_device_code|oauth_external|copilot|aws_sdk
supports_health_check: bool = True # False → doctor skips /models probe for this provider
# ── Model catalog ─────────────────────────────────────────
# fallback_models: curated list shown in /model picker when live fetch fails.
# Only agentic models that support tool calling should appear here.
fallback_models: tuple = ()
# hostname: base hostname for URL→provider reverse-mapping in model_metadata.py
# e.g. "api.gmi-serving.com". Derived from base_url when empty.
hostname: str = ""
# ── Client-level quirks (set once at client construction) ─
default_headers: dict[str, str] = field(default_factory=dict)
# ── Request-level quirks ─────────────────────────────────
# Temperature: None = use caller's default, OMIT_TEMPERATURE = don't send
fixed_temperature: Any = None
default_max_tokens: int | None = None
default_aux_model: str = (
"" # cheap model for auxiliary tasks (compression, vision, etc.)
)
# empty = use main model
# ── Hooks (override in subclass for complex providers) ───
def get_hostname(self) -> str:
"""Return the provider's base hostname for URL-based detection.
Uses self.hostname if set explicitly, otherwise derives it from base_url.
e.g. 'https://api.gmi-serving.com/v1''api.gmi-serving.com'
"""
if self.hostname:
return self.hostname
if self.base_url:
from urllib.parse import urlparse
return urlparse(self.base_url).hostname or ""
return ""
def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Provider-specific message preprocessing.
Called AFTER codex field sanitization, BEFORE developer role swap.
Default: pass-through.
"""
return messages
def build_extra_body(
self, *, session_id: str | None = None, **context: Any
) -> dict[str, Any]:
"""Provider-specific extra_body fields.
Merged into the API kwargs extra_body. Default: empty dict.
"""
return {}
def build_api_kwargs_extras(
self,
*,
reasoning_config: dict | None = None,
**context: Any,
) -> tuple[dict[str, Any], dict[str, Any]]:
"""Provider-specific kwargs split between extra_body and top-level api_kwargs.
Returns (extra_body_additions, top_level_kwargs).
The transport merges extra_body_additions into extra_body, and
top_level_kwargs directly into api_kwargs.
This split exists because some providers put reasoning config in
extra_body (OpenRouter: extra_body.reasoning) while others put it
as top-level api_kwargs (Kimi: api_kwargs.reasoning_effort).
Default: ({}, {}).
"""
return {}, {}
def fetch_models(
self,
*,
api_key: str | None = None,
timeout: float = 8.0,
) -> list[str] | None:
"""Fetch the live model list from the provider's models endpoint.
Returns a list of model ID strings, or None if the fetch failed or
the provider does not support live model listing.
Resolution order for the endpoint URL:
1. self.models_url (explicit override — use when the models
endpoint differs from the inference base URL, e.g. OpenRouter
exposes a public catalog at /api/v1/models while inference is
at /api/v1)
2. self.base_url + "/models" (standard OpenAI-compat fallback)
The default implementation sends Bearer auth when api_key is given
and forwards self.default_headers. Override to customise auth, path,
response shape, or to return None for providers with no REST catalog.
Callers must always fall back to the static _PROVIDER_MODELS list
when this returns None.
"""
url = (self.models_url or "").strip()
if not url:
if not self.base_url:
return None
url = self.base_url.rstrip("/") + "/models"
import json
import urllib.request
req = urllib.request.Request(url)
if api_key:
req.add_header("Authorization", f"Bearer {api_key}")
req.add_header("Accept", "application/json")
# Some providers (e.g. OpenCode Zen) sit behind a WAF that blocks
# the default ``Python-urllib/<ver>`` User-Agent. Set a generic
# hermes-cli UA so the catalog endpoint is reachable.
req.add_header("User-Agent", _profile_user_agent())
for k, v in self.default_headers.items():
req.add_header(k, v)
try:
with urllib.request.urlopen(req, timeout=timeout) as resp:
data = json.loads(resp.read().decode())
items = data if isinstance(data, list) else data.get("data", [])
return [m["id"] for m in items if isinstance(m, dict) and "id" in m]
except Exception as exc:
logger.debug("fetch_models(%s): %s", self.name, exc)
return None