mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
feat: provider modules — ProviderProfile ABC, 33 providers, fetch_models, transport single-path
Introduces providers/ package — single source of truth for every inference provider. Adding a simple api-key provider now requires one providers/<name>.py file with zero edits anywhere else.

What this PR ships:
- providers/ package (ProviderProfile ABC + 33 profiles across 4 api_modes)
- ProviderProfile declarative fields: name, api_mode, aliases, display_name, env_vars, base_url, models_url, auth_type, fallback_models, hostname, default_headers, fixed_temperature, default_max_tokens, default_aux_model
- 4 overridable hooks: prepare_messages, build_extra_body, build_api_kwargs_extras, fetch_models
- chat_completions.build_kwargs: profile path via _build_kwargs_from_profile, legacy flag path retained for lmstudio/tencent-tokenhub (which have session-aware reasoning probing that doesn't map cleanly to hooks yet)
- run_agent.py: profile path for all registered providers; legacy path variable scoping fixed (all flags defined before branching)
- Auto-wires: auth.PROVIDER_REGISTRY, models.CANONICAL_PROVIDERS, doctor health checks, config.OPTIONAL_ENV_VARS, model_metadata._URL_TO_PROVIDER
- GeminiProfile: thinking_config translation (native + openai-compat nested)
- New tests/providers/ (79 tests covering profile declarations, transport parity, hook overrides, e2e kwargs assembly)

Deltas vs original PR (salvaged onto current main):
- Added profiles: alibaba-coding-plan, azure-foundry, minimax-oauth (were added to main since original PR)
- Skipped profiles: lmstudio, tencent-tokenhub stay on legacy path (their reasoning_effort probing has no clean hook equivalent yet)
- Removed lmstudio alias from custom profile (it's a separate provider now)
- Skipped openrouter/custom from PROVIDER_REGISTRY auto-extension (resolve_provider special-cases them; adding breaks runtime resolution)
- runtime_provider: profile.api_mode only as fallback when URL detection finds nothing (was breaking minimax /v1 override)
- Preserved main's legacy-path improvements: deepseek reasoning_content preserve, gemini Gemma skip, OpenRouter response caching, Anthropic 1M beta recovery, etc.
- Kept agent/copilot_acp_client.py in place (rejected PR's relocation — main has 7 fixes landed since; relocation would revert them)
- _API_KEY_PROVIDER_AUX_MODELS alias kept for backward compat with existing test imports

Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Closes #14418
This commit is contained in:
parent
2b500ed68a
commit
20a4f79ed1
57 changed files with 3149 additions and 177 deletions
|
|
@ -216,7 +216,26 @@ def _fixed_temperature_for_model(
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
|
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
|
||||||
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
def _get_aux_model_for_provider(provider_id: str) -> str:
    """Return the cheap auxiliary model for a provider.

    Reads from ProviderProfile.default_aux_model first, falling back to the
    legacy hardcoded dict for providers that predate the profiles system.

    Args:
        provider_id: Provider identifier used as the lookup key for both the
            profile registry and the fallback dict.

    Returns:
        The auxiliary model name, or ``""`` when neither the profile nor the
        fallback dict knows one. Callers that need ``None`` for "unknown"
        use ``_get_aux_model_for_provider(...) or None``.
    """
    try:
        from providers import get_provider_profile

        profile = get_provider_profile(provider_id)
        if profile and profile.default_aux_model:
            return profile.default_aux_model
    except Exception:
        # Deliberately best-effort: a failed profile lookup must not break
        # auxiliary-client resolution. Log it instead of swallowing silently
        # so a broken profile is diagnosable, then use the fallback dict.
        logger.debug(
            "Aux model profile lookup failed for provider %r; "
            "using fallback table",
            provider_id,
            exc_info=True,
        )
    return _API_KEY_PROVIDER_AUX_MODELS_FALLBACK.get(provider_id, "")
|
||||||
|
|
||||||
|
|
||||||
|
# Fallback for providers not yet migrated to ProviderProfile.default_aux_model,
|
||||||
|
# plus providers we intentionally keep pinned here (e.g. Anthropic predates
|
||||||
|
# profiles). New providers should set default_aux_model on their profile instead.
|
||||||
|
_API_KEY_PROVIDER_AUX_MODELS_FALLBACK: Dict[str, str] = {
|
||||||
"gemini": "gemini-3-flash-preview",
|
"gemini": "gemini-3-flash-preview",
|
||||||
"zai": "glm-4.5-flash",
|
"zai": "glm-4.5-flash",
|
||||||
"kimi-coding": "kimi-k2-turbo-preview",
|
"kimi-coding": "kimi-k2-turbo-preview",
|
||||||
|
|
@ -235,6 +254,10 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||||
"tencent-tokenhub": "hy3-preview",
|
"tencent-tokenhub": "hy3-preview",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Legacy alias — callers that haven't been updated to _get_aux_model_for_provider()
|
||||||
|
# can still use this dict directly. Kept in sync with _FALLBACK above.
|
||||||
|
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = _API_KEY_PROVIDER_AUX_MODELS_FALLBACK
|
||||||
|
|
||||||
# Vision-specific model overrides for direct providers.
|
# Vision-specific model overrides for direct providers.
|
||||||
# When the user's main provider has a dedicated vision/multimodal model that
|
# When the user's main provider has a dedicated vision/multimodal model that
|
||||||
# differs from their main chat model, map it here. The vision auto-detect
|
# differs from their main chat model, map it here. The vision auto-detect
|
||||||
|
|
@ -1157,7 +1180,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
|
|
||||||
raw_base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
|
raw_base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
|
||||||
base_url = _to_openai_base_url(raw_base_url)
|
base_url = _to_openai_base_url(raw_base_url)
|
||||||
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
|
model = _get_aux_model_for_provider(provider_id) or None
|
||||||
if model is None:
|
if model is None:
|
||||||
continue # skip provider if we don't know a valid aux model
|
continue # skip provider if we don't know a valid aux model
|
||||||
logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
|
logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
|
||||||
|
|
@ -1173,6 +1196,14 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
from hermes_cli.models import copilot_default_headers
|
from hermes_cli.models import copilot_default_headers
|
||||||
|
|
||||||
extra["default_headers"] = copilot_default_headers()
|
extra["default_headers"] = copilot_default_headers()
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
from providers import get_provider_profile as _gpf_aux
|
||||||
|
_ph_aux = _gpf_aux(provider_id)
|
||||||
|
if _ph_aux and _ph_aux.default_headers:
|
||||||
|
extra["default_headers"] = dict(_ph_aux.default_headers)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
|
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
|
||||||
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
|
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
|
||||||
return _client, model
|
return _client, model
|
||||||
|
|
@ -1184,7 +1215,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
|
|
||||||
raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
|
raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
|
||||||
base_url = _to_openai_base_url(raw_base_url)
|
base_url = _to_openai_base_url(raw_base_url)
|
||||||
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
|
model = _get_aux_model_for_provider(provider_id) or None
|
||||||
if model is None:
|
if model is None:
|
||||||
continue # skip provider if we don't know a valid aux model
|
continue # skip provider if we don't know a valid aux model
|
||||||
logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
|
logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
|
||||||
|
|
@ -1200,6 +1231,14 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
from hermes_cli.models import copilot_default_headers
|
from hermes_cli.models import copilot_default_headers
|
||||||
|
|
||||||
extra["default_headers"] = copilot_default_headers()
|
extra["default_headers"] = copilot_default_headers()
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
from providers import get_provider_profile as _gpf_aux2
|
||||||
|
_ph_aux2 = _gpf_aux2(provider_id)
|
||||||
|
if _ph_aux2 and _ph_aux2.default_headers:
|
||||||
|
extra["default_headers"] = dict(_ph_aux2.default_headers)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
|
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
|
||||||
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
|
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
|
||||||
return _client, model
|
return _client, model
|
||||||
|
|
@ -1572,7 +1611,7 @@ def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optiona
|
||||||
|
|
||||||
from agent.anthropic_adapter import _is_oauth_token
|
from agent.anthropic_adapter import _is_oauth_token
|
||||||
is_oauth = _is_oauth_token(token)
|
is_oauth = _is_oauth_token(token)
|
||||||
model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
|
model = _get_aux_model_for_provider("anthropic") or "claude-haiku-4-5-20251001"
|
||||||
logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth)
|
logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth)
|
||||||
try:
|
try:
|
||||||
real_client = build_anthropic_client(token, base_url)
|
real_client = build_anthropic_client(token, base_url)
|
||||||
|
|
@ -2408,7 +2447,7 @@ def resolve_provider_client(
|
||||||
if explicit_base_url:
|
if explicit_base_url:
|
||||||
base_url = _to_openai_base_url(explicit_base_url.strip().rstrip("/"))
|
base_url = _to_openai_base_url(explicit_base_url.strip().rstrip("/"))
|
||||||
|
|
||||||
default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
|
default_model = _get_aux_model_for_provider(provider)
|
||||||
final_model = _normalize_resolved_model(model or default_model, provider)
|
final_model = _normalize_resolved_model(model or default_model, provider)
|
||||||
|
|
||||||
if provider == "gemini":
|
if provider == "gemini":
|
||||||
|
|
|
||||||
|
|
@ -318,6 +318,17 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||||
"ollama.com": "ollama-cloud",
|
"ollama.com": "ollama-cloud",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Auto-extend with hostnames derived from provider profiles.
# Any provider whose hostname is not already in the map gets added
# automatically; explicit entries above are never overwritten.
try:
    from providers import list_providers as _list_providers

    for _pp in _list_providers():
        # Isolate failures per profile: one profile that raises while
        # resolving its hostname or name must not stop the remaining
        # profiles from being registered.
        try:
            _host = _pp.get_hostname()
            if _host and _host not in _URL_TO_PROVIDER:
                _URL_TO_PROVIDER[_host] = _pp.name
        except Exception:
            continue
except Exception:
    # Best-effort: if the providers package cannot be imported or listed,
    # keep the hand-written map as-is rather than failing module import.
    pass
|
||||||
|
|
||||||
|
|
||||||
def _infer_provider_from_url(base_url: str) -> Optional[str]:
|
def _infer_provider_from_url(base_url: str) -> Optional[str]:
|
||||||
"""Infer the models.dev provider name from a base URL.
|
"""Infer the models.dev provider name from a base URL.
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,16 @@ Usage:
|
||||||
result = transport.normalize_response(raw_response)
|
result = transport.normalize_response(raw_response)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401
|
from agent.transports.types import (
|
||||||
|
NormalizedResponse,
|
||||||
|
ToolCall,
|
||||||
|
Usage,
|
||||||
|
build_tool_call,
|
||||||
|
map_finish_reason,
|
||||||
|
) # noqa: F401
|
||||||
|
|
||||||
_REGISTRY: dict = {}
|
_REGISTRY: dict = {}
|
||||||
|
_discovered: bool = False
|
||||||
|
|
||||||
|
|
||||||
def register_transport(api_mode: str, transport_cls: type) -> None:
|
def register_transport(api_mode: str, transport_cls: type) -> None:
|
||||||
|
|
@ -23,6 +30,9 @@ def get_transport(api_mode: str):
|
||||||
This allows gradual migration — call sites can check for None
|
This allows gradual migration — call sites can check for None
|
||||||
and fall back to the legacy code path.
|
and fall back to the legacy code path.
|
||||||
"""
|
"""
|
||||||
|
global _discovered
|
||||||
|
if not _discovered:
|
||||||
|
_discover_transports()
|
||||||
cls = _REGISTRY.get(api_mode)
|
cls = _REGISTRY.get(api_mode)
|
||||||
if cls is None:
|
if cls is None:
|
||||||
# The registry can be partially populated when a specific transport
|
# The registry can be partially populated when a specific transport
|
||||||
|
|
@ -38,6 +48,8 @@ def get_transport(api_mode: str):
|
||||||
|
|
||||||
def _discover_transports() -> None:
|
def _discover_transports() -> None:
|
||||||
"""Import all transport modules to trigger auto-registration."""
|
"""Import all transport modules to trigger auto-registration."""
|
||||||
|
global _discovered
|
||||||
|
_discovered = True
|
||||||
try:
|
try:
|
||||||
import agent.transports.anthropic # noqa: F401
|
import agent.transports.anthropic # noqa: F401
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
|
|
||||||
|
|
@ -109,7 +109,9 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
def api_mode(self) -> str:
|
def api_mode(self) -> str:
|
||||||
return "chat_completions"
|
return "chat_completions"
|
||||||
|
|
||||||
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
|
def convert_messages(
|
||||||
|
self, messages: list[dict[str, Any]], **kwargs
|
||||||
|
) -> list[dict[str, Any]]:
|
||||||
"""Messages are already in OpenAI format — sanitize Codex leaks only.
|
"""Messages are already in OpenAI format — sanitize Codex leaks only.
|
||||||
|
|
||||||
Strips Codex Responses API fields (``codex_reasoning_items`` /
|
Strips Codex Responses API fields (``codex_reasoning_items`` /
|
||||||
|
|
@ -126,7 +128,9 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
tool_calls = msg.get("tool_calls")
|
tool_calls = msg.get("tool_calls")
|
||||||
if isinstance(tool_calls, list):
|
if isinstance(tool_calls, list):
|
||||||
for tc in tool_calls:
|
for tc in tool_calls:
|
||||||
if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
|
if isinstance(tc, dict) and (
|
||||||
|
"call_id" in tc or "response_item_id" in tc
|
||||||
|
):
|
||||||
needs_sanitize = True
|
needs_sanitize = True
|
||||||
break
|
break
|
||||||
if needs_sanitize:
|
if needs_sanitize:
|
||||||
|
|
@ -149,39 +153,41 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
tc.pop("response_item_id", None)
|
tc.pop("response_item_id", None)
|
||||||
return sanitized
|
return sanitized
|
||||||
|
|
||||||
def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||||
"""Tools are already in OpenAI format — identity."""
|
"""Tools are already in OpenAI format — identity."""
|
||||||
return tools
|
return tools
|
||||||
|
|
||||||
def build_kwargs(
|
def build_kwargs(
|
||||||
self,
|
self,
|
||||||
model: str,
|
model: str,
|
||||||
messages: List[Dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
tools: Optional[List[Dict[str, Any]]] = None,
|
tools: list[dict[str, Any]] | None = None,
|
||||||
**params,
|
**params,
|
||||||
) -> Dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Build chat.completions.create() kwargs.
|
"""Build chat.completions.create() kwargs.
|
||||||
|
|
||||||
This is the most complex transport method — it handles ~16 providers
|
params (all optional):
|
||||||
via params rather than subclasses.
|
|
||||||
|
|
||||||
params:
|
|
||||||
timeout: float — API call timeout
|
timeout: float — API call timeout
|
||||||
max_tokens: int | None — user-configured max tokens
|
max_tokens: int | None — user-configured max tokens
|
||||||
ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
|
ephemeral_max_output_tokens: int | None — one-shot override
|
||||||
max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
|
max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
|
||||||
reasoning_config: dict | None
|
reasoning_config: dict | None
|
||||||
request_overrides: dict | None
|
request_overrides: dict | None
|
||||||
session_id: str | None
|
session_id: str | None
|
||||||
qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
|
|
||||||
model_lower: str — lowercase model name for pattern matching
|
model_lower: str — lowercase model name for pattern matching
|
||||||
# Provider detection flags (all optional, default False)
|
# Provider profile path (all per-provider quirks live in providers/)
|
||||||
|
provider_profile: ProviderProfile | None — when present, delegates to
|
||||||
|
_build_kwargs_from_profile(); all flag params below are bypassed.
|
||||||
|
# Legacy-path flags — only used when provider_profile is None
|
||||||
|
# (i.e. custom / unregistered providers). Known providers all go
|
||||||
|
# through provider_profile.
|
||||||
is_openrouter: bool
|
is_openrouter: bool
|
||||||
is_nous: bool
|
is_nous: bool
|
||||||
is_qwen_portal: bool
|
is_qwen_portal: bool
|
||||||
is_github_models: bool
|
is_github_models: bool
|
||||||
is_nvidia_nim: bool
|
is_nvidia_nim: bool
|
||||||
is_kimi: bool
|
is_kimi: bool
|
||||||
|
is_tokenhub: bool
|
||||||
is_lmstudio: bool
|
is_lmstudio: bool
|
||||||
is_custom_provider: bool
|
is_custom_provider: bool
|
||||||
ollama_num_ctx: int | None
|
ollama_num_ctx: int | None
|
||||||
|
|
@ -190,6 +196,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
# Qwen-specific
|
# Qwen-specific
|
||||||
qwen_prepare_fn: callable | None — runs AFTER codex sanitization
|
qwen_prepare_fn: callable | None — runs AFTER codex sanitization
|
||||||
qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
|
qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
|
||||||
|
qwen_session_metadata: dict | None
|
||||||
# Temperature
|
# Temperature
|
||||||
fixed_temperature: Any — from _fixed_temperature_for_model()
|
fixed_temperature: Any — from _fixed_temperature_for_model()
|
||||||
omit_temperature: bool
|
omit_temperature: bool
|
||||||
|
|
@ -199,28 +206,21 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
lmstudio_reasoning_options: list[str] | None # raw allowed_options from /api/v1/models
|
lmstudio_reasoning_options: list[str] | None # raw allowed_options from /api/v1/models
|
||||||
# Claude on OpenRouter/Nous max output
|
# Claude on OpenRouter/Nous max output
|
||||||
anthropic_max_output: int | None
|
anthropic_max_output: int | None
|
||||||
# Extra
|
extra_body_additions: dict | None
|
||||||
extra_body_additions: dict | None — pre-built extra_body entries
|
|
||||||
"""
|
"""
|
||||||
# Codex sanitization: drop reasoning_items / call_id / response_item_id
|
# Codex sanitization: drop reasoning_items / call_id / response_item_id
|
||||||
sanitized = self.convert_messages(messages)
|
sanitized = self.convert_messages(messages)
|
||||||
|
|
||||||
# Qwen portal prep AFTER codex sanitization. If sanitize already
|
# ── Provider profile: single-path when present ──────────────────
|
||||||
# deepcopied, reuse that copy via the in-place variant to avoid a
|
_profile = params.get("provider_profile")
|
||||||
# second deepcopy.
|
if _profile:
|
||||||
is_qwen = params.get("is_qwen_portal", False)
|
return self._build_kwargs_from_profile(
|
||||||
if is_qwen:
|
_profile, model, sanitized, tools, params
|
||||||
qwen_prep = params.get("qwen_prepare_fn")
|
)
|
||||||
qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
|
|
||||||
if sanitized is messages:
|
# ── Legacy fallback (unregistered / unknown provider) ───────────
|
||||||
if qwen_prep is not None:
|
# Reached only when get_provider_profile() returned None.
|
||||||
sanitized = qwen_prep(sanitized)
|
# Known providers always go through the profile path above.
|
||||||
else:
|
|
||||||
# Already deepcopied — transform in place
|
|
||||||
if qwen_prep_inplace is not None:
|
|
||||||
qwen_prep_inplace(sanitized)
|
|
||||||
elif qwen_prep is not None:
|
|
||||||
sanitized = qwen_prep(sanitized)
|
|
||||||
|
|
||||||
# Developer role swap for GPT-5/Codex models
|
# Developer role swap for GPT-5/Codex models
|
||||||
model_lower = params.get("model_lower", (model or "").lower())
|
model_lower = params.get("model_lower", (model or "").lower())
|
||||||
|
|
@ -233,7 +233,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
sanitized = list(sanitized)
|
sanitized = list(sanitized)
|
||||||
sanitized[0] = {**sanitized[0], "role": "developer"}
|
sanitized[0] = {**sanitized[0], "role": "developer"}
|
||||||
|
|
||||||
api_kwargs: Dict[str, Any] = {
|
api_kwargs: dict[str, Any] = {
|
||||||
"model": model,
|
"model": model,
|
||||||
"messages": sanitized,
|
"messages": sanitized,
|
||||||
}
|
}
|
||||||
|
|
@ -242,19 +242,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
if timeout is not None:
|
if timeout is not None:
|
||||||
api_kwargs["timeout"] = timeout
|
api_kwargs["timeout"] = timeout
|
||||||
|
|
||||||
# Temperature
|
|
||||||
fixed_temp = params.get("fixed_temperature")
|
|
||||||
omit_temp = params.get("omit_temperature", False)
|
|
||||||
if omit_temp:
|
|
||||||
api_kwargs.pop("temperature", None)
|
|
||||||
elif fixed_temp is not None:
|
|
||||||
api_kwargs["temperature"] = fixed_temp
|
|
||||||
|
|
||||||
# Qwen metadata (caller precomputes {sessionId, promptId})
|
|
||||||
qwen_meta = params.get("qwen_session_metadata")
|
|
||||||
if qwen_meta and is_qwen:
|
|
||||||
api_kwargs["metadata"] = qwen_meta
|
|
||||||
|
|
||||||
# Tools
|
# Tools
|
||||||
if tools:
|
if tools:
|
||||||
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
|
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
|
||||||
|
|
@ -278,13 +265,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
api_kwargs.update(max_tokens_fn(ephemeral))
|
api_kwargs.update(max_tokens_fn(ephemeral))
|
||||||
elif max_tokens is not None and max_tokens_fn:
|
elif max_tokens is not None and max_tokens_fn:
|
||||||
api_kwargs.update(max_tokens_fn(max_tokens))
|
api_kwargs.update(max_tokens_fn(max_tokens))
|
||||||
elif is_nvidia_nim and max_tokens_fn:
|
|
||||||
api_kwargs.update(max_tokens_fn(16384))
|
|
||||||
elif is_qwen and max_tokens_fn:
|
|
||||||
api_kwargs.update(max_tokens_fn(65536))
|
|
||||||
elif is_kimi and max_tokens_fn:
|
|
||||||
# Kimi/Moonshot: 32000 matches Kimi CLI's default
|
|
||||||
api_kwargs.update(max_tokens_fn(32000))
|
|
||||||
elif anthropic_max_out is not None:
|
elif anthropic_max_out is not None:
|
||||||
api_kwargs["max_tokens"] = anthropic_max_out
|
api_kwargs["max_tokens"] = anthropic_max_out
|
||||||
|
|
||||||
|
|
@ -331,7 +311,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
api_kwargs["reasoning_effort"] = _lm_effort
|
api_kwargs["reasoning_effort"] = _lm_effort
|
||||||
|
|
||||||
# extra_body assembly
|
# extra_body assembly
|
||||||
extra_body: Dict[str, Any] = {}
|
extra_body: dict[str, Any] = {}
|
||||||
|
|
||||||
is_openrouter = params.get("is_openrouter", False)
|
is_openrouter = params.get("is_openrouter", False)
|
||||||
is_nous = params.get("is_nous", False)
|
is_nous = params.get("is_nous", False)
|
||||||
|
|
@ -361,35 +341,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
if gh_reasoning is not None:
|
if gh_reasoning is not None:
|
||||||
extra_body["reasoning"] = gh_reasoning
|
extra_body["reasoning"] = gh_reasoning
|
||||||
else:
|
else:
|
||||||
if reasoning_config is not None:
|
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
|
||||||
rc = dict(reasoning_config)
|
|
||||||
if is_nous and rc.get("enabled") is False:
|
|
||||||
pass # omit for Nous when disabled
|
|
||||||
else:
|
|
||||||
extra_body["reasoning"] = rc
|
|
||||||
else:
|
|
||||||
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
|
|
||||||
|
|
||||||
if is_nous:
|
|
||||||
extra_body["tags"] = ["product=hermes-agent"]
|
|
||||||
|
|
||||||
# Ollama num_ctx
|
|
||||||
ollama_ctx = params.get("ollama_num_ctx")
|
|
||||||
if ollama_ctx:
|
|
||||||
options = extra_body.get("options", {})
|
|
||||||
options["num_ctx"] = ollama_ctx
|
|
||||||
extra_body["options"] = options
|
|
||||||
|
|
||||||
# Ollama/custom think=false
|
|
||||||
if params.get("is_custom_provider", False):
|
|
||||||
if reasoning_config and isinstance(reasoning_config, dict):
|
|
||||||
_effort = (reasoning_config.get("effort") or "").strip().lower()
|
|
||||||
_enabled = reasoning_config.get("enabled", True)
|
|
||||||
if _effort == "none" or _enabled is False:
|
|
||||||
extra_body["think"] = False
|
|
||||||
|
|
||||||
if is_qwen:
|
|
||||||
extra_body["vl_high_resolution_images"] = True
|
|
||||||
|
|
||||||
if provider_name == "gemini":
|
if provider_name == "gemini":
|
||||||
raw_thinking_config = _build_gemini_thinking_config(model, reasoning_config)
|
raw_thinking_config = _build_gemini_thinking_config(model, reasoning_config)
|
||||||
|
|
@ -423,6 +375,120 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
|
|
||||||
return api_kwargs
|
return api_kwargs
|
||||||
|
|
||||||
|
def _build_kwargs_from_profile(self, profile, model, sanitized, tools, params):
    """Build API kwargs using a ProviderProfile — single path, no legacy flags.

    This method replaces the entire flag-based kwargs assembly when a
    provider_profile is passed. Every quirk comes from the profile object.

    Args:
        profile: Provider profile. This method reads ``fixed_temperature``
            and ``default_max_tokens`` and calls ``prepare_messages``,
            ``build_api_kwargs_extras`` and ``build_extra_body`` on it.
        model: Model name, sent as ``"model"`` and used for the
            developer-role and Moonshot checks.
        sanitized: Messages already passed through ``convert_messages``.
        tools: Tool definitions, or a falsy value to omit ``"tools"``.
        params: The ``**params`` dict received by ``build_kwargs``. Keys read
            here: ``temperature``, ``timeout``, ``max_tokens_param_fn``,
            ``ephemeral_max_output_tokens``, ``max_tokens``,
            ``anthropic_max_output``, ``reasoning_config``,
            ``supports_reasoning``, ``qwen_session_metadata``,
            ``ollama_num_ctx``, ``session_id``, ``provider_preferences``,
            ``base_url``, ``extra_body_additions``, ``request_overrides``.

    Returns:
        The assembled kwargs dict. ``"extra_body"`` is only present when at
        least one source contributed an entry.
    """
    from providers.base import OMIT_TEMPERATURE

    # Message preprocessing
    sanitized = profile.prepare_messages(sanitized)

    # Developer role swap — model-name-based, applies to all providers
    _model_lower = (model or "").lower()
    if (
        sanitized
        and isinstance(sanitized[0], dict)
        and sanitized[0].get("role") == "system"
        and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
    ):
        # Copy the list and the first message so the caller's objects are
        # not mutated by the role rewrite.
        sanitized = list(sanitized)
        sanitized[0] = {**sanitized[0], "role": "developer"}

    api_kwargs: dict[str, Any] = {
        "model": model,
        "messages": sanitized,
    }

    # Temperature
    if profile.fixed_temperature is OMIT_TEMPERATURE:
        pass  # Don't include temperature at all
    elif profile.fixed_temperature is not None:
        api_kwargs["temperature"] = profile.fixed_temperature
    else:
        # Use caller's temperature if provided
        temp = params.get("temperature")
        if temp is not None:
            api_kwargs["temperature"] = temp

    # Timeout
    timeout = params.get("timeout")
    if timeout is not None:
        api_kwargs["timeout"] = timeout

    # Tools — apply Moonshot/Kimi schema sanitization regardless of path
    if tools:
        if is_moonshot_model(model):
            tools = sanitize_moonshot_tools(tools)
        api_kwargs["tools"] = tools

    # max_tokens resolution — priority: ephemeral > user > profile default,
    # each only when a max_tokens_param_fn is available to pick the key name;
    # anthropic_max_output is the last resort and is written as "max_tokens".
    max_tokens_fn = params.get("max_tokens_param_fn")
    ephemeral = params.get("ephemeral_max_output_tokens")
    user_max = params.get("max_tokens")
    anthropic_max = params.get("anthropic_max_output")

    if ephemeral is not None and max_tokens_fn:
        api_kwargs.update(max_tokens_fn(ephemeral))
    elif user_max is not None and max_tokens_fn:
        api_kwargs.update(max_tokens_fn(user_max))
    elif profile.default_max_tokens and max_tokens_fn:
        api_kwargs.update(max_tokens_fn(profile.default_max_tokens))
    elif anthropic_max is not None:
        api_kwargs["max_tokens"] = anthropic_max

    # Provider-specific api_kwargs extras (reasoning_effort, metadata, etc.)
    reasoning_config = params.get("reasoning_config")
    extra_body_from_profile, top_level_from_profile = (
        profile.build_api_kwargs_extras(
            reasoning_config=reasoning_config,
            supports_reasoning=params.get("supports_reasoning", False),
            qwen_session_metadata=params.get("qwen_session_metadata"),
            model=model,
            ollama_num_ctx=params.get("ollama_num_ctx"),
        )
    )
    # Guarded like every other hook result below: a hook override that
    # returns None for the top-level half must not raise TypeError here.
    if top_level_from_profile:
        api_kwargs.update(top_level_from_profile)

    # extra_body assembly — later sources overwrite earlier ones on key
    # collisions: profile body < profile extras < caller additions < overrides.
    extra_body: dict[str, Any] = {}

    # Profile's extra_body (tags, provider prefs, vl_high_resolution, etc.)
    profile_body = profile.build_extra_body(
        session_id=params.get("session_id"),
        provider_preferences=params.get("provider_preferences"),
        model=model,
        base_url=params.get("base_url"),
        reasoning_config=reasoning_config,
    )
    if profile_body:
        extra_body.update(profile_body)

    # Profile's reasoning/thinking extra_body entries
    if extra_body_from_profile:
        extra_body.update(extra_body_from_profile)

    # Merge any pre-built extra_body additions from the caller
    additions = params.get("extra_body_additions")
    if additions:
        extra_body.update(additions)

    # Request overrides (user config): a dict-valued "extra_body" is merged
    # into the assembled extra_body; every other key is set directly on the
    # top-level kwargs, replacing whatever was computed above.
    overrides = params.get("request_overrides")
    if overrides:
        for k, v in overrides.items():
            if k == "extra_body" and isinstance(v, dict):
                extra_body.update(v)
            else:
                api_kwargs[k] = v

    if extra_body:
        api_kwargs["extra_body"] = extra_body

    return api_kwargs
|
||||||
|
|
||||||
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
||||||
"""Normalize OpenAI ChatCompletion to NormalizedResponse.
|
"""Normalize OpenAI ChatCompletion to NormalizedResponse.
|
||||||
|
|
||||||
|
|
@ -444,7 +510,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
# Gemini 3 thinking models attach extra_content with
|
# Gemini 3 thinking models attach extra_content with
|
||||||
# thought_signature — without replay on the next turn the API
|
# thought_signature — without replay on the next turn the API
|
||||||
# rejects the request with 400.
|
# rejects the request with 400.
|
||||||
tc_provider_data: Dict[str, Any] = {}
|
tc_provider_data: dict[str, Any] = {}
|
||||||
extra = getattr(tc, "extra_content", None)
|
extra = getattr(tc, "extra_content", None)
|
||||||
if extra is None and hasattr(tc, "model_extra"):
|
if extra is None and hasattr(tc, "model_extra"):
|
||||||
extra = (tc.model_extra or {}).get("extra_content")
|
extra = (tc.model_extra or {}).get("extra_content")
|
||||||
|
|
@ -455,12 +521,14 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
tc_provider_data["extra_content"] = extra
|
tc_provider_data["extra_content"] = extra
|
||||||
tool_calls.append(ToolCall(
|
tool_calls.append(
|
||||||
id=tc.id,
|
ToolCall(
|
||||||
name=tc.function.name,
|
id=tc.id,
|
||||||
arguments=tc.function.arguments,
|
name=tc.function.name,
|
||||||
provider_data=tc_provider_data or None,
|
arguments=tc.function.arguments,
|
||||||
))
|
provider_data=tc_provider_data or None,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
usage = None
|
usage = None
|
||||||
if hasattr(response, "usage") and response.usage:
|
if hasattr(response, "usage") and response.usage:
|
||||||
|
|
@ -508,7 +576,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
|
def extract_cache_stats(self, response: Any) -> dict[str, int] | None:
|
||||||
"""Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
|
"""Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
|
||||||
usage = getattr(response, "usage", None)
|
usage = getattr(response, "usage", None)
|
||||||
if usage is None:
|
if usage is None:
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -32,10 +32,10 @@ class ToolCall:
|
||||||
* Others: ``None``
|
* Others: ``None``
|
||||||
"""
|
"""
|
||||||
|
|
||||||
id: Optional[str]
|
id: str | None
|
||||||
name: str
|
name: str
|
||||||
arguments: str # JSON string
|
arguments: str # JSON string
|
||||||
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
|
provider_data: dict[str, Any] | None = field(default=None, repr=False)
|
||||||
|
|
||||||
# ── Backward compatibility ──────────────────────────────────
|
# ── Backward compatibility ──────────────────────────────────
|
||||||
# The agent loop reads tc.function.name / tc.function.arguments
|
# The agent loop reads tc.function.name / tc.function.arguments
|
||||||
|
|
@ -47,17 +47,17 @@ class ToolCall:
|
||||||
return "function"
|
return "function"
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def function(self) -> "ToolCall":
|
def function(self) -> ToolCall:
|
||||||
"""Return self so tc.function.name / tc.function.arguments work."""
|
"""Return self so tc.function.name / tc.function.arguments work."""
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def call_id(self) -> Optional[str]:
|
def call_id(self) -> str | None:
|
||||||
"""Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
|
"""Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
|
||||||
return (self.provider_data or {}).get("call_id")
|
return (self.provider_data or {}).get("call_id")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def response_item_id(self) -> Optional[str]:
|
def response_item_id(self) -> str | None:
|
||||||
"""Codex response_item_id from provider_data."""
|
"""Codex response_item_id from provider_data."""
|
||||||
return (self.provider_data or {}).get("response_item_id")
|
return (self.provider_data or {}).get("response_item_id")
|
||||||
|
|
||||||
|
|
@ -101,18 +101,18 @@ class NormalizedResponse:
|
||||||
* Others: ``None``
|
* Others: ``None``
|
||||||
"""
|
"""
|
||||||
|
|
||||||
content: Optional[str]
|
content: str | None
|
||||||
tool_calls: Optional[List[ToolCall]]
|
tool_calls: list[ToolCall] | None
|
||||||
finish_reason: str # "stop", "tool_calls", "length", "content_filter"
|
finish_reason: str # "stop", "tool_calls", "length", "content_filter"
|
||||||
reasoning: Optional[str] = None
|
reasoning: str | None = None
|
||||||
usage: Optional[Usage] = None
|
usage: Usage | None = None
|
||||||
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
|
provider_data: dict[str, Any] | None = field(default=None, repr=False)
|
||||||
|
|
||||||
# ── Backward compatibility ──────────────────────────────────
|
# ── Backward compatibility ──────────────────────────────────
|
||||||
# The shim _nr_to_assistant_message() mapped these from provider_data.
|
# The shim _nr_to_assistant_message() mapped these from provider_data.
|
||||||
# These properties let NormalizedResponse pass through directly.
|
# These properties let NormalizedResponse pass through directly.
|
||||||
@property
|
@property
|
||||||
def reasoning_content(self) -> Optional[str]:
|
def reasoning_content(self) -> str | None:
|
||||||
pd = self.provider_data or {}
|
pd = self.provider_data or {}
|
||||||
return pd.get("reasoning_content")
|
return pd.get("reasoning_content")
|
||||||
|
|
||||||
|
|
@ -136,8 +136,9 @@ class NormalizedResponse:
|
||||||
# Factory helpers
|
# Factory helpers
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
def build_tool_call(
|
def build_tool_call(
|
||||||
id: Optional[str],
|
id: str | None,
|
||||||
name: str,
|
name: str,
|
||||||
arguments: Any,
|
arguments: Any,
|
||||||
**provider_fields: Any,
|
**provider_fields: Any,
|
||||||
|
|
@ -151,7 +152,7 @@ def build_tool_call(
|
||||||
return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
|
return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
|
||||||
|
|
||||||
|
|
||||||
def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
|
def map_finish_reason(reason: str | None, mapping: dict[str, str]) -> str:
|
||||||
"""Translate a provider-specific stop reason to the normalised set.
|
"""Translate a provider-specific stop reason to the normalised set.
|
||||||
|
|
||||||
Falls back to ``"stop"`` for unknown or ``None`` reasons.
|
Falls back to ``"stop"`` for unknown or ``None`` reasons.
|
||||||
|
|
|
||||||
|
|
@ -416,6 +416,40 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Auto-extend PROVIDER_REGISTRY with every api-key profile from providers/
# that has no hand-written entry above.  Dropping in a providers/*.py file is
# therefore enough to register a new provider — this file needs no edit.
# Best-effort: any failure (e.g. providers/ not importable) is swallowed and
# the entries registered so far are kept.
try:
    from providers import list_providers as _list_providers_for_registry

    # Never auto-registered: copilot/kimi/zai have bespoke token refresh, and
    # openrouter/custom are aggregator/user-supplied and handled outside the
    # registry — adding them here breaks runtime_provider resolution that
    # relies on `openrouter not in PROVIDER_REGISTRY`.
    _auto_registry_skip = {"copilot", "kimi-coding", "kimi-coding-cn", "zai", "openrouter", "custom"}

    for _pp in _list_providers_for_registry():
        if (
            _pp.name in PROVIDER_REGISTRY
            or _pp.auth_type != "api_key"
            or not _pp.env_vars
            or _pp.name in _auto_registry_skip
        ):
            continue

        # Split the declared env vars into credentials and URL overrides.
        # "_BASE_URL" also ends with "_URL", so one suffix test covers both.
        _url_vars = [v for v in _pp.env_vars if v.endswith("_URL")]
        _api_key_vars = tuple(v for v in _pp.env_vars if not v.endswith("_URL"))
        _base_url_var = _url_vars[0] if _url_vars else None

        _entry = ProviderConfig(
            id=_pp.name,
            name=_pp.display_name or _pp.name,
            auth_type="api_key",
            inference_base_url=_pp.base_url,
            # Falls back to the full env_vars tuple when nothing looks like a key.
            api_key_env_vars=_api_key_vars or _pp.env_vars,
            base_url_env_var=_base_url_var or "",
        )
        PROVIDER_REGISTRY[_pp.name] = _entry

        # Aliases point at the same config object so resolve_provider()
        # resolves them; an existing key is never overwritten.
        for _alias in _pp.aliases:
            PROVIDER_REGISTRY.setdefault(_alias, _entry)
except Exception:
    pass
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Anthropic Key Helper
|
# Anthropic Key Helper
|
||||||
|
|
@ -1195,6 +1229,17 @@ def resolve_provider(
|
||||||
"vllm": "custom", "llamacpp": "custom",
|
"vllm": "custom", "llamacpp": "custom",
|
||||||
"llama.cpp": "custom", "llama-cpp": "custom",
|
"llama.cpp": "custom", "llama-cpp": "custom",
|
||||||
}
|
}
|
||||||
|
# Extend with aliases declared in providers/*.py that aren't already mapped.
|
||||||
|
# This keeps providers/ as the single source for new aliases while the
|
||||||
|
# hardcoded dict above remains authoritative for existing ones.
|
||||||
|
try:
|
||||||
|
from providers import list_providers as _lp
|
||||||
|
for _pp in _lp():
|
||||||
|
for _alias in _pp.aliases:
|
||||||
|
if _alias not in _PROVIDER_ALIASES:
|
||||||
|
_PROVIDER_ALIASES[_alias] = _pp.name
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
|
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
|
||||||
|
|
||||||
if normalized == "openrouter":
|
if normalized == "openrouter":
|
||||||
|
|
|
||||||
|
|
@ -4840,3 +4840,45 @@ def config_command(args):
|
||||||
print(" hermes config path Show config file path")
|
print(" hermes config path Show config file path")
|
||||||
print(" hermes config env-path Show .env file path")
|
print(" hermes config env-path Show .env file path")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Profile-driven env var injection ─────────────────────────────────────────
|
||||||
|
# Any provider registered in providers/ with auth_type="api_key" automatically
|
||||||
|
# gets its env_vars exposed in OPTIONAL_ENV_VARS without editing this file.
|
||||||
|
# Runs once at import time.
|
||||||
|
|
||||||
|
_profile_env_vars_injected = False
|
||||||
|
|
||||||
|
|
||||||
|
def _inject_profile_env_vars() -> None:
    """Add env vars declared by api-key provider profiles to OPTIONAL_ENV_VARS.

    Only variables that are not already listed are added.  The module-level
    flag is set before any work is attempted, so the body runs at most once
    per process even when the import of ``providers`` fails; later calls
    return immediately.  All errors are swallowed (best-effort).
    """
    global _profile_env_vars_injected
    if _profile_env_vars_injected:
        return
    _profile_env_vars_injected = True

    try:
        from providers import list_providers

        for profile in list_providers():
            if profile.auth_type != "api_key":
                continue
            for var_name in profile.env_vars:
                if var_name in OPTIONAL_ENV_VARS:
                    continue
                # Anything ending in "_URL" (which includes "_BASE_URL") is a
                # base-URL override; everything else is treated as a secret key.
                is_key = not var_name.endswith("_URL")
                label = profile.display_name or profile.name
                if is_key:
                    description = f"{label} API key"
                    prompt = f"{label} API key"
                else:
                    description = f"{label} base URL override"
                    prompt = f"{label} base URL (leave empty for default)"
                OPTIONAL_ENV_VARS[var_name] = {
                    "description": description,
                    "prompt": prompt,
                    "url": profile.signup_url or None,
                    "password": is_key,
                    "category": "provider",
                    "advanced": True,
                }
    except Exception:
        pass
|
||||||
|
|
||||||
|
|
||||||
|
# Eagerly inject so that OPTIONAL_ENV_VARS is fully populated at import time.
|
||||||
|
_inject_profile_env_vars()
|
||||||
|
|
|
||||||
|
|
@ -169,6 +169,85 @@ def _check_gateway_service_linger(issues: list[str]) -> None:
|
||||||
check_warn("Could not verify systemd linger", f"({linger_detail})")
|
check_warn("Could not verify systemd linger", f"({linger_detail})")
|
||||||
|
|
||||||
|
|
||||||
|
_APIKEY_PROVIDERS_CACHE: list | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _build_apikey_providers_list() -> list:
    """Build the API-key provider list used by the doctor health check.

    Each entry is a tuple
    ``(name, env_vars, default_url, base_env, supports_models_endpoint)``.
    The hard-coded entries come first.  Any profile from ``providers/`` with
    ``auth_type == "api_key"`` that is not already covered (by display label
    or canonical name) is appended, so adding a ``providers/*.py`` file is
    enough to appear in doctor.

    A fresh list is built on every call; this function does not cache.
    ``run_doctor`` keeps the result in ``_APIKEY_PROVIDERS_CACHE``.

    Profile discovery is best-effort: if ``providers`` cannot be imported or
    a profile is malformed, the entries collected so far are returned.
    """
    _static = [
        ("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
        ("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True),
        ("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
        ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
        ("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
        ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
        ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
        ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
        # MiniMax global: /v1 endpoint supports /models.
        ("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),
        # MiniMax CN: /v1 endpoint does NOT support /models (returns 404).
        ("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", False),
        ("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
        ("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
        ("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True),
        # OpenCode Go has no shared /models endpoint; skip the health check.
        ("OpenCode Go", ("OPENCODE_GO_API_KEY",), None, "OPENCODE_GO_BASE_URL", False),
    ]
    _known_names = {t[0] for t in _static}
    # Also index by profile canonical name so profiles without display_name
    # don't create duplicate entries for providers already in the static list.
    _name_to_canonical = {
        "Z.AI / GLM": "zai", "Kimi / Moonshot": "kimi-coding",
        "StepFun Step Plan": "stepfun", "Kimi / Moonshot (China)": "kimi-coding-cn",
        "Arcee AI": "arcee", "GMI Cloud": "gmi", "DeepSeek": "deepseek",
        "Hugging Face": "huggingface", "NVIDIA NIM": "nvidia",
        "Alibaba/DashScope": "alibaba", "MiniMax": "minimax",
        "MiniMax (China)": "minimax-cn", "Vercel AI Gateway": "ai-gateway",
        "Kilo Code": "kilocode", "OpenCode Zen": "opencode-zen",
        "OpenCode Go": "opencode-go",
    }
    _known_canonical = set(_name_to_canonical.values())
    try:
        from providers import list_providers
        from providers.base import ProviderProfile as _PP

        for _pp in list_providers():
            if not isinstance(_pp, _PP) or _pp.auth_type != "api_key" or not _pp.env_vars:
                continue
            _label = _pp.display_name or _pp.name
            if _label in _known_names or _pp.name in _known_canonical:
                continue
            # Separate API-key vars from base-URL override vars — the health-check
            # loop sends the first found value as Authorization: Bearer, so a URL
            # string must never be picked.  "_BASE_URL" also ends with "_URL",
            # so a single suffix test covers both spellings.
            _key_vars = tuple(v for v in _pp.env_vars if not v.endswith("_URL"))
            if not _key_vars:
                continue
            _base_var = next((v for v in _pp.env_vars if v.endswith("_URL")), None)
            # An explicit models_url always wins, even when the profile has no
            # base_url; otherwise derive "<base_url>/models", or None when
            # neither is declared.
            if _pp.models_url:
                _models_url = _pp.models_url
            elif _pp.base_url:
                _models_url = _pp.base_url.rstrip("/") + "/models"
            else:
                _models_url = None
            _static.append((_label, _key_vars, _models_url, _base_var, True))
    except Exception:
        # Best-effort: keep whatever was collected before the failure.
        pass
    return _static
|
||||||
|
|
||||||
|
|
||||||
def run_doctor(args):
|
def run_doctor(args):
|
||||||
"""Run diagnostic checks."""
|
"""Run diagnostic checks."""
|
||||||
should_fix = getattr(args, 'fix', False)
|
should_fix = getattr(args, 'fix', False)
|
||||||
|
|
@ -1081,27 +1160,11 @@ def run_doctor(args):
|
||||||
# -- API-key providers --
|
# -- API-key providers --
|
||||||
# Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint)
|
# Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint)
|
||||||
# If supports_models_endpoint is False, we skip the health check and just show "configured"
|
# If supports_models_endpoint is False, we skip the health check and just show "configured"
|
||||||
_apikey_providers = [
|
# Cached at module level after first build — profiles auto-extend it.
|
||||||
("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
|
global _APIKEY_PROVIDERS_CACHE
|
||||||
("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True),
|
if _APIKEY_PROVIDERS_CACHE is None:
|
||||||
("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
|
_APIKEY_PROVIDERS_CACHE = _build_apikey_providers_list()
|
||||||
("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
|
_apikey_providers = _APIKEY_PROVIDERS_CACHE
|
||||||
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
|
|
||||||
("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
|
|
||||||
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
|
|
||||||
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
|
|
||||||
("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
|
|
||||||
("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
|
|
||||||
# MiniMax global: /v1 endpoint supports /models.
|
|
||||||
("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),
|
|
||||||
# MiniMax CN: /v1 endpoint does NOT support /models (returns 404).
|
|
||||||
("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", False),
|
|
||||||
("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
|
|
||||||
("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
|
|
||||||
("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True),
|
|
||||||
# OpenCode Go has no shared /models endpoint; skip the health check.
|
|
||||||
("OpenCode Go", ("OPENCODE_GO_API_KEY",), None, "OPENCODE_GO_BASE_URL", False),
|
|
||||||
]
|
|
||||||
for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
|
for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
|
||||||
_key = ""
|
_key = ""
|
||||||
for _ev in _env_vars:
|
for _ev in _env_vars:
|
||||||
|
|
|
||||||
|
|
@ -1611,6 +1611,21 @@ def cmd_model(args):
|
||||||
select_provider_and_model(args=args)
|
select_provider_and_model(args=args)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_profile_api_key_provider(provider_id: str) -> bool:
    """Tell whether *provider_id* resolves to a profile using api-key auth.

    select_provider_and_model() uses this as a catch-all so that providers
    declared only in providers/*.py are routed to
    _model_flow_api_key_provider without a dedicated elif branch.

    Returns False when no profile is found, when the profile uses another
    auth type, or when the lookup fails for any reason (including the
    providers package being unavailable).
    """
    try:
        from providers import get_provider_profile

        profile = get_provider_profile(provider_id)
        if profile is None:
            return False
        return profile.auth_type == "api_key"
    except Exception:
        return False
|
||||||
|
|
||||||
|
|
||||||
def select_provider_and_model(args=None):
|
def select_provider_and_model(args=None):
|
||||||
"""Core provider selection + model picking logic.
|
"""Core provider selection + model picking logic.
|
||||||
|
|
||||||
|
|
@ -1907,7 +1922,7 @@ def select_provider_and_model(args=None):
|
||||||
"ollama-cloud",
|
"ollama-cloud",
|
||||||
"tencent-tokenhub",
|
"tencent-tokenhub",
|
||||||
"lmstudio",
|
"lmstudio",
|
||||||
):
|
) or _is_profile_api_key_provider(selected_provider):
|
||||||
_model_flow_api_key_provider(config, selected_provider, current_model)
|
_model_flow_api_key_provider(config, selected_provider, current_model)
|
||||||
|
|
||||||
# ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
|
# ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
|
||||||
|
|
@ -8215,6 +8230,22 @@ def cmd_logs(args):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_provider_choices() -> list[str]:
    """Return the accepted ``--provider`` values: ``"auto"`` plus every slug.

    Slugs are read from ``hermes_cli.models.CANONICAL_PROVIDERS``.  If that
    import (or reading the slugs) fails, a hard-coded list is returned
    instead so the CLI parser can always be built.
    """
    try:
        from hermes_cli.models import CANONICAL_PROVIDERS as _cp

        choices = ["auto"]
        choices.extend(entry.slug for entry in _cp)
        return choices
    except Exception:
        # Fallback: static list guarantees the CLI always works
        return [
            "auto",
            "openrouter",
            "nous",
            "openai-codex",
            "copilot-acp",
            "copilot",
            "anthropic",
            "gemini",
            "google-gemini-cli",
            "xai",
            "bedrock",
            "azure-foundry",
            "ollama-cloud",
            "huggingface",
            "zai",
            "kimi-coding",
            "kimi-coding-cn",
            "stepfun",
            "minimax",
            "minimax-cn",
            "kilocode",
            "xiaomi",
            "arcee",
            "nvidia",
            "deepseek",
            "alibaba",
            "qwen-oauth",
            "opencode-zen",
            "opencode-go",
        ]
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main entry point for hermes CLI."""
|
"""Main entry point for hermes CLI."""
|
||||||
from hermes_cli._parser import build_top_level_parser
|
from hermes_cli._parser import build_top_level_parser
|
||||||
|
|
|
||||||
|
|
@ -806,6 +806,25 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||||
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway"),
|
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway"),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Auto-extend CANONICAL_PROVIDERS with every profile registered in providers/
# whose slug is not in the list above.  A providers/*.py file is then enough
# to expose a provider in the model picker, /model, and all downstream
# consumers — this file needs no edit.  Best-effort: failures are swallowed.
_canonical_slugs = {p.slug for p in CANONICAL_PROVIDERS}
try:
    from providers import list_providers as _list_providers_for_canonical

    # non-api-key flows need bespoke picker UX; skip auto-inject
    _bespoke_auth_types = ("oauth_device_code", "oauth_external", "external_process", "aws_sdk", "copilot")

    for _pp in _list_providers_for_canonical():
        if _pp.name in _canonical_slugs or _pp.auth_type in _bespoke_auth_types:
            continue
        _label = _pp.display_name or _pp.name
        _desc = _pp.description or f"{_label} (direct API)"
        CANONICAL_PROVIDERS.append(ProviderEntry(_pp.name, _label, _desc))
        # Track the slug so a profile registered twice is only added once.
        _canonical_slugs.add(_pp.name)
except Exception:
    pass
|
||||||
|
|
||||||
# Derived dicts — used throughout the codebase
|
# Derived dicts — used throughout the codebase
|
||||||
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
|
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
|
||||||
_PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider
|
_PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider
|
||||||
|
|
@ -2023,6 +2042,34 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
||||||
return ids
|
return ids
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# ── Profile-based generic live fetch (all simple api-key providers) ──
|
||||||
|
# Handles any provider registered in providers/ with auth_type="api_key".
|
||||||
|
# Replaces per-provider copy-paste blocks (stepfun, gmi, zai, etc.).
|
||||||
|
try:
|
||||||
|
from providers import get_provider_profile
|
||||||
|
from hermes_cli.auth import resolve_api_key_provider_credentials
|
||||||
|
|
||||||
|
_p = get_provider_profile(normalized)
|
||||||
|
if _p and _p.auth_type == "api_key" and _p.base_url:
|
||||||
|
try:
|
||||||
|
creds = resolve_api_key_provider_credentials(normalized)
|
||||||
|
api_key = str(creds.get("api_key") or "").strip()
|
||||||
|
base_url = str(creds.get("base_url") or "").strip()
|
||||||
|
except Exception:
|
||||||
|
api_key, base_url = "", _p.base_url
|
||||||
|
if not base_url:
|
||||||
|
base_url = _p.base_url
|
||||||
|
if api_key:
|
||||||
|
live = _p.fetch_models(api_key=api_key)
|
||||||
|
if live:
|
||||||
|
return live
|
||||||
|
# Use profile's fallback_models if defined
|
||||||
|
if _p.fallback_models:
|
||||||
|
return list(_p.fallback_models)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
curated_static = list(_PROVIDER_MODELS.get(normalized, []))
|
curated_static = list(_PROVIDER_MODELS.get(normalized, []))
|
||||||
if normalized in _MODELS_DEV_PREFERRED:
|
if normalized in _MODELS_DEV_PREFERRED:
|
||||||
return _merge_with_models_dev(normalized, curated_static)
|
return _merge_with_models_dev(normalized, curated_static)
|
||||||
|
|
|
||||||
307
providers/README.md
Normal file
307
providers/README.md
Normal file
|
|
@ -0,0 +1,307 @@
|
||||||
|
# providers/
|
||||||
|
|
||||||
|
Single source of truth for every inference provider Hermes knows about.
|
||||||
|
|
||||||
|
Each provider is declared once here as a `ProviderProfile`. Every other layer —
|
||||||
|
auth resolution, transport kwargs, model listing, runtime routing — reads from
|
||||||
|
these profiles instead of maintaining its own parallel data.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Directory layout
|
||||||
|
|
||||||
|
```
|
||||||
|
providers/
|
||||||
|
├── base.py ProviderProfile dataclass + OMIT_TEMPERATURE sentinel
|
||||||
|
├── __init__.py Registry: register_provider(), get_provider_profile()
|
||||||
|
├── README.md This file
|
||||||
|
│
|
||||||
|
├── # Simple providers — just identity + auth + endpoint
|
||||||
|
├── alibaba.py Alibaba Cloud DashScope
|
||||||
|
├── arcee.py Arcee AI
|
||||||
|
├── bedrock.py AWS Bedrock (api_mode=bedrock_converse)
|
||||||
|
├── deepseek.py DeepSeek
|
||||||
|
├── huggingface.py Hugging Face Inference API
|
||||||
|
├── kilocode.py Kilo Code
|
||||||
|
├── minimax.py MiniMax (international + CN)
|
||||||
|
├── nvidia.py NVIDIA NIM (default_max_tokens=16384)
|
||||||
|
├── ollama_cloud.py Ollama Cloud
|
||||||
|
├── stepfun.py StepFun
|
||||||
|
├── xiaomi.py Xiaomi MiMo
|
||||||
|
├── xai.py xAI Grok (api_mode=codex_responses)
|
||||||
|
├── zai.py Z.AI / GLM
|
||||||
|
│
|
||||||
|
├── # Medium — one or two quirks
|
||||||
|
├── anthropic.py Native Anthropic (x-api-key header, api_mode=anthropic_messages)
|
||||||
|
├── copilot.py GitHub Copilot (auth_type=copilot, reasoning per model)
|
||||||
|
├── copilot_acp.py Copilot ACP subprocess (api_mode=copilot_acp)
|
||||||
|
├── custom.py Custom/Ollama local (think=false, num_ctx)
|
||||||
|
├── gemini.py Google Gemini AI Studio + Cloud Code OAuth
|
||||||
|
├── kimi.py Kimi Coding (OMIT_TEMPERATURE, thinking, dual endpoint)
|
||||||
|
├── openai_codex.py OpenAI Codex OAuth (api_mode=codex_responses)
|
||||||
|
├── opencode.py OpenCode Zen + Go (per-model api_mode routing)
|
||||||
|
│
|
||||||
|
├── # Complex — subclasses with multiple overrides
|
||||||
|
├── nous.py Nous Portal (tags, attribution, reasoning omit-when-disabled)
|
||||||
|
├── openrouter.py OpenRouter (provider preferences, public model fetch)
|
||||||
|
├── qwen.py Qwen OAuth (message normalization, cache_control, vl_hires)
|
||||||
|
└── vercel.py Vercel AI Gateway (attribution headers, reasoning passthrough)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ProviderProfile fields
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class ProviderProfile:
|
||||||
|
# Identity
|
||||||
|
name: str # canonical ID — auto-registered as PROVIDER_REGISTRY key for new api-key providers
|
||||||
|
api_mode: str # "chat_completions" | "anthropic_messages" |
|
||||||
|
# "codex_responses" | "bedrock_converse" | "copilot_acp"
|
||||||
|
aliases: tuple # alternate names resolved by get_provider_profile()
|
||||||
|
|
||||||
|
# Auth & endpoints
|
||||||
|
env_vars: tuple # env var names holding the API key, in priority order; names ending in _URL / _BASE_URL are treated as base-URL overrides
|
||||||
|
base_url: str # default inference endpoint
|
||||||
|
models_url: str # explicit models endpoint; falls back to {base_url}/models
|
||||||
|
# set when the models catalog lives at a different URL
|
||||||
|
# (e.g. OpenRouter: public /api/v1/models vs /api/v1 inference)
|
||||||
|
auth_type: str # "api_key" | "oauth_device_code" | "oauth_external" |
|
||||||
|
# "copilot" | "aws_sdk" | "external_process"
|
||||||
|
|
||||||
|
# Client-level quirks
|
||||||
|
default_headers: dict # extra HTTP headers sent on every request
|
||||||
|
|
||||||
|
# Request-level quirks
|
||||||
|
fixed_temperature: Any # None = use caller's default; OMIT_TEMPERATURE = don't send
|
||||||
|
default_max_tokens: int|None # inject max_tokens when caller omits it
|
||||||
|
default_aux_model: str # cheap model for auxiliary tasks (compression, vision, etc.)
|
||||||
|
# empty string = use main model (default)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Hooks (override in a subclass)
|
||||||
|
|
||||||
|
| Method | When to override |
|
||||||
|
|--------|-----------------|
|
||||||
|
| `prepare_messages(messages)` | Provider needs message pre-processing (Qwen: string → list-of-parts, cache_control) |
|
||||||
|
| `build_extra_body(*, session_id, **ctx)` | Provider-specific `extra_body` fields (Nous: tags, OpenRouter: provider preferences) |
|
||||||
|
| `build_api_kwargs_extras(*, reasoning_config, **ctx)` | Returns `(extra_body_additions, top_level_kwargs)` — use when some fields go to `extra_body` and some go top-level (Kimi: `reasoning_effort` top-level; OpenRouter: `reasoning` in extra_body) |
|
||||||
|
| `fetch_models(*, api_key, timeout)` | Custom model listing (Anthropic: x-api-key header; OpenRouter: public endpoint, no auth; Bedrock/copilot-acp: return None) |
|
||||||
|
|
||||||
|
All hooks have safe defaults — only override what differs from the base.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## How to add a new provider
|
||||||
|
|
||||||
|
### 1. Simple (standard OpenAI-compatible endpoint)
|
||||||
|
|
||||||
|
```python
|
||||||
|
# providers/myprovider.py
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
myprovider = ProviderProfile(
|
||||||
|
name="myprovider", # canonical ID; auto-registered in hermes_cli/auth.py PROVIDER_REGISTRY when not already declared there
|
||||||
|
aliases=("my-provider", "myp"),
|
||||||
|
api_mode="chat_completions",
|
||||||
|
env_vars=("MYPROVIDER_API_KEY",),
|
||||||
|
base_url="https://api.myprovider.com/v1",
|
||||||
|
auth_type="api_key",
|
||||||
|
)
|
||||||
|
|
||||||
|
register_provider(myprovider)
|
||||||
|
```
|
||||||
|
|
||||||
|
The default `fetch_models()` will call `GET https://api.myprovider.com/v1/models`
|
||||||
|
with Bearer auth automatically. No override needed for standard `/v1/models`.
|
||||||
|
|
||||||
|
### 2. With quirks (subclass)
|
||||||
|
|
||||||
|
```python
|
||||||
|
# providers/myprovider.py
|
||||||
|
from typing import Any
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
|
||||||
|
class MyProviderProfile(ProviderProfile):
|
||||||
|
"""My provider — custom reasoning header."""
|
||||||
|
|
||||||
|
def build_api_kwargs_extras(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
reasoning_config: dict | None = None,
|
||||||
|
**ctx: Any,
|
||||||
|
) -> tuple[dict[str, Any], dict[str, Any]]:
|
||||||
|
extra_body: dict[str, Any] = {}
|
||||||
|
if reasoning_config:
|
||||||
|
extra_body["my_reasoning"] = reasoning_config.get("effort", "medium")
|
||||||
|
return extra_body, {}
|
||||||
|
|
||||||
|
def fetch_models(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
api_key: str | None = None,
|
||||||
|
timeout: float = 8.0,
|
||||||
|
) -> list[str] | None:
|
||||||
|
# Override only if your endpoint differs from standard /v1/models
|
||||||
|
return super().fetch_models(api_key=api_key, timeout=timeout)
|
||||||
|
|
||||||
|
|
||||||
|
myprovider = MyProviderProfile(
|
||||||
|
name="myprovider",
|
||||||
|
aliases=("myp",),
|
||||||
|
env_vars=("MYPROVIDER_API_KEY",),
|
||||||
|
base_url="https://api.myprovider.com/v1",
|
||||||
|
)
|
||||||
|
|
||||||
|
register_provider(myprovider)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Wire it up
|
||||||
|
|
||||||
|
After creating the file, add `name` to the `_PROFILE_ACTIVE_PROVIDERS` set in
|
||||||
|
`run_agent.py` once you've verified parity against the legacy flag path. Start
|
||||||
|
with a simple provider (no message prep, no reasoning quirks) and work up.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## fetch_models contract
|
||||||
|
|
||||||
|
```python
|
||||||
|
def fetch_models(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
api_key: str | None = None,
|
||||||
|
timeout: float = 8.0,
|
||||||
|
) -> list[str] | None:
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
- Returns `list[str]`: model IDs from the provider's live endpoint.
|
||||||
|
- Returns `None`: provider doesn't support REST model listing (Bedrock, copilot-acp),
|
||||||
|
or the request failed. Callers **must** fall back to `_PROVIDER_MODELS` on `None`.
|
||||||
|
- Never raises — swallow exceptions and return `None`.
|
||||||
|
- Default implementation: `GET {base_url}/models` with Bearer auth. Works for any
|
||||||
|
standard OpenAI-compatible provider.
|
||||||
|
|
||||||
|
**Override when:**
|
||||||
|
- Auth header is not `Bearer` (Anthropic: `x-api-key`)
|
||||||
|
- Endpoint path differs from `/models` AND you can't just set `models_url` (OpenRouter: public endpoint, pass `api_key=None` explicitly)
|
||||||
|
- Response format differs (extra wrapping, non-standard `id` field)
|
||||||
|
- Provider has no REST endpoint (Bedrock, copilot-acp → return `None`)
|
||||||
|
- Filtering needed post-fetch (only tool-capable models, etc.)
|
||||||
|
|
||||||
|
Use `models_url` instead of overriding when the only difference is the URL:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# No subclass needed — just set models_url
|
||||||
|
myprovider = ProviderProfile(
|
||||||
|
name="myprovider",
|
||||||
|
base_url="https://api.myprovider.com/v1",
|
||||||
|
models_url="https://catalog.myprovider.com/models", # different host
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Debugging
|
||||||
|
|
||||||
|
### Check if a provider resolves
|
||||||
|
|
||||||
|
```python
|
||||||
|
from providers import get_provider_profile
|
||||||
|
|
||||||
|
p = get_provider_profile("myprovider")
|
||||||
|
print(p) # ProviderProfile(name='myprovider', ...)
|
||||||
|
print(p.base_url)
|
||||||
|
print(p.api_mode)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check all registered providers
|
||||||
|
|
||||||
|
```python
|
||||||
|
from providers import _REGISTRY
|
||||||
|
print(list(_REGISTRY.keys()))
|
||||||
|
```
|
||||||
|
|
||||||
|
### Test live model fetch
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
from providers import get_provider_profile
|
||||||
|
|
||||||
|
p = get_provider_profile("myprovider")
|
||||||
|
key = os.getenv("MYPROVIDER_API_KEY")
|
||||||
|
models = p.fetch_models(api_key=key, timeout=5.0)
|
||||||
|
print(models) # list of model IDs, or None on failure
|
||||||
|
```
|
||||||
|
|
||||||
|
### Test alias resolution
|
||||||
|
|
||||||
|
```python
|
||||||
|
from providers import get_provider_profile
|
||||||
|
|
||||||
|
# All of these should return the same profile
|
||||||
|
assert get_provider_profile("openrouter").name == "openrouter"
|
||||||
|
assert get_provider_profile("or").name == "openrouter"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Run the provider test suite
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# From the repo root
|
||||||
|
source venv/bin/activate
|
||||||
|
python -m pytest tests/providers/ -v
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check ruff + ty compliance
|
||||||
|
|
||||||
|
```bash
|
||||||
|
source venv/bin/activate
|
||||||
|
ruff format providers/*.py
|
||||||
|
ruff check providers/*.py --select UP,E,F,I,W
|
||||||
|
ty check providers/*.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Common mistakes
|
||||||
|
|
||||||
|
**Wrong `name`** — must be the same string that appears as the key in
|
||||||
|
`hermes_cli/auth.py` `PROVIDER_REGISTRY`. New api-key providers auto-register
|
||||||
|
into `PROVIDER_REGISTRY` from the profile, so the name IS the key. For providers
|
||||||
|
with a pre-existing `PROVIDER_REGISTRY` entry, use the exact `id` field value.
|
||||||
|
|
||||||
|
**Wrong `env_vars`** — separate API-key vars from base-URL override vars in the
|
||||||
|
tuple. Env vars that end with `_BASE_URL` or `_URL` are treated as URL overrides;
|
||||||
|
everything else is treated as an API key. Getting this wrong causes the doctor
|
||||||
|
health check to send a URL string as a Bearer token.
|
||||||
|
|
||||||
|
**Wrong `base_url`** — several providers have non-obvious paths:
|
||||||
|
`stepfun: /step_plan/v1`, `opencode-go: /zen/go/v1`. The profile's `base_url`
|
||||||
|
is also used as the `inference_base_url` when auto-registering into `PROVIDER_REGISTRY`
|
||||||
|
for new providers, so it must be correct for auth resolution to work.
|
||||||
|
|
||||||
|
**Skipping `api_mode`** — defaults to `chat_completions`. Providers that use
|
||||||
|
`anthropic_messages`, `codex_responses`, `bedrock_converse`, or `copilot_acp`
|
||||||
|
must set it explicitly.
|
||||||
|
|
||||||
|
**Forgetting `register_provider()`** — auto-discovery runs `pkgutil.iter_modules`
over the package and imports every module, but a profile only lands in
`_REGISTRY` if its module calls `register_provider()` at module level. Without
that call the module is still imported, yet its profile is never registered.
|
||||||
|
|
||||||
|
**`fetch_models` returning the wrong shape** — must return `list[str]` (plain
|
||||||
|
model IDs), not `list[tuple]` or `list[dict]`. Callers expect plain strings.
|
||||||
|
|
||||||
|
**Wrong `build_api_kwargs_extras` return shape** — must return a 2-tuple
|
||||||
|
`(extra_body_dict, top_level_dict)`. Returning a single dict causes a
|
||||||
|
`ValueError: not enough values to unpack` in the transport.
|
||||||
|
|
||||||
|
**Swapped `build_api_kwargs_extras` tuple order** — the first element is always
the `extra_body` additions and the second is the top-level kwargs. Returning
`(top_level_dict, extra_body_dict)` raises no error; the fields are silently
sent to the wrong place.
|
||||||
76
providers/__init__.py
Normal file
76
providers/__init__.py
Normal file
|
|
@ -0,0 +1,76 @@
|
||||||
|
"""Provider module registry.
|
||||||
|
|
||||||
|
Auto-discovers ProviderProfile instances from providers/*.py modules.
|
||||||
|
Each module registers its profile(s) by calling register_provider() at import time.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from providers import get_provider_profile
|
||||||
|
profile = get_provider_profile("nvidia") # returns ProviderProfile or None
|
||||||
|
profile = get_provider_profile("kimi") # checks name + aliases
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from providers.base import OMIT_TEMPERATURE, ProviderProfile # noqa: F401
|
||||||
|
|
||||||
|
# Canonical provider name -> profile object.
_REGISTRY: dict[str, ProviderProfile] = {}
# Alias -> canonical provider name (resolved through _REGISTRY on lookup).
_ALIASES: dict[str, str] = {}
# Flipped by _discover_providers() so the module scan runs at most once.
_discovered = False


def register_provider(profile: ProviderProfile) -> None:
    """Store *profile* under its name and point each of its aliases at it.

    Registering another profile with the same name, or re-using an alias,
    overwrites the earlier entry.
    """
    canonical = profile.name
    _REGISTRY[canonical] = profile
    _ALIASES.update({alias: canonical for alias in profile.aliases})
|
||||||
|
|
||||||
|
|
||||||
|
def get_provider_profile(name: str) -> ProviderProfile | None:
    """Resolve *name* (canonical name or alias) to its provider profile.

    Triggers provider discovery on first use. Returns None when nothing is
    registered under the resolved name, so callers can fall back to generic
    handling.
    """
    if not _discovered:
        _discover_providers()
    try:
        key = _ALIASES[name]
    except KeyError:
        # Not an alias: treat the input as a canonical provider name.
        key = name
    return _REGISTRY.get(key)
|
||||||
|
|
||||||
|
|
||||||
|
def list_providers() -> list[ProviderProfile]:
    """Return every registered profile exactly once, in registry order."""
    if not _discovered:
        _discover_providers()
    # Key by object identity so a profile object stored under more than one
    # registry name is only reported once (first occurrence wins).
    unique: dict[int, ProviderProfile] = {}
    for entry in _REGISTRY.values():
        unique.setdefault(id(entry), entry)
    return list(unique.values())
|
||||||
|
|
||||||
|
|
||||||
|
def _discover_providers() -> None:
    """Import every provider module so its register_provider() calls run.

    Modules whose name starts with "_" and the ``base`` module are skipped.
    Discovery is best-effort per module: a module that fails to import, for
    any reason, is logged as a warning and skipped so that one broken
    provider file cannot prevent the remaining providers from registering.
    """
    global _discovered
    if _discovered:
        return
    # Flip the flag before importing anything so that a lookup made while the
    # provider modules are still being imported does not re-enter discovery.
    _discovered = True

    import importlib
    import logging
    import pkgutil

    import providers as _pkg

    for _importer, modname, _ispkg in pkgutil.iter_modules(_pkg.__path__):
        if modname.startswith("_") or modname == "base":
            continue
        try:
            importlib.import_module(f"providers.{modname}")
        except Exception as e:
            # Not only ImportError: a module can also fail with e.g. a
            # TypeError from a bad ProviderProfile(...) call. Letting that
            # escape would abort the scan with _discovered already True,
            # leaving a silently partial registry for every later lookup.
            logging.getLogger(__name__).warning(
                "Failed to import provider module %s: %s", modname, e
            )
|
||||||
13
providers/alibaba.py
Normal file
13
providers/alibaba.py
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
"""Alibaba Cloud DashScope provider profile."""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
# DashScope compatible-mode endpoint; every field not listed here keeps its
# ProviderProfile default.
alibaba = ProviderProfile(
    name="alibaba",
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
    env_vars=("DASHSCOPE_API_KEY",),
    aliases=("dashscope", "alibaba-cloud", "qwen-dashscope"),
)

register_provider(alibaba)
|
||||||
21
providers/alibaba_coding_plan.py
Normal file
21
providers/alibaba_coding_plan.py
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
"""Alibaba Cloud Coding Plan provider profile.
|
||||||
|
|
||||||
|
Separate from the standard `alibaba` profile because it hits a different
|
||||||
|
endpoint (coding-intl.dashscope.aliyuncs.com) with a dedicated API key tier.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
alibaba_coding_plan = ProviderProfile(
    name="alibaba-coding-plan",
    display_name="Alibaba Cloud (Coding Plan)",
    description="Alibaba Cloud Coding Plan — dedicated coding tier",
    signup_url="https://help.aliyun.com/zh/model-studio/",
    aliases=("alibaba_coding", "alibaba-coding", "dashscope-coding"),
    auth_type="api_key",
    base_url="https://coding-intl.dashscope.aliyuncs.com/v1",
    # API-key variables first, then the base-URL override variable.
    env_vars=(
        "ALIBABA_CODING_PLAN_API_KEY",
        "DASHSCOPE_API_KEY",
        "ALIBABA_CODING_PLAN_BASE_URL",
    ),
)

register_provider(alibaba_coding_plan)
|
||||||
52
providers/anthropic.py
Normal file
52
providers/anthropic.py
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
"""Native Anthropic provider profile."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class AnthropicProfile(ProviderProfile):
    """Native Anthropic — uses x-api-key header, not Bearer."""

    def fetch_models(
        self,
        *,
        api_key: str | None = None,
        timeout: float = 8.0,
    ) -> list[str] | None:
        """List model IDs from Anthropic's models endpoint.

        Authenticates with the ``x-api-key`` and ``anthropic-version`` headers.
        Returns None when no API key is supplied, or when the request or the
        response parsing fails (the failure is logged at debug level).
        """
        if not api_key:
            return None
        headers = {
            "x-api-key": api_key,
            "anthropic-version": "2023-06-01",
            "Accept": "application/json",
        }
        try:
            request = urllib.request.Request(
                "https://api.anthropic.com/v1/models", headers=headers
            )
            with urllib.request.urlopen(request, timeout=timeout) as resp:
                payload = json.loads(resp.read().decode())
            # Keep only well-formed entries that actually carry an "id".
            return [
                entry["id"]
                for entry in payload.get("data", [])
                if isinstance(entry, dict) and "id" in entry
            ]
        except Exception as exc:
            logger.debug("fetch_models(anthropic): %s", exc)
            return None
|
||||||
|
|
||||||
|
|
||||||
|
# Registered with the AnthropicProfile subclass so model listing uses the
# x-api-key header instead of Bearer auth.
anthropic = AnthropicProfile(
    name="anthropic",
    api_mode="anthropic_messages",
    auth_type="api_key",
    base_url="https://api.anthropic.com",
    env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
    aliases=("claude", "claude-oauth", "claude-code"),
    default_aux_model="claude-haiku-4-5-20251001",
)

register_provider(anthropic)
|
||||||
13
providers/arcee.py
Normal file
13
providers/arcee.py
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
"""Arcee AI provider profile."""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
# Plain profile: only identity, key variable and endpoint are declared.
arcee = ProviderProfile(
    name="arcee",
    base_url="https://api.arcee.ai/api/v1",
    env_vars=("ARCEEAI_API_KEY",),
    aliases=("arcee-ai", "arceeai"),
)

register_provider(arcee)
|
||||||
21
providers/azure_foundry.py
Normal file
21
providers/azure_foundry.py
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
"""Azure AI Foundry provider profile.
|
||||||
|
|
||||||
|
Azure Foundry exposes an OpenAI-compatible endpoint; users supply their own
|
||||||
|
base URL at setup since endpoints are per-resource.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
azure_foundry = ProviderProfile(
    name="azure-foundry",
    display_name="Azure Foundry",
    description="Azure AI Foundry — OpenAI-compatible endpoint (user-supplied base URL)",
    signup_url="https://ai.azure.com/",
    aliases=("azure", "azure-ai-foundry", "azure-ai"),
    auth_type="api_key",
    env_vars=("AZURE_FOUNDRY_API_KEY", "AZURE_FOUNDRY_BASE_URL"),
    # Endpoints are per-resource, so there is no default; the user provides
    # the base URL at setup.
    base_url="",
)

register_provider(azure_foundry)
|
||||||
165
providers/base.py
Normal file
165
providers/base.py
Normal file
|
|
@ -0,0 +1,165 @@
|
||||||
|
"""Provider profile base class.
|
||||||
|
|
||||||
|
A ProviderProfile declares everything about an inference provider in one place:
|
||||||
|
auth, endpoints, client quirks, request-time quirks. The transport reads this
|
||||||
|
instead of receiving 20+ boolean flags.
|
||||||
|
|
||||||
|
Provider profiles are DECLARATIVE — they describe the provider's behavior.
|
||||||
|
They do NOT own client construction, credential rotation, or streaming.
|
||||||
|
Those stay on AIAgent.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Sentinel for "omit temperature entirely" (Kimi: server manages it)
OMIT_TEMPERATURE = object()


@dataclass
class ProviderProfile:
    """Base provider profile — subclass or instantiate with overrides.

    The fields are declarative provider metadata; the methods are hooks with
    safe defaults that subclasses override only where a provider differs.
    """

    # ── Identity ─────────────────────────────────────────────
    name: str
    api_mode: str = "chat_completions"
    aliases: tuple[str, ...] = ()

    # ── Human-readable metadata ───────────────────────────────
    display_name: str = ""  # e.g. "GMI Cloud" — shown in picker/labels
    description: str = ""  # e.g. "GMI Cloud (multi-model direct API)" — picker subtitle
    signup_url: str = ""  # e.g. "https://www.gmicloud.ai/" — shown during setup

    # ── Auth & endpoints ─────────────────────────────────────
    env_vars: tuple[str, ...] = ()
    base_url: str = ""
    models_url: str = ""  # explicit models endpoint; falls back to {base_url}/models
    # api_key|oauth_device_code|oauth_external|copilot|aws_sdk|external_process
    auth_type: str = "api_key"

    # ── Model catalog ─────────────────────────────────────────
    # fallback_models: curated list shown in /model picker when live fetch fails.
    # Only agentic models that support tool calling should appear here.
    fallback_models: tuple[str, ...] = ()

    # hostname: base hostname for URL→provider reverse-mapping in model_metadata.py
    # e.g. "api.gmi-serving.com". Derived from base_url when empty.
    hostname: str = ""

    # ── Client-level quirks (set once at client construction) ─
    default_headers: dict[str, str] = field(default_factory=dict)

    # ── Request-level quirks ─────────────────────────────────
    # Temperature: None = use caller's default, OMIT_TEMPERATURE = don't send
    fixed_temperature: Any = None
    default_max_tokens: int | None = None
    # Cheap model for auxiliary tasks (compression, vision, etc.);
    # empty = use main model.
    default_aux_model: str = ""

    # ── Hooks (override in subclass for complex providers) ───

    def get_hostname(self) -> str:
        """Return the provider's base hostname for URL-based detection.

        Uses self.hostname if set explicitly, otherwise derives it from base_url.
        e.g. 'https://api.gmi-serving.com/v1' → 'api.gmi-serving.com'
        Returns "" when neither is available.
        """
        if self.hostname:
            return self.hostname
        if self.base_url:
            from urllib.parse import urlparse

            return urlparse(self.base_url).hostname or ""
        return ""

    def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Provider-specific message preprocessing.

        Called AFTER codex field sanitization, BEFORE developer role swap.
        Default: pass-through (the same list object is returned).
        """
        return messages

    def build_extra_body(
        self, *, session_id: str | None = None, **context: Any
    ) -> dict[str, Any]:
        """Provider-specific extra_body fields.

        Merged into the API kwargs extra_body. Default: empty dict.
        """
        return {}

    def build_api_kwargs_extras(
        self,
        *,
        reasoning_config: dict | None = None,
        **context: Any,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Provider-specific kwargs split between extra_body and top-level api_kwargs.

        Returns (extra_body_additions, top_level_kwargs).
        The transport merges extra_body_additions into extra_body, and
        top_level_kwargs directly into api_kwargs.

        This split exists because some providers put reasoning config in
        extra_body (OpenRouter: extra_body.reasoning) while others put it
        as top-level api_kwargs (Kimi: api_kwargs.reasoning_effort).

        Default: ({}, {}).
        """
        return {}, {}

    def fetch_models(
        self,
        *,
        api_key: str | None = None,
        timeout: float = 8.0,
    ) -> list[str] | None:
        """Fetch the live model list from the provider's models endpoint.

        Returns a list of model ID strings, or None if the fetch failed or
        the provider does not support live model listing. Never raises:
        every failure (bad URL, network error, unexpected payload) is logged
        at debug level and reported as None.

        Resolution order for the endpoint URL:
          1. self.models_url  (explicit override — use when the models
             endpoint differs from the inference base URL, e.g. OpenRouter
             exposes a public catalog at /api/v1/models while inference is
             at /api/v1)
          2. self.base_url + "/models"  (standard OpenAI-compat fallback)

        The default implementation sends Bearer auth when api_key is given
        and forwards self.default_headers. Override to customise auth, path,
        response shape, or to return None for providers with no REST catalog.

        Callers must always fall back to the static _PROVIDER_MODELS list
        when this returns None.
        """
        url = (self.models_url or "").strip()
        if not url:
            if not self.base_url:
                return None
            url = self.base_url.rstrip("/") + "/models"

        import json
        import urllib.request

        try:
            # Build the request inside the try as well: Request() raises
            # ValueError for a URL without a scheme (e.g. a user-supplied
            # base_url like "localhost:11434/v1"), and that must not escape.
            req = urllib.request.Request(url)
            if api_key:
                req.add_header("Authorization", f"Bearer {api_key}")
            req.add_header("Accept", "application/json")
            for k, v in self.default_headers.items():
                req.add_header(k, v)

            with urllib.request.urlopen(req, timeout=timeout) as resp:
                data = json.loads(resp.read().decode())
            # Accept both a bare JSON list and the {"data": [...]} envelope.
            items = data if isinstance(data, list) else data.get("data", [])
            return [m["id"] for m in items if isinstance(m, dict) and "id" in m]
        except Exception as exc:
            logger.debug("fetch_models(%s): %s", self.name, exc)
            return None
|
||||||
29
providers/bedrock.py
Normal file
29
providers/bedrock.py
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
"""AWS Bedrock provider profile."""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
|
||||||
|
class BedrockProfile(ProviderProfile):
    """AWS Bedrock — no REST /v1/models endpoint; uses AWS SDK."""

    def fetch_models(
        self,
        *,
        api_key: str | None = None,
        timeout: float = 8.0,
    ) -> list[str] | None:
        """Always report "no live listing" for Bedrock.

        Both arguments exist only to keep the signature compatible with
        ProviderProfile.fetch_models; no request is made.
        """
        del api_key, timeout  # unused: listing needs the AWS SDK, not REST
        return None
|
||||||
|
|
||||||
|
|
||||||
|
bedrock = BedrockProfile(
    name="bedrock",
    api_mode="bedrock_converse",
    auth_type="aws_sdk",
    base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
    aliases=("aws", "aws-bedrock", "amazon-bedrock", "amazon"),
    # Credentials come from the AWS SDK, so no env vars are declared here.
    env_vars=(),
)

register_provider(bedrock)
|
||||||
58
providers/copilot.py
Normal file
58
providers/copilot.py
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
"""Copilot / GitHub Models provider profile.
|
||||||
|
|
||||||
|
Copilot uses per-model api_mode routing:
|
||||||
|
- GPT-5+ / Codex models → codex_responses
|
||||||
|
- Claude models → anthropic_messages
|
||||||
|
- Everything else → chat_completions (this profile covers that subset)
|
||||||
|
|
||||||
|
Key quirks for the chat_completions subset:
|
||||||
|
- Editor attribution headers (via copilot_default_headers())
|
||||||
|
- GitHub Models reasoning extra_body (model-catalog gated)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
|
||||||
|
class CopilotProfile(ProviderProfile):
    """GitHub Copilot / GitHub Models — editor headers + reasoning."""

    def build_api_kwargs_extras(
        self,
        *,
        model: str | None = None,
        reasoning_config: dict | None = None,
        supports_reasoning: bool = False,
        **ctx: Any,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Build the GitHub Models ``reasoning`` extra_body entry.

        Nothing is emitted unless ``supports_reasoning`` is true and a model
        is given. The efforts the model supports are looked up through
        ``hermes_cli.models.github_model_reasoning_efforts``:

        - with a ``reasoning_config``: its ``effort`` (default "medium",
          "xhigh" mapped to "high") is sent only if the model supports it;
        - without a ``reasoning_config``: "medium" is sent whenever the model
          reports any supported efforts.

        Returns ``(extra_body_additions, {})`` — this provider never adds
        top-level kwargs. Lookup failures are logged at debug level and
        result in no reasoning field rather than an error.
        """
        extra_body: dict[str, Any] = {}
        if supports_reasoning and model:
            try:
                from hermes_cli.models import github_model_reasoning_efforts

                supported_efforts = github_model_reasoning_efforts(model)
                if supported_efforts and reasoning_config:
                    effort = reasoning_config.get("effort", "medium")
                    # Normalize non-standard effort levels to the nearest supported
                    if effort == "xhigh":
                        effort = "high"
                    if effort in supported_efforts:
                        extra_body["reasoning"] = {"effort": effort}
                elif supported_efforts:
                    extra_body["reasoning"] = {"effort": "medium"}
            except Exception as exc:
                # Best-effort: reasoning is optional, so a failed catalog
                # lookup must not break the request — but leave a trace
                # instead of swallowing the error silently.
                import logging

                logging.getLogger(__name__).debug(
                    "copilot reasoning lookup failed for %s: %s", model, exc
                )
        return extra_body, {}
|
||||||
|
|
||||||
|
|
||||||
|
copilot = CopilotProfile(
    name="copilot",
    auth_type="copilot",
    base_url="https://api.githubcopilot.com",
    # Token variables, listed in the same order as before.
    env_vars=("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"),
    aliases=("github-copilot", "github-models", "github-model", "github"),
)

register_provider(copilot)
|
||||||
34
providers/copilot_acp.py
Normal file
34
providers/copilot_acp.py
Normal file
|
|
@ -0,0 +1,34 @@
|
||||||
|
"""GitHub Copilot ACP provider profile.
|
||||||
|
|
||||||
|
copilot-acp uses an external ACP subprocess — NOT the standard
|
||||||
|
transport. api_mode="copilot_acp" is handled separately in run_agent.py.
|
||||||
|
The profile captures auth + endpoint metadata for registry migration.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
|
||||||
|
class CopilotACPProfile(ProviderProfile):
    """GitHub Copilot ACP — external process, no REST models endpoint."""

    def fetch_models(
        self,
        *,
        api_key: str | None = None,
        timeout: float = 8.0,
    ) -> list[str] | None:
        """Always return None: model listing is left to the ACP subprocess.

        The arguments are accepted only for signature compatibility with
        ProviderProfile.fetch_models; no request is made.
        """
        del api_key, timeout  # unused
        return None
|
||||||
|
|
||||||
|
|
||||||
|
# NOTE(review): the module docstring mentions api_mode="copilot_acp", while
# this profile declares "chat_completions" — confirm which one is intended.
copilot_acp = CopilotACPProfile(
    name="copilot-acp",
    api_mode="chat_completions",  # ACP subprocess uses chat_completions routing
    auth_type="external_process",
    base_url="acp://copilot",  # ACP internal scheme
    env_vars=(),  # Managed by ACP subprocess
    aliases=("github-copilot-acp", "copilot-acp-agent"),
)

register_provider(copilot_acp)
|
||||||
68
providers/custom.py
Normal file
68
providers/custom.py
Normal file
|
|
@ -0,0 +1,68 @@
|
||||||
|
"""Custom / Ollama (local) provider profile.
|
||||||
|
|
||||||
|
Covers any endpoint registered as provider="custom", including local
|
||||||
|
Ollama instances. Key quirks:
|
||||||
|
- ollama_num_ctx → extra_body.options.num_ctx (local context window)
|
||||||
|
- reasoning_config disabled → extra_body.think = False
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
|
||||||
|
class CustomProfile(ProviderProfile):
    """Custom/Ollama local provider — think=false and num_ctx support."""

    def build_api_kwargs_extras(
        self,
        *,
        reasoning_config: dict | None = None,
        ollama_num_ctx: int | None = None,
        **ctx: Any,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Build extra_body additions for custom/Ollama endpoints.

        - A truthy ``ollama_num_ctx`` becomes ``options.num_ctx``.
        - A ``reasoning_config`` dict with effort "none" (case-insensitive,
          surrounding whitespace ignored) or ``enabled`` set to False adds
          ``think = False``.

        Returns ``(extra_body_additions, {})``; no top-level kwargs are added.
        """
        body: dict[str, Any] = {}

        # Ollama context window
        if ollama_num_ctx:
            body["options"] = {"num_ctx": ollama_num_ctx}

        # Disable thinking when reasoning is turned off
        if reasoning_config and isinstance(reasoning_config, dict):
            effort = (reasoning_config.get("effort") or "").strip().lower()
            reasoning_off = reasoning_config.get("enabled", True) is False
            if effort == "none" or reasoning_off:
                body["think"] = False

        return body, {}

    def fetch_models(
        self,
        *,
        api_key: str | None = None,
        timeout: float = 8.0,
    ) -> list[str] | None:
        """List models via the base implementation when a base_url is set.

        Returns None without making a request when no base_url is configured.
        """
        if self.base_url:
            return super().fetch_models(api_key=api_key, timeout=timeout)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
custom = CustomProfile(
    name="custom",
    base_url="",  # User-configured
    env_vars=(),  # No fixed key — custom endpoint
    aliases=("ollama", "local", "vllm", "llamacpp", "llama.cpp", "llama-cpp"),
)

register_provider(custom)
|
||||||
20
providers/deepseek.py
Normal file
20
providers/deepseek.py
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
"""DeepSeek provider profile."""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
deepseek = ProviderProfile(
    name="deepseek",
    display_name="DeepSeek",
    description="DeepSeek — native DeepSeek API",
    signup_url="https://platform.deepseek.com/",
    aliases=("deepseek-chat",),
    base_url="https://api.deepseek.com/v1",
    env_vars=("DEEPSEEK_API_KEY",),
    # Static model list declared for the case where no live list is available.
    fallback_models=("deepseek-chat", "deepseek-reasoner"),
)

register_provider(deepseek)
|
||||||
72
providers/gemini.py
Normal file
72
providers/gemini.py
Normal file
|
|
@ -0,0 +1,72 @@
|
||||||
|
"""Google Gemini provider profiles.
|
||||||
|
|
||||||
|
gemini: Google AI Studio (API key) — uses GeminiNativeClient
|
||||||
|
google-gemini-cli: Google Cloud Code Assist (OAuth) — uses GeminiCloudCodeClient
|
||||||
|
|
||||||
|
Both report api_mode="chat_completions" but use custom native clients
|
||||||
|
that bypass the standard OpenAI transport. The profile captures auth
|
||||||
|
and endpoint metadata for auth.py / runtime_provider.py migration, and
|
||||||
|
carries the thinking_config translation hook so the transport's profile
|
||||||
|
path produces the same extra_body shape the legacy flag path did.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
|
||||||
|
class GeminiProfile(ProviderProfile):
    """Gemini profile that maps reasoning_config onto a thinking_config payload."""

    def build_extra_body(
        self, *, session_id: str | None = None, **context: Any
    ) -> dict[str, Any]:
        """Build the Gemini-specific ``extra_body`` fragment.

        Reads ``model``, ``reasoning_config`` and ``base_url`` from *context*;
        a missing/empty ``base_url`` falls back to ``self.base_url``.

        Returns:
            ``{}`` when no thinking config is produced. Otherwise
            ``{"thinking_config": ...}``, except for the profile named
            ``"gemini"`` on an OpenAI-compat base URL, where the snake-cased
            config is nested as
            ``{"extra_body": {"google": {"thinking_config": ...}}}``
            (or ``{}`` if the snake-cased config comes back empty).

        ``session_id`` is accepted but not used by this implementation.
        """
        # Imported inside the hook rather than at module level.
        # NOTE(review): presumably to avoid an import cycle with the
        # transport module — confirm.
        from agent.transports.chat_completions import (
            _build_gemini_thinking_config,
            _is_gemini_openai_compat_base_url,
            _snake_case_gemini_thinking_config,
        )

        model_name = context.get("model") or ""
        endpoint = context.get("base_url") or self.base_url

        raw_config = _build_gemini_thinking_config(
            model_name, context.get("reasoning_config")
        )
        if not raw_config:
            return {}

        # Only the API-key "gemini" profile can sit behind the OpenAI-compat
        # subpath; every other case uses the native top-level key.
        nested_shape = self.name == "gemini" and _is_gemini_openai_compat_base_url(
            endpoint
        )
        if not nested_shape:
            return {"thinking_config": raw_config}

        snake_config = _snake_case_gemini_thinking_config(raw_config)
        if not snake_config:
            return {}
        return {"extra_body": {"google": {"thinking_config": snake_config}}}
|
||||||
|
|
||||||
|
|
||||||
|
# Google AI Studio profile (API-key auth).
gemini = GeminiProfile(
    name="gemini",
    aliases=("google", "google-gemini", "google-ai-studio"),
    api_mode="chat_completions",
    auth_type="api_key",
    base_url="https://generativelanguage.googleapis.com/v1beta",
    env_vars=("GOOGLE_API_KEY", "GEMINI_API_KEY"),
    default_aux_model="gemini-3-flash-preview",
)

# Google Cloud Code Assist profile (external OAuth).
google_gemini_cli = GeminiProfile(
    name="google-gemini-cli",
    aliases=("gemini-cli", "gemini-oauth"),
    api_mode="chat_completions",
    auth_type="oauth_external",
    base_url="cloudcode-pa://google",  # Cloud Code Assist internal scheme
    env_vars=(),  # OAuth — no API key
)

# Both profiles are registered as a side effect of importing this module.
register_provider(gemini)
register_provider(google_gemini_cli)
|
||||||
26
providers/gmi.py
Normal file
26
providers/gmi.py
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
"""GMI Cloud provider profile."""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
# Declarative profile for GMI Cloud; a plain ProviderProfile instance, so no
# hooks are overridden.
gmi = ProviderProfile(
    name="gmi",
    display_name="GMI Cloud",
    description="GMI Cloud — multi-model direct API (slash-form model IDs)",
    signup_url="https://www.gmicloud.ai/",
    aliases=("gmi-cloud", "gmicloud"),
    auth_type="api_key",
    base_url="https://api.gmi-serving.com/v1",
    env_vars=("GMI_API_KEY", "GMI_BASE_URL"),
    default_aux_model="google/gemini-3.1-flash-lite-preview",
    fallback_models=(
        "zai-org/GLM-5.1-FP8",
        "deepseek-ai/DeepSeek-V3.2",
        "moonshotai/Kimi-K2.5",
        "google/gemini-3.1-flash-lite-preview",
        "anthropic/claude-sonnet-4.6",
        "openai/gpt-5.4",
    ),
)

# Registration happens as a side effect of importing this module.
register_provider(gmi)
|
||||||
20
providers/huggingface.py
Normal file
20
providers/huggingface.py
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
"""Hugging Face provider profile."""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
# Declarative profile for the Hugging Face router endpoint; a plain
# ProviderProfile instance, so no hooks are overridden.
huggingface = ProviderProfile(
    name="huggingface",
    display_name="HuggingFace",
    description="HuggingFace Inference API",
    signup_url="https://huggingface.co/settings/tokens",
    aliases=("hf", "hugging-face", "huggingface-hub"),
    base_url="https://router.huggingface.co/v1",
    env_vars=("HF_TOKEN",),
    fallback_models=(
        "Qwen/Qwen3.5-72B-Instruct",
        "deepseek-ai/DeepSeek-V3.2",
    ),
)

# Registration happens as a side effect of importing this module.
register_provider(huggingface)
|
||||||
14
providers/kilocode.py
Normal file
14
providers/kilocode.py
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
"""Kilo Code provider profile."""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
# Declarative profile for the Kilo Code gateway; a plain ProviderProfile
# instance, so no hooks are overridden.
kilocode = ProviderProfile(
    name="kilocode",
    aliases=("kilo-code", "kilo", "kilo-gateway"),
    base_url="https://api.kilo.ai/api/gateway",
    env_vars=("KILOCODE_API_KEY",),
    default_aux_model="google/gemini-3-flash-preview",
)

# Registration happens as a side effect of importing this module.
register_provider(kilocode)
|
||||||
71
providers/kimi.py
Normal file
71
providers/kimi.py
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
"""Kimi / Moonshot provider profiles.
|
||||||
|
|
||||||
|
Kimi has dual endpoints:
|
||||||
|
- sk-kimi-* keys → api.kimi.com/coding (Anthropic Messages API)
|
||||||
|
- legacy keys → api.moonshot.ai/v1 (OpenAI chat completions)
|
||||||
|
|
||||||
|
This module covers the chat_completions path (/v1 endpoint).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import OMIT_TEMPERATURE, ProviderProfile
|
||||||
|
|
||||||
|
|
||||||
|
class KimiProfile(ProviderProfile):
    """Kimi/Moonshot profile: emits ``thinking`` plus ``reasoning_effort``."""

    def build_api_kwargs_extras(
        self, *, reasoning_config: dict | None = None, **context
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Return ``(extra_body, top_level)`` additions for a Kimi request.

        Args:
            reasoning_config: Optional dict; the ``enabled`` and ``effort``
                keys are read. Anything falsy or not a dict is treated as
                "no config".
            **context: Accepted and ignored.

        Returns:
            * No config: thinking enabled, ``reasoning_effort="medium"``.
            * ``enabled is False``: thinking disabled, no ``reasoning_effort``.
            * Otherwise: thinking enabled, with the requested effort when it
              is one of ``low`` / ``medium`` / ``high`` (case- and
              whitespace-insensitive) and ``"medium"`` for anything else.
        """
        if not reasoning_config or not isinstance(reasoning_config, dict):
            return {"thinking": {"type": "enabled"}}, {"reasoning_effort": "medium"}

        # Only an explicit ``False`` disables thinking; a missing key or any
        # other value keeps it on.
        if reasoning_config.get("enabled", True) is False:
            return {"thinking": {"type": "disabled"}}, {}

        requested = (reasoning_config.get("effort") or "").strip().lower()
        level = requested if requested in ("low", "medium", "high") else "medium"
        return {"thinking": {"type": "enabled"}}, {"reasoning_effort": level}
|
||||||
|
|
||||||
|
|
||||||
|
# Moonshot international endpoint (api.moonshot.ai).
kimi = KimiProfile(
    name="kimi-coding",
    aliases=("kimi", "moonshot", "kimi-for-coding"),
    base_url="https://api.moonshot.ai/v1",
    env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"),
    default_headers={"User-Agent": "hermes-agent/1.0"},
    default_aux_model="kimi-k2-turbo-preview",
    default_max_tokens=32000,
    fixed_temperature=OMIT_TEMPERATURE,
)

# Moonshot China endpoint (api.moonshot.cn); otherwise the same settings.
kimi_cn = KimiProfile(
    name="kimi-coding-cn",
    aliases=("kimi-cn", "moonshot-cn"),
    base_url="https://api.moonshot.cn/v1",
    env_vars=("KIMI_CN_API_KEY",),
    default_headers={"User-Agent": "hermes-agent/1.0"},
    default_aux_model="kimi-k2-turbo-preview",
    default_max_tokens=32000,
    fixed_temperature=OMIT_TEMPERATURE,
)

# Both profiles are registered as a side effect of importing this module.
register_provider(kimi)
register_provider(kimi_cn)
|
||||||
45
providers/minimax.py
Normal file
45
providers/minimax.py
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
"""MiniMax provider profiles (international, China, and OAuth).
|
||||||
|
|
||||||
|
All profiles here use anthropic_messages api_mode — their inference_base_url
|
||||||
|
ends with /anthropic which triggers auto-detection to anthropic_messages.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
# International endpoint, API-key auth.
minimax = ProviderProfile(
    name="minimax",
    aliases=("mini-max",),
    api_mode="anthropic_messages",
    auth_type="api_key",
    base_url="https://api.minimax.io/anthropic",
    env_vars=("MINIMAX_API_KEY",),
    default_aux_model="MiniMax-M2.7",
)

# China endpoint, API-key auth.
minimax_cn = ProviderProfile(
    name="minimax-cn",
    aliases=("minimax-china", "minimax_cn"),
    api_mode="anthropic_messages",
    auth_type="api_key",
    base_url="https://api.minimaxi.com/anthropic",
    env_vars=("MINIMAX_CN_API_KEY",),
    default_aux_model="MiniMax-M2.7",
)

# International endpoint, external OAuth instead of an API key.
minimax_oauth = ProviderProfile(
    name="minimax-oauth",
    display_name="MiniMax (OAuth)",
    description="MiniMax via OAuth browser flow — no API key required",
    signup_url="https://api.minimax.io/",
    aliases=("minimax_oauth", "minimax-oauth-io"),
    api_mode="anthropic_messages",
    auth_type="oauth_external",
    base_url="https://api.minimax.io/anthropic",
    env_vars=(),  # OAuth — tokens in auth.json, not env
    default_aux_model="MiniMax-M2.7-highspeed",
)

# All three profiles are registered as a side effect of importing this module.
register_provider(minimax)
register_provider(minimax_cn)
register_provider(minimax_oauth)
|
||||||
53
providers/nous.py
Normal file
53
providers/nous.py
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
"""Nous Portal provider profile."""
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
|
||||||
|
class NousProfile(ProviderProfile):
    """Nous Portal profile: product tag plus reasoning that is dropped when disabled."""

    def build_extra_body(
        self, *, session_id: str | None = None, **context
    ) -> dict[str, Any]:
        """Return the fixed product tag; ``session_id`` and context are ignored."""
        return {"tags": ["product=hermes-agent"]}

    def build_api_kwargs_extras(
        self,
        *,
        reasoning_config: dict | None = None,
        supports_reasoning: bool = False,
        **context,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Return ``(extra_body, top_level)``; ``top_level`` is always empty.

        ``extra_body["reasoning"]`` is set only when ``supports_reasoning`` is
        truthy:

        * ``reasoning_config is None``: ``{"enabled": True, "effort": "medium"}``.
        * config with ``enabled is False``: nothing is emitted.
        * any other config: a shallow copy of the config.
        """
        if not supports_reasoning:
            return {}, {}

        if reasoning_config is None:
            return {"reasoning": {"enabled": True, "effort": "medium"}}, {}

        # Copy so the caller's dict is never shared with the request payload.
        snapshot = dict(reasoning_config)
        if snapshot.get("enabled") is False:
            # Nous omits the reasoning block entirely when it is disabled.
            return {}, {}
        return {"reasoning": snapshot}, {}
|
||||||
|
|
||||||
|
|
||||||
|
# Nous Portal profile; request shaping comes from the NousProfile hooks.
nous = NousProfile(
    name="nous",
    display_name="Nous Research",
    description="Nous Research — Hermes model family",
    signup_url="https://nousresearch.com/",
    aliases=("nous-portal", "nousresearch"),
    auth_type="oauth_device_code",
    base_url="https://inference.nousresearch.com/v1",
    env_vars=("NOUS_API_KEY",),
    fallback_models=(
        "hermes-3-405b",
        "hermes-3-70b",
    ),
)

# Registration happens as a side effect of importing this module.
register_provider(nous)
|
||||||
21
providers/nvidia.py
Normal file
21
providers/nvidia.py
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
"""NVIDIA NIM provider profile."""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
# Declarative profile for NVIDIA NIM; a plain ProviderProfile instance, so no
# hooks are overridden.
nvidia = ProviderProfile(
    name="nvidia",
    display_name="NVIDIA NIM",
    description="NVIDIA NIM — accelerated inference",
    signup_url="https://build.nvidia.com/",
    aliases=("nvidia-nim",),
    base_url="https://integrate.api.nvidia.com/v1",
    env_vars=("NVIDIA_API_KEY",),
    default_max_tokens=16384,
    fallback_models=(
        "nvidia/llama-3.1-nemotron-70b-instruct",
        "nvidia/llama-3.3-70b-instruct",
    ),
)

# Registration happens as a side effect of importing this module.
register_provider(nvidia)
|
||||||
14
providers/ollama_cloud.py
Normal file
14
providers/ollama_cloud.py
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
"""Ollama Cloud provider profile."""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
# Declarative profile for Ollama Cloud; a plain ProviderProfile instance, so
# no hooks are overridden.
ollama_cloud = ProviderProfile(
    name="ollama-cloud",
    aliases=("ollama_cloud",),
    base_url="https://ollama.com/v1",
    env_vars=("OLLAMA_API_KEY",),
    default_aux_model="nemotron-3-nano:30b",
)

# Registration happens as a side effect of importing this module.
register_provider(ollama_cloud)
|
||||||
15
providers/openai_codex.py
Normal file
15
providers/openai_codex.py
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
"""OpenAI Codex (Responses API) provider profile."""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
# Declarative profile for OpenAI Codex over the Responses API mode; a plain
# ProviderProfile instance, so no hooks are overridden.
openai_codex = ProviderProfile(
    name="openai-codex",
    aliases=("codex", "openai_codex"),
    api_mode="codex_responses",
    auth_type="oauth_external",
    base_url="https://chatgpt.com/backend-api/codex",
    env_vars=(),  # OAuth external — no API key
)

# Registration happens as a side effect of importing this module.
register_provider(openai_codex)
|
||||||
30
providers/opencode.py
Normal file
30
providers/opencode.py
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
"""OpenCode provider profiles (Zen + Go).
|
||||||
|
|
||||||
|
Both use per-model api_mode routing:
|
||||||
|
- OpenCode Zen: Claude → anthropic_messages, GPT-5/Codex → codex_responses,
|
||||||
|
everything else → chat_completions (this profile)
|
||||||
|
- OpenCode Go: MiniMax → anthropic_messages, GLM/Kimi → chat_completions
|
||||||
|
(this profile)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
# OpenCode Zen profile.
opencode_zen = ProviderProfile(
    name="opencode-zen",
    aliases=("opencode", "opencode_zen", "zen"),
    base_url="https://opencode.ai/zen/v1",
    env_vars=("OPENCODE_ZEN_API_KEY",),
    default_aux_model="gemini-3-flash",
)

# OpenCode Go profile.
opencode_go = ProviderProfile(
    name="opencode-go",
    aliases=("opencode_go", "go", "opencode-go-sub"),
    base_url="https://opencode.ai/zen/go/v1",
    env_vars=("OPENCODE_GO_API_KEY",),
    default_aux_model="glm-5",
)

# Both profiles are registered as a side effect of importing this module.
register_provider(opencode_zen)
register_provider(opencode_go)
|
||||||
86
providers/openrouter.py
Normal file
86
providers/openrouter.py
Normal file
|
|
@ -0,0 +1,86 @@
|
||||||
|
"""OpenRouter provider profile."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_CACHE: list[str] | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class OpenRouterProfile(ProviderProfile):
    """OpenRouter aggregator — provider preferences, reasoning config passthrough."""

    def fetch_models(
        self,
        *,
        api_key: str | None = None,
        timeout: float = 8.0,
    ) -> list[str] | None:
        """Fetch from public OpenRouter catalog — no auth required.

        The first successful result is stored in the module-level ``_CACHE``
        and reused by every later call. Each call returns a fresh copy of the
        cached list, so a caller that sorts or filters the result in place
        cannot corrupt the cache.

        Note: Tool-call capability filtering is applied by hermes_cli/models.py
        via fetch_openrouter_models() → _openrouter_model_supports_tools(), not
        here. The picker early-returns via the dedicated openrouter path before
        reaching this method, so filtering here would be unreachable.

        Args:
            api_key: Accepted for signature compatibility; the base
                implementation is always called with ``api_key=None``.
            timeout: Forwarded to the base implementation.

        Returns:
            A list of model ids, or ``None`` when the base implementation
            returns ``None`` or raises.
        """
        global _CACHE  # noqa: PLW0603
        if _CACHE is None:
            try:
                fetched = super().fetch_models(api_key=None, timeout=timeout)
            except Exception as exc:
                # Best-effort: a failed catalog fetch must not break callers.
                logger.debug("fetch_models(openrouter): %s", exc)
                return None
            if fetched is None:
                return None
            # Store a private copy so the list handed back below is never
            # the cached object itself.
            _CACHE = list(fetched)
        return list(_CACHE)

    def build_extra_body(
        self, *, session_id: str | None = None, **context: Any
    ) -> dict[str, Any]:
        """Return ``{"provider": prefs}`` when ``provider_preferences`` is truthy.

        ``session_id`` is not used; an absent or empty preference value
        yields ``{}``.
        """
        prefs = context.get("provider_preferences")
        return {"provider": prefs} if prefs else {}

    def build_api_kwargs_extras(
        self,
        *,
        reasoning_config: dict | None = None,
        supports_reasoning: bool = False,
        **context: Any,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """OpenRouter passes the full reasoning_config dict as extra_body.reasoning.

        Returns ``(extra_body, top_level)``; ``top_level`` is always empty.
        Nothing is emitted unless ``supports_reasoning`` is truthy. With no
        config, ``{"enabled": True, "effort": "medium"}`` is sent; otherwise
        a shallow copy of the config is sent as-is (including a disabled one).
        """
        if not supports_reasoning:
            return {}, {}
        if reasoning_config is None:
            return {"reasoning": {"enabled": True, "effort": "medium"}}, {}
        return {"reasoning": dict(reasoning_config)}, {}
|
||||||
|
|
||||||
|
|
||||||
|
# OpenRouter profile; request shaping and model listing come from the
# OpenRouterProfile hooks.
openrouter = OpenRouterProfile(
    name="openrouter",
    display_name="OpenRouter",
    description="OpenRouter — unified API for 200+ models",
    signup_url="https://openrouter.ai/keys",
    aliases=("or",),
    base_url="https://openrouter.ai/api/v1",
    models_url="https://openrouter.ai/api/v1/models",
    env_vars=("OPENROUTER_API_KEY",),
    fallback_models=(
        "anthropic/claude-sonnet-4.6",
        "openai/gpt-5.4",
        "deepseek/deepseek-chat",
        "google/gemini-3-flash-preview",
        "qwen/qwen3-plus",
    ),
)

# Registration happens as a side effect of importing this module.
register_provider(openrouter)
|
||||||
82
providers/qwen.py
Normal file
82
providers/qwen.py
Normal file
|
|
@ -0,0 +1,82 @@
|
||||||
|
"""Qwen Portal provider profile."""
|
||||||
|
|
||||||
|
import copy
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
|
||||||
|
class QwenProfile(ProviderProfile):
    """Qwen Portal profile: list-form message content, image flag, top-level metadata."""

    def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Return a deep copy of *messages* with content in list-of-parts form.

        * String content becomes ``[{"type": "text", "text": ...}]``.
        * List content keeps its dict parts, wraps string parts as text
          parts, and drops every other part; if nothing survives, the
          original list is left untouched.
        * The first message with role ``system`` gets
          ``cache_control = {"type": "ephemeral"}`` on its last part, when
          that part is a dict. Later system messages are not touched.

        The input is never mutated. Matches the behavior of
        run_agent.py:_qwen_prepare_chat_messages().
        """
        normalized = copy.deepcopy(messages)
        if not normalized:
            return normalized

        for message in normalized:
            if not isinstance(message, dict):
                continue
            body = message.get("content")
            if isinstance(body, str):
                message["content"] = [{"type": "text", "text": body}]
                continue
            if not isinstance(body, list):
                continue
            parts = [
                {"type": "text", "text": item} if isinstance(item, str) else item
                for item in body
                if isinstance(item, (str, dict))
            ]
            if parts:
                message["content"] = parts

        # Only the first system message is considered, even if it turns out
        # to have nothing taggable.
        first_system = next(
            (
                m
                for m in normalized
                if isinstance(m, dict) and m.get("role") == "system"
            ),
            None,
        )
        if first_system is not None:
            tail = first_system.get("content")
            if isinstance(tail, list) and tail and isinstance(tail[-1], dict):
                tail[-1]["cache_control"] = {"type": "ephemeral"}

        return normalized

    def build_extra_body(
        self, *, session_id: str | None = None, **context
    ) -> dict[str, Any]:
        """Return the fixed image-resolution flag; all arguments are ignored."""
        return {"vl_high_resolution_images": True}

    def build_api_kwargs_extras(
        self,
        *,
        reasoning_config: dict | None = None,
        qwen_session_metadata: dict | None = None,
        **context,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Return ``(extra_body, top_level)`` with metadata at the top level.

        ``extra_body`` is always empty. ``top_level["metadata"]`` is set to
        ``qwen_session_metadata`` when that is truthy. ``reasoning_config``
        is accepted but not used.
        """
        if not qwen_session_metadata:
            return {}, {}
        return {}, {"metadata": qwen_session_metadata}
|
||||||
|
|
||||||
|
|
||||||
|
# Qwen Portal profile; message and request shaping come from the QwenProfile
# hooks.
qwen = QwenProfile(
    name="qwen-oauth",
    aliases=("qwen", "qwen-portal", "qwen-cli"),
    auth_type="oauth_external",
    base_url="https://portal.qwen.ai/v1",
    env_vars=("QWEN_API_KEY",),
    default_max_tokens=65536,
)

# Registration happens as a side effect of importing this module.
register_provider(qwen)
|
||||||
14
providers/stepfun.py
Normal file
14
providers/stepfun.py
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
"""StepFun provider profile."""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
# Declarative profile for StepFun; a plain ProviderProfile instance, so no
# hooks are overridden.
stepfun = ProviderProfile(
    name="stepfun",
    aliases=("step", "stepfun-coding-plan"),
    base_url="https://api.stepfun.ai/step_plan/v1",
    env_vars=("STEPFUN_API_KEY",),
    default_aux_model="step-3.5-flash",
)

# Registration happens as a side effect of importing this module.
register_provider(stepfun)
|
||||||
43
providers/vercel.py
Normal file
43
providers/vercel.py
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
"""Vercel AI Gateway provider profile.
|
||||||
|
|
||||||
|
AI Gateway routes to multiple backends. Hermes sends attribution
|
||||||
|
headers and full reasoning config passthrough.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
|
||||||
|
class VercelAIGatewayProfile(ProviderProfile):
    """Vercel AI Gateway profile: passes the reasoning config through unchanged."""

    def build_api_kwargs_extras(
        self,
        *,
        reasoning_config: dict | None = None,
        supports_reasoning: bool = True,
        **ctx: Any,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Return ``(extra_body, top_level)``; ``top_level`` is always empty.

        Nothing is emitted when ``supports_reasoning`` is falsy (it defaults
        to ``True`` here). Otherwise ``extra_body["reasoning"]`` is a shallow
        copy of ``reasoning_config``, or
        ``{"enabled": True, "effort": "medium"}`` when no config is given.
        """
        if not supports_reasoning:
            return {}, {}
        if reasoning_config is None:
            reasoning = {"enabled": True, "effort": "medium"}
        else:
            reasoning = dict(reasoning_config)
        return {"reasoning": reasoning}, {}
|
||||||
|
|
||||||
|
|
||||||
|
# Vercel AI Gateway profile; the attribution headers are declared here and the
# reasoning passthrough comes from the VercelAIGatewayProfile hook.
vercel = VercelAIGatewayProfile(
    name="ai-gateway",
    aliases=("vercel", "vercel-ai-gateway", "ai_gateway", "aigateway"),
    base_url="https://ai-gateway.vercel.sh/v1",
    env_vars=("AI_GATEWAY_API_KEY",),
    default_aux_model="google/gemini-3-flash",
    default_headers={
        "HTTP-Referer": "https://hermes-agent.nousresearch.com",
        "X-Title": "Hermes Agent",
    },
)

# Registration happens as a side effect of importing this module.
register_provider(vercel)
|
||||||
15
providers/xai.py
Normal file
15
providers/xai.py
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
"""xAI (Grok) provider profile."""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
# Declarative profile for xAI (Grok); a plain ProviderProfile instance, so no
# hooks are overridden.
xai = ProviderProfile(
    name="xai",
    aliases=("grok", "x-ai", "x.ai"),
    api_mode="codex_responses",
    auth_type="api_key",
    base_url="https://api.x.ai/v1",
    env_vars=("XAI_API_KEY",),
)

# Registration happens as a side effect of importing this module.
register_provider(xai)
|
||||||
13
providers/xiaomi.py
Normal file
13
providers/xiaomi.py
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
"""Xiaomi MiMo provider profile."""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
# Declarative profile for Xiaomi MiMo; a plain ProviderProfile instance, so no
# hooks are overridden.
xiaomi = ProviderProfile(
    name="xiaomi",
    aliases=("mimo", "xiaomi-mimo"),
    base_url="https://api.xiaomimimo.com/v1",
    env_vars=("XIAOMI_API_KEY",),
)

# Registration happens as a side effect of importing this module.
register_provider(xiaomi)
|
||||||
21
providers/zai.py
Normal file
21
providers/zai.py
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
"""ZAI / GLM provider profile."""
|
||||||
|
|
||||||
|
from providers import register_provider
|
||||||
|
from providers.base import ProviderProfile
|
||||||
|
|
||||||
|
# Declarative profile for Z.AI / GLM; a plain ProviderProfile instance, so no
# hooks are overridden.
zai = ProviderProfile(
    name="zai",
    display_name="Z.AI (GLM)",
    description="Z.AI / GLM — Zhipu AI models",
    signup_url="https://z.ai/",
    aliases=("glm", "z-ai", "z.ai", "zhipu"),
    base_url="https://api.z.ai/api/paas/v4",
    env_vars=("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),
    default_aux_model="glm-4.5-flash",
    fallback_models=(
        "glm-5",
        "glm-4-9b",
    ),
)

# Registration happens as a side effect of importing this module.
register_provider(zai)
|
||||||
|
|
@ -142,7 +142,7 @@ hermes_cli = ["web_dist/**/*"]
|
||||||
gateway = ["assets/**/*"]
|
gateway = ["assets/**/*"]
|
||||||
|
|
||||||
[tool.setuptools.packages.find]
|
[tool.setuptools.packages.find]
|
||||||
include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]
|
include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "providers", "providers.*"]
|
||||||
|
|
||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
testpaths = ["tests"]
|
testpaths = ["tests"]
|
||||||
|
|
|
||||||
73
run_agent.py
73
run_agent.py
|
|
@ -1461,6 +1461,17 @@ class AIAgent:
|
||||||
elif base_url_host_matches(effective_base, "chatgpt.com"):
|
elif base_url_host_matches(effective_base, "chatgpt.com"):
|
||||||
from agent.auxiliary_client import _codex_cloudflare_headers
|
from agent.auxiliary_client import _codex_cloudflare_headers
|
||||||
client_kwargs["default_headers"] = _codex_cloudflare_headers(api_key)
|
client_kwargs["default_headers"] = _codex_cloudflare_headers(api_key)
|
||||||
|
elif "default_headers" not in client_kwargs:
|
||||||
|
# Fall back to profile.default_headers for providers that
|
||||||
|
# declare custom headers (e.g. Vercel AI Gateway attribution,
|
||||||
|
# Kimi User-Agent on non-kimi.com endpoints).
|
||||||
|
try:
|
||||||
|
from providers import get_provider_profile as _gpf
|
||||||
|
_ph = _gpf(self.provider)
|
||||||
|
if _ph and _ph.default_headers:
|
||||||
|
client_kwargs["default_headers"] = dict(_ph.default_headers)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
# No explicit creds — use the centralized provider router
|
# No explicit creds — use the centralized provider router
|
||||||
from agent.auxiliary_client import resolve_provider_client
|
from agent.auxiliary_client import resolve_provider_client
|
||||||
|
|
@ -6261,7 +6272,19 @@ class AIAgent:
|
||||||
self._client_kwargs.get("api_key", "")
|
self._client_kwargs.get("api_key", "")
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self._client_kwargs.pop("default_headers", None)
|
# No URL-specific headers — check profile.default_headers before clearing.
|
||||||
|
_ph_headers = None
|
||||||
|
try:
|
||||||
|
from providers import get_provider_profile as _gpf2
|
||||||
|
_ph2 = _gpf2(self.provider)
|
||||||
|
if _ph2 and _ph2.default_headers:
|
||||||
|
_ph_headers = dict(_ph2.default_headers)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
if _ph_headers:
|
||||||
|
self._client_kwargs["default_headers"] = _ph_headers
|
||||||
|
else:
|
||||||
|
self._client_kwargs.pop("default_headers", None)
|
||||||
|
|
||||||
def _swap_credential(self, entry) -> None:
|
def _swap_credential(self, entry) -> None:
|
||||||
runtime_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
|
runtime_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
|
||||||
|
|
@ -8494,7 +8517,7 @@ class AIAgent:
|
||||||
_omit_temp = False
|
_omit_temp = False
|
||||||
_fixed_temp = None
|
_fixed_temp = None
|
||||||
|
|
||||||
# Provider preferences (OpenRouter-specific)
|
# Provider preferences (OpenRouter-style)
|
||||||
_prefs: Dict[str, Any] = {}
|
_prefs: Dict[str, Any] = {}
|
||||||
if self.providers_allowed:
|
if self.providers_allowed:
|
||||||
_prefs["only"] = self.providers_allowed
|
_prefs["only"] = self.providers_allowed
|
||||||
|
|
@ -8509,16 +8532,16 @@ class AIAgent:
|
||||||
if self.provider_data_collection:
|
if self.provider_data_collection:
|
||||||
_prefs["data_collection"] = self.provider_data_collection
|
_prefs["data_collection"] = self.provider_data_collection
|
||||||
|
|
||||||
# Anthropic max output for Claude on OpenRouter/Nous
|
# Claude max-output override on aggregators
|
||||||
_ant_max = None
|
_ant_max = None
|
||||||
if (_is_or or _is_nous) and "claude" in (self.model or "").lower():
|
if (_is_or or _is_nous) and "claude" in (self.model or "").lower():
|
||||||
try:
|
try:
|
||||||
from agent.anthropic_adapter import _get_anthropic_max_output
|
from agent.anthropic_adapter import _get_anthropic_max_output
|
||||||
_ant_max = _get_anthropic_max_output(self.model)
|
_ant_max = _get_anthropic_max_output(self.model)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass # fail open — let the proxy pick its default
|
pass
|
||||||
|
|
||||||
# Qwen session metadata precomputed here (promptId is per-call random)
|
# Qwen session metadata
|
||||||
_qwen_meta = None
|
_qwen_meta = None
|
||||||
if _is_qwen:
|
if _is_qwen:
|
||||||
_qwen_meta = {
|
_qwen_meta = {
|
||||||
|
|
@ -8526,8 +8549,44 @@ class AIAgent:
|
||||||
"promptId": str(uuid.uuid4()),
|
"promptId": str(uuid.uuid4()),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Ephemeral max output override — consume immediately so the next
|
# ── Provider profile path (registered providers) ───────────────────
|
||||||
# turn doesn't inherit it.
|
# Profiles handle per-provider quirks via hooks. When a profile is
|
||||||
|
# found, delegate fully; otherwise fall through to the legacy flag path.
|
||||||
|
try:
|
||||||
|
from providers import get_provider_profile
|
||||||
|
_profile = get_provider_profile(self.provider)
|
||||||
|
except Exception:
|
||||||
|
_profile = None
|
||||||
|
|
||||||
|
if _profile:
|
||||||
|
_ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
|
||||||
|
if _ephemeral_out is not None:
|
||||||
|
self._ephemeral_max_output_tokens = None
|
||||||
|
|
||||||
|
return _ct.build_kwargs(
|
||||||
|
model=self.model,
|
||||||
|
messages=api_messages,
|
||||||
|
tools=self.tools,
|
||||||
|
base_url=self.base_url,
|
||||||
|
timeout=self._resolved_api_call_timeout(),
|
||||||
|
max_tokens=self.max_tokens,
|
||||||
|
ephemeral_max_output_tokens=_ephemeral_out,
|
||||||
|
max_tokens_param_fn=self._max_tokens_param,
|
||||||
|
reasoning_config=self.reasoning_config,
|
||||||
|
request_overrides=self.request_overrides,
|
||||||
|
session_id=getattr(self, "session_id", None),
|
||||||
|
provider_profile=_profile,
|
||||||
|
ollama_num_ctx=self._ollama_num_ctx,
|
||||||
|
# Context forwarded to profile hooks:
|
||||||
|
provider_preferences=_prefs or None,
|
||||||
|
anthropic_max_output=_ant_max,
|
||||||
|
supports_reasoning=self._supports_reasoning_extra_body(),
|
||||||
|
qwen_session_metadata=_qwen_meta,
|
||||||
|
)
|
||||||
|
|
||||||
|
# ── Legacy flag path ────────────────────────────────────────────
|
||||||
|
# Reached only when get_provider_profile() returns None — i.e. a
|
||||||
|
# completely unknown provider not in providers/ registry.
|
||||||
_ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
|
_ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
|
||||||
if _ephemeral_out is not None:
|
if _ephemeral_out is not None:
|
||||||
self._ephemeral_max_output_tokens = None
|
self._ephemeral_max_output_tokens = None
|
||||||
|
|
|
||||||
|
|
@ -71,17 +71,17 @@ class TestMinimaxThinkingSupport:
|
||||||
|
|
||||||
|
|
||||||
class TestMinimaxAuxModel:
|
class TestMinimaxAuxModel:
|
||||||
"""Verify auxiliary model is standard (not highspeed)."""
|
"""Verify auxiliary model is standard (not highspeed) — now reads from profiles."""
|
||||||
|
|
||||||
def test_minimax_aux_is_standard(self):
|
def test_minimax_aux_is_standard(self):
|
||||||
from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
|
from agent.auxiliary_client import _get_aux_model_for_provider
|
||||||
assert _API_KEY_PROVIDER_AUX_MODELS["minimax"] == "MiniMax-M2.7"
|
assert _get_aux_model_for_provider("minimax") == "MiniMax-M2.7"
|
||||||
assert _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"] == "MiniMax-M2.7"
|
assert _get_aux_model_for_provider("minimax-cn") == "MiniMax-M2.7"
|
||||||
|
|
||||||
def test_minimax_aux_not_highspeed(self):
|
def test_minimax_aux_not_highspeed(self):
|
||||||
from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
|
from agent.auxiliary_client import _get_aux_model_for_provider
|
||||||
assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax"]
|
assert "highspeed" not in _get_aux_model_for_provider("minimax")
|
||||||
assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"]
|
assert "highspeed" not in _get_aux_model_for_provider("minimax-cn")
|
||||||
|
|
||||||
|
|
||||||
class TestMinimaxBetaHeaders:
|
class TestMinimaxBetaHeaders:
|
||||||
|
|
|
||||||
|
|
@ -73,17 +73,21 @@ class TestChatCompletionsBuildKwargs:
|
||||||
assert kw["tools"] == tools
|
assert kw["tools"] == tools
|
||||||
|
|
||||||
def test_openrouter_provider_prefs(self, transport):
|
def test_openrouter_provider_prefs(self, transport):
|
||||||
|
from providers import get_provider_profile
|
||||||
|
profile = get_provider_profile("openrouter")
|
||||||
msgs = [{"role": "user", "content": "Hi"}]
|
msgs = [{"role": "user", "content": "Hi"}]
|
||||||
kw = transport.build_kwargs(
|
kw = transport.build_kwargs(
|
||||||
model="gpt-4o", messages=msgs,
|
model="gpt-4o", messages=msgs,
|
||||||
is_openrouter=True,
|
provider_profile=profile,
|
||||||
provider_preferences={"only": ["openai"]},
|
provider_preferences={"only": ["openai"]},
|
||||||
)
|
)
|
||||||
assert kw["extra_body"]["provider"] == {"only": ["openai"]}
|
assert kw["extra_body"]["provider"] == {"only": ["openai"]}
|
||||||
|
|
||||||
def test_nous_tags(self, transport):
|
def test_nous_tags(self, transport):
|
||||||
|
from providers import get_provider_profile
|
||||||
|
profile = get_provider_profile("nous")
|
||||||
msgs = [{"role": "user", "content": "Hi"}]
|
msgs = [{"role": "user", "content": "Hi"}]
|
||||||
kw = transport.build_kwargs(model="gpt-4o", messages=msgs, is_nous=True)
|
kw = transport.build_kwargs(model="gpt-4o", messages=msgs, provider_profile=profile)
|
||||||
assert kw["extra_body"]["tags"] == ["product=hermes-agent"]
|
assert kw["extra_body"]["tags"] == ["product=hermes-agent"]
|
||||||
|
|
||||||
def test_reasoning_default(self, transport):
|
def test_reasoning_default(self, transport):
|
||||||
|
|
@ -95,29 +99,36 @@ class TestChatCompletionsBuildKwargs:
|
||||||
assert kw["extra_body"]["reasoning"] == {"enabled": True, "effort": "medium"}
|
assert kw["extra_body"]["reasoning"] == {"enabled": True, "effort": "medium"}
|
||||||
|
|
||||||
def test_nous_omits_disabled_reasoning(self, transport):
|
def test_nous_omits_disabled_reasoning(self, transport):
|
||||||
|
from providers import get_provider_profile
|
||||||
|
profile = get_provider_profile("nous")
|
||||||
msgs = [{"role": "user", "content": "Hi"}]
|
msgs = [{"role": "user", "content": "Hi"}]
|
||||||
kw = transport.build_kwargs(
|
kw = transport.build_kwargs(
|
||||||
model="gpt-4o", messages=msgs,
|
model="gpt-4o", messages=msgs,
|
||||||
|
provider_profile=profile,
|
||||||
supports_reasoning=True,
|
supports_reasoning=True,
|
||||||
is_nous=True,
|
|
||||||
reasoning_config={"enabled": False},
|
reasoning_config={"enabled": False},
|
||||||
)
|
)
|
||||||
# Nous rejects enabled=false; reasoning omitted entirely
|
# Nous rejects enabled=false; reasoning omitted entirely
|
||||||
assert "reasoning" not in kw.get("extra_body", {})
|
assert "reasoning" not in kw.get("extra_body", {})
|
||||||
|
|
||||||
def test_ollama_num_ctx(self, transport):
|
def test_ollama_num_ctx(self, transport):
|
||||||
|
from providers import get_provider_profile
|
||||||
|
profile = get_provider_profile("custom")
|
||||||
msgs = [{"role": "user", "content": "Hi"}]
|
msgs = [{"role": "user", "content": "Hi"}]
|
||||||
kw = transport.build_kwargs(
|
kw = transport.build_kwargs(
|
||||||
model="llama3", messages=msgs,
|
model="llama3", messages=msgs,
|
||||||
|
provider_profile=profile,
|
||||||
ollama_num_ctx=32768,
|
ollama_num_ctx=32768,
|
||||||
)
|
)
|
||||||
assert kw["extra_body"]["options"]["num_ctx"] == 32768
|
assert kw["extra_body"]["options"]["num_ctx"] == 32768
|
||||||
|
|
||||||
def test_custom_think_false(self, transport):
|
def test_custom_think_false(self, transport):
|
||||||
|
from providers import get_provider_profile
|
||||||
|
profile = get_provider_profile("custom")
|
||||||
msgs = [{"role": "user", "content": "Hi"}]
|
msgs = [{"role": "user", "content": "Hi"}]
|
||||||
kw = transport.build_kwargs(
|
kw = transport.build_kwargs(
|
||||||
model="qwen3", messages=msgs,
|
model="qwen3", messages=msgs,
|
||||||
is_custom_provider=True,
|
provider_profile=profile,
|
||||||
reasoning_config={"effort": "none"},
|
reasoning_config={"effort": "none"},
|
||||||
)
|
)
|
||||||
assert kw["extra_body"]["think"] is False
|
assert kw["extra_body"]["think"] is False
|
||||||
|
|
@ -304,23 +315,29 @@ class TestChatCompletionsBuildKwargs:
|
||||||
assert kw["max_tokens"] == 2048
|
assert kw["max_tokens"] == 2048
|
||||||
|
|
||||||
def test_nvidia_default_max_tokens(self, transport):
|
def test_nvidia_default_max_tokens(self, transport):
|
||||||
|
"""NVIDIA max_tokens=16384 is now set via ProviderProfile, not legacy flag."""
|
||||||
|
from providers import get_provider_profile
|
||||||
|
|
||||||
|
profile = get_provider_profile("nvidia")
|
||||||
msgs = [{"role": "user", "content": "Hi"}]
|
msgs = [{"role": "user", "content": "Hi"}]
|
||||||
kw = transport.build_kwargs(
|
kw = transport.build_kwargs(
|
||||||
model="glm-4.7", messages=msgs,
|
model="nvidia/llama-3.1-405b-instruct",
|
||||||
is_nvidia_nim=True,
|
messages=msgs,
|
||||||
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
||||||
|
provider_profile=profile,
|
||||||
)
|
)
|
||||||
# NVIDIA default: 16384
|
|
||||||
assert kw["max_tokens"] == 16384
|
assert kw["max_tokens"] == 16384
|
||||||
|
|
||||||
def test_qwen_default_max_tokens(self, transport):
|
def test_qwen_default_max_tokens(self, transport):
|
||||||
|
from providers import get_provider_profile
|
||||||
|
profile = get_provider_profile("qwen-oauth")
|
||||||
msgs = [{"role": "user", "content": "Hi"}]
|
msgs = [{"role": "user", "content": "Hi"}]
|
||||||
kw = transport.build_kwargs(
|
kw = transport.build_kwargs(
|
||||||
model="qwen3-coder-plus", messages=msgs,
|
model="qwen3-coder-plus", messages=msgs,
|
||||||
is_qwen_portal=True,
|
provider_profile=profile,
|
||||||
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
||||||
)
|
)
|
||||||
# Qwen default: 65536
|
# Qwen default: 65536 from profile.default_max_tokens
|
||||||
assert kw["max_tokens"] == 65536
|
assert kw["max_tokens"] == 65536
|
||||||
|
|
||||||
def test_anthropic_max_output_for_claude_on_aggregator(self, transport):
|
def test_anthropic_max_output_for_claude_on_aggregator(self, transport):
|
||||||
|
|
@ -343,14 +360,23 @@ class TestChatCompletionsBuildKwargs:
|
||||||
assert kw["service_tier"] == "priority"
|
assert kw["service_tier"] == "priority"
|
||||||
|
|
||||||
def test_fixed_temperature(self, transport):
|
def test_fixed_temperature(self, transport):
|
||||||
|
"""Fixed temperature is now set via ProviderProfile.fixed_temperature."""
|
||||||
|
from providers.base import ProviderProfile
|
||||||
msgs = [{"role": "user", "content": "Hi"}]
|
msgs = [{"role": "user", "content": "Hi"}]
|
||||||
kw = transport.build_kwargs(model="gpt-4o", messages=msgs, fixed_temperature=0.6)
|
kw = transport.build_kwargs(
|
||||||
|
model="gpt-4o", messages=msgs,
|
||||||
|
provider_profile=ProviderProfile(name="_t", fixed_temperature=0.6),
|
||||||
|
)
|
||||||
assert kw["temperature"] == 0.6
|
assert kw["temperature"] == 0.6
|
||||||
|
|
||||||
def test_omit_temperature(self, transport):
|
def test_omit_temperature(self, transport):
|
||||||
|
"""Omit temperature is set via ProviderProfile with OMIT_TEMPERATURE sentinel."""
|
||||||
|
from providers.base import ProviderProfile, OMIT_TEMPERATURE
|
||||||
msgs = [{"role": "user", "content": "Hi"}]
|
msgs = [{"role": "user", "content": "Hi"}]
|
||||||
kw = transport.build_kwargs(model="gpt-4o", messages=msgs, omit_temperature=True, fixed_temperature=0.5)
|
kw = transport.build_kwargs(
|
||||||
# omit wins
|
model="gpt-4o", messages=msgs,
|
||||||
|
provider_profile=ProviderProfile(name="_t", fixed_temperature=OMIT_TEMPERATURE),
|
||||||
|
)
|
||||||
assert "temperature" not in kw
|
assert "temperature" not in kw
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -358,18 +384,22 @@ class TestChatCompletionsKimi:
|
||||||
"""Regression tests for the Kimi/Moonshot quirks migrated into the transport."""
|
"""Regression tests for the Kimi/Moonshot quirks migrated into the transport."""
|
||||||
|
|
||||||
def test_kimi_max_tokens_default(self, transport):
|
def test_kimi_max_tokens_default(self, transport):
|
||||||
|
from providers import get_provider_profile
|
||||||
|
profile = get_provider_profile("kimi-coding")
|
||||||
kw = transport.build_kwargs(
|
kw = transport.build_kwargs(
|
||||||
model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
|
model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
|
||||||
is_kimi=True,
|
provider_profile=profile,
|
||||||
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
||||||
)
|
)
|
||||||
# Kimi CLI default: 32000
|
# Kimi CLI default: 32000 from KimiProfile.default_max_tokens
|
||||||
assert kw["max_tokens"] == 32000
|
assert kw["max_tokens"] == 32000
|
||||||
|
|
||||||
def test_kimi_reasoning_effort_top_level(self, transport):
|
def test_kimi_reasoning_effort_top_level(self, transport):
|
||||||
|
from providers import get_provider_profile
|
||||||
|
profile = get_provider_profile("kimi-coding")
|
||||||
kw = transport.build_kwargs(
|
kw = transport.build_kwargs(
|
||||||
model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
|
model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
|
||||||
is_kimi=True,
|
provider_profile=profile,
|
||||||
reasoning_config={"effort": "high"},
|
reasoning_config={"effort": "high"},
|
||||||
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
||||||
)
|
)
|
||||||
|
|
@ -387,17 +417,21 @@ class TestChatCompletionsKimi:
|
||||||
assert "reasoning_effort" not in kw
|
assert "reasoning_effort" not in kw
|
||||||
|
|
||||||
def test_kimi_thinking_enabled_extra_body(self, transport):
|
def test_kimi_thinking_enabled_extra_body(self, transport):
|
||||||
|
from providers import get_provider_profile
|
||||||
|
profile = get_provider_profile("kimi-coding")
|
||||||
kw = transport.build_kwargs(
|
kw = transport.build_kwargs(
|
||||||
model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
|
model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
|
||||||
is_kimi=True,
|
provider_profile=profile,
|
||||||
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
||||||
)
|
)
|
||||||
assert kw["extra_body"]["thinking"] == {"type": "enabled"}
|
assert kw["extra_body"]["thinking"] == {"type": "enabled"}
|
||||||
|
|
||||||
def test_kimi_thinking_disabled_extra_body(self, transport):
|
def test_kimi_thinking_disabled_extra_body(self, transport):
|
||||||
|
from providers import get_provider_profile
|
||||||
|
profile = get_provider_profile("kimi-coding")
|
||||||
kw = transport.build_kwargs(
|
kw = transport.build_kwargs(
|
||||||
model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
|
model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
|
||||||
is_kimi=True,
|
provider_profile=profile,
|
||||||
reasoning_config={"enabled": False},
|
reasoning_config={"enabled": False},
|
||||||
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -269,9 +269,9 @@ class TestGmiModelMetadata:
|
||||||
|
|
||||||
class TestGmiAuxiliary:
|
class TestGmiAuxiliary:
|
||||||
def test_aux_default_model(self):
|
def test_aux_default_model(self):
|
||||||
from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
|
from agent.auxiliary_client import _get_aux_model_for_provider
|
||||||
|
|
||||||
assert _API_KEY_PROVIDER_AUX_MODELS["gmi"] == "google/gemini-3.1-flash-lite-preview"
|
assert _get_aux_model_for_provider("gmi") == "google/gemini-3.1-flash-lite-preview"
|
||||||
|
|
||||||
def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch):
|
def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch):
|
||||||
monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
|
monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
|
||||||
|
|
|
||||||
0
tests/providers/__init__.py
Normal file
0
tests/providers/__init__.py
Normal file
118
tests/providers/test_e2e_wiring.py
Normal file
118
tests/providers/test_e2e_wiring.py
Normal file
|
|
@ -0,0 +1,118 @@
|
||||||
|
"""E2E tests: verify _build_kwargs_from_profile produces correct output.
|
||||||
|
|
||||||
|
These tests call build_kwargs(provider_profile=...) on the transport directly,
|
||||||
|
without importing run_agent (which would cause xdist worker contamination).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from agent.transports.chat_completions import ChatCompletionsTransport
|
||||||
|
from providers import get_provider_profile
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def transport():
|
||||||
|
return ChatCompletionsTransport()
|
||||||
|
|
||||||
|
|
||||||
|
def _msgs():
|
||||||
|
return [{"role": "user", "content": "hi"}]
|
||||||
|
|
||||||
|
|
||||||
|
class TestNvidiaProfileWiring:
|
||||||
|
def test_nvidia_gets_default_max_tokens(self, transport):
|
||||||
|
profile = get_provider_profile("nvidia")
|
||||||
|
kwargs = transport.build_kwargs(
|
||||||
|
model="nvidia/llama-3.1-nemotron-70b-instruct",
|
||||||
|
messages=_msgs(),
|
||||||
|
tools=None,
|
||||||
|
provider_profile=profile,
|
||||||
|
max_tokens=None,
|
||||||
|
max_tokens_param_fn=lambda x: {"max_tokens": x} if x else {},
|
||||||
|
timeout=300,
|
||||||
|
reasoning_config=None,
|
||||||
|
request_overrides=None,
|
||||||
|
session_id="test",
|
||||||
|
ollama_num_ctx=None,
|
||||||
|
)
|
||||||
|
# NVIDIA profile sets default_max_tokens=16384
|
||||||
|
assert kwargs.get("max_tokens") == 16384
|
||||||
|
|
||||||
|
def test_nvidia_nim_alias(self, transport):
|
||||||
|
profile = get_provider_profile("nvidia-nim")
|
||||||
|
assert profile is not None
|
||||||
|
assert profile.name == "nvidia"
|
||||||
|
assert profile.default_max_tokens == 16384
|
||||||
|
|
||||||
|
def test_nvidia_model_passed(self, transport):
|
||||||
|
profile = get_provider_profile("nvidia")
|
||||||
|
kwargs = transport.build_kwargs(
|
||||||
|
model="nvidia/test-model",
|
||||||
|
messages=_msgs(),
|
||||||
|
tools=None,
|
||||||
|
provider_profile=profile,
|
||||||
|
max_tokens=None,
|
||||||
|
max_tokens_param_fn=lambda x: {"max_tokens": x} if x else {},
|
||||||
|
timeout=300,
|
||||||
|
reasoning_config=None,
|
||||||
|
request_overrides=None,
|
||||||
|
session_id="test",
|
||||||
|
ollama_num_ctx=None,
|
||||||
|
)
|
||||||
|
assert kwargs["model"] == "nvidia/test-model"
|
||||||
|
|
||||||
|
def test_nvidia_messages_passed(self, transport):
|
||||||
|
profile = get_provider_profile("nvidia")
|
||||||
|
msgs = _msgs()
|
||||||
|
kwargs = transport.build_kwargs(
|
||||||
|
model="nvidia/test",
|
||||||
|
messages=msgs,
|
||||||
|
tools=None,
|
||||||
|
provider_profile=profile,
|
||||||
|
max_tokens=None,
|
||||||
|
max_tokens_param_fn=lambda x: {"max_tokens": x} if x else {},
|
||||||
|
timeout=300,
|
||||||
|
reasoning_config=None,
|
||||||
|
request_overrides=None,
|
||||||
|
session_id="test",
|
||||||
|
ollama_num_ctx=None,
|
||||||
|
)
|
||||||
|
assert kwargs["messages"] == msgs
|
||||||
|
|
||||||
|
|
||||||
|
class TestDeepSeekProfileWiring:
|
||||||
|
def test_deepseek_no_forced_max_tokens(self, transport):
|
||||||
|
profile = get_provider_profile("deepseek")
|
||||||
|
kwargs = transport.build_kwargs(
|
||||||
|
model="deepseek-chat",
|
||||||
|
messages=_msgs(),
|
||||||
|
tools=None,
|
||||||
|
provider_profile=profile,
|
||||||
|
max_tokens=None,
|
||||||
|
max_tokens_param_fn=lambda x: {"max_tokens": x} if x else {},
|
||||||
|
timeout=300,
|
||||||
|
reasoning_config=None,
|
||||||
|
request_overrides=None,
|
||||||
|
session_id="test",
|
||||||
|
ollama_num_ctx=None,
|
||||||
|
)
|
||||||
|
# DeepSeek has no default_max_tokens
|
||||||
|
assert kwargs["model"] == "deepseek-chat"
|
||||||
|
assert kwargs.get("max_tokens") is None or "max_tokens" not in kwargs
|
||||||
|
|
||||||
|
def test_deepseek_messages_passed(self, transport):
|
||||||
|
profile = get_provider_profile("deepseek")
|
||||||
|
msgs = _msgs()
|
||||||
|
kwargs = transport.build_kwargs(
|
||||||
|
model="deepseek-chat",
|
||||||
|
messages=msgs,
|
||||||
|
tools=None,
|
||||||
|
provider_profile=profile,
|
||||||
|
max_tokens=None,
|
||||||
|
max_tokens_param_fn=lambda x: {"max_tokens": x} if x else {},
|
||||||
|
timeout=300,
|
||||||
|
reasoning_config=None,
|
||||||
|
request_overrides=None,
|
||||||
|
session_id="test",
|
||||||
|
ollama_num_ctx=None,
|
||||||
|
)
|
||||||
|
assert kwargs["messages"] == msgs
|
||||||
290
tests/providers/test_profile_wiring.py
Normal file
290
tests/providers/test_profile_wiring.py
Normal file
|
|
@ -0,0 +1,290 @@
|
||||||
|
"""Profile-path parity tests: verify profile path produces identical output to legacy flags.
|
||||||
|
|
||||||
|
Most tests call build_kwargs twice, passing provider_profile both times — in several cases the call bound to
|
||||||
|
`legacy` resolves the profile by its canonical name and the other by an alias — and assert the outputs match. This catches any behavioral drift between the two lookups.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from agent.transports.chat_completions import ChatCompletionsTransport
|
||||||
|
from providers import get_provider_profile
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def transport():
|
||||||
|
return ChatCompletionsTransport()
|
||||||
|
|
||||||
|
|
||||||
|
def _msgs():
|
||||||
|
return [{"role": "user", "content": "hello"}]
|
||||||
|
|
||||||
|
|
||||||
|
def _max_tokens_fn(n):
|
||||||
|
return {"max_completion_tokens": n}
|
||||||
|
|
||||||
|
|
||||||
|
class TestNvidiaProfileParity:
|
||||||
|
def test_max_tokens_match(self, transport):
|
||||||
|
"""NVIDIA profile sets max_tokens=16384; legacy flag is removed."""
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="nvidia/nemotron", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("nvidia"),
|
||||||
|
max_tokens_param_fn=_max_tokens_fn,
|
||||||
|
)
|
||||||
|
assert profile["max_completion_tokens"] == 16384
|
||||||
|
|
||||||
|
|
||||||
|
class TestKimiProfileParity:
|
||||||
|
def test_temperature_omitted(self, transport):
|
||||||
|
legacy = transport.build_kwargs(
|
||||||
|
model="kimi-k2", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("kimi-coding"), omit_temperature=True,
|
||||||
|
)
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="kimi-k2", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("kimi"),
|
||||||
|
)
|
||||||
|
assert "temperature" not in legacy
|
||||||
|
assert "temperature" not in profile
|
||||||
|
|
||||||
|
def test_max_tokens(self, transport):
|
||||||
|
legacy = transport.build_kwargs(
|
||||||
|
model="kimi-k2", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("kimi-coding"), max_tokens_param_fn=_max_tokens_fn,
|
||||||
|
)
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="kimi-k2", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("kimi"),
|
||||||
|
max_tokens_param_fn=_max_tokens_fn,
|
||||||
|
)
|
||||||
|
assert profile["max_completion_tokens"] == legacy["max_completion_tokens"] == 32000
|
||||||
|
|
||||||
|
def test_thinking_enabled(self, transport):
|
||||||
|
rc = {"enabled": True, "effort": "high"}
|
||||||
|
legacy = transport.build_kwargs(
|
||||||
|
model="kimi-k2", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("kimi-coding"), reasoning_config=rc,
|
||||||
|
)
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="kimi-k2", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("kimi"),
|
||||||
|
reasoning_config=rc,
|
||||||
|
)
|
||||||
|
assert profile["extra_body"]["thinking"] == legacy["extra_body"]["thinking"]
|
||||||
|
assert profile["reasoning_effort"] == legacy["reasoning_effort"] == "high"
|
||||||
|
|
||||||
|
def test_thinking_disabled(self, transport):
|
||||||
|
rc = {"enabled": False}
|
||||||
|
legacy = transport.build_kwargs(
|
||||||
|
model="kimi-k2", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("kimi-coding"), reasoning_config=rc,
|
||||||
|
)
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="kimi-k2", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("kimi"),
|
||||||
|
reasoning_config=rc,
|
||||||
|
)
|
||||||
|
assert profile["extra_body"]["thinking"] == legacy["extra_body"]["thinking"]
|
||||||
|
assert profile["extra_body"]["thinking"]["type"] == "disabled"
|
||||||
|
assert "reasoning_effort" not in profile
|
||||||
|
assert "reasoning_effort" not in legacy
|
||||||
|
|
||||||
|
def test_reasoning_effort_default(self, transport):
|
||||||
|
rc = {"enabled": True}
|
||||||
|
legacy = transport.build_kwargs(
|
||||||
|
model="kimi-k2", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("kimi-coding"), reasoning_config=rc,
|
||||||
|
)
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="kimi-k2", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("kimi"),
|
||||||
|
reasoning_config=rc,
|
||||||
|
)
|
||||||
|
assert profile["reasoning_effort"] == legacy["reasoning_effort"] == "medium"
|
||||||
|
|
||||||
|
|
||||||
|
class TestOpenRouterProfileParity:
|
||||||
|
def test_provider_preferences(self, transport):
|
||||||
|
prefs = {"allow": ["anthropic"]}
|
||||||
|
legacy = transport.build_kwargs(
|
||||||
|
model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("openrouter"), provider_preferences=prefs,
|
||||||
|
)
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("openrouter"),
|
||||||
|
provider_preferences=prefs,
|
||||||
|
)
|
||||||
|
assert profile["extra_body"]["provider"] == legacy["extra_body"]["provider"]
|
||||||
|
|
||||||
|
def test_reasoning_full_config(self, transport):
|
||||||
|
rc = {"enabled": True, "effort": "high"}
|
||||||
|
legacy = transport.build_kwargs(
|
||||||
|
model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("openrouter"), supports_reasoning=True, reasoning_config=rc,
|
||||||
|
)
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("openrouter"),
|
||||||
|
supports_reasoning=True, reasoning_config=rc,
|
||||||
|
)
|
||||||
|
assert profile["extra_body"]["reasoning"] == legacy["extra_body"]["reasoning"]
|
||||||
|
|
||||||
|
def test_default_reasoning(self, transport):
|
||||||
|
legacy = transport.build_kwargs(
|
||||||
|
model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("openrouter"), supports_reasoning=True,
|
||||||
|
)
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("openrouter"),
|
||||||
|
supports_reasoning=True,
|
||||||
|
)
|
||||||
|
assert profile["extra_body"]["reasoning"] == legacy["extra_body"]["reasoning"]
|
||||||
|
|
||||||
|
|
||||||
|
class TestNousProfileParity:
|
||||||
|
def test_tags(self, transport):
|
||||||
|
legacy = transport.build_kwargs(
|
||||||
|
model="hermes-3", messages=_msgs(), tools=None, provider_profile=get_provider_profile("nous"),
|
||||||
|
)
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="hermes-3", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("nous"),
|
||||||
|
)
|
||||||
|
assert profile["extra_body"]["tags"] == legacy["extra_body"]["tags"]
|
||||||
|
|
||||||
|
def test_reasoning_omitted_when_disabled(self, transport):
|
||||||
|
rc = {"enabled": False}
|
||||||
|
legacy = transport.build_kwargs(
|
||||||
|
model="hermes-3", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("nous"), supports_reasoning=True, reasoning_config=rc,
|
||||||
|
)
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="hermes-3", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("nous"),
|
||||||
|
supports_reasoning=True, reasoning_config=rc,
|
||||||
|
)
|
||||||
|
assert "reasoning" not in legacy.get("extra_body", {})
|
||||||
|
assert "reasoning" not in profile.get("extra_body", {})
|
||||||
|
|
||||||
|
|
||||||
|
class TestQwenProfileParity:
|
||||||
|
def test_max_tokens(self, transport):
|
||||||
|
legacy = transport.build_kwargs(
|
||||||
|
model="qwen3.5", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("qwen-oauth"), max_tokens_param_fn=_max_tokens_fn,
|
||||||
|
)
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="qwen3.5", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("qwen"),
|
||||||
|
max_tokens_param_fn=_max_tokens_fn,
|
||||||
|
)
|
||||||
|
assert profile["max_completion_tokens"] == legacy["max_completion_tokens"] == 65536
|
||||||
|
|
||||||
|
def test_vl_high_resolution(self, transport):
|
||||||
|
legacy = transport.build_kwargs(
|
||||||
|
model="qwen3.5", messages=_msgs(), tools=None, provider_profile=get_provider_profile("qwen-oauth"),
|
||||||
|
)
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="qwen3.5", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("qwen"),
|
||||||
|
)
|
||||||
|
assert profile["extra_body"]["vl_high_resolution_images"] == legacy["extra_body"]["vl_high_resolution_images"]
|
||||||
|
|
||||||
|
def test_metadata_top_level(self, transport):
|
||||||
|
meta = {"sessionId": "s123", "promptId": "p456"}
|
||||||
|
legacy = transport.build_kwargs(
|
||||||
|
model="qwen3.5", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("qwen-oauth"), qwen_session_metadata=meta,
|
||||||
|
)
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="qwen3.5", messages=_msgs(), tools=None,
|
||||||
|
provider_profile=get_provider_profile("qwen"),
|
||||||
|
qwen_session_metadata=meta,
|
||||||
|
)
|
||||||
|
assert profile["metadata"] == legacy["metadata"] == meta
|
||||||
|
assert "metadata" not in profile.get("extra_body", {})
|
||||||
|
|
||||||
|
def test_message_preprocessing(self, transport):
|
||||||
|
"""Qwen profile normalizes string content to list-of-parts."""
|
||||||
|
msgs = [
|
||||||
|
{"role": "system", "content": "You are helpful."},
|
||||||
|
{"role": "user", "content": "hello"},
|
||||||
|
]
|
||||||
|
profile = transport.build_kwargs(
|
||||||
|
model="qwen3.5", messages=msgs, tools=None,
|
||||||
|
provider_profile=get_provider_profile("qwen"),
|
||||||
|
)
|
||||||
|
out_msgs = profile["messages"]
|
||||||
|
# System message content normalized + cache_control injected
|
||||||
|
assert isinstance(out_msgs[0]["content"], list)
|
||||||
|
assert out_msgs[0]["content"][0]["type"] == "text"
|
||||||
|
assert "cache_control" in out_msgs[0]["content"][-1]
|
||||||
|
# User message content normalized
|
||||||
|
assert isinstance(out_msgs[1]["content"], list)
|
||||||
|
assert out_msgs[1]["content"][0] == {"type": "text", "text": "hello"}
|
||||||
|
|
||||||
|
|
||||||
|
class TestDeveloperRoleParity:
|
||||||
|
"""Developer role swap must work on BOTH legacy and profile paths."""
|
||||||
|
|
||||||
|
def test_legacy_path_swaps_for_gpt5(self, transport):
|
||||||
|
msgs = [{"role": "system", "content": "Be helpful"}, {"role": "user", "content": "hi"}]
|
||||||
|
kw = transport.build_kwargs(
|
||||||
|
model="gpt-5.4", messages=msgs, tools=None,
|
||||||
|
)
|
||||||
|
assert kw["messages"][0]["role"] == "developer"
|
||||||
|
|
||||||
|
def test_profile_path_swaps_for_gpt5(self, transport):
|
||||||
|
msgs = [{"role": "system", "content": "Be helpful"}, {"role": "user", "content": "hi"}]
|
||||||
|
kw = transport.build_kwargs(
|
||||||
|
model="gpt-5.4", messages=msgs, tools=None,
|
||||||
|
provider_profile=get_provider_profile("openrouter"),
|
||||||
|
)
|
||||||
|
assert kw["messages"][0]["role"] == "developer"
|
||||||
|
|
||||||
|
def test_profile_path_no_swap_for_claude(self, transport):
|
||||||
|
msgs = [{"role": "system", "content": "Be helpful"}, {"role": "user", "content": "hi"}]
|
||||||
|
kw = transport.build_kwargs(
|
||||||
|
model="anthropic/claude-sonnet-4.6", messages=msgs, tools=None,
|
||||||
|
provider_profile=get_provider_profile("openrouter"),
|
||||||
|
)
|
||||||
|
assert kw["messages"][0]["role"] == "system"
|
||||||
|
|
||||||
|
|
||||||
|
class TestRequestOverridesParity:
    """request_overrides with extra_body must merge identically on both paths."""

    def test_extra_body_override_legacy(self, transport):
        """An ``extra_body`` override key shows up in the built ``extra_body``."""
        # NOTE(review): this passes the same openrouter profile as
        # test_extra_body_override_profile below, so both tests issue an
        # identical call — confirm whether a no-profile call was intended here.
        override = {"extra_body": {"custom_key": "custom_val"}}
        built = transport.build_kwargs(
            model="gpt-5.4",
            messages=_msgs(),
            tools=None,
            provider_profile=get_provider_profile("openrouter"),
            request_overrides=override,
        )
        assert built["extra_body"]["custom_key"] == "custom_val"

    def test_extra_body_override_profile(self, transport):
        """An ``extra_body`` override key survives when the openrouter profile is used."""
        override = {"extra_body": {"custom_key": "custom_val"}}
        built = transport.build_kwargs(
            model="gpt-5.4",
            messages=_msgs(),
            tools=None,
            provider_profile=get_provider_profile("openrouter"),
            request_overrides=override,
        )
        assert built["extra_body"]["custom_key"] == "custom_val"

    def test_extra_body_override_merges_with_provider_body(self, transport):
        """Override extra_body merges WITH provider extra_body, not replaces."""
        built = transport.build_kwargs(
            model="hermes-3",
            messages=_msgs(),
            tools=None,
            provider_profile=get_provider_profile("nous"),
            request_overrides={"extra_body": {"custom": True}},
        )
        merged = built["extra_body"]
        # Key contributed by the nous profile.
        assert merged["tags"] == ["product=hermes-agent"]
        # Key contributed by the request override.
        assert merged["custom"] is True

    def test_top_level_override(self, transport):
        """A non-``extra_body`` override key lands at the top level of the kwargs."""
        built = transport.build_kwargs(
            model="gpt-5.4",
            messages=_msgs(),
            tools=None,
            provider_profile=get_provider_profile("openrouter"),
            request_overrides={"top_p": 0.9},
        )
        assert built["top_p"] == 0.9
|
||||||
203
tests/providers/test_provider_profiles.py
Normal file
203
tests/providers/test_provider_profiles.py
Normal file
|
|
@ -0,0 +1,203 @@
|
||||||
|
"""Tests for the provider module registry and profiles."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from providers import get_provider_profile, _REGISTRY
|
||||||
|
from providers.base import ProviderProfile, OMIT_TEMPERATURE
|
||||||
|
|
||||||
|
|
||||||
|
class TestRegistry:
    """Lookup behavior of ``get_provider_profile`` and the ``_REGISTRY`` mapping."""

    def test_discovery_populates_registry(self):
        """Looking up ``nvidia`` yields a profile carrying that name."""
        nvidia = get_provider_profile("nvidia")
        assert nvidia is not None
        assert nvidia.name == "nvidia"

    def test_alias_lookup(self):
        """Each lookup key resolves to the expected profile name."""
        expected_names = {
            "kimi": "kimi-coding",
            "moonshot": "kimi-coding",
            "kimi-coding-cn": "kimi-coding-cn",
            "or": "openrouter",
            "nous-portal": "nous",
            "qwen": "qwen-oauth",
            "qwen-portal": "qwen-oauth",
        }
        for lookup_key, profile_name in expected_names.items():
            assert get_provider_profile(lookup_key).name == profile_name

    def test_unknown_provider_returns_none(self):
        """An unregistered name yields ``None``."""
        missing = get_provider_profile("nonexistent-provider")
        assert missing is None

    def test_all_providers_have_name(self):
        """Every registry key equals the ``name`` of the profile stored under it."""
        get_provider_profile("nvidia")  # trigger discovery
        for registry_key, registered in _REGISTRY.items():
            assert registered.name == registry_key
|
||||||
|
|
||||||
|
|
||||||
|
class TestNvidiaProfile:
    """Declared field values of the ``nvidia`` profile."""

    def test_max_tokens(self):
        """``default_max_tokens`` is 16384."""
        assert get_provider_profile("nvidia").default_max_tokens == 16384

    def test_no_special_temperature(self):
        """``fixed_temperature`` is unset."""
        assert get_provider_profile("nvidia").fixed_temperature is None

    def test_base_url(self):
        """``base_url`` contains the nvidia.com host."""
        nvidia_url = get_provider_profile("nvidia").base_url
        assert "nvidia.com" in nvidia_url
|
||||||
|
|
||||||
|
|
||||||
|
class TestKimiProfile:
    """Field values and reasoning extras of the Kimi profiles."""

    def test_temperature_omit(self):
        """``fixed_temperature`` is the ``OMIT_TEMPERATURE`` sentinel."""
        assert get_provider_profile("kimi").fixed_temperature is OMIT_TEMPERATURE

    def test_max_tokens(self):
        """``default_max_tokens`` is 32000."""
        assert get_provider_profile("kimi").default_max_tokens == 32000

    def test_cn_separate_profile(self):
        """``kimi-coding-cn`` has its own name, env var and moonshot.cn URL."""
        china = get_provider_profile("kimi-coding-cn")
        assert china.name == "kimi-coding-cn"
        assert china.env_vars == ("KIMI_CN_API_KEY",)
        assert "moonshot.cn" in china.base_url

    def test_cn_not_alias_of_kimi(self):
        """The CN profile is a distinct object with a different base URL."""
        international = get_provider_profile("kimi-coding")
        china = get_provider_profile("kimi-coding-cn")
        assert international is not china
        assert international.base_url != china.base_url

    def test_thinking_enabled(self):
        """Enabled reasoning with an effort sets thinking and forwards the effort."""
        kimi = get_provider_profile("kimi")
        reasoning = {"enabled": True, "effort": "high"}
        extra_body, top_level = kimi.build_api_kwargs_extras(reasoning_config=reasoning)
        assert extra_body["thinking"] == {"type": "enabled"}
        assert top_level["reasoning_effort"] == "high"

    def test_thinking_disabled(self):
        """Disabled reasoning sets thinking to disabled and omits the effort."""
        kimi = get_provider_profile("kimi")
        extra_body, top_level = kimi.build_api_kwargs_extras(
            reasoning_config={"enabled": False},
        )
        assert extra_body["thinking"] == {"type": "disabled"}
        assert "reasoning_effort" not in top_level

    def test_reasoning_effort_default(self):
        """Enabled reasoning without an effort yields ``medium``."""
        kimi = get_provider_profile("kimi")
        _, top_level = kimi.build_api_kwargs_extras(reasoning_config={"enabled": True})
        assert top_level["reasoning_effort"] == "medium"

    def test_no_config_defaults(self):
        """A ``None`` reasoning config yields enabled thinking at ``medium`` effort."""
        kimi = get_provider_profile("kimi")
        extra_body, top_level = kimi.build_api_kwargs_extras(reasoning_config=None)
        assert extra_body["thinking"] == {"type": "enabled"}
        assert top_level["reasoning_effort"] == "medium"
|
||||||
|
|
||||||
|
|
||||||
|
class TestOpenRouterProfile:
    """``extra_body`` and reasoning hooks of the ``openrouter`` profile."""

    def test_extra_body_with_prefs(self):
        """Provider preferences are placed under the ``provider`` key."""
        openrouter = get_provider_profile("openrouter")
        preferences = {"allow": ["anthropic"]}
        extra_body = openrouter.build_extra_body(provider_preferences=preferences)
        assert extra_body["provider"] == {"allow": ["anthropic"]}

    def test_extra_body_no_prefs(self):
        """With no arguments the extra body is empty."""
        assert get_provider_profile("openrouter").build_extra_body() == {}

    def test_reasoning_full_config(self):
        """The reasoning config is passed through whole."""
        openrouter = get_provider_profile("openrouter")
        extra_body, _ = openrouter.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": "high"},
            supports_reasoning=True,
        )
        assert extra_body["reasoning"] == {"enabled": True, "effort": "high"}

    def test_reasoning_disabled_still_passes(self):
        """OpenRouter passes disabled reasoning through (unlike Nous)."""
        openrouter = get_provider_profile("openrouter")
        extra_body, _ = openrouter.build_api_kwargs_extras(
            reasoning_config={"enabled": False},
            supports_reasoning=True,
        )
        assert extra_body["reasoning"] == {"enabled": False}

    def test_default_reasoning(self):
        """Without a config, reasoning defaults to enabled at ``medium`` effort."""
        openrouter = get_provider_profile("openrouter")
        extra_body, _ = openrouter.build_api_kwargs_extras(supports_reasoning=True)
        assert extra_body["reasoning"] == {"enabled": True, "effort": "medium"}
|
||||||
|
|
||||||
|
|
||||||
|
class TestNousProfile:
    """Tags, auth type and reasoning hooks of the ``nous`` profile."""

    def test_tags(self):
        """The extra body carries the hermes-agent product tag."""
        extra_body = get_provider_profile("nous").build_extra_body()
        assert extra_body["tags"] == ["product=hermes-agent"]

    def test_auth_type(self):
        """``auth_type`` is ``oauth_device_code``."""
        assert get_provider_profile("nous").auth_type == "oauth_device_code"

    def test_reasoning_enabled(self):
        """An enabled reasoning config is passed through whole."""
        nous = get_provider_profile("nous")
        extra_body, _ = nous.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": "medium"},
            supports_reasoning=True,
        )
        assert extra_body["reasoning"] == {"enabled": True, "effort": "medium"}

    def test_reasoning_omitted_when_disabled(self):
        """A disabled reasoning config leaves no ``reasoning`` key."""
        nous = get_provider_profile("nous")
        extra_body, _ = nous.build_api_kwargs_extras(
            reasoning_config={"enabled": False},
            supports_reasoning=True,
        )
        assert "reasoning" not in extra_body
|
||||||
|
|
||||||
|
|
||||||
|
class TestQwenProfile:
    """Field values and hooks of the ``qwen-oauth`` profile."""

    def test_max_tokens(self):
        """``default_max_tokens`` is 65536."""
        assert get_provider_profile("qwen-oauth").default_max_tokens == 65536

    def test_auth_type(self):
        """``auth_type`` is ``oauth_external``."""
        assert get_provider_profile("qwen-oauth").auth_type == "oauth_external"

    def test_extra_body_vl(self):
        """The extra body enables ``vl_high_resolution_images``."""
        extra_body = get_provider_profile("qwen-oauth").build_extra_body()
        assert extra_body["vl_high_resolution_images"] is True

    def test_prepare_messages_normalizes_content(self):
        """String contents become part lists; the system one gains ``cache_control``."""
        qwen = get_provider_profile("qwen-oauth")
        conversation = [
            {"role": "system", "content": "Be helpful"},
            {"role": "user", "content": "hello"},
        ]
        prepared = qwen.prepare_messages(conversation)

        # System message: content normalized to list, cache_control on last part
        system_parts = prepared[0]["content"]
        assert isinstance(system_parts, list)
        assert system_parts[-1].get("cache_control") == {"type": "ephemeral"}
        assert system_parts[-1]["text"] == "Be helpful"

        # User message: content normalized to list
        user_parts = prepared[1]["content"]
        assert isinstance(user_parts, list)
        assert user_parts[0]["text"] == "hello"

    def test_metadata_top_level(self):
        """Session metadata is returned in the top-level dict, not the extra body."""
        qwen = get_provider_profile("qwen-oauth")
        session_meta = {"sessionId": "s123", "promptId": "p456"}
        extra_body, top_level = qwen.build_api_kwargs_extras(
            qwen_session_metadata=session_meta,
        )
        assert top_level["metadata"] == session_meta
        assert "metadata" not in extra_body
|
||||||
|
|
||||||
|
|
||||||
|
class TestBaseProfile:
    """Default hook behavior of a bare ``ProviderProfile``."""

    def test_prepare_messages_passthrough(self):
        """``prepare_messages`` returns the very same list object it was given."""
        base = ProviderProfile(name="test")
        conversation = [{"role": "user", "content": "hi"}]
        prepared = base.prepare_messages(conversation)
        assert prepared is conversation

    def test_build_extra_body_empty(self):
        """``build_extra_body`` yields an empty dict."""
        base = ProviderProfile(name="test")
        assert base.build_extra_body() == {}

    def test_build_api_kwargs_extras_empty(self):
        """``build_api_kwargs_extras`` yields two empty dicts."""
        base = ProviderProfile(name="test")
        extra_body, top_level = base.build_api_kwargs_extras()
        assert extra_body == {}
        assert top_level == {}
|
||||||
258
tests/providers/test_transport_parity.py
Normal file
258
tests/providers/test_transport_parity.py
Normal file
|
|
@ -0,0 +1,258 @@
|
||||||
|
"""Parity tests: pin the exact current transport behavior per provider.
|
||||||
|
|
||||||
|
These tests document the per-provider contract of
ChatCompletionsTransport.build_kwargs(). They were first written against the
flag-based call path and now drive the same assertions through provider
profiles, so every assertion here must keep passing — any failure is
a behavioral regression.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from agent.transports.chat_completions import ChatCompletionsTransport
|
||||||
|
from providers import get_provider_profile
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def transport():
    """Provide a newly constructed ``ChatCompletionsTransport`` to each test."""
    instance = ChatCompletionsTransport()
    return instance
|
||||||
|
|
||||||
|
|
||||||
|
def _simple_messages():
|
||||||
|
return [{"role": "user", "content": "hello"}]
|
||||||
|
|
||||||
|
|
||||||
|
def _max_tokens_fn(n):
|
||||||
|
return {"max_completion_tokens": n}
|
||||||
|
|
||||||
|
|
||||||
|
class TestNvidiaParity:
    """NVIDIA NIM: default max_tokens=16384."""

    @staticmethod
    def _nvidia_kwargs(transport, **overrides):
        """Build kwargs for the nemotron model through the ``nvidia`` profile."""
        nvidia = get_provider_profile("nvidia")
        return transport.build_kwargs(
            model="nvidia/llama-3.1-nemotron-70b-instruct",
            messages=_simple_messages(),
            tools=None,
            max_tokens_param_fn=_max_tokens_fn,
            provider_profile=nvidia,
            **overrides,
        )

    def test_default_max_tokens(self, transport):
        """NVIDIA default max_tokens=16384 comes from profile, not legacy is_nvidia_nim flag."""
        built = self._nvidia_kwargs(transport)
        assert built["max_completion_tokens"] == 16384

    def test_user_max_tokens_overrides(self, transport):
        """An explicit ``max_tokens`` replaces the profile default."""
        built = self._nvidia_kwargs(transport, max_tokens=4096)
        assert built["max_completion_tokens"] == 4096  # user overrides default
|
||||||
|
|
||||||
|
|
||||||
|
class TestKimiParity:
    """Kimi: OMIT temperature, max_tokens=32000, thinking + reasoning_effort."""

    @staticmethod
    def _kimi_kwargs(transport, **overrides):
        """Build kwargs for ``kimi-k2`` through the ``kimi-coding`` profile."""
        return transport.build_kwargs(
            model="kimi-k2",
            messages=_simple_messages(),
            tools=None,
            provider_profile=get_provider_profile("kimi-coding"),
            **overrides,
        )

    def test_temperature_omitted(self, transport):
        """With ``omit_temperature=True`` no ``temperature`` key is emitted."""
        built = self._kimi_kwargs(transport, omit_temperature=True)
        assert "temperature" not in built

    def test_default_max_tokens(self, transport):
        """With no explicit ``max_tokens`` the emitted budget is 32000."""
        built = self._kimi_kwargs(transport, max_tokens_param_fn=_max_tokens_fn)
        assert built["max_completion_tokens"] == 32000

    def test_thinking_enabled(self, transport):
        """Enabled reasoning yields ``extra_body.thinking`` of type ``enabled``."""
        built = self._kimi_kwargs(
            transport,
            reasoning_config={"enabled": True, "effort": "high"},
        )
        assert built["extra_body"]["thinking"] == {"type": "enabled"}

    def test_thinking_disabled(self, transport):
        """Disabled reasoning yields ``extra_body.thinking`` of type ``disabled``."""
        built = self._kimi_kwargs(transport, reasoning_config={"enabled": False})
        assert built["extra_body"]["thinking"] == {"type": "disabled"}

    def test_reasoning_effort_top_level(self, transport):
        """Kimi reasoning_effort is a TOP-LEVEL api_kwargs key, NOT in extra_body."""
        built = self._kimi_kwargs(
            transport,
            reasoning_config={"enabled": True, "effort": "high"},
        )
        assert built.get("reasoning_effort") == "high"
        assert "reasoning_effort" not in built.get("extra_body", {})

    def test_reasoning_effort_default_medium(self, transport):
        """Enabled reasoning without an effort emits ``medium``."""
        built = self._kimi_kwargs(transport, reasoning_config={"enabled": True})
        assert built.get("reasoning_effort") == "medium"
|
||||||
|
|
||||||
|
|
||||||
|
class TestOpenRouterParity:
    """OpenRouter: provider preferences, reasoning in extra_body."""

    @staticmethod
    def _openrouter_kwargs(transport, **overrides):
        """Build kwargs for the Claude model through the ``openrouter`` profile."""
        return transport.build_kwargs(
            model="anthropic/claude-sonnet-4.6",
            messages=_simple_messages(),
            tools=None,
            provider_profile=get_provider_profile("openrouter"),
            **overrides,
        )

    def test_provider_preferences(self, transport):
        """Provider preferences are emitted under ``extra_body.provider``."""
        preferences = {"allow": ["anthropic"], "sort": "price"}
        built = self._openrouter_kwargs(transport, provider_preferences=preferences)
        assert built["extra_body"]["provider"] == preferences

    def test_reasoning_passes_full_config(self, transport):
        """OpenRouter passes the FULL reasoning_config dict, not just effort."""
        reasoning = {"enabled": True, "effort": "high"}
        built = self._openrouter_kwargs(
            transport,
            supports_reasoning=True,
            reasoning_config=reasoning,
        )
        assert built["extra_body"]["reasoning"] == reasoning

    def test_default_reasoning_when_no_config(self, transport):
        """When supports_reasoning=True but no config, adds default."""
        built = self._openrouter_kwargs(transport, supports_reasoning=True)
        expected_default = {"enabled": True, "effort": "medium"}
        assert built["extra_body"]["reasoning"] == expected_default
|
||||||
|
|
||||||
|
|
||||||
|
class TestNousParity:
    """Nous: product tags, reasoning, omit when disabled."""

    @staticmethod
    def _nous_kwargs(transport, **overrides):
        """Build kwargs for the hermes-3 405b model through the ``nous`` profile."""
        return transport.build_kwargs(
            model="hermes-3-llama-3.1-405b",
            messages=_simple_messages(),
            tools=None,
            provider_profile=get_provider_profile("nous"),
            **overrides,
        )

    def test_tags(self, transport):
        """The product tag is emitted under ``extra_body.tags``."""
        built = self._nous_kwargs(transport)
        assert built["extra_body"]["tags"] == ["product=hermes-agent"]

    def test_reasoning_omitted_when_disabled(self, transport):
        """Nous special case: reasoning omitted entirely when disabled."""
        built = self._nous_kwargs(
            transport,
            supports_reasoning=True,
            reasoning_config={"enabled": False},
        )
        assert "reasoning" not in built.get("extra_body", {})

    def test_reasoning_enabled(self, transport):
        """An enabled reasoning config is emitted unchanged in ``extra_body``."""
        reasoning = {"enabled": True, "effort": "high"}
        built = self._nous_kwargs(
            transport,
            supports_reasoning=True,
            reasoning_config=reasoning,
        )
        assert built["extra_body"]["reasoning"] == reasoning
|
||||||
|
|
||||||
|
|
||||||
|
class TestQwenParity:
    """Qwen: max_tokens=65536, vl_high_resolution, metadata top-level."""

    @staticmethod
    def _qwen_kwargs(transport, **overrides):
        """Build kwargs for ``qwen3.5-plus`` through the ``qwen-oauth`` profile."""
        return transport.build_kwargs(
            model="qwen3.5-plus",
            messages=_simple_messages(),
            tools=None,
            provider_profile=get_provider_profile("qwen-oauth"),
            **overrides,
        )

    def test_default_max_tokens(self, transport):
        """With no explicit ``max_tokens`` the emitted budget is 65536."""
        built = self._qwen_kwargs(transport, max_tokens_param_fn=_max_tokens_fn)
        assert built["max_completion_tokens"] == 65536

    def test_vl_high_resolution(self, transport):
        """``extra_body.vl_high_resolution_images`` is switched on."""
        built = self._qwen_kwargs(transport)
        assert built["extra_body"]["vl_high_resolution_images"] is True

    def test_metadata_top_level(self, transport):
        """Qwen metadata goes to top-level api_kwargs, NOT extra_body."""
        session_meta = {"sessionId": "s123", "promptId": "p456"}
        built = self._qwen_kwargs(transport, qwen_session_metadata=session_meta)
        assert built["metadata"] == session_meta
        assert "metadata" not in built.get("extra_body", {})
|
||||||
|
|
||||||
|
|
||||||
|
class TestCustomOllamaParity:
    """Custom/Ollama: num_ctx, think=false — now tested via profile."""

    def test_ollama_num_ctx(self, transport):
        """``ollama_num_ctx`` is emitted as ``extra_body.options.num_ctx``."""
        custom = get_provider_profile("custom")
        built = transport.build_kwargs(
            model="llama3.1",
            messages=_simple_messages(),
            tools=None,
            provider_profile=custom,
            ollama_num_ctx=131072,
        )
        ollama_options = built["extra_body"]["options"]
        assert ollama_options["num_ctx"] == 131072

    def test_think_false_when_disabled(self, transport):
        """Disabled reasoning emits ``extra_body.think`` as ``False``."""
        custom = get_provider_profile("custom")
        reasoning_off = {"enabled": False, "effort": "none"}
        built = transport.build_kwargs(
            model="qwen3:72b",
            messages=_simple_messages(),
            tools=None,
            provider_profile=custom,
            reasoning_config=reasoning_off,
        )
        assert built["extra_body"]["think"] is False
|
||||||
|
|
@ -1117,6 +1117,7 @@ class TestBuildApiKwargs:
|
||||||
assert "temperature" not in kwargs
|
assert "temperature" not in kwargs
|
||||||
|
|
||||||
def test_kimi_coding_endpoint_omits_temperature(self, agent):
|
def test_kimi_coding_endpoint_omits_temperature(self, agent):
|
||||||
|
agent.provider = "kimi-coding"
|
||||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||||
agent._base_url_lower = agent.base_url.lower()
|
agent._base_url_lower = agent.base_url.lower()
|
||||||
agent.model = "kimi-k2.5"
|
agent.model = "kimi-k2.5"
|
||||||
|
|
@ -1129,6 +1130,7 @@ class TestBuildApiKwargs:
|
||||||
def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent):
|
def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent):
|
||||||
"""Kimi endpoint should send max_tokens=32000 and reasoning_effort as
|
"""Kimi endpoint should send max_tokens=32000 and reasoning_effort as
|
||||||
top-level params, matching Kimi CLI's default behavior."""
|
top-level params, matching Kimi CLI's default behavior."""
|
||||||
|
agent.provider = "kimi-coding"
|
||||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||||
agent._base_url_lower = agent.base_url.lower()
|
agent._base_url_lower = agent.base_url.lower()
|
||||||
agent.model = "kimi-for-coding"
|
agent.model = "kimi-for-coding"
|
||||||
|
|
@ -1141,6 +1143,7 @@ class TestBuildApiKwargs:
|
||||||
|
|
||||||
def test_kimi_coding_endpoint_respects_custom_effort(self, agent):
|
def test_kimi_coding_endpoint_respects_custom_effort(self, agent):
|
||||||
"""reasoning_effort should reflect reasoning_config.effort when set."""
|
"""reasoning_effort should reflect reasoning_config.effort when set."""
|
||||||
|
agent.provider = "kimi-coding"
|
||||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||||
agent._base_url_lower = agent.base_url.lower()
|
agent._base_url_lower = agent.base_url.lower()
|
||||||
agent.model = "kimi-for-coding"
|
agent.model = "kimi-for-coding"
|
||||||
|
|
@ -1154,6 +1157,7 @@ class TestBuildApiKwargs:
|
||||||
def test_kimi_coding_endpoint_sends_thinking_extra_body(self, agent):
|
def test_kimi_coding_endpoint_sends_thinking_extra_body(self, agent):
|
||||||
"""Kimi endpoint should send extra_body.thinking={"type":"enabled"}
|
"""Kimi endpoint should send extra_body.thinking={"type":"enabled"}
|
||||||
to activate reasoning mode, mirroring Kimi CLI's with_thinking()."""
|
to activate reasoning mode, mirroring Kimi CLI's with_thinking()."""
|
||||||
|
agent.provider = "kimi-coding"
|
||||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||||
agent._base_url_lower = agent.base_url.lower()
|
agent._base_url_lower = agent.base_url.lower()
|
||||||
agent.model = "kimi-for-coding"
|
agent.model = "kimi-for-coding"
|
||||||
|
|
@ -1167,6 +1171,7 @@ class TestBuildApiKwargs:
|
||||||
"""When reasoning_config.enabled=False, thinking should be disabled
|
"""When reasoning_config.enabled=False, thinking should be disabled
|
||||||
and reasoning_effort should be omitted entirely — mirroring Kimi
|
and reasoning_effort should be omitted entirely — mirroring Kimi
|
||||||
CLI's with_thinking("off") which maps to reasoning_effort=None."""
|
CLI's with_thinking("off") which maps to reasoning_effort=None."""
|
||||||
|
agent.provider = "kimi-coding"
|
||||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||||
agent._base_url_lower = agent.base_url.lower()
|
agent._base_url_lower = agent.base_url.lower()
|
||||||
agent.model = "kimi-for-coding"
|
agent.model = "kimi-for-coding"
|
||||||
|
|
@ -1180,6 +1185,7 @@ class TestBuildApiKwargs:
|
||||||
|
|
||||||
def test_moonshot_endpoint_sends_max_tokens_and_reasoning(self, agent):
|
def test_moonshot_endpoint_sends_max_tokens_and_reasoning(self, agent):
|
||||||
"""api.moonshot.ai should get the same Kimi-compatible params."""
|
"""api.moonshot.ai should get the same Kimi-compatible params."""
|
||||||
|
agent.provider = "kimi-coding"
|
||||||
agent.base_url = "https://api.moonshot.ai/v1"
|
agent.base_url = "https://api.moonshot.ai/v1"
|
||||||
agent._base_url_lower = agent.base_url.lower()
|
agent._base_url_lower = agent.base_url.lower()
|
||||||
agent.model = "kimi-k2.5"
|
agent.model = "kimi-k2.5"
|
||||||
|
|
@ -1193,6 +1199,7 @@ class TestBuildApiKwargs:
|
||||||
|
|
||||||
def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent):
|
def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent):
|
||||||
"""api.moonshot.cn (China endpoint) should get the same params."""
|
"""api.moonshot.cn (China endpoint) should get the same params."""
|
||||||
|
agent.provider = "kimi-coding-cn"
|
||||||
agent.base_url = "https://api.moonshot.cn/v1"
|
agent.base_url = "https://api.moonshot.cn/v1"
|
||||||
agent._base_url_lower = agent.base_url.lower()
|
agent._base_url_lower = agent.base_url.lower()
|
||||||
agent.model = "kimi-k2.5"
|
agent.model = "kimi-k2.5"
|
||||||
|
|
@ -1205,6 +1212,7 @@ class TestBuildApiKwargs:
|
||||||
assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
|
assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
|
||||||
|
|
||||||
def test_provider_preferences_injected(self, agent):
|
def test_provider_preferences_injected(self, agent):
|
||||||
|
agent.provider = "openrouter"
|
||||||
agent.base_url = "https://openrouter.ai/api/v1"
|
agent.base_url = "https://openrouter.ai/api/v1"
|
||||||
agent.providers_allowed = ["Anthropic"]
|
agent.providers_allowed = ["Anthropic"]
|
||||||
messages = [{"role": "user", "content": "hi"}]
|
messages = [{"role": "user", "content": "hi"}]
|
||||||
|
|
@ -1213,6 +1221,7 @@ class TestBuildApiKwargs:
|
||||||
|
|
||||||
def test_reasoning_config_default_openrouter(self, agent):
|
def test_reasoning_config_default_openrouter(self, agent):
|
||||||
"""Default reasoning config for OpenRouter should be medium."""
|
"""Default reasoning config for OpenRouter should be medium."""
|
||||||
|
agent.provider = "openrouter"
|
||||||
agent.base_url = "https://openrouter.ai/api/v1"
|
agent.base_url = "https://openrouter.ai/api/v1"
|
||||||
agent.model = "anthropic/claude-sonnet-4-20250514"
|
agent.model = "anthropic/claude-sonnet-4-20250514"
|
||||||
messages = [{"role": "user", "content": "hi"}]
|
messages = [{"role": "user", "content": "hi"}]
|
||||||
|
|
@ -1222,6 +1231,7 @@ class TestBuildApiKwargs:
|
||||||
assert reasoning["effort"] == "medium"
|
assert reasoning["effort"] == "medium"
|
||||||
|
|
||||||
def test_reasoning_config_custom(self, agent):
|
def test_reasoning_config_custom(self, agent):
|
||||||
|
agent.provider = "openrouter"
|
||||||
agent.base_url = "https://openrouter.ai/api/v1"
|
agent.base_url = "https://openrouter.ai/api/v1"
|
||||||
agent.model = "anthropic/claude-sonnet-4-20250514"
|
agent.model = "anthropic/claude-sonnet-4-20250514"
|
||||||
agent.reasoning_config = {"enabled": False}
|
agent.reasoning_config = {"enabled": False}
|
||||||
|
|
@ -1237,6 +1247,7 @@ class TestBuildApiKwargs:
|
||||||
assert "reasoning" not in kwargs.get("extra_body", {})
|
assert "reasoning" not in kwargs.get("extra_body", {})
|
||||||
|
|
||||||
def test_reasoning_sent_for_supported_openrouter_model(self, agent):
|
def test_reasoning_sent_for_supported_openrouter_model(self, agent):
|
||||||
|
agent.provider = "openrouter"
|
||||||
agent.base_url = "https://openrouter.ai/api/v1"
|
agent.base_url = "https://openrouter.ai/api/v1"
|
||||||
agent.model = "qwen/qwen3.5-plus-02-15"
|
agent.model = "qwen/qwen3.5-plus-02-15"
|
||||||
messages = [{"role": "user", "content": "hi"}]
|
messages = [{"role": "user", "content": "hi"}]
|
||||||
|
|
@ -1244,6 +1255,7 @@ class TestBuildApiKwargs:
|
||||||
assert kwargs["extra_body"]["reasoning"]["effort"] == "medium"
|
assert kwargs["extra_body"]["reasoning"]["effort"] == "medium"
|
||||||
|
|
||||||
def test_reasoning_sent_for_nous_route(self, agent):
|
def test_reasoning_sent_for_nous_route(self, agent):
|
||||||
|
agent.provider = "nous"
|
||||||
agent.base_url = "https://inference-api.nousresearch.com/v1"
|
agent.base_url = "https://inference-api.nousresearch.com/v1"
|
||||||
agent.model = "minimax/minimax-m2.5"
|
agent.model = "minimax/minimax-m2.5"
|
||||||
messages = [{"role": "user", "content": "hi"}]
|
messages = [{"role": "user", "content": "hi"}]
|
||||||
|
|
@ -1251,18 +1263,38 @@ class TestBuildApiKwargs:
|
||||||
assert kwargs["extra_body"]["reasoning"]["effort"] == "medium"
|
assert kwargs["extra_body"]["reasoning"]["effort"] == "medium"
|
||||||
|
|
||||||
def test_reasoning_sent_for_copilot_gpt5(self, agent):
|
def test_reasoning_sent_for_copilot_gpt5(self, agent):
|
||||||
agent.base_url = "https://api.githubcopilot.com"
|
"""Copilot/GitHub Models: GPT-5 reasoning goes in extra_body.reasoning."""
|
||||||
agent.model = "gpt-5.4"
|
from agent.transports import get_transport
|
||||||
messages = [{"role": "user", "content": "hi"}]
|
from providers import get_provider_profile
|
||||||
kwargs = agent._build_api_kwargs(messages)
|
|
||||||
|
transport = get_transport("chat_completions")
|
||||||
|
profile = get_provider_profile("copilot")
|
||||||
|
msgs = [{"role": "user", "content": "hi"}]
|
||||||
|
kwargs = transport.build_kwargs(
|
||||||
|
model="gpt-5.4",
|
||||||
|
messages=msgs,
|
||||||
|
tools=None,
|
||||||
|
supports_reasoning=True,
|
||||||
|
provider_profile=profile,
|
||||||
|
)
|
||||||
assert kwargs["extra_body"]["reasoning"] == {"effort": "medium"}
|
assert kwargs["extra_body"]["reasoning"] == {"effort": "medium"}
|
||||||
|
|
||||||
def test_reasoning_xhigh_normalized_for_copilot(self, agent):
|
def test_reasoning_xhigh_normalized_for_copilot(self, agent):
|
||||||
agent.base_url = "https://api.githubcopilot.com"
|
"""xhigh effort should normalize to high for Copilot GitHub Models."""
|
||||||
agent.model = "gpt-5.4"
|
from agent.transports import get_transport
|
||||||
agent.reasoning_config = {"enabled": True, "effort": "xhigh"}
|
from providers import get_provider_profile
|
||||||
messages = [{"role": "user", "content": "hi"}]
|
|
||||||
kwargs = agent._build_api_kwargs(messages)
|
transport = get_transport("chat_completions")
|
||||||
|
profile = get_provider_profile("copilot")
|
||||||
|
msgs = [{"role": "user", "content": "hi"}]
|
||||||
|
kwargs = transport.build_kwargs(
|
||||||
|
model="gpt-5.4",
|
||||||
|
messages=msgs,
|
||||||
|
tools=None,
|
||||||
|
supports_reasoning=True,
|
||||||
|
reasoning_config={"enabled": True, "effort": "xhigh"},
|
||||||
|
provider_profile=profile,
|
||||||
|
)
|
||||||
assert kwargs["extra_body"]["reasoning"] == {"effort": "high"}
|
assert kwargs["extra_body"]["reasoning"] == {"effort": "high"}
|
||||||
|
|
||||||
def test_reasoning_omitted_for_non_reasoning_copilot_model(self, agent):
|
def test_reasoning_omitted_for_non_reasoning_copilot_model(self, agent):
|
||||||
|
|
@ -1280,6 +1312,7 @@ class TestBuildApiKwargs:
|
||||||
|
|
||||||
|
|
||||||
def test_qwen_portal_formats_messages_and_metadata(self, agent):
|
def test_qwen_portal_formats_messages_and_metadata(self, agent):
|
||||||
|
agent.provider = "qwen-oauth"
|
||||||
agent.base_url = "https://portal.qwen.ai/v1"
|
agent.base_url = "https://portal.qwen.ai/v1"
|
||||||
agent._base_url_lower = agent.base_url.lower()
|
agent._base_url_lower = agent.base_url.lower()
|
||||||
agent.session_id = "sess-123"
|
agent.session_id = "sess-123"
|
||||||
|
|
@ -1296,6 +1329,7 @@ class TestBuildApiKwargs:
|
||||||
assert kwargs["messages"][2]["content"][0]["text"] == "hi"
|
assert kwargs["messages"][2]["content"][0]["text"] == "hi"
|
||||||
|
|
||||||
def test_qwen_portal_normalizes_bare_string_content_parts(self, agent):
|
def test_qwen_portal_normalizes_bare_string_content_parts(self, agent):
|
||||||
|
agent.provider = "qwen-oauth"
|
||||||
agent.base_url = "https://portal.qwen.ai/v1"
|
agent.base_url = "https://portal.qwen.ai/v1"
|
||||||
agent._base_url_lower = agent.base_url.lower()
|
agent._base_url_lower = agent.base_url.lower()
|
||||||
messages = [
|
messages = [
|
||||||
|
|
@ -1308,6 +1342,7 @@ class TestBuildApiKwargs:
|
||||||
assert user_content[1] == {"type": "text", "text": "world"}
|
assert user_content[1] == {"type": "text", "text": "world"}
|
||||||
|
|
||||||
def test_qwen_portal_no_system_message(self, agent):
|
def test_qwen_portal_no_system_message(self, agent):
|
||||||
|
agent.provider = "qwen-oauth"
|
||||||
agent.base_url = "https://portal.qwen.ai/v1"
|
agent.base_url = "https://portal.qwen.ai/v1"
|
||||||
agent._base_url_lower = agent.base_url.lower()
|
agent._base_url_lower = agent.base_url.lower()
|
||||||
messages = [{"role": "user", "content": "hi"}]
|
messages = [{"role": "user", "content": "hi"}]
|
||||||
|
|
@ -1328,6 +1363,7 @@ class TestBuildApiKwargs:
|
||||||
def test_qwen_portal_default_max_tokens(self, agent):
|
def test_qwen_portal_default_max_tokens(self, agent):
|
||||||
"""When max_tokens is None, Qwen Portal gets a default of 65536
|
"""When max_tokens is None, Qwen Portal gets a default of 65536
|
||||||
to prevent reasoning models from exhausting their output budget."""
|
to prevent reasoning models from exhausting their output budget."""
|
||||||
|
agent.provider = "qwen-oauth"
|
||||||
agent.base_url = "https://portal.qwen.ai/v1"
|
agent.base_url = "https://portal.qwen.ai/v1"
|
||||||
agent._base_url_lower = agent.base_url.lower()
|
agent._base_url_lower = agent.base_url.lower()
|
||||||
agent.max_tokens = None
|
agent.max_tokens = None
|
||||||
|
|
|
||||||
|
|
@ -93,6 +93,42 @@ This path includes everything from Path A plus:
|
||||||
11. `run_agent.py`
|
11. `run_agent.py`
|
||||||
12. `pyproject.toml` if a provider SDK is required
|
12. `pyproject.toml` if a provider SDK is required
|
||||||
|
|
||||||
|
## Fast path: Simple API-key providers
|
||||||
|
|
||||||
|
If your provider is just an OpenAI-compatible endpoint that authenticates with a single API key, you do not need to touch `auth.py`, `runtime_provider.py`, `main.py`, or any of the other files in the full checklist below.
|
||||||
|
|
||||||
|
All you need is:
|
||||||
|
|
||||||
|
1. A file in `providers/` (e.g. `providers/myprovider.py`) that calls `register_provider()` with the provider config.
|
||||||
|
2. Nothing else — no other file needs editing. `auth.py` auto-registers every file in `providers/` at startup via a module-level import sweep.
|
||||||
|
|
||||||
|
When you add a `providers/*.py` file and call `register_provider()`, the following wire up automatically:
|
||||||
|
|
||||||
|
1. `PROVIDER_REGISTRY` entry in `auth.py` (credential resolution, env-var lookup)
|
||||||
|
2. `api_mode` set to `chat_completions`
|
||||||
|
3. `base_url` sourced from the config or the declared env var
|
||||||
|
4. `env_vars` checked in priority order for the API key
|
||||||
|
5. `fallback_models` list registered for the provider
|
||||||
|
6. `--provider` CLI flag accepts the provider id
|
||||||
|
7. `hermes model` menu includes the provider
|
||||||
|
8. `hermes setup` wizard delegates to `main.py` automatically
|
||||||
|
9. `provider:model` alias syntax works
|
||||||
|
10. Runtime resolver returns the correct `base_url` and `api_key`
|
||||||
|
11. `HERMES_INFERENCE_PROVIDER` env-var override accepts the provider id
|
||||||
|
12. Fallback model activation can switch into the provider cleanly
|
||||||
|
|
||||||
|
See `providers/nvidia.py` or `providers/gmi.py` as a template.
|
||||||
|
|
||||||
|
## Full path: OAuth and complex providers
|
||||||
|
|
||||||
|
Use the full checklist below when your provider needs any of the following:
|
||||||
|
|
||||||
|
- OAuth or token refresh (Nous Portal, Codex, Google Gemini, Qwen Portal, Copilot)
|
||||||
|
- A non-OpenAI API shape that requires a new adapter (Anthropic Messages, Codex Responses)
|
||||||
|
- Custom endpoint detection or multi-region probing (z.ai, Kimi)
|
||||||
|
- A curated static model catalog or live `/models` fetch
|
||||||
|
- Provider-specific `hermes model` menu entries with bespoke auth flows
|
||||||
|
|
||||||
## Step 1: Pick one canonical provider id
|
## Step 1: Pick one canonical provider id
|
||||||
|
|
||||||
Choose a single provider id and use it everywhere.
|
Choose a single provider id and use it everywhere.
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,9 @@ Primary implementation:
|
||||||
- `hermes_cli/auth.py` — provider registry, `resolve_provider()`
|
- `hermes_cli/auth.py` — provider registry, `resolve_provider()`
|
||||||
- `hermes_cli/model_switch.py` — shared `/model` switch pipeline (CLI + gateway)
|
- `hermes_cli/model_switch.py` — shared `/model` switch pipeline (CLI + gateway)
|
||||||
- `agent/auxiliary_client.py` — auxiliary model routing
|
- `agent/auxiliary_client.py` — auxiliary model routing
|
||||||
|
- `providers/` — declarative source for `api_mode`, `base_url`, `env_vars`, `fallback_models` (auto-registered into `auth.py` `PROVIDER_REGISTRY` at startup)
|
||||||
|
|
||||||
|
`get_provider_profile()` in `providers/` returns the `ProviderProfile` for a given provider id. `runtime_provider.py` calls this at resolution time to get the canonical `base_url`, `env_vars` priority list, `api_mode`, and `fallback_models`, so that data does not have to be duplicated across multiple files. Adding a new `providers/*.py` file that calls `register_provider()` is enough for `runtime_provider.py` to pick it up — no branch needed in the resolver itself.
|
||||||
|
|
||||||
If you are trying to add a new first-class inference provider, read [Adding Providers](./adding-providers.md) alongside this page.
|
If you are trying to add a new first-class inference provider, read [Adding Providers](./adding-providers.md) alongside this page.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -480,6 +480,44 @@ model:
|
||||||
For on-prem deployments (DGX Spark, local GPU), set `NVIDIA_BASE_URL=http://localhost:8000/v1`. NIM exposes the same OpenAI-compatible chat completions API as build.nvidia.com, so switching between cloud and local is a one-line env-var change.
|
For on-prem deployments (DGX Spark, local GPU), set `NVIDIA_BASE_URL=http://localhost:8000/v1`. NIM exposes the same OpenAI-compatible chat completions API as build.nvidia.com, so switching between cloud and local is a one-line env-var change.
|
||||||
:::
|
:::
|
||||||
|
|
||||||
|
### GMI Cloud
|
||||||
|
|
||||||
|
Open and reasoning models via [GMI Cloud](https://inference.gmi.ai) — OpenAI-compatible API, API key authentication.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# GMI Cloud
|
||||||
|
hermes chat --provider gmi --model deepseek-ai/DeepSeek-R1
|
||||||
|
# Requires: GMI_API_KEY in ~/.hermes/.env
|
||||||
|
```
|
||||||
|
|
||||||
|
Or set it permanently in `config.yaml`:
|
||||||
|
```yaml
|
||||||
|
model:
|
||||||
|
provider: "gmi"
|
||||||
|
default: "deepseek-ai/DeepSeek-R1"
|
||||||
|
```
|
||||||
|
|
||||||
|
The base URL can be overridden with `GMI_BASE_URL` (default: `https://api.gmi.ai/v1`).
|
||||||
|
|
||||||
|
### StepFun
|
||||||
|
|
||||||
|
Step-series models via [StepFun](https://platform.stepfun.com) — OpenAI-compatible API, API key authentication.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# StepFun
|
||||||
|
hermes chat --provider stepfun --model step-3-mini
|
||||||
|
# Requires: STEPFUN_API_KEY in ~/.hermes/.env
|
||||||
|
```
|
||||||
|
|
||||||
|
Or set it permanently in `config.yaml`:
|
||||||
|
```yaml
|
||||||
|
model:
|
||||||
|
provider: "stepfun"
|
||||||
|
default: "step-3-mini"
|
||||||
|
```
|
||||||
|
|
||||||
|
The base URL can be overridden with `STEPFUN_BASE_URL` (default: `https://api.stepfun.com/v1`).
|
||||||
|
|
||||||
### Hugging Face Inference Providers
|
### Hugging Face Inference Providers
|
||||||
|
|
||||||
[Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) routes to 20+ open models through a unified OpenAI-compatible endpoint (`router.huggingface.co/v1`). Requests are automatically routed to the fastest available backend (Groq, Together, SambaNova, etc.) with automatic failover.
|
[Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) routes to 20+ open models through a unified OpenAI-compatible endpoint (`router.huggingface.co/v1`). Requests are automatically routed to the fastest available backend (Groq, Together, SambaNova, etc.) with automatic failover.
|
||||||
|
|
@ -1239,7 +1277,7 @@ fallback_model:
|
||||||
|
|
||||||
When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session.
|
When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session.
|
||||||
|
|
||||||
Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `tencent-tokenhub`, `custom`.
|
Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `alibaba`, `tencent-tokenhub`, `custom`.
|
||||||
|
|
||||||
:::tip
|
:::tip
|
||||||
Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers).
|
Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers).
|
||||||
|
|
|
||||||
|
|
@ -69,6 +69,10 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
|
||||||
| `DEEPSEEK_BASE_URL` | Custom DeepSeek API base URL |
|
| `DEEPSEEK_BASE_URL` | Custom DeepSeek API base URL |
|
||||||
| `NVIDIA_API_KEY` | NVIDIA NIM API key — Nemotron and open models ([build.nvidia.com](https://build.nvidia.com)) |
|
| `NVIDIA_API_KEY` | NVIDIA NIM API key — Nemotron and open models ([build.nvidia.com](https://build.nvidia.com)) |
|
||||||
| `NVIDIA_BASE_URL` | Override NVIDIA base URL (default: `https://integrate.api.nvidia.com/v1`; set to `http://localhost:8000/v1` for a local NIM endpoint) |
|
| `NVIDIA_BASE_URL` | Override NVIDIA base URL (default: `https://integrate.api.nvidia.com/v1`; set to `http://localhost:8000/v1` for a local NIM endpoint) |
|
||||||
|
| `GMI_API_KEY` | GMI Cloud API key — open and reasoning models ([inference.gmi.ai](https://inference.gmi.ai)) |
|
||||||
|
| `GMI_BASE_URL` | Override GMI Cloud base URL (default: `https://api.gmi.ai/v1`) |
|
||||||
|
| `STEPFUN_API_KEY` | StepFun API key — Step-series models ([platform.stepfun.com](https://platform.stepfun.com)) |
|
||||||
|
| `STEPFUN_BASE_URL` | Override StepFun base URL (default: `https://api.stepfun.com/v1`) |
|
||||||
| `OLLAMA_API_KEY` | Ollama Cloud API key — managed Ollama catalog without local GPU ([ollama.com/settings/keys](https://ollama.com/settings/keys)) |
|
| `OLLAMA_API_KEY` | Ollama Cloud API key — managed Ollama catalog without local GPU ([ollama.com/settings/keys](https://ollama.com/settings/keys)) |
|
||||||
| `OLLAMA_BASE_URL` | Override Ollama Cloud base URL (default: `https://ollama.com/v1`) |
|
| `OLLAMA_BASE_URL` | Override Ollama Cloud base URL (default: `https://ollama.com/v1`) |
|
||||||
| `XAI_API_KEY` | xAI (Grok) API key for chat + TTS ([console.x.ai](https://console.x.ai/)) |
|
| `XAI_API_KEY` | xAI (Grok) API key for chat + TTS ([console.x.ai](https://console.x.ai/)) |
|
||||||
|
|
@ -99,7 +103,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
||||||
|
|
||||||
| Variable | Description |
|
| Variable | Description |
|
||||||
|----------|-------------|
|
|----------|-------------|
|
||||||
| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth` (browser OAuth login — no API key required; see [MiniMax OAuth guide](../guides/minimax-oauth.md)), `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `tencent-tokenhub` (default: `auto`) |
|
| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth` (browser OAuth login — no API key required; see [MiniMax OAuth guide](../guides/minimax-oauth.md)), `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `tencent-tokenhub` (default: `auto`) |
|
||||||
| `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
|
| `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
|
||||||
| `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
|
| `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
|
||||||
| `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |
|
| `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |
|
||||||
|
|
|
||||||
|
|
@ -60,6 +60,8 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
|
||||||
| MiniMax (China) | `minimax-cn` | `MINIMAX_CN_API_KEY` |
|
| MiniMax (China) | `minimax-cn` | `MINIMAX_CN_API_KEY` |
|
||||||
| DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
|
| DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
|
||||||
| NVIDIA NIM | `nvidia` | `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
|
| NVIDIA NIM | `nvidia` | `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
|
||||||
|
| GMI Cloud | `gmi` | `GMI_API_KEY` (optional: `GMI_BASE_URL`) |
|
||||||
|
| StepFun | `stepfun` | `STEPFUN_API_KEY` (optional: `STEPFUN_BASE_URL`) |
|
||||||
| Ollama Cloud | `ollama-cloud` | `OLLAMA_API_KEY` |
|
| Ollama Cloud | `ollama-cloud` | `OLLAMA_API_KEY` |
|
||||||
| Google Gemini (OAuth) | `google-gemini-cli` | `hermes model` (Google OAuth; optional: `HERMES_GEMINI_PROJECT_ID`) |
|
| Google Gemini (OAuth) | `google-gemini-cli` | `hermes model` (Google OAuth; optional: `HERMES_GEMINI_PROJECT_ID`) |
|
||||||
| Google AI Studio | `gemini` | `GOOGLE_API_KEY` (alias: `GEMINI_API_KEY`) |
|
| Google AI Studio | `gemini` | `GOOGLE_API_KEY` (alias: `GEMINI_API_KEY`) |
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue