mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-14 04:02:26 +00:00
feat: provider modules — ProviderProfile ABC, 33 providers, fetch_models, transport single-path
Introduces providers/ package — single source of truth for every inference provider. Adding a simple api-key provider now requires one providers/<name>.py file with zero edits anywhere else. What this PR ships: - providers/ package (ProviderProfile ABC + 33 profiles across 4 api_modes) - ProviderProfile declarative fields: name, api_mode, aliases, display_name, env_vars, base_url, models_url, auth_type, fallback_models, hostname, default_headers, fixed_temperature, default_max_tokens, default_aux_model - 4 overridable hooks: prepare_messages, build_extra_body, build_api_kwargs_extras, fetch_models - chat_completions.build_kwargs: profile path via _build_kwargs_from_profile, legacy flag path retained for lmstudio/tencent-tokenhub (which have session-aware reasoning probing that doesn't map cleanly to hooks yet) - run_agent.py: profile path for all registered providers; legacy path variable scoping fixed (all flags defined before branching) - Auto-wires: auth.PROVIDER_REGISTRY, models.CANONICAL_PROVIDERS, doctor health checks, config.OPTIONAL_ENV_VARS, model_metadata._URL_TO_PROVIDER - GeminiProfile: thinking_config translation (native + openai-compat nested) - New tests/providers/ (79 tests covering profile declarations, transport parity, hook overrides, e2e kwargs assembly) Deltas vs original PR (salvaged onto current main): - Added profiles: alibaba-coding-plan, azure-foundry, minimax-oauth (were added to main since original PR) - Skipped profiles: lmstudio, tencent-tokenhub stay on legacy path (their reasoning_effort probing has no clean hook equivalent yet) - Removed lmstudio alias from custom profile (it's a separate provider now) - Skipped openrouter/custom from PROVIDER_REGISTRY auto-extension (resolve_provider special-cases them; adding breaks runtime resolution) - runtime_provider: profile.api_mode only as fallback when URL detection finds nothing (was breaking minimax /v1 override) - Preserved main's legacy-path improvements: deepseek reasoning_content 
preserve, gemini Gemma skip, OpenRouter response caching, Anthropic 1M beta recovery, etc. - Kept agent/copilot_acp_client.py in place (rejected PR's relocation — main has 7 fixes landed since; relocation would revert them) - _API_KEY_PROVIDER_AUX_MODELS alias kept for backward compat with existing test imports Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Closes #14418
This commit is contained in:
parent
2b500ed68a
commit
20a4f79ed1
57 changed files with 3149 additions and 177 deletions
307
providers/README.md
Normal file
307
providers/README.md
Normal file
|
|
@ -0,0 +1,307 @@
|
|||
# providers/
|
||||
|
||||
Single source of truth for every inference provider Hermes knows about.
|
||||
|
||||
Each provider is declared once here as a `ProviderProfile`. Every other layer —
|
||||
auth resolution, transport kwargs, model listing, runtime routing — reads from
|
||||
these profiles instead of maintaining its own parallel data.
|
||||
|
||||
---
|
||||
|
||||
## Directory layout
|
||||
|
||||
```
|
||||
providers/
|
||||
├── base.py ProviderProfile dataclass + OMIT_TEMPERATURE sentinel
|
||||
├── __init__.py Registry: register_provider(), get_provider_profile()
|
||||
├── README.md This file
|
||||
│
|
||||
├── # Simple providers — just identity + auth + endpoint
|
||||
├── alibaba.py Alibaba Cloud DashScope
|
||||
├── arcee.py Arcee AI
|
||||
├── bedrock.py AWS Bedrock (api_mode=bedrock_converse)
|
||||
├── deepseek.py DeepSeek
|
||||
├── huggingface.py Hugging Face Inference API
|
||||
├── kilocode.py Kilo Code
|
||||
├── minimax.py MiniMax (international + CN)
|
||||
├── nvidia.py NVIDIA NIM (default_max_tokens=16384)
|
||||
├── ollama_cloud.py Ollama Cloud
|
||||
├── stepfun.py StepFun
|
||||
├── xiaomi.py Xiaomi MiMo
|
||||
├── xai.py xAI Grok (api_mode=codex_responses)
|
||||
├── zai.py Z.AI / GLM
|
||||
│
|
||||
├── # Medium — one or two quirks
|
||||
├── anthropic.py Native Anthropic (x-api-key header, api_mode=anthropic_messages)
|
||||
├── copilot.py GitHub Copilot (auth_type=copilot, reasoning per model)
|
||||
├── copilot_acp.py Copilot ACP subprocess (api_mode=copilot_acp)
|
||||
├── custom.py Custom/Ollama local (think=false, num_ctx)
|
||||
├── gemini.py Google Gemini AI Studio + Cloud Code OAuth
|
||||
├── kimi.py Kimi Coding (OMIT_TEMPERATURE, thinking, dual endpoint)
|
||||
├── openai_codex.py OpenAI Codex OAuth (api_mode=codex_responses)
|
||||
├── opencode.py OpenCode Zen + Go (per-model api_mode routing)
|
||||
│
|
||||
├── # Complex — subclasses with multiple overrides
|
||||
├── nous.py Nous Portal (tags, attribution, reasoning omit-when-disabled)
|
||||
├── openrouter.py OpenRouter (provider preferences, public model fetch)
|
||||
├── qwen.py Qwen OAuth (message normalization, cache_control, vl_hires)
|
||||
└── vercel.py Vercel AI Gateway (attribution headers, reasoning passthrough)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ProviderProfile fields
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class ProviderProfile:
|
||||
# Identity
|
||||
name: str # canonical ID — auto-registered as PROVIDER_REGISTRY key for new api-key providers
|
||||
api_mode: str # "chat_completions" | "anthropic_messages" |
|
||||
# "codex_responses" | "bedrock_converse" | "copilot_acp"
|
||||
aliases: tuple # alternate names resolved by get_provider_profile()
|
||||
|
||||
# Auth & endpoints
|
||||
env_vars: tuple # env var names holding the API key, in priority order
|
||||
base_url: str # default inference endpoint
|
||||
models_url: str # explicit models endpoint; falls back to {base_url}/models
|
||||
# set when the models catalog lives at a different URL
|
||||
# (e.g. OpenRouter: public /api/v1/models vs /api/v1 inference)
|
||||
auth_type: str # "api_key" | "oauth_device_code" | "oauth_external" |
|
||||
# "copilot" | "aws_sdk" | "external_process"
|
||||
|
||||
# Client-level quirks
|
||||
default_headers: dict # extra HTTP headers sent on every request
|
||||
|
||||
# Request-level quirks
|
||||
fixed_temperature: Any # None = use caller's default; OMIT_TEMPERATURE = don't send
|
||||
default_max_tokens: int|None # inject max_tokens when caller omits it
|
||||
default_aux_model: str # cheap model for auxiliary tasks (compression, vision, etc.)
|
||||
# empty string = use main model (default)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Hooks (override in a subclass)
|
||||
|
||||
| Method | When to override |
|
||||
|--------|-----------------|
|
||||
| `prepare_messages(messages)` | Provider needs message pre-processing (Qwen: string → list-of-parts, cache_control) |
|
||||
| `build_extra_body(*, session_id, **ctx)` | Provider-specific `extra_body` fields (Nous: tags, OpenRouter: provider preferences) |
|
||||
| `build_api_kwargs_extras(*, reasoning_config, **ctx)` | Returns `(extra_body_additions, top_level_kwargs)` — use when some fields go to `extra_body` and some go top-level (Kimi: `reasoning_effort` top-level; OpenRouter: `reasoning` in extra_body) |
|
||||
| `fetch_models(*, api_key, timeout)` | Custom model listing (Anthropic: x-api-key header; OpenRouter: public endpoint, no auth; Bedrock/copilot-acp: return None) |
|
||||
|
||||
All hooks have safe defaults — only override what differs from the base.
|
||||
|
||||
---
|
||||
|
||||
## How to add a new provider
|
||||
|
||||
### 1. Simple (standard OpenAI-compatible endpoint)
|
||||
|
||||
```python
|
||||
# providers/myprovider.py
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
myprovider = ProviderProfile(
|
||||
name="myprovider", # must match id in hermes_cli/auth.py PROVIDER_REGISTRY
|
||||
aliases=("my-provider", "myp"),
|
||||
api_mode="chat_completions",
|
||||
env_vars=("MYPROVIDER_API_KEY",),
|
||||
base_url="https://api.myprovider.com/v1",
|
||||
auth_type="api_key",
|
||||
)
|
||||
|
||||
register_provider(myprovider)
|
||||
```
|
||||
|
||||
The default `fetch_models()` will call `GET https://api.myprovider.com/v1/models`
|
||||
with Bearer auth automatically. No override needed for standard `/v1/models`.
|
||||
|
||||
### 2. With quirks (subclass)
|
||||
|
||||
```python
|
||||
# providers/myprovider.py
|
||||
from typing import Any
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
|
||||
class MyProviderProfile(ProviderProfile):
|
||||
"""My provider — custom reasoning header."""
|
||||
|
||||
def build_api_kwargs_extras(
|
||||
self,
|
||||
*,
|
||||
reasoning_config: dict | None = None,
|
||||
**ctx: Any,
|
||||
) -> tuple[dict[str, Any], dict[str, Any]]:
|
||||
extra_body: dict[str, Any] = {}
|
||||
if reasoning_config:
|
||||
extra_body["my_reasoning"] = reasoning_config.get("effort", "medium")
|
||||
return extra_body, {}
|
||||
|
||||
def fetch_models(
|
||||
self,
|
||||
*,
|
||||
api_key: str | None = None,
|
||||
timeout: float = 8.0,
|
||||
) -> list[str] | None:
|
||||
# Override only if your endpoint differs from standard /v1/models
|
||||
return super().fetch_models(api_key=api_key, timeout=timeout)
|
||||
|
||||
|
||||
myprovider = MyProviderProfile(
|
||||
name="myprovider",
|
||||
aliases=("myp",),
|
||||
env_vars=("MYPROVIDER_API_KEY",),
|
||||
base_url="https://api.myprovider.com/v1",
|
||||
)
|
||||
|
||||
register_provider(myprovider)
|
||||
```
|
||||
|
||||
### 3. Wire it up
|
||||
|
||||
After creating the file, add `name` to the `_PROFILE_ACTIVE_PROVIDERS` set in
|
||||
`run_agent.py` once you've verified parity against the legacy flag path. Start
|
||||
with a simple provider (no message prep, no reasoning quirks) and work up.
|
||||
|
||||
---
|
||||
|
||||
## fetch_models contract
|
||||
|
||||
```python
|
||||
def fetch_models(
|
||||
self,
|
||||
*,
|
||||
api_key: str | None = None,
|
||||
timeout: float = 8.0,
|
||||
) -> list[str] | None:
|
||||
...
|
||||
```
|
||||
|
||||
- Returns `list[str]`: model IDs from the provider's live endpoint.
|
||||
- Returns `None`: provider doesn't support REST model listing (Bedrock, copilot-acp),
|
||||
or the request failed. Callers **must** fall back to `_PROVIDER_MODELS` on `None`.
|
||||
- Never raises — swallow exceptions and return `None`.
|
||||
- Default implementation: `GET {base_url}/models` with Bearer auth. Works for any
|
||||
standard OpenAI-compatible provider.
|
||||
|
||||
**Override when:**
|
||||
- Auth header is not `Bearer` (Anthropic: `x-api-key`)
|
||||
- Endpoint path differs from `/models` AND you can't just set `models_url` (OpenRouter: public endpoint, pass `api_key=None` explicitly)
|
||||
- Response format differs (extra wrapping, non-standard `id` field)
|
||||
- Provider has no REST endpoint (Bedrock, copilot-acp → return `None`)
|
||||
- Filtering needed post-fetch (only tool-capable models, etc.)
|
||||
|
||||
Use `models_url` instead of overriding when the only difference is the URL:
|
||||
|
||||
```python
|
||||
# No subclass needed — just set models_url
|
||||
myprovider = ProviderProfile(
|
||||
name="myprovider",
|
||||
base_url="https://api.myprovider.com/v1",
|
||||
models_url="https://catalog.myprovider.com/models", # different host
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Debugging
|
||||
|
||||
### Check if a provider resolves
|
||||
|
||||
```python
|
||||
from providers import get_provider_profile
|
||||
|
||||
p = get_provider_profile("myprovider")
|
||||
print(p) # ProviderProfile(name='myprovider', ...)
|
||||
print(p.base_url)
|
||||
print(p.api_mode)
|
||||
```
|
||||
|
||||
### Check all registered providers
|
||||
|
||||
```python
|
||||
from providers import _REGISTRY
|
||||
print(list(_REGISTRY.keys()))
|
||||
```
|
||||
|
||||
### Test live model fetch
|
||||
|
||||
```python
|
||||
import os
|
||||
from providers import get_provider_profile
|
||||
|
||||
p = get_provider_profile("myprovider")
|
||||
key = os.getenv("MYPROVIDER_API_KEY")
|
||||
models = p.fetch_models(api_key=key, timeout=5.0)
|
||||
print(models) # list of model IDs, or None on failure
|
||||
```
|
||||
|
||||
### Test alias resolution
|
||||
|
||||
```python
|
||||
from providers import get_provider_profile
|
||||
|
||||
# All of these should return the same profile
|
||||
assert get_provider_profile("openrouter").name == "openrouter"
|
||||
assert get_provider_profile("or").name == "openrouter"
|
||||
```
|
||||
|
||||
### Run the provider test suite
|
||||
|
||||
```bash
|
||||
# From the repo root
|
||||
source venv/bin/activate
|
||||
python -m pytest tests/providers/ -v
|
||||
```
|
||||
|
||||
### Check ruff + ty compliance
|
||||
|
||||
```bash
|
||||
source venv/bin/activate
|
||||
ruff format providers/*.py
|
||||
ruff check providers/*.py --select UP,E,F,I,W
|
||||
ty check providers/*.py
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Common mistakes
|
||||
|
||||
**Wrong `name`** — must be the same string that appears as the key in
|
||||
`hermes_cli/auth.py` `PROVIDER_REGISTRY`. New api-key providers auto-register
|
||||
into `PROVIDER_REGISTRY` from the profile, so the name IS the key. For providers
|
||||
with a pre-existing `PROVIDER_REGISTRY` entry, use the exact `id` field value.
|
||||
|
||||
**Wrong `env_vars`** — separate API-key vars from base-URL override vars in the
|
||||
tuple. Env vars that end with `_BASE_URL` or `_URL` are treated as URL overrides;
|
||||
everything else is treated as an API key. Getting this wrong causes the doctor
|
||||
health check to send a URL string as a Bearer token.
|
||||
|
||||
**Wrong `base_url`** — several providers have non-obvious paths:
|
||||
`stepfun: /step_plan/v1`, `opencode-go: /zen/go/v1`. The profile's `base_url`
|
||||
is also used as the `inference_base_url` when auto-registering into `PROVIDER_REGISTRY`
|
||||
for new providers, so it must be correct for auth resolution to work.
|
||||
|
||||
**Skipping `api_mode`** — defaults to `chat_completions`. Providers that use
|
||||
`anthropic_messages`, `codex_responses`, `bedrock_converse`, or `copilot_acp`
|
||||
must set it explicitly.
|
||||
|
||||
**Forgetting `register_provider()`** — auto-discovery uses `pkgutil.iter_modules`
to import every module in the package, but a profile only lands in `_REGISTRY`
if its module calls `register_provider()` at module level. Without that call the
module is imported and the profile is silently never registered.
|
||||
|
||||
**`fetch_models` returning the wrong shape** — must return `list[str]` (plain
|
||||
model IDs), not `list[tuple]` or `list[dict]`. Callers expect plain strings.
|
||||
|
||||
**Wrong `build_api_kwargs_extras` return shape** — must return a 2-tuple
|
||||
`(extra_body_dict, top_level_dict)`. Returning a single dict causes a
|
||||
`ValueError: not enough values to unpack` in the transport.
|
||||
|
||||
**Swapped `build_api_kwargs_extras` tuple order** — the first element is merged
into `extra_body` and the second into the top-level API kwargs. Swapping the two
dicts raises no error; it silently sends fields to the wrong place.
|
||||
76
providers/__init__.py
Normal file
76
providers/__init__.py
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
"""Provider module registry.
|
||||
|
||||
Auto-discovers ProviderProfile instances from providers/*.py modules.
|
||||
Each module registers its profile(s) by calling register_provider() at module level.
|
||||
|
||||
Usage:
|
||||
from providers import get_provider_profile
|
||||
profile = get_provider_profile("nvidia") # returns ProviderProfile or None
|
||||
profile = get_provider_profile("kimi") # checks name + aliases
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from providers.base import OMIT_TEMPERATURE, ProviderProfile # noqa: F401
|
||||
|
||||
# Canonical provider name -> profile. Written by register_provider().
_REGISTRY: dict[str, ProviderProfile] = {}
# Alias -> canonical provider name (a string key into _REGISTRY, not a profile).
_ALIASES: dict[str, str] = {}
# Set to True by _discover_providers() so the package scan runs at most once.
_discovered = False
|
||||
|
||||
|
||||
def register_provider(profile: ProviderProfile) -> None:
    """Add *profile* to the registry under its name and map each alias to it.

    An existing entry with the same name, or an existing alias mapping, is
    overwritten without warning.
    """
    canonical = profile.name
    _REGISTRY[canonical] = profile
    # Every alias resolves to the canonical name, not to the profile object.
    _ALIASES.update(dict.fromkeys(profile.aliases, canonical))
|
||||
|
||||
|
||||
def get_provider_profile(name: str) -> ProviderProfile | None:
    """Resolve *name* (canonical name or alias) to its provider profile.

    Runs provider discovery on first use. Returns None when no profile is
    registered under the resolved name, so callers can fall back to generic
    handling.
    """
    if not _discovered:
        _discover_providers()
    # Unknown aliases fall through unchanged and are tried as canonical names.
    return _REGISTRY.get(_ALIASES.get(name, name))
|
||||
|
||||
|
||||
def list_providers() -> list[ProviderProfile]:
    """Return every registered profile once, in registration order.

    Runs provider discovery on first use. Profiles are de-duplicated by object
    identity, so a profile object stored under more than one registry key is
    reported a single time.
    """
    if not _discovered:
        _discover_providers()
    # Keyed by id(): the first occurrence wins and insertion order is kept.
    unique: dict[int, ProviderProfile] = {}
    for candidate in _REGISTRY.values():
        unique.setdefault(id(candidate), candidate)
    return list(unique.values())
|
||||
|
||||
|
||||
def _discover_providers() -> None:
    """Import every provider module so its module-level registration runs.

    Scans the ``providers`` package with ``pkgutil.iter_modules`` and imports
    each module except ``base`` and names starting with an underscore. Runs at
    most once per process.

    A module that fails to import, for any reason, is logged and skipped. One
    broken profile must not abort discovery: the ``_discovered`` flag is
    already set by then, so the remaining providers would otherwise stay
    unregistered for the rest of the process.
    """
    global _discovered
    if _discovered:
        return
    # Set the flag before importing so that a nested call made while provider
    # modules are being imported returns immediately instead of re-scanning.
    _discovered = True

    import importlib
    import logging
    import pkgutil

    import providers as _pkg

    log = logging.getLogger(__name__)
    for _importer, modname, _ispkg in pkgutil.iter_modules(_pkg.__path__):
        if modname.startswith("_") or modname == "base":
            continue
        try:
            importlib.import_module(f"providers.{modname}")
        except Exception as e:  # not just ImportError: see docstring
            log.warning("Failed to import provider module %s: %s", modname, e)
|
||||
13
providers/alibaba.py
Normal file
13
providers/alibaba.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
"""Alibaba Cloud DashScope provider profile."""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# Plain OpenAI-compatible profile: api_mode and auth_type are not passed, so
# they take the ProviderProfile defaults.
alibaba = ProviderProfile(
    name="alibaba",
    aliases=("dashscope", "alibaba-cloud", "qwen-dashscope"),
    env_vars=("DASHSCOPE_API_KEY",),
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)

# Registration is an import-time side effect of this module.
register_provider(alibaba)
|
||||
21
providers/alibaba_coding_plan.py
Normal file
21
providers/alibaba_coding_plan.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
"""Alibaba Cloud Coding Plan provider profile.
|
||||
|
||||
Separate from the standard `alibaba` profile because it hits a different
|
||||
endpoint (coding-intl.dashscope.aliyuncs.com) with a dedicated API key tier.
|
||||
"""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# env_vars mixes two API-key variables (tried in the listed order) with one
# *_BASE_URL variable that overrides the endpoint rather than supplying a key.
alibaba_coding_plan = ProviderProfile(
    name="alibaba-coding-plan",
    aliases=("alibaba_coding", "alibaba-coding", "dashscope-coding"),
    display_name="Alibaba Cloud (Coding Plan)",
    description="Alibaba Cloud Coding Plan — dedicated coding tier",
    signup_url="https://help.aliyun.com/zh/model-studio/",
    env_vars=("ALIBABA_CODING_PLAN_API_KEY", "DASHSCOPE_API_KEY", "ALIBABA_CODING_PLAN_BASE_URL"),
    base_url="https://coding-intl.dashscope.aliyuncs.com/v1",
    auth_type="api_key",
)

# Registration is an import-time side effect of this module.
register_provider(alibaba_coding_plan)
|
||||
52
providers/anthropic.py
Normal file
52
providers/anthropic.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
"""Native Anthropic provider profile."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import urllib.request
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AnthropicProfile(ProviderProfile):
    """Native Anthropic — uses x-api-key header, not Bearer."""

    def fetch_models(
        self,
        *,
        api_key: str | None = None,
        timeout: float = 8.0,
    ) -> list[str] | None:
        """Fetch model IDs from Anthropic's models endpoint.

        Authenticates with the ``x-api-key`` and ``anthropic-version`` headers
        instead of Bearer auth. The endpoint URL is fixed and does not use
        ``self.base_url`` or ``self.models_url``.

        Args:
            api_key: Anthropic API key. Without one no request is made.
            timeout: Socket timeout in seconds for the HTTP request.

        Returns:
            A list of model ID strings, or None when no key was given or the
            request or response parsing failed. Never raises.
        """
        if not api_key:
            return None
        try:
            # The endpoint is paginated with a small default page size, so ask
            # for the maximum page to avoid a silently truncated catalog.
            req = urllib.request.Request(
                "https://api.anthropic.com/v1/models?limit=1000"
            )
            req.add_header("x-api-key", api_key)
            req.add_header("anthropic-version", "2023-06-01")
            req.add_header("Accept", "application/json")
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                data = json.loads(resp.read().decode())
            return [
                m["id"]
                for m in data.get("data", [])
                if isinstance(m, dict) and "id" in m
            ]
        except Exception as exc:
            # Best-effort by contract: callers fall back to a static list.
            logger.debug("fetch_models(anthropic): %s", exc)
            return None
|
||||
|
||||
|
||||
# Uses the anthropic_messages api_mode rather than the chat_completions default.
# AnthropicProfile.fetch_models() uses its own fixed models URL, not base_url.
anthropic = AnthropicProfile(
    name="anthropic",
    aliases=("claude", "claude-oauth", "claude-code"),
    api_mode="anthropic_messages",
    env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
    base_url="https://api.anthropic.com",
    auth_type="api_key",
    default_aux_model="claude-haiku-4-5-20251001",
)

# Registration is an import-time side effect of this module.
register_provider(anthropic)
|
||||
13
providers/arcee.py
Normal file
13
providers/arcee.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
"""Arcee AI provider profile."""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# Plain OpenAI-compatible profile: api_mode and auth_type are not passed, so
# they take the ProviderProfile defaults.
arcee = ProviderProfile(
    name="arcee",
    aliases=("arcee-ai", "arceeai"),
    env_vars=("ARCEEAI_API_KEY",),
    base_url="https://api.arcee.ai/api/v1",
)

# Registration is an import-time side effect of this module.
register_provider(arcee)
|
||||
21
providers/azure_foundry.py
Normal file
21
providers/azure_foundry.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
"""Azure AI Foundry provider profile.
|
||||
|
||||
Azure Foundry exposes an OpenAI-compatible endpoint; users supply their own
|
||||
base URL at setup since endpoints are per-resource.
|
||||
"""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# base_url is intentionally empty because each Azure resource has its own
# endpoint. With neither base_url nor models_url set on the profile, the
# inherited ProviderProfile.fetch_models() returns None without a request.
azure_foundry = ProviderProfile(
    name="azure-foundry",
    aliases=("azure", "azure-ai-foundry", "azure-ai"),
    display_name="Azure Foundry",
    description="Azure AI Foundry — OpenAI-compatible endpoint (user-supplied base URL)",
    signup_url="https://ai.azure.com/",
    env_vars=("AZURE_FOUNDRY_API_KEY", "AZURE_FOUNDRY_BASE_URL"),
    base_url="",  # per-resource; user provides at setup
    auth_type="api_key",
)

# Registration is an import-time side effect of this module.
register_provider(azure_foundry)
|
||||
165
providers/base.py
Normal file
165
providers/base.py
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
"""Provider profile base class.
|
||||
|
||||
A ProviderProfile declares everything about an inference provider in one place:
|
||||
auth, endpoints, client quirks, request-time quirks. The transport reads this
|
||||
instead of receiving 20+ boolean flags.
|
||||
|
||||
Provider profiles are DECLARATIVE — they describe the provider's behavior.
|
||||
They do NOT own client construction, credential rotation, or streaming.
|
||||
Those stay on AIAgent.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Sentinel for "omit temperature entirely" (Kimi: server manages it).
# A unique object, so it cannot collide with None or a real temperature value;
# intended as a value for ProviderProfile.fixed_temperature.
OMIT_TEMPERATURE = object()
|
||||
|
||||
|
||||
@dataclass
|
||||
class ProviderProfile:
|
||||
"""Base provider profile — subclass or instantiate with overrides."""
|
||||
|
||||
# ── Identity ─────────────────────────────────────────────
|
||||
name: str
|
||||
api_mode: str = "chat_completions"
|
||||
aliases: tuple = ()
|
||||
|
||||
# ── Human-readable metadata ───────────────────────────────
|
||||
display_name: str = "" # e.g. "GMI Cloud" — shown in picker/labels
|
||||
description: str = "" # e.g. "GMI Cloud (multi-model direct API)" — picker subtitle
|
||||
signup_url: str = "" # e.g. "https://www.gmicloud.ai/" — shown during setup
|
||||
|
||||
# ── Auth & endpoints ─────────────────────────────────────
|
||||
env_vars: tuple = ()
|
||||
base_url: str = ""
|
||||
models_url: str = "" # explicit models endpoint; falls back to {base_url}/models
|
||||
auth_type: str = "api_key" # api_key|oauth_device_code|oauth_external|copilot|aws_sdk
|
||||
|
||||
# ── Model catalog ─────────────────────────────────────────
|
||||
# fallback_models: curated list shown in /model picker when live fetch fails.
|
||||
# Only agentic models that support tool calling should appear here.
|
||||
fallback_models: tuple = ()
|
||||
|
||||
# hostname: base hostname for URL→provider reverse-mapping in model_metadata.py
|
||||
# e.g. "api.gmi-serving.com". Derived from base_url when empty.
|
||||
hostname: str = ""
|
||||
|
||||
# ── Client-level quirks (set once at client construction) ─
|
||||
default_headers: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
# ── Request-level quirks ─────────────────────────────────
|
||||
# Temperature: None = use caller's default, OMIT_TEMPERATURE = don't send
|
||||
fixed_temperature: Any = None
|
||||
default_max_tokens: int | None = None
|
||||
default_aux_model: str = (
|
||||
"" # cheap model for auxiliary tasks (compression, vision, etc.)
|
||||
)
|
||||
# empty = use main model
|
||||
|
||||
# ── Hooks (override in subclass for complex providers) ───
|
||||
|
||||
def get_hostname(self) -> str:
|
||||
"""Return the provider's base hostname for URL-based detection.
|
||||
|
||||
Uses self.hostname if set explicitly, otherwise derives it from base_url.
|
||||
e.g. 'https://api.gmi-serving.com/v1' → 'api.gmi-serving.com'
|
||||
"""
|
||||
if self.hostname:
|
||||
return self.hostname
|
||||
if self.base_url:
|
||||
from urllib.parse import urlparse
|
||||
return urlparse(self.base_url).hostname or ""
|
||||
return ""
|
||||
|
||||
def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""Provider-specific message preprocessing.
|
||||
|
||||
Called AFTER codex field sanitization, BEFORE developer role swap.
|
||||
Default: pass-through.
|
||||
"""
|
||||
return messages
|
||||
|
||||
def build_extra_body(
|
||||
self, *, session_id: str | None = None, **context: Any
|
||||
) -> dict[str, Any]:
|
||||
"""Provider-specific extra_body fields.
|
||||
|
||||
Merged into the API kwargs extra_body. Default: empty dict.
|
||||
"""
|
||||
return {}
|
||||
|
||||
def build_api_kwargs_extras(
|
||||
self,
|
||||
*,
|
||||
reasoning_config: dict | None = None,
|
||||
**context: Any,
|
||||
) -> tuple[dict[str, Any], dict[str, Any]]:
|
||||
"""Provider-specific kwargs split between extra_body and top-level api_kwargs.
|
||||
|
||||
Returns (extra_body_additions, top_level_kwargs).
|
||||
The transport merges extra_body_additions into extra_body, and
|
||||
top_level_kwargs directly into api_kwargs.
|
||||
|
||||
This split exists because some providers put reasoning config in
|
||||
extra_body (OpenRouter: extra_body.reasoning) while others put it
|
||||
as top-level api_kwargs (Kimi: api_kwargs.reasoning_effort).
|
||||
|
||||
Default: ({}, {}).
|
||||
"""
|
||||
return {}, {}
|
||||
|
||||
def fetch_models(
|
||||
self,
|
||||
*,
|
||||
api_key: str | None = None,
|
||||
timeout: float = 8.0,
|
||||
) -> list[str] | None:
|
||||
"""Fetch the live model list from the provider's models endpoint.
|
||||
|
||||
Returns a list of model ID strings, or None if the fetch failed or
|
||||
the provider does not support live model listing.
|
||||
|
||||
Resolution order for the endpoint URL:
|
||||
1. self.models_url (explicit override — use when the models
|
||||
endpoint differs from the inference base URL, e.g. OpenRouter
|
||||
exposes a public catalog at /api/v1/models while inference is
|
||||
at /api/v1)
|
||||
2. self.base_url + "/models" (standard OpenAI-compat fallback)
|
||||
|
||||
The default implementation sends Bearer auth when api_key is given
|
||||
and forwards self.default_headers. Override to customise auth, path,
|
||||
response shape, or to return None for providers with no REST catalog.
|
||||
|
||||
Callers must always fall back to the static _PROVIDER_MODELS list
|
||||
when this returns None.
|
||||
"""
|
||||
url = (self.models_url or "").strip()
|
||||
if not url:
|
||||
if not self.base_url:
|
||||
return None
|
||||
url = self.base_url.rstrip("/") + "/models"
|
||||
|
||||
import json
|
||||
import urllib.request
|
||||
|
||||
req = urllib.request.Request(url)
|
||||
if api_key:
|
||||
req.add_header("Authorization", f"Bearer {api_key}")
|
||||
req.add_header("Accept", "application/json")
|
||||
for k, v in self.default_headers.items():
|
||||
req.add_header(k, v)
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
data = json.loads(resp.read().decode())
|
||||
items = data if isinstance(data, list) else data.get("data", [])
|
||||
return [m["id"] for m in items if isinstance(m, dict) and "id" in m]
|
||||
except Exception as exc:
|
||||
logger.debug("fetch_models(%s): %s", self.name, exc)
|
||||
return None
|
||||
29
providers/bedrock.py
Normal file
29
providers/bedrock.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
"""AWS Bedrock provider profile."""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
|
||||
class BedrockProfile(ProviderProfile):
    """AWS Bedrock — no REST /v1/models endpoint; uses AWS SDK."""

    def fetch_models(
        self,
        *,
        api_key: str | None = None,
        timeout: float = 8.0,
    ) -> list[str] | None:
        """Bedrock model listing requires AWS SDK, not a REST call.

        Always returns None without making a request; both arguments are
        ignored and exist only to match the base-class signature.
        """
        return None
|
||||
|
||||
|
||||
# NOTE(review): base_url pins the us-east-1 runtime endpoint; presumably the
# real region is resolved elsewhere — confirm against the Bedrock client setup.
bedrock = BedrockProfile(
    name="bedrock",
    aliases=("aws", "aws-bedrock", "amazon-bedrock", "amazon"),
    api_mode="bedrock_converse",
    env_vars=(),  # AWS SDK credentials — not env vars
    base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
    auth_type="aws_sdk",
)

# Registration is an import-time side effect of this module.
register_provider(bedrock)
|
||||
58
providers/copilot.py
Normal file
58
providers/copilot.py
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
"""Copilot / GitHub Models provider profile.
|
||||
|
||||
Copilot uses per-model api_mode routing:
|
||||
- GPT-5+ / Codex models → codex_responses
|
||||
- Claude models → anthropic_messages
|
||||
- Everything else → chat_completions (this profile covers that subset)
|
||||
|
||||
Key quirks for the chat_completions subset:
|
||||
- Editor attribution headers (via copilot_default_headers())
|
||||
- GitHub Models reasoning extra_body (model-catalog gated)
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
|
||||
class CopilotProfile(ProviderProfile):
    """GitHub Copilot / GitHub Models — editor headers + reasoning."""

    def build_api_kwargs_extras(
        self,
        *,
        model: str | None = None,
        reasoning_config: dict | None = None,
        supports_reasoning: bool = False,
        **ctx: Any,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Build the GitHub Models ``reasoning`` extra_body entry for *model*.

        Nothing is added unless ``supports_reasoning`` is true, ``model`` is
        set, and ``github_model_reasoning_efforts(model)`` returns a non-empty
        result.

        Returns:
            ``(extra_body, {})`` — this provider never adds top-level kwargs.
        """
        extra_body: dict[str, Any] = {}
        if supports_reasoning and model:
            try:
                # Imported lazily, inside the try, so an import failure is
                # swallowed along with any lookup error.
                from hermes_cli.models import github_model_reasoning_efforts

                supported_efforts = github_model_reasoning_efforts(model)
                if supported_efforts and reasoning_config:
                    effort = reasoning_config.get("effort", "medium")
                    # Normalize non-standard effort levels to the nearest supported
                    if effort == "xhigh":
                        effort = "high"
                    # An effort the model does not list is dropped, not replaced.
                    if effort in supported_efforts:
                        extra_body["reasoning"] = {"effort": effort}
                # NOTE(review): indentation was lost in this view; this branch is
                # assumed to pair with the outer `if` (no reasoning_config →
                # default to "medium") — confirm against the repository.
                elif supported_efforts:
                    extra_body["reasoning"] = {"effort": "medium"}
            except Exception:
                # Best-effort: any failure means no reasoning field is sent.
                pass
        return extra_body, {}
|
||||
|
||||
|
||||
# api_mode is not passed, so it takes the ProviderProfile default; per the
# module docstring this profile covers only the chat_completions subset.
copilot = CopilotProfile(
    name="copilot",
    aliases=("github-copilot", "github-models", "github-model", "github"),
    env_vars=("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"),
    base_url="https://api.githubcopilot.com",
    auth_type="copilot",
)

# Registration is an import-time side effect of this module.
register_provider(copilot)
|
||||
34
providers/copilot_acp.py
Normal file
34
providers/copilot_acp.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
"""GitHub Copilot ACP provider profile.
|
||||
|
||||
copilot-acp uses an external ACP subprocess — NOT the standard
|
||||
transport. api_mode="copilot_acp" is handled separately in run_agent.py.
|
||||
The profile captures auth + endpoint metadata for registry migration.
|
||||
"""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
|
||||
class CopilotACPProfile(ProviderProfile):
    """GitHub Copilot ACP — external process, no REST models endpoint."""

    def fetch_models(
        self, *, api_key: str | None = None, timeout: float = 8.0
    ) -> list[str] | None:
        """Return ``None`` unconditionally.

        Model listing is handled by the ACP subprocess, so both ``api_key``
        and ``timeout`` are accepted only for signature compatibility.
        """
        return None
|
||||
|
||||
|
||||
# NOTE(review): this module's docstring mentions api_mode="copilot_acp",
# while the profile registers "chat_completions" — confirm which is intended.
copilot_acp = CopilotACPProfile(
    name="copilot-acp",
    api_mode="chat_completions",  # ACP subprocess uses chat_completions routing
    auth_type="external_process",
    base_url="acp://copilot",  # ACP internal scheme
    env_vars=(),  # Managed by ACP subprocess
    aliases=("github-copilot-acp", "copilot-acp-agent"),
)

register_provider(copilot_acp)
|
||||
68
providers/custom.py
Normal file
68
providers/custom.py
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
"""Custom / Ollama (local) provider profile.
|
||||
|
||||
Covers any endpoint registered as provider="custom", including local
|
||||
Ollama instances. Key quirks:
|
||||
- ollama_num_ctx → extra_body.options.num_ctx (local context window)
|
||||
- reasoning_config disabled → extra_body.think = False
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
|
||||
class CustomProfile(ProviderProfile):
    """Custom/Ollama local provider — think=false and num_ctx support."""

    def build_api_kwargs_extras(
        self,
        *,
        reasoning_config: dict | None = None,
        ollama_num_ctx: int | None = None,
        **ctx: Any,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Return ``(extra_body, {})`` with the Ollama-specific knobs.

        ``ollama_num_ctx`` (when truthy) becomes ``options.num_ctx``; a
        reasoning config whose effort is ``"none"`` or whose ``enabled`` is
        ``False`` sets ``think`` to ``False``.
        """
        extra_body: dict[str, Any] = {}

        # Local context window size.
        if ollama_num_ctx:
            extra_body["options"] = {"num_ctx": ollama_num_ctx}

        # Thinking is switched off only on an explicit opt-out.
        if reasoning_config and isinstance(reasoning_config, dict):
            effort = (reasoning_config.get("effort") or "").strip().lower()
            reasoning_off = reasoning_config.get("enabled", True) is False
            if reasoning_off or effort == "none":
                extra_body["think"] = False

        return extra_body, {}

    def fetch_models(
        self, *, api_key: str | None = None, timeout: float = 8.0
    ) -> list[str] | None:
        """Delegate to the base implementation only when ``base_url`` is set."""
        if self.base_url:
            return super().fetch_models(api_key=api_key, timeout=timeout)
        return None
|
||||
|
||||
|
||||
# Catch-all profile for user-configured endpoints; the aliases cover the
# common local-server names.
custom = CustomProfile(
    name="custom",
    base_url="",  # User-configured
    env_vars=(),  # No fixed key — custom endpoint
    aliases=("ollama", "local", "vllm", "llamacpp", "llama.cpp", "llama-cpp"),
)

register_provider(custom)
|
||||
20
providers/deepseek.py
Normal file
20
providers/deepseek.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
"""DeepSeek provider profile."""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# Plain declarative profile: no hooks overridden.
deepseek = ProviderProfile(
    name="deepseek",
    display_name="DeepSeek",
    description="DeepSeek — native DeepSeek API",
    signup_url="https://platform.deepseek.com/",
    base_url="https://api.deepseek.com/v1",
    env_vars=("DEEPSEEK_API_KEY",),
    aliases=("deepseek-chat",),
    fallback_models=("deepseek-chat", "deepseek-reasoner"),
)

register_provider(deepseek)
|
||||
72
providers/gemini.py
Normal file
72
providers/gemini.py
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
"""Google Gemini provider profiles.
|
||||
|
||||
gemini: Google AI Studio (API key) — uses GeminiNativeClient
|
||||
google-gemini-cli: Google Cloud Code Assist (OAuth) — uses GeminiCloudCodeClient
|
||||
|
||||
Both report api_mode="chat_completions" but use custom native clients
|
||||
that bypass the standard OpenAI transport. The profile captures auth
|
||||
and endpoint metadata for auth.py / runtime_provider.py migration, and
|
||||
carries the thinking_config translation hook so the transport's profile
|
||||
path produces the same extra_body shape the legacy flag path did.
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
|
||||
class GeminiProfile(ProviderProfile):
    """Gemini — translate reasoning_config to thinking_config in extra_body."""

    def build_extra_body(
        self, *, session_id: str | None = None, **context: Any
    ) -> dict[str, Any]:
        """Translate the reasoning config into Gemini's thinking config.

        Returns ``{"thinking_config": ...}`` on the native path, or
        ``{"extra_body": {"google": {"thinking_config": ...}}}`` (snake-cased)
        when this is the ``gemini`` profile talking to an OpenAI-compat base
        URL. An empty dict is returned when there is nothing to emit.
        """
        # Imported lazily from the transport module.
        from agent.transports.chat_completions import (
            _build_gemini_thinking_config,
            _is_gemini_openai_compat_base_url,
            _snake_case_gemini_thinking_config,
        )

        thinking = _build_gemini_thinking_config(
            context.get("model") or "",
            context.get("reasoning_config"),
        )
        if not thinking:
            return {}

        endpoint = context.get("base_url") or self.base_url
        if self.name != "gemini" or not _is_gemini_openai_compat_base_url(endpoint):
            # Native shape: the raw config goes straight into extra_body.
            return {"thinking_config": thinking}

        # OpenAI-compat shape: snake_case keys nested under extra_body.google.
        snake_cased = _snake_case_gemini_thinking_config(thinking)
        if not snake_cased:
            return {}
        return {"extra_body": {"google": {"thinking_config": snake_cased}}}
|
||||
|
||||
|
||||
# API-key flavour (Google AI Studio).
gemini = GeminiProfile(
    name="gemini",
    api_mode="chat_completions",
    auth_type="api_key",
    base_url="https://generativelanguage.googleapis.com/v1beta",
    env_vars=("GOOGLE_API_KEY", "GEMINI_API_KEY"),
    aliases=("google", "google-gemini", "google-ai-studio"),
    default_aux_model="gemini-3-flash-preview",
)

# OAuth flavour (Cloud Code Assist).
google_gemini_cli = GeminiProfile(
    name="google-gemini-cli",
    api_mode="chat_completions",
    auth_type="oauth_external",
    base_url="cloudcode-pa://google",  # Cloud Code Assist internal scheme
    env_vars=(),  # OAuth — no API key
    aliases=("gemini-cli", "gemini-oauth"),
)

for _profile in (gemini, google_gemini_cli):
    register_provider(_profile)
del _profile
|
||||
26
providers/gmi.py
Normal file
26
providers/gmi.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
"""GMI Cloud provider profile."""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# NOTE(review): env_vars also carries GMI_BASE_URL; presumably the consumer
# separates *_BASE_URL entries from API-key entries — confirm.
gmi = ProviderProfile(
    name="gmi",
    display_name="GMI Cloud",
    description="GMI Cloud — multi-model direct API (slash-form model IDs)",
    signup_url="https://www.gmicloud.ai/",
    auth_type="api_key",
    base_url="https://api.gmi-serving.com/v1",
    env_vars=("GMI_API_KEY", "GMI_BASE_URL"),
    aliases=("gmi-cloud", "gmicloud"),
    default_aux_model="google/gemini-3.1-flash-lite-preview",
    fallback_models=(
        "zai-org/GLM-5.1-FP8",
        "deepseek-ai/DeepSeek-V3.2",
        "moonshotai/Kimi-K2.5",
        "google/gemini-3.1-flash-lite-preview",
        "anthropic/claude-sonnet-4.6",
        "openai/gpt-5.4",
    ),
)

register_provider(gmi)
|
||||
20
providers/huggingface.py
Normal file
20
providers/huggingface.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
"""Hugging Face provider profile."""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# Plain declarative profile: no hooks overridden.
huggingface = ProviderProfile(
    name="huggingface",
    display_name="HuggingFace",
    description="HuggingFace Inference API",
    signup_url="https://huggingface.co/settings/tokens",
    base_url="https://router.huggingface.co/v1",
    env_vars=("HF_TOKEN",),
    aliases=("hf", "hugging-face", "huggingface-hub"),
    fallback_models=("Qwen/Qwen3.5-72B-Instruct", "deepseek-ai/DeepSeek-V3.2"),
)

register_provider(huggingface)
|
||||
14
providers/kilocode.py
Normal file
14
providers/kilocode.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
"""Kilo Code provider profile."""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# Plain declarative profile: no hooks overridden.
kilocode = ProviderProfile(
    name="kilocode",
    base_url="https://api.kilo.ai/api/gateway",
    env_vars=("KILOCODE_API_KEY",),
    aliases=("kilo-code", "kilo", "kilo-gateway"),
    default_aux_model="google/gemini-3-flash-preview",
)

register_provider(kilocode)
|
||||
71
providers/kimi.py
Normal file
71
providers/kimi.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
"""Kimi / Moonshot provider profiles.
|
||||
|
||||
Kimi has dual endpoints:
|
||||
- sk-kimi-* keys → api.kimi.com/coding (Anthropic Messages API)
|
||||
- legacy keys → api.moonshot.ai/v1 (OpenAI chat completions)
|
||||
|
||||
This module covers the chat_completions path (/v1 endpoint).
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import OMIT_TEMPERATURE, ProviderProfile
|
||||
|
||||
|
||||
class KimiProfile(ProviderProfile):
    """Kimi/Moonshot — temperature omitted, thinking + reasoning_effort."""

    def build_api_kwargs_extras(
        self, *, reasoning_config: dict | None = None, **context
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Return ``(extra_body, top_level)`` for Kimi's reasoning controls.

        ``extra_body["thinking"]`` carries the enabled/disabled switch and
        ``top_level["reasoning_effort"]`` the effort. The effort is one of
        ``low``/``medium``/``high``, with ``medium`` as the fallback, and is
        left out entirely when thinking is disabled.
        """
        # Missing or malformed config: thinking on, default effort.
        if not reasoning_config or not isinstance(reasoning_config, dict):
            return {"thinking": {"type": "enabled"}}, {"reasoning_effort": "medium"}

        # Explicit opt-out: no effort is sent at all.
        if reasoning_config.get("enabled", True) is False:
            return {"thinking": {"type": "disabled"}}, {}

        requested = (reasoning_config.get("effort") or "").strip().lower()
        effort = requested if requested in ("low", "medium", "high") else "medium"
        return {"thinking": {"type": "enabled"}}, {"reasoning_effort": effort}
|
||||
|
||||
|
||||
# International endpoint.
kimi = KimiProfile(
    name="kimi-coding",
    base_url="https://api.moonshot.ai/v1",
    env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"),
    aliases=("kimi", "moonshot", "kimi-for-coding"),
    default_headers={"User-Agent": "hermes-agent/1.0"},
    default_aux_model="kimi-k2-turbo-preview",
    default_max_tokens=32000,
    fixed_temperature=OMIT_TEMPERATURE,
)

# China endpoint; same settings apart from name, aliases, key and URL.
kimi_cn = KimiProfile(
    name="kimi-coding-cn",
    base_url="https://api.moonshot.cn/v1",
    env_vars=("KIMI_CN_API_KEY",),
    aliases=("kimi-cn", "moonshot-cn"),
    default_headers={"User-Agent": "hermes-agent/1.0"},
    default_aux_model="kimi-k2-turbo-preview",
    default_max_tokens=32000,
    fixed_temperature=OMIT_TEMPERATURE,
)

for _profile in (kimi, kimi_cn):
    register_provider(_profile)
del _profile
|
||||
45
providers/minimax.py
Normal file
45
providers/minimax.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
"""MiniMax provider profiles (international + China).
|
||||
|
||||
All profiles here use anthropic_messages api_mode — their inference_base_url
|
||||
ends with /anthropic which triggers auto-detection to anthropic_messages.
|
||||
"""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# International endpoint, API-key auth.
minimax = ProviderProfile(
    name="minimax",
    api_mode="anthropic_messages",
    auth_type="api_key",
    base_url="https://api.minimax.io/anthropic",
    env_vars=("MINIMAX_API_KEY",),
    aliases=("mini-max",),
    default_aux_model="MiniMax-M2.7",
)

# China endpoint, API-key auth.
minimax_cn = ProviderProfile(
    name="minimax-cn",
    api_mode="anthropic_messages",
    auth_type="api_key",
    base_url="https://api.minimaxi.com/anthropic",
    env_vars=("MINIMAX_CN_API_KEY",),
    aliases=("minimax-china", "minimax_cn"),
    default_aux_model="MiniMax-M2.7",
)

# International endpoint, OAuth auth.
minimax_oauth = ProviderProfile(
    name="minimax-oauth",
    display_name="MiniMax (OAuth)",
    description="MiniMax via OAuth browser flow — no API key required",
    signup_url="https://api.minimax.io/",
    api_mode="anthropic_messages",
    auth_type="oauth_external",
    base_url="https://api.minimax.io/anthropic",
    env_vars=(),  # OAuth — tokens in auth.json, not env
    aliases=("minimax_oauth", "minimax-oauth-io"),
    default_aux_model="MiniMax-M2.7-highspeed",
)

for _profile in (minimax, minimax_cn, minimax_oauth):
    register_provider(_profile)
del _profile
|
||||
53
providers/nous.py
Normal file
53
providers/nous.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
"""Nous Portal provider profile."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
|
||||
class NousProfile(ProviderProfile):
    """Nous Portal — product tags, reasoning with Nous-specific omission."""

    def build_extra_body(
        self, *, session_id: str | None = None, **context
    ) -> dict[str, Any]:
        """Return the fixed product tag; all arguments are ignored."""
        return {"tags": ["product=hermes-agent"]}

    def build_api_kwargs_extras(
        self,
        *,
        reasoning_config: dict | None = None,
        supports_reasoning: bool = False,
        **context,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Return ``(extra_body, {})`` with an optional ``reasoning`` entry.

        A copy of ``reasoning_config`` is forwarded unless it has
        ``enabled`` set to ``False``, in which case reasoning is omitted.
        With no config at all, a default of enabled/medium is sent. Nothing
        is emitted when ``supports_reasoning`` is false.
        """
        if not supports_reasoning:
            return {}, {}

        if reasoning_config is None:
            return {"reasoning": {"enabled": True, "effort": "medium"}}, {}

        reasoning = dict(reasoning_config)
        if reasoning.get("enabled") is False:
            # Nous omits reasoning when disabled
            return {}, {}
        return {"reasoning": reasoning}, {}
|
||||
|
||||
|
||||
# Profile instance for the Nous Portal endpoint.
nous = NousProfile(
    name="nous",
    display_name="Nous Research",
    description="Nous Research — Hermes model family",
    signup_url="https://nousresearch.com/",
    auth_type="oauth_device_code",
    base_url="https://inference.nousresearch.com/v1",
    env_vars=("NOUS_API_KEY",),
    aliases=("nous-portal", "nousresearch"),
    fallback_models=("hermes-3-405b", "hermes-3-70b"),
)

register_provider(nous)
|
||||
21
providers/nvidia.py
Normal file
21
providers/nvidia.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
"""NVIDIA NIM provider profile."""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# Plain declarative profile: no hooks overridden.
nvidia = ProviderProfile(
    name="nvidia",
    display_name="NVIDIA NIM",
    description="NVIDIA NIM — accelerated inference",
    signup_url="https://build.nvidia.com/",
    base_url="https://integrate.api.nvidia.com/v1",
    env_vars=("NVIDIA_API_KEY",),
    aliases=("nvidia-nim",),
    default_max_tokens=16384,
    fallback_models=(
        "nvidia/llama-3.1-nemotron-70b-instruct",
        "nvidia/llama-3.3-70b-instruct",
    ),
)

register_provider(nvidia)
|
||||
14
providers/ollama_cloud.py
Normal file
14
providers/ollama_cloud.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
"""Ollama Cloud provider profile."""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# Hosted Ollama endpoint; local Ollama is handled by the "custom" profile.
ollama_cloud = ProviderProfile(
    name="ollama-cloud",
    base_url="https://ollama.com/v1",
    env_vars=("OLLAMA_API_KEY",),
    aliases=("ollama_cloud",),
    default_aux_model="nemotron-3-nano:30b",
)

register_provider(ollama_cloud)
|
||||
15
providers/openai_codex.py
Normal file
15
providers/openai_codex.py
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
"""OpenAI Codex (Responses API) provider profile."""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# Declares the codex_responses api_mode rather than chat_completions.
openai_codex = ProviderProfile(
    name="openai-codex",
    api_mode="codex_responses",
    auth_type="oauth_external",
    base_url="https://chatgpt.com/backend-api/codex",
    env_vars=(),  # OAuth external — no API key
    aliases=("codex", "openai_codex"),
)

register_provider(openai_codex)
|
||||
30
providers/opencode.py
Normal file
30
providers/opencode.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
"""OpenCode provider profiles (Zen + Go).
|
||||
|
||||
Both use per-model api_mode routing:
|
||||
- OpenCode Zen: Claude → anthropic_messages, GPT-5/Codex → codex_responses,
|
||||
everything else → chat_completions (this profile)
|
||||
- OpenCode Go: MiniMax → anthropic_messages, GLM/Kimi → chat_completions
|
||||
(this profile)
|
||||
"""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# OpenCode Zen: the chat_completions subset of its models.
opencode_zen = ProviderProfile(
    name="opencode-zen",
    base_url="https://opencode.ai/zen/v1",
    env_vars=("OPENCODE_ZEN_API_KEY",),
    aliases=("opencode", "opencode_zen", "zen"),
    default_aux_model="gemini-3-flash",
)

# OpenCode Go: the chat_completions subset of its models.
opencode_go = ProviderProfile(
    name="opencode-go",
    base_url="https://opencode.ai/zen/go/v1",
    env_vars=("OPENCODE_GO_API_KEY",),
    aliases=("opencode_go", "go", "opencode-go-sub"),
    default_aux_model="glm-5",
)

for _profile in (opencode_zen, opencode_go):
    register_provider(_profile)
del _profile
|
||||
86
providers/openrouter.py
Normal file
86
providers/openrouter.py
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
"""OpenRouter provider profile."""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_CACHE: list[str] | None = None
|
||||
|
||||
|
||||
class OpenRouterProfile(ProviderProfile):
    """OpenRouter aggregator — provider preferences, reasoning config passthrough."""

    def fetch_models(
        self,
        *,
        api_key: str | None = None,
        timeout: float = 8.0,
    ) -> list[str] | None:
        """Fetch the model list without auth, memoised in module-level ``_CACHE``.

        ``api_key`` is accepted but never forwarded: the base implementation
        is always called with ``api_key=None``. Only a non-``None`` result is
        cached; any exception is logged at debug level and yields ``None``.

        Note: Tool-call capability filtering is applied by hermes_cli/models.py
        via fetch_openrouter_models() → _openrouter_model_supports_tools(), not
        here. The picker early-returns via the dedicated openrouter path before
        reaching this method, so filtering here would be unreachable.
        """
        global _CACHE  # noqa: PLW0603
        if _CACHE is None:
            try:
                fetched = super().fetch_models(api_key=None, timeout=timeout)
            except Exception as exc:
                logger.debug("fetch_models(openrouter): %s", exc)
                return None
            if fetched is not None:
                _CACHE = fetched
            return fetched
        return _CACHE

    def build_extra_body(
        self, *, session_id: str | None = None, **context: Any
    ) -> dict[str, Any]:
        """Forward ``provider_preferences`` (when truthy) as ``provider``."""
        preferences = context.get("provider_preferences")
        return {"provider": preferences} if preferences else {}

    def build_api_kwargs_extras(
        self,
        *,
        reasoning_config: dict | None = None,
        supports_reasoning: bool = False,
        **context: Any,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Return ``(extra_body, {})`` with the reasoning config passed through.

        A copy of the full ``reasoning_config`` dict becomes
        ``extra_body["reasoning"]``; without a config the default is
        enabled/medium. Nothing is emitted when ``supports_reasoning`` is false.
        """
        if not supports_reasoning:
            return {}, {}
        if reasoning_config is None:
            reasoning: dict[str, Any] = {"enabled": True, "effort": "medium"}
        else:
            reasoning = dict(reasoning_config)
        return {"reasoning": reasoning}, {}
|
||||
|
||||
|
||||
# models_url is set explicitly here, unlike most other profiles.
openrouter = OpenRouterProfile(
    name="openrouter",
    display_name="OpenRouter",
    description="OpenRouter — unified API for 200+ models",
    signup_url="https://openrouter.ai/keys",
    base_url="https://openrouter.ai/api/v1",
    models_url="https://openrouter.ai/api/v1/models",
    env_vars=("OPENROUTER_API_KEY",),
    aliases=("or",),
    fallback_models=(
        "anthropic/claude-sonnet-4.6",
        "openai/gpt-5.4",
        "deepseek/deepseek-chat",
        "google/gemini-3-flash-preview",
        "qwen/qwen3-plus",
    ),
)

register_provider(openrouter)
|
||||
82
providers/qwen.py
Normal file
82
providers/qwen.py
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
"""Qwen Portal provider profile."""
|
||||
|
||||
import copy
|
||||
from typing import Any
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
|
||||
class QwenProfile(ProviderProfile):
    """Qwen Portal — message normalization, vl_high_resolution, metadata top-level."""

    def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Return a deep copy of *messages* with list-of-dicts content.

        String content becomes a single text part; inside list content, bare
        strings become text parts, dicts are kept, and anything else is
        dropped. If that leaves no parts, the original content is left
        untouched. The last part of the first system message is then tagged
        with an ephemeral ``cache_control``. The input is never mutated.

        Matches the behavior of run_agent.py:_qwen_prepare_chat_messages().
        """
        prepared = copy.deepcopy(messages)
        if not prepared:
            return prepared

        for message in prepared:
            if not isinstance(message, dict):
                continue
            content = message.get("content")
            if isinstance(content, str):
                message["content"] = [{"type": "text", "text": content}]
            elif isinstance(content, list):
                parts = [
                    {"type": "text", "text": part} if isinstance(part, str) else part
                    for part in content
                    if isinstance(part, (str, dict))
                ]
                if parts:
                    message["content"] = parts

        # Only the first system message is considered, even if it ends up
        # not being taggable.
        system_message = next(
            (
                message
                for message in prepared
                if isinstance(message, dict) and message.get("role") == "system"
            ),
            None,
        )
        if system_message is not None:
            content = system_message.get("content")
            if isinstance(content, list) and content and isinstance(content[-1], dict):
                content[-1]["cache_control"] = {"type": "ephemeral"}

        return prepared

    def build_extra_body(
        self, *, session_id: str | None = None, **context
    ) -> dict[str, Any]:
        """Return the fixed high-resolution image flag; arguments are ignored."""
        return {"vl_high_resolution_images": True}

    def build_api_kwargs_extras(
        self,
        *,
        reasoning_config: dict | None = None,
        qwen_session_metadata: dict | None = None,
        **context,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Return ``({}, top_level)``: metadata goes to top-level api_kwargs.

        ``qwen_session_metadata`` (when truthy) is forwarded as
        ``top_level["metadata"]``; ``reasoning_config`` is not used.
        """
        if qwen_session_metadata:
            return {}, {"metadata": qwen_session_metadata}
        return {}, {}
|
||||
|
||||
|
||||
# Profile instance for the Qwen Portal endpoint.
qwen = QwenProfile(
    name="qwen-oauth",
    auth_type="oauth_external",
    base_url="https://portal.qwen.ai/v1",
    env_vars=("QWEN_API_KEY",),
    aliases=("qwen", "qwen-portal", "qwen-cli"),
    default_max_tokens=65536,
)

register_provider(qwen)
|
||||
14
providers/stepfun.py
Normal file
14
providers/stepfun.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
"""StepFun provider profile."""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# Plain declarative profile: no hooks overridden.
stepfun = ProviderProfile(
    name="stepfun",
    base_url="https://api.stepfun.ai/step_plan/v1",
    env_vars=("STEPFUN_API_KEY",),
    aliases=("step", "stepfun-coding-plan"),
    default_aux_model="step-3.5-flash",
)

register_provider(stepfun)
|
||||
43
providers/vercel.py
Normal file
43
providers/vercel.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
"""Vercel AI Gateway provider profile.
|
||||
|
||||
AI Gateway routes to multiple backends. Hermes sends attribution
|
||||
headers and full reasoning config passthrough.
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
|
||||
class VercelAIGatewayProfile(ProviderProfile):
    """Vercel AI Gateway — attribution headers + reasoning passthrough."""

    def build_api_kwargs_extras(
        self,
        *,
        reasoning_config: dict | None = None,
        supports_reasoning: bool = True,
        **ctx: Any,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Return ``(extra_body, {})`` with the reasoning config passed through.

        ``supports_reasoning`` defaults to ``True`` here. When it is true, a
        copy of ``reasoning_config`` (or enabled/medium if no config was
        given) is placed under ``extra_body["reasoning"]``.
        """
        if not supports_reasoning:
            return {}, {}
        if reasoning_config is None:
            reasoning: dict[str, Any] = {"enabled": True, "effort": "medium"}
        else:
            reasoning = dict(reasoning_config)
        return {"reasoning": reasoning}, {}
|
||||
|
||||
|
||||
# default_headers carries the attribution headers for the gateway.
vercel = VercelAIGatewayProfile(
    name="ai-gateway",
    base_url="https://ai-gateway.vercel.sh/v1",
    env_vars=("AI_GATEWAY_API_KEY",),
    aliases=("vercel", "vercel-ai-gateway", "ai_gateway", "aigateway"),
    default_aux_model="google/gemini-3-flash",
    default_headers={
        "HTTP-Referer": "https://hermes-agent.nousresearch.com",
        "X-Title": "Hermes Agent",
    },
)

register_provider(vercel)
|
||||
15
providers/xai.py
Normal file
15
providers/xai.py
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
"""xAI (Grok) provider profile."""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# Declares the codex_responses api_mode rather than chat_completions.
xai = ProviderProfile(
    name="xai",
    api_mode="codex_responses",
    auth_type="api_key",
    base_url="https://api.x.ai/v1",
    env_vars=("XAI_API_KEY",),
    aliases=("grok", "x-ai", "x.ai"),
)

register_provider(xai)
|
||||
13
providers/xiaomi.py
Normal file
13
providers/xiaomi.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
"""Xiaomi MiMo provider profile."""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# Minimal profile: only name, aliases, env var and base URL are declared.
xiaomi = ProviderProfile(
    name="xiaomi",
    base_url="https://api.xiaomimimo.com/v1",
    env_vars=("XIAOMI_API_KEY",),
    aliases=("mimo", "xiaomi-mimo"),
)

register_provider(xiaomi)
|
||||
21
providers/zai.py
Normal file
21
providers/zai.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
"""ZAI / GLM provider profile."""
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
# Three env var names are declared for this provider.
zai = ProviderProfile(
    name="zai",
    display_name="Z.AI (GLM)",
    description="Z.AI / GLM — Zhipu AI models",
    signup_url="https://z.ai/",
    base_url="https://api.z.ai/api/paas/v4",
    env_vars=("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),
    aliases=("glm", "z-ai", "z.ai", "zhipu"),
    default_aux_model="glm-4.5-flash",
    fallback_models=("glm-5", "glm-4-9b"),
)

register_provider(zai)
|
||||
Loading…
Add table
Add a link
Reference in a new issue