Mirror of https://github.com/NousResearch/hermes-agent.git (synced 2026-04-25 00:51:20 +00:00)
feat(providers): add native NVIDIA NIM provider
Adds NVIDIA NIM as a first-class provider:

- ProviderConfig in auth.py
- HermesOverlay in providers.py
- curated models (Nemotron plus other open source models hosted on build.nvidia.com)
- URL mapping in model_metadata.py
- aliases (nim, nvidia-nim, build-nvidia, nemotron)
- env var tests

Docs updated: providers page, quickstart table, fallback providers table, and README provider list.
This commit is contained in:
parent cc3aa76675
commit 3b569ff576

9 changed files with 67 additions and 1 deletion
@@ -13,7 +13,7 @@
 
 **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
 
-Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
+Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
 
 <table>
 <tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
@@ -38,6 +38,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
     "mimo", "xiaomi-mimo",
     "arcee-ai", "arceeai",
     "xai", "x-ai", "x.ai", "grok",
+    "nvidia", "nim", "nvidia-nim", "nemotron",
     "qwen-portal",
 })
 
@@ -240,6 +241,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
     "api.fireworks.ai": "fireworks",
     "opencode.ai": "opencode-go",
     "api.x.ai": "xai",
+    "integrate.api.nvidia.com": "nvidia",
     "api.xiaomimimo.com": "xiaomi",
     "xiaomimimo.com": "xiaomi",
     "ollama.com": "ollama-cloud",
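The `_URL_TO_PROVIDER` table keys on bare hostnames, so a lookup presumably normalizes the configured base URL before matching. A minimal sketch of that resolution (the `provider_for_url` helper is hypothetical; the dict entries are taken from the hunk above):

```python
from urllib.parse import urlparse

# Entries from the hunk above (subset); keys are bare hostnames.
_URL_TO_PROVIDER = {
    "api.x.ai": "xai",
    "integrate.api.nvidia.com": "nvidia",
    "api.xiaomimimo.com": "xiaomi",
}

def provider_for_url(base_url: str) -> str | None:
    """Map a configured base URL to a provider id by its hostname."""
    host = urlparse(base_url).hostname or ""
    return _URL_TO_PROVIDER.get(host)

assert provider_for_url("https://integrate.api.nvidia.com/v1") == "nvidia"
```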
@@ -233,6 +233,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
         api_key_env_vars=("XAI_API_KEY",),
         base_url_env_var="XAI_BASE_URL",
     ),
+    "nvidia": ProviderConfig(
+        id="nvidia",
+        name="NVIDIA NIM",
+        auth_type="api_key",
+        inference_base_url="https://integrate.api.nvidia.com/v1",
+        api_key_env_vars=("NVIDIA_API_KEY",),
+        base_url_env_var="NVIDIA_BASE_URL",
+    ),
     "ai-gateway": ProviderConfig(
         id="ai-gateway",
         name="Vercel AI Gateway",
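`api_key_env_vars` is a tuple, which suggests keys are resolved in priority order: the first env var that is set wins. A minimal sketch under that assumption (the `first_api_key` helper is hypothetical; the tuples are the ones asserted in the tests further down):

```python
import os

def first_api_key(env_vars: tuple[str, ...]) -> str | None:
    """Return the value of the first set env var, honoring the tuple's priority order."""
    for name in env_vars:
        value = os.environ.get(name)
        if value:
            return value
    return None

# NVIDIA has a single key var; Copilot (see the tests below) falls back through three.
first_api_key(("NVIDIA_API_KEY",))
first_api_key(("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"))
```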
@@ -155,6 +155,13 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
         "grok-4.20-reasoning",
         "grok-4-1-fast-reasoning",
     ],
+    "nvidia": [
+        "nvidia/nemotron-3-super-120b-a12b",
+        "nvidia/nemotron-3-nano-8b-a4b",
+        "z-ai/glm5",
+        "moonshotai/kimi-k2.5",
+        "minimaxai/minimax-m2.5",
+    ],
     "kimi-coding": [
         "kimi-k2.5",
         "kimi-for-coding",
@@ -544,6 +551,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
     ProviderEntry("google-gemini-cli", "Google Gemini (OAuth)", "Google Gemini via OAuth + Code Assist (free tier supported; no API key needed)"),
     ProviderEntry("deepseek", "DeepSeek", "DeepSeek (DeepSeek-V3, R1, coder — direct API)"),
     ProviderEntry("xai", "xAI", "xAI (Grok models — direct API)"),
+    ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
     ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"),
     ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"),
     ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"),
@@ -618,6 +626,10 @@ _PROVIDER_ALIASES = {
     "grok": "xai",
     "x-ai": "xai",
     "x.ai": "xai",
+    "nim": "nvidia",
+    "nvidia-nim": "nvidia",
+    "build-nvidia": "nvidia",
+    "nemotron": "nvidia",
     "ollama": "custom",  # bare "ollama" = local; use "ollama-cloud" for cloud
     "ollama_cloud": "ollama-cloud",
 }
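Alias resolution is presumably a single dict hop before the registry lookup, so every spelling lands on the same config. A sketch (the `canonical` helper is hypothetical; the alias pairs are from the hunk above):

```python
# Alias pairs from the hunk above (subset).
_PROVIDER_ALIASES = {
    "nim": "nvidia",
    "nvidia-nim": "nvidia",
    "build-nvidia": "nvidia",
    "nemotron": "nvidia",
}

def canonical(provider: str) -> str:
    """Collapse an alias to its canonical provider id (identity if unknown)."""
    return _PROVIDER_ALIASES.get(provider.lower().strip(), provider)

assert {canonical(p) for p in ("nim", "nvidia-nim", "build-nvidia", "nemotron")} == {"nvidia"}
```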
@@ -137,6 +137,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
         base_url_override="https://api.x.ai/v1",
         base_url_env_var="XAI_BASE_URL",
     ),
+    "nvidia": HermesOverlay(
+        transport="openai_chat",
+        base_url_override="https://integrate.api.nvidia.com/v1",
+        base_url_env_var="NVIDIA_BASE_URL",
+    ),
     "xiaomi": HermesOverlay(
         transport="openai_chat",
         base_url_env_var="XIAOMI_BASE_URL",
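The overlay mirrors the xAI entry: OpenAI-compatible chat transport, a fixed cloud base URL, and `NVIDIA_BASE_URL` as the escape hatch for local NIM. A minimal sketch of how the override presumably composes, with the env var taking precedence over the cloud default (the dataclass shape and `effective_base_url` helper are hypothetical; field names match the diff):

```python
import os
from dataclasses import dataclass

@dataclass(frozen=True)
class HermesOverlay:
    transport: str
    base_url_override: str | None = None
    base_url_env_var: str | None = None

nvidia = HermesOverlay(
    transport="openai_chat",
    base_url_override="https://integrate.api.nvidia.com/v1",
    base_url_env_var="NVIDIA_BASE_URL",
)

def effective_base_url(o: HermesOverlay) -> str | None:
    """Env var (e.g. a local NIM at http://localhost:8000/v1) beats the cloud default."""
    if o.base_url_env_var and os.environ.get(o.base_url_env_var):
        return os.environ[o.base_url_env_var]
    return o.base_url_override
```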
@@ -191,6 +196,12 @@ ALIASES: Dict[str, str] = {
     "x.ai": "xai",
     "grok": "xai",
+
+    # nvidia
+    "nim": "nvidia",
+    "nvidia-nim": "nvidia",
+    "build-nvidia": "nvidia",
+    "nemotron": "nvidia",
 
     # kimi-for-coding (models.dev ID)
     "kimi": "kimi-for-coding",
     "kimi-coding": "kimi-for-coding",
@@ -33,6 +33,7 @@ class TestProviderRegistry:
         ("huggingface", "Hugging Face", "api_key"),
         ("zai", "Z.AI / GLM", "api_key"),
         ("xai", "xAI", "api_key"),
+        ("nvidia", "NVIDIA NIM", "api_key"),
         ("kimi-coding", "Kimi / Moonshot", "api_key"),
         ("minimax", "MiniMax", "api_key"),
         ("minimax-cn", "MiniMax (China)", "api_key"),
@@ -57,6 +58,12 @@ class TestProviderRegistry:
         assert pconfig.base_url_env_var == "XAI_BASE_URL"
         assert pconfig.inference_base_url == "https://api.x.ai/v1"
 
+    def test_nvidia_env_vars(self):
+        pconfig = PROVIDER_REGISTRY["nvidia"]
+        assert pconfig.api_key_env_vars == ("NVIDIA_API_KEY",)
+        assert pconfig.base_url_env_var == "NVIDIA_BASE_URL"
+        assert pconfig.inference_base_url == "https://integrate.api.nvidia.com/v1"
+
     def test_copilot_env_vars(self):
         pconfig = PROVIDER_REGISTRY["copilot"]
         assert pconfig.api_key_env_vars == ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN")
@@ -61,6 +61,7 @@ hermes setup # Or configure everything at once
 | **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` |
 | **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` |
 | **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` |
+| **NVIDIA NIM** | Nemotron models via build.nvidia.com or local NIM | Set `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
 | **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) | OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` |
 | **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) |
 | **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` |
@@ -295,6 +295,30 @@ When using xAI as a provider (any base URL containing `x.ai`), Hermes automatically
 
 No configuration is needed — caching activates automatically when an xAI endpoint is detected and a session ID is available. This reduces latency and cost for multi-turn conversations.
 
+### NVIDIA NIM
+
+Nemotron and other open source models via [build.nvidia.com](https://build.nvidia.com) (free API key) or a local NIM endpoint.
+
+```bash
+# Cloud (build.nvidia.com)
+hermes chat --provider nvidia --model nvidia/nemotron-3-super-120b-a12b
+# Requires: NVIDIA_API_KEY in ~/.hermes/.env
+
+# Local NIM endpoint — override base URL
+NVIDIA_BASE_URL=http://localhost:8000/v1 hermes chat --provider nvidia --model nvidia/nemotron-3-super-120b-a12b
+```
+
+Or set it permanently in `config.yaml`:
+```yaml
+model:
+  provider: "nvidia"
+  default: "nvidia/nemotron-3-super-120b-a12b"
+```
+
+:::tip Local NIM
+For on-prem deployments (DGX Spark, local GPU), set `NVIDIA_BASE_URL=http://localhost:8000/v1`. NIM exposes the same OpenAI-compatible chat completions API as build.nvidia.com, so switching between cloud and local is a one-line env-var change.
+:::
+
 ### Hugging Face Inference Providers
 
 [Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) routes to 20+ open models through a unified OpenAI-compatible endpoint (`router.huggingface.co/v1`). Requests are automatically routed to the fastest available backend (Groq, Together, SambaNova, etc.) with automatic failover.
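Since NIM speaks the standard OpenAI chat completions protocol (as the tip in the added docs notes), the same endpoint can also be hit directly with the stock `openai` Python client, outside Hermes. A minimal sketch, assuming `NVIDIA_API_KEY` is set and using the model id and env vars from the diff:

```python
import os
from openai import OpenAI  # pip install openai

# Point the stock OpenAI client at NIM; NVIDIA_BASE_URL switches cloud <-> local.
client = OpenAI(
    base_url=os.environ.get("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1"),
    api_key=os.environ["NVIDIA_API_KEY"],
)
resp = client.chat.completions.create(
    model="nvidia/nemotron-3-super-120b-a12b",
    messages=[{"role": "user", "content": "Say hello in five words."}],
)
print(resp.choices[0].message.content)
```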
@@ -47,6 +47,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
 | MiniMax | `minimax` | `MINIMAX_API_KEY` |
 | MiniMax (China) | `minimax-cn` | `MINIMAX_CN_API_KEY` |
 | DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
+| NVIDIA NIM | `nvidia` | `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
 | OpenCode Zen | `opencode-zen` | `OPENCODE_ZEN_API_KEY` |
 | OpenCode Go | `opencode-go` | `OPENCODE_GO_API_KEY` |
 | Kilo Code | `kilocode` | `KILOCODE_API_KEY` |