diff --git a/README.md b/README.md
index 07a140419..088c3b91b 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
-Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
+Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
| A real terminal interface | Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output. |
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 089fd132a..ec0e3540f 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -38,6 +38,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"mimo", "xiaomi-mimo",
"arcee-ai", "arceeai",
"xai", "x-ai", "x.ai", "grok",
+ "nvidia", "nim", "nvidia-nim", "nemotron",
"qwen-portal",
})
@@ -240,6 +241,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"api.fireworks.ai": "fireworks",
"opencode.ai": "opencode-go",
"api.x.ai": "xai",
+ "integrate.api.nvidia.com": "nvidia",
"api.xiaomimimo.com": "xiaomi",
"xiaomimimo.com": "xiaomi",
"ollama.com": "ollama-cloud",
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index e79a6dca6..421836c23 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -233,6 +233,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("XAI_API_KEY",),
base_url_env_var="XAI_BASE_URL",
),
+ "nvidia": ProviderConfig(
+ id="nvidia",
+ name="NVIDIA NIM",
+ auth_type="api_key",
+ inference_base_url="https://integrate.api.nvidia.com/v1",
+ api_key_env_vars=("NVIDIA_API_KEY",),
+ base_url_env_var="NVIDIA_BASE_URL",
+ ),
"ai-gateway": ProviderConfig(
id="ai-gateway",
name="Vercel AI Gateway",
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 5b998ddc6..6ec5c750b 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -155,6 +155,13 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"grok-4.20-reasoning",
"grok-4-1-fast-reasoning",
],
+ "nvidia": [
+ "nvidia/nemotron-3-super-120b-a12b",
+ "nvidia/nemotron-3-nano-8b-a4b",
+ "z-ai/glm5",
+ "moonshotai/kimi-k2.5",
+ "minimaxai/minimax-m2.5",
+ ],
"kimi-coding": [
"kimi-k2.5",
"kimi-for-coding",
@@ -544,6 +551,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("google-gemini-cli", "Google Gemini (OAuth)", "Google Gemini via OAuth + Code Assist (free tier supported; no API key needed)"),
ProviderEntry("deepseek", "DeepSeek", "DeepSeek (DeepSeek-V3, R1, coder — direct API)"),
ProviderEntry("xai", "xAI", "xAI (Grok models — direct API)"),
+ ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"),
ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"),
ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"),
@@ -618,6 +626,10 @@ _PROVIDER_ALIASES = {
"grok": "xai",
"x-ai": "xai",
"x.ai": "xai",
+ "nim": "nvidia",
+ "nvidia-nim": "nvidia",
+ "build-nvidia": "nvidia",
+ "nemotron": "nvidia",
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
"ollama_cloud": "ollama-cloud",
}
diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py
index b2dda20be..a71055cfe 100644
--- a/hermes_cli/providers.py
+++ b/hermes_cli/providers.py
@@ -137,6 +137,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_override="https://api.x.ai/v1",
base_url_env_var="XAI_BASE_URL",
),
+ "nvidia": HermesOverlay(
+ transport="openai_chat",
+ base_url_override="https://integrate.api.nvidia.com/v1",
+ base_url_env_var="NVIDIA_BASE_URL",
+ ),
"xiaomi": HermesOverlay(
transport="openai_chat",
base_url_env_var="XIAOMI_BASE_URL",
@@ -191,6 +196,12 @@ ALIASES: Dict[str, str] = {
"x.ai": "xai",
"grok": "xai",
+ # nvidia
+ "nim": "nvidia",
+ "nvidia-nim": "nvidia",
+ "build-nvidia": "nvidia",
+ "nemotron": "nvidia",
+
# kimi-for-coding (models.dev ID)
"kimi": "kimi-for-coding",
"kimi-coding": "kimi-for-coding",
diff --git a/tests/hermes_cli/test_api_key_providers.py b/tests/hermes_cli/test_api_key_providers.py
index 97deab89e..c56edc4bb 100644
--- a/tests/hermes_cli/test_api_key_providers.py
+++ b/tests/hermes_cli/test_api_key_providers.py
@@ -33,6 +33,7 @@ class TestProviderRegistry:
("huggingface", "Hugging Face", "api_key"),
("zai", "Z.AI / GLM", "api_key"),
("xai", "xAI", "api_key"),
+ ("nvidia", "NVIDIA NIM", "api_key"),
("kimi-coding", "Kimi / Moonshot", "api_key"),
("minimax", "MiniMax", "api_key"),
("minimax-cn", "MiniMax (China)", "api_key"),
@@ -57,6 +58,12 @@ class TestProviderRegistry:
assert pconfig.base_url_env_var == "XAI_BASE_URL"
assert pconfig.inference_base_url == "https://api.x.ai/v1"
+ def test_nvidia_env_vars(self):
+ pconfig = PROVIDER_REGISTRY["nvidia"]
+ assert pconfig.api_key_env_vars == ("NVIDIA_API_KEY",)
+ assert pconfig.base_url_env_var == "NVIDIA_BASE_URL"
+ assert pconfig.inference_base_url == "https://integrate.api.nvidia.com/v1"
+
def test_copilot_env_vars(self):
pconfig = PROVIDER_REGISTRY["copilot"]
assert pconfig.api_key_env_vars == ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN")
diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md
index 880c01cb2..bda74b9ed 100644
--- a/website/docs/getting-started/quickstart.md
+++ b/website/docs/getting-started/quickstart.md
@@ -61,6 +61,7 @@ hermes setup # Or configure everything at once
| **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` |
| **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` |
| **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` |
+| **NVIDIA NIM** | Nemotron models via build.nvidia.com or local NIM | Set `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
| **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) | OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` |
| **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) |
| **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` |
diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md
index e3d0ad828..750ad671c 100644
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -295,6 +295,30 @@ When using xAI as a provider (any base URL containing `x.ai`), Hermes automatica
No configuration is needed — caching activates automatically when an xAI endpoint is detected and a session ID is available. This reduces latency and cost for multi-turn conversations.
+### NVIDIA NIM
+
+Nemotron and other open-source models via [build.nvidia.com](https://build.nvidia.com) (free API key) or a local NIM endpoint.
+
+```bash
+# Cloud (build.nvidia.com)
+hermes chat --provider nvidia --model nvidia/nemotron-3-super-120b-a12b
+# Requires: NVIDIA_API_KEY in ~/.hermes/.env
+
+# Local NIM endpoint — override base URL
+NVIDIA_BASE_URL=http://localhost:8000/v1 hermes chat --provider nvidia --model nvidia/nemotron-3-super-120b-a12b
+```
+
+Or set it permanently in `config.yaml`:
+```yaml
+model:
+ provider: "nvidia"
+ default: "nvidia/nemotron-3-super-120b-a12b"
+```
+
+:::tip Local NIM
+For on-prem deployments (DGX Spark, local GPU), set `NVIDIA_BASE_URL=http://localhost:8000/v1`. NIM exposes the same OpenAI-compatible chat completions API as build.nvidia.com, so switching between cloud and local is a one-line env-var change.
+:::
+
### Hugging Face Inference Providers
[Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) routes to 20+ open models through a unified OpenAI-compatible endpoint (`router.huggingface.co/v1`). Requests are automatically routed to the fastest available backend (Groq, Together, SambaNova, etc.) with automatic failover.
diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md
index 1e2b2a803..12fde185d 100644
--- a/website/docs/user-guide/features/fallback-providers.md
+++ b/website/docs/user-guide/features/fallback-providers.md
@@ -47,6 +47,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
| MiniMax | `minimax` | `MINIMAX_API_KEY` |
| MiniMax (China) | `minimax-cn` | `MINIMAX_CN_API_KEY` |
| DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
+| NVIDIA NIM | `nvidia` | `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
| OpenCode Zen | `opencode-zen` | `OPENCODE_ZEN_API_KEY` |
| OpenCode Go | `opencode-go` | `OPENCODE_GO_API_KEY` |
| Kilo Code | `kilocode` | `KILOCODE_API_KEY` |