diff --git a/README.md b/README.md index 07a140419..088c3b91b 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM. -Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in. +Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in. 
diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 089fd132a..ec0e3540f 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -38,6 +38,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "mimo", "xiaomi-mimo", "arcee-ai", "arceeai", "xai", "x-ai", "x.ai", "grok", + "nvidia", "nim", "nvidia-nim", "nemotron", "qwen-portal", }) @@ -240,6 +241,7 @@ _URL_TO_PROVIDER: Dict[str, str] = { "api.fireworks.ai": "fireworks", "opencode.ai": "opencode-go", "api.x.ai": "xai", + "integrate.api.nvidia.com": "nvidia", "api.xiaomimimo.com": "xiaomi", "xiaomimimo.com": "xiaomi", "ollama.com": "ollama-cloud", diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index e79a6dca6..421836c23 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -233,6 +233,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { api_key_env_vars=("XAI_API_KEY",), base_url_env_var="XAI_BASE_URL", ), + "nvidia": ProviderConfig( + id="nvidia", + name="NVIDIA NIM", + auth_type="api_key", + inference_base_url="https://integrate.api.nvidia.com/v1", + api_key_env_vars=("NVIDIA_API_KEY",), + base_url_env_var="NVIDIA_BASE_URL", + ), "ai-gateway": ProviderConfig( id="ai-gateway", name="Vercel AI Gateway", diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 5b998ddc6..6ec5c750b 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -155,6 +155,13 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "grok-4.20-reasoning", "grok-4-1-fast-reasoning", ], + "nvidia": [ + "nvidia/nemotron-3-super-120b-a12b", + "nvidia/nemotron-3-nano-8b-a4b", + "z-ai/glm5", + "moonshotai/kimi-k2.5", + "minimaxai/minimax-m2.5", + ], "kimi-coding": [ "kimi-k2.5", "kimi-for-coding", @@ -544,6 +551,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("google-gemini-cli", "Google Gemini (OAuth)", "Google Gemini via OAuth + Code Assist (free tier supported; no API key needed)"), ProviderEntry("deepseek", "DeepSeek", "DeepSeek (DeepSeek-V3, R1, coder — direct API)"), 
ProviderEntry("xai", "xAI", "xAI (Grok models — direct API)"), + ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"), ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"), ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"), ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"), @@ -618,6 +626,10 @@ _PROVIDER_ALIASES = { "grok": "xai", "x-ai": "xai", "x.ai": "xai", + "nim": "nvidia", + "nvidia-nim": "nvidia", + "build-nvidia": "nvidia", + "nemotron": "nvidia", "ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud "ollama_cloud": "ollama-cloud", } diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index b2dda20be..a71055cfe 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -137,6 +137,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { base_url_override="https://api.x.ai/v1", base_url_env_var="XAI_BASE_URL", ), + "nvidia": HermesOverlay( + transport="openai_chat", + base_url_override="https://integrate.api.nvidia.com/v1", + base_url_env_var="NVIDIA_BASE_URL", + ), "xiaomi": HermesOverlay( transport="openai_chat", base_url_env_var="XIAOMI_BASE_URL", @@ -191,6 +196,12 @@ ALIASES: Dict[str, str] = { "x.ai": "xai", "grok": "xai", + # nvidia + "nim": "nvidia", + "nvidia-nim": "nvidia", + "build-nvidia": "nvidia", + "nemotron": "nvidia", + # kimi-for-coding (models.dev ID) "kimi": "kimi-for-coding", "kimi-coding": "kimi-for-coding", diff --git a/tests/hermes_cli/test_api_key_providers.py b/tests/hermes_cli/test_api_key_providers.py index 97deab89e..c56edc4bb 100644 --- a/tests/hermes_cli/test_api_key_providers.py +++ b/tests/hermes_cli/test_api_key_providers.py @@ -33,6 +33,7 @@ class TestProviderRegistry: ("huggingface", "Hugging Face", "api_key"), ("zai", "Z.AI / GLM", "api_key"), ("xai", "xAI", "api_key"), + ("nvidia", "NVIDIA NIM", "api_key"), 
("kimi-coding", "Kimi / Moonshot", "api_key"), ("minimax", "MiniMax", "api_key"), ("minimax-cn", "MiniMax (China)", "api_key"), @@ -57,6 +58,12 @@ class TestProviderRegistry: assert pconfig.base_url_env_var == "XAI_BASE_URL" assert pconfig.inference_base_url == "https://api.x.ai/v1" + def test_nvidia_env_vars(self): + pconfig = PROVIDER_REGISTRY["nvidia"] + assert pconfig.api_key_env_vars == ("NVIDIA_API_KEY",) + assert pconfig.base_url_env_var == "NVIDIA_BASE_URL" + assert pconfig.inference_base_url == "https://integrate.api.nvidia.com/v1" + def test_copilot_env_vars(self): pconfig = PROVIDER_REGISTRY["copilot"] assert pconfig.api_key_env_vars == ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN") diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md index 880c01cb2..bda74b9ed 100644 --- a/website/docs/getting-started/quickstart.md +++ b/website/docs/getting-started/quickstart.md @@ -61,6 +61,7 @@ hermes setup # Or configure everything at once | **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` | | **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` | | **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` | +| **NVIDIA NIM** | Nemotron models via build.nvidia.com or local NIM | Set `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) | | **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) 
| OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` | | **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) | | **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` | diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index e3d0ad828..750ad671c 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -295,6 +295,30 @@ When using xAI as a provider (any base URL containing `x.ai`), Hermes automatica No configuration is needed — caching activates automatically when an xAI endpoint is detected and a session ID is available. This reduces latency and cost for multi-turn conversations. +### NVIDIA NIM + +Nemotron and other open source models via [build.nvidia.com](https://build.nvidia.com) (free API key) or a local NIM endpoint. + +```bash +# Cloud (build.nvidia.com) +hermes chat --provider nvidia --model nvidia/nemotron-3-super-120b-a12b +# Requires: NVIDIA_API_KEY in ~/.hermes/.env + +# Local NIM endpoint — override base URL +NVIDIA_BASE_URL=http://localhost:8000/v1 hermes chat --provider nvidia --model nvidia/nemotron-3-super-120b-a12b +``` + +Or set it permanently in `config.yaml`: +```yaml +model: + provider: "nvidia" + default: "nvidia/nemotron-3-super-120b-a12b" +``` + +:::tip Local NIM +For on-prem deployments (DGX Spark, local GPU), set `NVIDIA_BASE_URL=http://localhost:8000/v1`. NIM exposes the same OpenAI-compatible chat completions API as build.nvidia.com, so switching between cloud and local is a one-line env-var change. +::: + ### Hugging Face Inference Providers [Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) routes to 20+ open models through a unified OpenAI-compatible endpoint (`router.huggingface.co/v1`). Requests are automatically routed to the fastest available backend (Groq, Together, SambaNova, etc.) 
with automatic failover. diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index 1e2b2a803..12fde185d 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -47,6 +47,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback | MiniMax | `minimax` | `MINIMAX_API_KEY` | | MiniMax (China) | `minimax-cn` | `MINIMAX_CN_API_KEY` | | DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` | +| NVIDIA NIM | `nvidia` | `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) | | OpenCode Zen | `opencode-zen` | `OPENCODE_ZEN_API_KEY` | | OpenCode Go | `opencode-go` | `OPENCODE_GO_API_KEY` | | Kilo Code | `kilocode` | `KILOCODE_API_KEY` |
A real terminal interface — Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.