diff --git a/agent/model_metadata.py b/agent/model_metadata.py index ec0e3540f..b30af6e48 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -159,6 +159,8 @@ DEFAULT_CONTEXT_LENGTHS = { "grok": 131072, # catch-all (grok-beta, unknown grok-*) # Kimi "kimi": 262144, + # Nemotron — NVIDIA's open-weights series (128K context across all sizes) + "nemotron": 131072, # Arcee "trinity": 262144, # OpenRouter diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 8c0484abd..20b54b788 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -24,6 +24,7 @@ model: # "minimax" - MiniMax global (requires: MINIMAX_API_KEY) # "minimax-cn" - MiniMax China (requires: MINIMAX_CN_API_KEY) # "huggingface" - Hugging Face Inference (requires: HF_TOKEN) + # "nvidia" - NVIDIA NIM / build.nvidia.com (requires: NVIDIA_API_KEY) # "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY) # "arcee" - Arcee AI Trinity models (requires: ARCEEAI_API_KEY) # "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index c7df03370..f08e29266 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -861,6 +861,22 @@ OPTIONAL_ENV_VARS = { "category": "provider", "advanced": True, }, + "NVIDIA_API_KEY": { + "description": "NVIDIA NIM API key (build.nvidia.com or local NIM endpoint)", + "prompt": "NVIDIA NIM API key", + "url": "https://build.nvidia.com/", + "password": True, + "category": "provider", + "advanced": True, + }, + "NVIDIA_BASE_URL": { + "description": "NVIDIA NIM base URL override (e.g. http://localhost:8000/v1 for local NIM)", + "prompt": "NVIDIA NIM base URL (leave empty for default)", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, "GLM_API_KEY": { "description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)", "prompt": "Z.AI / GLM API key", diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index d044ddf4c..28c4af1fa 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -825,6 +825,7 @@ def run_doctor(args): ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True), ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True), ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True), + ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True), ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True), # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does. ("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True), diff --git a/hermes_cli/dump.py b/hermes_cli/dump.py index a52079085..ae8ecc641 100644 --- a/hermes_cli/dump.py +++ b/hermes_cli/dump.py @@ -296,6 +296,7 @@ def run_dump(args): ("DEEPSEEK_API_KEY", "deepseek"), ("DASHSCOPE_API_KEY", "dashscope"), ("HF_TOKEN", "huggingface"), + ("NVIDIA_API_KEY", "nvidia"), ("AI_GATEWAY_API_KEY", "ai_gateway"), ("OPENCODE_ZEN_API_KEY", "opencode_zen"), ("OPENCODE_GO_API_KEY", "opencode_go"), diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 53f59a210..10597db15 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1143,7 +1143,7 @@ def select_provider_and_model(args=None): _model_flow_kimi(config, current_model) elif selected_provider == "bedrock": _model_flow_bedrock(config, current_model) - elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee", "ollama-cloud"): + elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee", "nvidia", "ollama-cloud"): _model_flow_api_key_provider(config, selected_provider, current_model) # ── Post-switch cleanup: clear stale OPENAI_BASE_URL ────────────── @@ -4954,7 +4954,7 @@ For more help on a command: ) chat_parser.add_argument( "--provider", - choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "xai", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"], + choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "xai", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee", "nvidia"], default=None, help="Inference provider (default: auto)" ) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 6ec5c750b..7a897cb79 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -156,11 +156,18 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "grok-4-1-fast-reasoning", ], "nvidia": [ + # NVIDIA flagship reasoning models "nvidia/nemotron-3-super-120b-a12b", - "nvidia/nemotron-3-nano-8b-a4b", - "z-ai/glm5", + "nvidia/nemotron-3-nano-30b-a3b", + "nvidia/llama-3.3-nemotron-super-49b-v1.5", + # Third-party agentic models hosted on build.nvidia.com + # (map to OpenRouter defaults — users get familiar picks on NIM) + "qwen/qwen3.5-397b-a17b", + "deepseek-ai/deepseek-v3.2", "moonshotai/kimi-k2.5", "minimaxai/minimax-m2.5", + "z-ai/glm5", + "openai/gpt-oss-120b", ], "kimi-coding": [ "kimi-k2.5", @@ -543,6 +550,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"), ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"), ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"), + ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"), ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"), ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"), ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"), @@ -551,7 +559,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("google-gemini-cli", "Google Gemini (OAuth)", "Google Gemini via OAuth + Code Assist (free tier supported; no API key needed)"), ProviderEntry("deepseek", "DeepSeek", "DeepSeek (DeepSeek-V3, R1, coder — direct API)"), ProviderEntry("xai", "xAI", "xAI (Grok models — direct API)"), - ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"), ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"), ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"), ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"), diff --git a/scripts/release.py b/scripts/release.py index 028f75ba6..880aebef9 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -256,6 +256,7 @@ AUTHOR_MAP = { "anthhub@163.com": "anthhub", "shenuu@gmail.com": "shenuu", "xiayh17@gmail.com": "xiayh0107", + "asurla@nvidia.com": "anniesurla", } diff --git a/tests/tools/test_local_env_blocklist.py b/tests/tools/test_local_env_blocklist.py index b196cea78..0377d59b3 100644 --- a/tests/tools/test_local_env_blocklist.py +++ b/tests/tools/test_local_env_blocklist.py @@ -86,6 +86,7 @@ class TestProviderEnvBlocklist: "MINIMAX_API_KEY": "mm-key", "MINIMAX_CN_API_KEY": "mmcn-key", "DEEPSEEK_API_KEY": "deepseek-key", + "NVIDIA_API_KEY": "nvidia-key", } result_env = _run_with_env(extra_os_env=registry_vars)