From c76e8795744a00208c683b2c6319902416bce1a8 Mon Sep 17 00:00:00 2001 From: Alex-wuhu Date: Fri, 10 Apr 2026 22:22:47 +0800 Subject: [PATCH] feat: add NovitaAI as LLM provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add NovitaAI as a first-class provider with dedicated model selection flow, live pricing, and authoritative context length resolution. - Register provider in PROVIDER_REGISTRY, HERMES_OVERLAYS, and all alias/label maps (ID: novita, aliases: novita-ai, novitaai) - Add dedicated _model_flow_novita() with 3-tier model list fallback: Novita API → models.dev → static curated list - Fetch live pricing from /v1/models with correct unit conversion (input_token_price_per_m is 0.0001 USD per Mtok) - Add Novita-specific context length resolution (step 4b) in get_model_context_length(), prioritized over models.dev/OpenRouter - Register api.novita.ai in _URL_TO_PROVIDER to prevent early return from the custom-endpoint code path - Add models.dev mapping (novita → novita-ai) - Add default auxiliary model (deepseek/deepseek-v3-0324) - Add NOVITA_API_KEY to test isolation (conftest.py) - Update docs: providers page, env vars reference, CLI reference, .env.example, README, and landing page --- .env.example | 8 +++ README.md | 2 +- agent/model_metadata.py | 25 +++++++- agent/models_dev.py | 1 + hermes_cli/main.py | 33 ++++++++++- hermes_cli/models.py | 59 ++++++++++++++++++- hermes_cli/providers.py | 9 +++ plugins/model-providers/novita/__init__.py | 27 +++++++++ plugins/model-providers/novita/plugin.yaml | 5 ++ website/docs/integrations/providers.md | 24 ++++++++ website/docs/reference/cli-commands.md | 2 +- .../docs/reference/environment-variables.md | 4 +- 12 files changed, 192 insertions(+), 7 deletions(-) create mode 100644 plugins/model-providers/novita/__init__.py create mode 100644 plugins/model-providers/novita/plugin.yaml diff --git a/.env.example b/.env.example index e6763f18fd2..4dfa7a4e284 100644 --- 
a/.env.example +++ b/.env.example @@ -14,6 +14,14 @@ # LLM_MODEL is no longer read from .env — this line is kept for reference only. # LLM_MODEL=anthropic/claude-opus-4.6 +# ============================================================================= +# LLM PROVIDER (NovitaAI) +# ============================================================================= +# NovitaAI — multi-model aggregator with pay-per-use pricing +# Get your key at: https://novita.ai/settings/key-management +# NOVITA_API_KEY= +# NOVITA_BASE_URL=https://api.novita.ai/openai/v1 # Override default base URL + # ============================================================================= # LLM PROVIDER (Google AI Studio / Gemini) # ============================================================================= diff --git a/README.md b/README.md index 8b8a078b250..dc44df02232 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM. -Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in. 
+Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NovitaAI](https://novita.ai) (multi-model, pay-per-use), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in. diff --git a/agent/model_metadata.py b/agent/model_metadata.py index f5e34fc18c6..a10a01e3cc2 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -47,7 +47,7 @@ def _resolve_requests_verify() -> bool | str: _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "openrouter", "nous", "openai-codex", "copilot", "copilot-acp", "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-oauth", "minimax-cn", "anthropic", "deepseek", - "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba", + "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba", "novita", "qwen-oauth", "xiaomi", "arcee", @@ -66,7 +66,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "gmi-cloud", "gmicloud", "xai", "x-ai", "x.ai", "grok", "nvidia", "nim", "nvidia-nim", "nemotron", - "qwen-portal", + "qwen-portal", "novita-ai", "novitaai", }) @@ -104,6 +104,8 @@ def _strip_provider_prefix(model: str) -> str: _model_metadata_cache: Dict[str, Dict[str, Any]] = {} _model_metadata_cache_time: float = 0 +_novita_metadata_cache: Dict[str, Dict[str, Any]] = {} +_novita_metadata_cache_time: float = 0 _MODEL_CACHE_TTL = 3600 _endpoint_model_metadata_cache: Dict[str, Dict[str, Dict[str, Any]]] = {} _endpoint_model_metadata_cache_time: Dict[str, float] = {} @@ -285,6 +287,7 @@ def grok_supports_reasoning_effort(model: str) -> bool: _CONTEXT_LENGTH_KEYS = ( "context_length", "context_window", + "context_size", "max_context_length", 
"max_position_embeddings", "max_model_len", @@ -361,6 +364,7 @@ _URL_TO_PROVIDER: Dict[str, str] = { "api.xiaomimimo.com": "xiaomi", "xiaomimimo.com": "xiaomi", "api.gmi-serving.com": "gmi", + "api.novita.ai": "novita", "tokenhub.tencentmaas.com": "tencent-tokenhub", "ollama.com": "ollama-cloud", } @@ -557,6 +561,16 @@ def _extract_max_completion_tokens(payload: Dict[str, Any]) -> Optional[int]: def _extract_pricing(payload: Dict[str, Any]) -> Dict[str, Any]: + novita_input = payload.get("input_token_price_per_m") + novita_output = payload.get("output_token_price_per_m") + if novita_input is not None or novita_output is not None: + pricing: Dict[str, Any] = {} + if novita_input is not None: + pricing["prompt"] = str(float(novita_input) / 10_000 / 1_000_000) + if novita_output is not None: + pricing["completion"] = str(float(novita_output) / 10_000 / 1_000_000) + return pricing + alias_map = { "prompt": ("prompt", "input", "input_cost_per_token", "prompt_token_cost"), "completion": ("completion", "output", "output_cost_per_token", "completion_token_cost"), @@ -1527,6 +1541,13 @@ def get_model_context_length( except ImportError: pass # boto3 not installed — fall through to generic resolution + if provider == "novita" or (base_url and base_url_host_matches(base_url, "api.novita.ai")): + ctx = _resolve_endpoint_context_length(model, base_url or "https://api.novita.ai/openai/v1", api_key=api_key) + if ctx is not None: + if base_url: + save_context_length(model, base_url, ctx) + return ctx + # 2. Active endpoint metadata for truly custom/unknown endpoints. # Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their # /models endpoint may report a provider-imposed limit (e.g. 
Copilot diff --git a/agent/models_dev.py b/agent/models_dev.py index d709d7176d4..8fabb276645 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -141,6 +141,7 @@ class ProviderInfo: # Hermes provider names → models.dev provider IDs PROVIDER_TO_MODELS_DEV: Dict[str, str] = { "openrouter": "openrouter", + "novita": "novita-ai", "anthropic": "anthropic", "openai": "openai", "openai-codex": "openai", diff --git a/hermes_cli/main.py b/hermes_cli/main.py index c93fa485c98..4683c8f3126 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -4970,6 +4970,37 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""): ) if model_list: print(f" Found {len(model_list)} model(s) from Ollama Cloud") + elif provider_id == "novita": + from hermes_cli.models import fetch_api_models + + api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "") + curated = _PROVIDER_MODELS.get(provider_id, []) + live_models = fetch_api_models(api_key_for_probe, effective_base) + if live_models: + model_list = live_models + print(f" Found {len(model_list)} model(s) from {pconfig.name} API") + else: + mdev_models: list = [] + try: + from agent.models_dev import list_agentic_models + + mdev_models = list_agentic_models(provider_id) + except Exception: + pass + if mdev_models: + seen = {m.lower() for m in mdev_models} + model_list = list(mdev_models) + for m in curated: + if m.lower() not in seen: + model_list.append(m) + seen.add(m.lower()) + print(f" Found {len(model_list)} model(s) from models.dev registry") + else: + model_list = curated + if model_list: + print( + f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.' 
+ ) else: curated = _PROVIDER_MODELS.get(provider_id, []) @@ -9269,7 +9300,7 @@ def _build_provider_choices() -> list[str]: "auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "google-gemini-cli", "xai", "bedrock", "azure-foundry", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", - "stepfun", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee", + "stepfun", "minimax", "minimax-cn", "kilocode", "novita", "xiaomi", "arcee", "nvidia", "deepseek", "alibaba", "qwen-oauth", "opencode-zen", "opencode-go", ] diff --git a/hermes_cli/models.py b/hermes_cli/models.py index eb55b59ee5d..b3d2e1cd81b 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -445,6 +445,14 @@ _PROVIDER_MODELS: dict[str, list[str]] = { # Azure Foundry: user-provided endpoint and model. # Empty list because models depend on the endpoint configuration. "azure-foundry": [], + "novita": [ + "moonshotai/kimi-k2.5", + "minimax/minimax-m2.7", + "zai-org/glm-5", + "deepseek/deepseek-v3-0324", + "deepseek/deepseek-r1-0528", + "qwen/qwen3-235b-a22b-fp8", + ], } # Vercel AI Gateway: derive the bare-model-id catalog from the curated @@ -905,6 +913,7 @@ class ProviderEntry(NamedTuple): CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"), ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"), + ProviderEntry("novita", "NovitaAI", "NovitaAI (90+ models, pay-per-use)"), ProviderEntry("lmstudio", "LM Studio", "LM Studio (local desktop app with built-in model server)"), ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"), ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"), @@ -1014,6 +1023,8 @@ _PROVIDER_ALIASES = { "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface", + "novita-ai": "novita", + "novitaai": "novita", "mimo": "xiaomi", "xiaomi-mimo": 
"xiaomi", "tencent": "tencent-tokenhub", @@ -1494,7 +1505,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]: def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]: - """Return live pricing for providers that support it (openrouter, nous, ai-gateway).""" + """Return live pricing for providers that support it (openrouter, nous, ai-gateway, novita).""" normalized = normalize_provider(provider) if normalized == "openrouter": return fetch_models_with_pricing( @@ -1504,6 +1515,8 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d ) if normalized == "ai-gateway": return fetch_ai_gateway_pricing(force_refresh=force_refresh) + if normalized == "novita": + return _fetch_novita_pricing() if normalized == "nous": api_key, base_url = _resolve_nous_pricing_credentials() if base_url: @@ -1520,6 +1533,50 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d return {} +def _fetch_novita_pricing(timeout: float = 8.0) -> dict[str, dict[str, str]]: + """Fetch pricing from NovitaAI /v1/models. + + NovitaAI returns input/output prices per million tokens in units of + 0.0001 USD. Convert them to the per-token strings used by the shared + pricing formatter. 
+ """ + api_key = os.getenv("NOVITA_API_KEY", "").strip() + if not api_key: + return {} + + base_url = os.getenv("NOVITA_BASE_URL", "").strip() or "https://api.novita.ai/openai/v1" + url = base_url.rstrip("/") + "/models" + headers = { + "Authorization": f"Bearer {api_key}", + "Accept": "application/json", + "User-Agent": _HERMES_USER_AGENT, + } + + try: + req = urllib.request.Request(url, headers=headers) + with urllib.request.urlopen(req, timeout=timeout) as resp: + payload = json.loads(resp.read().decode()) + except Exception: + return {} + + result: dict[str, dict[str, str]] = {} + for item in payload.get("data", []): + if not isinstance(item, dict): + continue + mid = item.get("id") + if not mid: + continue + inp = item.get("input_token_price_per_m") + out = item.get("output_token_price_per_m") + if inp is None and out is None: + continue + result[str(mid)] = { + "prompt": str(float(inp or 0) / 10_000 / 1_000_000), + "completion": str(float(out or 0) / 10_000 / 1_000_000), + } + return result + + # All provider IDs and aliases that are valid for the provider:model syntax. 
_KNOWN_PROVIDER_NAMES: set[str] = ( set(_PROVIDER_LABELS.keys()) diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index f766a50ebf9..08fc173dc69 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -156,6 +156,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { is_aggregator=True, base_url_env_var="HF_BASE_URL", ), + "novita": HermesOverlay( + transport="openai_chat", + is_aggregator=True, + base_url_env_var="NOVITA_BASE_URL", + ), "xai": HermesOverlay( transport="codex_responses", base_url_override="https://api.x.ai/v1", @@ -309,6 +314,10 @@ ALIASES: Dict[str, str] = { "hugging-face": "huggingface", "huggingface-hub": "huggingface", + # novita + "novita-ai": "novita", + "novitaai": "novita", + # xiaomi "mimo": "xiaomi", "xiaomi-mimo": "xiaomi", diff --git a/plugins/model-providers/novita/__init__.py b/plugins/model-providers/novita/__init__.py new file mode 100644 index 00000000000..c39087e52d9 --- /dev/null +++ b/plugins/model-providers/novita/__init__.py @@ -0,0 +1,27 @@ +"""NovitaAI provider profile.""" + +from providers import register_provider +from providers.base import ProviderProfile + + +novita = ProviderProfile( + name="novita", + aliases=("novita-ai", "novitaai"), + display_name="NovitaAI", + description="NovitaAI — multi-model aggregator with pay-per-use pricing", + signup_url="https://novita.ai/settings/key-management", + env_vars=("NOVITA_API_KEY", "NOVITA_BASE_URL"), + base_url="https://api.novita.ai/openai/v1", + auth_type="api_key", + default_aux_model="deepseek/deepseek-v3-0324", + fallback_models=( + "moonshotai/kimi-k2.5", + "minimax/minimax-m2.7", + "zai-org/glm-5", + "deepseek/deepseek-v3-0324", + "deepseek/deepseek-r1-0528", + "qwen/qwen3-235b-a22b-fp8", + ), +) + +register_provider(novita) diff --git a/plugins/model-providers/novita/plugin.yaml b/plugins/model-providers/novita/plugin.yaml new file mode 100644 index 00000000000..681db199433 --- /dev/null +++ b/plugins/model-providers/novita/plugin.yaml @@ -0,0 
+1,5 @@ +name: novita-provider +kind: model-provider +version: 1.0.0 +description: NovitaAI multi-model aggregator +author: Nous Research diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index 93e4ba630d3..28ba035452d 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -20,6 +20,7 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro | **GitHub Copilot ACP** | `hermes model` (spawns local `copilot --acp --stdio`) | | **Anthropic** | `hermes model` (Claude Max + extra usage credits via OAuth; also supports Anthropic API key or manual setup-token — see note below) | | **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` | +| **NovitaAI** | `NOVITA_API_KEY` in `~/.hermes/.env` (provider: `novita`, multi-model aggregator) | | **AI Gateway** | `AI_GATEWAY_API_KEY` in `~/.hermes/.env` (provider: `ai-gateway`) | | **z.ai / GLM** | `GLM_API_KEY` in `~/.hermes/.env` (provider: `zai`) | | **Kimi / Moonshot** | `KIMI_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding`) | @@ -546,6 +547,29 @@ You can append routing suffixes to model names: `:fastest` (default), `:cheapest The base URL can be overridden with `HF_BASE_URL`. +### NovitaAI + +[NovitaAI](https://novita.ai) is a multi-model aggregator with pay-per-use pricing. Access models from DeepSeek, Kimi, MiniMax, GLM, Qwen, and more through a unified OpenAI-compatible API. + +```bash +# Use any available model +hermes chat --provider novita --model moonshotai/kimi-k2.5 +# Requires: NOVITA_API_KEY in ~/.hermes/.env + +# Short alias +hermes chat --provider novita-ai --model deepseek/deepseek-v3-0324 +``` + +Or set it permanently in `config.yaml`: +```yaml +model: + provider: "novita" + default: "moonshotai/kimi-k2.5" + base_url: "https://api.novita.ai/openai/v1" +``` + +Get your API key at [novita.ai/settings/key-management](https://novita.ai/settings/key-management). 
The base URL can be overridden with `NOVITA_BASE_URL`. + ## Custom & Self-Hosted LLM Providers Hermes Agent works with **any OpenAI-compatible API endpoint**. If a server implements `/v1/chat/completions`, you can point Hermes at it. This means you can use local models, GPU inference servers, multi-provider routers, or any third-party API. diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index 4ce8a331a94..4bb361a987e 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -91,7 +91,7 @@ Common options: | `-q`, `--query "..."` | One-shot, non-interactive prompt. | | `-m`, `--model ` | Override the model for this run. | | `-t`, `--toolsets ` | Enable a comma-separated set of toolsets. | -| `--provider ` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry`, `lmstudio`, `stepfun`, `tencent-tokenhub` (alias `tencent`, `tokenhub`). | +| `--provider ` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `novita`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry`, `lmstudio`, `stepfun`, `tencent-tokenhub` (alias `tencent`, `tokenhub`). 
| | `-s`, `--skills ` | Preload one or more skills for the session (can be repeated or comma-separated). | | `-v`, `--verbose` | Verbose output. | | `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. | diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 409ddf8fe35..a427c901ce1 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -67,6 +67,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config | `DASHSCOPE_BASE_URL` | Custom DashScope base URL (default: `https://dashscope-intl.aliyuncs.com/compatible-mode/v1`; use `https://dashscope.aliyuncs.com/compatible-mode/v1` for mainland-China region) | | `DEEPSEEK_API_KEY` | DeepSeek API key for direct DeepSeek access ([platform.deepseek.com](https://platform.deepseek.com/api_keys)) | | `DEEPSEEK_BASE_URL` | Custom DeepSeek API base URL | +| `NOVITA_API_KEY` | NovitaAI API key — multi-model aggregator ([novita.ai/settings/key-management](https://novita.ai/settings/key-management)) | +| `NOVITA_BASE_URL` | Override NovitaAI base URL (default: `https://api.novita.ai/openai/v1`) | | `NVIDIA_API_KEY` | NVIDIA NIM API key — Nemotron and open models ([build.nvidia.com](https://build.nvidia.com)) | | `NVIDIA_BASE_URL` | Override NVIDIA base URL (default: `https://integrate.api.nvidia.com/v1`; set to `http://localhost:8000/v1` for a local NIM endpoint) | | `STEPFUN_API_KEY` | StepFun API key — Step-series models ([platform.stepfun.com](https://platform.stepfun.com)) | @@ -103,7 +105,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | Variable | Description | |----------|-------------| -| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, 
`minimax-cn`, `minimax-oauth` (browser OAuth login — no API key required; see [MiniMax OAuth guide](../guides/minimax-oauth.md)), `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `tencent-tokenhub` (default: `auto`) | +| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `novita`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth` (browser OAuth login — no API key required; see [MiniMax OAuth guide](../guides/minimax-oauth.md)), `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `tencent-tokenhub` (default: `auto`) | | `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) | | `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL | | `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |
A real terminal interface — Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.