diff --git a/.env.example b/.env.example
index e6763f18fd2..4dfa7a4e284 100644
--- a/.env.example
+++ b/.env.example
@@ -14,6 +14,14 @@
# LLM_MODEL is no longer read from .env — this line is kept for reference only.
# LLM_MODEL=anthropic/claude-opus-4.6
+# =============================================================================
+# LLM PROVIDER (NovitaAI)
+# =============================================================================
+# NovitaAI — multi-model aggregator with pay-per-use pricing
+# Get your key at: https://novita.ai/settings/key-management
+# NOVITA_API_KEY=
+# NOVITA_BASE_URL=https://api.novita.ai/openai/v1 # Override default base URL
+
# =============================================================================
# LLM PROVIDER (Google AI Studio / Gemini)
# =============================================================================
diff --git a/README.md b/README.md
index 8b8a078b250..dc44df02232 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
-Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
+Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NovitaAI](https://novita.ai) (multi-model, pay-per-use), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
| A real terminal interface | Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output. |
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index f5e34fc18c6..a10a01e3cc2 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -47,7 +47,7 @@ def _resolve_requests_verify() -> bool | str:
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-oauth", "minimax-cn", "anthropic", "deepseek",
- "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
+ "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba", "novita",
"qwen-oauth",
"xiaomi",
"arcee",
@@ -66,7 +66,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"gmi-cloud", "gmicloud",
"xai", "x-ai", "x.ai", "grok",
"nvidia", "nim", "nvidia-nim", "nemotron",
- "qwen-portal",
+ "qwen-portal", "novita-ai", "novitaai",
})
@@ -104,6 +104,8 @@ def _strip_provider_prefix(model: str) -> str:
_model_metadata_cache: Dict[str, Dict[str, Any]] = {}
_model_metadata_cache_time: float = 0
+_novita_metadata_cache: Dict[str, Dict[str, Any]] = {}
+_novita_metadata_cache_time: float = 0
_MODEL_CACHE_TTL = 3600
_endpoint_model_metadata_cache: Dict[str, Dict[str, Dict[str, Any]]] = {}
_endpoint_model_metadata_cache_time: Dict[str, float] = {}
@@ -285,6 +287,7 @@ def grok_supports_reasoning_effort(model: str) -> bool:
_CONTEXT_LENGTH_KEYS = (
"context_length",
"context_window",
+ "context_size",
"max_context_length",
"max_position_embeddings",
"max_model_len",
@@ -361,6 +364,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"api.xiaomimimo.com": "xiaomi",
"xiaomimimo.com": "xiaomi",
"api.gmi-serving.com": "gmi",
+ "api.novita.ai": "novita",
"tokenhub.tencentmaas.com": "tencent-tokenhub",
"ollama.com": "ollama-cloud",
}
@@ -557,6 +561,16 @@ def _extract_max_completion_tokens(payload: Dict[str, Any]) -> Optional[int]:
def _extract_pricing(payload: Dict[str, Any]) -> Dict[str, Any]:
+ novita_input = payload.get("input_token_price_per_m")
+ novita_output = payload.get("output_token_price_per_m")
+ if novita_input is not None or novita_output is not None:
+ pricing: Dict[str, Any] = {}
+ if novita_input is not None:
+ pricing["prompt"] = str(float(novita_input) / 10_000 / 1_000_000)
+ if novita_output is not None:
+ pricing["completion"] = str(float(novita_output) / 10_000 / 1_000_000)
+ return pricing
+
alias_map = {
"prompt": ("prompt", "input", "input_cost_per_token", "prompt_token_cost"),
"completion": ("completion", "output", "output_cost_per_token", "completion_token_cost"),
@@ -1527,6 +1541,13 @@ def get_model_context_length(
except ImportError:
pass # boto3 not installed — fall through to generic resolution
+ if provider == "novita" or (base_url and base_url_host_matches(base_url, "api.novita.ai")):
+ ctx = _resolve_endpoint_context_length(model, base_url or "https://api.novita.ai/openai/v1", api_key=api_key)
+ if ctx is not None:
+ if base_url:
+ save_context_length(model, base_url, ctx)
+ return ctx
+
# 2. Active endpoint metadata for truly custom/unknown endpoints.
# Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
# /models endpoint may report a provider-imposed limit (e.g. Copilot
diff --git a/agent/models_dev.py b/agent/models_dev.py
index d709d7176d4..8fabb276645 100644
--- a/agent/models_dev.py
+++ b/agent/models_dev.py
@@ -141,6 +141,7 @@ class ProviderInfo:
# Hermes provider names → models.dev provider IDs
PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
"openrouter": "openrouter",
+ "novita": "novita-ai",
"anthropic": "anthropic",
"openai": "openai",
"openai-codex": "openai",
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index c93fa485c98..4683c8f3126 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -4970,6 +4970,37 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
)
if model_list:
print(f" Found {len(model_list)} model(s) from Ollama Cloud")
+ elif provider_id == "novita":
+ from hermes_cli.models import fetch_api_models
+
+ api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
+ curated = _PROVIDER_MODELS.get(provider_id, [])
+ live_models = fetch_api_models(api_key_for_probe, effective_base)
+ if live_models:
+ model_list = live_models
+ print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
+ else:
+ mdev_models: list = []
+ try:
+ from agent.models_dev import list_agentic_models
+
+ mdev_models = list_agentic_models(provider_id)
+ except Exception:
+ pass
+ if mdev_models:
+ seen = {m.lower() for m in mdev_models}
+ model_list = list(mdev_models)
+ for m in curated:
+ if m.lower() not in seen:
+ model_list.append(m)
+ seen.add(m.lower())
+ print(f" Found {len(model_list)} model(s) from models.dev registry")
+ else:
+ model_list = curated
+ if model_list:
+ print(
+ f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.'
+ )
else:
curated = _PROVIDER_MODELS.get(provider_id, [])
@@ -9269,7 +9300,7 @@ def _build_provider_choices() -> list[str]:
"auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot",
"anthropic", "gemini", "google-gemini-cli", "xai", "bedrock", "azure-foundry",
"ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn",
- "stepfun", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee",
+ "stepfun", "minimax", "minimax-cn", "kilocode", "novita", "xiaomi", "arcee",
"nvidia", "deepseek", "alibaba", "qwen-oauth", "opencode-zen", "opencode-go",
]
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index eb55b59ee5d..b3d2e1cd81b 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -445,6 +445,14 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
# Azure Foundry: user-provided endpoint and model.
# Empty list because models depend on the endpoint configuration.
"azure-foundry": [],
+ "novita": [
+ "moonshotai/kimi-k2.5",
+ "minimax/minimax-m2.7",
+ "zai-org/glm-5",
+ "deepseek/deepseek-v3-0324",
+ "deepseek/deepseek-r1-0528",
+ "qwen/qwen3-235b-a22b-fp8",
+ ],
}
# Vercel AI Gateway: derive the bare-model-id catalog from the curated
@@ -905,6 +913,7 @@ class ProviderEntry(NamedTuple):
CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
+ ProviderEntry("novita", "NovitaAI", "NovitaAI (90+ models, pay-per-use)"),
ProviderEntry("lmstudio", "LM Studio", "LM Studio (local desktop app with built-in model server)"),
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
@@ -1014,6 +1023,8 @@ _PROVIDER_ALIASES = {
"hf": "huggingface",
"hugging-face": "huggingface",
"huggingface-hub": "huggingface",
+ "novita-ai": "novita",
+ "novitaai": "novita",
"mimo": "xiaomi",
"xiaomi-mimo": "xiaomi",
"tencent": "tencent-tokenhub",
@@ -1494,7 +1505,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]:
def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:
- """Return live pricing for providers that support it (openrouter, nous, ai-gateway)."""
+ """Return live pricing for providers that support it (openrouter, nous, ai-gateway, novita)."""
normalized = normalize_provider(provider)
if normalized == "openrouter":
return fetch_models_with_pricing(
@@ -1504,6 +1515,8 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d
)
if normalized == "ai-gateway":
return fetch_ai_gateway_pricing(force_refresh=force_refresh)
+ if normalized == "novita":
+ return _fetch_novita_pricing()
if normalized == "nous":
api_key, base_url = _resolve_nous_pricing_credentials()
if base_url:
@@ -1520,6 +1533,50 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d
return {}
+def _fetch_novita_pricing(timeout: float = 8.0) -> dict[str, dict[str, str]]:
+    """Fetch per-token pricing for NovitaAI models from its ``/models`` endpoint.
+
+    The request is sent to ``NOVITA_BASE_URL`` (default
+    ``https://api.novita.ai/openai/v1``) with ``/models`` appended, and is
+    authenticated with ``NOVITA_API_KEY``.
+
+    NovitaAI returns input/output prices per million tokens in units of
+    0.0001 USD. Convert them to the per-token strings used by the shared
+    pricing formatter.
+
+    Args:
+        timeout: Timeout in seconds passed to ``urllib.request.urlopen``.
+
+    Returns:
+        Mapping of model ID to ``{"prompt": ..., "completion": ...}`` price
+        strings. Empty when ``NOVITA_API_KEY`` is unset, the request or JSON
+        decoding fails, or the body is not a ``{"data": [...]}`` object.
+        Models without any price field, or whose prices are not numeric,
+        are omitted.
+    """
+    api_key = os.getenv("NOVITA_API_KEY", "").strip()
+    if not api_key:
+        return {}
+
+    base_url = os.getenv("NOVITA_BASE_URL", "").strip() or "https://api.novita.ai/openai/v1"
+    url = base_url.rstrip("/") + "/models"
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Accept": "application/json",
+        "User-Agent": _HERMES_USER_AGENT,
+    }
+
+    # Pricing is best-effort: any transport or decode failure degrades to
+    # "no pricing" instead of raising to the caller.
+    try:
+        req = urllib.request.Request(url, headers=headers)
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            payload = json.loads(resp.read().decode())
+    except Exception:
+        return {}
+
+    # Validate the payload shape here: a non-object body or ``"data": null``
+    # would otherwise raise outside the try block above.
+    models = payload.get("data") if isinstance(payload, dict) else None
+    if not isinstance(models, list):
+        return {}
+
+    def _per_token(raw) -> str:
+        # Raw unit is 0.0001 USD per 1M tokens; a missing side is reported as 0.
+        return str(float(raw or 0) / 10_000 / 1_000_000)
+
+    result: dict[str, dict[str, str]] = {}
+    for item in models:
+        if not isinstance(item, dict):
+            continue
+        mid = item.get("id")
+        if not mid:
+            continue
+        inp = item.get("input_token_price_per_m")
+        out = item.get("output_token_price_per_m")
+        if inp is None and out is None:
+            continue
+        try:
+            entry = {"prompt": _per_token(inp), "completion": _per_token(out)}
+        except (TypeError, ValueError):
+            # Non-numeric price for this model: skip it, keep the rest.
+            continue
+        result[str(mid)] = entry
+    return result
+
+
# All provider IDs and aliases that are valid for the provider:model syntax.
_KNOWN_PROVIDER_NAMES: set[str] = (
set(_PROVIDER_LABELS.keys())
diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py
index f766a50ebf9..08fc173dc69 100644
--- a/hermes_cli/providers.py
+++ b/hermes_cli/providers.py
@@ -156,6 +156,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
is_aggregator=True,
base_url_env_var="HF_BASE_URL",
),
+ "novita": HermesOverlay(
+ transport="openai_chat",
+ is_aggregator=True,
+ base_url_env_var="NOVITA_BASE_URL",
+ ),
"xai": HermesOverlay(
transport="codex_responses",
base_url_override="https://api.x.ai/v1",
@@ -309,6 +314,10 @@ ALIASES: Dict[str, str] = {
"hugging-face": "huggingface",
"huggingface-hub": "huggingface",
+ # novita
+ "novita-ai": "novita",
+ "novitaai": "novita",
+
# xiaomi
"mimo": "xiaomi",
"xiaomi-mimo": "xiaomi",
diff --git a/plugins/model-providers/novita/__init__.py b/plugins/model-providers/novita/__init__.py
new file mode 100644
index 00000000000..c39087e52d9
--- /dev/null
+++ b/plugins/model-providers/novita/__init__.py
@@ -0,0 +1,27 @@
+"""NovitaAI provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+
+# Curated NovitaAI model IDs supplied to the profile as ``fallback_models``.
+_FALLBACK_MODELS = (
+    "moonshotai/kimi-k2.5",
+    "minimax/minimax-m2.7",
+    "zai-org/glm-5",
+    "deepseek/deepseek-v3-0324",
+    "deepseek/deepseek-r1-0528",
+    "qwen/qwen3-235b-a22b-fp8",
+)
+
+# Profile describing NovitaAI: API-key auth against the base URL below.
+novita = ProviderProfile(
+    name="novita",
+    display_name="NovitaAI",
+    aliases=("novita-ai", "novitaai"),
+    description="NovitaAI — multi-model aggregator with pay-per-use pricing",
+    signup_url="https://novita.ai/settings/key-management",
+    auth_type="api_key",
+    env_vars=("NOVITA_API_KEY", "NOVITA_BASE_URL"),
+    base_url="https://api.novita.ai/openai/v1",
+    fallback_models=_FALLBACK_MODELS,
+    default_aux_model="deepseek/deepseek-v3-0324",
+)
+
+# Registration happens at import time of this module.
+register_provider(novita)
diff --git a/plugins/model-providers/novita/plugin.yaml b/plugins/model-providers/novita/plugin.yaml
new file mode 100644
index 00000000000..681db199433
--- /dev/null
+++ b/plugins/model-providers/novita/plugin.yaml
@@ -0,0 +1,5 @@
+name: novita-provider
+kind: model-provider
+version: 1.0.0
+description: NovitaAI multi-model aggregator
+author: Nous Research
diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md
index 93e4ba630d3..28ba035452d 100644
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -20,6 +20,7 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
| **GitHub Copilot ACP** | `hermes model` (spawns local `copilot --acp --stdio`) |
| **Anthropic** | `hermes model` (Claude Max + extra usage credits via OAuth; also supports Anthropic API key or manual setup-token — see note below) |
| **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` |
+| **NovitaAI** | `NOVITA_API_KEY` in `~/.hermes/.env` (provider: `novita`, multi-model aggregator) |
| **AI Gateway** | `AI_GATEWAY_API_KEY` in `~/.hermes/.env` (provider: `ai-gateway`) |
| **z.ai / GLM** | `GLM_API_KEY` in `~/.hermes/.env` (provider: `zai`) |
| **Kimi / Moonshot** | `KIMI_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding`) |
@@ -546,6 +547,29 @@ You can append routing suffixes to model names: `:fastest` (default), `:cheapest
The base URL can be overridden with `HF_BASE_URL`.
+### NovitaAI
+
+[NovitaAI](https://novita.ai) is a multi-model aggregator with pay-per-use pricing. Access models from DeepSeek, Kimi, MiniMax, GLM, Qwen, and more through a unified OpenAI-compatible API.
+
+```bash
+# Use any available model
+hermes chat --provider novita --model moonshotai/kimi-k2.5
+# Requires: NOVITA_API_KEY in ~/.hermes/.env
+
+# Alias (`novita-ai` and `novitaai` both resolve to `novita`)
+hermes chat --provider novita-ai --model deepseek/deepseek-v3-0324
+```
+
+Or set it permanently in `config.yaml`:
+```yaml
+model:
+ provider: "novita"
+ default: "moonshotai/kimi-k2.5"
+ base_url: "https://api.novita.ai/openai/v1"
+```
+
+Get your API key at [novita.ai/settings/key-management](https://novita.ai/settings/key-management). The base URL can be overridden with `NOVITA_BASE_URL`.
+
## Custom & Self-Hosted LLM Providers
Hermes Agent works with **any OpenAI-compatible API endpoint**. If a server implements `/v1/chat/completions`, you can point Hermes at it. This means you can use local models, GPU inference servers, multi-provider routers, or any third-party API.
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 4ce8a331a94..4bb361a987e 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -91,7 +91,7 @@ Common options:
| `-q`, `--query "..."` | One-shot, non-interactive prompt. |
| `-m`, `--model ` | Override the model for this run. |
| `-t`, `--toolsets ` | Enable a comma-separated set of toolsets. |
-| `--provider ` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry`, `lmstudio`, `stepfun`, `tencent-tokenhub` (alias `tencent`, `tokenhub`). |
+| `--provider ` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `novita`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry`, `lmstudio`, `stepfun`, `tencent-tokenhub` (alias `tencent`, `tokenhub`). |
| `-s`, `--skills ` | Preload one or more skills for the session (can be repeated or comma-separated). |
| `-v`, `--verbose` | Verbose output. |
| `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. |
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index 409ddf8fe35..a427c901ce1 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -67,6 +67,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
| `DASHSCOPE_BASE_URL` | Custom DashScope base URL (default: `https://dashscope-intl.aliyuncs.com/compatible-mode/v1`; use `https://dashscope.aliyuncs.com/compatible-mode/v1` for mainland-China region) |
| `DEEPSEEK_API_KEY` | DeepSeek API key for direct DeepSeek access ([platform.deepseek.com](https://platform.deepseek.com/api_keys)) |
| `DEEPSEEK_BASE_URL` | Custom DeepSeek API base URL |
+| `NOVITA_API_KEY` | NovitaAI API key — multi-model aggregator ([novita.ai/settings/key-management](https://novita.ai/settings/key-management)) |
+| `NOVITA_BASE_URL` | Override NovitaAI base URL (default: `https://api.novita.ai/openai/v1`) |
| `NVIDIA_API_KEY` | NVIDIA NIM API key — Nemotron and open models ([build.nvidia.com](https://build.nvidia.com)) |
| `NVIDIA_BASE_URL` | Override NVIDIA base URL (default: `https://integrate.api.nvidia.com/v1`; set to `http://localhost:8000/v1` for a local NIM endpoint) |
| `STEPFUN_API_KEY` | StepFun API key — Step-series models ([platform.stepfun.com](https://platform.stepfun.com)) |
@@ -103,7 +105,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
| Variable | Description |
|----------|-------------|
-| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth` (browser OAuth login — no API key required; see [MiniMax OAuth guide](../guides/minimax-oauth.md)), `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `tencent-tokenhub` (default: `auto`) |
+| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `novita`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth` (browser OAuth login — no API key required; see [MiniMax OAuth guide](../guides/minimax-oauth.md)), `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `tencent-tokenhub` (default: `auto`) |
| `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
| `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
| `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |