Merge remote-tracking branch 'origin/main' into feat/dashboard-chat

This commit is contained in:
emozilla 2026-04-22 21:42:14 -04:00
commit 1cd2b280fd
373 changed files with 35795 additions and 7622 deletions

View file

@ -72,6 +72,8 @@ DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"
STEPFUN_STEP_PLAN_INTL_BASE_URL = "https://api.stepfun.ai/step_plan/v1"
STEPFUN_STEP_PLAN_CN_BASE_URL = "https://api.stepfun.com/step_plan/v1"
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@ -168,8 +170,11 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
id="kimi-coding",
name="Kimi / Moonshot",
auth_type="api_key",
# Legacy platform.moonshot.ai keys use this endpoint (OpenAI-compat).
# sk-kimi- (Kimi Code) keys are auto-redirected to api.kimi.com/coding
# by _resolve_kimi_base_url() below.
inference_base_url="https://api.moonshot.ai/v1",
api_key_env_vars=("KIMI_API_KEY",),
api_key_env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"),
base_url_env_var="KIMI_BASE_URL",
),
"kimi-coding-cn": ProviderConfig(
@ -179,6 +184,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
inference_base_url="https://api.moonshot.cn/v1",
api_key_env_vars=("KIMI_CN_API_KEY",),
),
"stepfun": ProviderConfig(
id="stepfun",
name="StepFun Step Plan",
auth_type="api_key",
inference_base_url=STEPFUN_STEP_PLAN_INTL_BASE_URL,
api_key_env_vars=("STEPFUN_API_KEY",),
base_url_env_var="STEPFUN_BASE_URL",
),
"arcee": ProviderConfig(
id="arcee",
name="Arcee AI",
@ -201,6 +214,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
auth_type="api_key",
inference_base_url="https://api.anthropic.com",
api_key_env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
base_url_env_var="ANTHROPIC_BASE_URL",
),
"alibaba": ProviderConfig(
id="alibaba",
@ -340,10 +354,16 @@ def get_anthropic_key() -> str:
# =============================================================================
# Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work
# on api.kimi.com/coding/v1. Legacy keys from platform.moonshot.ai work on
# api.moonshot.ai/v1 (the default). Auto-detect when user hasn't set
# on api.kimi.com/coding. Legacy keys from platform.moonshot.ai work on
# api.moonshot.ai/v1 (the old default). Auto-detect when user hasn't set
# KIMI_BASE_URL explicitly.
KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1"
#
# Note: the base URL intentionally has NO /v1 suffix. The /coding endpoint
# speaks the Anthropic Messages protocol, and the anthropic SDK appends
# "/v1/messages" internally — so "/coding" + SDK suffix → "/coding/v1/messages"
# (the correct target). Using "/coding/v1" here would produce
# "/coding/v1/v1/messages" (a 404).
KIMI_CODE_BASE_URL = "https://api.kimi.com/coding"
def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str:
@ -983,6 +1003,7 @@ def resolve_provider(
"x-ai": "xai", "x.ai": "xai", "grok": "xai",
"kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
"step": "stepfun", "stepfun-coding-plan": "stepfun",
"arcee-ai": "arcee", "arceeai": "arcee",
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
"claude": "anthropic", "claude-code": "anthropic",
@ -3375,7 +3396,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
)
from hermes_cli.models import (
_PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models,
_PROVIDER_MODELS, get_pricing_for_provider,
check_nous_free_tier, partition_nous_models_by_tier,
)
model_ids = _PROVIDER_MODELS.get("nous", [])
@ -3384,7 +3405,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
unavailable_models: list = []
if model_ids:
pricing = get_pricing_for_provider("nous")
model_ids = filter_nous_free_models(model_ids, pricing)
free_tier = check_nous_free_tier()
if free_tier:
model_ids, unavailable_models = partition_nous_models_by_tier(

View file

@ -152,6 +152,23 @@ def auth_add_command(args) -> None:
pool = load_pool(provider)
# Clear ALL suppressions for this provider — re-adding a credential is
# a strong signal the user wants auth re-enabled. This covers env:*
# (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli,
# device_code (codex), etc. One consistent re-engagement pattern.
# Matches the Codex device_code re-link pattern that predates this.
if not provider.startswith(CUSTOM_POOL_PREFIX):
try:
from hermes_cli.auth import (
_load_auth_store,
unsuppress_credential_source,
)
suppressed = _load_auth_store().get("suppressed_sources", {})
for src in list(suppressed.get(provider, []) or []):
unsuppress_credential_source(provider, src)
except Exception:
pass
if requested_type == AUTH_TYPE_API_KEY:
token = (getattr(args, "api_key", None) or "").strip()
if not token:
@ -338,71 +355,28 @@ def auth_remove_command(args) -> None:
raise SystemExit(f'No credential matching "{target}" for provider {provider}.')
print(f"Removed {provider} credential #{index} ({removed.label})")
# If this was an env-seeded credential, also clear the env var from .env
# so it doesn't get re-seeded on the next load_pool() call.
if removed.source.startswith("env:"):
env_var = removed.source[len("env:"):]
if env_var:
from hermes_cli.config import remove_env_value
cleared = remove_env_value(env_var)
if cleared:
print(f"Cleared {env_var} from .env")
# Unified removal dispatch. Every credential source Hermes reads from
# (env vars, external OAuth files, auth.json blocks, custom config)
# has a RemovalStep registered in agent.credential_sources. The step
# handles its source-specific cleanup and we centralise suppression +
# user-facing output here so every source behaves identically from
# the user's perspective.
from agent.credential_sources import find_removal_step
from hermes_cli.auth import suppress_credential_source
# If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
# clear the underlying auth store / credential file so it doesn't get
# re-seeded on the next load_pool() call.
elif provider == "openai-codex" and (
removed.source == "device_code" or removed.source.endswith(":device_code")
):
# Codex tokens live in TWO places: the Hermes auth store and
# ~/.codex/auth.json (the Codex CLI shared file). On every refresh,
# refresh_codex_oauth_pure() writes to both. So clearing only the
# Hermes auth store is not enough — _seed_from_singletons() will
# auto-import from ~/.codex/auth.json on the next load_pool() and
# the removal is instantly undone. Mark the source as suppressed
# so auto-import is skipped; leave ~/.codex/auth.json untouched so
# the Codex CLI itself keeps working.
from hermes_cli.auth import (
_load_auth_store, _save_auth_store, _auth_store_lock,
suppress_credential_source,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
print(f"Cleared {provider} OAuth tokens from auth store")
suppress_credential_source(provider, "device_code")
print("Suppressed openai-codex device_code source — it will not be re-seeded.")
print("Note: Codex CLI credentials still live in ~/.codex/auth.json")
print("Run `hermes auth add openai-codex` to re-enable if needed.")
step = find_removal_step(provider, removed.source)
if step is None:
# Unregistered source — e.g. "manual", which has nothing external
# to clean up. The pool entry is already gone; we're done.
return
elif removed.source == "device_code" and provider == "nous":
from hermes_cli.auth import (
_load_auth_store, _save_auth_store, _auth_store_lock,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
print(f"Cleared {provider} OAuth tokens from auth store")
elif removed.source == "hermes_pkce" and provider == "anthropic":
from hermes_constants import get_hermes_home
oauth_file = get_hermes_home() / ".anthropic_oauth.json"
if oauth_file.exists():
oauth_file.unlink()
print("Cleared Hermes Anthropic OAuth credentials")
elif removed.source == "claude_code" and provider == "anthropic":
from hermes_cli.auth import suppress_credential_source
suppress_credential_source(provider, "claude_code")
print("Suppressed claude_code credential — it will not be re-seeded.")
print("Note: Claude Code credentials still live in ~/.claude/.credentials.json")
print("Run `hermes auth add anthropic` to re-enable if needed.")
result = step.remove_fn(provider, removed)
for line in result.cleaned:
print(line)
if result.suppress:
suppress_credential_source(provider, removed.source)
for line in result.hints:
print(line)
def auth_reset_command(args) -> None:

View file

@ -249,7 +249,7 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
state_path = child / state_name
if state_path.exists():
kind = "directory" if state_path.is_dir() else "file"
rel = state_path.relative_to(source_dir)
rel = state_path.relative_to(source_dir).as_posix()
findings.append((state_path, f"Workspace {kind}: {rel}"))
return findings

View file

@ -260,6 +260,26 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
)
def is_gateway_known_command(name: str | None) -> bool:
"""Return True if ``name`` resolves to a gateway-dispatchable slash command.
This covers both built-in commands (``GATEWAY_KNOWN_COMMANDS`` derived
from ``COMMAND_REGISTRY``) and plugin-registered commands, which are
looked up lazily so importing this module never forces plugin
discovery. Gateway code uses this to decide whether to emit
``command:<name>`` hooks plugin commands get the same lifecycle
events as built-ins.
"""
if not name:
return False
if name in GATEWAY_KNOWN_COMMANDS:
return True
for plugin_name, _description, _args_hint in _iter_plugin_command_entries():
if plugin_name == name:
return True
return False
# Commands with explicit Level-2 running-agent handlers in gateway/run.py.
# Listed here for introspection / tests; semantically a subset of
# "all resolvable commands" — which is the real bypass set (see
@ -371,12 +391,47 @@ def gateway_help_lines() -> list[str]:
return lines
def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
"""Yield (name, description, args_hint) tuples for all plugin slash commands.
Plugin commands are registered via
:func:`hermes_cli.plugins.PluginContext.register_command`. They behave
like ``CommandDef`` entries for gateway surfacing: they appear in the
Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
(via :func:`gateway.platforms.discord._register_slash_commands`) in
Discord's native slash command picker.
Lookup is lazy so importing this module never forces plugin discovery
(which can trigger filesystem scans and environment-dependent
behavior).
"""
try:
from hermes_cli.plugins import get_plugin_commands
except Exception:
return []
try:
commands = get_plugin_commands() or {}
except Exception:
return []
entries: list[tuple[str, str, str]] = []
for name, meta in commands.items():
if not isinstance(name, str) or not isinstance(meta, dict):
continue
description = str(meta.get("description") or f"Run /{name}")
args_hint = str(meta.get("args_hint") or "").strip()
entries.append((name, description, args_hint))
return entries
def telegram_bot_commands() -> list[tuple[str, str]]:
"""Return (command_name, description) pairs for Telegram setMyCommands.
Telegram command names cannot contain hyphens, so they are replaced with
underscores. Aliases are skipped -- Telegram shows one menu entry per
canonical command.
Plugin-registered slash commands are included so plugins get native
autocomplete in Telegram without touching core code.
"""
overrides = _resolve_config_gates()
result: list[tuple[str, str]] = []
@ -386,6 +441,10 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
tg_name = _sanitize_telegram_name(cmd.name)
if tg_name:
result.append((tg_name, cmd.description))
for name, description, _args_hint in _iter_plugin_command_entries():
tg_name = _sanitize_telegram_name(name)
if tg_name:
result.append((tg_name, description))
return result
@ -750,6 +809,9 @@ def slack_subcommand_map() -> dict[str, str]:
Maps both canonical names and aliases so /hermes bg do stuff works
the same as /hermes background do stuff.
Plugin-registered slash commands are included so ``/hermes <plugin-cmd>``
routes through the plugin handler.
"""
overrides = _resolve_config_gates()
mapping: dict[str, str] = {}
@ -759,6 +821,9 @@ def slack_subcommand_map() -> dict[str, str]:
mapping[cmd.name] = f"/{cmd.name}"
for alias in cmd.aliases:
mapping[alias] = f"/{alias}"
for name, _description, _args_hint in _iter_plugin_command_entries():
if name not in mapping:
mapping[name] = f"/{name}"
return mapping
@ -924,12 +989,22 @@ class SlashCommandCompleter(Completer):
display_meta=meta,
)
# If the user typed @file: or @folder:, delegate to path completions
# If the user typed @file: / @folder: (or just @file / @folder with
# no colon yet), delegate to path completions. Accepting the bare
# form lets the picker surface directories as soon as the user has
# typed `@folder`, without requiring them to first accept the static
# `@folder:` hint and re-trigger completion.
for prefix in ("@file:", "@folder:"):
if word.startswith(prefix):
path_part = word[len(prefix):] or "."
bare = prefix[:-1]
if word == bare or word.startswith(prefix):
want_dir = prefix == "@folder:"
path_part = '' if word == bare else word[len(prefix):]
expanded = os.path.expanduser(path_part)
if expanded.endswith("/"):
if not expanded or expanded == ".":
search_dir, match_prefix = ".", ""
elif expanded.endswith("/"):
search_dir, match_prefix = expanded, ""
else:
search_dir = os.path.dirname(expanded) or "."
@ -945,15 +1020,21 @@ class SlashCommandCompleter(Completer):
for entry in sorted(entries):
if match_prefix and not entry.lower().startswith(prefix_lower):
continue
if count >= limit:
break
full_path = os.path.join(search_dir, entry)
is_dir = os.path.isdir(full_path)
# `@folder:` must only surface directories; `@file:` only
# regular files. Without this filter `@folder:` listed
# every .env / .gitignore in the cwd, defeating the
# explicit prefix and confusing users expecting a
# directory picker.
if want_dir != is_dir:
continue
if count >= limit:
break
display_path = os.path.relpath(full_path)
suffix = "/" if is_dir else ""
kind = "folder" if is_dir else "file"
meta = "dir" if is_dir else _file_size_label(full_path)
completion = f"@{kind}:{display_path}{suffix}"
completion = f"{prefix}{display_path}{suffix}"
yield Completion(
completion,
start_position=-len(word),

View file

@ -387,6 +387,26 @@ DEFAULT_CONFIG = {
# (terminal and execute_code). Skill-declared required_environment_variables
# are passed through automatically; this list is for non-skill use cases.
"env_passthrough": [],
# Extra files to source in the login shell when building the
# per-session environment snapshot. Use this when tools like nvm,
# pyenv, asdf, or custom PATH entries are registered by files that
# a bash login shell would skip — most commonly ``~/.bashrc``
# (bash doesn't source bashrc in non-interactive login mode) or
# zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
# Paths support ``~`` / ``${VAR}``. Missing files are silently
# skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the
# snapshot shell is bash (this is the ``auto_source_bashrc``
# behaviour — disable with that key if you want strict login-only
# semantics).
"shell_init_files": [],
# When true (default), Hermes sources ``~/.bashrc`` in the login
# shell used to build the environment snapshot. This captures
# PATH additions, shell functions, and aliases defined in the
# user's bashrc — which a plain ``bash -l -c`` would otherwise
# miss because bash skips bashrc in non-interactive login mode.
# Turn this off if you have a bashrc that misbehaves when sourced
# non-interactively (e.g. one that hard-exits on TTY checks).
"auto_source_bashrc": True,
"docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
"docker_forward_env": [],
# Explicit environment variables to set inside Docker containers.
@ -593,6 +613,10 @@ DEFAULT_CONFIG = {
},
# Text-to-speech configuration
# Each provider supports an optional `max_text_length:` override for the
# per-request input-character cap. Omit it to use the provider's documented
# limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware,
# Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000).
"tts": {
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
"edge": {
@ -645,6 +669,7 @@ DEFAULT_CONFIG = {
"record_key": "ctrl+b",
"max_recording_seconds": 120,
"auto_tts": False,
"beep_enabled": True, # Play record start/stop beeps in CLI voice mode
"silence_threshold": 200, # RMS below this = silence (0-32767)
"silence_duration": 3.0, # Seconds of silence before auto-stop
},
@ -687,10 +712,22 @@ DEFAULT_CONFIG = {
"provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials)
"base_url": "", # direct OpenAI-compatible endpoint for subagents
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
# When delegate_task narrows child toolsets explicitly, preserve any
# MCP toolsets the parent already has enabled. On by default so
# narrowing (e.g. toolsets=["web","browser"]) expresses "I want these
# extras" without silently stripping MCP tools the parent already has.
# Set to false for strict intersection.
"inherit_mcp_toolsets": True,
"max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget,
# independent of the parent's max_iterations)
"reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium",
# "low", "minimal", "none" (empty = inherit parent's level)
"max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling
# Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth
# and _get_orchestrator_enabled). Values are clamped to [1, 3] with a
# warning log if out of range.
"max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
"orchestrator_enabled": True, # kill switch for role="orchestrator"
},
# Ephemeral prefill messages file — JSON list of {role, content} dicts
@ -703,6 +740,20 @@ DEFAULT_CONFIG = {
# always goes to ~/.hermes/skills/.
"skills": {
"external_dirs": [], # e.g. ["~/.agents/skills", "/shared/team-skills"]
# Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md
# content with the absolute skill directory and the active session id
# before the agent sees it. Lets skill authors reference bundled
# scripts without the agent having to join paths.
"template_vars": True,
# Pre-execute inline shell snippets written as !`cmd` in SKILL.md
# body. Their stdout is inlined into the skill message before the
# agent reads it, so skills can inject dynamic context (dates, git
# state, detected tool versions, …). Off by default because any
# content from the skill author runs on the host without approval;
# only enable for skill sources you trust.
"inline_shell": False,
# Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
"inline_shell_timeout": 10,
},
# Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
@ -795,6 +846,7 @@ DEFAULT_CONFIG = {
# Pre-exec security scanning via tirith
"security": {
"allow_private_urls": False, # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
"redact_secrets": True,
"tirith_enabled": True,
"tirith_path": "tirith",
@ -848,8 +900,36 @@ DEFAULT_CONFIG = {
"force_ipv4": False,
},
# Session storage — controls automatic cleanup of ~/.hermes/state.db.
# state.db accumulates every session, message, tool call, and FTS5 index
# entry forever. Without auto-pruning, a heavy user (gateway + cron)
# reports 384MB+ databases with 68K+ messages, which slows down FTS5
# inserts, /resume listing, and insights queries.
"sessions": {
# When true, prune ended sessions older than retention_days once
# per (roughly) min_interval_hours at CLI/gateway/cron startup.
# Only touches ended sessions — active sessions are always preserved.
# Default false: session history is valuable for search recall, and
# silently deleting it could surprise users. Opt in explicitly.
"auto_prune": False,
# How many days of ended-session history to keep. Matches the
# default of ``hermes sessions prune``.
"retention_days": 90,
# VACUUM after a prune that actually deleted rows. SQLite does not
# reclaim disk space on DELETE — freed pages are just reused on
# subsequent INSERTs — so without VACUUM the file stays bloated
# even after pruning. VACUUM blocks writes for a few seconds per
# 100MB, so it only runs at startup, and only when prune deleted
# ≥1 session.
"vacuum_after_prune": True,
# Minimum hours between auto-maintenance runs (avoids repeating
# the sweep on every CLI invocation). Tracked via state_meta in
# state.db itself, so it's shared across all processes.
"min_interval_hours": 24,
},
# Config schema version - bump this when adding new required fields
"_config_version": 21,
"_config_version": 22,
}
# =============================================================================
@ -1005,6 +1085,22 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"STEPFUN_API_KEY": {
"description": "StepFun Step Plan API key",
"prompt": "StepFun Step Plan API key",
"url": "https://platform.stepfun.com/",
"password": True,
"category": "provider",
"advanced": True,
},
"STEPFUN_BASE_URL": {
"description": "StepFun Step Plan base URL override",
"prompt": "StepFun Step Plan base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"ARCEEAI_API_KEY": {
"description": "Arcee AI API key",
"prompt": "Arcee AI API key",
@ -2057,6 +2153,7 @@ _KNOWN_ROOT_KEYS = {
"fallback_providers", "credential_pool_strategies", "toolsets",
"agent", "terminal", "display", "compression", "delegation",
"auxiliary", "custom_providers", "context", "memory", "gateway",
"sessions",
}
# Valid fields inside a custom_providers list entry
@ -2214,7 +2311,6 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None:
if not issues:
return
import sys
lines = ["\033[33m⚠ Config issues detected in config.yaml:\033[0m"]
for ci in issues:
marker = "\033[31m✗\033[0m" if ci.severity == "error" else "\033[33m⚠\033[0m"
@ -2229,7 +2325,6 @@ def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> Non
These env vars are deprecated the canonical setting is terminal.cwd
in config.yaml. Prints a migration hint to stderr.
"""
import os, sys
messaging_cwd = os.environ.get("MESSAGING_CWD")
terminal_cwd_env = os.environ.get("TERMINAL_CWD")
@ -2572,8 +2667,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
# Scan ``$HERMES_HOME/plugins/`` for currently installed user plugins.
grandfathered: List[str] = []
try:
from hermes_constants import get_hermes_home as _ghome
user_plugins_dir = _ghome() / "plugins"
user_plugins_dir = get_hermes_home() / "plugins"
if user_plugins_dir.is_dir():
for child in sorted(user_plugins_dir.iterdir()):
if not child.is_dir():
@ -3075,7 +3169,7 @@ def save_config(config: Dict[str, Any]):
if not sec or sec.get("redact_secrets") is None:
parts.append(_SECURITY_COMMENT)
fb = normalized.get("fallback_model", {})
if not fb or not (fb.get("provider") and fb.get("model")):
if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
parts.append(_FALLBACK_COMMENT)
atomic_yaml_write(
@ -3238,7 +3332,6 @@ def _check_non_ascii_credential(key: str, value: str) -> str:
bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})")
sanitized = value.encode("ascii", errors="ignore").decode("ascii")
import sys
print(
f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n"
f" This usually happens when copy-pasting from a PDF, rich-text editor,\n"

View file

@ -13,6 +13,7 @@ import time
import urllib.error
import urllib.parse
import urllib.request
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
@ -147,6 +148,14 @@ def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]:
return (deleted, len(remaining))
def _best_effort_sweep_expired_pastes() -> None:
"""Attempt pending-paste cleanup without letting /debug fail offline."""
try:
_sweep_expired_pastes()
except Exception:
pass
# ---------------------------------------------------------------------------
# Privacy / delete helpers
# ---------------------------------------------------------------------------
@ -314,72 +323,128 @@ def upload_to_pastebin(content: str, expiry_days: int = 7) -> str:
# Log file reading
# ---------------------------------------------------------------------------
def _resolve_log_path(log_name: str) -> Optional[Path]:
"""Find the log file for *log_name*, falling back to the .1 rotation.
Returns the path if found, or None.
"""
@dataclass
class LogSnapshot:
"""Single-read snapshot of a log file used by debug-share."""
path: Optional[Path]
tail_text: str
full_text: Optional[str]
def _primary_log_path(log_name: str) -> Optional[Path]:
"""Where *log_name* would live if present. Doesn't check existence."""
from hermes_cli.logs import LOG_FILES
filename = LOG_FILES.get(log_name)
if not filename:
return (get_hermes_home() / "logs" / filename) if filename else None
def _resolve_log_path(log_name: str) -> Optional[Path]:
"""Find the log file for *log_name*, falling back to the .1 rotation.
Returns the first non-empty candidate (primary, then .1), or None.
Callers distinguish 'empty primary' from 'truly missing' via
:func:`_primary_log_path`.
"""
primary = _primary_log_path(log_name)
if primary is None:
return None
log_dir = get_hermes_home() / "logs"
primary = log_dir / filename
if primary.exists() and primary.stat().st_size > 0:
return primary
# Fall back to the most recent rotated file (.1).
rotated = log_dir / f"{filename}.1"
rotated = primary.parent / f"{primary.name}.1"
if rotated.exists() and rotated.stat().st_size > 0:
return rotated
return None
def _read_log_tail(log_name: str, num_lines: int) -> str:
"""Read the last *num_lines* from a log file, or return a placeholder."""
from hermes_cli.logs import _read_last_n_lines
def _capture_log_snapshot(
log_name: str,
*,
tail_lines: int,
max_bytes: int = _MAX_LOG_BYTES,
) -> LogSnapshot:
"""Capture a log once and derive summary/full-log views from it.
log_path = _resolve_log_path(log_name)
if log_path is None:
return "(file not found)"
try:
lines = _read_last_n_lines(log_path, num_lines)
return "".join(lines).rstrip("\n")
except Exception as exc:
return f"(error reading: {exc})"
def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]:
"""Read a log file for standalone upload.
Returns the file content (last *max_bytes* if truncated), or None if the
file doesn't exist or is empty.
The report tail and standalone log upload must come from the same file
snapshot. Otherwise a rotation/truncate between reads can make the report
look newer than the uploaded ``agent.log`` paste.
"""
log_path = _resolve_log_path(log_name)
if log_path is None:
return None
primary = _primary_log_path(log_name)
tail = "(file empty)" if primary and primary.exists() else "(file not found)"
return LogSnapshot(path=None, tail_text=tail, full_text=None)
try:
size = log_path.stat().st_size
if size == 0:
return None
# race: file was truncated between _resolve_log_path and stat
return LogSnapshot(path=log_path, tail_text="(file empty)", full_text=None)
if size <= max_bytes:
return log_path.read_text(encoding="utf-8", errors="replace")
# File is larger than max_bytes — read the tail.
with open(log_path, "rb") as f:
f.seek(size - max_bytes)
# Skip partial line at the seek point.
f.readline()
content = f.read().decode("utf-8", errors="replace")
return f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{content}"
except Exception:
return None
if size <= max_bytes:
raw = f.read()
truncated = False
else:
# Read from the end until we have enough bytes for the
# standalone upload and enough newline context to render the
# summary tail from the same snapshot.
chunk_size = 8192
pos = size
chunks: list[bytes] = []
total = 0
newline_count = 0
while pos > 0 and (total < max_bytes or newline_count <= tail_lines + 1) and total < max_bytes * 2:
read_size = min(chunk_size, pos)
pos -= read_size
f.seek(pos)
chunk = f.read(read_size)
chunks.insert(0, chunk)
total += len(chunk)
newline_count += chunk.count(b"\n")
chunk_size = min(chunk_size * 2, 65536)
raw = b"".join(chunks)
truncated = pos > 0
full_raw = raw
if truncated and len(full_raw) > max_bytes:
cut = len(full_raw) - max_bytes
# Check whether the cut lands exactly on a line boundary. If the
# byte just before the cut position is a newline the first retained
# byte starts a complete line and we should keep it. Only drop a
# partial first line when we're genuinely mid-line.
on_boundary = cut > 0 and full_raw[cut - 1 : cut] == b"\n"
full_raw = full_raw[cut:]
if not on_boundary and b"\n" in full_raw:
full_raw = full_raw.split(b"\n", 1)[1]
all_text = raw.decode("utf-8", errors="replace")
tail_text = "".join(all_text.splitlines(keepends=True)[-tail_lines:]).rstrip("\n")
full_text = full_raw.decode("utf-8", errors="replace")
if truncated:
full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}"
return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text)
except Exception as exc:
return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None)
def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]:
"""Capture all logs used by debug-share exactly once."""
errors_lines = min(log_lines, 100)
return {
"agent": _capture_log_snapshot("agent", tail_lines=log_lines),
"errors": _capture_log_snapshot("errors", tail_lines=errors_lines),
"gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines),
}
# ---------------------------------------------------------------------------
@ -405,7 +470,12 @@ def _capture_dump() -> str:
return capture.getvalue()
def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
def collect_debug_report(
*,
log_lines: int = 200,
dump_text: str = "",
log_snapshots: Optional[dict[str, LogSnapshot]] = None,
) -> str:
"""Build the summary debug report: system dump + log tails.
Parameters
@ -424,19 +494,22 @@ def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
dump_text = _capture_dump()
buf.write(dump_text)
if log_snapshots is None:
log_snapshots = _capture_default_log_snapshots(log_lines)
# ── Recent log tails (summary only) ──────────────────────────────────
buf.write("\n\n")
buf.write(f"--- agent.log (last {log_lines} lines) ---\n")
buf.write(_read_log_tail("agent", log_lines))
buf.write(log_snapshots["agent"].tail_text)
buf.write("\n\n")
errors_lines = min(log_lines, 100)
buf.write(f"--- errors.log (last {errors_lines} lines) ---\n")
buf.write(_read_log_tail("errors", errors_lines))
buf.write(log_snapshots["errors"].tail_text)
buf.write("\n\n")
buf.write(f"--- gateway.log (last {errors_lines} lines) ---\n")
buf.write(_read_log_tail("gateway", errors_lines))
buf.write(log_snapshots["gateway"].tail_text)
buf.write("\n")
return buf.getvalue()
@ -448,6 +521,8 @@ def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
def run_debug_share(args):
"""Collect debug report + full logs, upload each, print URLs."""
_best_effort_sweep_expired_pastes()
log_lines = getattr(args, "lines", 200)
expiry = getattr(args, "expire", 7)
local_only = getattr(args, "local", False)
@ -459,10 +534,15 @@ def run_debug_share(args):
# Capture dump once — prepended to every paste for context.
dump_text = _capture_dump()
log_snapshots = _capture_default_log_snapshots(log_lines)
report = collect_debug_report(log_lines=log_lines, dump_text=dump_text)
agent_log = _read_full_log("agent")
gateway_log = _read_full_log("gateway")
report = collect_debug_report(
log_lines=log_lines,
dump_text=dump_text,
log_snapshots=log_snapshots,
)
agent_log = log_snapshots["agent"].full_text
gateway_log = log_snapshots["gateway"].full_text
# Prepend dump header to each full log so every paste is self-contained.
if agent_log:

View file

@ -912,6 +912,7 @@ def run_doctor(args):
_apikey_providers = [
("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True),
("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
@ -943,18 +944,22 @@ def run_doctor(args):
try:
import httpx
_base = os.getenv(_base_env, "") if _base_env else ""
# Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com
# Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1
# (OpenAI-compat surface, which exposes /models for health check).
if not _base and _key.startswith("sk-kimi-"):
_base = "https://api.kimi.com/coding/v1"
# Anthropic-compat endpoints (/anthropic) don't support /models.
# Rewrite to the OpenAI-compat /v1 surface for health checks.
# Anthropic-compat endpoints (/anthropic, api.kimi.com/coding
# with no /v1) don't support /models. Rewrite to the OpenAI-compat
# /v1 surface for health checks.
if _base and _base.rstrip("/").endswith("/anthropic"):
from agent.auxiliary_client import _to_openai_base_url
_base = _to_openai_base_url(_base)
if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"):
_base = _base.rstrip("/") + "/v1"
_url = (_base.rstrip("/") + "/models") if _base else _default_url
_headers = {"Authorization": f"Bearer {_key}"}
if base_url_host_matches(_base, "api.kimi.com"):
_headers["User-Agent"] = "KimiCLI/1.30.0"
_headers["User-Agent"] = "claude-code/0.1.0"
_resp = httpx.get(
_url,
headers=_headers,

View file

@ -160,6 +160,8 @@ def load_hermes_dotenv(
# Fix corrupted .env files before python-dotenv parses them (#8908).
if user_env.exists():
_sanitize_env_file_if_needed(user_env)
if project_env_path and project_env_path.exists():
_sanitize_env_file_if_needed(project_env_path)
if user_env.exists():
_load_dotenv_with_fallback(user_env, override=True)

View file

@ -333,6 +333,147 @@ def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]:
return selected_system, result.stdout.strip() == "active"
def _read_systemd_unit_properties(
system: bool = False,
properties: tuple[str, ...] = (
"ActiveState",
"SubState",
"Result",
"ExecMainStatus",
),
) -> dict[str, str]:
"""Return selected ``systemctl show`` properties for the gateway unit."""
selected_system = _select_systemd_scope(system)
try:
result = _run_systemctl(
[
"show",
get_service_name(),
"--no-pager",
"--property",
",".join(properties),
],
system=selected_system,
capture_output=True,
text=True,
timeout=10,
)
except (RuntimeError, subprocess.TimeoutExpired, OSError):
return {}
if result.returncode != 0:
return {}
parsed: dict[str, str] = {}
for line in result.stdout.splitlines():
if "=" not in line:
continue
key, value = line.split("=", 1)
parsed[key] = value.strip()
return parsed
def _wait_for_systemd_service_restart(
*,
system: bool = False,
previous_pid: int | None = None,
timeout: float = 60.0,
) -> bool:
"""Wait for the gateway service to become active after a restart handoff."""
import time
svc = get_service_name()
scope_label = _service_scope_label(system).capitalize()
deadline = time.time() + timeout
while time.time() < deadline:
props = _read_systemd_unit_properties(system=system)
active_state = props.get("ActiveState", "")
sub_state = props.get("SubState", "")
new_pid = None
try:
from gateway.status import get_running_pid
new_pid = get_running_pid()
except Exception:
new_pid = None
if active_state == "active":
if new_pid and (previous_pid is None or new_pid != previous_pid):
print(f"{scope_label} service restarted (PID {new_pid})")
return True
if previous_pid is None:
print(f"{scope_label} service restarted")
return True
if active_state == "activating" and sub_state == "auto-restart":
time.sleep(1)
continue
time.sleep(2)
print(
f"{scope_label} service did not become active within {int(timeout)}s.\n"
f" Check status: {'sudo ' if system else ''}hermes gateway status\n"
f" Check logs: journalctl {'--user ' if not system else ''}-u {svc} -l --since '2 min ago'"
)
return False
def _recover_pending_systemd_restart(system: bool = False, previous_pid: int | None = None) -> bool:
"""Recover a planned service restart that is stuck in systemd state."""
props = _read_systemd_unit_properties(system=system)
if not props:
return False
try:
from gateway.status import read_runtime_status
except Exception:
return False
runtime_state = read_runtime_status() or {}
if not runtime_state.get("restart_requested"):
return False
active_state = props.get("ActiveState", "")
sub_state = props.get("SubState", "")
exec_main_status = props.get("ExecMainStatus", "")
result = props.get("Result", "")
if active_state == "activating" and sub_state == "auto-restart":
print("⏳ Service restart already pending — waiting for systemd relaunch...")
return _wait_for_systemd_service_restart(
system=system,
previous_pid=previous_pid,
)
if active_state == "failed" and (
exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE)
or result == "exit-code"
):
svc = get_service_name()
scope_label = _service_scope_label(system).capitalize()
print(f"↻ Clearing failed state for pending {scope_label.lower()} service restart...")
_run_systemctl(
["reset-failed", svc],
system=system,
check=False,
timeout=30,
)
_run_systemctl(
["start", svc],
system=system,
check=False,
timeout=90,
)
return _wait_for_systemd_service_restart(
system=system,
previous_pid=previous_pid,
)
return False
def _probe_launchd_service_running() -> bool:
if not get_launchd_plist_path().exists():
return False
@ -470,7 +611,8 @@ def stop_profile_gateway() -> bool:
except (ProcessLookupError, PermissionError):
break
remove_pid_file()
if get_running_pid() is None:
remove_pid_file()
return True
@ -994,8 +1136,6 @@ def get_systemd_linger_status() -> tuple[bool | None, str]:
if not is_linux():
return None, "not supported on this platform"
import shutil
if not shutil.which("loginctl"):
return None, "loginctl not found"
@ -1347,7 +1487,6 @@ def _ensure_linger_enabled() -> None:
return
import getpass
import shutil
username = getpass.getuser()
linger_file = Path(f"/var/lib/systemd/linger/{username}")
@ -1508,14 +1647,9 @@ def systemd_restart(system: bool = False):
pid = get_running_pid()
if pid is not None and _request_gateway_self_restart(pid):
# SIGUSR1 sent — the gateway will drain active agents, exit with
# code 75, and systemd will restart it after RestartSec (30s).
# Wait for the old process to die and the new one to become active
# so the CLI doesn't return while the service is still restarting.
import time
scope_label = _service_scope_label(system).capitalize()
svc = get_service_name()
scope_cmd = _systemctl_cmd(system)
# Phase 1: wait for old process to exit (drain + shutdown)
print(f"{scope_label} service draining active work...")
@ -1529,48 +1663,41 @@ def systemd_restart(system: bool = False):
else:
print(f"⚠ Old process (PID {pid}) still alive after 90s")
# Phase 2: wait for systemd to start the new process
print(f"⏳ Waiting for {svc} to restart...")
deadline = time.time() + 60
while time.time() < deadline:
try:
result = subprocess.run(
scope_cmd + ["is-active", svc],
capture_output=True, text=True, timeout=5,
)
if result.stdout.strip() == "active":
# Verify it's a NEW process, not the old one somehow
new_pid = get_running_pid()
if new_pid and new_pid != pid:
print(f"{scope_label} service restarted (PID {new_pid})")
return
except (subprocess.TimeoutExpired, FileNotFoundError):
pass
time.sleep(2)
# Timed out — check final state
try:
result = subprocess.run(
scope_cmd + ["is-active", svc],
capture_output=True, text=True, timeout=5,
)
if result.stdout.strip() == "active":
print(f"{scope_label} service restarted")
return
except Exception:
pass
print(
f"{scope_label} service did not become active within 60s.\n"
f" Check status: {'sudo ' if system else ''}hermes gateway status\n"
f" Check logs: journalctl {'--user ' if not system else ''}-u {svc} --since '2 min ago'"
# The gateway exits with code 75 for a planned service restart.
# systemd can sit in the RestartSec window or even wedge itself into a
# failed/rate-limited state if the operator asks for another restart in
# the middle of that handoff. Clear any stale failed state and kick the
# unit immediately so `hermes gateway restart` behaves idempotently.
_run_systemctl(
["reset-failed", svc],
system=system,
check=False,
timeout=30,
)
_run_systemctl(
["start", svc],
system=system,
check=False,
timeout=90,
)
_wait_for_systemd_service_restart(system=system, previous_pid=pid)
return
if _recover_pending_systemd_restart(system=system, previous_pid=pid):
return
_run_systemctl(
["reset-failed", get_service_name()],
system=system,
check=False,
timeout=30,
)
_run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90)
print(f"{_service_scope_label(system).capitalize()} service restarted")
def systemd_status(deep: bool = False, system: bool = False):
def systemd_status(deep: bool = False, system: bool = False, full: bool = False):
system = _select_systemd_scope(system)
unit_path = get_systemd_unit_path(system=system)
scope_flag = " --system" if system else ""
@ -1593,8 +1720,12 @@ def systemd_status(deep: bool = False, system: bool = False):
print(f" Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag} # auto-refreshes the unit")
print()
status_cmd = ["status", get_service_name(), "--no-pager"]
if full:
status_cmd.append("-l")
_run_systemctl(
["status", get_service_name(), "--no-pager"],
status_cmd,
system=system,
capture_output=False,
timeout=10,
@ -1627,6 +1758,19 @@ def systemd_status(deep: bool = False, system: bool = False):
for line in runtime_lines:
print(f" {line}")
unit_props = _read_systemd_unit_properties(system=system)
active_state = unit_props.get("ActiveState", "")
sub_state = unit_props.get("SubState", "")
exec_main_status = unit_props.get("ExecMainStatus", "")
result_code = unit_props.get("Result", "")
if active_state == "activating" and sub_state == "auto-restart":
print(" ⏳ Restart pending: systemd is waiting to relaunch the gateway")
elif active_state == "failed" and exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE):
print(" ⚠ Planned restart is stuck in systemd failed state (exit 75)")
print(f" Run: systemctl {'--user ' if not system else ''}reset-failed {get_service_name()} && {'sudo ' if system else ''}hermes gateway start{scope_flag}")
elif active_state == "failed" and result_code:
print(f" ⚠ Systemd unit result: {result_code}")
if system:
print("✓ System service starts at boot without requiring systemd linger")
elif deep:
@ -1642,7 +1786,10 @@ def systemd_status(deep: bool = False, system: bool = False):
if deep:
print()
print("Recent logs:")
subprocess.run(_journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"], timeout=10)
log_cmd = _journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"]
if full:
log_cmd.append("-l")
subprocess.run(log_cmd, timeout=10)
# =============================================================================
@ -1656,7 +1803,6 @@ def get_launchd_label() -> str:
def _launchd_domain() -> str:
import os
return f"gui/{os.getuid()}"
@ -2643,9 +2789,120 @@ def _setup_dingtalk():
def _setup_wecom():
"""Configure WeCom (Enterprise WeChat) via the standard platform setup."""
wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom")
_setup_standard_platform(wecom_platform)
"""Interactive setup for WeCom — scan QR code or manual credential input."""
print()
print(color(" ─── 💬 WeCom (Enterprise WeChat) Setup ───", Colors.CYAN))
existing_bot_id = get_env_value("WECOM_BOT_ID")
existing_secret = get_env_value("WECOM_SECRET")
if existing_bot_id and existing_secret:
print()
print_success("WeCom is already configured.")
if not prompt_yes_no(" Reconfigure WeCom?", False):
return
# ── Choose setup method ──
print()
method_choices = [
"Scan QR code to obtain Bot ID and Secret automatically (recommended)",
"Enter existing Bot ID and Secret manually",
]
method_idx = prompt_choice(" How would you like to set up WeCom?", method_choices, 0)
bot_id = None
secret = None
if method_idx == 0:
# ── QR scan flow ──
try:
from gateway.platforms.wecom import qr_scan_for_bot_info
except Exception as exc:
print_error(f" WeCom QR scan import failed: {exc}")
qr_scan_for_bot_info = None
if qr_scan_for_bot_info is not None:
try:
credentials = qr_scan_for_bot_info()
except KeyboardInterrupt:
print()
print_warning(" WeCom setup cancelled.")
return
except Exception as exc:
print_warning(f" QR scan failed: {exc}")
credentials = None
if credentials:
bot_id = credentials.get("bot_id", "")
secret = credentials.get("secret", "")
print_success(" ✔ QR scan successful! Bot ID and Secret obtained.")
if not bot_id or not secret:
print_info(" QR scan did not complete. Continuing with manual input.")
bot_id = None
secret = None
# ── Manual credential input ──
if not bot_id or not secret:
print()
print_info(" 1. Go to WeCom Application → Workspace → Smart Robot -> Create smart robots")
print_info(" 2. Select API Mode")
print_info(" 3. Copy the Bot ID and Secret from the bot's credentials info")
print_info(" 4. The bot connects via WebSocket — no public endpoint needed")
print()
bot_id = prompt(" Bot ID", password=False)
if not bot_id:
print_warning(" Skipped — WeCom won't work without a Bot ID.")
return
secret = prompt(" Secret", password=True)
if not secret:
print_warning(" Skipped — WeCom won't work without a Secret.")
return
# ── Save core credentials ──
save_env_value("WECOM_BOT_ID", bot_id)
save_env_value("WECOM_SECRET", secret)
# ── Allowed users (deny-by-default security) ──
print()
print_info(" The gateway DENIES all users by default for security.")
print_info(" Enter user IDs to create an allowlist, or leave empty.")
allowed = prompt(" Allowed user IDs (comma-separated, or empty)", password=False)
if allowed:
cleaned = allowed.replace(" ", "")
save_env_value("WECOM_ALLOWED_USERS", cleaned)
print_success(" Saved — only these users can interact with the bot.")
else:
print()
access_choices = [
"Enable open access (anyone can message the bot)",
"Use DM pairing (unknown users request access, you approve with 'hermes pairing approve')",
"Disable direct messages",
"Skip for now (bot will deny all users until configured)",
]
access_idx = prompt_choice(" How should unauthorized users be handled?", access_choices, 1)
if access_idx == 0:
save_env_value("WECOM_DM_POLICY", "open")
save_env_value("GATEWAY_ALLOW_ALL_USERS", "true")
print_warning(" Open access enabled — anyone can use your bot!")
elif access_idx == 1:
save_env_value("WECOM_DM_POLICY", "pairing")
print_success(" DM pairing mode — users will receive a code to request access.")
print_info(" Approve with: hermes pairing approve <platform> <code>")
elif access_idx == 2:
save_env_value("WECOM_DM_POLICY", "disabled")
print_warning(" Direct messages disabled.")
else:
print_info(" Skipped — configure later with 'hermes gateway setup'")
# ── Home channel (optional) ──
print()
print_info(" Chat ID for scheduled results and notifications.")
home = prompt(" Home chat ID (optional, for cron/notifications)", password=False)
if home:
save_env_value("WECOM_HOME_CHANNEL", home)
print_success(f" Home channel set to {home}")
print()
print_success("💬 WeCom configured!")
def _is_service_installed() -> bool:
@ -3025,7 +3282,8 @@ def _setup_qqbot():
if method_idx == 0:
# ── QR scan-to-configure ──
try:
credentials = _qqbot_qr_flow()
from gateway.platforms.qqbot import qr_register
credentials = qr_register()
except KeyboardInterrupt:
print()
print_warning(" QQ Bot setup cancelled.")
@ -3107,106 +3365,6 @@ def _setup_qqbot():
print_info(f" App ID: {credentials['app_id']}")
def _qqbot_render_qr(url: str) -> bool:
"""Try to render a QR code in the terminal. Returns True if successful."""
try:
import qrcode as _qr
qr = _qr.QRCode(border=1,error_correction=_qr.constants.ERROR_CORRECT_L)
qr.add_data(url)
qr.make(fit=True)
qr.print_ascii(invert=True)
return True
except Exception:
return False
def _qqbot_qr_flow():
"""Run the QR-code scan-to-configure flow.
Returns a dict with app_id, client_secret, user_openid on success,
or None on failure/cancel.
"""
try:
from gateway.platforms.qqbot import (
create_bind_task, poll_bind_result, build_connect_url,
decrypt_secret, BindStatus,
)
from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL
except Exception as exc:
print_error(f" QQBot onboard import failed: {exc}")
return None
import asyncio
import time
MAX_REFRESHES = 3
refresh_count = 0
while refresh_count <= MAX_REFRESHES:
loop = asyncio.new_event_loop()
# ── Create bind task ──
try:
task_id, aes_key = loop.run_until_complete(create_bind_task())
except Exception as e:
print_warning(f" Failed to create bind task: {e}")
loop.close()
return None
url = build_connect_url(task_id)
# ── Display QR code + URL ──
print()
if _qqbot_render_qr(url):
print(f" Scan the QR code above, or open this URL directly:\n {url}")
else:
print(f" Open this URL in QQ on your phone:\n {url}")
print_info(" Tip: pip install qrcode to show a scannable QR code here")
# ── Poll loop (silent — keep QR visible at bottom) ──
try:
while True:
try:
status, app_id, encrypted_secret, user_openid = loop.run_until_complete(
poll_bind_result(task_id)
)
except Exception:
time.sleep(ONBOARD_POLL_INTERVAL)
continue
if status == BindStatus.COMPLETED:
client_secret = decrypt_secret(encrypted_secret, aes_key)
print()
print_success(f" QR scan complete! (App ID: {app_id})")
if user_openid:
print_info(f" Scanner's OpenID: {user_openid}")
return {
"app_id": app_id,
"client_secret": client_secret,
"user_openid": user_openid,
}
if status == BindStatus.EXPIRED:
refresh_count += 1
if refresh_count > MAX_REFRESHES:
print()
print_warning(f" QR code expired {MAX_REFRESHES} times — giving up.")
return None
print()
print_warning(f" QR code expired, refreshing... ({refresh_count}/{MAX_REFRESHES})")
loop.close()
break # outer while creates a new task
time.sleep(ONBOARD_POLL_INTERVAL)
except KeyboardInterrupt:
loop.close()
raise
finally:
loop.close()
return None
def _setup_signal():
"""Interactive setup for Signal messenger."""
import shutil
@ -3394,6 +3552,8 @@ def gateway_setup():
_setup_feishu()
elif platform["key"] == "qqbot":
_setup_qqbot()
elif platform["key"] == "wecom":
_setup_wecom()
else:
_setup_standard_platform(platform)
@ -3752,12 +3912,13 @@ def gateway_command(args):
elif subcmd == "status":
deep = getattr(args, 'deep', False)
full = getattr(args, 'full', False)
system = getattr(args, 'system', False)
snapshot = get_gateway_runtime_snapshot(system=system)
# Check for service first
if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
systemd_status(deep, system=system)
systemd_status(deep, system=system, full=full)
_print_gateway_process_mismatch(snapshot)
elif is_macos() and get_launchd_plist_path().exists():
launchd_status(deep)

View file

@ -618,7 +618,6 @@ def _exec_in_container(container_info: dict, cli_args: list):
container_info: dict with backend, container_name, exec_user, hermes_bin
cli_args: the original CLI arguments (everything after 'hermes')
"""
import shutil
backend = container_info["backend"]
container_name = container_info["container_name"]
@ -1181,8 +1180,6 @@ def cmd_gateway(args):
def cmd_whatsapp(args):
"""Set up WhatsApp: choose mode, configure, install bridge, pair via QR."""
_require_tty("whatsapp")
import subprocess
from pathlib import Path
from hermes_cli.config import get_env_value, save_env_value
print()
@ -1330,8 +1327,6 @@ def cmd_whatsapp(args):
except (EOFError, KeyboardInterrupt):
response = "n"
if response.lower() in ("y", "yes"):
import shutil
shutil.rmtree(session_dir, ignore_errors=True)
session_dir.mkdir(parents=True, exist_ok=True)
print(" ✓ Session cleared")
@ -1427,8 +1422,6 @@ def select_provider_and_model(args=None):
# Read effective provider the same way the CLI does at startup:
# config.yaml model.provider > env var > auto-detect
import os
config_provider = None
model_cfg = config.get("model")
if isinstance(model_cfg, dict):
@ -1573,6 +1566,8 @@ def select_provider_and_model(args=None):
_model_flow_anthropic(config, current_model)
elif selected_provider == "kimi-coding":
_model_flow_kimi(config, current_model)
elif selected_provider == "stepfun":
_model_flow_stepfun(config, current_model)
elif selected_provider == "bedrock":
_model_flow_bedrock(config, current_model)
elif selected_provider in (
@ -2134,7 +2129,6 @@ def _model_flow_nous(config, current_model="", args=None):
save_env_value,
)
from hermes_cli.nous_subscription import prompt_enable_tool_gateway
import argparse
state = get_provider_auth_state("nous")
if not state or not state.get("access_token"):
@ -2173,7 +2167,6 @@ def _model_flow_nous(config, current_model="", args=None):
from hermes_cli.models import (
_PROVIDER_MODELS,
get_pricing_for_provider,
filter_nous_free_models,
check_nous_free_tier,
partition_nous_models_by_tier,
)
@ -2216,10 +2209,8 @@ def _model_flow_nous(config, current_model="", args=None):
# Check if user is on free tier
free_tier = check_nous_free_tier()
# For both tiers: apply the allowlist filter first (removes non-allowlisted
# free models and allowlist models that aren't actually free).
# Then for free users: partition remaining models into selectable/unavailable.
model_ids = filter_nous_free_models(model_ids, pricing)
# For free users: partition models into selectable/unavailable based on
# whether they are free per the Portal-reported pricing.
unavailable_models: list[str] = []
if free_tier:
model_ids, unavailable_models = partition_nous_models_by_tier(
@ -2302,7 +2293,6 @@ def _model_flow_openai_codex(config, current_model=""):
DEFAULT_CODEX_BASE_URL,
)
from hermes_cli.codex_models import get_codex_model_ids
import argparse
status = get_codex_auth_status()
if not status.get("logged_in"):
@ -3474,6 +3464,140 @@ def _model_flow_kimi(config, current_model=""):
print("No change.")
def _infer_stepfun_region(base_url: str) -> str:
"""Infer the current StepFun region from the configured endpoint."""
normalized = (base_url or "").strip().lower()
if "api.stepfun.com" in normalized:
return "china"
return "international"
def _stepfun_base_url_for_region(region: str) -> str:
from hermes_cli.auth import (
STEPFUN_STEP_PLAN_CN_BASE_URL,
STEPFUN_STEP_PLAN_INTL_BASE_URL,
)
return (
STEPFUN_STEP_PLAN_CN_BASE_URL
if region == "china"
else STEPFUN_STEP_PLAN_INTL_BASE_URL
)
def _model_flow_stepfun(config, current_model=""):
"""StepFun Step Plan flow with region-specific endpoints."""
from hermes_cli.auth import (
PROVIDER_REGISTRY,
_prompt_model_selection,
_save_model_choice,
deactivate_provider,
)
from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
from hermes_cli.models import fetch_api_models
provider_id = "stepfun"
pconfig = PROVIDER_REGISTRY[provider_id]
key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
base_url_env = pconfig.base_url_env_var or ""
existing_key = ""
for ev in pconfig.api_key_env_vars:
existing_key = get_env_value(ev) or os.getenv(ev, "")
if existing_key:
break
if not existing_key:
print(f"No {pconfig.name} API key configured.")
if key_env:
try:
import getpass
new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
except (KeyboardInterrupt, EOFError):
print()
return
if not new_key:
print("Cancelled.")
return
save_env_value(key_env, new_key)
existing_key = new_key
print("API key saved.")
print()
else:
print(f" {pconfig.name} API key: {existing_key[:8]}... ✓")
print()
current_base = ""
if base_url_env:
current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
if not current_base:
model_cfg = config.get("model")
if isinstance(model_cfg, dict):
current_base = str(model_cfg.get("base_url") or "").strip()
current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url)
region_choices = [
("international", f"International ({_stepfun_base_url_for_region('international')})"),
("china", f"China ({_stepfun_base_url_for_region('china')})"),
]
ordered_regions = []
for region_key, label in region_choices:
if region_key == current_region:
ordered_regions.insert(0, (region_key, f"{label} ← currently active"))
else:
ordered_regions.append((region_key, label))
ordered_regions.append(("cancel", "Cancel"))
region_idx = _prompt_provider_choice([label for _, label in ordered_regions])
if region_idx is None or ordered_regions[region_idx][0] == "cancel":
print("No change.")
return
selected_region = ordered_regions[region_idx][0]
effective_base = _stepfun_base_url_for_region(selected_region)
if base_url_env:
save_env_value(base_url_env, effective_base)
live_models = fetch_api_models(existing_key, effective_base)
if live_models:
model_list = live_models
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
else:
model_list = _PROVIDER_MODELS.get(provider_id, [])
if model_list:
print(
f" Could not auto-detect models from {pconfig.name} API — "
"showing Step Plan fallback catalog."
)
if model_list:
selected = _prompt_model_selection(model_list, current_model=current_model)
else:
try:
selected = input("Model name: ").strip()
except (KeyboardInterrupt, EOFError):
selected = None
if selected:
_save_model_choice(selected)
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = provider_id
model["base_url"] = effective_base
model.pop("api_mode", None)
save_config(cfg)
deactivate_provider()
config["model"] = dict(model)
print(f"Default model set to: {selected} (via {pconfig.name})")
else:
print("No change.")
def _model_flow_bedrock_api_key(config, region, current_model=""):
"""Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint.
@ -4289,9 +4413,7 @@ def _clear_bytecode_cache(root: Path) -> int:
]
if os.path.basename(dirpath) == "__pycache__":
try:
import shutil as _shutil
_shutil.rmtree(dirpath)
shutil.rmtree(dirpath)
removed += 1
except OSError:
pass
@ -4330,8 +4452,6 @@ def _gateway_prompt(prompt_text: str, default: str = "", timeout: float = 300.0)
tmp.replace(prompt_path)
# Poll for response
import time as _time
deadline = _time.monotonic() + timeout
while _time.monotonic() < deadline:
if response_path.exists():
@ -4363,7 +4483,6 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
"""
if not (web_dir / "package.json").exists():
return True
import shutil
npm = shutil.which("npm")
if not npm:
@ -4400,7 +4519,6 @@ def _update_via_zip(args):
Used on Windows when git file I/O is broken (antivirus, NTFS filter
drivers causing 'Invalid argument' errors on file creation).
"""
import shutil
import tempfile
import zipfile
from urllib.request import urlretrieve
@ -4477,7 +4595,6 @@ def _update_via_zip(args):
# breaks on this machine, keep base deps and reinstall the remaining extras
# individually so update does not silently strip working capabilities.
print("→ Updating Python dependencies...")
import subprocess
uv_bin = shutil.which("uv")
if uv_bin:
@ -5228,9 +5345,11 @@ def _install_hangup_protection(gateway_mode: bool = False):
# (2) Mirror output to update.log and wrap stdio for broken-pipe
# tolerance. Any failure here is non-fatal; we just skip the wrap.
try:
from hermes_cli.config import get_hermes_home
# Late-bound import so tests can monkeypatch
# hermes_cli.config.get_hermes_home to simulate setup failure.
from hermes_cli.config import get_hermes_home as _get_hermes_home
logs_dir = get_hermes_home() / "logs"
logs_dir = _get_hermes_home() / "logs"
logs_dir.mkdir(parents=True, exist_ok=True)
log_path = logs_dir / "update.log"
log_file = open(log_path, "a", buffering=1, encoding="utf-8")
@ -5805,8 +5924,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
# Verify the service actually survived the
# restart. systemctl restart returns 0 even
# if the new process crashes immediately.
import time as _time
_time.sleep(3)
verify = subprocess.run(
scope_cmd + ["is-active", svc_name],
@ -6549,6 +6666,7 @@ For more help on a command:
"zai",
"kimi-coding",
"kimi-coding-cn",
"stepfun",
"minimax",
"minimax-cn",
"kilocode",
@ -6770,6 +6888,12 @@ For more help on a command:
# gateway status
gateway_status = gateway_subparsers.add_parser("status", help="Show gateway status")
gateway_status.add_argument("--deep", action="store_true", help="Deep status check")
gateway_status.add_argument(
"-l",
"--full",
action="store_true",
help="Show full, untruncated service/log output where supported",
)
gateway_status.add_argument(
"--system",
action="store_true",
@ -7693,9 +7817,7 @@ Examples:
)
cmd_info["setup_fn"](plugin_parser)
except Exception as _exc:
import logging as _log
_log.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)
logging.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)
# =========================================================================
# memory command
@ -8080,7 +8202,6 @@ Examples:
return
line = _json.dumps(data, ensure_ascii=False) + "\n"
if args.output == "-":
import sys
sys.stdout.write(line)
else:
@ -8090,7 +8211,6 @@ Examples:
else:
sessions = db.export_all(source=args.source)
if args.output == "-":
import sys
for s in sessions:
sys.stdout.write(_json.dumps(s, ensure_ascii=False) + "\n")
@ -8161,8 +8281,6 @@ Examples:
# Launch hermes --resume <id> by replacing the current process
print(f"Resuming session: {selected_id}")
import shutil
hermes_bin = shutil.which("hermes")
if hermes_bin:
os.execvp(hermes_bin, ["hermes", "--resume", selected_id])

View file

@ -143,7 +143,7 @@ MODEL_ALIASES: dict[str, ModelIdentity] = {
# Z.AI / GLM
"glm": ModelIdentity("z-ai", "glm"),
# StepFun
# Step Plan (StepFun)
"step": ModelIdentity("stepfun", "step"),
# Xiaomi
@ -678,6 +678,7 @@ def switch_model(
_da = DIRECT_ALIASES.get(resolved_alias)
if _da is not None and _da.base_url:
base_url = _da.base_url
api_mode = "" # clear so determine_api_mode re-detects from URL
if not api_key:
api_key = "no-key-required"
@ -809,7 +810,10 @@ def list_authenticated_providers(
get_provider_info as _mdev_pinfo,
)
from hermes_cli.auth import PROVIDER_REGISTRY
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS
from hermes_cli.models import (
OPENROUTER_MODELS, _PROVIDER_MODELS,
_MODELS_DEV_PREFERRED, _merge_with_models_dev,
)
results: List[dict] = []
seen_slugs: set = set() # lowercase-normalized to catch case variants (#9545)
@ -855,8 +859,13 @@ def list_authenticated_providers(
if not has_creds:
continue
# Use curated list, falling back to models.dev if no curated list
# Use curated list, falling back to models.dev if no curated list.
# For preferred providers, merge models.dev entries into the curated
# catalog so newly released models (e.g. mimo-v2.5-pro on opencode-go)
# show up in the picker without requiring a Hermes release.
model_ids = curated.get(hermes_id, [])
if hermes_id in _MODELS_DEV_PREFERRED:
model_ids = _merge_with_models_dev(hermes_id, model_ids)
total = len(model_ids)
top = model_ids[:max_models]
@ -960,6 +969,9 @@ def list_authenticated_providers(
# Use curated list — look up by Hermes slug, fall back to overlay key
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
# Merge with models.dev for preferred providers (same rationale as above).
if hermes_slug in _MODELS_DEV_PREFERRED:
model_ids = _merge_with_models_dev(hermes_slug, model_ids)
total = len(model_ids)
top = model_ids[:max_models]

View file

@ -42,7 +42,8 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
("openrouter/elephant-alpha", "free"),
("openai/gpt-5.4", ""),
("openai/gpt-5.4-mini", ""),
("xiaomi/mimo-v2-pro", ""),
("xiaomi/mimo-v2.5-pro", ""),
("xiaomi/mimo-v2.5", ""),
("openai/gpt-5.3-codex", ""),
("google/gemini-3-pro-image-preview", ""),
("google/gemini-3-flash-preview", ""),
@ -53,6 +54,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
("stepfun/step-3.5-flash", ""),
("minimax/minimax-m2.7", ""),
("minimax/minimax-m2.5", ""),
("minimax/minimax-m2.5:free", "free"),
("z-ai/glm-5.1", ""),
("z-ai/glm-5v-turbo", ""),
("z-ai/glm-5-turbo", ""),
@ -107,7 +109,8 @@ def _codex_curated_models() -> list[str]:
_PROVIDER_MODELS: dict[str, list[str]] = {
"nous": [
"moonshotai/kimi-k2.6",
"xiaomi/mimo-v2-pro",
"xiaomi/mimo-v2.5-pro",
"xiaomi/mimo-v2.5",
"anthropic/claude-opus-4.7",
"anthropic/claude-opus-4.6",
"anthropic/claude-sonnet-4.6",
@ -125,17 +128,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"stepfun/step-3.5-flash",
"minimax/minimax-m2.7",
"minimax/minimax-m2.5",
"minimax/minimax-m2.5:free",
"z-ai/glm-5.1",
"z-ai/glm-5v-turbo",
"z-ai/glm-5-turbo",
"x-ai/grok-4.20-beta",
"nvidia/nemotron-3-super-120b-a12b",
"nvidia/nemotron-3-super-120b-a12b:free",
"arcee-ai/trinity-large-preview:free",
"arcee-ai/trinity-large-thinking",
"openai/gpt-5.4-pro",
"openai/gpt-5.4-nano",
"openrouter/elephant-alpha",
],
"openai-codex": _codex_curated_models(),
"copilot-acp": [
@ -211,6 +212,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"kimi-k2-turbo-preview",
"kimi-k2-0905-preview",
],
"stepfun": [
"step-3.5-flash",
"step-3.5-flash-2603",
],
"moonshot": [
"kimi-k2.6",
"kimi-k2.5",
@ -292,6 +297,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"big-pickle",
],
"opencode-go": [
"kimi-k2.6",
"kimi-k2.5",
"glm-5.1",
"glm-5",
@ -299,6 +305,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"mimo-v2-omni",
"minimax-m2.7",
"minimax-m2.5",
"qwen3.6-plus",
"qwen3.5-plus",
],
"kilocode": [
"anthropic/claude-opus-4.6",
@ -359,17 +367,11 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
_PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]
# ---------------------------------------------------------------------------
# Nous Portal free-model filtering
# Nous Portal free-model helper
# ---------------------------------------------------------------------------
# Models that are ALLOWED to appear when priced as free on Nous Portal.
# Any other free model is hidden — prevents promotional/temporary free models
# from cluttering the selection when users are paying subscribers.
# Models in this list are ALSO filtered out if they are NOT free (i.e. they
# should only appear in the menu when they are genuinely free).
_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({
"xiaomi/mimo-v2-pro",
"xiaomi/mimo-v2-omni",
})
# The Nous Portal models endpoint is the source of truth for which models
# are currently offered (free or paid). We trust whatever it returns and
# surface it to users as-is — no local allowlist filtering.
def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
@ -383,35 +385,6 @@ def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
return False
def filter_nous_free_models(
model_ids: list[str],
pricing: dict[str, dict[str, str]],
) -> list[str]:
"""Filter the Nous Portal model list according to free-model policy.
Rules:
Paid models that are NOT in the allowlist keep (normal case).
Free models that are NOT in the allowlist drop.
Allowlist models that ARE free keep.
Allowlist models that are NOT free drop.
"""
if not pricing:
return model_ids # no pricing data — can't filter, show everything
result: list[str] = []
for mid in model_ids:
free = _is_model_free(mid, pricing)
if mid in _NOUS_ALLOWED_FREE_MODELS:
# Allowlist model: only show when it's actually free
if free:
result.append(mid)
else:
# Regular model: keep only when it's NOT free
if not free:
result.append(mid)
return result
# ---------------------------------------------------------------------------
# Nous Portal account tier detection
# ---------------------------------------------------------------------------
@ -475,8 +448,7 @@ def partition_nous_models_by_tier(
) -> tuple[list[str], list[str]]:
"""Split Nous models into (selectable, unavailable) based on user tier.
For paid-tier users: all models are selectable, none unavailable
(free-model filtering is handled separately by ``filter_nous_free_models``).
For paid-tier users: all models are selectable, none unavailable.
For free-tier users: only free models are selectable; paid models
are returned as unavailable (shown grayed out in the menu).
@ -515,8 +487,6 @@ def check_nous_free_tier() -> bool:
Returns False (assume paid) on any error never blocks paying users.
"""
global _free_tier_cache
import time
now = time.monotonic()
if _free_tier_cache is not None:
cached_result, cached_at = _free_tier_cache
@ -548,6 +518,157 @@ def check_nous_free_tier() -> bool:
return False # default to paid on error — don't block users
# ---------------------------------------------------------------------------
# Nous Portal recommended models
#
# The Portal publishes a curated list of suggested models (separated into
# paid and free tiers) plus dedicated recommendations for compaction (text
# summarisation / auxiliary) and vision tasks. We fetch it once per process
# with a TTL cache so callers can ask "what's the best aux model right now?"
# without hitting the network on every lookup.
#
# Shape of the response (fields we care about):
# {
# "paidRecommendedModels": [ {modelName, ...}, ... ],
# "freeRecommendedModels": [ {modelName, ...}, ... ],
# "paidRecommendedCompactionModel": {modelName, ...} | null,
# "paidRecommendedVisionModel": {modelName, ...} | null,
# "freeRecommendedCompactionModel": {modelName, ...} | null,
# "freeRecommendedVisionModel": {modelName, ...} | null,
# }
# ---------------------------------------------------------------------------
NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models"
_NOUS_RECOMMENDED_CACHE_TTL: int = 600 # seconds (10 minutes)
# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide.
_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {}
def fetch_nous_recommended_models(
portal_base_url: str = "",
timeout: float = 5.0,
*,
force_refresh: bool = False,
) -> dict[str, Any]:
"""Fetch the Nous Portal's curated recommended-models payload.
Hits ``<portal>/api/nous/recommended-models``. The endpoint is public
no auth is required. Results are cached per portal URL for
``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to
bypass the cache.
Returns the parsed JSON dict on success, or ``{}`` on any failure
(network, parse, non-2xx). Callers must treat missing/null fields as
"no recommendation" and fall back to their own default.
"""
base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
now = time.monotonic()
cached = _nous_recommended_cache.get(base)
if not force_refresh and cached is not None:
payload, cached_at = cached
if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL:
return payload
url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}"
try:
req = urllib.request.Request(
url,
headers={"Accept": "application/json"},
)
with urllib.request.urlopen(req, timeout=timeout) as resp:
data = json.loads(resp.read().decode())
if not isinstance(data, dict):
data = {}
except Exception:
data = {}
_nous_recommended_cache[base] = (data, now)
return data
def _resolve_nous_portal_url() -> str:
"""Best-effort lookup of the Portal base URL the user is authed against."""
try:
from hermes_cli.auth import (
DEFAULT_NOUS_PORTAL_URL,
get_provider_auth_state,
)
state = get_provider_auth_state("nous") or {}
portal = str(state.get("portal_base_url") or "").strip()
if portal:
return portal.rstrip("/")
return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/")
except Exception:
return "https://portal.nousresearch.com"
def _extract_model_name(entry: Any) -> Optional[str]:
"""Pull the ``modelName`` field from a recommended-model entry, else None."""
if not isinstance(entry, dict):
return None
model_name = entry.get("modelName")
if isinstance(model_name, str) and model_name.strip():
return model_name.strip()
return None
def get_nous_recommended_aux_model(
*,
vision: bool = False,
free_tier: Optional[bool] = None,
portal_base_url: str = "",
force_refresh: bool = False,
) -> Optional[str]:
"""Return the Portal's recommended model name for an auxiliary task.
Picks the best field from the Portal's recommended-models payload:
* ``vision=True`` ``paidRecommendedVisionModel`` (paid tier) or
``freeRecommendedVisionModel`` (free tier)
* ``vision=False`` ``paidRecommendedCompactionModel`` or
``freeRecommendedCompactionModel``
When ``free_tier`` is ``None`` (default) the user's tier is auto-detected
via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the
detection useful for tests or when the caller already knows the tier.
For paid-tier users we prefer the paid recommendation but gracefully fall
back to the free recommendation if the Portal returned ``null`` for the
paid field (common during the staged rollout of new paid models).
Returns ``None`` when every candidate is missing, null, or the fetch
fails callers should fall back to their own default (currently
``google/gemini-3-flash-preview``).
"""
base = portal_base_url or _resolve_nous_portal_url()
payload = fetch_nous_recommended_models(base, force_refresh=force_refresh)
if not payload:
return None
if free_tier is None:
try:
free_tier = check_nous_free_tier()
except Exception:
# On any detection error, assume paid — paid users see both fields
# anyway so this is a safe default that maximises model quality.
free_tier = False
if vision:
paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel"
else:
paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel"
# Preference order:
# free tier → free only
# paid tier → paid, then free (if paid field is null)
candidates = [free_key] if free_tier else [paid_key, free_key]
for key in candidates:
name = _extract_model_name(payload.get(key))
if name:
return name
return None
# ---------------------------------------------------------------------------
# Canonical provider list — single source of truth for provider identity.
# Every code path that lists, displays, or iterates providers derives from
@ -584,6 +705,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"),
ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"),
ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"),
ProviderEntry("stepfun", "StepFun Step Plan", "StepFun Step Plan (agent/coding models via Step Plan API)"),
ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"),
ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"),
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
@ -618,6 +740,8 @@ _PROVIDER_ALIASES = {
"moonshot": "kimi-coding",
"kimi-cn": "kimi-coding-cn",
"moonshot-cn": "kimi-coding-cn",
"step": "stepfun",
"stepfun-coding-plan": "stepfun",
"arcee-ai": "arcee",
"arceeai": "arcee",
"minimax-china": "minimax-cn",
@ -687,6 +811,31 @@ def _openrouter_model_is_free(pricing: Any) -> bool:
return False
def _openrouter_model_supports_tools(item: Any) -> bool:
"""Return True when the model's ``supported_parameters`` advertise tool calling.
hermes-agent is tool-calling-first every provider path assumes the model
can invoke tools. Models that don't advertise ``tools`` in their
``supported_parameters`` (e.g. image-only or completion-only models) cannot
be driven by the agent loop and would fail at the first tool call.
**Permissive when the field is missing.** Some OpenRouter-compatible gateways
(Nous Portal, private mirrors, older catalog snapshots) don't populate
``supported_parameters`` at all. Treat that as "unknown capability → allow"
so the picker doesn't silently empty for those users. Only hide models
whose ``supported_parameters`` is an explicit list that omits ``tools``.
Ported from Kilo-Org/kilocode#9068.
"""
if not isinstance(item, dict):
return True
params = item.get("supported_parameters")
if not isinstance(params, list):
# Field absent / malformed / None — be permissive.
return True
return "tools" in params
def fetch_openrouter_models(
timeout: float = 8.0,
*,
@ -729,6 +878,11 @@ def fetch_openrouter_models(
live_item = live_by_id.get(preferred_id)
if live_item is None:
continue
# Hide models that don't advertise tool-calling support — hermes-agent
# requires it and surfacing them leads to immediate runtime failures
# when the user selects them. Ported from Kilo-Org/kilocode#9068.
if not _openrouter_model_supports_tools(live_item):
continue
desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else ""
curated.append((preferred_id, desc))
@ -1259,7 +1413,6 @@ def detect_provider_for_model(
from hermes_cli.auth import PROVIDER_REGISTRY
pconfig = PROVIDER_REGISTRY.get(direct_match)
if pconfig:
import os
for env_var in pconfig.api_key_env_vars:
if os.getenv(env_var, "").strip():
has_creds = True
@ -1436,11 +1589,84 @@ def _resolve_copilot_catalog_api_key() -> str:
return ""
# Providers where models.dev is treated as authoritative: curated static
# lists are kept only as an offline fallback and to capture custom additions
# the registry doesn't publish yet. Adding a provider here causes its
# curated list to be merged with fresh models.dev entries (fresh first, any
# curated-only names appended) for both the CLI and the gateway /model picker.
#
# DELIBERATELY EXCLUDED:
# - "openrouter": curated list is already a hand-picked agentic subset of
# OpenRouter's 400+ catalog. Blindly merging would dump everything.
# - "nous": curated list and Portal /models endpoint are the source of
# truth for the subscription tier.
# Also excluded: providers that already have dedicated live-endpoint
# branches below (copilot, anthropic, ai-gateway, ollama-cloud, custom,
# stepfun, openai-codex) — those paths handle freshness themselves.
_MODELS_DEV_PREFERRED: frozenset[str] = frozenset({
"opencode-go",
"opencode-zen",
"deepseek",
"kilocode",
"fireworks",
"mistral",
"togetherai",
"cohere",
"perplexity",
"groq",
"nvidia",
"huggingface",
"zai",
"gemini",
"google",
})
def _merge_with_models_dev(provider: str, curated: list[str]) -> list[str]:
"""Merge curated list with fresh models.dev entries for a preferred provider.
Returns models.dev entries first (in models.dev order), then any
curated-only entries appended. Preserves case for curated fallbacks
(e.g. ``MiniMax-M2.7``) while trusting models.dev for newer variants.
If models.dev is unreachable or returns nothing, the curated list is
returned unchanged this is the offline/CI fallback path.
"""
try:
from agent.models_dev import list_agentic_models
mdev = list_agentic_models(provider)
except Exception:
mdev = []
if not mdev:
return list(curated)
# Case-insensitive dedup while preserving order and curated casing.
seen_lower: set[str] = set()
merged: list[str] = []
for mid in mdev:
key = str(mid).lower()
if key in seen_lower:
continue
seen_lower.add(key)
merged.append(mid)
for mid in curated:
key = str(mid).lower()
if key in seen_lower:
continue
seen_lower.add(key)
merged.append(mid)
return merged
def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) -> list[str]:
"""Return the best known model catalog for a provider.
Tries live API endpoints for providers that support them (Codex, Nous),
falling back to static lists.
falling back to static lists. For providers in ``_MODELS_DEV_PREFERRED``
(opencode-go/zen, xiaomi, deepseek, smaller inference providers, etc.),
models.dev entries are merged on top of curated so new models released
on the platform appear in ``/model`` without a Hermes release.
"""
normalized = normalize_provider(provider)
if normalized == "openrouter":
@ -1469,6 +1695,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
return live
except Exception:
pass
if normalized == "stepfun":
try:
from hermes_cli.auth import resolve_api_key_provider_credentials
creds = resolve_api_key_provider_credentials("stepfun")
api_key = str(creds.get("api_key") or "").strip()
base_url = str(creds.get("base_url") or "").strip()
if api_key and base_url:
live = fetch_api_models(api_key, base_url)
if live:
return live
except Exception:
pass
if normalized == "anthropic":
live = _fetch_anthropic_models()
if live:
@ -1493,7 +1732,10 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
live = fetch_api_models(api_key, base_url)
if live:
return live
return list(_PROVIDER_MODELS.get(normalized, []))
curated_static = list(_PROVIDER_MODELS.get(normalized, []))
if normalized in _MODELS_DEV_PREFERRED:
return _merge_with_models_dev(normalized, curated_static)
return curated_static
def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
@ -2396,13 +2638,70 @@ def validate_requested_model(
except Exception:
pass # Fall through to generic warning
# Static-catalog fallback: when the /models probe was unreachable,
# validate against the curated list from provider_model_ids() — same
# pattern as the openai-codex and minimax branches above. This fixes
# /model switches in the gateway for providers like opencode-go and
# opencode-zen whose /models endpoint returns 404 against the HTML
# marketing site. Without this block, validate_requested_model would
# reject every model on such providers, switch_model() would return
# success=False, and the gateway would never write to
# _session_model_overrides.
provider_label = _PROVIDER_LABELS.get(normalized, normalized)
try:
catalog_models = provider_model_ids(normalized)
except Exception:
catalog_models = []
if catalog_models:
catalog_lower = {m.lower(): m for m in catalog_models}
if requested_for_lookup.lower() in catalog_lower:
return {
"accepted": True,
"persist": True,
"recognized": True,
"message": None,
}
catalog_lower_list = list(catalog_lower.keys())
auto = get_close_matches(
requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9
)
if auto:
corrected = catalog_lower[auto[0]]
return {
"accepted": True,
"persist": True,
"recognized": True,
"corrected_model": corrected,
"message": f"Auto-corrected `{requested}` → `{corrected}`",
}
suggestions = get_close_matches(
requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5
)
suggestion_text = ""
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(
f"`{catalog_lower[s]}`" for s in suggestions
)
return {
"accepted": True,
"persist": True,
"recognized": False,
"message": (
f"Note: `{requested}` was not found in the {provider_label} curated catalog "
f"and the /models endpoint was unreachable.{suggestion_text}"
f"\n The model may still work if it exists on the provider."
),
}
# No catalog available — accept with a warning, matching the comment's
# stated intent ("Accept and persist, but warn").
return {
"accepted": False,
"persist": False,
"accepted": True,
"persist": True,
"recognized": False,
"message": (
f"Could not reach the {provider_label} API to validate `{requested}`. "
f"Note: could not reach the {provider_label} API to validate `{requested}`. "
f"If the service isn't down, this model may not be valid."
),
}

View file

@ -10,6 +10,7 @@ from hermes_cli.auth import get_nous_auth_status
from hermes_cli.config import get_env_value, load_config
from tools.managed_tool_gateway import is_managed_tool_gateway_ready
from tools.tool_backend_helpers import (
fal_key_is_configured,
has_direct_modal_credentials,
managed_nous_tools_enabled,
normalize_browser_cloud_provider,
@ -271,7 +272,7 @@ def get_nous_subscription_features(
direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
direct_fal = bool(get_env_value("FAL_KEY"))
direct_fal = fal_key_is_configured()
direct_openai_tts = bool(resolve_openai_audio_api_key())
direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
direct_camofox = bool(get_env_value("CAMOFOX_URL"))
@ -520,7 +521,7 @@ def apply_nous_managed_defaults(
browser_cfg["cloud_provider"] = "browser-use"
changed.add("browser")
if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"):
if "image_gen" in selected_toolsets and not fal_key_is_configured():
changed.add("image_gen")
return changed
@ -548,7 +549,7 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]:
or get_env_value("TAVILY_API_KEY")
or get_env_value("EXA_API_KEY")
),
"image_gen": bool(get_env_value("FAL_KEY")),
"image_gen": fal_key_is_configured(),
"tts": bool(
resolve_openai_audio_api_key()
or get_env_value("ELEVENLABS_API_KEY")
@ -586,7 +587,6 @@ def get_gateway_eligible_tools(
return [], [], []
if config is None:
from hermes_cli.config import load_config
config = load_config() or {}
# Quick provider check without the heavy get_nous_subscription_features call

View file

@ -133,6 +133,9 @@ def _get_enabled_plugins() -> Optional[set]:
# Data classes
# ---------------------------------------------------------------------------
_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive"}
@dataclass
class PluginManifest:
"""Parsed representation of a plugin.yaml manifest."""
@ -146,6 +149,23 @@ class PluginManifest:
provides_hooks: List[str] = field(default_factory=list)
source: str = "" # "user", "project", or "entrypoint"
path: Optional[str] = None
# Plugin kind — see plugins.py module docstring for semantics.
# ``standalone`` (default): hooks/tools of its own; opt-in via
# ``plugins.enabled``.
# ``backend``: pluggable backend for an existing core tool (e.g.
# image_gen). Built-in (bundled) backends auto-load;
# user-installed still gated by ``plugins.enabled``.
# ``exclusive``: category with exactly one active provider (memory).
# Selection via ``<category>.provider`` config key; the
# category's own discovery system handles loading and the
# general scanner skips these.
kind: str = "standalone"
# Registry key — path-derived, used by ``plugins.enabled``/``disabled``
# lookups and by ``hermes plugins list``. For a flat plugin at
# ``plugins/disk-cleanup/`` the key is ``disk-cleanup``; for a nested
# category plugin at ``plugins/image_gen/openai/`` the key is
# ``image_gen/openai``. When empty, falls back to ``name``.
key: str = ""
@dataclass
@ -263,6 +283,7 @@ class PluginContext:
name: str,
handler: Callable,
description: str = "",
args_hint: str = "",
) -> None:
"""Register a slash command (e.g. ``/lcm``) available in CLI and gateway sessions.
@ -273,6 +294,13 @@ class PluginContext:
terminal commands), this registers in-session slash commands that users
invoke during a conversation.
``args_hint`` is an optional short string (e.g. ``"<file>"`` or
``"dias:7 formato:json"``) used by gateway adapters to surface the
command with an argument field for example Discord's native slash
command picker. Plugin commands without ``args_hint`` register as
parameterless in Discord and still accept trailing text when invoked
as free-form chat.
Names conflicting with built-in commands are rejected with a warning.
"""
clean = name.lower().strip().lstrip("/").replace(" ", "-")
@ -300,6 +328,7 @@ class PluginContext:
"handler": handler,
"description": description or "Plugin command",
"plugin": self.manifest.name,
"args_hint": (args_hint or "").strip(),
}
logger.debug("Plugin %s registered command: /%s", self.manifest.name, clean)
@ -366,6 +395,33 @@ class PluginContext:
self.manifest.name, engine.name,
)
# -- image gen provider registration ------------------------------------
def register_image_gen_provider(self, provider) -> None:
"""Register an image generation backend.
``provider`` must be an instance of
:class:`agent.image_gen_provider.ImageGenProvider`. The
``provider.name`` attribute is what ``image_gen.provider`` in
``config.yaml`` matches against when routing ``image_generate``
tool calls.
"""
from agent.image_gen_provider import ImageGenProvider
from agent.image_gen_registry import register_provider
if not isinstance(provider, ImageGenProvider):
logger.warning(
"Plugin '%s' tried to register an image_gen provider that does "
"not inherit from ImageGenProvider. Ignoring.",
self.manifest.name,
)
return
register_provider(provider)
logger.info(
"Plugin '%s' registered image_gen provider: %s",
self.manifest.name, provider.name,
)
# -- hook registration --------------------------------------------------
def register_hook(self, hook_name: str, callback: Callable) -> None:
@ -465,11 +521,16 @@ class PluginManager:
manifests: List[PluginManifest] = []
# 1. Bundled plugins (<repo>/plugins/<name>/)
# Repo-shipped generic plugins live next to hermes_cli/. Memory and
# context_engine subdirs are handled by their own discovery paths, so
# skip those names here. Bundled plugins are discovered (so they
# show up in `hermes plugins`) but only loaded when added to
# `plugins.enabled` in config.yaml — opt-in like any other plugin.
#
# Repo-shipped plugins live next to hermes_cli/. Two layouts are
# supported (see ``_scan_directory`` for details):
#
# - flat: ``plugins/disk-cleanup/plugin.yaml`` (standalone)
# - category: ``plugins/image_gen/openai/plugin.yaml`` (backend)
#
# ``memory/`` and ``context_engine/`` are skipped at the top level —
# they have their own discovery systems. Porting those to the
# category-namespace ``kind: exclusive`` model is a future PR.
repo_plugins = Path(__file__).resolve().parent.parent / "plugins"
manifests.extend(
self._scan_directory(
@ -492,36 +553,69 @@ class PluginManager:
manifests.extend(self._scan_entry_points())
# Load each manifest (skip user-disabled plugins).
# Later sources override earlier ones on name collision — user plugins
# take precedence over bundled, project plugins take precedence over
# user. Dedup here so we only load the final winner.
# Later sources override earlier ones on key collision — user
# plugins take precedence over bundled, project plugins take
# precedence over user. Dedup here so we only load the final
# winner. Keys are path-derived (``image_gen/openai``,
# ``disk-cleanup``) so ``tts/openai`` and ``image_gen/openai``
# don't collide even when both manifests say ``name: openai``.
disabled = _get_disabled_plugins()
enabled = _get_enabled_plugins() # None = opt-in default (nothing enabled)
winners: Dict[str, PluginManifest] = {}
for manifest in manifests:
winners[manifest.name] = manifest
winners[manifest.key or manifest.name] = manifest
for manifest in winners.values():
# Explicit disable always wins.
if manifest.name in disabled:
lookup_key = manifest.key or manifest.name
# Explicit disable always wins (matches on key or on legacy
# bare name for back-compat with existing user configs).
if lookup_key in disabled or manifest.name in disabled:
loaded = LoadedPlugin(manifest=manifest, enabled=False)
loaded.error = "disabled via config"
self._plugins[manifest.name] = loaded
logger.debug("Skipping disabled plugin '%s'", manifest.name)
self._plugins[lookup_key] = loaded
logger.debug("Skipping disabled plugin '%s'", lookup_key)
continue
# Opt-in gate: plugins must be in the enabled allow-list.
# If the allow-list is missing (None), treat as "nothing enabled"
# — users have to explicitly enable plugins to load them.
# Memory and context_engine providers are excluded from this gate
# since they have their own single-select config (memory.provider
# / context.engine), not the enabled list.
if enabled is None or manifest.name not in enabled:
# Exclusive plugins (memory providers) have their own
# discovery/activation path. The general loader records the
# manifest for introspection but does not load the module.
if manifest.kind == "exclusive":
loaded = LoadedPlugin(manifest=manifest, enabled=False)
loaded.error = "not enabled in config (run `hermes plugins enable {}` to activate)".format(
manifest.name
loaded.error = (
"exclusive plugin — activate via <category>.provider config"
)
self._plugins[manifest.name] = loaded
self._plugins[lookup_key] = loaded
logger.debug(
"Skipping '%s' (not in plugins.enabled)", manifest.name
"Skipping '%s' (exclusive, handled by category discovery)",
lookup_key,
)
continue
# Built-in backends auto-load — they ship with hermes and must
# just work. Selection among them (e.g. which image_gen backend
# services calls) is driven by ``<category>.provider`` config,
# enforced by the tool wrapper.
if manifest.kind == "backend" and manifest.source == "bundled":
self._load_plugin(manifest)
continue
# Everything else (standalone, user-installed backends,
# entry-point plugins) is opt-in via plugins.enabled.
# Accept both the path-derived key and the legacy bare name
# so existing configs keep working.
is_enabled = (
enabled is not None
and (lookup_key in enabled or manifest.name in enabled)
)
if not is_enabled:
loaded = LoadedPlugin(manifest=manifest, enabled=False)
loaded.error = (
"not enabled in config (run `hermes plugins enable {}` to activate)"
.format(lookup_key)
)
self._plugins[lookup_key] = loaded
logger.debug(
"Skipping '%s' (not in plugins.enabled)", lookup_key
)
continue
self._load_plugin(manifest)
@ -545,9 +639,37 @@ class PluginManager:
) -> List[PluginManifest]:
"""Read ``plugin.yaml`` manifests from subdirectories of *path*.
*skip_names* is an optional allow-list of names to ignore (used
for the bundled scan to exclude ``memory`` / ``context_engine``
subdirs that have their own discovery path).
Supports two layouts, mixed freely:
* **Flat** ``<root>/<plugin-name>/plugin.yaml``. Key is
``<plugin-name>`` (e.g. ``disk-cleanup``).
* **Category** ``<root>/<category>/<plugin-name>/plugin.yaml``,
where the ``<category>`` directory itself has no ``plugin.yaml``.
Key is ``<category>/<plugin-name>`` (e.g. ``image_gen/openai``).
Depth is capped at two segments.
*skip_names* is an optional allow-list of names to ignore at the
top level (kept for back-compat; the current call sites no longer
pass it now that categories are first-class).
"""
return self._scan_directory_level(
path, source, skip_names=skip_names, prefix="", depth=0
)
def _scan_directory_level(
self,
path: Path,
source: str,
*,
skip_names: Optional[Set[str]],
prefix: str,
depth: int,
) -> List[PluginManifest]:
"""Recursive implementation of :meth:`_scan_directory`.
``prefix`` is the category path already accumulated ("" at root,
"image_gen" one level in). ``depth`` is the recursion depth; we
cap at 2 so ``<root>/a/b/c/`` is ignored.
"""
manifests: List[PluginManifest] = []
if not path.is_dir():
@ -556,37 +678,112 @@ class PluginManager:
for child in sorted(path.iterdir()):
if not child.is_dir():
continue
if skip_names and child.name in skip_names:
if depth == 0 and skip_names and child.name in skip_names:
continue
manifest_file = child / "plugin.yaml"
if not manifest_file.exists():
manifest_file = child / "plugin.yml"
if not manifest_file.exists():
logger.debug("Skipping %s (no plugin.yaml)", child)
if manifest_file.exists():
manifest = self._parse_manifest(
manifest_file, child, source, prefix
)
if manifest is not None:
manifests.append(manifest)
continue
try:
if yaml is None:
logger.warning("PyYAML not installed cannot load %s", manifest_file)
continue
data = yaml.safe_load(manifest_file.read_text()) or {}
manifest = PluginManifest(
name=data.get("name", child.name),
version=str(data.get("version", "")),
description=data.get("description", ""),
author=data.get("author", ""),
requires_env=data.get("requires_env", []),
provides_tools=data.get("provides_tools", []),
provides_hooks=data.get("provides_hooks", []),
source=source,
path=str(child),
# No manifest at this level. If we're still within the depth
# cap, treat this directory as a category namespace and recurse
# one level in looking for children with manifests.
if depth >= 1:
logger.debug("Skipping %s (no plugin.yaml, depth cap reached)", child)
continue
sub_prefix = f"{prefix}/{child.name}" if prefix else child.name
manifests.extend(
self._scan_directory_level(
child,
source,
skip_names=None,
prefix=sub_prefix,
depth=depth + 1,
)
manifests.append(manifest)
except Exception as exc:
logger.warning("Failed to parse %s: %s", manifest_file, exc)
)
return manifests
def _parse_manifest(
self,
manifest_file: Path,
plugin_dir: Path,
source: str,
prefix: str,
) -> Optional[PluginManifest]:
"""Parse a single ``plugin.yaml`` into a :class:`PluginManifest`.
Returns ``None`` on parse failure (logs a warning).
"""
try:
if yaml is None:
logger.warning("PyYAML not installed cannot load %s", manifest_file)
return None
data = yaml.safe_load(manifest_file.read_text()) or {}
name = data.get("name", plugin_dir.name)
key = f"{prefix}/{plugin_dir.name}" if prefix else name
raw_kind = data.get("kind", "standalone")
if not isinstance(raw_kind, str):
raw_kind = "standalone"
kind = raw_kind.strip().lower()
if kind not in _VALID_PLUGIN_KINDS:
logger.warning(
"Plugin %s: unknown kind '%s' (valid: %s); treating as 'standalone'",
key, raw_kind, ", ".join(sorted(_VALID_PLUGIN_KINDS)),
)
kind = "standalone"
# Auto-coerce user-installed memory providers to kind="exclusive"
# so they're routed to plugins/memory discovery instead of being
# loaded by the general PluginManager (which has no
# register_memory_provider on PluginContext). Mirrors the
# heuristic in plugins/memory/__init__.py:_is_memory_provider_dir.
# Bundled memory providers are already skipped via skip_names.
if kind == "standalone" and "kind" not in data:
init_file = plugin_dir / "__init__.py"
if init_file.exists():
try:
source_text = init_file.read_text(errors="replace")[:8192]
if (
"register_memory_provider" in source_text
or "MemoryProvider" in source_text
):
kind = "exclusive"
logger.debug(
"Plugin %s: detected memory provider, "
"treating as kind='exclusive'",
key,
)
except Exception:
pass
return PluginManifest(
name=name,
version=str(data.get("version", "")),
description=data.get("description", ""),
author=data.get("author", ""),
requires_env=data.get("requires_env", []),
provides_tools=data.get("provides_tools", []),
provides_hooks=data.get("provides_hooks", []),
source=source,
path=str(plugin_dir),
kind=kind,
key=key,
)
except Exception as exc:
logger.warning("Failed to parse %s: %s", manifest_file, exc)
return None
# -----------------------------------------------------------------------
# Entry-point scanning
# -----------------------------------------------------------------------
@ -609,6 +806,7 @@ class PluginManager:
name=ep.name,
source="entrypoint",
path=ep.value,
key=ep.name,
)
manifests.append(manifest)
except Exception as exc:
@ -670,10 +868,16 @@ class PluginManager:
loaded.error = str(exc)
logger.warning("Failed to load plugin '%s': %s", manifest.name, exc)
self._plugins[manifest.name] = loaded
self._plugins[manifest.key or manifest.name] = loaded
def _load_directory_module(self, manifest: PluginManifest) -> types.ModuleType:
"""Import a directory-based plugin as ``hermes_plugins.<name>``."""
"""Import a directory-based plugin as ``hermes_plugins.<slug>``.
The module slug is derived from ``manifest.key`` so category-namespaced
plugins (``image_gen/openai``) import as
``hermes_plugins.image_gen__openai`` without colliding with any
future ``tts/openai``.
"""
plugin_dir = Path(manifest.path) # type: ignore[arg-type]
init_file = plugin_dir / "__init__.py"
if not init_file.exists():
@ -686,7 +890,9 @@ class PluginManager:
ns_pkg.__package__ = _NS_PARENT
sys.modules[_NS_PARENT] = ns_pkg
module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}"
key = manifest.key or manifest.name
slug = key.replace("/", "__").replace("-", "_")
module_name = f"{_NS_PARENT}.{slug}"
spec = importlib.util.spec_from_file_location(
module_name,
init_file,
@ -767,10 +973,12 @@ class PluginManager:
def list_plugins(self) -> List[Dict[str, Any]]:
"""Return a list of info dicts for all discovered plugins."""
result: List[Dict[str, Any]] = []
for name, loaded in sorted(self._plugins.items()):
for key, loaded in sorted(self._plugins.items()):
result.append(
{
"name": name,
"name": loaded.manifest.name,
"key": loaded.manifest.key or loaded.manifest.name,
"kind": loaded.manifest.kind,
"version": loaded.manifest.version,
"description": loaded.manifest.description,
"source": loaded.manifest.source,

View file

@ -94,6 +94,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
transport="openai_chat",
base_url_env_var="KIMI_BASE_URL",
),
"stepfun": HermesOverlay(
transport="openai_chat",
extra_env_vars=("STEPFUN_API_KEY",),
base_url_override="https://api.stepfun.ai/step_plan/v1",
base_url_env_var="STEPFUN_BASE_URL",
),
"minimax": HermesOverlay(
transport="anthropic_messages",
base_url_env_var="MINIMAX_BASE_URL",
@ -210,6 +216,10 @@ ALIASES: Dict[str, str] = {
"kimi-coding-cn": "kimi-for-coding",
"moonshot": "kimi-for-coding",
# stepfun
"step": "stepfun",
"stepfun-coding-plan": "stepfun",
# minimax-cn
"minimax-china": "minimax-cn",
"minimax_cn": "minimax-cn",
@ -294,6 +304,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
"nous": "Nous Portal",
"openai-codex": "OpenAI Codex",
"copilot-acp": "GitHub Copilot ACP",
"stepfun": "StepFun Step Plan",
"xiaomi": "Xiaomi MiMo",
"local": "Local endpoint",
"bedrock": "AWS Bedrock",
@ -427,6 +438,16 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
"""
pdef = get_provider(provider)
if pdef is not None:
# Even for known providers, check URL heuristics for special endpoints
# (e.g. kimi /coding endpoint needs anthropic_messages even on 'custom')
if base_url:
url_lower = base_url.rstrip("/").lower()
if "api.kimi.com/coding" in url_lower:
return "anthropic_messages"
if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower:
return "anthropic_messages"
if "api.openai.com" in url_lower:
return "codex_responses"
return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions")
# Direct provider checks for providers not in HERMES_OVERLAYS
@ -439,6 +460,8 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
hostname = base_url_hostname(base_url)
if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com":
return "anthropic_messages"
if hostname == "api.kimi.com" and "/coding" in url_lower:
return "anthropic_messages"
if hostname == "api.openai.com":
return "codex_responses"
if hostname.startswith("bedrock-runtime.") and base_url_host_matches(base_url, "amazonaws.com"):

View file

@ -46,6 +46,9 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
protocol under a ``/anthropic`` suffix treat those as
``anthropic_messages`` transport instead of the default
``chat_completions``.
- Kimi Code's ``api.kimi.com/coding`` endpoint also speaks the
Anthropic Messages protocol (the /coding route accepts Claude
Code's native request shape).
"""
normalized = (base_url or "").strip().lower().rstrip("/")
hostname = base_url_hostname(base_url)
@ -55,6 +58,8 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
return "codex_responses"
if normalized.endswith("/anthropic"):
return "anthropic_messages"
if hostname == "api.kimi.com" and "/coding" in normalized:
return "anthropic_messages"
return None
@ -205,7 +210,8 @@ def _resolve_runtime_from_pool_entry(
api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
else:
# Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
# api.openai.com → codex_responses, api.x.ai → codex_responses).
# Kimi /coding, api.openai.com → codex_responses, api.x.ai →
# codex_responses).
detected = _detect_api_mode_for_url(base_url)
if detected:
api_mode = detected
@ -492,8 +498,12 @@ def _resolve_openrouter_runtime(
else:
# Custom endpoint: use api_key from config when using config base_url (#1760).
# When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's
# the canonical env var for ollama.com authentication.
_is_ollama_url = "ollama.com" in base_url.lower()
# the canonical env var for ollama.com authentication. Match on
# HOST, not substring — a custom base_url whose path contains
# "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
# hostname is a look-alike (ollama.com.attacker.test) must not
# receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
_is_ollama_url = base_url_host_matches(base_url, "ollama.com")
api_key_candidates = [
explicit_api_key,
(cfg_api_key if use_config_base_url else ""),
@ -656,7 +666,8 @@ def _resolve_explicit_runtime(
if configured_mode:
api_mode = configured_mode
else:
# Auto-detect Anthropic-compatible endpoints (/anthropic suffix).
# Auto-detect from URL (Anthropic /anthropic suffix,
# api.openai.com → Responses, Kimi /coding, etc.).
detected = _detect_api_mode_for_url(base_url)
if detected:
api_mode = detected
@ -906,8 +917,7 @@ def resolve_runtime_provider(
code="no_aws_credentials",
)
# Read bedrock-specific config from config.yaml
from hermes_cli.config import load_config as _load_bedrock_config
_bedrock_cfg = _load_bedrock_config().get("bedrock", {})
_bedrock_cfg = load_config().get("bedrock", {})
# Region priority: config.yaml bedrock.region → env var → us-east-1
region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region()
auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain"

View file

@ -96,13 +96,14 @@ _DEFAULT_PROVIDER_MODELS = {
"zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
"kimi-coding": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
"kimi-coding-cn": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
"stepfun": ["step-3.5-flash", "step-3.5-flash-2603"],
"arcee": ["trinity-large-thinking", "trinity-large-preview", "trinity-mini"],
"minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
"minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
"opencode-go": ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
"opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"],
"huggingface": [
"Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@ -408,13 +409,36 @@ def _print_setup_summary(config: dict, hermes_home):
("Browser Automation", False, missing_browser_hint)
)
# FAL (image generation)
# Image generation — FAL (direct or via Nous), or any plugin-registered
# provider (OpenAI, etc.)
if subscription_features.image_gen.managed_by_nous:
tool_status.append(("Image Generation (Nous subscription)", True, None))
elif subscription_features.image_gen.available:
tool_status.append(("Image Generation", True, None))
else:
tool_status.append(("Image Generation", False, "FAL_KEY"))
# Fall back to probing plugin-registered providers so OpenAI-only
# setups don't show as "missing FAL_KEY".
_img_backend = None
try:
from agent.image_gen_registry import list_providers
from hermes_cli.plugins import _ensure_plugins_discovered
_ensure_plugins_discovered()
for _p in list_providers():
if _p.name == "fal":
continue
try:
if _p.is_available():
_img_backend = _p.display_name
break
except Exception:
continue
except Exception:
pass
if _img_backend:
tool_status.append((f"Image Generation ({_img_backend})", True, None))
else:
tool_status.append(("Image Generation", False, "FAL_KEY or OPENAI_API_KEY"))
# TTS — show configured provider
tts_provider = config.get("tts", {}).get("provider", "edge")
@ -434,7 +458,6 @@ def _print_setup_summary(config: dict, hermes_home):
tool_status.append(("Text-to-Speech (Google Gemini)", True, None))
elif tts_provider == "neutts":
try:
import importlib.util
neutts_ok = importlib.util.find_spec("neutts") is not None
except Exception:
neutts_ok = False
@ -442,6 +465,16 @@ def _print_setup_summary(config: dict, hermes_home):
tool_status.append(("Text-to-Speech (NeuTTS local)", True, None))
else:
tool_status.append(("Text-to-Speech (NeuTTS — not installed)", False, "run 'hermes setup tts'"))
elif tts_provider == "kittentts":
try:
import importlib.util
kittentts_ok = importlib.util.find_spec("kittentts") is not None
except Exception:
kittentts_ok = False
if kittentts_ok:
tool_status.append(("Text-to-Speech (KittenTTS local)", True, None))
else:
tool_status.append(("Text-to-Speech (KittenTTS — not installed)", False, "run 'hermes setup tts'"))
else:
tool_status.append(("Text-to-Speech (Edge TTS)", True, None))
@ -772,6 +805,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
"zai": "Z.AI / GLM",
"kimi-coding": "Kimi / Moonshot",
"kimi-coding-cn": "Kimi / Moonshot (China)",
"stepfun": "StepFun Step Plan",
"minimax": "MiniMax",
"minimax-cn": "MiniMax CN",
"anthropic": "Anthropic",
@ -849,7 +883,6 @@ def setup_model_provider(config: dict, *, quick: bool = False):
def _check_espeak_ng() -> bool:
"""Check if espeak-ng is installed."""
import shutil
return shutil.which("espeak-ng") is not None or shutil.which("espeak") is not None
@ -903,6 +936,31 @@ def _install_neutts_deps() -> bool:
return False
def _install_kittentts_deps() -> bool:
"""Install KittenTTS dependencies with user approval. Returns True on success."""
import subprocess
import sys
wheel_url = (
"https://github.com/KittenML/KittenTTS/releases/download/"
"0.8.1/kittentts-0.8.1-py3-none-any.whl"
)
print()
print_info("Installing kittentts Python package (~25-80MB model downloaded on first use)...")
print()
try:
subprocess.run(
[sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"],
check=True, timeout=300,
)
print_success("kittentts installed successfully")
return True
except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
print_error(f"Failed to install kittentts: {e}")
print_info(f"Try manually: python -m pip install -U '{wheel_url}' soundfile")
return False
def _setup_tts_provider(config: dict):
"""Interactive TTS provider selection with install flow for NeuTTS."""
tts_config = config.get("tts", {})
@ -918,6 +976,7 @@ def _setup_tts_provider(config: dict):
"mistral": "Mistral Voxtral TTS",
"gemini": "Google Gemini TTS",
"neutts": "NeuTTS",
"kittentts": "KittenTTS",
}
current_label = provider_labels.get(current_provider, current_provider)
@ -941,9 +1000,10 @@ def _setup_tts_provider(config: dict):
"Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
"Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)",
"NeuTTS (local on-device, free, ~300MB model download)",
"KittenTTS (local on-device, free, lightweight ~25-80MB ONNX)",
]
)
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"])
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts", "kittentts"])
choices.append(f"Keep current ({current_label})")
keep_current_idx = len(choices) - 1
idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
@ -964,7 +1024,6 @@ def _setup_tts_provider(config: dict):
if selected == "neutts":
# Check if already installed
try:
import importlib.util
already_installed = importlib.util.find_spec("neutts") is not None
except Exception:
already_installed = False
@ -1063,6 +1122,29 @@ def _setup_tts_provider(config: dict):
print_warning("No API key provided. Falling back to Edge TTS.")
selected = "edge"
elif selected == "kittentts":
# Check if already installed
try:
import importlib.util
already_installed = importlib.util.find_spec("kittentts") is not None
except Exception:
already_installed = False
if already_installed:
print_success("KittenTTS is already installed")
else:
print()
print_info("KittenTTS is lightweight (~25-80MB, CPU-only, no API key required).")
print_info("Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
print()
if prompt_yes_no("Install KittenTTS now?", True):
if not _install_kittentts_deps():
print_warning("KittenTTS installation incomplete. Falling back to Edge TTS.")
selected = "edge"
else:
print_info("Skipping install. Set tts.provider to 'kittentts' after installing manually.")
selected = "edge"
# Save the selection
if "tts" not in config:
config["tts"] = {}
@ -1084,8 +1166,6 @@ def setup_tts(config: dict):
def setup_terminal_backend(config: dict):
"""Configure the terminal execution backend."""
import platform as _platform
import shutil
print_header("Terminal Backend")
print_info("Choose where Hermes runs shell commands and code.")
print_info("This affects tool execution, file access, and isolation.")

View file

@ -30,6 +30,14 @@ All fields are optional. Missing values inherit from the ``default`` skin.
prompt: "#FFF8DC" # Prompt text color
input_rule: "#CD7F32" # Input area horizontal rule
response_border: "#FFD700" # Response box border (ANSI)
status_bar_bg: "#1a1a2e" # Status bar background
status_bar_text: "#C0C0C0" # Status bar default text
status_bar_strong: "#FFD700" # Status bar highlighted text
status_bar_dim: "#8B8682" # Status bar separators/muted text
status_bar_good: "#8FBC8F" # Healthy context usage
status_bar_warn: "#FFD700" # Warning context usage
status_bar_bad: "#FF8C00" # High context usage
status_bar_critical: "#FF6B6B" # Critical context usage
session_label: "#DAA520" # Session label color
session_border: "#8B8682" # Session ID dim color
status_bar_bg: "#1a1a2e" # TUI status/usage bar background
@ -170,6 +178,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"prompt": "#FFF8DC",
"input_rule": "#CD7F32",
"response_border": "#FFD700",
"status_bar_bg": "#1a1a2e",
"session_label": "#DAA520",
"session_border": "#8B8682",
},
@ -203,6 +212,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"prompt": "#F1E6CF",
"input_rule": "#9F1C1C",
"response_border": "#C7A96B",
"status_bar_bg": "#2A1212",
"status_bar_text": "#F1E6CF",
"status_bar_strong": "#C7A96B",
"status_bar_dim": "#6E584B",
"status_bar_good": "#7BC96F",
"status_bar_warn": "#C7A96B",
"status_bar_bad": "#DD4A3A",
"status_bar_critical": "#EF5350",
"session_label": "#C7A96B",
"session_border": "#6E584B",
},
@ -267,6 +284,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"prompt": "#c9d1d9",
"input_rule": "#444444",
"response_border": "#aaaaaa",
"status_bar_bg": "#1F1F1F",
"status_bar_text": "#C9D1D9",
"status_bar_strong": "#E6EDF3",
"status_bar_dim": "#777777",
"status_bar_good": "#B5B5B5",
"status_bar_warn": "#AAAAAA",
"status_bar_bad": "#D0D0D0",
"status_bar_critical": "#F0F0F0",
"session_label": "#888888",
"session_border": "#555555",
},
@ -298,6 +323,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"prompt": "#c9d1d9",
"input_rule": "#4169e1",
"response_border": "#7eb8f6",
"status_bar_bg": "#151C2F",
"status_bar_text": "#C9D1D9",
"status_bar_strong": "#7EB8F6",
"status_bar_dim": "#4B5563",
"status_bar_good": "#63D0A6",
"status_bar_warn": "#E6A855",
"status_bar_bad": "#F7A072",
"status_bar_critical": "#FF7A7A",
"session_label": "#7eb8f6",
"session_border": "#4b5563",
},
@ -403,6 +436,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"prompt": "#EAF7FF",
"input_rule": "#2A6FB9",
"response_border": "#5DB8F5",
"status_bar_bg": "#0F2440",
"status_bar_text": "#EAF7FF",
"status_bar_strong": "#A9DFFF",
"status_bar_dim": "#496884",
"status_bar_good": "#6ED7B0",
"status_bar_warn": "#5DB8F5",
"status_bar_bad": "#2A6FB9",
"status_bar_critical": "#D94F4F",
"session_label": "#A9DFFF",
"session_border": "#496884",
},
@ -467,6 +508,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"prompt": "#F5F5F5",
"input_rule": "#656565",
"response_border": "#B7B7B7",
"status_bar_bg": "#202020",
"status_bar_text": "#D3D3D3",
"status_bar_strong": "#F5F5F5",
"status_bar_dim": "#656565",
"status_bar_good": "#B7B7B7",
"status_bar_warn": "#D3D3D3",
"status_bar_bad": "#E7E7E7",
"status_bar_critical": "#F5F5F5",
"session_label": "#919191",
"session_border": "#656565",
},
@ -532,6 +581,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"prompt": "#FFF0D4",
"input_rule": "#C75B1D",
"response_border": "#F29C38",
"status_bar_bg": "#2B160E",
"status_bar_text": "#FFF0D4",
"status_bar_strong": "#FFD39A",
"status_bar_dim": "#6C4724",
"status_bar_good": "#6BCB77",
"status_bar_warn": "#F29C38",
"status_bar_bad": "#E2832B",
"status_bar_critical": "#EF5350",
"session_label": "#FFD39A",
"session_border": "#6C4724",
},
@ -770,6 +827,13 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
warn = skin.get_color("ui_warn", "#FF8C00")
error = skin.get_color("ui_error", "#FF6B6B")
status_bg = skin.get_color("status_bar_bg", "#1a1a2e")
status_text = skin.get_color("status_bar_text", text)
status_strong = skin.get_color("status_bar_strong", title)
status_dim = skin.get_color("status_bar_dim", dim)
status_good = skin.get_color("status_bar_good", skin.get_color("ui_ok", "#8FBC8F"))
status_warn = skin.get_color("status_bar_warn", warn)
status_bad = skin.get_color("status_bar_bad", skin.get_color("banner_accent", warn))
status_critical = skin.get_color("status_bar_critical", error)
voice_bg = skin.get_color("voice_status_bg", status_bg)
menu_bg = skin.get_color("completion_menu_bg", "#1a1a2e")
menu_current_bg = skin.get_color("completion_menu_current_bg", "#333355")
@ -782,13 +846,13 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
"prompt": prompt,
"prompt-working": f"{dim} italic",
"hint": f"{dim} italic",
"status-bar": f"bg:{status_bg} {text}",
"status-bar-strong": f"bg:{status_bg} {title} bold",
"status-bar-dim": f"bg:{status_bg} {dim}",
"status-bar-good": f"bg:{status_bg} {skin.get_color('ui_ok', '#8FBC8F')} bold",
"status-bar-warn": f"bg:{status_bg} {warn} bold",
"status-bar-bad": f"bg:{status_bg} {skin.get_color('banner_accent', warn)} bold",
"status-bar-critical": f"bg:{status_bg} {error} bold",
"status-bar": f"bg:{status_bg} {status_text}",
"status-bar-strong": f"bg:{status_bg} {status_strong} bold",
"status-bar-dim": f"bg:{status_bg} {status_dim}",
"status-bar-good": f"bg:{status_bg} {status_good} bold",
"status-bar-warn": f"bg:{status_bg} {status_warn} bold",
"status-bar-bad": f"bg:{status_bg} {status_bad} bold",
"status-bar-critical": f"bg:{status_bg} {status_critical} bold",
"input-rule": input_rule,
"image-badge": f"{label} bold",
"completion-menu": f"bg:{menu_bg} {text}",

View file

@ -122,6 +122,7 @@ def show_status(args):
"OpenAI": "OPENAI_API_KEY",
"Z.AI/GLM": "GLM_API_KEY",
"Kimi": "KIMI_API_KEY",
"StepFun Step Plan": "STEPFUN_API_KEY",
"MiniMax": "MINIMAX_API_KEY",
"MiniMax-CN": "MINIMAX_CN_API_KEY",
"Firecrawl": "FIRECRAWL_API_KEY",
@ -252,6 +253,7 @@ def show_status(args):
apikey_providers = {
"Z.AI / GLM": ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),
"Kimi / Moonshot": ("KIMI_API_KEY",),
"StepFun Step Plan": ("STEPFUN_API_KEY",),
"MiniMax": ("MINIMAX_API_KEY",),
"MiniMax (China)": ("MINIMAX_CN_API_KEY",),
}

View file

@ -127,7 +127,7 @@ TIPS = [
# --- Tools & Capabilities ---
"execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.",
"delegate_task spawns up to 3 concurrent sub-agents with isolated contexts for parallel work.",
"delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.",
"web_extract works on PDF URLs — pass any PDF link and it converts to markdown.",
"search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.",
"patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.",

View file

@ -24,7 +24,7 @@ from hermes_cli.nous_subscription import (
apply_nous_managed_defaults,
get_nous_subscription_features,
)
from tools.tool_backend_helpers import managed_nous_tools_enabled
from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled
from utils import base_url_hostname
logger = logging.getLogger(__name__)
@ -182,6 +182,14 @@ TOOL_CATEGORIES = {
],
"tts_provider": "gemini",
},
{
"name": "KittenTTS",
"badge": "local · free",
"tag": "Lightweight local ONNX TTS (~25MB), no API key",
"env_vars": [],
"tts_provider": "kittentts",
"post_setup": "kittentts",
},
],
},
"web": {
@ -423,6 +431,36 @@ def _run_post_setup(post_setup_key: str):
_print_warning(" Node.js not found. Install Camofox via Docker:")
_print_info(" docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")
elif post_setup_key == "kittentts":
try:
__import__("kittentts")
_print_success(" kittentts is already installed")
return
except ImportError:
pass
import subprocess
_print_info(" Installing kittentts (~25-80MB model, CPU-only)...")
wheel_url = (
"https://github.com/KittenML/KittenTTS/releases/download/"
"0.8.1/kittentts-0.8.1-py3-none-any.whl"
)
try:
result = subprocess.run(
[sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"],
capture_output=True, text=True, timeout=300,
)
if result.returncode == 0:
_print_success(" kittentts installed")
_print_info(" Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
_print_info(" Models: KittenML/kitten-tts-nano-0.8-int8 (25MB), micro (41MB), mini (80MB)")
else:
_print_warning(" kittentts install failed:")
_print_info(f" {result.stderr.strip()[:300]}")
_print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile")
except subprocess.TimeoutExpired:
_print_warning(" kittentts install timed out (>5min)")
_print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile")
elif post_setup_key == "rl_training":
try:
__import__("tinker_atropos")
@ -809,6 +847,51 @@ def _configure_toolset(ts_key: str, config: dict):
_configure_simple_requirements(ts_key)
def _plugin_image_gen_providers() -> list[dict]:
"""Build picker-row dicts from plugin-registered image gen providers.
Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider
row but carries an ``image_gen_plugin_name`` marker so downstream
code (config writing, model picker) knows to route through the
plugin registry instead of the in-tree FAL backend.
FAL is skipped it's already exposed by the hardcoded
``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to
a plugin in a follow-up PR, the hardcoded entries go away and this
function surfaces it alongside OpenAI automatically.
"""
try:
from agent.image_gen_registry import list_providers
from hermes_cli.plugins import _ensure_plugins_discovered
_ensure_plugins_discovered()
providers = list_providers()
except Exception:
return []
rows: list[dict] = []
for provider in providers:
if getattr(provider, "name", None) == "fal":
# FAL has its own hardcoded rows today.
continue
try:
schema = provider.get_setup_schema()
except Exception:
continue
if not isinstance(schema, dict):
continue
rows.append(
{
"name": schema.get("name", provider.display_name),
"badge": schema.get("badge", ""),
"tag": schema.get("tag", ""),
"env_vars": schema.get("env_vars", []),
"image_gen_plugin_name": provider.name,
}
)
return rows
def _visible_providers(cat: dict, config: dict) -> list[dict]:
"""Return provider entries visible for the current auth/config state."""
features = get_nous_subscription_features(config)
@ -819,6 +902,12 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]:
if provider.get("requires_nous_auth") and not features.nous_auth_present:
continue
visible.append(provider)
# Inject plugin-registered image_gen backends (OpenAI today, more
# later) so the picker lists them alongside FAL / Nous Subscription.
if cat.get("name") == "Image Generation":
visible.extend(_plugin_image_gen_providers())
return visible
@ -838,7 +927,24 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool:
browser_cfg = config.get("browser", {})
return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg
if ts_key == "image_gen":
return not get_env_value("FAL_KEY")
# Satisfied when the in-tree FAL backend is configured OR any
# plugin-registered image gen provider is available.
if fal_key_is_configured():
return False
try:
from agent.image_gen_registry import list_providers
from hermes_cli.plugins import _ensure_plugins_discovered
_ensure_plugins_discovered()
for provider in list_providers():
try:
if provider.is_available():
return False
except Exception:
continue
except Exception:
pass
return True
return not _toolset_has_keys(ts_key, config)
@ -1057,6 +1163,88 @@ def _configure_imagegen_model(backend_name: str, config: dict) -> None:
_print_success(f" Model set to: {chosen}")
def _plugin_image_gen_catalog(plugin_name: str):
"""Return ``(catalog_dict, default_model_id)`` for a plugin provider.
``catalog_dict`` is shaped like the legacy ``FAL_MODELS`` table
``{model_id: {"display", "speed", "strengths", "price", ...}}``
so the existing picker code paths work without change. Returns
``({}, None)`` if the provider isn't registered or has no models.
"""
try:
from agent.image_gen_registry import get_provider
from hermes_cli.plugins import _ensure_plugins_discovered
_ensure_plugins_discovered()
provider = get_provider(plugin_name)
except Exception:
return {}, None
if provider is None:
return {}, None
try:
models = provider.list_models() or []
default = provider.default_model()
except Exception:
return {}, None
catalog = {m["id"]: m for m in models if isinstance(m, dict) and "id" in m}
return catalog, default
def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None:
"""Prompt the user to pick a model for a plugin-registered backend.
Writes selection to ``image_gen.model``. Mirrors
:func:`_configure_imagegen_model` but sources its catalog from the
plugin registry instead of :data:`IMAGEGEN_BACKENDS`.
"""
catalog, default_model = _plugin_image_gen_catalog(plugin_name)
if not catalog:
return
cur_cfg = config.setdefault("image_gen", {})
if not isinstance(cur_cfg, dict):
cur_cfg = {}
config["image_gen"] = cur_cfg
current_model = cur_cfg.get("model") or default_model
if current_model not in catalog:
current_model = default_model
model_ids = list(catalog.keys())
ordered = [current_model] + [m for m in model_ids if m != current_model]
widths = {
"model": max(len(m) for m in model_ids),
"speed": max((len(catalog[m].get("speed", "")) for m in model_ids), default=6),
"strengths": max((len(catalog[m].get("strengths", "")) for m in model_ids), default=0),
}
print()
header = (
f" {'Model':<{widths['model']}} "
f"{'Speed':<{widths['speed']}} "
f"{'Strengths':<{widths['strengths']}} "
f"Price"
)
print(color(header, Colors.CYAN))
rows = []
for mid in ordered:
row = _format_imagegen_model_row(mid, catalog[mid], widths)
if mid == current_model:
row += " ← currently in use"
rows.append(row)
idx = _prompt_choice(
f" Choose {plugin_name} model:",
rows,
default=0,
)
chosen = ordered[idx]
cur_cfg["model"] = chosen
_print_success(f" Model set to: {chosen}")
def _configure_provider(provider: dict, config: dict):
"""Configure a single provider - prompt for API keys and set config."""
env_vars = provider.get("env_vars", [])
@ -1113,10 +1301,28 @@ def _configure_provider(provider: dict, config: dict):
_print_success(f" {provider['name']} - no configuration needed!")
if managed_feature:
_print_info(" Requests for this tool will be billed to your Nous subscription.")
# Plugin-registered image_gen provider: write image_gen.provider
# and route model selection to the plugin's own catalog.
plugin_name = provider.get("image_gen_plugin_name")
if plugin_name:
img_cfg = config.setdefault("image_gen", {})
if not isinstance(img_cfg, dict):
img_cfg = {}
config["image_gen"] = img_cfg
img_cfg["provider"] = plugin_name
_print_success(f" image_gen.provider set to: {plugin_name}")
_configure_imagegen_model_for_plugin(plugin_name, config)
return
# Imagegen backends prompt for model selection after backend pick.
backend = provider.get("imagegen_backend")
if backend:
_configure_imagegen_model(backend, config)
# In-tree FAL is the only non-plugin backend today. Keep
# image_gen.provider clear so the dispatch shim falls through
# to the legacy FAL path.
img_cfg = config.setdefault("image_gen", {})
if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"):
img_cfg["provider"] = "fal"
return
# Prompt for each required env var
@ -1151,10 +1357,23 @@ def _configure_provider(provider: dict, config: dict):
if all_configured:
_print_success(f" {provider['name']} configured!")
plugin_name = provider.get("image_gen_plugin_name")
if plugin_name:
img_cfg = config.setdefault("image_gen", {})
if not isinstance(img_cfg, dict):
img_cfg = {}
config["image_gen"] = img_cfg
img_cfg["provider"] = plugin_name
_print_success(f" image_gen.provider set to: {plugin_name}")
_configure_imagegen_model_for_plugin(plugin_name, config)
return
# Imagegen backends prompt for model selection after env vars are in.
backend = provider.get("imagegen_backend")
if backend:
_configure_imagegen_model(backend, config)
img_cfg = config.setdefault("image_gen", {})
if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"):
img_cfg["provider"] = "fal"
def _configure_simple_requirements(ts_key: str):
@ -1186,7 +1405,6 @@ def _configure_simple_requirements(ts_key: str):
if api_key and api_key.strip():
save_env_value("OPENAI_API_KEY", api_key.strip())
# Save vision base URL to config (not .env — only secrets go there)
from hermes_cli.config import load_config, save_config
_cfg = load_config()
_aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {})
_aux["base_url"] = base_url

View file

@ -16,6 +16,7 @@ import json
import logging
import os
import secrets
import subprocess
import sys
import threading
import time
@ -114,6 +115,91 @@ def _require_token(request: Request) -> None:
raise HTTPException(status_code=401, detail="Unauthorized")
# Accepted Host header values for loopback binds. DNS rebinding attacks
# point a victim browser at an attacker-controlled hostname (evil.test)
# which resolves to 127.0.0.1 after a TTL flip — bypassing same-origin
# checks because the browser now considers evil.test and our dashboard
# "same origin". Validating the Host header at the app layer rejects any
# request whose Host isn't one we bound for. See GHSA-ppp5-vxwm-4cf7.
_LOOPBACK_HOST_VALUES: frozenset = frozenset({
"localhost", "127.0.0.1", "::1",
})
def _is_accepted_host(host_header: str, bound_host: str) -> bool:
"""True if the Host header targets the interface we bound to.
Accepts:
- Exact bound host (with or without port suffix)
- Loopback aliases when bound to loopback
- Any host when bound to 0.0.0.0 (explicit opt-in to non-loopback,
no protection possible at this layer)
"""
if not host_header:
return False
# Strip port suffix. IPv6 addresses use bracket notation:
# [::1] — no port
# [::1]:9119 — with port
# Plain hosts/v4:
# localhost:9119
# 127.0.0.1:9119
h = host_header.strip()
if h.startswith("["):
# IPv6 bracketed — port (if any) follows "]:"
close = h.find("]")
if close != -1:
host_only = h[1:close] # strip brackets
else:
host_only = h.strip("[]")
else:
host_only = h.rsplit(":", 1)[0] if ":" in h else h
host_only = host_only.lower()
# 0.0.0.0 bind means operator explicitly opted into all-interfaces
# (requires --insecure per web_server.start_server). No Host-layer
# defence can protect that mode; rely on operator network controls.
if bound_host in ("0.0.0.0", "::"):
return True
# Loopback bind: accept the loopback names
bound_lc = bound_host.lower()
if bound_lc in _LOOPBACK_HOST_VALUES:
return host_only in _LOOPBACK_HOST_VALUES
# Explicit non-loopback bind: require exact host match
return host_only == bound_lc
@app.middleware("http")
async def host_header_middleware(request: Request, call_next):
"""Reject requests whose Host header doesn't match the bound interface.
Defends against DNS rebinding: a victim browser on a localhost
dashboard is tricked into fetching from an attacker hostname that
TTL-flips to 127.0.0.1. CORS and same-origin checks don't help —
the browser now treats the attacker origin as same-origin with the
dashboard. Host-header validation at the app layer catches it.
See GHSA-ppp5-vxwm-4cf7.
"""
# Store the bound host on app.state so this middleware can read it —
# set by start_server() at listen time.
bound_host = getattr(app.state, "bound_host", None)
if bound_host:
host_header = request.headers.get("host", "")
if not _is_accepted_host(host_header, bound_host):
return JSONResponse(
status_code=400,
content={
"detail": (
"Invalid Host header. Dashboard requests must use "
"the hostname the server was bound to."
),
},
)
return await call_next(request)
@app.middleware("http")
async def auth_middleware(request: Request, call_next):
"""Require the session token on all /api/ routes except the public list."""
@ -476,6 +562,138 @@ async def get_status():
}
# ---------------------------------------------------------------------------
# Gateway + update actions (invoked from the Status page).
#
# Both commands are spawned as detached subprocesses so the HTTP request
# returns immediately. stdin is closed (``DEVNULL``) so any stray ``input()``
# calls fail fast with EOF rather than hanging forever. stdout/stderr are
# streamed to a per-action log file under ``~/.hermes/logs/<action>.log`` so
# the dashboard can tail them back to the user.
# ---------------------------------------------------------------------------
_ACTION_LOG_DIR: Path = get_hermes_home() / "logs"
# Short ``name`` (from the URL) → absolute log file path.
_ACTION_LOG_FILES: Dict[str, str] = {
"gateway-restart": "gateway-restart.log",
"hermes-update": "hermes-update.log",
}
# ``name`` → most recently spawned Popen handle. Used so ``status`` can
# report liveness and exit code without shelling out to ``ps``.
_ACTION_PROCS: Dict[str, subprocess.Popen] = {}
def _spawn_hermes_action(subcommand: List[str], name: str) -> subprocess.Popen:
"""Spawn ``hermes <subcommand>`` detached and record the Popen handle.
Uses the running interpreter's ``hermes_cli.main`` module so the action
inherits the same venv/PYTHONPATH the web server is using.
"""
log_file_name = _ACTION_LOG_FILES[name]
_ACTION_LOG_DIR.mkdir(parents=True, exist_ok=True)
log_path = _ACTION_LOG_DIR / log_file_name
log_file = open(log_path, "ab", buffering=0)
log_file.write(
f"\n=== {name} started {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n".encode()
)
cmd = [sys.executable, "-m", "hermes_cli.main", *subcommand]
popen_kwargs: Dict[str, Any] = {
"cwd": str(PROJECT_ROOT),
"stdin": subprocess.DEVNULL,
"stdout": log_file,
"stderr": subprocess.STDOUT,
"env": {**os.environ, "HERMES_NONINTERACTIVE": "1"},
}
if sys.platform == "win32":
popen_kwargs["creationflags"] = (
subprocess.CREATE_NEW_PROCESS_GROUP # type: ignore[attr-defined]
| getattr(subprocess, "DETACHED_PROCESS", 0)
)
else:
popen_kwargs["start_new_session"] = True
proc = subprocess.Popen(cmd, **popen_kwargs)
_ACTION_PROCS[name] = proc
return proc
def _tail_lines(path: Path, n: int) -> List[str]:
"""Return the last ``n`` lines of ``path``. Reads the whole file — fine
for our small per-action logs. Binary-decoded with ``errors='replace'``
so log corruption doesn't 500 the endpoint."""
if not path.exists():
return []
try:
text = path.read_text(errors="replace")
except OSError:
return []
lines = text.splitlines()
return lines[-n:] if n > 0 else lines
@app.post("/api/gateway/restart")
async def restart_gateway():
"""Kick off a ``hermes gateway restart`` in the background."""
try:
proc = _spawn_hermes_action(["gateway", "restart"], "gateway-restart")
except Exception as exc:
_log.exception("Failed to spawn gateway restart")
raise HTTPException(status_code=500, detail=f"Failed to restart gateway: {exc}")
return {
"ok": True,
"pid": proc.pid,
"name": "gateway-restart",
}
@app.post("/api/hermes/update")
async def update_hermes():
"""Kick off ``hermes update`` in the background."""
try:
proc = _spawn_hermes_action(["update"], "hermes-update")
except Exception as exc:
_log.exception("Failed to spawn hermes update")
raise HTTPException(status_code=500, detail=f"Failed to start update: {exc}")
return {
"ok": True,
"pid": proc.pid,
"name": "hermes-update",
}
@app.get("/api/actions/{name}/status")
async def get_action_status(name: str, lines: int = 200):
"""Tail an action log and report whether the process is still running."""
log_file_name = _ACTION_LOG_FILES.get(name)
if log_file_name is None:
raise HTTPException(status_code=404, detail=f"Unknown action: {name}")
log_path = _ACTION_LOG_DIR / log_file_name
tail = _tail_lines(log_path, min(max(lines, 1), 2000))
proc = _ACTION_PROCS.get(name)
if proc is None:
running = False
exit_code: Optional[int] = None
pid: Optional[int] = None
else:
exit_code = proc.poll()
running = exit_code is None
pid = proc.pid
return {
"name": name,
"running": running,
"exit_code": exit_code,
"pid": pid,
"lines": tail,
}
@app.get("/api/sessions")
async def get_sessions(limit: int = 20, offset: int = 0):
try:
@ -1971,7 +2189,8 @@ async def get_usage_analytics(days: int = 30):
SUM(reasoning_tokens) as reasoning_tokens,
COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
COALESCE(SUM(actual_cost_usd), 0) as actual_cost,
COUNT(*) as sessions
COUNT(*) as sessions,
SUM(COALESCE(api_call_count, 0)) as api_calls
FROM sessions WHERE started_at > ?
GROUP BY day ORDER BY day
""", (cutoff,))
@ -1982,7 +2201,8 @@ async def get_usage_analytics(days: int = 30):
SUM(input_tokens) as input_tokens,
SUM(output_tokens) as output_tokens,
COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
COUNT(*) as sessions
COUNT(*) as sessions,
SUM(COALESCE(api_call_count, 0)) as api_calls
FROM sessions WHERE started_at > ? AND model IS NOT NULL
GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
""", (cutoff,))
@ -1995,7 +2215,8 @@ async def get_usage_analytics(days: int = 30):
SUM(reasoning_tokens) as total_reasoning,
COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost,
COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost,
COUNT(*) as total_sessions
COUNT(*) as total_sessions,
SUM(COALESCE(api_call_count, 0)) as total_api_calls
FROM sessions WHERE started_at > ?
""", (cutoff,))
totals = dict(cur3.fetchone())
@ -2465,13 +2686,15 @@ def start_server(
"authentication. Only use on trusted networks.", host,
)
# Record the bound host so host_header_middleware can validate incoming
# Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7).
app.state.bound_host = host
if open_browser:
import threading
import webbrowser
def _open():
import time as _t
_t.sleep(1.0)
time.sleep(1.0)
webbrowser.open(f"http://{host}:{port}")
threading.Thread(target=_open, daemon=True).start()