Merge remote-tracking branch 'origin/main' into jq/hermes-update-branch-flag

This commit is contained in:
emozilla 2026-05-27 00:48:25 -04:00
commit 3d9a26afad
1217 changed files with 178911 additions and 8214 deletions

View file

@ -129,7 +129,8 @@ def build_top_level_parser():
default=None,
help=(
"Provider override for this invocation (e.g. openrouter, anthropic). "
"Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
"Applies to -z/--oneshot and --tui. The persistent provider lives in config.yaml "
"under model.provider — use `hermes setup` or edit the file to change it."
),
)
parser.add_argument(
@ -268,7 +269,11 @@ def build_top_level_parser():
help="Inference provider (default: auto). Built-in or a user-defined name from `providers:` in config.yaml.",
)
chat_parser.add_argument(
"-v", "--verbose", action="store_true", help="Verbose output"
"-v",
"--verbose",
action="store_true",
default=argparse.SUPPRESS,
help="Verbose output",
)
chat_parser.add_argument(
"-Q",

View file

@ -41,14 +41,15 @@ from dataclasses import dataclass, field
from datetime import datetime, timezone
from http.server import BaseHTTPRequestHandler, HTTPServer, ThreadingHTTPServer
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple
from typing import Any, Callable, Dict, FrozenSet, List, Optional, Tuple
from urllib.parse import parse_qs, urlencode, urlparse
import httpx
import yaml
from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
from hermes_constants import OPENROUTER_BASE_URL
from hermes_constants import OPENROUTER_BASE_URL, secure_parent_dir
from agent.credential_persistence import sanitize_borrowed_credential_payload
from utils import atomic_replace, atomic_yaml_write, is_truthy_value
logger = logging.getLogger(__name__)
@ -196,9 +197,17 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
auth_type="oauth_external",
inference_base_url=DEFAULT_CODEX_BASE_URL,
),
"openai-api": ProviderConfig(
id="openai-api",
name="OpenAI API",
auth_type="api_key",
inference_base_url="https://api.openai.com/v1",
api_key_env_vars=("OPENAI_API_KEY",),
base_url_env_var="OPENAI_BASE_URL",
),
"xai-oauth": ProviderConfig(
id="xai-oauth",
name="xAI Grok OAuth (SuperGrok Subscription)",
name="xAI Grok OAuth (SuperGrok / Premium+)",
auth_type="oauth_external",
inference_base_url=DEFAULT_XAI_OAUTH_BASE_URL,
),
@ -393,6 +402,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
# OpenCode Go mixes API surfaces by model:
# - GLM / Kimi use OpenAI-compatible chat completions under /v1
# - MiniMax models use Anthropic Messages under /v1/messages
# - Qwen 3.7 uses Anthropic Messages under /v1/messages
# Keep the provider base at /v1 and select api_mode per-model.
inference_base_url="https://opencode.ai/zen/go/v1",
api_key_env_vars=("OPENCODE_GO_API_KEY",),
@ -553,6 +563,7 @@ _PLACEHOLDER_SECRET_VALUES = {
"***",
"changeme",
"your_api_key",
"your_api_key_here",
"your-api-key",
"placeholder",
"example",
@ -1030,10 +1041,8 @@ def _save_auth_store(auth_store: Dict[str, Any]) -> Path:
auth_file.parent.mkdir(parents=True, exist_ok=True)
# Tighten parent dir to 0o700 so siblings can't traverse to creds.
# No-op on Windows (POSIX mode bits not enforced); ignore failures.
try:
os.chmod(auth_file.parent, 0o700)
except OSError:
pass
# secure_parent_dir refuses to chmod / or top-level dirs (#25821).
secure_parent_dir(auth_file)
auth_store["version"] = AUTH_STORE_VERSION
auth_store["updated_at"] = datetime.now(timezone.utc).isoformat()
payload = json.dumps(auth_store, indent=2) + "\n"
@ -1169,14 +1178,23 @@ def read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]:
def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Path:
    """Persist one provider's credential pool under auth.json.

    This is the final disk-boundary guard for borrowed/reference-only
    credentials. Callers may pass raw dictionaries, so sanitize here even when
    ``PooledCredential.to_dict()`` already did the same work upstream.

    Args:
        provider_id: Key under ``credential_pool`` whose entries are replaced.
        entries: New pool entries. Dict entries are passed through
            ``sanitize_borrowed_credential_payload``; any other value is
            stored unchanged.

    Returns:
        The value returned by ``_save_auth_store`` (annotated as ``Path``).
    """
    # NOTE(review): nesting is reconstructed — the load/modify/save cycle is
    # assumed to run entirely under the auth-store lock; confirm against the
    # real file.
    with _auth_store_lock():
        auth_store = _load_auth_store()
        pool = auth_store.get("credential_pool")
        if not isinstance(pool, dict):
            # Missing or non-mapping section: start from an empty pool.
            pool = {}
            auth_store["credential_pool"] = pool
        # Single assignment: the sanitized list is what gets written. (An
        # unsanitized ``list(entries)`` assignment used to precede this and
        # was immediately overwritten.)
        pool[provider_id] = [
            sanitize_borrowed_credential_payload(entry, provider_id)
            if isinstance(entry, dict) else entry
            for entry in entries
        ]
        return _save_auth_store(auth_store)
@ -1561,6 +1579,67 @@ def _optional_base_url(value: Any) -> Optional[str]:
return cleaned if cleaned else None
# Allowlist of hosts the Nous Portal proxy is willing to forward minted
# bearer tokens to. The bearer is a long-lived agent_key minted by
# portal.nousresearch.com — sending it anywhere else would leak it.
#
# This is consulted only for URLs coming from the NETWORK side (Portal
# refresh / agent-key-mint responses). User-controlled env-var overrides
# (NOUS_INFERENCE_BASE_URL) bypass validation — that's the documented
# dev/staging escape hatch and the env source is already trusted (the
# user set it themselves).
_ALLOWED_NOUS_INFERENCE_HOSTS: FrozenSet[str] = frozenset({
    "inference-api.nousresearch.com",
})


# Co-authored-by: memosr <mehmet.sr35@gmail.com>
def _validate_nous_inference_url_from_network(url: Optional[str]) -> Optional[str]:
    """Validate a Portal-returned inference URL against the host allowlist.

    Returns ``url`` (normalised by stripping surrounding whitespace and
    trailing slashes) if it is a well-formed
    ``https://<allowlisted-host>/...`` URL. Returns ``None`` if the URL is
    missing, malformed (including an unparseable or out-of-range port),
    non-https, or points at an unexpected host — letting the caller fall
    back to the configured default rather than persist or forward a
    poisoned value.

    Defense-in-depth: a compromised refresh / mint response from the
    Portal API (MITM, malicious response injection) could otherwise
    redirect every subsequent proxy request bearing the user's
    legitimately-minted agent_key to an attacker-controlled endpoint.
    Validating scheme + host at the source closes that loop before the
    poisoned URL ever lands in ``auth.json``.

    The env-var override path (``NOUS_INFERENCE_BASE_URL``) bypasses
    this — env values come from the trusted OS user, not from the
    network, and the override is documented for staging/dev use.

    Args:
        url: Candidate URL taken from a network response; any non-string
            value is rejected.

    Returns:
        The cleaned URL, or ``None`` when it fails validation.
    """
    if not isinstance(url, str):
        return None
    cleaned = url.strip()
    if not cleaned:
        return None
    try:
        parsed = urlparse(cleaned)
    except Exception:
        # Fail closed: anything urlparse refuses is treated as "no URL".
        return None
    if parsed.scheme != "https":
        logger.warning(
            "nous: refusing non-https inference URL scheme %r from Portal response",
            parsed.scheme,
        )
        return None
    # ``hostname`` is already lower-cased by urlparse, so the comparison is
    # effectively case-insensitive.
    if parsed.hostname not in _ALLOWED_NOUS_INFERENCE_HOSTS:
        logger.warning(
            "nous: refusing inference URL host %r from Portal response "
            "(not in allowlist); falling back to default",
            parsed.hostname,
        )
        return None
    try:
        # urlparse validates the port lazily: ``.port`` raises ValueError for
        # values such as ``:notaport`` or ``:99999``. Touch it so a URL with
        # an allowlisted host but a garbage authority is not persisted.
        _ = parsed.port
    except ValueError:
        logger.warning(
            "nous: refusing inference URL with malformed port from Portal response"
        )
        return None
    return cleaned.rstrip("/")
def _decode_jwt_claims(token: Any) -> Dict[str, Any]:
if not isinstance(token, str) or token.count(".") != 2:
return {}
@ -1863,10 +1942,8 @@ def _read_qwen_cli_tokens() -> Dict[str, Any]:
def _save_qwen_cli_tokens(tokens: Dict[str, Any]) -> Path:
auth_path = _qwen_cli_auth_path()
auth_path.parent.mkdir(parents=True, exist_ok=True)
try:
os.chmod(auth_path.parent, 0o700)
except OSError:
pass
# secure_parent_dir refuses to chmod / or top-level dirs (#25821).
secure_parent_dir(auth_path)
# Per-process random temp suffix avoids collisions between concurrent
# writers and stale leftovers from a crashed prior write.
tmp_path = auth_path.with_name(f"{auth_path.name}.tmp.{os.getpid()}.{uuid.uuid4().hex}")
@ -2008,7 +2085,10 @@ def resolve_qwen_runtime_credentials(
def get_qwen_auth_status() -> Dict[str, Any]:
auth_path = _qwen_cli_auth_path()
try:
creds = resolve_qwen_runtime_credentials(refresh_if_expiring=False)
# Validate the runtime credentials, including refresh when the cached
# CLI token is expired. Otherwise stale tokens show up as "logged in"
# and `hermes model` walks users into a broken Qwen setup flow.
creds = resolve_qwen_runtime_credentials(refresh_if_expiring=True)
return {
"logged_in": True,
"auth_file": str(auth_path),
@ -2409,6 +2489,32 @@ def _make_xai_callback_handler(expected_path: str) -> tuple[type[BaseHTTPRequest
"error_description": params.get("error_description", [None])[0],
}
# Diagnostic logging — emits at INFO so reporters of loopback bugs
# (#27385 — "callback received but Hermes times out") can produce
# actionable evidence without a code change. Logged values are
# fingerprints / booleans only; no actual code/state strings leak
# into the log file. Run with ``HERMES_LOG_LEVEL=INFO`` (or check
# ``~/.hermes/logs/agent.log`` which captures INFO+ unconditionally).
try:
logger.info(
"xAI loopback callback received: path=%s has_code=%s has_state=%s has_error=%s "
"ua=%s",
parsed.path,
incoming["code"] is not None,
incoming["state"] is not None,
incoming["error"] is not None,
(self.headers.get("User-Agent") or "")[:80],
)
if incoming["error"]:
logger.info(
"xAI loopback callback carries error=%s error_description=%s",
incoming["error"],
(incoming["error_description"] or "")[:200],
)
except Exception:
# Logging must never break the OAuth flow.
pass
# Treat a hit on the callback path with neither `code` nor `error`
# as a missing OAuth callback (e.g. xAI's auth backend failed to
# redirect and the user navigated to the bare loopback URL by hand).
@ -2513,6 +2619,17 @@ def _xai_wait_for_callback(
server.shutdown()
server.server_close()
thread.join(timeout=1.0)
# Diagnostic: distinguish "no callback ever arrived" from "callback
# arrived but result wasn't populated" (#27385). The per-hit handler
# also logs at INFO; if neither line appears, xAI's IDP never reached
# the loopback at all (firewall, port-binding, IPv6/IPv4 mismatch).
logger.info(
"xAI loopback wait timed out after %.0fs with no usable callback "
"(result.code=%s result.error=%s)",
max(5.0, timeout_seconds),
result["code"] is not None,
result["error"] is not None,
)
raise AuthError(
"xAI authorization timed out waiting for the local callback.",
provider="xai-oauth",
@ -3346,7 +3463,7 @@ def _read_xai_oauth_tokens(*, _lock: bool = True) -> Dict[str, Any]:
state = _load_provider_state(auth_store, "xai-oauth")
if not state:
raise AuthError(
"No xAI OAuth credentials stored. Select xAI Grok OAuth (SuperGrok Subscription) in `hermes model`.",
"No xAI OAuth credentials stored. Select xAI Grok OAuth (SuperGrok / Premium+) in `hermes model`.",
provider="xai-oauth",
code="xai_auth_missing",
relogin_required=True,
@ -4168,10 +4285,8 @@ def _write_shared_nous_state(state: Dict[str, Any]) -> None:
with _nous_shared_store_lock():
path = _nous_shared_store_path()
path.parent.mkdir(parents=True, exist_ok=True)
try:
os.chmod(path.parent, 0o700)
except OSError:
pass
# secure_parent_dir refuses to chmod / or top-level dirs (#25821).
secure_parent_dir(path)
tmp = path.with_name(f"{path.name}.tmp.{os.getpid()}.{uuid.uuid4().hex}")
# Create with 0o600 atomically via os.open(O_EXCL) — closes the TOCTOU
# window where write_text() + post-write chmod briefly exposed Nous
@ -4782,7 +4897,7 @@ def refresh_nous_oauth_pure(
state["refresh_token"] = refreshed.get("refresh_token") or state["refresh_token"]
state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
state["scope"] = refreshed.get("scope") or state.get("scope")
refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
if refreshed_url:
state["inference_base_url"] = refreshed_url
state["obtained_at"] = now.isoformat()
@ -4818,7 +4933,7 @@ def refresh_nous_oauth_pure(
state["agent_key_expires_in"] = mint_payload.get("expires_in")
state["agent_key_reused"] = bool(mint_payload.get("reused", False))
state["agent_key_obtained_at"] = now.isoformat()
minted_url = _optional_base_url(mint_payload.get("inference_base_url"))
minted_url = _validate_nous_inference_url_from_network(mint_payload.get("inference_base_url"))
if minted_url:
state["inference_base_url"] = minted_url
@ -5096,7 +5211,7 @@ def resolve_nous_runtime_credentials(
state["refresh_token"] = refreshed.get("refresh_token") or refresh_token
state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
state["scope"] = refreshed.get("scope") or state.get("scope")
refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
if refreshed_url:
inference_base_url = refreshed_url
state["obtained_at"] = now.isoformat()
@ -5204,7 +5319,7 @@ def resolve_nous_runtime_credentials(
state["refresh_token"] = refreshed.get("refresh_token") or latest_refresh_token
state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
state["scope"] = refreshed.get("scope") or state.get("scope")
refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
if refreshed_url:
inference_base_url = refreshed_url
state["obtained_at"] = now.isoformat()
@ -5259,7 +5374,7 @@ def resolve_nous_runtime_credentials(
state["agent_key_expires_in"] = mint_payload.get("expires_in")
state["agent_key_reused"] = bool(mint_payload.get("reused", False))
state["agent_key_obtained_at"] = now.isoformat()
minted_url = _optional_base_url(mint_payload.get("inference_base_url"))
minted_url = _validate_nous_inference_url_from_network(mint_payload.get("inference_base_url"))
if minted_url:
inference_base_url = minted_url
_oauth_trace(
@ -6279,7 +6394,7 @@ def _login_xai_oauth(
pass
print()
print("Signing in to xAI Grok OAuth (SuperGrok Subscription)...")
print("Signing in to xAI Grok OAuth (SuperGrok / Premium+)...")
print("(Hermes creates its own local OAuth session)")
print()
@ -7051,10 +7166,95 @@ def _refresh_minimax_oauth_state(
return new_state
def _minimax_oauth_quarantine_on_terminal_refresh(state: Dict[str, Any], exc: AuthError) -> None:
    """Strip dead MiniMax OAuth tokens from ``state`` and persist the result.

    Called after a refresh attempt raised ``exc``. Nothing happens unless the
    error demands a re-login *and* ``state`` still carries a refresh token.
    Otherwise the token fields are removed in place, a ``last_auth_error``
    record is attached, and the state is saved so later calls fail fast
    without another network retry (the same quarantine approach the
    Nous / xAI-OAuth / Codex-OAuth paths are described as using). Shared by
    the eager-resolve path and the lazy per-request token provider.

    Args:
        state: Provider auth state; mutated in place.
        exc: The refresh failure being handled.
    """
    if not exc.relogin_required or not state.get("refresh_token"):
        return

    dead_fields = ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at")
    for dead_field in dead_fields:
        state.pop(dead_field, None)

    failure_record = {
        "provider": "minimax-oauth",
        "code": exc.code or "refresh_failed",
        "message": str(exc),
        "reason": "runtime_refresh_failure",
        "relogin_required": True,
        "at": datetime.now(timezone.utc).isoformat(),
    }
    state["last_auth_error"] = failure_record

    # Persisting is best-effort: a failed save is only logged at debug level
    # and never replaces the refresh error the caller is about to re-raise.
    try:
        _minimax_save_auth_state(state)
    except Exception as save_error:
        logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", save_error)
def build_minimax_oauth_token_provider() -> Callable[[], str]:
    """Return a zero-arg callable that yields a fresh MiniMax access token.

    Why a callable rather than a string: the Anthropic SDK caches
    ``api_key`` as a static string at construction time, so a session that
    resolves credentials once at startup keeps sending the same bearer until
    MiniMax's server returns 401 — typically ~15 minutes in, because MiniMax
    issues short-lived access tokens.

    Handing back a callable hooks into the existing Entra-ID bearer
    infrastructure in :mod:`agent.anthropic_adapter`:
    ``build_anthropic_client`` detects a callable and routes through
    ``_build_anthropic_client_with_bearer_hook``, which mints a fresh
    ``Authorization`` header on every outbound request.

    Every call re-reads the persisted state from ``auth.json`` and passes it
    to :func:`_refresh_minimax_oauth_state` — that helper is a no-op when
    the token still has more than ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` of
    life left, so the steady-state cost is one file read + one timestamp
    compare per request. Reading fresh each time also means a refresh
    persisted by one process (CLI, gateway, cron) is immediately visible to
    every other process sharing the same ``auth.json``.

    Returns:
        A callable that returns the current access token and raises
        ``AuthError`` when the user is not logged in, the refresh fails, or
        no token is present after the refresh.
    """

    def _provide() -> str:
        stored = get_provider_auth_state("minimax-oauth")
        if not (stored and stored.get("access_token")):
            raise AuthError(
                "Not logged into MiniMax OAuth. Run `hermes model` and select "
                "MiniMax (OAuth).",
                provider="minimax-oauth", code="not_logged_in", relogin_required=True,
            )

        try:
            refreshed = _refresh_minimax_oauth_state(stored)
        except AuthError as refresh_error:
            # Quarantine dead tokens (when applicable) before propagating.
            _minimax_oauth_quarantine_on_terminal_refresh(stored, refresh_error)
            raise

        access_token = refreshed.get("access_token")
        if access_token:
            return access_token
        raise AuthError(
            "MiniMax OAuth state has no access_token after refresh.",
            provider="minimax-oauth", code="no_access_token", relogin_required=True,
        )

    return _provide
def resolve_minimax_oauth_runtime_credentials(
*, min_token_ttl_seconds: int = MINIMAX_OAUTH_REFRESH_SKEW_SECONDS,
as_token_provider: bool = False,
) -> Dict[str, Any]:
"""Return {provider, api_key, base_url, source} for minimax-oauth."""
"""Return {provider, api_key, base_url, source} for minimax-oauth.
When ``as_token_provider`` is True, ``api_key`` is a zero-arg callable
that mints a fresh access token per call (proactively refreshing if
the cached token is within ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` of
expiry). This is what the runtime provider path uses so that long
sessions survive MiniMax's short access-token lifetime — see
:func:`build_minimax_oauth_token_provider` for the rationale.
The default (string ``api_key``) preserves the historical contract for
diagnostic call sites like ``hermes status`` that just want to know
whether a valid token exists right now.
"""
state = get_provider_auth_state("minimax-oauth")
if not state or not state.get("access_token"):
raise AuthError(
@ -7065,28 +7265,15 @@ def resolve_minimax_oauth_runtime_credentials(
try:
state = _refresh_minimax_oauth_state(state)
except AuthError as exc:
if exc.relogin_required and state.get("refresh_token"):
# Terminal refresh failure — clear dead tokens from auth.json so
# subsequent calls fail fast without a network retry, mirroring
# the Nous / xAI-OAuth / Codex-OAuth quarantine pattern.
for _k in ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at"):
state.pop(_k, None)
state["last_auth_error"] = {
"provider": "minimax-oauth",
"code": exc.code or "refresh_failed",
"message": str(exc),
"reason": "runtime_refresh_failure",
"relogin_required": True,
"at": datetime.now(timezone.utc).isoformat(),
}
try:
_minimax_save_auth_state(state)
except Exception as _save_exc:
logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", _save_exc)
_minimax_oauth_quarantine_on_terminal_refresh(state, exc)
raise
if as_token_provider:
api_key: Any = build_minimax_oauth_token_provider()
else:
api_key = state["access_token"]
return {
"provider": "minimax-oauth",
"api_key": state["access_token"],
"api_key": api_key,
"base_url": state["inference_base_url"].rstrip("/"),
"source": "oauth",
}

View file

@ -2,7 +2,6 @@
from __future__ import annotations
from getpass import getpass
import math
import sys
import time
@ -30,6 +29,7 @@ from agent.credential_pool import (
import hermes_cli.auth as auth_mod
from hermes_cli.auth import PROVIDER_REGISTRY
from hermes_constants import OPENROUTER_BASE_URL
from hermes_cli.secret_prompt import masked_secret_prompt
# Providers that support OAuth login in addition to API keys.
@ -196,7 +196,7 @@ def auth_add_command(args) -> None:
if requested_type == AUTH_TYPE_API_KEY:
token = (getattr(args, "api_key", None) or "").strip()
if not token:
token = getpass("Paste your API key: ").strip()
token = masked_secret_prompt("Paste your API key: ").strip()
if not token:
raise SystemExit("No API key provided.")
default_label = _api_key_default_label(len(pool.entries()) + 1)

View file

@ -85,6 +85,22 @@ def _should_exclude(rel_path: Path) -> bool:
return False
def _should_skip_backup_file(abs_path: Path, rel_path: Path, out_path: Path) -> bool:
    """Decide whether a candidate file must be left out of a backup zip.

    Args:
        abs_path: Location of the candidate file on disk.
        rel_path: The same file relative to the backup root; this is what
            the exclusion rules are checked against.
        out_path: The archive being written.

    Returns:
        True when the file is excluded, is a symlink, or resolves to the
        output archive itself. False otherwise, including when either path
        cannot be resolved.
    """
    # Symlinks are rejected up front: zipfile.write() follows file symlinks,
    # so archiving one could copy data from outside HERMES_HOME.
    if _should_exclude(rel_path) or abs_path.is_symlink():
        return True

    try:
        candidate = abs_path.resolve()
        destination = out_path.resolve()
    except (OSError, ValueError):
        # Unresolvable paths are not treated as the output archive.
        return False
    return candidate == destination
# ---------------------------------------------------------------------------
# SQLite safe copy
# ---------------------------------------------------------------------------
@ -173,16 +189,9 @@ def run_backup(args) -> None:
fpath = dp / fname
rel = fpath.relative_to(hermes_root)
if _should_exclude(rel):
if _should_skip_backup_file(fpath, rel, out_path):
continue
# Skip the output zip itself if it happens to be inside hermes root
try:
if fpath.resolve() == out_path.resolve():
continue
except (OSError, ValueError):
pass
files_to_add.append((fpath, rel))
if not files_to_add:
@ -726,16 +735,9 @@ def _write_full_zip_backup(out_path: Path, hermes_root: Path) -> Optional[Path]:
except ValueError:
continue
if _should_exclude(rel):
if _should_skip_backup_file(fpath, rel, out_path):
continue
# Skip the output zip itself if it already exists inside root.
try:
if fpath.resolve() == out_path.resolve():
continue
except (OSError, ValueError):
pass
files_to_add.append((fpath, rel))
except OSError as exc:
logger.warning("Full-zip backup: walk failed: %s", exc)

View file

@ -8,10 +8,10 @@ with the TUI.
import queue
import time as _time
import getpass
from hermes_cli.banner import cprint, _DIM, _RST
from hermes_cli.config import save_env_value_secure
from hermes_cli.secret_prompt import masked_secret_prompt
from hermes_constants import display_hermes_home
@ -75,7 +75,7 @@ def prompt_for_secret(cli, var_name: str, prompt: str, metadata=None) -> dict:
if not hasattr(cli, "_secret_deadline"):
cli._secret_deadline = 0
try:
value = getpass.getpass(f"{prompt} (hidden, ESC or empty Enter to skip): ")
value = masked_secret_prompt(f"{prompt} (hidden, ESC or empty Enter to skip): ")
except (EOFError, KeyboardInterrupt):
value = ""

View file

@ -5,9 +5,8 @@ functions previously duplicated across setup.py, tools_config.py,
mcp_config.py, and memory_setup.py.
"""
import getpass
from hermes_cli.colors import Colors, color
from hermes_cli.secret_prompt import masked_secret_prompt
# ─── Print Helpers ────────────────────────────────────────────────────────────
@ -59,7 +58,7 @@ def prompt(
try:
if password:
value = getpass.getpass(display)
value = masked_secret_prompt(display)
else:
value = input(display)
value = value.strip()

View file

@ -164,7 +164,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
cli_only=True),
CommandDef("skills", "Search, install, inspect, or manage skills",
"Tools & Skills", cli_only=True,
subcommands=("search", "browse", "inspect", "install")),
subcommands=("search", "browse", "inspect", "install", "audit")),
CommandDef("bundles", "List skill bundles (aliases /<name> for multiple skills)",
"Tools & Skills"),
CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
@ -449,7 +449,7 @@ def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
:func:`hermes_cli.plugins.PluginContext.register_command`. They behave
like ``CommandDef`` entries for gateway surfacing: they appear in the
Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
(via :func:`gateway.platforms.discord._register_slash_commands`) in
(via :func:`plugins.platforms.discord.adapter._register_slash_commands`) in
Discord's native slash command picker.
Lookup is lazy so importing this module never forces plugin discovery

View file

@ -26,6 +26,8 @@ from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple
from hermes_cli.secret_prompt import masked_secret_prompt
logger = logging.getLogger(__name__)
# Track which (config_path, mtime_ns, size) tuples we've already warned about
@ -72,6 +74,82 @@ def _warn_config_parse_failure(config_path: Path, exc: Exception) -> None:
_IS_WINDOWS = platform.system() == "Windows"
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
# Env var names that influence how the next subprocess executes —
# never writable through ``save_env_value``. Anything that controls
# the loader, interpreter, shell, or replacement editor counts:
#
# * ``LD_PRELOAD`` / ``LD_LIBRARY_PATH`` / ``LD_AUDIT`` — Linux dynamic
# loader. ``DYLD_*`` — macOS equivalent. Planting a path here means
# the next ``subprocess.run([...])`` Hermes makes loads attacker code
# before main().
# * ``PYTHONPATH`` / ``PYTHONHOME`` / ``PYTHONSTARTUP`` /
# ``PYTHONUSERBASE`` — Python interpreter init. Hermes itself starts
# from one of these on every restart.
# * ``NODE_OPTIONS`` / ``NODE_PATH`` — Node interpreter; affects npm,
# ``hermes update``, the TUI build.
# * ``PATH`` — too broad to allow. The dashboard never needs to rewrite
# the operator's PATH; if a tool can't be found, the fix is to add an
# absolute path in the integration config, not to mutate PATH globally.
# * ``GIT_SSH_COMMAND`` / ``GIT_EXEC_PATH`` — git rewrites that fire
# on every plugin install / ``hermes update``.
# * ``BROWSER`` / ``EDITOR`` / ``VISUAL`` / ``PAGER`` — commands the
# shell or CLI invokes implicitly. Wrong values here = RCE on next
# ``$EDITOR``.
# * ``SHELL`` — what subprocess uses with ``shell=True`` (we try to
# avoid that, but defense in depth).
# * ``HERMES_HOME`` / ``HERMES_PROFILE`` / ``HERMES_CONFIG`` /
# ``HERMES_ENV`` — Hermes runtime location flags. Writing these into
# ``.env`` would relocate state in ways the user did not request from
# the dashboard. ``config.yaml`` is the supported surface for these.
#
# IMPORTANT: ``HERMES_*`` overall is NOT blocked. Many legitimate
# integration credentials follow that prefix (HERMES_GEMINI_CLIENT_ID,
# HERMES_LANGFUSE_PUBLIC_KEY, HERMES_SPOTIFY_CLIENT_ID, ...). The
# denylist is name-by-name on purpose so the gate stays narrow and
# doesn't accidentally break provider setup wizards.
#
# This is enforced on *write* only — values already in ``.env`` (set
# by the operator out-of-band, or pre-existing) keep working. The
# point is that the dashboard's writable surface cannot escalate by
# planting them.
_ENV_VAR_NAME_DENYLIST: frozenset[str] = frozenset({
    # Loader / linker
    "LD_PRELOAD", "LD_LIBRARY_PATH", "LD_AUDIT", "LD_DEBUG",
    "DYLD_INSERT_LIBRARIES", "DYLD_LIBRARY_PATH", "DYLD_FRAMEWORK_PATH",
    "DYLD_FALLBACK_LIBRARY_PATH", "DYLD_FALLBACK_FRAMEWORK_PATH",
    # Python
    "PYTHONPATH", "PYTHONHOME", "PYTHONSTARTUP", "PYTHONUSERBASE",
    "PYTHONEXECUTABLE", "PYTHONNOUSERSITE",
    # Node
    "NODE_OPTIONS", "NODE_PATH",
    # General
    "PATH", "SHELL", "BROWSER", "EDITOR", "VISUAL", "PAGER",
    # Git
    "GIT_SSH_COMMAND", "GIT_EXEC_PATH", "GIT_SHELL",
    # Hermes runtime location — never via dashboard env writer.
    # NOT a HERMES_* blanket: integration credentials (HERMES_GEMINI_*,
    # HERMES_LANGFUSE_*, HERMES_SPOTIFY_*, ...) ARE allowed.
    "HERMES_HOME", "HERMES_PROFILE", "HERMES_CONFIG", "HERMES_ENV",
})


def _reject_denylisted_env_var(key: str) -> None:
    """Raise if ``key`` is in :data:`_ENV_VAR_NAME_DENYLIST`.

    Centralised so both the regular and "secure" env writers share the
    same gate, and so the message is consistent for callers.

    The comparison ignores case. Every denylist entry is upper-case, but
    environment variable names are case-insensitive on Windows (Python's
    ``os.environ`` upper-cases keys there), so a write of ``Path`` or
    ``ld_preload`` must be refused just like the canonical spelling.

    Args:
        key: Environment variable name about to be persisted.

    Raises:
        ValueError: If ``key`` matches a denylisted name in any letter case.
    """
    # Non-string keys are compared as-is so this gate never raises a new
    # exception type; name-shape validation is not this function's job.
    normalized = key.upper() if isinstance(key, str) else key
    if normalized in _ENV_VAR_NAME_DENYLIST:
        raise ValueError(
            f"Environment variable {key!r} is on the writer denylist. "
            "Names that influence subprocess execution (LD_PRELOAD, "
            "PYTHONPATH, PATH, EDITOR, ...) or Hermes runtime location "
            "(HERMES_HOME, HERMES_PROFILE, ...) cannot be persisted via "
            "the env writer. If you really need this, edit "
            "~/.hermes/.env directly."
        )
_LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {}
# (path, mtime_ns, size) -> cached expanded config dict.
# load_config() returns a deepcopy of the cached value when the file
@ -658,7 +736,8 @@ DEFAULT_CONFIG = {
# are owned by your host user instead of root, which avoids needing
# `sudo chown` after container runs. Default off to preserve behavior
# for images whose entrypoints expect to start as root (e.g. the
# bundled Hermes image, which drops to the `hermes` user via gosu).
# bundled Hermes image, which drops to the `hermes` user via
# s6-setuidgid inside each supervised service).
# When on, SETUID/SETGID caps are omitted from the container since
# no privilege drop is needed.
"docker_run_as_host_user": False,
@ -1008,6 +1087,19 @@ DEFAULT_CONFIG = {
"compact": False,
"personality": "kawaii",
"resume_display": "full",
# Recap tuning for /resume and startup resume. The defaults match the
# historical hardcoded values; expose them as config so power users can
# widen or tighten the snapshot to taste.
"resume_exchanges": 10, # max user+assistant pairs to show
"resume_max_user_chars": 300, # truncate user message text
"resume_max_assistant_chars": 200, # truncate non-last assistant text
"resume_max_assistant_lines": 3, # truncate non-last assistant lines
# When True (default), assistant entries that are *only* tool calls
# (no visible text) are skipped in the recap. This prevents the recap
# from being dominated by `[2 tool calls: terminal, read_file]` lines
# when an exchange was tool-heavy. Set False to restore the legacy
# behavior of showing tool-call summaries inline.
"resume_skip_tool_only": True,
"busy_input_mode": "interrupt", # interrupt | queue | steer
# When true, `hermes --tui` auto-resumes the most recent human-
# facing session on launch instead of forging a fresh one.
@ -1622,6 +1714,31 @@ DEFAULT_CONFIG = {
"force_ipv4": False,
},
# Gateway settings — control how messaging platforms (Telegram, Discord,
# Slack, etc.) deliver agent-produced files as native attachments.
"gateway": {
# Extra directories from which model-emitted bare file paths may be
# uploaded as native gateway attachments. Files inside the Hermes
# cache (~/.hermes/cache/{documents,images,audio,video,screenshots})
# are always trusted; this list adds operator-controlled roots
# (project dirs, scratch dirs, mounted shares). Accepts a list of
# absolute paths or a single os.pathsep-separated string. Bridged
# to HERMES_MEDIA_ALLOW_DIRS at gateway startup. Tilde paths are
# expanded.
"media_delivery_allow_dirs": [],
# When true, files whose mtime is within ``trust_recent_files_seconds``
# of "now" are trusted for native delivery even outside the cache /
# operator allowlist — useful for ``pandoc -o /tmp/report.pdf`` or
# PDFs the agent writes into a working directory. System paths
# (/etc, /proc, ~/.ssh, ~/.aws, etc.) remain blocked regardless.
# Disable to fall back to pure-allowlist mode. Bridged to
# HERMES_MEDIA_TRUST_RECENT_FILES.
"trust_recent_files": True,
# Recency window in seconds. 600 (10 min) comfortably covers a
# multi-tool agent turn. Bridged to HERMES_MEDIA_TRUST_RECENT_SECONDS.
"trust_recent_files_seconds": 600,
},
# Session storage — controls automatic cleanup of ~/.hermes/state.db.
# state.db accumulates every session, message, tool call, and FTS5 index
# entry forever. Without auto-pruning, a heavy user (gateway + cron)
@ -1730,6 +1847,7 @@ DEFAULT_CONFIG = {
"servers": {},
},
# X (Twitter) Search via xAI's built-in x_search Responses tool.
# The tool registers when xAI credentials are available (SuperGrok
# OAuth or XAI_API_KEY) AND the x_search toolset is enabled in
@ -1747,8 +1865,69 @@ DEFAULT_CONFIG = {
"retries": 2,
},
# =========================================================================
# External secret sources
# =========================================================================
# Pull credentials from external secret managers at process startup
# rather than storing them in ~/.hermes/.env.
"secrets": {
"bitwarden": {
# Master switch. When false, BSM is never contacted and the
# bws binary is never auto-installed — same as not having
# this section at all.
"enabled": False,
# Name of the env var that holds the Bitwarden machine-account
# access token. This is the one bootstrap secret; it lives
# in ~/.hermes/.env (or your shell) and never in config.yaml.
"access_token_env": "BWS_ACCESS_TOKEN",
# UUID of the BSM project to sync from.
"project_id": "",
# Seconds to cache fetched secrets in-process. 0 disables.
"cache_ttl_seconds": 300,
# When True, BSM values overwrite existing env vars. Default
# True because the point of using BSM is centralized rotation —
# if .env had the final say, rotating in Bitwarden wouldn't
# take effect until you also cleared the matching .env line.
"override_existing": True,
# When True, the bws binary is auto-downloaded into
# ~/.hermes/bin/ on first use. When False you must install
# bws yourself and have it on PATH.
"auto_install": True,
# Bitwarden region / self-hosted endpoint. Empty string
# means use the bws CLI default (US Cloud,
# https://vault.bitwarden.com). Set to
# https://vault.bitwarden.eu for EU Cloud, or your own URL
# for self-hosted Bitwarden. Plumbed into the bws subprocess
# as BWS_SERVER_URL. Prompted for during
# `hermes secrets bitwarden setup`.
"server_url": "",
},
},
# Paste collapse thresholds (TUI + CLI).
#
# paste_collapse_threshold (default 5)
# Bracketed-paste handler. Pastes with this many newlines or more
# collapse to a file reference. Set 0 to disable.
#
# paste_collapse_threshold_fallback (default 5)
# Fallback heuristic for terminals without bracketed paste support.
# Same line count test but heuristically gated by chars-added /
# newlines-added to avoid false positives from normal typing.
# Set 0 to disable.
#
# paste_collapse_char_threshold (default 2000)
# Long single-line paste guard. Pastes whose total char length
# reaches this value collapse to a file reference even if line
# count is below the line threshold. Catches the "8000 chars of
# minified JSON / log output on one line" case. Set 0 to disable.
"paste_collapse_threshold": 5,
"paste_collapse_threshold_fallback": 5,
"paste_collapse_char_threshold": 2000,
# Config schema version - bump this when adding new required fields
"_config_version": 23,
"_config_version": 24,
}
# =============================================================================
@ -3017,7 +3196,7 @@ def _normalize_custom_provider_entry(
"api_mode", "transport", "model", "default_model", "models",
"context_length", "rate_limit_delay",
"request_timeout_seconds", "stale_timeout_seconds",
"discover_models",
"discover_models", "extra_body",
}
for camel, snake in _CAMEL_ALIASES.items():
if camel in entry and snake not in entry:
@ -3112,6 +3291,10 @@ def _normalize_custom_provider_entry(
if isinstance(discover_models, bool):
normalized["discover_models"] = discover_models
extra_body = entry.get("extra_body")
if isinstance(extra_body, dict):
normalized["extra_body"] = dict(extra_body)
return normalized
@ -3272,7 +3455,7 @@ _KNOWN_ROOT_KEYS = {
# Valid fields inside a custom_providers list entry
_VALID_CUSTOM_PROVIDER_FIELDS = {
"name", "base_url", "api_key", "api_mode", "model", "models",
"context_length", "rate_limit_delay",
"context_length", "rate_limit_delay", "extra_body",
# key_env is read at runtime by runtime_provider.py and auxiliary_client.py
# — include it here so the set accurately describes the supported schema.
"key_env",
@ -3947,8 +4130,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
print(f" Get your key at: {var['url']}")
if var.get("password"):
import getpass
value = getpass.getpass(f" {var['prompt']}: ")
value = masked_secret_prompt(f" {var['prompt']}: ")
else:
value = input(f" {var['prompt']}: ").strip()
@ -3999,8 +4181,9 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
else:
print(f" {info.get('description', name)}")
if info.get("password"):
import getpass
value = getpass.getpass(f" {info.get('prompt', name)} (Enter to skip): ")
value = masked_secret_prompt(
f" {info.get('prompt', name)} (Enter to skip): "
)
else:
value = input(f" {info.get('prompt', name)} (Enter to skip): ").strip()
if value:
@ -4779,6 +4962,7 @@ def save_env_value(key: str, value: str):
return
if not _ENV_VAR_NAME_RE.match(key):
raise ValueError(f"Invalid environment variable name: {key!r}")
_reject_denylisted_env_var(key)
value = value.replace("\n", "").replace("\r", "")
# API keys / tokens must be ASCII — strip non-ASCII with a warning.
value = _check_non_ascii_credential(key, value)

View file

@ -0,0 +1,325 @@
"""Container-boot reconciliation of per-profile gateway s6 services.
Service directories under /run/service/ live on **tmpfs** and are wiped
on every container restart. Profile directories under
``$HERMES_HOME/profiles/<name>/`` live on the persistent VOLUME, and
each one records its gateway's last state in ``gateway_state.json``.
This module bridges the two: on every container boot, walk the
persistent profiles, recreate the s6 service slots, and auto-start
only those whose last recorded state was ``running``.
Wired into the image as /etc/cont-init.d/02-reconcile-profiles by the
Dockerfile (Phase 4 Task 4.0). Runs as root after 01-hermes-setup
(the stage2 hook) has chowned the volume and seeded $HERMES_HOME, but
before s6-rc starts user services.
Without this module, every ``docker restart`` would silently wipe
every per-profile gateway, even though the user's profiles still
exist on disk.
"""
from __future__ import annotations
import json
import logging
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Literal
# Module-level logger used for non-fatal reconcile warnings.
log = logging.getLogger(__name__)
# Only this prior state triggers automatic restart. Everything else
# (startup_failed, starting, stopped, missing) registers the slot in
# the down state and waits for explicit user action — this avoids the
# crash-loop where a broken gateway keeps being restarted across
# `docker restart` cycles.
_AUTOSTART_STATES = frozenset({"running"})
# Stale runtime files we sweep before recreating service slots. These
# all hold container-namespaced state (PIDs, process tables) that's
# garbage post-restart — a numerically-equal PID in the new container
# is a different process. See the Risk Register in the plan.
_STALE_RUNTIME_FILES = ("gateway.pid", "processes.json")
# Closed set of outcome labels carried by ReconcileAction.action.
# NOTE(review): reconcile_profile_gateways in this module only ever emits
# "started" or "registered"; "skipped" is declared but not produced here —
# confirm whether another caller uses it.
ReconcileActionLabel = Literal["started", "registered", "skipped"]
@dataclass(frozen=True)
class ReconcileAction:
    """One profile's outcome from a single reconciliation pass.

    Instances are immutable (``frozen=True``); one is appended per
    processed profile and the list is both returned to the caller and
    written to the container-boot log.
    """

    # Profile name; the literal "default" denotes the root profile that
    # lives directly in $HERMES_HOME rather than under profiles/.
    profile: str
    # Value read from the profile's gateway_state.json, or None when the
    # file is missing or could not be parsed.
    prior_state: str | None
    # What the pass did for this profile (see ReconcileActionLabel).
    action: ReconcileActionLabel
def reconcile_profile_gateways(
    *,
    hermes_home: Path,
    scandir: Path,
    dry_run: bool = False,
) -> list[ReconcileAction]:
    """Recreate s6 service registrations for every persistent profile.

    A ``gateway-default`` slot is always registered for the root profile
    (the implicit profile that lives at the top of ``$HERMES_HOME``, not
    under ``profiles/``). The dispatcher in ``hermes_cli.gateway`` maps an
    empty profile suffix to ``gateway-default``, so this slot is what
    ``hermes gateway start`` (no ``-p``) targets. Without it, a bare
    ``hermes gateway start`` inside the container would land on
    ``s6-svc -u /run/service/gateway-default`` and surface an uncaught
    ``CalledProcessError`` traceback to the user (PR #30136 review).

    The default slot's prior state comes from
    ``$HERMES_HOME/gateway_state.json``; stale runtime files there are
    swept exactly as they are for named profiles.

    Args:
        hermes_home: The container's HERMES_HOME (typically /opt/data).
            Named profiles live under ``<hermes_home>/profiles/<name>/``;
            the default profile lives at ``<hermes_home>`` itself.
        scandir: The s6 dynamic scandir (typically /run/service). Service
            directories are created at ``<scandir>/gateway-<profile>/``.
        dry_run: When True, compute and return the action list without
            touching the filesystem (no cleanup, no registration, no log).

    Returns:
        One :class:`ReconcileAction` per processed profile: ``default``
        first, then named profiles sorted by directory path.
    """
    outcomes: list[ReconcileAction] = []

    def _reconcile_one(name: str, profile_dir: Path) -> None:
        # Shared per-profile step: read the last recorded state, sweep
        # stale runtime files, publish the slot, and record the outcome.
        # Auto-start happens only when the prior state was "running".
        previous = _read_prior_state(profile_dir)
        autostart = previous in _AUTOSTART_STATES
        if not dry_run:
            _cleanup_stale_runtime_files(profile_dir)
            _register_service(scandir, name, start=autostart)
        outcomes.append(
            ReconcileAction(
                profile=name,
                prior_state=previous,
                action="started" if autostart else "registered",
            )
        )

    # Root profile: registered unconditionally, even when nothing has
    # ever populated it, so `hermes gateway start` has a slot to target.
    _reconcile_one("default", hermes_home)

    profiles_root = hermes_home / "profiles"
    candidates = sorted(profiles_root.iterdir()) if profiles_root.is_dir() else []
    for candidate in candidates:
        # SOUL.md is always seeded by `hermes profile create` (config.yaml
        # only appears later via `hermes setup`), so it is the marker of a
        # real profile; stray dirs (backups, manual mkdir) are ignored.
        if not candidate.is_dir() or not (candidate / "SOUL.md").exists():
            continue
        # The "default" service name is reserved for the root profile
        # handled above; a literal profiles/default/ directory would
        # collide with that slot, so it is skipped with a warning.
        if candidate.name == "default":
            log.warning(
                "profiles/default/ exists — skipping to avoid colliding "
                "with the reserved root-profile s6 slot",
            )
            continue
        _reconcile_one(candidate.name, candidate)

    if not dry_run:
        _write_reconcile_log(hermes_home, outcomes)
    return outcomes
def _read_prior_state(profile_dir: Path) -> str | None:
"""Read gateway_state.json's ``gateway_state`` field, or None if
missing or unparseable. Unparseable counts as "no prior state" so
we don't bork the whole reconciliation on a corrupt file."""
state_file = profile_dir / "gateway_state.json"
if not state_file.exists():
return None
try:
return json.loads(state_file.read_text()).get("gateway_state")
except (OSError, json.JSONDecodeError):
log.warning(
"could not read %s; treating as no prior state", state_file,
)
return None
def _cleanup_stale_runtime_files(profile_dir: Path) -> None:
    """Delete leftover runtime files from ``profile_dir``.

    Every name in ``_STALE_RUNTIME_FILES`` is unlinked if present; a
    missing file is not an error. These files reference PIDs from the
    dead container's process namespace and would otherwise confuse the
    newly-started gateway's process-mismatch checks.
    """
    for stale_path in map(profile_dir.joinpath, _STALE_RUNTIME_FILES):
        stale_path.unlink(missing_ok=True)
def _register_service(scandir: Path, profile: str, *, start: bool) -> None:
    """Recreate the s6 service slot for one profile.

    Mirrors the rendering in :func:`S6ServiceManager.register_profile_gateway`,
    but here we control the start state directly via the ``down`` marker
    file (s6-svscan honors it on rescan). Cannot use the manager
    directly because the cont-init.d phase runs as root before
    s6-svscan starts scanning the dynamic scandir — the manager's
    ``s6-svscanctl -a`` call would fail with no control socket.

    Publication: the new layout is built in a sibling ``<slot>.tmp``
    directory and renamed into place via :meth:`Path.replace`, so a
    half-populated directory never appears under the final slot name if
    the script is interrupted mid-write. This matches
    :meth:`S6ServiceManager.register_profile_gateway` (PR #30136 review
    item O4) and keeps the contract uniform between the two registration
    paths. Note that when a slot already exists it is removed with
    ``shutil.rmtree`` before the rename, so replacing an existing slot is
    two steps rather than one atomic swap.

    Args:
        scandir: Directory in which ``gateway-<profile>/`` is created.
        profile: Profile name; passed to ``validate_profile_name`` first
            (presumably raises on invalid names — confirm in
            service_manager.py).
        start: When False, a ``down`` marker file is written so the
            service is registered but not started.

    Raises:
        Exception: Any error while building or publishing the slot is
            re-raised after the temp directory has been removed.
    """
    # Local imports: shutil is only needed here, and the service_manager
    # helpers are pulled in lazily at call time.
    import shutil
    from hermes_cli.service_manager import (
        S6ServiceManager,
        _seed_supervise_skeleton,
        validate_profile_name,
    )
    validate_profile_name(profile)
    service_dir = scandir / f"gateway-{profile}"
    tmp_dir = service_dir.with_name(service_dir.name + ".tmp")
    # Wipe any leftover tmp from a previous interrupted run.
    if tmp_dir.exists():
        shutil.rmtree(tmp_dir, ignore_errors=True)
    tmp_dir.mkdir(parents=True)
    try:
        (tmp_dir / "type").write_text("longrun\n")
        # Reuse the manager's run-script rendering — single source of
        # truth so register_profile_gateway and reconcile_profile_gateways
        # stay consistent. extra_env is empty here; users who need
        # per-profile env can set it via the profile's config.yaml
        # (which the gateway itself loads).
        run = tmp_dir / "run"
        run.write_text(S6ServiceManager._render_run_script(profile, extra_env={}))
        run.chmod(0o755)
        # Persistent log rotation (OQ8-C).
        log_subdir = tmp_dir / "log"
        log_subdir.mkdir()
        log_run = log_subdir / "run"
        log_run.write_text(S6ServiceManager._render_log_run(profile))
        log_run.chmod(0o755)
        # The presence of a `down` file tells s6-supervise to NOT
        # start the service when s6-svscan picks it up. User brings
        # it up explicitly with `hermes -p <profile> gateway start`
        # (which routes through the Phase 4
        # _dispatch_via_service_manager_if_s6 helper to `s6-svc -u`).
        if not start:
            (tmp_dir / "down").touch()
        # Pre-create the supervise/ skeleton with hermes ownership
        # BEFORE we publish the slot. Mirrors the same pre-creation
        # step in S6ServiceManager.register_profile_gateway — when
        # s6-svscan picks the published slot up, the s6-supervise it
        # spawns will EEXIST our dirs/FIFOs and inherit hermes
        # ownership, so runtime s6-svc / s6-svstat / s6-svwait calls
        # (all dispatched as the hermes user) won't hit EACCES. See
        # ``_seed_supervise_skeleton`` in service_manager.py for the
        # full rationale.
        _seed_supervise_skeleton(tmp_dir)
        # Publish. A rename cannot replace a non-empty directory, so a
        # slot left by a previous reconcile pass is removed first and the
        # temp dir is then renamed into place. The rename itself is
        # atomic; the remove-then-rename pair is not (the slot is briefly
        # absent when one already existed).
        if service_dir.exists():
            shutil.rmtree(service_dir)
        tmp_dir.replace(service_dir)
    except Exception:
        # Best-effort cleanup of the partially built temp dir, then let
        # the original error propagate to the caller.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise
def _write_reconcile_log(
    hermes_home: Path, actions: list[ReconcileAction],
) -> None:
    """Append one line per profile to $HERMES_HOME/logs/container-boot.log.

    Operators read this file to answer "why didn't my profile come back
    up". It is kept separate from agent.log so a grep for "profile=foo"
    is not buried in unrelated activity.

    Size-bounded: once the file has reached ``_LOG_ROTATE_BYTES``
    (256 KiB, roughly 3000 reconcile lines), it is renamed to
    ``container-boot.log.1`` (replacing any previous rotation) before the
    new entries are appended. That caps long-lived containers at about
    512 KiB across the two files without pulling in logrotate or s6-log
    machinery for one append-only file (PR #30136 review item O3).
    """
    import time

    logs_dir = hermes_home / "logs"
    logs_dir.mkdir(parents=True, exist_ok=True)
    boot_log = logs_dir / "container-boot.log"

    # Rotation happens before the append so that, once the threshold has
    # been crossed, this pass's entries start a fresh file.
    try:
        if boot_log.exists() and boot_log.stat().st_size >= _LOG_ROTATE_BYTES:
            boot_log.replace(logs_dir / "container-boot.log.1")
    except OSError as exc:
        # A failed rotation is not fatal: keep appending to the existing
        # file instead of dropping this pass's entries.
        log.warning("could not rotate %s: %s", boot_log, exc)

    stamp = time.strftime("%Y-%m-%dT%H:%M:%S%z")
    with boot_log.open("a", encoding="utf-8") as handle:
        handle.writelines(
            f"{stamp} profile={entry.profile} prior_state={entry.prior_state} "
            f"action={entry.action}\n"
            for entry in actions
        )
# 256 KiB soft cap on container-boot.log; rotated to .1 when crossed.
# At ~80 B per reconcile-action line this is ~3000 lines, or about a
# year of daily reboots on a 5-profile container. Two files = ~512 KiB
# worst case. Tuned for visibility (small enough to grep / cat without
# scrolling forever) more than space (the persistent volume has GB).
_LOG_ROTATE_BYTES = 256 * 1024
def main() -> int:
    """Run one reconcile pass; invoked from /etc/cont-init.d/02-reconcile-profiles.

    Reads ``HERMES_HOME`` (default ``/opt/data``) and
    ``S6_PROFILE_GATEWAY_SCANDIR`` (default ``/run/service``) from the
    environment, reconciles every profile, prints one summary line per
    profile to stdout, and returns exit status 0.
    """
    env = os.environ
    home = Path(env.get("HERMES_HOME", "/opt/data"))
    service_root = Path(env.get("S6_PROFILE_GATEWAY_SCANDIR", "/run/service"))
    outcomes = reconcile_profile_gateways(hermes_home=home, scandir=service_root)
    for outcome in outcomes:
        print(
            f"reconcile: profile={outcome.profile} "
            f"prior_state={outcome.prior_state} action={outcome.action}"
        )
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View file

@ -71,7 +71,7 @@ def curses_checklist(
curses.use_default_colors()
curses.init_pair(1, curses.COLOR_GREEN, -1)
curses.init_pair(2, curses.COLOR_YELLOW, -1)
curses.init_pair(3, 8, -1) # dim gray
curses.init_pair(3, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1) # dim gray
cursor = 0
scroll_offset = 0

View file

@ -14,6 +14,7 @@ Currently supports:
import io
import json
import logging
import re
import sys
import time
import urllib.error
@ -36,6 +37,12 @@ _REDACTION_BANNER = (
"run with --no-redact to disable]\n"
)
_EMAIL_ADDRESS_RE = re.compile(
r"(?<![A-Za-z0-9._%+-])"
r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"
r"(?![A-Za-z0-9._%+-])"
)
# ---------------------------------------------------------------------------
# Paste services — try paste.rs first, dpaste.com as fallback.
@ -398,7 +405,8 @@ def _redact_log_text(text: str) -> str:
return text
from agent.redact import redact_sensitive_text
return redact_sensitive_text(text, force=True)
text = redact_sensitive_text(text, force=True)
return _EMAIL_ADDRESS_RE.sub("[REDACTED_EMAIL]", text)
def _capture_log_snapshot(

View file

@ -207,14 +207,69 @@ def _fail_and_issue(text: str, detail: str, fix: str, issues: list[str]) -> None
issues.append(fix)
def _check_s6_supervision(issues: list[str]) -> None:
    """Report s6 supervision state when running inside the s6 container.

    Counterpart to :func:`_check_gateway_service_linger` for the
    systemd-on-host case. Does nothing unless the service-manager helpers
    import successfully and ``detect_service_manager()`` returns ``"s6"``,
    so host runs are not cluttered with irrelevant output.

    Reports:
        - whether the ``main-hermes`` and ``dashboard`` static services
          are up;
        - how many per-profile gateway slots are registered (via
          ``S6ServiceManager.list_profile_gateways()``) and how many of
          them are currently running.

    Args:
        issues: Accepted for signature parity with the other doctor
            checks; this check only prints and never appends to it.
    """
    try:
        from hermes_cli.service_manager import (
            S6ServiceManager,
            detect_service_manager,
        )
    except Exception:
        return

    if detect_service_manager() != "s6":
        return

    _section("s6 Supervision")
    manager = S6ServiceManager()

    # Static services live under /run/service/ via s6-rc symlinks, so the
    # same is_running probe covers them.
    for service_name in ("main-hermes", "dashboard"):
        if not manager.is_running(service_name):
            check_info(f"{service_name}: down (expected if not enabled via env)")
            continue
        check_ok(f"{service_name}: up")

    gateway_profiles = manager.list_profile_gateways()
    if not gateway_profiles:
        check_info("No per-profile gateways registered yet — create one with `hermes profile create <name>`")
        return

    running = 0
    for name in gateway_profiles:
        if manager.is_running(f"gateway-{name}"):
            running += 1

    summary = f"Per-profile gateways: {running}/{len(gateway_profiles)} supervised up"
    # Names are listed only for small installs to keep the line readable.
    if len(gateway_profiles) <= 8:
        summary += f" ({', '.join(sorted(gateway_profiles))})"
    check_ok(summary)
def _check_gateway_service_linger(issues: list[str]) -> None:
"""Warn when a systemd user gateway service will stop after logout."""
"""Warn when a systemd user gateway service will stop after logout.
Skipped inside a container running under s6 the linger concept
(user-systemd surviving SSH logout) doesn't apply there, and the
s6 supervision state is surfaced separately by
``_check_s6_supervision``.
"""
try:
from hermes_cli.gateway import (
get_systemd_linger_status,
get_systemd_unit_path,
is_linux,
)
from hermes_cli.service_manager import detect_service_manager
except Exception as e:
check_warn("Gateway service linger", f"(could not import gateway helpers: {e})")
return
@ -222,6 +277,12 @@ def _check_gateway_service_linger(issues: list[str]) -> None:
if not is_linux():
return
# Inside a container under our s6 /init, _check_s6_supervision
# reports the live supervision state; the linger warning would be
# confusing here (no systemd, no logout, no "lingering" concept).
if detect_service_manager() == "s6":
return
unit_path = get_systemd_unit_path()
if not unit_path.exists():
return
@ -508,6 +569,13 @@ def run_doctor(args):
if should_fix:
env_path.parent.mkdir(parents=True, exist_ok=True)
env_path.touch()
# .env holds API keys — restrict to owner-only access from
# creation. touch() obeys umask which is commonly 0o022,
# leaving the file world-readable; tighten explicitly.
try:
os.chmod(str(env_path), 0o600)
except OSError:
pass
check_ok(f"Created empty {_DHH}/.env")
check_info("Run 'hermes setup' to configure API keys")
fixed_count += 1
@ -744,7 +812,18 @@ def run_doctor(args):
"(should be under 'model:' section)"
)
if should_fix:
model_section = raw_config.setdefault("model", {})
# Coerce scalar/None ``model:`` into a dict before mutation —
# ``setdefault("model", {})`` would return an existing scalar
# and then ``model_section[k] = ...`` would raise TypeError.
raw_model = raw_config.get("model")
if isinstance(raw_model, dict):
model_section = raw_model
elif isinstance(raw_model, str) and raw_model.strip():
model_section = {"default": raw_model.strip()}
raw_config["model"] = model_section
else:
model_section = {}
raw_config["model"] = model_section
for k in stale_root_keys:
if not model_section.get(k):
model_section[k] = raw_config.pop(k)
@ -984,6 +1063,7 @@ def run_doctor(args):
pass
_check_gateway_service_linger(issues)
_check_s6_supervision(issues)
if sys.platform != "win32":
_section("Command Installation")
@ -1076,6 +1156,26 @@ def run_doctor(args):
# Docker (optional)
terminal_env = os.getenv("TERMINAL_ENV", "local")
try:
from hermes_constants import is_container as _is_container
running_in_container = _is_container()
except Exception:
running_in_container = False
if running_in_container:
# Inside our container the Docker terminal backend is not
# configured by default (Docker-in-Docker isn't set up); the
# local backend is the intended one. Skip the noisy "docker
# not found" warning. If the user has explicitly chosen
# TERMINAL_ENV=docker inside the container they likely mounted
# /var/run/docker.sock, so fall through to the normal check.
if terminal_env != "docker":
check_info(
"Running inside a container — using local terminal backend "
"(docker-in-docker is not configured by default)"
)
# Skip to next section; Docker isn't relevant here.
terminal_env = "local"
if terminal_env == "docker":
if _safe_which("docker"):
# Check if docker daemon is running
@ -1098,6 +1198,8 @@ def run_doctor(args):
check_ok("docker", "(optional)")
elif _is_termux():
check_info("Docker backend is not available inside Termux (expected on Android)")
elif running_in_container:
pass # already explained above
else:
check_warn("docker not found", "(optional)")

View file

@ -16,6 +16,7 @@ from pathlib import Path
from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config
from hermes_cli.env_loader import load_hermes_dotenv
from hermes_constants import display_hermes_home
from agent.skill_utils import is_excluded_skill_path
def _get_git_commit(project_root: Path) -> str:
@ -69,6 +70,8 @@ def _count_skills(hermes_home: Path) -> int:
return 0
count = 0
for item in skills_dir.rglob("SKILL.md"):
if is_excluded_skill_path(item):
continue
count += 1
return count

View file

@ -21,6 +21,68 @@ _CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
# tests) don't spam the same warning multiple times.
_WARNED_KEYS: set[str] = set()
# Map of env-var name → source label ("bitwarden", etc.) for credentials
# that were injected by an external secret source during load_hermes_dotenv().
# Used by setup / `hermes model` flows to label detected credentials so
# users understand WHERE a key came from when their .env doesn't contain it
# directly (otherwise the "credentials detected ✓" line looks identical to
# the .env case and they don't know Bitwarden is wired up).
_SECRET_SOURCES: dict[str, str] = {}
# HERMES_HOME paths we've already pulled external secrets for during this
# process. ``load_hermes_dotenv()`` is called at module-import time from
# several hot modules (cli.py, hermes_cli/main.py, run_agent.py,
# trajectory_compressor.py, gateway/run.py, ...), so without this guard the
# Bitwarden status line gets printed 3-5x per startup. Bitwarden's own
# in-process cache prevents redundant network calls, but the print, the
# config re-parse, and the ASCII sanitization sweep still ran every time.
_APPLIED_HOMES: set[str] = set()
def get_secret_source(env_var: str) -> str | None:
    """Look up which external secret source supplied ``env_var``.

    Yields ``"bitwarden"`` for keys pulled from Bitwarden Secrets Manager
    during this process's ``load_hermes_dotenv()`` call, and ``None`` for
    keys that came from ``.env``, the shell environment, or are not
    tracked at all. The label is metadata only: credential-pool
    persistence may store it to explain where a borrowed secret came
    from, but must never treat it as authorization to persist the raw
    value.
    """
    try:
        return _SECRET_SOURCES[env_var]
    except KeyError:
        return None
def reset_secret_source_cache() -> None:
    """Forget which HERMES_HOME paths have already had external secrets applied.

    The first call to ``_apply_external_secret_sources(home_path)`` in a
    process pulls from Bitwarden (or other configured backend), records the
    applied keys in ``_SECRET_SOURCES``, and remembers ``home_path`` so
    subsequent calls in the same process are no-ops. Call this to force the
    next call to re-pull — useful for tests, and for long-running processes
    that want to refresh after a config change.

    Only the set of applied homes is cleared; the per-variable source
    labels in ``_SECRET_SOURCES`` are left untouched.
    """
    _APPLIED_HOMES.clear()
def format_secret_source_suffix(env_var: str) -> str:
    """Build a display suffix such as ``" (from Bitwarden)"`` for ``env_var``.

    Intended for lines that announce a detected credential, so the user
    can see where it came from. The result is ``""`` when the credential
    came from ``.env`` or the shell — the implicit cases users already
    understand.
    """
    label = get_secret_source(env_var)
    if label == "bitwarden":
        return " (from Bitwarden)"
    # Generic form for any other source (e.g. 1Password, HashiCorp Vault)
    # so new backends need no call-site changes; empty when untracked.
    return f" (from {label})" if label else ""
def _format_offending_chars(value: str, limit: int = 3) -> str:
"""Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
@ -102,6 +164,10 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
This produces mangled values e.g. a bot token duplicated 8×
(see #8908).
Also strips embedded null bytes, which crash ``os.environ[k] = v``
with ``ValueError: embedded null byte`` — typically introduced by
copy-pasting API keys from terminals or rich-text editors.
We delegate to ``hermes_cli.config._sanitize_env_lines`` which
already knows all valid Hermes env-var names and can split
concatenated lines correctly.
@ -117,7 +183,11 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
try:
with open(path, **read_kw) as f:
original = f.readlines()
sanitized = _sanitize_env_lines(original)
# Strip null bytes before _sanitize_env_lines so they never
# reach python-dotenv (which passes them to os.environ and
# crashes with ValueError).
stripped = [line.replace("\x00", "") for line in original]
sanitized = _sanitize_env_lines(stripped)
if sanitized != original:
import tempfile
fd, tmp = tempfile.mkstemp(
@ -172,4 +242,103 @@ def load_hermes_dotenv(
_load_dotenv_with_fallback(project_env_path, override=not loaded)
loaded.append(project_env_path)
_apply_external_secret_sources(home_path)
return loaded
def _apply_external_secret_sources(home_path: Path) -> None:
    """Pull secrets from external sources (currently Bitwarden) into env.

    Runs AFTER dotenv loads so .env values are visible (we use them to
    locate the access token) but BEFORE the rest of Hermes reads
    ``os.environ`` for credentials. External secret sources must never
    block startup: a malformed ``secrets:`` section, a bad option value,
    or an exception from the backend is reported on stderr (or silently
    skipped) instead of propagating.

    Idempotent within a process: subsequent calls for the same
    ``home_path`` are no-ops. ``load_hermes_dotenv()`` runs at import
    time from several hot modules (cli.py, hermes_cli/main.py,
    run_agent.py, trajectory_compressor.py, ...), so without this guard
    the Bitwarden status line would print 3-5x per CLI startup. Use
    ``reset_secret_source_cache()`` if you need to force a re-pull
    (tests, future ``hermes secrets bitwarden sync`` from a long-running
    process).

    Args:
        home_path: HERMES_HOME directory whose ``config.yaml`` holds the
            ``secrets:`` section.
    """
    home_key = str(Path(home_path).resolve())
    if home_key in _APPLIED_HOMES:
        return
    # Mark as applied up front so a failing backend is not retried (and
    # its error re-printed) by every later import-time call.
    _APPLIED_HOMES.add(home_key)

    try:
        cfg = _load_secrets_config(home_path)
    except Exception:  # noqa: BLE001 — config errors must not block startup
        return

    # Hand-edited YAML can put a scalar or list where a mapping belongs;
    # treat anything that is not a mapping as "not configured".
    if not isinstance(cfg, dict):
        return
    bw_cfg = cfg.get("bitwarden")
    if not isinstance(bw_cfg, dict) or not bw_cfg.get("enabled"):
        return

    try:
        from agent.secret_sources.bitwarden import apply_bitwarden_secrets
    except ImportError:
        return

    try:
        cache_ttl = float(bw_cfg.get("cache_ttl_seconds", 300))
    except (TypeError, ValueError):
        print(
            " Bitwarden Secrets Manager: invalid cache_ttl_seconds; using 300",
            file=sys.stderr,
        )
        cache_ttl = 300.0

    try:
        result = apply_bitwarden_secrets(
            enabled=True,
            access_token_env=bw_cfg.get("access_token_env", "BWS_ACCESS_TOKEN"),
            project_id=bw_cfg.get("project_id", ""),
            # This reader sees the raw YAML with no defaults merged in, so
            # the fallback must match the documented DEFAULT_CONFIG value
            # (True): BSM is the source of truth for rotation.
            override_existing=bool(bw_cfg.get("override_existing", True)),
            cache_ttl_seconds=cache_ttl,
            auto_install=bool(bw_cfg.get("auto_install", True)),
            server_url=str(bw_cfg.get("server_url", "") or "").strip(),
            home_path=home_path,
        )
    except Exception as exc:  # noqa: BLE001 — backend errors must not block startup
        print(
            f" Bitwarden Secrets Manager: {exc}",
            file=sys.stderr,
        )
        return

    if result.applied:
        # Re-run the ASCII sanitization pass: BSM values are user-supplied
        # and might have the same copy-paste corruption as a manually
        # edited .env (see #6843).
        _sanitize_loaded_credentials()
        # Remember where these came from so the setup / `hermes model`
        # flows can label detected credentials with "(from Bitwarden)" —
        # otherwise users see "credentials ✓" with no hint that the value
        # came from BSM rather than .env.
        for name in result.applied:
            _SECRET_SOURCES[name] = "bitwarden"
        print(
            f" Bitwarden Secrets Manager: applied {len(result.applied)} "
            f"secret{'s' if len(result.applied) != 1 else ''} "
            f"({', '.join(sorted(result.applied))})",
            file=sys.stderr,
        )
    if result.error:
        print(
            f" Bitwarden Secrets Manager: {result.error}",
            file=sys.stderr,
        )
    for warn in result.warnings:
        print(
            f" Bitwarden Secrets Manager: {warn}",
            file=sys.stderr,
        )
def _load_secrets_config(home_path: Path) -> dict:
    """Read just the ``secrets:`` section out of config.yaml.

    Imported lazily and isolated from the main config loader so a
    malformed config can't take down dotenv loading entirely.

    Args:
        home_path: HERMES_HOME directory expected to contain
            ``config.yaml``.

    Returns:
        The ``secrets`` mapping, or ``{}`` when the file is missing,
        PyYAML is not installed, the file cannot be read or parsed, the
        document root is not a mapping, or ``secrets`` is not a mapping.
    """
    config_path = home_path / "config.yaml"
    if not config_path.exists():
        return {}
    try:
        import yaml  # type: ignore
    except ImportError:
        return {}
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception:  # noqa: BLE001
        return {}
    # A YAML document whose root is a list or scalar has no .get(); treat
    # it like an empty config rather than raising AttributeError.
    if not isinstance(data, dict):
        return {}
    secrets = data.get("secrets")
    # Honor the declared return type even if `secrets:` was hand-edited
    # into a list or scalar.
    return secrets if isinstance(secrets, dict) else {}

View file

@ -21,6 +21,8 @@ from __future__ import annotations
import copy
from typing import Any, Dict, List, Optional
from hermes_cli.fallback_config import get_fallback_chain
# ---------------------------------------------------------------------------
# Helpers
@ -30,20 +32,11 @@ def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Return the normalized fallback chain as a list of dicts.
Accepts both the new list format (``fallback_providers``) and the legacy
single-dict format (``fallback_model``). The returned list is always a
fresh copy callers can mutate without touching the config dict.
``fallback_model`` format. When both are present, the effective chain is
merged with ``fallback_providers`` entries kept first. The returned list is
always a fresh copy callers can mutate without touching the config dict.
"""
chain = config.get("fallback_providers") or []
if isinstance(chain, list):
result = [dict(e) for e in chain if isinstance(e, dict) and e.get("provider") and e.get("model")]
if result:
return result
legacy = config.get("fallback_model")
if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"):
return [dict(legacy)]
if isinstance(legacy, list):
return [dict(e) for e in legacy if isinstance(e, dict) and e.get("provider") and e.get("model")]
return []
return get_fallback_chain(config)
def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:

View file

@ -0,0 +1,72 @@
"""Helpers for reading the effective fallback provider chain from config."""
from __future__ import annotations
from typing import Any
def _normalized_base_url(value: Any) -> str:
    """Return *value* without surrounding whitespace or trailing slashes.

    Anything that is not a ``str`` normalizes to the empty string.
    """
    return value.strip().rstrip("/") if isinstance(value, str) else ""
def _iter_fallback_entries(raw: Any) -> list[dict[str, Any]]:
    """Normalize a raw fallback config value into a list of entry dicts.

    ``raw`` may be a single dict or a list; any other type yields ``[]``.
    List items that are not dicts, or whose ``provider`` / ``model`` is
    empty after stripping, are dropped. Each kept entry is a shallow copy
    with stripped string ``provider`` / ``model`` values, and with
    ``base_url`` replaced by its normalized form when that form is non-empty.
    """
    if isinstance(raw, dict):
        raw = [raw]
    elif not isinstance(raw, list):
        return []

    kept: list[dict[str, Any]] = []
    for item in raw:
        if not isinstance(item, dict):
            continue
        cleaned = {
            field: str(item.get(field) or "").strip()
            for field in ("provider", "model")
        }
        if not all(cleaned.values()):
            continue
        url = _normalized_base_url(item.get("base_url"))
        if url:
            cleaned["base_url"] = url
        # Overlay the cleaned fields on a copy so the input is never mutated.
        kept.append({**item, **cleaned})
    return kept
def _entry_identity(entry: dict[str, Any]) -> tuple[str, str, str]:
    """Return the lowercase ``(provider, model, base_url)`` key for *entry*.

    Provider and model are stringified and stripped; the base URL goes
    through ``_normalized_base_url`` before lowercasing.
    """
    provider, model = (
        str(entry.get(field) or "").strip().lower()
        for field in ("provider", "model")
    )
    base_url = _normalized_base_url(entry.get("base_url")).lower()
    return provider, model, base_url
def get_fallback_chain(config: dict[str, Any] | None) -> list[dict[str, Any]]:
    """Return the effective fallback chain merged across old and new config keys.

    ``fallback_providers`` entries come first, in their configured order.
    Legacy ``fallback_model`` entries follow, except those whose
    provider/model/base_url identity matches an entry already in the chain.
    A ``None`` or empty config yields ``[]``. Every returned dict is a fresh
    copy produced by ``_iter_fallback_entries``.
    """
    if not config:
        config = {}
    # Dicts keep insertion order, so ``setdefault`` retains the first entry
    # seen for each identity and ignores later duplicates.
    by_identity: dict[tuple[str, str, str], dict[str, Any]] = {}
    for key in ("fallback_providers", "fallback_model"):
        for entry in _iter_fallback_entries(config.get(key)):
            by_identity.setdefault(_entry_identity(entry), entry)
    return list(by_identity.values())

View file

@ -981,6 +981,18 @@ def get_gateway_runtime_snapshot(system: bool = False) -> GatewayRuntimeSnapshot
from hermes_constants import is_container
if is_linux() and is_container():
# Phase 4: report s6 supervision when running under our /init.
# Other container runtimes (or containers built before Phase 2)
# still get the original "docker (foreground)" label.
try:
from hermes_cli.service_manager import detect_service_manager
if detect_service_manager() == "s6":
return GatewayRuntimeSnapshot(
manager="s6 (container supervisor)",
gateway_pids=gateway_pids,
)
except Exception:
pass # Fall through to the legacy label on any detection error.
return GatewayRuntimeSnapshot(
manager="docker (foreground)",
gateway_pids=gateway_pids,
@ -1202,7 +1214,17 @@ def _systemd_operational(system: bool = False) -> bool:
def _container_systemd_operational() -> bool:
"""Return True when a container exposes working user or system systemd."""
"""Return True when a container exposes working user or system systemd.
This is NOT our Hermes Docker image — that one runs s6-overlay as
PID 1 (since Phase 2 of the s6-overlay supervision plan) and is
detected via ``service_manager.detect_service_manager() == "s6"``.
This function handles the "container managed by something else"
case: systemd-nspawn, certain k8s pods, containers built FROM
systemd-bearing distros where the user has wired systemd as their
init. In those environments systemctl behaves identically to the
host case, so we fall through to the normal systemd code paths.
"""
if _systemd_operational(system=False):
return True
if _systemd_operational(system=True):
@ -3327,34 +3349,9 @@ _PLATFORMS = [
"help": "For DMs, this is your user ID. You can set it later by typing /set-home in chat."},
],
},
{
"key": "discord",
"label": "Discord",
"emoji": "💬",
"token_var": "DISCORD_BOT_TOKEN",
"setup_instructions": [
"1. Go to https://discord.com/developers/applications → New Application",
"2. Go to Bot → Reset Token → copy the bot token",
"3. Enable: Bot → Privileged Gateway Intents → Message Content Intent",
"4. Invite the bot to your server:",
" OAuth2 → URL Generator → check BOTH scopes:",
" - bot",
" - applications.commands (required for slash commands!)",
" Bot Permissions: Send Messages, Read Message History, Attach Files",
" Copy the URL and open it in your browser to invite.",
"5. Get your user ID: enable Developer Mode in Discord settings,",
" then right-click your name → Copy ID",
],
"vars": [
{"name": "DISCORD_BOT_TOKEN", "prompt": "Bot token", "password": True,
"help": "Paste the token from step 2 above."},
{"name": "DISCORD_ALLOWED_USERS", "prompt": "Allowed user IDs or usernames (comma-separated)", "password": False,
"is_allowlist": True,
"help": "Paste your user ID from step 5 above."},
{"name": "DISCORD_HOME_CHANNEL", "prompt": "Home channel ID (for cron/notification delivery, or empty to set later with /set-home)", "password": False,
"help": "Right-click a channel → Copy Channel ID (requires Developer Mode)."},
],
},
# Discord moved to plugins/platforms/discord/ — its setup metadata is
# discovered dynamically via _all_platforms() from the platform registry
# entry registered by plugins/platforms/discord/adapter.py::register().
{
"key": "slack",
"label": "Slack",
@ -3762,7 +3759,12 @@ def _platform_status(platform: dict) -> str:
configured = bool(entry.is_connected(synthetic))
except Exception:
configured = False
if not configured:
else:
# No is_connected hook — fall back to check_fn as a coarse
# "are deps present" gate. Don't fall back when is_connected
# is defined and returned False; that would let "SDK is
# installed" override "no token configured" and incorrectly
# report the platform as ready.
try:
configured = bool(entry.check_fn())
except Exception:
@ -4018,15 +4020,11 @@ def _setup_dingtalk():
client_id, client_secret = result
save_env_value("DINGTALK_CLIENT_ID", client_id)
save_env_value("DINGTALK_CLIENT_SECRET", client_secret)
save_env_value("DINGTALK_ALLOW_ALL_USERS", "true")
print()
print_success(f"{emoji} {label} configured via QR scan!")
else:
# ── Manual entry ──
_setup_standard_platform(dingtalk_platform)
# Also enable allow-all by default for convenience
if get_env_value("DINGTALK_CLIENT_ID"):
save_env_value("DINGTALK_ALLOW_ALL_USERS", "true")
def _setup_wecom():
@ -4747,10 +4745,14 @@ def _builtin_setup_fn(key: str):
from hermes_cli import setup as _s
return {
"telegram": _s._setup_telegram,
"discord": _s._setup_discord,
# discord moved into the plugin: setup_fn is registered by
# plugins/platforms/discord/adapter.py::register() and dispatched
# via the plugin path in _configure_platform().
"slack": _s._setup_slack,
"matrix": _s._setup_matrix,
"mattermost": _s._setup_mattermost,
# mattermost moved into the plugin: setup_fn is registered by
# plugins/platforms/mattermost/adapter.py::register() and dispatched
# via the plugin path in _configure_platform().
"bluebubbles": _s._setup_bluebubbles,
"webhooks": _s._setup_webhooks,
"signal": _setup_signal,
@ -5025,6 +5027,108 @@ def gateway_setup():
# Main Command Handler
# =============================================================================
def _dispatch_via_service_manager_if_s6(
    action: str, profile: str | None = None,
) -> bool:
    """Route a gateway lifecycle action through s6 when s6 is the service manager.

    Returns True iff the action was dispatched (the caller should then
    ``return``). Returns False when the detected service manager is not
    ``"s6"`` or ``action`` is not one of ``start`` / ``stop`` / ``restart``,
    so the caller continues with the host-side code path.

    When ``profile`` is None the current profile is used. The target service
    is named ``gateway-<profile>``.

    ``GatewayNotRegisteredError`` and ``S6CommandError`` from
    :mod:`hermes_cli.service_manager` are printed and the process exits with
    status 1, so the user sees a message rather than a traceback.
    """
    from hermes_cli.service_manager import (
        GatewayNotRegisteredError,
        S6CommandError,
        detect_service_manager,
        get_service_manager,
    )

    if detect_service_manager() != "s6":
        return False

    if profile is None:
        # _profile_suffix() returns the bare profile name for
        # HERMES_HOME=<root>/profiles/<name>, "" for the default root,
        # or a hash for unrelated paths. Map "" → "default" so the
        # default-profile gateway is reachable as gateway-default.
        profile = _profile_suffix() or "default"

    manager = get_service_manager()
    slot = f"gateway-{profile}"

    if action not in ("start", "stop", "restart"):
        return False
    try:
        # The action names match the manager's method names one-to-one.
        getattr(manager, action)(slot)
    except (GatewayNotRegisteredError, S6CommandError) as exc:
        print(f"{exc}")
        sys.exit(1)
    return True
def _dispatch_all_via_service_manager_if_s6(action: str) -> bool:
    """Apply an ``--all`` stop/restart to every profile gateway registered with s6.

    Returns True iff the request was handled here (the caller should then
    ``return``). Returns False when the detected service manager is not
    ``"s6"`` or ``action`` is not ``stop`` / ``restart``, so the caller
    continues with the host-side code path.

    Without this, ``hermes gateway stop --all`` and ``... restart --all``
    fall through to ``kill_gateway_processes(all_profiles=True)``. Under s6
    that only kicks each gateway, because the supervisor restarts the killed
    process. Sending the command through the service manager for each entry
    of ``list_profile_gateways()`` gives the intended semantics.

    A failure on one gateway is collected and reported after the loop, so it
    does not prevent the remaining gateways from being processed.
    """
    from hermes_cli.service_manager import (
        detect_service_manager,
        get_service_manager,
    )

    if detect_service_manager() != "s6" or action not in ("stop", "restart"):
        return False

    manager = get_service_manager()
    profiles = manager.list_profile_gateways()
    if not profiles:
        print("✗ No profile gateways registered under s6")
        return True

    stopping = action == "stop"
    operate = manager.stop if stopping else manager.restart
    errors: list[tuple[str, Exception]] = []
    for profile in profiles:
        try:
            operate(f"gateway-{profile}")
        except Exception as exc:  # noqa: BLE001 — report and continue
            errors.append((profile, exc))

    succeeded = len(profiles) - len(errors)
    if succeeded:
        verb = "stopped" if stopping else "restarted"
        print(f"{verb.capitalize()} {succeeded} profile gateway(s) under s6")
    for profile, exc in errors:
        print(f"✗ Could not {action} gateway-{profile}: {exc}")
    return True
def gateway_command(args):
"""Handle gateway subcommands."""
try:
@ -5109,6 +5213,21 @@ def _gateway_command_inner(args):
print(" nohup hermes gateway run > ~/.hermes/logs/gateway.log 2>&1 & # background")
sys.exit(1)
elif is_container():
# Phase 4: inside a container with s6 the gateway service is
# auto-registered when the profile is created (and reconciled
# at every container boot). `install` is therefore informational.
from hermes_cli.service_manager import detect_service_manager
if detect_service_manager() == "s6":
print("Per-profile gateways are auto-registered when you create a profile.")
print()
print(" hermes profile create <name> # creates the s6 service slot")
print(" hermes -p <name> gateway start # bring it up via s6")
print(" hermes status # see currently-supervised gateways")
return
# Fallback for pre-s6 containers or other container runtimes
# we haven't taught about supervision (Podman without our
# /init, k8s plain runs, etc.) — the historical guidance still
# applies.
print("Service installation is not needed inside a Docker container.")
print("The container runtime is your service manager — use Docker restart policies instead:")
print()
@ -5139,6 +5258,13 @@ def _gateway_command_inner(args):
from hermes_cli import gateway_windows
gateway_windows.uninstall()
elif is_container():
from hermes_cli.service_manager import detect_service_manager
if detect_service_manager() == "s6":
print("Per-profile gateways are auto-unregistered when you delete the profile.")
print()
print(" hermes profile delete <name> # tears down the s6 service slot")
print(" hermes -p <name> gateway stop # stop without deleting the profile")
return
print("Service uninstall is not applicable inside a Docker container.")
print("To stop the gateway, stop or remove the container:")
print()
@ -5153,6 +5279,14 @@ def _gateway_command_inner(args):
system = getattr(args, 'system', False)
start_all = getattr(args, 'all', False)
# Phase 4: inside a container with s6, dispatch via the service
# manager instead of falling through to systemd/launchd/windows.
# `--all` isn't meaningful here (each profile has its own service
# slot — start them individually via `hermes -p <name> gateway
# start`), so just bring up the current profile's slot.
if not start_all and _dispatch_via_service_manager_if_s6("start"):
return
if start_all:
# Kill all stale gateway processes across all profiles before starting
killed = kill_gateway_processes(all_profiles=True)
@ -5182,6 +5316,11 @@ def _gateway_command_inner(args):
print("To enable systemd: add systemd=true to /etc/wsl.conf and run 'wsl --shutdown' from PowerShell.")
sys.exit(1)
elif is_container():
# Reached only when s6 ISN'T running (the early dispatch
# above handles the s6 case). Pre-s6 containers or other
# container runtimes that don't ship our /init get the
# historical guidance: the gateway is the container's main
# process, so use docker lifecycle commands.
print("Service start is not applicable inside a Docker container.")
print("The gateway runs as the container's main process.")
print()
@ -5198,6 +5337,15 @@ def _gateway_command_inner(args):
stop_all = getattr(args, 'all', False)
system = getattr(args, 'system', False)
# Phase 4: inside a container with s6, dispatch via the service
# manager. ``--all`` iterates every registered profile gateway
# through s6 (otherwise it would fall through to ``pkill``,
# which s6-supervise observes as a crash and immediately restarts).
if stop_all and _dispatch_all_via_service_manager_if_s6("stop"):
return
if not stop_all and _dispatch_via_service_manager_if_s6("stop"):
return
if stop_all:
# --all: kill every gateway process on the machine
service_available = False
@ -5267,6 +5415,16 @@ def _gateway_command_inner(args):
restart_all = getattr(args, 'all', False)
service_configured = False
# Phase 4: inside a container with s6, dispatch via the service
# manager (s6-svc -t restarts the supervised process). ``--all``
# iterates every registered profile gateway through s6; without
# this it would fall through to ``pkill``, which s6-supervise
# would observe as a crash and immediately restart anyway.
if restart_all and _dispatch_all_via_service_manager_if_s6("restart"):
return
if not restart_all and _dispatch_via_service_manager_if_s6("restart"):
return
if restart_all:
# --all: stop every gateway process across all profiles, then start fresh
service_stopped = False

View file

@ -365,7 +365,9 @@ def _write_task_script() -> Path:
content = _build_gateway_cmd_script(python_path, working_dir, hermes_home, profile_arg)
script_path = get_task_script_path()
script_path.write_text(content, encoding="utf-8", newline="")
tmp = script_path.with_suffix(".tmp")
tmp.write_text(content, encoding="utf-8", newline="")
tmp.replace(script_path)
return script_path
@ -436,7 +438,9 @@ def _install_startup_entry(script_path: Path) -> Path:
"""Write the Startup-folder fallback launcher. Returns its path."""
entry = get_startup_entry_path()
entry.parent.mkdir(parents=True, exist_ok=True)
entry.write_text(_build_startup_launcher(script_path), encoding="utf-8", newline="")
tmp = entry.with_suffix(".tmp")
tmp.write_text(_build_startup_launcher(script_path), encoding="utf-8", newline="")
tmp.replace(entry)
return entry

View file

@ -550,6 +550,39 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu
p_unblock = sub.add_parser("unblock", help="Return one or more blocked/scheduled tasks to ready")
p_unblock.add_argument("task_ids", nargs="+")
p_promote = sub.add_parser(
"promote",
help="Manually move one or more todo/blocked tasks to ready (recovery path)",
)
p_promote.add_argument("task_id")
p_promote.add_argument(
"reason",
nargs="*",
help="Audit-trail reason (recorded on the task_events row)",
)
p_promote.add_argument(
"--ids",
nargs="+",
default=None,
help="Additional task ids to promote with the same reason (bulk mode)",
)
p_promote.add_argument(
"--force",
action="store_true",
help="Promote even if parent dependencies are not yet done/archived",
)
p_promote.add_argument(
"--dry-run",
action="store_true",
help="Validate the promotion without mutating state",
)
p_promote.add_argument(
"--json",
dest="json",
action="store_true",
help="Emit machine-readable JSON result",
)
p_archive = sub.add_parser("archive", help="Archive one or more tasks")
p_archive.add_argument("task_ids", nargs="*",
help="Task ids to archive (default mode)")
@ -899,6 +932,7 @@ def kanban_command(args: argparse.Namespace) -> int:
"block": _cmd_block,
"schedule": _cmd_schedule,
"unblock": _cmd_unblock,
"promote": _cmd_promote,
"archive": _cmd_archive,
"tail": _cmd_tail,
"dispatch": _cmd_dispatch,
@ -1955,6 +1989,57 @@ def _cmd_unblock(args: argparse.Namespace) -> int:
return 0 if not failed else 1
def _cmd_promote(args: argparse.Namespace) -> int:
    """Handle ``kanban promote``: move one or more tasks to ``ready``.

    Promotes the positional ``task_id`` plus any ``--ids`` extras through
    ``kb.promote_task``, passing along the joined ``reason`` words and the
    ``--force`` / ``--dry-run`` flags. Duplicate ids are dropped and the
    first-seen order is kept.

    With ``--json`` the result is printed as JSON: a flat object for one id,
    a list for several. Otherwise one line is printed per task, with refusals
    going to stderr.

    Returns:
        0 when every id was promoted, 1 if any was refused.
    """
    reason = " ".join(args.reason).strip() if args.reason else None
    author = _profile_author()
    as_json = getattr(args, "json", False)
    # dict.fromkeys dedupes while keeping first-seen order, so the positional
    # task_id always stays first.
    ids: list[str] = list(
        dict.fromkeys([args.task_id, *(getattr(args, "ids", None) or [])])
    )

    results: list[dict[str, object]] = []
    with kb.connect() as conn:
        for tid in ids:
            promoted, error = kb.promote_task(
                conn,
                tid,
                actor=author,
                reason=reason,
                force=bool(args.force),
                dry_run=bool(args.dry_run),
            )
            results.append({
                "task_id": tid,
                "promoted": promoted,
                "dry_run": bool(args.dry_run),
                "forced": bool(args.force),
                "reason": reason,
                "error": error,
            })

    exit_code = 1 if any(not r["promoted"] for r in results) else 0

    if as_json:
        # Single-id stays a flat object for back-compat; bulk emits a list.
        payload: object = results if len(results) != 1 else results[0]
        print(json.dumps(payload, indent=2, ensure_ascii=False))
        return exit_code

    if args.dry_run:
        tag, label = " (dry)", "Would promote"
    else:
        tag, label = "", "Promoted"
    suffix = f": {reason}" if reason else ""
    for r in results:
        if not r["promoted"]:
            print(f"cannot promote {r['task_id']}: {r['error']}", file=sys.stderr)
            continue
        print(f"{label} {r['task_id']} -> ready{tag}{suffix}")
    return exit_code
def _cmd_archive(args: argparse.Namespace) -> int:
ids = list(args.task_ids or [])
purge_ids = list(getattr(args, "purge_ids", None) or [])

View file

@ -75,6 +75,7 @@ import json
import os
import re
import secrets
import shutil
import sqlite3
import subprocess
import sys
@ -82,6 +83,7 @@ import threading
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Iterable, Optional
@ -1005,6 +1007,131 @@ def _validate_sqlite_header(path: Path) -> None:
)
class KanbanDbCorruptError(RuntimeError):
    """Raised when an existing kanban DB file fails integrity checks.

    Fail-closed guard against silently recreating a corrupt board file,
    which would otherwise destroy the user's tasks.

    Attributes are set from the constructor arguments: ``db_path`` (the DB
    that failed), ``backup_path`` (the backup copy, or ``None`` when no
    backup could be made) and ``reason`` (why the DB was rejected).
    """

    def __init__(self, db_path: Path, backup_path: Optional[Path], reason: str):
        if backup_path is None:
            backup_str = "<backup failed>"
        else:
            backup_str = str(backup_path)
        message = (
            f"Refusing to open corrupt kanban DB at {db_path}: {reason}. "
            f"Original preserved; backup at {backup_str}."
        )
        super().__init__(message)
        self.db_path = db_path
        self.backup_path = backup_path
        self.reason = reason
def _backup_corrupt_db(path: Path) -> Optional[Path]:
    """Copy a corrupt DB (and its WAL/SHM sidecars) to a timestamped backup.

    The backup is written next to the resolved DB file as
    ``<name>.corrupt.<YYYYmmdd_HHMMSS>.bak``. If that name is taken, a
    counter is inserted before ``.bak``. Existing ``-wal`` / ``-shm``
    sidecars are copied to the backup name plus the same suffix; a failed
    sidecar copy is ignored.

    Returns:
        The backup path of the main DB file, or ``None`` if the main copy
        failed (the caller still raises loudly in that case).
    """
    # Resolve once and pin the directory. Every path written below is built
    # from that directory plus a bare file name, so nothing can land
    # elsewhere.
    source = path.resolve()
    folder = source.parent
    db_name = source.name  # basename only
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    attempt = 0
    target = folder / f"{db_name}.corrupt.{stamp}.bak"
    while True:
        # Defensive containment check, kept explicit so static analyzers can
        # see the guarantee even though a basename cannot escape ``folder``.
        if target.parent != folder:
            return None
        if not target.exists():
            break
        attempt += 1
        target = folder / f"{db_name}.corrupt.{stamp}.{attempt}.bak"

    try:
        shutil.copy2(source, target)
    except OSError:
        return None

    for suffix in ("-wal", "-shm"):
        sidecar = folder / (db_name + suffix)
        if sidecar.parent != folder or not sidecar.exists():
            continue
        sidecar_target = folder / (target.name + suffix)
        if sidecar_target.parent != folder:
            continue
        try:
            shutil.copy2(sidecar, sidecar_target)
        except OSError:
            # Sidecars are best-effort; the main backup already succeeded.
            pass
    return target
def _guard_existing_db_is_healthy(path: Path) -> None:
    """Run ``PRAGMA integrity_check`` on an existing non-empty DB file.

    The probe connection is opened read/write so SQLite can recover or
    checkpoint a healthy WAL/hot-journal DB before it is declared corrupt.
    If the check does not return ``ok``, or SQLite raises a
    ``DatabaseError``, the file is backed up via ``_backup_corrupt_db`` and
    :class:`KanbanDbCorruptError` is raised, so callers cannot silently
    recreate the schema on top of a damaged DB.

    ``sqlite3.OperationalError`` (lock contention, busy, transient I/O) is
    NOT treated as corruption. It propagates unchanged and no ``.corrupt``
    backup is made.

    No-op when the path cannot be resolved or stat'ed, when the file is
    missing or zero bytes (treated as fresh), and when the resolved path is
    already in ``_INITIALIZED_PATHS``.

    Raises:
        KanbanDbCorruptError: The file failed the integrity probe.
        sqlite3.OperationalError: Transient SQLite failure while probing.
    """
    # Resolve before any probing so the cache lookup, the probe and the
    # backup all agree on one canonical path.
    try:
        db_file = path.resolve()
        if not db_file.exists() or db_file.stat().st_size == 0:
            return
    except OSError:
        return
    if str(db_file) in _INITIALIZED_PATHS:
        return

    try:
        probe = sqlite3.connect(str(db_file), timeout=5, isolation_level=None)
        try:
            row = probe.execute("PRAGMA integrity_check").fetchone()
        finally:
            probe.close()
    except sqlite3.OperationalError:
        # Lock contention, busy, transient IO — not corruption. Must be
        # listed before DatabaseError, its base class, or it would be
        # misreported as corruption.
        raise
    except sqlite3.DatabaseError as exc:
        reason = f"sqlite refused to open file: {exc}"
    else:
        if row and (row[0] or "").lower() == "ok":
            return
        reason = f"integrity_check returned {row[0] if row else '<no row>'!r}"

    backup = _backup_corrupt_db(db_file)
    raise KanbanDbCorruptError(db_file, backup, reason)
def connect(
db_path: Optional[Path] = None,
*,
@ -1033,7 +1160,13 @@ def connect(
else:
path = kanban_db_path(board=board)
path.parent.mkdir(parents=True, exist_ok=True)
# Cheap byte-level check first — catches the #29507 TLS-overwrite shape
# and other invalid-header cases without opening a sqlite connection.
_validate_sqlite_header(path)
# Full integrity probe — catches corruption past the header (malformed
# pages, broken internal metadata). Cached per-path after first success
# via _INITIALIZED_PATHS so it only runs once per process per path.
_guard_existing_db_is_healthy(path)
resolved = str(path.resolve())
conn = sqlite3.connect(str(path), isolation_level=None, timeout=30)
try:
@ -1518,8 +1651,15 @@ def create_task(
now = int(time.time())
# Resolve workspace_path from board-level default_workdir when the
# caller did not specify one explicitly.
if workspace_path is None:
# caller did not specify one explicitly. Board defaults represent
# persistent project checkouts, so only persistent workspace kinds may
# inherit them. Scratch workspaces are auto-deleted on completion and
# must stay under the per-board scratch root created by
# ``resolve_workspace``; inheriting ``default_workdir`` for a scratch
# task would point cleanup at the user's source tree (#28818). The
# containment guard in ``_cleanup_workspace`` is the safety rail, but
# we also stop the bad state from being created in the first place.
if workspace_path is None and workspace_kind in {"dir", "worktree"}:
board_slug = board if board else get_current_board()
board_meta = read_board_metadata(board_slug)
board_default = board_meta.get("default_workdir")
@ -2904,6 +3044,81 @@ def complete_task(
# Workspace / tmux cleanup
# ---------------------------------------------------------------------------
def _is_managed_scratch_path(p: Path) -> bool:
    """Return True iff *p* is a strict descendant of a kanban-managed scratch root.

    Only ``workspaces/`` directories count as managed roots:

    * ``HERMES_KANBAN_WORKSPACES_ROOT`` when set and non-blank.
    * ``<kanban_home>/kanban/workspaces``.
    * ``<kanban_home>/kanban/boards/<entry>/workspaces`` for every directory
      currently present under ``boards/``.

    Descendancy is strict: a path equal to a root is NOT managed, because
    deleting the root would wipe every task's scratch dir at once. Paths
    such as the kanban home, ``kanban/logs`` or a board directory itself are
    not under any root and are therefore rejected as well.

    Used by :func:`_cleanup_workspace` to refuse to ``shutil.rmtree`` paths
    outside Hermes-managed storage (#28818). Filesystem errors while
    resolving or listing are treated as "not a root"; if *p* itself cannot
    be resolved the answer is False.
    """
    def _try_resolve(candidate: Path) -> Optional[Path]:
        try:
            return candidate.resolve(strict=False)
        except OSError:
            return None

    target = _try_resolve(p)
    if target is None:
        return False

    managed_roots: list[Path] = []

    env_root = os.environ.get("HERMES_KANBAN_WORKSPACES_ROOT", "").strip()
    if env_root:
        try:
            managed_roots.append(Path(env_root).expanduser().resolve(strict=False))
        except OSError:
            pass

    try:
        home = kanban_home()
    except OSError:
        home = None

    if home is not None:
        legacy_root = _try_resolve(home / "kanban" / "workspaces")
        if legacy_root is not None:
            managed_roots.append(legacy_root)

        boards_dir = _try_resolve(home / "kanban" / "boards")
        if boards_dir is not None:
            try:
                board_entries = list(boards_dir.iterdir())
            except OSError:
                board_entries = []
            for board in board_entries:
                try:
                    if board.is_dir():
                        managed_roots.append(
                            (board / "workspaces").resolve(strict=False)
                        )
                except OSError:
                    continue

    def _strictly_inside(root: Path) -> bool:
        # Equality is excluded on purpose: the root itself is never removable.
        if target == root:
            return False
        try:
            return target.is_relative_to(root)
        except ValueError:
            return False

    return any(_strictly_inside(root) for root in managed_roots)
def _cleanup_workspace(conn: sqlite3.Connection, task_id: str) -> None:
"""Remove a task's scratch workspace dir and kill its stale tmux session.
@ -2926,8 +3141,21 @@ def _cleanup_workspace(conn: sqlite3.Connection, task_id: str) -> None:
import shutil
wp = Path(path)
if wp.is_dir():
shutil.rmtree(wp, ignore_errors=True)
_log.debug("Removed scratch workspace: %s", wp)
# Containment guard (#28818): a board's ``default_workdir`` can
# pair ``workspace_kind='scratch'`` with a user-supplied path
# pointing at a real source tree. Without this check, task
# completion would unconditionally ``shutil.rmtree`` that path
# and silently delete the user's source data.
if _is_managed_scratch_path(wp):
shutil.rmtree(wp, ignore_errors=True)
_log.debug("Removed scratch workspace: %s", wp)
else:
_log.warning(
"Refusing to remove out-of-scratch workspace for task %s: %s "
"(workspace_kind='scratch' but path is outside any "
"kanban-managed workspaces root)",
task_id, wp,
)
# Also kill the tmux session for the worker that owned this task,
# if the tmux session is now dead (worker process exited).
_cleanup_worker_tmux(conn, task_id)
@ -2961,6 +3189,93 @@ def _cleanup_worker_tmux(conn: sqlite3.Connection, task_id: str) -> None:
pass # best-effort — never block completion
# ---------------------------------------------------------------------------
# First-use tip for scratch workspaces
# ---------------------------------------------------------------------------
#
# Scratch workspaces are intentionally ephemeral — ``_cleanup_workspace``
# removes them as soon as ``complete_task`` runs. New users often don't
# realize that and lose worker output (community report, May 2026). The
# behavior is right; the lack of warning is the bug.
#
# On the FIRST scratch workspace materialization across the whole install
# we:
# 1. Log a warning line on the dispatcher logger.
# 2. Append a ``tip_scratch_workspace`` event on the task so it's visible
# via ``hermes kanban show <id>`` and the dashboard.
# 3. Touch a sentinel file under ``kanban_home() / '.scratch_tip_shown'``
# so we don't repeat the tip — once you know, you know.
#
# Scope is per-install, not per-board: a user creating a second board
# already learned the lesson on board #1.
_SCRATCH_TIP_SENTINEL_NAME = ".scratch_tip_shown"
_SCRATCH_TIP_MESSAGE = (
"scratch workspaces are ephemeral — they're deleted when the task "
"completes. Use --workspace worktree: (git worktree) or "
"--workspace dir:/abs/path (existing dir) to preserve worker output."
)
def _scratch_tip_sentinel_path() -> Path:
    """Return the location of the per-install scratch-workspace-tip sentinel file."""
    return kanban_home().joinpath(_SCRATCH_TIP_SENTINEL_NAME)
def _scratch_tip_shown() -> bool:
    """Report whether the scratch-workspace tip sentinel file exists.

    Best-effort: an ``OSError`` while checking counts as "not shown", so the
    tip is re-emitted, which is the safer failure mode for a help message.
    """
    try:
        already_shown = _scratch_tip_sentinel_path().exists()
    except OSError:
        already_shown = False
    return already_shown
def _mark_scratch_tip_shown() -> None:
    """Create the sentinel file so later scratch workspaces stay silent.

    Best-effort: an ``OSError`` is ignored. The worst outcome is that the
    tip appears once more, which beats crashing dispatch over a help message.
    """
    try:
        sentinel = _scratch_tip_sentinel_path()
        # The parent directory may not exist yet on a fresh install.
        sentinel.parent.mkdir(parents=True, exist_ok=True)
        sentinel.touch(exist_ok=True)
    except OSError:
        return
def _maybe_emit_scratch_tip(
    conn: sqlite3.Connection,
    task_id: str,
    workspace_kind: Optional[str],
) -> None:
    """Emit the first-use scratch-workspace tip, then mark it as shown.

    Called from the dispatcher right after a workspace is materialized.

    Args:
        conn: Connection used to append the ``tip_scratch_workspace`` event.
        task_id: Task the event is attached to.
        workspace_kind: Workspace kind of the task; ``None``/empty is treated
            as ``"scratch"``.

    No-op for any kind other than ``"scratch"`` and no-op once the sentinel
    exists. Never raises for logging/event failures: the tip is a help
    message and must not block the spawn loop.
    """
    if (workspace_kind or "scratch") != "scratch":
        return
    if _scratch_tip_shown():
        return
    try:
        _log.warning("kanban: %s (task %s)", _SCRATCH_TIP_MESSAGE, task_id)
        with write_txn(conn):
            _append_event(
                conn, task_id, "tip_scratch_workspace",
                {"message": _SCRATCH_TIP_MESSAGE},
            )
    except Exception:
        # Best-effort — never block the spawn loop over a help message.
        # Keep a debug trace so a failing event write is still diagnosable
        # instead of vanishing silently.
        _log.debug(
            "kanban: could not record scratch-workspace tip for task %s",
            task_id,
            exc_info=True,
        )
    finally:
        # Mark as shown even when the event write failed, so the tip is not
        # retried on every subsequent dispatch.
        _mark_scratch_tip_shown()
def edit_completed_task_result(
conn: sqlite3.Connection,
task_id: str,
@ -3083,6 +3398,77 @@ def block_task(
return True
def promote_task(
    conn: sqlite3.Connection,
    task_id: str,
    *,
    actor: str,
    reason: Optional[str] = None,
    force: bool = False,
    dry_run: bool = False,
) -> tuple[bool, Optional[str]]:
    """Manually move a ``todo`` or ``blocked`` task to ``ready``.

    An operator-driven promotion that records a ``promoted_manual`` event
    (actor, reason, forced flag) as its audit trail. Only the ``status``
    column is updated.

    Args:
        conn: Open database connection; rows must be addressable by column
            name.
        task_id: Task to promote.
        actor: Who requested the promotion (stored in the event).
        reason: Optional free-text justification (stored in the event).
        force: Skip the parent-dependency check.
        dry_run: Validate only; do not write anything.

    Returns:
        ``(True, None)`` when the task was promoted (or would be, for a dry
        run); ``(False, reason)`` when the promotion is refused.
    """
    promotable_states = ("todo", "blocked")
    terminal_states = ("done", "archived")

    task_row = conn.execute(
        "SELECT status FROM tasks WHERE id = ?", (task_id,)
    ).fetchone()
    if task_row is None:
        return False, f"task {task_id} not found"

    current = task_row["status"]
    if current not in promotable_states:
        return False, (
            f"task {task_id} is {current!r}; promote only applies to "
            f"'todo' or 'blocked'"
        )

    if not force:
        # Every parent must be in a terminal state before the child may run.
        parent_rows = conn.execute(
            "SELECT t.id, t.status FROM tasks t "
            "JOIN task_links l ON l.parent_id = t.id "
            "WHERE l.child_id = ?",
            (task_id,),
        ).fetchall()
        blocking = []
        for parent in parent_rows:
            if parent["status"] not in terminal_states:
                blocking.append(parent["id"])
        if blocking:
            return False, (
                f"unsatisfied parent dependencies: "
                f"{', '.join(blocking)} (use --force to override)"
            )

    if dry_run:
        return True, None

    with write_txn(conn):
        # The status guard in the WHERE clause detects a concurrent change
        # between the read above and this write.
        changed = conn.execute(
            "UPDATE tasks SET status = 'ready' "
            "WHERE id = ? AND status IN ('todo', 'blocked')",
            (task_id,),
        ).rowcount
        if changed != 1:
            return False, f"task {task_id} status changed during promotion"
        _append_event(
            conn,
            task_id,
            "promoted_manual",
            {"actor": actor, "reason": reason, "forced": force},
        )
    return True, None
def unblock_task(conn: sqlite3.Connection, task_id: str) -> bool:
"""Transition ``blocked``/``scheduled`` -> ready or todo.
@ -4892,6 +5278,7 @@ def dispatch_once(
continue
# Persist the resolved workspace path so the worker can cd there.
set_workspace_path(conn, claimed.id, str(workspace))
_maybe_emit_scratch_tip(conn, claimed.id, claimed.workspace_kind)
_spawn = spawn_fn if spawn_fn is not None else _default_spawn
try:
# Back-compat: older spawn_fn signatures accept only
@ -4970,6 +5357,7 @@ def dispatch_once(
continue
# Persist the resolved workspace path so the worker can cd there.
set_workspace_path(conn, claimed.id, str(workspace))
_maybe_emit_scratch_tip(conn, claimed.id, claimed.workspace_kind)
# Force-load sdlc-review skill for review agents. The
# _default_spawn function already auto-loads kanban-worker, and
# appends task.skills via --skills. Setting task.skills here

File diff suppressed because it is too large Load diff

776
hermes_cli/mcp_catalog.py Normal file
View file

@ -0,0 +1,776 @@
"""MCP catalog — curated, Nous-approved MCP servers shipped with the repo.
Mirrors the optional-skills/ pattern: each catalog entry lives under
``optional-mcps/<name>/manifest.yaml`` and ships disabled. Users discover
entries via ``hermes mcp catalog`` or the interactive ``hermes mcp picker``,
and install them with ``hermes mcp install <name>`` (or by toggling in the
picker, which flows them through any required env/OAuth setup).
Catalog policy:
- Entries are added only by merging a PR into hermes-agent. Presence in the
``optional-mcps/`` directory = Nous approval. No community tier, no trust
signals beyond "it's in the catalog".
- Manifests pin transport details (commands, args, refs). MCPs are never
auto-updated; users explicitly re-run ``hermes mcp install <name>`` to
pull a new manifest version after a repo update.
- Secrets prompted at install time go to ``~/.hermes/.env`` (the
.env-is-for-secrets rule). Non-secret env vars also go to .env to keep
one credential store.
See website/docs/user-guide/mcp-catalog.md for user docs.
See references/mcp-catalog.md (this repo's skill) for the manifest schema.
"""
from __future__ import annotations
import os
import re
import shutil
import subprocess
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
import yaml
from hermes_constants import get_hermes_home, get_optional_mcps_dir
from hermes_cli.colors import Colors, color
from hermes_cli.config import (
load_config,
save_config,
get_env_value,
save_env_value,
)
from hermes_cli.cli_output import prompt as _prompt_input, prompt_yes_no
# The only ``manifest_version`` value accepted when parsing a manifest.
_MANIFEST_VERSION = 1
# Substituted at install time inside `transport.command` / `transport.args`.
_INSTALL_DIR_VAR = "${INSTALL_DIR}"
# ─── Data classes ────────────────────────────────────────────────────────────
@dataclass
class EnvVarSpec:
    """One environment variable a catalog entry asks the user to supply."""

    # Environment variable name.
    name: str
    # Text shown to the user when prompting for the value.
    prompt: str
    # When true, installation fails if no value is provided.
    required: bool = True
    # When true, the value is requested in password mode.
    secret: bool = True
    # Offered as the prompt default; an empty string means "no default".
    default: str = ""
@dataclass
class AuthSpec:
    """Authentication requirements declared by a catalog manifest."""

    # One of "api_key", "oauth" or "none".
    type: str
    # Environment variables to collect from the user.
    env: List[EnvVarSpec] = field(default_factory=list)
    # OAuth-specific (case 2: third-party provider like Google).
    provider: Optional[str] = None
    scopes: List[str] = field(default_factory=list)
    env_var: Optional[str] = None
@dataclass
class TransportSpec:
    """How Hermes reaches the MCP server described by a manifest."""

    # Either "stdio" or "http".
    type: str
    # Executable for stdio transports.
    command: Optional[str] = None
    # Arguments passed alongside ``command``.
    args: List[str] = field(default_factory=list)
    # Endpoint for http transports.
    url: Optional[str] = None
    # Informational, pinned.
    version: Optional[str] = None
@dataclass
class InstallSpec:
    """Optional bootstrap step: a git clone followed by dependency install.

    Manifests for servers that launch in one shot (npx, uvx) leave this out.
    """

    # Install mechanism; "git".
    type: str
    # Repository to clone.
    url: str
    # Commit/tag/branch — pinned, never floats.
    ref: str
    # Commands to run in the clone once it has been checked out.
    bootstrap: List[str] = field(default_factory=list)
@dataclass
class ToolsSpec:
    """Tool-selection hints carried by a manifest.

    They seed the pre-checked state of the install-time tool checklist and
    serve as the fallback selection when probing the server fails.
    """

    # Tool names to pre-check in the checklist (or to apply directly when the
    # probe fails). ``None`` means: pre-check every probed tool, and write no
    # filter when the probe fails.
    default_enabled: Optional[List[str]] = None
@dataclass
class CatalogEntry:
    """A fully parsed catalog manifest."""

    # Catalog identifier; also the ``mcp_servers`` key used on install.
    name: str
    # Human-readable summary of the entry.
    description: str
    # Free-form source string printed during install.
    source: str
    transport: TransportSpec
    auth: AuthSpec
    tools: ToolsSpec = field(default_factory=ToolsSpec)
    # Present only when the server needs a clone + bootstrap step.
    install: Optional[InstallSpec] = None
    # Notes printed once installation finishes.
    post_install: str = ""
    # File the entry was parsed from.
    manifest_path: Path = field(default_factory=Path)
# ─── Manifest loader ─────────────────────────────────────────────────────────
class CatalogError(Exception):
    """Raised when a manifest cannot be read or validated, or an install step fails."""
def _catalog_root() -> Path:
    """Locate the ``optional-mcps/`` directory shipped with this Hermes install.

    The final choice is delegated to ``get_optional_mcps_dir``. The path
    handed to it is the source-checkout location, ``optional-mcps/`` next to
    the package directory, intended as the fallback.
    """
    package_dir = Path(__file__).parent
    checkout_default = package_dir.parent / "optional-mcps"
    return get_optional_mcps_dir(checkout_default)
def _parse_env_spec(raw: Any) -> EnvVarSpec:
    """Validate one ``auth.env`` manifest item and convert it to an EnvVarSpec.

    Args:
        raw: The decoded YAML item; must be a mapping with a ``name`` key.

    Returns:
        The parsed spec. ``prompt`` falls back to the variable name;
        ``required`` and ``secret`` default to true; ``default`` to "".

    Raises:
        CatalogError: If *raw* is not a mapping, or ``name`` is missing, not
            a string, or not a valid identifier-style variable name.
    """
    if not isinstance(raw, dict):
        raise CatalogError(f"env entry must be a mapping, got {type(raw).__name__}")
    name = raw.get("name") or ""
    # YAML can produce non-string scalars (``name: 123``). Reject them here so
    # callers get a CatalogError instead of a TypeError from ``re``.
    # ``fullmatch`` also rejects a trailing newline, which ``match`` with a
    # ``$`` anchor would let through.
    if not isinstance(name, str) or not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name):
        raise CatalogError(f"invalid env var name: {name!r}")
    return EnvVarSpec(
        name=name,
        prompt=raw.get("prompt") or name,
        required=bool(raw.get("required", True)),
        secret=bool(raw.get("secret", True)),
        default=str(raw.get("default") or ""),
    )
def _parse_manifest(path: Path) -> CatalogEntry:
    """Read and validate a ``manifest.yaml`` file.

    Args:
        path: Location of the manifest to load.

    Returns:
        The parsed entry, with ``manifest_path`` set to *path*.

    Raises:
        CatalogError: On any read, YAML or validation problem. The message
            for a rejected ``manifest_version`` contains both
            "manifest_version" and "unsupported".
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
    except Exception as exc:
        raise CatalogError(f"failed to read {path}: {exc}") from exc
    if not isinstance(data, dict):
        raise CatalogError(f"{path}: manifest must be a mapping")

    mv = data.get("manifest_version")
    if mv != _MANIFEST_VERSION:
        raise CatalogError(
            f"{path}: manifest_version {mv!r} unsupported "
            f"(this Hermes understands version {_MANIFEST_VERSION})"
        )

    name = data.get("name") or ""
    # A non-string scalar (``name: 123``) must surface as a CatalogError, not
    # a TypeError from ``re``. ``fullmatch`` also rejects a trailing newline,
    # which matters because the name is later used as a directory name.
    if not isinstance(name, str) or not re.fullmatch(r"[A-Za-z0-9_-]+", name):
        raise CatalogError(f"{path}: invalid or missing 'name'")
    description = str(data.get("description") or "").strip()
    if not description:
        raise CatalogError(f"{path}: 'description' required")
    source = str(data.get("source") or "").strip()

    # ── transport ──
    transport_raw = data.get("transport") or {}
    if not isinstance(transport_raw, dict):
        raise CatalogError(f"{path}: 'transport' must be a mapping")
    t_type = transport_raw.get("type")
    if t_type not in ("stdio", "http"):
        raise CatalogError(f"{path}: transport.type must be 'stdio' or 'http'")
    args = transport_raw.get("args") or []
    if not isinstance(args, list):
        raise CatalogError(f"{path}: transport.args must be a list")
    transport = TransportSpec(
        type=t_type,
        command=transport_raw.get("command"),
        args=[str(a) for a in args],
        url=transport_raw.get("url"),
        version=transport_raw.get("version"),
    )
    if t_type == "stdio" and not transport.command:
        raise CatalogError(f"{path}: stdio transport requires 'command'")
    if t_type == "http" and not transport.url:
        raise CatalogError(f"{path}: http transport requires 'url'")

    # ── auth ──
    auth_raw = data.get("auth") or {"type": "none"}
    if not isinstance(auth_raw, dict):
        raise CatalogError(f"{path}: 'auth' must be a mapping")
    a_type = auth_raw.get("type") or "none"
    if a_type not in ("api_key", "oauth", "none"):
        raise CatalogError(f"{path}: auth.type must be 'api_key'|'oauth'|'none'")
    env_list_raw = auth_raw.get("env") or []
    if not isinstance(env_list_raw, list):
        raise CatalogError(f"{path}: auth.env must be a list")
    env_list = [_parse_env_spec(e) for e in env_list_raw]
    scopes_raw = auth_raw.get("scopes") or []
    # Same shape check as args/env: a bare string would otherwise be split
    # into single characters by ``list(...)``.
    if not isinstance(scopes_raw, list):
        raise CatalogError(f"{path}: auth.scopes must be a list")
    auth = AuthSpec(
        type=a_type,
        env=env_list,
        provider=auth_raw.get("provider"),
        scopes=list(scopes_raw),
        env_var=auth_raw.get("env_var"),
    )

    # ── tools ──
    tools_raw = data.get("tools") or {}
    if not isinstance(tools_raw, dict):
        raise CatalogError(f"{path}: 'tools' must be a mapping")
    default_enabled = tools_raw.get("default_enabled")
    if default_enabled is not None:
        if not isinstance(default_enabled, list) or not all(
            isinstance(t, str) for t in default_enabled
        ):
            raise CatalogError(
                f"{path}: tools.default_enabled must be a list of strings"
            )
    tools_spec = ToolsSpec(default_enabled=default_enabled)

    # ── install (optional) ──
    install: Optional[InstallSpec] = None
    install_raw = data.get("install")
    if install_raw is not None:
        if not isinstance(install_raw, dict):
            raise CatalogError(f"{path}: 'install' must be a mapping")
        i_type = install_raw.get("type")
        if i_type != "git":
            raise CatalogError(f"{path}: install.type must be 'git' (got {i_type!r})")
        url = install_raw.get("url") or ""
        ref = install_raw.get("ref") or ""
        if not url or not ref:
            raise CatalogError(f"{path}: install.url and install.ref are required")
        bootstrap = install_raw.get("bootstrap") or []
        if not isinstance(bootstrap, list):
            raise CatalogError(f"{path}: install.bootstrap must be a list")
        install = InstallSpec(
            type=i_type,
            url=url,
            ref=ref,
            bootstrap=[str(c) for c in bootstrap],
        )

    return CatalogEntry(
        name=name,
        description=description,
        source=source,
        transport=transport,
        auth=auth,
        tools=tools_spec,
        install=install,
        post_install=str(data.get("post_install") or ""),
        manifest_path=path,
    )
def list_catalog() -> List[CatalogEntry]:
    """Return all valid catalog entries, in catalog-directory order.

    Each child of the catalog root that contains a ``manifest.yaml`` is
    parsed. Manifests that fail to parse are left out of the result and
    recorded instead; read them back with :func:`catalog_diagnostics`. A
    rejected ``manifest_version`` is recorded as ``future_manifest`` so the
    picker / catalog UIs can tell the user their Hermes is out of date;
    every other failure is recorded as ``invalid``.

    Returns:
        The parsed entries; an empty list when the catalog root is missing.
    """
    # Reset before anything can return early, so a missing catalog root does
    # not leave diagnostics from a previous call behind.
    _CATALOG_DIAGNOSTICS.clear()
    root = _catalog_root()
    if not root.exists():
        return []
    entries: List[CatalogEntry] = []
    for child in sorted(root.iterdir()):
        manifest = child / "manifest.yaml"
        if not manifest.is_file():
            continue
        try:
            entries.append(_parse_manifest(manifest))
        except CatalogError as exc:
            msg = str(exc)
            # Recognize the manifest-version error specifically so the UI can
            # surface a more actionable nudge than "broken manifest".
            # NOTE(review): this is matched on message text, so it also fires
            # for a missing or older manifest_version — confirm that is
            # acceptable.
            if "manifest_version" in msg and "unsupported" in msg:
                kind = "future_manifest"
            else:
                kind = "invalid"
            _CATALOG_DIAGNOSTICS.append((child.name, kind, msg))
    return entries
# Populated by list_catalog() with ``(entry_name, kind, message)`` tuples.
# Inspected by the picker / catalog UIs so the user gets actionable feedback
# instead of a silently-shorter list.
_CATALOG_DIAGNOSTICS: List[tuple] = []
def catalog_diagnostics() -> List[tuple]:
    """Return the problems recorded by the latest :func:`list_catalog` run.

    Each item is an ``(entry_name, kind, message)`` tuple. ``kind`` is:

    - ``future_manifest`` — the manifest's ``manifest_version`` was rejected
      as unsupported; meant for manifests newer than this Hermes, where the
      fix is to update Hermes.
    - ``invalid`` — the manifest failed validation in some other way.

    The result is a copy, so callers may modify it freely.
    """
    return _CATALOG_DIAGNOSTICS.copy()
def get_entry(name: str) -> Optional[CatalogEntry]:
    """Find the catalog entry called *name*, or return ``None``.

    A leading ``official/`` is stripped before the lookup.
    """
    prefix = "official/"
    wanted = name[len(prefix):] if name.startswith(prefix) else name
    return next(
        (candidate for candidate in list_catalog() if candidate.name == wanted),
        None,
    )
# ─── Status helpers ──────────────────────────────────────────────────────────
def installed_servers() -> Dict[str, dict]:
    """Return the ``mcp_servers`` mapping from the loaded config.

    A missing, empty or non-mapping value yields an empty dict.
    """
    block = load_config().get("mcp_servers") or {}
    if not isinstance(block, dict):
        return {}
    return block
def is_installed(name: str) -> bool:
    """Tell whether *name* has an entry under ``mcp_servers``."""
    configured = installed_servers()
    return name in configured
def is_enabled(name: str) -> bool:
    """Tell whether the configured server *name* is switched on.

    A missing or empty server block counts as disabled. A missing
    ``enabled`` key counts as enabled. String values are enabled only when
    they read "true", "1" or "yes" (case-insensitive).
    """
    server_cfg = installed_servers().get(name)
    if not server_cfg:
        return False
    flag = server_cfg.get("enabled", True)
    if not isinstance(flag, str):
        return bool(flag)
    return flag.lower() in {"true", "1", "yes"}
# ─── Install ─────────────────────────────────────────────────────────────────
def _install_root() -> Path:
    """Return the directory that holds git-bootstrapped MCP clones.

    It is ``mcp-installs`` under the Hermes home directory and is created
    on demand.
    """
    installs_dir = get_hermes_home().joinpath("mcp-installs")
    installs_dir.mkdir(parents=True, exist_ok=True)
    return installs_dir
def _run_bootstrap(cwd: Path, commands: List[str]) -> None:
    """Run each bootstrap command inside *cwd*, stopping at the first failure.

    Commands go through the shell, so operators such as ``&&`` work. Output
    is not captured, so it appears on the user's terminal.

    Raises:
        CatalogError: When a command exits with a non-zero status.
    """
    workdir = str(cwd)
    for step in commands:
        print(color(f" $ {step}", Colors.DIM))
        exit_code = subprocess.run(step, cwd=workdir, shell=True).returncode
        if exit_code == 0:
            continue
        raise CatalogError(
            f"bootstrap step failed (exit {exit_code}): {step}"
        )
def _do_git_install(entry: CatalogEntry) -> Path:
    """Clone the entry's repository and run its bootstrap commands.

    The clone lands in ``<install root>/<entry name>``. An existing
    directory there is removed first, so every install starts from a fresh
    checkout of the manifest's pinned ref.

    Returns:
        The directory the repository was cloned into.

    Raises:
        CatalogError: If git is not on PATH, the clone or checkout fails, or
            a bootstrap command fails.
    """
    assert entry.install is not None and entry.install.type == "git"
    spec = entry.install
    target = _install_root() / entry.name
    git_exe = shutil.which("git")
    if not git_exe:
        raise CatalogError("git is required to install this MCP but was not found on PATH")

    if target.exists():
        # The manifest is the source of truth: wipe and re-clone so the
        # result is deterministic.
        print(color(f" Removing existing install at {target}", Colors.DIM))
        shutil.rmtree(target)
    print(color(f" Cloning {spec.url} ({spec.ref}) → {target}", Colors.CYAN))

    # `git clone --branch` takes branches and tags but not commit SHAs, so a
    # SHA-shaped ref goes straight to the full-clone path instead of failing
    # noisily on the shallow attempt first.
    needs_full_clone = bool(re.fullmatch(r"[0-9a-f]{7,40}", spec.ref))
    if not needs_full_clone:
        shallow = subprocess.run(
            [git_exe, "clone", "--depth", "1", "--branch", spec.ref, spec.url, str(target)],
        )
        if shallow.returncode != 0:
            # The branch/tag form failed (for example the ref was deleted
            # upstream). Clean up any partial clone and retry with a full clone.
            if target.exists():
                shutil.rmtree(target)
            needs_full_clone = True

    if needs_full_clone:
        cloned = subprocess.run([git_exe, "clone", spec.url, str(target)])
        if cloned.returncode != 0:
            raise CatalogError(f"git clone failed for {spec.url}")
        checked_out = subprocess.run([git_exe, "-C", str(target), "checkout", spec.ref])
        if checked_out.returncode != 0:
            raise CatalogError(f"git checkout {spec.ref} failed")

    if spec.bootstrap:
        _run_bootstrap(target, spec.bootstrap)
    return target
def _expand_install_dir(value: str, install_dir: Optional[Path]) -> str:
    """Replace every install-dir placeholder in *value* with *install_dir*.

    Values without the placeholder are returned untouched.

    Raises:
        CatalogError: If the placeholder is present but *install_dir* is
            ``None``.
    """
    if _INSTALL_DIR_VAR in value:
        if install_dir is None:
            raise CatalogError(
                f"manifest references {_INSTALL_DIR_VAR} but no install block exists"
            )
        return value.replace(_INSTALL_DIR_VAR, str(install_dir))
    return value
def _prompt_env_vars(specs: List[EnvVarSpec]) -> Dict[str, str]:
    """Collect a value for each env spec, prompting only when none is stored.

    Variables that already have a value are reused without prompting. Newly
    entered values, secret or not, are saved through ``save_env_value``.

    Returns:
        Mapping of variable name to value for every variable that ended up
        with one. Optional variables left blank are omitted.

    Raises:
        CatalogError: If a required variable is left blank.
    """
    values: Dict[str, str] = {}
    for spec in specs:
        current = get_env_value(spec.name)
        if current:
            print(color(f"{spec.name} already set in .env", Colors.GREEN))
            values[spec.name] = current
            continue
        answer = _prompt_input(
            spec.prompt,
            default=spec.default or None,
            password=spec.secret,
        )
        if answer:
            save_env_value(spec.name, answer)
            values[spec.name] = answer
        elif spec.required:
            raise CatalogError(f"{spec.name} is required but no value was provided")
    return values
def _build_server_config(
entry: CatalogEntry, install_dir: Optional[Path]
) -> dict:
"""Translate a manifest into the ``mcp_servers.<name>`` block format used
by hermes_cli/mcp_config.py."""
cfg: dict = {}
t = entry.transport
if t.type == "stdio":
cfg["command"] = _expand_install_dir(t.command or "", install_dir)
if t.args:
cfg["args"] = [_expand_install_dir(a, install_dir) for a in t.args]
elif t.type == "http":
cfg["url"] = t.url
if entry.auth.type == "oauth":
cfg["auth"] = "oauth"
return cfg
def _read_prior_tool_selection(name: str) -> Optional[List[str]]:
    """Fetch the ``tools.include`` list currently stored for server *name*.

    Used on reinstall so the tool checklist can start from what the user
    already had.

    Returns:
        A copy of the stored list when it is a list of strings; ``None``
        when nothing usable is stored.
    """
    server_cfg = installed_servers().get(name) or {}
    tools_cfg = server_cfg.get("tools") or {}
    if isinstance(tools_cfg, dict):
        stored = tools_cfg.get("include")
        if isinstance(stored, list) and all(isinstance(item, str) for item in stored):
            return list(stored)
    return None
def _probe_tools(name: str) -> Optional[List[tuple]]:
    """Ask the configured MCP server *name* which tools it offers.

    Returns:
        The probe result as a list (callers unpack each item as
        ``(tool_name, description)``). An empty list means the probe
        returned nothing. ``None`` means the server is not configured or the
        probe raised.

    Probe errors are printed and never propagated; the caller has a
    fallback path for a ``None`` result.
    """
    server_cfg = installed_servers().get(name)
    if not server_cfg:
        return None
    try:
        # Imported here rather than at module level to keep this module
        # cheap to load.
        from hermes_cli.mcp_config import _probe_single_server

        found = _probe_single_server(name, server_cfg)
        return [] if found is None else list(found)
    except Exception as exc:
        # Report the cause, but the install path must never fail on a probe.
        print(color(f" Probe failed: {exc}", Colors.YELLOW))
        return None
def _write_tools_include(name: str, include: Optional[List[str]]) -> None:
    """Store or remove the ``tools.include`` filter of server *name*.

    With ``include=None`` the server's whole ``tools`` block is dropped (no
    filter). Otherwise ``tools.include`` is set to a copy of *include* and
    any ``tools.exclude`` is removed. The config is saved either way.
    """
    config = load_config()
    all_servers = config.setdefault("mcp_servers", {})
    server = all_servers.get(name) or {}
    if include is not None:
        tools_cfg = server.get("tools") or {}
        if not isinstance(tools_cfg, dict):
            tools_cfg = {}
        tools_cfg["include"] = list(include)
        tools_cfg.pop("exclude", None)
        server["tools"] = tools_cfg
    else:
        server.pop("tools", None)
    all_servers[name] = server
    config["mcp_servers"] = all_servers
    save_config(config)
def _apply_tool_selection(
    entry: CatalogEntry, *, prior_selection: Optional[List[str]]
) -> None:
    """Probe the installed server and record which of its tools are enabled.

    Probe failed:
      - Apply the manifest's ``tools.default_enabled`` when it declares one;
        otherwise write no filter.
      - Point the user at ``hermes mcp configure <name>`` in both cases.

    Probe returned no tools:
      - Write no filter.

    Probe returned tools, stdin is not a TTY (no checklist):
      - Use *prior_selection* when it is not ``None``, else the manifest
        default, else no filter. Names the server no longer reports are
        dropped.

    Probe returned tools, interactive:
      - Show a checklist, pre-checked from a non-empty *prior_selection*,
        else the manifest default, else every tool.
      - Nothing chosen writes an empty ``tools.include``; everything chosen
        clears the filter; a subset writes ``tools.include``.
    """
    server_name = entry.name
    manifest_default = entry.tools.default_enabled

    print()
    print(color(f" Probing '{entry.name}' for available tools...", Colors.CYAN))
    probed = _probe_tools(server_name)

    if probed is None:
        # The probe itself failed.
        if manifest_default:
            _write_tools_include(server_name, manifest_default)
            print(color(
                f" Couldn't probe server. Applied manifest default "
                f"({len(manifest_default)} tools). "
                f"Run `hermes mcp configure {entry.name}` after the server "
                "is reachable to refine.",
                Colors.YELLOW,
            ))
            return
        _write_tools_include(server_name, None)
        print(color(
            f" Couldn't probe server; installed with no tool filter "
            "(all tools enabled when reachable). "
            f"Run `hermes mcp configure {entry.name}` after first "
            "connect to prune.",
            Colors.YELLOW,
        ))
        return

    if not probed:
        # The probe worked but the server exposes nothing to filter.
        _write_tools_include(server_name, None)
        print(color(" Server reported no tools.", Colors.YELLOW))
        return

    tool_names = [item[0] for item in probed]

    # Without a TTY there is no checklist; decide directly.
    import sys as _sys
    if not _sys.stdin.isatty():
        if prior_selection is not None:
            kept = [n for n in prior_selection if n in tool_names]
            _write_tools_include(server_name, kept)
        elif manifest_default:
            kept = [n for n in manifest_default if n in tool_names]
            _write_tools_include(server_name, kept)
        else:
            _write_tools_include(server_name, None)
        return

    # Interactive: work out which rows start checked.
    if prior_selection:
        prechecked_names = {n for n in prior_selection if n in tool_names}
    elif manifest_default:
        prechecked_names = {n for n in manifest_default if n in tool_names}
    else:
        prechecked_names = set(tool_names)
    pre_indices = {
        idx for idx, tool in enumerate(tool_names) if tool in prechecked_names
    }

    print(color(
        f" Found {len(probed)} tool(s). "
        f"Pre-checked: {len(pre_indices)}.",
        Colors.GREEN,
    ))
    from hermes_cli.curses_ui import curses_checklist

    labels = []
    for n, d in probed:
        labels.append(f"{n}{(d[:60] + '...') if len(d) > 60 else d}")
    chosen_indices = curses_checklist(
        f"Select tools for '{entry.name}' (SPACE toggle, ENTER confirm)",
        labels,
        pre_indices,
    )

    if not chosen_indices:
        # Everything unchecked: store an empty include so the server stays
        # installed but contributes nothing until it is reconfigured.
        _write_tools_include(server_name, [])
        print(color(
            f" No tools selected. Run `hermes mcp configure {entry.name}` "
            "to change.",
            Colors.YELLOW,
        ))
    elif len(chosen_indices) == len(probed):
        # Everything checked: clear the filter for the cleanest config.
        # Tools the server adds later will then be enabled automatically;
        # re-running `hermes mcp configure <name>` and unselecting a tool
        # switches back to include-mode.
        _write_tools_include(server_name, None)
        print(color(
            f" ✓ All {len(probed)} tools enabled (no filter — new tools "
            "the server adds later will be auto-enabled).",
            Colors.GREEN,
        ))
    else:
        chosen_names = [tool_names[i] for i in sorted(chosen_indices)]
        _write_tools_include(server_name, chosen_names)
        print(color(
            f"{len(chosen_names)}/{len(probed)} tools enabled.",
            Colors.GREEN,
        ))
def install_entry(entry: CatalogEntry, *, enable: bool = True) -> None:
    """Install a catalog entry from start to finish.

    In order:
      1. Clone and bootstrap when the manifest has an ``install`` block.
      2. For ``api_key`` auth, prompt for the declared env vars. For
         ``oauth`` auth, print guidance only: provider-mediated entries are
         pointed at ``hermes auth <provider>``, native OAuth entries are
         told tokens are acquired on first connection.
      3. Remember any tool selection already stored for this server.
      4. Build the ``mcp_servers.<name>`` block (with ``enabled`` set from
         *enable*) and save it to the config.
      5. Probe the server and settle the tool filter.
      6. Print the success line and any ``post_install`` notes.

    Args:
        entry: The catalog entry to install.
        enable: Whether the saved server block is marked enabled.
    """
    print()
    print(color(f" Installing MCP '{entry.name}'", Colors.CYAN + Colors.BOLD))
    if entry.description:
        print(color(f" {entry.description}", Colors.DIM))
    if entry.source:
        print(color(f" Source: {entry.source}", Colors.DIM))
    print()

    install_dir: Optional[Path] = (
        _do_git_install(entry) if entry.install is not None else None
    )

    # Auth. Nothing to do for auth.type == "none".
    auth_kind = entry.auth.type
    if auth_kind == "api_key":
        print()
        print(color(" Configure credentials:", Colors.CYAN))
        _prompt_env_vars(entry.auth.env)
    elif auth_kind == "oauth" and entry.auth.provider:
        # Provider-mediated OAuth (Google, GitHub, etc.): only surface
        # guidance here rather than running the provider flow, which keeps
        # catalog install decoupled from the provider-auth lifecycle.
        print(color(
            f" This MCP uses {entry.auth.provider} OAuth. Run "
            f"`hermes auth {entry.auth.provider}` if you have not "
            "already authenticated.",
            Colors.YELLOW,
        ))
    elif auth_kind == "oauth":
        print(color(
            " This MCP uses native OAuth 2.1; tokens will be acquired "
            "on first connection (browser flow).",
            Colors.DIM,
        ))

    # Capture the previous tool selection BEFORE the server block is
    # overwritten below, so a reinstall starts from the user's last choice.
    prior_selection = _read_prior_tool_selection(entry.name)

    # Write the server block without a tools filter; the tool-selection step
    # below finalizes it.
    server_cfg = _build_server_config(entry, install_dir)
    server_cfg["enabled"] = enable
    config = load_config()
    config.setdefault("mcp_servers", {})[entry.name] = server_cfg
    save_config(config)

    _apply_tool_selection(entry, prior_selection=prior_selection)

    print()
    print(color(
        f" ✓ Installed '{entry.name}' "
        f"({'enabled' if enable else 'disabled'}). "
        f"Start a new Hermes session to load its tools.",
        Colors.GREEN,
    ))
    if entry.post_install:
        print()
        for note_line in entry.post_install.strip().splitlines():
            print(color(f" {note_line}", Colors.DIM))
    print()
def uninstall_entry(name: str, *, purge_install_dir: bool = True) -> bool:
    """Remove an installed MCP from the config and optionally delete its clone.

    Args:
        name: Server name, as used under ``mcp_servers``.
        purge_install_dir: Also delete ``<install root>/<name>`` if present.

    Returns:
        True if a config entry or a clone directory was removed.

    The clone directory is only purged for catalog-shaped names (letters,
    digits, ``_`` and ``-``), the same pattern manifest names must match.
    Any other name is still removed from the config but never reaches
    ``rmtree``.
    """
    cfg = load_config()
    servers = cfg.get("mcp_servers") or {}
    removed = False
    if name in servers:
        del servers[name]
        if not servers:
            # Drop the now-empty block rather than leaving ``mcp_servers: {}``.
            cfg.pop("mcp_servers", None)
        else:
            cfg["mcp_servers"] = servers
        save_config(cfg)
        removed = True
    # An empty name joins to the install root itself, ``..`` to its parent,
    # and an absolute path replaces the root entirely. Deleting any of those
    # would destroy unrelated data, so only plain catalog names are purged.
    if purge_install_dir and re.fullmatch(r"[A-Za-z0-9_-]+", name or ""):
        clone = _install_root() / name
        if clone.exists():
            shutil.rmtree(clone)
            removed = True
    return removed

View file

@ -749,6 +749,24 @@ def mcp_command(args):
run_mcp_server(verbose=getattr(args, "verbose", False))
return
# Catalog subcommands live in mcp_picker / mcp_catalog. Import lazily so
# the original `mcp_config` module stays import-cheap.
if action == "picker":
from hermes_cli.mcp_picker import run_picker
run_picker()
return
if action == "catalog":
from hermes_cli.mcp_picker import show_catalog
show_catalog()
return
if action == "install":
from hermes_cli.mcp_picker import install_by_name
import sys as _sys
rc = install_by_name(getattr(args, "identifier", "") or "")
if rc:
_sys.exit(rc)
return
handlers = {
"add": cmd_mcp_add,
"remove": cmd_mcp_remove,
@ -765,15 +783,20 @@ def mcp_command(args):
if handler:
handler(args)
else:
# No subcommand — show list
cmd_mcp_list()
# No subcommand — drop the user into the catalog picker. This is the
# "try enabling and it flows you into setup" UX matching `hermes plugin`.
from hermes_cli.mcp_picker import run_picker
run_picker()
print(color(" Commands:", Colors.CYAN))
_info("hermes mcp Open the catalog picker (default)")
_info("hermes mcp catalog List Nous-approved MCPs")
_info("hermes mcp install <name> Install a catalog MCP")
_info("hermes mcp serve Run as MCP server")
_info("hermes mcp add <name> --url <endpoint> Add an MCP server")
_info("hermes mcp add <name> --url <endpoint> Add a custom MCP server")
_info("hermes mcp add <name> --command <cmd> Add a stdio server")
_info("hermes mcp add <name> --preset <preset> Add from a known preset")
_info("hermes mcp remove <name> Remove a server")
_info("hermes mcp list List servers")
_info("hermes mcp list List configured servers")
_info("hermes mcp test <name> Test connection")
_info("hermes mcp configure <name> Toggle tools")
_info("hermes mcp login <name> Re-authenticate OAuth")

322
hermes_cli/mcp_picker.py Normal file
View file

@ -0,0 +1,322 @@
"""MCP picker — interactive `hermes mcp picker` (also the default `hermes mcp`).
Lists every catalog entry plus any custom MCP servers the user has added via
``hermes mcp add``, lets them pick one, and routes to install / enable /
disable / uninstall / configure-tools flows.
Mirrors the `hermes plugin` picker UX: arrow keys to navigate, ENTER on a row
to act on it. The action depends on current status:
    not installed (catalog)  → install (clone/bootstrap if needed, prompt for creds)
    installed / disabled     → enable
    installed / enabled      → submenu: configure tools / disable / uninstall / reinstall
    custom (non-catalog)     → submenu: configure tools / enable / disable / remove
The picker loops until the user hits ESC/q so they can manage multiple
entries in one session.
"""
from __future__ import annotations
import sys
from dataclasses import dataclass
from typing import List, Optional
from hermes_cli.colors import Colors, color
from hermes_cli.cli_output import prompt_yes_no
from hermes_cli.curses_ui import curses_single_select
from hermes_cli.mcp_catalog import (
CatalogEntry,
CatalogError,
catalog_diagnostics,
install_entry,
is_enabled,
is_installed,
list_catalog,
installed_servers,
uninstall_entry,
)
from hermes_cli.config import load_config, save_config
# ─── Status badges ────────────────────────────────────────────────────────────
# Status text stored on each picker row.
# Catalog entries: not installed, installed but disabled, installed and enabled.
_STATUS_NOT_INSTALLED = "available"
_STATUS_DISABLED = "installed (disabled)"
_STATUS_ENABLED = "enabled"
# Servers present in the config but absent from the catalog.
_STATUS_CUSTOM_ENABLED = "custom — enabled"
_STATUS_CUSTOM_DISABLED = "custom — disabled"
# ─── Row model — unifies catalog and custom entries ──────────────────────────
@dataclass
class _Row:
"""A row in the picker. ``entry`` is set for catalog rows; for custom
user-added MCPs only ``name`` + ``description`` + status are populated."""
name: str
description: str
status: str
entry: Optional[CatalogEntry] = None # None for non-catalog (custom) rows
@property
def is_custom(self) -> bool:
return self.entry is None
def _build_rows() -> List[_Row]:
    """Assemble the picker rows: catalog entries first, then custom servers.

    Catalog rows keep the order returned by ``list_catalog()``. Servers from
    ``installed_servers()`` whose name is not a catalog name follow, sorted,
    with their transport URL/command standing in for a description.
    """
    entries = list_catalog()
    known_names = {item.name for item in entries}

    def _catalog_status(entry_name: str) -> str:
        # ``is_enabled`` is only consulted for entries that are installed.
        if not is_installed(entry_name):
            return _STATUS_NOT_INSTALLED
        return _STATUS_ENABLED if is_enabled(entry_name) else _STATUS_DISABLED

    result: List[_Row] = []
    for item in entries:
        badge = _catalog_status(item.name)
        result.append(
            _Row(name=item.name, description=item.description, status=badge, entry=item)
        )

    # Servers the user configured directly, i.e. names the catalog doesn't know.
    for server_name, server_cfg in sorted(installed_servers().items()):
        if server_name in known_names:
            continue
        flag = server_cfg.get("enabled", True)
        if isinstance(flag, str):
            # String values count as enabled only for these spellings.
            flag = flag.lower() in {"true", "1", "yes"}
        badge = _STATUS_CUSTOM_DISABLED if not flag else _STATUS_CUSTOM_ENABLED
        # The transport URL/command doubles as the description for custom rows.
        transport = server_cfg.get("url") or server_cfg.get("command") or "(no transport)"
        result.append(_Row(name=server_name, description=str(transport), status=badge))
    return result
def _format_row(row: _Row) -> str:
return f"{row.name:<18} {row.status:<24} {row.description}"
# ─── Actions ──────────────────────────────────────────────────────────────────
def _enable_disable(name: str, *, enable: bool) -> None:
    """Set the ``enabled`` flag of ``mcp_servers[name]`` and save the config.

    Prints an error and leaves the config untouched when the server has no
    (non-empty) entry under ``mcp_servers``.
    """
    config = load_config()
    configured = config.get("mcp_servers") or {}
    target = configured.get(name)
    if not target:
        print(color(f" '{name}' is not installed.", Colors.RED))
        return

    target["enabled"] = enable
    config["mcp_servers"] = configured
    save_config(config)

    verb = "enabled" if enable else "disabled"
    message = (
        f"'{name}' {verb}. "
        "Start a new Hermes session for changes to take effect."
    )
    print(color(message, Colors.GREEN))
def _configure_tools(name: str) -> None:
    """Run the tool-selection flow for the MCP server called ``name``.

    This is a thin adapter: it wraps ``name`` in an ``argparse.Namespace``
    and hands it to ``cmd_mcp_configure`` from ``hermes_cli.mcp_config``.
    """
    # Imported lazily, inside the function, as in the rest of this module's
    # on-demand flows.
    from argparse import Namespace

    from hermes_cli.mcp_config import cmd_mcp_configure

    request = Namespace(name=name)
    cmd_mcp_configure(request)
def _remove_custom(name: str) -> None:
    """Delete ``name`` from ``mcp_servers`` in the config after confirmation.

    Prints an error when the server is not configured, and does nothing when
    the user declines the prompt. If the removed server was the last one, the
    whole ``mcp_servers`` key is dropped from the config.
    """
    config = load_config()
    configured = config.get("mcp_servers") or {}
    if name not in configured:
        print(color(f" '{name}' is not configured.", Colors.RED))
        return

    confirmed = prompt_yes_no(f"Remove '{name}' from mcp_servers?", default=False)
    if not confirmed:
        return

    configured.pop(name)
    if configured:
        config["mcp_servers"] = configured
    else:
        # Nothing left: remove the section instead of leaving an empty mapping.
        config.pop("mcp_servers", None)
    save_config(config)
    print(color(f" ✓ Removed '{name}'", Colors.GREEN))
def _handle_row(row: _Row) -> None:
    """Run the action that matches the picked row's current state.

    * catalog row, not installed  -> install it (enabled)
    * catalog row, not enabled    -> enable it
    * custom row                  -> submenu: configure tools / toggle / remove
    * catalog row, enabled        -> submenu: configure / disable / uninstall / reinstall
    """
    target = row.name

    if row.entry:
        # Catalog row that still needs installing.
        if not is_installed(target):
            try:
                install_entry(row.entry, enable=True)
            except CatalogError as exc:
                print(color(f" ✗ install failed: {exc}", Colors.RED))
            return
        # Catalog row that is installed but switched off.
        if not is_enabled(target):
            _enable_disable(target, enable=True)
            return

    if row.is_custom:
        toggle_label = "Disable" if is_enabled(target) else "Enable"
        picked = curses_single_select(
            f"Action for '{target}' (custom)",
            [
                "Configure tools (probe server + re-pick)",
                toggle_label,
                "Remove from config",
            ],
        )
        # ``None`` (cancelled) matches no branch and simply falls through.
        if picked == 0:
            _configure_tools(target)
        elif picked == 1:
            # State is re-read here rather than reusing the label decision.
            _enable_disable(target, enable=not is_enabled(target))
        elif picked == 2:
            _remove_custom(target)
        return

    # Remaining case: catalog row that is installed and enabled.
    print()
    print(color(f" '{target}' is already enabled.", Colors.DIM))
    picked = curses_single_select(
        f"Action for '{target}'",
        [
            "Configure tools (probe server + re-pick)",
            "Disable (keep config, stop loading on next session)",
            "Uninstall (remove config and any cloned files)",
            "Reinstall (re-clone, re-prompt for credentials)",
        ],
    )
    if picked is None:
        return
    if picked == 0:
        _configure_tools(target)
        return
    if picked == 1:
        _enable_disable(target, enable=False)
        return
    if picked == 2:
        if not prompt_yes_no(f"Uninstall '{target}'?", default=False):
            return
        if uninstall_entry(target):
            print(color(
                f" ✓ Uninstalled '{target}'. "
                "Credentials in .env preserved — delete manually if no longer needed.",
                Colors.GREEN,
            ))
        else:
            print(color(f" '{target}' was not installed", Colors.DIM))
        return
    if picked == 3:
        # Kept from the original: narrows Optional[CatalogEntry] for reinstall.
        assert row.entry is not None
        try:
            install_entry(row.entry, enable=True)
        except CatalogError as exc:
            print(color(f" ✗ reinstall failed: {exc}", Colors.RED))
# ─── Output / entry points ────────────────────────────────────────────────────
def _print_rows_text(rows: List[_Row]) -> None:
    """Plain-text catalog dump used as a fallback when curses can't run, and
    as the default output of `hermes mcp catalog`.

    An empty ``rows`` list prints a short "nothing to show" notice. Otherwise
    the function prints a fixed-width Name / Status / Description table, a
    usage hint, and one warning per ``future_manifest`` catalog diagnostic.
    """
    if not rows:
        print()
        print(color(" No MCPs in the catalog or configured.", Colors.DIM))
        print()
        return
    print()
    print(color(" MCP Catalog + configured servers:", Colors.CYAN + Colors.BOLD))
    print()
    # Column widths (18 / 24) match the widths used by ``_format_row``.
    print(f" {'Name':<18} {'Status':<24} Description")
    print(f" {'-' * 18} {'-' * 24} {'-' * 11}")
    for row in rows:
        print(f" {_format_row(row)}")
    print()
    print(color(
        " Install: hermes mcp install <name> Picker: hermes mcp",
        Colors.DIM,
    ))
    # Surface manifest-version warnings so users know when their Hermes is
    # too old to install everything in the catalog.
    diags = catalog_diagnostics()
    # Each diagnostic is unpacked as three values below; index 1 holds the kind.
    future = [d for d in diags if d[1] == "future_manifest"]
    if future:
        print()
        # ``msg`` is unpacked but not used in the warning text.
        for name, _, msg in future:
            print(color(
                f"'{name}' requires a newer Hermes — run `hermes update` "
                "to install this entry.",
                Colors.YELLOW,
            ))
        # NOTE(review): the nesting of this print() and the one below was
        # reconstructed from flattened text — confirm against the real file.
        print()
    print()
def show_catalog() -> None:
    """`hermes mcp catalog` — dump catalog entries and custom servers as text.

    Non-interactive: builds the rows once and prints them.
    """
    current_rows = _build_rows()
    _print_rows_text(current_rows)
def run_picker() -> None:
    """`hermes mcp picker` (and default `hermes mcp`) — interactive selector.

    When stdin is not a TTY the rows are printed as text and the function
    returns. Otherwise it rebuilds the rows before every selection, so each
    action's effect is visible on the next pass, and stops when the selector
    returns ``None`` or there are no rows left to show.
    """
    interactive = sys.stdin.isatty()
    if not interactive:
        # Non-interactive shell: degrade to the text dump rather than failing.
        _print_rows_text(_build_rows())
        return

    title = "MCP Catalog — ↑↓ navigate ENTER act on entry ESC/q quit"
    while True:
        current = _build_rows()
        if not current:
            _print_rows_text(current)
            break
        picked = curses_single_select(title, [_format_row(item) for item in current])
        if picked is None:
            break
        _handle_row(current[picked])
def install_by_name(identifier: str) -> int:
    """`hermes mcp install <name>` — non-interactive entry-point.

    Looks ``identifier`` up in the catalog and installs it enabled.

    Returns:
        0 when the install succeeds; 1 when the identifier is not in the
        catalog or ``install_entry`` raises ``CatalogError``.
    """
    from hermes_cli.mcp_catalog import get_entry

    found = get_entry(identifier)
    if found is not None:
        try:
            install_entry(found, enable=True)
        except CatalogError as exc:
            print(color(f" ✗ install failed: {exc}", Colors.RED))
            return 1
        return 0

    print(color(
        f"'{identifier}' is not in the catalog. "
        "Run `hermes mcp catalog` to see available entries.",
        Colors.RED,
    ))
    return 1

View file

@ -7,13 +7,13 @@ the provider's config schema. Writes config to config.yaml + .env.
from __future__ import annotations
import getpass
import os
import sys
import shlex
from pathlib import Path
from hermes_constants import get_hermes_home
from hermes_cli.secret_prompt import masked_secret_prompt
# ---------------------------------------------------------------------------
@ -39,12 +39,7 @@ def _prompt(label: str, default: str | None = None, secret: bool = False) -> str
"""Prompt for a value with optional default and secret masking."""
suffix = f" [{default}]" if default else ""
if secret:
sys.stdout.write(f" {label}{suffix}: ")
sys.stdout.flush()
if sys.stdin.isatty():
val = getpass.getpass(prompt="")
else:
val = sys.stdin.readline().strip()
val = masked_secret_prompt(f" {label}{suffix}: ")
else:
sys.stdout.write(f" {label}{suffix}: ")
sys.stdout.flush()

View file

@ -37,7 +37,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
("anthropic/claude-sonnet-4.6", ""),
("moonshotai/kimi-k2.6", "recommended"),
("openrouter/pareto-code", "auto-routes to cheapest coder meeting openrouter.min_coding_score"),
("qwen/qwen3.6-plus", ""),
("qwen/qwen3.7-max", ""),
("anthropic/claude-haiku-4.5", ""),
("openai/gpt-5.5", ""),
("openai/gpt-5.5-pro", ""),
@ -166,7 +166,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"anthropic/claude-opus-4.6",
"anthropic/claude-sonnet-4.6",
"moonshotai/kimi-k2.6",
"qwen/qwen3.6-plus",
"qwen/qwen3.7-max",
"anthropic/claude-haiku-4.5",
"openai/gpt-5.5",
"openai/gpt-5.5-pro",
@ -199,6 +199,18 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"gpt-4o",
"gpt-4o-mini",
],
"openai-api": [
"gpt-5.5",
"gpt-5.5-pro",
"gpt-5.4",
"gpt-5.4-mini",
"gpt-5.4-nano",
"gpt-5-mini",
"gpt-5.3-codex",
"gpt-4.1",
"gpt-4o",
"gpt-4o-mini",
],
"openai-codex": _codex_curated_models(),
"xai-oauth": _xai_curated_models(),
"copilot-acp": [
@ -387,6 +399,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"mimo-v2-omni",
"minimax-m2.7",
"minimax-m2.5",
"qwen3.7-max",
"qwen3.6-plus",
"qwen3.5-plus",
],
@ -928,8 +941,9 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("lmstudio", "LM Studio", "LM Studio (local desktop app with built-in model server)"),
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
ProviderEntry("openai-api", "OpenAI API", "OpenAI API (api.openai.com, API key)"),
ProviderEntry("alibaba", "Qwen Cloud", "Qwen Cloud / DashScope Coding (Qwen + multi-provider)"),
ProviderEntry("xai-oauth", "xAI Grok OAuth (SuperGrok Subscription)", "xAI Grok OAuth (SuperGrok Subscription)"),
ProviderEntry("xai-oauth", "xAI Grok OAuth (SuperGrok / Premium+)", "xAI Grok OAuth (SuperGrok / Premium+)"),
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
ProviderEntry("tencent-tokenhub", "Tencent TokenHub", "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"),
ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
@ -2229,7 +2243,7 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
live = fetch_ollama_cloud_models(force_refresh=force_refresh)
if live:
return live
if normalized == "openai":
if normalized in ("openai", "openai-api"):
api_key = os.getenv("OPENAI_API_KEY", "").strip()
if api_key:
base_raw = os.getenv("OPENAI_BASE_URL", "").strip().rstrip("/")
@ -3002,6 +3016,8 @@ def opencode_model_api_mode(provider_id: Optional[str], model_id: Optional[str])
if provider == "opencode-go":
if normalized.startswith("minimax-"):
return "anthropic_messages"
if normalized.startswith("qwen3.7-max"):
return "anthropic_messages"
return "chat_completions"
if provider == "opencode-zen":
@ -3491,7 +3507,7 @@ def validate_requested_model(
suggestion_text = ""
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
provider_label = "OpenAI Codex" if normalized == "openai-codex" else "xAI Grok OAuth (SuperGrok Subscription)"
provider_label = "OpenAI Codex" if normalized == "openai-codex" else "xAI Grok OAuth (SuperGrok / Premium+)"
return {
"accepted": True,
"persist": True,

View file

@ -17,7 +17,6 @@ Model / provider selection mirrors `hermes chat`:
Env var fallbacks (used when the corresponding arg is not passed):
- HERMES_INFERENCE_MODEL
- HERMES_INFERENCE_PROVIDER (already read by resolve_runtime_provider)
"""
from __future__ import annotations
@ -28,6 +27,8 @@ import sys
from contextlib import redirect_stderr, redirect_stdout
from typing import Optional
from hermes_cli.fallback_config import get_fallback_chain
def _normalize_toolsets(toolsets: object = None) -> list[str] | None:
if not toolsets:
@ -133,9 +134,8 @@ def run_oneshot(
prompt: The user message to send.
model: Optional model override. Falls back to HERMES_INFERENCE_MODEL
env var, then config.yaml's model.default / model.model.
provider: Optional provider override. Falls back to
HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
then "auto".
provider: Optional provider override. Falls back to config.yaml's
model.provider, then "auto".
toolsets: Optional comma-separated string or iterable of toolsets.
Returns the exit code. Caller should sys.exit() with the return.
@ -301,14 +301,9 @@ def _run_agent(
toolsets_list = sorted(_get_platform_tools(cfg, "cli"))
session_db = _create_session_db_for_oneshot()
# Read fallback chain from profile config — supports both the new list
# format (fallback_providers) and the legacy single-dict (fallback_model).
# Mirrors the same normalization in cli.py so oneshot workers (e.g. kanban
# workers spawned via `hermes -p <profile> chat -q ...`) honour the
# profile's fallback chain just like interactive sessions do.
_fb = cfg.get("fallback_providers") or cfg.get("fallback_model") or []
if isinstance(_fb, dict):
_fb = [_fb] if _fb.get("provider") and _fb.get("model") else []
# Read the effective fallback chain from profile config so oneshot workers
# honour the same merge semantics as interactive CLI and gateway sessions.
_fb = get_fallback_chain(cfg)
agent = AIAgent(
api_key=runtime.get("api_key"),

View file

@ -640,6 +640,88 @@ class PluginContext:
self.manifest.name, provider.name,
)
# -- TTS provider registration -------------------------------------------
def register_tts_provider(self, provider) -> None:
    """Register a text-to-speech backend supplied by this plugin.

    ``provider`` must be an instance of
    :class:`agent.tts_provider.TTSProvider`; anything else is logged as a
    warning and ignored. Accepted providers are handed to
    ``agent.tts_registry.register_provider``.

    The original documentation for this hook states that ``provider.name``
    is what ``tts.provider`` in ``config.yaml`` is matched against, and that
    built-in provider names and ``tts.providers.<name>: type: command``
    entries take precedence over a plugin provider of the same name (see
    issue #30398 for the design rationale).
    """
    from agent.tts_provider import TTSProvider
    from agent.tts_registry import register_provider as _register_tts_provider

    if isinstance(provider, TTSProvider):
        _register_tts_provider(provider)
        logger.info(
            "Plugin '%s' registered TTS provider: %s",
            self.manifest.name, provider.name,
        )
        return

    # Wrong type: skip registration without raising.
    logger.warning(
        "Plugin '%s' tried to register a TTS provider that does "
        "not inherit from TTSProvider. Ignoring.",
        self.manifest.name,
    )
# -- transcription (STT) provider registration ---------------------------
def register_transcription_provider(self, provider) -> None:
    """Register a speech-to-text backend supplied by this plugin.

    ``provider`` must be an instance of
    :class:`agent.transcription_provider.TranscriptionProvider`; anything
    else is logged as a warning and ignored. Accepted providers are handed
    to ``agent.transcription_registry.register_provider``.

    The original documentation for this hook states that ``provider.name``
    is what ``stt.provider`` in ``config.yaml`` is matched against, that
    built-in STT names (``local``, ``local_command``, ``groq``, ``openai``,
    ``mistral``, ``xai``) and ``stt.providers.<name>: type: command``
    entries take precedence on a name collision, and that the hook is meant
    for *new* Python engines rather than replacements for the built-in
    backends in ``tools/transcription_tools.py``.
    """
    from agent.transcription_provider import TranscriptionProvider
    from agent.transcription_registry import register_provider as _register_stt_provider

    if isinstance(provider, TranscriptionProvider):
        _register_stt_provider(provider)
        logger.info(
            "Plugin '%s' registered transcription provider: %s",
            self.manifest.name, provider.name,
        )
        return

    # Wrong type: skip registration without raising.
    logger.warning(
        "Plugin '%s' tried to register a transcription provider that "
        "does not inherit from TranscriptionProvider. Ignoring.",
        self.manifest.name,
    )
# -- platform adapter registration ---------------------------------------
def register_platform(
@ -698,6 +780,119 @@ class PluginContext:
# -- hook registration --------------------------------------------------
# -- auxiliary task registration ---------------------------------------
def register_auxiliary_task(
    self,
    key: str,
    *,
    display_name: str,
    description: str,
    defaults: Optional[Dict[str, Any]] = None,
) -> None:
    """Register a plugin-defined auxiliary LLM task.

    The registration is stored on the plugin manager under ``key`` as
    ``{key, display_name, description, defaults, plugin}``. The original
    documentation for this hook describes auxiliary tasks as LLM-backed side
    jobs configured under ``auxiliary.<key>`` in ``config.yaml`` and bridged
    to ``AUXILIARY_<KEY_UPPER>_*`` env vars.

    Args:
        key: stable task key; non-empty string of alphanumerics/underscores
            that is not the key of a built-in auxiliary task.
        display_name: human-readable name for the task.
        description: short one-line description.
        defaults: optional routing defaults. They are laid over the base
            values ``provider="auto"``, ``model=""``, ``base_url=""``,
            ``api_key=""``, ``timeout=60``, ``extra_body={}``; unknown keys
            are kept verbatim.

    Raises:
        ValueError: if *key* is empty or not a string, contains other
            characters, shadows a built-in task key, or is already
            registered by a different plugin.

    Example:
        ctx.register_auxiliary_task(
            key="memory_retain_filter",
            display_name="Memory retain filter",
            description="hindsight pre-retain dedup/extract",
            defaults={"provider": "auto", "timeout": 30},
        )
    """
    plugin_name = self.manifest.name

    # --- key shape -------------------------------------------------------
    if not (key and isinstance(key, str)):
        raise ValueError(
            f"Plugin '{plugin_name}' tried to register auxiliary task "
            f"with invalid key {key!r}"
        )
    if any(not (ch.isalnum() or ch == "_") for ch in key):
        raise ValueError(
            f"Plugin '{plugin_name}' auxiliary task key {key!r} "
            f"must contain only alphanumeric characters and underscores"
        )

    # --- built-in collisions ---------------------------------------------
    # Lazy import to avoid circular: hermes_cli.main imports plugins indirectly
    from hermes_cli.main import _AUX_TASKS as _BUILTIN_AUX_TASKS

    reserved = {task_key for task_key, _label, _blurb in _BUILTIN_AUX_TASKS}
    if key in reserved:
        raise ValueError(
            f"Plugin '{plugin_name}' cannot register auxiliary task "
            f"{key!r} — that key is reserved for a built-in task. "
            f"Pick a plugin-namespaced key (e.g. '{plugin_name}_{key}')."
        )

    # --- cross-plugin collisions -----------------------------------------
    # Re-registration by the same plugin is allowed and overwrites the entry.
    registry = self._manager._aux_tasks
    prior = registry.get(key)
    if prior is not None and prior.get("plugin") != plugin_name:
        raise ValueError(
            f"Plugin '{plugin_name}' cannot register auxiliary task "
            f"{key!r} — already registered by plugin "
            f"'{prior.get('plugin')}'"
        )

    # --- defaults --------------------------------------------------------
    # The plugin owns the schema; the base values only guarantee that the
    # routing fields exist with sensible types.
    merged_defaults: Dict[str, Any] = {
        "provider": "auto",
        "model": "",
        "base_url": "",
        "api_key": "",
        "timeout": 60,
        "extra_body": {},
    }
    if defaults:
        merged_defaults.update(defaults.items())

    registry[key] = {
        "key": key,
        "display_name": display_name,
        "description": description,
        "defaults": merged_defaults,
        "plugin": plugin_name,
    }
    logger.debug(
        "Plugin %s registered auxiliary task: %s (%s)",
        plugin_name,
        key,
        display_name,
    )
def register_hook(self, hook_name: str, callback: Callable) -> None:
"""Register a lifecycle hook callback.
@ -782,6 +977,9 @@ class PluginManager:
self._cli_ref = None # Set by CLI after plugin discovery
# Plugin skill registry: qualified name → metadata dict.
self._plugin_skills: Dict[str, Dict[str, Any]] = {}
# Plugin-registered auxiliary tasks: key → {key, display_name,
# description, defaults, plugin}. See PluginContext.register_auxiliary_task.
self._aux_tasks: Dict[str, Dict[str, Any]] = {}
# -----------------------------------------------------------------------
# Public
@ -803,6 +1001,7 @@ class PluginManager:
self._cli_commands.clear()
self._plugin_commands.clear()
self._plugin_skills.clear()
self._aux_tasks.clear()
self._context_engine = None
self._discovered = True
@ -1548,6 +1747,21 @@ def get_plugin_commands() -> Dict[str, dict]:
return _ensure_plugins_discovered()._plugin_commands
def get_plugin_auxiliary_tasks() -> List[Dict[str, Any]]:
    """Return every plugin-registered auxiliary task, ordered by task key.

    Each element is the dict stored by
    :meth:`PluginContext.register_auxiliary_task`:
    ``{key, display_name, description, defaults, plugin}``.

    The registry is read through ``_ensure_plugins_discovered()``, so callers
    do not need to trigger discovery themselves. Sorting by key keeps the
    order deterministic for pickers and tests.
    """
    registry = _ensure_plugins_discovered()._aux_tasks
    by_key = sorted(registry.items(), key=lambda pair: pair[0])
    return [task for _task_key, task in by_key]
def get_plugin_toolsets() -> List[tuple]:
"""Return plugin toolsets as ``(key, label, description)`` tuples.

View file

@ -20,6 +20,7 @@ from typing import Any, Optional
from hermes_constants import get_hermes_home
from hermes_cli.config import cfg_get
from hermes_cli.secret_prompt import masked_secret_prompt
logger = logging.getLogger(__name__)
@ -76,22 +77,42 @@ def _plugins_dir() -> Path:
return plugins
def _sanitize_plugin_name(name: str, plugins_dir: Path) -> Path:
def _sanitize_plugin_name(
name: str,
plugins_dir: Path,
*,
allow_subdir: bool = False,
) -> Path:
"""Validate a plugin name and return the safe target path inside *plugins_dir*.
Raises ``ValueError`` if the name contains path-traversal sequences or would
resolve outside the plugins directory.
``allow_subdir=True`` permits a single forward slash inside *name* so
category-namespaced plugin keys like ``observability/langfuse`` or
``image_gen/openai`` (the registry keys emitted by ``_discover_all_plugins``)
can be looked up. ``..`` and backslash are still rejected, leading and
trailing slashes are stripped, and the resolved target must still live
inside *plugins_dir*. Install paths leave this at the default ``False``
because a freshly-cloned plugin always lands top-level under
``~/.hermes/plugins/<name>/``.
"""
if not name:
raise ValueError("Plugin name must not be empty.")
if allow_subdir:
name = name.strip("/")
if not name:
raise ValueError("Plugin name must not be empty.")
if name in {".", ".."}:
raise ValueError(
f"Invalid plugin name '{name}': must not reference the plugins directory itself."
)
# Reject obvious traversal characters
for bad in ("/", "\\", ".."):
bad_chars = ("\\", "..") if allow_subdir else ("/", "\\", "..")
for bad in bad_chars:
if bad in name:
raise ValueError(f"Invalid plugin name '{name}': must not contain '{bad}'.")
@ -267,8 +288,7 @@ def _prompt_plugin_env_vars(manifest: dict, console) -> None:
try:
if secret:
import getpass
value = getpass.getpass(f" {name}: ").strip()
value = masked_secret_prompt(f" {name}: ").strip()
else:
value = input(f" {name}: ").strip()
except (EOFError, KeyboardInterrupt):
@ -326,7 +346,7 @@ def _display_removed(name: str, plugins_dir: Path) -> None:
def _require_installed_plugin(name: str, plugins_dir: Path, console) -> Path:
"""Return the plugin path if it exists, or exit with an error listing installed plugins."""
target = _sanitize_plugin_name(name, plugins_dir)
target = _sanitize_plugin_name(name, plugins_dir, allow_subdir=True)
if not target.exists():
installed = ", ".join(d.name for d in plugins_dir.iterdir() if d.is_dir()) or "(none)"
console.print(
@ -1051,7 +1071,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
curses.init_pair(1, curses.COLOR_GREEN, -1)
curses.init_pair(2, curses.COLOR_YELLOW, -1)
curses.init_pair(3, curses.COLOR_CYAN, -1)
curses.init_pair(4, 8, -1) # dim gray
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1) # dim gray
cursor = 0
scroll_offset = 0
@ -1196,7 +1216,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
curses.init_pair(1, curses.COLOR_GREEN, -1)
curses.init_pair(2, curses.COLOR_YELLOW, -1)
curses.init_pair(3, curses.COLOR_CYAN, -1)
curses.init_pair(4, 8, -1)
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
curses.curs_set(0)
elif key in {curses.KEY_ENTER, 10, 13}:
if cursor < n_plugins:
@ -1228,7 +1248,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
curses.init_pair(1, curses.COLOR_GREEN, -1)
curses.init_pair(2, curses.COLOR_YELLOW, -1)
curses.init_pair(3, curses.COLOR_CYAN, -1)
curses.init_pair(4, 8, -1)
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
curses.curs_set(0)
elif key in {27, ord("q")}:
# Save plugin changes on exit
@ -1508,7 +1528,7 @@ def _user_installed_plugin_dir(name: str) -> Optional[Path]:
"""Resolved path under ``~/.hermes/plugins/<name>`` if it exists."""
plugins_dir = _plugins_dir()
try:
target = _sanitize_plugin_name(name, plugins_dir)
target = _sanitize_plugin_name(name, plugins_dir, allow_subdir=True)
except ValueError:
return None
return target if target.is_dir() else None

219
hermes_cli/portal_cli.py Normal file
View file

@ -0,0 +1,219 @@
"""``hermes portal`` — small CLI surface for Nous Portal users.
Subcommands:
status Show Portal auth state + which Tool Gateway tools are routed.
open Open the Portal subscription page in the user's default browser.
tools List Tool Gateway tools and which are active in the current config.
This command is intentionally minimal — it does not duplicate functionality
already in ``hermes auth`` or ``hermes tools``. It's a discovery + status
surface for the Portal subscription itself.
"""
from __future__ import annotations
import sys
import webbrowser
from typing import Optional
from hermes_cli.colors import Colors, color
from hermes_cli.config import load_config
# Fallback Portal base URL, used when the auth state carries no
# ``portal_base_url``.
DEFAULT_PORTAL_URL = "https://portal.nousresearch.com"
# Subscription page: opened by ``_cmd_open`` and printed as a hint elsewhere.
SUBSCRIPTION_URL = "https://portal.nousresearch.com/manage-subscription"
# Tool Gateway documentation link printed by the status / tools commands.
DOCS_URL = "https://hermes-agent.nousresearch.com/docs/user-guide/features/tool-gateway"
def _nous_portal_base_url() -> str:
    """Resolve the Portal base URL from auth state, falling back to the default.

    Reads ``portal_base_url`` from ``get_nous_auth_status()``. The value is
    stripped of surrounding whitespace and trailing slashes; if nothing is
    left (missing, blank, or just ``"/"``), or if reading the auth state
    fails for any reason, ``DEFAULT_PORTAL_URL`` is returned.

    Returns:
        A non-empty base URL without a trailing slash.
    """
    try:
        from hermes_cli.auth import get_nous_auth_status

        status = get_nous_auth_status() or {}
        url = status.get("portal_base_url")
        if isinstance(url, str):
            # Normalize first, then test: the earlier code tested
            # ``url.strip()`` but returned ``url.rstrip("/")``, which kept
            # surrounding whitespace and turned "/" into an empty string.
            cleaned = url.strip().rstrip("/")
            if cleaned:
                return cleaned
    except Exception:
        # Deliberate best-effort: any failure to read auth state (including a
        # failed import) just means the default URL is used.
        pass
    return DEFAULT_PORTAL_URL
def _cmd_status(args) -> int:
    """Print the Portal auth summary followed by Tool Gateway routing.

    ``args`` is accepted for dispatch compatibility and is not read.
    Failures while reading auth state are treated as "not logged in"; a
    failure while resolving subscription features prints a notice instead of
    the routing table. Always returns 0.
    """
    from hermes_cli.auth import get_nous_auth_status
    from hermes_cli.nous_subscription import get_nous_subscription_features

    settings = load_config() or {}
    try:
        auth_state = get_nous_auth_status() or {}
    except Exception:
        auth_state = {}
    is_logged_in = bool(auth_state.get("logged_in"))

    # --- auth section ----------------------------------------------------
    print()
    print(color(" Nous Portal", Colors.MAGENTA))
    print(color(" ───────────", Colors.MAGENTA))
    if not is_logged_in:
        print(f" Auth: {color('not logged in', Colors.YELLOW)}")
        print(f" Sign up: {SUBSCRIPTION_URL}")
        print(" Login: hermes auth add nous --type oauth")
    else:
        portal_url = auth_state.get("portal_base_url") or DEFAULT_PORTAL_URL
        print(f" Auth: {color('✓ logged in', Colors.GREEN)}")
        print(f" Portal: {portal_url}")
        inference_url = auth_state.get("inference_base_url")
        if inference_url:
            print(f" API: {inference_url}")

    # --- provider selection (independent of auth) --------------------------
    model_section = settings.get("model")
    if not isinstance(model_section, dict):
        model_section = {}
    active_provider = str(model_section.get("provider") or "").strip().lower()
    if active_provider == "nous":
        print(f" Model: {color('✓ using Nous as inference provider', Colors.GREEN)}")
    elif active_provider:
        print(f" Model: currently {active_provider} (switch with `hermes model`)")

    # --- Tool Gateway routing ------------------------------------------------
    print()
    print(color(" Tool Gateway", Colors.MAGENTA))
    print(color(" ────────────", Colors.MAGENTA))
    try:
        snapshot = get_nous_subscription_features(settings)
    except Exception:
        snapshot = None
    if snapshot is None:
        print(" (could not resolve subscription state)")
        return 0

    def _routing(feature) -> str:
        # First matching rule wins: managed by Nous, then a named provider,
        # then plain "active", else "not configured".
        if feature.managed_by_nous:
            return color("via Nous Portal", Colors.GREEN)
        if feature.active and feature.current_provider:
            return feature.current_provider
        if feature.active:
            return "active"
        return color("not configured", Colors.DIM)

    table = [(feature.label, _routing(feature)) for feature in snapshot.items()]
    pad = max((len(name) for name, _state in table), default=0)
    for name, state in table:
        print(f" {name:<{pad}} {state}")

    if not is_logged_in:
        print()
        print(color(f" Docs: {DOCS_URL}", Colors.DIM))
    return 0
def _cmd_open(args) -> int:
    """Open the Portal subscription page in the default browser.

    ``args`` is accepted for dispatch compatibility and is not read.

    Returns:
        0 when ``webbrowser.open`` reports success; 1 when it returns a falsy
        value or raises, after telling the user to visit the URL manually.
    """
    url = SUBSCRIPTION_URL
    print(f"Opening {url}")
    try:
        launched = webbrowser.open(url)
    except Exception:
        launched = False

    if launched:
        return 0
    print()
    print("Could not launch a browser. Visit the URL above manually.")
    return 1
def _cmd_tools(args) -> int:
    """Print the Tool Gateway catalog with the current routing of each tool.

    ``args`` is accepted for dispatch compatibility and is not read.

    Returns:
        1 (with a message on stderr) when the subscription features cannot
        be resolved, otherwise 0.
    """
    from hermes_cli.nous_subscription import get_nous_subscription_features

    settings = load_config() or {}
    try:
        snapshot = get_nous_subscription_features(settings)
    except Exception:
        print("Could not resolve Tool Gateway state.", file=sys.stderr)
        return 1

    # Static catalog — the partners Tool Gateway routes to today.
    # Each entry is (feature key, display label, partner name).
    partners = (
        ("web", "Web search & extract", "Firecrawl"),
        ("image_gen", "Image generation", "FAL"),
        ("tts", "Text-to-speech", "OpenAI TTS"),
        ("browser", "Browser automation", "Browser Use"),
        ("modal", "Cloud terminal", "Modal"),
    )

    def _routing(feature) -> str:
        # A key missing from the resolved features is reported as "unknown".
        if feature is None:
            return color("unknown", Colors.DIM)
        if feature.managed_by_nous:
            return color("✓ via Nous Portal", Colors.GREEN)
        if feature.active and feature.current_provider:
            return feature.current_provider
        if feature.active:
            return "active"
        return color("not configured", Colors.DIM)

    print()
    print(color(" Tool Gateway catalog", Colors.MAGENTA))
    print(color(" ────────────────────", Colors.MAGENTA))
    if not snapshot.nous_auth_present:
        print(color(" Not logged into Nous Portal — sign in with `hermes auth add nous --type oauth`.", Colors.YELLOW))
        print()

    pad = max(len(title) for _key, title, _partner in partners)
    for feature_key, title, partner in partners:
        state = _routing(snapshot.features.get(feature_key))
        print(f" {title:<{pad}} partner: {partner:<14} {state}")

    print()
    print(color(f" Manage your subscription: {SUBSCRIPTION_URL}", Colors.DIM))
    print(color(f" Docs: {DOCS_URL}", Colors.DIM))
    return 0
def portal_command(args) -> int:
    """Dispatch `hermes portal <subcommand>` and return its exit code.

    A missing or empty ``args.portal_command`` behaves like ``status``, so
    the bare command prints an overview. Unknown subcommands print an error
    to stderr and return 1.
    """
    requested = getattr(args, "portal_command", None)

    # Bare `hermes portal` and `hermes portal status` share one handler.
    if requested is None or requested == "" or requested == "status":
        return _cmd_status(args)
    if requested == "open":
        return _cmd_open(args)
    if requested == "tools":
        return _cmd_tools(args)

    for line in (
        f"Unknown portal subcommand: {requested}",
        "Run `hermes portal -h` for usage.",
    ):
        print(line, file=sys.stderr)
    return 1
def add_parser(subparsers) -> None:
    """Attach the `hermes portal` command and its subcommands to ``subparsers``.

    Registers ``status``, ``open`` and ``tools`` under a nested subparser
    whose chosen name lands in ``args.portal_command``, and sets
    ``portal_command`` as the parser's ``func`` default.
    """
    parser = subparsers.add_parser(
        "portal",
        help="Nous Portal status, subscription, and Tool Gateway routing",
        description=(
            "Inspect Nous Portal auth, Tool Gateway routing, and open the "
            "Portal subscription page. Subcommands: status (default), "
            "open, tools."
        ),
    )
    nested = parser.add_subparsers(dest="portal_command")

    # (subcommand, help text) in registration order.
    subcommands = (
        ("status", "Show Portal auth + Tool Gateway routing summary (default)"),
        ("open", "Open the Portal subscription page in your default browser"),
        ("tools", "List Tool Gateway tools and which are routed via Nous"),
    )
    for sub_name, sub_help in subcommands:
        nested.add_parser(sub_name, help=sub_help)

    parser.set_defaults(func=portal_command)

View file

@ -35,6 +35,7 @@ from pathlib import Path
from typing import Optional
from hermes_cli import profiles as profiles_mod
from agent.skill_utils import is_excluded_skill_path
logger = logging.getLogger(__name__)
@ -109,8 +110,7 @@ def _collect_skills(profile_dir: Path) -> list[str]:
return []
names: list[str] = []
for md in skills_dir.rglob("SKILL.md"):
path_str = str(md)
if "/.hub/" in path_str or "/.git/" in path_str:
if is_excluded_skill_path(md):
continue
try:
rel = md.relative_to(skills_dir)
@ -201,7 +201,7 @@ def describe_profile(
skill_list = "\n".join(f" - {n}" for n in skill_names) or " (no skills installed)"
skill_count = sum(
1 for _ in (profile_dir / "skills").rglob("SKILL.md")
if "/.hub/" not in str(_) and "/.git/" not in str(_)
if not is_excluded_skill_path(_)
) if (profile_dir / "skills").is_dir() else 0
# Read model + provider from the profile's config.

View file

@ -70,6 +70,8 @@ from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from agent.skill_utils import is_excluded_skill_path
# ---------------------------------------------------------------------------
# Constants
@ -430,6 +432,20 @@ def _stage_source(source: str, workdir: Path) -> Tuple[Path, str]:
)
def _reject_distribution_symlinks(staged: Path) -> None:
"""Reject symlinks before reading or copying distribution files."""
for entry in staged.rglob("*"):
if not entry.is_symlink():
continue
try:
rel = entry.relative_to(staged)
except ValueError:
rel = entry
raise DistributionError(
f"Profile distributions cannot contain symlinks: {rel}"
)
# ---------------------------------------------------------------------------
# Install
# ---------------------------------------------------------------------------
@ -463,7 +479,9 @@ def _count_skills(staged: Path) -> int:
skills_dir = staged / "skills"
if not skills_dir.is_dir():
return 0
return sum(1 for _ in skills_dir.rglob("SKILL.md"))
return sum(
1 for p in skills_dir.rglob("SKILL.md") if not is_excluded_skill_path(p)
)
def plan_install(
@ -480,6 +498,7 @@ def plan_install(
from hermes_cli import __version__ as hermes_version
staged, provenance = _stage_source(source, workdir)
_reject_distribution_symlinks(staged)
manifest = read_manifest(staged)
if manifest is None:
raise DistributionError(

View file

@ -30,6 +30,8 @@ from dataclasses import dataclass
from pathlib import Path, PurePosixPath, PureWindowsPath
from typing import List, Optional
from agent.skill_utils import is_excluded_skill_path
_PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")
# Directories bootstrapped inside every new profile
@ -485,8 +487,9 @@ def _count_skills(profile_dir: Path) -> int:
return 0
count = 0
for md in skills_dir.rglob("SKILL.md"):
if "/.hub/" not in str(md) and "/.git/" not in str(md):
count += 1
if is_excluded_skill_path(md):
continue
count += 1
return count
@ -720,7 +723,17 @@ def create_profile(
for filename in _CLONE_CONFIG_FILES:
src = source_dir / filename
if src.exists():
shutil.copy2(src, profile_dir / filename)
dst = profile_dir / filename
shutil.copy2(src, dst)
# Tighten .env to owner-only after copy. shutil.copy2
# preserves source mode bits, but if the source's .env
# was loose (host umask 0o022 leaving 0o644), tighten
# explicitly so the clone doesn't inherit weak perms.
if filename == ".env":
try:
os.chmod(str(dst), 0o600)
except OSError:
pass
# Clone installed skills from the source profile. The dashboard's
# "clone from default" flow is expected to preserve both bundled
@ -774,6 +787,14 @@ def create_profile(
except Exception:
pass # non-fatal — user can describe later with `hermes profile describe`
# Phase 4: when running inside a container under s6, register the
# new profile's gateway as a runtime s6 service so
# `hermes -p <profile> gateway start` can supervise it via
# `s6-svc -u` instead of spawning a bare process. On host (systemd
# / launchd / windows) this is a no-op — the existing per-profile
# unit-generation paths handle gateway lifecycle.
_maybe_register_gateway_service(canon)
return profile_dir
@ -890,6 +911,10 @@ def delete_profile(name: str, yes: bool = False) -> Path:
# 1. Disable service (prevents auto-restart)
_cleanup_gateway_service(canon, profile_dir)
# 1b. Phase 4: unregister the s6 service slot (container path).
# On host this is a no-op; on container it removes
# /run/service/gateway-<profile>/ so s6-supervise drops it.
_maybe_unregister_gateway_service(canon)
# 2. Stop running gateway
if gw_running:
@ -902,7 +927,49 @@ def delete_profile(name: str, yes: bool = False) -> Path:
# 4. Remove profile directory
try:
shutil.rmtree(profile_dir)
def _make_writable(func, path, exc):
"""onexc/onerror handler: add +w on PermissionError so rmtree can proceed.
Handles two cases on NixOS (and other systems with read-only
copies from immutable stores):
1. The path itself isn't writable (e.g. a file with mode 0444)
2. The *parent* directory isn't writable (e.g. mode 0555)
Compatible with both the ``onexc`` API (3.12+, receives an
exception instance) and the ``onerror`` API (3.11-, receives
``sys.exc_info()`` tuple).
"""
import stat as _stat
import sys as _sys
# Normalise the two callback signatures:
# onexc(func, path, exc_instance) — 3.12+
# onerror(func, path, exc_info_tuple) — 3.11
if isinstance(exc, tuple):
exc = exc[1] # exc_info → actual exception object
if isinstance(exc, PermissionError):
# Make the path writable
try:
os.chmod(path, os.stat(path).st_mode | _stat.S_IWUSR)
except OSError:
pass
# Also make the parent writable (needed for unlink/rmdir)
parent = os.path.dirname(path)
if parent:
try:
os.chmod(parent, os.stat(parent).st_mode | _stat.S_IWUSR)
except OSError:
pass
func(path)
else:
raise
# ``onexc`` was added in 3.12; fall back to ``onerror`` on 3.11.
try:
shutil.rmtree(profile_dir, onexc=_make_writable)
except TypeError:
shutil.rmtree(profile_dir, onerror=_make_writable)
print(f"✓ Removed {profile_dir}")
except Exception as e:
print(f"⚠ Could not remove {profile_dir}: {e}")
@ -920,6 +987,87 @@ def delete_profile(name: str, yes: bool = False) -> Path:
return profile_dir
def _maybe_register_gateway_service(profile_name: str) -> None:
"""Register a profile's gateway with s6 inside the container.
No-op on host (systemd/launchd/windows) those backends raise
``NotImplementedError`` on ``register_profile_gateway`` and the
existing per-profile unit-generation paths handle lifecycle.
Best-effort: any error (no backend detected, s6 not yet ready,
etc.) is logged and swallowed so profile creation doesn't fail
because the s6 supervision tree is in a weird state. The user
can re-register manually later via the gateway start command,
which goes through the same dispatch path.
Port selection is governed by the profile's ``config.yaml``
(``[gateway] port = ``) there is no Python-side allocator
(PR #30136 review item I5 retired the SHA-256-derived range
[9200, 9800) because it was dead code through the entire stack).
Host short-circuit: check ``detect_service_manager()`` first and
return immediately if it isn't ``"s6"``. This keeps host
(systemd/launchd/windows) profile creation completely silent
no ``get_service_manager()`` call, no exception path, no chance
of the `` Could not register s6 gateway service`` warning ever
rendering on a non-container machine. The earlier
``supports_runtime_registration()`` check still catches the case
where detection somehow returns ``"s6"`` but the backend isn't
actually the S6 one.
"""
try:
from hermes_cli.service_manager import detect_service_manager
if detect_service_manager() != "s6":
return # host path — silent, no registration needed
from hermes_cli.service_manager import get_service_manager
mgr = get_service_manager()
except RuntimeError:
return # no backend on this host — nothing to do
except Exception:
# Defensive: detect_service_manager failed for some other
# reason. Stay silent on host rather than printing a confusing
# s6 warning to users who have never touched the container.
return
if not mgr.supports_runtime_registration():
return # host backend; no-op
try:
mgr.register_profile_gateway(profile_name)
except ValueError:
# Already registered (e.g. the container-boot reconciler ran
# first and brought up a stale slot). That's fine.
pass
except Exception as exc:
# Don't fail profile create over a supervision-tree hiccup.
print(f"⚠ Could not register s6 gateway service: {exc}")
def _maybe_unregister_gateway_service(profile_name: str) -> None:
"""Tear down a profile's s6 gateway service inside the container.
No-op on host. Idempotent: absent services are silently skipped
by ``unregister_profile_gateway``.
Same host short-circuit as :func:`_maybe_register_gateway_service`
see that docstring.
"""
try:
from hermes_cli.service_manager import detect_service_manager
if detect_service_manager() != "s6":
return # host path — silent
from hermes_cli.service_manager import get_service_manager
mgr = get_service_manager()
except RuntimeError:
return
except Exception:
return
if not mgr.supports_runtime_registration():
return
try:
mgr.unregister_profile_gateway(profile_name)
except Exception as exc:
print(f"⚠ Could not unregister s6 gateway service: {exc}")
def _cleanup_gateway_service(name: str, profile_dir: Path) -> None:
"""Disable and remove systemd/launchd service for a profile."""
import platform as _platform

View file

@ -60,6 +60,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
auth_type="oauth_external",
base_url_override="https://chatgpt.com/backend-api/codex",
),
"openai-api": HermesOverlay(
transport="codex_responses",
base_url_override="https://api.openai.com/v1",
base_url_env_var="OPENAI_BASE_URL",
),
"xai-oauth": HermesOverlay(
transport="codex_responses",
auth_type="oauth_external",
@ -381,6 +386,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
"local": "Local endpoint",
"bedrock": "AWS Bedrock",
"ollama-cloud": "Ollama Cloud",
"xai-oauth": "xAI Grok OAuth (SuperGrok / Premium+)",
}

View file

@ -27,6 +27,7 @@ from hermes_cli.auth import (
_quarantine_nous_oauth_state,
_quarantine_nous_pool_entries,
_save_auth_store,
_validate_nous_inference_url_from_network,
_write_shared_nous_state,
resolve_nous_runtime_credentials,
)
@ -103,7 +104,7 @@ class NousPortalAdapter(UpstreamAdapter):
state = self._read_state()
if state is None:
raise RuntimeError(
"Not logged into Nous Portal. Run `hermes login nous` first."
"Not logged into Nous Portal. Run `hermes auth add nous` first."
)
try:
@ -134,10 +135,13 @@ class NousPortalAdapter(UpstreamAdapter):
if not agent_key:
raise RuntimeError(
"Nous Portal refresh did not return a usable agent_key. "
"Try `hermes login nous` to re-authenticate."
"Try `hermes auth add nous` to re-authenticate."
)
base_url = refreshed.get("base_url") or DEFAULT_NOUS_INFERENCE_URL
base_url = (
_validate_nous_inference_url_from_network(refreshed.get("base_url"))
or DEFAULT_NOUS_INFERENCE_URL
)
base_url = base_url.rstrip("/")
return UpstreamCredential(

View file

@ -44,7 +44,7 @@ def cmd_proxy_start(args: Any) -> int:
return 2
if not adapter.is_authenticated():
auth_hint = getattr(adapter, "auth_hint", f"hermes login {adapter.name}")
auth_hint = getattr(adapter, "auth_hint", f"hermes auth add {adapter.name}")
print(
f"Not logged into {adapter.display_name}. "
f"Run `{auth_hint}` first.",

View file

@ -100,6 +100,63 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
return None
def _host_derived_api_key(base_url: str) -> str:
    """Return the value of ``<VENDOR>_API_KEY`` derived from *base_url*'s host.

    Examples:
        https://api.deepseek.com/v1      -> DEEPSEEK_API_KEY
        https://api.groq.com/openai/v1   -> GROQ_API_KEY
        https://api.mistral.ai/v1        -> MISTRAL_API_KEY
        https://generativelanguage.googleapis.com/v1beta/openai/
                                         -> GOOGLEAPIS_API_KEY

    The vendor is the second-to-last hostname label after leading ``api`` /
    ``www`` labels are dropped. Because the label is taken from the right,
    a lookalike such as ``api.deepseek.com.attacker.test`` resolves to
    ``ATTACKER_API_KEY`` rather than ``DEEPSEEK_API_KEY``.

    Returns:
        The stripped environment value, or ``""`` when the host yields no
        usable vendor label (no hostname, a final label containing a digit,
        ``localhost``, a ``:`` in the host, fewer than two labels, a label
        that does not start with a letter) or when the vendor is one of
        OPENAI / OPENROUTER / OLLAMA, which this helper deliberately skips.
    """
    host = base_url_hostname(base_url)
    if not host:
        return ""

    raw_labels = host.split(".")
    # IP-literal guard: any digit anywhere in the final label rejects the
    # host (so "127.0.0.1" never produces a vendor name).
    if any(ch.isdigit() for ch in raw_labels[-1]):
        return ""
    if host == "localhost" or ":" in host:
        return ""

    parts = [piece for piece in raw_labels if piece]
    # Skip common API/CDN prefixes at the front of the host.
    skip = 0
    while skip < len(parts) and parts[skip] in ("api", "www"):
        skip += 1
    parts = parts[skip:]
    if len(parts) < 2:
        return ""

    # Second-to-last label, mapped onto the env-var charset (non-alphanumeric
    # characters become underscores, then upper-cased).
    env_prefix = "".join(
        ch if ch.isalnum() else "_" for ch in parts[-2]
    ).upper()
    if not env_prefix or not env_prefix[0].isalpha():
        return ""
    # These vendors are excluded here so this helper never returns their keys.
    if env_prefix in ("OPENAI", "OPENROUTER", "OLLAMA"):
        return ""
    return (os.getenv(f"{env_prefix}_API_KEY", "") or "").strip()
def _auto_detect_local_model(base_url: str) -> str:
"""Query a local server for its model name when only one model is loaded."""
if not base_url:
@ -471,6 +528,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
"api_key": resolved_api_key,
"model": entry.get("default_model", ""),
}
extra_body = entry.get("extra_body")
if isinstance(extra_body, dict):
result["extra_body"] = dict(extra_body)
# The v11→v12 migration writes the API mode under the new
# ``transport`` field, but hand-edited configs may still
# use the legacy ``api_mode`` spelling. Accept both —
@ -496,6 +556,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
"api_key": resolved_api_key,
"model": entry.get("default_model", ""),
}
extra_body = entry.get("extra_body")
if isinstance(extra_body, dict):
result["extra_body"] = dict(extra_body)
api_mode = _parse_api_mode(entry.get("api_mode") or entry.get("transport"))
if api_mode:
result["api_mode"] = api_mode
@ -539,6 +602,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
result["key_env"] = key_env
if provider_key:
result["provider_key"] = provider_key
extra_body = entry.get("extra_body")
if isinstance(extra_body, dict):
result["extra_body"] = dict(extra_body)
api_mode = _parse_api_mode(entry.get("api_mode"))
if api_mode:
result["api_mode"] = api_mode
@ -550,6 +616,13 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
return None
def _custom_provider_request_overrides(custom_provider: Dict[str, Any]) -> Dict[str, Any]:
extra_body = custom_provider.get("extra_body")
if not isinstance(extra_body, dict) or not extra_body:
return {}
return {"extra_body": dict(extra_body)}
def _resolve_named_custom_runtime(
*,
requested_provider: str,
@ -582,10 +655,17 @@ def _resolve_named_custom_runtime(
if pool_result:
pool_result["source"] = "direct-alias"
return pool_result
_da_is_openai_url = base_url_host_matches(base_url, "openai.com") or base_url_host_matches(base_url, "openai.azure.com")
_da_is_openrouter = base_url_host_matches(base_url, "openrouter.ai")
api_key_candidates = [
(explicit_api_key or "").strip(),
os.getenv("OPENAI_API_KEY", "").strip(),
os.getenv("OPENROUTER_API_KEY", "").strip(),
# Gate env key fallbacks on authoritative hosts (#28660)
(os.getenv("OPENAI_API_KEY", "").strip() if _da_is_openai_url else ""),
(os.getenv("OPENROUTER_API_KEY", "").strip() if _da_is_openrouter else ""),
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
# who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
# intuitive match without configuring `custom_providers` first.
_host_derived_api_key(base_url),
]
api_key = next(
(c for c in api_key_candidates if has_usable_secret(c)),
@ -619,14 +699,27 @@ def _resolve_named_custom_runtime(
model_name = custom_provider.get("model")
if model_name:
pool_result["model"] = model_name
request_overrides = _custom_provider_request_overrides(custom_provider)
if request_overrides:
pool_result["request_overrides"] = {
**dict(pool_result.get("request_overrides") or {}),
**request_overrides,
}
return pool_result
_cp_is_openai_url = base_url_host_matches(base_url, "openai.com") or base_url_host_matches(base_url, "openai.azure.com")
_cp_is_openrouter = base_url_host_matches(base_url, "openrouter.ai")
api_key_candidates = [
(explicit_api_key or "").strip(),
str(custom_provider.get("api_key", "") or "").strip(),
os.getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
os.getenv("OPENAI_API_KEY", "").strip(),
os.getenv("OPENROUTER_API_KEY", "").strip(),
# Gate provider env keys on their authoritative hosts — sending
# OPENAI_API_KEY to a local-llm endpoint leaks credentials (#28660).
(os.getenv("OPENAI_API_KEY", "").strip() if _cp_is_openai_url else ""),
(os.getenv("OPENROUTER_API_KEY", "").strip() if _cp_is_openrouter else ""),
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host as a final
# fallback when key_env wasn't set explicitly.
_host_derived_api_key(base_url),
]
api_key = next((candidate for candidate in api_key_candidates if has_usable_secret(candidate)), "")
@ -643,6 +736,9 @@ def _resolve_named_custom_runtime(
# provider name differs from the actual model string the API expects.
if custom_provider.get("model"):
result["model"] = custom_provider["model"]
request_overrides = _custom_provider_request_overrides(custom_provider)
if request_overrides:
result["request_overrides"] = request_overrides
return result
@ -707,7 +803,15 @@ def _resolve_openrouter_runtime(
# OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated
# provider (issues #420, #560).
_is_openrouter_url = base_url_host_matches(base_url, "openrouter.ai")
if _is_openrouter_url:
# Also treat explicitly-configured OpenRouter mirrors/proxies as OpenRouter
# for key selection — if the user set OPENROUTER_BASE_URL or requested
# provider=openrouter explicitly, OPENROUTER_API_KEY should still be used.
_is_openrouter_context = _is_openrouter_url or (
requested_norm == "openrouter"
and (env_openrouter_base_url or base_url == env_openrouter_base_url)
and base_url == (env_openrouter_base_url or "").rstrip("/")
)
if _is_openrouter_context:
api_key_candidates = [
explicit_api_key,
os.getenv("OPENROUTER_API_KEY"),
@ -721,13 +825,24 @@ def _resolve_openrouter_runtime(
# "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
# hostname is a look-alike (ollama.com.attacker.test) must not
# receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
_is_ollama_url = base_url_host_matches(base_url, "ollama.com")
_is_ollama_url = base_url_host_matches(base_url, "ollama.com")
_is_openai_url = base_url_host_matches(base_url, "openai.com")
_is_openai_azure = base_url_host_matches(base_url, "openai.azure.com")
# Gate each provider key on its own host — sending OPENAI_API_KEY or
# OPENROUTER_API_KEY to an unrelated custom endpoint (DeepSeek, Groq,
# Mistral, …) leaks credentials and causes 401s (issue #28660).
# Mirrors the OLLAMA_API_KEY host-gate added in GHSA-76xc-57q6-vm5m.
api_key_candidates = [
explicit_api_key,
(cfg_api_key if use_config_base_url else ""),
(os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
os.getenv("OPENAI_API_KEY"),
os.getenv("OPENROUTER_API_KEY"),
(os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
(os.getenv("OPENAI_API_KEY") if (_is_openai_url or _is_openai_azure) else ""),
(os.getenv("OPENROUTER_API_KEY") if _is_openrouter_url else ""),
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
# who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
# intuitive match. Helper returns "" for IPs/loopback and for env
# vars already handled by the explicit host-gated paths above.
_host_derived_api_key(base_url),
]
api_key = next(
(str(candidate or "").strip() for candidate in api_key_candidates if has_usable_secret(candidate)),

126
hermes_cli/secret_prompt.py Normal file
View file

@ -0,0 +1,126 @@
"""Secret input prompts with masked typing feedback."""
from __future__ import annotations
import getpass
import os
import sys
from collections.abc import Callable
_BACKSPACE_CHARS = {"\b", "\x7f"}
_ENTER_CHARS = {"\r", "\n"}
_EOF_CHARS = {"\x04", "\x1a"}
def _collect_masked_input(
read_char: Callable[[], str],
write: Callable[[str], object],
prompt: str,
*,
mask: str = "*",
) -> str:
"""Read one secret line while writing a mask character per typed char."""
value: list[str] = []
write(prompt)
while True:
ch = read_char()
if ch == "":
write("\n")
raise EOFError
if ch in _ENTER_CHARS:
write("\n")
return "".join(value)
if ch == "\x03":
write("\n")
raise KeyboardInterrupt
if ch in _EOF_CHARS:
write("\n")
raise EOFError
if ch in _BACKSPACE_CHARS:
if value:
value.pop()
write("\b \b")
continue
if ch == "\x1b":
# Ignore escape itself. Terminals commonly send escape-prefixed
# navigation/delete sequences; they should not become secret text.
continue
value.append(ch)
if mask:
write(mask)
def masked_secret_prompt(prompt: str, *, mask: str = "*") -> str:
    """Ask for a secret, echoing *mask* for each typed character.

    Uses ``getpass.getpass`` instead when stdin or stdout is not a TTY, or
    when the platform-specific raw-terminal reader fails for any reason
    other than the user cancelling.

    Raises:
        KeyboardInterrupt, EOFError: propagated unchanged from the reader.
    """
    interactive = _stream_is_tty(sys.stdin) and _stream_is_tty(sys.stdout)
    if not interactive:
        return getpass.getpass(prompt)

    reader = (
        _masked_secret_prompt_windows
        if os.name == "nt"
        else _masked_secret_prompt_posix
    )
    try:
        return reader(prompt, mask=mask)
    except (KeyboardInterrupt, EOFError):
        # User cancellation must reach the caller, not trigger the fallback.
        raise
    except Exception:
        return getpass.getpass(prompt)
def _stream_is_tty(stream) -> bool:
try:
return bool(stream.isatty())
except Exception:
return False
def _masked_secret_prompt_windows(prompt: str, *, mask: str) -> str:
    """Masked prompt for Windows consoles, reading keys via ``msvcrt.getwch``."""
    import msvcrt

    def emit(text: str) -> None:
        sys.stdout.write(text)
        sys.stdout.flush()

    def next_key() -> str:
        key = msvcrt.getwch()
        if key not in {"\x00", "\xe0"}:
            return key
        # A "\x00" / "\xe0" prefix is followed by a second code: consume it
        # and report the pair as a single escape so it is ignored upstream.
        msvcrt.getwch()
        return "\x1b"

    return _collect_masked_input(next_key, emit, prompt, mask=mask)
def _masked_secret_prompt_posix(prompt: str, *, mask: str) -> str:
    """Masked prompt for POSIX terminals using termios raw mode.

    Saves the terminal attributes of stdin, switches it to raw mode for the
    duration of the prompt, and always restores the saved attributes.

    Raises:
        KeyboardInterrupt, EOFError: from ``_collect_masked_input``.
        termios.error / OSError: if stdin is not a usable terminal.
    """
    import termios
    import tty

    fd = sys.stdin.fileno()
    old_attrs = termios.tcgetattr(fd)

    def read_char() -> str:
        return sys.stdin.read(1)

    def write(text: str) -> None:
        # tty.setraw() turns off output post-processing (OPOST), so the
        # terminal no longer expands "\n" into CR+LF. Emit the carriage
        # return ourselves; otherwise the cursor only moves down and the
        # next line of output starts mid-screen.
        sys.stdout.write(text.replace("\n", "\r\n"))
        sys.stdout.flush()

    try:
        tty.setraw(fd)
        return _collect_masked_input(read_char, write, prompt, mask=mask)
    finally:
        # Restore the saved attributes once pending output has drained.
        termios.tcsetattr(fd, termios.TCSADRAIN, old_attrs)

577
hermes_cli/secrets_cli.py Normal file
View file

@ -0,0 +1,577 @@
"""CLI handlers for ``hermes secrets bitwarden ...``.
Subcommands:
setup interactive wizard: install bws, prompt for token + project, test fetch
status show current config + binary version + last fetch outcome
sync run a fetch right now and show what would be applied (dry-run friendly)
disable flip ``secrets.bitwarden.enabled`` to False
install just download the bws binary (no token / project required)
"""
from __future__ import annotations
import argparse
import json
import os
import subprocess
import sys
from pathlib import Path
from typing import List, Optional, Tuple
from rich.console import Console
from rich.panel import Panel
from rich.table import Table
from agent.secret_sources import bitwarden as bw
from hermes_cli.config import (
get_env_path,
load_config,
save_config,
save_env_value,
)
from hermes_cli.secret_prompt import masked_secret_prompt
# ---------------------------------------------------------------------------
# Argparse wiring — called from hermes_cli.main
# ---------------------------------------------------------------------------
def register_cli(parent_parser: argparse.ArgumentParser) -> None:
    """Add the ``setup`` / ``status`` / ``sync`` / ``disable`` / ``install``
    subcommands to *parent_parser*.

    The chosen subcommand name is stored under ``secrets_bw_command`` and
    each subparser sets ``func`` to its handler.
    """
    commands = parent_parser.add_subparsers(dest="secrets_bw_command")

    # setup -----------------------------------------------------------------
    setup_parser = commands.add_parser(
        "setup",
        help="Interactive wizard: install bws, store access token, pick project",
    )
    setup_parser.set_defaults(func=cmd_setup)
    setup_parser.add_argument(
        "--project-id",
        help="Pre-select a project UUID instead of prompting",
    )
    setup_parser.add_argument(
        "--access-token",
        help="Provide the access token non-interactively (will be stored in .env)",
    )
    setup_parser.add_argument(
        "--server-url",
        help=(
            "Bitwarden region / self-hosted endpoint. Examples: "
            "https://vault.bitwarden.com (US, default), "
            "https://vault.bitwarden.eu (EU), or your self-hosted URL. "
            "Skips the interactive region prompt."
        ),
    )

    # status ----------------------------------------------------------------
    status_parser = commands.add_parser(
        "status", help="Show config + binary + last fetch"
    )
    status_parser.set_defaults(func=cmd_status)

    # sync ------------------------------------------------------------------
    sync_parser = commands.add_parser(
        "sync", help="Fetch secrets now and report what changed"
    )
    sync_parser.set_defaults(func=cmd_sync)
    sync_parser.add_argument(
        "--apply",
        action="store_true",
        help="Actually export the secrets into the current shell's env (default: dry-run)",
    )

    # disable ---------------------------------------------------------------
    disable_parser = commands.add_parser(
        "disable", help="Turn off the Bitwarden integration"
    )
    disable_parser.set_defaults(func=cmd_disable)

    # install ---------------------------------------------------------------
    install_parser = commands.add_parser(
        "install",
        help=f"Download and verify the pinned bws binary (v{bw._BWS_VERSION})",
    )
    install_parser.set_defaults(func=cmd_install)
    install_parser.add_argument(
        "--force",
        action="store_true",
        help="Re-download even if a managed copy already exists",
    )
# ---------------------------------------------------------------------------
# Handlers
# ---------------------------------------------------------------------------
def cmd_setup(args: argparse.Namespace) -> int:
    """Run the ``hermes secrets bitwarden setup`` wizard.

    Steps, in order: locate (or download) the ``bws`` binary, obtain and
    store the access token, resolve the server URL, pick a project, run an
    uncached test fetch, then write the ``secrets.bitwarden`` section of the
    config with ``enabled`` set to True.

    Args:
        args: Parsed CLI namespace. Reads ``access_token`` and
            ``project_id`` here and passes the whole namespace to
            ``_resolve_server_url``.

    Returns:
        ``0`` once the configuration has been saved; ``1`` if any step
        fails or is aborted. Note that the token is written to the env file
        and ``os.environ`` in step 2, before the later steps can fail.
    """
    console = Console()
    console.print(
        Panel.fit(
            "[bold]Bitwarden Secrets Manager setup[/bold]\n\n"
            "Need an access token? In the Bitwarden web app:\n"
            " Secrets Manager → Machine accounts → [your account] →\n"
            " Access tokens → Create access token\n\n"
            "Copy the token (starts with [cyan]0.[/cyan]…) — it cannot be retrieved later.",
            border_style="cyan",
        )
    )
    # ------------------------------------------------------------------ binary
    console.print()
    console.print("[bold]Step 1[/bold] Install the bws CLI")
    try:
        # Look for an existing binary first; only download when none is found.
        binary = bw.find_bws(install_if_missing=False)
        if binary is None:
            console.print(" No bws on PATH — downloading…")
            binary = bw.install_bws()
        version = _bws_version(binary)
        console.print(f" [green]✓[/green] {binary} ({version})")
    except Exception as exc:  # noqa: BLE001
        console.print(f" [red]✗ Could not install bws: {exc}[/red]")
        console.print(
            " Manual install: "
            "https://github.com/bitwarden/sdk-sm/releases"
        )
        return 1
    # ------------------------------------------------------------------- token
    console.print()
    console.print("[bold]Step 2[/bold] Provide your access token")
    cfg = load_config()
    # setdefault creates the nested sections in ``cfg`` itself, so the
    # values assigned to ``secrets_cfg`` below are what save_config persists.
    secrets_cfg = (cfg.setdefault("secrets", {})
                   .setdefault("bitwarden", {}))
    token_env = secrets_cfg.get("access_token_env", "BWS_ACCESS_TOKEN")
    # A token passed on the command line skips the masked prompt.
    token = (args.access_token or "").strip()
    if not token:
        token = masked_secret_prompt(f" Paste access token ({token_env}): ").strip()
    if not token:
        console.print(" [red]Empty token, aborting.[/red]")
        return 1
    # The "0." prefix check is advisory only: it warns but does not abort.
    if not token.startswith("0."):
        console.print(
            " [yellow]Warning: token doesn't start with '0.' — usually that means "
            "you pasted something other than a BSM access token. Continuing anyway.[/yellow]"
        )
    save_env_value(token_env, token)
    os.environ[token_env] = token  # so the test fetch below sees it
    console.print(f" [green]✓[/green] stored in {get_env_path()} as {token_env}")
    # ------------------------------------------------------------------ region
    console.print()
    console.print("[bold]Step 3[/bold] Pick a Bitwarden region")
    # None aborts the wizard; any other falsy value means "use the bws default".
    server_url = _resolve_server_url(args, secrets_cfg, console)
    if server_url is None:
        return 1
    if server_url:
        console.print(f" [green]✓[/green] using {server_url}")
    else:
        console.print(
            " [green]✓[/green] using bws default "
            "(US Cloud, https://vault.bitwarden.com)"
        )
    # ------------------------------------------------------------------- project
    # --project-id bypasses the listing/selection step (and its heading).
    if args.project_id and args.project_id.strip():
        project_id = args.project_id.strip()
    else:
        console.print()
        console.print("[bold]Step 4[/bold] Pick a project")
        project_id = ""
        # None from _list_projects aborts; an empty list means the machine
        # account can see no projects.
        projects = _list_projects(binary, token, console, server_url=server_url)
        if projects is None:
            return 1
        if not projects:
            console.print(" [yellow]No projects visible to this machine account.[/yellow]")
            console.print(
                " In the Bitwarden web app, open the machine account → Projects tab "
                "and grant it access to at least one project."
            )
            return 1
        table = Table(show_header=True, header_style="bold")
        table.add_column("#", style="cyan", width=4)
        table.add_column("Name")
        table.add_column("ID", style="dim")
        for i, p in enumerate(projects, 1):
            table.add_row(str(i), p.get("name", "?"), p.get("id", "?"))
        console.print(table)
        # Re-prompt until a number inside the listed 1-based range is entered.
        while True:
            choice = console.input(f" Select project [1-{len(projects)}]: ").strip()
            if not choice:
                continue
            try:
                idx = int(choice)
            except ValueError:
                console.print(" [red]Enter a number.[/red]")
                continue
            if 1 <= idx <= len(projects):
                project_id = projects[idx - 1]["id"]
                break
            console.print(f" [red]Out of range — pick 1-{len(projects)}.[/red]")
    # ------------------------------------------------------------------- test
    console.print()
    # Keep step numbering contiguous: the project step is only shown (as
    # step 4) when no --project-id was supplied.
    step_num = 5 if not (args.project_id and args.project_id.strip()) else 4
    console.print(f"[bold]Step {step_num}[/bold] Test fetch")
    try:
        secrets, warnings = bw.fetch_bitwarden_secrets(
            access_token=token,
            project_id=project_id,
            binary=binary,
            use_cache=False,
            server_url=server_url,
        )
    except Exception as exc:  # noqa: BLE001
        console.print(f" [red]✗ Fetch failed: {exc}[/red]")
        return 1
    if not secrets:
        console.print(" [yellow]Fetch succeeded but the project has no secrets.[/yellow]")
    else:
        # Only secret names are listed, each with a status; values are not printed.
        table = Table(show_header=True, header_style="bold")
        table.add_column("Name", style="cyan")
        table.add_column("Status")
        for key in sorted(secrets):
            if key == token_env:
                status = "[dim]bootstrap token — never overrides itself[/dim]"
            elif os.environ.get(key):
                status = "[yellow]already set in env (will be overwritten)[/yellow]"
            else:
                status = "[green]new[/green]"
            table.add_row(key, status)
        console.print(table)
    for w in warnings:
        console.print(f" [yellow]warning:[/yellow] {w}")
    # ------------------------------------------------------------------- save
    # enabled / project_id / server_url are always overwritten; the other
    # keys are only filled in when absent, so existing settings are kept.
    secrets_cfg["enabled"] = True
    secrets_cfg["project_id"] = project_id
    secrets_cfg["server_url"] = server_url
    secrets_cfg.setdefault("access_token_env", token_env)
    secrets_cfg.setdefault("cache_ttl_seconds", 300)
    secrets_cfg.setdefault("override_existing", True)
    secrets_cfg.setdefault("auto_install", True)
    save_config(cfg)
    console.print()
    console.print(
        "[green]✓ Bitwarden Secrets Manager is enabled.[/green] "
        "Secrets will be pulled at the start of every Hermes process."
    )
    console.print(
        " Status: [cyan]hermes secrets bitwarden status[/cyan]\n"
        " Refresh: [cyan]hermes secrets bitwarden sync[/cyan]\n"
        " Disable: [cyan]hermes secrets bitwarden disable[/cyan]"
    )
    return 0
def cmd_status(args: argparse.Namespace) -> int:
    """Print a summary panel of the Bitwarden Secrets Manager configuration.

    Reads ``secrets.bitwarden`` from the loaded config, reports whether the
    access-token environment variable is populated and whether a ``bws``
    binary can be found (without installing one), then prints follow-up
    hints for the disabled / missing-token / missing-project cases.

    Always returns 0; ``args`` is not consulted.
    """
    out = Console()
    settings = (load_config().get("secrets") or {}).get("bitwarden") or {}
    is_enabled = bool(settings.get("enabled"))
    token_var = settings.get("access_token_env", "BWS_ACCESS_TOKEN")
    project = settings.get("project_id", "")
    url = str(settings.get("server_url", "") or "").strip()
    has_token = bool(os.environ.get(token_var))

    summary = Table(show_header=False, box=None, padding=(0, 2))
    summary.add_column("", style="bold")
    summary.add_column("")
    rows = [
        ("Enabled", _yn(is_enabled)),
        ("Token env var", token_var),
        ("Token in env", _yn(has_token)),
        ("Project ID", project or "[dim](unset)[/dim]"),
        (
            "Server URL",
            url or "[dim]default (US Cloud, https://vault.bitwarden.com)[/dim]",
        ),
        ("Override existing", _yn(bool(settings.get("override_existing", False)))),
        ("Cache TTL (s)", str(settings.get("cache_ttl_seconds", 300))),
        ("Auto-install", _yn(bool(settings.get("auto_install", True)))),
    ]
    for label, value in rows:
        summary.add_row(label, value)

    # Status must never trigger a download, hence install_if_missing=False.
    bws_path = bw.find_bws(install_if_missing=False)
    if bws_path:
        bws_cell = f"{bws_path} ({_bws_version(bws_path)})"
    else:
        bws_cell = "[yellow]not installed[/yellow]"
    summary.add_row("bws binary", bws_cell)

    out.print(Panel(summary, title="Bitwarden Secrets Manager", border_style="cyan"))

    if not is_enabled:
        out.print("\n Run [cyan]hermes secrets bitwarden setup[/cyan] to enable.")
        return 0
    if not has_token:
        out.print(
            f"\n [yellow]Enabled but {token_var} is not set — Hermes will skip BSM "
            "and warn on next startup.[/yellow]"
        )
    if not project:
        out.print(
            "\n [yellow]Enabled but no project_id — nothing to fetch.[/yellow]"
        )
    return 0
def cmd_sync(args: argparse.Namespace) -> int:
    """Fetch secrets from Bitwarden (bypassing the cache) and preview or apply them.

    Without ``args.apply`` this is a dry run that only prints what would be
    exported. With ``args.apply`` each secret is written into this process's
    ``os.environ``; applying also forces overriding of variables that are
    already set.

    Returns 1 when the integration is disabled, the token or project id is
    missing, or the fetch fails; 0 otherwise (including an empty project).
    """
    out = Console()
    settings = (load_config().get("secrets") or {}).get("bitwarden") or {}
    if not settings.get("enabled"):
        out.print(
            "[yellow]Bitwarden integration is disabled. Run "
            "`hermes secrets bitwarden setup` first.[/yellow]"
        )
        return 1

    token_var = settings.get("access_token_env", "BWS_ACCESS_TOKEN")
    access_token = os.environ.get(token_var, "").strip()
    if not access_token:
        out.print(f"[red]{token_var} is not set.[/red]")
        return 1

    project = settings.get("project_id", "")
    if not project:
        out.print("[red]No project_id configured.[/red]")
        return 1

    url = str(settings.get("server_url", "") or "").strip()
    try:
        fetched, notes = bw.fetch_bitwarden_secrets(
            access_token=access_token,
            project_id=project,
            use_cache=False,
            server_url=url,
        )
    except Exception as exc:  # noqa: BLE001
        out.print(f"[red]Fetch failed: {exc}[/red]")
        return 1

    if not fetched:
        out.print("[yellow]No secrets in project.[/yellow]")
        return 0

    # --apply implies overriding, regardless of the configured preference.
    force = bool(settings.get("override_existing", False)) or args.apply

    report = Table(show_header=True, header_style="bold")
    report.add_column("Name", style="cyan")
    report.add_column("Action")

    exported = 0
    for name in sorted(fetched):
        if name == token_var:
            # The bootstrap token must never be replaced by a fetched value.
            action = "[dim]skip (bootstrap token)[/dim]"
        else:
            present = bool(os.environ.get(name))
            if present and not force:
                action = "[dim]skip (already set)[/dim]"
            elif args.apply:
                os.environ[name] = fetched[name]
                exported += 1
                action = "[green]exported[/green]" + (" (overrode)" if present else "")
            else:
                action = "[green]would export[/green]" + (" (overrides)" if present else "")
        report.add_row(name, action)

    out.print(report)
    for note in notes:
        out.print(f"[yellow]warning:[/yellow] {note}")

    if args.apply:
        out.print(f"\n [green]Exported {exported} secret(s) into current process.[/green]")
    else:
        out.print(
            "\n This was a dry-run — secrets are picked up automatically on the "
            "next [cyan]hermes[/cyan] invocation. Re-run with [cyan]--apply[/cyan] "
            "to export into the current shell instead."
        )
    return 0
def cmd_disable(args: argparse.Namespace) -> int:
    """Set ``secrets.bitwarden.enabled`` to False and save the config.

    Missing ``secrets`` / ``bitwarden`` sections are created on the way.
    Nothing else is modified by this function. Always returns 0; ``args``
    is not consulted.
    """
    out = Console()
    cfg = load_config()
    secrets_section = cfg.setdefault("secrets", {})
    bitwarden_section = secrets_section.setdefault("bitwarden", {})
    bitwarden_section["enabled"] = False
    save_config(cfg)
    out.print(
        "[green]Disabled.[/green] Bitwarden secrets will NOT be pulled on the next "
        "Hermes invocation.\n"
        " Your access token is left in .env — remove it manually if you also want "
        "to revoke the credential."
    )
    return 0
def cmd_install(args: argparse.Namespace) -> int:
    """Install the ``bws`` binary and print its path and version.

    ``args.force`` is forwarded as a bool to ``bw.install_bws``. Any
    exception raised while installing or reporting is printed and turned
    into exit code 1; success returns 0.
    """
    out = Console()
    try:
        installed = bw.install_bws(force=bool(args.force))
        out.print(f"[green]✓[/green] {installed} ({_bws_version(installed)})")
    except Exception as exc:  # noqa: BLE001
        out.print(f"[red]Install failed: {exc}[/red]")
        return 1
    return 0
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _yn(b: bool) -> str:
return "[green]yes[/green]" if b else "[dim]no[/dim]"
def _bws_version(binary: Path) -> str:
try:
res = subprocess.run(
[str(binary), "--version"],
capture_output=True,
text=True,
timeout=5,
)
if res.returncode == 0:
return (res.stdout or res.stderr).strip().splitlines()[0]
except (OSError, subprocess.TimeoutExpired):
pass
return "version unknown"
def _list_projects(
binary: Path, token: str, console: Console, *, server_url: str = ""
) -> Optional[List[dict]]:
"""Call ``bws project list`` and return the parsed list, or None on failure."""
env = os.environ.copy()
env["BWS_ACCESS_TOKEN"] = token
env.setdefault("NO_COLOR", "1")
if server_url:
env["BWS_SERVER_URL"] = server_url
try:
res = subprocess.run(
[str(binary), "project", "list", "--output", "json"],
env=env,
capture_output=True,
text=True,
timeout=15,
)
except (OSError, subprocess.TimeoutExpired) as exc:
console.print(f" [red]Couldn't list projects: {exc}[/red]")
return None
if res.returncode != 0:
err = (res.stderr or res.stdout).strip()[:300]
console.print(f" [red]bws project list failed: {err}[/red]")
lowered = err.lower()
if "invalid_client" in lowered or "400 bad request" in lowered:
console.print(
" [yellow]'invalid_client' from the US identity endpoint usually "
"means the token is for a different Bitwarden region. Re-run "
"[cyan]hermes secrets bitwarden setup[/cyan] and pick EU or "
"self-hosted at the region prompt, or set [cyan]secrets.bitwarden."
"server_url[/cyan] in config.yaml.[/yellow]"
)
elif "authorization" in lowered or "invalid" in lowered:
console.print(
" [yellow]This usually means the access token is wrong or revoked. "
"Double-check it in the Bitwarden web app.[/yellow]"
)
return None
try:
data = json.loads(res.stdout or "[]")
except json.JSONDecodeError as exc:
console.print(f" [red]bws returned non-JSON: {exc}[/red]")
return None
if not isinstance(data, list):
return []
return [p for p in data if isinstance(p, dict) and p.get("id")]
# Canonical Bitwarden region endpoints. Keep in sync with what Bitwarden
# publishes — these are stable but if a third region appears, add it here
# and to the prompt below.
# Each entry is a (menu label, server URL) pair. The empty URL on the first
# entry means "no override": the setup flow treats "" as the bws default.
_REGION_PRESETS = [
    ("US Cloud (https://vault.bitwarden.com — bws default)", ""),
    ("EU Cloud (https://vault.bitwarden.eu)", "https://vault.bitwarden.eu"),
]
def _resolve_server_url(
args: argparse.Namespace,
secrets_cfg: dict,
console: Console,
) -> Optional[str]:
"""Pick a Bitwarden server URL for setup.
Resolution order:
1. ``--server-url`` CLI flag (non-interactive)
2. ``BWS_SERVER_URL`` env var (so users running with that already set
in their shell don't have to re-enter it)
3. Existing ``secrets.bitwarden.server_url`` value (for re-runs)
4. Interactive menu: US / EU / self-hosted
Returns the chosen URL as a string (empty string = bws default,
i.e. US Cloud). Returns None if the user aborted with an empty
custom URL.
"""
if args.server_url and args.server_url.strip():
return args.server_url.strip()
env_url = os.environ.get("BWS_SERVER_URL", "").strip()
if env_url:
console.print(
f" Detected [cyan]BWS_SERVER_URL[/cyan]={env_url} in your shell — using it."
)
return env_url
existing = str(secrets_cfg.get("server_url", "") or "").strip()
if existing:
console.print(
f" Existing config: [cyan]{existing}[/cyan]. "
"Press Enter to keep, or pick a different option below."
)
table = Table(show_header=True, header_style="bold", box=None, padding=(0, 2))
table.add_column("#", style="cyan", width=4)
table.add_column("Region / endpoint")
for i, (label, _url) in enumerate(_REGION_PRESETS, 1):
table.add_row(str(i), label)
table.add_row(str(len(_REGION_PRESETS) + 1), "Self-hosted / custom URL")
console.print(table)
custom_idx = len(_REGION_PRESETS) + 1
while True:
prompt = f" Select region [1-{custom_idx}]"
if existing:
prompt += " (Enter to keep current)"
prompt += ": "
choice = console.input(prompt).strip()
if not choice:
if existing:
return existing
console.print(" [red]Enter a number.[/red]")
continue
try:
idx = int(choice)
except ValueError:
console.print(" [red]Enter a number.[/red]")
continue
if 1 <= idx <= len(_REGION_PRESETS):
return _REGION_PRESETS[idx - 1][1]
if idx == custom_idx:
custom = console.input(
" Enter your Bitwarden server URL "
"(e.g. https://vault.example.com): "
).strip()
if not custom:
console.print(" [red]Empty URL, aborting.[/red]")
return None
if not custom.startswith(("http://", "https://")):
console.print(
" [yellow]Warning: URL doesn't start with http:// or "
"https:// — bws may reject it.[/yellow]"
)
return custom
console.print(f" [red]Out of range — pick 1-{custom_idx}.[/red]")

View file

@ -0,0 +1,576 @@
"""On-demand supply-chain audit for Hermes Agent installs.
Scans three surfaces a Hermes user actually controls and we can map to
upstream advisories without auth or extra binaries:
1. The Hermes venv (every PyPI dist via ``importlib.metadata``).
2. Python deps declared by user-installed plugins under ``~/.hermes/plugins``
(``requirements.txt`` + ``pyproject.toml`` best-effort pin extraction).
3. MCP servers wired in ``config.yaml`` whose ``command/args`` look like
``npx -y <pkg>@<ver>`` or ``uvx <pkg>==<ver>``.
Vulnerabilities are looked up against OSV.dev (``api.osv.dev/v1/querybatch``
+ ``/v1/vulns/{id}``). Single-shot, on-demand, never daily — see the design
notes in ``references/security-disclosure-triage.md``.
Out of scope on purpose: global pip/npm, editor/browser extensions,
daily background scans, auto-blocking installs.
"""
from __future__ import annotations
import argparse
import concurrent.futures
import json
import re
import sys
import urllib.error
import urllib.request
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Iterable, Optional
from hermes_constants import get_hermes_home
# Endpoint that accepts many package/version queries in one POST.
OSV_BATCH_URL = "https://api.osv.dev/v1/querybatch"
# Per-vulnerability detail endpoint; ``{vid}`` is filled in with the OSV id.
OSV_VULN_URL = "https://api.osv.dev/v1/vulns/{vid}"
OSV_BATCH_MAX = 1000  # OSV documented hard cap per request
# Passed as the ``timeout`` argument of every urlopen() call in this module.
HTTP_TIMEOUT = 20
# Worker-thread count used when fetching vulnerability details.
DETAIL_PARALLELISM = 8
# Severity ordering for --fail-on gating. UNKNOWN sits below LOW so it
# never blocks unless --fail-on is passed something even lower (we don't
# expose that).
# MODERATE and MEDIUM are two spellings of the same tier and share a rank.
SEVERITY_ORDER = {
    "UNKNOWN": 0,
    "LOW": 1,
    "MODERATE": 2,
    "MEDIUM": 2,
    "HIGH": 3,
    "CRITICAL": 4,
}
# ─── Data shapes ──────────────────────────────────────────────────────────────
@dataclass(frozen=True)
class Component:
    """A single (name, version, ecosystem) tuple discovered on disk.

    Frozen, and therefore hashable: instances are used as dictionary keys
    when mapping components to their OSV ids.
    """

    # Package name as declared by the discovery source.
    name: str
    # Exact version string that is sent to OSV.
    version: str
    ecosystem: str  # "PyPI" | "npm" — exactly as OSV expects
    source: str  # human-readable origin, e.g. "venv", "plugin:foo", "mcp:bar"
@dataclass
class Vulnerability:
    """Details of one OSV advisory.

    Only ``osv_id`` is required; the remaining fields keep their defaults
    when the detail lookup for the advisory fails.
    """

    # OSV identifier of the advisory.
    osv_id: str
    # One of the SEVERITY_ORDER keys.
    severity: str = "UNKNOWN"
    # Advisory summary text; empty when none is available.
    summary: str = ""
    # Versions named by "fixed" range events, de-duplicated, in record order.
    fixed_versions: list[str] = field(default_factory=list)
@dataclass
class Finding:
    """Pairs one discovered component with one vulnerability affecting it."""

    # The affected package/version and where it was discovered.
    component: Component
    # The advisory reported for that component.
    vuln: Vulnerability
# ─── Component discovery ──────────────────────────────────────────────────────
def _discover_venv() -> list[Component]:
    """List every distribution visible to ``importlib.metadata``.

    Distributions whose name cannot be read, or whose name or version is
    empty, are skipped. Duplicates (same lower-cased name and version) are
    reported once, keeping the first occurrence.
    """
    from importlib.metadata import distributions

    # dict preserves insertion order, so first-seen order is retained.
    unique: dict[tuple[str, str], Component] = {}
    for dist in distributions():
        try:
            name = (dist.metadata["Name"] or "").strip()
        except Exception:
            continue
        version = (dist.version or "").strip()
        if not (name and version):
            continue
        key = (name.lower(), version)
        if key not in unique:
            unique[key] = Component(
                name=name, version=version, ecosystem="PyPI", source="venv"
            )
    return list(unique.values())
# requirements.txt line: drop comments, environment markers, options, extras
_REQ_LINE = re.compile(
r"""^\s*
(?P<name>[A-Za-z0-9][A-Za-z0-9._-]*)
(?:\[[^\]]+\])? # extras
\s*==\s*
(?P<version>[A-Za-z0-9._+!-]+)
\s*(?:;.*)?$
""",
re.VERBOSE,
)
def _parse_requirements(text: str) -> list[tuple[str, str]]:
"""Extract ``name==version`` pins. Everything else (>=, ~=, no pin) is skipped.
A loose pin can't be mapped to a single OSV query, and getting it wrong
is worse than missing a finding for an audit tool false positives
train users to ignore output.
"""
pins: list[tuple[str, str]] = []
for raw in text.splitlines():
line = raw.strip()
if not line or line.startswith("#") or line.startswith("-"):
continue
m = _REQ_LINE.match(line)
if m:
pins.append((m.group("name"), m.group("version")))
return pins
def _parse_pyproject_pins(text: str) -> list[tuple[str, str]]:
"""Pull ``name==version`` pins from a ``pyproject.toml`` ``dependencies`` list.
Uses stdlib ``tomllib`` (3.11+). Same exact-pin policy as requirements.
"""
try:
import tomllib
except ImportError: # pragma: no cover - 3.10 only
return []
try:
data = tomllib.loads(text)
except Exception:
return []
deps: list[str] = []
project = data.get("project") or {}
if isinstance(project.get("dependencies"), list):
deps.extend(str(x) for x in project["dependencies"])
optional = project.get("optional-dependencies") or {}
if isinstance(optional, dict):
for group in optional.values():
if isinstance(group, list):
deps.extend(str(x) for x in group)
pins: list[tuple[str, str]] = []
for dep in deps:
m = _REQ_LINE.match(dep)
if m:
pins.append((m.group("name"), m.group("version")))
return pins
def _discover_plugins(hermes_home: Path) -> list[Component]:
"""Python deps declared by plugins under ``~/.hermes/plugins``.
Plugins typically don't install into the venv (they're directory-based
with relative imports), so their stated requirements are useful audit
surface even when the venv scan misses them.
"""
plugins_dir = hermes_home / "plugins"
if not plugins_dir.is_dir():
return []
out: list[Component] = []
for plugin_dir in sorted(plugins_dir.iterdir()):
if not plugin_dir.is_dir() or plugin_dir.name.startswith("."):
continue
source = f"plugin:{plugin_dir.name}"
for req_file in ("requirements.txt", "requirements-dev.txt"):
path = plugin_dir / req_file
if path.is_file():
try:
pins = _parse_requirements(path.read_text(encoding="utf-8", errors="replace"))
except OSError:
continue
for name, version in pins:
out.append(Component(name=name, version=version, ecosystem="PyPI", source=source))
pyproject = plugin_dir / "pyproject.toml"
if pyproject.is_file():
try:
pins = _parse_pyproject_pins(pyproject.read_text(encoding="utf-8", errors="replace"))
except OSError:
continue
for name, version in pins:
out.append(Component(name=name, version=version, ecosystem="PyPI", source=source))
return out
# npx forms we recognise:
# npx -y @scope/pkg@1.2.3
# npx --yes pkg@1.2.3
# npx pkg@1.2.3 [...args]
# We deliberately don't try to resolve unversioned names — that maps to
# "latest" at runtime and isn't a stable audit subject.
# Group 1 is the package name (optionally ``@scope/``-prefixed), group 2 the
# version after the final "@". The pattern is anchored to the whole token.
_NPX_PKG = re.compile(r"^(@[A-Za-z0-9._-]+/[A-Za-z0-9._-]+|[A-Za-z0-9._-]+)@([A-Za-z0-9._+-]+)$")
# uvx forms:
# uvx pkg==1.2.3
# uvx --with pkg==1.2.3 entrypoint
# Group 1 is the package name, group 2 the version after "==".
_UVX_PKG = re.compile(r"^([A-Za-z0-9][A-Za-z0-9._-]*)==([A-Za-z0-9._+!-]+)$")
def _extract_mcp_component(server_name: str, command: str, args: list[str]) -> Optional[Component]:
"""Best-effort: parse `command/args` into a (name, version, ecosystem).
Returns None when the entry doesn't pin a version we can audit (local
paths, Docker images, unversioned npx, etc.). Audit output stays silent
rather than guess.
"""
cmd = (command or "").strip().lower()
if not args:
return None
# npx (any prefix path)
if cmd.endswith("npx") or cmd == "npx":
# Skip flag tokens until we see the first thing that looks like a pkg ref
for token in args:
if token.startswith("-"):
continue
m = _NPX_PKG.match(token)
if m:
return Component(
name=m.group(1),
version=m.group(2),
ecosystem="npm",
source=f"mcp:{server_name}",
)
return None # First non-flag token isn't a pinned ref
# uvx (any prefix path)
if cmd.endswith("uvx") or cmd == "uvx":
for token in args:
if token.startswith("-"):
continue
m = _UVX_PKG.match(token)
if m:
return Component(
name=m.group(1),
version=m.group(2),
ecosystem="PyPI",
source=f"mcp:{server_name}",
)
return None
return None
def _discover_mcp() -> list[Component]:
"""Pinned MCP server packages from ``config.yaml``."""
try:
from hermes_cli.mcp_config import _get_mcp_servers
except Exception:
return []
out: list[Component] = []
servers = _get_mcp_servers()
if not isinstance(servers, dict):
return []
for name, cfg in servers.items():
if not isinstance(cfg, dict):
continue
command = cfg.get("command", "") or ""
args = cfg.get("args") or []
if not isinstance(args, list):
continue
comp = _extract_mcp_component(name, command, [str(a) for a in args])
if comp is not None:
out.append(comp)
return out
# ─── OSV client ───────────────────────────────────────────────────────────────
def _http_post_json(url: str, payload: dict) -> dict:
    """POST ``payload`` as UTF-8 JSON to ``url`` and return the decoded JSON reply.

    Uses ``HTTP_TIMEOUT`` as the request timeout. Errors from ``urllib`` and
    from JSON/UTF-8 decoding are not caught here.
    """
    body = json.dumps(payload).encode("utf-8")
    request = urllib.request.Request(
        url,
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=HTTP_TIMEOUT) as response:
        raw = response.read()
    return json.loads(raw.decode("utf-8"))
def _http_get_json(url: str) -> dict:
    """GET ``url`` and return the response body decoded as UTF-8 JSON.

    Uses ``HTTP_TIMEOUT`` as the request timeout. Errors from ``urllib`` and
    from JSON/UTF-8 decoding are not caught here.
    """
    request = urllib.request.Request(url, method="GET")
    with urllib.request.urlopen(request, timeout=HTTP_TIMEOUT) as response:
        raw = response.read()
    return json.loads(raw.decode("utf-8"))
def _osv_query_batch(components: list[Component]) -> dict[Component, list[str]]:
"""Return {component -> [osv_id, ...]} for components with any vulns.
Components without findings are omitted from the result dict.
"""
if not components:
return {}
findings: dict[Component, list[str]] = {}
for chunk_start in range(0, len(components), OSV_BATCH_MAX):
chunk = components[chunk_start:chunk_start + OSV_BATCH_MAX]
payload = {
"queries": [
{
"package": {"name": c.name, "ecosystem": c.ecosystem},
"version": c.version,
}
for c in chunk
]
}
try:
resp = _http_post_json(OSV_BATCH_URL, payload)
except (urllib.error.URLError, TimeoutError, ConnectionError) as exc:
raise RuntimeError(f"OSV batch query failed: {exc}") from exc
results = resp.get("results") or []
for comp, result in zip(chunk, results):
vulns = (result or {}).get("vulns") or []
ids = [v.get("id") for v in vulns if v.get("id")]
if ids:
findings[comp] = ids
return findings
def _osv_severity_from_record(record: dict) -> str:
"""Extract CVSS-derived severity tier from an OSV vuln record."""
# OSV puts CVSS in `severity` (top-level or per-affected) and a
# human-readable bucket in `database_specific.severity` for GHSAs.
db_specific = record.get("database_specific") or {}
raw = db_specific.get("severity")
if isinstance(raw, str) and raw.strip():
upper = raw.strip().upper()
if upper in SEVERITY_ORDER:
return upper
# Fall back to CVSS score → tier
score: Optional[float] = None
for sev_entry in record.get("severity") or []:
s = sev_entry.get("score")
if isinstance(s, str):
# CVSS vector strings look like "CVSS:3.1/AV:N/..." — we can't
# parse without a lib. Look for an explicit numeric in
# affected[].ecosystem_specific later if present.
continue
affected = record.get("affected") or []
for entry in affected:
eco_spec = entry.get("ecosystem_specific") or {}
sev = eco_spec.get("severity")
if isinstance(sev, str) and sev.strip().upper() in SEVERITY_ORDER:
return sev.strip().upper()
if score is not None:
if score >= 9.0:
return "CRITICAL"
if score >= 7.0:
return "HIGH"
if score >= 4.0:
return "MODERATE"
if score > 0:
return "LOW"
return "UNKNOWN"
def _osv_fixed_versions(record: dict) -> list[str]:
fixes: list[str] = []
for entry in record.get("affected") or []:
for rng in entry.get("ranges") or []:
for event in rng.get("events") or []:
if "fixed" in event:
fixes.append(str(event["fixed"]))
# Dedupe, preserve order
seen: set[str] = set()
out: list[str] = []
for f in fixes:
if f not in seen:
seen.add(f)
out.append(f)
return out
def _osv_fetch_details(vuln_ids: Iterable[str]) -> dict[str, Vulnerability]:
"""Fetch summary/severity for each unique vuln id, in parallel."""
unique = sorted({vid for vid in vuln_ids if vid})
if not unique:
return {}
out: dict[str, Vulnerability] = {}
def _fetch_one(vid: str) -> Vulnerability:
try:
rec = _http_get_json(OSV_VULN_URL.format(vid=vid))
except (urllib.error.URLError, TimeoutError, ConnectionError):
return Vulnerability(osv_id=vid)
return Vulnerability(
osv_id=vid,
severity=_osv_severity_from_record(rec),
summary=(rec.get("summary") or "").strip(),
fixed_versions=_osv_fixed_versions(rec),
)
with concurrent.futures.ThreadPoolExecutor(max_workers=DETAIL_PARALLELISM) as pool:
for vuln in pool.map(_fetch_one, unique):
out[vuln.osv_id] = vuln
return out
# ─── Orchestration ────────────────────────────────────────────────────────────
def run_audit(
    *,
    skip_venv: bool = False,
    skip_plugins: bool = False,
    skip_mcp: bool = False,
    hermes_home: Optional[Path] = None,
) -> list[Finding]:
    """Discover components, look them up on OSV and return the findings.

    Each ``skip_*`` flag disables one discovery surface. ``hermes_home``
    defaults to ``get_hermes_home()``. The result holds one ``Finding`` per
    (component, advisory) pair, ordered by severity (highest first), then
    source, lower-cased package name and OSV id. Returns an empty list when
    nothing is discovered or nothing is vulnerable.
    """
    home = hermes_home or Path(get_hermes_home())

    components: list[Component] = []
    if not skip_venv:
        components += _discover_venv()
    if not skip_plugins:
        components += _discover_plugins(home)
    if not skip_mcp:
        components += _discover_mcp()
    if not components:
        return []

    vulnerable = _osv_query_batch(components)
    if not vulnerable:
        return []

    details = _osv_fetch_details([vid for ids in vulnerable.values() for vid in ids])

    def _priority(finding: Finding) -> tuple:
        # Negated rank sorts the most severe findings first.
        return (
            -SEVERITY_ORDER.get(finding.vuln.severity, 0),
            finding.component.source,
            finding.component.name.lower(),
            finding.vuln.osv_id,
        )

    findings = [
        # Fall back to an id-only record when no detail entry exists.
        Finding(component=comp, vuln=details.get(vid) or Vulnerability(osv_id=vid))
        for comp, ids in vulnerable.items()
        for vid in ids
    ]
    findings.sort(key=_priority)
    return findings
# ─── Rendering ────────────────────────────────────────────────────────────────
def _render_human(findings: list[Finding], total_components: int) -> str:
if not findings:
return f"No known vulnerabilities found across {total_components} component(s)."
lines: list[str] = []
lines.append(
f"Found {len(findings)} known vulnerability finding(s) "
f"across {total_components} component(s):"
)
lines.append("")
last_source = None
for f in findings:
if f.component.source != last_source:
lines.append(f"[{f.component.source}]")
last_source = f.component.source
sev = f.vuln.severity.ljust(8)
head = f" {sev} {f.component.name}=={f.component.version} {f.vuln.osv_id}"
lines.append(head)
if f.vuln.summary:
summary = f.vuln.summary
if len(summary) > 100:
summary = summary[:97] + "..."
lines.append(f" {summary}")
if f.vuln.fixed_versions:
lines.append(f" fixed in: {', '.join(f.vuln.fixed_versions[:3])}")
return "\n".join(lines)
def _render_json(findings: list[Finding], total_components: int) -> str:
payload = {
"total_components_scanned": total_components,
"finding_count": len(findings),
"findings": [
{
"package": f.component.name,
"version": f.component.version,
"ecosystem": f.component.ecosystem,
"source": f.component.source,
"vuln_id": f.vuln.osv_id,
"severity": f.vuln.severity,
"summary": f.vuln.summary,
"fixed_versions": f.vuln.fixed_versions,
}
for f in findings
],
}
return json.dumps(payload, indent=2)
def _count_components(
*, skip_venv: bool, skip_plugins: bool, skip_mcp: bool, hermes_home: Path
) -> int:
total = 0
if not skip_venv:
total += len(_discover_venv())
if not skip_plugins:
total += len(_discover_plugins(hermes_home))
if not skip_mcp:
total += len(_discover_mcp())
return total
# ─── CLI entrypoint ───────────────────────────────────────────────────────────
def cmd_security_audit(args: argparse.Namespace) -> int:
    """Implementation of `hermes security audit`.

    Reads ``skip_venv``, ``skip_plugins``, ``skip_mcp``, ``json`` and
    ``fail_on`` from ``args`` (all optional; ``fail_on`` defaults to
    ``critical``), runs the audit and prints the report.

    Returns:
        0 when no finding reaches the ``fail_on`` threshold (or nothing was
        discovered), 1 when at least one does, 2 for an unknown ``fail_on``
        value or a failed audit.
    """
    home = Path(get_hermes_home())
    surfaces = {
        flag: bool(getattr(args, flag, False))
        for flag in ("skip_venv", "skip_plugins", "skip_mcp")
    }
    as_json = bool(getattr(args, "json", False))

    fail_on = (getattr(args, "fail_on", None) or "critical").upper()
    if fail_on not in SEVERITY_ORDER:
        print(
            f"unknown --fail-on value: {fail_on.lower()} "
            f"(choose from: low, moderate, high, critical)",
            file=sys.stderr,
        )
        return 2

    total = _count_components(hermes_home=home, **surfaces)
    if total == 0:
        if as_json:
            print(json.dumps({"total_components_scanned": 0, "finding_count": 0, "findings": []}))
        else:
            print("No components discovered (everything skipped, or empty environment).")
        return 0

    try:
        findings = run_audit(hermes_home=home, **surfaces)
    except RuntimeError as exc:
        print(f"audit failed: {exc}", file=sys.stderr)
        return 2

    render = _render_json if as_json else _render_human
    print(render(findings, total))

    # Exit code: 1 iff any finding meets or exceeds the --fail-on threshold.
    threshold = SEVERITY_ORDER[fail_on]
    blocking = any(
        SEVERITY_ORDER.get(finding.vuln.severity, 0) >= threshold for finding in findings
    )
    return 1 if blocking else 0

View file

@ -0,0 +1,886 @@
"""Abstract service manager interface.
Wraps the existing systemd (Linux host), launchd (macOS host), Windows
Scheduled Task (native Windows host), and s6 (container) backends behind
a common Protocol. Only the s6 backend supports runtime registration
(for per-profile gateways) — host backends raise NotImplementedError
from those methods, and callers MUST check supports_runtime_registration()
before invoking them.
Host-side call sites (setup wizard, uninstall, status) continue to use
the existing module-level functions in hermes_cli.gateway and
hermes_cli.gateway_windows directly. This protocol is a thin facade
used by new code that needs to be backend-agnostic — specifically the
profile create/delete hooks (Phase 4) and the s6 dispatch path in
``hermes gateway start/stop/restart`` when running inside a container.
"""
from __future__ import annotations
import re
from pathlib import Path
from typing import Literal, Protocol, runtime_checkable
ServiceManagerKind = Literal["systemd", "launchd", "windows", "s6", "none"]
# Profile name → service directory mapping. Profile names must be safe
# as filesystem directory names because the s6 backend creates a service
# directory at ``<scandir>/gateway-<profile>/``. We reject anything that
# could traverse paths, span filesystems, or break s6's own naming rules.
# Anchored with \A/\Z rather than ^/$: ``$`` also matches just before a
# trailing newline, which would let "name\n" through.
_VALID_PROFILE_RE = re.compile(r"\A[a-z0-9][a-z0-9_-]*\Z")
# NOTE(review): the service directory is ``gateway-<profile>``, i.e. 8
# characters longer than the profile name — confirm whether this limit
# should account for that prefix.
_MAX_PROFILE_LEN = 251  # s6-svscan default name_max


def validate_profile_name(name: str) -> None:
    """Raise ValueError if ``name`` is not usable as a profile name.

    Profile names are used as s6 service directory names, so they must
    match a conservative subset of filesystem-safe characters. Reject
    empty strings, uppercase, path-traversal sequences, trailing newlines,
    and anything longer than s6's default ``name_max``.

    Args:
        name: Candidate profile name.

    Raises:
        ValueError: ``name`` is empty, longer than ``_MAX_PROFILE_LEN``, or
            does not consist solely of ``[a-z0-9_-]`` starting with a
            letter or digit.
    """
    if not name:
        raise ValueError("profile name must not be empty")
    if len(name) > _MAX_PROFILE_LEN:
        raise ValueError(
            f"profile name too long ({len(name)} > {_MAX_PROFILE_LEN})"
        )
    # fullmatch() requires the entire string to match, independent of how
    # the pattern is anchored.
    if not _VALID_PROFILE_RE.fullmatch(name):
        raise ValueError(
            f"profile name must match [a-z0-9][a-z0-9_-]*, got {name!r}"
        )
@runtime_checkable
class ServiceManager(Protocol):
    """Abstract interface for init-system-specific service operations.

    Lifecycle methods (start / stop / restart / is_running) are
    implemented by every backend. Runtime registration
    (register_profile_gateway / unregister_profile_gateway /
    list_profile_gateways) is implemented only by the s6 backend —
    callers MUST check ``supports_runtime_registration()`` before
    invoking the registration methods.
    """

    # Identifies the backend; one of the ServiceManagerKind literals.
    kind: ServiceManagerKind

    # Lifecycle of a pre-declared service.
    def start(self, name: str) -> None: ...
    def stop(self, name: str) -> None: ...
    def restart(self, name: str) -> None: ...
    def is_running(self, name: str) -> bool: ...

    # Runtime registration (s6 only).
    def supports_runtime_registration(self) -> bool: ...
    def register_profile_gateway(
        self,
        profile: str,
        *,
        extra_env: dict[str, str] | None = None,
    ) -> None: ...
    def unregister_profile_gateway(self, profile: str) -> None: ...
    def list_profile_gateways(self) -> list[str]: ...
def detect_service_manager() -> ServiceManagerKind:
    """Report which service manager this environment provides.

    Probes are tried in order and the first match wins:

    * ``"s6"`` — inside a container with s6-svscan as PID 1
    * ``"windows"`` — native Windows host
    * ``"launchd"`` — macOS host
    * ``"systemd"`` — Linux host where systemd services are supported
    * ``"none"`` — anything else (Termux, sandbox shells, etc.)

    This function does NOT replace ``supports_systemd_services()`` — host
    call sites continue to use that. It exists for new backend-agnostic
    code (profile create/delete hooks, the s6 dispatch path in
    ``hermes gateway start/stop/restart``).
    """
    # Deferred imports: importing this module must stay cheap for callers
    # that only want the Protocol type or validate_profile_name().
    from hermes_constants import is_container
    from hermes_cli.gateway import (
        is_macos,
        is_windows,
        supports_systemd_services,
    )

    probes = (
        # The lambda keeps the s6 probe lazy and short-circuiting.
        ("s6", lambda: is_container() and _s6_running()),
        ("windows", is_windows),
        ("launchd", is_macos),
        ("systemd", supports_systemd_services),
    )
    for kind, probe in probes:
        if probe():
            return kind
    return "none"
def _s6_running() -> bool:
"""True when s6-svscan is running as PID 1 in this container.
Detection has to work for **both** root and the unprivileged hermes
user (UID 10000). The obvious probe ``Path('/proc/1/exe').resolve()``
only works as root: for any other UID, the symlink at
``/proc/1/exe`` is unreadable and ``resolve()`` silently returns the
path unchanged, so the resolved name is the literal ``"exe"`` and
detection always fails. Since every Hermes runtime call inside the
container drops to hermes via ``s6-setuidgid``, that silent failure
made the entire service-manager runtime-registration path inert in
production (PR #30136 review).
Probe instead via:
* ``/proc/1/comm`` world-readable, contains the process comm
(``s6-svscan`` when s6-overlay is PID 1).
* ``/run/s6/basedir`` s6-overlay-specific directory created by
stage1. World-readable. More specific than ``/run/s6`` (which
other tools occasionally create).
Both signals are required; either alone could false-positive
(e.g. a container with the s6 binaries installed but a different
init, or an unrelated process named ``s6-svscan``).
"""
try:
comm = Path("/proc/1/comm").read_text(encoding="utf-8").strip()
except OSError:
return False
if comm != "s6-svscan":
return False
return Path("/run/s6/basedir").is_dir()
# ---------------------------------------------------------------------------
# Backend wrappers
#
# These adapters are thin facades over the existing module-level functions
# in ``hermes_cli.gateway`` (systemd/launchd) and ``hermes_cli.gateway_windows``
# (Windows Scheduled Tasks). The protocol's ``name`` parameter is currently
# unused for host backends — they operate on whichever profile is currently
# active (set via the ``hermes -p <profile>`` flag before the call). This
# matches existing host-side semantics; the parameter shape is designed
# for s6 where each profile maps to a distinct service directory.
# ---------------------------------------------------------------------------
class _RegistrationUnsupportedMixin:
"""Mixin for host backends that don't support runtime registration."""
def supports_runtime_registration(self) -> bool:
return False
def register_profile_gateway(
self,
profile: str,
*,
extra_env: dict[str, str] | None = None,
) -> None:
raise NotImplementedError(
f"{type(self).__name__} does not support runtime profile "
"gateway registration (container-only feature)"
)
def unregister_profile_gateway(self, profile: str) -> None:
raise NotImplementedError(
f"{type(self).__name__} does not support runtime profile "
"gateway unregistration (container-only feature)"
)
def list_profile_gateways(self) -> list[str]:
return []
class SystemdServiceManager(_RegistrationUnsupportedMixin):
    """Backend-agnostic facade over the ``systemd_*`` helpers in
    ``hermes_cli.gateway``.

    Existing host call sites keep calling those helpers directly; this
    wrapper is for newer code that must not care which backend is in use
    (the Phase 4 profile create/delete hooks).

    Every lifecycle method accepts ``name`` to satisfy the shared protocol
    but does not use it: the underlying helpers are called without
    arguments. ``hermes_cli.gateway`` is imported inside each method
    rather than at module import time.
    """

    kind: ServiceManagerKind = "systemd"

    def start(self, name: str) -> None:
        """Start the gateway via ``systemd_start()``; ``name`` is ignored."""
        from hermes_cli.gateway import systemd_start as _start

        _start()

    def stop(self, name: str) -> None:
        """Stop the gateway via ``systemd_stop()``; ``name`` is ignored."""
        from hermes_cli.gateway import systemd_stop as _stop

        _stop()

    def restart(self, name: str) -> None:
        """Restart the gateway via ``systemd_restart()``; ``name`` is ignored."""
        from hermes_cli.gateway import systemd_restart as _restart

        _restart()

    def is_running(self, name: str) -> bool:
        """Return the second element of ``_probe_systemd_service_running()``.

        The probe returns a pair; only its second item (the running flag)
        is reported. ``name`` is ignored.
        """
        from hermes_cli.gateway import _probe_systemd_service_running as _probe

        _first, is_up = _probe()
        return is_up
class LaunchdServiceManager(_RegistrationUnsupportedMixin):
    """Backend-agnostic facade over the ``launchd_*`` helpers in
    ``hermes_cli.gateway``.

    Each lifecycle method takes ``name`` for protocol compatibility but
    does not use it; the wrapped helpers are called without arguments.
    ``hermes_cli.gateway`` is imported inside each method.
    """

    kind: ServiceManagerKind = "launchd"

    def start(self, name: str) -> None:
        """Start the gateway via ``launchd_start()``; ``name`` is ignored."""
        from hermes_cli.gateway import launchd_start as _start

        _start()

    def stop(self, name: str) -> None:
        """Stop the gateway via ``launchd_stop()``; ``name`` is ignored."""
        from hermes_cli.gateway import launchd_stop as _stop

        _stop()

    def restart(self, name: str) -> None:
        """Restart the gateway via ``launchd_restart()``; ``name`` is ignored."""
        from hermes_cli.gateway import launchd_restart as _restart

        _restart()

    def is_running(self, name: str) -> bool:
        """Return whatever ``_probe_launchd_service_running()`` reports.

        ``name`` is ignored.
        """
        from hermes_cli.gateway import _probe_launchd_service_running as _probe

        is_up = _probe()
        return is_up
class WindowsServiceManager(_RegistrationUnsupportedMixin):
    """Facade over ``hermes_cli.gateway_windows`` (Scheduled Task /
    Startup-folder fallback).

    The native Windows backend is a Scheduled Task rather than a true
    init-system service, but the protocol lifecycle is the same:
    start / stop / restart / is_running. ``install`` additionally takes a
    few Windows-specific keyword arguments (``start_now``,
    ``start_on_login``, ``elevated_handoff``) that are forwarded
    unchanged — non-Windows callers should never invoke ``install`` on
    this wrapper.

    Lifecycle methods accept ``name`` for protocol compatibility but do
    not use it.
    """

    kind: ServiceManagerKind = "windows"

    def install(
        self,
        *,
        force: bool = False,
        start_now: bool | None = None,
        start_on_login: bool | None = None,
        elevated_handoff: bool = False,
    ) -> None:
        """Forward every option verbatim to ``gateway_windows.install``."""
        from hermes_cli import gateway_windows as _win

        options = {
            "force": force,
            "start_now": start_now,
            "start_on_login": start_on_login,
            "elevated_handoff": elevated_handoff,
        }
        _win.install(**options)

    def start(self, name: str) -> None:
        """Call ``gateway_windows.start()``; ``name`` is ignored."""
        from hermes_cli import gateway_windows as _win

        _win.start()

    def stop(self, name: str) -> None:
        """Call ``gateway_windows.stop()``; ``name`` is ignored."""
        from hermes_cli import gateway_windows as _win

        _win.stop()

    def restart(self, name: str) -> None:
        """Call ``gateway_windows.restart()``; ``name`` is ignored."""
        from hermes_cli import gateway_windows as _win

        _win.restart()

    def is_running(self, name: str) -> bool:
        """Report whether an installed gateway currently has live processes.

        Returns ``False`` when ``gateway_windows.is_installed()`` is falsy;
        otherwise the truthiness of ``find_gateway_pids()``. ``name`` is
        ignored.
        """
        from hermes_cli import gateway_windows as _win
        from hermes_cli.gateway import find_gateway_pids as _find_pids

        if _win.is_installed():
            return bool(_find_pids())
        return False
def get_service_manager() -> ServiceManager:
    """Build the ServiceManager matching the current environment.

    The backend kind comes from ``detect_service_manager()`` and is mapped
    to its wrapper class, which is instantiated with no arguments.

    Raises:
        RuntimeError: when the detected kind is not one of ``systemd``,
            ``launchd``, ``windows`` or ``s6``.
    """
    detected = detect_service_manager()
    # Built per call, so every class name is resolved at call time (after
    # the whole module has been loaded) rather than at import time.
    backends = {
        "systemd": SystemdServiceManager,
        "launchd": LaunchdServiceManager,
        "windows": WindowsServiceManager,
        "s6": S6ServiceManager,
    }
    backend_cls = backends.get(detected)
    if backend_cls is None:
        raise RuntimeError("no supported service manager detected")
    return backend_cls()
# ---------------------------------------------------------------------------
# S6ServiceManager (container-only)
#
# Per-profile gateways are registered dynamically when `hermes profile create`
# runs inside the container (Phase 4). Static services (main-hermes, dashboard)
# live in /etc/s6-overlay/s6-rc.d/ and are NOT managed by this class — they're
# part of the image, not runtime-created.
# ---------------------------------------------------------------------------
# s6-overlay's dynamic scandir for runtime-registered services. Lives on
# tmpfs and is the directory s6-svscan watches. Writes here trigger
# automatic supervision on the next rescan.
S6_DYNAMIC_SCANDIR = Path("/run/service")
# Name prefix of every per-profile gateway service directory inside the
# scandir: the directory for profile ``<p>`` is ``gateway-<p>``. Listing
# code strips this prefix again to recover the profile name.
S6_SERVICE_PREFIX = "gateway-"
# s6-overlay installs its binaries under /command/ and only adds that
# directory to PATH for processes started under the supervision tree
# (services started by s6-svscan, cont-init.d scripts, etc.). Code
# that runs via `docker exec` or any other out-of-tree entry point —
# notably our Phase 4 profile create/delete hooks — inherits the
# container's base PATH which does NOT include /command/.
#
# Rather than asking every caller to fix up its environment, the
# S6ServiceManager calls s6-* binaries by absolute path via this
# constant. We don't use `/usr/bin/s6-…` symlinks because the
# s6-overlay-symlinks-noarch tarball only links a subset, and we
# want every s6 invocation to be guaranteed-findable.
_S6_BIN_DIR = "/command"
# UID/GID of the in-image ``hermes`` user. Hardcoded to match what
# ``stage2-hook.sh`` enforces (the runtime invariant — see also
# tests/docker/test_uid_remap.py). The container starts s6-supervise
# under root and immediately drops to this UID via ``s6-setuidgid``.
# Both values are passed to ``os.chown`` when the supervise skeleton is
# pre-created.
_HERMES_UID = 10000
_HERMES_GID = 10000
def _seed_supervise_skeleton(svc_dir: Path) -> None:
    """Lay down hermes-owned ``event/`` and ``supervise/`` entries in a
    service directory before s6-supervise ever sees it.

    Why this exists
    ---------------
    When s6-supervise spawns a service it tries to ``mkdir``
    ``<svc>/event`` and ``<svc>/supervise`` (mode ``0700``) and to
    ``mkfifo`` ``<svc>/supervise/control`` (mode ``0600``). It runs with
    PID 1's effective UID (root), so those entries end up root-owned and
    an unprivileged client — the ``hermes`` user (UID 10000), which runs
    every Hermes runtime operation via ``s6-setuidgid`` — gets ``EACCES``
    from any ``s6-svc``, ``s6-svstat`` or ``s6-svwait`` call against the
    slot. The PR #30136 review surfaced this as a real product gap: the
    whole S6ServiceManager lifecycle was inert in production because every
    operation is dispatched as the hermes user.

    Why this works
    --------------
    Per s6's source (src/supervision/s6-supervise.c::trymkdir and
    control_init), both the ``mkdir`` and the ``mkfifo`` treat ``EEXIST``
    as success, and when the directory already exists the chown/chmod
    fix-up that would make event/ ``03730 root:root`` is skipped.
    s6-supervise simply opens the pre-existing FIFOs. Seeding the skeleton
    with hermes ownership before ``s6-svscanctl -a`` therefore makes
    s6-supervise inherit this layout untouched.

    Layout produced
    ---------------
    ``svc_dir/``                    must already exist
    ``svc_dir/event/``              hermes:hermes, 03730
    ``svc_dir/supervise/``          hermes:hermes, 0755
    ``svc_dir/supervise/event/``    hermes:hermes, 03730
    ``svc_dir/supervise/control``   hermes:hermes, 0660 (FIFO)

    ``death_tally``, ``lock`` and ``status`` are still written by
    s6-supervise itself (as root), but they land mode 0644 and
    ``s6-svstat`` only needs to read them.

    When ``svc_dir/log/`` exists (the canonical s6 logger pattern: one
    s6-supervise for the service plus one for its logger) the same four
    entries are seeded under ``log/``. Without that, unregister teardown
    would hit ``EACCES`` on the logger's supervise dir even though the
    parent slot's supervise/ is hermes-owned.

    Idempotency
    -----------
    Safe to call repeatedly. Anything that already exists is left exactly
    as it is; live FIFOs that s6-supervise may already have opened are not
    re-chowned or re-chmodded.

    Reference
    ---------
    Discussed on the skarnet ``skaware`` mailing list in 2020
    (`<http://skarnet.org/lists/skaware/1424.html>`_); see also
    just-containers/s6-overlay#130. Pre-creation was historically called
    forward-compatibility-fragile, but the EEXIST handling in s6-supervise
    has been stable since 2015 — it is the same pattern ``s6-svperms`` and
    ``fix-attrs.d`` rely on.
    """
    import os

    def _hand_to_hermes(target: Path) -> None:
        try:
            os.chown(target, _HERMES_UID, _HERMES_GID)
        except PermissionError:
            # Already running as the hermes user: the entry is hermes-owned
            # by default, so the chown would have been a no-op anyway.
            # Swallowing keeps root and unprivileged callers on one path.
            pass

    def _ensure_dir(target: Path, mode: int) -> None:
        if not target.exists():
            target.mkdir(parents=False, exist_ok=False)
            target.chmod(mode)
            _hand_to_hermes(target)

    def _ensure_control_fifo(target: Path) -> None:
        if not target.exists():
            os.mkfifo(target, 0o660)
            # mkfifo honours the process umask (a 0022 umask turns 0o660
            # into 0o640), so set the mode explicitly afterwards. The
            # container runs with umask 0 in s6-overlay's stage2, but this
            # keeps the helper correct under any invocation context.
            target.chmod(0o660)
            _hand_to_hermes(target)

    def _seed_slot(slot: Path) -> None:
        # Top-level event/ is the s6-svlisten1 event-subscription dir at
        # the slot root, distinct from supervise/event/.
        _ensure_dir(slot / "event", 0o3730)
        supervise_dir = slot / "supervise"
        _ensure_dir(supervise_dir, 0o755)
        _ensure_dir(supervise_dir / "event", 0o3730)
        _ensure_control_fifo(supervise_dir / "control")

    _seed_slot(svc_dir)

    # A log/ subdir (see servicedir(7)) gets its own s6-supervise instance
    # and therefore needs the same skeleton.
    logger_slot = svc_dir / "log"
    if logger_slot.is_dir():
        _seed_slot(logger_slot)
class S6Error(RuntimeError):
"""Base error for S6ServiceManager lifecycle failures.
Concrete subclasses carry the slot name (and, where useful, the
underlying subprocess output) so the CLI can render an actionable
message instead of leaking a raw ``CalledProcessError`` traceback.
"""
def __init__(self, message: str, *, service: str | None = None) -> None:
super().__init__(message)
self.service = service
class GatewayNotRegisteredError(S6Error):
    """A lifecycle call targeted a gateway slot that does not exist.

    Typical trigger: ``hermes -p typo gateway start`` when there is no
    profile ``typo``. ``profile`` holds the bare profile name (without
    the ``gateway-`` prefix) so callers can word a message such as
    "no such gateway 'typo'"; ``service`` holds the prefixed name.
    """

    def __init__(self, profile: str) -> None:
        hint = (
            f"no such gateway {profile!r}: register it with "
            f"`hermes profile create {profile}` first, or pass "
            "an existing profile name via `-p <name>`"
        )
        super().__init__(hint, service=f"gateway-{profile}")
        self.profile = profile
class S6CommandError(S6Error):
    """An s6 command failed for a reason other than a missing slot.

    Examples: permission denied on the supervise control FIFO, or
    ``s6-svc`` exiting non-zero unexpectedly. The instance keeps the
    action label, the return code and the captured stderr so callers can
    show them.
    """

    def __init__(
        self, *, service: str, action: str, returncode: int, stderr: str,
    ) -> None:
        self.action = action
        self.returncode = returncode
        self.stderr = stderr
        summary = f"s6-svc {action} on {service!r} failed (rc={returncode})"
        detail = stderr.strip()
        # Only append the stderr text when there is something to show.
        text = f"{summary}: {detail}" if detail else summary
        super().__init__(text, service=service)
class S6ServiceManager:
    """Per-profile gateway supervision via s6-overlay.

    Only handles runtime-registered services under
    ``S6_DYNAMIC_SCANDIR``. Static services (main-hermes, dashboard)
    are managed by s6-rc at image-build time and are out of scope.

    Naming: the lifecycle methods (``start`` / ``stop`` / ``restart`` /
    ``is_running``) take the full service-directory name
    (``gateway-<profile>``), while the registration methods take the bare
    profile name.
    """

    kind: ServiceManagerKind = "s6"

    def __init__(self, scandir: Path = S6_DYNAMIC_SCANDIR) -> None:
        self.scandir = scandir

    # -- internal helpers --------------------------------------------------

    def _service_dir(self, profile: str) -> Path:
        """Return ``<scandir>/gateway-<profile>``.

        ``validate_profile_name`` is called first; any exception it raises
        for a bad name propagates to the caller.
        """
        validate_profile_name(profile)
        return self.scandir / f"{S6_SERVICE_PREFIX}{profile}"

    def _service_name(self, profile: str) -> str:
        """Return ``gateway-<profile>`` (no validation is performed here)."""
        return f"{S6_SERVICE_PREFIX}{profile}"

    @staticmethod
    def _render_run_script(
        profile: str,
        extra_env: dict[str, str],
    ) -> str:
        """Generate the run script for a profile-gateway s6 service.

        The script:

        1. Sources HERMES_HOME (and any extra env) via with-contenv
           so e.g. ``-e HERMES_HOME=/data/hermes`` is honored at run
           time, not Python-substituted at registration time (OQ8-C).
        2. Activates the bundled venv.
        3. Drops to the hermes user and exec's
           ``hermes -p <profile> gateway run`` (or just ``hermes
           gateway run`` for the default profile — see below).

        Special case: ``profile == "default"`` emits ``hermes gateway
        run`` with **no** ``-p`` flag. This is the sentinel for "the
        root HERMES_HOME profile" (the implicit profile that exists at
        the top of $HERMES_HOME, not under profiles/). It must be
        spelled this way because ``_profile_suffix()`` returns the
        empty string for the root profile, and the dispatcher in
        ``hermes_cli.gateway`` maps that empty string to the
        ``gateway-default`` service slot. Passing ``-p default`` here
        would instead look up ``$HERMES_HOME/profiles/default/`` — a
        completely different (and almost always nonexistent) profile.

        Port selection: the gateway picks its bind port from the
        profile's ``config.yaml`` (``[gateway] port = ...``) — that
        is the single source of truth. Previously this method took a
        ``port`` parameter that was passed in but never substituted
        into the rendered script (it was carried in for "API parity"
        with a deterministic SHA-256 allocator in
        ``hermes_cli.profiles._allocate_gateway_port``). PR #30136
        review item I5 retired both the allocator and the parameter
        because they were dead code through the entire stack.

        Args:
            profile: Profile name; shell-quoted before being embedded.
            extra_env: Extra variables exported before the exec, emitted
                in sorted key order. Values are shell-quoted; keys are
                emitted verbatim.

        Returns:
            The script text, newline-terminated.
        """
        import shlex

        lines = [
            "#!/command/with-contenv sh",
            "# shellcheck shell=sh",
            "set -e",
            "cd /opt/data",
            ". /opt/hermes/.venv/bin/activate",
        ]
        for k, v in sorted(extra_env.items()):
            lines.append(f"export {k}={shlex.quote(v)}")
        if profile == "default":
            lines.append("exec s6-setuidgid hermes hermes gateway run")
        else:
            lines.append(
                f"exec s6-setuidgid hermes hermes -p {shlex.quote(profile)} gateway run"
            )
        return "\n".join(lines) + "\n"

    @staticmethod
    def _render_log_run(profile: str) -> str:
        """Generate the log/run script for a profile-gateway service.

        OQ8-C: persist to ``${HERMES_HOME}/logs/gateways/<profile>/``.

        CRITICAL: the HERMES_HOME path is sourced from the runtime env
        via with-contenv — NOT Python-substituted at registration time —
        so a container started with ``-e HERMES_HOME=/data/hermes``
        gets its logs under /data/hermes/logs/..., not the build-time
        default.

        Returns:
            The script text, newline-terminated.
        """
        import shlex

        prof = shlex.quote(profile)
        return (
            f"#!/command/with-contenv sh\n"
            f"# shellcheck shell=sh\n"
            f': "${{HERMES_HOME:=/opt/data}}"\n'
            f'log_dir="$HERMES_HOME/logs/gateways/{prof}"\n'
            f'mkdir -p "$log_dir"\n'
            f'chown -R hermes:hermes "$log_dir" 2>/dev/null || true\n'
            f'exec s6-setuidgid hermes s6-log n10 s1000000 T "$log_dir"\n'
        )

    # -- lifecycle ---------------------------------------------------------

    def _run_svc(self, action_flag: str, action_label: str, name: str) -> None:
        """Shared lifecycle dispatch for start / stop / restart.

        Translates the failure modes operators care about into named
        errors:

        * ``GatewayNotRegisteredError`` — the service directory at
          ``<scandir>/<name>/`` doesn't exist. ``s6-svc`` would
          exit non-zero with a fairly opaque message; we pre-empt
          it with a clear "no such gateway 'X'" tied to the profile
          name (without the ``gateway-`` prefix).
        * ``S6CommandError`` — anything else: a non-zero exit (e.g.
          EACCES on the supervise control FIFO) or ``s6-svc`` not
          finishing within the 5 second timeout. Carries the return
          code and stderr so callers can render them inline. A timeout
          has no real exit status, so it is reported with
          ``returncode=-1`` and a "timed out" message as stderr.

        Args:
            action_flag: the ``s6-svc`` flag (``-u`` / ``-d`` / ``-t``).
            action_label: the human verb (``start`` / ``stop`` /
                ``restart``) used in error messages.
            name: full service-directory name (``gateway-<profile>``).
        """
        import subprocess

        service_dir = self.scandir / name
        if not service_dir.is_dir():
            # Strip the gateway- prefix back off so the message
            # matches what the user typed on the CLI (``-p <profile>``).
            profile = (
                name[len(S6_SERVICE_PREFIX):]
                if name.startswith(S6_SERVICE_PREFIX)
                else name
            )
            raise GatewayNotRegisteredError(profile)
        try:
            subprocess.run(
                [f"{_S6_BIN_DIR}/s6-svc", action_flag, str(service_dir)],
                check=True, capture_output=True, text=True, timeout=5,
            )
        except subprocess.CalledProcessError as exc:
            raise S6CommandError(
                service=name,
                action=action_label,
                returncode=exc.returncode,
                stderr=exc.stderr or "",
            ) from exc
        except subprocess.TimeoutExpired as exc:
            # Without this branch a hung s6-svc leaked a raw
            # TimeoutExpired, contradicting the documented contract.
            raise S6CommandError(
                service=name,
                action=action_label,
                returncode=-1,
                stderr=f"timed out after {exc.timeout}s",
            ) from exc

    def start(self, name: str) -> None:
        """Bring up a registered service (``s6-svc -u``).

        Raises:
            GatewayNotRegisteredError: no service directory for ``name``.
            S6CommandError: s6-svc exited non-zero or timed out
                (permission denied on the supervise FIFO, etc.).
        """
        self._run_svc("-u", "start", name)

    def stop(self, name: str) -> None:
        """Bring down a registered service (``s6-svc -d``).

        Raises:
            GatewayNotRegisteredError: no service directory for ``name``.
            S6CommandError: s6-svc exited non-zero or timed out.
        """
        self._run_svc("-d", "stop", name)

    def restart(self, name: str) -> None:
        """Restart a registered service (``s6-svc -t`` = SIGTERM).

        Raises:
            GatewayNotRegisteredError: no service directory for ``name``.
            S6CommandError: s6-svc exited non-zero or timed out.
        """
        self._run_svc("-t", "restart", name)

    def is_running(self, name: str) -> bool:
        """True iff ``s6-svstat`` exits 0 and its output contains ``"up "``.

        ``name`` is the full service-directory name. The probe runs with a
        5 second timeout; ``subprocess.TimeoutExpired`` is not caught here.
        """
        import subprocess

        result = subprocess.run(
            [f"{_S6_BIN_DIR}/s6-svstat", str(self.scandir / name)],
            capture_output=True, text=True, timeout=5,
        )
        return result.returncode == 0 and "up " in result.stdout

    # -- runtime registration ---------------------------------------------

    def supports_runtime_registration(self) -> bool:
        """Always ``True``: this backend can register gateways at runtime."""
        return True

    def register_profile_gateway(
        self,
        profile: str,
        *,
        extra_env: dict[str, str] | None = None,
    ) -> None:
        """Create the s6 service directory for a profile gateway.

        Triggers ``s6-svscanctl -a`` so s6-svscan picks the new directory
        up immediately. The service is created in the *up* state — to
        register without auto-starting, follow up with ``stop()`` on the
        ``gateway-<profile>`` service name (or pass the start flag via the
        future ``start_now=False`` arg, which the Phase 4 reconciliation
        path uses via a ``down`` marker file written directly).

        Args:
            profile: Bare profile name (no ``gateway-`` prefix).
            extra_env: Optional variables exported by the run script.

        Raises:
            ValueError: if the service directory already exists (an
                invalid profile name is rejected by
                ``validate_profile_name``).
            RuntimeError: if ``s6-svscanctl`` exits non-zero or does not
                finish within its timeout. The freshly published
                directory is removed again in both cases.
        """
        import shutil
        import subprocess

        svc_dir = self._service_dir(profile)
        if svc_dir.exists():
            raise ValueError(
                f"profile gateway {profile!r} already registered at {svc_dir}"
            )

        # Build the service directory atomically: write to a sibling
        # temp dir, then rename. The staging name is dot-prefixed because
        # it lives inside the scandir itself: s6-svscan skips dot-names,
        # and so does list_profile_gateways(). A plain
        # ``gateway-<p>.tmp`` could be supervised half-built on a
        # concurrent rescan and would be listed as profile ``<p>.tmp``.
        tmp_dir = svc_dir.with_name(f".{svc_dir.name}.tmp")
        if tmp_dir.exists():
            shutil.rmtree(tmp_dir, ignore_errors=True)
        tmp_dir.mkdir(parents=True)
        try:
            (tmp_dir / "type").write_text("longrun\n")
            run_script = self._render_run_script(profile, extra_env or {})
            run_path = tmp_dir / "run"
            run_path.write_text(run_script)
            run_path.chmod(0o755)

            # Persistent log rotation (OQ8-C).
            log_subdir = tmp_dir / "log"
            log_subdir.mkdir()
            log_run = log_subdir / "run"
            log_run.write_text(self._render_log_run(profile))
            log_run.chmod(0o755)

            # Pre-create the supervise/ skeleton with hermes ownership
            # BEFORE we publish the slot. s6-supervise will EEXIST our
            # dirs/FIFOs and inherit the ownership, so the runtime
            # s6-svc / s6-svstat / s6-svwait calls (all dispatched as
            # the hermes user) won't hit EACCES on root-owned 0700
            # dirs. See ``_seed_supervise_skeleton`` for the full
            # rationale.
            _seed_supervise_skeleton(tmp_dir)

            tmp_dir.rename(svc_dir)
        except Exception:
            shutil.rmtree(tmp_dir, ignore_errors=True)
            raise

        # Trigger rescan so s6-svscan picks up the new service.
        try:
            result = subprocess.run(
                [f"{_S6_BIN_DIR}/s6-svscanctl", "-a", str(self.scandir)],
                capture_output=True, text=True, timeout=5,
            )
        except subprocess.TimeoutExpired as exc:
            # Same clean-up as a non-zero exit: don't leave a published
            # directory behind that nothing was told to supervise.
            shutil.rmtree(svc_dir, ignore_errors=True)
            raise RuntimeError(
                f"s6-svscanctl failed: timed out after {exc.timeout}s"
            ) from exc
        if result.returncode != 0:
            # Clean up: rescan failed, leaving the directory in place
            # would be confusing (no supervisor watching it).
            shutil.rmtree(svc_dir, ignore_errors=True)
            raise RuntimeError(
                f"s6-svscanctl failed: {result.stderr or result.stdout}"
            )

    def unregister_profile_gateway(self, profile: str) -> None:
        """Stop the profile gateway service and remove its directory.

        Idempotent: absent services are a no-op. Best-effort stop +
        wait-for-down before removal so the running gateway process
        gets a chance to shut down cleanly before its service dir
        disappears. "Best-effort" covers both a non-zero exit and a
        timeout of any s6 helper: neither prevents the removal.

        Teardown ordering matters: ``s6-svscanctl -an`` is fired
        **before** ``rmtree`` so s6-svscan reaps the supervise child
        process (releasing its handle on ``supervise/lock`` and the
        regular files inside the supervise dir), giving us a clean
        directory to remove. Without the reap-first ordering, the
        rmtree races s6-supervise on a set of root-owned files inside
        the supervise dir and the dir is left half-removed.
        """
        import shutil
        import subprocess
        import time

        svc_dir = self._service_dir(profile)
        if not svc_dir.exists():
            return

        def _best_effort(argv: list[str], timeout: float) -> None:
            # check=False already ignores non-zero exits; a hung tool is
            # ignored as well so teardown always reaches the rmtree.
            try:
                subprocess.run(
                    argv,
                    capture_output=True, text=True, timeout=timeout,
                    check=False,
                )
            except subprocess.TimeoutExpired:
                pass

        # Stop the service (best effort — service may already be down).
        _best_effort([f"{_S6_BIN_DIR}/s6-svc", "-d", str(svc_dir)], 5)

        # Wait for it to actually go down (up to 10s).
        _best_effort(
            [f"{_S6_BIN_DIR}/s6-svwait", "-D", "-t", "10000", str(svc_dir)],
            15,
        )

        # Reap the supervise child FIRST: -n tells s6-svscan to drop
        # any supervise processes whose service dir is gone (which
        # includes any service dir we're about to remove). This
        # releases the file handles s6-supervise holds against the
        # supervise/lock + supervise/status + supervise/death_tally
        # files inside the slot, so the upcoming rmtree doesn't race.
        _best_effort(
            [f"{_S6_BIN_DIR}/s6-svscanctl", "-an", str(self.scandir)],
            5,
        )

        # Give s6-svscan a moment to reap. There's no synchronous
        # "scan completed" handshake — the -a/-n trigger just sets a
        # flag s6-svscan reads on its next loop iteration. 200ms is
        # comfortably above the loop's resolution but well under any
        # user-perceived latency.
        time.sleep(0.2)

        # Now the supervise dir's files are no longer held open by a
        # live s6-supervise, so rmtree can remove them. Files inside
        # supervise/ are root-owned (death_tally, lock, status, written
        # by s6-supervise itself) — but the parent supervise/ directory
        # is hermes-owned (see ``_seed_supervise_skeleton``), and on
        # POSIX you only need write+execute on the parent to remove
        # contained files regardless of file ownership.
        shutil.rmtree(svc_dir, ignore_errors=True)

    def list_profile_gateways(self) -> list[str]:
        """Return the profile names of all currently-registered gateway services.

        Keeps scandir entries that are directories whose name starts with
        the ``gateway-`` prefix and not with a dot; the prefix is stripped
        from each returned name. Other services (e.g.
        ``s6-linux-init-shutdownd``) are ignored. Returns ``[]`` when the
        scandir does not exist.
        """
        if not self.scandir.exists():
            return []
        prefix_len = len(S6_SERVICE_PREFIX)
        return [
            entry.name[prefix_len:]
            for entry in self.scandir.iterdir()
            if not entry.name.startswith(".")
            and entry.is_dir()
            and entry.name.startswith(S6_SERVICE_PREFIX)
        ]

View file

@ -104,7 +104,7 @@ _DEFAULT_PROVIDER_MODELS = {
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
"opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", "qwen3.6-plus", "qwen3.5-plus"],
"opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", "qwen3.7-max", "qwen3.6-plus", "qwen3.5-plus"],
"huggingface": [
"Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@ -161,6 +161,7 @@ from hermes_cli.cli_output import ( # noqa: E402
print_success,
print_warning,
)
from hermes_cli.secret_prompt import masked_secret_prompt # noqa: E402
def is_interactive_stdin() -> bool:
@ -202,9 +203,7 @@ def prompt(question: str, default: str = None, password: bool = False) -> str:
try:
if password:
import getpass
value = getpass.getpass(color(display, Colors.YELLOW))
value = masked_secret_prompt(color(display, Colors.YELLOW))
else:
value = input(color(display, Colors.YELLOW))
@ -1094,7 +1093,7 @@ def _xai_oauth_logged_in_for_setup() -> bool:
"""True iff xAI Grok OAuth credentials are already stored locally.
Lets TTS / STT setup skip the API-key prompt for users who logged in
through ``hermes model`` -> xAI Grok OAuth (SuperGrok Subscription).
through ``hermes model`` -> xAI Grok OAuth (SuperGrok / Premium+).
"""
try:
from hermes_cli.auth import get_xai_oauth_auth_status
@ -1124,7 +1123,7 @@ def _run_xai_oauth_login_from_setup() -> bool:
open_browser = not _is_remote_session()
print()
print_info("Signing in to xAI Grok OAuth (SuperGrok Subscription)...")
print_info("Signing in to xAI Grok OAuth (SuperGrok / Premium+)...")
try:
creds = _xai_oauth_loopback_login(open_browser=open_browser)
_save_xai_oauth_tokens(
@ -1259,7 +1258,7 @@ def _setup_tts_provider(config: dict):
if oauth_logged_in:
print_success(
"xAI TTS will use your xAI Grok OAuth (SuperGrok Subscription) "
"xAI TTS will use your xAI Grok OAuth (SuperGrok / Premium+) "
"credentials"
)
elif existing_api_key:
@ -1269,7 +1268,7 @@ def _setup_tts_provider(config: dict):
choice_idx = prompt_choice(
"How do you want xAI TTS to authenticate?",
choices=[
"Sign in with xAI Grok OAuth (SuperGrok Subscription) — browser login",
"Sign in with xAI Grok OAuth (SuperGrok / Premium+) — browser login",
"Paste an xAI API key (console.x.ai)",
"Skip → fallback to Edge TTS",
],
@ -2034,74 +2033,6 @@ def _setup_telegram():
save_env_value("TELEGRAM_HOME_CHANNEL", home_channel)
def _setup_discord():
    """Interactively collect Discord settings and persist them.

    Flow:

    * If ``DISCORD_BOT_TOKEN`` is already set, offer to reconfigure. When
      the user declines, optionally back-fill a missing
      ``DISCORD_ALLOWED_USERS`` allowlist, then stop.
    * Otherwise (or when reconfiguring) prompt for the bot token — an
      empty answer aborts — then for an optional allowlist and an
      optional ``DISCORD_HOME_CHANNEL``.

    Allowlist input is normalised through ``_clean_discord_user_ids`` and
    stored comma-joined via ``save_env_value``.
    """

    def _store_allowlist(raw_ids):
        # Normalise mentions / "user:" prefixes, then persist and confirm.
        normalised = _clean_discord_user_ids(raw_ids)
        save_env_value("DISCORD_ALLOWED_USERS", ",".join(normalised))
        print_success("Discord allowlist configured")

    print_header("Discord")

    if get_env_value("DISCORD_BOT_TOKEN"):
        print_info("Discord: already configured")
        keep_existing = not prompt_yes_no("Reconfigure Discord?", False)
        if keep_existing:
            allowlist_missing = not get_env_value("DISCORD_ALLOWED_USERS")
            if allowlist_missing:
                print_info("⚠️ Discord has no user allowlist - anyone can use your bot!")
                if prompt_yes_no("Add allowed users now?", True):
                    print_info(" To find Discord ID: Enable Developer Mode, right-click name → Copy ID")
                    backfill = prompt("Allowed user IDs (comma-separated)")
                    if backfill:
                        _store_allowlist(backfill)
            return

    print_info("Create a bot at https://discord.com/developers/applications")
    bot_token = prompt("Discord bot token", password=True)
    if not bot_token:
        return
    save_env_value("DISCORD_BOT_TOKEN", bot_token)
    print_success("Discord token saved")

    print()
    for hint in (
        "🔒 Security: Restrict who can use your bot",
        " To find your Discord user ID:",
        " 1. Enable Developer Mode in Discord settings",
        " 2. Right-click your name → Copy ID",
    ):
        print_info(hint)
    print()
    print_info(" You can also use Discord usernames (resolved on gateway start).")
    print()
    requested = prompt(
        "Allowed user IDs or usernames (comma-separated, leave empty for open access)"
    )
    if not requested:
        print_info("⚠️ No allowlist set - anyone in servers with your bot can use it!")
    else:
        _store_allowlist(requested)

    print()
    for hint in (
        "📬 Home Channel: where Hermes delivers cron job results,",
        " cross-platform messages, and notifications.",
        " To get a channel ID: right-click a channel → Copy Channel ID",
        " (requires Developer Mode in Discord settings)",
        " You can also set this later by typing /set-home in a Discord channel.",
    ):
        print_info(hint)
    channel_id = prompt("Home channel ID (leave empty to set later with /set-home)")
    if channel_id:
        save_env_value("DISCORD_HOME_CHANNEL", channel_id)
def _clean_discord_user_ids(raw: str) -> list:
    """Split a comma-separated ID string and strip Discord decorations.

    All spaces are removed first. Each comma-separated piece then has a
    surrounding mention wrapper (``<@...>`` / ``<@!...>``) and a leading
    case-insensitive ``user:`` prefix removed, in that order. Pieces that
    end up empty are dropped.

    Returns:
        The cleaned identifiers, in input order.
    """

    def _normalise(piece: str) -> str:
        token = piece.strip()
        looks_like_mention = token.startswith("<@") and token.endswith(">")
        if looks_like_mention:
            # Character-set strips: drop any leading '<', '@', '!' and
            # any trailing '>'.
            token = token.lstrip("<@!").rstrip(">")
        has_user_prefix = token.lower().startswith("user:")
        return token[5:] if has_user_prefix else token

    pieces = raw.replace(" ", "").split(",")
    return [uid for uid in map(_normalise, pieces) if uid]
def _setup_slack():
"""Configure Slack bot credentials."""
print_header("Slack")
@ -2256,28 +2187,58 @@ def _setup_matrix():
print_success("E2EE enabled")
matrix_pkg = "mautrix[encryption]" if want_e2ee else "mautrix"
# Use the central lazy-deps feature group so we install ALL of
# platform.matrix's dependencies (mautrix, Markdown, aiosqlite,
# asyncpg, aiohttp-socks) — not just mautrix itself. The previous
# hand-rolled ``pip install mautrix[encryption]`` left asyncpg /
# aiosqlite uninstalled and broke E2EE connect with
# ``No module named 'asyncpg'`` on every fresh install (#31116).
try:
__import__("mautrix")
from tools.lazy_deps import ensure as _lazy_ensure, feature_missing
_missing_before = feature_missing("platform.matrix")
if _missing_before:
print_info(
f"Installing {matrix_pkg} (+ {len(_missing_before)} runtime deps)..."
)
try:
_lazy_ensure("platform.matrix", prompt=False)
print_success(f"{matrix_pkg} installed")
except Exception as exc:
print_warning(
f"Install failed — run manually: pip install "
f"'mautrix[encryption]' asyncpg aiosqlite Markdown "
f"aiohttp-socks"
)
print_info(f" Error: {exc}")
except ImportError:
print_info(f"Installing {matrix_pkg}...")
import subprocess
uv_bin = shutil.which("uv")
if uv_bin:
result = subprocess.run(
[uv_bin, "pip", "install", "--python", sys.executable, matrix_pkg],
capture_output=True, text=True,
)
else:
result = subprocess.run(
[sys.executable, "-m", "pip", "install", matrix_pkg],
capture_output=True, text=True,
)
if result.returncode == 0:
print_success(f"{matrix_pkg} installed")
else:
print_warning(f"Install failed — run manually: pip install '{matrix_pkg}'")
if result.stderr:
print_info(f" Error: {result.stderr.strip().splitlines()[-1]}")
# tools.lazy_deps unavailable (extreme edge case — partial
# install). Fall back to the legacy single-package install
# path so the wizard still does *something*.
try:
__import__("mautrix")
except ImportError:
print_info(f"Installing {matrix_pkg}...")
import subprocess
uv_bin = shutil.which("uv")
if uv_bin:
result = subprocess.run(
[uv_bin, "pip", "install", "--python", sys.executable, matrix_pkg],
capture_output=True, text=True,
)
else:
result = subprocess.run(
[sys.executable, "-m", "pip", "install", matrix_pkg],
capture_output=True, text=True,
)
if result.returncode == 0:
print_success(f"{matrix_pkg} installed")
else:
print_warning(
f"Install failed — run manually: pip install "
f"'{matrix_pkg}' asyncpg aiosqlite Markdown aiohttp-socks"
)
if result.stderr:
print_info(f" Error: {result.stderr.strip().splitlines()[-1]}")
print()
print_info("🔒 Security: Restrict who can use your bot")
@ -2299,50 +2260,6 @@ def _setup_matrix():
save_env_value("MATRIX_HOME_ROOM", home_room)
def _setup_mattermost():
    """Interactively collect Mattermost bot settings and persist them.

    Prompts, in order, for the server URL, the bot token, an optional
    user allowlist and an optional home channel. Each non-empty answer is
    written with ``save_env_value``. Returns early when the user declines
    to reconfigure an existing token or leaves the bot token empty.
    """
    print_header("Mattermost")

    # An existing token means a previous run finished; ask before redoing it.
    if get_env_value("MATTERMOST_TOKEN"):
        print_info("Mattermost: already configured")
        wants_reconfigure = prompt_yes_no("Reconfigure Mattermost?", False)
        if not wants_reconfigure:
            return

    for intro_line in (
        "Works with any self-hosted Mattermost instance.",
        " 1. In Mattermost: Integrations → Bot Accounts → Add Bot Account",
        " 2. Copy the bot token",
    ):
        print_info(intro_line)
    print()

    server_url = prompt("Mattermost server URL (e.g. https://mm.example.com)")
    if server_url:
        # Stored without trailing slashes.
        save_env_value("MATTERMOST_URL", server_url.rstrip("/"))

    bot_token = prompt("Bot token", password=True)
    if not bot_token:
        return
    save_env_value("MATTERMOST_TOKEN", bot_token)
    print_success("Mattermost token saved")

    print()
    for security_line in (
        "🔒 Security: Restrict who can use your bot",
        " To find your user ID: click your avatar → Profile",
        " or use the API: GET /api/v4/users/me",
    ):
        print_info(security_line)
    print()

    allowlist = prompt("Allowed user IDs (comma-separated, leave empty for open access)")
    if not allowlist:
        print_info("⚠️ No allowlist set - anyone who can message the bot can use it!")
    else:
        # Spaces are removed so the stored value is a compact comma list.
        save_env_value("MATTERMOST_ALLOWED_USERS", allowlist.replace(" ", ""))
        print_success("Mattermost allowlist configured")

    print()
    for home_line in (
        "📬 Home Channel: where Hermes delivers cron job results and notifications.",
        " To get a channel ID: click channel name → View Info → copy the ID",
        " You can also set this later by typing /set-home in a Mattermost channel.",
    ):
        print_info(home_line)

    home_channel_id = prompt("Home channel ID (leave empty to set later with /set-home)")
    if home_channel_id:
        save_env_value("MATTERMOST_HOME_CHANNEL", home_channel_id)
print_info(" Open config in your editor: hermes config edit")
def _setup_bluebubbles():
"""Configure BlueBubbles iMessage gateway."""
print_header("BlueBubbles (iMessage)")
@ -3128,6 +3045,119 @@ SETUP_SECTIONS = [
]
def _run_portal_one_shot(config: dict) -> None:
    """Run the single-command Nous Portal setup (``hermes setup --portal``).

    Steps, in order:

    1. Print the banner and a short description of the subscription.
    2. If ``get_nous_auth_status()`` does not report ``logged_in``, run the
       OAuth login through ``auth_add_command``; any failure or
       cancellation prints a message and returns without touching config.
    3. Set ``config["model"]["provider"]`` to ``"nous"`` and save the config.
    4. Offer the Tool Gateway opt-in via ``prompt_enable_tool_gateway``.

    Args:
        config: The loaded configuration dict; mutated in place and saved.
    """
    from types import SimpleNamespace

    from hermes_cli.auth_commands import auth_add_command
    from hermes_cli.config import save_config
    from hermes_cli.auth import get_nous_auth_status
    from hermes_cli.nous_subscription import prompt_enable_tool_gateway

    print()
    for banner_row in (
        "┌─────────────────────────────────────────────────────────┐",
        "│ ⚕ Hermes Setup — Nous Portal (one-shot) │",
        "└─────────────────────────────────────────────────────────┘",
    ):
        print(color(banner_row, Colors.MAGENTA))
    print()
    for pitch_line in (
        " One subscription, 300+ models, plus the Tool Gateway:",
        " web search, image generation, TTS, browser automation",
        " — all routed through your Nous Portal sub.",
    ):
        print_info(pitch_line)
    print()
    print_info(" Sign up: https://portal.nousresearch.com/manage-subscription")
    print()

    # Skip OAuth when a session already exists, so re-running the command
    # after a successful first run does not prompt again. Any error while
    # checking is treated as "not logged in".
    try:
        auth_status = get_nous_auth_status() or {}
        has_session = bool(auth_status.get("logged_in"))
    except Exception:
        has_session = False

    if has_session:
        print_success(" Already logged into Nous Portal.")
    else:
        # Reuse the shared auth command so the device-code flow matches
        # `hermes auth add nous --type oauth`. The namespace stands in for
        # the argparse Namespace that auth_add_command expects.
        login_fields = {
            "provider": "nous",
            "auth_type": "oauth",
            "label": None,
            "api_key": None,
            "portal_url": None,
            "inference_url": None,
            "client_id": None,
            "scope": None,
            "no_browser": False,
            "timeout": None,
            "insecure": False,
            "ca_bundle": None,
            "min_key_ttl_seconds": 5 * 60,
        }
        try:
            auth_add_command(SimpleNamespace(**login_fields))
        except SystemExit as exit_request:
            print()
            print_error(f" Nous Portal login failed (exit {exit_request.code}).")
            print_info(" You can retry later with `hermes auth add nous --type oauth`.")
            return
        except (KeyboardInterrupt, EOFError):
            print()
            print_info(" Setup cancelled.")
            return
        except Exception as login_error:
            print()
            print_error(f" Nous Portal login failed: {login_error}")
            print_info(" You can retry later with `hermes auth add nous --type oauth`.")
            return

    # Switch the provider to nous. Only the provider key is written here;
    # any existing model section is kept, and a non-dict one is replaced.
    model_section = config.get("model")
    if not isinstance(model_section, dict):
        model_section = {}
        config["model"] = model_section
    model_section["provider"] = "nous"
    save_config(config)
    print()
    print_success(" Nous set as your inference provider.")

    # Offer the Tool Gateway opt-in. Cancelling it is silent; any other
    # failure is reported as a warning and setup still completes.
    print()
    try:
        prompt_enable_tool_gateway(config)
    except (KeyboardInterrupt, EOFError):
        pass
    except Exception as gateway_error:
        print_warning(f" Tool Gateway prompt skipped: {gateway_error}")

    print()
    print_success("Portal setup complete.")
    print_info(" Run `hermes portal status` to inspect routing.")
    print_info(" Run `hermes` to start chatting.")
def run_setup_wizard(args):
"""Run the interactive setup wizard.
@ -3183,6 +3213,11 @@ def run_setup_wizard(args):
)
return
# --portal: one-shot Nous Portal setup. Skips the rest of the wizard.
if bool(getattr(args, "portal", False)):
_run_portal_one_shot(config)
return
# Check if a specific section was requested
section = getattr(args, "section", None)
if section:

View file

@ -23,6 +23,7 @@ from rich.table import Table
# Lazy imports to avoid circular dependencies and slow startup.
# tools.skills_hub and tools.skills_guard are imported inside functions.
from hermes_constants import display_hermes_home
from agent.skill_utils import is_excluded_skill_path
_console = Console()
@ -178,9 +179,12 @@ def _existing_categories() -> List[str]:
# top level (no category); otherwise treat as a category bucket.
if (entry / "SKILL.md").exists():
continue
# Has at least one nested SKILL.md?
# Has at least one nested SKILL.md (excluding dependency/cache dirs)?
try:
if any(entry.rglob("SKILL.md")):
if any(
not is_excluded_skill_path(p)
for p in entry.rglob("SKILL.md")
):
out.append(entry.name)
except OSError:
continue
@ -546,7 +550,14 @@ def do_install(identifier: str, category: str = "", force: bool = False,
# Scan
c.print("[bold]Running security scan...[/]")
scan_source = getattr(bundle, "identifier", "") or getattr(meta, "identifier", "") or identifier
if bundle.source == "official":
scan_source = "official"
else:
scan_source = (
getattr(bundle, "identifier", "")
or getattr(meta, "identifier", "")
or identifier
)
result = scan_skill(q_path, source=scan_source)
c.print(format_scan_report(result))
@ -902,8 +913,14 @@ def do_update(name: Optional[str] = None, console: Optional[Console] = None) ->
c.print(f"[bold green]Updated {len(updates)} skill(s).[/]\n")
def do_audit(name: Optional[str] = None, console: Optional[Console] = None) -> None:
"""Re-run security scan on installed hub skills."""
def do_audit(name: Optional[str] = None, console: Optional[Console] = None,
deep: bool = False) -> None:
"""Re-run security scan on installed hub skills.
When ``deep=True``, also runs an opt-in AST-level diagnostic on Python
files (review aid only — not a security gate; skills_guard.py verdicts
are unchanged).
"""
from tools.skills_hub import HubLockFile, SKILLS_DIR
from tools.skills_guard import scan_skill, format_scan_report
@ -924,6 +941,9 @@ def do_audit(name: Optional[str] = None, console: Optional[Console] = None) -> N
c.print(f"\n[bold]Auditing {len(targets)} skill(s)...[/]\n")
if deep:
from tools.skills_ast_audit import ast_scan_path, format_ast_report
for entry in targets:
skill_path = SKILLS_DIR / entry["install_path"]
if not skill_path.exists():
@ -932,6 +952,10 @@ def do_audit(name: Optional[str] = None, console: Optional[Console] = None) -> N
result = scan_skill(skill_path, source=entry.get("identifier", entry["source"]))
c.print(format_scan_report(result))
if deep:
c.print(format_ast_report(ast_scan_path(skill_path), skill_name=entry["name"]))
c.print()
@ -1339,7 +1363,8 @@ def skills_command(args) -> None:
elif action == "update":
do_update(name=getattr(args, "name", None))
elif action == "audit":
do_audit(name=getattr(args, "name", None))
do_audit(name=getattr(args, "name", None),
deep=getattr(args, "deep", False))
elif action == "uninstall":
do_uninstall(args.name)
elif action == "reset":
@ -1391,6 +1416,8 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
/skills update
/skills audit
/skills audit my-skill
/skills audit --deep
/skills audit my-skill --deep
/skills uninstall my-skill
/skills tap list
/skills tap add owner/repo
@ -1505,8 +1532,9 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
do_update(name=name, console=c)
elif action == "audit":
name = args[0] if args else None
do_audit(name=name, console=c)
name = args[0] if args and not args[0].startswith("--") else None
deep = "--deep" in args
do_audit(name=name, console=c, deep=deep)
elif action == "uninstall":
if not args:

View file

@ -227,6 +227,9 @@ TIPS = [
"browser_vision with annotate=true overlays numbered labels on interactive elements.",
# --- MCP ---
"hermes mcp opens an interactive picker of Nous-approved MCPs you can install in one keystroke.",
"hermes mcp catalog lists Nous-approved MCP servers shipped with the repo.",
"hermes mcp install <name> installs a catalog entry, prompts for credentials, and lets you pick which of its tools to enable.",
"MCP servers are configured in config.yaml — both stdio and HTTP transports supported.",
"Per-server tool filtering: tools.include whitelists and tools.exclude blacklists specific tools.",
"MCP servers auto-generate toolsets at runtime — hermes tools can toggle them per platform.",
@ -260,7 +263,7 @@ TIPS = [
"Custom providers: save named endpoints in config.yaml under custom_providers.",
"HERMES_EPHEMERAL_SYSTEM_PROMPT injects a system prompt that's never persisted to history.",
"credential_pool_strategies supports fill_first, round_robin, least_used, and random rotation.",
"hermes login supports OAuth-based auth for Nous and OpenAI Codex providers.",
"hermes auth add nous or hermes auth add openai-codex sets up OAuth-based providers.",
"The API server supports both Chat Completions and Responses API with server-side state.",
"tool_preview_length: 0 in config shows full file paths in the spinner's activity feed.",
"hermes status --deep runs deeper diagnostic checks across all components.",

View file

@ -101,7 +101,7 @@ def _xai_credentials_present() -> bool:
"""Cheap, side-effect-free check for usable xAI credentials.
Used to auto-enable the ``x_search`` toolset when the user has either
completed xAI Grok OAuth (SuperGrok subscription) or set
completed xAI Grok OAuth (SuperGrok / Premium+) or set
``XAI_API_KEY``. Does NOT hit the network — only inspects the local
auth store and environment. The tool's runtime ``check_fn`` still
gates schema registration if creds later expire or get revoked.
@ -311,6 +311,16 @@ TOOL_CATEGORIES = {
"image_gen": {
"name": "Image Generation",
"icon": "🎨",
# Per-provider rows for FAL.ai (`plugins/image_gen/fal`), OpenAI,
# OpenAI Codex, and xAI are injected at runtime from each
# ``plugins.image_gen.<vendor>`` package via
# ``_plugin_image_gen_providers()`` in ``_visible_providers``.
# Only non-provider UX setup-flow rows remain here:
# - "Nous Subscription" — managed FAL billed via the Nous
# subscription (requires_nous_auth + override_env_vars).
# Uses the fal plugin as the underlying backend but has a
# distinct setup UX.
# Mirrors the shape browser/video_gen ship today.
"providers": [
{
"name": "Nous Subscription",
@ -322,15 +332,6 @@ TOOL_CATEGORIES = {
"override_env_vars": ["FAL_KEY"],
"imagegen_backend": "fal",
},
{
"name": "FAL.ai",
"badge": "paid",
"tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
"env_vars": [
{"key": "FAL_KEY", "prompt": "FAL API key", "url": "https://fal.ai/dashboard/keys"},
],
"imagegen_backend": "fal",
},
],
},
"video_gen": {
@ -355,7 +356,7 @@ TOOL_CATEGORIES = {
"icon": "🐦",
"providers": [
{
"name": "xAI Grok OAuth (SuperGrok Subscription)",
"name": "xAI Grok OAuth (SuperGrok / Premium+)",
"badge": "subscription",
"tag": "Browser login at accounts.x.ai — no API key required",
"env_vars": [],
@ -482,6 +483,11 @@ TOOLSET_ENV_REQUIREMENTS = {
# ─── Post-Setup Hooks ─────────────────────────────────────────────────────────
def _cua_driver_cmd() -> str:
    """Return the cua-driver executable name or path.

    Reads ``HERMES_CUA_DRIVER_CMD`` from the environment and returns it
    with surrounding whitespace stripped. When the variable is unset,
    empty or whitespace-only, returns the default ``"cua-driver"``.
    """
    override = os.environ.get("HERMES_CUA_DRIVER_CMD", "").strip()
    if override:
        return override
    return "cua-driver"
def _pip_install(
args: List[str],
*,
@ -550,6 +556,55 @@ def _pip_install(
)
def _check_cua_driver_asset_for_arch() -> bool:
    """Report whether the latest CUA release has an asset for this machine.

    Returns:
        ``True`` when ``platform.machine()`` is ``"arm64"``, when an asset
        whose name contains ``x86_64`` or ``amd64`` is listed on the latest
        release, or when the lookup fails for any reason. ``False`` only
        when the release was fetched and no such asset is listed; in that
        case a warning is printed so callers can skip the install attempt
        and avoid a raw 404.
    """
    import platform as _plat
    import urllib.request

    # machine() is "x86_64" or "arm64"; arm64 (Apple Silicon) assets are
    # always published, so no probe is needed there.
    if _plat.machine() == "arm64":
        return True

    # x86_64 / Intel: check the latest release for an architecture-specific
    # asset before falling through to the upstream installer.
    releases_endpoint = "https://api.github.com/repos/trycua/cua/releases/latest"
    intel_markers = {"x86_64", "amd64"}
    try:
        request = urllib.request.Request(
            releases_endpoint,
            headers={"Accept": "application/vnd.github+json"},
        )
        with urllib.request.urlopen(request, timeout=10) as response:
            payload = _json.loads(response.read().decode())
        release_tag = payload.get("tag_name", "")
        asset_names = (
            entry.get("name", "").lower() for entry in payload.get("assets", [])
        )
        intel_build_found = any(
            marker in asset_name
            for asset_name in asset_names
            for marker in intel_markers
        )
        if not intel_build_found:
            _print_warning(
                f" Latest CUA release ({release_tag}) has no Intel (x86_64) asset."
            )
            _print_info(" CUA Driver currently only ships Apple Silicon builds.")
            _print_info(" See: https://github.com/trycua/cua/issues/1493")
            return False
    except Exception:
        # Network / API failure: proceed and let the installer handle it.
        pass
    return True
def install_cua_driver(upgrade: bool = False) -> bool:
"""Install or refresh the cua-driver binary used by Computer Use.
@ -579,7 +634,8 @@ def install_cua_driver(upgrade: bool = False) -> bool:
_print_warning(" Computer Use (cua-driver) is macOS-only; skipping.")
return False
binary = shutil.which("cua-driver")
driver_cmd = _cua_driver_cmd()
binary = shutil.which(driver_cmd)
# Not installed → fresh install path (only when caller asked for it).
if not binary and not upgrade:
@ -587,18 +643,20 @@ def install_cua_driver(upgrade: bool = False) -> bool:
_print_warning(" curl not found — install manually:")
_print_info(" https://github.com/trycua/cua/blob/main/libs/cua-driver/README.md")
return False
if not _check_cua_driver_asset_for_arch():
return False
return _run_cua_driver_installer(label="Installing")
# Already installed and caller didn't ask to upgrade → just confirm.
if binary and not upgrade:
try:
version = subprocess.run(
["cua-driver", "--version"],
[driver_cmd, "--version"],
capture_output=True, text=True, timeout=5,
).stdout.strip()
_print_success(f" cua-driver already installed: {version or 'unknown version'}")
_print_success(f" {driver_cmd} already installed: {version or 'unknown version'}")
except Exception:
_print_success(" cua-driver already installed.")
_print_success(f" {driver_cmd} already installed.")
_print_info(" Grant macOS permissions if not done yet:")
_print_info(" System Settings > Privacy & Security > Accessibility")
_print_info(" System Settings > Privacy & Security > Screen Recording")
@ -609,11 +667,14 @@ def install_cua_driver(upgrade: bool = False) -> bool:
_print_warning(" curl not found — cannot refresh cua-driver.")
return bool(binary)
if not _check_cua_driver_asset_for_arch():
return bool(binary)
if binary:
# Show before/after version when we have a baseline. Best-effort.
try:
before = subprocess.run(
["cua-driver", "--version"],
[driver_cmd, "--version"],
capture_output=True, text=True, timeout=5,
).stdout.strip()
except Exception:
@ -625,13 +686,13 @@ def install_cua_driver(upgrade: bool = False) -> bool:
if ok and before:
try:
after = subprocess.run(
["cua-driver", "--version"],
[driver_cmd, "--version"],
capture_output=True, text=True, timeout=5,
).stdout.strip()
if after and after != before:
_print_success(f" cua-driver upgraded: {before}{after}")
_print_success(f" {driver_cmd} upgraded: {before}{after}")
elif after:
_print_info(f" cua-driver up to date: {after}")
_print_info(f" {driver_cmd} up to date: {after}")
except Exception:
pass
return ok
@ -655,11 +716,12 @@ def _run_cua_driver_installer(label: str = "Installing", verbose: bool = True) -
_print_info(f" {label} cua-driver (macOS background computer-use)...")
else:
_print_info(f" {label} cua-driver...")
driver_cmd = _cua_driver_cmd()
try:
result = subprocess.run(install_cmd, shell=True, timeout=300)
if result.returncode == 0 and shutil.which("cua-driver"):
if result.returncode == 0 and shutil.which(driver_cmd):
if verbose:
_print_success(" cua-driver installed.")
_print_success(f" {driver_cmd} installed.")
_print_info(" IMPORTANT — grant macOS permissions now:")
_print_info(" System Settings > Privacy & Security > Accessibility")
_print_info(" System Settings > Privacy & Security > Screen Recording")
@ -946,7 +1008,7 @@ def _run_post_setup(post_setup_key: str):
if oauth_logged_in:
_print_success(
" xAI will use your xAI Grok OAuth (SuperGrok Subscription) credentials"
" xAI will use your xAI Grok OAuth (SuperGrok / Premium+) credentials"
)
return
if existing_api_key:
@ -969,7 +1031,7 @@ def _run_post_setup(post_setup_key: str):
idx = prompt_choice(
" How do you want xAI to authenticate?",
choices=[
"Sign in with xAI Grok OAuth (SuperGrok Subscription) — browser login",
"Sign in with xAI Grok OAuth (SuperGrok / Premium+) — browser login",
"Paste an xAI API key (console.x.ai)",
"Skip — configure later via `hermes auth add xai-oauth`",
],
@ -1506,12 +1568,9 @@ def _plugin_image_gen_providers() -> list[dict]:
Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider
row but carries an ``image_gen_plugin_name`` marker so downstream
code (config writing, model picker) knows to route through the
plugin registry instead of the in-tree FAL backend.
FAL is skipped it's already exposed by the hardcoded
``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to
a plugin in a follow-up PR, the hardcoded entries go away and this
function surfaces it alongside OpenAI automatically.
plugin registry. Every image-gen backend is a plugin now — there
are no hardcoded rows left in ``TOOL_CATEGORIES["image_gen"]`` for
this function to dedupe against (see issue #26241).
"""
try:
from agent.image_gen_registry import list_providers
@ -1524,9 +1583,6 @@ def _plugin_image_gen_providers() -> list[dict]:
rows: list[dict] = []
for provider in providers:
if getattr(provider, "name", None) == "fal":
# FAL has its own hardcoded rows today.
continue
try:
schema = provider.get_setup_schema()
except Exception:
@ -1697,6 +1753,62 @@ def _plugin_browser_providers() -> list[dict]:
return rows
def _plugin_tts_providers() -> list[dict]:
    """Build picker-row dicts from plugin-registered TTS providers.

    Issue #30398 — the ``register_tts_provider()`` plugin hook
    coexists alongside the 10 built-in TTS providers
    (``edge``/``openai``/``elevenlabs``/…) and the
    ``tts.providers.<name>: type: command`` registry from PR #17843.
    Built-in rows stay hardcoded in ``TOOL_CATEGORIES["tts"]``; this
    function only injects PLUGIN-registered providers.

    Defensive: plugins whose name collides with a built-in TTS provider
    are filtered out — even though the registry already rejects them
    at registration time, a future code path that registers directly
    via :func:`agent.tts_registry.register_provider` could slip
    through. Filtering here keeps the picker invariant.

    Returns:
        One dict per usable plugin provider with the keys ``name``,
        ``badge``, ``tag``, ``env_vars``, ``tts_provider`` and
        ``tts_plugin_name``, plus ``post_setup`` when the plugin's setup
        schema supplies a truthy one. Returns ``[]`` when the registry or
        plugin discovery cannot be imported or raises. A provider is
        skipped when its name is missing or not a string, when it shadows
        a built-in, or when reading its schema or display name raises.
    """
    try:
        from agent.tts_registry import _BUILTIN_NAMES, list_providers
        from hermes_cli.plugins import _ensure_plugins_discovered

        _ensure_plugins_discovered()
        providers = list_providers()
    except Exception:
        return []

    rows: list[dict] = []
    for provider in providers:
        name = getattr(provider, "name", None)
        # A non-string name cannot be normalised below; treat it as missing
        # rather than letting one bad plugin break the whole picker.
        if not name or not isinstance(name, str):
            continue
        # Defensive: reject built-in shadowing at the picker layer too.
        if name.lower().strip() in _BUILTIN_NAMES:
            continue
        try:
            schema = provider.get_setup_schema()
            # Read the label inside the guard: a plugin that lacks
            # ``display_name`` falls back to its registry name, and one
            # whose attribute raises is skipped like a broken schema.
            display_name = getattr(provider, "display_name", name)
        except Exception:
            continue
        if not isinstance(schema, dict):
            continue
        row = {
            "name": schema.get("name", display_name),
            "badge": schema.get("badge", ""),
            "tag": schema.get("tag", ""),
            "env_vars": schema.get("env_vars", []),
            # Selecting this row writes ``tts.provider: <name>`` — the
            # same write-path used by hardcoded rows. The plugin
            # dispatcher picks it up automatically from there.
            "tts_provider": name,
            "tts_plugin_name": name,
        }
        if schema.get("post_setup"):
            row["post_setup"] = schema["post_setup"]
        rows.append(row)
    return rows
def _visible_providers(cat: dict, config: dict) -> list[dict]:
"""Return provider entries visible for the current auth/config state."""
features = get_nous_subscription_features(config)
@ -1734,6 +1846,12 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]:
if cat.get("name") == "Browser Automation":
visible.extend(_plugin_browser_providers())
# Inject plugin-registered TTS backends (issue #30398). Plugin rows
# render BELOW the 10 hardcoded built-in rows. Built-in shadowing
# is filtered out by ``_plugin_tts_providers`` defensively.
if cat.get("name") == "Text-to-Speech":
visible.extend(_plugin_tts_providers())
return visible
@ -1751,7 +1869,7 @@ _POST_SETUP_INSTALLED: dict = {
# entry when (a) the post_setup is the ONLY install side-effect for
# a no-key provider, and (b) an installed-state check is cheap and
# doesn't trigger a heavy import.
"cua_driver": lambda: bool(shutil.which("cua-driver")),
"cua_driver": lambda: bool(shutil.which(_cua_driver_cmd())),
}
@ -1869,6 +1987,16 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
print()
# Plain text labels only (no ANSI codes in menu items)
# When the user is logged into Nous, surface a marker on providers
# whose access is included in their subscription so it's visually
# obvious which options cost extra vs. cost nothing on top of Nous.
try:
_nous_logged_in = bool(
get_nous_subscription_features(config).nous_auth_present
)
except Exception:
_nous_logged_in = False
provider_choices = []
for p in providers:
badge = f" [{p['badge']}]" if p.get("badge") else ""
@ -1882,7 +2010,15 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
configured = ""
else:
configured = " [configured]"
provider_choices.append(f"{p['name']}{badge}{tag}{configured}")
# Highlight Nous-managed entries when the user has Portal auth.
# curses_radiolist can't render ANSI inside item strings, so we
# use a plain unicode star + parenthetical phrase. Suppressed
# when no Portal auth is present so non-subscribers see the
# picker unchanged.
sub_marker = ""
if _nous_logged_in and p.get("managed_nous_feature"):
sub_marker = " ★ Included with your Nous subscription"
provider_choices.append(f"{p['name']}{badge}{tag}{configured}{sub_marker}")
# Add skip option
provider_choices.append("Skip — keep defaults / configure later")
@ -2349,6 +2485,30 @@ def _configure_provider(provider: dict, config: dict):
# Prompt for each required env var
all_configured = True
# If this BYOK provider lives in a category that ALSO has a
# Nous-managed sibling, show a single dim hint so users know
# they can avoid the key entirely via a Portal subscription.
# Suppressed when the user is already authed to Nous.
_show_portal_hint = False
if env_vars and not managed_feature and not provider.get("requires_nous_auth"):
try:
_has_managed_sibling = False
for _cat_key, _cat in TOOL_CATEGORIES.items():
_providers = _cat.get("providers", [])
if provider in _providers and any(
sib.get("managed_nous_feature") for sib in _providers
):
_has_managed_sibling = True
break
if _has_managed_sibling:
_features = get_nous_subscription_features(config)
_show_portal_hint = not _features.nous_auth_present
except Exception:
_show_portal_hint = False
if _show_portal_hint:
_print_info(" Available through Nous Portal subscription.")
for var in env_vars:
existing = get_env_value(var["key"])
if existing:
@ -3030,21 +3190,26 @@ def _configure_mcp_tools_interactive(config: dict):
_print_info(f" {server_name}: no changes")
continue
# Compute new exclude list based on unchecked tools
new_exclude = [tool_names[i] for i in range(len(tool_names)) if i not in chosen]
# Compute new include list (the chosen tools). We standardize on
# tools.include across the codebase (catalog installs, hermes mcp
# configure, and this UI) so a server\'s on-disk config shape doesn\'t
# depend on which UI the user touched last.
chosen_names = [tool_names[i] for i in sorted(chosen)]
# Update config
srv_cfg = mcp_servers.setdefault(server_name, {})
tools_cfg = srv_cfg.setdefault("tools", {})
if new_exclude:
tools_cfg["exclude"] = new_exclude
# Remove include if present — we're switching to exclude mode
tools_cfg.pop("include", None)
else:
# All tools enabled — clear filters
if len(chosen) == len(tools):
# All tools enabled — clear filters (cleanest config shape; the
# server\'s native tool set is the active set, and any tools the
# server adds later are auto-enabled).
tools_cfg.pop("exclude", None)
tools_cfg.pop("include", None)
else:
tools_cfg["include"] = chosen_names
# Drop any legacy exclude block — we\'re include-mode now.
tools_cfg.pop("exclude", None)
enabled_count = len(chosen)
disabled_count = len(tools) - enabled_count

View file

@ -16,6 +16,7 @@ import json
import logging
import os
import secrets
import stat
import subprocess
import sys
import threading
@ -48,6 +49,7 @@ from hermes_cli.config import (
redact_key,
)
from gateway.status import get_running_pid, read_runtime_status
from utils import env_var_enabled
try:
from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
@ -118,7 +120,6 @@ _PUBLIC_API_PATHS: frozenset = frozenset({
"/api/model/info",
"/api/dashboard/themes",
"/api/dashboard/plugins",
"/api/dashboard/plugins/rescan",
})
@ -975,11 +976,13 @@ _AUX_TASK_SLOTS: Tuple[str, ...] = (
"vision",
"web_extract",
"compression",
"session_search",
"skills_hub",
"approval",
"mcp",
"title_generation",
"triage_specifier",
"kanban_decomposer",
"profile_describer",
"curator",
)
@ -1220,6 +1223,12 @@ async def set_env_var(body: EnvVarUpdate):
try:
save_env_value(body.key, body.value)
return {"ok": True, "key": body.key}
except ValueError as exc:
# save_env_value raises ValueError for invalid names and for keys
# on the denylist (LD_PRELOAD, PATH, PYTHONPATH, …). Surface the
# message to the SPA so the user understands why the write was
# refused instead of seeing an opaque 500.
raise HTTPException(status_code=400, detail=str(exc)) from exc
except Exception:
_log.exception("PUT /api/env failed")
raise HTTPException(status_code=500, detail="Internal server error")
@ -1684,7 +1693,25 @@ def _save_anthropic_oauth_creds(access_token: str, refresh_token: str, expires_a
"expiresAt": expires_at_ms,
}
_HERMES_OAUTH_FILE.parent.mkdir(parents=True, exist_ok=True)
_HERMES_OAUTH_FILE.write_text(json.dumps(payload, indent=2), encoding="utf-8")
tmp_path = _HERMES_OAUTH_FILE.with_name(
f"{_HERMES_OAUTH_FILE.name}.tmp.{os.getpid()}.{secrets.token_hex(8)}"
)
try:
with tmp_path.open("w", encoding="utf-8") as handle:
handle.write(json.dumps(payload, indent=2))
handle.flush()
os.fsync(handle.fileno())
os.replace(tmp_path, _HERMES_OAUTH_FILE)
try:
_HERMES_OAUTH_FILE.chmod(stat.S_IRUSR | stat.S_IWUSR)
except OSError:
pass
finally:
try:
if tmp_path.exists():
tmp_path.unlink()
except OSError:
pass
# Best-effort credential-pool insert. Failure here doesn't invalidate
# the file write — pool registration only matters for the rotation
# strategy, not for runtime credential resolution.
@ -2690,7 +2717,10 @@ async def update_cron_job(job_id: str, body: CronJobUpdate, profile: Optional[st
selected = profile or _find_cron_job_profile(job_id)
if not selected:
raise HTTPException(status_code=404, detail="Job not found")
job = _call_cron_for_profile(selected, "update_job", job_id, body.updates)
try:
job = _call_cron_for_profile(selected, "update_job", job_id, body.updates)
except ValueError as exc:
raise HTTPException(status_code=400, detail=str(exc)) from exc
if not job:
raise HTTPException(status_code=404, detail="Job not found")
return job
@ -2734,7 +2764,11 @@ async def delete_cron_job(job_id: str, profile: Optional[str] = None):
selected = profile or _find_cron_job_profile(job_id)
if not selected:
raise HTTPException(status_code=404, detail="Job not found")
if not _call_cron_for_profile(selected, "remove_job", job_id):
try:
removed = _call_cron_for_profile(selected, "remove_job", job_id)
except ValueError as exc:
raise HTTPException(status_code=400, detail=str(exc)) from exc
if not removed:
raise HTTPException(status_code=404, detail="Job not found")
return {"ok": True}
@ -3293,24 +3327,49 @@ _VALID_CHANNEL_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
_LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})
def _is_public_bind() -> bool:
    """Return True when the dashboard is bound to all interfaces.

    Compares ``app.state.bound_host`` (empty string when unset) against
    the IPv4 and IPv6 wildcard addresses; this is the state the operator
    reaches with ``--insecure``.
    """
    wildcard_hosts = {"0.0.0.0", "::"}
    bound_host = getattr(app.state, "bound_host", "")
    return bound_host in wildcard_hosts
def _ws_client_is_allowed(ws: "WebSocket") -> bool:
"""Check if the WebSocket client IP is acceptable.
Allows loopback always; allows any IP when bound to all-interfaces
(--insecure mode, guarded by session token auth).
Allows loopback clients only.
"""
if _is_public_bind():
return True
client_host = ws.client.host if ws.client else ""
if not client_host:
return True
return client_host in _LOOPBACK_HOSTS
def _ws_host_origin_is_allowed(ws: "WebSocket") -> bool:
    """Apply the dashboard Host/Origin guard to WebSocket upgrades.

    FastAPI HTTP middleware does not run for WebSocket routes, so the
    DNS-rebinding Host check used for normal dashboard HTTP requests must be
    repeated here before accepting the upgrade. Browsers also send an Origin
    header on WebSocket handshakes; when present, require it to target the
    same bound dashboard host.

    Args:
        ws: The WebSocket whose handshake headers are inspected.

    Returns:
        ``True`` when no bound host is recorded on ``app.state``, or when
        the Host header is accepted and the Origin header is either absent
        or an ``http``/``https`` URL whose netloc is accepted. ``False``
        otherwise, including when the Origin header cannot be parsed.
    """
    bound_host = getattr(app.state, "bound_host", None)
    if not bound_host:
        return True

    host_header = ws.headers.get("host", "")
    if not _is_accepted_host(host_header, bound_host):
        return False

    origin = ws.headers.get("origin", "")
    if not origin:
        return True

    # The Origin header is client-controlled. urlparse raises ValueError for
    # some malformed values (e.g. unmatched "[" in the netloc); reject the
    # upgrade instead of letting the exception escape into the handler.
    try:
        parsed = urllib.parse.urlparse(origin)
    except ValueError:
        return False
    if parsed.scheme not in {"http", "https"} or not parsed.netloc:
        return False
    return _is_accepted_host(parsed.netloc, bound_host)
def _ws_request_is_allowed(ws: "WebSocket") -> bool:
    """Decide whether a WebSocket upgrade stays within dashboard boundaries.

    The Host/Origin guard runs first; the client-address check is only
    evaluated when that guard passes.
    """
    if not _ws_host_origin_is_allowed(ws):
        return False
    return _ws_client_is_allowed(ws)
# Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard)
# and /api/events (dashboard → browser sidebar). Keyed by an opaque channel id
# the chat tab generates on mount; entries auto-evict when the last subscriber
@ -3389,7 +3448,7 @@ async def _broadcast_event(channel: str, payload: str) -> None:
except Exception:
# Subscriber went away mid-send; the /api/events finally clause
# will remove it from the registry on its next iteration.
pass
_log.warning("broadcast send failed for subscriber on %s", channel, exc_info=True)
def _channel_or_close_code(ws: WebSocket) -> Optional[str]:
@ -3412,7 +3471,7 @@ async def pty_ws(ws: WebSocket) -> None:
await ws.close(code=4401)
return
if not _ws_client_is_allowed(ws):
if not _ws_request_is_allowed(ws):
await ws.close(code=4403)
return
@ -3531,7 +3590,7 @@ async def gateway_ws(ws: WebSocket) -> None:
await ws.close(code=4401)
return
if not _ws_client_is_allowed(ws):
if not _ws_request_is_allowed(ws):
await ws.close(code=4403)
return
@ -3563,7 +3622,7 @@ async def pub_ws(ws: WebSocket) -> None:
await ws.close(code=4401)
return
if not _ws_client_is_allowed(ws):
if not _ws_request_is_allowed(ws):
await ws.close(code=4403)
return
@ -3592,7 +3651,7 @@ async def events_ws(ws: WebSocket) -> None:
await ws.close(code=4401)
return
if not _ws_client_is_allowed(ws):
if not _ws_request_is_allowed(ws):
await ws.close(code=4403)
return
@ -4044,6 +4103,43 @@ async def set_dashboard_theme(body: ThemeSetBody):
# Dashboard plugin system
# ---------------------------------------------------------------------------
def _safe_plugin_api_relpath(api_field: Any, *, dashboard_dir: Path) -> Optional[str]:
"""Validate the manifest's ``api`` field for the plugin loader.
The web server later imports this file as a Python module via
``importlib.util.spec_from_file_location`` (arbitrary code
execution by design that's how plugins extend the backend).
Pre-#29156 the field was used as-is, which meant:
* An absolute path swallowed the plugin's dashboard directory
entirely ``Path('safe/dashboard') / '/tmp/evil.py'`` resolves
to ``/tmp/evil.py``, so any attacker-controlled manifest could
point the import at any Python file on disk (GHSA-5qr3-c538-wm9j).
* A ``../..`` traversal could climb out of the plugin into
neighbouring directories on the search path.
Return the original string when the resolved path stays under
``dashboard_dir``; return ``None`` (with a warning logged at the
call site) otherwise so the plugin still loads its static JS/CSS
but its backend ``api`` is rejected.
"""
if not isinstance(api_field, str) or not api_field.strip():
return None
candidate = Path(api_field)
if candidate.is_absolute():
return None
try:
resolved = (dashboard_dir / candidate).resolve()
base = dashboard_dir.resolve()
except (OSError, RuntimeError):
return None
try:
resolved.relative_to(base)
except ValueError:
return None
return api_field
def _discover_dashboard_plugins() -> list:
"""Scan plugins/*/dashboard/manifest.json for dashboard extensions.
@ -4062,7 +4158,16 @@ def _discover_dashboard_plugins() -> list:
(bundled_root / "memory", "bundled"),
(bundled_root, "bundled"),
]
if os.environ.get("HERMES_ENABLE_PROJECT_PLUGINS"):
# GHSA-5qr3-c538-wm9j (#29156): the previous ``os.environ.get(...)``
# check treated *any* non-empty string as truthy, so ``=0``, ``=false``,
# and ``=no`` — all of which the agent loader and operators correctly
# read as "disabled" — silently *enabled* the untrusted project source
# in the web server. Combined with the absolute-path RCE primitive on
# the manifest's ``api`` field (now patched below), this turned the
# opt-in into a sticky always-on switch. Use the shared truthy
# semantics (``1`` / ``true`` / ``yes`` / ``on``) so the gate matches
# ``hermes_cli/plugins.py`` and the documented user contract.
if env_var_enabled("HERMES_ENABLE_PROJECT_PLUGINS"):
search_dirs.append((Path.cwd() / ".hermes" / "plugins", "project"))
for plugins_root, source in search_dirs:
@ -4101,6 +4206,23 @@ def _discover_dashboard_plugins() -> list:
slots: List[str] = []
if isinstance(slots_src, list):
slots = [s for s in slots_src if isinstance(s, str) and s]
# Validate ``api`` at discovery time so the value cached
# on the plugin entry is already safe to feed into the
# importer. An attacker-controlled manifest can name
# any absolute path or ``..`` traversal here — the
# web server then imports that file as a Python module
# (RCE, GHSA-5qr3-c538-wm9j).
raw_api = data.get("api")
dashboard_dir = child / "dashboard"
safe_api = _safe_plugin_api_relpath(raw_api, dashboard_dir=dashboard_dir)
if raw_api and safe_api is None:
_log.warning(
"Plugin %s: refusing unsafe api path %r (must be a "
"relative file inside the plugin's dashboard/ "
"directory); backend routes from this plugin will "
"not be mounted",
name, raw_api,
)
plugins.append({
"name": name,
"label": data.get("label", name),
@ -4111,10 +4233,10 @@ def _discover_dashboard_plugins() -> list:
"slots": slots,
"entry": data.get("entry", "dist/index.js"),
"css": data.get("css"),
"has_api": bool(data.get("api")),
"has_api": bool(safe_api),
"source": source,
"_dir": str(child / "dashboard"),
"_api_file": data.get("api"),
"_dir": str(dashboard_dir),
"_api_file": safe_api,
})
except Exception as exc:
_log.warning("Bad dashboard plugin manifest %s: %s", manifest_file, exc)
@ -4317,12 +4439,13 @@ async def post_agent_plugin_install(request: Request, body: _AgentPluginInstallB
def _validate_plugin_name(name: str) -> str:
"""Reject path-traversal attempts in plugin name URL parameters."""
if not name or "/" in name or "\\" in name or ".." in name:
name = name.strip("/")
if not name or ".." in name or "\\" in name:
raise HTTPException(status_code=400, detail="Invalid plugin name.")
return name
@app.post("/api/dashboard/agent-plugins/{name}/enable")
@app.post("/api/dashboard/agent-plugins/{name:path}/enable")
async def post_agent_plugin_enable(request: Request, name: str):
_require_token(request)
name = _validate_plugin_name(name)
@ -4334,7 +4457,7 @@ async def post_agent_plugin_enable(request: Request, name: str):
return result
@app.post("/api/dashboard/agent-plugins/{name}/disable")
@app.post("/api/dashboard/agent-plugins/{name:path}/disable")
async def post_agent_plugin_disable(request: Request, name: str):
_require_token(request)
name = _validate_plugin_name(name)
@ -4346,7 +4469,7 @@ async def post_agent_plugin_disable(request: Request, name: str):
return result
@app.post("/api/dashboard/agent-plugins/{name}/update")
@app.post("/api/dashboard/agent-plugins/{name:path}/update")
async def post_agent_plugin_update(request: Request, name: str):
_require_token(request)
name = _validate_plugin_name(name)
@ -4359,7 +4482,7 @@ async def post_agent_plugin_update(request: Request, name: str):
return result
@app.delete("/api/dashboard/agent-plugins/{name}")
@app.delete("/api/dashboard/agent-plugins/{name:path}")
async def delete_agent_plugin(request: Request, name: str):
_require_token(request)
name = _validate_plugin_name(name)
@ -4397,7 +4520,7 @@ class _PluginVisibilityBody(BaseModel):
hidden: bool
@app.post("/api/dashboard/plugins/{name}/visibility")
@app.post("/api/dashboard/plugins/{name:path}/visibility")
async def post_plugin_visibility(request: Request, name: str, body: _PluginVisibilityBody):
"""Toggle a plugin's sidebar visibility (persists to config.yaml dashboard.hidden_plugins)."""
_require_token(request)
@ -4426,6 +4549,17 @@ async def serve_plugin_asset(plugin_name: str, file_path: str):
Only serves files from the plugin's ``dashboard/`` subdirectory.
Path traversal is blocked by checking ``resolve().is_relative_to()``.
Restricted to a browser-fetchable suffix allowlist (JS/CSS/JSON/HTML/
SVG/PNG/JPG/WOFF). The dashboard loads plugin JS via ``<script src>``
and CSS via ``<link href>``, neither of which can attach a custom
auth header, so this route stays unauthenticated to keep the SPA
working. But user-installed plugins ship a ``plugin_api.py``
backend module that the browser never fetches; it's only imported
by :func:`_mount_plugin_api_routes` at startup. Without a suffix
allowlist, anyone on the loopback port can curl the ``.py`` source
of a private third-party plugin. Reject everything outside the
browser-asset set.
"""
plugins = _get_dashboard_plugins()
plugin = next((p for p in plugins if p["name"] == plugin_name), None)
@ -4440,7 +4574,11 @@ async def serve_plugin_asset(plugin_name: str, file_path: str):
if not target.exists() or not target.is_file():
raise HTTPException(status_code=404, detail="File not found")
# Guess content type
# Browser-asset suffix allowlist. Everything outside this set is
# rejected with 404 so we don't leak ``.py`` backend sources, README
# files, ``.env.example`` templates, etc. — none of which the SPA
# actually fetches. Add to this set deliberately when a new asset
# type comes up; do NOT change the default fallback.
suffix = target.suffix.lower()
content_types = {
".js": "application/javascript",
@ -4451,10 +4589,22 @@ async def serve_plugin_asset(plugin_name: str, file_path: str):
".svg": "image/svg+xml",
".png": "image/png",
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".gif": "image/gif",
".webp": "image/webp",
".ico": "image/x-icon",
".woff2": "font/woff2",
".woff": "font/woff",
".ttf": "font/ttf",
".otf": "font/otf",
".map": "application/json",
}
media_type = content_types.get(suffix, "application/octet-stream")
if suffix not in content_types:
raise HTTPException(
status_code=404,
detail="File not found",
)
media_type = content_types[suffix]
return FileResponse(
target,
media_type=media_type,
@ -4468,12 +4618,42 @@ def _mount_plugin_api_routes():
Each plugin's ``api`` field points to a Python file that must expose
a ``router`` (FastAPI APIRouter). Routes are mounted under
``/api/plugins/<name>/``.
Backend import is restricted to ``bundled`` and ``user`` sources.
Project plugins (``./.hermes/plugins/``) ship with the CWD and are
therefore attacker-controlled in any threat model where the user
opens a malicious repo; they can extend the dashboard UI via
static JS/CSS but their Python ``api`` file is never auto-imported
by the web server. See GHSA-5qr3-c538-wm9j (#29156).
"""
for plugin in _get_dashboard_plugins():
api_file_name = plugin.get("_api_file")
if not api_file_name:
continue
api_path = Path(plugin["_dir"]) / api_file_name
if plugin.get("source") == "project":
_log.warning(
"Plugin %s: ignoring backend api=%s (project plugins may "
"not auto-import Python code; move the plugin to "
"~/.hermes/plugins/ if you trust it)",
plugin["name"], api_file_name,
)
continue
dashboard_dir = Path(plugin["_dir"])
api_path = dashboard_dir / api_file_name
try:
resolved_api = api_path.resolve()
resolved_base = dashboard_dir.resolve()
resolved_api.relative_to(resolved_base)
except (OSError, RuntimeError, ValueError):
# Discovery already filters this, but re-check here in case
# ``_dir`` was tampered with after caching or a future caller
# bypasses the validator. Defence in depth keeps the import
# primitive contained even if the upstream check regresses.
_log.warning(
"Plugin %s: refusing to import api file outside its "
"dashboard directory (%s)", plugin["name"], api_path,
)
continue
if not api_path.exists():
_log.warning("Plugin %s declares api=%s but file not found", plugin["name"], api_file_name)
continue

View file

@ -11,8 +11,10 @@ hot-reloaded by the webhook adapter without a gateway restart.
"""
import json
import os
import re
import secrets
import tempfile
import time
from pathlib import Path
from typing import Dict
@ -23,6 +25,7 @@ from hermes_cli.config import cfg_get
_SUBSCRIPTIONS_FILENAME = "webhook_subscriptions.json"
_SUBSCRIPTIONS_FILE_MODE = 0o600
def _hermes_home() -> Path:
@ -48,12 +51,33 @@ def _load_subscriptions() -> Dict[str, dict]:
def _save_subscriptions(subs: Dict[str, dict]) -> None:
    """Persist the webhook subscriptions mapping atomically with mode 0o600.

    Args:
        subs: Subscriptions mapping, serialised as indented JSON.

    Raises:
        Any exception from creating, writing, chmod-ing or renaming the
        file is re-raised after the temporary file has been removed.
    """
    path = _subscriptions_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    # webhook_subscriptions.json contains per-route HMAC secrets — write
    # via tempfile + chmod 0o600 before the atomic rename so a permissive
    # umask cannot leave the secrets readable to other local users in the
    # window between create and rename.
    fd, tmp_name = tempfile.mkstemp(
        prefix=f".{path.name}.",
        suffix=".tmp",
        dir=path.parent,
        text=True,
    )
    tmp_path = Path(tmp_name)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            json.dump(subs, fh, indent=2, ensure_ascii=False)
            fh.flush()
            os.fsync(fh.fileno())
        os.chmod(tmp_path, _SUBSCRIPTIONS_FILE_MODE)
        atomic_replace(tmp_path, path)
        # Re-assert after rename in case the destination existed with a
        # broader mode and atomic_replace preserved it.
        os.chmod(path, _SUBSCRIPTIONS_FILE_MODE)
    except BaseException:
        # BaseException (not just Exception) so an interrupt mid-write does
        # not leave a temp file full of secrets behind; always re-raised.
        try:
            tmp_path.unlink(missing_ok=True)
        except OSError:
            pass
        raise
def _get_webhook_config() -> dict: