Merge remote-tracking branch 'origin/main' into jq/hermes-update-branch-flag

2026-06-07 08:02:23 +00:00 · 2026-05-27 00:48:25 -04:00 · 2026-05-27 00:48:25 -04:00 · 3d9a26afad
commit 3d9a26afad
parent d5b73937db 1e5884e38f
1217 changed files with 178911 additions and 8214 deletions
--- a/hermes_cli/_parser.py
+++ b/hermes_cli/_parser.py
@ -129,7 +129,8 @@ def build_top_level_parser():
        default=None,
        help=(
            "Provider override for this invocation (e.g. openrouter, anthropic). "
-            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
+            "Applies to -z/--oneshot and --tui. The persistent provider lives in config.yaml "
+            "under model.provider — use `hermes setup` or edit the file to change it."
        ),
    )
    parser.add_argument(
@ -268,7 +269,11 @@ def build_top_level_parser():
        help="Inference provider (default: auto). Built-in or a user-defined name from `providers:` in config.yaml.",
    )
    chat_parser.add_argument(
-        "-v", "--verbose", action="store_true", help="Verbose output"
+        "-v",
+        "--verbose",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Verbose output",
    )
    chat_parser.add_argument(
        "-Q",
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@ -41,14 +41,15 @@ from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from http.server import BaseHTTPRequestHandler, HTTPServer, ThreadingHTTPServer
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Tuple
+from typing import Any, Callable, Dict, FrozenSet, List, Optional, Tuple
 from urllib.parse import parse_qs, urlencode, urlparse

 import httpx
 import yaml

 from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
-from hermes_constants import OPENROUTER_BASE_URL
+from hermes_constants import OPENROUTER_BASE_URL, secure_parent_dir
+from agent.credential_persistence import sanitize_borrowed_credential_payload
 from utils import atomic_replace, atomic_yaml_write, is_truthy_value

 logger = logging.getLogger(__name__)
@ -196,9 +197,17 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        auth_type="oauth_external",
        inference_base_url=DEFAULT_CODEX_BASE_URL,
    ),
+    "openai-api": ProviderConfig(
+        id="openai-api",
+        name="OpenAI API",
+        auth_type="api_key",
+        inference_base_url="https://api.openai.com/v1",
+        api_key_env_vars=("OPENAI_API_KEY",),
+        base_url_env_var="OPENAI_BASE_URL",
+    ),
    "xai-oauth": ProviderConfig(
        id="xai-oauth",
-        name="xAI Grok OAuth (SuperGrok Subscription)",
+        name="xAI Grok OAuth (SuperGrok / Premium+)",
        auth_type="oauth_external",
        inference_base_url=DEFAULT_XAI_OAUTH_BASE_URL,
    ),
@ -393,6 +402,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        # OpenCode Go mixes API surfaces by model:
        # - GLM / Kimi use OpenAI-compatible chat completions under /v1
        # - MiniMax models use Anthropic Messages under /v1/messages
+        # - Qwen 3.7 uses Anthropic Messages under /v1/messages
        # Keep the provider base at /v1 and select api_mode per-model.
        inference_base_url="https://opencode.ai/zen/go/v1",
        api_key_env_vars=("OPENCODE_GO_API_KEY",),
@ -553,6 +563,7 @@ _PLACEHOLDER_SECRET_VALUES = {
    "***",
    "changeme",
    "your_api_key",
+    "your_api_key_here",
    "your-api-key",
    "placeholder",
    "example",
@ -1030,10 +1041,8 @@ def _save_auth_store(auth_store: Dict[str, Any]) -> Path:
    auth_file.parent.mkdir(parents=True, exist_ok=True)
    # Tighten parent dir to 0o700 so siblings can't traverse to creds.
    # No-op on Windows (POSIX mode bits not enforced); ignore failures.
-    try:
-        os.chmod(auth_file.parent, 0o700)
-    except OSError:
-        pass
+    # secure_parent_dir refuses to chmod / or top-level dirs (#25821).
+    secure_parent_dir(auth_file)
    auth_store["version"] = AUTH_STORE_VERSION
    auth_store["updated_at"] = datetime.now(timezone.utc).isoformat()
    payload = json.dumps(auth_store, indent=2) + "\n"
@ -1169,14 +1178,23 @@ def read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]:


 def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Path:
-    """Persist one provider's credential pool under auth.json."""
+    """Persist one provider's credential pool under auth.json.
+
+    This is the final disk-boundary guard for borrowed/reference-only
+    credentials. Callers may pass raw dictionaries, so sanitize here even when
+    ``PooledCredential.to_dict()`` already did the same work upstream.
+    """
    with _auth_store_lock():
        auth_store = _load_auth_store()
        pool = auth_store.get("credential_pool")
        if not isinstance(pool, dict):
            pool = {}
            auth_store["credential_pool"] = pool
-        pool[provider_id] = list(entries)
+        pool[provider_id] = [
+            sanitize_borrowed_credential_payload(entry, provider_id)
+            if isinstance(entry, dict) else entry
+            for entry in entries
+        ]
        return _save_auth_store(auth_store)


@ -1561,6 +1579,67 @@ def _optional_base_url(value: Any) -> Optional[str]:
    return cleaned if cleaned else None


+# Allowlist of hosts the Nous Portal proxy is willing to forward minted
+# bearer tokens to. The bearer is a long-lived agent_key minted by
+# portal.nousresearch.com — sending it anywhere else would leak it.
+#
+# This is consulted only for URLs coming from the NETWORK side (Portal
+# refresh / agent-key-mint responses). User-controlled env-var overrides
+# (NOUS_INFERENCE_BASE_URL) bypass validation — that's the documented
+# dev/staging escape hatch and the env source is already trusted (the
+# user set it themselves).
+_ALLOWED_NOUS_INFERENCE_HOSTS: FrozenSet[str] = frozenset({
+    "inference-api.nousresearch.com",
+})
+
+
+def _validate_nous_inference_url_from_network(url: Optional[str]) -> Optional[str]:
+    """Vet an inference base URL received from the Nous Portal.
+
+    Accepts only a string that, once surrounding whitespace is removed,
+    parses as an ``https`` URL whose hostname is a member of
+    ``_ALLOWED_NOUS_INFERENCE_HOSTS``. The accepted value is returned with
+    its trailing slashes removed.
+
+    ``None`` is returned for a non-string, an empty / whitespace-only
+    string, a URL that ``urlparse`` rejects, a scheme other than ``https``
+    or a hostname outside the allowlist. The last two cases also emit a
+    warning. Callers treat ``None`` as "keep the current base URL".
+
+    Rationale (defense-in-depth): a tampered refresh or agent-key-mint
+    response could otherwise point later requests — which carry the
+    user's minted agent_key — at a host the attacker controls. Checking
+    scheme and host here stops such a value before callers store it.
+
+    Meant for network-sourced URLs only; the ``NOUS_INFERENCE_BASE_URL``
+    env override is documented as exempt (set by the local user).
+
+    Co-authored-by: memosr <mehmet.sr35@gmail.com>
+    """
+    candidate = url.strip() if isinstance(url, str) else ""
+    if not candidate:
+        return None
+    try:
+        parts = urlparse(candidate)
+    except Exception:
+        return None
+    if parts.scheme != "https":
+        logger.warning(
+            "nous: refusing non-https inference URL scheme %r from Portal response",
+            parts.scheme,
+        )
+        return None
+    host = parts.hostname
+    if host in _ALLOWED_NOUS_INFERENCE_HOSTS:
+        return candidate.rstrip("/")
+    logger.warning(
+        "nous: refusing inference URL host %r from Portal response "
+        "(not in allowlist); falling back to default",
+        host,
+    )
+    return None
+
+
 def _decode_jwt_claims(token: Any) -> Dict[str, Any]:
    if not isinstance(token, str) or token.count(".") != 2:
        return {}
@ -1863,10 +1942,8 @@ def _read_qwen_cli_tokens() -> Dict[str, Any]:
 def _save_qwen_cli_tokens(tokens: Dict[str, Any]) -> Path:
    auth_path = _qwen_cli_auth_path()
    auth_path.parent.mkdir(parents=True, exist_ok=True)
-    try:
-        os.chmod(auth_path.parent, 0o700)
-    except OSError:
-        pass
+    # secure_parent_dir refuses to chmod / or top-level dirs (#25821).
+    secure_parent_dir(auth_path)
    # Per-process random temp suffix avoids collisions between concurrent
    # writers and stale leftovers from a crashed prior write.
    tmp_path = auth_path.with_name(f"{auth_path.name}.tmp.{os.getpid()}.{uuid.uuid4().hex}")
@ -2008,7 +2085,10 @@ def resolve_qwen_runtime_credentials(
 def get_qwen_auth_status() -> Dict[str, Any]:
    auth_path = _qwen_cli_auth_path()
    try:
-        creds = resolve_qwen_runtime_credentials(refresh_if_expiring=False)
+        # Validate the runtime credentials, including refresh when the cached
+        # CLI token is expired. Otherwise stale tokens show up as "logged in"
+        # and `hermes model` walks users into a broken Qwen setup flow.
+        creds = resolve_qwen_runtime_credentials(refresh_if_expiring=True)
        return {
            "logged_in": True,
            "auth_file": str(auth_path),
@ -2409,6 +2489,32 @@ def _make_xai_callback_handler(expected_path: str) -> tuple[type[BaseHTTPRequest
                "error_description": params.get("error_description", [None])[0],
            }

+            # Diagnostic logging — emits at INFO so reporters of loopback bugs
+            # (#27385 — "callback received but Hermes times out") can produce
+            # actionable evidence without a code change.  ``code`` / ``state`` are
+            # logged as presence booleans only; the path, a truncated User-Agent and
+            # any error text are logged in the clear.  Run with ``HERMES_LOG_LEVEL=INFO``
+            # (or check ``~/.hermes/logs/agent.log`` which captures INFO+ unconditionally).
+            try:
+                logger.info(
+                    "xAI loopback callback received: path=%s has_code=%s has_state=%s has_error=%s "
+                    "ua=%s",
+                    parsed.path,
+                    incoming["code"] is not None,
+                    incoming["state"] is not None,
+                    incoming["error"] is not None,
+                    (self.headers.get("User-Agent") or "")[:80],
+                )
+                if incoming["error"]:
+                    logger.info(
+                        "xAI loopback callback carries error=%s error_description=%s",
+                        incoming["error"],
+                        (incoming["error_description"] or "")[:200],
+                    )
+            except Exception:
+                # Logging must never break the OAuth flow.
+                pass
+
            # Treat a hit on the callback path with neither `code` nor `error`
            # as a missing OAuth callback (e.g. xAI's auth backend failed to
            # redirect and the user navigated to the bare loopback URL by hand).
@ -2513,6 +2619,17 @@ def _xai_wait_for_callback(
        server.shutdown()
        server.server_close()
        thread.join(timeout=1.0)
+    # Diagnostic: distinguish "no callback ever arrived" from "callback
+    # arrived but result wasn't populated" (#27385).  The per-hit handler
+    # logs each request at INFO; if this line appears without one, xAI's IDP
+    # never reached the loopback at all (firewall, port-binding, IPv6/IPv4 mismatch).
+    logger.info(
+        "xAI loopback wait timed out after %.0fs with no usable callback "
+        "(result.code=%s result.error=%s)",
+        max(5.0, timeout_seconds),
+        result["code"] is not None,
+        result["error"] is not None,
+    )
    raise AuthError(
        "xAI authorization timed out waiting for the local callback.",
        provider="xai-oauth",
@ -3346,7 +3463,7 @@ def _read_xai_oauth_tokens(*, _lock: bool = True) -> Dict[str, Any]:
    state = _load_provider_state(auth_store, "xai-oauth")
    if not state:
        raise AuthError(
-            "No xAI OAuth credentials stored. Select xAI Grok OAuth (SuperGrok Subscription) in `hermes model`.",
+            "No xAI OAuth credentials stored. Select xAI Grok OAuth (SuperGrok / Premium+) in `hermes model`.",
            provider="xai-oauth",
            code="xai_auth_missing",
            relogin_required=True,
@ -4168,10 +4285,8 @@ def _write_shared_nous_state(state: Dict[str, Any]) -> None:
        with _nous_shared_store_lock():
            path = _nous_shared_store_path()
            path.parent.mkdir(parents=True, exist_ok=True)
-            try:
-                os.chmod(path.parent, 0o700)
-            except OSError:
-                pass
+            # secure_parent_dir refuses to chmod / or top-level dirs (#25821).
+            secure_parent_dir(path)
            tmp = path.with_name(f"{path.name}.tmp.{os.getpid()}.{uuid.uuid4().hex}")
            # Create with 0o600 atomically via os.open(O_EXCL) — closes the TOCTOU
            # window where write_text() + post-write chmod briefly exposed Nous
@ -4782,7 +4897,7 @@ def refresh_nous_oauth_pure(
            state["refresh_token"] = refreshed.get("refresh_token") or state["refresh_token"]
            state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
            state["scope"] = refreshed.get("scope") or state.get("scope")
-            refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
+            refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
            if refreshed_url:
                state["inference_base_url"] = refreshed_url
            state["obtained_at"] = now.isoformat()
@ -4818,7 +4933,7 @@ def refresh_nous_oauth_pure(
            state["agent_key_expires_in"] = mint_payload.get("expires_in")
            state["agent_key_reused"] = bool(mint_payload.get("reused", False))
            state["agent_key_obtained_at"] = now.isoformat()
-            minted_url = _optional_base_url(mint_payload.get("inference_base_url"))
+            minted_url = _validate_nous_inference_url_from_network(mint_payload.get("inference_base_url"))
            if minted_url:
                state["inference_base_url"] = minted_url

@ -5096,7 +5211,7 @@ def resolve_nous_runtime_credentials(
                        state["refresh_token"] = refreshed.get("refresh_token") or refresh_token
                        state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
                        state["scope"] = refreshed.get("scope") or state.get("scope")
-                        refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
+                        refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
                        if refreshed_url:
                            inference_base_url = refreshed_url
                        state["obtained_at"] = now.isoformat()
@ -5204,7 +5319,7 @@ def resolve_nous_runtime_credentials(
                                state["refresh_token"] = refreshed.get("refresh_token") or latest_refresh_token
                                state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
                                state["scope"] = refreshed.get("scope") or state.get("scope")
-                                refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
+                                refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
                                if refreshed_url:
                                    inference_base_url = refreshed_url
                                state["obtained_at"] = now.isoformat()
@ -5259,7 +5374,7 @@ def resolve_nous_runtime_credentials(
                state["agent_key_expires_in"] = mint_payload.get("expires_in")
                state["agent_key_reused"] = bool(mint_payload.get("reused", False))
                state["agent_key_obtained_at"] = now.isoformat()
-                minted_url = _optional_base_url(mint_payload.get("inference_base_url"))
+                minted_url = _validate_nous_inference_url_from_network(mint_payload.get("inference_base_url"))
                if minted_url:
                    inference_base_url = minted_url
                _oauth_trace(
@ -6279,7 +6394,7 @@ def _login_xai_oauth(
            pass

    print()
-    print("Signing in to xAI Grok OAuth (SuperGrok Subscription)...")
+    print("Signing in to xAI Grok OAuth (SuperGrok / Premium+)...")
    print("(Hermes creates its own local OAuth session)")
    print()

@ -7051,10 +7166,95 @@ def _refresh_minimax_oauth_state(
    return new_state


+def _minimax_oauth_quarantine_on_terminal_refresh(state: Dict[str, Any], exc: AuthError) -> None:
+    """Purge unusable MiniMax tokens from ``state`` after a failed refresh.
+
+    No-op unless ``exc.relogin_required`` is set and a ``refresh_token`` is
+    still present. Otherwise drops the token fields, records the failure under
+    ``last_auth_error`` and tries to save (a failing save is only logged).
+    """
+    if not exc.relogin_required or not state.get("refresh_token"):
+        return
+    for stale_key in ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at"):
+        state.pop(stale_key, None)
+    state["last_auth_error"] = {
+        "provider": "minimax-oauth",
+        "code": exc.code or "refresh_failed",
+        "message": str(exc),
+        "reason": "runtime_refresh_failure",
+        "relogin_required": True,
+        "at": datetime.now(timezone.utc).isoformat(),
+    }
+    try:
+        _minimax_save_auth_state(state)
+    except Exception as save_err:
+        logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", save_err)
+
+
+def build_minimax_oauth_token_provider() -> Callable[[], str]:
+    """Build a zero-argument callable that returns a MiniMax access token.
+
+    Every invocation of the returned callable:
+
+    1. loads the ``minimax-oauth`` state via ``get_provider_auth_state`` and
+       raises ``AuthError`` (``not_logged_in``) if no access token is stored;
+    2. passes that state through ``_refresh_minimax_oauth_state``; when that
+       raises ``AuthError`` the state is handed to the quarantine helper and
+       the error is re-raised;
+    3. returns the resulting ``access_token``, or raises ``AuthError``
+       (``no_access_token``) if the refreshed state lacks one.
+
+    Why a callable rather than a string (per the design notes that came with
+    this change; the referenced code is outside this view): the Anthropic
+    SDK keeps ``api_key`` as a fixed string, so a token resolved once at
+    startup goes stale when MiniMax's short-lived token expires, whereas
+    ``build_anthropic_client`` in :mod:`agent.anthropic_adapter` is said to
+    route a callable through a hook that sets ``Authorization`` per request.
+    The refresh helper is described as a no-op while the token has more
+    than ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` left, and re-reading state
+    lets a refresh saved by another process be picked up on the next call.
+    """
+    def _provide() -> str:
+        stored = get_provider_auth_state("minimax-oauth")
+        if not (stored and stored.get("access_token")):
+            raise AuthError(
+                "Not logged into MiniMax OAuth. Run `hermes model` and select "
+                "MiniMax (OAuth).",
+                provider="minimax-oauth", code="not_logged_in", relogin_required=True,
+            )
+        try:
+            refreshed = _refresh_minimax_oauth_state(stored)
+        except AuthError as refresh_err:
+            _minimax_oauth_quarantine_on_terminal_refresh(stored, refresh_err)
+            raise
+        access_token = refreshed.get("access_token")
+        if access_token:
+            return access_token
+        raise AuthError(
+            "MiniMax OAuth state has no access_token after refresh.",
+            provider="minimax-oauth", code="no_access_token", relogin_required=True,
+        )
+
+    return _provide
+
+
 def resolve_minimax_oauth_runtime_credentials(
    *, min_token_ttl_seconds: int = MINIMAX_OAUTH_REFRESH_SKEW_SECONDS,
+    as_token_provider: bool = False,
 ) -> Dict[str, Any]:
-    """Return {provider, api_key, base_url, source} for minimax-oauth."""
+    """Return {provider, api_key, base_url, source} for minimax-oauth.
+
+    When ``as_token_provider`` is True, ``api_key`` is a zero-arg callable
+    that mints a fresh access token per call (proactively refreshing if
+    the cached token is within ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` of
+    expiry). This is what the runtime provider path uses so that long
+    sessions survive MiniMax's short access-token lifetime — see
+    :func:`build_minimax_oauth_token_provider` for the rationale.
+
+    The default (string ``api_key``) preserves the historical contract for
+    diagnostic call sites like ``hermes status`` that just want to know
+    whether a valid token exists right now.
+    """
    state = get_provider_auth_state("minimax-oauth")
    if not state or not state.get("access_token"):
        raise AuthError(
@ -7065,28 +7265,15 @@ def resolve_minimax_oauth_runtime_credentials(
    try:
        state = _refresh_minimax_oauth_state(state)
    except AuthError as exc:
-        if exc.relogin_required and state.get("refresh_token"):
-            # Terminal refresh failure — clear dead tokens from auth.json so
-            # subsequent calls fail fast without a network retry, mirroring
-            # the Nous / xAI-OAuth / Codex-OAuth quarantine pattern.
-            for _k in ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at"):
-                state.pop(_k, None)
-            state["last_auth_error"] = {
-                "provider": "minimax-oauth",
-                "code": exc.code or "refresh_failed",
-                "message": str(exc),
-                "reason": "runtime_refresh_failure",
-                "relogin_required": True,
-                "at": datetime.now(timezone.utc).isoformat(),
-            }
-            try:
-                _minimax_save_auth_state(state)
-            except Exception as _save_exc:
-                logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", _save_exc)
+        _minimax_oauth_quarantine_on_terminal_refresh(state, exc)
        raise
+    if as_token_provider:
+        api_key: Any = build_minimax_oauth_token_provider()
+    else:
+        api_key = state["access_token"]
    return {
        "provider": "minimax-oauth",
-        "api_key": state["access_token"],
+        "api_key": api_key,
        "base_url": state["inference_base_url"].rstrip("/"),
        "source": "oauth",
    }
--- a/hermes_cli/auth_commands.py
+++ b/hermes_cli/auth_commands.py
@ -2,7 +2,6 @@

 from __future__ import annotations

-from getpass import getpass
 import math
 import sys
 import time
@ -30,6 +29,7 @@ from agent.credential_pool import (
 import hermes_cli.auth as auth_mod
 from hermes_cli.auth import PROVIDER_REGISTRY
 from hermes_constants import OPENROUTER_BASE_URL
+from hermes_cli.secret_prompt import masked_secret_prompt


 # Providers that support OAuth login in addition to API keys.
@ -196,7 +196,7 @@ def auth_add_command(args) -> None:
    if requested_type == AUTH_TYPE_API_KEY:
        token = (getattr(args, "api_key", None) or "").strip()
        if not token:
-            token = getpass("Paste your API key: ").strip()
+            token = masked_secret_prompt("Paste your API key: ").strip()
        if not token:
            raise SystemExit("No API key provided.")
        default_label = _api_key_default_label(len(pool.entries()) + 1)
--- a/hermes_cli/backup.py
+++ b/hermes_cli/backup.py
@ -85,6 +85,22 @@ def _should_exclude(rel_path: Path) -> bool:
    return False


+def _should_skip_backup_file(abs_path: Path, rel_path: Path, out_path: Path) -> bool:
+    """Tell whether ``abs_path`` must be left out of the backup archive.
+
+    Skipped: paths matched by ``_should_exclude``, symlinks (``zipfile.write()``
+    follows file symlinks, so one could pull in data from outside
+    HERMES_HOME), and the output archive itself.
+    """
+    if _should_exclude(rel_path) or abs_path.is_symlink():
+        return True
+    try:
+        is_output_zip = abs_path.resolve() == out_path.resolve()
+    except (OSError, ValueError):
+        is_output_zip = False  # unresolvable path: keep the file
+    return is_output_zip
+
+
 # ---------------------------------------------------------------------------
 # SQLite safe copy
 # ---------------------------------------------------------------------------
@ -173,16 +189,9 @@ def run_backup(args) -> None:
            fpath = dp / fname
            rel = fpath.relative_to(hermes_root)

-            if _should_exclude(rel):
+            if _should_skip_backup_file(fpath, rel, out_path):
                continue

-            # Skip the output zip itself if it happens to be inside hermes root
-            try:
-                if fpath.resolve() == out_path.resolve():
-                    continue
-            except (OSError, ValueError):
-                pass
-
            files_to_add.append((fpath, rel))

    if not files_to_add:
@ -726,16 +735,9 @@ def _write_full_zip_backup(out_path: Path, hermes_root: Path) -> Optional[Path]:
                except ValueError:
                    continue

-                if _should_exclude(rel):
+                if _should_skip_backup_file(fpath, rel, out_path):
                    continue

-                # Skip the output zip itself if it already exists inside root.
-                try:
-                    if fpath.resolve() == out_path.resolve():
-                        continue
-                except (OSError, ValueError):
-                    pass
-
                files_to_add.append((fpath, rel))
    except OSError as exc:
        logger.warning("Full-zip backup: walk failed: %s", exc)
--- a/hermes_cli/callbacks.py
+++ b/hermes_cli/callbacks.py
@ -8,10 +8,10 @@ with the TUI.

 import queue
 import time as _time
-import getpass

 from hermes_cli.banner import cprint, _DIM, _RST
 from hermes_cli.config import save_env_value_secure
+from hermes_cli.secret_prompt import masked_secret_prompt
 from hermes_constants import display_hermes_home


@ -75,7 +75,7 @@ def prompt_for_secret(cli, var_name: str, prompt: str, metadata=None) -> dict:
        if not hasattr(cli, "_secret_deadline"):
            cli._secret_deadline = 0
        try:
-            value = getpass.getpass(f"{prompt} (hidden, ESC or empty Enter to skip): ")
+            value = masked_secret_prompt(f"{prompt} (hidden, ESC or empty Enter to skip): ")
        except (EOFError, KeyboardInterrupt):
            value = ""

--- a/hermes_cli/cli_output.py
+++ b/hermes_cli/cli_output.py
@ -5,9 +5,8 @@ functions previously duplicated across setup.py, tools_config.py,
 mcp_config.py, and memory_setup.py.
 """

-import getpass
-
 from hermes_cli.colors import Colors, color
+from hermes_cli.secret_prompt import masked_secret_prompt


 # ─── Print Helpers ────────────────────────────────────────────────────────────
@ -59,7 +58,7 @@ def prompt(

    try:
        if password:
-            value = getpass.getpass(display)
+            value = masked_secret_prompt(display)
        else:
            value = input(display)
        value = value.strip()
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@ -164,7 +164,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
               cli_only=True),
    CommandDef("skills", "Search, install, inspect, or manage skills",
               "Tools & Skills", cli_only=True,
-               subcommands=("search", "browse", "inspect", "install")),
+               subcommands=("search", "browse", "inspect", "install", "audit")),
    CommandDef("bundles", "List skill bundles (aliases /<name> for multiple skills)",
               "Tools & Skills"),
    CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
@ -449,7 +449,7 @@ def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
    :func:`hermes_cli.plugins.PluginContext.register_command`. They behave
    like ``CommandDef`` entries for gateway surfacing: they appear in the
    Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
-    (via :func:`gateway.platforms.discord._register_slash_commands`) in
+    (via :func:`plugins.platforms.discord.adapter._register_slash_commands`) in
    Discord's native slash command picker.

    Lookup is lazy so importing this module never forces plugin discovery
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -26,6 +26,8 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, Any, Optional, List, Tuple

+from hermes_cli.secret_prompt import masked_secret_prompt
+
 logger = logging.getLogger(__name__)

 # Track which (config_path, mtime_ns, size) tuples we've already warned about
@ -72,6 +74,82 @@ def _warn_config_parse_failure(config_path: Path, exc: Exception) -> None:

 _IS_WINDOWS = platform.system() == "Windows"
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
+
+# Env var names that influence how the next subprocess executes —
+# never writable through ``save_env_value``. Anything that controls
+# the loader, interpreter, shell, or replacement editor counts:
+#
+# * ``LD_PRELOAD`` / ``LD_LIBRARY_PATH`` / ``LD_AUDIT`` — Linux dynamic
+#   loader. ``DYLD_*`` — macOS equivalent. Planting a path here means
+#   the next ``subprocess.run([...])`` Hermes makes loads attacker code
+#   before main().
+# * ``PYTHONPATH`` / ``PYTHONHOME`` / ``PYTHONSTARTUP`` /
+#   ``PYTHONUSERBASE`` — Python interpreter init. Hermes itself starts
+#   from one of these on every restart.
+# * ``NODE_OPTIONS`` / ``NODE_PATH`` — Node interpreter; affects npm,
+#   ``hermes update``, the TUI build.
+# * ``PATH`` — too broad to allow. The dashboard never needs to rewrite
+#   the operator's PATH; if a tool can't be found, the fix is to add an
+#   absolute path in the integration config, not to mutate PATH globally.
+# * ``GIT_SSH_COMMAND`` / ``GIT_EXEC_PATH`` — git rewrites that fire
+#   on every plugin install / ``hermes update``.
+# * ``BROWSER`` / ``EDITOR`` / ``VISUAL`` / ``PAGER`` — commands the
+#   shell or CLI invokes implicitly. Wrong values here = RCE on next
+#   ``$EDITOR``.
+# * ``SHELL`` — what subprocess uses with ``shell=True`` (we try to
+#   avoid that, but defense in depth).
+# * ``HERMES_HOME`` / ``HERMES_PROFILE`` / ``HERMES_CONFIG`` /
+#   ``HERMES_ENV`` — Hermes runtime location flags. Writing these into
+#   ``.env`` would relocate state in ways the user did not request from
+#   the dashboard. ``config.yaml`` is the supported surface for these.
+#
+# IMPORTANT: ``HERMES_*`` overall is NOT blocked. Many legitimate
+# integration credentials follow that prefix (HERMES_GEMINI_CLIENT_ID,
+# HERMES_LANGFUSE_PUBLIC_KEY, HERMES_SPOTIFY_CLIENT_ID, ...). The
+# denylist is name-by-name on purpose so the gate stays narrow and
+# doesn't accidentally break provider setup wizards.
+#
+# This is enforced on *write* only — values already in ``.env`` (set
+# by the operator out-of-band, or pre-existing) keep working. The
+# point is that the dashboard's writable surface cannot escalate by
+# planting them.
+_ENV_VAR_NAME_DENYLIST: frozenset[str] = frozenset({
+    # Loader / linker
+    "LD_PRELOAD", "LD_LIBRARY_PATH", "LD_AUDIT", "LD_DEBUG",
+    "DYLD_INSERT_LIBRARIES", "DYLD_LIBRARY_PATH", "DYLD_FRAMEWORK_PATH",
+    "DYLD_FALLBACK_LIBRARY_PATH", "DYLD_FALLBACK_FRAMEWORK_PATH",
+    # Python
+    "PYTHONPATH", "PYTHONHOME", "PYTHONSTARTUP", "PYTHONUSERBASE",
+    "PYTHONEXECUTABLE", "PYTHONNOUSERSITE",
+    # Node
+    "NODE_OPTIONS", "NODE_PATH",
+    # General
+    "PATH", "SHELL", "BROWSER", "EDITOR", "VISUAL", "PAGER",
+    # Git
+    "GIT_SSH_COMMAND", "GIT_EXEC_PATH", "GIT_SHELL",
+    # Hermes runtime location — never via dashboard env writer.
+    # NOT a HERMES_* blanket: integration credentials (HERMES_GEMINI_*,
+    # HERMES_LANGFUSE_*, HERMES_SPOTIFY_*, ...) ARE allowed.
+    "HERMES_HOME", "HERMES_PROFILE", "HERMES_CONFIG", "HERMES_ENV",
+})
+
+
+def _reject_denylisted_env_var(key: str) -> None:
+    """Raise ``ValueError`` if ``key`` is in :data:`_ENV_VAR_NAME_DENYLIST`.
+
+    Intended as the one gate shared by the env writers. On Windows the name
+    is upper-cased first: names are case-insensitive there, so ``path`` counts.
+    """
+    if (key.upper() if _IS_WINDOWS else key) in _ENV_VAR_NAME_DENYLIST:
+        raise ValueError(
+            f"Environment variable {key!r} is on the writer denylist. "
+            "Names that influence subprocess execution (LD_PRELOAD, "
+            "PYTHONPATH, PATH, EDITOR, ...) or Hermes runtime location "
+            "(HERMES_HOME, HERMES_PROFILE, ...) cannot be persisted via "
+            "the env writer. If you really need this, edit "
+            "~/.hermes/.env directly."
+        )
+
 _LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {}
 # (path, mtime_ns, size) -> cached expanded config dict.
 # load_config() returns a deepcopy of the cached value when the file
@ -658,7 +736,8 @@ DEFAULT_CONFIG = {
        # are owned by your host user instead of root, which avoids needing
        # `sudo chown` after container runs. Default off to preserve behavior
        # for images whose entrypoints expect to start as root (e.g. the
-        # bundled Hermes image, which drops to the `hermes` user via gosu).
+        # bundled Hermes image, which drops to the `hermes` user via
+        # s6-setuidgid inside each supervised service).
        # When on, SETUID/SETGID caps are omitted from the container since
        # no privilege drop is needed.
        "docker_run_as_host_user": False,
@ -1008,6 +1087,19 @@ DEFAULT_CONFIG = {
        "compact": False,
        "personality": "kawaii",
        "resume_display": "full",
+        # Recap tuning for /resume and startup resume. The defaults match the
+        # historical hardcoded values; expose them as config so power users can
+        # widen or tighten the snapshot to taste.
+        "resume_exchanges": 10,            # max user+assistant pairs to show
+        "resume_max_user_chars": 300,      # truncate user message text
+        "resume_max_assistant_chars": 200, # truncate non-last assistant text
+        "resume_max_assistant_lines": 3,   # truncate non-last assistant lines
+        # When True (default), assistant entries that are *only* tool calls
+        # (no visible text) are skipped in the recap. This prevents the recap
+        # from being dominated by `[2 tool calls: terminal, read_file]` lines
+        # when an exchange was tool-heavy. Set False to restore the legacy
+        # behavior of showing tool-call summaries inline.
+        "resume_skip_tool_only": True,
        "busy_input_mode": "interrupt",  # interrupt | queue | steer
        # When true, `hermes --tui` auto-resumes the most recent human-
        # facing session on launch instead of forging a fresh one.
@ -1622,6 +1714,31 @@ DEFAULT_CONFIG = {
        "force_ipv4": False,
    },

+    # Gateway settings — control how messaging platforms (Telegram, Discord,
+    # Slack, etc.) deliver agent-produced files as native attachments.
+    "gateway": {
+        # Extra directories from which model-emitted bare file paths may be
+        # uploaded as native gateway attachments. Files inside the Hermes
+        # cache (~/.hermes/cache/{documents,images,audio,video,screenshots})
+        # are always trusted; this list adds operator-controlled roots
+        # (project dirs, scratch dirs, mounted shares). Accepts a list of
+        # absolute paths or a single os.pathsep-separated string. Bridged
+        # to HERMES_MEDIA_ALLOW_DIRS at gateway startup. Tilde paths are
+        # expanded.
+        "media_delivery_allow_dirs": [],
+        # When true, files whose mtime is within ``trust_recent_files_seconds``
+        # of "now" are trusted for native delivery even outside the cache /
+        # operator allowlist — useful for ``pandoc -o /tmp/report.pdf`` or
+        # PDFs the agent writes into a working directory. System paths
+        # (/etc, /proc, ~/.ssh, ~/.aws, etc.) remain blocked regardless.
+        # Disable to fall back to pure-allowlist mode. Bridged to
+        # HERMES_MEDIA_TRUST_RECENT_FILES.
+        "trust_recent_files": True,
+        # Recency window in seconds. 600 (10 min) comfortably covers a
+        # multi-tool agent turn. Bridged to HERMES_MEDIA_TRUST_RECENT_SECONDS.
+        "trust_recent_files_seconds": 600,
+    },
+
    # Session storage — controls automatic cleanup of ~/.hermes/state.db.
    # state.db accumulates every session, message, tool call, and FTS5 index
    # entry forever.  Without auto-pruning, a heavy user (gateway + cron)
@ -1730,6 +1847,7 @@ DEFAULT_CONFIG = {
        "servers": {},
    },

+
    # X (Twitter) Search via xAI's built-in x_search Responses tool.
    # The tool registers when xAI credentials are available (SuperGrok
    # OAuth or XAI_API_KEY) AND the x_search toolset is enabled in
@ -1747,8 +1865,69 @@ DEFAULT_CONFIG = {
        "retries": 2,
    },

+    # =========================================================================
+    # External secret sources
+    # =========================================================================
+    # Pull credentials from external secret managers at process startup
+    # rather than storing them in ~/.hermes/.env.
+    "secrets": {
+        "bitwarden": {
+            # Master switch.  When false, BSM is never contacted and the
+            # bws binary is never auto-installed — same as not having
+            # this section at all.
+            "enabled": False,
+            # Name of the env var that holds the Bitwarden machine-account
+            # access token.  This is the one bootstrap secret; it lives
+            # in ~/.hermes/.env (or your shell) and never in config.yaml.
+            "access_token_env": "BWS_ACCESS_TOKEN",
+            # UUID of the BSM project to sync from.
+            "project_id": "",
+            # Seconds to cache fetched secrets in-process.  0 disables.
+            "cache_ttl_seconds": 300,
+            # When True, BSM values overwrite existing env vars.  Default
+            # True because the point of using BSM is centralized rotation —
+            # if .env had the final say, rotating in Bitwarden wouldn't
+            # take effect until you also cleared the matching .env line.
+            "override_existing": True,
+            # When True, the bws binary is auto-downloaded into
+            # ~/.hermes/bin/ on first use.  When False you must install
+            # bws yourself and have it on PATH.
+            "auto_install": True,
+            # Bitwarden region / self-hosted endpoint.  Empty string
+            # means use the bws CLI default (US Cloud,
+            # https://vault.bitwarden.com).  Set to
+            # https://vault.bitwarden.eu for EU Cloud, or your own URL
+            # for self-hosted Bitwarden.  Plumbed into the bws subprocess
+            # as BWS_SERVER_URL.  Prompted for during
+            # `hermes secrets bitwarden setup`.
+            "server_url": "",
+        },
+    },
+
+    # Paste collapse thresholds (TUI + CLI).
+    #
+    # paste_collapse_threshold (default 5)
+    #   Bracketed-paste handler. Pastes with this many newlines or more
+    #   collapse to a file reference. Set 0 to disable.
+    #
+    # paste_collapse_threshold_fallback (default 5)
+    #   Fallback heuristic for terminals without bracketed paste support.
+    #   Same line count test but heuristically gated by chars-added /
+    #   newlines-added to avoid false positives from normal typing.
+    #   Set 0 to disable.
+    #
+    # paste_collapse_char_threshold (default 2000)
+    #   Long single-line paste guard. Pastes whose total char length
+    #   reaches this value collapse to a file reference even if line
+    #   count is below the line threshold. Catches the "8000 chars of
+    #   minified JSON / log output on one line" case. Set 0 to disable.
+    "paste_collapse_threshold": 5,
+    "paste_collapse_threshold_fallback": 5,
+    "paste_collapse_char_threshold": 2000,
+
+
    # Config schema version - bump this when adding new required fields
-    "_config_version": 23,
+    "_config_version": 24,
 }

 # =============================================================================
@ -3017,7 +3196,7 @@ def _normalize_custom_provider_entry(
        "api_mode", "transport", "model", "default_model", "models",
        "context_length", "rate_limit_delay",
        "request_timeout_seconds", "stale_timeout_seconds",
-        "discover_models",
+        "discover_models", "extra_body",
    }
    for camel, snake in _CAMEL_ALIASES.items():
        if camel in entry and snake not in entry:
@ -3112,6 +3291,10 @@ def _normalize_custom_provider_entry(
    if isinstance(discover_models, bool):
        normalized["discover_models"] = discover_models

+    extra_body = entry.get("extra_body")
+    if isinstance(extra_body, dict):
+        normalized["extra_body"] = dict(extra_body)
+
    return normalized


@ -3272,7 +3455,7 @@ _KNOWN_ROOT_KEYS = {
 # Valid fields inside a custom_providers list entry
 _VALID_CUSTOM_PROVIDER_FIELDS = {
    "name", "base_url", "api_key", "api_mode", "model", "models",
-    "context_length", "rate_limit_delay",
+    "context_length", "rate_limit_delay", "extra_body",
    # key_env is read at runtime by runtime_provider.py and auxiliary_client.py
    # — include it here so the set accurately describes the supported schema.
    "key_env",
@ -3947,8 +4130,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
                print(f"  Get your key at: {var['url']}")
            
            if var.get("password"):
-                import getpass
-                value = getpass.getpass(f"  {var['prompt']}: ")
+                value = masked_secret_prompt(f"  {var['prompt']}: ")
            else:
                value = input(f"  {var['prompt']}: ").strip()
            
@ -3999,8 +4181,9 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
                    else:
                        print(f"  {info.get('description', name)}")
                    if info.get("password"):
-                        import getpass
-                        value = getpass.getpass(f"  {info.get('prompt', name)} (Enter to skip): ")
+                        value = masked_secret_prompt(
+                            f"  {info.get('prompt', name)} (Enter to skip): "
+                        )
                    else:
                        value = input(f"  {info.get('prompt', name)} (Enter to skip): ").strip()
                    if value:
@ -4779,6 +4962,7 @@ def save_env_value(key: str, value: str):
        return
    if not _ENV_VAR_NAME_RE.match(key):
        raise ValueError(f"Invalid environment variable name: {key!r}")
+    _reject_denylisted_env_var(key)
    value = value.replace("\n", "").replace("\r", "")
    # API keys / tokens must be ASCII — strip non-ASCII with a warning.
    value = _check_non_ascii_credential(key, value)
--- a/hermes_cli/container_boot.py
+++ b/hermes_cli/container_boot.py
@ -0,0 +1,325 @@
+"""Container-boot reconciliation of per-profile gateway s6 services.
+
+Service directories under /run/service/ live on **tmpfs** and are wiped
+on every container restart. Profile directories under
+``$HERMES_HOME/profiles/<name>/`` live on the persistent VOLUME, and
+each one records its gateway's last state in ``gateway_state.json``.
+This module bridges the two: on every container boot, walk the
+persistent profiles, recreate the s6 service slots, and auto-start
+only those whose last recorded state was ``running``.
+
+Wired into the image as /etc/cont-init.d/02-reconcile-profiles by the
+Dockerfile (Phase 4 Task 4.0). Runs as root after 01-hermes-setup
+(the stage2 hook) has chowned the volume and seeded $HERMES_HOME, but
+before s6-rc starts user services.
+
+Without this module, every ``docker restart`` would silently wipe
+every per-profile gateway, even though the user's profiles still
+exist on disk.
+"""
+from __future__ import annotations
+
+import json
+import logging
+import os
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Literal
+
+log = logging.getLogger(__name__)
+
+# Only this prior state triggers automatic restart. Everything else
+# (startup_failed, starting, stopped, missing) registers the slot in
+# the down state and waits for explicit user action — this avoids the
+# crash-loop where a broken gateway keeps being restarted across
+# `docker restart` cycles.
+_AUTOSTART_STATES = frozenset({"running"})
+
+# Stale runtime files we sweep before recreating service slots. These
+# all hold container-namespaced state (PIDs, process tables) that's
+# garbage post-restart — a numerically-equal PID in the new container
+# is a different process. See the Risk Register in the plan.
+_STALE_RUNTIME_FILES = ("gateway.pid", "processes.json")
+
+# Outcome labels carried by ReconcileAction.action. This module only
+# ever emits "started" and "registered"; directories that are passed
+# over during the scan are omitted from the result rather than being
+# reported as "skipped".
+ReconcileActionLabel = Literal["started", "registered", "skipped"]
+
+
+@dataclass(frozen=True)
+class ReconcileAction:
+    """One profile's outcome from a single reconciliation pass."""
+    # "default" for the root profile, otherwise the name of the
+    # directory under <hermes_home>/profiles/.
+    profile: str
+    # ``gateway_state`` value read from the profile's gateway_state.json,
+    # or None when no prior state could be read.
+    prior_state: str | None
+    # "started" when the slot was registered to auto-start, "registered"
+    # when it was registered in the down state.
+    action: ReconcileActionLabel
+
+
+def reconcile_profile_gateways(
+    *,
+    hermes_home: Path,
+    scandir: Path,
+    dry_run: bool = False,
+) -> list[ReconcileAction]:
+    """Rebuild the s6 gateway slot of every persistent profile.
+
+    The root profile (the one stored directly in ``hermes_home``) is
+    always handled first and published as ``gateway-default``, whether
+    or not anything was ever written there, so that a bare
+    ``hermes gateway start`` has a slot to target. Its prior state comes
+    from ``<hermes_home>/gateway_state.json``.
+
+    Named profiles are the sub-directories of ``<hermes_home>/profiles``
+    that contain a ``SOUL.md`` marker. A directory literally named
+    ``default`` is ignored (with a warning) because that service name is
+    reserved for the root profile.
+
+    For every handled profile the prior gateway state is read, stale
+    runtime files are removed, and the slot is registered; it is set to
+    auto-start only when the prior state is in ``_AUTOSTART_STATES``.
+
+    Args:
+        hermes_home: The container's HERMES_HOME (typically /opt/data).
+        scandir: The s6 dynamic scandir (typically /run/service); slots
+            are created at ``<scandir>/gateway-<profile>/``.
+        dry_run: When True, compute and return the actions without
+            touching the filesystem (no cleanup, no registration, no
+            reconcile log).
+
+    Returns:
+        One :class:`ReconcileAction` per handled profile: ``default``
+        first, then named profiles in sorted path order.
+    """
+
+    def _reconcile_one(profile_dir: Path, name: str) -> ReconcileAction:
+        # The prior state is read before the sweep and the registration,
+        # and it alone decides whether the slot comes up automatically.
+        state = _read_prior_state(profile_dir)
+        autostart = state in _AUTOSTART_STATES
+        if not dry_run:
+            _cleanup_stale_runtime_files(profile_dir)
+            _register_service(scandir, name, start=autostart)
+        return ReconcileAction(
+            profile=name,
+            prior_state=state,
+            action="started" if autostart else "registered",
+        )
+
+    # Root profile: unconditional, so the gateway-default slot always
+    # exists after a boot.
+    results = [_reconcile_one(hermes_home, "default")]
+
+    named_root = hermes_home / "profiles"
+    candidates = sorted(named_root.iterdir()) if named_root.is_dir() else []
+    for candidate in candidates:
+        # SOUL.md is the "real profile" marker: `hermes profile create`
+        # always seeds it, so stray dirs (backups, manual mkdir) that
+        # lack it are not picked up.
+        if not candidate.is_dir() or not (candidate / "SOUL.md").exists():
+            continue
+        # A profiles/default/ directory would collide with the slot that
+        # is reserved for the root profile; no disambiguation is
+        # attempted here.
+        if candidate.name == "default":
+            log.warning(
+                "profiles/default/ exists — skipping to avoid colliding "
+                "with the reserved root-profile s6 slot",
+            )
+            continue
+        results.append(_reconcile_one(candidate, candidate.name))
+
+    if not dry_run:
+        _write_reconcile_log(hermes_home, results)
+    return results
+
+
+def _read_prior_state(profile_dir: Path) -> str | None:
+    """Return the ``gateway_state`` string stored in gateway_state.json.
+
+    Any problem with the file counts as "no prior state" so that one
+    corrupt profile cannot abort the whole reconciliation.
+
+    Args:
+        profile_dir: Directory that holds ``gateway_state.json``.
+
+    Returns:
+        The recorded state, or None when the file is missing, cannot be
+        read, is not valid UTF-8 JSON, does not hold a JSON object, or
+        its ``gateway_state`` field is absent, null, or not a string.
+    """
+    state_file = profile_dir / "gateway_state.json"
+    try:
+        payload = json.loads(state_file.read_text(encoding="utf-8"))
+    except FileNotFoundError:
+        # A profile whose gateway never ran has no state file; that is
+        # the normal case, so stay quiet.
+        return None
+    except (OSError, ValueError):
+        # ValueError covers both json.JSONDecodeError and the
+        # UnicodeDecodeError raised for non-UTF-8 bytes.
+        log.warning(
+            "could not read %s; treating as no prior state", state_file,
+        )
+        return None
+
+    # Valid JSON is not necessarily an object ("[]", "null", "3", ...),
+    # and the field itself may hold something other than a string.
+    # Returning such a value would break the caller's set-membership
+    # test when it is unhashable (list / dict).
+    state = payload.get("gateway_state") if isinstance(payload, dict) else None
+    if state is None and isinstance(payload, dict):
+        return None
+    if not isinstance(state, str):
+        log.warning(
+            "could not read %s; treating as no prior state", state_file,
+        )
+        return None
+    return state
+
+
+def _cleanup_stale_runtime_files(profile_dir: Path) -> None:
+    """Delete every file named in ``_STALE_RUNTIME_FILES`` from ``profile_dir``.
+
+    Those files (gateway.pid, processes.json) refer to PIDs of the
+    previous container's process namespace and would mislead the freshly
+    started gateway's process-mismatch checks. Files that do not exist
+    are ignored.
+    """
+    stale_paths = [profile_dir / filename for filename in _STALE_RUNTIME_FILES]
+    for stale in stale_paths:
+        stale.unlink(missing_ok=True)
+
+
+def _register_service(scandir: Path, profile: str, *, start: bool) -> None:
+    """Recreate the s6 service slot for one profile.
+
+    Mirrors the rendering in :func:`S6ServiceManager.register_profile_gateway`,
+    but here we control the start state directly via the ``down`` marker
+    file (s6-svscan honors it on rescan). Cannot use the manager
+    directly because the cont-init.d phase runs as root before
+    s6-svscan starts scanning the dynamic scandir — the manager's
+    ``s6-svscanctl -a`` call would fail with no control socket.
+
+    Publication: build the new layout in a sibling temp directory and
+    rename it into place via :meth:`Path.replace`. This matches
+    :meth:`S6ServiceManager.register_profile_gateway` (PR #30136
+    review item O4) and protects against a half-populated dir if the
+    script is interrupted mid-write. The rename is a single step only
+    when no slot exists yet; a pre-existing slot is deleted first, so in
+    that case there is a short window in which the slot is absent.
+
+    Args:
+        scandir: Directory in which ``gateway-<profile>/`` is created.
+        profile: Profile name; checked with ``validate_profile_name``
+            before anything is written.
+        start: When False a ``down`` marker file is added to the slot.
+
+    Raises:
+        Whatever ``validate_profile_name`` raises for a rejected name,
+        and any exception from building or publishing the slot (the temp
+        directory is removed before the exception is re-raised).
+    """
+    import shutil
+
+    from hermes_cli.service_manager import (
+        S6ServiceManager,
+        _seed_supervise_skeleton,
+        validate_profile_name,
+    )
+
+    validate_profile_name(profile)
+    service_dir = scandir / f"gateway-{profile}"
+    tmp_dir = service_dir.with_name(service_dir.name + ".tmp")
+
+    # Wipe any leftover tmp from a previous interrupted run.
+    if tmp_dir.exists():
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+    tmp_dir.mkdir(parents=True)
+
+    try:
+        (tmp_dir / "type").write_text("longrun\n")
+
+        # Reuse the manager's run-script rendering — single source of
+        # truth so register_profile_gateway and reconcile_profile_gateways
+        # stay consistent. extra_env is empty here; users who need
+        # per-profile env can set it via the profile's config.yaml
+        # (which the gateway itself loads).
+        run = tmp_dir / "run"
+        run.write_text(S6ServiceManager._render_run_script(profile, extra_env={}))
+        run.chmod(0o755)
+
+        # Persistent log rotation (OQ8-C).
+        log_subdir = tmp_dir / "log"
+        log_subdir.mkdir()
+        log_run = log_subdir / "run"
+        log_run.write_text(S6ServiceManager._render_log_run(profile))
+        log_run.chmod(0o755)
+
+        # The presence of a `down` file tells s6-supervise to NOT
+        # start the service when s6-svscan picks it up. User brings
+        # it up explicitly with `hermes -p <profile> gateway start`
+        # (which routes through the Phase 4
+        # _dispatch_via_service_manager_if_s6 helper to `s6-svc -u`).
+        if not start:
+            (tmp_dir / "down").touch()
+
+        # Pre-create the supervise/ skeleton with hermes ownership
+        # BEFORE we publish the slot. Mirrors the same pre-creation
+        # step in S6ServiceManager.register_profile_gateway — when
+        # s6-svscan picks the published slot up, the s6-supervise it
+        # spawns will EEXIST our dirs/FIFOs and inherit hermes
+        # ownership, so runtime s6-svc / s6-svstat / s6-svwait calls
+        # (all dispatched as the hermes user) won't hit EACCES. See
+        # ``_seed_supervise_skeleton`` in service_manager.py for the
+        # full rationale.
+        _seed_supervise_skeleton(tmp_dir)
+
+        # Publish. Renaming onto an existing non-empty directory fails
+        # with OSError, so a slot left by a previous reconcile pass is
+        # removed first and the rename then lands on a free name. The
+        # swap is therefore a single rename only when no previous slot
+        # exists.
+        if service_dir.exists():
+            shutil.rmtree(service_dir)
+        tmp_dir.replace(service_dir)
+    except Exception:
+        # Never leave a half-built temp dir behind; the error itself is
+        # passed on to the caller unchanged.
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+        raise
+
+
+def _write_reconcile_log(
+    hermes_home: Path, actions: list[ReconcileAction],
+) -> None:
+    """Append one line per action to ``<hermes_home>/logs/container-boot.log``.
+
+    The dedicated file lets operators answer "why didn't my profile come
+    back up" by grepping for ``profile=<name>`` without reading through
+    unrelated agent activity.
+
+    The file is size-bounded: once it has reached ``_LOG_ROTATE_BYTES``
+    it is renamed to ``container-boot.log.1`` (overwriting the previous
+    rotation) before the new lines are appended, so at most two files
+    are kept. A failed rotation is only logged; the entries are then
+    appended to the existing file.
+    """
+    import time
+
+    logs_root = hermes_home / "logs"
+    logs_root.mkdir(parents=True, exist_ok=True)
+    current = logs_root / "container-boot.log"
+    rotated = logs_root / "container-boot.log.1"
+
+    # Rotation happens before the append so that new entries start a
+    # fresh file once the threshold was crossed by an earlier boot.
+    try:
+        needs_rotation = (
+            current.exists() and current.stat().st_size >= _LOG_ROTATE_BYTES
+        )
+        if needs_rotation:
+            current.replace(rotated)
+    except OSError as exc:
+        # Non-fatal: better an oversized log than a lost entry.
+        log.warning("could not rotate %s: %s", current, exc)
+
+    stamp = time.strftime("%Y-%m-%dT%H:%M:%S%z")
+    lines = [
+        f"{stamp} profile={a.profile} prior_state={a.prior_state} "
+        f"action={a.action}\n"
+        for a in actions
+    ]
+    with current.open("a", encoding="utf-8") as handle:
+        handle.writelines(lines)
+
+
+# 256 KiB soft cap on container-boot.log; rotated to .1 when crossed.
+# At ~80 B per reconcile-action line this is ~3000 lines, or about a
+# year of daily reboots on a 5-profile container. Two files = ~512 KiB
+# worst case. Tuned for visibility (small enough to grep / cat without
+# scrolling forever) more than space (the persistent volume has GB).
+# Defined below _write_reconcile_log, its only reader; that is fine
+# because the name is looked up when the function runs, not when it is
+# defined.
+_LOG_ROTATE_BYTES = 256 * 1024
+
+
+def main() -> int:
+    """Run one reconciliation pass and print a summary line per profile.
+
+    Invoked from /etc/cont-init.d/02-reconcile-profiles. The locations
+    come from the environment: ``HERMES_HOME`` (default ``/opt/data``)
+    and ``S6_PROFILE_GATEWAY_SCANDIR`` (default ``/run/service``).
+
+    Returns:
+        Always 0; a failure surfaces as an exception instead.
+    """
+    env = os.environ
+    home_dir = Path(env.get("HERMES_HOME", "/opt/data"))
+    service_root = Path(env.get("S6_PROFILE_GATEWAY_SCANDIR", "/run/service"))
+
+    outcomes = reconcile_profile_gateways(
+        hermes_home=home_dir, scandir=service_root,
+    )
+    for outcome in outcomes:
+        print(
+            f"reconcile: profile={outcome.profile} "
+            f"prior_state={outcome.prior_state} action={outcome.action}"
+        )
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/hermes_cli/curses_ui.py
+++ b/hermes_cli/curses_ui.py
@ -71,7 +71,7 @@ def curses_checklist(
                curses.use_default_colors()
                curses.init_pair(1, curses.COLOR_GREEN, -1)
                curses.init_pair(2, curses.COLOR_YELLOW, -1)
-                curses.init_pair(3, 8, -1)  # dim gray
+                curses.init_pair(3, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)  # dim gray
            cursor = 0
            scroll_offset = 0

--- a/hermes_cli/debug.py
+++ b/hermes_cli/debug.py
@ -14,6 +14,7 @@ Currently supports:
 import io
 import json
 import logging
+import re
 import sys
 import time
 import urllib.error
@ -36,6 +37,12 @@ _REDACTION_BANNER = (
    "run with --no-redact to disable]\n"
 )

+# Email-shaped tokens of the form local-part@domain.tld (TLD of two or
+# more letters). The lookbehind and lookahead reject a match that would
+# start or end in the middle of a longer run of address characters.
+_EMAIL_ADDRESS_RE = re.compile(
+    r"(?<![A-Za-z0-9._%+-])"
+    r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"
+    r"(?![A-Za-z0-9._%+-])"
+)
+

 # ---------------------------------------------------------------------------
 # Paste services — try paste.rs first, dpaste.com as fallback.
@ -398,7 +405,8 @@ def _redact_log_text(text: str) -> str:
        return text
    from agent.redact import redact_sensitive_text

-    return redact_sensitive_text(text, force=True)
+    text = redact_sensitive_text(text, force=True)
+    return _EMAIL_ADDRESS_RE.sub("[REDACTED_EMAIL]", text)


 def _capture_log_snapshot(
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@ -207,14 +207,69 @@ def _fail_and_issue(text: str, detail: str, fix: str, issues: list[str]) -> None
    issues.append(fix)


+def _check_s6_supervision(issues: list[str]) -> None:
+    """Report what s6 is supervising when running under the s6 container init.
+
+    Counterpart of :func:`_check_gateway_service_linger` for the
+    container case. Does nothing when the service-manager helpers cannot
+    be imported or when the detected service manager is not ``"s6"``, so
+    host runs get no extra output.
+
+    Output:
+      - one line each for the static ``main-hermes`` and ``dashboard``
+        services, saying whether they are up
+      - the number of per-profile gateway slots returned by
+        ``S6ServiceManager.list_profile_gateways()`` and how many of
+        them are running; the names are listed when there are at most 8
+
+    ``issues`` is currently not read or modified by this check.
+    """
+    try:
+        from hermes_cli.service_manager import (
+            S6ServiceManager,
+            detect_service_manager,
+        )
+    except Exception:
+        return
+
+    if detect_service_manager() != "s6":
+        return
+
+    _section("s6 Supervision")
+    manager = S6ServiceManager()
+
+    # The static services are symlinked under /run/service/ by s6-rc,
+    # so they are probed exactly like the dynamic gateway slots.
+    for service_name in ("main-hermes", "dashboard"):
+        if not manager.is_running(service_name):
+            check_info(f"{service_name}: down (expected if not enabled via env)")
+            continue
+        check_ok(f"{service_name}: up")
+
+    registered = manager.list_profile_gateways()
+    if not registered:
+        check_info("No per-profile gateways registered yet — create one with `hermes profile create <name>`")
+        return
+
+    running = [
+        name for name in registered if manager.is_running(f"gateway-{name}")
+    ]
+    summary = f"Per-profile gateways: {len(running)}/{len(registered)} supervised up"
+    # Keep the line short: names are only spelled out for small setups.
+    if len(registered) <= 8:
+        summary += f" ({', '.join(sorted(registered))})"
+    check_ok(summary)
+
+
 def _check_gateway_service_linger(issues: list[str]) -> None:
-    """Warn when a systemd user gateway service will stop after logout."""
+    """Warn when a systemd user gateway service will stop after logout.
+
+    Skipped inside a container running under s6 — the linger concept
+    (user-systemd surviving SSH logout) doesn't apply there, and the
+    s6 supervision state is surfaced separately by
+    ``_check_s6_supervision``.
+    """
    try:
        from hermes_cli.gateway import (
            get_systemd_linger_status,
            get_systemd_unit_path,
            is_linux,
        )
+        from hermes_cli.service_manager import detect_service_manager
    except Exception as e:
        check_warn("Gateway service linger", f"(could not import gateway helpers: {e})")
        return
@ -222,6 +277,12 @@ def _check_gateway_service_linger(issues: list[str]) -> None:
    if not is_linux():
        return

+    # Inside a container under our s6 /init, _check_s6_supervision
+    # reports the live supervision state; the linger warning would be
+    # confusing here (no systemd, no logout, no "lingering" concept).
+    if detect_service_manager() == "s6":
+        return
+
    unit_path = get_systemd_unit_path()
    if not unit_path.exists():
        return
@ -508,6 +569,13 @@ def run_doctor(args):
            if should_fix:
                env_path.parent.mkdir(parents=True, exist_ok=True)
                env_path.touch()
+                # .env holds API keys — restrict to owner-only access from
+                # creation. touch() obeys umask which is commonly 0o022,
+                # leaving the file world-readable; tighten explicitly.
+                try:
+                    os.chmod(str(env_path), 0o600)
+                except OSError:
+                    pass
                check_ok(f"Created empty {_DHH}/.env")
                check_info("Run 'hermes setup' to configure API keys")
                fixed_count += 1
@ -744,7 +812,18 @@ def run_doctor(args):
                    "(should be under 'model:' section)"
                )
                if should_fix:
-                    model_section = raw_config.setdefault("model", {})
+                    # Coerce scalar/None ``model:`` into a dict before mutation —
+                    # ``setdefault("model", {})`` would return an existing scalar
+                    # and then ``model_section[k] = ...`` would raise TypeError.
+                    raw_model = raw_config.get("model")
+                    if isinstance(raw_model, dict):
+                        model_section = raw_model
+                    elif isinstance(raw_model, str) and raw_model.strip():
+                        model_section = {"default": raw_model.strip()}
+                        raw_config["model"] = model_section
+                    else:
+                        model_section = {}
+                        raw_config["model"] = model_section
                    for k in stale_root_keys:
                        if not model_section.get(k):
                            model_section[k] = raw_config.pop(k)
@ -984,6 +1063,7 @@ def run_doctor(args):
            pass

    _check_gateway_service_linger(issues)
+    _check_s6_supervision(issues)

    if sys.platform != "win32":
        _section("Command Installation")
@ -1076,6 +1156,26 @@ def run_doctor(args):
    
    # Docker (optional)
    terminal_env = os.getenv("TERMINAL_ENV", "local")
+    try:
+        from hermes_constants import is_container as _is_container
+        running_in_container = _is_container()
+    except Exception:
+        running_in_container = False
+
+    if running_in_container:
+        # Inside our container the Docker terminal backend is not
+        # configured by default (Docker-in-Docker isn't set up); the
+        # local backend is the intended one. Skip the noisy "docker
+        # not found" warning. If the user has explicitly chosen
+        # TERMINAL_ENV=docker inside the container they likely mounted
+        # /var/run/docker.sock, so fall through to the normal check.
+        if terminal_env != "docker":
+            check_info(
+                "Running inside a container — using local terminal backend "
+                "(docker-in-docker is not configured by default)"
+            )
+            # Skip to next section; Docker isn't relevant here.
+            terminal_env = "local"
    if terminal_env == "docker":
        if _safe_which("docker"):
            # Check if docker daemon is running
@ -1098,6 +1198,8 @@ def run_doctor(args):
        check_ok("docker", "(optional)")
    elif _is_termux():
        check_info("Docker backend is not available inside Termux (expected on Android)")
+    elif running_in_container:
+        pass  # already explained above
    else:
        check_warn("docker not found", "(optional)")
    
--- a/hermes_cli/dump.py
+++ b/hermes_cli/dump.py
@ -16,6 +16,7 @@ from pathlib import Path
 from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config
 from hermes_cli.env_loader import load_hermes_dotenv
 from hermes_constants import display_hermes_home
+from agent.skill_utils import is_excluded_skill_path


 def _get_git_commit(project_root: Path) -> str:
@ -69,6 +70,8 @@ def _count_skills(hermes_home: Path) -> int:
        return 0
    count = 0
    for item in skills_dir.rglob("SKILL.md"):
+        if is_excluded_skill_path(item):
+            continue
        count += 1
    return count

--- a/hermes_cli/env_loader.py
+++ b/hermes_cli/env_loader.py
@ -21,6 +21,68 @@ _CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
 # tests) don't spam the same warning multiple times.
 _WARNED_KEYS: set[str] = set()

+# Map of env-var name → source label ("bitwarden", etc.) for credentials
+# that were injected by an external secret source during load_hermes_dotenv().
+# Used by setup / `hermes model` flows to label detected credentials so
+# users understand WHERE a key came from when their .env doesn't contain it
+# directly (otherwise the "credentials detected ✓" line looks identical to
+# the .env case and they don't know Bitwarden is wired up).
+_SECRET_SOURCES: dict[str, str] = {}
+
+# HERMES_HOME paths we've already pulled external secrets for during this
+# process.  ``load_hermes_dotenv()`` is called at module-import time from
+# several hot modules (cli.py, hermes_cli/main.py, run_agent.py,
+# trajectory_compressor.py, gateway/run.py, ...), so without this guard the
+# Bitwarden status line gets printed 3-5x per startup.  Bitwarden's own
+# in-process cache prevents redundant network calls, but the print, the
+# config re-parse, and the ASCII sanitization sweep still ran every time.
+_APPLIED_HOMES: set[str] = set()
+
+
+def get_secret_source(env_var: str) -> str | None:
+    """Look up which external secret source supplied ``env_var``.
+
+    Only variables recorded in ``_SECRET_SOURCES`` carry a label (for
+    example ``"bitwarden"`` for keys pulled from Bitwarden Secrets Manager
+    during this process's ``load_hermes_dotenv()`` call).  Anything else —
+    values from ``.env``, the shell environment, or untracked names —
+    yields ``None``.
+
+    The label is descriptive metadata only: credential-pool persistence may
+    store it to explain the origin of a borrowed secret, but must never
+    treat it as authorization to persist the raw value.
+    """
+    if env_var in _SECRET_SOURCES:
+        return _SECRET_SOURCES[env_var]
+    return None
+
+
+def reset_secret_source_cache() -> None:
+    """Forget which HERMES_HOME paths have already had external secrets applied.
+
+    The first call to ``_apply_external_secret_sources(home_path)`` in a
+    process pulls from Bitwarden (or other configured backend), records the
+    applied keys in ``_SECRET_SOURCES``, and remembers ``home_path`` so
+    subsequent calls in the same process are no-ops.  Call this to force the
+    next call to re-pull — useful for tests, and for long-running processes
+    that want to refresh after a config change.
+
+    Only the set of applied homes is cleared; labels already recorded in
+    ``_SECRET_SOURCES`` are left in place.
+    """
+    _APPLIED_HOMES.clear()
+
+
+def format_secret_source_suffix(env_var: str) -> str:
+    """Build the ``" (from <source>)"`` suffix shown next to a detected credential.
+
+    Returns ``""`` when no external source is recorded for ``env_var`` —
+    i.e. the value came from ``.env`` or the shell, the implicit cases
+    users already understand.  Bitwarden gets its proper-cased product
+    name; any other source label is echoed verbatim so new backends
+    (e.g. 1Password, HashiCorp Vault) work without touching call sites.
+    """
+    source = get_secret_source(env_var)
+    if not source:
+        return ""
+    display_name = "Bitwarden" if source == "bitwarden" else source
+    return f" (from {display_name})"
+

 def _format_offending_chars(value: str, limit: int = 3) -> str:
    """Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
@ -102,6 +164,10 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
    This produces mangled values — e.g. a bot token duplicated 8×
    (see #8908).

+    Also strips embedded null bytes which crash ``os.environ[k] = v``
+    with ``ValueError: embedded null byte`` — typically introduced by
+    copy-pasting API keys from terminals or rich-text editors.
+
    We delegate to ``hermes_cli.config._sanitize_env_lines`` which
    already knows all valid Hermes env-var names and can split
    concatenated lines correctly.
@ -117,7 +183,11 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
    try:
        with open(path, **read_kw) as f:
            original = f.readlines()
-        sanitized = _sanitize_env_lines(original)
+        # Strip null bytes before _sanitize_env_lines so they never
+        # reach python-dotenv (which passes them to os.environ and
+        # crashes with ValueError).
+        stripped = [line.replace("\x00", "") for line in original]
+        sanitized = _sanitize_env_lines(stripped)
        if sanitized != original:
            import tempfile
            fd, tmp = tempfile.mkstemp(
@ -172,4 +242,103 @@ def load_hermes_dotenv(
        _load_dotenv_with_fallback(project_env_path, override=not loaded)
        loaded.append(project_env_path)

+    _apply_external_secret_sources(home_path)
+
    return loaded
+
+
+def _apply_external_secret_sources(home_path: Path) -> None:
+    """Pull secrets from external sources (currently Bitwarden) into env.
+
+    Runs AFTER dotenv loads so .env values are visible (they are used to
+    locate the access token) but BEFORE the rest of Hermes reads
+    ``os.environ`` for credentials.  External secret sources must never
+    block startup, so every failure mode here is swallowed: an unreadable
+    or mis-shaped ``secrets:`` section, a missing Bitwarden module, a
+    non-numeric ``cache_ttl_seconds`` (falls back to 300 seconds), and any
+    exception raised by the Bitwarden backend (reported on stderr).
+
+    Idempotent within a process: subsequent calls for the same
+    ``home_path`` are no-ops.  ``load_hermes_dotenv()`` runs at import
+    time from several hot modules (cli.py, hermes_cli/main.py,
+    run_agent.py, trajectory_compressor.py, ...), so without this guard
+    the Bitwarden status line would print 3-5x per CLI startup.  Use
+    ``reset_secret_source_cache()`` if you need to force a re-pull
+    (tests, future ``hermes secrets bitwarden sync`` from a long-running
+    process).
+
+    Args:
+        home_path: HERMES_HOME directory whose ``config.yaml`` is consulted.
+    """
+    home_key = str(Path(home_path).resolve())
+    if home_key in _APPLIED_HOMES:
+        return
+    # Mark the home as handled up front so a failing backend is not retried
+    # (and its error re-printed) by every later import-time call.
+    _APPLIED_HOMES.add(home_key)
+
+    try:
+        cfg = _load_secrets_config(home_path)
+    except Exception:  # noqa: BLE001 — config errors must not block startup
+        return
+
+    # A hand-edited config can put a scalar or list where a mapping is
+    # expected (``secrets: true``, ``bitwarden: yes``); treat that as
+    # "not configured" instead of crashing on ``.get``.
+    bw_cfg = cfg.get("bitwarden") if isinstance(cfg, dict) else None
+    if not isinstance(bw_cfg, dict) or not bw_cfg.get("enabled"):
+        return
+
+    try:
+        from agent.secret_sources.bitwarden import apply_bitwarden_secrets
+    except ImportError:
+        return
+
+    # ``cache_ttl_seconds: null`` or a non-numeric string would make
+    # ``float()`` raise; fall back to the documented default instead.
+    try:
+        cache_ttl_seconds = float(bw_cfg.get("cache_ttl_seconds", 300))
+    except (TypeError, ValueError):
+        cache_ttl_seconds = 300.0
+
+    try:
+        result = apply_bitwarden_secrets(
+            enabled=True,
+            access_token_env=bw_cfg.get("access_token_env", "BWS_ACCESS_TOKEN"),
+            project_id=bw_cfg.get("project_id", ""),
+            override_existing=bool(bw_cfg.get("override_existing", False)),
+            cache_ttl_seconds=cache_ttl_seconds,
+            auto_install=bool(bw_cfg.get("auto_install", True)),
+            server_url=str(bw_cfg.get("server_url", "") or "").strip(),
+            home_path=home_path,
+        )
+    except Exception as exc:  # noqa: BLE001 — backend errors must not block startup
+        print(
+            f"  Bitwarden Secrets Manager: {exc}",
+            file=sys.stderr,
+        )
+        return
+
+    if result.applied:
+        # Re-run the ASCII sanitization pass: BSM values are user-supplied
+        # and might have the same copy-paste corruption as a manually
+        # edited .env (see #6843).
+        _sanitize_loaded_credentials()
+        # Remember where these came from so the setup / `hermes model`
+        # flows can label detected credentials with "(from Bitwarden)" —
+        # otherwise users see "credentials ✓" with no hint that the value
+        # came from BSM rather than .env.
+        for name in result.applied:
+            _SECRET_SOURCES[name] = "bitwarden"
+        print(
+            f"  Bitwarden Secrets Manager: applied {len(result.applied)} "
+            f"secret{'s' if len(result.applied) != 1 else ''} "
+            f"({', '.join(sorted(result.applied))})",
+            file=sys.stderr,
+        )
+    if result.error:
+        print(
+            f"  Bitwarden Secrets Manager: {result.error}",
+            file=sys.stderr,
+        )
+    for warn in result.warnings:
+        print(
+            f"  Bitwarden Secrets Manager: {warn}",
+            file=sys.stderr,
+        )
+
+
+def _load_secrets_config(home_path: Path) -> dict:
+    """Read just the ``secrets:`` section out of config.yaml.
+
+    Imported lazily and isolated from the main config loader so a
+    malformed config can't take down dotenv loading entirely.
+
+    Args:
+        home_path: HERMES_HOME directory expected to contain ``config.yaml``.
+
+    Returns:
+        The ``secrets`` mapping, or ``{}`` when the file is missing, PyYAML
+        is not installed, the file cannot be read or parsed, the document's
+        top level is not a mapping, or ``secrets`` itself is not a mapping.
+    """
+    config_path = home_path / "config.yaml"
+    if not config_path.exists():
+        return {}
+    try:
+        import yaml  # type: ignore
+    except ImportError:
+        return {}
+    try:
+        with open(config_path, "r", encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+    except Exception:  # noqa: BLE001
+        return {}
+    # safe_load returns whatever the document's top level is — a list or a
+    # scalar has no ``.get`` — so only mappings are trusted from here on.
+    if not isinstance(data, dict):
+        return {}
+    secrets = data.get("secrets")
+    return secrets if isinstance(secrets, dict) else {}
--- a/hermes_cli/fallback_cmd.py
+++ b/hermes_cli/fallback_cmd.py
@ -21,6 +21,8 @@ from __future__ import annotations
 import copy
 from typing import Any, Dict, List, Optional

+from hermes_cli.fallback_config import get_fallback_chain
+

 # ---------------------------------------------------------------------------
 # Helpers
@ -30,20 +32,11 @@ def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return the normalized fallback chain as a list of dicts.

    Accepts both the new list format (``fallback_providers``) and the legacy
-    single-dict format (``fallback_model``).  The returned list is always a
-    fresh copy — callers can mutate without touching the config dict.
+    ``fallback_model`` format. When both are present, the effective chain is
+    merged with ``fallback_providers`` entries kept first. The returned list is
+    always a fresh copy — callers can mutate without touching the config dict.
    """
-    chain = config.get("fallback_providers") or []
-    if isinstance(chain, list):
-        result = [dict(e) for e in chain if isinstance(e, dict) and e.get("provider") and e.get("model")]
-        if result:
-            return result
-    legacy = config.get("fallback_model")
-    if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"):
-        return [dict(legacy)]
-    if isinstance(legacy, list):
-        return [dict(e) for e in legacy if isinstance(e, dict) and e.get("provider") and e.get("model")]
-    return []
+    return get_fallback_chain(config)


 def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
--- a/hermes_cli/fallback_config.py
+++ b/hermes_cli/fallback_config.py
@ -0,0 +1,72 @@
+"""Helpers for reading the effective fallback provider chain from config."""
+
+from __future__ import annotations
+
+from typing import Any
+
+
+def _normalized_base_url(value: Any) -> str:
+    """Return ``value`` trimmed of surrounding whitespace and trailing slashes.
+
+    Anything that is not a string normalizes to ``""``.
+    """
+    return value.strip().rstrip("/") if isinstance(value, str) else ""
+
+
+def _iter_fallback_entries(raw: Any) -> list[dict[str, Any]]:
+    """Normalize a raw fallback config value into a list of entry dicts.
+
+    ``raw`` may be a single mapping or a list of mappings; any other type
+    yields an empty list.  Non-dict items and items lacking a non-blank
+    ``provider`` or ``model`` are dropped.  Each surviving entry is a
+    shallow copy with ``provider`` and ``model`` stripped, and ``base_url``
+    replaced by its normalized form when that form is non-empty.
+    """
+    if isinstance(raw, list):
+        candidates = raw
+    elif isinstance(raw, dict):
+        candidates = [raw]
+    else:
+        return []
+
+    cleaned: list[dict[str, Any]] = []
+    for candidate in candidates:
+        if not isinstance(candidate, dict):
+            continue
+        provider = str(candidate.get("provider") or "").strip()
+        model = str(candidate.get("model") or "").strip()
+        if not (provider and model):
+            continue
+
+        # Copy first so the caller's config dict is never mutated.
+        item = {**candidate, "provider": provider, "model": model}
+        url = _normalized_base_url(candidate.get("base_url"))
+        if url:
+            item["base_url"] = url
+        cleaned.append(item)
+    return cleaned
+
+
+def _entry_identity(entry: dict[str, Any]) -> tuple[str, str, str]:
+    """Return the case-insensitive ``(provider, model, base_url)`` dedup key."""
+    provider, model = (
+        str(entry.get(field) or "").strip().lower()
+        for field in ("provider", "model")
+    )
+    base_url = _normalized_base_url(entry.get("base_url")).lower()
+    return (provider, model, base_url)
+
+
+def get_fallback_chain(config: dict[str, Any] | None) -> list[dict[str, Any]]:
+    """Return the effective fallback chain merged across old and new config keys.
+
+    Entries from ``fallback_providers`` come first, in their configured
+    order.  Legacy ``fallback_model`` entries follow, except those whose
+    provider/model/base_url identity matches an entry already in the chain.
+    A ``None`` config is treated as empty.  Every returned entry is a fresh
+    dict copy.
+    """
+
+    source = config or {}
+    # Dicts preserve insertion order, so keeping the first entry per
+    # identity gives both the de-duplication and the final ordering.
+    by_identity: dict[tuple[str, str, str], dict[str, Any]] = {}
+
+    for key in ("fallback_providers", "fallback_model"):
+        for entry in _iter_fallback_entries(source.get(key)):
+            by_identity.setdefault(_entry_identity(entry), entry)
+
+    return list(by_identity.values())
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@ -981,6 +981,18 @@ def get_gateway_runtime_snapshot(system: bool = False) -> GatewayRuntimeSnapshot
    from hermes_constants import is_container

    if is_linux() and is_container():
+        # Phase 4: report s6 supervision when running under our /init.
+        # Other container runtimes (or containers built before Phase 2)
+        # still get the original "docker (foreground)" label.
+        try:
+            from hermes_cli.service_manager import detect_service_manager
+            if detect_service_manager() == "s6":
+                return GatewayRuntimeSnapshot(
+                    manager="s6 (container supervisor)",
+                    gateway_pids=gateway_pids,
+                )
+        except Exception:
+            pass  # Fall through to the legacy label on any detection error.
        return GatewayRuntimeSnapshot(
            manager="docker (foreground)",
            gateway_pids=gateway_pids,
@ -1202,7 +1214,17 @@ def _systemd_operational(system: bool = False) -> bool:


 def _container_systemd_operational() -> bool:
-    """Return True when a container exposes working user or system systemd."""
+    """Return True when a container exposes working user or system systemd.
+
+    This is NOT our Hermes Docker image — that one runs s6-overlay as
+    PID 1 (since Phase 2 of the s6-overlay supervision plan) and is
+    detected via ``service_manager.detect_service_manager() == "s6"``.
+    This function handles the "container managed by something else"
+    case: systemd-nspawn, certain k8s pods, containers built FROM
+    systemd-bearing distros where the user has wired systemd as their
+    init. In those environments systemctl behaves identically to the
+    host case, so we fall through to the normal systemd code paths.
+    """
    if _systemd_operational(system=False):
        return True
    if _systemd_operational(system=True):
@ -3327,34 +3349,9 @@ _PLATFORMS = [
             "help": "For DMs, this is your user ID. You can set it later by typing /set-home in chat."},
        ],
    },
-    {
-        "key": "discord",
-        "label": "Discord",
-        "emoji": "💬",
-        "token_var": "DISCORD_BOT_TOKEN",
-        "setup_instructions": [
-            "1. Go to https://discord.com/developers/applications → New Application",
-            "2. Go to Bot → Reset Token → copy the bot token",
-            "3. Enable: Bot → Privileged Gateway Intents → Message Content Intent",
-            "4. Invite the bot to your server:",
-            "   OAuth2 → URL Generator → check BOTH scopes:",
-            "     - bot",
-            "     - applications.commands  (required for slash commands!)",
-            "   Bot Permissions: Send Messages, Read Message History, Attach Files",
-            "   Copy the URL and open it in your browser to invite.",
-            "5. Get your user ID: enable Developer Mode in Discord settings,",
-            "   then right-click your name → Copy ID",
-        ],
-        "vars": [
-            {"name": "DISCORD_BOT_TOKEN", "prompt": "Bot token", "password": True,
-             "help": "Paste the token from step 2 above."},
-            {"name": "DISCORD_ALLOWED_USERS", "prompt": "Allowed user IDs or usernames (comma-separated)", "password": False,
-             "is_allowlist": True,
-             "help": "Paste your user ID from step 5 above."},
-            {"name": "DISCORD_HOME_CHANNEL", "prompt": "Home channel ID (for cron/notification delivery, or empty to set later with /set-home)", "password": False,
-             "help": "Right-click a channel → Copy Channel ID (requires Developer Mode)."},
-        ],
-    },
+    # Discord moved to plugins/platforms/discord/ — its setup metadata is
+    # discovered dynamically via _all_platforms() from the platform registry
+    # entry registered by plugins/platforms/discord/adapter.py::register().
    {
        "key": "slack",
        "label": "Slack",
@ -3762,7 +3759,12 @@ def _platform_status(platform: dict) -> str:
                configured = bool(entry.is_connected(synthetic))
            except Exception:
                configured = False
-        if not configured:
+        else:
+            # No is_connected hook — fall back to check_fn as a coarse
+            # "are deps present" gate. Don't fall back when is_connected
+            # is defined and returned False; that would let "SDK is
+            # installed" override "no token configured" and incorrectly
+            # report the platform as ready.
            try:
                configured = bool(entry.check_fn())
            except Exception:
@ -4018,15 +4020,11 @@ def _setup_dingtalk():
        client_id, client_secret = result
        save_env_value("DINGTALK_CLIENT_ID", client_id)
        save_env_value("DINGTALK_CLIENT_SECRET", client_secret)
-        save_env_value("DINGTALK_ALLOW_ALL_USERS", "true")
        print()
        print_success(f"{emoji} {label} configured via QR scan!")
    else:
        # ── Manual entry ──
        _setup_standard_platform(dingtalk_platform)
-        # Also enable allow-all by default for convenience
-        if get_env_value("DINGTALK_CLIENT_ID"):
-            save_env_value("DINGTALK_ALLOW_ALL_USERS", "true")


 def _setup_wecom():
@ -4747,10 +4745,14 @@ def _builtin_setup_fn(key: str):
    from hermes_cli import setup as _s
    return {
        "telegram": _s._setup_telegram,
-        "discord": _s._setup_discord,
+        # discord moved into the plugin: setup_fn is registered by
+        # plugins/platforms/discord/adapter.py::register() and dispatched
+        # via the plugin path in _configure_platform().
        "slack": _s._setup_slack,
        "matrix": _s._setup_matrix,
-        "mattermost": _s._setup_mattermost,
+        # mattermost moved into the plugin: setup_fn is registered by
+        # plugins/platforms/mattermost/adapter.py::register() and dispatched
+        # via the plugin path in _configure_platform().
        "bluebubbles": _s._setup_bluebubbles,
        "webhooks": _s._setup_webhooks,
        "signal": _setup_signal,
@ -5025,6 +5027,108 @@ def gateway_setup():
 # Main Command Handler
 # =============================================================================

+def _dispatch_via_service_manager_if_s6(
+    action: str, profile: str | None = None,
+) -> bool:
+    """Route a gateway lifecycle action through s6 when s6 is the service manager.
+
+    Returns True iff the action was dispatched (the caller should
+    ``return``); False when s6 is not the detected service manager or
+    ``action`` is not one of ``start`` / ``stop`` / ``restart`` — the
+    caller then continues with the host-side code path.
+
+    ``profile`` defaults to the current one.  The target service is
+    ``gateway-<profile>``; that s6 slot is created either by the Phase 4
+    profile-create hook or by the container-boot reconciler
+    (cont-init.d/02-…).  If the slot doesn't exist or s6 reports an error,
+    the named errors from :mod:`hermes_cli.service_manager` are printed as
+    a one-line CLI message and the process exits with status 1 (no raw
+    ``CalledProcessError`` traceback).
+    """
+    from hermes_cli.service_manager import (
+        GatewayNotRegisteredError,
+        S6CommandError,
+        detect_service_manager,
+        get_service_manager,
+    )
+
+    if detect_service_manager() != "s6":
+        return False
+    if profile is None:
+        # _profile_suffix() returns the bare profile name for
+        # HERMES_HOME=<root>/profiles/<name>, "" for the default root,
+        # or a hash for unrelated paths. Map "" → "default" so the
+        # default-profile gateway is reachable as gateway-default.
+        profile = _profile_suffix() or "default"
+    manager = get_service_manager()
+    target = f"gateway-{profile}"
+    if action not in ("start", "stop", "restart"):
+        return False
+    try:
+        # The manager method names match the action names one-to-one.
+        getattr(manager, action)(target)
+    except (GatewayNotRegisteredError, S6CommandError) as exc:
+        print(f"✗ {exc}")
+        sys.exit(1)
+    return True
+
+
+def _dispatch_all_via_service_manager_if_s6(action: str) -> bool:
+    """Inside a container with s6, dispatch ``--all`` lifecycle to every
+    registered profile gateway.
+
+    Returns True iff dispatched (caller should ``return``); False
+    otherwise — caller continues with the host-side code path.
+
+    Without this, ``hermes gateway stop --all`` and ``... restart --all``
+    fall through to ``kill_gateway_processes(all_profiles=True)``, which
+    just ``pkill``s every gateway process. s6-supervise observes the
+    crash and restarts each one ~1s later — so ``--all`` ends up
+    *kicking* every gateway instead of *stopping* it. By iterating
+    ``list_profile_gateways()`` and sending the lifecycle command
+    through the service manager we get the intended semantics (s6's
+    ``want up``/``want down`` flips correctly so supervise stays down
+    after a stop).
+
+    ``action`` is one of ``stop`` / ``restart`` (``start --all`` isn't
+    a supported CLI surface).
+
+    Every profile is attempted even if an earlier one fails.  When at
+    least one profile failed, the failures are printed and the process
+    exits with status 1 — matching the single-profile dispatcher — so
+    scripts can detect a partial failure from the exit code.
+
+    Raises:
+        SystemExit: with code 1 when any profile gateway could not be
+            stopped/restarted.
+    """
+    from hermes_cli.service_manager import (
+        detect_service_manager,
+        get_service_manager,
+    )
+
+    if detect_service_manager() != "s6":
+        return False
+    if action not in ("stop", "restart"):
+        return False
+    mgr = get_service_manager()
+    profiles = mgr.list_profile_gateways()
+    if not profiles:
+        print("✗ No profile gateways registered under s6")
+        return True
+    fn = mgr.stop if action == "stop" else mgr.restart
+    errors: list[tuple[str, Exception]] = []
+    for profile in profiles:
+        service_name = f"gateway-{profile}"
+        try:
+            fn(service_name)
+        except Exception as exc:  # noqa: BLE001 — report and continue
+            errors.append((profile, exc))
+    succeeded = len(profiles) - len(errors)
+    verb = "stopped" if action == "stop" else "restarted"
+    if succeeded:
+        print(f"✓ {verb.capitalize()} {succeeded} profile gateway(s) under s6")
+    for profile, exc in errors:
+        print(f"✗ Could not {action} gateway-{profile}: {exc}")
+    if errors:
+        # Report everything first, then fail the command as a whole.
+        sys.exit(1)
+    return True
+
+
 def gateway_command(args):
    """Handle gateway subcommands."""
    try:
@ -5109,6 +5213,21 @@ def _gateway_command_inner(args):
            print("  nohup hermes gateway run > ~/.hermes/logs/gateway.log 2>&1 &  # background")
            sys.exit(1)
        elif is_container():
+            # Phase 4: inside a container with s6 the gateway service is
+            # auto-registered when the profile is created (and reconciled
+            # at every container boot). `install` is therefore informational.
+            from hermes_cli.service_manager import detect_service_manager
+            if detect_service_manager() == "s6":
+                print("Per-profile gateways are auto-registered when you create a profile.")
+                print()
+                print("  hermes profile create <name>     # creates the s6 service slot")
+                print("  hermes -p <name> gateway start   # bring it up via s6")
+                print("  hermes status                    # see currently-supervised gateways")
+                return
+            # Fallback for pre-s6 containers or other container runtimes
+            # we haven't taught about supervision (Podman without our
+            # /init, k8s plain runs, etc.) — the historical guidance still
+            # applies.
            print("Service installation is not needed inside a Docker container.")
            print("The container runtime is your service manager — use Docker restart policies instead:")
            print()
@ -5139,6 +5258,13 @@ def _gateway_command_inner(args):
            from hermes_cli import gateway_windows
            gateway_windows.uninstall()
        elif is_container():
+            from hermes_cli.service_manager import detect_service_manager
+            if detect_service_manager() == "s6":
+                print("Per-profile gateways are auto-unregistered when you delete the profile.")
+                print()
+                print("  hermes profile delete <name>     # tears down the s6 service slot")
+                print("  hermes -p <name> gateway stop    # stop without deleting the profile")
+                return
            print("Service uninstall is not applicable inside a Docker container.")
            print("To stop the gateway, stop or remove the container:")
            print()
@ -5153,6 +5279,14 @@ def _gateway_command_inner(args):
        system = getattr(args, 'system', False)
        start_all = getattr(args, 'all', False)

+        # Phase 4: inside a container with s6, dispatch via the service
+        # manager instead of falling through to systemd/launchd/windows.
+        # `--all` isn't meaningful here (each profile has its own service
+        # slot — start them individually via `hermes -p <name> gateway
+        # start`), so just bring up the current profile's slot.
+        if not start_all and _dispatch_via_service_manager_if_s6("start"):
+            return
+
        if start_all:
            # Kill all stale gateway processes across all profiles before starting
            killed = kill_gateway_processes(all_profiles=True)
@ -5182,6 +5316,11 @@ def _gateway_command_inner(args):
            print("To enable systemd: add systemd=true to /etc/wsl.conf and run 'wsl --shutdown' from PowerShell.")
            sys.exit(1)
        elif is_container():
+            # Reached only when s6 ISN'T running (the early dispatch
+            # above handles the s6 case). Pre-s6 containers or other
+            # container runtimes that don't ship our /init get the
+            # historical guidance: the gateway is the container's main
+            # process, so use docker lifecycle commands.
            print("Service start is not applicable inside a Docker container.")
            print("The gateway runs as the container's main process.")
            print()
@ -5198,6 +5337,15 @@ def _gateway_command_inner(args):
        stop_all = getattr(args, 'all', False)
        system = getattr(args, 'system', False)

+        # Phase 4: inside a container with s6, dispatch via the service
+        # manager. ``--all`` iterates every registered profile gateway
+        # through s6 (otherwise it would fall through to ``pkill``,
+        # which s6-supervise observes as a crash and immediately restarts).
+        if stop_all and _dispatch_all_via_service_manager_if_s6("stop"):
+            return
+        if not stop_all and _dispatch_via_service_manager_if_s6("stop"):
+            return
+
        if stop_all:
            # --all: kill every gateway process on the machine
            service_available = False
@ -5267,6 +5415,16 @@ def _gateway_command_inner(args):
        restart_all = getattr(args, 'all', False)
        service_configured = False

+        # Phase 4: inside a container with s6, dispatch via the service
+        # manager (s6-svc -t restarts the supervised process). ``--all``
+        # iterates every registered profile gateway through s6; without
+        # this it would fall through to ``pkill``, which s6-supervise
+        # would observe as a crash and immediately restart anyway.
+        if restart_all and _dispatch_all_via_service_manager_if_s6("restart"):
+            return
+        if not restart_all and _dispatch_via_service_manager_if_s6("restart"):
+            return
+
        if restart_all:
            # --all: stop every gateway process across all profiles, then start fresh
            service_stopped = False
--- a/hermes_cli/gateway_windows.py
+++ b/hermes_cli/gateway_windows.py
@ -365,7 +365,9 @@ def _write_task_script() -> Path:

    content = _build_gateway_cmd_script(python_path, working_dir, hermes_home, profile_arg)
    script_path = get_task_script_path()
-    script_path.write_text(content, encoding="utf-8", newline="")
+    tmp = script_path.with_suffix(".tmp")
+    tmp.write_text(content, encoding="utf-8", newline="")
+    tmp.replace(script_path)
    return script_path


@ -436,7 +438,9 @@ def _install_startup_entry(script_path: Path) -> Path:
    """Write the Startup-folder fallback launcher. Returns its path."""
    entry = get_startup_entry_path()
    entry.parent.mkdir(parents=True, exist_ok=True)
-    entry.write_text(_build_startup_launcher(script_path), encoding="utf-8", newline="")
+    tmp = entry.with_suffix(".tmp")
+    tmp.write_text(_build_startup_launcher(script_path), encoding="utf-8", newline="")
+    tmp.replace(entry)
    return entry


--- a/hermes_cli/kanban.py
+++ b/hermes_cli/kanban.py
@ -550,6 +550,39 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu
    p_unblock = sub.add_parser("unblock", help="Return one or more blocked/scheduled tasks to ready")
    p_unblock.add_argument("task_ids", nargs="+")

+    p_promote = sub.add_parser(
+        "promote",
+        help="Manually move one or more todo/blocked tasks to ready (recovery path)",
+    )
+    p_promote.add_argument("task_id")
+    p_promote.add_argument(
+        "reason",
+        nargs="*",
+        help="Audit-trail reason (recorded on the task_events row)",
+    )
+    p_promote.add_argument(
+        "--ids",
+        nargs="+",
+        default=None,
+        help="Additional task ids to promote with the same reason (bulk mode)",
+    )
+    p_promote.add_argument(
+        "--force",
+        action="store_true",
+        help="Promote even if parent dependencies are not yet done/archived",
+    )
+    p_promote.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Validate the promotion without mutating state",
+    )
+    p_promote.add_argument(
+        "--json",
+        dest="json",
+        action="store_true",
+        help="Emit machine-readable JSON result",
+    )
+
    p_archive = sub.add_parser("archive", help="Archive one or more tasks")
    p_archive.add_argument("task_ids", nargs="*",
                           help="Task ids to archive (default mode)")
@ -899,6 +932,7 @@ def kanban_command(args: argparse.Namespace) -> int:
        "block":    _cmd_block,
        "schedule": _cmd_schedule,
        "unblock":  _cmd_unblock,
+        "promote":  _cmd_promote,
        "archive":  _cmd_archive,
        "tail":     _cmd_tail,
        "dispatch": _cmd_dispatch,
@ -1955,6 +1989,57 @@ def _cmd_unblock(args: argparse.Namespace) -> int:
    return 0 if not failed else 1


+def _cmd_promote(args: argparse.Namespace) -> int:
+    """Handle ``kanban promote``: manually move tasks to ``ready``.
+
+    Promotes the positional ``task_id`` followed by any ``--ids`` extras
+    (first-seen order, duplicates dropped) through ``kb.promote_task`` on a
+    single connection, then reports the outcome either as JSON or as one
+    line per task (refusals go to stderr).
+
+    Returns 0 when every task was promoted (or validated, under
+    ``--dry-run``) and 1 when at least one was refused.
+    """
+    reason = " ".join(args.reason).strip() if args.reason else None
+    author = _profile_author()
+    as_json = getattr(args, "json", False)
+
+    # dict.fromkeys keeps first-seen order, so the positional id stays first
+    # and a repeated id collapses to a single promotion attempt.
+    requested = [args.task_id, *(getattr(args, "ids", None) or [])]
+    ids: list[str] = list(dict.fromkeys(requested))
+
+    results: list[dict[str, object]] = []
+    with kb.connect() as conn:
+        for tid in ids:
+            forced = bool(args.force)
+            dry = bool(args.dry_run)
+            ok, err = kb.promote_task(
+                conn, tid, actor=author, reason=reason, force=forced, dry_run=dry,
+            )
+            outcome: dict[str, object] = {
+                "task_id": tid,
+                "promoted": ok,
+                "dry_run": dry,
+                "forced": forced,
+                "reason": reason,
+                "error": err,
+            }
+            results.append(outcome)
+
+    exit_code = 1 if any(not r["promoted"] for r in results) else 0
+
+    if as_json:
+        # Single-id stays a flat object for back-compat; bulk emits a list.
+        payload: object = results if len(results) != 1 else results[0]
+        print(json.dumps(payload, indent=2, ensure_ascii=False))
+        return exit_code
+
+    if args.dry_run:
+        label, tag = "Would promote", " (dry)"
+    else:
+        label, tag = "Promoted", ""
+    suffix = f": {reason}" if reason else ""
+    for r in results:
+        if not r["promoted"]:
+            print(f"cannot promote {r['task_id']}: {r['error']}", file=sys.stderr)
+            continue
+        print(f"{label} {r['task_id']} -> ready{tag}{suffix}")
+    return exit_code
+
+
 def _cmd_archive(args: argparse.Namespace) -> int:
    ids = list(args.task_ids or [])
    purge_ids = list(getattr(args, "purge_ids", None) or [])
--- a/hermes_cli/kanban_db.py
+++ b/hermes_cli/kanban_db.py
@ -75,6 +75,7 @@ import json
 import os
 import re
 import secrets
+import shutil
 import sqlite3
 import subprocess
 import sys
@ -82,6 +83,7 @@ import threading
 import logging
 import time
 from dataclasses import dataclass, field
+from datetime import datetime
 from pathlib import Path
 from typing import Any, Iterable, Optional

@ -1005,6 +1007,131 @@ def _validate_sqlite_header(path: Path) -> None:
    )


+class KanbanDbCorruptError(RuntimeError):
+    """An existing kanban DB file failed its integrity checks.
+
+    Raised instead of silently recreating the schema over a damaged board
+    file, which would destroy the user's tasks. The instance exposes
+    ``db_path`` (the file that was refused), ``backup_path`` (the
+    timestamped copy made before refusing, or ``None`` when that copy
+    failed) and ``reason`` (why the file was judged corrupt).
+    """
+
+    def __init__(self, db_path: Path, backup_path: Optional[Path], reason: str):
+        if backup_path is None:
+            backup_str = "<backup failed>"
+        else:
+            backup_str = str(backup_path)
+        message = (
+            f"Refusing to open corrupt kanban DB at {db_path}: {reason}. "
+            f"Original preserved; backup at {backup_str}."
+        )
+        super().__init__(message)
+        self.db_path = db_path
+        self.backup_path = backup_path
+        self.reason = reason
+
+
+def _backup_corrupt_db(path: Path) -> Optional[Path]:
+    """Copy a corrupt DB, plus any WAL/SHM sidecars, to a timestamped backup.
+
+    The backup is written next to the original as
+    ``<name>.corrupt.<YYYYmmdd_HHMMSS>.bak``; a numeric component is added
+    when that name is already taken. Sidecar copies are best-effort and
+    never affect the return value.
+
+    Returns the backup path of the main DB file, or ``None`` when the main
+    copy failed (the caller still raises loudly in that case).
+
+    All writes stay inside the resolved DB's parent directory: the backup
+    name is built only from the resolved basename, never from
+    caller-supplied directory segments.
+    """
+    # Resolve once (collapsing ``..`` and symlinks) and pin the directory
+    # that every later write must land in.
+    source = path.resolve()
+    folder = source.parent
+    db_name = source.name
+    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+    backup = folder / f"{db_name}.corrupt.{stamp}.bak"
+    attempt = 0
+    while True:
+        # Containment re-check on every candidate. A basename cannot escape
+        # ``folder``, but keeping the test explicit lets static analyzers
+        # see the guarantee.
+        if backup.parent != folder:
+            return None
+        if not backup.exists():
+            break
+        attempt += 1
+        backup = folder / f"{db_name}.corrupt.{stamp}.{attempt}.bak"
+
+    try:
+        shutil.copy2(source, backup)
+    except OSError:
+        return None
+
+    for suffix in ("-wal", "-shm"):
+        sidecar = folder / (db_name + suffix)
+        if sidecar.parent != folder or not sidecar.exists():
+            continue
+        sidecar_backup = folder / (backup.name + suffix)
+        if sidecar_backup.parent != folder:
+            continue
+        try:
+            shutil.copy2(sidecar, sidecar_backup)
+        except OSError:
+            # Sidecars are a bonus; the main backup already succeeded.
+            pass
+    return backup
+
+
+def _guard_existing_db_is_healthy(path: Path) -> None:
+    """Refuse to proceed when an existing, non-empty DB file is corrupt.
+
+    Runs ``PRAGMA integrity_check`` through a short-lived probe
+    connection. The probe is opened read/write so SQLite can recover or
+    checkpoint a healthy WAL/hot-journal DB before it is judged. When the
+    check does not come back ``ok``, or SQLite refuses the file outright,
+    the DB (and any WAL/SHM sidecars) is copied to a timestamped backup
+    and :class:`KanbanDbCorruptError` is raised, so callers cannot
+    silently recreate the schema on top of a damaged file.
+
+    ``sqlite3.OperationalError`` (lock contention, busy, transient I/O)
+    is NOT treated as corruption: it propagates unchanged and no
+    ``.corrupt`` backup is made.
+
+    Returns without probing when the path cannot be resolved or stat'ed,
+    does not exist, is zero bytes (treated as fresh), or is already
+    listed in ``_INITIALIZED_PATHS``.
+
+    Path-trust note: ``path`` arrives via :func:`connect`, which takes it
+    from an explicit ``db_path`` argument, the :func:`kanban_db_path`
+    env-var chain, or the kanban-home default — sources Hermes treats as
+    user-controlled-but-trusted on the user's own machine. The path is
+    still resolved here first so stray ``..`` segments are collapsed
+    before any I/O, and backup writes stay in its parent directory.
+    """
+    try:
+        resolved = path.resolve()
+        if not resolved.exists() or resolved.stat().st_size == 0:
+            return
+    except OSError:
+        return
+    if str(resolved) in _INITIALIZED_PATHS:
+        return
+
+    reason: Optional[str] = None
+    try:
+        probe = sqlite3.connect(str(resolved), timeout=5, isolation_level=None)
+        try:
+            row = probe.execute("PRAGMA integrity_check").fetchone()
+        finally:
+            probe.close()
+    except sqlite3.OperationalError:
+        # Must be listed before DatabaseError (its base class): lock
+        # contention, busy, transient IO — not corruption. Let it propagate.
+        raise
+    except sqlite3.DatabaseError as exc:
+        reason = f"sqlite refused to open file: {exc}"
+    else:
+        if not row or (row[0] or "").lower() != "ok":
+            shown = row[0] if row else "<no row>"
+            reason = f"integrity_check returned {shown!r}"
+
+    if reason is not None:
+        backup = _backup_corrupt_db(resolved)
+        raise KanbanDbCorruptError(resolved, backup, reason)
+
+
 def connect(
    db_path: Optional[Path] = None,
    *,
@ -1033,7 +1160,13 @@ def connect(
    else:
        path = kanban_db_path(board=board)
    path.parent.mkdir(parents=True, exist_ok=True)
+    # Cheap byte-level check first — catches the #29507 TLS-overwrite shape
+    # and other invalid-header cases without opening a sqlite connection.
    _validate_sqlite_header(path)
+    # Full integrity probe — catches corruption past the header (malformed
+    # pages, broken internal metadata). Cached per-path after first success
+    # via _INITIALIZED_PATHS so it only runs once per process per path.
+    _guard_existing_db_is_healthy(path)
    resolved = str(path.resolve())
    conn = sqlite3.connect(str(path), isolation_level=None, timeout=30)
    try:
@ -1518,8 +1651,15 @@ def create_task(
    now = int(time.time())

    # Resolve workspace_path from board-level default_workdir when the
-    # caller did not specify one explicitly.
-    if workspace_path is None:
+    # caller did not specify one explicitly. Board defaults represent
+    # persistent project checkouts, so only persistent workspace kinds may
+    # inherit them. Scratch workspaces are auto-deleted on completion and
+    # must stay under the per-board scratch root created by
+    # ``resolve_workspace``; inheriting ``default_workdir`` for a scratch
+    # task would point cleanup at the user's source tree (#28818). The
+    # containment guard in ``_cleanup_workspace`` is the safety rail, but
+    # we also stop the bad state from being created in the first place.
+    if workspace_path is None and workspace_kind in {"dir", "worktree"}:
        board_slug = board if board else get_current_board()
        board_meta = read_board_metadata(board_slug)
        board_default = board_meta.get("default_workdir")
@ -2904,6 +3044,81 @@ def complete_task(
 # Workspace / tmux cleanup
 # ---------------------------------------------------------------------------

+def _is_managed_scratch_path(p: Path) -> bool:
+    """Return True iff *p* lies strictly inside a kanban-managed scratch root.
+
+    Only ``workspaces/`` directories count as managed roots — never the
+    wider kanban home, a board root, or siblings such as ``logs/``:
+
+    * ``HERMES_KANBAN_WORKSPACES_ROOT`` when set (worker-side override
+      injected by the dispatcher).
+    * ``<kanban_home>/kanban/workspaces`` — legacy default-board scratch root.
+    * ``<kanban_home>/kanban/boards/<slug>/workspaces`` for every board
+      directory currently present on disk.
+
+    Descendancy is strict: a path equal to a root is NOT managed, because
+    deleting a workspaces root would wipe every task's scratch dir at
+    once. Paths such as ``<kanban_home>/kanban``,
+    ``<kanban_home>/kanban/logs`` or ``<kanban_home>/kanban/boards/<slug>``
+    are rejected too — they hold Hermes' own DB, metadata and logs.
+
+    :func:`_cleanup_workspace` uses this to refuse ``shutil.rmtree`` on
+    anything outside Hermes-managed storage; otherwise a board
+    ``default_workdir`` pointing at a real source tree could pair with
+    ``workspace_kind='scratch'`` and delete user data on task completion
+    (#28818).
+    """
+    try:
+        target = p.resolve(strict=False)
+    except OSError:
+        return False
+
+    managed_roots: list[Path] = []
+
+    def _add_root(candidate: Path) -> None:
+        # A root that cannot be resolved is simply left out.
+        try:
+            managed_roots.append(candidate.resolve(strict=False))
+        except OSError:
+            pass
+
+    override = os.environ.get("HERMES_KANBAN_WORKSPACES_ROOT", "").strip()
+    if override:
+        try:
+            managed_roots.append(Path(override).expanduser().resolve(strict=False))
+        except OSError:
+            pass
+
+    try:
+        home = kanban_home()
+    except OSError:
+        home = None
+    if home is not None:
+        kanban_dir = home / "kanban"
+        _add_root(kanban_dir / "workspaces")
+        # An unresolvable or unreadable boards/ directory contributes no roots.
+        try:
+            board_dirs = list((kanban_dir / "boards").resolve(strict=False).iterdir())
+        except OSError:
+            board_dirs = []
+        for board_dir in board_dirs:
+            try:
+                is_board = board_dir.is_dir()
+            except OSError:
+                continue
+            if is_board:
+                _add_root(board_dir / "workspaces")
+
+    for root in managed_roots:
+        try:
+            # Equality is excluded on purpose: the root itself is never managed.
+            if target != root and target.is_relative_to(root):
+                return True
+        except ValueError:
+            continue
+    return False
+
+
 def _cleanup_workspace(conn: sqlite3.Connection, task_id: str) -> None:
    """Remove a task's scratch workspace dir and kill its stale tmux session.

@ -2926,8 +3141,21 @@ def _cleanup_workspace(conn: sqlite3.Connection, task_id: str) -> None:
        import shutil
        wp = Path(path)
        if wp.is_dir():
-            shutil.rmtree(wp, ignore_errors=True)
-            _log.debug("Removed scratch workspace: %s", wp)
+            # Containment guard (#28818): a board's ``default_workdir`` can
+            # pair ``workspace_kind='scratch'`` with a user-supplied path
+            # pointing at a real source tree. Without this check, task
+            # completion would unconditionally ``shutil.rmtree`` that path
+            # and silently delete the user's source data.
+            if _is_managed_scratch_path(wp):
+                shutil.rmtree(wp, ignore_errors=True)
+                _log.debug("Removed scratch workspace: %s", wp)
+            else:
+                _log.warning(
+                    "Refusing to remove out-of-scratch workspace for task %s: %s "
+                    "(workspace_kind='scratch' but path is outside any "
+                    "kanban-managed workspaces root)",
+                    task_id, wp,
+                )
        # Also kill the tmux session for the worker that owned this task,
        # if the tmux session is now dead (worker process exited).
        _cleanup_worker_tmux(conn, task_id)
@ -2961,6 +3189,93 @@ def _cleanup_worker_tmux(conn: sqlite3.Connection, task_id: str) -> None:
        pass  # best-effort — never block completion


+# ---------------------------------------------------------------------------
+# First-use tip for scratch workspaces
+# ---------------------------------------------------------------------------
+#
+# Scratch workspaces are intentionally ephemeral — ``_cleanup_workspace``
+# removes them as soon as ``complete_task`` runs.  New users often don't
+# realize that and lose worker output (community report, May 2026).  The
+# behavior is right; the lack of warning is the bug.
+#
+# On the FIRST scratch workspace materialization across the whole install
+# we:
+#   1. Log a warning line on the dispatcher logger.
+#   2. Append a ``tip_scratch_workspace`` event on the task so it's visible
+#      via ``hermes kanban show <id>`` and the dashboard.
+#   3. Touch a sentinel file under ``kanban_home() / '.scratch_tip_shown'``
+#      so we don't repeat the tip — once you know, you know.
+#
+# Scope is per-install, not per-board: a user creating a second board
+# already learned the lesson on board #1.
+
+_SCRATCH_TIP_SENTINEL_NAME = ".scratch_tip_shown"
+
+_SCRATCH_TIP_MESSAGE = (
+    "scratch workspaces are ephemeral — they're deleted when the task "
+    "completes. Use --workspace worktree: (git worktree) or "
+    "--workspace dir:/abs/path (existing dir) to preserve worker output."
+)
+
+
+def _scratch_tip_sentinel_path() -> Path:
+    """Return the location of the per-install scratch-tip sentinel file."""
+    sentinel_dir = kanban_home()
+    return sentinel_dir / _SCRATCH_TIP_SENTINEL_NAME
+
+
+def _scratch_tip_shown() -> bool:
+    """Report whether the scratch-workspace tip sentinel already exists.
+
+    Best-effort: an ``OSError`` while checking counts as "not shown", so
+    the tip is emitted again — the safer failure mode for a help message.
+    """
+    try:
+        already_shown = _scratch_tip_sentinel_path().exists()
+    except OSError:
+        already_shown = False
+    return already_shown
+
+
+def _mark_scratch_tip_shown() -> None:
+    """Create the sentinel file so later scratch workspaces stay silent.
+
+    Best-effort: an ``OSError`` is swallowed. The worst outcome is that
+    the tip appears once more, which beats crashing dispatch over a help
+    message.
+    """
+    try:
+        sentinel = _scratch_tip_sentinel_path()
+        sentinel.parent.mkdir(parents=True, exist_ok=True)
+        sentinel.touch(exist_ok=True)
+    except OSError:
+        return
+
+
+def _maybe_emit_scratch_tip(
+    conn: sqlite3.Connection,
+    task_id: str,
+    workspace_kind: Optional[str],
+) -> None:
+    """Emit the first-use scratch-workspace tip, then mark it as shown.
+
+    Called from the dispatcher right after a workspace path is persisted.
+    Does nothing for non-scratch kinds (a missing/empty kind counts as
+    scratch) or once the sentinel file exists. Otherwise it logs a
+    warning and appends a ``tip_scratch_workspace`` event to the task;
+    the sentinel is written afterwards even if that fails.
+    """
+    is_scratch = not workspace_kind or workspace_kind == "scratch"
+    if not is_scratch or _scratch_tip_shown():
+        return
+    event_payload = {"message": _SCRATCH_TIP_MESSAGE}
+    try:
+        _log.warning("kanban: %s (task %s)", _SCRATCH_TIP_MESSAGE, task_id)
+        with write_txn(conn):
+            _append_event(conn, task_id, "tip_scratch_workspace", event_payload)
+    except Exception:
+        # Best-effort — never block the spawn loop over a help message.
+        pass
+    finally:
+        # Runs on success and failure alike, so the tip is attempted only once.
+        _mark_scratch_tip_shown()
+
+
 def edit_completed_task_result(
    conn: sqlite3.Connection,
    task_id: str,
@ -3083,6 +3398,77 @@ def block_task(
        return True


+
+def promote_task(
+    conn: sqlite3.Connection,
+    task_id: str,
+    *,
+    actor: str,
+    reason: Optional[str] = None,
+    force: bool = False,
+    dry_run: bool = False,
+) -> tuple[bool, Optional[str]]:
+    """Manually move a `todo` or `blocked` task to `ready`.
+
+    The operator-driven counterpart of the automatic promotion in
+    ``recompute_ready``; a successful promotion appends a
+    ``promoted_manual`` event carrying ``actor``, ``reason`` and
+    ``forced``. Only the ``status`` column is updated.
+
+    Unless ``force`` is true, the promotion is refused while any parent
+    task linked through ``task_links`` is not `done` or `archived`. With
+    ``dry_run`` the same checks run but nothing is written.
+
+    Returns ``(True, None)`` on success (or on a dry run that would
+    succeed) and ``(False, message)`` when the promotion is refused.
+    """
+    current = conn.execute(
+        "SELECT status FROM tasks WHERE id = ?", (task_id,)
+    ).fetchone()
+    if current is None:
+        return False, f"task {task_id} not found"
+
+    cur_status = current["status"]
+    if cur_status != "todo" and cur_status != "blocked":
+        refusal = (
+            f"task {task_id} is {cur_status!r}; promote only applies to "
+            f"'todo' or 'blocked'"
+        )
+        return False, refusal
+
+    if not force:
+        parent_rows = conn.execute(
+            "SELECT t.id, t.status FROM tasks t "
+            "JOIN task_links l ON l.parent_id = t.id "
+            "WHERE l.child_id = ?",
+            (task_id,),
+        ).fetchall()
+        unsatisfied = []
+        for parent in parent_rows:
+            if parent["status"] not in ("done", "archived"):
+                unsatisfied.append(parent["id"])
+        if unsatisfied:
+            blockers = ", ".join(unsatisfied)
+            return False, (
+                f"unsatisfied parent dependencies: "
+                f"{blockers} (use --force to override)"
+            )
+
+    if dry_run:
+        return True, None
+
+    with write_txn(conn):
+        # Re-assert the source status in the WHERE clause: the SELECT above
+        # ran outside this transaction, so the row may have moved since.
+        updated = conn.execute(
+            "UPDATE tasks SET status = 'ready' "
+            "WHERE id = ? AND status IN ('todo', 'blocked')",
+            (task_id,),
+        )
+        if updated.rowcount != 1:
+            return False, f"task {task_id} status changed during promotion"
+        event_payload = {"actor": actor, "reason": reason, "forced": force}
+        _append_event(conn, task_id, "promoted_manual", event_payload)
+
+    return True, None
+
+
 def unblock_task(conn: sqlite3.Connection, task_id: str) -> bool:
    """Transition ``blocked``/``scheduled`` -> ready or todo.

@ -4892,6 +5278,7 @@ def dispatch_once(
            continue
        # Persist the resolved workspace path so the worker can cd there.
        set_workspace_path(conn, claimed.id, str(workspace))
+        _maybe_emit_scratch_tip(conn, claimed.id, claimed.workspace_kind)
        _spawn = spawn_fn if spawn_fn is not None else _default_spawn
        try:
            # Back-compat: older spawn_fn signatures accept only
@ -4970,6 +5357,7 @@ def dispatch_once(
            continue
        # Persist the resolved workspace path so the worker can cd there.
        set_workspace_path(conn, claimed.id, str(workspace))
+        _maybe_emit_scratch_tip(conn, claimed.id, claimed.workspace_kind)
        # Force-load sdlc-review skill for review agents.  The
        # _default_spawn function already auto-loads kanban-worker, and
        # appends task.skills via --skills.  Setting task.skills here
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
--- a/hermes_cli/mcp_catalog.py
+++ b/hermes_cli/mcp_catalog.py
@ -0,0 +1,776 @@
+"""MCP catalog — curated, Nous-approved MCP servers shipped with the repo.
+
+Mirrors the optional-skills/ pattern: each catalog entry lives under
+``optional-mcps/<name>/manifest.yaml`` and ships disabled. Users discover
+entries via ``hermes mcp catalog`` or the interactive ``hermes mcp picker``,
+and install them with ``hermes mcp install <name>`` (or by toggling in the
+picker, which flows them through any required env/OAuth setup).
+
+Catalog policy:
+- Entries are added only by merging a PR into hermes-agent. Presence in the
+  ``optional-mcps/`` directory = Nous approval. No community tier, no trust
+  signals beyond "it's in the catalog".
+- Manifests pin transport details (commands, args, refs). MCPs are never
+  auto-updated; users explicitly re-run ``hermes mcp install <name>`` to
+  pull a new manifest version after a repo update.
+- Secrets prompted at install time go to ``~/.hermes/.env`` (the
+  .env-is-for-secrets rule). Non-secret env vars also go to .env to keep
+  one credential store.
+
+See website/docs/user-guide/mcp-catalog.md for user docs.
+See references/mcp-catalog.md (this repo's skill) for the manifest schema.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import shutil
+import subprocess
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import yaml
+
+from hermes_constants import get_hermes_home, get_optional_mcps_dir
+from hermes_cli.colors import Colors, color
+from hermes_cli.config import (
+    load_config,
+    save_config,
+    get_env_value,
+    save_env_value,
+)
+from hermes_cli.cli_output import prompt as _prompt_input, prompt_yes_no
+
+_MANIFEST_VERSION = 1
+
+# Substituted at install time inside `transport.command` / `transport.args`.
+_INSTALL_DIR_VAR = "${INSTALL_DIR}"
+
+
+# ─── Data classes ────────────────────────────────────────────────────────────
+
+
+@dataclass
+class EnvVarSpec:
+    """One environment variable declared in a manifest's ``auth.env`` list.
+
+    Built by ``_parse_env_spec`` from a single mapping in that list.
+    """
+
+    name: str  # env var name; validated against an identifier pattern when parsed
+    prompt: str  # the manifest's ``prompt`` value, or ``name`` when none is given
+    required: bool = True
+    secret: bool = True
+    default: str = ""  # empty string when the manifest supplies no default
+
+
+@dataclass
+class AuthSpec:
+    """Parsed ``auth`` section of a catalog manifest.
+
+    ``_parse_manifest`` uses ``type="none"`` when the manifest has no
+    ``auth`` section.
+    """
+
+    type: str  # "api_key" | "oauth" | "none"
+    # Env vars declared under ``auth.env``; empty when the manifest lists none.
+    env: List[EnvVarSpec] = field(default_factory=list)
+    # OAuth-specific (case 2: third-party provider like Google)
+    provider: Optional[str] = None
+    scopes: List[str] = field(default_factory=list)
+    env_var: Optional[str] = None
+
+
+@dataclass
+class TransportSpec:
+    """Parsed ``transport`` section of a catalog manifest.
+
+    ``_parse_manifest`` requires ``command`` for ``stdio`` transports and
+    ``url`` for ``http`` transports.
+    """
+
+    type: str  # "stdio" | "http"
+    command: Optional[str] = None
+    args: List[str] = field(default_factory=list)  # each item is str()-coerced when parsed
+    url: Optional[str] = None
+    version: Optional[str] = None  # informational, pinned
+
+
+@dataclass
+class InstallSpec:
+    """Optional bootstrap step (git clone + dep install).
+
+    Omit for one-shot launchable servers (npx, uvx). When a manifest does
+    declare ``install``, ``_parse_manifest`` accepts only ``type: git``
+    and requires both ``url`` and ``ref`` to be non-empty.
+    """
+    type: str  # "git"
+    url: str
+    ref: str  # commit/tag/branch — pinned, never floats
+    bootstrap: List[str] = field(default_factory=list)  # each item is str()-coerced when parsed
+
+
+@dataclass
+class ToolsSpec:
+    """Manifest-side tool-selection hints (the manifest's ``tools`` section).
+
+    Drives the pre-checked state of the install-time tool checklist, and acts
+    as the fallback selection when probe fails. See install_entry() flow.
+    """
+
+    # If declared, these tool names are pre-checked in the checklist (or
+    # applied directly when probe fails). If None, all probed tools are
+    # pre-checked (or no filter is written when probe fails).
+    # ``_parse_manifest`` rejects any value that is not a list of strings.
+    default_enabled: Optional[List[str]] = None
+
+
+@dataclass
+class CatalogEntry:
+    """A fully parsed and validated catalog manifest.
+
+    Instances are produced by ``_parse_manifest``.
+    """
+
+    name: str  # limited to letters, digits, "_" and "-" by the parser
+    description: str  # whitespace-stripped; the parser rejects an empty value
+    source: str  # whitespace-stripped; may be empty
+    transport: TransportSpec
+    auth: AuthSpec
+    tools: ToolsSpec = field(default_factory=ToolsSpec)
+    install: Optional[InstallSpec] = None  # None when the manifest has no ``install`` section
+    post_install: str = ""
+    # Path of the manifest.yaml this entry was read from; ``Path()`` when unset.
+    manifest_path: Path = field(default_factory=Path)
+
+
+# ─── Manifest loader ─────────────────────────────────────────────────────────
+
+
+class CatalogError(Exception):
+    """Manifest parse/validation failure or install error.
+
+    ``list_catalog`` catches this per manifest and records it as a
+    diagnostic instead of aborting the whole listing.
+    """
+
+
+def _catalog_root() -> Path:
+    """Locate the ``optional-mcps/`` catalog directory for this Hermes install."""
+    # The source-checkout layout (optional-mcps/ next to the package) is the
+    # fallback handed to get_optional_mcps_dir, which prefers the env-var
+    # override / packaged location.
+    package_dir = Path(__file__).parent
+    checkout_default = package_dir.parent / "optional-mcps"
+    return get_optional_mcps_dir(checkout_default)
+
+
+def _parse_env_spec(raw: Any) -> EnvVarSpec:
+    """Validate one ``auth.env`` item and convert it to an :class:`EnvVarSpec`.
+
+    Args:
+        raw: One element of the manifest's ``auth.env`` list; expected to
+            be a mapping with at least a ``name`` key.
+
+    Returns:
+        The parsed spec. ``prompt`` falls back to the variable name,
+        ``required`` and ``secret`` default to true, and ``default`` is an
+        empty string when absent.
+
+    Raises:
+        CatalogError: if ``raw`` is not a mapping, or ``name`` is missing,
+            not a string, or not a valid environment-variable identifier.
+    """
+    if not isinstance(raw, dict):
+        raise CatalogError(f"env entry must be a mapping, got {type(raw).__name__}")
+    name = raw.get("name") or ""
+    # fullmatch instead of match + "$": "$" also matches just before a
+    # trailing newline, which would let "FOO\n" through. The isinstance
+    # guard turns a non-string YAML scalar (e.g. ``name: 123``) into a
+    # CatalogError rather than a TypeError raised from ``re``.
+    if not isinstance(name, str) or not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name):
+        raise CatalogError(f"invalid env var name: {name!r}")
+    return EnvVarSpec(
+        name=name,
+        prompt=raw.get("prompt") or name,
+        required=bool(raw.get("required", True)),
+        secret=bool(raw.get("secret", True)),
+        default=str(raw.get("default") or ""),
+    )
+
+
+def _parse_manifest(path: Path) -> CatalogEntry:
+    """Read and validate a ``manifest.yaml`` into a :class:`CatalogEntry`.
+
+    Args:
+        path: Location of the manifest file; it is also stored on the
+            returned entry as ``manifest_path``.
+
+    Returns:
+        The parsed entry, with its transport, auth, tools and optional
+        install sections converted to their spec dataclasses.
+
+    Raises:
+        CatalogError: if the file cannot be read or parsed, if
+            ``manifest_version`` differs from ``_MANIFEST_VERSION``, or if
+            any section fails validation.
+    """
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            data = yaml.safe_load(f) or {}
+    except Exception as exc:
+        raise CatalogError(f"failed to read {path}: {exc}") from exc
+
+    if not isinstance(data, dict):
+        raise CatalogError(f"{path}: manifest must be a mapping")
+
+    mv = data.get("manifest_version")
+    if mv != _MANIFEST_VERSION:
+        raise CatalogError(
+            f"{path}: manifest_version {mv!r} unsupported "
+            f"(this Hermes understands version {_MANIFEST_VERSION})"
+        )
+
+    name = data.get("name") or ""
+    # fullmatch so a trailing newline cannot slip past "$"; the isinstance
+    # guard keeps a non-string YAML scalar (e.g. ``name: 123``) from raising
+    # TypeError out of ``re``, which callers catching CatalogError would miss.
+    if not isinstance(name, str) or not re.fullmatch(r"[A-Za-z0-9_-]+", name):
+        raise CatalogError(f"{path}: invalid or missing 'name'")
+
+    description = str(data.get("description") or "").strip()
+    if not description:
+        raise CatalogError(f"{path}: 'description' required")
+
+    source = str(data.get("source") or "").strip()
+
+    # --- transport ---------------------------------------------------------
+    transport_raw = data.get("transport") or {}
+    if not isinstance(transport_raw, dict):
+        raise CatalogError(f"{path}: 'transport' must be a mapping")
+    t_type = transport_raw.get("type")
+    if t_type not in ("stdio", "http"):
+        raise CatalogError(f"{path}: transport.type must be 'stdio' or 'http'")
+    args = transport_raw.get("args") or []
+    if not isinstance(args, list):
+        raise CatalogError(f"{path}: transport.args must be a list")
+    transport = TransportSpec(
+        type=t_type,
+        command=transport_raw.get("command"),
+        args=[str(a) for a in args],
+        url=transport_raw.get("url"),
+        version=transport_raw.get("version"),
+    )
+    if t_type == "stdio" and not transport.command:
+        raise CatalogError(f"{path}: stdio transport requires 'command'")
+    if t_type == "http" and not transport.url:
+        raise CatalogError(f"{path}: http transport requires 'url'")
+
+    # --- auth --------------------------------------------------------------
+    auth_raw = data.get("auth") or {"type": "none"}
+    if not isinstance(auth_raw, dict):
+        raise CatalogError(f"{path}: 'auth' must be a mapping")
+    a_type = auth_raw.get("type") or "none"
+    if a_type not in ("api_key", "oauth", "none"):
+        raise CatalogError(f"{path}: auth.type must be 'api_key'|'oauth'|'none'")
+    env_list_raw = auth_raw.get("env") or []
+    if not isinstance(env_list_raw, list):
+        raise CatalogError(f"{path}: auth.env must be a list")
+    env_list = [_parse_env_spec(e) for e in env_list_raw]
+    scopes_raw = auth_raw.get("scopes") or []
+    # Without this check a scalar such as ``scopes: "a b"`` would be split
+    # into single characters by list().
+    if not isinstance(scopes_raw, list):
+        raise CatalogError(f"{path}: auth.scopes must be a list")
+    auth = AuthSpec(
+        type=a_type,
+        env=env_list,
+        provider=auth_raw.get("provider"),
+        scopes=list(scopes_raw),
+        env_var=auth_raw.get("env_var"),
+    )
+
+    # --- tools -------------------------------------------------------------
+    tools_raw = data.get("tools") or {}
+    if not isinstance(tools_raw, dict):
+        raise CatalogError(f"{path}: 'tools' must be a mapping")
+    default_enabled = tools_raw.get("default_enabled")
+    if default_enabled is not None:
+        if not isinstance(default_enabled, list) or not all(
+            isinstance(t, str) for t in default_enabled
+        ):
+            raise CatalogError(
+                f"{path}: tools.default_enabled must be a list of strings"
+            )
+    tools_spec = ToolsSpec(default_enabled=default_enabled)
+
+    # --- install (optional) ------------------------------------------------
+    install: Optional[InstallSpec] = None
+    install_raw = data.get("install")
+    if install_raw is not None:
+        if not isinstance(install_raw, dict):
+            raise CatalogError(f"{path}: 'install' must be a mapping")
+        i_type = install_raw.get("type")
+        if i_type != "git":
+            raise CatalogError(f"{path}: install.type must be 'git' (got {i_type!r})")
+        url = install_raw.get("url") or ""
+        ref = install_raw.get("ref") or ""
+        if not url or not ref:
+            raise CatalogError(f"{path}: install.url and install.ref are required")
+        bootstrap = install_raw.get("bootstrap") or []
+        if not isinstance(bootstrap, list):
+            raise CatalogError(f"{path}: install.bootstrap must be a list")
+        install = InstallSpec(
+            type=i_type,
+            url=url,
+            ref=ref,
+            bootstrap=[str(c) for c in bootstrap],
+        )
+
+    return CatalogEntry(
+        name=name,
+        description=description,
+        source=source,
+        transport=transport,
+        auth=auth,
+        tools=tools_spec,
+        install=install,
+        post_install=str(data.get("post_install") or ""),
+        manifest_path=path,
+    )
+
+
+def list_catalog() -> List[CatalogEntry]:
+    """Return all valid catalog entries, in sorted directory order.
+
+    Every child of the catalog root that contains a ``manifest.yaml`` is
+    parsed. A manifest that raises :class:`CatalogError` is left out of
+    the result and recorded for :func:`catalog_diagnostics`: as
+    ``future_manifest`` when the message reports an unsupported
+    ``manifest_version``, otherwise as ``invalid``. The picker / catalog
+    UIs can then tell the user why the list is shorter.
+
+    Returns an empty list when the catalog root is missing or is not a
+    directory.
+    """
+    # Reset first, so a call that finds no catalog root does not leave the
+    # previous call's diagnostics visible through catalog_diagnostics().
+    _CATALOG_DIAGNOSTICS.clear()
+    root = _catalog_root()
+    # is_dir() rather than exists(): iterdir() raises on a plain file.
+    if not root.is_dir():
+        return []
+    entries: List[CatalogEntry] = []
+    for child in sorted(root.iterdir()):
+        manifest = child / "manifest.yaml"
+        if not manifest.is_file():
+            continue
+        try:
+            entries.append(_parse_manifest(manifest))
+        except CatalogError as exc:
+            msg = str(exc)
+            # Recognize the future-manifest error specifically so the UI can
+            # surface a more actionable nudge than "broken manifest". The
+            # match is on message text, so any unsupported version (missing
+            # or older too) is tagged the same way.
+            if "manifest_version" in msg and "unsupported" in msg:
+                kind = "future_manifest"
+            else:
+                kind = "invalid"
+            _CATALOG_DIAGNOSTICS.append((child.name, kind, msg))
+    return entries
+
+
+# Populated by list_catalog(). Inspected by the picker / catalog UIs so the
+# user gets actionable feedback instead of a silently-shorter list.
+# Each item is an ``(entry_name, kind, message)`` tuple: ``entry_name`` is the
+# manifest's directory name, ``kind`` is "future_manifest" or "invalid", and
+# ``message`` is the text of the CatalogError raised while parsing.
+_CATALOG_DIAGNOSTICS: List[tuple] = []
+
+
+def catalog_diagnostics() -> List[tuple]:
+    """Return a snapshot of what the last :func:`list_catalog` scan skipped.
+
+    The result is a new list of ``(entry_name, kind, message)`` tuples, so
+    callers may mutate it freely. ``kind`` is one of:
+      - ``future_manifest`` — the manifest declares a manifest_version this
+        Hermes does not support; updating Hermes is the remedy.
+      - ``invalid`` — the manifest failed to parse for any other reason
+        (shipped manifests are checked in CI; locally edited ones can
+        still end up here).
+    """
+    snapshot = _CATALOG_DIAGNOSTICS.copy()
+    return snapshot
+
+
+def get_entry(name: str) -> Optional[CatalogEntry]:
+    """Find the catalog entry called *name*, or return ``None``.
+
+    A leading ``official/`` is stripped before matching, so both
+    ``official/<name>`` and ``<name>`` resolve to the same entry.
+    """
+    prefix = "official/"
+    wanted = name[len(prefix):] if name.startswith(prefix) else name
+    return next(
+        (candidate for candidate in list_catalog() if candidate.name == wanted),
+        None,
+    )
+
+
+# ─── Status helpers ──────────────────────────────────────────────────────────
+
+
+def installed_servers() -> Dict[str, dict]:
+    """Return the ``mcp_servers`` mapping from the loaded config.
+
+    A missing, empty, or non-mapping value yields a fresh empty dict.
+    """
+    block = load_config().get("mcp_servers")
+    if block and isinstance(block, dict):
+        return block
+    return {}
+
+
+def is_installed(name: str) -> bool:
+    """Return True when *name* is a key of the configured ``mcp_servers``."""
+    configured = installed_servers()
+    return name in configured
+
+
+def is_enabled(name: str) -> bool:
+    """Return whether the configured server *name* is switched on.
+
+    A server that is absent (or whose config block is empty) counts as not
+    enabled. Otherwise the ``enabled`` key decides, defaulting to True;
+    string values are accepted when they read "true", "1" or "yes"
+    (case-insensitive).
+    """
+    server = installed_servers().get(name)
+    if not server:
+        return False
+    flag = server.get("enabled", True)
+    if not isinstance(flag, str):
+        return bool(flag)
+    return flag.lower() in {"true", "1", "yes"}
+
+
+# ─── Install ─────────────────────────────────────────────────────────────────
+
+
+def _install_root() -> Path:
+    """Return ``<hermes home>/mcp-installs``, creating it when missing.
+
+    This is the parent directory for git-bootstrapped MCP checkouts.
+    """
+    install_home = get_hermes_home() / "mcp-installs"
+    install_home.mkdir(parents=True, exist_ok=True)
+    return install_home
+
+
+def _run_bootstrap(cwd: Path, commands: List[str]) -> None:
+    """Run each bootstrap command in *cwd*, stopping at the first failure.
+
+    Commands are echoed and then handed to the shell as-is (so `&&` and
+    similar operators work); their output is not captured, so it goes
+    straight to the user's terminal. A non-zero exit status raises
+    CatalogError naming the exit code and the command.
+    """
+    for step in commands:
+        print(color(f"  $ {step}", Colors.DIM))
+        exit_code = subprocess.run(step, cwd=str(cwd), shell=True).returncode
+        if exit_code == 0:
+            continue
+        raise CatalogError(
+            f"bootstrap step failed (exit {exit_code}): {step}"
+        )
+
+
+def _do_git_install(entry: CatalogEntry) -> Path:
+    """Clone the entry's repo into ``<install root>/<name>`` and bootstrap it.
+
+    Any existing checkout at the destination is removed first. Branch/tag
+    refs are tried as a shallow ``--branch`` clone; SHA-shaped refs (and
+    refs for which the shallow clone failed) use a full clone followed by
+    ``git checkout``. Bootstrap commands, if any, then run inside the
+    checkout.
+
+    Returns the install directory.
+
+    Raises:
+        CatalogError: the entry has no git install block, git is not on
+            PATH, the clone or checkout fails, or a bootstrap step fails.
+    """
+    install = entry.install
+    # Explicit check instead of `assert`: assertions are removed under
+    # `python -O`, and callers only handle CatalogError.
+    if install is None or install.type != "git":
+        raise CatalogError(f"'{entry.name}' has no git install block")
+    dest = _install_root() / entry.name
+
+    git = shutil.which("git")
+    if not git:
+        raise CatalogError("git is required to install this MCP but was not found on PATH")
+
+    if dest.exists():
+        # Fresh checkout each install — manifest version is the source of truth,
+        # so wipe + re-clone for determinism.
+        print(color(f"  Removing existing install at {dest}", Colors.DIM))
+        shutil.rmtree(dest)
+
+    print(color(f"  Cloning {install.url} ({install.ref}) → {dest}", Colors.CYAN))
+
+    # `git clone --branch` only accepts branches and tags, NOT commit SHAs.
+    # Detecting SHA-shaped refs upfront avoids a guaranteed stderr leak on
+    # the fast path (the --branch attempt would always fail noisily for a
+    # SHA ref before we fall back to full-clone-then-checkout).
+    needs_full_clone = bool(re.fullmatch(r"[0-9a-f]{7,40}", install.ref))
+
+    if not needs_full_clone:
+        proc = subprocess.run(
+            [git, "clone", "--depth", "1", "--branch", install.ref, install.url, str(dest)],
+        )
+        if proc.returncode != 0:
+            # Branch/tag form failed (unlikely for valid manifests; possible if
+            # the ref was deleted upstream). Discard any partial checkout and
+            # fall through to the full-clone path.
+            if dest.exists():
+                shutil.rmtree(dest)
+            needs_full_clone = True
+
+    if needs_full_clone:
+        proc = subprocess.run([git, "clone", install.url, str(dest)])
+        if proc.returncode != 0:
+            raise CatalogError(f"git clone failed for {install.url}")
+        proc = subprocess.run([git, "-C", str(dest), "checkout", install.ref])
+        if proc.returncode != 0:
+            raise CatalogError(f"git checkout {install.ref} failed")
+
+    if install.bootstrap:
+        _run_bootstrap(dest, install.bootstrap)
+
+    return dest
+
+
+def _expand_install_dir(value: str, install_dir: Optional[Path]) -> str:
+    """Substitute the install-dir placeholder in *value*.
+
+    Values without the placeholder are returned unchanged. When the
+    placeholder is present but *install_dir* is ``None``, CatalogError is
+    raised because there is nothing to substitute.
+    """
+    if _INSTALL_DIR_VAR in value:
+        if install_dir is None:
+            raise CatalogError(
+                f"manifest references {_INSTALL_DIR_VAR} but no install block exists"
+            )
+        return value.replace(_INSTALL_DIR_VAR, str(install_dir))
+    return value
+
+
+def _prompt_env_vars(specs: List[EnvVarSpec]) -> Dict[str, str]:
+    """Collect a value for each env var described in *specs*.
+
+    Variables that ``get_env_value`` already reports are reused without
+    prompting. Everything else is prompted for (masked when the spec is
+    marked secret) and persisted through ``save_env_value``. An empty answer
+    raises CatalogError for a required variable and is skipped otherwise.
+
+    Returns a name → value mapping of everything collected.
+    """
+    values: Dict[str, str] = {}
+    for var in specs:
+        current = get_env_value(var.name)
+        if current:
+            print(color(f"  ✓ {var.name} already set in .env", Colors.GREEN))
+            values[var.name] = current
+            continue
+        answer = _prompt_input(
+            var.prompt,
+            default=var.default or None,
+            password=var.secret,
+        )
+        if answer:
+            save_env_value(var.name, answer)
+            values[var.name] = answer
+        elif var.required:
+            raise CatalogError(f"{var.name} is required but no value was provided")
+    return values
+
+
+def _build_server_config(
+    entry: CatalogEntry, install_dir: Optional[Path]
+) -> dict:
+    """Build the ``mcp_servers.<name>`` config block for a manifest.
+
+    stdio transports produce ``command`` (plus ``args`` when present), with
+    the install-dir placeholder expanded. http transports produce ``url``
+    and, for OAuth entries, ``auth: oauth``. Any other transport type
+    yields an empty dict.
+    """
+    transport = entry.transport
+    if transport.type == "stdio":
+        server: dict = {
+            "command": _expand_install_dir(transport.command or "", install_dir),
+        }
+        if transport.args:
+            server["args"] = [
+                _expand_install_dir(arg, install_dir) for arg in transport.args
+            ]
+        return server
+    if transport.type == "http":
+        server = {"url": transport.url}
+        if entry.auth.type == "oauth":
+            server["auth"] = "oauth"
+        return server
+    return {}
+
+
+def _read_prior_tool_selection(name: str) -> Optional[List[str]]:
+    """Return a copy of the configured ``tools.include`` list for *name*.
+
+    ``None`` is returned when the server is not configured, has no mapping
+    under ``tools``, or its ``include`` value is not a list of strings.
+    Reinstalls use the result to seed the tool checklist with the user's
+    earlier choice.
+    """
+    server = installed_servers().get(name) or {}
+    tools_block = server.get("tools") or {}
+    if not isinstance(tools_block, dict):
+        return None
+    wanted = tools_block.get("include")
+    if not isinstance(wanted, list):
+        return None
+    if not all(isinstance(item, str) for item in wanted):
+        return None
+    return list(wanted)
+
+
+def _probe_tools(name: str) -> Optional[List[tuple]]:
+    """Ask the configured server *name* which tools it offers.
+
+    Returns the probe result as a list (an empty list when the probe
+    returned ``None``). Returns ``None`` when the server has no config
+    block or when probing raises anything at all; the exception text is
+    printed but never propagated, so the install flow can fall back
+    gracefully.
+    """
+    server_cfg = installed_servers().get(name)
+    if not server_cfg:
+        return None
+    try:
+        # Deferred import keeps this module cheap to load.
+        from hermes_cli.mcp_config import _probe_single_server
+
+        found = _probe_single_server(name, server_cfg)
+        if found is None:
+            return []
+        return list(found)
+    except Exception as exc:
+        # Report the cause, but the install path must never raise from here.
+        print(color(f"  Probe failed: {exc}", Colors.YELLOW))
+        return None
+
+
+def _write_tools_include(name: str, include: Optional[List[str]]) -> None:
+    """Persist or clear ``mcp_servers.<name>.tools.include``.
+
+    Args:
+        name: Key of the server under ``mcp_servers``.
+        include: Tool names to allow. ``None`` removes the whole ``tools``
+            block (no filter); a list — including an empty one — is written
+            as ``tools.include`` and any ``tools.exclude`` is dropped.
+    """
+    cfg = load_config()
+    # `setdefault` would hand back an existing null / non-mapping value
+    # unchanged (e.g. a bare `mcp_servers:` key in YAML) and crash on the
+    # lookup below; normalise the same way the sibling helpers do.
+    servers = cfg.get("mcp_servers")
+    if not isinstance(servers, dict):
+        servers = {}
+    server_entry = servers.get(name) or {}
+    if include is None:
+        # No filter — drop any existing tools block.
+        server_entry.pop("tools", None)
+    else:
+        tools_block = server_entry.get("tools") or {}
+        if not isinstance(tools_block, dict):
+            tools_block = {}
+        tools_block["include"] = list(include)
+        tools_block.pop("exclude", None)
+        server_entry["tools"] = tools_block
+    servers[name] = server_entry
+    cfg["mcp_servers"] = servers
+    save_config(cfg)
+
+
+def _apply_tool_selection(
+    entry: CatalogEntry, *, prior_selection: Optional[List[str]]
+) -> None:
+    """Probe the server and let the user pick which tools to enable.
+
+    Probe-success path:
+      - Curses checklist of all probed tools.
+      - Pre-check uses (in priority order):
+          1. *prior_selection* (reinstall: preserve what the user had)
+          2. manifest's ``tools.default_enabled``
+          3. all tools (default)
+      - All-on selection clears any filter (no ``tools.include`` written).
+      - Sub-selection writes ``tools.include``.
+
+    Probe-fail path:
+      - If manifest declares ``tools.default_enabled`` → apply directly.
+      - Otherwise → leave config with no filter (all on when reachable).
+      - Either way, point the user at ``hermes mcp configure <name>``.
+      - *prior_selection* is not consulted on this path.
+
+    Zero-tools path (probe succeeded, empty result):
+      - Any tools filter is cleared and a notice is printed.
+
+    Non-TTY path (probe succeeded, stdin is not a terminal):
+      - No checklist is shown; the selection is written directly using
+        prior selection, then manifest default, then no filter. Names not
+        reported by the probe are dropped.
+    """
+    print()
+    print(color(f"  Probing '{entry.name}' for available tools...", Colors.CYAN))
+    probed = _probe_tools(entry.name)
+
+    # Probe failure path
+    if probed is None:
+        manifest_default = entry.tools.default_enabled
+        if manifest_default:
+            _write_tools_include(entry.name, manifest_default)
+            print(color(
+                f"  Couldn\'t probe server. Applied manifest default "
+                f"({len(manifest_default)} tools). "
+                f"Run `hermes mcp configure {entry.name}` after the server "
+                "is reachable to refine.",
+                Colors.YELLOW,
+            ))
+        else:
+            _write_tools_include(entry.name, None)
+            print(color(
+                f"  Couldn\'t probe server; installed with no tool filter "
+                "(all tools enabled when reachable). "
+                f"Run `hermes mcp configure {entry.name}` after first "
+                "connect to prune.",
+                Colors.YELLOW,
+            ))
+        return
+
+    if not probed:
+        # Probe succeeded but server reported zero tools. Nothing to filter.
+        _write_tools_include(entry.name, None)
+        print(color("  Server reported no tools.", Colors.YELLOW))
+        return
+
+    # Each probed item is indexed as (tool_name, description).
+    tool_names = [t[0] for t in probed]
+
+    # Build the pre-checked set in priority order
+    # NOTE(review): this is a truthiness test, so an empty prior selection
+    # falls through to the manifest default / all-on here, whereas the
+    # non-TTY branch below tests `is not None` and would keep the empty
+    # list — confirm the difference is intended.
+    if prior_selection:
+        pre_set = {n for n in prior_selection if n in tool_names}
+    elif entry.tools.default_enabled:
+        pre_set = {n for n in entry.tools.default_enabled if n in tool_names}
+    else:
+        pre_set = set(tool_names)
+
+    # The checklist works with positions into `tool_names`, not names.
+    pre_indices = {i for i, n in enumerate(tool_names) if n in pre_set}
+
+    # Non-TTY: skip the checklist. Priority matches the interactive
+    # pre-check priority: prior user selection > manifest default > all-on.
+    import sys as _sys
+    if not _sys.stdin.isatty():
+        if prior_selection is not None:
+            include = [n for n in prior_selection if n in tool_names]
+            _write_tools_include(entry.name, include)
+        elif entry.tools.default_enabled:
+            include = [n for n in entry.tools.default_enabled if n in tool_names]
+            _write_tools_include(entry.name, include)
+        else:
+            _write_tools_include(entry.name, None)
+        return
+
+    print(color(
+        f"  Found {len(probed)} tool(s). "
+        f"Pre-checked: {len(pre_indices)}.",
+        Colors.GREEN,
+    ))
+
+    # Imported here rather than at module top, so the curses UI is only
+    # loaded on the interactive path.
+    from hermes_cli.curses_ui import curses_checklist
+
+    # One label per tool: name plus description truncated to 60 characters.
+    labels = [
+        f"{n}  —  {(d[:60] + '...') if len(d) > 60 else d}"
+        for n, d in probed
+    ]
+    chosen_indices = curses_checklist(
+        f"Select tools for '{entry.name}' (SPACE toggle, ENTER confirm)",
+        labels,
+        pre_indices,
+    )
+
+    # NOTE(review): assumes curses_checklist returns an empty collection only
+    # when the user confirmed with nothing checked; what it returns on cancel
+    # is not visible from here — verify against curses_ui.
+    if not chosen_indices:
+        # User unchecked everything; treat as "no tools" — write empty include
+        # so the server is installed but contributes nothing until reconfigured.
+        _write_tools_include(entry.name, [])
+        print(color(
+            f"  No tools selected. Run `hermes mcp configure {entry.name}` "
+            "to change.",
+            Colors.YELLOW,
+        ))
+        return
+
+    if len(chosen_indices) == len(probed):
+        # Everything selected — clear filter for the cleanest config shape.
+        # NOTE: this means any tools the server adds later (e.g. a future MCP
+        # version) will also be auto-enabled. To pin to the current set,
+        # the user can re-run `hermes mcp configure <name>` and unselect a
+        # tool to switch back to include-mode.
+        _write_tools_include(entry.name, None)
+        print(color(
+            f"  ✓ All {len(probed)} tools enabled (no filter — new tools "
+            "the server adds later will be auto-enabled).",
+            Colors.GREEN,
+        ))
+        return
+
+    # Partial selection: persist the chosen names in probe order.
+    chosen_names = [tool_names[i] for i in sorted(chosen_indices)]
+    _write_tools_include(entry.name, chosen_names)
+    print(color(
+        f"  ✓ {len(chosen_names)}/{len(probed)} tools enabled.",
+        Colors.GREEN,
+    ))
+
+
+def install_entry(entry: CatalogEntry, *, enable: bool = True) -> None:
+    """Install a catalog entry end-to-end.
+
+    Steps:
+        1. If the manifest has an install block, clone + run bootstrap
+           commands.
+        2. If ``auth.type == api_key``, prompt for env vars and save them.
+        3. If ``auth.type == oauth``, print guidance only: either point the
+           user at ``hermes auth <provider>`` (provider-mediated) or note
+           that tokens are acquired on first connection (native OAuth).
+        4. Translate the manifest into an ``mcp_servers.<name>`` block
+           (replacing any existing block for that name) and save the config.
+        5. Probe the server and run tool selection, seeded with the
+           ``tools.include`` the user had before this install, if any.
+        6. Print post_install notes.
+
+    Args:
+        entry: Parsed catalog manifest to install.
+        enable: Value written to the server block's ``enabled`` key.
+
+    Raises:
+        CatalogError: propagated from the clone/bootstrap, credential
+            prompt, or config-building steps.
+    """
+    print()
+    print(color(f"  Installing MCP '{entry.name}'", Colors.CYAN + Colors.BOLD))
+    if entry.description:
+        print(color(f"  {entry.description}", Colors.DIM))
+    if entry.source:
+        print(color(f"  Source: {entry.source}", Colors.DIM))
+    print()
+
+    install_dir: Optional[Path] = None
+    if entry.install is not None:
+        install_dir = _do_git_install(entry)
+
+    # Auth
+    if entry.auth.type == "api_key":
+        print()
+        print(color("  Configure credentials:", Colors.CYAN))
+        _prompt_env_vars(entry.auth.env)
+    elif entry.auth.type == "oauth":
+        if entry.auth.provider:
+            # Case 2: provider-mediated (Google, GitHub, etc.). We rely on
+            # the existing `hermes auth <provider>` flow. Surface guidance
+            # here rather than auto-running it — keeps the catalog install
+            # decoupled from provider-auth lifecycle.
+            print(color(
+                f"  This MCP uses {entry.auth.provider} OAuth. Run "
+                f"`hermes auth {entry.auth.provider}` if you have not "
+                "already authenticated.",
+                Colors.YELLOW,
+            ))
+        else:
+            print(color(
+                "  This MCP uses native OAuth 2.1; tokens will be acquired "
+                "on first connection (browser flow).",
+                Colors.DIM,
+            ))
+    # auth.type == "none": nothing to do.
+
+    # ── Preserve any prior user tool selection across reinstalls ────────
+    # Reading BEFORE we overwrite the entry below so a reinstall pre-checks
+    # whatever the user picked last time.
+    prior_selection = _read_prior_tool_selection(entry.name)
+
+    # Build and write the mcp_servers entry (without tools filter yet;
+    # _apply_tool_selection() finalizes it below).
+    server_cfg = _build_server_config(entry, install_dir)
+    server_cfg["enabled"] = enable
+
+    cfg = load_config()
+    # `setdefault` would return an existing null / non-mapping value as-is
+    # (e.g. a bare `mcp_servers:` key in YAML) and the item assignment would
+    # then raise; start from a fresh mapping in that case.
+    servers = cfg.get("mcp_servers")
+    if not isinstance(servers, dict):
+        servers = {}
+        cfg["mcp_servers"] = servers
+    servers[entry.name] = server_cfg
+    save_config(cfg)
+
+    # ── Probe + tool selection ──────────────────────────────────────────
+    _apply_tool_selection(entry, prior_selection=prior_selection)
+
+    print()
+    print(color(
+        f"  ✓ Installed '{entry.name}' "
+        f"({'enabled' if enable else 'disabled'}). "
+        f"Start a new Hermes session to load its tools.",
+        Colors.GREEN,
+    ))
+    if entry.post_install:
+        print()
+        for line in entry.post_install.strip().splitlines():
+            print(color(f"  {line}", Colors.DIM))
+    print()
+
+
+def uninstall_entry(name: str, *, purge_install_dir: bool = True) -> bool:
+    """Remove an installed MCP from config and optionally delete its clone.
+
+    Args:
+        name: Key of the server under ``mcp_servers``; also the name of its
+            directory under the install root.
+        purge_install_dir: When True, delete ``<install root>/<name>`` if it
+            exists. Skipped for names that are not a single plain path
+            component.
+
+    Returns:
+        True if a config entry or a clone directory was removed.
+    """
+    cfg = load_config()
+    servers = cfg.get("mcp_servers") or {}
+    removed = False
+    if name in servers:
+        del servers[name]
+        if not servers:
+            cfg.pop("mcp_servers", None)
+        else:
+            cfg["mcp_servers"] = servers
+        save_config(cfg)
+        removed = True
+
+    # Only ever delete a direct child of the install root. An empty name,
+    # "." or ".." — or anything containing a path separator — would make the
+    # rmtree target the install root itself or a directory outside it.
+    is_plain_name = name not in {"", ".", ".."} and Path(name).name == name
+    if purge_install_dir and is_plain_name:
+        clone = _install_root() / name
+        if clone.exists():
+            shutil.rmtree(clone)
+            removed = True
+
+    return removed
--- a/hermes_cli/mcp_config.py
+++ b/hermes_cli/mcp_config.py
@ -749,6 +749,24 @@ def mcp_command(args):
        run_mcp_server(verbose=getattr(args, "verbose", False))
        return

+    # Catalog subcommands live in mcp_picker / mcp_catalog. Import lazily so
+    # the original `mcp_config` module stays import-cheap.
+    if action == "picker":
+        from hermes_cli.mcp_picker import run_picker
+        run_picker()
+        return
+    if action == "catalog":
+        from hermes_cli.mcp_picker import show_catalog
+        show_catalog()
+        return
+    if action == "install":
+        from hermes_cli.mcp_picker import install_by_name
+        import sys as _sys
+        rc = install_by_name(getattr(args, "identifier", "") or "")
+        if rc:
+            _sys.exit(rc)
+        return
+
    handlers = {
        "add": cmd_mcp_add,
        "remove": cmd_mcp_remove,
@ -765,15 +783,20 @@ def mcp_command(args):
    if handler:
        handler(args)
    else:
-        # No subcommand — show list
-        cmd_mcp_list()
+        # No subcommand — drop the user into the catalog picker. This is the
+        # "try enabling and it flows you into setup" UX matching `hermes plugin`.
+        from hermes_cli.mcp_picker import run_picker
+        run_picker()
        print(color("  Commands:", Colors.CYAN))
+        _info("hermes mcp                                    Open the catalog picker (default)")
+        _info("hermes mcp catalog                            List Nous-approved MCPs")
+        _info("hermes mcp install <name>                     Install a catalog MCP")
        _info("hermes mcp serve                              Run as MCP server")
-        _info("hermes mcp add <name> --url <endpoint>        Add an MCP server")
+        _info("hermes mcp add <name> --url <endpoint>        Add a custom MCP server")
        _info("hermes mcp add <name> --command <cmd>         Add a stdio server")
        _info("hermes mcp add <name> --preset <preset>       Add from a known preset")
        _info("hermes mcp remove <name>                      Remove a server")
-        _info("hermes mcp list                               List servers")
+        _info("hermes mcp list                               List configured servers")
        _info("hermes mcp test <name>                        Test connection")
        _info("hermes mcp configure <name>                   Toggle tools")
        _info("hermes mcp login <name>                       Re-authenticate OAuth")
--- a/hermes_cli/mcp_picker.py
+++ b/hermes_cli/mcp_picker.py
@ -0,0 +1,322 @@
+"""MCP picker — interactive `hermes mcp picker` (also the default `hermes mcp`).
+
+Lists every catalog entry plus any custom MCP servers the user has added via
+``hermes mcp add``, lets them pick one, and routes to install / enable /
+disable / uninstall / configure-tools flows.
+
+Mirrors the `hermes plugin` picker UX: arrow keys to navigate, ENTER on a row
+to act on it. The action depends on current status:
+
+  not installed (catalog)   → install  (clone/bootstrap if needed, prompt for creds)
+  installed / disabled      → enable
+  installed / enabled       → submenu: configure tools / disable / uninstall / reinstall
+  custom (non-catalog)      → submenu: configure tools / enable / disable / remove
+
+The picker loops until the user hits ESC/q so they can manage multiple
+entries in one session.
+"""
+
+from __future__ import annotations
+
+import sys
+from dataclasses import dataclass
+from typing import List, Optional
+
+from hermes_cli.colors import Colors, color
+from hermes_cli.cli_output import prompt_yes_no
+from hermes_cli.curses_ui import curses_single_select
+from hermes_cli.mcp_catalog import (
+    CatalogEntry,
+    CatalogError,
+    catalog_diagnostics,
+    install_entry,
+    is_enabled,
+    is_installed,
+    list_catalog,
+    installed_servers,
+    uninstall_entry,
+)
+from hermes_cli.config import load_config, save_config
+
+
+# ─── Status badges ────────────────────────────────────────────────────────────
+
+# Text shown in the picker's status column. The first three apply to catalog
+# entries; the "custom" pair applies to servers present in config but absent
+# from the catalog.
+_STATUS_NOT_INSTALLED = "available"
+_STATUS_DISABLED = "installed (disabled)"
+_STATUS_ENABLED = "enabled"
+_STATUS_CUSTOM_ENABLED = "custom — enabled"
+_STATUS_CUSTOM_DISABLED = "custom — disabled"
+
+
+# ─── Row model — unifies catalog and custom entries ──────────────────────────
+
+
+@dataclass
+class _Row:
+    """One selectable line in the picker.
+
+    Catalog rows carry their parsed manifest in ``entry``. Rows for servers
+    the user added outside the catalog leave ``entry`` as ``None`` and only
+    fill in ``name``, ``description`` and ``status``.
+    """
+
+    # Server name (the key under ``mcp_servers`` / the catalog entry name).
+    name: str
+    # Free-text column: the manifest description, or the transport for
+    # custom rows.
+    description: str
+    # One of the ``_STATUS_*`` badge strings.
+    status: str
+    # Parsed manifest; None marks a non-catalog (custom) row.
+    entry: Optional[CatalogEntry] = None
+
+    @property
+    def is_custom(self) -> bool:
+        """True when this row has no catalog manifest behind it."""
+        return self.entry is None
+
+
+def _build_rows() -> List[_Row]:
+    """Return catalog rows followed by custom (non-catalog) MCPs from config.
+
+    Catalog rows come first, in the order :func:`list_catalog` returns them,
+    each tagged available / enabled / installed (disabled). Configured
+    servers whose name is not in the catalog follow, sorted by name, tagged
+    with the "custom" badges and described by their ``url`` or ``command``.
+    """
+    catalog_entries = list_catalog()
+    catalog_names = {e.name for e in catalog_entries}
+
+    rows: List[_Row] = []
+    for entry in catalog_entries:
+        if not is_installed(entry.name):
+            status = _STATUS_NOT_INSTALLED
+        elif is_enabled(entry.name):
+            status = _STATUS_ENABLED
+        else:
+            status = _STATUS_DISABLED
+        rows.append(
+            _Row(
+                name=entry.name,
+                description=entry.description,
+                status=status,
+                entry=entry,
+            )
+        )
+
+    # Custom MCPs the user added directly (not in the catalog)
+    for name, cfg in sorted(installed_servers().items()):
+        if name in catalog_names:
+            continue
+        if isinstance(cfg, dict):
+            # Ask is_enabled() rather than re-parsing the flag here, so the
+            # badge always agrees with the Enable/Disable action that
+            # _handle_row offers for the same row.
+            enabled = is_enabled(name)
+            # Use the transport URL/command as the "description" for custom rows
+            transport = cfg.get("url") or cfg.get("command")
+        else:
+            # A hand-edited config can leave a null or scalar here; show the
+            # row as disabled instead of crashing the whole picker.
+            enabled = False
+            transport = None
+        status = _STATUS_CUSTOM_ENABLED if enabled else _STATUS_CUSTOM_DISABLED
+        desc = transport or "(no transport)"
+        rows.append(_Row(name=name, description=str(desc), status=status))
+
+    return rows
+
+
+def _format_row(row: _Row) -> str:
+    """Render *row* as one line: name (18 wide), status (24 wide), description."""
+    return "{:<18} {:<24} {}".format(row.name, row.status, row.description)
+
+
+# ─── Actions ──────────────────────────────────────────────────────────────────
+
+
+def _enable_disable(name: str, *, enable: bool) -> None:
+    """Set the ``enabled`` flag of configured server *name* and save.
+
+    Prints an error and leaves the config untouched when the server has no
+    (non-empty) block under ``mcp_servers``.
+    """
+    config = load_config()
+    all_servers = config.get("mcp_servers") or {}
+    target = all_servers.get(name)
+    if not target:
+        print(color(f"  '{name}' is not installed.", Colors.RED))
+        return
+
+    target["enabled"] = enable
+    config["mcp_servers"] = all_servers
+    save_config(config)
+
+    state = "enabled" if enable else "disabled"
+    print(color(
+        f"  ✓ '{name}' {state}. "
+        "Start a new Hermes session for changes to take effect.",
+        Colors.GREEN,
+    ))
+
+
+def _configure_tools(name: str) -> None:
+    """Run the existing ``cmd_mcp_configure`` flow for server *name*.
+
+    The command handler expects parsed CLI arguments, so a minimal
+    namespace carrying only ``name`` is handed to it.
+    """
+    from argparse import Namespace
+    from hermes_cli.mcp_config import cmd_mcp_configure
+
+    cli_args = Namespace(name=name)
+    cmd_mcp_configure(cli_args)
+
+
+def _remove_custom(name: str) -> None:
+    """Delete the ``mcp_servers`` entry for *name* after a yes/no confirmation.
+
+    Prints an error when the name is not configured. When the last server
+    is removed, the ``mcp_servers`` key itself is dropped from the config.
+    """
+    config = load_config()
+    all_servers = config.get("mcp_servers") or {}
+    if name not in all_servers:
+        print(color(f"  '{name}' is not configured.", Colors.RED))
+        return
+
+    confirmed = prompt_yes_no(f"Remove '{name}' from mcp_servers?", default=False)
+    if not confirmed:
+        return
+
+    del all_servers[name]
+    if all_servers:
+        config["mcp_servers"] = all_servers
+    else:
+        config.pop("mcp_servers", None)
+    save_config(config)
+    print(color(f"  ✓ Removed '{name}'", Colors.GREEN))
+
+
+def _handle_row(row: _Row) -> None:
+    """Run the action that fits the picked row's current state.
+
+    - catalog entry, not installed       → install it
+    - catalog entry, installed/disabled  → enable it
+    - custom server                      → submenu: configure / toggle / remove
+    - catalog entry, installed/enabled   → submenu: configure / disable /
+                                           uninstall / reinstall
+
+    Dismissing a submenu does nothing. CatalogError from an install or
+    reinstall is reported and swallowed.
+    """
+    if row.entry:
+        if not is_installed(row.name):
+            # Never installed: go straight into the install flow.
+            try:
+                install_entry(row.entry, enable=True)
+            except CatalogError as exc:
+                print(color(f"  ✗ install failed: {exc}", Colors.RED))
+            return
+        if not is_enabled(row.name):
+            # Installed but switched off: ENTER simply switches it back on.
+            _enable_disable(row.name, enable=True)
+            return
+
+    if row.is_custom:
+        toggle_label = "Disable" if is_enabled(row.name) else "Enable"
+        picked = curses_single_select(
+            f"Action for '{row.name}' (custom)",
+            [
+                "Configure tools (probe server + re-pick)",
+                toggle_label,
+                "Remove from config",
+            ],
+        )
+        if picked == 0:
+            _configure_tools(row.name)
+        elif picked == 1:
+            # Re-read the state so the toggle flips whatever is current.
+            _enable_disable(row.name, enable=not is_enabled(row.name))
+        elif picked == 2:
+            _remove_custom(row.name)
+        return
+
+    # Remaining case: catalog entry that is installed and enabled.
+    print()
+    print(color(f"  '{row.name}' is already enabled.", Colors.DIM))
+    picked = curses_single_select(
+        f"Action for '{row.name}'",
+        [
+            "Configure tools (probe server + re-pick)",
+            "Disable (keep config, stop loading on next session)",
+            "Uninstall (remove config and any cloned files)",
+            "Reinstall (re-clone, re-prompt for credentials)",
+        ],
+    )
+    if picked == 0:
+        _configure_tools(row.name)
+    elif picked == 1:
+        _enable_disable(row.name, enable=False)
+    elif picked == 2:
+        if not prompt_yes_no(f"Uninstall '{row.name}'?", default=False):
+            return
+        if uninstall_entry(row.name):
+            print(color(
+                f"  ✓ Uninstalled '{row.name}'. "
+                "Credentials in .env preserved — delete manually if no longer needed.",
+                Colors.GREEN,
+            ))
+        else:
+            print(color(f"  '{row.name}' was not installed", Colors.DIM))
+    elif picked == 3:
+        try:
+            assert row.entry is not None
+            install_entry(row.entry, enable=True)
+        except CatalogError as exc:
+            print(color(f"  ✗ reinstall failed: {exc}", Colors.RED))
+
+
+# ─── Output / entry points ────────────────────────────────────────────────────
+
+
+def _print_rows_text(rows: List[_Row]) -> None:
+    """Print *rows* as a plain-text table.
+
+    Used for `hermes mcp catalog` and as the picker's fallback when it
+    cannot run interactively. After the table, one warning is printed per
+    catalog entry that the latest scan skipped as a "future_manifest".
+    """
+    print()
+    if not rows:
+        print(color("  No MCPs in the catalog or configured.", Colors.DIM))
+        print()
+        return
+
+    print(color("  MCP Catalog + configured servers:", Colors.CYAN + Colors.BOLD))
+    print()
+    print(f"  {'Name':<18} {'Status':<24} Description")
+    print(f"  {'-' * 18} {'-' * 24} {'-' * 11}")
+    for row in rows:
+        print(f"  {_format_row(row)}")
+    print()
+    print(color(
+        "  Install: hermes mcp install <name>    Picker: hermes mcp",
+        Colors.DIM,
+    ))
+
+    # Tell the user when their Hermes is too old for some catalog entries.
+    too_new = [diag for diag in catalog_diagnostics() if diag[1] == "future_manifest"]
+    if too_new:
+        print()
+        for name, _kind, _message in too_new:
+            print(color(
+                f"  ⚠ '{name}' requires a newer Hermes — run `hermes update` "
+                "to install this entry.",
+                Colors.YELLOW,
+            ))
+        print()
+    print()
+
+
+def show_catalog() -> None:
+    """`hermes mcp catalog` — print catalog entries and custom servers as text."""
+    rows = _build_rows()
+    _print_rows_text(rows)
+
+
+def run_picker() -> None:
+    """`hermes mcp picker` (and default `hermes mcp`) — interactive selector.
+
+    When stdin is not a terminal, the text table is printed instead. In a
+    terminal, the list is rebuilt and shown again after every action until
+    the selector is dismissed or there is nothing left to list.
+    """
+    if not sys.stdin.isatty():
+        # No terminal to drive curses: fall back to the plain dump.
+        _print_rows_text(_build_rows())
+        return
+
+    title = "MCP Catalog  —  ↑↓ navigate  ENTER act on entry  ESC/q quit"
+    while True:
+        rows = _build_rows()
+        if not rows:
+            _print_rows_text(rows)
+            return
+
+        picked = curses_single_select(title, [_format_row(r) for r in rows])
+        if picked is None:
+            return
+        _handle_row(rows[picked])
+
+
+def install_by_name(identifier: str) -> int:
+    """`hermes mcp install <name>` — install a catalog entry by name.
+
+    Returns 0 when the install completes and 1 when the name is not in the
+    catalog or the install raises CatalogError, so the CLI can use the
+    value as its exit code.
+    """
+    from hermes_cli.mcp_catalog import get_entry
+
+    entry = get_entry(identifier)
+    if entry is None:
+        print(color(
+            f"  ✗ '{identifier}' is not in the catalog. "
+            "Run `hermes mcp catalog` to see available entries.",
+            Colors.RED,
+        ))
+        return 1
+
+    try:
+        install_entry(entry, enable=True)
+    except CatalogError as exc:
+        print(color(f"  ✗ install failed: {exc}", Colors.RED))
+        return 1
+    else:
+        return 0
--- a/hermes_cli/memory_setup.py
+++ b/hermes_cli/memory_setup.py
@ -7,13 +7,13 @@ the provider's config schema. Writes config to config.yaml + .env.

 from __future__ import annotations

-import getpass
 import os
 import sys
 import shlex
 from pathlib import Path

 from hermes_constants import get_hermes_home
+from hermes_cli.secret_prompt import masked_secret_prompt


 # ---------------------------------------------------------------------------
@ -39,12 +39,7 @@ def _prompt(label: str, default: str | None = None, secret: bool = False) -> str
    """Prompt for a value with optional default and secret masking."""
    suffix = f" [{default}]" if default else ""
    if secret:
-        sys.stdout.write(f"  {label}{suffix}: ")
-        sys.stdout.flush()
-        if sys.stdin.isatty():
-            val = getpass.getpass(prompt="")
-        else:
-            val = sys.stdin.readline().strip()
+        val = masked_secret_prompt(f"  {label}{suffix}: ")
    else:
        sys.stdout.write(f"  {label}{suffix}: ")
        sys.stdout.flush()
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@ -37,7 +37,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-sonnet-4.6",            ""),
    ("moonshotai/kimi-k2.6",                   "recommended"),
    ("openrouter/pareto-code",                 "auto-routes to cheapest coder meeting openrouter.min_coding_score"),
-    ("qwen/qwen3.6-plus",                      ""),
+    ("qwen/qwen3.7-max",                       ""),
    ("anthropic/claude-haiku-4.5",             ""),
    ("openai/gpt-5.5",                         ""),
    ("openai/gpt-5.5-pro",                     ""),
@ -166,7 +166,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "moonshotai/kimi-k2.6",
-        "qwen/qwen3.6-plus",
+        "qwen/qwen3.7-max",
        "anthropic/claude-haiku-4.5",
        "openai/gpt-5.5",
        "openai/gpt-5.5-pro",
@ -199,6 +199,18 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "gpt-4o",
        "gpt-4o-mini",
    ],
+    "openai-api": [
+        "gpt-5.5",
+        "gpt-5.5-pro",
+        "gpt-5.4",
+        "gpt-5.4-mini",
+        "gpt-5.4-nano",
+        "gpt-5-mini",
+        "gpt-5.3-codex",
+        "gpt-4.1",
+        "gpt-4o",
+        "gpt-4o-mini",
+    ],
    "openai-codex": _codex_curated_models(),
    "xai-oauth": _xai_curated_models(),
    "copilot-acp": [
@ -387,6 +399,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "mimo-v2-omni",
        "minimax-m2.7",
        "minimax-m2.5",
+        "qwen3.7-max",
        "qwen3.6-plus",
        "qwen3.5-plus",
    ],
@ -928,8 +941,9 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("lmstudio",       "LM Studio",                "LM Studio (local desktop app with built-in model server)"),
    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
+    ProviderEntry("openai-api",     "OpenAI API",               "OpenAI API (api.openai.com, API key)"),
    ProviderEntry("alibaba",        "Qwen Cloud",               "Qwen Cloud / DashScope Coding (Qwen + multi-provider)"),
-    ProviderEntry("xai-oauth",      "xAI Grok OAuth (SuperGrok Subscription)", "xAI Grok OAuth (SuperGrok Subscription)"),
+    ProviderEntry("xai-oauth",      "xAI Grok OAuth (SuperGrok / Premium+)", "xAI Grok OAuth (SuperGrok / Premium+)"),
    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
    ProviderEntry("tencent-tokenhub", "Tencent TokenHub",       "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"),
    ProviderEntry("nvidia",         "NVIDIA NIM",               "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
@ -2229,7 +2243,7 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
        live = fetch_ollama_cloud_models(force_refresh=force_refresh)
        if live:
            return live
-    if normalized == "openai":
+    if normalized in ("openai", "openai-api"):
        api_key = os.getenv("OPENAI_API_KEY", "").strip()
        if api_key:
            base_raw = os.getenv("OPENAI_BASE_URL", "").strip().rstrip("/")
@ -3002,6 +3016,8 @@ def opencode_model_api_mode(provider_id: Optional[str], model_id: Optional[str])
    if provider == "opencode-go":
        if normalized.startswith("minimax-"):
            return "anthropic_messages"
+        if normalized.startswith("qwen3.7-max"):
+            return "anthropic_messages"
        return "chat_completions"

    if provider == "opencode-zen":
@ -3491,7 +3507,7 @@ def validate_requested_model(
            suggestion_text = ""
            if suggestions:
                suggestion_text = "\n  Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
-            provider_label = "OpenAI Codex" if normalized == "openai-codex" else "xAI Grok OAuth (SuperGrok Subscription)"
+            provider_label = "OpenAI Codex" if normalized == "openai-codex" else "xAI Grok OAuth (SuperGrok / Premium+)"
            return {
                "accepted": True,
                "persist": True,
--- a/hermes_cli/oneshot.py
+++ b/hermes_cli/oneshot.py
@ -17,7 +17,6 @@ Model / provider selection mirrors `hermes chat`:

 Env var fallbacks (used when the corresponding arg is not passed):
    - HERMES_INFERENCE_MODEL
-    - HERMES_INFERENCE_PROVIDER  (already read by resolve_runtime_provider)
 """

 from __future__ import annotations
@ -28,6 +27,8 @@ import sys
 from contextlib import redirect_stderr, redirect_stdout
 from typing import Optional

+from hermes_cli.fallback_config import get_fallback_chain
+

 def _normalize_toolsets(toolsets: object = None) -> list[str] | None:
    if not toolsets:
@ -133,9 +134,8 @@ def run_oneshot(
        prompt: The user message to send.
        model: Optional model override. Falls back to HERMES_INFERENCE_MODEL
            env var, then config.yaml's model.default / model.model.
-        provider: Optional provider override. Falls back to
-            HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
-            then "auto".
+        provider: Optional provider override. Falls back to config.yaml's
+            model.provider, then "auto".
        toolsets: Optional comma-separated string or iterable of toolsets.

    Returns the exit code.  Caller should sys.exit() with the return.
@ -301,14 +301,9 @@ def _run_agent(
        toolsets_list = sorted(_get_platform_tools(cfg, "cli"))

    session_db = _create_session_db_for_oneshot()
-    # Read fallback chain from profile config — supports both the new list
-    # format (fallback_providers) and the legacy single-dict (fallback_model).
-    # Mirrors the same normalization in cli.py so oneshot workers (e.g. kanban
-    # workers spawned via `hermes -p <profile> chat -q ...`) honour the
-    # profile's fallback chain just like interactive sessions do.
-    _fb = cfg.get("fallback_providers") or cfg.get("fallback_model") or []
-    if isinstance(_fb, dict):
-        _fb = [_fb] if _fb.get("provider") and _fb.get("model") else []
+    # Read the effective fallback chain from profile config so oneshot workers
+    # honour the same merge semantics as interactive CLI and gateway sessions.
+    _fb = get_fallback_chain(cfg)

    agent = AIAgent(
        api_key=runtime.get("api_key"),
--- a/hermes_cli/plugins.py
+++ b/hermes_cli/plugins.py
@ -640,6 +640,88 @@ class PluginContext:
            self.manifest.name, provider.name,
        )

+    # -- TTS provider registration -------------------------------------------
+
+    def register_tts_provider(self, provider) -> None:
+        """Register a plugin-supplied text-to-speech backend.
+
+        ``provider`` has to be an instance of
+        :class:`agent.tts_provider.TTSProvider`; anything else is logged
+        as a warning and ignored. ``provider.name`` is the value that
+        ``tts.provider`` in ``config.yaml`` is matched against when
+        ``text_to_speech`` tool calls are routed — **but only when**:
+
+        1. ``provider.name`` is NOT a built-in TTS provider name
+           (``edge``, ``openai``, ``elevenlabs``, …). Built-ins always
+           win — the registry rejects shadowing names with a warning.
+        2. There is NO ``tts.providers.<name>: type: command`` entry
+           with the same name. Command-providers (PR #17843) win on
+           name collision because config is more local than plugin
+           install.
+
+        This registry sits alongside the command-provider registry and
+        does not replace it — see issue #30398 for the design rationale.
+        """
+        from agent.tts_provider import TTSProvider
+        from agent.tts_registry import register_provider as _register_tts_provider
+
+        if isinstance(provider, TTSProvider):
+            _register_tts_provider(provider)
+            logger.info(
+                "Plugin '%s' registered TTS provider: %s",
+                self.manifest.name, provider.name,
+            )
+            return
+
+        # Wrong type: skip the registration, but leave a trace in the log.
+        logger.warning(
+            "Plugin '%s' tried to register a TTS provider that does "
+            "not inherit from TTSProvider. Ignoring.",
+            self.manifest.name,
+        )
+
+    # -- transcription (STT) provider registration ---------------------------
+
+    def register_transcription_provider(self, provider) -> None:
+        """Register a plugin-supplied speech-to-text backend.
+
+        ``provider`` has to be an instance of
+        :class:`agent.transcription_provider.TranscriptionProvider`;
+        anything else is logged as a warning and ignored.
+        ``provider.name`` is the value that ``stt.provider`` in
+        ``config.yaml`` is matched against when
+        :func:`tools.transcription_tools.transcribe_audio` calls are
+        routed — **but only when**:
+
+        1. ``provider.name`` is NOT a built-in STT provider name
+           (``local``, ``local_command``, ``groq``, ``openai``,
+           ``mistral``, ``xai``). Built-ins always win — the registry
+           rejects shadowing names with a warning.
+        2. There is NO ``stt.providers.<name>: type: command`` entry
+           with the same name. Command-providers win on name
+           collision because config is more local than plugin install
+           — same precedence rule as TTS.
+
+        This hook sits alongside the in-tree dispatcher and the STT
+        command-provider registry and does not replace them. The 6
+        built-in STT backends keep their native implementations in
+        ``tools/transcription_tools.py``; the hook is meant for *new*
+        Python engines (OpenRouter, SenseAudio, Gemini-STT, custom
+        proprietary backends).
+        """
+        from agent.transcription_provider import TranscriptionProvider
+        from agent.transcription_registry import register_provider as _register_stt_provider
+
+        if isinstance(provider, TranscriptionProvider):
+            _register_stt_provider(provider)
+            logger.info(
+                "Plugin '%s' registered transcription provider: %s",
+                self.manifest.name, provider.name,
+            )
+            return
+
+        # Wrong type: skip the registration, but leave a trace in the log.
+        logger.warning(
+            "Plugin '%s' tried to register a transcription provider that "
+            "does not inherit from TranscriptionProvider. Ignoring.",
+            self.manifest.name,
+        )
+
    # -- platform adapter registration ---------------------------------------

    def register_platform(
@ -698,6 +780,119 @@ class PluginContext:

    # -- hook registration --------------------------------------------------

+    # -- auxiliary task registration ---------------------------------------
+
+    def register_auxiliary_task(
+        self,
+        key: str,
+        *,
+        display_name: str,
+        description: str,
+        defaults: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """Register a plugin-defined auxiliary LLM task.
+
+        Auxiliary tasks are LLM-backed side jobs (vision analysis, web extraction,
+        compression, smart-approval, etc.) that route through ``auxiliary_client.py``.
+        Each task has its own ``auxiliary.<key>`` config block where users can
+        pin a provider/model independent of the main chat model.
+
+        Plugins use this to declare their own auxiliary tasks without touching
+        core files. After registration, the task:
+
+          - Appears in the ``hermes model → Configure auxiliary models`` picker
+          - Has its provider/model/base_url/api_key bridged from config.yaml to
+            ``AUXILIARY_<KEY_UPPER>_*`` env vars at gateway startup
+          - Gets default routing fields (provider="auto", model="", etc.) merged
+            into loaded configs so ``cfg.get("auxiliary", {}).get(key)`` works
+
+        Args:
+            key: stable task key (snake_case). Used in config ``auxiliary.<key>``
+                and env vars ``AUXILIARY_<KEY_UPPER>_*``, so it is limited to
+                ASCII letters, digits and underscores. Must not shadow a
+                built-in task key (vision, compression, web_extract, approval,
+                mcp, title_generation, skills_hub, curator).
+            display_name: human-readable name shown in the picker.
+            description: short one-line description shown next to the name.
+            defaults: optional dict of default routing fields. Recognized keys:
+                ``provider`` (default "auto"), ``model`` (default ""),
+                ``base_url`` (default ""), ``api_key`` (default ""),
+                ``timeout`` (default 60), ``extra_body`` (default {}),
+                plus any task-specific extras (e.g. ``download_timeout``).
+                Unknown keys are preserved verbatim — the plugin owns the
+                schema for its own task.
+
+        Raises:
+            ValueError: if *key* is empty or not a string, contains characters
+                other than ASCII letters, digits and underscores, shadows a
+                built-in auxiliary task key, or is already registered by a
+                different plugin.
+
+        Example:
+            ctx.register_auxiliary_task(
+                key="memory_retain_filter",
+                display_name="Memory retain filter",
+                description="hindsight pre-retain dedup/extract",
+                defaults={"provider": "auto", "timeout": 30},
+            )
+        """
+        # Validate key shape
+        if not key or not isinstance(key, str):
+            raise ValueError(
+                f"Plugin '{self.manifest.name}' tried to register auxiliary task "
+                f"with invalid key {key!r}"
+            )
+        # str.isalnum() is Unicode-aware, so it alone would accept keys such
+        # as "café" or ones with non-ASCII digits. The key is turned into an
+        # environment variable name, so insist on ASCII as well.
+        if not (key.isascii() and all(c.isalnum() or c == "_" for c in key)):
+            raise ValueError(
+                f"Plugin '{self.manifest.name}' auxiliary task key {key!r} "
+                f"must contain only ASCII letters, digits and underscores"
+            )
+
+        # Lazy import to avoid circular: hermes_cli.main imports plugins indirectly
+        from hermes_cli.main import _AUX_TASKS as _BUILTIN_AUX_TASKS
+
+        builtin_keys = {k for k, _name, _desc in _BUILTIN_AUX_TASKS}
+        if key in builtin_keys:
+            raise ValueError(
+                f"Plugin '{self.manifest.name}' cannot register auxiliary task "
+                f"{key!r} — that key is reserved for a built-in task. "
+                f"Pick a plugin-namespaced key (e.g. '{self.manifest.name}_{key}')."
+            )
+
+        # Reject duplicate registrations across plugins. Re-registration by
+        # the same plugin is allowed and overwrites the previous entry.
+        existing = self._manager._aux_tasks.get(key)
+        if existing is not None and existing.get("plugin") != self.manifest.name:
+            raise ValueError(
+                f"Plugin '{self.manifest.name}' cannot register auxiliary task "
+                f"{key!r} — already registered by plugin "
+                f"'{existing.get('plugin')}'"
+            )
+
+        # Normalize defaults — plugin owns the schema, but we ensure routing
+        # fields exist with sensible types so consumers don't crash.
+        merged_defaults: Dict[str, Any] = {
+            "provider": "auto",
+            "model": "",
+            "base_url": "",
+            "api_key": "",
+            "timeout": 60,
+            "extra_body": {},
+        }
+        if defaults:
+            # Plugin-supplied values override the baseline; extras are kept.
+            merged_defaults.update(defaults)
+
+        self._manager._aux_tasks[key] = {
+            "key": key,
+            "display_name": display_name,
+            "description": description,
+            "defaults": merged_defaults,
+            "plugin": self.manifest.name,
+        }
+        logger.debug(
+            "Plugin %s registered auxiliary task: %s (%s)",
+            self.manifest.name,
+            key,
+            display_name,
+        )
+
    def register_hook(self, hook_name: str, callback: Callable) -> None:
        """Register a lifecycle hook callback.

@ -782,6 +977,9 @@ class PluginManager:
        self._cli_ref = None  # Set by CLI after plugin discovery
        # Plugin skill registry: qualified name → metadata dict.
        self._plugin_skills: Dict[str, Dict[str, Any]] = {}
+        # Plugin-registered auxiliary tasks: key → {key, display_name,
+        # description, defaults, plugin}. See PluginContext.register_auxiliary_task.
+        self._aux_tasks: Dict[str, Dict[str, Any]] = {}

    # -----------------------------------------------------------------------
    # Public
@ -803,6 +1001,7 @@ class PluginManager:
            self._cli_commands.clear()
            self._plugin_commands.clear()
            self._plugin_skills.clear()
+            self._aux_tasks.clear()
            self._context_engine = None
        self._discovered = True

@ -1548,6 +1747,21 @@ def get_plugin_commands() -> Dict[str, dict]:
    return _ensure_plugins_discovered()._plugin_commands


+def get_plugin_auxiliary_tasks() -> List[Dict[str, Any]]:
+    """List every plugin-registered auxiliary task, ordered by task key.
+
+    Entries are the registration dicts stored by
+    :meth:`PluginContext.register_auxiliary_task`, i.e.
+    ``{key, display_name, description, defaults, plugin}``.
+
+    The plugin manager is obtained through ``_ensure_plugins_discovered()``,
+    so callers do not need an explicit ``discover_plugins()`` call first.
+    Sorting by ``key`` keeps the order deterministic for pickers and tests.
+    """
+    registry = _ensure_plugins_discovered()._aux_tasks
+    ordered = sorted(registry.items(), key=lambda pair: pair[0])
+    return [entry for _key, entry in ordered]
+
+
 def get_plugin_toolsets() -> List[tuple]:
    """Return plugin toolsets as ``(key, label, description)`` tuples.

--- a/hermes_cli/plugins_cmd.py
+++ b/hermes_cli/plugins_cmd.py
@ -20,6 +20,7 @@ from typing import Any, Optional

 from hermes_constants import get_hermes_home
 from hermes_cli.config import cfg_get
+from hermes_cli.secret_prompt import masked_secret_prompt

 logger = logging.getLogger(__name__)

@ -76,22 +77,42 @@ def _plugins_dir() -> Path:
    return plugins


-def _sanitize_plugin_name(name: str, plugins_dir: Path) -> Path:
+def _sanitize_plugin_name(
+    name: str,
+    plugins_dir: Path,
+    *,
+    allow_subdir: bool = False,
+) -> Path:
    """Validate a plugin name and return the safe target path inside *plugins_dir*.

    Raises ``ValueError`` if the name contains path-traversal sequences or would
    resolve outside the plugins directory.
+
+    ``allow_subdir=True`` permits a single forward slash inside *name* so
+    category-namespaced plugin keys like ``observability/langfuse`` or
+    ``image_gen/openai`` (the registry keys emitted by ``_discover_all_plugins``)
+    can be looked up. ``..`` and backslash are still rejected, leading and
+    trailing slashes are stripped, and the resolved target must still live
+    inside *plugins_dir*. Install paths leave this at the default ``False``
+    because a freshly-cloned plugin always lands top-level under
+    ``~/.hermes/plugins/<name>/``.
    """
    if not name:
        raise ValueError("Plugin name must not be empty.")

+    if allow_subdir:
+        name = name.strip("/")
+        if not name:
+            raise ValueError("Plugin name must not be empty.")
+
    if name in {".", ".."}:
        raise ValueError(
            f"Invalid plugin name '{name}': must not reference the plugins directory itself."
        )

    # Reject obvious traversal characters
-    for bad in ("/", "\\", ".."):
+    bad_chars = ("\\", "..") if allow_subdir else ("/", "\\", "..")
+    for bad in bad_chars:
        if bad in name:
            raise ValueError(f"Invalid plugin name '{name}': must not contain '{bad}'.")

@ -267,8 +288,7 @@ def _prompt_plugin_env_vars(manifest: dict, console) -> None:

        try:
            if secret:
-                import getpass
-                value = getpass.getpass(f"  {name}: ").strip()
+                value = masked_secret_prompt(f"  {name}: ").strip()
            else:
                value = input(f"  {name}: ").strip()
        except (EOFError, KeyboardInterrupt):
@ -326,7 +346,7 @@ def _display_removed(name: str, plugins_dir: Path) -> None:

 def _require_installed_plugin(name: str, plugins_dir: Path, console) -> Path:
    """Return the plugin path if it exists, or exit with an error listing installed plugins."""
-    target = _sanitize_plugin_name(name, plugins_dir)
+    target = _sanitize_plugin_name(name, plugins_dir, allow_subdir=True)
    if not target.exists():
        installed = ", ".join(d.name for d in plugins_dir.iterdir() if d.is_dir()) or "(none)"
        console.print(
@ -1051,7 +1071,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
            curses.init_pair(1, curses.COLOR_GREEN, -1)
            curses.init_pair(2, curses.COLOR_YELLOW, -1)
            curses.init_pair(3, curses.COLOR_CYAN, -1)
-            curses.init_pair(4, 8, -1)  # dim gray
+            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)  # dim gray
        cursor = 0
        scroll_offset = 0

@ -1196,7 +1216,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
                            curses.init_pair(1, curses.COLOR_GREEN, -1)
                            curses.init_pair(2, curses.COLOR_YELLOW, -1)
                            curses.init_pair(3, curses.COLOR_CYAN, -1)
-                            curses.init_pair(4, 8, -1)
+                            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
                        curses.curs_set(0)
            elif key in {curses.KEY_ENTER, 10, 13}:
                if cursor < n_plugins:
@ -1228,7 +1248,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
                            curses.init_pair(1, curses.COLOR_GREEN, -1)
                            curses.init_pair(2, curses.COLOR_YELLOW, -1)
                            curses.init_pair(3, curses.COLOR_CYAN, -1)
-                            curses.init_pair(4, 8, -1)
+                            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
                        curses.curs_set(0)
            elif key in {27, ord("q")}:
                # Save plugin changes on exit
@ -1508,7 +1528,7 @@ def _user_installed_plugin_dir(name: str) -> Optional[Path]:
    """Resolved path under ``~/.hermes/plugins/<name>`` if it exists."""
    plugins_dir = _plugins_dir()
    try:
-        target = _sanitize_plugin_name(name, plugins_dir)
+        target = _sanitize_plugin_name(name, plugins_dir, allow_subdir=True)
    except ValueError:
        return None
    return target if target.is_dir() else None
--- a/hermes_cli/portal_cli.py
+++ b/hermes_cli/portal_cli.py
@ -0,0 +1,219 @@
+"""``hermes portal`` — small CLI surface for Nous Portal users.
+
+Subcommands:
+  status   Show Portal auth state + which Tool Gateway tools are routed.
+  open     Open the Portal subscription page in the user's default browser.
+  tools    List Tool Gateway tools and which are active in the current config.
+
+This command is intentionally minimal — it does not duplicate functionality
+already in ``hermes auth`` or ``hermes tools``. It's a discovery + status
+surface for the Portal subscription itself.
+"""
+from __future__ import annotations
+
+import sys
+import webbrowser
+from typing import Optional
+
+from hermes_cli.colors import Colors, color
+from hermes_cli.config import load_config
+
+DEFAULT_PORTAL_URL = "https://portal.nousresearch.com"
+SUBSCRIPTION_URL = "https://portal.nousresearch.com/manage-subscription"
+DOCS_URL = "https://hermes-agent.nousresearch.com/docs/user-guide/features/tool-gateway"
+
+
+def _nous_portal_base_url() -> str:
+    """Return the Portal base URL from Nous auth state, else the default.
+
+    The ``portal_base_url`` reported by ``get_nous_auth_status()`` is used
+    when it is a non-blank string; surrounding whitespace and trailing
+    slashes are removed. Any failure while reading auth state, or a value
+    that is empty after normalisation, yields ``DEFAULT_PORTAL_URL``.
+    """
+    try:
+        from hermes_cli.auth import get_nous_auth_status
+        status = get_nous_auth_status() or {}
+        url = status.get("portal_base_url")
+    except Exception:
+        # Best-effort lookup: auth state is optional for this command, so
+        # any failure simply means "use the default Portal".
+        return DEFAULT_PORTAL_URL
+
+    if isinstance(url, str):
+        # Normalise once and test the normalised value. Previously the
+        # emptiness check used the stripped string while the unstripped one
+        # was returned, so padding leaked through and "/" became "".
+        cleaned = url.strip().rstrip("/")
+        if cleaned:
+            return cleaned
+    return DEFAULT_PORTAL_URL
+
+
+def _cmd_status(args) -> int:
+    """Print Portal auth state, the configured provider, and Tool Gateway routing.
+
+    Both lookups are best-effort: a failing auth lookup is treated as "not
+    logged in", and a failing subscription-feature lookup prints a
+    placeholder line instead of the routing table. Always returns 0.
+    """
+    from hermes_cli.auth import get_nous_auth_status
+    from hermes_cli.nous_subscription import get_nous_subscription_features
+
+    config = load_config() or {}
+
+    try:
+        auth = get_nous_auth_status() or {}
+    except Exception:
+        auth = {}
+    logged_in = bool(auth.get("logged_in"))
+
+    # --- Portal section ---------------------------------------------------
+    print()
+    print(color("  Nous Portal", Colors.MAGENTA))
+    print(color("  ───────────", Colors.MAGENTA))
+    if not logged_in:
+        print(f"  Auth:    {color('not logged in', Colors.YELLOW)}")
+        print(f"  Sign up: {SUBSCRIPTION_URL}")
+        print("  Login:   hermes auth add nous --type oauth")
+    else:
+        portal_url = auth.get("portal_base_url") or DEFAULT_PORTAL_URL
+        print(f"  Auth:    {color('✓ logged in', Colors.GREEN)}")
+        print(f"  Portal:  {portal_url}")
+        api_url = auth.get("inference_base_url")
+        if api_url:
+            print(f"  API:     {api_url}")
+
+    # The provider line is shown regardless of auth state; it is omitted
+    # entirely when no provider is configured.
+    model_cfg = config.get("model")
+    if not isinstance(model_cfg, dict):
+        model_cfg = {}
+    provider = str(model_cfg.get("provider") or "").strip().lower()
+    if provider == "nous":
+        print(f"  Model:   {color('✓ using Nous as inference provider', Colors.GREEN)}")
+    elif provider:
+        print(f"  Model:   currently {provider} (switch with `hermes model`)")
+
+    # --- Tool Gateway section ---------------------------------------------
+    print()
+    print(color("  Tool Gateway", Colors.MAGENTA))
+    print(color("  ────────────", Colors.MAGENTA))
+    try:
+        features = get_nous_subscription_features(config)
+    except Exception:
+        features = None
+    if features is None:
+        print("  (could not resolve subscription state)")
+        return 0
+
+    table = []
+    for feat in features.items():
+        if feat.managed_by_nous:
+            state = color("via Nous Portal", Colors.GREEN)
+        elif not feat.active:
+            state = color("not configured", Colors.DIM)
+        elif feat.current_provider:
+            state = feat.current_provider
+        else:
+            state = "active"
+        table.append((feat.label, state))
+
+    # Pad labels to the widest one so the state column lines up.
+    width = max((len(label) for label, _state in table), default=0)
+    for label, state in table:
+        print(f"  {label:<{width}}   {state}")
+
+    if not logged_in:
+        print()
+        print(color(f"  Docs: {DOCS_URL}", Colors.DIM))
+    return 0
+
+
+def _cmd_open(args) -> int:
+    """Launch the default browser on the Portal subscription page.
+
+    Returns 0 when ``webbrowser.open`` reports success. Returns 1, after
+    telling the user to visit the printed URL manually, when it reports
+    failure or raises.
+    """
+    print(f"Opening {SUBSCRIPTION_URL}")
+    try:
+        launched = webbrowser.open(SUBSCRIPTION_URL)
+    except Exception:
+        launched = False
+
+    if launched:
+        return 0
+
+    print()
+    print("Could not launch a browser. Visit the URL above manually.")
+    return 1
+
+
+def _cmd_tools(args) -> int:
+    """Print the Tool Gateway catalog with the current routing of each tool.
+
+    Returns 1, with a message on stderr, when the subscription features
+    cannot be resolved; otherwise prints one line per catalog entry and
+    returns 0.
+    """
+    from hermes_cli.nous_subscription import get_nous_subscription_features
+
+    config = load_config() or {}
+    try:
+        features = get_nous_subscription_features(config)
+    except Exception:
+        print("Could not resolve Tool Gateway state.", file=sys.stderr)
+        return 1
+
+    # Static catalog: (feature key, display label, partner) for the partners
+    # Tool Gateway routes to today.
+    catalog = [
+        ("web",       "Web search & extract",  "Firecrawl"),
+        ("image_gen", "Image generation",      "FAL"),
+        ("tts",       "Text-to-speech",        "OpenAI TTS"),
+        ("browser",   "Browser automation",    "Browser Use"),
+        ("modal",     "Cloud terminal",        "Modal"),
+    ]
+
+    print()
+    print(color("  Tool Gateway catalog", Colors.MAGENTA))
+    print(color("  ────────────────────", Colors.MAGENTA))
+
+    if not features.nous_auth_present:
+        print(color("  Not logged into Nous Portal — sign in with `hermes auth add nous --type oauth`.", Colors.YELLOW))
+        print()
+
+    label_width = max(len(entry[1]) for entry in catalog)
+    for key, label, partner in catalog:
+        feat = features.features.get(key)
+        if feat is None:
+            # The catalog lists a key the resolved features do not report.
+            state = color("unknown", Colors.DIM)
+        elif feat.managed_by_nous:
+            state = color("✓ via Nous Portal", Colors.GREEN)
+        elif not feat.active:
+            state = color("not configured", Colors.DIM)
+        elif feat.current_provider:
+            state = feat.current_provider
+        else:
+            state = "active"
+        print(f"  {label:<{label_width}}  partner: {partner:<14} {state}")
+
+    print()
+    print(color(f"  Manage your subscription: {SUBSCRIPTION_URL}", Colors.DIM))
+    print(color(f"  Docs: {DOCS_URL}", Colors.DIM))
+    return 0
+
+
+def portal_command(args) -> int:
+    """Dispatch `hermes portal <subcommand>` to its handler.
+
+    A missing or empty subcommand falls through to ``status`` so the bare
+    command gives a useful overview (the gh / kubectl convention). An
+    unrecognised subcommand prints a hint on stderr and returns 1.
+    """
+    handlers = {
+        None: _cmd_status,
+        "": _cmd_status,
+        "status": _cmd_status,
+        "open": _cmd_open,
+        "tools": _cmd_tools,
+    }
+    sub = getattr(args, "portal_command", None)
+    handler = handlers.get(sub)
+    if handler is not None:
+        return handler(args)
+
+    print(f"Unknown portal subcommand: {sub}", file=sys.stderr)
+    print("Run `hermes portal -h` for usage.", file=sys.stderr)
+    return 1
+
+
+def add_parser(subparsers) -> None:
+    """Attach the `hermes portal` command and its subcommands to *subparsers*.
+
+    The chosen subcommand is stored under ``portal_command`` on the parsed
+    namespace, and ``func`` is set to :func:`portal_command` for every form
+    of the command, including the bare one.
+    """
+    portal_parser = subparsers.add_parser(
+        "portal",
+        help="Nous Portal status, subscription, and Tool Gateway routing",
+        description=(
+            "Inspect Nous Portal auth, Tool Gateway routing, and open the "
+            "Portal subscription page. Subcommands: status (default), "
+            "open, tools."
+        ),
+    )
+    portal_sub = portal_parser.add_subparsers(dest="portal_command")
+
+    # (name, help) pairs, registered in the order they should appear in -h.
+    subcommand_help = (
+        ("status", "Show Portal auth + Tool Gateway routing summary (default)"),
+        ("open", "Open the Portal subscription page in your default browser"),
+        ("tools", "List Tool Gateway tools and which are routed via Nous"),
+    )
+    for name, help_text in subcommand_help:
+        portal_sub.add_parser(name, help=help_text)
+
+    portal_parser.set_defaults(func=portal_command)
--- a/hermes_cli/profile_describer.py
+++ b/hermes_cli/profile_describer.py
@ -35,6 +35,7 @@ from pathlib import Path
 from typing import Optional

 from hermes_cli import profiles as profiles_mod
+from agent.skill_utils import is_excluded_skill_path

 logger = logging.getLogger(__name__)

@ -109,8 +110,7 @@ def _collect_skills(profile_dir: Path) -> list[str]:
        return []
    names: list[str] = []
    for md in skills_dir.rglob("SKILL.md"):
-        path_str = str(md)
-        if "/.hub/" in path_str or "/.git/" in path_str:
+        if is_excluded_skill_path(md):
            continue
        try:
            rel = md.relative_to(skills_dir)
@ -201,7 +201,7 @@ def describe_profile(
    skill_list = "\n".join(f"  - {n}" for n in skill_names) or "  (no skills installed)"
    skill_count = sum(
        1 for _ in (profile_dir / "skills").rglob("SKILL.md")
-        if "/.hub/" not in str(_) and "/.git/" not in str(_)
+        if not is_excluded_skill_path(_)
    ) if (profile_dir / "skills").is_dir() else 0

    # Read model + provider from the profile's config.
--- a/hermes_cli/profile_distribution.py
+++ b/hermes_cli/profile_distribution.py
@ -70,6 +70,8 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple

+from agent.skill_utils import is_excluded_skill_path
+

 # ---------------------------------------------------------------------------
 # Constants
@ -430,6 +432,20 @@ def _stage_source(source: str, workdir: Path) -> Tuple[Path, str]:
    )


+def _reject_distribution_symlinks(staged: Path) -> None:
+    """Fail fast if the staged distribution tree contains a symlink.
+
+    Walks ``staged`` recursively and raises ``DistributionError`` for the
+    first symlink that ``rglob`` yields, naming it relative to ``staged``
+    when that is possible. Returns ``None`` when no symlink is found.
+    """
+    first_link = next(
+        (candidate for candidate in staged.rglob("*") if candidate.is_symlink()),
+        None,
+    )
+    if first_link is None:
+        return
+    try:
+        shown = first_link.relative_to(staged)
+    except ValueError:
+        # Not expressible relative to the staging root; report it as-is.
+        shown = first_link
+    raise DistributionError(
+        f"Profile distributions cannot contain symlinks: {shown}"
+    )
+
+
 # ---------------------------------------------------------------------------
 # Install
 # ---------------------------------------------------------------------------
@ -463,7 +479,9 @@ def _count_skills(staged: Path) -> int:
    skills_dir = staged / "skills"
    if not skills_dir.is_dir():
        return 0
-    return sum(1 for _ in skills_dir.rglob("SKILL.md"))
+    return sum(
+        1 for p in skills_dir.rglob("SKILL.md") if not is_excluded_skill_path(p)
+    )


 def plan_install(
@ -480,6 +498,7 @@ def plan_install(
    from hermes_cli import __version__ as hermes_version

    staged, provenance = _stage_source(source, workdir)
+    _reject_distribution_symlinks(staged)
    manifest = read_manifest(staged)
    if manifest is None:
        raise DistributionError(
--- a/hermes_cli/profiles.py
+++ b/hermes_cli/profiles.py
@ -30,6 +30,8 @@ from dataclasses import dataclass
 from pathlib import Path, PurePosixPath, PureWindowsPath
 from typing import List, Optional

+from agent.skill_utils import is_excluded_skill_path
+
 _PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")

 # Directories bootstrapped inside every new profile
@ -485,8 +487,9 @@ def _count_skills(profile_dir: Path) -> int:
        return 0
    count = 0
    for md in skills_dir.rglob("SKILL.md"):
-        if "/.hub/" not in str(md) and "/.git/" not in str(md):
-            count += 1
+        if is_excluded_skill_path(md):
+            continue
+        count += 1
    return count


@ -720,7 +723,17 @@ def create_profile(
            for filename in _CLONE_CONFIG_FILES:
                src = source_dir / filename
                if src.exists():
-                    shutil.copy2(src, profile_dir / filename)
+                    dst = profile_dir / filename
+                    shutil.copy2(src, dst)
+                    # Tighten .env to owner-only after copy. shutil.copy2
+                    # preserves source mode bits, but if the source's .env
+                    # was loose (host umask 0o022 leaving 0o644), tighten
+                    # explicitly so the clone doesn't inherit weak perms.
+                    if filename == ".env":
+                        try:
+                            os.chmod(str(dst), 0o600)
+                        except OSError:
+                            pass

            # Clone installed skills from the source profile. The dashboard's
            # "clone from default" flow is expected to preserve both bundled
@ -774,6 +787,14 @@ def create_profile(
        except Exception:
            pass  # non-fatal — user can describe later with `hermes profile describe`

+    # Phase 4: when running inside a container under s6, register the
+    # new profile's gateway as a runtime s6 service so
+    # `hermes -p <profile> gateway start` can supervise it via
+    # `s6-svc -u` instead of spawning a bare process. On host (systemd
+    # / launchd / windows) this is a no-op — the existing per-profile
+    # unit-generation paths handle gateway lifecycle.
+    _maybe_register_gateway_service(canon)
+
    return profile_dir


@ -890,6 +911,10 @@ def delete_profile(name: str, yes: bool = False) -> Path:

    # 1. Disable service (prevents auto-restart)
    _cleanup_gateway_service(canon, profile_dir)
+    # 1b. Phase 4: unregister the s6 service slot (container path).
+    # On host this is a no-op; on container it removes
+    # /run/service/gateway-<profile>/ so s6-supervise drops it.
+    _maybe_unregister_gateway_service(canon)

    # 2. Stop running gateway
    if gw_running:
@ -902,7 +927,49 @@ def delete_profile(name: str, yes: bool = False) -> Path:

    # 4. Remove profile directory
    try:
-        shutil.rmtree(profile_dir)
+        def _make_writable(func, path, exc):
+            """onexc/onerror handler: add +w on PermissionError so rmtree can proceed.
+
+            Handles two cases on NixOS (and other systems with read-only
+            copies from immutable stores):
+            1. The path itself isn't writable (e.g. a file with mode 0444)
+            2. The *parent* directory isn't writable (e.g. mode 0555)
+
+            Compatible with both the ``onexc`` API (3.12+, receives an
+            exception instance) and the ``onerror`` API (3.11-, receives
+            ``sys.exc_info()`` tuple).
+            """
+            import stat as _stat
+            import sys as _sys
+
+            # Normalise the two callback signatures:
+            #   onexc(func, path, exc_instance)   — 3.12+
+            #   onerror(func, path, exc_info_tuple) — 3.11
+            if isinstance(exc, tuple):
+                exc = exc[1]  # exc_info → actual exception object
+
+            if isinstance(exc, PermissionError):
+                # Make the path writable
+                try:
+                    os.chmod(path, os.stat(path).st_mode | _stat.S_IWUSR)
+                except OSError:
+                    pass
+                # Also make the parent writable (needed for unlink/rmdir)
+                parent = os.path.dirname(path)
+                if parent:
+                    try:
+                        os.chmod(parent, os.stat(parent).st_mode | _stat.S_IWUSR)
+                    except OSError:
+                        pass
+                func(path)
+            else:
+                raise
+
+        # ``onexc`` was added in 3.12; fall back to ``onerror`` on 3.11.
+        try:
+            shutil.rmtree(profile_dir, onexc=_make_writable)
+        except TypeError:
+            shutil.rmtree(profile_dir, onerror=_make_writable)
        print(f"✓ Removed {profile_dir}")
    except Exception as e:
        print(f"⚠ Could not remove {profile_dir}: {e}")
@ -920,6 +987,87 @@ def delete_profile(name: str, yes: bool = False) -> Path:
    return profile_dir


+def _maybe_register_gateway_service(profile_name: str) -> None:
+    """Best-effort registration of a profile's gateway as an s6 service.
+
+    Host short-circuit: the function only acts when
+    ``detect_service_manager()`` reports ``"s6"``. For any other value
+    it returns at once without calling ``get_service_manager()``, so
+    host (systemd/launchd/windows) profile creation stays silent and
+    the ``⚠ Could not register s6 gateway service`` warning can never
+    render there. Any exception raised while detecting the backend or
+    obtaining the manager is likewise swallowed without output.
+
+    Once a manager is in hand, a second guard skips managers whose
+    ``supports_runtime_registration()`` is falsy — this covers the case
+    where detection says ``"s6"`` but the backend is not the S6 one.
+
+    Registration outcome:
+
+    * ``ValueError`` is ignored — the slot is already registered (e.g.
+      the container-boot reconciler ran first).
+    * Any other exception is printed as a warning and never propagated,
+      so profile creation does not fail over a supervision-tree hiccup.
+      The user can re-register later via the gateway start command.
+
+    Port selection is governed by the profile's ``config.yaml``
+    (``[gateway] port = …``); there is no Python-side allocator
+    (PR #30136 review item I5 retired the SHA-256-derived range
+    [9200, 9800)).
+    """
+    try:
+        from hermes_cli.service_manager import detect_service_manager
+        if detect_service_manager() != "s6":
+            return  # host path — silent, no registration needed
+        from hermes_cli.service_manager import get_service_manager
+        manager = get_service_manager()
+    except Exception:
+        # Covers RuntimeError (no backend on this host) as well as any
+        # other detection failure: stay silent rather than printing a
+        # confusing s6 warning to users who never touched the container.
+        return
+
+    if mgr_supports := manager.supports_runtime_registration():
+        try:
+            manager.register_profile_gateway(profile_name)
+        except ValueError:
+            # Already registered — nothing further to do.
+            return
+        except Exception as err:
+            # Don't fail profile create over a supervision-tree hiccup.
+            print(f"⚠ Could not register s6 gateway service: {err}")
+
+
+def _maybe_unregister_gateway_service(profile_name: str) -> None:
+    """Best-effort teardown of a profile's s6 gateway service.
+
+    Returns silently unless ``detect_service_manager()`` reports
+    ``"s6"``; any exception raised during detection or while obtaining
+    the manager is swallowed without output, so the host path never
+    prints anything. Managers whose ``supports_runtime_registration()``
+    is falsy are skipped as well.
+
+    A failure from ``unregister_profile_gateway`` is printed as a
+    warning and not propagated. Absent services are expected to be
+    skipped by ``unregister_profile_gateway`` itself.
+    """
+    try:
+        from hermes_cli.service_manager import detect_service_manager
+        if detect_service_manager() != "s6":
+            return  # host path — silent
+        from hermes_cli.service_manager import get_service_manager
+        manager = get_service_manager()
+    except Exception:
+        # RuntimeError (no backend) and every other failure: stay quiet.
+        return
+
+    if manager.supports_runtime_registration():
+        try:
+            manager.unregister_profile_gateway(profile_name)
+        except Exception as err:
+            print(f"⚠ Could not unregister s6 gateway service: {err}")
+
+
 def _cleanup_gateway_service(name: str, profile_dir: Path) -> None:
    """Disable and remove systemd/launchd service for a profile."""
    import platform as _platform
--- a/hermes_cli/providers.py
+++ b/hermes_cli/providers.py
@ -60,6 +60,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        auth_type="oauth_external",
        base_url_override="https://chatgpt.com/backend-api/codex",
    ),
+    "openai-api": HermesOverlay(
+        transport="codex_responses",
+        base_url_override="https://api.openai.com/v1",
+        base_url_env_var="OPENAI_BASE_URL",
+    ),
    "xai-oauth": HermesOverlay(
        transport="codex_responses",
        auth_type="oauth_external",
@ -381,6 +386,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
    "local": "Local endpoint",
    "bedrock": "AWS Bedrock",
    "ollama-cloud": "Ollama Cloud",
+    "xai-oauth": "xAI Grok OAuth (SuperGrok / Premium+)",
 }


--- a/hermes_cli/proxy/adapters/nous_portal.py
+++ b/hermes_cli/proxy/adapters/nous_portal.py
@ -27,6 +27,7 @@ from hermes_cli.auth import (
    _quarantine_nous_oauth_state,
    _quarantine_nous_pool_entries,
    _save_auth_store,
+    _validate_nous_inference_url_from_network,
    _write_shared_nous_state,
    resolve_nous_runtime_credentials,
 )
@ -103,7 +104,7 @@ class NousPortalAdapter(UpstreamAdapter):
            state = self._read_state()
            if state is None:
                raise RuntimeError(
-                    "Not logged into Nous Portal. Run `hermes login nous` first."
+                    "Not logged into Nous Portal. Run `hermes auth add nous` first."
                )

            try:
@ -134,10 +135,13 @@ class NousPortalAdapter(UpstreamAdapter):
            if not agent_key:
                raise RuntimeError(
                    "Nous Portal refresh did not return a usable agent_key. "
-                    "Try `hermes login nous` to re-authenticate."
+                    "Try `hermes auth add nous` to re-authenticate."
                )

-            base_url = refreshed.get("base_url") or DEFAULT_NOUS_INFERENCE_URL
+            base_url = (
+                _validate_nous_inference_url_from_network(refreshed.get("base_url"))
+                or DEFAULT_NOUS_INFERENCE_URL
+            )
            base_url = base_url.rstrip("/")

            return UpstreamCredential(
--- a/hermes_cli/proxy/cli.py
+++ b/hermes_cli/proxy/cli.py
@ -44,7 +44,7 @@ def cmd_proxy_start(args: Any) -> int:
        return 2

    if not adapter.is_authenticated():
-        auth_hint = getattr(adapter, "auth_hint", f"hermes login {adapter.name}")
+        auth_hint = getattr(adapter, "auth_hint", f"hermes auth add {adapter.name}")
        print(
            f"Not logged into {adapter.display_name}. "
            f"Run `{auth_hint}` first.",
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@ -100,6 +100,63 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
    return None


+def _host_derived_api_key(base_url: str) -> str:
+    """Look up `<VENDOR>_API_KEY` in the env, derived from the base URL host.
+
+    Examples:
+        https://api.deepseek.com/v1   → DEEPSEEK_API_KEY
+        https://api.groq.com/openai/v1 → GROQ_API_KEY
+        https://api.mistral.ai/v1     → MISTRAL_API_KEY
+        https://generativelanguage.googleapis.com/v1beta/openai/ → GOOGLEAPIS_API_KEY
+
+    Returns the env value (stripped) or "". Never returns env vars whose names
+    are already explicitly checked elsewhere — those are handled by their own
+    host-gated paths (OPENAI/OPENROUTER/OLLAMA).
+
+    The vendor label is the second-to-last hostname label after leading
+    ``api`` / ``www`` labels are stripped (``api.deepseek.com`` →
+    ``deepseek``). Returns "" for hostnames that don't yield a usable
+    vendor label: an empty hostname, a last label containing a digit
+    (IPv4 literals), ``localhost``, anything containing ``:`` (IPv6
+    literals), and hosts with fewer than two labels left after stripping.
+
+    NOTE(review): for hosts under a multi-label public suffix (e.g.
+    ``api.example.co.uk``) the second-to-last label is ``co``, so the
+    lookup would be ``CO_API_KEY`` — confirm this is acceptable.
+    """
+    hostname = base_url_hostname(base_url)
+    if not hostname:
+        return ""
+    # Reject IPv4 / IPv6 / loopback — no meaningful vendor label.
+    if any(ch.isdigit() for ch in hostname.split(".")[-1]):
+        # Last label contains a digit → treated as an IP literal. Note the
+        # check is "any digit", not "starts with a digit", so a TLD with a
+        # digit anywhere in it is rejected too.
+        return ""
+    if hostname in ("localhost",) or ":" in hostname:
+        return ""
+    # Drop empty labels (e.g. produced by a trailing dot).
+    labels = [lbl for lbl in hostname.split(".") if lbl]
+    # Strip leading "api" / "www" labels (repeatedly, if stacked).
+    while labels and labels[0] in ("api", "www"):
+        labels.pop(0)
+    if len(labels) < 2:
+        return ""
+    # Take the *registrable* label (second-to-last). For typical provider
+    # hosts this is what users intuitively call "the vendor":
+    #   deepseek.com               → labels[-2] = "deepseek"  ✓
+    #   api.groq.com → groq.com    → labels[-2] = "groq"      ✓
+    #   api.mistral.ai             → labels[-2] = "mistral"   ✓
+    # Crucially, lookalike hosts pick the ATTACKER's label, not the spoofed
+    # vendor:
+    #   api.deepseek.com.attacker.test → labels[-2] = "attacker"
+    # so DEEPSEEK_API_KEY stays put and the chain falls through to
+    # no-key-required. This mirrors how `base_url_host_matches` resists the
+    # same lookalike attack for explicit hosts.
+    vendor = labels[-2]
+    # Sanitize for use in an env var name: every non-alphanumeric character
+    # (e.g. "-") becomes "_", then the result is upper-cased.
+    # NOTE(review): str.isalnum() also accepts non-ASCII letters/digits;
+    # presumably hostnames arrive as ASCII/punycode — confirm.
+    sanitized = "".join(ch if ch.isalnum() else "_" for ch in vendor).upper()
+    # Env var names must start with a letter; bail out on "_foo" / "1foo".
+    if not sanitized or not sanitized[0].isalpha():
+        return ""
+    # Don't re-derive env vars already handled by explicit host-gated paths.
+    if sanitized in ("OPENAI", "OPENROUTER", "OLLAMA"):
+        return ""
+    env_name = f"{sanitized}_API_KEY"
+    return (os.getenv(env_name, "") or "").strip()
+
+
 def _auto_detect_local_model(base_url: str) -> str:
    """Query a local server for its model name when only one model is loaded."""
    if not base_url:
@ -471,6 +528,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
                        "api_key": resolved_api_key,
                        "model": entry.get("default_model", ""),
                    }
+                    extra_body = entry.get("extra_body")
+                    if isinstance(extra_body, dict):
+                        result["extra_body"] = dict(extra_body)
                    # The v11→v12 migration writes the API mode under the new
                    # ``transport`` field, but hand-edited configs may still
                    # use the legacy ``api_mode`` spelling.  Accept both —
@ -496,6 +556,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
                            "api_key": resolved_api_key,
                            "model": entry.get("default_model", ""),
                        }
+                        extra_body = entry.get("extra_body")
+                        if isinstance(extra_body, dict):
+                            result["extra_body"] = dict(extra_body)
                        api_mode = _parse_api_mode(entry.get("api_mode") or entry.get("transport"))
                        if api_mode:
                            result["api_mode"] = api_mode
@ -539,6 +602,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
            result["key_env"] = key_env
        if provider_key:
            result["provider_key"] = provider_key
+        extra_body = entry.get("extra_body")
+        if isinstance(extra_body, dict):
+            result["extra_body"] = dict(extra_body)
        api_mode = _parse_api_mode(entry.get("api_mode"))
        if api_mode:
            result["api_mode"] = api_mode
@ -550,6 +616,13 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
    return None


+def _custom_provider_request_overrides(custom_provider: Dict[str, Any]) -> Dict[str, Any]:
+    """Build the request-override mapping for a custom provider entry.
+
+    Returns ``{"extra_body": <shallow copy>}`` when the entry carries a
+    non-empty dict under ``"extra_body"``; otherwise returns ``{}``.
+    """
+    body = custom_provider.get("extra_body")
+    if isinstance(body, dict) and body:
+        # Copy so callers can mutate the overrides without touching config.
+        return {"extra_body": dict(body)}
+    return {}
+
+
 def _resolve_named_custom_runtime(
    *,
    requested_provider: str,
@ -582,10 +655,17 @@ def _resolve_named_custom_runtime(
        if pool_result:
            pool_result["source"] = "direct-alias"
            return pool_result
+        _da_is_openai_url   = base_url_host_matches(base_url, "openai.com") or base_url_host_matches(base_url, "openai.azure.com")
+        _da_is_openrouter   = base_url_host_matches(base_url, "openrouter.ai")
        api_key_candidates = [
            (explicit_api_key or "").strip(),
-            os.getenv("OPENAI_API_KEY", "").strip(),
-            os.getenv("OPENROUTER_API_KEY", "").strip(),
+            # Gate env key fallbacks on authoritative hosts (#28660)
+            (os.getenv("OPENAI_API_KEY", "").strip()     if _da_is_openai_url else ""),
+            (os.getenv("OPENROUTER_API_KEY", "").strip() if _da_is_openrouter  else ""),
+            # Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
+            # who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
+            # intuitive match without configuring `custom_providers` first.
+            _host_derived_api_key(base_url),
        ]
        api_key = next(
            (c for c in api_key_candidates if has_usable_secret(c)),
@ -619,14 +699,27 @@ def _resolve_named_custom_runtime(
        model_name = custom_provider.get("model")
        if model_name:
            pool_result["model"] = model_name
+        request_overrides = _custom_provider_request_overrides(custom_provider)
+        if request_overrides:
+            pool_result["request_overrides"] = {
+                **dict(pool_result.get("request_overrides") or {}),
+                **request_overrides,
+            }
        return pool_result

+    _cp_is_openai_url   = base_url_host_matches(base_url, "openai.com") or base_url_host_matches(base_url, "openai.azure.com")
+    _cp_is_openrouter   = base_url_host_matches(base_url, "openrouter.ai")
    api_key_candidates = [
        (explicit_api_key or "").strip(),
        str(custom_provider.get("api_key", "") or "").strip(),
        os.getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
-        os.getenv("OPENAI_API_KEY", "").strip(),
-        os.getenv("OPENROUTER_API_KEY", "").strip(),
+        # Gate provider env keys on their authoritative hosts — sending
+        # OPENAI_API_KEY to a local-llm endpoint leaks credentials (#28660).
+        (os.getenv("OPENAI_API_KEY", "").strip()     if _cp_is_openai_url  else ""),
+        (os.getenv("OPENROUTER_API_KEY", "").strip() if _cp_is_openrouter  else ""),
+        # Bonus (#28660): derive `<VENDOR>_API_KEY` from the host as a final
+        # fallback when key_env wasn't set explicitly.
+        _host_derived_api_key(base_url),
    ]
    api_key = next((candidate for candidate in api_key_candidates if has_usable_secret(candidate)), "")

@ -643,6 +736,9 @@ def _resolve_named_custom_runtime(
    # provider name differs from the actual model string the API expects.
    if custom_provider.get("model"):
        result["model"] = custom_provider["model"]
+    request_overrides = _custom_provider_request_overrides(custom_provider)
+    if request_overrides:
+        result["request_overrides"] = request_overrides
    return result


@ -707,7 +803,15 @@ def _resolve_openrouter_runtime(
    # OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated
    # provider (issues #420, #560).
    _is_openrouter_url = base_url_host_matches(base_url, "openrouter.ai")
-    if _is_openrouter_url:
+    # Also treat explicitly-configured OpenRouter mirrors/proxies as OpenRouter
+    # for key selection — if the user set OPENROUTER_BASE_URL or requested
+    # provider=openrouter explicitly, OPENROUTER_API_KEY should still be used.
+    _is_openrouter_context = _is_openrouter_url or (
+        requested_norm == "openrouter"
+        and (env_openrouter_base_url or base_url == env_openrouter_base_url)
+        and base_url == (env_openrouter_base_url or "").rstrip("/")
+    )
+    if _is_openrouter_context:
        api_key_candidates = [
            explicit_api_key,
            os.getenv("OPENROUTER_API_KEY"),
@ -721,13 +825,24 @@ def _resolve_openrouter_runtime(
        # "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
        # hostname is a look-alike (ollama.com.attacker.test) must not
        # receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
-        _is_ollama_url = base_url_host_matches(base_url, "ollama.com")
+        _is_ollama_url    = base_url_host_matches(base_url, "ollama.com")
+        _is_openai_url    = base_url_host_matches(base_url, "openai.com")
+        _is_openai_azure  = base_url_host_matches(base_url, "openai.azure.com")
+        # Gate each provider key on its own host — sending OPENAI_API_KEY or
+        # OPENROUTER_API_KEY to an unrelated custom endpoint (DeepSeek, Groq,
+        # Mistral, …) leaks credentials and causes 401s (issue #28660).
+        # Mirrors the OLLAMA_API_KEY host-gate added in GHSA-76xc-57q6-vm5m.
        api_key_candidates = [
            explicit_api_key,
            (cfg_api_key if use_config_base_url else ""),
-            (os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
-            os.getenv("OPENAI_API_KEY"),
-            os.getenv("OPENROUTER_API_KEY"),
+            (os.getenv("OLLAMA_API_KEY")     if _is_ollama_url                       else ""),
+            (os.getenv("OPENAI_API_KEY")     if (_is_openai_url or _is_openai_azure) else ""),
+            (os.getenv("OPENROUTER_API_KEY") if _is_openrouter_url                   else ""),
+            # Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
+            # who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
+            # intuitive match. Helper returns "" for IPs/loopback and for env
+            # vars already handled by the explicit host-gated paths above.
+            _host_derived_api_key(base_url),
        ]
    api_key = next(
        (str(candidate or "").strip() for candidate in api_key_candidates if has_usable_secret(candidate)),
--- a/hermes_cli/secret_prompt.py
+++ b/hermes_cli/secret_prompt.py
@ -0,0 +1,126 @@
+"""Secret input prompts with masked typing feedback."""
+
+from __future__ import annotations
+
+import getpass
+import os
+import sys
+from collections.abc import Callable
+
+
+_BACKSPACE_CHARS = {"\b", "\x7f"}
+_ENTER_CHARS = {"\r", "\n"}
+_EOF_CHARS = {"\x04", "\x1a"}
+
+
+def _collect_masked_input(
+    read_char: Callable[[], str],
+    write: Callable[[str], object],
+    prompt: str,
+    *,
+    mask: str = "*",
+) -> str:
+    """Read a single secret line, echoing ``mask`` for each accepted char.
+
+    ``read_char`` is called repeatedly and must return one keystroke per
+    call; ``write`` receives every piece of terminal output (the prompt,
+    mask characters, erase sequences and the final newline).
+
+    Returns the collected text when an Enter character arrives. Raises
+    ``EOFError`` when ``read_char`` returns ``""`` or an EOF control
+    character, and ``KeyboardInterrupt`` on ``"\\x03"``. A newline is
+    written before returning or raising in each of those cases. Passing
+    an empty ``mask`` suppresses the per-character echo.
+    """
+    typed: list[str] = []
+    write(prompt)
+
+    while True:
+        key = read_char()
+
+        if key in _ENTER_CHARS:
+            write("\n")
+            return "".join(typed)
+        if key == "" or key in _EOF_CHARS:
+            write("\n")
+            raise EOFError
+        if key == "\x03":
+            write("\n")
+            raise KeyboardInterrupt
+        if key == "\x1b":
+            # Drop the escape character so it never becomes secret text.
+            # NOTE(review): only ESC itself is skipped; if read_char
+            # delivers the rest of an escape sequence (e.g. "[A") as
+            # separate characters, those are appended — confirm callers
+            # collapse such sequences before they reach this loop.
+            continue
+
+        if key not in _BACKSPACE_CHARS:
+            typed.append(key)
+            if mask:
+                write(mask)
+        elif typed:
+            # Erase the last mask cell: back, overwrite with space, back.
+            typed.pop()
+            write("\b \b")
+
+
+def masked_secret_prompt(prompt: str, *, mask: str = "*") -> str:
+    """Ask for a secret, echoing ``mask`` once per typed character.
+
+    Uses the platform-specific raw-terminal reader (Windows when
+    ``os.name == "nt"``, POSIX otherwise). Defers to ``getpass.getpass``
+    when stdin or stdout is not a TTY, or when the raw reader fails with
+    anything other than ``KeyboardInterrupt`` / ``EOFError`` — those two
+    always propagate to the caller.
+    """
+    interactive = _stream_is_tty(sys.stdin) and _stream_is_tty(sys.stdout)
+    if not interactive:
+        return getpass.getpass(prompt)
+
+    backend = (
+        _masked_secret_prompt_windows
+        if os.name == "nt"
+        else _masked_secret_prompt_posix
+    )
+    try:
+        return backend(prompt, mask=mask)
+    except (KeyboardInterrupt, EOFError):
+        # User cancelled or input ended: never mask this with a re-prompt.
+        raise
+    except Exception:
+        # Raw terminal handling unavailable; fall back to the stdlib prompt.
+        return getpass.getpass(prompt)
+
+
+def _stream_is_tty(stream) -> bool:
+    """Return whether ``stream.isatty()`` is truthy; ``False`` on any error.
+
+    Objects without a usable ``isatty`` (including ``None``) count as
+    non-interactive.
+    """
+    try:
+        answer = bool(stream.isatty())
+    except Exception:
+        answer = False
+    return answer
+
+
+def _masked_secret_prompt_windows(prompt: str, *, mask: str) -> str:
+    """Masked secret prompt for Windows consoles, built on ``msvcrt``.
+
+    Keystrokes are read one at a time with ``msvcrt.getwch`` and handed
+    to ``_collect_masked_input``; output goes to ``sys.stdout`` and is
+    flushed after every write.
+    """
+    import msvcrt
+
+    def emit(text: str) -> None:
+        sys.stdout.write(text)
+        sys.stdout.flush()
+
+    def next_key() -> str:
+        key = msvcrt.getwch()
+        if key not in ("\x00", "\xe0"):
+            return key
+        # "\x00" / "\xe0" introduce a two-part key: consume the second
+        # part and report the pair as ESC, which the collector ignores.
+        msvcrt.getwch()
+        return "\x1b"
+
+    return _collect_masked_input(next_key, emit, prompt, mask=mask)
+
+
+def _masked_secret_prompt_posix(prompt: str, *, mask: str) -> str:
+    """Masked secret prompt for POSIX terminals, built on ``termios``/``tty``.
+
+    Puts the stdin terminal into raw mode for the duration of the prompt,
+    reads one character at a time from ``sys.stdin`` and delegates the
+    line editing to ``_collect_masked_input``. The saved terminal
+    attributes are always restored, including when the collector raises
+    ``EOFError`` or ``KeyboardInterrupt``.
+
+    Raises whatever ``termios``/``tty`` raise when stdin is not a usable
+    terminal; the caller treats that as "raw mode unavailable".
+    """
+    import termios
+    import tty
+
+    fd = sys.stdin.fileno()
+    old_attrs = termios.tcgetattr(fd)
+
+    def read_char() -> str:
+        return sys.stdin.read(1)
+
+    def write(text: str) -> None:
+        # Raw mode switches off output post-processing, so a bare "\n"
+        # only moves the cursor down a row and leaves it in the same
+        # column. Emit CR+LF explicitly so whatever is printed after the
+        # prompt starts at column 0.
+        sys.stdout.write(text.replace("\n", "\r\n"))
+        sys.stdout.flush()
+
+    try:
+        tty.setraw(fd)
+        return _collect_masked_input(read_char, write, prompt, mask=mask)
+    finally:
+        # TCSADRAIN: let pending output finish before leaving raw mode.
+        termios.tcsetattr(fd, termios.TCSADRAIN, old_attrs)
--- a/hermes_cli/secrets_cli.py
+++ b/hermes_cli/secrets_cli.py
@ -0,0 +1,577 @@
+"""CLI handlers for ``hermes secrets bitwarden ...``.
+
+Subcommands:
+    setup    — interactive wizard: install bws, prompt for token + project, test fetch
+    status   — show current config + binary version + last fetch outcome
+    sync     — run a fetch right now and show what would be applied (dry-run friendly)
+    disable  — flip ``secrets.bitwarden.enabled`` to False
+    install  — just download the bws binary (no token / project required)
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+from typing import List, Optional, Tuple
+
+from rich.console import Console
+from rich.panel import Panel
+from rich.table import Table
+
+from agent.secret_sources import bitwarden as bw
+from hermes_cli.config import (
+    get_env_path,
+    load_config,
+    save_config,
+    save_env_value,
+)
+from hermes_cli.secret_prompt import masked_secret_prompt
+
+
+# ---------------------------------------------------------------------------
+# Argparse wiring — called from hermes_cli.main
+# ---------------------------------------------------------------------------
+
+
+def register_cli(parent_parser: argparse.ArgumentParser) -> None:
+    """Attach the ``bitwarden`` subcommands to ``parent_parser``.
+
+    ``hermes_cli.main`` calls this while building the top-level
+    ``hermes secrets`` parser.  Five subcommands are registered (``setup``,
+    ``status``, ``sync``, ``disable``, ``install``); each one stores its
+    handler as the ``func`` default on its subparser.
+    """
+    subcommands = parent_parser.add_subparsers(dest="secrets_bw_command")
+
+    # --- setup -------------------------------------------------------------
+    setup_parser = subcommands.add_parser(
+        "setup",
+        help="Interactive wizard: install bws, store access token, pick project",
+    )
+    setup_parser.add_argument(
+        "--project-id",
+        help="Pre-select a project UUID instead of prompting",
+    )
+    setup_parser.add_argument(
+        "--access-token",
+        help="Provide the access token non-interactively (will be stored in .env)",
+    )
+    server_url_help = (
+        "Bitwarden region / self-hosted endpoint. Examples: "
+        "https://vault.bitwarden.com (US, default), "
+        "https://vault.bitwarden.eu (EU), or your self-hosted URL. "
+        "Skips the interactive region prompt."
+    )
+    setup_parser.add_argument("--server-url", help=server_url_help)
+    setup_parser.set_defaults(func=cmd_setup)
+
+    # --- status ------------------------------------------------------------
+    status_parser = subcommands.add_parser(
+        "status", help="Show config + binary + last fetch"
+    )
+    status_parser.set_defaults(func=cmd_status)
+
+    # --- sync --------------------------------------------------------------
+    sync_parser = subcommands.add_parser(
+        "sync", help="Fetch secrets now and report what changed"
+    )
+    sync_parser.add_argument(
+        "--apply",
+        action="store_true",
+        help="Actually export the secrets into the current shell's env (default: dry-run)",
+    )
+    sync_parser.set_defaults(func=cmd_sync)
+
+    # --- disable -----------------------------------------------------------
+    disable_parser = subcommands.add_parser(
+        "disable", help="Turn off the Bitwarden integration"
+    )
+    disable_parser.set_defaults(func=cmd_disable)
+
+    # --- install -----------------------------------------------------------
+    install_parser = subcommands.add_parser(
+        "install",
+        help=f"Download and verify the pinned bws binary (v{bw._BWS_VERSION})",
+    )
+    install_parser.add_argument(
+        "--force",
+        action="store_true",
+        help="Re-download even if a managed copy already exists",
+    )
+    install_parser.set_defaults(func=cmd_install)
+
+
+# ---------------------------------------------------------------------------
+# Handlers
+# ---------------------------------------------------------------------------
+
+
+def cmd_setup(args: argparse.Namespace) -> int:
+    """Run the interactive ``hermes secrets bitwarden setup`` wizard.
+
+    Steps, in order:
+
+    1. Locate the ``bws`` binary, downloading it when none is found.
+    2. Obtain the access token (``--access-token`` or a masked prompt) and
+       store it with ``save_env_value``.
+    3. Resolve the server URL via ``_resolve_server_url``.
+    4. Pick a project from ``bws project list`` (skipped when
+       ``--project-id`` is given).
+    5. Run a test fetch and show which secret names it returned.
+    6. Write the ``secrets.bitwarden`` config section with ``save_config``.
+
+    Returns 0 on success and 1 when a step fails or is aborted.  The token is
+    written to the env file in step 2, before the later steps run, so a run
+    that fails afterwards still leaves the token stored.
+    """
+    console = Console()
+    console.print(
+        Panel.fit(
+            "[bold]Bitwarden Secrets Manager setup[/bold]\n\n"
+            "Need an access token? In the Bitwarden web app:\n"
+            "  Secrets Manager → Machine accounts → [your account] →\n"
+            "  Access tokens → Create access token\n\n"
+            "Copy the token (starts with [cyan]0.[/cyan]…) — it cannot be retrieved later.",
+            border_style="cyan",
+        )
+    )
+
+    # ------------------------------------------------------------------ binary
+    console.print()
+    console.print("[bold]Step 1[/bold]  Install the bws CLI")
+    try:
+        # Look for an existing binary first; only download when none is found.
+        binary = bw.find_bws(install_if_missing=False)
+        if binary is None:
+            console.print("  No bws on PATH — downloading…")
+            binary = bw.install_bws()
+        version = _bws_version(binary)
+        console.print(f"  [green]✓[/green] {binary}  ({version})")
+    except Exception as exc:  # noqa: BLE001
+        console.print(f"  [red]✗ Could not install bws: {exc}[/red]")
+        console.print(
+            "  Manual install: "
+            "https://github.com/bitwarden/sdk-sm/releases"
+        )
+        return 1
+
+    # ------------------------------------------------------------------- token
+    console.print()
+    console.print("[bold]Step 2[/bold]  Provide your access token")
+    cfg = load_config()
+    # setdefault keeps ``secrets_cfg`` a live view into ``cfg``, so the
+    # assignments in the save step below land in what save_config() writes.
+    secrets_cfg = (cfg.setdefault("secrets", {})
+                     .setdefault("bitwarden", {}))
+    token_env = secrets_cfg.get("access_token_env", "BWS_ACCESS_TOKEN")
+
+    token = (args.access_token or "").strip()
+    if not token:
+        token = masked_secret_prompt(f"  Paste access token ({token_env}): ").strip()
+    if not token:
+        console.print("  [red]Empty token, aborting.[/red]")
+        return 1
+    # A token without the "0." prefix only triggers a warning, not an abort.
+    if not token.startswith("0."):
+        console.print(
+            "  [yellow]Warning: token doesn't start with '0.' — usually that means "
+            "you pasted something other than a BSM access token.  Continuing anyway.[/yellow]"
+        )
+
+    save_env_value(token_env, token)
+    os.environ[token_env] = token  # so the test fetch below sees it
+    console.print(f"  [green]✓[/green] stored in {get_env_path()} as {token_env}")
+
+    # ------------------------------------------------------------------ region
+    console.print()
+    console.print("[bold]Step 3[/bold]  Pick a Bitwarden region")
+    server_url = _resolve_server_url(args, secrets_cfg, console)
+    # None means the user aborted; an empty string means "use the bws default".
+    if server_url is None:
+        return 1
+    if server_url:
+        console.print(f"  [green]✓[/green] using {server_url}")
+    else:
+        console.print(
+            "  [green]✓[/green] using bws default "
+            "(US Cloud, https://vault.bitwarden.com)"
+        )
+
+    # ------------------------------------------------------------------- project
+    if args.project_id and args.project_id.strip():
+        project_id = args.project_id.strip()
+    else:
+        console.print()
+        console.print("[bold]Step 4[/bold]  Pick a project")
+        project_id = ""
+        projects = _list_projects(binary, token, console, server_url=server_url)
+        # None: the listing failed (already reported).  Empty list: the
+        # listing worked but returned no usable projects.
+        if projects is None:
+            return 1
+        if not projects:
+            console.print("  [yellow]No projects visible to this machine account.[/yellow]")
+            console.print(
+                "  In the Bitwarden web app, open the machine account → Projects tab "
+                "and grant it access to at least one project."
+            )
+            return 1
+
+        table = Table(show_header=True, header_style="bold")
+        table.add_column("#", style="cyan", width=4)
+        table.add_column("Name")
+        table.add_column("ID", style="dim")
+        for i, p in enumerate(projects, 1):
+            table.add_row(str(i), p.get("name", "?"), p.get("id", "?"))
+        console.print(table)
+
+        # Re-prompt until the user enters a number within the table's range.
+        while True:
+            choice = console.input(f"  Select project [1-{len(projects)}]: ").strip()
+            if not choice:
+                continue
+            try:
+                idx = int(choice)
+            except ValueError:
+                console.print("  [red]Enter a number.[/red]")
+                continue
+            if 1 <= idx <= len(projects):
+                project_id = projects[idx - 1]["id"]
+                break
+            console.print(f"  [red]Out of range — pick 1-{len(projects)}.[/red]")
+
+    # ------------------------------------------------------------------- test
+    console.print()
+    # The project step is skipped when --project-id is given, so the test
+    # fetch is numbered 4 in that case and 5 otherwise.
+    step_num = 5 if not (args.project_id and args.project_id.strip()) else 4
+    console.print(f"[bold]Step {step_num}[/bold]  Test fetch")
+    try:
+        secrets, warnings = bw.fetch_bitwarden_secrets(
+            access_token=token,
+            project_id=project_id,
+            binary=binary,
+            use_cache=False,
+            server_url=server_url,
+        )
+    except Exception as exc:  # noqa: BLE001
+        console.print(f"  [red]✗ Fetch failed: {exc}[/red]")
+        return 1
+
+    if not secrets:
+        console.print("  [yellow]Fetch succeeded but the project has no secrets.[/yellow]")
+    else:
+        # Only secret names and a status are shown; values are never printed.
+        table = Table(show_header=True, header_style="bold")
+        table.add_column("Name", style="cyan")
+        table.add_column("Status")
+        for key in sorted(secrets):
+            if key == token_env:
+                status = "[dim]bootstrap token — never overrides itself[/dim]"
+            elif os.environ.get(key):
+                status = "[yellow]already set in env (will be overwritten)[/yellow]"
+            else:
+                status = "[green]new[/green]"
+            table.add_row(key, status)
+        console.print(table)
+    for w in warnings:
+        console.print(f"  [yellow]warning:[/yellow] {w}")
+
+    # ------------------------------------------------------------------- save
+    # enabled / project_id / server_url are always overwritten; the remaining
+    # keys are only filled in when absent, so existing user settings survive.
+    secrets_cfg["enabled"] = True
+    secrets_cfg["project_id"] = project_id
+    secrets_cfg["server_url"] = server_url
+    secrets_cfg.setdefault("access_token_env", token_env)
+    secrets_cfg.setdefault("cache_ttl_seconds", 300)
+    secrets_cfg.setdefault("override_existing", True)
+    secrets_cfg.setdefault("auto_install", True)
+    save_config(cfg)
+
+    console.print()
+    console.print(
+        "[green]✓ Bitwarden Secrets Manager is enabled.[/green]  "
+        "Secrets will be pulled at the start of every Hermes process."
+    )
+    console.print(
+        "  Status:  [cyan]hermes secrets bitwarden status[/cyan]\n"
+        "  Refresh: [cyan]hermes secrets bitwarden sync[/cyan]\n"
+        "  Disable: [cyan]hermes secrets bitwarden disable[/cyan]"
+    )
+    return 0
+
+
+def cmd_status(args: argparse.Namespace) -> int:
+    """Print the current Bitwarden settings and the state of the bws binary.
+
+    Nothing is fetched and nothing is written.  Hints are printed when the
+    integration is disabled, or enabled but missing its token or project id.
+    Always returns 0.
+    """
+    console = Console()
+    bw_cfg = (load_config().get("secrets") or {}).get("bitwarden") or {}
+
+    enabled = bool(bw_cfg.get("enabled"))
+    token_env = bw_cfg.get("access_token_env", "BWS_ACCESS_TOKEN")
+    project_id = bw_cfg.get("project_id", "")
+    server_url = str(bw_cfg.get("server_url", "") or "").strip()
+    token_set = bool(os.environ.get(token_env))
+
+    binary = bw.find_bws(install_if_missing=False)
+    if binary:
+        binary_cell = f"{binary} ({_bws_version(binary)})"
+    else:
+        binary_cell = "[yellow]not installed[/yellow]"
+
+    # (label, value) pairs in display order.
+    rows = [
+        ("Enabled", _yn(enabled)),
+        ("Token env var", token_env),
+        ("Token in env", _yn(token_set)),
+        ("Project ID", project_id or "[dim](unset)[/dim]"),
+        (
+            "Server URL",
+            server_url or "[dim]default (US Cloud, https://vault.bitwarden.com)[/dim]",
+        ),
+        ("Override existing", _yn(bool(bw_cfg.get("override_existing", False)))),
+        ("Cache TTL (s)", str(bw_cfg.get("cache_ttl_seconds", 300))),
+        ("Auto-install", _yn(bool(bw_cfg.get("auto_install", True)))),
+        ("bws binary", binary_cell),
+    ]
+
+    table = Table(show_header=False, box=None, padding=(0, 2))
+    table.add_column("", style="bold")
+    table.add_column("")
+    for label, value in rows:
+        table.add_row(label, value)
+
+    console.print(Panel(table, title="Bitwarden Secrets Manager", border_style="cyan"))
+
+    if not enabled:
+        console.print("\n  Run [cyan]hermes secrets bitwarden setup[/cyan] to enable.")
+        return 0
+
+    if not token_set:
+        console.print(
+            f"\n  [yellow]Enabled but {token_env} is not set — Hermes will skip BSM "
+            "and warn on next startup.[/yellow]"
+        )
+    if not project_id:
+        console.print(
+            "\n  [yellow]Enabled but no project_id — nothing to fetch.[/yellow]"
+        )
+    return 0
+
+
+def cmd_sync(args: argparse.Namespace) -> int:
+    """Fetch the project's secrets now and report what would be applied.
+
+    Without ``--apply`` this is a dry run: only a per-key action table is
+    printed.  With ``--apply`` each eligible secret is assigned into
+    ``os.environ`` of this process.
+
+    Returns 1 when the integration is disabled, the token env var is empty,
+    no project id is configured, or the fetch raises; otherwise 0.
+    """
+    console = Console()
+    cfg = load_config()
+    bw_cfg = (cfg.get("secrets") or {}).get("bitwarden") or {}
+    if not bw_cfg.get("enabled"):
+        console.print(
+            "[yellow]Bitwarden integration is disabled.  Run "
+            "`hermes secrets bitwarden setup` first.[/yellow]"
+        )
+        return 1
+
+    token_env = bw_cfg.get("access_token_env", "BWS_ACCESS_TOKEN")
+    token = os.environ.get(token_env, "").strip()
+    if not token:
+        console.print(f"[red]{token_env} is not set.[/red]")
+        return 1
+
+    project_id = bw_cfg.get("project_id", "")
+    if not project_id:
+        console.print("[red]No project_id configured.[/red]")
+        return 1
+
+    server_url = str(bw_cfg.get("server_url", "") or "").strip()
+
+    try:
+        # use_cache=False: an explicit sync always requests a fresh fetch.
+        secrets, warnings = bw.fetch_bitwarden_secrets(
+            access_token=token,
+            project_id=project_id,
+            use_cache=False,
+            server_url=server_url,
+        )
+    except Exception as exc:  # noqa: BLE001
+        console.print(f"[red]Fetch failed: {exc}[/red]")
+        return 1
+
+    if not secrets:
+        console.print("[yellow]No secrets in project.[/yellow]")
+        return 0
+
+    # NOTE(review): --apply forces ``override`` on regardless of the
+    # ``override_existing`` setting, so with override_existing=false a dry run
+    # reports "skip (already set)" for keys that --apply would overwrite.
+    # Confirm this is intended.
+    override = bool(bw_cfg.get("override_existing", False)) or args.apply
+    table = Table(show_header=True, header_style="bold")
+    table.add_column("Name", style="cyan")
+    table.add_column("Action")
+    applied = 0
+    for key in sorted(secrets):
+        # The token's own env var is never replaced by a fetched secret.
+        if key == token_env:
+            table.add_row(key, "[dim]skip (bootstrap token)[/dim]")
+            continue
+        already = bool(os.environ.get(key))
+        if already and not override:
+            table.add_row(key, "[dim]skip (already set)[/dim]")
+            continue
+        if args.apply:
+            os.environ[key] = secrets[key]
+            applied += 1
+            table.add_row(key, "[green]exported[/green]" + (" (overrode)" if already else ""))
+        else:
+            table.add_row(key, "[green]would export[/green]" + (" (overrides)" if already else ""))
+
+    console.print(table)
+    for w in warnings:
+        console.print(f"[yellow]warning:[/yellow] {w}")
+
+    if not args.apply:
+        console.print(
+            "\n  This was a dry-run — secrets are picked up automatically on the "
+            "next [cyan]hermes[/cyan] invocation.  Re-run with [cyan]--apply[/cyan] "
+            "to export into the current shell instead."
+        )
+    else:
+        console.print(f"\n  [green]Exported {applied} secret(s) into current process.[/green]")
+    return 0
+
+
+def cmd_disable(args: argparse.Namespace) -> int:
+    """Set ``secrets.bitwarden.enabled`` to False and save the config.
+
+    The stored access token is left untouched.  Always returns 0.
+    """
+    console = Console()
+    cfg = load_config()
+    secrets_section = cfg.setdefault("secrets", {})
+    bitwarden_section = secrets_section.setdefault("bitwarden", {})
+    bitwarden_section["enabled"] = False
+    save_config(cfg)
+
+    message = (
+        "[green]Disabled.[/green]  Bitwarden secrets will NOT be pulled on the next "
+        "Hermes invocation.\n"
+        "  Your access token is left in .env — remove it manually if you also want "
+        "to revoke the credential."
+    )
+    console.print(message)
+    return 0
+
+
+def cmd_install(args: argparse.Namespace) -> int:
+    """Install the bws binary via ``bw.install_bws`` and print its path.
+
+    ``--force`` is passed through as ``force=``.  Returns 0 on success and 1
+    when the install (or the report that follows it) raises.
+    """
+    console = Console()
+    force = bool(args.force)
+    try:
+        installed = bw.install_bws(force=force)
+        console.print(f"[green]✓[/green] {installed}  ({_bws_version(installed)})")
+    except Exception as exc:  # noqa: BLE001
+        console.print(f"[red]Install failed: {exc}[/red]")
+        return 1
+    return 0
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _yn(b: bool) -> str:
+    """Render a flag as Rich markup: green "yes" or dim "no"."""
+    if b:
+        return "[green]yes[/green]"
+    return "[dim]no[/dim]"
+
+
+def _bws_version(binary: Path) -> str:
+    """Return the first line printed by ``<binary> --version``.
+
+    Best-effort helper for display purposes.  Returns ``"version unknown"``
+    when the binary cannot be executed, does not finish within 5 seconds,
+    exits non-zero, or prints nothing on either stream.
+    """
+    try:
+        res = subprocess.run(
+            [str(binary), "--version"],
+            capture_output=True,
+            text=True,
+            timeout=5,
+        )
+    except (OSError, subprocess.TimeoutExpired):
+        return "version unknown"
+    if res.returncode != 0:
+        return "version unknown"
+    # Prefer stdout; fall back to stderr when stdout has no visible text.
+    output = (res.stdout or "").strip() or (res.stderr or "").strip()
+    if not output:
+        # Exit 0 with empty output previously raised IndexError on ``[0]``.
+        return "version unknown"
+    return output.splitlines()[0]
+
+
+def _list_projects(
+    binary: Path, token: str, console: Console, *, server_url: str = ""
+) -> Optional[List[dict]]:
+    """Run ``bws project list --output json`` and return the project dicts.
+
+    The child process gets a copy of the current environment with
+    ``BWS_ACCESS_TOKEN`` set to ``token``, ``NO_COLOR`` defaulted to ``1`` and,
+    when ``server_url`` is non-empty, ``BWS_SERVER_URL`` set to it.
+
+    Returns None after printing an error when the command cannot be run,
+    times out, exits non-zero, or prints something that is not JSON.  Returns
+    an empty list when the JSON is not a list.  Otherwise returns only the
+    dict entries that carry a truthy ``id``.
+    """
+    child_env = dict(os.environ)
+    child_env["BWS_ACCESS_TOKEN"] = token
+    if "NO_COLOR" not in child_env:
+        child_env["NO_COLOR"] = "1"
+    if server_url:
+        child_env["BWS_SERVER_URL"] = server_url
+
+    command = [str(binary), "project", "list", "--output", "json"]
+    try:
+        proc = subprocess.run(
+            command,
+            env=child_env,
+            capture_output=True,
+            text=True,
+            timeout=15,
+        )
+    except (OSError, subprocess.TimeoutExpired) as exc:
+        console.print(f"  [red]Couldn't list projects: {exc}[/red]")
+        return None
+
+    if proc.returncode != 0:
+        # Cap the echoed error text at 300 characters.
+        err = (proc.stderr or proc.stdout).strip()[:300]
+        console.print(f"  [red]bws project list failed: {err}[/red]")
+        lowered = err.lower()
+        region_mismatch = "invalid_client" in lowered or "400 bad request" in lowered
+        if region_mismatch:
+            console.print(
+                "  [yellow]'invalid_client' from the US identity endpoint usually "
+                "means the token is for a different Bitwarden region.  Re-run "
+                "[cyan]hermes secrets bitwarden setup[/cyan] and pick EU or "
+                "self-hosted at the region prompt, or set [cyan]secrets.bitwarden."
+                "server_url[/cyan] in config.yaml.[/yellow]"
+            )
+        elif "authorization" in lowered or "invalid" in lowered:
+            console.print(
+                "  [yellow]This usually means the access token is wrong or revoked. "
+                "Double-check it in the Bitwarden web app.[/yellow]"
+            )
+        return None
+
+    try:
+        parsed = json.loads(proc.stdout or "[]")
+    except json.JSONDecodeError as exc:
+        console.print(f"  [red]bws returned non-JSON: {exc}[/red]")
+        return None
+
+    if not isinstance(parsed, list):
+        return []
+    return [entry for entry in parsed if isinstance(entry, dict) and entry.get("id")]
+
+
+# Canonical Bitwarden region endpoints.  Keep in sync with what Bitwarden
+# publishes — these are stable but if a third region appears, add it here
+# and to the prompt below.
+#
+# Each entry is ``(menu label, server URL)``.  An empty URL means "use the
+# bws default".  ``_resolve_server_url`` numbers its menu from this list in
+# order and appends the self-hosted option after the last preset.
+_REGION_PRESETS = [
+    ("US Cloud  (https://vault.bitwarden.com — bws default)", ""),
+    ("EU Cloud  (https://vault.bitwarden.eu)", "https://vault.bitwarden.eu"),
+]
+
+
+def _resolve_server_url(
+    args: argparse.Namespace,
+    secrets_cfg: dict,
+    console: Console,
+) -> Optional[str]:
+    """Decide which Bitwarden server URL setup should use.
+
+    Sources are consulted in this order:
+      1. the ``--server-url`` flag, returned as-is (stripped);
+      2. the ``BWS_SERVER_URL`` environment variable, announced and returned;
+      3. an interactive menu of the region presets plus a self-hosted entry.
+         When ``secrets.bitwarden.server_url`` is already configured it is
+         shown above the menu and an empty answer keeps it.
+
+    Returns the URL as a string, where the empty string means "bws default"
+    (US Cloud).  Returns None when the user picks the custom entry and then
+    submits an empty URL.
+    """
+    flag_url = (args.server_url or "").strip()
+    if flag_url:
+        return flag_url
+
+    shell_url = os.environ.get("BWS_SERVER_URL", "").strip()
+    if shell_url:
+        console.print(
+            f"  Detected [cyan]BWS_SERVER_URL[/cyan]={shell_url} in your shell — using it."
+        )
+        return shell_url
+
+    existing = str(secrets_cfg.get("server_url", "") or "").strip()
+    if existing:
+        console.print(
+            f"  Existing config: [cyan]{existing}[/cyan]. "
+            "Press Enter to keep, or pick a different option below."
+        )
+
+    preset_count = len(_REGION_PRESETS)
+    custom_idx = preset_count + 1
+
+    menu = Table(show_header=True, header_style="bold", box=None, padding=(0, 2))
+    menu.add_column("#", style="cyan", width=4)
+    menu.add_column("Region / endpoint")
+    for number, preset in enumerate(_REGION_PRESETS, 1):
+        menu.add_row(str(number), preset[0])
+    menu.add_row(str(custom_idx), "Self-hosted / custom URL")
+    console.print(menu)
+
+    # The prompt text does not change between attempts, so build it once.
+    keep_hint = " (Enter to keep current)" if existing else ""
+    prompt = f"  Select region [1-{custom_idx}]" + keep_hint + ": "
+
+    while True:
+        choice = console.input(prompt).strip()
+        if not choice and existing:
+            return existing
+        try:
+            # An empty answer with nothing to keep also lands in the except.
+            idx = int(choice)
+        except ValueError:
+            console.print("  [red]Enter a number.[/red]")
+            continue
+
+        if idx == custom_idx:
+            custom = console.input(
+                "  Enter your Bitwarden server URL "
+                "(e.g. https://vault.example.com): "
+            ).strip()
+            if not custom:
+                console.print("  [red]Empty URL, aborting.[/red]")
+                return None
+            if not custom.startswith(("http://", "https://")):
+                console.print(
+                    "  [yellow]Warning: URL doesn't start with http:// or "
+                    "https:// — bws may reject it.[/yellow]"
+                )
+            return custom
+
+        if 1 <= idx <= preset_count:
+            return _REGION_PRESETS[idx - 1][1]
+
+        console.print(f"  [red]Out of range — pick 1-{custom_idx}.[/red]")
--- a/hermes_cli/security_audit.py
+++ b/hermes_cli/security_audit.py
@ -0,0 +1,576 @@
+"""On-demand supply-chain audit for Hermes Agent installs.
+
+Scans three surfaces a Hermes user actually controls and we can map to
+upstream advisories without auth or extra binaries:
+
+1. The Hermes venv (every PyPI dist via ``importlib.metadata``).
+2. Python deps declared by user-installed plugins under ``~/.hermes/plugins``
+   (``requirements.txt`` + ``pyproject.toml`` best-effort pin extraction).
+3. MCP servers wired in ``config.yaml`` whose ``command/args`` look like
+   ``npx -y <pkg>@<ver>`` or ``uvx <pkg>==<ver>``.
+
+Vulnerabilities are looked up against OSV.dev (``api.osv.dev/v1/querybatch``
+and ``/v1/vulns/{id}``). Single-shot, on-demand, never daily — see the design
+notes in ``references/security-disclosure-triage.md``.
+
+Out of scope on purpose: global pip/npm, editor/browser extensions,
+daily background scans, auto-blocking installs.
+"""
+
+from __future__ import annotations
+
+import argparse
+import concurrent.futures
+import json
+import re
+import sys
+import urllib.error
+import urllib.request
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Iterable, Optional
+
+from hermes_constants import get_hermes_home
+
+# OSV endpoints: the batch endpoint receives the component queries as a POST
+# body; ``OSV_VULN_URL`` is a template with a ``{vid}`` placeholder.
+OSV_BATCH_URL = "https://api.osv.dev/v1/querybatch"
+OSV_VULN_URL = "https://api.osv.dev/v1/vulns/{vid}"
+OSV_BATCH_MAX = 1000  # OSV documented hard cap per request
+# Passed as ``timeout=`` to ``urllib.request.urlopen`` (seconds).
+HTTP_TIMEOUT = 20
+# NOTE(review): presumably the worker count for per-vulnerability detail
+# lookups — its use is outside this part of the file; confirm.
+DETAIL_PARALLELISM = 8
+
+# Severity ordering for --fail-on gating. UNKNOWN sits below LOW so it
+# never blocks unless --fail-on is passed something even lower (we don't
+# expose that).
+# MODERATE and MEDIUM share rank 2, i.e. they are treated as the same tier.
+SEVERITY_ORDER = {
+    "UNKNOWN": 0,
+    "LOW": 1,
+    "MODERATE": 2,
+    "MEDIUM": 2,
+    "HIGH": 3,
+    "CRITICAL": 4,
+}
+
+
+# ─── Data shapes ──────────────────────────────────────────────────────────────
+
+
+@dataclass(frozen=True)
+class Component:
+    """A single (name, version, ecosystem) tuple discovered on disk.
+
+    The dataclass is frozen, which makes instances hashable;
+    ``_osv_query_batch`` relies on that to use them as dict keys.
+    """
+
+    # Package name as found by the discovery step (no normalisation is
+    # applied in this class).
+    name: str
+    # Exact version string that is sent to OSV.
+    version: str
+    ecosystem: str  # "PyPI" | "npm" — exactly as OSV expects
+    source: str    # human-readable origin, e.g. "venv", "plugin:foo", "mcp:bar"
+
+
+@dataclass
+class Vulnerability:
+    """One advisory, identified by its OSV id.
+
+    Only ``osv_id`` is required; the other fields default to "nothing known".
+    """
+
+    # OSV identifier of the advisory.
+    osv_id: str
+    # Severity tier; the default "UNKNOWN" is the lowest rank in
+    # ``SEVERITY_ORDER``.
+    severity: str = "UNKNOWN"
+    # Short description; empty when none is available.
+    summary: str = ""
+    # NOTE(review): presumably the versions that contain the fix — the code
+    # that fills this in is outside this part of the file; confirm.
+    fixed_versions: list[str] = field(default_factory=list)
+
+
+@dataclass
+class Finding:
+    """Pairs one ``Component`` with one ``Vulnerability``."""
+
+    # NOTE(review): presumably the component affected by ``vuln`` — the code
+    # that builds findings is outside this part of the file; confirm.
+    component: Component
+    vuln: Vulnerability
+
+
+# ─── Component discovery ──────────────────────────────────────────────────────
+
+
+def _discover_venv() -> list[Component]:
+    """List the distributions reported by ``importlib.metadata``.
+
+    Distributions whose metadata cannot be read, or that lack a name or a
+    version, are skipped.  Duplicates are collapsed on
+    ``(lower-cased name, version)``; the first occurrence wins.
+    """
+    from importlib.metadata import distributions
+
+    components: list[Component] = []
+    seen_keys: set[tuple[str, str]] = set()
+    for dist in distributions():
+        try:
+            name = (dist.metadata["Name"] or "").strip()
+        except Exception:
+            continue
+        version = (dist.version or "").strip()
+        if not (name and version):
+            continue
+        dedup_key = (name.lower(), version)
+        if dedup_key not in seen_keys:
+            seen_keys.add(dedup_key)
+            components.append(
+                Component(name=name, version=version, ecosystem="PyPI", source="venv")
+            )
+    return components
+
+
+# One requirement specifier: ``name[extras]==version`` with an optional
+# ``; environment marker`` tail.  Comments, line continuations and
+# per-requirement options are removed by ``_parse_requirements`` before this
+# pattern is applied.
+_REQ_LINE = re.compile(
+    r"""^\s*
+        (?P<name>[A-Za-z0-9][A-Za-z0-9._-]*)
+        (?:\[[^\]]+\])?              # extras
+        \s*==\s*
+        (?P<version>[A-Za-z0-9._+!-]+)
+        \s*(?:;.*)?$
+    """,
+    re.VERBOSE,
+)
+
+# In a requirements file ``#`` starts a comment at the beginning of a line or
+# after whitespace.
+_REQ_INLINE_COMMENT = re.compile(r"(^|\s+)#.*$")
+# Per-requirement options that trail the specifier, e.g. ``--hash=sha256:...``.
+_REQ_TRAILING_OPTIONS = re.compile(r"\s+--\S.*$")
+
+
+def _parse_requirements(text: str) -> list[tuple[str, str]]:
+    """Extract ``name==version`` pins. Everything else (>=, ~=, no pin) is skipped.
+
+    A loose pin can't be mapped to a single OSV query, and getting it wrong
+    is worse than missing a finding for an audit tool — false positives
+    train users to ignore output.
+
+    Before matching, each line is cleaned of an inline ``# comment``, a
+    trailing ``\\`` line continuation and trailing ``--option`` arguments, so
+    hash-pinned lines such as ``foo==1.0 --hash=sha256:...`` are still
+    recognised.  Lines that are blank, comment-only or start with ``-``
+    (options, ``-r``/``-e`` includes, continued ``--hash`` lines) are ignored.
+
+    Returns the pins in file order as ``(name, version)`` tuples.
+    """
+    pins: list[tuple[str, str]] = []
+    for raw in text.splitlines():
+        line = _REQ_INLINE_COMMENT.sub("", raw).strip()
+        if line.endswith("\\"):
+            # The continued lines only carry options; the pin is on this one.
+            line = line[:-1].rstrip()
+        line = _REQ_TRAILING_OPTIONS.sub("", line)
+        if not line or line.startswith("-"):
+            continue
+        m = _REQ_LINE.match(line)
+        if m:
+            pins.append((m.group("name"), m.group("version")))
+    return pins
+
+
+def _parse_pyproject_pins(text: str) -> list[tuple[str, str]]:
+    """Pull ``name==version`` pins from a ``pyproject.toml`` ``dependencies`` list.
+
+    Uses stdlib ``tomllib`` (3.11+). Same exact-pin policy as requirements.
+
+    Both ``[project].dependencies`` and every group under
+    ``[project.optional-dependencies]`` are read.  The function is
+    best-effort: it returns an empty list when ``tomllib`` is unavailable,
+    when the text is not valid TOML, or when ``project`` is missing or is not
+    a table.
+    """
+    try:
+        import tomllib
+    except ImportError:  # pragma: no cover - 3.10 only
+        return []
+    try:
+        data = tomllib.loads(text)
+    except Exception:
+        return []
+
+    project = data.get("project")
+    if not isinstance(project, dict):
+        # ``project = "x"`` is valid TOML but not a table; calling ``.get`` on
+        # it used to raise AttributeError and abort the audit.
+        return []
+
+    deps: list[str] = []
+    declared = project.get("dependencies")
+    if isinstance(declared, list):
+        deps.extend(str(x) for x in declared)
+    optional = project.get("optional-dependencies")
+    if isinstance(optional, dict):
+        for group in optional.values():
+            if isinstance(group, list):
+                deps.extend(str(x) for x in group)
+
+    pins: list[tuple[str, str]] = []
+    for dep in deps:
+        m = _REQ_LINE.match(dep)
+        if m:
+            pins.append((m.group("name"), m.group("version")))
+    return pins
+
+
+def _discover_plugins(hermes_home: Path) -> list[Component]:
+    """Collect exact pins declared by plugins under ``<hermes_home>/plugins``.
+
+    Plugins typically don't install into the venv (they're directory-based
+    with relative imports), so their stated requirements are useful audit
+    surface even when the venv scan misses them.
+
+    For each non-hidden plugin directory, in sorted order, the files
+    ``requirements.txt``, ``requirements-dev.txt`` and ``pyproject.toml`` are
+    read when present.  A file that cannot be read is skipped.  Every pin is
+    tagged with the source ``plugin:<directory name>``.
+    """
+    plugins_dir = hermes_home / "plugins"
+    if not plugins_dir.is_dir():
+        return []
+
+    found: list[Component] = []
+    for plugin_dir in sorted(plugins_dir.iterdir()):
+        if plugin_dir.name.startswith(".") or not plugin_dir.is_dir():
+            continue
+        source = f"plugin:{plugin_dir.name}"
+        # (manifest path, parser) pairs, in the order they are scanned.
+        manifests = (
+            (plugin_dir / "requirements.txt", _parse_requirements),
+            (plugin_dir / "requirements-dev.txt", _parse_requirements),
+            (plugin_dir / "pyproject.toml", _parse_pyproject_pins),
+        )
+        for manifest, parse in manifests:
+            if not manifest.is_file():
+                continue
+            try:
+                pins = parse(manifest.read_text(encoding="utf-8", errors="replace"))
+            except OSError:
+                continue
+            found.extend(
+                Component(name=name, version=version, ecosystem="PyPI", source=source)
+                for name, version in pins
+            )
+    return found
+
+
+# npx forms we recognise:
+#   npx -y @scope/pkg@1.2.3
+#   npx --yes pkg@1.2.3
+#   npx pkg@1.2.3 [...args]
+# We deliberately don't try to resolve unversioned names — that maps to
+# "latest" at runtime and isn't a stable audit subject.
+# Group 1 is the package name (optionally ``@scope/``-prefixed), group 2 the
+# version after the final ``@``.  The pattern is anchored at both ends, so the
+# whole token has to be a package reference.
+_NPX_PKG = re.compile(r"^(@[A-Za-z0-9._-]+/[A-Za-z0-9._-]+|[A-Za-z0-9._-]+)@([A-Za-z0-9._+-]+)$")
+# uvx forms:
+#   uvx pkg==1.2.3
+#   uvx --with pkg==1.2.3 entrypoint
+# Group 1 is the package name, group 2 the exact version after ``==``.
+_UVX_PKG = re.compile(r"^([A-Za-z0-9][A-Za-z0-9._-]*)==([A-Za-z0-9._+!-]+)$")
+
+
+def _extract_mcp_component(server_name: str, command: str, args: list[str]) -> Optional[Component]:
+    """Map an MCP server's ``command``/``args`` to a pinned package, if any.
+
+    Only launchers whose lower-cased command ends with ``npx`` (npm
+    ecosystem) or ``uvx`` (PyPI ecosystem) are considered.  Tokens starting
+    with ``-`` are skipped, and the first remaining token must be a pinned
+    package reference.
+
+    Returns None when there are no args, the launcher is something else, no
+    non-flag token exists, or that token is not pinned (local paths, Docker
+    images, unversioned npx, etc.).  The audit stays silent rather than guess.
+    """
+    launcher = (command or "").strip().lower()
+    if not args:
+        return None
+
+    if launcher.endswith("npx"):
+        pattern, ecosystem = _NPX_PKG, "npm"
+    elif launcher.endswith("uvx"):
+        pattern, ecosystem = _UVX_PKG, "PyPI"
+    else:
+        return None
+
+    # Only the first non-flag token is inspected; later tokens are arguments.
+    first_positional = next((tok for tok in args if not tok.startswith("-")), None)
+    if first_positional is None:
+        return None
+
+    m = pattern.match(first_positional)
+    if m is None:
+        return None
+    return Component(
+        name=m.group(1),
+        version=m.group(2),
+        ecosystem=ecosystem,
+        source=f"mcp:{server_name}",
+    )
+
+
+def _discover_mcp() -> list[Component]:
+    """Return the pinned packages launched by the configured MCP servers.
+
+    Server entries come from ``hermes_cli.mcp_config._get_mcp_servers``; if
+    that import fails the result is empty.  Entries that are not dicts, or
+    whose ``args`` is not a list, are ignored.  Args are stringified before
+    being handed to ``_extract_mcp_component``.
+    """
+    try:
+        from hermes_cli.mcp_config import _get_mcp_servers
+    except Exception:
+        return []
+
+    servers = _get_mcp_servers()
+    if not isinstance(servers, dict):
+        return []
+
+    found: list[Component] = []
+    for server_name, server_cfg in servers.items():
+        if not isinstance(server_cfg, dict):
+            continue
+        raw_args = server_cfg.get("args") or []
+        if not isinstance(raw_args, list):
+            continue
+        component = _extract_mcp_component(
+            server_name,
+            server_cfg.get("command", "") or "",
+            [str(a) for a in raw_args],
+        )
+        if component is not None:
+            found.append(component)
+    return found
+
+
+# ─── OSV client ───────────────────────────────────────────────────────────────
+
+
+def _http_post_json(url: str, payload: dict) -> dict:
+    """POST ``payload`` as a JSON body to ``url`` and return the decoded JSON reply.
+
+    The request uses ``HTTP_TIMEOUT``; network and decoding errors propagate.
+    """
+    body = json.dumps(payload).encode("utf-8")
+    request = urllib.request.Request(
+        url,
+        data=body,
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+    with urllib.request.urlopen(request, timeout=HTTP_TIMEOUT) as response:
+        raw = response.read()
+    return json.loads(raw.decode("utf-8"))
+
+
+def _http_get_json(url: str) -> dict:
+    """GET ``url`` and return the decoded JSON reply.
+
+    The request uses ``HTTP_TIMEOUT``; network and decoding errors propagate.
+    """
+    request = urllib.request.Request(url, method="GET")
+    with urllib.request.urlopen(request, timeout=HTTP_TIMEOUT) as response:
+        raw = response.read()
+    return json.loads(raw.decode("utf-8"))
+
+
+def _osv_query_batch(components: list[Component]) -> dict[Component, list[str]]:
+    """Return {component -> [osv_id, ...]} for components with any vulns.
+
+    Components are sent to ``OSV_BATCH_URL`` in chunks of at most
+    ``OSV_BATCH_MAX`` queries. Components without findings are omitted
+    from the result dict.
+
+    Raises:
+        RuntimeError: the request failed, the reply was not valid JSON,
+            or the reply did not contain exactly one result per query.
+            A short or malformed reply is treated as a failure rather
+            than silently reporting the unmatched components as clean.
+    """
+    if not components:
+        return {}
+    findings: dict[Component, list[str]] = {}
+    for chunk_start in range(0, len(components), OSV_BATCH_MAX):
+        chunk = components[chunk_start:chunk_start + OSV_BATCH_MAX]
+        payload = {
+            "queries": [
+                {
+                    "package": {"name": c.name, "ecosystem": c.ecosystem},
+                    "version": c.version,
+                }
+                for c in chunk
+            ]
+        }
+        try:
+            resp = _http_post_json(OSV_BATCH_URL, payload)
+        except (
+            urllib.error.URLError,
+            TimeoutError,
+            ConnectionError,
+            ValueError,  # json.JSONDecodeError / UnicodeDecodeError on a bad body
+        ) as exc:
+            raise RuntimeError(f"OSV batch query failed: {exc}") from exc
+        results = resp.get("results") if isinstance(resp, dict) else None
+        if not isinstance(results, list) or len(results) != len(chunk):
+            # Results are matched to queries purely by position, so a
+            # short or wrongly-shaped reply cannot be attributed safely.
+            raise RuntimeError(
+                "OSV batch query failed: response did not contain one "
+                "result per queried component"
+            )
+        for comp, result in zip(chunk, results):
+            vulns = result.get("vulns") if isinstance(result, dict) else None
+            ids = [
+                v["id"]
+                for v in vulns or []
+                if isinstance(v, dict) and v.get("id")
+            ]
+            if ids:
+                findings[comp] = ids
+    return findings
+
+
+def _cvss3_base_score(vector: str) -> Optional[float]:
+    """Compute the CVSS v3.0/v3.1 base score for a vector string.
+
+    Returns ``None`` for anything that is not a complete v3.x vector
+    (v2/v4 vectors, missing or unrecognised base metrics).
+    """
+    parts = vector.strip().split("/")
+    if parts[0] not in ("CVSS:3.0", "CVSS:3.1"):
+        return None
+    metrics: dict[str, str] = {}
+    for part in parts[1:]:
+        key, sep, value = part.partition(":")
+        if sep:
+            metrics[key] = value
+    try:
+        scope_changed = {"U": False, "C": True}[metrics["S"]]
+        attack_vector = {"N": 0.85, "A": 0.62, "L": 0.55, "P": 0.2}[metrics["AV"]]
+        attack_complexity = {"L": 0.77, "H": 0.44}[metrics["AC"]]
+        # Privileges Required weights depend on whether scope changes.
+        privileges = {
+            "N": 0.85,
+            "L": 0.68 if scope_changed else 0.62,
+            "H": 0.5 if scope_changed else 0.27,
+        }[metrics["PR"]]
+        user_interaction = {"N": 0.85, "R": 0.62}[metrics["UI"]]
+        cia_weight = {"H": 0.56, "L": 0.22, "N": 0.0}
+        conf = cia_weight[metrics["C"]]
+        integ = cia_weight[metrics["I"]]
+        avail = cia_weight[metrics["A"]]
+    except KeyError:
+        return None
+
+    iss = 1 - (1 - conf) * (1 - integ) * (1 - avail)
+    if scope_changed:
+        impact = 7.52 * (iss - 0.029) - 3.25 * (iss - 0.02) ** 15
+    else:
+        impact = 6.42 * iss
+    if impact <= 0:
+        return 0.0
+    exploitability = (
+        8.22 * attack_vector * attack_complexity * privileges * user_interaction
+    )
+    total = impact + exploitability
+    if scope_changed:
+        total *= 1.08
+    total = min(total, 10.0)
+    # "Roundup" as defined by the CVSS v3.1 spec: round up to one
+    # decimal, using integer arithmetic to dodge float artefacts.
+    scaled = round(total * 100000)
+    if scaled % 10000 == 0:
+        return scaled / 100000.0
+    return (scaled // 10000 + 1) / 10.0
+
+
+def _osv_severity_from_record(record: dict) -> str:
+    """Extract a severity tier from an OSV vuln record.
+
+    Sources are consulted in this order:
+
+    1. ``database_specific.severity`` (the human-readable bucket GHSAs
+       carry), when it is a key of ``SEVERITY_ORDER``.
+    2. ``affected[].ecosystem_specific.severity``, same condition.
+    3. The highest CVSS v3.x base score found in the top-level or
+       per-affected ``severity`` lists, mapped to
+       CRITICAL / HIGH / MODERATE / LOW.
+
+    Returns ``"UNKNOWN"`` when none of these yields a tier.
+    """
+    db_specific = record.get("database_specific")
+    raw = db_specific.get("severity") if isinstance(db_specific, dict) else None
+    if isinstance(raw, str) and raw.strip():
+        upper = raw.strip().upper()
+        if upper in SEVERITY_ORDER:
+            return upper
+
+    affected = [e for e in record.get("affected") or [] if isinstance(e, dict)]
+    for entry in affected:
+        eco_spec = entry.get("ecosystem_specific")
+        sev = eco_spec.get("severity") if isinstance(eco_spec, dict) else None
+        if isinstance(sev, str) and sev.strip().upper() in SEVERITY_ORDER:
+            return sev.strip().upper()
+
+    # Fall back to CVSS score → tier. OSV stores CVSS as a vector string
+    # ("CVSS:3.1/AV:N/..."), so the base score has to be computed.
+    severity_entries = list(record.get("severity") or [])
+    for entry in affected:
+        severity_entries.extend(entry.get("severity") or [])
+    score: Optional[float] = None
+    for sev_entry in severity_entries:
+        if not isinstance(sev_entry, dict):
+            continue
+        vector = sev_entry.get("score")
+        if not isinstance(vector, str):
+            continue
+        candidate = _cvss3_base_score(vector)
+        if candidate is not None and (score is None or candidate > score):
+            score = candidate
+
+    if score is not None:
+        if score >= 9.0:
+            return "CRITICAL"
+        if score >= 7.0:
+            return "HIGH"
+        if score >= 4.0:
+            return "MODERATE"
+        if score > 0:
+            return "LOW"
+    return "UNKNOWN"
+
+
+def _osv_fixed_versions(record: dict) -> list[str]:
+    """Return the ``fixed`` versions named in the record's affected ranges.
+
+    Each version is stringified; duplicates are dropped while keeping
+    the order in which versions were first seen.
+    """
+    # A dict doubles as an insertion-ordered set.
+    ordered: dict[str, None] = {}
+    for affected_entry in record.get("affected") or []:
+        for version_range in affected_entry.get("ranges") or []:
+            for event in version_range.get("events") or []:
+                if "fixed" in event:
+                    ordered.setdefault(str(event["fixed"]), None)
+    return list(ordered)
+
+
+def _osv_fetch_details(vuln_ids: Iterable[str]) -> dict[str, Vulnerability]:
+    """Fetch summary/severity for each unique vuln id, in parallel.
+
+    Falsy ids are ignored and duplicates are fetched once. Detail
+    lookups are best-effort: if a request fails, or the reply is not a
+    JSON object, that id maps to a bare ``Vulnerability(osv_id=...)``
+    instead of aborting the whole audit.
+    """
+    unique = sorted({vid for vid in vuln_ids if vid})
+    if not unique:
+        return {}
+
+    def _fetch_one(vid: str) -> Vulnerability:
+        try:
+            rec = _http_get_json(OSV_VULN_URL.format(vid=vid))
+        except (
+            urllib.error.URLError,
+            TimeoutError,
+            ConnectionError,
+            ValueError,  # json.JSONDecodeError / UnicodeDecodeError on a bad body
+        ):
+            return Vulnerability(osv_id=vid)
+        if not isinstance(rec, dict):
+            return Vulnerability(osv_id=vid)
+        return Vulnerability(
+            osv_id=vid,
+            severity=_osv_severity_from_record(rec),
+            summary=(rec.get("summary") or "").strip(),
+            fixed_versions=_osv_fixed_versions(rec),
+        )
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=DETAIL_PARALLELISM) as pool:
+        return {vuln.osv_id: vuln for vuln in pool.map(_fetch_one, unique)}
+
+
+# ─── Orchestration ────────────────────────────────────────────────────────────
+
+
+def run_audit(
+    *,
+    skip_venv: bool = False,
+    skip_plugins: bool = False,
+    skip_mcp: bool = False,
+    hermes_home: Optional[Path] = None,
+) -> list[Finding]:
+    """Discover components, query OSV, and return the resulting findings.
+
+    Each ``skip_*`` flag disables one discovery source. ``hermes_home``
+    defaults to ``get_hermes_home()`` and is handed to plugin discovery.
+
+    Findings are ordered by severity (highest first), then by component
+    source, lower-cased component name, and OSV id. Returns an empty
+    list when nothing was discovered or OSV reported nothing.
+    """
+    home = hermes_home or Path(get_hermes_home())
+
+    inventory: list[Component] = []
+    if not skip_venv:
+        inventory += _discover_venv()
+    if not skip_plugins:
+        inventory += _discover_plugins(home)
+    if not skip_mcp:
+        inventory += _discover_mcp()
+    if not inventory:
+        return []
+
+    ids_by_component = _osv_query_batch(inventory)
+    if not ids_by_component:
+        return []
+
+    details = _osv_fetch_details(
+        [vid for ids in ids_by_component.values() for vid in ids]
+    )
+
+    # Ids that came back without details still produce a finding, backed
+    # by a bare Vulnerability record.
+    findings: list[Finding] = [
+        Finding(component=comp, vuln=details.get(vid) or Vulnerability(osv_id=vid))
+        for comp, ids in ids_by_component.items()
+        for vid in ids
+    ]
+
+    def _rank(finding: Finding):
+        return (
+            -SEVERITY_ORDER.get(finding.vuln.severity, 0),
+            finding.component.source,
+            finding.component.name.lower(),
+            finding.vuln.osv_id,
+        )
+
+    findings.sort(key=_rank)
+    return findings
+
+
+# ─── Rendering ────────────────────────────────────────────────────────────────
+
+
+def _render_human(findings: list[Finding], total_components: int) -> str:
+    """Render findings as plain text for the terminal.
+
+    Output is a header line and a blank line, then one entry per
+    finding. A ``[source]`` heading is emitted whenever the source
+    differs from the previous finding's. Summaries longer than 100
+    characters are cut to 97 plus ``...``, and at most three fixed
+    versions are listed.
+    """
+    if not findings:
+        return f"No known vulnerabilities found across {total_components} component(s)."
+
+    detail_indent = " " * 11
+    rendered: list[str] = [
+        f"Found {len(findings)} known vulnerability finding(s) "
+        f"across {total_components} component(s):",
+        "",
+    ]
+    current_source = None
+    for finding in findings:
+        component, vuln = finding.component, finding.vuln
+        if component.source != current_source:
+            current_source = component.source
+            rendered.append(f"[{current_source}]")
+        rendered.append(
+            f"  {vuln.severity.ljust(8)}  {component.name}=={component.version}  {vuln.osv_id}"
+        )
+        if vuln.summary:
+            text = vuln.summary if len(vuln.summary) <= 100 else vuln.summary[:97] + "..."
+            rendered.append(f"{detail_indent}{text}")
+        if vuln.fixed_versions:
+            rendered.append(
+                f"{detail_indent}fixed in: {', '.join(vuln.fixed_versions[:3])}"
+            )
+    return "\n".join(rendered)
+
+
+def _render_json(findings: list[Finding], total_components: int) -> str:
+    """Render findings as a JSON document indented by two spaces.
+
+    Top-level keys are ``total_components_scanned``, ``finding_count``
+    and ``findings``; each finding is flattened into one object holding
+    the component and vulnerability fields.
+    """
+    rows = []
+    for finding in findings:
+        component, vuln = finding.component, finding.vuln
+        rows.append(
+            {
+                "package": component.name,
+                "version": component.version,
+                "ecosystem": component.ecosystem,
+                "source": component.source,
+                "vuln_id": vuln.osv_id,
+                "severity": vuln.severity,
+                "summary": vuln.summary,
+                "fixed_versions": vuln.fixed_versions,
+            }
+        )
+    document = {
+        "total_components_scanned": total_components,
+        "finding_count": len(findings),
+        "findings": rows,
+    }
+    return json.dumps(document, indent=2)
+
+
+def _count_components(
+    *, skip_venv: bool, skip_plugins: bool, skip_mcp: bool, hermes_home: Path
+) -> int:
+    """Count the components the enabled discovery sources would yield.
+
+    Each source is run in full just to be counted; nothing is cached
+    for a later ``run_audit`` call.
+    """
+    sizes: list[int] = []
+    if not skip_venv:
+        sizes.append(len(_discover_venv()))
+    if not skip_plugins:
+        sizes.append(len(_discover_plugins(hermes_home)))
+    if not skip_mcp:
+        sizes.append(len(_discover_mcp()))
+    return sum(sizes)
+
+
+# ─── CLI entrypoint ───────────────────────────────────────────────────────────
+
+
+def cmd_security_audit(args: argparse.Namespace) -> int:
+    """Implementation of `hermes security audit`.
+
+    Reads ``skip_venv``, ``skip_plugins``, ``skip_mcp``, ``json`` and
+    ``fail_on`` from ``args``; missing attributes fall back to
+    ``False`` / ``"critical"``.
+
+    Returns the process exit code:
+        0 — nothing to scan, or no finding reaches the ``--fail-on`` tier
+        1 — at least one finding meets or exceeds the ``--fail-on`` tier
+        2 — unknown ``--fail-on`` value, or the audit itself failed
+    """
+    home = Path(get_hermes_home())
+
+    def _flag(attr: str) -> bool:
+        return bool(getattr(args, attr, False))
+
+    output_json = _flag("json")
+    scope = {
+        "skip_venv": _flag("skip_venv"),
+        "skip_plugins": _flag("skip_plugins"),
+        "skip_mcp": _flag("skip_mcp"),
+        "hermes_home": home,
+    }
+
+    fail_on = (getattr(args, "fail_on", None) or "critical").upper()
+    if fail_on not in SEVERITY_ORDER:
+        print(
+            f"unknown --fail-on value: {fail_on.lower()} "
+            f"(choose from: low, moderate, high, critical)",
+            file=sys.stderr,
+        )
+        return 2
+
+    total = _count_components(**scope)
+    if total == 0:
+        if output_json:
+            print(json.dumps({"total_components_scanned": 0, "finding_count": 0, "findings": []}))
+        else:
+            print("No components discovered (everything skipped, or empty environment).")
+        return 0
+
+    try:
+        findings = run_audit(**scope)
+    except RuntimeError as exc:
+        print(f"audit failed: {exc}", file=sys.stderr)
+        return 2
+
+    render = _render_json if output_json else _render_human
+    print(render(findings, total))
+
+    # Exit code: 1 iff any finding meets or exceeds the --fail-on threshold.
+    # Severities missing from SEVERITY_ORDER rank as 0.
+    threshold = SEVERITY_ORDER[fail_on]
+    breached = any(
+        SEVERITY_ORDER.get(finding.vuln.severity, 0) >= threshold
+        for finding in findings
+    )
+    return 1 if breached else 0
--- a/hermes_cli/service_manager.py
+++ b/hermes_cli/service_manager.py
@ -0,0 +1,886 @@
+"""Abstract service manager interface.
+
+Wraps the existing systemd (Linux host), launchd (macOS host), Windows
+Scheduled Task (native Windows host), and s6 (container) backends behind
+a common Protocol. Only the s6 backend supports runtime registration
+(for per-profile gateways) — host backends raise NotImplementedError
+from those methods, and callers MUST check supports_runtime_registration()
+before invoking them.
+
+Host-side call sites (setup wizard, uninstall, status) continue to use
+the existing module-level functions in hermes_cli.gateway and
+hermes_cli.gateway_windows directly. This protocol is a thin facade
+used by new code that needs to be backend-agnostic — specifically the
+profile create/delete hooks (Phase 4) and the s6 dispatch path in
+``hermes gateway start/stop/restart`` when running inside a container.
+"""
+from __future__ import annotations
+
+import re
+from pathlib import Path
+from typing import Literal, Protocol, runtime_checkable
+
+ServiceManagerKind = Literal["systemd", "launchd", "windows", "s6", "none"]
+
+# Profile name → service directory mapping. Profile names must be safe
+# as filesystem directory names because the s6 backend creates a service
+# directory at ``<scandir>/gateway-<profile>/``. We reject anything that
+# could traverse paths, span filesystems, or break s6's own naming rules.
+#
+# The pattern ends in ``\Z`` rather than ``$``: ``$`` also matches just
+# before a trailing newline, which would let ``"name\n"`` through.
+_VALID_PROFILE_RE = re.compile(r"^[a-z0-9][a-z0-9_-]*\Z")
+# s6-svscan's default name_max (251) applies to the whole service
+# directory name, i.e. including the ``gateway-`` prefix, so the profile
+# part has to leave room for it.
+_MAX_PROFILE_LEN = 251 - len("gateway-")
+
+
+def validate_profile_name(name: str) -> None:
+    """Raise ValueError if ``name`` is not usable as a profile name.
+
+    Profile names are used as s6 service directory names, so they must
+    match a conservative subset of filesystem-safe characters. Reject
+    empty strings, uppercase, path-traversal sequences, embedded or
+    trailing newlines, and anything longer than ``_MAX_PROFILE_LEN``
+    (s6's default ``name_max`` minus the ``gateway-`` prefix).
+
+    Raises:
+        ValueError: ``name`` is empty, too long, or contains characters
+            outside ``[a-z0-9][a-z0-9_-]*``.
+    """
+    if not name:
+        raise ValueError("profile name must not be empty")
+    if len(name) > _MAX_PROFILE_LEN:
+        raise ValueError(
+            f"profile name too long ({len(name)} > {_MAX_PROFILE_LEN})"
+        )
+    # fullmatch: the whole string must be consumed by the pattern.
+    if not _VALID_PROFILE_RE.fullmatch(name):
+        raise ValueError(
+            f"profile name must match [a-z0-9][a-z0-9_-]*, got {name!r}"
+        )
+
+
+@runtime_checkable
+class ServiceManager(Protocol):
+    """Backend-neutral contract for init-system service operations.
+
+    Every backend implements the lifecycle calls (``start``, ``stop``,
+    ``restart``, ``is_running``). The runtime-registration calls
+    (``register_profile_gateway``, ``unregister_profile_gateway``,
+    ``list_profile_gateways``) are implemented only by the s6 backend,
+    so callers MUST consult ``supports_runtime_registration()`` before
+    using them.
+    """
+
+    # Which backend this instance wraps.
+    kind: ServiceManagerKind
+
+    # -- lifecycle of a pre-declared service -------------------------------
+
+    def start(self, name: str) -> None:
+        """Start the service ``name``."""
+
+    def stop(self, name: str) -> None:
+        """Stop the service ``name``."""
+
+    def restart(self, name: str) -> None:
+        """Restart the service ``name``."""
+
+    def is_running(self, name: str) -> bool:
+        """Report whether the service ``name`` is running."""
+
+    # -- runtime registration (s6 only) ------------------------------------
+
+    def supports_runtime_registration(self) -> bool:
+        """Report whether the registration methods below are usable."""
+
+    def register_profile_gateway(
+        self,
+        profile: str,
+        *,
+        extra_env: dict[str, str] | None = None,
+    ) -> None:
+        """Register a gateway service for ``profile``."""
+
+    def unregister_profile_gateway(self, profile: str) -> None:
+        """Remove the gateway service registered for ``profile``."""
+
+    def list_profile_gateways(self) -> list[str]:
+        """List the registered profile gateways."""
+
+
+def detect_service_manager() -> ServiceManagerKind:
+    """Work out which service manager this environment offers.
+
+    Probes run in a fixed order and the first hit wins:
+
+        "s6" — inside a container when /init is s6-svscan (Phase 2+)
+        "windows" — native Windows host
+        "launchd" — macOS host
+        "systemd" — Linux host with a working user/system bus
+        "none" — anything else (Termux, sandbox shells, etc.)
+
+    This is NOT a replacement for ``supports_systemd_services()``, which
+    host call sites keep using. It serves new backend-agnostic code:
+    the profile create/delete hooks and the s6 dispatch path in
+    ``hermes gateway start/stop/restart``.
+    """
+    # Deferred imports: callers that only want the Protocol type or
+    # validate_profile_name() should not pay for the whole gateway
+    # dependency graph at import time.
+    from hermes_constants import is_container
+    from hermes_cli.gateway import (
+        is_macos,
+        is_windows,
+        supports_systemd_services,
+    )
+
+    probes = (
+        ("s6", lambda: is_container() and _s6_running()),
+        ("windows", is_windows),
+        ("launchd", is_macos),
+        ("systemd", supports_systemd_services),
+    )
+    for kind, probe in probes:
+        if probe():
+            return kind
+    return "none"
+
+
+def _s6_running() -> bool:
+    """Report whether s6-svscan is PID 1 in this container.
+
+    The check must work for root **and** for the unprivileged hermes
+    user (UID 10000). ``Path('/proc/1/exe').resolve()`` is not usable:
+    for a non-root UID the symlink is unreadable and ``resolve()``
+    quietly hands back the path unchanged, so the name is always the
+    literal ``"exe"``. Because every Hermes runtime call in the
+    container drops to hermes via ``s6-setuidgid``, that silent miss
+    left the runtime-registration path inert in production (PR #30136
+    review).
+
+    Two world-readable signals are used instead, and both must hold:
+      * ``/proc/1/comm`` reads ``s6-svscan`` (s6-overlay is PID 1).
+      * ``/run/s6/basedir`` is a directory. s6-overlay's stage1 creates
+        it; it is more specific than ``/run/s6``, which other tools
+        occasionally create.
+
+    Either signal alone could false-positive — s6 binaries installed
+    under a different init, or an unrelated process named
+    ``s6-svscan``.
+    """
+    try:
+        pid1_name = Path("/proc/1/comm").read_text(encoding="utf-8").strip()
+    except OSError:
+        return False
+    return pid1_name == "s6-svscan" and Path("/run/s6/basedir").is_dir()
+
+
+# ---------------------------------------------------------------------------
+# Backend wrappers
+#
+# These adapters are thin facades over the existing module-level functions
+# in ``hermes_cli.gateway`` (systemd/launchd) and ``hermes_cli.gateway_windows``
+# (Windows Scheduled Tasks). The protocol's ``name`` parameter is currently
+# unused for host backends — they operate on whichever profile is currently
+# active (set via the ``hermes -p <profile>`` flag before the call). This
+# matches existing host-side semantics; the parameter shape is designed
+# for s6 where each profile maps to a distinct service directory.
+# ---------------------------------------------------------------------------
+
+
+class _RegistrationUnsupportedMixin:
+    """Shared "not supported" answers for host backends.
+
+    Reports that runtime registration is unavailable, refuses register
+    and unregister with ``NotImplementedError`` (the message names the
+    concrete backend class), and lists no profile gateways.
+    """
+
+    def supports_runtime_registration(self) -> bool:
+        return False
+
+    def register_profile_gateway(
+        self,
+        profile: str,
+        *,
+        extra_env: dict[str, str] | None = None,
+    ) -> None:
+        backend = type(self).__name__
+        raise NotImplementedError(
+            f"{backend} does not support runtime profile "
+            "gateway registration (container-only feature)"
+        )
+
+    def unregister_profile_gateway(self, profile: str) -> None:
+        backend = type(self).__name__
+        raise NotImplementedError(
+            f"{backend} does not support runtime profile "
+            "gateway unregistration (container-only feature)"
+        )
+
+    def list_profile_gateways(self) -> list[str]:
+        return []
+
+
+class SystemdServiceManager(_RegistrationUnsupportedMixin):
+    """Adapter exposing hermes_cli.gateway's systemd helpers as a ServiceManager.
+
+    Host call sites keep calling those helpers directly; this class is
+    for backend-agnostic code such as the Phase 4 profile create/delete
+    hooks. The ``name`` argument is accepted for protocol compatibility
+    and is not forwarded to the underlying helpers.
+    """
+
+    kind: ServiceManagerKind = "systemd"
+
+    def start(self, name: str) -> None:
+        from hermes_cli import gateway as host_gateway
+        host_gateway.systemd_start()
+
+    def stop(self, name: str) -> None:
+        from hermes_cli import gateway as host_gateway
+        host_gateway.systemd_stop()
+
+    def restart(self, name: str) -> None:
+        from hermes_cli import gateway as host_gateway
+        host_gateway.systemd_restart()
+
+    def is_running(self, name: str) -> bool:
+        from hermes_cli import gateway as host_gateway
+        # The probe returns a pair; only its second element is used.
+        _unused, is_active = host_gateway._probe_systemd_service_running()
+        return is_active
+
+
+class LaunchdServiceManager(_RegistrationUnsupportedMixin):
+    """Adapter exposing hermes_cli.gateway's launchd helpers as a ServiceManager.
+
+    The ``name`` argument is accepted for protocol compatibility and is
+    not forwarded to the underlying helpers.
+    """
+
+    kind: ServiceManagerKind = "launchd"
+
+    def start(self, name: str) -> None:
+        from hermes_cli import gateway as host_gateway
+        host_gateway.launchd_start()
+
+    def stop(self, name: str) -> None:
+        from hermes_cli import gateway as host_gateway
+        host_gateway.launchd_stop()
+
+    def restart(self, name: str) -> None:
+        from hermes_cli import gateway as host_gateway
+        host_gateway.launchd_restart()
+
+    def is_running(self, name: str) -> bool:
+        from hermes_cli import gateway as host_gateway
+        return host_gateway._probe_launchd_service_running()
+
+
+class WindowsServiceManager(_RegistrationUnsupportedMixin):
+    """Adapter over ``hermes_cli.gateway_windows`` (Scheduled Task /
+    Startup-folder fallback).
+
+    Native Windows uses a Scheduled Task instead of a real init-system
+    service, but the protocol-level lifecycle is identical: start /
+    stop / restart / is_running. ``install`` forwards a few
+    Windows-specific keyword arguments (start_now, start_on_login,
+    elevated_handoff) untouched — non-Windows callers should never
+    invoke ``install`` on this wrapper. The ``name`` argument of the
+    lifecycle methods is not forwarded to the backend.
+    """
+
+    kind: ServiceManagerKind = "windows"
+
+    def install(
+        self,
+        *,
+        force: bool = False,
+        start_now: bool | None = None,
+        start_on_login: bool | None = None,
+        elevated_handoff: bool = False,
+    ) -> None:
+        from hermes_cli import gateway_windows as win_backend
+        win_backend.install(
+            force=force,
+            start_now=start_now,
+            start_on_login=start_on_login,
+            elevated_handoff=elevated_handoff,
+        )
+
+    def start(self, name: str) -> None:
+        from hermes_cli import gateway_windows as win_backend
+        win_backend.start()
+
+    def stop(self, name: str) -> None:
+        from hermes_cli import gateway_windows as win_backend
+        win_backend.stop()
+
+    def restart(self, name: str) -> None:
+        from hermes_cli import gateway_windows as win_backend
+        win_backend.restart()
+
+    def is_running(self, name: str) -> bool:
+        from hermes_cli import gateway_windows as win_backend
+        from hermes_cli.gateway import find_gateway_pids
+        # Running means: the task is installed AND a gateway PID exists.
+        if win_backend.is_installed():
+            return bool(find_gateway_pids())
+        return False
+
+
+def get_service_manager() -> ServiceManager:
+    """Build the ServiceManager that matches ``detect_service_manager()``.
+
+    Raises:
+        RuntimeError: when no supported backend is available.
+    """
+    backends = {
+        "systemd": SystemdServiceManager,
+        "launchd": LaunchdServiceManager,
+        "windows": WindowsServiceManager,
+        "s6": S6ServiceManager,
+    }
+    backend_cls = backends.get(detect_service_manager())
+    if backend_cls is None:
+        raise RuntimeError("no supported service manager detected")
+    return backend_cls()
+
+
+# ---------------------------------------------------------------------------
+# S6ServiceManager (container-only)
+#
+# Per-profile gateways are registered dynamically when `hermes profile create`
+# runs inside the container (Phase 4). Static services (main-hermes, dashboard)
+# live in /etc/s6-overlay/s6-rc.d/ and are NOT managed by this class — they're
+# part of the image, not runtime-created.
+# ---------------------------------------------------------------------------
+
+
+# s6-overlay's dynamic scandir for runtime-registered services. Lives on
+# tmpfs and is the directory s6-svscan watches. Writes here trigger
+# automatic supervision on the next rescan.
+S6_DYNAMIC_SCANDIR = Path("/run/service")
+# Prefix of every runtime-registered service directory: a profile maps to
+# ``<scandir>/gateway-<profile>/``.
+S6_SERVICE_PREFIX = "gateway-"
+
+# s6-overlay installs its binaries under /command/ and only adds that
+# directory to PATH for processes started under the supervision tree
+# (services started by s6-svscan, cont-init.d scripts, etc.). Code
+# that runs via `docker exec` or any other out-of-tree entry point —
+# notably our Phase 4 profile create/delete hooks — inherits the
+# container's base PATH which does NOT include /command/.
+#
+# Rather than asking every caller to fix up its environment, the
+# S6ServiceManager calls s6-* binaries by absolute path via this
+# constant. We don't use `/usr/bin/s6-…` symlinks because the
+# s6-overlay-symlinks-noarch tarball only links a subset, and we
+# want every s6 invocation to be guaranteed-findable.
+_S6_BIN_DIR = "/command"
+
+
+# UID/GID of the in-image ``hermes`` user. Hardcoded to match what
+# ``stage2-hook.sh`` enforces (the runtime invariant — see also
+# tests/docker/test_uid_remap.py). The container starts s6-supervise
+# under root and immediately drops to this UID via ``s6-setuidgid``.
+_HERMES_UID = 10000
+_HERMES_GID = 10000
+
+
+def _seed_supervise_skeleton(svc_dir: Path) -> None:
+    """Pre-create the s6 ``event/`` and ``supervise/`` skeleton inside
+    ``svc_dir``, owned by the hermes user.
+
+    Why this exists
+    ---------------
+    On spawning a service, s6-supervise tries to ``mkdir``
+    ``<svc>/event`` and ``<svc>/supervise`` (mode ``0700``) and to
+    ``mkfifo`` ``<svc>/supervise/control`` (mode ``0600``). It runs with
+    PID 1's effective UID (root), so those entries come out root-owned
+    and the unprivileged ``hermes`` user (UID 10000, which performs
+    every Hermes runtime operation via ``s6-setuidgid``) gets ``EACCES``
+    from ``s6-svc``, ``s6-svstat`` and ``s6-svwait``. The PR #30136
+    review flagged this as a product gap: the whole S6ServiceManager
+    lifecycle was inert in production.
+
+    Why this works
+    --------------
+    Per s6's source (src/supervision/s6-supervise.c::trymkdir +
+    control_init), ``mkdir`` and ``mkfifo`` both treat ``EEXIST`` as
+    success, and the chown/chmod fix-up that would otherwise make
+    event/ ``03730 root:root`` is skipped when the directory already
+    exists. Laying the skeleton down with hermes ownership before
+    ``s6-svscanctl -a`` therefore makes s6-supervise adopt our layout.
+
+    Layout produced
+    ---------------
+    ``svc_dir/``                    must already exist
+    ``svc_dir/event/``              03730 (setgid + g+rwx + sticky)
+    ``svc_dir/supervise/``          0755
+    ``svc_dir/supervise/event/``    03730
+    ``svc_dir/supervise/control``   0660 FIFO
+
+    ``death_tally``, ``lock`` and ``status`` are written by s6-supervise
+    itself (as root) at mode 0644, which is enough for ``s6-svstat``
+    since it only reads them.
+
+    When ``svc_dir/log/`` exists (the canonical s6 logger pattern, with
+    a second s6-supervise for the logger), the same skeleton is seeded
+    beneath ``log/``. Otherwise unregister teardown would hit EACCES on
+    the logger's root-owned supervise dir.
+
+    Idempotency
+    -----------
+    Safe to call repeatedly. Anything that already exists is left
+    untouched — in particular, live FIFOs that s6-supervise may already
+    have opened are not re-chowned or re-chmodded.
+
+    Reference
+    ---------
+    Discussed on the skarnet `skaware` mailing list in 2020
+    (`<http://skarnet.org/lists/skaware/1424.html>`_); see also
+    just-containers/s6-overlay#130. Pre-creation was historically
+    described as forward-compatibility-fragile, but the EEXIST handling
+    in s6-supervise has been stable since 2015 — ``s6-svperms`` and
+    ``fix-attrs.d`` rely on the same pattern.
+    """
+    import os
+
+    def _hand_to_hermes(target: Path) -> None:
+        try:
+            os.chown(target, _HERMES_UID, _HERMES_GID)
+        except PermissionError:
+            # Already running as the hermes user: whatever we just
+            # created is hermes-owned anyway, so the failed chown is
+            # harmless. Swallowing it keeps root and unprivileged
+            # callers on a single code path.
+            pass
+
+    def _ensure_dir(target: Path, mode: int) -> None:
+        if target.exists():
+            return
+        target.mkdir(parents=False, exist_ok=False)
+        target.chmod(mode)
+        _hand_to_hermes(target)
+
+    def _ensure_control_fifo(target: Path) -> None:
+        # Leave an existing FIFO alone — s6-supervise may already be
+        # running against this slot.
+        if target.exists():
+            return
+        os.mkfifo(target, 0o660)
+        # mkfifo honours the process umask, which can strip group-write
+        # (the common 0022 turns 0o660 into 0o640). The container runs
+        # with umask 0 inside s6-overlay's stage2, but the explicit
+        # chmod keeps the result the same under any invocation context.
+        target.chmod(0o660)
+        _hand_to_hermes(target)
+
+    def _seed(root: Path) -> None:
+        # event/ at the service root is the s6-svlisten1
+        # event-subscription dir, distinct from supervise/event/.
+        _ensure_dir(root / "event", 0o3730)
+        supervise_dir = root / "supervise"
+        _ensure_dir(supervise_dir, 0o755)
+        _ensure_dir(supervise_dir / "event", 0o3730)
+        _ensure_control_fifo(supervise_dir / "control")
+
+    _seed(svc_dir)
+
+    # The logger (log/, see servicedir(7)) has its own s6-supervise
+    # instance and therefore needs its own skeleton.
+    logger_dir = svc_dir / "log"
+    if logger_dir.is_dir():
+        _seed(logger_dir)
+
+
+class S6Error(RuntimeError):
+    """Root of the S6ServiceManager lifecycle error hierarchy.
+
+    Subclasses record the slot name (and, where it helps, the output of
+    the failing subprocess) so the CLI can print an actionable message
+    rather than a raw ``CalledProcessError`` traceback.
+    """
+
+    def __init__(self, message: str, *, service: str | None = None) -> None:
+        # Service directory name the failure relates to, when known.
+        self.service = service
+        super().__init__(message)
+
+
+class GatewayNotRegisteredError(S6Error):
+    """A lifecycle method was aimed at a slot that does not exist.
+
+    Typical trigger: ``hermes -p typo gateway start`` with no profile
+    named ``typo``. ``profile`` holds the bare profile name, while
+    ``service`` holds the full ``gateway-<profile>`` directory name, so
+    callers can word a message such as "no such gateway 'typo'".
+    """
+
+    def __init__(self, profile: str) -> None:
+        self.profile = profile
+        hint = (
+            f"no such gateway {profile!r}: register it with "
+            f"`hermes profile create {profile}` first, or pass "
+            "an existing profile name via `-p <name>`"
+        )
+        super().__init__(hint, service=f"gateway-{profile}")
+
+
+class S6CommandError(S6Error):
+    """An s6 command failed for a reason other than a missing slot.
+
+    Examples: permission denied on the supervise control FIFO, or
+    s6-svc exiting non-zero unexpectedly. The action, return code and
+    stderr of the failing command are kept as attributes, and stderr is
+    appended to the message when it is not blank.
+    """
+
+    def __init__(
+        self, *, service: str, action: str, returncode: int, stderr: str,
+    ) -> None:
+        self.action = action
+        self.returncode = returncode
+        self.stderr = stderr
+        summary = f"s6-svc {action} on {service!r} failed (rc={returncode})"
+        detail = stderr.strip()
+        if detail:
+            summary = f"{summary}: {detail}"
+        super().__init__(summary, service=service)
+
+
+class S6ServiceManager:
+    """Per-profile gateway supervision via s6-overlay.
+
+    Only handles runtime-registered services under
+    ``S6_DYNAMIC_SCANDIR``. Static services (main-hermes, dashboard)
+    are managed by s6-rc at image-build time and are out of scope.
+    """
+
+    kind: ServiceManagerKind = "s6"
+
+    def __init__(self, scandir: Path = S6_DYNAMIC_SCANDIR) -> None:
+        self.scandir = scandir
+
+    # -- internal helpers --------------------------------------------------
+
+    def _service_dir(self, profile: str) -> Path:
+        validate_profile_name(profile)
+        return self.scandir / f"{S6_SERVICE_PREFIX}{profile}"
+
+    def _service_name(self, profile: str) -> str:
+        """Return the service name: ``S6_SERVICE_PREFIX`` followed by ``profile``.
+
+        No validation of ``profile`` is performed here.
+        """
+        return f"{S6_SERVICE_PREFIX}{profile}"
+
+    @staticmethod
+    def _render_run_script(
+        profile: str,
+        extra_env: dict[str, str],
+    ) -> str:
+        """Render the ``run`` script of a profile-gateway s6 service.
+
+        The emitted script:
+          1. Runs under ``with-contenv`` so HERMES_HOME (and any other
+             container env) is read at run time — e.g. ``-e
+             HERMES_HOME=/data/hermes`` is honored rather than being
+             Python-substituted at registration time (OQ8-C).
+          2. Activates the bundled venv.
+          3. Exports every ``extra_env`` entry, sorted by key, with the
+             value shell-quoted.
+          4. Drops to the hermes user and exec's the gateway.
+
+        ``profile == "default"`` is a sentinel for the root HERMES_HOME
+        profile (the implicit one at the top of $HERMES_HOME, not under
+        profiles/), so it is launched as ``hermes gateway run`` with
+        **no** ``-p`` flag. ``_profile_suffix()`` returns the empty
+        string for that profile and the dispatcher in
+        ``hermes_cli.gateway`` maps it to the ``gateway-default`` slot;
+        ``-p default`` would instead resolve
+        ``$HERMES_HOME/profiles/default/`` — a different (and almost
+        always nonexistent) profile.
+
+        Port selection is not done here: the gateway reads its bind port
+        from the profile's ``config.yaml``, the single source of truth.
+        An earlier ``port`` parameter was never substituted into the
+        script and was retired together with the allocator in
+        ``hermes_cli.profiles._allocate_gateway_port`` (PR #30136 review
+        item I5).
+
+        Returns:
+            The script text, one command per line, newline-terminated.
+        """
+        import shlex
+
+        prologue = [
+            "#!/command/with-contenv sh",
+            "# shellcheck shell=sh",
+            "set -e",
+            "cd /opt/data",
+            ". /opt/hermes/.venv/bin/activate",
+        ]
+        exports = [
+            f"export {name}={shlex.quote(value)}"
+            for name, value in sorted(extra_env.items())
+        ]
+        if profile == "default":
+            launch = "exec s6-setuidgid hermes hermes gateway run"
+        else:
+            launch = (
+                "exec s6-setuidgid hermes hermes "
+                f"-p {shlex.quote(profile)} gateway run"
+            )
+        return "".join(f"{line}\n" for line in [*prologue, *exports, launch])
+
+    @staticmethod
+    def _render_log_run(profile: str) -> str:
+        """Generate the log/run script for a profile-gateway service.
+
+        OQ8-C: persist to ``${HERMES_HOME}/logs/gateways/<profile>/``.
+        CRITICAL: the HERMES_HOME path is sourced from the runtime env
+        via with-contenv — NOT Python-substituted at registration time
+        — so a container started with ``-e HERMES_HOME=/data/hermes``
+        gets its logs under /data/hermes/logs/..., not the build-time
+        default.
+
+        Args:
+            profile: profile name used as the last path component of
+                the log directory.
+
+        Returns:
+            The script text, newline-terminated. For shell-safe profile
+            names the name is embedded inside the double-quoted path;
+            names that need quoting are appended as a separate
+            single-quoted word so the quotes are not taken literally.
+        """
+        import shlex
+
+        quoted = shlex.quote(profile)
+        if quoted == profile:
+            # Nothing to escape: keep the name inside the double quotes.
+            log_dir_word = f'"$HERMES_HOME/logs/gateways/{profile}"'
+        else:
+            # shlex.quote() produced a single-quoted word. Single quotes
+            # nested inside double quotes are literal characters in sh,
+            # so concatenate the two quoted words instead of nesting.
+            log_dir_word = f'"$HERMES_HOME/logs/gateways/"{quoted}'
+        return (
+            "#!/command/with-contenv sh\n"
+            "# shellcheck shell=sh\n"
+            ': "${HERMES_HOME:=/opt/data}"\n'
+            f"log_dir={log_dir_word}\n"
+            'mkdir -p "$log_dir"\n'
+            'chown -R hermes:hermes "$log_dir" 2>/dev/null || true\n'
+            'exec s6-setuidgid hermes s6-log n10 s1000000 T "$log_dir"\n'
+        )
+
+    # -- lifecycle ---------------------------------------------------------
+
+    def _run_svc(self, action_flag: str, action_label: str, name: str) -> None:
+        """Shared lifecycle dispatch for start / stop / restart.
+
+        Translates the failure modes operators care about into named
+        errors:
+
+        * ``GatewayNotRegisteredError`` — the service directory at
+          ``<scandir>/<name>/`` doesn't exist. ``s6-svc`` would
+          exit non-zero with a fairly opaque message; we pre-empt
+          it with a clear "no such gateway 'X'" tied to the profile
+          name (without the ``gateway-`` prefix).
+        * ``S6CommandError`` — anything else (EACCES on the
+          supervise control FIFO, timeout, etc.). Carries the
+          subprocess return code and stderr so callers can render
+          them inline. A timeout has no exit status, so it is
+          reported with ``returncode=-1`` and a "timed out" stderr.
+
+        Args:
+            action_flag: the ``s6-svc`` flag (``-u`` / ``-d`` / ``-t``).
+            action_label: the human verb (``start`` / ``stop`` /
+                ``restart``) used in error messages.
+            name: service directory name under the scandir.
+
+        Raises:
+            GatewayNotRegisteredError: no service directory for ``name``.
+            S6CommandError: ``s6-svc`` exited non-zero or timed out.
+        """
+        import subprocess
+
+        service_dir = self.scandir / name
+        if not service_dir.is_dir():
+            # Strip the gateway- prefix back off so the message
+            # matches what the user typed on the CLI (``-p <profile>``).
+            raise GatewayNotRegisteredError(
+                name.removeprefix(S6_SERVICE_PREFIX)
+            )
+
+        timeout_s = 5
+        try:
+            subprocess.run(
+                [f"{_S6_BIN_DIR}/s6-svc", action_flag, str(service_dir)],
+                check=True, capture_output=True, text=True, timeout=timeout_s,
+            )
+        except subprocess.CalledProcessError as exc:
+            raise S6CommandError(
+                service=name,
+                action=action_label,
+                returncode=exc.returncode,
+                stderr=exc.stderr or "",
+            ) from exc
+        except subprocess.TimeoutExpired as exc:
+            # check=True only converts non-zero exits; a hung s6-svc
+            # raises TimeoutExpired, which the documented contract
+            # says must also surface as S6CommandError.
+            raise S6CommandError(
+                service=name,
+                action=action_label,
+                returncode=-1,
+                stderr=f"timed out after {timeout_s}s",
+            ) from exc
+
+    def start(self, name: str) -> None:
+        """Bring up a registered service (``s6-svc -u``).
+
+        Args:
+            name: service directory name under the scandir — the
+                prefixed service name, not the bare profile name.
+
+        Raises:
+            GatewayNotRegisteredError: no service directory for ``name``.
+            S6CommandError: s6-svc exited non-zero for any other reason
+                (permission denied on the supervise FIFO, timeout, etc.).
+        """
+        self._run_svc("-u", "start", name)
+
+    def stop(self, name: str) -> None:
+        """Bring down a registered service (``s6-svc -d``).
+
+        Args:
+            name: service directory name under the scandir — the
+                prefixed service name, not the bare profile name.
+
+        Raises:
+            GatewayNotRegisteredError: no service directory for ``name``.
+            S6CommandError: s6-svc exited non-zero for any other reason.
+        """
+        self._run_svc("-d", "stop", name)
+
+    def restart(self, name: str) -> None:
+        """Restart a registered service (``s6-svc -t`` = SIGTERM).
+
+        NOTE(review): this only sends the signal; presumably the
+        supervisor brings the process back up afterwards, and a service
+        that is already down stays down — confirm against s6-svc docs.
+
+        Args:
+            name: service directory name under the scandir — the
+                prefixed service name, not the bare profile name.
+
+        Raises:
+            GatewayNotRegisteredError: no service directory for ``name``.
+            S6CommandError: s6-svc exited non-zero for any other reason.
+        """
+        self._run_svc("-t", "restart", name)
+
+    def is_running(self, name: str) -> bool:
+        """Report whether ``s6-svstat`` describes the service as up.
+
+        A non-zero exit from ``s6-svstat`` is treated as "not running".
+        """
+        import subprocess
+
+        status = subprocess.run(
+            [f"{_S6_BIN_DIR}/s6-svstat", str(self.scandir / name)],
+            capture_output=True, text=True, timeout=5,
+        )
+        if status.returncode != 0:
+            return False
+        return "up " in status.stdout
+
+    # -- runtime registration ---------------------------------------------
+
+    def supports_runtime_registration(self) -> bool:
+        """Return ``True``: this manager can register services at runtime."""
+        return True
+
+    def register_profile_gateway(
+        self,
+        profile: str,
+        *,
+        extra_env: dict[str, str] | None = None,
+    ) -> None:
+        """Create the s6 service directory for a profile gateway.
+
+        Triggers ``s6-svscanctl -a`` so s6-svscan picks the new directory
+        up immediately. The service is created in the *up* state — to
+        register without auto-starting, follow up with ``stop()`` on
+        the service name (the prefixed ``gateway-<profile>`` form, not
+        the bare profile), or use the future ``start_now=False`` arg;
+        the Phase 4 reconciliation path instead writes a ``down``
+        marker file directly.
+
+        Args:
+            profile: profile name; validated via ``_service_dir``.
+            extra_env: extra variables exported by the generated run
+                script. ``None`` means no extra variables.
+
+        Raises:
+            ValueError: if the profile name is invalid or the service
+                directory already exists.
+            RuntimeError: if ``s6-svscanctl`` fails or times out. The
+                freshly published directory is removed first so no
+                unsupervised slot is left behind.
+        """
+        import shutil
+        import subprocess
+
+        svc_dir = self._service_dir(profile)
+        if svc_dir.exists():
+            raise ValueError(
+                f"profile gateway {profile!r} already registered at {svc_dir}"
+            )
+
+        # Build the service directory atomically: write to a sibling
+        # temp dir, then rename. The temp name starts with a dot
+        # because s6-svscan skips dot-prefixed entries (and so does
+        # ``list_profile_gateways``), so neither can observe a half-
+        # populated or crash-orphaned staging directory.
+        tmp_dir = svc_dir.with_name(f".{svc_dir.name}.tmp")
+        if tmp_dir.exists():
+            shutil.rmtree(tmp_dir, ignore_errors=True)
+        tmp_dir.mkdir(parents=True)
+
+        try:
+            (tmp_dir / "type").write_text("longrun\n")
+
+            run_script = self._render_run_script(profile, extra_env or {})
+            run_path = tmp_dir / "run"
+            run_path.write_text(run_script)
+            run_path.chmod(0o755)
+
+            # Persistent log rotation (OQ8-C).
+            log_subdir = tmp_dir / "log"
+            log_subdir.mkdir()
+            log_run = log_subdir / "run"
+            log_run.write_text(self._render_log_run(profile))
+            log_run.chmod(0o755)
+
+            # Pre-create the supervise/ skeleton with hermes ownership
+            # BEFORE we publish the slot. s6-supervise will EEXIST our
+            # dirs/FIFOs and inherit the ownership, so the runtime
+            # s6-svc / s6-svstat / s6-svwait calls (all dispatched as
+            # the hermes user) won't hit EACCES on root-owned 0700
+            # dirs. See ``_seed_supervise_skeleton`` for the full
+            # rationale.
+            _seed_supervise_skeleton(tmp_dir)
+
+            tmp_dir.rename(svc_dir)
+        except Exception:
+            shutil.rmtree(tmp_dir, ignore_errors=True)
+            raise
+
+        # Trigger rescan so s6-svscan picks up the new service.
+        try:
+            result = subprocess.run(
+                [f"{_S6_BIN_DIR}/s6-svscanctl", "-a", str(self.scandir)],
+                capture_output=True, text=True, timeout=5,
+            )
+        except subprocess.TimeoutExpired as exc:
+            # Same cleanup as a non-zero exit: without a completed
+            # rescan no supervisor is known to be watching the slot.
+            shutil.rmtree(svc_dir, ignore_errors=True)
+            raise RuntimeError(
+                "s6-svscanctl failed: timed out after 5s"
+            ) from exc
+        if result.returncode != 0:
+            # Clean up: rescan failed, leave the directory in place would
+            # be confusing (no supervisor watching it).
+            shutil.rmtree(svc_dir, ignore_errors=True)
+            raise RuntimeError(
+                f"s6-svscanctl failed: {result.stderr or result.stdout}"
+            )
+
+    def unregister_profile_gateway(self, profile: str) -> None:
+        """Stop the profile gateway service and remove its directory.
+
+        Idempotent: absent services are a no-op. Best-effort stop +
+        wait-for-down before removal so the running gateway process
+        gets a chance to shut down cleanly before its service dir
+        disappears. "Best effort" covers timeouts too: a hung s6
+        helper is skipped instead of aborting the removal.
+
+        Teardown ordering matters: ``s6-svscanctl -an`` is fired
+        **before** ``rmtree`` so s6-svscan reaps the supervise child
+        process (releasing its handle on ``supervise/lock`` and the
+        regular files inside the supervise dir), giving us a clean
+        directory to remove. Without the reap-first ordering, the
+        rmtree races s6-supervise on a set of root-owned files inside
+        the supervise dir and the dir is left half-removed.
+
+        Args:
+            profile: profile name; validated via ``_service_dir``.
+        """
+        import shutil
+        import subprocess
+        import time
+
+        svc_dir = self._service_dir(profile)
+        if not svc_dir.exists():
+            return
+
+        teardown_steps = (
+            # Stop the service (service may already be down).
+            ([f"{_S6_BIN_DIR}/s6-svc", "-d", str(svc_dir)], 5),
+            # Wait for it to actually go down (up to 10s).
+            ([f"{_S6_BIN_DIR}/s6-svwait", "-D", "-t", "10000", str(svc_dir)], 15),
+            # Reap the supervise child FIRST: -n tells s6-svscan to drop
+            # any supervise processes whose service dir is gone (which
+            # includes any service dir we're about to remove). This
+            # releases the file handles s6-supervise holds against the
+            # supervise/lock + supervise/status + supervise/death_tally
+            # files inside the slot, so the upcoming rmtree doesn't race.
+            ([f"{_S6_BIN_DIR}/s6-svscanctl", "-an", str(self.scandir)], 5),
+        )
+        for argv, timeout_s in teardown_steps:
+            try:
+                subprocess.run(
+                    argv,
+                    capture_output=True, text=True, timeout=timeout_s,
+                    check=False,
+                )
+            except subprocess.TimeoutExpired:
+                # check=False only ignores non-zero exits; a timeout
+                # would otherwise propagate and leave the slot behind.
+                continue
+
+        # Give s6-svscan a moment to reap. There's no synchronous
+        # "scan completed" handshake — the -a/-n trigger just sets a
+        # flag s6-svscan reads on its next loop iteration. 200ms is
+        # comfortably above the loop's resolution but well under any
+        # user-perceived latency.
+        time.sleep(0.2)
+
+        # Now the supervise dir's files are no longer held open by a
+        # live s6-supervise, so rmtree can remove them. Files inside
+        # supervise/ are root-owned (death_tally, lock, status, written
+        # by s6-supervise itself) — but the parent supervise/ directory
+        # is hermes-owned (see ``_seed_supervise_skeleton``), and on
+        # POSIX you only need write+execute on the parent to remove
+        # contained files regardless of file ownership.
+        shutil.rmtree(svc_dir, ignore_errors=True)
+
+    def list_profile_gateways(self) -> list[str]:
+        """List the profile names of all registered gateway services.
+
+        Only non-hidden directories in the scandir whose name carries
+        the ``gateway-`` prefix count; the prefix is stripped from each
+        result. Anything else (e.g. ``s6-linux-init-shutdownd``) is
+        ignored. A missing scandir yields an empty list.
+        """
+        if not self.scandir.exists():
+            return []
+        prefix_len = len(S6_SERVICE_PREFIX)
+        return [
+            child.name[prefix_len:]
+            for child in self.scandir.iterdir()
+            if not child.name.startswith(".")
+            and child.is_dir()
+            and child.name.startswith(S6_SERVICE_PREFIX)
+        ]
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@ -104,7 +104,7 @@ _DEFAULT_PROVIDER_MODELS = {
    "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
    "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
    "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
-    "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", "qwen3.6-plus", "qwen3.5-plus"],
+    "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", "qwen3.7-max", "qwen3.6-plus", "qwen3.5-plus"],
    "huggingface": [
        "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
        "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@ -161,6 +161,7 @@ from hermes_cli.cli_output import (  # noqa: E402
    print_success,
    print_warning,
 )
+from hermes_cli.secret_prompt import masked_secret_prompt  # noqa: E402


 def is_interactive_stdin() -> bool:
@ -202,9 +203,7 @@ def prompt(question: str, default: str = None, password: bool = False) -> str:

    try:
        if password:
-            import getpass
-
-            value = getpass.getpass(color(display, Colors.YELLOW))
+            value = masked_secret_prompt(color(display, Colors.YELLOW))
        else:
            value = input(color(display, Colors.YELLOW))

@ -1094,7 +1093,7 @@ def _xai_oauth_logged_in_for_setup() -> bool:
    """True iff xAI Grok OAuth credentials are already stored locally.

    Lets TTS / STT setup skip the API-key prompt for users who logged in
-    through ``hermes model`` -> xAI Grok OAuth (SuperGrok Subscription).
+    through ``hermes model`` -> xAI Grok OAuth (SuperGrok / Premium+).
    """
    try:
        from hermes_cli.auth import get_xai_oauth_auth_status
@ -1124,7 +1123,7 @@ def _run_xai_oauth_login_from_setup() -> bool:

    open_browser = not _is_remote_session()
    print()
-    print_info("Signing in to xAI Grok OAuth (SuperGrok Subscription)...")
+    print_info("Signing in to xAI Grok OAuth (SuperGrok / Premium+)...")
    try:
        creds = _xai_oauth_loopback_login(open_browser=open_browser)
        _save_xai_oauth_tokens(
@ -1259,7 +1258,7 @@ def _setup_tts_provider(config: dict):

        if oauth_logged_in:
            print_success(
-                "xAI TTS will use your xAI Grok OAuth (SuperGrok Subscription) "
+                "xAI TTS will use your xAI Grok OAuth (SuperGrok / Premium+) "
                "credentials"
            )
        elif existing_api_key:
@ -1269,7 +1268,7 @@ def _setup_tts_provider(config: dict):
            choice_idx = prompt_choice(
                "How do you want xAI TTS to authenticate?",
                choices=[
-                    "Sign in with xAI Grok OAuth (SuperGrok Subscription) — browser login",
+                    "Sign in with xAI Grok OAuth (SuperGrok / Premium+) — browser login",
                    "Paste an xAI API key (console.x.ai)",
                    "Skip → fallback to Edge TTS",
                ],
@ -2034,74 +2033,6 @@ def _setup_telegram():
            save_env_value("TELEGRAM_HOME_CHANNEL", home_channel)


-def _setup_discord():
-    """Configure Discord bot credentials and allowlist."""
-    print_header("Discord")
-    existing = get_env_value("DISCORD_BOT_TOKEN")
-    if existing:
-        print_info("Discord: already configured")
-        if not prompt_yes_no("Reconfigure Discord?", False):
-            if not get_env_value("DISCORD_ALLOWED_USERS"):
-                print_info("⚠️  Discord has no user allowlist - anyone can use your bot!")
-                if prompt_yes_no("Add allowed users now?", True):
-                    print_info("   To find Discord ID: Enable Developer Mode, right-click name → Copy ID")
-                    allowed_users = prompt("Allowed user IDs (comma-separated)")
-                    if allowed_users:
-                        cleaned_ids = _clean_discord_user_ids(allowed_users)
-                        save_env_value("DISCORD_ALLOWED_USERS", ",".join(cleaned_ids))
-                        print_success("Discord allowlist configured")
-            return
-
-    print_info("Create a bot at https://discord.com/developers/applications")
-    token = prompt("Discord bot token", password=True)
-    if not token:
-        return
-    save_env_value("DISCORD_BOT_TOKEN", token)
-    print_success("Discord token saved")
-
-    print()
-    print_info("🔒 Security: Restrict who can use your bot")
-    print_info("   To find your Discord user ID:")
-    print_info("   1. Enable Developer Mode in Discord settings")
-    print_info("   2. Right-click your name → Copy ID")
-    print()
-    print_info("   You can also use Discord usernames (resolved on gateway start).")
-    print()
-    allowed_users = prompt(
-        "Allowed user IDs or usernames (comma-separated, leave empty for open access)"
-    )
-    if allowed_users:
-        cleaned_ids = _clean_discord_user_ids(allowed_users)
-        save_env_value("DISCORD_ALLOWED_USERS", ",".join(cleaned_ids))
-        print_success("Discord allowlist configured")
-    else:
-        print_info("⚠️  No allowlist set - anyone in servers with your bot can use it!")
-
-    print()
-    print_info("📬 Home Channel: where Hermes delivers cron job results,")
-    print_info("   cross-platform messages, and notifications.")
-    print_info("   To get a channel ID: right-click a channel → Copy Channel ID")
-    print_info("   (requires Developer Mode in Discord settings)")
-    print_info("   You can also set this later by typing /set-home in a Discord channel.")
-    home_channel = prompt("Home channel ID (leave empty to set later with /set-home)")
-    if home_channel:
-        save_env_value("DISCORD_HOME_CHANNEL", home_channel)
-
-
-def _clean_discord_user_ids(raw: str) -> list:
-    """Strip common Discord mention prefixes from a comma-separated ID string."""
-    cleaned = []
-    for uid in raw.replace(" ", "").split(","):
-        uid = uid.strip()
-        if uid.startswith("<@") and uid.endswith(">"):
-            uid = uid.lstrip("<@!").rstrip(">")
-        if uid.lower().startswith("user:"):
-            uid = uid[5:]
-        if uid:
-            cleaned.append(uid)
-    return cleaned
-
-
 def _setup_slack():
    """Configure Slack bot credentials."""
    print_header("Slack")
@ -2256,28 +2187,58 @@ def _setup_matrix():
            print_success("E2EE enabled")

        matrix_pkg = "mautrix[encryption]" if want_e2ee else "mautrix"
+        # Use the central lazy-deps feature group so we install ALL of
+        # platform.matrix's dependencies (mautrix, Markdown, aiosqlite,
+        # asyncpg, aiohttp-socks) — not just mautrix itself.  The previous
+        # hand-rolled ``pip install mautrix[encryption]`` left asyncpg /
+        # aiosqlite uninstalled and broke E2EE connect with
+        # ``No module named 'asyncpg'`` on every fresh install (#31116).
        try:
-            __import__("mautrix")
+            from tools.lazy_deps import ensure as _lazy_ensure, feature_missing
+            _missing_before = feature_missing("platform.matrix")
+            if _missing_before:
+                print_info(
+                    f"Installing {matrix_pkg} (+ {len(_missing_before)} runtime deps)..."
+                )
+                try:
+                    _lazy_ensure("platform.matrix", prompt=False)
+                    print_success(f"{matrix_pkg} installed")
+                except Exception as exc:
+                    print_warning(
+                        f"Install failed — run manually: pip install "
+                        f"'mautrix[encryption]' asyncpg aiosqlite Markdown "
+                        f"aiohttp-socks"
+                    )
+                    print_info(f"  Error: {exc}")
        except ImportError:
-            print_info(f"Installing {matrix_pkg}...")
-            import subprocess
-            uv_bin = shutil.which("uv")
-            if uv_bin:
-                result = subprocess.run(
-                    [uv_bin, "pip", "install", "--python", sys.executable, matrix_pkg],
-                    capture_output=True, text=True,
-                )
-            else:
-                result = subprocess.run(
-                    [sys.executable, "-m", "pip", "install", matrix_pkg],
-                    capture_output=True, text=True,
-                )
-            if result.returncode == 0:
-                print_success(f"{matrix_pkg} installed")
-            else:
-                print_warning(f"Install failed — run manually: pip install '{matrix_pkg}'")
-                if result.stderr:
-                    print_info(f"  Error: {result.stderr.strip().splitlines()[-1]}")
+            # tools.lazy_deps unavailable (extreme edge case — partial
+            # install).  Fall back to the legacy single-package install
+            # path so the wizard still does *something*.
+            try:
+                __import__("mautrix")
+            except ImportError:
+                print_info(f"Installing {matrix_pkg}...")
+                import subprocess
+                uv_bin = shutil.which("uv")
+                if uv_bin:
+                    result = subprocess.run(
+                        [uv_bin, "pip", "install", "--python", sys.executable, matrix_pkg],
+                        capture_output=True, text=True,
+                    )
+                else:
+                    result = subprocess.run(
+                        [sys.executable, "-m", "pip", "install", matrix_pkg],
+                        capture_output=True, text=True,
+                    )
+                if result.returncode == 0:
+                    print_success(f"{matrix_pkg} installed")
+                else:
+                    print_warning(
+                        f"Install failed — run manually: pip install "
+                        f"'{matrix_pkg}' asyncpg aiosqlite Markdown aiohttp-socks"
+                    )
+                    if result.stderr:
+                        print_info(f"  Error: {result.stderr.strip().splitlines()[-1]}")

        print()
        print_info("🔒 Security: Restrict who can use your bot")
@ -2299,50 +2260,6 @@ def _setup_matrix():
            save_env_value("MATRIX_HOME_ROOM", home_room)


-def _setup_mattermost():
-    """Configure Mattermost bot credentials."""
-    print_header("Mattermost")
-    existing = get_env_value("MATTERMOST_TOKEN")
-    if existing:
-        print_info("Mattermost: already configured")
-        if not prompt_yes_no("Reconfigure Mattermost?", False):
-            return
-
-    print_info("Works with any self-hosted Mattermost instance.")
-    print_info("   1. In Mattermost: Integrations → Bot Accounts → Add Bot Account")
-    print_info("   2. Copy the bot token")
-    print()
-    mm_url = prompt("Mattermost server URL (e.g. https://mm.example.com)")
-    if mm_url:
-        save_env_value("MATTERMOST_URL", mm_url.rstrip("/"))
-    token = prompt("Bot token", password=True)
-    if not token:
-        return
-    save_env_value("MATTERMOST_TOKEN", token)
-    print_success("Mattermost token saved")
-
-    print()
-    print_info("🔒 Security: Restrict who can use your bot")
-    print_info("   To find your user ID: click your avatar → Profile")
-    print_info("   or use the API: GET /api/v4/users/me")
-    print()
-    allowed_users = prompt("Allowed user IDs (comma-separated, leave empty for open access)")
-    if allowed_users:
-        save_env_value("MATTERMOST_ALLOWED_USERS", allowed_users.replace(" ", ""))
-        print_success("Mattermost allowlist configured")
-    else:
-        print_info("⚠️  No allowlist set - anyone who can message the bot can use it!")
-
-    print()
-    print_info("📬 Home Channel: where Hermes delivers cron job results and notifications.")
-    print_info("   To get a channel ID: click channel name → View Info → copy the ID")
-    print_info("   You can also set this later by typing /set-home in a Mattermost channel.")
-    home_channel = prompt("Home channel ID (leave empty to set later with /set-home)")
-    if home_channel:
-        save_env_value("MATTERMOST_HOME_CHANNEL", home_channel)
-    print_info("   Open config in your editor:  hermes config edit")
-
-
 def _setup_bluebubbles():
    """Configure BlueBubbles iMessage gateway."""
    print_header("BlueBubbles (iMessage)")
@ -3128,6 +3045,119 @@ SETUP_SECTIONS = [
 ]


+def _run_portal_one_shot(config: dict) -> None:
+    """Run the one-shot Nous Portal setup behind ``hermes setup --portal``.
+
+    Steps, in order: print the banner, run the Nous OAuth login unless
+    already logged in, set ``model.provider`` to ``nous`` and save the
+    config, then offer the Tool Gateway opt-in. The only prompts are
+    the ones the underlying OAuth and Tool Gateway flows already issue.
+    The goal is a single shareable command that takes a brand-new user
+    to a working session with web/image/tts/browser tools routed via
+    their Portal sub.
+
+    A failed or cancelled login returns early without touching
+    ``config``.
+    """
+    from types import SimpleNamespace
+
+    from hermes_cli.auth_commands import auth_add_command
+    from hermes_cli.config import save_config
+    from hermes_cli.auth import get_nous_auth_status
+    from hermes_cli.nous_subscription import prompt_enable_tool_gateway
+
+    banner_rows = (
+        "┌─────────────────────────────────────────────────────────┐",
+        "│     ⚕ Hermes Setup — Nous Portal (one-shot)             │",
+        "└─────────────────────────────────────────────────────────┘",
+    )
+    print()
+    for row in banner_rows:
+        print(color(row, Colors.MAGENTA))
+    print()
+    for pitch_line in (
+        "  One subscription, 300+ models, plus the Tool Gateway:",
+        "    web search, image generation, TTS, browser automation",
+        "    — all routed through your Nous Portal sub.",
+    ):
+        print_info(pitch_line)
+    print()
+    print_info("  Sign up: https://portal.nousresearch.com/manage-subscription")
+    print()
+
+    # A repeat run after a successful login must not re-prompt, so ask
+    # the auth layer first; any failure there counts as "not logged in".
+    try:
+        already_logged_in = bool((get_nous_auth_status() or {}).get("logged_in"))
+    except Exception:
+        already_logged_in = False
+
+    if not already_logged_in:
+        retry_hint = "  You can retry later with `hermes auth add nous --type oauth`."
+        # Reuse the shared auth wiring so the device-code flow matches
+        # `hermes auth add nous --type oauth`. The SimpleNamespace stands
+        # in for the argparse Namespace that auth_add_command expects.
+        login_args = SimpleNamespace(
+            provider="nous",
+            auth_type="oauth",
+            label=None,
+            api_key=None,
+            portal_url=None,
+            inference_url=None,
+            client_id=None,
+            scope=None,
+            no_browser=False,
+            timeout=None,
+            insecure=False,
+            ca_bundle=None,
+            min_key_ttl_seconds=5 * 60,
+        )
+        try:
+            auth_add_command(login_args)
+        except SystemExit as exit_exc:
+            print()
+            print_error(f"  Nous Portal login failed (exit {exit_exc.code}).")
+            print_info(retry_hint)
+            return
+        except (KeyboardInterrupt, EOFError):
+            print()
+            print_info("  Setup cancelled.")
+            return
+        except Exception as login_exc:
+            print()
+            print_error(f"  Nous Portal login failed: {login_exc}")
+            print_info(retry_hint)
+            return
+    else:
+        print_success("  Already logged into Nous Portal.")
+
+    # Point the provider at nous so the model picker, status surfaces
+    # and managed-tool gating light up. model.model is left alone so the
+    # runtime picks Nous's default; `hermes model` can change it later.
+    model_cfg = config.get("model")
+    if not isinstance(model_cfg, dict):
+        model_cfg = config["model"] = {}
+    model_cfg["provider"] = "nous"
+    save_config(config)
+    print()
+    print_success("  Nous set as your inference provider.")
+
+    # Single Y/n Tool Gateway opt-in — the same flow `hermes model`
+    # fires after Nous is picked. Cancelling it is not an error.
+    print()
+    try:
+        prompt_enable_tool_gateway(config)
+    except (KeyboardInterrupt, EOFError):
+        pass
+    except Exception as gateway_exc:
+        print_warning(f"  Tool Gateway prompt skipped: {gateway_exc}")
+
+    print()
+    print_success("Portal setup complete.")
+    print_info("  Run `hermes portal status` to inspect routing.")
+    print_info("  Run `hermes` to start chatting.")
+
+
 def run_setup_wizard(args):
    """Run the interactive setup wizard.

@ -3183,6 +3213,11 @@ def run_setup_wizard(args):
        )
        return

+    # --portal: one-shot Nous Portal setup. Skips the rest of the wizard.
+    if bool(getattr(args, "portal", False)):
+        _run_portal_one_shot(config)
+        return
+
    # Check if a specific section was requested
    section = getattr(args, "section", None)
    if section:
--- a/hermes_cli/skills_hub.py
+++ b/hermes_cli/skills_hub.py
@ -23,6 +23,7 @@ from rich.table import Table
 # Lazy imports to avoid circular dependencies and slow startup.
 # tools.skills_hub and tools.skills_guard are imported inside functions.
 from hermes_constants import display_hermes_home
+from agent.skill_utils import is_excluded_skill_path

 _console = Console()

@ -178,9 +179,12 @@ def _existing_categories() -> List[str]:
            # top level (no category); otherwise treat as a category bucket.
            if (entry / "SKILL.md").exists():
                continue
-            # Has at least one nested SKILL.md?
+            # Has at least one nested SKILL.md (excluding dependency/cache dirs)?
            try:
-                if any(entry.rglob("SKILL.md")):
+                if any(
+                    not is_excluded_skill_path(p)
+                    for p in entry.rglob("SKILL.md")
+                ):
                    out.append(entry.name)
            except OSError:
                continue
@ -546,7 +550,14 @@ def do_install(identifier: str, category: str = "", force: bool = False,

    # Scan
    c.print("[bold]Running security scan...[/]")
-    scan_source = getattr(bundle, "identifier", "") or getattr(meta, "identifier", "") or identifier
+    if bundle.source == "official":
+        scan_source = "official"
+    else:
+        scan_source = (
+            getattr(bundle, "identifier", "")
+            or getattr(meta, "identifier", "")
+            or identifier
+        )
    result = scan_skill(q_path, source=scan_source)
    c.print(format_scan_report(result))

@ -902,8 +913,14 @@ def do_update(name: Optional[str] = None, console: Optional[Console] = None) ->
    c.print(f"[bold green]Updated {len(updates)} skill(s).[/]\n")


-def do_audit(name: Optional[str] = None, console: Optional[Console] = None) -> None:
-    """Re-run security scan on installed hub skills."""
+def do_audit(name: Optional[str] = None, console: Optional[Console] = None,
+             deep: bool = False) -> None:
+    """Re-run security scan on installed hub skills.
+
+    When ``deep=True``, also runs an opt-in AST-level diagnostic on Python
+    files (review aid only — not a security gate; skills_guard.py verdicts
+    are unchanged).
+    """
    from tools.skills_hub import HubLockFile, SKILLS_DIR
    from tools.skills_guard import scan_skill, format_scan_report

@ -924,6 +941,9 @@ def do_audit(name: Optional[str] = None, console: Optional[Console] = None) -> N

    c.print(f"\n[bold]Auditing {len(targets)} skill(s)...[/]\n")

+    if deep:
+        from tools.skills_ast_audit import ast_scan_path, format_ast_report
+
    for entry in targets:
        skill_path = SKILLS_DIR / entry["install_path"]
        if not skill_path.exists():
@ -932,6 +952,10 @@ def do_audit(name: Optional[str] = None, console: Optional[Console] = None) -> N

        result = scan_skill(skill_path, source=entry.get("identifier", entry["source"]))
        c.print(format_scan_report(result))
+
+        if deep:
+            c.print(format_ast_report(ast_scan_path(skill_path), skill_name=entry["name"]))
+
        c.print()


@ -1339,7 +1363,8 @@ def skills_command(args) -> None:
    elif action == "update":
        do_update(name=getattr(args, "name", None))
    elif action == "audit":
-        do_audit(name=getattr(args, "name", None))
+        do_audit(name=getattr(args, "name", None),
+                 deep=getattr(args, "deep", False))
    elif action == "uninstall":
        do_uninstall(args.name)
    elif action == "reset":
@ -1391,6 +1416,8 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
        /skills update
        /skills audit
        /skills audit my-skill
+        /skills audit --deep
+        /skills audit my-skill --deep
        /skills uninstall my-skill
        /skills tap list
        /skills tap add owner/repo
@ -1505,8 +1532,9 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
        do_update(name=name, console=c)

    elif action == "audit":
-        name = args[0] if args else None
-        do_audit(name=name, console=c)
+        name = args[0] if args and not args[0].startswith("--") else None
+        deep = "--deep" in args
+        do_audit(name=name, console=c, deep=deep)

    elif action == "uninstall":
        if not args:
--- a/hermes_cli/tips.py
+++ b/hermes_cli/tips.py
@ -227,6 +227,9 @@ TIPS = [
    "browser_vision with annotate=true overlays numbered labels on interactive elements.",

    # --- MCP ---
+    "hermes mcp opens an interactive picker of Nous-approved MCPs you can install in one keystroke.",
+    "hermes mcp catalog lists Nous-approved MCP servers shipped with the repo.",
+    "hermes mcp install <name> installs a catalog entry, prompts for credentials, and lets you pick which of its tools to enable.",
    "MCP servers are configured in config.yaml — both stdio and HTTP transports supported.",
    "Per-server tool filtering: tools.include whitelists and tools.exclude blacklists specific tools.",
    "MCP servers auto-generate toolsets at runtime — hermes tools can toggle them per platform.",
@ -260,7 +263,7 @@ TIPS = [
    "Custom providers: save named endpoints in config.yaml under custom_providers.",
    "HERMES_EPHEMERAL_SYSTEM_PROMPT injects a system prompt that's never persisted to history.",
    "credential_pool_strategies supports fill_first, round_robin, least_used, and random rotation.",
-    "hermes login supports OAuth-based auth for Nous and OpenAI Codex providers.",
+    "hermes auth add nous or hermes auth add openai-codex sets up OAuth-based providers.",
    "The API server supports both Chat Completions and Responses API with server-side state.",
    "tool_preview_length: 0 in config shows full file paths in the spinner's activity feed.",
    "hermes status --deep runs deeper diagnostic checks across all components.",
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@ -101,7 +101,7 @@ def _xai_credentials_present() -> bool:
    """Cheap, side-effect-free check for usable xAI credentials.

    Used to auto-enable the ``x_search`` toolset when the user has either
-    completed xAI Grok OAuth (SuperGrok subscription) or set
+    completed xAI Grok OAuth (SuperGrok / Premium+) or set
    ``XAI_API_KEY``. Does NOT hit the network — only inspects the local
    auth store and environment. The tool's runtime ``check_fn`` still
    gates schema registration if creds later expire or get revoked.
@ -311,6 +311,16 @@ TOOL_CATEGORIES = {
    "image_gen": {
        "name": "Image Generation",
        "icon": "🎨",
+        # Per-provider rows for FAL.ai (`plugins/image_gen/fal`), OpenAI,
+        # OpenAI Codex, and xAI are injected at runtime from each
+        # ``plugins.image_gen.<vendor>`` package via
+        # ``_plugin_image_gen_providers()`` in ``_visible_providers``.
+        # Only non-provider UX setup-flow rows remain here:
+        #   - "Nous Subscription" — managed FAL billed via the Nous
+        #     subscription (requires_nous_auth + override_env_vars).
+        #     Uses the fal plugin as the underlying backend but has a
+        #     distinct setup UX.
+        # Mirrors the shape browser/video_gen ship today.
        "providers": [
            {
                "name": "Nous Subscription",
@ -322,15 +332,6 @@ TOOL_CATEGORIES = {
                "override_env_vars": ["FAL_KEY"],
                "imagegen_backend": "fal",
            },
-            {
-                "name": "FAL.ai",
-                "badge": "paid",
-                "tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
-                "env_vars": [
-                    {"key": "FAL_KEY", "prompt": "FAL API key", "url": "https://fal.ai/dashboard/keys"},
-                ],
-                "imagegen_backend": "fal",
-            },
        ],
    },
    "video_gen": {
@ -355,7 +356,7 @@ TOOL_CATEGORIES = {
        "icon": "🐦",
        "providers": [
            {
-                "name": "xAI Grok OAuth (SuperGrok Subscription)",
+                "name": "xAI Grok OAuth (SuperGrok / Premium+)",
                "badge": "subscription",
                "tag": "Browser login at accounts.x.ai — no API key required",
                "env_vars": [],
@ -482,6 +483,11 @@ TOOLSET_ENV_REQUIREMENTS = {
 # ─── Post-Setup Hooks ─────────────────────────────────────────────────────────


+def _cua_driver_cmd() -> str:
+    """Resolve the cua-driver command name or path to invoke.
+
+    A non-blank ``HERMES_CUA_DRIVER_CMD`` environment variable (surrounding
+    whitespace stripped) takes precedence; otherwise the stock
+    ``cua-driver`` name is returned.
+    """
+    override = os.environ.get("HERMES_CUA_DRIVER_CMD", "").strip()
+    if override:
+        return override
+    return "cua-driver"
+
+
 def _pip_install(
    args: List[str],
    *,
@ -550,6 +556,55 @@ def _pip_install(
    )


+
+def _check_cua_driver_asset_for_arch() -> bool:
+    """Probe the latest CUA release for a build matching this machine.
+
+    Returns:
+        ``True`` when an asset for the current architecture appears to be
+        available, or when that cannot be determined (the lookup raised).
+        ``False`` only when the release listing was fetched and contains no
+        Intel asset; in that case a warning is printed so callers can skip
+        the install attempt and avoid a raw 404.
+    """
+    import platform as _plat
+    import urllib.request
+
+    # arm64 (Apple Silicon) assets are always published — no probe needed.
+    if _plat.machine() == "arm64":
+        return True
+
+    # Anything else is treated as x86_64 / Intel: look for an
+    # architecture-specific asset before handing off to the upstream
+    # installer.
+    latest_release_url = "https://api.github.com/repos/trycua/cua/releases/latest"
+    intel_markers = ("x86_64", "amd64")
+    try:
+        request = urllib.request.Request(
+            latest_release_url,
+            headers={"Accept": "application/vnd.github+json"},
+        )
+        with urllib.request.urlopen(request, timeout=10) as response:
+            payload = _json.loads(response.read().decode())
+        release_tag = payload.get("tag_name", "")
+        for asset in payload.get("assets", []):
+            asset_name = asset.get("name", "").lower()
+            if any(marker in asset_name for marker in intel_markers):
+                return True
+        _print_warning(
+            f"    Latest CUA release ({release_tag}) has no Intel (x86_64) asset."
+        )
+        _print_info(
+            "    CUA Driver currently only ships Apple Silicon builds."
+        )
+        _print_info(
+            "    See: https://github.com/trycua/cua/issues/1493"
+        )
+        return False
+    except Exception:
+        # Network / API failure — proceed and let the installer handle it.
+        return True
+
+
 def install_cua_driver(upgrade: bool = False) -> bool:
    """Install or refresh the cua-driver binary used by Computer Use.

@ -579,7 +634,8 @@ def install_cua_driver(upgrade: bool = False) -> bool:
        _print_warning("    Computer Use (cua-driver) is macOS-only; skipping.")
        return False

-    binary = shutil.which("cua-driver")
+    driver_cmd = _cua_driver_cmd()
+    binary = shutil.which(driver_cmd)

    # Not installed → fresh install path (only when caller asked for it).
    if not binary and not upgrade:
@ -587,18 +643,20 @@ def install_cua_driver(upgrade: bool = False) -> bool:
            _print_warning("    curl not found — install manually:")
            _print_info("      https://github.com/trycua/cua/blob/main/libs/cua-driver/README.md")
            return False
+        if not _check_cua_driver_asset_for_arch():
+            return False
        return _run_cua_driver_installer(label="Installing")

    # Already installed and caller didn't ask to upgrade → just confirm.
    if binary and not upgrade:
        try:
            version = subprocess.run(
-                ["cua-driver", "--version"],
+                [driver_cmd, "--version"],
                capture_output=True, text=True, timeout=5,
            ).stdout.strip()
-            _print_success(f"    cua-driver already installed: {version or 'unknown version'}")
+            _print_success(f"    {driver_cmd} already installed: {version or 'unknown version'}")
        except Exception:
-            _print_success("    cua-driver already installed.")
+            _print_success(f"    {driver_cmd} already installed.")
        _print_info("    Grant macOS permissions if not done yet:")
        _print_info("      System Settings > Privacy & Security > Accessibility")
        _print_info("      System Settings > Privacy & Security > Screen Recording")
@ -609,11 +667,14 @@ def install_cua_driver(upgrade: bool = False) -> bool:
        _print_warning("    curl not found — cannot refresh cua-driver.")
        return bool(binary)

+    if not _check_cua_driver_asset_for_arch():
+        return bool(binary)
+
    if binary:
        # Show before/after version when we have a baseline. Best-effort.
        try:
            before = subprocess.run(
-                ["cua-driver", "--version"],
+                [driver_cmd, "--version"],
                capture_output=True, text=True, timeout=5,
            ).stdout.strip()
        except Exception:
@ -625,13 +686,13 @@ def install_cua_driver(upgrade: bool = False) -> bool:
    if ok and before:
        try:
            after = subprocess.run(
-                ["cua-driver", "--version"],
+                [driver_cmd, "--version"],
                capture_output=True, text=True, timeout=5,
            ).stdout.strip()
            if after and after != before:
-                _print_success(f"    cua-driver upgraded: {before} → {after}")
+                _print_success(f"    {driver_cmd} upgraded: {before} → {after}")
            elif after:
-                _print_info(f"    cua-driver up to date: {after}")
+                _print_info(f"    {driver_cmd} up to date: {after}")
        except Exception:
            pass
    return ok
@ -655,11 +716,12 @@ def _run_cua_driver_installer(label: str = "Installing", verbose: bool = True) -
        _print_info(f"    {label} cua-driver (macOS background computer-use)...")
    else:
        _print_info(f"    {label} cua-driver...")
+    driver_cmd = _cua_driver_cmd()
    try:
        result = subprocess.run(install_cmd, shell=True, timeout=300)
-        if result.returncode == 0 and shutil.which("cua-driver"):
+        if result.returncode == 0 and shutil.which(driver_cmd):
            if verbose:
-                _print_success("    cua-driver installed.")
+                _print_success(f"    {driver_cmd} installed.")
                _print_info("    IMPORTANT — grant macOS permissions now:")
                _print_info("      System Settings > Privacy & Security > Accessibility")
                _print_info("      System Settings > Privacy & Security > Screen Recording")
@ -946,7 +1008,7 @@ def _run_post_setup(post_setup_key: str):

        if oauth_logged_in:
            _print_success(
-                "    xAI will use your xAI Grok OAuth (SuperGrok Subscription) credentials"
+                "    xAI will use your xAI Grok OAuth (SuperGrok / Premium+) credentials"
            )
            return
        if existing_api_key:
@ -969,7 +1031,7 @@ def _run_post_setup(post_setup_key: str):
        idx = prompt_choice(
            "    How do you want xAI to authenticate?",
            choices=[
-                "Sign in with xAI Grok OAuth (SuperGrok Subscription) — browser login",
+                "Sign in with xAI Grok OAuth (SuperGrok / Premium+) — browser login",
                "Paste an xAI API key (console.x.ai)",
                "Skip — configure later via `hermes auth add xai-oauth`",
            ],
@ -1506,12 +1568,9 @@ def _plugin_image_gen_providers() -> list[dict]:
    Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider
    row but carries an ``image_gen_plugin_name`` marker so downstream
    code (config writing, model picker) knows to route through the
-    plugin registry instead of the in-tree FAL backend.
-
-    FAL is skipped — it's already exposed by the hardcoded
-    ``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to
-    a plugin in a follow-up PR, the hardcoded entries go away and this
-    function surfaces it alongside OpenAI automatically.
+    plugin registry. Every image-gen backend is a plugin now — there
+    are no hardcoded rows left in ``TOOL_CATEGORIES["image_gen"]`` for
+    this function to dedupe against (see issue #26241).
    """
    try:
        from agent.image_gen_registry import list_providers
@ -1524,9 +1583,6 @@ def _plugin_image_gen_providers() -> list[dict]:

    rows: list[dict] = []
    for provider in providers:
-        if getattr(provider, "name", None) == "fal":
-            # FAL has its own hardcoded rows today.
-            continue
        try:
            schema = provider.get_setup_schema()
        except Exception:
@ -1697,6 +1753,62 @@ def _plugin_browser_providers() -> list[dict]:
    return rows


+def _plugin_tts_providers() -> list[dict]:
+    """Return picker rows for TTS providers registered by plugins.
+
+    Issue #30398 — providers added through the ``register_tts_provider()``
+    plugin hook live next to the 10 built-in TTS providers
+    (``edge``/``openai``/``elevenlabs``/…) and the
+    ``tts.providers.<name>: type: command`` registry from PR #17843.
+    The built-in rows remain hardcoded in ``TOOL_CATEGORIES["tts"]``; only
+    PLUGIN-registered providers are produced here.
+
+    Defensive: a plugin whose name collides with a built-in TTS provider
+    is dropped. The registry already rejects such names at registration
+    time, but a future code path calling
+    :func:`agent.tts_registry.register_provider` directly could slip
+    through, so the picker enforces the invariant as well.
+
+    Returns an empty list when the registry or plugin discovery cannot be
+    imported or raises.
+    """
+    try:
+        from agent.tts_registry import _BUILTIN_NAMES, list_providers
+        from hermes_cli.plugins import _ensure_plugins_discovered
+
+        _ensure_plugins_discovered()
+        registered = list_providers()
+    except Exception:
+        return []
+
+    picker_rows: list[dict] = []
+    for plugin in registered:
+        plugin_name = getattr(plugin, "name", None)
+        # Skip nameless entries, and reject built-in shadowing at the
+        # picker layer too.
+        if not plugin_name or plugin_name.lower().strip() in _BUILTIN_NAMES:
+            continue
+        try:
+            setup = plugin.get_setup_schema()
+        except Exception:
+            continue
+        if not isinstance(setup, dict):
+            continue
+        entry = {
+            "name": setup.get("name", plugin.display_name),
+            "badge": setup.get("badge", ""),
+            "tag": setup.get("tag", ""),
+            "env_vars": setup.get("env_vars", []),
+            # Picking this row writes ``tts.provider: <name>`` — the same
+            # write-path the hardcoded rows use, so the plugin dispatcher
+            # picks it up from there.
+            "tts_provider": plugin_name,
+            "tts_plugin_name": plugin_name,
+        }
+        post_setup_hook = setup.get("post_setup")
+        if post_setup_hook:
+            entry["post_setup"] = post_setup_hook
+        picker_rows.append(entry)
+    return picker_rows
+
+
 def _visible_providers(cat: dict, config: dict) -> list[dict]:
    """Return provider entries visible for the current auth/config state."""
    features = get_nous_subscription_features(config)
@ -1734,6 +1846,12 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]:
    if cat.get("name") == "Browser Automation":
        visible.extend(_plugin_browser_providers())

+    # Inject plugin-registered TTS backends (issue #30398). Plugin rows
+    # render BELOW the 10 hardcoded built-in rows. Built-in shadowing
+    # is filtered out by ``_plugin_tts_providers`` defensively.
+    if cat.get("name") == "Text-to-Speech":
+        visible.extend(_plugin_tts_providers())
+
    return visible


@ -1751,7 +1869,7 @@ _POST_SETUP_INSTALLED: dict = {
    # entry when (a) the post_setup is the ONLY install side-effect for
    # a no-key provider, and (b) an installed-state check is cheap and
    # doesn't trigger a heavy import.
-    "cua_driver": lambda: bool(shutil.which("cua-driver")),
+    "cua_driver": lambda: bool(shutil.which(_cua_driver_cmd())),
 }


@ -1869,6 +1987,16 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
        print()

        # Plain text labels only (no ANSI codes in menu items)
+        # When the user is logged into Nous, surface a marker on providers
+        # whose access is included in their subscription so it's visually
+        # obvious which options cost extra vs. cost nothing on top of Nous.
+        try:
+            _nous_logged_in = bool(
+                get_nous_subscription_features(config).nous_auth_present
+            )
+        except Exception:
+            _nous_logged_in = False
+
        provider_choices = []
        for p in providers:
            badge = f" [{p['badge']}]" if p.get("badge") else ""
@ -1882,7 +2010,15 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
                    configured = ""
                else:
                    configured = " [configured]"
-            provider_choices.append(f"{p['name']}{badge}{tag}{configured}")
+            # Highlight Nous-managed entries when the user has Portal auth.
+            # curses_radiolist can't render ANSI inside item strings, so we
+            # use a plain unicode star + parenthetical phrase. Suppressed
+            # when no Portal auth is present so non-subscribers see the
+            # picker unchanged.
+            sub_marker = ""
+            if _nous_logged_in and p.get("managed_nous_feature"):
+                sub_marker = "  ★ Included with your Nous subscription"
+            provider_choices.append(f"{p['name']}{badge}{tag}{configured}{sub_marker}")

        # Add skip option
        provider_choices.append("Skip — keep defaults / configure later")
@ -2349,6 +2485,30 @@ def _configure_provider(provider: dict, config: dict):

    # Prompt for each required env var
    all_configured = True
+    # If this BYOK provider lives in a category that ALSO has a
+    # Nous-managed sibling, show a single dim hint so users know
+    # they can avoid the key entirely via a Portal subscription.
+    # Suppressed when the user is already authed to Nous.
+    _show_portal_hint = False
+    if env_vars and not managed_feature and not provider.get("requires_nous_auth"):
+        try:
+            _has_managed_sibling = False
+            for _cat_key, _cat in TOOL_CATEGORIES.items():
+                _providers = _cat.get("providers", [])
+                if provider in _providers and any(
+                    sib.get("managed_nous_feature") for sib in _providers
+                ):
+                    _has_managed_sibling = True
+                    break
+            if _has_managed_sibling:
+                _features = get_nous_subscription_features(config)
+                _show_portal_hint = not _features.nous_auth_present
+        except Exception:
+            _show_portal_hint = False
+
+    if _show_portal_hint:
+        _print_info("  Available through Nous Portal subscription.")
+
    for var in env_vars:
        existing = get_env_value(var["key"])
        if existing:
@ -3030,21 +3190,26 @@ def _configure_mcp_tools_interactive(config: dict):
            _print_info(f"  {server_name}: no changes")
            continue

-        # Compute new exclude list based on unchecked tools
-        new_exclude = [tool_names[i] for i in range(len(tool_names)) if i not in chosen]
+        # Compute new include list (the chosen tools). We standardize on
+        # tools.include across the codebase (catalog installs, hermes mcp
+        # configure, and this UI) so a server's on-disk config shape doesn't
+        # depend on which UI the user touched last.
+        chosen_names = [tool_names[i] for i in sorted(chosen)]

        # Update config
        srv_cfg = mcp_servers.setdefault(server_name, {})
        tools_cfg = srv_cfg.setdefault("tools", {})

-        if new_exclude:
-            tools_cfg["exclude"] = new_exclude
-            # Remove include if present — we're switching to exclude mode
-            tools_cfg.pop("include", None)
-        else:
-            # All tools enabled — clear filters
+        if len(chosen) == len(tools):
+            # All tools enabled — clear filters (cleanest config shape; the
+            # server's native tool set is the active set, and any tools the
+            # server adds later are auto-enabled).
            tools_cfg.pop("exclude", None)
            tools_cfg.pop("include", None)
+        else:
+            tools_cfg["include"] = chosen_names
+            # Drop any legacy exclude block — we're include-mode now.
+            tools_cfg.pop("exclude", None)

        enabled_count = len(chosen)
        disabled_count = len(tools) - enabled_count
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@ -16,6 +16,7 @@ import json
 import logging
 import os
 import secrets
+import stat
 import subprocess
 import sys
 import threading
@ -48,6 +49,7 @@ from hermes_cli.config import (
    redact_key,
 )
 from gateway.status import get_running_pid, read_runtime_status
+from utils import env_var_enabled

 try:
    from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
@ -118,7 +120,6 @@ _PUBLIC_API_PATHS: frozenset = frozenset({
    "/api/model/info",
    "/api/dashboard/themes",
    "/api/dashboard/plugins",
-    "/api/dashboard/plugins/rescan",
 })


@ -975,11 +976,13 @@ _AUX_TASK_SLOTS: Tuple[str, ...] = (
    "vision",
    "web_extract",
    "compression",
-    "session_search",
    "skills_hub",
    "approval",
    "mcp",
    "title_generation",
+    "triage_specifier",
+    "kanban_decomposer",
+    "profile_describer",
    "curator",
 )

@ -1220,6 +1223,12 @@ async def set_env_var(body: EnvVarUpdate):
    try:
        save_env_value(body.key, body.value)
        return {"ok": True, "key": body.key}
+    except ValueError as exc:
+        # save_env_value raises ValueError for invalid names and for keys
+        # on the denylist (LD_PRELOAD, PATH, PYTHONPATH, …). Surface the
+        # message to the SPA so the user understands why the write was
+        # refused instead of seeing an opaque 500.
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception:
        _log.exception("PUT /api/env failed")
        raise HTTPException(status_code=500, detail="Internal server error")
@ -1684,7 +1693,25 @@ def _save_anthropic_oauth_creds(access_token: str, refresh_token: str, expires_a
        "expiresAt": expires_at_ms,
    }
    _HERMES_OAUTH_FILE.parent.mkdir(parents=True, exist_ok=True)
-    _HERMES_OAUTH_FILE.write_text(json.dumps(payload, indent=2), encoding="utf-8")
+    tmp_path = _HERMES_OAUTH_FILE.with_name(
+        f"{_HERMES_OAUTH_FILE.name}.tmp.{os.getpid()}.{secrets.token_hex(8)}"
+    )
+    try:
+        with tmp_path.open("w", encoding="utf-8") as handle:
+            handle.write(json.dumps(payload, indent=2))
+            handle.flush()
+            os.fsync(handle.fileno())
+        os.replace(tmp_path, _HERMES_OAUTH_FILE)
+        try:
+            _HERMES_OAUTH_FILE.chmod(stat.S_IRUSR | stat.S_IWUSR)
+        except OSError:
+            pass
+    finally:
+        try:
+            if tmp_path.exists():
+                tmp_path.unlink()
+        except OSError:
+            pass
    # Best-effort credential-pool insert. Failure here doesn't invalidate
    # the file write — pool registration only matters for the rotation
    # strategy, not for runtime credential resolution.
@ -2690,7 +2717,10 @@ async def update_cron_job(job_id: str, body: CronJobUpdate, profile: Optional[st
    selected = profile or _find_cron_job_profile(job_id)
    if not selected:
        raise HTTPException(status_code=404, detail="Job not found")
-    job = _call_cron_for_profile(selected, "update_job", job_id, body.updates)
+    try:
+        job = _call_cron_for_profile(selected, "update_job", job_id, body.updates)
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")
    return job
@ -2734,7 +2764,11 @@ async def delete_cron_job(job_id: str, profile: Optional[str] = None):
    selected = profile or _find_cron_job_profile(job_id)
    if not selected:
        raise HTTPException(status_code=404, detail="Job not found")
-    if not _call_cron_for_profile(selected, "remove_job", job_id):
+    try:
+        removed = _call_cron_for_profile(selected, "remove_job", job_id)
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+    if not removed:
        raise HTTPException(status_code=404, detail="Job not found")
    return {"ok": True}

@ -3293,24 +3327,49 @@ _VALID_CHANNEL_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
 _LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})


-def _is_public_bind() -> bool:
-    """True when bound to all-interfaces (operator used --insecure)."""
-    return getattr(app.state, "bound_host", "") in {"0.0.0.0", "::"}
-
-
 def _ws_client_is_allowed(ws: "WebSocket") -> bool:
    """Check if the WebSocket client IP is acceptable.

-    Allows loopback always; allows any IP when bound to all-interfaces
-    (--insecure mode, guarded by session token auth).
+    Allows loopback clients only.
    """
-    if _is_public_bind():
-        return True
    client_host = ws.client.host if ws.client else ""
    if not client_host:
        return True
    return client_host in _LOOPBACK_HOSTS

+
+def _ws_host_origin_is_allowed(ws: "WebSocket") -> bool:
+    """Apply the dashboard Host/Origin guard to WebSocket upgrades.
+
+    FastAPI HTTP middleware does not run for WebSocket routes, so the
+    DNS-rebinding Host check used for normal dashboard HTTP requests must be
+    repeated here before accepting the upgrade.  Browsers also send an Origin
+    header on WebSocket handshakes; when present, require it to target the
+    same bound dashboard host.
+
+    Args:
+        ws: The WebSocket whose handshake headers are inspected.
+
+    Returns:
+        ``True`` when no bound host is recorded on ``app.state``, or when the
+        Host header (and the Origin header, if one was sent) is accepted for
+        the bound host.  ``False`` when either header is rejected, or when
+        the Origin value is not a parseable http(s) URL.
+    """
+    bound_host = getattr(app.state, "bound_host", None)
+    if not bound_host:
+        # No bound host recorded, so there is nothing to compare against.
+        return True
+
+    host_header = ws.headers.get("host", "")
+    if not _is_accepted_host(host_header, bound_host):
+        return False
+
+    origin = ws.headers.get("origin", "")
+    if not origin:
+        # An absent Origin is allowed; only the Host check applies.
+        return True
+
+    try:
+        parsed = urllib.parse.urlparse(origin)
+    except ValueError:
+        # urlparse raises on malformed netlocs (e.g. an unmatched "[" in a
+        # bracketed IPv6 literal).  Origin is client-supplied, so treat an
+        # unparseable value as a rejection instead of letting it propagate.
+        return False
+    if parsed.scheme not in {"http", "https"} or not parsed.netloc:
+        return False
+
+    return _is_accepted_host(parsed.netloc, bound_host)
+
+
+def _ws_request_is_allowed(ws: "WebSocket") -> bool:
+    """Decide whether a WebSocket upgrade stays within dashboard boundaries.
+
+    The Host/Origin guard is evaluated first; the client-address check is
+    only consulted when that guard passes.
+    """
+    if not _ws_host_origin_is_allowed(ws):
+        return False
+    return _ws_client_is_allowed(ws)
+
 # Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard)
 # and /api/events (dashboard → browser sidebar).  Keyed by an opaque channel id
 # the chat tab generates on mount; entries auto-evict when the last subscriber
@ -3389,7 +3448,7 @@ async def _broadcast_event(channel: str, payload: str) -> None:
        except Exception:
            # Subscriber went away mid-send; the /api/events finally clause
            # will remove it from the registry on its next iteration.
-            pass
+            _log.warning("broadcast send failed for subscriber on %s", channel, exc_info=True)


 def _channel_or_close_code(ws: WebSocket) -> Optional[str]:
@ -3412,7 +3471,7 @@ async def pty_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    if not _ws_client_is_allowed(ws):
+    if not _ws_request_is_allowed(ws):
        await ws.close(code=4403)
        return

@ -3531,7 +3590,7 @@ async def gateway_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    if not _ws_client_is_allowed(ws):
+    if not _ws_request_is_allowed(ws):
        await ws.close(code=4403)
        return

@ -3563,7 +3622,7 @@ async def pub_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    if not _ws_client_is_allowed(ws):
+    if not _ws_request_is_allowed(ws):
        await ws.close(code=4403)
        return

@ -3592,7 +3651,7 @@ async def events_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    if not _ws_client_is_allowed(ws):
+    if not _ws_request_is_allowed(ws):
        await ws.close(code=4403)
        return

@ -4044,6 +4103,43 @@ async def set_dashboard_theme(body: ThemeSetBody):
 # Dashboard plugin system
 # ---------------------------------------------------------------------------

+def _safe_plugin_api_relpath(api_field: Any, *, dashboard_dir: Path) -> Optional[str]:
+    """Validate the manifest's ``api`` field for the plugin loader.
+
+    The web server later imports this file as a Python module via
+    ``importlib.util.spec_from_file_location`` (arbitrary code
+    execution by design — that's how plugins extend the backend).
+    Pre-#29156 the field was used as-is, which meant:
+
+    * An absolute path swallowed the plugin's dashboard directory
+      entirely — ``Path('safe/dashboard') / '/tmp/evil.py'`` resolves
+      to ``/tmp/evil.py``, so any attacker-controlled manifest could
+      point the import at any Python file on disk (GHSA-5qr3-c538-wm9j).
+    * A ``../..`` traversal could climb out of the plugin into
+      neighbouring directories on the search path.
+
+    Return the original string when the resolved path stays under
+    ``dashboard_dir``; return ``None`` (with a warning logged at the
+    call site) otherwise so the plugin still loads its static JS/CSS
+    but its backend ``api`` is rejected.  Never raises for a malformed
+    value: non-strings, blank strings, embedded NULs and unresolvable
+    paths all yield ``None``.
+    """
+    if not isinstance(api_field, str) or not api_field.strip():
+        return None
+    # An embedded NUL can make ``resolve()`` raise ValueError; reject it
+    # here so a bad ``api`` value never escapes as an exception.
+    if "\x00" in api_field or Path(api_field).is_absolute():
+        return None
+    try:
+        base = dashboard_dir.resolve()
+        # ``relative_to`` raises ValueError when the target escapes ``base``.
+        (dashboard_dir / api_field).resolve().relative_to(base)
+    except (OSError, RuntimeError, ValueError):
+        return None
+    return api_field
+
+
 def _discover_dashboard_plugins() -> list:
    """Scan plugins/*/dashboard/manifest.json for dashboard extensions.

@ -4062,7 +4158,16 @@ def _discover_dashboard_plugins() -> list:
        (bundled_root / "memory", "bundled"),
        (bundled_root, "bundled"),
    ]
-    if os.environ.get("HERMES_ENABLE_PROJECT_PLUGINS"):
+    # GHSA-5qr3-c538-wm9j (#29156): the previous ``os.environ.get(...)``
+    # check treated *any* non-empty string as truthy, so ``=0``, ``=false``,
+    # and ``=no`` — all of which the agent loader and operators correctly
+    # read as "disabled" — silently *enabled* the untrusted project source
+    # in the web server.  Combined with the absolute-path RCE primitive on
+    # the manifest's ``api`` field (now patched below), this turned the
+    # opt-in into a sticky always-on switch.  Use the shared truthy
+    # semantics (``1`` / ``true`` / ``yes`` / ``on``) so the gate matches
+    # ``hermes_cli/plugins.py`` and the documented user contract.
+    if env_var_enabled("HERMES_ENABLE_PROJECT_PLUGINS"):
        search_dirs.append((Path.cwd() / ".hermes" / "plugins", "project"))

    for plugins_root, source in search_dirs:
@ -4101,6 +4206,23 @@ def _discover_dashboard_plugins() -> list:
                slots: List[str] = []
                if isinstance(slots_src, list):
                    slots = [s for s in slots_src if isinstance(s, str) and s]
+                # Validate ``api`` at discovery time so the value cached
+                # on the plugin entry is already safe to feed into the
+                # importer.  An attacker-controlled manifest can name
+                # any absolute path or ``..`` traversal here — the
+                # web server then imports that file as a Python module
+                # (RCE, GHSA-5qr3-c538-wm9j).
+                raw_api = data.get("api")
+                dashboard_dir = child / "dashboard"
+                safe_api = _safe_plugin_api_relpath(raw_api, dashboard_dir=dashboard_dir)
+                if raw_api and safe_api is None:
+                    _log.warning(
+                        "Plugin %s: refusing unsafe api path %r (must be a "
+                        "relative file inside the plugin's dashboard/ "
+                        "directory); backend routes from this plugin will "
+                        "not be mounted",
+                        name, raw_api,
+                    )
                plugins.append({
                    "name": name,
                    "label": data.get("label", name),
@ -4111,10 +4233,10 @@ def _discover_dashboard_plugins() -> list:
                    "slots": slots,
                    "entry": data.get("entry", "dist/index.js"),
                    "css": data.get("css"),
-                    "has_api": bool(data.get("api")),
+                    "has_api": bool(safe_api),
                    "source": source,
-                    "_dir": str(child / "dashboard"),
-                    "_api_file": data.get("api"),
+                    "_dir": str(dashboard_dir),
+                    "_api_file": safe_api,
                })
            except Exception as exc:
                _log.warning("Bad dashboard plugin manifest %s: %s", manifest_file, exc)
@ -4317,12 +4439,13 @@ async def post_agent_plugin_install(request: Request, body: _AgentPluginInstallB

 def _validate_plugin_name(name: str) -> str:
     """Reject path-traversal attempts in plugin name URL parameters."""
-    if not name or "/" in name or "\\" in name or ".." in name:
+    # ``{name:path}`` routes allow "/" inside a name; trim the outer slashes,
+    # then refuse backslashes and any empty, ".", or ".."-bearing segment.
+    name = name.strip("/")
+    if "\\" in name or any(s in ("", ".") or ".." in s for s in name.split("/")):
         raise HTTPException(status_code=400, detail="Invalid plugin name.")
     return name


-@app.post("/api/dashboard/agent-plugins/{name}/enable")
+@app.post("/api/dashboard/agent-plugins/{name:path}/enable")
 async def post_agent_plugin_enable(request: Request, name: str):
    _require_token(request)
    name = _validate_plugin_name(name)
@ -4334,7 +4457,7 @@ async def post_agent_plugin_enable(request: Request, name: str):
    return result


-@app.post("/api/dashboard/agent-plugins/{name}/disable")
+@app.post("/api/dashboard/agent-plugins/{name:path}/disable")
 async def post_agent_plugin_disable(request: Request, name: str):
    _require_token(request)
    name = _validate_plugin_name(name)
@ -4346,7 +4469,7 @@ async def post_agent_plugin_disable(request: Request, name: str):
    return result


-@app.post("/api/dashboard/agent-plugins/{name}/update")
+@app.post("/api/dashboard/agent-plugins/{name:path}/update")
 async def post_agent_plugin_update(request: Request, name: str):
    _require_token(request)
    name = _validate_plugin_name(name)
@ -4359,7 +4482,7 @@ async def post_agent_plugin_update(request: Request, name: str):
    return result


-@app.delete("/api/dashboard/agent-plugins/{name}")
+@app.delete("/api/dashboard/agent-plugins/{name:path}")
 async def delete_agent_plugin(request: Request, name: str):
    _require_token(request)
    name = _validate_plugin_name(name)
@ -4397,7 +4520,7 @@ class _PluginVisibilityBody(BaseModel):
    hidden: bool


-@app.post("/api/dashboard/plugins/{name}/visibility")
+@app.post("/api/dashboard/plugins/{name:path}/visibility")
 async def post_plugin_visibility(request: Request, name: str, body: _PluginVisibilityBody):
    """Toggle a plugin's sidebar visibility (persists to config.yaml dashboard.hidden_plugins)."""
    _require_token(request)
@ -4426,6 +4549,17 @@ async def serve_plugin_asset(plugin_name: str, file_path: str):

    Only serves files from the plugin's ``dashboard/`` subdirectory.
    Path traversal is blocked by checking ``resolve().is_relative_to()``.
+
+    Restricted to a browser-fetchable suffix allowlist (JS/CSS/JSON/HTML/
+    SVG/PNG/JPG/WOFF). The dashboard loads plugin JS via ``<script src>``
+    and CSS via ``<link href>``, neither of which can attach a custom
+    auth header — so this route stays unauthenticated to keep the SPA
+    working. But user-installed plugins ship a ``plugin_api.py``
+    backend module that the browser never fetches; it's only imported
+    by :func:`_mount_plugin_api_routes` at startup. Without a suffix
+    allowlist, anyone on the loopback port can curl the ``.py`` source
+    of a private third-party plugin. Reject everything outside the
+    browser-asset set.
    """
    plugins = _get_dashboard_plugins()
    plugin = next((p for p in plugins if p["name"] == plugin_name), None)
@ -4440,7 +4574,11 @@ async def serve_plugin_asset(plugin_name: str, file_path: str):
    if not target.exists() or not target.is_file():
        raise HTTPException(status_code=404, detail="File not found")

-    # Guess content type
+    # Browser-asset suffix allowlist. Everything outside this set is
+    # rejected with 404 so we don't leak ``.py`` backend sources, README
+    # files, ``.env.example`` templates, etc. — none of which the SPA
+    # actually fetches. Add to this set deliberately when a new asset
+    # type comes up; do NOT change the default fallback.
    suffix = target.suffix.lower()
    content_types = {
        ".js": "application/javascript",
@ -4451,10 +4589,22 @@ async def serve_plugin_asset(plugin_name: str, file_path: str):
        ".svg": "image/svg+xml",
        ".png": "image/png",
        ".jpg": "image/jpeg",
+        ".jpeg": "image/jpeg",
+        ".gif": "image/gif",
+        ".webp": "image/webp",
+        ".ico": "image/x-icon",
        ".woff2": "font/woff2",
        ".woff": "font/woff",
+        ".ttf": "font/ttf",
+        ".otf": "font/otf",
+        ".map": "application/json",
    }
-    media_type = content_types.get(suffix, "application/octet-stream")
+    if suffix not in content_types:
+        raise HTTPException(
+            status_code=404,
+            detail="File not found",
+        )
+    media_type = content_types[suffix]
    return FileResponse(
        target,
        media_type=media_type,
@ -4468,12 +4618,42 @@ def _mount_plugin_api_routes():
    Each plugin's ``api`` field points to a Python file that must expose
    a ``router`` (FastAPI APIRouter).  Routes are mounted under
    ``/api/plugins/<name>/``.
+
+    Backend import is restricted to ``bundled`` and ``user`` sources.
+    Project plugins (``./.hermes/plugins/``) ship with the CWD and are
+    therefore attacker-controlled in any threat model where the user
+    opens a malicious repo; they can extend the dashboard UI via
+    static JS/CSS but their Python ``api`` file is never auto-imported
+    by the web server.  See GHSA-5qr3-c538-wm9j (#29156).
    """
    for plugin in _get_dashboard_plugins():
        api_file_name = plugin.get("_api_file")
        if not api_file_name:
            continue
-        api_path = Path(plugin["_dir"]) / api_file_name
+        if plugin.get("source") == "project":
+            _log.warning(
+                "Plugin %s: ignoring backend api=%s (project plugins may "
+                "not auto-import Python code; move the plugin to "
+                "~/.hermes/plugins/ if you trust it)",
+                plugin["name"], api_file_name,
+            )
+            continue
+        dashboard_dir = Path(plugin["_dir"])
+        api_path = dashboard_dir / api_file_name
+        try:
+            resolved_api = api_path.resolve()
+            resolved_base = dashboard_dir.resolve()
+            resolved_api.relative_to(resolved_base)
+        except (OSError, RuntimeError, ValueError):
+            # Discovery already filters this, but re-check here in case
+            # ``_dir`` was tampered with after caching or a future caller
+            # bypasses the validator.  Defence in depth keeps the import
+            # primitive contained even if the upstream check regresses.
+            _log.warning(
+                "Plugin %s: refusing to import api file outside its "
+                "dashboard directory (%s)", plugin["name"], api_path,
+            )
+            continue
        if not api_path.exists():
            _log.warning("Plugin %s declares api=%s but file not found", plugin["name"], api_file_name)
            continue
--- a/hermes_cli/webhook.py
+++ b/hermes_cli/webhook.py
@ -11,8 +11,10 @@ hot-reloaded by the webhook adapter without a gateway restart.
 """

 import json
+import os
 import re
 import secrets
+import tempfile
 import time
 from pathlib import Path
 from typing import Dict
@ -23,6 +25,7 @@ from hermes_cli.config import cfg_get


 _SUBSCRIPTIONS_FILENAME = "webhook_subscriptions.json"
+_SUBSCRIPTIONS_FILE_MODE = 0o600


 def _hermes_home() -> Path:
@ -48,12 +51,33 @@ def _load_subscriptions() -> Dict[str, dict]:
 def _save_subscriptions(subs: Dict[str, dict]) -> None:
     path = _subscriptions_path()
     path.parent.mkdir(parents=True, exist_ok=True)
-    tmp_path = path.with_suffix(".tmp")
-    tmp_path.write_text(
-        json.dumps(subs, indent=2, ensure_ascii=False),
-        encoding="utf-8",
+    # webhook_subscriptions.json contains per-route HMAC secrets.  mkstemp
+    # creates the temp file with owner-only permissions whatever the umask,
+    # so the secrets are not readable by other local users while written;
+    # the explicit chmod below just pins the mode before the atomic rename.
+    fd, tmp_name = tempfile.mkstemp(
+        prefix=f".{path.name}.",
+        suffix=".tmp",
+        dir=path.parent,
+        text=True,
     )
-    atomic_replace(tmp_path, path)
+    tmp_path = Path(tmp_name)
+    try:
+        with os.fdopen(fd, "w", encoding="utf-8") as fh:
+            json.dump(subs, fh, indent=2, ensure_ascii=False)
+            fh.flush()
+            os.fsync(fh.fileno())
+        os.chmod(tmp_path, _SUBSCRIPTIONS_FILE_MODE)
+        atomic_replace(tmp_path, path)
+        # Re-assert in case atomic_replace kept a broader destination mode.
+        os.chmod(path, _SUBSCRIPTIONS_FILE_MODE)
+    except BaseException:
+        # Also on KeyboardInterrupt/SystemExit: don't strand secrets in a temp file.
+        try:
+            tmp_path.unlink(missing_ok=True)
+        except OSError:
+            pass
+        raise


 def _get_webhook_config() -> dict: