Merge remote-tracking branch 'origin/main' into fix/bundle-size

2026-05-21 05:11:26 +00:00 · 2026-05-11 16:01:00 -04:00 · 2026-05-11 16:01:00 -04:00 · 3197b4de6d
commit 3197b4de6d
parent 9d645d98c4 ce0f529cde
1437 changed files with 219762 additions and 11968 deletions
--- a/hermes_cli/init.py
+++ b/hermes_cli/init.py
@ -5,11 +5,43 @@ Provides subcommands for:
 - hermes chat          - Interactive chat (same as ./hermes)
 - hermes gateway       - Run gateway in foreground
 - hermes gateway start - Start gateway service
- hermes gateway stop  - Stop gateway service  
+- hermes gateway stop  - Stop gateway service
 - hermes setup         - Interactive setup wizard
 - hermes status        - Show status of all components
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.12.0"
-__release_date__ = "2026.4.30"
+import os
+import sys
+
+__version__ = "0.13.0"
+__release_date__ = "2026.5.7"
+
+
+def _ensure_utf8():
+    """Force UTF-8 stdout/stderr on Windows to prevent UnicodeEncodeError.
+
+    Windows services and terminals default to cp1252, which cannot encode
+    box-drawing characters used in CLI output. This causes unhandled
+    UnicodeEncodeError crashes on gateway startup.
+
+    Does nothing on other platforms.  On Windows the switch is best-effort:
+    a stream that cannot be changed (closed, no file descriptor, missing
+    attributes) is left as it is instead of failing the package import.
+    """
+    if sys.platform != "win32":
+        return
+    # setdefault: keep any value that is already present in the environment.
+    os.environ.setdefault("PYTHONUTF8", "1")
+    os.environ.setdefault("PYTHONIOENCODING", "utf-8")
+    for stream_name in ("stdout", "stderr"):
+        stream = getattr(sys, stream_name, None)
+        if stream is None:
+            continue
+        try:
+            # ``encoding`` can be absent *or* None; both mean "not UTF-8".
+            encoding = getattr(stream, "encoding", None) or ""
+            if encoding.lower().replace("-", "") == "utf8":
+                continue
+            reconfigure = getattr(stream, "reconfigure", None)
+            if reconfigure is not None:
+                # Switch the existing wrapper in place so no second file
+                # object ends up writing to the same descriptor.  ``errors``
+                # is passed explicitly because reconfigure() resets it to
+                # "strict" whenever only ``encoding`` is given.
+                reconfigure(
+                    encoding="utf-8",
+                    errors=getattr(stream, "errors", None) or "strict",
+                    line_buffering=True,
+                )
+                continue
+            # No reconfigure(): flush what is pending, then wrap the same
+            # descriptor in a fresh line-buffered UTF-8 text stream.
+            stream.flush()
+            new_stream = open(
+                stream.fileno(), "w", encoding="utf-8",
+                buffering=1, closefd=False,
+            )
+            setattr(sys, stream_name, new_stream)
+        except (AttributeError, OSError, ValueError):
+            # ValueError: stream already closed; OSError: no usable fd.
+            pass
+
+
+_ensure_utf8()
--- a/hermes_cli/_parser.py
+++ b/hermes_cli/_parser.py
@ -70,6 +70,9 @@ Examples:
    hermes logs --since 1h        Lines from the last hour
    hermes debug share             Upload debug report for support
    hermes update                 Update to latest version
+    hermes dashboard              Start web UI dashboard (port 9119)
+    hermes dashboard --stop       Stop running dashboard processes
+    hermes dashboard --status     List running dashboard processes

 For more help on a command:
    hermes <command> --help
--- a/hermes_cli/_subprocess_compat.py
+++ b/hermes_cli/_subprocess_compat.py
@ -0,0 +1,175 @@
+"""Windows subprocess compatibility helpers.
+
+Hermes is developed on Linux / macOS and tested natively on Windows too.
+Several common subprocess patterns break silently-or-loudly on Windows:
+
+* ``["npm", "install", ...]`` — on Windows ``npm`` is ``npm.cmd``, a batch
+  shim.  ``subprocess.Popen(["npm", ...])`` fails with WinError 193
+  ("not a valid Win32 application") because CreateProcessW can't run a
+  ``.cmd`` file without ``shell=True`` or PATHEXT resolution.
+
+* ``start_new_session=True`` — on POSIX, this maps to ``os.setsid()`` and
+  actually detaches the child.  On Windows it's silently ignored; the
+  Windows equivalent is ``CREATE_NEW_PROCESS_GROUP | DETACHED_PROCESS``
+  creationflags, which Python only applies when you pass them explicitly.
+
+* Console-window flashes — a console ``.exe`` started with
+  ``subprocess.Popen`` from a process that has no console of its own
+  (a service, ``pythonw``) gets a new console window that flashes briefly
+  unless ``CREATE_NO_WINDOW`` is passed.  Cosmetic but jarring for
+  background daemons.
+
+This module centralizes the platform-branching logic so the rest of the
+codebase doesn't sprinkle ``if sys.platform == "win32":`` everywhere.
+
+**All helpers are no-ops on non-Windows** — calling them in Linux/macOS
+code paths is safe by design.  That's the "do no damage on POSIX"
+guarantee.
+"""
+
+from __future__ import annotations
+
+import os
+import shutil
+import subprocess
+import sys
+from typing import Optional, Sequence
+
+__all__ = [
+    "IS_WINDOWS",
+    "resolve_node_command",
+    "windows_detach_flags",
+    "windows_hide_flags",
+    "windows_detach_popen_kwargs",
+]
+
+
+IS_WINDOWS = sys.platform == "win32"
+
+
+# -----------------------------------------------------------------------------
+# Node ecosystem launcher resolution
+# -----------------------------------------------------------------------------
+
+
+def resolve_node_command(name: str, argv: Sequence[str]) -> list[str]:
+    """Build a full argv for a Node-ecosystem tool, resolving *name* via PATH.
+
+    Why this exists: on Windows, tools such as ``npm``, ``npx``, ``yarn``,
+    ``pnpm``, ``playwright`` and ``prettier`` are ``.cmd`` batch shims, and
+    ``subprocess.Popen(["npm", "install"])`` fails with WinError 193
+    because CreateProcessW does not execute batch files directly.
+    ``shutil.which`` honours PATHEXT, so it hands back the fully-qualified
+    ``.cmd`` path, which CreateProcessW accepts.
+
+    On POSIX the lookup also yields an absolute path.  That is functionally
+    the same as letting the OS search PATH and has the side benefit of
+    making logged argv reproducible.
+
+    When *name* is not on PATH (on any platform) the bare name is passed
+    through unchanged, so the caller's subsequent ``Popen`` fails just as
+    it would have without this helper — or the caller may retry with
+    ``shell=True`` as a last resort.
+
+    Args:
+        name: Command to resolve (``npm``, ``npx``, ``node`` …).
+        argv: Arguments that follow the command.  Must NOT contain
+            ``name`` itself; it is prepended here.
+
+    Returns:
+        A new list suitable for ``subprocess.Popen`` / ``run`` / ``call``.
+    """
+    executable = shutil.which(name) or name
+    return [executable, *argv]
+
+
+# -----------------------------------------------------------------------------
+# Detached / hidden process creation
+# -----------------------------------------------------------------------------
+
+
+# Win32 CreationFlags — defined here rather than imported from subprocess
+# because CREATE_NO_WINDOW and DETACHED_PROCESS aren't guaranteed to be
+# present on stdlib subprocess on older Pythons or non-Windows builds.
+_CREATE_NEW_PROCESS_GROUP = 0x00000200
+_DETACHED_PROCESS = 0x00000008
+_CREATE_NO_WINDOW = 0x08000000
+
+
+def windows_detach_flags() -> int:
+    """Win32 ``creationflags`` for a fully detached child; 0 elsewhere.
+
+    Intended for ``subprocess.Popen`` calls that leave
+    ``start_new_session`` at its default of ``False``.  POSIX callers get
+    the same effect from ``start_new_session=True`` (``os.setsid()`` in
+    the child) and receive 0 from this function.
+
+    The returned mask combines three flags:
+
+    - ``CREATE_NEW_PROCESS_GROUP``: the child gets its own process group,
+      so Ctrl+C in the parent console is not propagated to it.
+    - ``DETACHED_PROCESS``: the child has no console at all.  Background
+      daemons (gateway watchers, update respawners) need this, otherwise
+      closing the console kills them.
+    - ``CREATE_NO_WINDOW``: suppresses the brief cmd flash of a console
+      app.  Redundant next to ``DETACHED_PROCESS``; kept to be explicit.
+    """
+    if IS_WINDOWS:
+        return _CREATE_NEW_PROCESS_GROUP | _DETACHED_PROCESS | _CREATE_NO_WINDOW
+    return 0
+
+
+def windows_hide_flags() -> int:
+    """Win32 ``creationflags`` that hide the child's console window while
+    keeping the child attached; 0 elsewhere.
+
+    Meant for short-lived console programs run as one step of a larger
+    operation (``taskkill``, ``where``, version probes): no window flash,
+    yet stdout and the exit code can still be collected synchronously.
+
+    Unlike :func:`windows_detach_flags` the mask deliberately omits
+    ``DETACHED_PROCESS``.  The child therefore still inherits stdio
+    handles and ``capture_output=True`` keeps working; detaching would
+    sever stdio and break stdout capture.
+    """
+    return _CREATE_NO_WINDOW if IS_WINDOWS else 0
+
+
+def windows_detach_popen_kwargs() -> dict:
+    """Keyword arguments that make ``subprocess.Popen`` detach the child
+    on every platform.
+
+    Windows gets ``creationflags`` from :func:`windows_detach_flags`;
+    Linux/macOS get ``start_new_session=True``.
+
+    Typical call:
+
+    .. code-block:: python
+
+        subprocess.Popen(
+            argv,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            stdin=subprocess.DEVNULL,
+            close_fds=True,
+            **windows_detach_popen_kwargs(),
+        )
+
+    Use it instead of the pattern below, which is unsafe on Windows:
+
+    .. code-block:: python
+
+        subprocess.Popen(..., start_new_session=True)
+
+    There the flag is accepted but has no effect: the child stays attached
+    to the parent's console and dies when that console closes.
+    """
+    if not IS_WINDOWS:
+        return {"start_new_session": True}
+    return {"creationflags": windows_detach_flags()}
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
--- a/hermes_cli/auth_commands.py
+++ b/hermes_cli/auth_commands.py
@ -245,6 +245,47 @@ def auth_add_command(args) -> None:
        return

    if provider == "nous":
+        # Codex-style auto-import: if a shared Nous credential lives at
+        # <hermes-root>/shared/nous_auth.json (written by any previous
+        # successful login), offer to import it instead of running the
+        # full device-code flow. This makes `hermes --profile <name>
+        # auth add nous --type oauth` a one-tap operation for users who
+        # run multiple profiles.
+        shared = auth_mod._read_shared_nous_state()
+        if shared:
+            try:
+                path = auth_mod._nous_shared_store_path()
+            except RuntimeError:
+                path = None
+            print()
+            if path:
+                print(f"Found existing Nous OAuth credentials at {path}")
+            else:
+                print("Found existing shared Nous OAuth credentials")
+            try:
+                do_import = input("Import these credentials? [Y/n]: ").strip().lower()
+            except (EOFError, KeyboardInterrupt):
+                do_import = "y"
+            if do_import in {"", "y", "yes"}:
+                print("Rehydrating Nous session from shared credentials...")
+                rehydrated = auth_mod._try_import_shared_nous_state(
+                    timeout_seconds=getattr(args, "timeout", None) or 15.0,
+                    min_key_ttl_seconds=max(
+                        60, int(getattr(args, "min_key_ttl_seconds", 5 * 60))
+                    ),
+                )
+                if rehydrated is not None:
+                    custom_label = (getattr(args, "label", None) or "").strip() or None
+                    entry = auth_mod.persist_nous_credentials(rehydrated, label=custom_label)
+                    shown_label = entry.label if entry is not None else label_from_token(
+                        rehydrated.get("access_token", ""), _oauth_default_label(provider, 1),
+                    )
+                    print(f'Imported {provider} OAuth credentials: "{shown_label}"')
+                    return
+                # Rehydrate failed (expired refresh_token, portal down, etc.)
+                # — fall through to device-code flow.
+                print("Could not refresh shared credentials — falling back to device-code login.")
+
        creds = auth_mod._nous_device_code_login(
            portal_base_url=getattr(args, "portal_url", None),
            inference_base_url=getattr(args, "inference_url", None),
--- a/hermes_cli/backup.py
+++ b/hermes_cli/backup.py
@ -61,6 +61,9 @@ _EXCLUDED_NAMES = {
    "cron.pid",
 }

+# zipfile.open() drops Unix mode bits on extract; restore tightens these to 0600.
+_SECRET_FILE_NAMES = {".env", "auth.json", "state.db"}
+

 def _should_exclude(rel_path: Path) -> bool:
    """Return True if *rel_path* (relative to hermes root) should be skipped."""
@ -295,7 +298,7 @@ def _detect_prefix(zf: zipfile.ZipFile) -> str:
    if len(first_parts) == 1:
        prefix = first_parts.pop()
        # Only strip if it looks like a hermes dir name
-        if prefix in (".hermes", "hermes"):
+        if prefix in {".hermes", "hermes"}:
            return prefix + "/"

    return ""
@ -346,7 +349,7 @@ def run_import(args) -> None:
            except (EOFError, KeyboardInterrupt):
                print("\nAborted.")
                sys.exit(1)
-            if answer not in ("y", "yes"):
+            if answer not in {"y", "yes"}:
                print("Aborted.")
                return

@ -381,6 +384,8 @@ def run_import(args) -> None:
                target.parent.mkdir(parents=True, exist_ok=True)
                with zf.open(member) as src, open(target, "wb") as dst:
                    dst.write(src.read())
+                if target.name in _SECRET_FILE_NAMES:
+                    os.chmod(target, 0o600)
                restored += 1
            except (PermissionError, OSError) as exc:
                errors.append(f"  {rel}: {exc}")
@ -568,7 +573,7 @@ def create_quick_snapshot(
        "total_size": sum(manifest.values()),
        "files": manifest,
    }
-    with open(snap_dir / "manifest.json", "w") as f:
+    with open(snap_dir / "manifest.json", "w", encoding="utf-8") as f:
        json.dump(meta, f, indent=2)

    # Auto-prune
@ -594,7 +599,7 @@ def list_quick_snapshots(
        manifest_path = d / "manifest.json"
        if manifest_path.exists():
            try:
-                with open(manifest_path) as f:
+                with open(manifest_path, encoding="utf-8") as f:
                    results.append(json.load(f))
            except (json.JSONDecodeError, OSError):
                results.append({"id": d.name, "file_count": 0, "total_size": 0})
@ -624,7 +629,7 @@ def restore_quick_snapshot(
    if not manifest_path.exists():
        return False

-    with open(manifest_path) as f:
+    with open(manifest_path, encoding="utf-8") as f:
        meta = json.load(f)

    restored = 0
@ -788,9 +793,16 @@ def _prune_pre_update_backups(backup_dir: Path, keep: int) -> int:
    Returns the number of files deleted.  Only touches files matching
    ``pre-update-*.zip`` so hand-made zips dropped in the same directory
    are never touched.
+
+    ``keep`` is floored to 1 because this helper is only called immediately
+    after a fresh backup is written: deleting that backup right after the
+    user paid the disk/CPU cost to create it would leave them worse off
+    than no backup at all (and the wrapper in ``main.py`` would still print
+    a misleading ``Saved: <path>`` line for a file that no longer exists).
+    Operators who genuinely don't want a backup should set
+    ``updates.pre_update_backup: false`` in config — that gates creation.
    """
-    if keep < 0:
-        keep = 0
+    keep = max(keep, 1)
    if not backup_dir.exists():
        return 0

@ -862,8 +874,7 @@ def _prune_pre_migration_backups(backup_dir: Path, keep: int) -> int:
    Only touches files matching ``pre-migration-*.zip`` so other backups in
    the same directory are never touched.
    """
-    if keep < 0:
-        keep = 0
+    keep = max(keep, 0)
    if not backup_dir.exists():
        return 0

--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@ -206,9 +206,12 @@ def check_for_updates() -> Optional[int]:
    if embedded_rev:
        behind = _check_via_rev(embedded_rev)
    else:
-        repo_dir = hermes_home / "hermes-agent"
+        # Prefer the running code's location over the profile-scoped path.
+        # $HERMES_HOME/hermes-agent/ may be a stale copy from --clone-all;
+        # Path(__file__) always resolves to the actual installed checkout.
+        repo_dir = Path(__file__).parent.parent.resolve()
        if not (repo_dir / ".git").exists():
-            repo_dir = Path(__file__).parent.parent.resolve()
+            repo_dir = hermes_home / "hermes-agent"
        if not (repo_dir / ".git").exists():
            return None
        behind = _check_via_local_git(repo_dir)
@ -222,11 +225,16 @@ def check_for_updates() -> Optional[int]:


 def _resolve_repo_dir() -> Optional[Path]:
-    """Return the active Hermes git checkout, or None if this isn't a git install."""
-    hermes_home = get_hermes_home()
-    repo_dir = hermes_home / "hermes-agent"
+    """Return the active Hermes git checkout, or None if this isn't a git install.
+
+    Prefers the running code's location over the profile-scoped path
+    because ``$HERMES_HOME/hermes-agent/`` may be a stale copy carried
+    over by ``--clone-all``.
+    """
+    repo_dir = Path(__file__).parent.parent.resolve()
    if not (repo_dir / ".git").exists():
-        repo_dir = Path(__file__).parent.parent.resolve()
+        hermes_home = get_hermes_home()
+        repo_dir = hermes_home / "hermes-agent"
    return repo_dir if (repo_dir / ".git").exists() else None


--- a/hermes_cli/checkpoints.py
+++ b/hermes_cli/checkpoints.py
@ -0,0 +1,244 @@
+"""`hermes checkpoints` CLI subcommand.
+
+Gives users direct visibility and control over the filesystem checkpoint
+store at ``~/.hermes/checkpoints/``.  Actions:
+
+    hermes checkpoints               # same as `status`
+    hermes checkpoints status        # total size, project count, breakdown
+    hermes checkpoints list          # per-project checkpoint counts + workdir
+    hermes checkpoints prune [opts]  # force a sweep (ignores the 24h marker)
+    hermes checkpoints clear [-f]    # nuke the entire base (asks first)
+    hermes checkpoints clear-legacy  # delete just the legacy-* archives
+
+Examples::
+
+    hermes checkpoints
+    hermes checkpoints prune --retention-days 3 --max-size-mb 200
+    hermes checkpoints clear -f
+
+None of these require the agent to be running.  Safe to call any time.
+"""
+
+from __future__ import annotations
+
+import argparse
+import time
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict
+
+
+def _fmt_bytes(n: int) -> str:
+    """Format a byte count with a binary (1024-based) unit suffix.
+
+    Falsy input (``None``, ``0``) is shown as ``0 B``.  Plain bytes are
+    printed as an integer; KB and above use one decimal place.  TB is the
+    largest unit, so bigger values keep growing in TB.
+    """
+    value = float(n or 0)
+    if value < 1024:
+        return f"{int(value)} B"
+    for suffix in ("KB", "MB", "GB"):
+        value /= 1024
+        if value < 1024:
+            return f"{value:.1f} {suffix}"
+    return f"{value / 1024:.1f} TB"
+
+
+def _fmt_ts(ts: Any) -> str:
+    """Render a Unix timestamp as local ``YYYY-MM-DD HH:MM``.
+
+    Args:
+        ts: Anything ``float()`` accepts (number or numeric string).
+
+    Returns:
+        The formatted local time, or ``—`` when *ts* is missing, not
+        numeric, or outside the range the platform can convert.
+    """
+    try:
+        return datetime.fromtimestamp(float(ts)).strftime("%Y-%m-%d %H:%M")
+    except (TypeError, ValueError, OverflowError, OSError):
+        # OverflowError / OSError: float() or fromtimestamp() rejected an
+        # out-of-range value; show the placeholder instead of crashing.
+        return "—"
+
+
+def _fmt_age(ts: Any) -> str:
+    """Describe how long ago a Unix timestamp was, e.g. ``5m ago``.
+
+    Args:
+        ts: Anything ``float()`` accepts (number or numeric string).
+
+    Returns:
+        ``now`` for timestamps in the future, otherwise the age truncated
+        to whole seconds, minutes, hours or days.  ``—`` is returned when
+        *ts* is missing, not numeric, too large for a float, or yields a
+        non-finite age (NaN / infinitely old).
+    """
+    import math
+
+    try:
+        age = time.time() - float(ts)
+    except (TypeError, ValueError, OverflowError):
+        return "—"
+    if age < 0:
+        return "now"
+    if not math.isfinite(age):
+        # int() below would raise on NaN or infinity.
+        return "—"
+    for upper, divisor, suffix in ((60, 1, "s"), (3600, 60, "m"), (86400, 3600, "h")):
+        if age < upper:
+            return f"{int(age / divisor)}{suffix} ago"
+    return f"{int(age / 86400)}d ago"
+
+
+def cmd_status(args: argparse.Namespace) -> int:
+    """Print a summary of the checkpoint store and return exit code 0.
+
+    Shows the base path, the total/store/legacy sizes and the project
+    count reported by ``store_status()``, then a table of projects ordered
+    by most recent ``last_touch`` and, when present, the legacy archives
+    ordered by newest ``mtime``.
+
+    Args:
+        args: Parsed CLI namespace.  ``limit`` is optional (the bare
+            ``hermes checkpoints`` invocation does not define it); a
+            missing, zero or negative value falls back to 20 rows.
+    """
+    from tools.checkpoint_manager import store_status
+
+    info = store_status()
+    base = info["base"]
+    print(f"Checkpoint base: {base}")
+    print(f"Total size:      {_fmt_bytes(info['total_size_bytes'])}")
+    print(f"  store/         {_fmt_bytes(info['store_size_bytes'])}")
+    print(f"  legacy-*       {_fmt_bytes(info['legacy_size_bytes'])}")
+    print(f"Projects:        {info['project_count']}")
+
+    projects = sorted(
+        info["projects"],
+        key=lambda p: (p.get("last_touch") or 0),
+        reverse=True,
+    )
+    # A negative limit would slice rows off the *end* of the list, so
+    # anything that is not a positive int uses the default.
+    limit = getattr(args, "limit", None)
+    if not isinstance(limit, int) or limit <= 0:
+        limit = 20
+    if projects:
+        print()
+        print(f"  {'WORKDIR':<60}  {'COMMITS':>7}  {'LAST TOUCH':>12}  STATE")
+        for p in projects[:limit]:
+            wd = p.get("workdir") or "(unknown)"
+            if len(wd) > 60:
+                # Keep the tail of long paths; it is the distinguishing part.
+                wd = "…" + wd[-59:]
+            exists = p.get("exists")
+            state = "live" if exists else "orphan"
+            # ``or 0`` also covers an explicit None, which cannot be
+            # formatted with a width spec.
+            commits = p.get("commits") or 0
+            last = _fmt_age(p.get("last_touch"))
+            print(f"  {wd:<60}  {commits:>7}  {last:>12}  {state}")
+
+    legacy = info.get("legacy_archives", [])
+    if legacy:
+        print()
+        print(f"Legacy archives ({len(legacy)}):")
+        for arch in sorted(legacy, key=lambda a: a.get("mtime", 0), reverse=True):
+            print(f"  {arch['name']:<40}  {_fmt_bytes(arch['size_bytes']):>10}")
+        print()
+        print("Clear with: hermes checkpoints clear-legacy")
+    return 0
+
+
+def cmd_list(args: argparse.Namespace) -> int:
+    """Handle ``hermes checkpoints list`` by delegating to ``cmd_status``.
+
+    The output and the returned exit code are exactly those of
+    ``cmd_status(args)``.
+    """
+    # `list` is just a terser status — already covered.
+    return cmd_status(args)
+
+
+def cmd_prune(args: argparse.Namespace) -> int:
+    """Run ``prune_checkpoints`` with the CLI options and print its report.
+
+    Reads ``retention_days``, ``max_size_mb`` and ``keep_orphans`` from
+    *args*, echoes the effective settings, then prints the counters the
+    sweep returned.  Always returns exit code 0.
+    """
+    from tools.checkpoint_manager import prune_checkpoints
+
+    days = args.retention_days
+    size_cap_mb = args.max_size_mb
+    # The CLI flag is phrased negatively; the library wants the positive.
+    drop_orphans = not args.keep_orphans
+
+    print("Pruning checkpoint store…")
+    print(f"  retention_days:    {days}")
+    print(f"  delete_orphans:    {drop_orphans}")
+    print(f"  max_total_size_mb: {size_cap_mb}")
+    print()
+
+    summary = prune_checkpoints(
+        retention_days=days,
+        delete_orphans=drop_orphans,
+        max_total_size_mb=size_cap_mb,
+    )
+    print(f"Scanned:         {summary['scanned']}")
+    print(f"Deleted orphan:  {summary['deleted_orphan']}")
+    print(f"Deleted stale:   {summary['deleted_stale']}")
+    print(f"Errors:          {summary['errors']}")
+    print(f"Bytes reclaimed: {_fmt_bytes(summary['bytes_freed'])}")
+    return 0
+
+
+def _confirm(prompt: str) -> bool:
+    """Ask a yes/no question on stdin; the default answer is "no".
+
+    Returns True only for ``y`` / ``yes`` (case-insensitive, surrounding
+    whitespace ignored).  End-of-input or Ctrl+C counts as "no" and ends
+    the prompt line with a newline.
+    """
+    try:
+        answer = input(f"{prompt} [y/N]: ")
+    except (EOFError, KeyboardInterrupt):
+        print()
+        return False
+    else:
+        return answer.strip().lower() in {"y", "yes"}
+
+
+def cmd_clear(args: argparse.Namespace) -> int:
+    """Delete the whole checkpoint base after showing what will be lost.
+
+    Exit codes: 0 when nothing existed or the base was cleared, 1 when the
+    user declined the confirmation, 2 when ``clear_all()`` reported that
+    nothing was deleted.  ``args.force`` skips the confirmation prompt.
+    """
+    from tools.checkpoint_manager import CHECKPOINT_BASE, clear_all, store_status
+
+    info = store_status()
+    nothing_on_disk = (
+        info["total_size_bytes"] == 0 and not Path(CHECKPOINT_BASE).exists()
+    )
+    if nothing_on_disk:
+        print("Nothing to clear — checkpoint base does not exist.")
+        return 0
+
+    print(f"This will delete the ENTIRE checkpoint base at {info['base']}")
+    print(f"  size:        {_fmt_bytes(info['total_size_bytes'])}")
+    print(f"  projects:    {info['project_count']}")
+    print(f"  legacy dirs: {len(info.get('legacy_archives', []))}")
+    print()
+    print("All /rollback history for every working directory will be lost.")
+    # --force short-circuits, so the prompt is never shown when it is set.
+    if not (args.force or _confirm("Proceed?")):
+        print("Aborted.")
+        return 1
+
+    outcome = clear_all()
+    if not outcome["deleted"]:
+        print("Could not clear checkpoint base (see logs).")
+        return 2
+    print(f"Cleared. Reclaimed {_fmt_bytes(outcome['bytes_freed'])}.")
+    return 0
+
+
+def cmd_clear_legacy(args: argparse.Namespace) -> int:
+    """Delete only the legacy archives listed by ``store_status()``.
+
+    Lists every archive with its size, asks for confirmation unless
+    ``args.force`` is set, then calls ``clear_legacy()``.  Returns 0 when
+    there was nothing to delete or the deletion ran, 1 when the user
+    declined.
+    """
+    from tools.checkpoint_manager import clear_legacy, store_status
+
+    archives = store_status().get("legacy_archives", [])
+    if not archives:
+        print("No legacy archives to clear.")
+        return 0
+
+    combined = sum(entry.get("size_bytes", 0) for entry in archives)
+    print(f"Found {len(archives)} legacy archive(s), total {_fmt_bytes(combined)}:")
+    for entry in archives:
+        print(f"  {entry['name']:<40}  {_fmt_bytes(entry['size_bytes']):>10}")
+    print()
+    print("Legacy archives hold pre-v2 per-project shadow repos, moved aside")
+    print("during the single-store migration. Delete when you're confident")
+    print("you don't need the old /rollback history.")
+    # --force short-circuits, so the prompt is never shown when it is set.
+    if not (args.force or _confirm("Delete all legacy archives?")):
+        print("Aborted.")
+        return 1
+
+    outcome = clear_legacy()
+    print(f"Deleted {outcome['deleted']} archive(s), reclaimed {_fmt_bytes(outcome['bytes_freed'])}.")
+    return 0
+
+
+def register_cli(parser: argparse.ArgumentParser) -> None:
+    """Attach the ``hermes checkpoints`` subcommands to *parser*.
+
+    Each subparser stores its handler under ``func``.  *parser* itself
+    defaults ``func`` to ``cmd_status`` so the bare command behaves like
+    ``status``.
+    """
+    parser.set_defaults(func=cmd_status)
+    commands = parser.add_subparsers(dest="checkpoints_command", metavar="COMMAND")
+
+    status_cmd = commands.add_parser(
+        "status",
+        help="Show total size, project count, and per-project breakdown",
+    )
+    status_cmd.add_argument(
+        "--limit", type=int, default=20,
+        help="Max projects to list (default 20)",
+    )
+    status_cmd.set_defaults(func=cmd_status)
+
+    list_cmd = commands.add_parser("list", help="Alias for 'status'")
+    list_cmd.add_argument("--limit", type=int, default=20)
+    list_cmd.set_defaults(func=cmd_list)
+
+    prune_cmd = commands.add_parser(
+        "prune",
+        help="Delete orphan/stale checkpoints and GC the store",
+    )
+    prune_cmd.add_argument(
+        "--retention-days", type=int, default=7,
+        help="Drop projects whose last_touch is older than N days (default 7)",
+    )
+    prune_cmd.add_argument(
+        "--max-size-mb", type=int, default=500,
+        help="After orphan/stale prune, drop oldest commits per project "
+             "until total size <= this (default 500)",
+    )
+    prune_cmd.add_argument(
+        "--keep-orphans", action="store_true",
+        help="Skip deleting projects whose workdir no longer exists",
+    )
+    prune_cmd.set_defaults(func=cmd_prune)
+
+    # Both destructive commands take the same confirmation-bypass flag.
+    destructive = (
+        ("clear",
+         "Delete the entire checkpoint base (all /rollback history)",
+         cmd_clear),
+        ("clear-legacy",
+         "Delete only the legacy-<ts>/ archives from v1 migration",
+         cmd_clear_legacy),
+    )
+    for cmd_name, summary, handler in destructive:
+        sub = commands.add_parser(cmd_name, help=summary)
+        sub.add_argument("-f", "--force", action="store_true",
+                         help="Skip confirmation prompt")
+        sub.set_defaults(func=handler)
--- a/hermes_cli/claw.py
+++ b/hermes_cli/claw.py
@ -235,6 +235,9 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
    """
    findings: list[tuple[Path, str]] = []

+    if not source_dir.exists():
+        return findings
+
    # Direct state files in the root
    for name in ("todo.json", "sessions", "logs"):
        candidate = source_dir / name
@ -243,7 +246,12 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
            findings.append((candidate, f"Root {kind}: {name}"))

    # State files inside workspace directories
-    for child in sorted(source_dir.iterdir()):
+    try:
+        children = sorted(source_dir.iterdir())
+    except OSError:
+        return findings
+
+    for child in children:
        if not child.is_dir() or child.name.startswith("."):
            continue
        # Check for workspace-like subdirectories
@ -290,7 +298,7 @@ def claw_command(args):

    if action == "migrate":
        _cmd_migrate(args)
-    elif action in ("cleanup", "clean"):
+    elif action in {"cleanup", "clean"}:
        _cmd_cleanup(args)
    else:
        print("Usage: hermes claw <command> [options]")
@ -662,25 +670,31 @@ def _cmd_cleanup(args):
        elif not auto_yes and not sys.stdin.isatty():
            print_info(f"Non-interactive session — would archive: {source_dir}")
            print_info("To execute, re-run with: hermes claw cleanup --yes")
+        elif auto_yes or prompt_yes_no(f"Archive {source_dir}?", default=True):
+            try:
+                archive_path = _archive_directory(source_dir)
+                print_success(f"Archived: {source_dir} → {archive_path}")
+                total_archived += 1
+            except OSError as e:
+                print_error(f"Could not archive: {e}")
+                print_info(f"Try manually: mv {source_dir} {source_dir}.pre-migration")
        else:
-            if auto_yes or prompt_yes_no(f"Archive {source_dir}?", default=True):
-                try:
-                    archive_path = _archive_directory(source_dir)
-                    print_success(f"Archived: {source_dir} → {archive_path}")
-                    total_archived += 1
-                except OSError as e:
-                    print_error(f"Could not archive: {e}")
-                    print_info(f"Try manually: mv {source_dir} {source_dir}.pre-migration")
-            else:
-                print_info("Skipped.")
+            print_info("Skipped.")

    # Summary
    print()
    if dry_run:
-        print_info(f"Dry run complete. {len(dirs_to_check)} directory(ies) would be archived.")
+        _n_dirs = len(dirs_to_check)
+        print_info(
+            f"Dry run complete. {_n_dirs} "
+            f"{'directory' if _n_dirs == 1 else 'directories'} would be archived."
+        )
        print_info("Run without --dry-run to archive them.")
    elif total_archived:
-        print_success(f"Cleaned up {total_archived} OpenClaw directory(ies).")
+        print_success(
+            f"Cleaned up {total_archived} OpenClaw "
+            f"{'directory' if total_archived == 1 else 'directories'}."
+        )
        print_info("Directories were renamed, not deleted. You can undo by renaming them back.")
    else:
        print_info("No directories were archived.")
--- a/hermes_cli/codex_models.py
+++ b/hermes_cli/codex_models.py
@ -16,6 +16,19 @@ DEFAULT_CODEX_MODELS: List[str] = [
    "gpt-5.4-mini",
    "gpt-5.4",
    "gpt-5.3-codex",
+    # gpt-5.3-codex-spark is in research preview and is exposed *only* via
+    # the Codex CLI / OAuth backend (chatgpt.com/backend-api/codex/models)
+    # for ChatGPT Pro subscribers. It is NOT available in the public OpenAI
+    # API, so it intentionally stays out of the "openai" provider catalog
+    # in hermes_cli/models.py — only the openai-codex (OAuth) provider
+    # surfaces it. The Codex backend reports ``supported_in_api: false`` for
+    # this slug; that flag describes API availability, not Codex backend
+    # availability, so the fetch/cache code paths below intentionally do
+    # not filter on it. PR #12994 removed this entry on the assumption it
+    # was unsupported — that was wrong; restored here. Keep it in the
+    # curated fallback so Pro users still see Spark in `/model` when live
+    # discovery is unavailable (offline first run, transient API failure).
+    "gpt-5.3-codex-spark",
    "gpt-5.2-codex",
    "gpt-5.1-codex-max",
    "gpt-5.1-codex-mini",
@ -26,6 +39,11 @@ _FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [
    ("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")),
    ("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")),
    ("gpt-5.3-codex", ("gpt-5.2-codex",)),
+    # Surface Spark whenever any compatible Codex template is present so
+    # accounts hitting the live endpoint with an older lineup still see
+    # Spark in the picker. Backend gates real availability by ChatGPT Pro
+    # entitlement; Hermes does not.
+    ("gpt-5.3-codex-spark", ("gpt-5.3-codex", "gpt-5.2-codex")),
 ]


@ -78,10 +96,12 @@ def _fetch_models_from_api(access_token: str) -> List[str]:
        if not isinstance(slug, str) or not slug.strip():
            continue
        slug = slug.strip()
-        if item.get("supported_in_api") is False:
-            continue
+        # Codex CLI's catalog uses ``supported_in_api`` for the public OpenAI
+        # API, not for the OAuth-backed Codex backend that this provider uses.
+        # Some valid Codex CLI models (for example gpt-5.3-codex-spark) are
+        # marked false here but are still accepted by the Codex route.
        visibility = item.get("visibility", "")
-        if isinstance(visibility, str) and visibility.strip().lower() in ("hide", "hidden"):
+        if isinstance(visibility, str) and visibility.strip().lower() in {"hide", "hidden"}:
            continue
        priority = item.get("priority")
        rank = int(priority) if isinstance(priority, (int, float)) else 10_000
@ -128,10 +148,11 @@ def _read_cache_models(codex_home: Path) -> List[str]:
            if not isinstance(slug, str) or not slug.strip():
                continue
            slug = slug.strip()
-            if item.get("supported_in_api") is False:
-                continue
+            # Do not filter on ``supported_in_api`` here.  It describes the
+            # public OpenAI API, while Hermes openai-codex talks to the same
+            # OAuth-backed Codex backend as Codex CLI.
            visibility = item.get("visibility")
-            if isinstance(visibility, str) and visibility.strip().lower() in ("hide", "hidden"):
+            if isinstance(visibility, str) and visibility.strip().lower() in {"hide", "hidden"}:
                continue
            priority = item.get("priority")
            rank = int(priority) if isinstance(priority, (int, float)) else 10_000
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@ -10,6 +10,7 @@ To add an alias: set ``aliases=("short",)`` on the existing ``CommandDef``.

 from __future__ import annotations

+import logging
 import os
 import re
 import shutil
@ -19,6 +20,10 @@ from collections.abc import Callable, Mapping
 from dataclasses import dataclass
 from typing import Any

+from utils import is_truthy_value
+
+logger = logging.getLogger(__name__)
+
 # prompt_toolkit is an optional CLI dependency — only needed for
 # SlashCommandCompleter and SlashCommandAutoSuggest.  Gateway and test
 # environments that lack it must still be able to import this module
@ -59,7 +64,9 @@ class CommandDef:
 COMMAND_REGISTRY: list[CommandDef] = [
    # Session
    CommandDef("new", "Start a new session (fresh session ID + history)", "Session",
-               aliases=("reset",)),
+               aliases=("reset",), args_hint="[name]"),
+    CommandDef("topic", "Enable or inspect Telegram DM topic sessions", "Session",
+               gateway_only=True, args_hint="[off|help|session-id]"),
    CommandDef("clear", "Clear screen and start a new session", "Session",
               cli_only=True),
    CommandDef("redraw", "Force a full UI repaint (recovers from terminal drift)", "Session",
@ -72,6 +79,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("undo", "Remove the last user/assistant exchange", "Session"),
    CommandDef("title", "Set a title for the current session", "Session",
               args_hint="[name]"),
+    CommandDef("handoff", "Hand off this session to a messaging platform (Telegram, Discord, etc.)", "Session",
+               args_hint="<platform>", cli_only=True),
    CommandDef("branch", "Branch the current session (explore a different path)", "Session",
               aliases=("fork",), args_hint="[name]"),
    CommandDef("compress", "Manually compress conversation context", "Session",
@ -93,13 +102,19 @@ COMMAND_REGISTRY: list[CommandDef] = [
               aliases=("q",), args_hint="<prompt>"),
    CommandDef("steer", "Inject a message after the next tool call without interrupting", "Session",
               args_hint="<prompt>"),
+    CommandDef("goal", "Set a standing goal Hermes works on across turns until achieved", "Session",
+               args_hint="[text | pause | resume | clear | status]"),
    CommandDef("status", "Show session info", "Session"),
+    CommandDef("whoami", "Show your slash command access (admin / user)", "Info"),
    CommandDef("profile", "Show active profile name and home directory", "Info"),
    CommandDef("sethome", "Set this chat as the home channel", "Session",
               gateway_only=True, aliases=("set-home",)),
    CommandDef("resume", "Resume a previously-named session", "Session",
               args_hint="[name]"),

+    # Session browsing
+    CommandDef("sessions", "Browse and resume previous sessions", "Session"),
+
    # Configuration
    CommandDef("config", "Show current configuration", "Configuration",
               cli_only=True),
@ -148,9 +163,14 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
               cli_only=True, args_hint="[subcommand]",
               subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
-    CommandDef("curator", "Background skill maintenance (status, run, pin, archive)",
+    CommandDef("curator", "Background skill maintenance (status, run, pin, archive, list-archived)",
               "Tools & Skills", args_hint="[subcommand]",
-               subcommands=("status", "run", "pause", "resume", "pin", "unpin", "restore")),
+               subcommands=("status", "run", "pause", "resume", "pin", "unpin", "restore", "list-archived")),
+    CommandDef("kanban", "Multi-profile collaboration board (tasks, links, comments)",
+               "Tools & Skills", args_hint="[subcommand]",
+               subcommands=("list", "ls", "show", "create", "assign", "link", "unlink",
+                            "claim", "comment", "complete", "block", "unblock", "archive",
+                            "tail", "dispatch", "context", "init", "gc")),
    CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
               cli_only=True),
    CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
@ -366,7 +386,7 @@ def _resolve_config_gates() -> set[str]:
            else:
                val = None
                break
-        if val:
+        if is_truthy_value(val, default=False):
            result.add(cmd.name)
    return result

@ -387,6 +407,11 @@ def _is_gateway_available(cmd: CommandDef, config_overrides: set[str] | None = N
    return False


+def _requires_argument(args_hint: str) -> bool:
+    """Return True when selecting a command without text would be incomplete."""
+    return args_hint.strip().startswith("<")
+
+
 def gateway_help_lines() -> list[str]:
    """Generate gateway help text lines from the registry."""
    overrides = _resolve_config_gates()
@ -443,7 +468,9 @@ def telegram_bot_commands() -> list[tuple[str, str]]:

    Telegram command names cannot contain hyphens, so they are replaced with
    underscores.  Aliases are skipped -- Telegram shows one menu entry per
-    canonical command.
+    canonical command. Commands that require arguments are skipped because
+    selecting a Telegram BotCommand sends only ``/command`` and would execute
+    an incomplete command.

    Plugin-registered slash commands are included so plugins get native
    autocomplete in Telegram without touching core code.
@ -453,10 +480,14 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
    for cmd in COMMAND_REGISTRY:
        if not _is_gateway_available(cmd, overrides):
            continue
+        if _requires_argument(cmd.args_hint):
+            continue
        tg_name = _sanitize_telegram_name(cmd.name)
        if tg_name:
            result.append((tg_name, cmd.description))
-    for name, description, _args_hint in _iter_plugin_command_entries():
+    for name, description, args_hint in _iter_plugin_command_entries():
+        if _requires_argument(args_hint):
+            continue
        tg_name = _sanitize_telegram_name(name)
        if tg_name:
            result.append((tg_name, description))
@ -490,9 +521,9 @@ def _sanitize_telegram_name(raw: str) -> str:


 def _clamp_command_names(
-    entries: list[tuple[str, str]],
+    entries: list[tuple[str, ...]],
    reserved: set[str],
-) -> list[tuple[str, str]]:
+) -> list[tuple[str, ...]]:
    """Enforce 32-char command name limit with collision avoidance.

    Both Telegram and Discord cap slash command names at 32 characters.
@ -500,10 +531,15 @@ def _clamp_command_names(
    (against *reserved* names or earlier entries in the same batch), the name is
    shortened to 31 chars and a digit ``0``-``9`` is appended to differentiate.
    If all 10 digit slots are taken the entry is silently dropped.
+
+    Accepts tuples of any length >= 2.  Extra elements beyond ``(name, desc)``
+    (e.g. ``cmd_key``) are passed through unchanged, so callers can attach
+    metadata that survives the rename.
    """
    used: set[str] = set(reserved)
-    result: list[tuple[str, str]] = []
-    for name, desc in entries:
+    result: list[tuple] = []
+    for entry in entries:
+        name, desc, *extra = entry
        if len(name) > _CMD_NAME_LIMIT:
            candidate = name[:_CMD_NAME_LIMIT]
            if candidate in used:
@ -519,7 +555,7 @@ def _clamp_command_names(
        if name in used:
            continue
        used.add(name)
-        result.append((name, desc))
+        result.append((name, desc, *extra))
    return result


@ -602,13 +638,26 @@ def _collect_gateway_skill_entries(
    try:
        from agent.skill_commands import get_skill_commands
        from tools.skills_tool import SKILLS_DIR
+        from agent.skill_utils import get_external_skills_dirs
        _skills_dir = str(SKILLS_DIR.resolve())
-        _hub_dir = str((SKILLS_DIR / ".hub").resolve())
+        _hub_dir = str((SKILLS_DIR / ".hub").resolve()).rstrip("/") + "/"
+        # Build set of allowed directory prefixes: local skills dir + any
+        # user-configured ``skills.external_dirs``. Ensure each prefix ends
+        # with ``/`` so ``/my-skills`` does not also match ``/my-skills-extra``.
+        # Without this widening, external skills are visible in
+        # ``hermes skills list`` and the agent's ``/skill-name`` dispatch but
+        # silently excluded from gateway slash menus (#8110).
+        _allowed_prefixes = [_skills_dir.rstrip("/") + "/"]
+        _allowed_prefixes.extend(
+            str(d).rstrip("/") + "/" for d in get_external_skills_dirs()
+        )
        skill_cmds = get_skill_commands()
        for cmd_key in sorted(skill_cmds):
            info = skill_cmds[cmd_key]
            skill_path = info.get("skill_md_path", "")
-            if not skill_path.startswith(_skills_dir):
+            if not skill_path:
+                continue
+            if not any(skill_path.startswith(prefix) for prefix in _allowed_prefixes):
                continue
            if skill_path.startswith(_hub_dir):
                continue
@ -626,17 +675,15 @@ def _collect_gateway_skill_entries(
    except Exception:
        pass

-    # Clamp names; _clamp_command_names works on (name, desc) pairs so we
-    # need to zip/unzip.
-    skill_pairs = [(n, d) for n, d, _ in skill_triples]
-    key_by_pair = {(n, d): k for n, d, k in skill_triples}
-    skill_pairs = _clamp_command_names(skill_pairs, reserved_names)
+    # Clamp names; cmd_key is passed through as extra payload so it survives
+    # any clamp-induced renames.
+    skill_triples = _clamp_command_names(skill_triples, reserved_names)

    # Skills fill remaining slots — only tier that gets trimmed
    remaining = max(0, max_slots - len(all_entries))
-    hidden_count = max(0, len(skill_pairs) - remaining)
-    for n, d in skill_pairs[:remaining]:
-        all_entries.append((n, d, key_by_pair.get((n, d), "")))
+    hidden_count = max(0, len(skill_triples) - remaining)
+    for n, d, k in skill_triples[:remaining]:
+        all_entries.append((n, d, k))

    return all_entries[:max_slots], hidden_count

@ -712,24 +759,40 @@ def discord_skill_commands(
 def discord_skill_commands_by_category(
    reserved_names: set[str],
 ) -> tuple[dict[str, list[tuple[str, str, str]]], list[tuple[str, str, str]], int]:
-    """Return skill entries organized by category for Discord ``/skill`` subcommand groups.
+    """Return skill entries organized by category for Discord ``/skill`` autocomplete.

-    Skills whose directory is nested at least 2 levels under ``SKILLS_DIR``
+    Skills whose directory is nested at least 2 levels under a scan root
    (e.g. ``creative/ascii-art/SKILL.md``) are grouped by their top-level
    category.  Root-level skills (e.g. ``dogfood/SKILL.md``) are returned as
-    *uncategorized* — the caller should register them as direct subcommands
-    of the ``/skill`` group.
+    *uncategorized*.

-    The same filtering as :func:`discord_skill_commands` is applied: hub
-    skills excluded, per-platform disabled excluded, names clamped.
+    Scan roots include the local ``SKILLS_DIR`` **and** any configured
+    ``skills.external_dirs`` — matching the widened filter applied to the
+    flat ``discord_skill_commands()`` collector in #18741. Without this
+    parity, external-dir skills are visible via ``hermes skills list`` and
+    the agent's ``/skill-name`` dispatch but silently absent from Discord's
+    ``/skill`` autocomplete.
+
+    Filtering mirrors :func:`discord_skill_commands`: hub skills excluded,
+    per-platform disabled excluded, names clamped to 32 chars, descriptions
+    clamped to 100 chars.
+
+    The legacy 25-group × 25-subcommand caps (from the old nested
+    ``/skill <cat> <name>`` layout) are **not** applied — the live caller
+    (``_register_skill_group`` in ``gateway/platforms/discord.py``, refactored
+    in PR #11580) flattens these results and feeds them into a single
+    autocomplete callback, which scales to thousands of entries without any
+    per-command payload concerns. ``hidden_count`` is retained in the return
+    tuple for backward compatibility and still reports skills dropped for
+    other reasons (32-char clamp collision with a reserved or earlier name).

    Returns:
        ``(categories, uncategorized, hidden_count)``

        - *categories*: ``{category_name: [(name, description, cmd_key), ...]}``
        - *uncategorized*: ``[(name, description, cmd_key), ...]``
-        - *hidden_count*: skills dropped due to Discord group limits
-          (25 subcommand groups, 25 subcommands per group)
+        - *hidden_count*: skills dropped due to name clamp collisions
+          against already-registered command names.
    """
    from pathlib import Path as _P

@ -743,14 +806,33 @@ def discord_skill_commands_by_category(
    # Collect raw skill data --------------------------------------------------
    categories: dict[str, list[tuple[str, str, str]]] = {}
    uncategorized: list[tuple[str, str, str]] = []
-    _names_used: set[str] = set(reserved_names)
+    # Map clamped-32-char-name → what it came from, so we can emit an
+    # actionable warning on collision. Reserved (gateway-builtin) command
+    # names are marked with a sentinel so the warning distinguishes
+    # "skill collided with a reserved command" from "two skills collided
+    # on the 32-char clamp" — the latter is the rename-worthy case.
+    _names_used: dict[str, str] = dict.fromkeys(reserved_names, "<reserved>")
    hidden = 0

    try:
        from agent.skill_commands import get_skill_commands
+        from agent.skill_utils import get_external_skills_dirs
        from tools.skills_tool import SKILLS_DIR
+
        _skills_dir = SKILLS_DIR.resolve()
        _hub_dir = (SKILLS_DIR / ".hub").resolve()
+        # Build list of resolved scan roots (local dir first). Each external
+        # dir becomes its own scan root for category derivation — a skill at
+        # ``<external>/mlops/foo/SKILL.md`` is still categorized as "mlops".
+        _scan_roots: list[_P] = [_skills_dir]
+        try:
+            for ext in get_external_skills_dirs():
+                try:
+                    _scan_roots.append(_P(ext).resolve())
+                except Exception:
+                    continue
+        except Exception:
+            pass
        skill_cmds = get_skill_commands()

        for cmd_key in sorted(skill_cmds):
@ -759,33 +841,72 @@ def discord_skill_commands_by_category(
            if not skill_path:
                continue
            sp = _P(skill_path).resolve()
-            # Skip skills outside SKILLS_DIR or from the hub
-            if not str(sp).startswith(str(_skills_dir)):
-                continue
+            # Hub skills are loaded via the skill hub, not surfaced as
+            # slash commands.
            if str(sp).startswith(str(_hub_dir)):
                continue
+            # Accept skill if it lives under any scan root; record the
+            # matching root so we can derive the category correctly.
+            matched_root: _P | None = None
+            for root in _scan_roots:
+                try:
+                    sp.relative_to(root)
+                except ValueError:
+                    continue
+                matched_root = root
+                break
+            if matched_root is None:
+                continue

            skill_name = info.get("name", "")
            if skill_name in _platform_disabled:
                continue

            raw_name = cmd_key.lstrip("/")
-            # Clamp to 32 chars (Discord limit)
+            # Clamp to 32 chars (Discord per-command name limit)
            discord_name = raw_name[:32]
            if discord_name in _names_used:
+                # The clamped name is already taken, by a reserved gateway
+                # command or an earlier skill (first seen wins; alphabetical,
+                # since this loop iterates ``sorted(skill_cmds)``). The
+                # loser is dropped from Discord's /skill autocomplete.
+                #
+                # Silently counting this as ``hidden`` (the old behavior)
+                # meant skill authors had no way to discover the drop —
+                # their skill just didn't appear in the picker. Emit a
+                # WARNING naming both sides so the author can rename the
+                # losing skill's frontmatter name to something with a
+                # distinct 32-char prefix.
+                prior = _names_used[discord_name]
+                if prior == "<reserved>":
+                    logger.warning(
+                        "Discord /skill: %r (from %r) collides on its 32-char "
+                        "clamp with a reserved gateway command name %r — the "
+                        "skill will not appear in the /skill autocomplete. "
+                        "Rename the skill's frontmatter ``name:`` to differ "
+                        "in its first 32 chars.",
+                        discord_name, cmd_key, discord_name,
+                    )
+                else:
+                    logger.warning(
+                        "Discord /skill: %r and %r both clamp to %r on "
+                        "Discord's 32-char command-name limit — only %r "
+                        "will appear in the /skill autocomplete. Rename "
+                        "one skill's frontmatter ``name:`` to differ in "
+                        "its first 32 chars.",
+                        prior, cmd_key, discord_name, prior,
+                    )
+                hidden += 1
                continue
-            _names_used.add(discord_name)
+            _names_used[discord_name] = cmd_key

            desc = info.get("description", "")
            if len(desc) > 100:
                desc = desc[:97] + "..."

-            # Determine category from the relative path within SKILLS_DIR.
-            # e.g. creative/ascii-art/SKILL.md → parts = ("creative", "ascii-art")
-            try:
-                rel = sp.parent.relative_to(_skills_dir)
-            except ValueError:
-                continue
+            # Determine category from the relative path within the matched
+            # scan root. e.g. creative/ascii-art/SKILL.md → ("creative", ...)
+            rel = sp.parent.relative_to(matched_root)
            parts = rel.parts
            if len(parts) >= 2:
                cat = parts[0]
@ -795,28 +916,7 @@ def discord_skill_commands_by_category(
    except Exception:
        pass

-    # Enforce Discord limits: 25 subcommand groups, 25 subcommands each ------
-    _MAX_GROUPS = 25
-    _MAX_PER_GROUP = 25
-
-    trimmed_categories: dict[str, list[tuple[str, str, str]]] = {}
-    group_count = 0
-    for cat in sorted(categories):
-        if group_count >= _MAX_GROUPS:
-            hidden += len(categories[cat])
-            continue
-        entries = categories[cat][:_MAX_PER_GROUP]
-        hidden += max(0, len(categories[cat]) - _MAX_PER_GROUP)
-        trimmed_categories[cat] = entries
-        group_count += 1
-
-    # Uncategorized skills also count against the 25 top-level limit
-    remaining_slots = _MAX_GROUPS - group_count
-    if len(uncategorized) > remaining_slots:
-        hidden += len(uncategorized) - remaining_slots
-        uncategorized = uncategorized[:remaining_slots]
-
-    return trimmed_categories, uncategorized, hidden
+    return categories, uncategorized, hidden


 # ---------------------------------------------------------------------------
@ -829,6 +929,13 @@ def discord_skill_commands_by_category(
 _SLACK_MAX_SLASH_COMMANDS = 50
 _SLACK_NAME_LIMIT = 32
 _SLACK_INVALID_CHARS = re.compile(r"[^a-z0-9_\-]")
+_SLACK_RESERVED_COMMANDS = frozenset({
+    # Built-in Slack slash commands that cannot be registered by apps.
+    # https://slack.com/help/articles/201259356-Use-built-in-slash-commands
+    "me", "status", "away", "dnd", "shrug", "remind", "msg", "feed",
+    "who", "collapse", "expand", "leave", "join", "open", "search",
+    "topic", "mute", "pro", "shortcuts",
+})


 def _sanitize_slack_name(raw: str) -> str:
@ -855,6 +962,10 @@ def slack_native_slashes() -> list[tuple[str, str, str]]:
    documented form (e.g. ``/background``, ``/bg``, and ``/btw`` all work).
    Plugin-registered slash commands are included too.

+    Commands whose sanitized name collides with a Slack built-in
+    (e.g. ``/status``, ``/me``, ``/join``) are silently skipped.  Users
+    can still reach them via ``/hermes <command>``.
+
    Results are clamped to Slack's 50-command limit with duplicate-name
    avoidance. ``/hermes`` is always reserved as the first entry so the
    legacy ``/hermes <subcommand>`` form keeps working for anything that
@ -872,6 +983,8 @@ def slack_native_slashes() -> list[tuple[str, str, str]]:
        slack_name = _sanitize_slack_name(name)
        if not slack_name or slack_name in seen:
            return
+        if slack_name in _SLACK_RESERVED_COMMANDS:
+            return
        if len(entries) >= _SLACK_MAX_SLASH_COMMANDS:
            return
        # Slack description cap is 2000 chars; keep it short.
@ -1021,6 +1134,12 @@ class SlashCommandCompleter(Completer):
        except Exception:
            return {}

+    # Commands that open pickers when run without arguments.
+    # These should NOT receive a trailing space in completions because:
+    # - The TUI's submit handler applies completions on Enter if input differs
+    # - Adding space makes "/model" → "/model " which blocks picker execution
+    _PICKER_COMMANDS = frozenset({"model", "skin", "personality"})
+
    @staticmethod
    def _completion_text(cmd_name: str, word: str) -> str:
        """Return replacement text for a completion.
@ -1029,8 +1148,17 @@ class SlashCommandCompleter(Completer):
        returning ``help`` would be a no-op and prompt_toolkit suppresses the
        menu. Appending a trailing space keeps the dropdown visible and makes
        backspacing retrigger it naturally.
+
+        However, commands that open pickers (model, skin, personality) should
+        NOT get a trailing space — the TUI would apply the completion on Enter
+        and block the picker from opening.
        """
-        return f"{cmd_name} " if cmd_name == word else cmd_name
+        if cmd_name != word:
+            return cmd_name
+        # Don't add space for picker commands — allows Enter to execute them
+        if cmd_name in SlashCommandCompleter._PICKER_COMMANDS:
+            return cmd_name
+        return f"{cmd_name} "

    @staticmethod
    def _extract_path_word(text: str) -> str | None:
--- a/hermes_cli/completion.py
+++ b/hermes_cli/completion.py
@ -216,9 +216,9 @@ _hermes() {{
    typeset -A opt_args

    _arguments -C \\
-        '(-h --help){{-h,--help}}[Show help and exit]' \\
-        '(-V --version){{-V,--version}}[Show version and exit]' \\
-        '(-p --profile){{-p,--profile}}[Profile name]:profile:_hermes_profiles' \\
+        '(-)'{{-h,--help}}'[Show help and exit]' \\
+        '(-)'{{-V,--version}}'[Show version and exit]' \\
+        '(-)'{{-p,--profile}}'[Profile name]:profile:_hermes_profiles' \\
        '1:command:->commands' \\
        '*::arg:->args'

--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
--- a/hermes_cli/copilot_auth.py
+++ b/hermes_cli/copilot_auth.py
@ -128,7 +128,7 @@ def _try_gh_cli_token() -> Optional[str]:

    # Build a clean env so gh doesn't short-circuit on GITHUB_TOKEN / GH_TOKEN
    clean_env = {k: v for k, v in os.environ.items()
-                 if k not in ("GITHUB_TOKEN", "GH_TOKEN")}
+                 if k not in {"GITHUB_TOKEN", "GH_TOKEN"}}

    for gh_path in _gh_cli_candidates():
        cmd = [gh_path, "auth", "token"]
@ -212,9 +212,9 @@ def copilot_device_code_login(
    print("  Waiting for authorization...", end="", flush=True)

    # Step 3: Poll for completion
-    deadline = time.time() + timeout_seconds
+    deadline = time.monotonic() + timeout_seconds

-    while time.time() < deadline:
+    while time.monotonic() < deadline:
        time.sleep(interval + _DEVICE_CODE_POLL_SAFETY_MARGIN)

        poll_data = urllib.parse.urlencode({
--- a/hermes_cli/cron.py
+++ b/hermes_cli/cron.py
@ -93,6 +93,8 @@ def cron_list(show_all: bool = False):
        script = job.get("script")
        if script:
            print(f"    Script:    {script}")
+        if job.get("no_agent"):
+            print(f"    Mode:      {color('no-agent', Colors.DIM)} (script stdout delivered directly)")
        workdir = job.get("workdir")
        if workdir:
            print(f"    Workdir:   {workdir}")
@ -172,6 +174,7 @@ def cron_create(args):
        skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)),
        script=getattr(args, "script", None),
        workdir=getattr(args, "workdir", None),
+        no_agent=getattr(args, "no_agent", False) or None,
    )
    if not result.get("success"):
        print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED))
@ -184,6 +187,8 @@ def cron_create(args):
    job_data = result.get("job", {})
    if job_data.get("script"):
        print(f"  Script: {job_data['script']}")
+    if job_data.get("no_agent"):
+        print("  Mode: no-agent (script stdout delivered directly)")
    if job_data.get("workdir"):
        print(f"  Workdir: {job_data['workdir']}")
    print(f"  Next run: {result['next_run_at']}")
@ -225,6 +230,7 @@ def cron_edit(args):
        skills=final_skills,
        script=getattr(args, "script", None),
        workdir=getattr(args, "workdir", None),
+        no_agent=getattr(args, "no_agent", None),
    )
    if not result.get("success"):
        print(color(f"Failed to update job: {result.get('error', 'unknown error')}", Colors.RED))
@ -240,6 +246,8 @@ def cron_edit(args):
        print("  Skills: none")
    if updated.get("script"):
        print(f"  Script: {updated['script']}")
+    if updated.get("no_agent"):
+        print("  Mode: no-agent (script stdout delivered directly)")
    if updated.get("workdir"):
        print(f"  Workdir: {updated['workdir']}")
    return 0
--- a/hermes_cli/curator.py
+++ b/hermes_cli/curator.py
@ -12,6 +12,7 @@ from __future__ import annotations
 import argparse
 import sys
 from datetime import datetime, timezone
+from pathlib import Path
 from typing import Optional


@ -54,10 +55,20 @@ def _cmd_status(args) -> int:
    print(f"curator: {status_line}")
    print(f"  runs:           {runs}")
    print(f"  last run:       {_fmt_ts(last_run)}")
-    print(f"  last summary:   {summary}")
+    # Summary may be multi-line when the curator archived skills (the rename
+    # map gets appended as `name → umbrella` lines). Indent continuation
+    # lines so the block reads as one logical field.
+    if "\n" in summary:
+        first, *rest = summary.splitlines()
+        print(f"  last summary:   {first}")
+        for line in rest:
+            print(f"                  {line}")
+    else:
+        print(f"  last summary:   {summary}")
    _report = state.get("last_report_path")
    if _report:
-        print(f"  last report:    {_report}")
+        suffix = "" if Path(_report).exists() else " (missing)"
+        print(f"  last report:    {_report}{suffix}")
    _ih = curator.get_interval_hours()
    _interval_label = (
        f"{_ih // 24}d" if _ih % 24 == 0 and _ih >= 24
@ -160,25 +171,49 @@ def _cmd_run(args) -> int:
        print("curator: disabled via config; enable with `curator.enabled: true`")
        return 1

-    print("curator: running review pass...")
+    dry = bool(getattr(args, "dry_run", False))
+    background = bool(getattr(args, "background", False))
+    synchronous = bool(getattr(args, "synchronous", False)) or not background
+    if dry:
+        print("curator: running DRY-RUN (report only, no mutations)...")
+    else:
+        print("curator: running review pass...")

    def _on_summary(msg: str) -> None:
        print(msg)

    result = curator.run_curator_review(
        on_summary=_on_summary,
-        synchronous=bool(args.synchronous),
+        synchronous=synchronous,
+        dry_run=dry,
    )
    auto = result.get("auto_transitions", {})
    if auto:
-        print(
-            f"auto: checked={auto.get('checked', 0)} "
-            f"stale={auto.get('marked_stale', 0)} "
-            f"archived={auto.get('archived', 0)} "
-            f"reactivated={auto.get('reactivated', 0)}"
-        )
-    if not args.synchronous:
+        if dry:
+            print(
+                f"auto (preview): {auto.get('checked', 0)} candidate skill(s) "
+                "— no transitions applied in dry-run"
+            )
+        else:
+            print(
+                f"auto: checked={auto.get('checked', 0)} "
+                f"stale={auto.get('marked_stale', 0)} "
+                f"archived={auto.get('archived', 0)} "
+                f"reactivated={auto.get('reactivated', 0)}"
+            )
+    if not synchronous:
        print("llm pass running in background — check `hermes curator status` later")
+    if dry:
+        if synchronous:
+            print(
+                "dry-run: no changes applied. Read the report with "
+                "`hermes curator status` and run `hermes curator run` (no flag) to apply."
+            )
+        else:
+            print(
+                "dry-run: no changes applied. When the report lands, read it with "
+                "`hermes curator status` and run `hermes curator run` (no flag) to apply."
+            )
    return 0


@ -229,6 +264,215 @@ def _cmd_restore(args) -> int:
    return 0 if ok else 1


+def _cmd_archive(args) -> int:
+    """Archive one skill immediately on the user's request.
+
+    Pinned skills are refused with a hint to unpin first. Otherwise the
+    outcome message from ``skill_usage.archive_skill`` is printed.
+
+    Returns 0 when the archive call reported success, 1 when the skill is
+    pinned or the archive call reported failure.
+    """
+    from tools import skill_usage
+
+    skill_name = args.skill
+    record = skill_usage.get_record(skill_name)
+    if record.get("pinned"):
+        hint = f"`hermes curator unpin {skill_name}`"
+        print(f"curator: '{skill_name}' is pinned — unpin first with {hint}")
+        return 1
+
+    succeeded, message = skill_usage.archive_skill(skill_name)
+    print(f"curator: {message}")
+    if succeeded:
+        return 0
+    return 1
+
+
+def _idle_days(record: dict) -> Optional[int]:
+    """Days since the skill's last activity (view / use / patch).
+
+    Timestamps are tried in order: ``last_activity_at`` first, then
+    ``created_at``, so a skill that was authored but never used can still be
+    pruned — otherwise never-touched skills would be immortal. A missing or
+    unparseable ``last_activity_at`` falls through to ``created_at`` instead
+    of disqualifying the record.
+
+    Naive timestamps are treated as UTC, and timestamps in the future clamp
+    to 0.
+
+    Returns None only when both fields are missing or unparseable.
+    """
+    for field in ("last_activity_at", "created_at"):
+        ts = record.get(field)
+        if not ts:
+            continue
+        text = str(ts)
+        # ``datetime.fromisoformat`` rejects a trailing "Z" before Python 3.11,
+        # so spell the UTC designator as an explicit offset.
+        if text.endswith("Z"):
+            text = text[:-1] + "+00:00"
+        try:
+            dt = datetime.fromisoformat(text)
+        except (TypeError, ValueError):
+            # Unparseable: try the next candidate field rather than giving up.
+            continue
+        if dt.tzinfo is None:
+            dt = dt.replace(tzinfo=timezone.utc)
+        return max(0, (datetime.now(timezone.utc) - dt).days)
+    return None
+
+
+def _cmd_prune(args) -> int:
+    """Archive, in bulk, every agent-created skill idle for at least N days.
+
+    Pinned and already-archived skills are never candidates, and neither are
+    skills whose idle time cannot be determined. ``--days`` (default 90)
+    sets the threshold, ``--dry-run`` lists the candidates without archiving
+    them, and ``-y/--yes`` skips the confirmation prompt.
+
+    Returns 0 on success (including "nothing to prune" and dry runs), 1 when
+    the user aborts or any archive call fails, 2 when ``--days`` is below 1.
+    """
+    from tools import skill_usage
+
+    threshold = getattr(args, "days", 90)
+    if threshold < 1:
+        print(f"curator: --days must be >= 1 (got {threshold})", file=sys.stderr)
+        return 2
+
+    preview_only = bool(getattr(args, "dry_run", False))
+    assume_yes = bool(getattr(args, "yes", False))
+
+    stale = []
+    for row in skill_usage.agent_created_report():
+        if row.get("pinned") or row.get("state") == skill_usage.STATE_ARCHIVED:
+            continue
+        age = _idle_days(row)
+        if age is not None and age >= threshold:
+            stale.append((row["name"], age))
+
+    if not stale:
+        print(f"curator: nothing to prune (no unpinned skills idle >= {threshold}d)")
+        return 0
+
+    # Longest-idle first; ties keep their report order.
+    stale = sorted(stale, key=lambda pair: pair[1], reverse=True)
+    print(f"curator: {len(stale)} skill(s) idle >= {threshold}d:")
+    for skill, age in stale:
+        print(f"  {skill:40s} idle {age}d")
+
+    if preview_only:
+        print("\n(dry run — no changes made)")
+        return 0
+
+    if not assume_yes:
+        try:
+            answer = input(f"\nArchive {len(stale)} skill(s)? [y/N] ").strip().lower()
+        except (EOFError, KeyboardInterrupt):
+            print("\ncurator: aborted")
+            return 1
+        if answer not in {"y", "yes"}:
+            print("curator: aborted")
+            return 1
+
+    failed = []
+    for skill, _age in stale:
+        done, detail = skill_usage.archive_skill(skill)
+        if not done:
+            failed.append((skill, detail))
+
+    print(f"\ncurator: archived {len(stale) - len(failed)}/{len(stale)}")
+    if not failed:
+        return 0
+    print("failures:")
+    for skill, detail in failed:
+        print(f"  {skill}: {detail}")
+    return 1
+
+
+def _cmd_backup(args) -> int:
+    """Create a user-requested snapshot of the skills tree.
+
+    Refuses when ``curator_backup.is_enabled()`` is false. The snapshot is
+    labelled with ``--reason`` when one was given, otherwise ``"manual"``.
+
+    Returns 0 once a snapshot exists, 1 when backups are disabled or the
+    snapshot call returned None.
+    """
+    from agent import curator_backup
+
+    if not curator_backup.is_enabled():
+        print(
+            "curator: backups are disabled via config "
+            "(`curator.backup.enabled: false`); re-enable to snapshot"
+        )
+        return 1
+
+    label = getattr(args, "reason", None)
+    if not label:
+        label = "manual"
+
+    snapshot = curator_backup.snapshot_skills(reason=label)
+    if snapshot is not None:
+        print(f"curator: snapshot created at ~/.hermes/skills/.curator_backups/{snapshot.name}")
+        return 0
+    print("curator: snapshot failed — check logs (backup disabled or IO error)")
+    return 1
+
+
+def _cmd_rollback(args) -> int:
+    """Restore the skills tree from a curator snapshot (newest by default).
+
+    With ``--list`` the available snapshots are printed and nothing is
+    restored. ``--id <stamp>`` selects a specific snapshot. The target's
+    manifest details are shown first, then the user is asked to confirm
+    unless ``-y/--yes`` was passed. The printed notice tells the user that a
+    safety snapshot of the current tree is taken before restoring.
+
+    Returns 0 after a successful rollback (or after ``--list``), 1 when no
+    snapshot matches, the user declines, or the rollback reports failure.
+    """
+    from agent import curator_backup
+
+    if getattr(args, "list", False):
+        print(curator_backup.summarize_backups())
+        return 0
+
+    requested_id = getattr(args, "backup_id", None)
+    snapshot_dir = curator_backup._resolve_backup(requested_id)
+    if snapshot_dir is None:
+        if curator_backup.list_backups():
+            wanted = "id " + repr(requested_id) if requested_id else "your query"
+            print(f"curator: no snapshot matching {wanted}.")
+            print("Available:")
+            print(curator_backup.summarize_backups())
+        else:
+            print(
+                "curator: no snapshots exist yet. Take one with "
+                "`hermes curator backup` or wait for the next curator run."
+            )
+        return 1
+
+    manifest = curator_backup._read_manifest(snapshot_dir)
+    print(f"Rollback target: {snapshot_dir.name}")
+    if manifest:
+        print(f"  reason:      {manifest.get('reason', '?')}")
+        print(f"  created_at:  {manifest.get('created_at', '?')}")
+        print(f"  skill files: {manifest.get('skill_files', '?')}")
+        cron_info = manifest.get("cron_jobs") or {}
+        # A non-dict ``cron_jobs`` entry is silently ignored.
+        if isinstance(cron_info, dict):
+            if not cron_info.get("backed_up"):
+                why = cron_info.get("reason", "not captured")
+                print(f"  cron jobs:   not in snapshot ({why})")
+            else:
+                print(
+                    f"  cron jobs:   {cron_info.get('jobs_count', 0)} "
+                    f"(will be restored for skill-link fields only)"
+                )
+    print(
+        "\nThis will replace the current ~/.hermes/skills/ tree (a safety "
+        "snapshot of the current state is taken first so this is undoable). "
+        "Cron jobs that still exist will have their skills/skill fields "
+        "restored from the snapshot; all other cron fields are left alone."
+    )
+
+    if not getattr(args, "yes", False):
+        try:
+            reply = input("Proceed? [y/N] ").strip().lower()
+        except (EOFError, KeyboardInterrupt):
+            print("\ncancelled")
+            return 1
+        if reply not in {"y", "yes"}:
+            print("cancelled")
+            return 1
+
+    succeeded, detail, _extra = curator_backup.rollback(backup_id=snapshot_dir.name)
+    if not succeeded:
+        print(f"curator: rollback failed — {detail}")
+        return 1
+    print(f"curator: {detail}")
+    return 0
+
+
+def _cmd_list_archived(args) -> int:
+    """Print the name of every archived (recoverable) skill, one per line.
+
+    Prints a short notice instead when there are none. Always returns 0.
+    """
+    from tools import skill_usage
+
+    archived = skill_usage.list_archived_skill_names()
+    if archived:
+        for skill_name in archived:
+            print(skill_name)
+    else:
+        print("curator: no archived skills")
+    return 0
+
+
 # ---------------------------------------------------------------------------
 # argparse wiring (called from hermes_cli.main)
 # ---------------------------------------------------------------------------
@ -248,7 +492,16 @@ def register_cli(parent: argparse.ArgumentParser) -> None:
    p_run = subs.add_parser("run", help="Trigger a curator review now")
    p_run.add_argument(
        "--sync", "--synchronous", dest="synchronous", action="store_true",
-        help="Wait for the LLM review pass to finish (default: background thread)",
+        help="Wait for the LLM review pass to finish (default for manual runs)",
+    )
+    p_run.add_argument(
+        "--background", dest="background", action="store_true",
+        help="Start the LLM review pass in a background thread and return immediately",
+    )
+    p_run.add_argument(
+        "--dry-run", dest="dry_run", action="store_true",
+        help="Report only — no state changes, no archives, no consolidation "
+             "(use this to preview what curator would do)",
    )
    p_run.set_defaults(func=_cmd_run)

@ -270,6 +523,64 @@ def register_cli(parent: argparse.ArgumentParser) -> None:
    p_restore.add_argument("skill", help="Skill name")
    p_restore.set_defaults(func=_cmd_restore)

+    subs.add_parser("list-archived", help="List archived skills") \
+        .set_defaults(func=_cmd_list_archived)
+
+    p_archive = subs.add_parser(
+        "archive",
+        help="Manually archive a skill (move to .archive/, excluded from prompt)",
+    )
+    p_archive.add_argument("skill", help="Skill name")
+    p_archive.set_defaults(func=_cmd_archive)
+
+    p_prune = subs.add_parser(
+        "prune",
+        help="Bulk-archive agent-created skills idle for >= N days (default 90)",
+    )
+    p_prune.add_argument(
+        "--days", type=int, default=90,
+        help="Archive skills idle for at least N days (default: 90)",
+    )
+    p_prune.add_argument(
+        "-y", "--yes", action="store_true",
+        help="Skip the confirmation prompt",
+    )
+    p_prune.add_argument(
+        "--dry-run", dest="dry_run", action="store_true",
+        help="Show what would be archived without doing it",
+    )
+    p_prune.set_defaults(func=_cmd_prune)
+
+    p_backup = subs.add_parser(
+        "backup",
+        help="Take a manual tar.gz snapshot of ~/.hermes/skills/ "
+             "(curator also does this automatically before every real run)",
+    )
+    p_backup.add_argument(
+        "--reason", default=None,
+        help="Free-text label stored in manifest.json (default: 'manual')",
+    )
+    p_backup.set_defaults(func=_cmd_backup)
+
+    p_rollback = subs.add_parser(
+        "rollback",
+        help="Restore ~/.hermes/skills/ from a curator snapshot "
+             "(defaults to the newest)",
+    )
+    p_rollback.add_argument(
+        "--list", action="store_true",
+        help="List available snapshots and exit without restoring",
+    )
+    p_rollback.add_argument(
+        "--id", dest="backup_id", default=None,
+        help="Snapshot id to restore (see `--list`); default: newest",
+    )
+    p_rollback.add_argument(
+        "-y", "--yes", action="store_true",
+        help="Skip confirmation prompt",
+    )
+    p_rollback.set_defaults(func=_cmd_rollback)
+

 def cli_main(argv=None) -> int:
    """Standalone entry (also usable by hermes_cli.main fallthrough)."""
--- a/hermes_cli/curses_ui.py
+++ b/hermes_cli/curses_ui.py
@ -139,16 +139,16 @@ def curses_checklist(
                stdscr.refresh()
                key = stdscr.getch()

-                if key in (curses.KEY_UP, ord("k")):
+                if key in {curses.KEY_UP, ord("k")}:
                    cursor = (cursor - 1) % len(items)
-                elif key in (curses.KEY_DOWN, ord("j")):
+                elif key in {curses.KEY_DOWN, ord("j")}:
                    cursor = (cursor + 1) % len(items)
                elif key == ord(" "):
                    chosen.symmetric_difference_update({cursor})
-                elif key in (curses.KEY_ENTER, 10, 13):
+                elif key in {curses.KEY_ENTER, 10, 13}:
                    result_holder[0] = set(chosen)
                    return
-                elif key in (27, ord("q")):
+                elif key in {27, ord("q")}:
                    result_holder[0] = cancel_returns
                    return

@ -156,6 +156,8 @@ def curses_checklist(
        flush_stdin()
        return result_holder[0] if result_holder[0] is not None else cancel_returns

+    except KeyboardInterrupt:
+        return cancel_returns
    except Exception:
        return _numbered_fallback(title, items, selected, cancel_returns, status_fn)

@ -263,14 +265,14 @@ def curses_radiolist(
                stdscr.refresh()
                key = stdscr.getch()

-                if key in (curses.KEY_UP, ord("k")):
+                if key in {curses.KEY_UP, ord("k")}:
                    cursor = (cursor - 1) % len(items)
-                elif key in (curses.KEY_DOWN, ord("j")):
+                elif key in {curses.KEY_DOWN, ord("j")}:
                    cursor = (cursor + 1) % len(items)
-                elif key in (ord(" "), curses.KEY_ENTER, 10, 13):
+                elif key in {ord(" "), curses.KEY_ENTER, 10, 13}:
                    result_holder[0] = cursor
                    return
-                elif key in (27, ord("q")):
+                elif key in {27, ord("q")}:
                    result_holder[0] = cancel_returns
                    return

@ -278,6 +280,8 @@ def curses_radiolist(
        flush_stdin()
        return result_holder[0] if result_holder[0] is not None else cancel_returns

+    except KeyboardInterrupt:
+        return cancel_returns
    except Exception:
        return _radio_numbered_fallback(title, items, selected, cancel_returns)

@ -384,14 +388,14 @@ def curses_single_select(
                stdscr.refresh()
                key = stdscr.getch()

-                if key in (curses.KEY_UP, ord("k")):
+                if key in {curses.KEY_UP, ord("k")}:
                    cursor = (cursor - 1) % len(all_items)
-                elif key in (curses.KEY_DOWN, ord("j")):
+                elif key in {curses.KEY_DOWN, ord("j")}:
                    cursor = (cursor + 1) % len(all_items)
-                elif key in (curses.KEY_ENTER, 10, 13):
+                elif key in {curses.KEY_ENTER, 10, 13}:
                    result_holder[0] = cursor
                    return
-                elif key in (27, ord("q")):
+                elif key in {27, ord("q")}:
                    result_holder[0] = None
                    return

@ -401,6 +405,8 @@ def curses_single_select(
            return None
        return result_holder[0]

+    except KeyboardInterrupt:
+        return None
    except Exception:
        all_items = list(items) + [cancel_label]
        cancel_idx = len(items)
--- a/hermes_cli/debug.py
+++ b/hermes_cli/debug.py
@ -1,12 +1,19 @@
-"""``hermes debug`` — debug tools for Hermes Agent.
+"""``hermes debug``: debug tools for Hermes Agent.

 Currently supports:
    hermes debug share    Upload debug report (system info + logs) to a
                          paste service and print a shareable URL.
+                          By default, log content is run through
+                          ``agent.redact.redact_sensitive_text`` with
+                          ``force=True`` before upload so credentials in
+                          ``~/.hermes/logs/*.log`` are not leaked into
+                          the public paste service. Pass ``--no-redact``
+                          to disable.
 """

 import io
 import json
+import logging
 import sys
 import time
 import urllib.error
@ -19,6 +26,16 @@ from typing import Optional
 from hermes_constants import get_hermes_home
 from utils import atomic_replace

+logger = logging.getLogger(__name__)
+
+# Notice placed at the top of upload-bound content whenever redaction is on,
+# so anyone reading the public paste can tell the text was sanitized.
+# The newline is kept as its own piece so the banner always ends its line.
+_REDACTION_BANNER = (
+    "[hermes debug share: log content redacted at upload time. run with --no-redact to disable]"
+    "\n"
+)
+

 # ---------------------------------------------------------------------------
 # Paste services — try paste.rs first, dpaste.com as fallback.
@ -368,17 +385,40 @@ def _resolve_log_path(log_name: str) -> Optional[Path]:
    return None


+def _redact_log_text(text: str) -> str:
+    """Run ``redact_sensitive_text`` with ``force=True`` over upload-bound text.
+
+    Uses ``force=True`` so redaction fires regardless of the operator's
+    ``security.redact_secrets`` setting. Only the in-memory value passed in
+    is processed; this function never touches the log file on disk.
+
+    Returns the redacted text. Empty values and non-string values are
+    returned unchanged without invoking the redactor.
+    """
+    # Guard non-strings as well as empties so the documented pass-through
+    # holds and the redactor is only ever handed real text.
+    if not text or not isinstance(text, str):
+        return text
+    from agent.redact import redact_sensitive_text
+
+    return redact_sensitive_text(text, force=True)
+
+
 def _capture_log_snapshot(
    log_name: str,
    *,
    tail_lines: int,
    max_bytes: int = _MAX_LOG_BYTES,
+    redact: bool = True,
 ) -> LogSnapshot:
    """Capture a log once and derive summary/full-log views from it.

    The report tail and standalone log upload must come from the same file
    snapshot. Otherwise a rotation/truncate between reads can make the report
    look newer than the uploaded ``agent.log`` paste.
+
+    When ``redact`` is True (the default), both ``tail_text`` and
+    ``full_text`` are run through ``_redact_log_text`` so the snapshot
+    returned is upload-safe. The on-disk log file is never modified.
+    Pass ``redact=False`` to capture original log content (used by
+    ``hermes debug share --no-redact``).
    """
    log_path = _resolve_log_path(log_name)
    if log_path is None:
@ -438,18 +478,34 @@ def _capture_log_snapshot(
        if truncated:
            full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}"

+        if redact:
+            tail_text = _redact_log_text(tail_text)
+            full_text = _redact_log_text(full_text)
+
        return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text)
    except Exception as exc:
        return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None)


-def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]:
-    """Capture all logs used by debug-share exactly once."""
+def _capture_default_log_snapshots(
+    log_lines: int, *, redact: bool = True
+) -> dict[str, LogSnapshot]:
+    """Snapshot every log that debug-share uses, reading each one once.
+
+    The ``agent`` log keeps ``log_lines`` tail lines; ``errors`` and
+    ``gateway`` are capped at 100. ``redact`` is passed unchanged to every
+    ``_capture_log_snapshot`` call, so a single run applies one redaction
+    policy to all three logs.
+    """
     errors_lines = min(log_lines, 100)
     return {
-        "agent": _capture_log_snapshot("agent", tail_lines=log_lines),
-        "errors": _capture_log_snapshot("errors", tail_lines=errors_lines),
-        "gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines),
+        name: _capture_log_snapshot(name, tail_lines=tail, redact=redact)
+        for name, tail in (
+            ("agent", log_lines),
+            ("errors", errors_lines),
+            ("gateway", errors_lines),
+        )
     }


@ -532,6 +588,7 @@ def run_debug_share(args):
    log_lines = getattr(args, "lines", 200)
    expiry = getattr(args, "expire", 7)
    local_only = getattr(args, "local", False)
+    redact = not getattr(args, "no_redact", False)

    if not local_only:
        print(_PRIVACY_NOTICE)
@ -539,8 +596,16 @@ def run_debug_share(args):
    print("Collecting debug report...")

    # Capture dump once — prepended to every paste for context.
+    # The dump is already redacted at extract time via dump.py:_redact;
+    # log_snapshots are redacted by _capture_default_log_snapshots when
+    # redact=True so credentials never reach the public paste service.
    dump_text = _capture_dump()
-    log_snapshots = _capture_default_log_snapshots(log_lines)
+    log_snapshots = _capture_default_log_snapshots(log_lines, redact=redact)
+
+    if redact:
+        logger.info(
+            "hermes debug share: applied force-mode redaction to log snapshots before upload"
+        )

    report = collect_debug_report(
        log_lines=log_lines,
@ -556,6 +621,15 @@ def run_debug_share(args):
    if gateway_log:
        gateway_log = dump_text + "\n\n--- full gateway.log ---\n" + gateway_log

+    # Visible banner so reviewers reading the public paste know redaction
+    # was applied at upload time. Banner is omitted under --no-redact.
+    if redact:
+        report = _REDACTION_BANNER + report
+        if agent_log:
+            agent_log = _REDACTION_BANNER + agent_log
+        if gateway_log:
+            gateway_log = _REDACTION_BANNER + gateway_log
+
    if local_only:
        print(report)
        if agent_log:
@ -666,6 +740,7 @@ def run_debug(args):
        print("  --lines N    Number of log lines to include (default: 200)")
        print("  --expire N   Paste expiry in days (default: 7)")
        print("  --local      Print report locally instead of uploading")
+        print("  --no-redact  Disable upload-time secret redaction (default: redact)")
        print()
        print("Options (delete):")
        print("  <url> ...    One or more paste URLs to delete")
--- a/hermes_cli/dingtalk_auth.py
+++ b/hermes_cli/dingtalk_auth.py
@ -93,7 +93,7 @@ def poll_registration(device_code: str) -> dict:
    """
    data = _api_post("/app/registration/poll", {"device_code": device_code})
    status_raw = str(data.get("status", "")).strip().upper()
-    if status_raw not in ("WAITING", "SUCCESS", "FAIL", "EXPIRED"):
+    if status_raw not in {"WAITING", "SUCCESS", "FAIL", "EXPIRED"}:
        status_raw = "UNKNOWN"
    return {
        "status": status_raw,
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@ -12,6 +12,7 @@ import importlib.util
 from pathlib import Path

 from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
+from hermes_cli.env_loader import load_hermes_dotenv
 from hermes_constants import display_hermes_home

 PROJECT_ROOT = get_project_root()
@ -19,15 +20,8 @@ HERMES_HOME = get_hermes_home()
 _DHH = display_hermes_home()  # user-facing display path (e.g. ~/.hermes or ~/.hermes/profiles/coder)

 # Load environment variables from ~/.hermes/.env so API key checks work
-from dotenv import load_dotenv
 _env_path = get_env_path()
-if _env_path.exists():
-    try:
-        load_dotenv(_env_path, encoding="utf-8")
-    except UnicodeDecodeError:
-        load_dotenv(_env_path, encoding="latin-1")
-# Also try project .env as dev fallback
-load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")
+load_hermes_dotenv(hermes_home=_env_path.parent, project_env=PROJECT_ROOT / ".env")

 from hermes_cli.colors import Colors, color
 from hermes_cli.models import _HERMES_USER_AGENT
@ -97,6 +91,15 @@ def _termux_browser_setup_steps(node_installed: bool) -> list[str]:
    return steps


+def _termux_install_all_fallback_notes() -> list[str]:
+    """Return the fixed advisory lines about the Termux install profile.
+
+    In order: the recommended extra, the two extras excluded on Termux, and
+    the hosted speech-to-text fallbacks.
+    """
+    profile_note = "Termux install profile: use .[termux-all] for broad compatibility (installer default on Termux)."
+    matrix_note = "Matrix E2EE extra is excluded on Termux (python-olm currently fails to build)."
+    whisper_note = "Local faster-whisper extra is excluded on Termux (ctranslate2/av build path unavailable)."
+    stt_note = "STT fallback: use Groq Whisper (set GROQ_API_KEY) or OpenAI Whisper (set VOICE_TOOLS_OPENAI_KEY)."
+    return [profile_note, matrix_note, whisper_note, stt_note]
+
+
 def _has_provider_env_config(content: str) -> bool:
    """Return True when ~/.hermes/.env contains provider auth/base URL settings."""
    return any(key in content for key in _PROVIDER_ENV_HINTS)
@ -113,15 +116,35 @@ def _honcho_is_configured_for_doctor() -> bool:
        return False


+def _is_kanban_worker_env_gate(item: dict) -> bool:
+    """Tell whether a Kanban toolset is unavailable purely due to the worker gate.
+
+    True requires all of: the item is named ``kanban``, ``HERMES_KANBAN_TASK``
+    is unset or empty in the environment, and the item lists at least one
+    tool with every listed tool name starting with ``kanban_``.
+    """
+    is_kanban = item.get("name") == "kanban"
+    if not is_kanban or os.environ.get("HERMES_KANBAN_TASK"):
+        return False
+
+    tool_names = item.get("tools") or []
+    if not tool_names:
+        return False
+    return all(str(tool_name).startswith("kanban_") for tool_name in tool_names)
+
+
+def _doctor_tool_availability_detail(toolset: str) -> str:
+    """Return a parenthesised note for toolsets whose status needs explaining.
+
+    Only ``kanban`` with ``HERMES_KANBAN_TASK`` unset or empty gets a note;
+    every other case yields an empty string.
+    """
+    if toolset != "kanban":
+        return ""
+    if os.environ.get("HERMES_KANBAN_TASK"):
+        return ""
+    return "(runtime-gated; loaded only for dispatcher-spawned workers)"
+
+
 def _apply_doctor_tool_availability_overrides(available: list[str], unavailable: list[dict]) -> tuple[list[str], list[dict]]:
    """Adjust runtime-gated tool availability for doctor diagnostics."""
-    if not _honcho_is_configured_for_doctor():
-        return available, unavailable
-
    updated_available = list(available)
    updated_unavailable = []
    for item in unavailable:
-        if item.get("name") == "honcho":
+        name = item.get("name")
+        if _is_kanban_worker_env_gate(item):
+            if "kanban" not in updated_available:
+                updated_available.append("kanban")
+            continue
+        if name == "honcho" and _honcho_is_configured_for_doctor():
            if "honcho" not in updated_available:
                updated_available.append("honcho")
            continue
@ -175,6 +198,101 @@ def _check_gateway_service_linger(issues: list[str]) -> None:
        check_warn("Could not verify systemd linger", f"({linger_detail})")


+_APIKEY_PROVIDERS_CACHE: list | None = None
+
+
+def _build_apikey_providers_list() -> list:
+    """Build the list of API-key providers for doctor's health checks.
+
+    Tuple format: (name, env_vars, default_url, base_env, supports_models_endpoint)
+    The static base list is augmented with every ProviderProfile whose
+    auth_type is "api_key" and that is not already covered — adding
+    plugins/model-providers/<name>/ is sufficient to get into doctor.
+
+    NOTE(review): this function does not itself read or write
+    ``_APIKEY_PROVIDERS_CACHE``; caching is presumably handled by the
+    caller — confirm.
+
+    Any exception raised while importing or scanning provider profiles is
+    swallowed; the list built up to that point is returned.
+    """
+    _static = [
+        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
+        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
+        ("StepFun Step Plan", ("STEPFUN_API_KEY",),                          "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
+        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
+        ("Arcee AI",         ("ARCEEAI_API_KEY",),                           "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
+        ("GMI Cloud",        ("GMI_API_KEY",),                               "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
+        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                          "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
+        ("Hugging Face",     ("HF_TOKEN",),                                  "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
+        ("NVIDIA NIM",       ("NVIDIA_API_KEY",),                            "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
+        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",),                        "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
+        # MiniMax global: /v1 endpoint supports /models.
+        ("MiniMax",          ("MINIMAX_API_KEY",),                           "https://api.minimax.io/v1/models",    "MINIMAX_BASE_URL", True),
+        # MiniMax CN: /v1 endpoint does NOT support /models (returns 404).
+        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                        "https://api.minimaxi.com/v1/models",  "MINIMAX_CN_BASE_URL", False),
+        ("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",),                       "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
+        ("Kilo Code",        ("KILOCODE_API_KEY",),                          "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
+        ("OpenCode Zen",     ("OPENCODE_ZEN_API_KEY",),                      "https://opencode.ai/zen/v1/models",  "OPENCODE_ZEN_BASE_URL", True),
+        # OpenCode Go has no shared /models endpoint; skip the health check.
+        ("OpenCode Go",      ("OPENCODE_GO_API_KEY",),                       None,                                  "OPENCODE_GO_BASE_URL", False),
+    ]
+    # Display names already covered by the static table.
+    _known_names = {t[0] for t in _static}
+    # Also index by profile canonical name so profiles without display_name
+    # don't create duplicate entries for providers already in the static list.
+    _known_canonical: set[str] = set()
+    _name_to_canonical = {
+        "Z.AI / GLM": "zai", "Kimi / Moonshot": "kimi-coding",
+        "StepFun Step Plan": "stepfun", "Kimi / Moonshot (China)": "kimi-coding-cn",
+        "Arcee AI": "arcee", "GMI Cloud": "gmi", "DeepSeek": "deepseek",
+        "Hugging Face": "huggingface", "NVIDIA NIM": "nvidia",
+        "Alibaba/DashScope": "alibaba", "MiniMax": "minimax",
+        "MiniMax (China)": "minimax-cn", "Vercel AI Gateway": "ai-gateway",
+        "Kilo Code": "kilocode", "OpenCode Zen": "opencode-zen",
+        "OpenCode Go": "opencode-go",
+    }
+    for _label, _canonical in _name_to_canonical.items():
+        _known_canonical.add(_canonical)
+    # Providers that already have a dedicated health check above the generic
+    # API-key loop (with custom headers/auth). Skip their pluggable profiles
+    # here so the generic Bearer-auth loop doesn't run a duplicate, broken
+    # check (e.g. Anthropic native API requires x-api-key, not Bearer).
+    _dedicated_canonical = {"anthropic", "openrouter", "bedrock"}
+    _known_canonical.update(_dedicated_canonical)
+    try:
+        from providers import list_providers
+        from providers.base import ProviderProfile as _PP
+        try:
+            from hermes_cli.providers import normalize_provider as _normalize_provider
+        except Exception:  # pragma: no cover - normalization is best-effort
+            # Fallback normalizer: trim whitespace and lowercase.
+            def _normalize_provider(_name: str) -> str:
+                return (_name or "").strip().lower()
+        for _pp in list_providers():
+            # Only API-key profiles that declare at least one env var qualify.
+            if not isinstance(_pp, _PP) or _pp.auth_type != "api_key" or not _pp.env_vars:
+                continue
+            _label = _pp.display_name or _pp.name
+            if _label in _known_names or _pp.name in _known_canonical:
+                continue
+            # Second pass on normalized name + aliases, so a profile that
+            # resolves to a dedicated provider is skipped as well.
+            _candidates = {_normalize_provider(_pp.name)}
+            for _alias in (_pp.aliases or ()):
+                _candidates.add(_normalize_provider(_alias))
+            if _candidates & _dedicated_canonical:
+                continue
+            # Separate API-key vars from base-URL override vars — the health-check
+            # loop sends the first found value as Authorization: Bearer, so a URL
+            # string must never be picked.
+            _key_vars = tuple(
+                v for v in _pp.env_vars
+                if not v.endswith("_BASE_URL") and not v.endswith("_URL")
+            )
+            _base_var = next(
+                (v for v in _pp.env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")),
+                None,
+            )
+            if not _key_vars:
+                continue
+            # Without a base_url the models URL is None, even when the
+            # profile sets models_url.
+            _models_url = (
+                (_pp.models_url or (_pp.base_url.rstrip("/") + "/models"))
+                if _pp.base_url else None
+            )
+            # Plugin-provided entries are always flagged as supporting /models.
+            _static.append((_label, _key_vars, _models_url, _base_var, True))
+    except Exception:
+        # Best-effort: a failing provider scan leaves the list as built so far.
+        pass
+    return _static
+
+
 def run_doctor(args):
    """Run diagnostic checks."""
    should_fix = getattr(args, 'fix', False)
@ -263,8 +381,11 @@ def run_doctor(args):
    if env_path.exists():
        check_ok(f"{_DHH}/.env file exists")
        
-        # Check for common issues
-        content = env_path.read_text()
+        # Check for common issues. Pin encoding to UTF-8 because .env files are
+        # written as UTF-8 everywhere in the codebase, while Path.read_text()
+        # defaults to the system locale — which crashes on non-UTF-8 Windows
+        # locales (e.g. GBK) as soon as the file contains any non-ASCII byte.
+        content = env_path.read_text(encoding="utf-8")
        if _has_provider_env_config(content):
            check_ok("API key or custom endpoint configured")
        else:
@ -352,7 +473,7 @@ def run_doctor(args):
            if (
                provider
                and _resolve_auth_provider is not None
-                and provider not in ("auto", "custom")
+                and provider not in {"auto", "custom"}
            ):
                try:
                    runtime_provider = _resolve_auth_provider(provider)
@ -364,7 +485,7 @@ def run_doctor(args):
            if (
                provider
                and _resolve_provider_full is not None
-                and provider not in ("auto", "custom")
+                and provider not in {"auto", "custom"}
            ):
                provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
                catalog_provider = provider_def.id if provider_def is not None else None
@ -421,7 +542,7 @@ def run_doctor(args):
            # own env-var checks elsewhere in doctor, and get_auth_status()
            # returns a bare {logged_in: False} for anything it doesn't
            # explicitly dispatch, which would produce false positives.
-            if runtime_provider and runtime_provider not in ("auto", "custom", "openrouter"):
+            if runtime_provider and runtime_provider not in {"auto", "custom", "openrouter"}:
                try:
                    from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status
                    pconfig = PROVIDER_REGISTRY.get(runtime_provider)
@ -493,7 +614,7 @@ def run_doctor(args):
        # Detect stale root-level model keys (known bug source — PR #4329)
        try:
            import yaml
-            with open(config_path) as f:
+            with open(config_path, encoding="utf-8") as f:
                raw_config = yaml.safe_load(f) or {}
            stale_root_keys = [k for k in ("provider", "base_url") if k in raw_config and isinstance(raw_config[k], str)]
            if stale_root_keys:
@ -608,13 +729,12 @@ def run_doctor(args):
    hermes_home = HERMES_HOME
    if hermes_home.exists():
        check_ok(f"{_DHH} directory exists")
+    elif should_fix:
+        hermes_home.mkdir(parents=True, exist_ok=True)
+        check_ok(f"Created {_DHH} directory")
+        fixed_count += 1
    else:
-        if should_fix:
-            hermes_home.mkdir(parents=True, exist_ok=True)
-            check_ok(f"Created {_DHH} directory")
-            fixed_count += 1
-        else:
-            check_warn(f"{_DHH} not found", "(will be created on first use)")
+        check_warn(f"{_DHH} not found", "(will be created on first use)")
    
    # Check expected subdirectories
    expected_subdirs = ["cron", "sessions", "logs", "skills", "memories"]
@ -622,13 +742,12 @@ def run_doctor(args):
        subdir_path = hermes_home / subdir_name
        if subdir_path.exists():
            check_ok(f"{_DHH}/{subdir_name}/ exists")
+        elif should_fix:
+            subdir_path.mkdir(parents=True, exist_ok=True)
+            check_ok(f"Created {_DHH}/{subdir_name}/")
+            fixed_count += 1
        else:
-            if should_fix:
-                subdir_path.mkdir(parents=True, exist_ok=True)
-                check_ok(f"Created {_DHH}/{subdir_name}/")
-                fixed_count += 1
-            else:
-                check_warn(f"{_DHH}/{subdir_name}/ not found", "(will be created on first use)")
+            check_warn(f"{_DHH}/{subdir_name}/ not found", "(will be created on first use)")
    
    # Check for SOUL.md persona file
    soul_path = hermes_home / "SOUL.md"
@ -834,14 +953,12 @@ def run_doctor(args):
        else:
            check_fail("docker not found", "(required for TERMINAL_ENV=docker)")
            issues.append("Install Docker or change TERMINAL_ENV")
+    elif _safe_which("docker"):
+        check_ok("docker", "(optional)")
+    elif _is_termux():
+        check_info("Docker backend is not available inside Termux (expected on Android)")
    else:
-        if _safe_which("docker"):
-            check_ok("docker", "(optional)")
-        else:
-            if _is_termux():
-                check_info("Docker backend is not available inside Termux (expected on Android)")
-            else:
-                check_warn("docker not found", "(optional)")
+        check_warn("docker not found", "(optional)")
    
    # SSH (if using ssh backend)
    if terminal_env == "ssh":
@ -893,7 +1010,7 @@ def run_doctor(args):
            issues.append(f"Set TERMINAL_VERCEL_RUNTIME to one of: {supported}")

        disk = os.getenv("TERMINAL_CONTAINER_DISK", "51200").strip()
-        if disk in ("", "0", "51200"):
+        if disk in {"", "0", "51200"}:
            check_ok("Vercel disk setting", "(uses platform default)")
        else:
            check_fail("Vercel custom disk unsupported", "(reset terminal.container_disk to 51200)")
@ -919,7 +1036,7 @@ def run_doctor(args):
        for line in auth_status.detail_lines:
            check_info(f"Vercel auth {line}")

-        persistent = os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("1", "true", "yes", "on")
+        persistent = os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in {"1", "true", "yes", "on"}
        if persistent:
            check_info("Vercel persistence: snapshot filesystem only; live processes do not survive sandbox recreation")
        else:
@ -930,29 +1047,83 @@ def run_doctor(args):
        check_ok("Node.js")
        # Check if agent-browser is installed
        agent_browser_path = PROJECT_ROOT / "node_modules" / "agent-browser"
+        agent_browser_ok = False
        if agent_browser_path.exists():
            check_ok("agent-browser (Node.js)", "(browser automation)")
-        else:
-            if _is_termux():
-                check_info("agent-browser is not installed (expected in the tested Termux path)")
-                check_info("Install it manually later with: npm install -g agent-browser && agent-browser install")
-                check_info("Termux browser setup:")
-                for step in _termux_browser_setup_steps(node_installed=True):
-                    check_info(step)
-            else:
-                check_warn("agent-browser not installed", "(run: npm install)")
-    else:
-        if _is_termux():
-            check_info("Node.js not found (browser tools are optional in the tested Termux path)")
-            check_info("Install Node.js on Termux with: pkg install nodejs")
+            agent_browser_ok = True
+        elif shutil.which("agent-browser"):
+            check_ok("agent-browser", "(browser automation)")
+            agent_browser_ok = True
+        elif _is_termux():
+            check_info("agent-browser is not installed (expected in the tested Termux path)")
+            check_info("Install it manually later with: npm install -g agent-browser && agent-browser install")
            check_info("Termux browser setup:")
-            for step in _termux_browser_setup_steps(node_installed=False):
+            for step in _termux_browser_setup_steps(node_installed=True):
                check_info(step)
        else:
-            check_warn("Node.js not found", "(optional, needed for browser tools)")
+            check_warn("agent-browser not installed", "(run: npm install)")
+
+        # Chromium presence — the browser tools silently fail to register when
+        # agent-browser is found but no Playwright-managed Chromium is on disk
+        # (tools/browser_tool.py::check_browser_requirements filters them out
+        # before the agent ever sees them).  Reuse the exact predicate it uses
+        # so the two checks cannot diverge.  Skip on Termux (not a tested
+        # path).
+        if agent_browser_ok and not _is_termux():
+            try:
+                # Lazy import: browser_tool is a ~150KB module we don't want
+                # to eagerly load in every `hermes doctor` invocation.
+                from tools.browser_tool import (
+                    _chromium_installed,
+                    _is_camofox_mode,
+                    _get_cloud_provider,
+                    _get_cdp_override,
+                    _using_lightpanda_engine,
+                )
+            except Exception:
+                # If browser_tool can't even import, that's a separate bug
+                # surfaced elsewhere; don't crash doctor.
+                pass
+            else:
+                # Only warn about Chromium if the installed engine actually
+                # requires it: Camofox, CDP override, a cloud provider, or
+                # Lightpanda all bypass the local Chromium requirement.
+                skip_chromium_check = (
+                    _is_camofox_mode()
+                    or bool(_get_cdp_override())
+                    or _get_cloud_provider() is not None
+                    or _using_lightpanda_engine()
+                )
+                if not skip_chromium_check:
+                    if _chromium_installed():
+                        check_ok("Playwright Chromium", "(browser engine)")
+                    else:
+                        check_warn(
+                            "Playwright Chromium not installed",
+                            "(browser_* tools will be hidden from the agent)",
+                        )
+                        if sys.platform == "win32":
+                            check_info(
+                                f"Install with: cd {PROJECT_ROOT} && "
+                                "npx playwright install chromium"
+                            )
+                        else:
+                            check_info(
+                                f"Install with: cd {PROJECT_ROOT} && "
+                                "npx playwright install --with-deps chromium"
+                            )
+    elif _is_termux():
+        check_info("Node.js not found (browser tools are optional in the tested Termux path)")
+        check_info("Install Node.js on Termux with: pkg install nodejs")
+        check_info("Termux browser setup:")
+        for step in _termux_browser_setup_steps(node_installed=False):
+            check_info(step)
+    else:
+        check_warn("Node.js not found", "(optional, needed for browser tools)")
    
    # npm audit for all Node.js packages
-    if _safe_which("npm"):
+    _npm_bin = _safe_which("npm")
+    if _npm_bin:
        npm_dirs = [
            (PROJECT_ROOT, "Browser tools (agent-browser)"),
            (PROJECT_ROOT / "scripts" / "whatsapp-bridge", "WhatsApp bridge"),
@ -961,8 +1132,10 @@ def run_doctor(args):
            if not (npm_dir / "node_modules").exists():
                continue
            try:
+                # Use resolved absolute path so Windows can execute
+                # npm.cmd (CreateProcessW can't run bare .cmd names).
                audit_result = subprocess.run(
-                    ["npm", "audit", "--json"],
+                    [_npm_bin, "audit", "--json"],
                    cwd=str(npm_dir),
                    capture_output=True, text=True, timeout=30,
                )
@ -980,55 +1153,115 @@ def run_doctor(args):
                        f"{label} deps",
                        f"({critical} critical, {high} high, {moderate} moderate — run: cd {npm_dir} && npm audit fix)"
                    )
-                    issues.append(f"{label} has {total} npm vulnerability(ies)")
+                    issues.append(
+                        f"{label} has {total} npm "
+                        f"{'vulnerability' if total == 1 else 'vulnerabilities'}"
+                    )
                else:
-                    check_ok(f"{label} deps", f"({moderate} moderate vulnerability(ies))")
+                    check_ok(
+                        f"{label} deps",
+                        f"({moderate} moderate "
+                        f"{'vulnerability' if moderate == 1 else 'vulnerabilities'})",
+                    )
            except Exception:
                pass

+    if _is_termux():
+        check_info("Termux compatibility fallbacks:")
+        for note in _termux_install_all_fallback_notes():
+            check_info(note)
+
    # =========================================================================
    # Check: API connectivity
    # =========================================================================
    print()
    print(color("◆ API Connectivity", Colors.CYAN, Colors.BOLD))
-    
-    openrouter_key = os.getenv("OPENROUTER_API_KEY")
-    if openrouter_key:
-        print("  Checking OpenRouter API...", end="", flush=True)
+
+    # Refactor: every connectivity probe below is HTTP-bound and fully
+    # independent. Running them in series spent ~5s wall on a typical
+    # workstation (2s of that was boto3's IMDS lookup for AWS credentials,
+    # which times out unless you're actually on EC2). Threading them with
+    # a small executor pool collapses the section to roughly the slowest
+    # single probe — about 2s — without changing the output format.
+    #
+    # Each ``_probe_*`` helper is self-contained: it resolves its own
+    # credentials, makes its HTTP/SDK call (retrying at most once), and
+    # returns a ``_ConnectivityResult`` carrying the line(s) to print and any
+    # issue strings to append. No shared mutable state, no printing in workers.
+    import concurrent.futures as _futures
+    from collections import namedtuple as _namedtuple
+
+    _ConnectivityResult = _namedtuple(
+        "_ConnectivityResult", ["label", "lines", "issues"]
+    )
+    _probes: list = []  # list of (label, callable) submitted in display order
+
+    def _probe_openrouter() -> _ConnectivityResult:
+        """Check the OpenRouter models endpoint with the configured key.
+
+        Reads ``OPENROUTER_API_KEY`` and issues one GET against
+        ``OPENROUTER_MODELS_URL`` (10 s timeout). Nothing is printed here;
+        the outcome is returned as data.
+
+        Returns:
+            A ``_ConnectivityResult`` whose ``lines`` holds one
+            ``(glyph, label, detail)`` tuple: a warning row when no key is
+            set, a success row on HTTP 200, and a failure row otherwise.
+            ``issues`` gets one hint string for 401, 402, 429 and for any
+            raised exception; other non-200 statuses add none.
+        """
+        key = os.getenv("OPENROUTER_API_KEY")
+        if not key:
+            return _ConnectivityResult(
+                "OpenRouter API",
+                [(color("⚠", Colors.YELLOW), "OpenRouter API",
+                  color("(not configured)", Colors.DIM))],
+                [],
+            )
         try:
             import httpx
-            response = httpx.get(
+            r = httpx.get(
                 OPENROUTER_MODELS_URL,
-                headers={"Authorization": f"Bearer {openrouter_key}"},
-                timeout=10
+                headers={"Authorization": f"Bearer {key}"},
+                timeout=10,
             )
-            if response.status_code == 200:
-                print(f"\r  {color('✓', Colors.GREEN)} OpenRouter API                          ")
-            elif response.status_code == 401:
-                print(f"\r  {color('✗', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)}                ")
-                issues.append("Check OPENROUTER_API_KEY in .env")
-            elif response.status_code == 402:
-                print(f"\r  {color('✗', Colors.RED)} OpenRouter API {color('(out of credits — payment required)', Colors.DIM)}")
-                issues.append(
-                    "OpenRouter account has insufficient credits. "
-                    "Fix: run 'hermes config set model.provider <provider>' to switch providers, "
-                    "or fund your OpenRouter account at https://openrouter.ai/settings/credits"
+            if r.status_code == 200:
+                return _ConnectivityResult(
+                    "OpenRouter API",
+                    [(color("✓", Colors.GREEN), "OpenRouter API", "")],
+                    [],
                 )
-            elif response.status_code == 429:
-                print(f"\r  {color('✗', Colors.RED)} OpenRouter API {color('(rate limited)', Colors.DIM)}                ")
-                issues.append("OpenRouter rate limit hit — consider switching to a different provider or waiting")
-            else:
-                print(f"\r  {color('✗', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)}                ")
+            if r.status_code == 401:
+                return _ConnectivityResult(
+                    "OpenRouter API",
+                    [(color("✗", Colors.RED), "OpenRouter API",
+                      color("(invalid API key)", Colors.DIM))],
+                    ["Check OPENROUTER_API_KEY in .env"],
+                )
+            if r.status_code == 402:
+                return _ConnectivityResult(
+                    "OpenRouter API",
+                    [(color("✗", Colors.RED), "OpenRouter API",
+                      color("(out of credits — payment required)", Colors.DIM))],
+                    ["OpenRouter account has insufficient credits. "
+                     "Fix: run 'hermes config set model.provider <provider>' "
+                     "to switch providers, or fund your OpenRouter account "
+                     "at https://openrouter.ai/settings/credits"],
+                )
+            if r.status_code == 429:
+                return _ConnectivityResult(
+                    "OpenRouter API",
+                    [(color("✗", Colors.RED), "OpenRouter API",
+                      color("(rate limited)", Colors.DIM))],
+                    ["OpenRouter rate limit hit — consider switching to "
+                     "a different provider or waiting"],
+                )
+            return _ConnectivityResult(
+                "OpenRouter API",
+                [(color("✗", Colors.RED), "OpenRouter API",
+                  color(f"(HTTP {r.status_code})", Colors.DIM))],
+                [],
+            )
         except Exception as e:
-            print(f"\r  {color('✗', Colors.RED)} OpenRouter API {color(f'({e})', Colors.DIM)}                ")
-            issues.append("Check network connectivity")
-    else:
-        check_warn("OpenRouter API", "(not configured)")
-    
-    from hermes_cli.auth import get_anthropic_key
-    anthropic_key = get_anthropic_key()
-    if anthropic_key:
-        print("  Checking Anthropic API...", end="", flush=True)
+            return _ConnectivityResult(
+                "OpenRouter API",
+                [(color("✗", Colors.RED), "OpenRouter API",
+                  color(f"({e})", Colors.DIM))],
+                ["Check network connectivity"],
+            )
+
+    def _probe_anthropic() -> _ConnectivityResult:
+        """Check the Anthropic models endpoint with the resolved key.
+
+        The key comes from ``get_anthropic_key()``. Keys recognised by
+        ``_is_oauth_token`` are sent as a bearer token together with an
+        ``anthropic-beta`` header; any other key is sent as ``x-api-key``.
+        Nothing is printed here; the outcome is returned as data.
+
+        Returns:
+            A ``_ConnectivityResult``. ``lines`` is empty when no key is
+            available; otherwise it holds one ``(glyph, label, detail)``
+            tuple: success on HTTP 200, failure on 401, and a warning for
+            any other status or raised exception. ``issues`` is always
+            empty.
+        """
+        from hermes_cli.auth import get_anthropic_key
+        key = get_anthropic_key()
+        if not key:
+            return _ConnectivityResult("Anthropic API", [], [])
         try:
             import httpx
             from agent.anthropic_adapter import (
@ -1037,145 +1270,247 @@ def run_doctor(args):
                 _OAUTH_ONLY_BETAS,
                 _CONTEXT_1M_BETA,
             )
-
             headers = {"anthropic-version": "2023-06-01"}
-            is_oauth = _is_oauth_token(anthropic_key)
+            is_oauth = _is_oauth_token(key)
             if is_oauth:
-                headers["Authorization"] = f"Bearer {anthropic_key}"
+                headers["Authorization"] = f"Bearer {key}"
                 headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS)
             else:
-                headers["x-api-key"] = anthropic_key
-            response = httpx.get(
+                headers["x-api-key"] = key
+            r = httpx.get(
                 "https://api.anthropic.com/v1/models",
-                headers=headers,
-                timeout=10
+                headers=headers, timeout=10,
             )
-            # Reactive recovery: OAuth subscriptions that don't include 1M
-            # context reject the request with 400 "long context beta is not
-            # yet available for this subscription". Retry once with that
-            # beta stripped so the doctor check doesn't falsely report the
-            # Anthropic API as unreachable for those users.
+            # Reactive recovery: OAuth subscriptions without 1M context reject the
+            # request with 400 "long context beta is not yet available for this
+            # subscription". Retry once with that beta stripped so the doctor
+            # check doesn't falsely report Anthropic as unreachable.
             if (
                 is_oauth
-                and response.status_code == 400
-                and "long context beta" in response.text.lower()
-                and "not yet available" in response.text.lower()
+                and r.status_code == 400
+                and "long context beta" in r.text.lower()
+                and "not yet available" in r.text.lower()
             ):
                 headers["anthropic-beta"] = ",".join(
-                    [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA] + list(_OAUTH_ONLY_BETAS)
+                    [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
+                    + list(_OAUTH_ONLY_BETAS)
                 )
-                response = httpx.get(
+                r = httpx.get(
                     "https://api.anthropic.com/v1/models",
-                    headers=headers,
-                    timeout=10,
+                    headers=headers, timeout=10,
                 )
-            if response.status_code == 200:
-                print(f"\r  {color('✓', Colors.GREEN)} Anthropic API                           ")
-            elif response.status_code == 401:
-                print(f"\r  {color('✗', Colors.RED)} Anthropic API {color('(invalid API key)', Colors.DIM)}                 ")
-            else:
-                msg = "(couldn't verify)"
-                print(f"\r  {color('⚠', Colors.YELLOW)} Anthropic API {color(msg, Colors.DIM)}                 ")
+            if r.status_code == 200:
+                return _ConnectivityResult(
+                    "Anthropic API",
+                    [(color("✓", Colors.GREEN), "Anthropic API", "")],
+                    [],
+                )
+            if r.status_code == 401:
+                return _ConnectivityResult(
+                    "Anthropic API",
+                    [(color("✗", Colors.RED), "Anthropic API",
+                      color("(invalid API key)", Colors.DIM))],
+                    [],
+                )
+            return _ConnectivityResult(
+                "Anthropic API",
+                [(color("⚠", Colors.YELLOW), "Anthropic API",
+                  color("(couldn't verify)", Colors.DIM))],
+                [],
+            )
         except Exception as e:
-            print(f"\r  {color('⚠', Colors.YELLOW)} Anthropic API {color(f'({e})', Colors.DIM)}                 ")
+            return _ConnectivityResult(
+                "Anthropic API",
+                [(color("⚠", Colors.YELLOW), "Anthropic API",
+                  color(f"({e})", Colors.DIM))],
+                [],
+            )

-    # -- API-key providers --
-    # Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint)
-    # If supports_models_endpoint is False, we skip the health check and just show "configured"
-    _apikey_providers = [
-        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
-        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
-        ("StepFun Step Plan",   ("STEPFUN_API_KEY",),                           "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
-        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
-        ("Arcee AI",         ("ARCEEAI_API_KEY",),                            "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
-        ("GMI Cloud",        ("GMI_API_KEY",),                                "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
-        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
-        ("Hugging Face",     ("HF_TOKEN",),                                   "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
-        ("NVIDIA NIM",       ("NVIDIA_API_KEY",),                             "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
-        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",),                         "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
-        # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
-        ("MiniMax",          ("MINIMAX_API_KEY",),                            "https://api.minimax.io/v1/models",    "MINIMAX_BASE_URL", True),
-        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                         "https://api.minimaxi.com/v1/models",  "MINIMAX_CN_BASE_URL", True),
-        ("Vercel AI Gateway",       ("AI_GATEWAY_API_KEY",),                          "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
-        ("Kilo Code",        ("KILOCODE_API_KEY",),                            "https://api.kilo.ai/api/gateway/models",  "KILOCODE_BASE_URL", True),
-        ("OpenCode Zen",     ("OPENCODE_ZEN_API_KEY",),                        "https://opencode.ai/zen/v1/models",  "OPENCODE_ZEN_BASE_URL", True),
-        # OpenCode Go has no shared /models endpoint; skip the health check.
-        ("OpenCode Go",      ("OPENCODE_GO_API_KEY",),                         None,                                  "OPENCODE_GO_BASE_URL", False),
-    ]
-    for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
-        _key = ""
-        for _ev in _env_vars:
-            _key = os.getenv(_ev, "")
-            if _key:
+    def _probe_apikey_provider(pname, env_vars, default_url, base_env,
+                               supports_health_check) -> _ConnectivityResult:
+        """Check one API-key provider's ``/models`` endpoint.
+
+        Nothing is printed here; the outcome is returned as data.
+
+        Args:
+            pname: Display name of the provider; padded to 20 columns for
+                the rendered row.
+            env_vars: Env var names checked in order; the first non-empty
+                value is used as the key.
+            default_url: Models URL used when no base URL is in effect.
+            base_env: Name of the env var holding a base-URL override, or
+                a falsy value when there is none.
+            supports_health_check: When false, no request is made and the
+                provider is reported as "(key configured)".
+
+        Returns:
+            A ``_ConnectivityResult``. ``lines`` is empty when no key is
+            set; otherwise it holds one ``(glyph, label, detail)`` tuple:
+            success on HTTP 200, failure on 401 (which also adds an issue
+            naming ``env_vars[0]``), and a warning for any other status or
+            raised exception.
+        """
+        key = ""
+        for ev in env_vars:
+            key = os.getenv(ev, "")
+            if key:
                 break
-        if _key:
-            _label = _pname.ljust(20)
-            # Some providers (like MiniMax) don't support /models endpoint
-            if not _supports_health_check:
-                print(f"  {color('✓', Colors.GREEN)} {_label} {color('(key configured)', Colors.DIM)}")
-                continue
-            print(f"  Checking {_pname} API...", end="", flush=True)
-            try:
-                import httpx
-                _base = os.getenv(_base_env, "") if _base_env else ""
-                # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1
-                # (OpenAI-compat surface, which exposes /models for health check).
-                if not _base and _key.startswith("sk-kimi-"):
-                    _base = "https://api.kimi.com/coding/v1"
-                # Anthropic-compat endpoints (/anthropic, api.kimi.com/coding
-                # with no /v1) don't support /models.  Rewrite to the OpenAI-compat
-                # /v1 surface for health checks.
-                if _base and _base.rstrip("/").endswith("/anthropic"):
-                    from agent.auxiliary_client import _to_openai_base_url
-                    _base = _to_openai_base_url(_base)
-                if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"):
-                    _base = _base.rstrip("/") + "/v1"
-                _url = (_base.rstrip("/") + "/models") if _base else _default_url
-                _headers = {
-                    "Authorization": f"Bearer {_key}",
-                    "User-Agent": _HERMES_USER_AGENT,
-                }
-                if base_url_host_matches(_base, "api.kimi.com"):
-                    _headers["User-Agent"] = "claude-code/0.1.0"
-                _resp = httpx.get(
-                    _url,
-                    headers=_headers,
-                    timeout=10,
+        if not key:
+            return _ConnectivityResult(pname, [], [])
+        label = pname.ljust(20)
+        if not supports_health_check:
+            return _ConnectivityResult(
+                pname,
+                [(color("✓", Colors.GREEN), label,
+                  color("(key configured)", Colors.DIM))],
+                [],
+            )
+        try:
+            import httpx
+            base = os.getenv(base_env, "") if base_env else ""
+            # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1
+            # (OpenAI-compat surface, which exposes /models for health check).
+            if not base and key.startswith("sk-kimi-"):
+                base = "https://api.kimi.com/coding/v1"
+            # Anthropic-compat endpoints (/anthropic, api.kimi.com/coding
+            # with no /v1) don't support /models. Rewrite to OpenAI-compat
+            # /v1 surface for health checks.
+            if base and base.rstrip("/").endswith("/anthropic"):
+                from agent.auxiliary_client import _to_openai_base_url
+                base = _to_openai_base_url(base)
+            if base_url_host_matches(base, "api.kimi.com") and base.rstrip("/").endswith("/coding"):
+                base = base.rstrip("/") + "/v1"
+            url = (base.rstrip("/") + "/models") if base else default_url
+            headers = {
+                "Authorization": f"Bearer {key}",
+                "User-Agent": _HERMES_USER_AGENT,
+            }
+            if base_url_host_matches(base, "api.kimi.com"):
+                headers["User-Agent"] = "claude-code/0.1.0"
+            r = httpx.get(url, headers=headers, timeout=10)
+            # DashScope only: a 401 from the default URL (no base override)
+            # is retried once against the dashscope.aliyuncs.com host before
+            # the status is classified.
+            if (
+                pname == "Alibaba/DashScope"
+                and not base
+                and r.status_code == 401
+            ):
+                r = httpx.get(
+                    "https://dashscope.aliyuncs.com/compatible-mode/v1/models",
+                    headers=headers, timeout=10,
                 )
-                if _resp.status_code == 200:
-                    print(f"\r  {color('✓', Colors.GREEN)} {_label}                          ")
-                elif _resp.status_code == 401:
-                    print(f"\r  {color('✗', Colors.RED)} {_label} {color('(invalid API key)', Colors.DIM)}           ")
-                    issues.append(f"Check {_env_vars[0]} in .env")
-                else:
-                    print(f"\r  {color('⚠', Colors.YELLOW)} {_label} {color(f'(HTTP {_resp.status_code})', Colors.DIM)}           ")
-            except Exception as _e:
-                print(f"\r  {color('⚠', Colors.YELLOW)} {_label} {color(f'({_e})', Colors.DIM)}           ")
+            if r.status_code == 200:
+                return _ConnectivityResult(
+                    pname,
+                    [(color("✓", Colors.GREEN), label, "")],
+                    [],
+                )
+            if r.status_code == 401:
+                return _ConnectivityResult(
+                    pname,
+                    [(color("✗", Colors.RED), label,
+                      color("(invalid API key)", Colors.DIM))],
+                    [f"Check {env_vars[0]} in .env"],
+                )
+            return _ConnectivityResult(
+                pname,
+                [(color("⚠", Colors.YELLOW), label,
+                  color(f"(HTTP {r.status_code})", Colors.DIM))],
+                [],
+            )
+        except Exception as e:
+            return _ConnectivityResult(
+                pname,
+                [(color("⚠", Colors.YELLOW), label,
+                  color(f"({e})", Colors.DIM))],
+                [],
+            )

-    # -- AWS Bedrock --
-    # Bedrock uses the AWS SDK credential chain, not API keys.
+    def _probe_bedrock() -> _ConnectivityResult:
+        """Check AWS Bedrock by listing foundation models through boto3.
+
+        Nothing is printed here; the outcome is returned as data.
+
+        Returns:
+            A ``_ConnectivityResult``. ``lines`` is empty when
+            ``agent.bedrock_adapter`` cannot be imported or
+            ``has_aws_credentials()`` is false. Otherwise it holds one
+            ``(glyph, label, detail)`` tuple: success showing the
+            ``resolve_aws_auth_env_var()`` value, the region and the
+            number of model summaries, or a warning plus one issue string
+            when boto3 is missing or the call raises.
+        """
+        try:
+            from agent.bedrock_adapter import (
+                has_aws_credentials,
+                resolve_aws_auth_env_var,
+                resolve_bedrock_region,
+            )
+        except ImportError:
+            return _ConnectivityResult("AWS Bedrock", [], [])
+        if not has_aws_credentials():
+            return _ConnectivityResult("AWS Bedrock", [], [])
+        auth_var = resolve_aws_auth_env_var()
+        region = resolve_bedrock_region()
+        label = "AWS Bedrock".ljust(20)
+        try:
+            import boto3
+            from botocore.config import Config as _BotoConfig
+            # Trim retries on the actual Bedrock API call so a transient
+            # failure doesn't pad the doctor run by 30+ seconds.
+            cfg = _BotoConfig(
+                connect_timeout=5,
+                read_timeout=10,
+                retries={"max_attempts": 1},
+            )
+            client = boto3.client("bedrock", region_name=region, config=cfg)
+            resp = client.list_foundation_models()
+            n = len(resp.get("modelSummaries", []))
+            return _ConnectivityResult(
+                "AWS Bedrock",
+                [(color("✓", Colors.GREEN), label,
+                  color(f"({auth_var}, {region}, {n} models)", Colors.DIM))],
+                [],
+            )
+        except ImportError:
+            # Raised by the boto3/botocore imports above when the SDK is
+            # not installed; reported as a warning with an install hint.
+            return _ConnectivityResult(
+                "AWS Bedrock",
+                [(color("⚠", Colors.YELLOW), label,
+                  color(f"(boto3 not installed — {sys.executable} -m pip install boto3)",
+                        Colors.DIM))],
+                [f"Install boto3 for Bedrock: {sys.executable} -m pip install boto3"],
+            )
+        except Exception as e:
+            err_name = type(e).__name__
+            return _ConnectivityResult(
+                "AWS Bedrock",
+                [(color("⚠", Colors.YELLOW), label,
+                  color(f"({err_name}: {e})", Colors.DIM))],
+                [f"AWS Bedrock: {err_name} — check IAM permissions for "
+                 f"bedrock:ListFoundationModels"],
+            )
+
+    # Build the probe submission list in display order
+    _probes.append(("OpenRouter API", _probe_openrouter))
+    _probes.append(("Anthropic API", _probe_anthropic))
+
+    global _APIKEY_PROVIDERS_CACHE
+    if _APIKEY_PROVIDERS_CACHE is None:
+        _APIKEY_PROVIDERS_CACHE = _build_apikey_providers_list()
+    for _entry in _APIKEY_PROVIDERS_CACHE:
+        _pname, _env_vars, _default_url, _base_env, _supports = _entry
+        # Capture loop vars by binding default args — without this, all closures
+        # would share the final iteration's values and every probe would hit
+        # the last provider's URL.
+        _probes.append((_pname, lambda p=_pname, e=_env_vars, u=_default_url,
+                                       b=_base_env, s=_supports:
+                                _probe_apikey_provider(p, e, u, b, s)))
+
+    _probes.append(("AWS Bedrock", _probe_bedrock))
+
+    # Print a single status line so users see something happening, then
+    # fan out. The line is cleared with ``\r`` once every probe has finished,
+    # just before the collected results are printed.
+    print(f"  {color(f'Running {len(_probes)} connectivity checks in parallel…', Colors.DIM)}",
+          end="", flush=True)
+
+    # Disable boto3's EC2 instance-metadata-service probe for the duration
+    # of the parallel block. boto's default credential chain tries
+    # 169.254.169.254 with a multi-second timeout when we're not on EC2,
+    # which dominated the section's wall time before this fix
+    # (~2s on a developer laptop, even with the rest parallelized).
+    # Set on the parent thread before submitting work so the env-var
+    # mutation never races with another worker. has_aws_credentials() in
+    # the bedrock probe already gates on real env-var creds, so IMDS is
+    # never the legitimate source for `hermes doctor`.
+    _imds_prev = os.environ.get("AWS_EC2_METADATA_DISABLED")
+    os.environ["AWS_EC2_METADATA_DISABLED"] = "true"
    try:
-        from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region
-        if has_aws_credentials():
-            _auth_var = resolve_aws_auth_env_var()
-            _region = resolve_bedrock_region()
-            _label = "AWS Bedrock".ljust(20)
-            print(f"  Checking AWS Bedrock...", end="", flush=True)
-            try:
-                import boto3
-                _br_client = boto3.client("bedrock", region_name=_region)
-                _br_resp = _br_client.list_foundation_models()
-                _model_count = len(_br_resp.get("modelSummaries", []))
-                print(f"\r  {color('✓', Colors.GREEN)} {_label} {color(f'({_auth_var}, {_region}, {_model_count} models)', Colors.DIM)}           ")
-            except ImportError:
-                print(f"\r  {color('⚠', Colors.YELLOW)} {_label} {color(f'(boto3 not installed — {sys.executable} -m pip install boto3)', Colors.DIM)}           ")
-                issues.append(f"Install boto3 for Bedrock: {sys.executable} -m pip install boto3")
-            except Exception as _e:
-                _err_name = type(_e).__name__
-                print(f"\r  {color('⚠', Colors.YELLOW)} {_label} {color(f'({_err_name}: {_e})', Colors.DIM)}           ")
-                issues.append(f"AWS Bedrock: {_err_name} — check IAM permissions for bedrock:ListFoundationModels")
-    except ImportError:
-        pass  # bedrock_adapter not available — skip silently
+        # 8 workers is plenty — each probe is a single HTTP call plus a TLS
+        # handshake. More than that wastes thread-startup cost and risks
+        # noisy output if anything ever printed from inside a worker.
+        with _futures.ThreadPoolExecutor(max_workers=8,
+                                         thread_name_prefix="doctor-probe") as _ex:
+            _futures_in_order = [_ex.submit(_fn) for _, _fn in _probes]
+            _results = [_f.result() for _f in _futures_in_order]
+    finally:
+        if _imds_prev is None:
+            os.environ.pop("AWS_EC2_METADATA_DISABLED", None)
+        else:
+            os.environ["AWS_EC2_METADATA_DISABLED"] = _imds_prev
+
+    # Clear the "Running …" line and print all results in submission order.
+    print("\r" + " " * 70 + "\r", end="")
+    for _r in _results:
+        for _glyph, _label, _detail in _r.lines:
+            if _detail:
+                print(f"  {_glyph} {_label} {_detail}")
+            else:
+                print(f"  {_glyph} {_label}")
+        for _issue in _r.issues:
+            issues.append(_issue)

    # =========================================================================
    # Check: Submodules
@ -1215,7 +1550,7 @@ def run_doctor(args):
        
        for tid in available:
            info = TOOLSET_REQUIREMENTS.get(tid, {})
-            check_ok(info.get("name", tid))
+            check_ok(info.get("name", tid), _doctor_tool_availability_detail(tid))
        
        for item in unavailable:
            env_vars = item.get("missing_vars") or item.get("env_vars") or []
@ -1258,9 +1593,23 @@ def run_doctor(args):
        check_warn("Skills Hub directory not initialized", "(run: hermes skills list)")

    from hermes_cli.config import get_env_value
+
+    def _gh_authenticated() -> bool:
+        """Return True when ``gh auth status`` exits successfully.
+
+        Only the exit code is consulted. The previous invocation passed
+        ``--json authenticated``, which is not a field ``gh auth status``
+        offers; gh rejects unknown flags/fields with a non-zero exit, so the
+        check reported "not authenticated" even for logged-in users.
+
+        Returns False when ``gh`` is missing, cannot be executed, or does not
+        answer within 10 seconds.
+        """
+        try:
+            result = subprocess.run(
+                ["gh", "auth", "status"],
+                # Never let gh wait on (or steal) the doctor's terminal input.
+                stdin=subprocess.DEVNULL,
+                capture_output=True, timeout=10,
+            )
+        except (OSError, subprocess.TimeoutExpired):
+            # OSError covers FileNotFoundError (gh not installed) as well as
+            # PermissionError and friends for a non-executable binary.
+            return False
+        return result.returncode == 0
+
    github_token = get_env_value("GITHUB_TOKEN") or get_env_value("GH_TOKEN")
    if github_token:
        check_ok("GitHub token configured (authenticated API access)")
+    elif _gh_authenticated():
+        check_ok("GitHub authenticated via gh CLI", "(full API access — no GITHUB_TOKEN needed)")
    else:
        check_warn("No GITHUB_TOKEN", f"(60 req/hr rate limit — set in {_DHH}/.env for better rates)")

@ -1275,7 +1624,7 @@ def run_doctor(args):
        import yaml as _yaml
        _mem_cfg_path = HERMES_HOME / "config.yaml"
        if _mem_cfg_path.exists():
-            with open(_mem_cfg_path) as _f:
+            with open(_mem_cfg_path, encoding="utf-8") as _f:
                _raw_cfg = _yaml.safe_load(_f) or {}
            _active_memory_provider = (_raw_cfg.get("memory") or {}).get("provider", "")
    except Exception:
--- a/hermes_cli/dump.py
+++ b/hermes_cli/dump.py
@ -14,6 +14,7 @@ import sys
 from pathlib import Path

 from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config
+from hermes_cli.env_loader import load_hermes_dotenv
 from hermes_constants import display_hermes_home


@ -195,15 +196,11 @@ def run_dump(args):
    show_keys = getattr(args, "show_keys", False)

    # Load env from .env file so key checks work
-    from dotenv import load_dotenv
    env_path = get_env_path()
-    if env_path.exists():
-        try:
-            load_dotenv(env_path, encoding="utf-8")
-        except UnicodeDecodeError:
-            load_dotenv(env_path, encoding="latin-1")
-    # Also try project .env as dev fallback
-    load_dotenv(get_project_root() / ".env", override=False, encoding="utf-8")
+    load_hermes_dotenv(
+        hermes_home=env_path.parent,
+        project_env=get_project_root() / ".env",
+    )

    project_root = get_project_root()
    hermes_home = get_hermes_home()
--- a/hermes_cli/env_loader.py
+++ b/hermes_cli/env_loader.py
@ -113,7 +113,7 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
    except ImportError:
        return  # early bootstrap — config module not available yet

-    read_kw = {"encoding": "utf-8", "errors": "replace"}
+    read_kw = {"encoding": "utf-8-sig", "errors": "replace"}
    try:
        with open(path, **read_kw) as f:
            original = f.readlines()
--- a/hermes_cli/fallback_cmd.py
+++ b/hermes_cli/fallback_cmd.py
@ -307,7 +307,7 @@ def cmd_fallback_clear(args) -> None:  # noqa: ARG001
        print()
        print("  Cancelled.")
        return
-    if resp not in ("y", "yes"):
+    if resp not in {"y", "yes"}:
        print("  Cancelled — no change.")
        return

@ -347,11 +347,11 @@ def _numbered_pick(question: str, choices: List[str]) -> Optional[int]:
 def cmd_fallback(args) -> None:
    """Top-level dispatcher for ``hermes fallback [subcommand]``."""
    sub = getattr(args, "fallback_command", None)
-    if sub in (None, "", "list", "ls"):
+    if sub in {None, "", "list", "ls"}:
        cmd_fallback_list(args)
    elif sub == "add":
        cmd_fallback_add(args)
-    elif sub in ("remove", "rm"):
+    elif sub in {"remove", "rm"}:
        cmd_fallback_remove(args)
    elif sub == "clear":
        cmd_fallback_clear(args)
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
--- a/hermes_cli/gateway_windows.py
+++ b/hermes_cli/gateway_windows.py
@ -0,0 +1,691 @@
+"""Windows gateway service backend (Scheduled Task + Startup-folder fallback).
+
+This mirrors the contract exposed by ``launchd_install`` / ``launchd_start`` /
+``launchd_status`` etc. on macOS and ``systemd_install`` / ``systemd_start`` on
+Linux. It uses ``schtasks`` under the hood with ``/SC ONLOGON`` and restart-on-
+failure XML settings, and falls back to a ``%APPDATA%\\...\\Startup\\<name>.cmd``
+dropper when Scheduled Task creation is denied (locked-down corporate boxes).
+
+Design notes
+------------
+* ``schtasks /Create /SC ONLOGON /RL LIMITED`` means the task runs at the
+  CURRENT USER's next logon without any elevation prompt. We also
+  ``schtasks /Run`` immediately after install so the gateway starts right
+  away without waiting for the next logon.
+* We write two files: a shared ``gateway.cmd`` wrapper script (cwd + env + the
+  actual ``python -m hermes_cli.main gateway run --replace`` invocation) and
+  EITHER a schtasks entry pointing at it OR a Startup-folder ``.cmd`` that
+  spawns it detached.
+* Status = merge of "is the schtasks entry registered?" + "is the startup
+  .cmd present?" + "is there a gateway process running?" so the status
+  command keeps working regardless of which install path was taken.
+* Quoting is tricky: schtasks parses ``/TR`` itself and cmd.exe parses the
+  generated ``gateway.cmd``. Those are DIFFERENT parsers. We keep two
+  separate quote helpers (same pattern OpenClaw uses) and never cross them.
+* All of this is Windows-only. ``import`` paths are still safe on POSIX but
+  the functions raise if called on non-Windows.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import shlex
+import shutil
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+# Short timeouts: schtasks occasionally wedges and we don't want to hang forever.
+# Hard per-invocation limit (seconds) handed to subprocess.run in _exec_schtasks.
+_SCHTASKS_TIMEOUT_S = 15
+# NOTE(review): not referenced in the visible part of this module — presumably
+# a "produced no output" watchdog budget in seconds; confirm it is still used.
+_SCHTASKS_NO_OUTPUT_TIMEOUT_S = 30
+# Patterns in schtasks stderr that mean "fall back to the Startup folder".
+# Matched case-insensitively; includes the Spanish-locale "access denied" text
+# and the synthetic timeout message produced by _exec_schtasks.
+_FALLBACK_PATTERNS = re.compile(
+    r"(access is denied|acceso denegado|schtasks timed out|schtasks produced no output)",
+    re.IGNORECASE,
+)
+
+# Base Scheduled Task name; get_task_name() appends a per-profile suffix.
+_TASK_NAME_DEFAULT = "Hermes_Gateway"
+# Emitted as a ``rem`` line at the top of the generated .cmd scripts.
+_TASK_DESCRIPTION = "Hermes Agent Gateway - Messaging Platform Integration"
+
+
+# ---------------------------------------------------------------------------
+# Platform guard
+# ---------------------------------------------------------------------------
+
+def _assert_windows() -> None:
+    """Raise ``RuntimeError`` unless ``sys.platform`` is ``"win32"``."""
+    if sys.platform == "win32":
+        return
+    raise RuntimeError("gateway_windows is Windows-only")
+
+
+# ---------------------------------------------------------------------------
+# Quoting helpers (two DIFFERENT parsers — do not mix)
+# ---------------------------------------------------------------------------
+
+def _quote_cmd_script_arg(value: str) -> str:
+    """Quote a single argument for use INSIDE a .cmd file, for cmd.exe parsing.
+
+    cmd.exe splits on spaces/tabs outside of double quotes and treats
+    ``& | < > ^ ( )`` as operators there, so any of those force the value
+    into double quotes. Embedded quotes are doubled. Percent signs are
+    doubled as well: a batch file expands ``%...%`` even inside quotes, and
+    ``%%`` is the batch spelling of a literal ``%``.
+
+    Args:
+        value: The raw argument text.
+
+    Returns:
+        The argument, quoted/escaped as needed; ``""`` for an empty value.
+
+    Raises:
+        ValueError: If ``value`` contains a CR or LF, which would terminate
+            the logical command line mid-script.
+    """
+    if "\r" in value or "\n" in value:
+        raise ValueError(f"refusing to quote value containing newline: {value!r}")
+    if not value:
+        return '""'
+    # Must happen before quoting: quotes do not protect % in batch files.
+    value = value.replace("%", "%%")
+    if not re.search(r'[ \t"&|<>^()]', value):
+        return value
+    return '"' + value.replace('"', '""') + '"'
+
+
+def _quote_schtasks_arg(value: str) -> str:
+    """Quote one argument the way schtasks.exe's ``/TR`` parser expects.
+
+    Unlike the cmd.exe helper, embedded double quotes are escaped with a
+    backslash. Values without a space, tab or double quote are returned
+    untouched; everything else is wrapped in double quotes.
+    """
+    needs_quotes = any(ch in value for ch in (" ", "\t", '"'))
+    if not needs_quotes:
+        return value
+    escaped = value.replace('"', '\\"')
+    return f'"{escaped}"'
+
+
+# ---------------------------------------------------------------------------
+# schtasks.exe wrapper
+# ---------------------------------------------------------------------------
+
+def _exec_schtasks(args: list[str]) -> tuple[int, str, str]:
+    """Run ``schtasks.exe`` with a hard timeout. Return (code, stdout, stderr).
+
+    Args:
+        args: Arguments passed to schtasks after the executable itself.
+
+    Returns:
+        ``(returncode, stdout, stderr)``. Failures are reported in-band
+        rather than raised:
+
+        * ``(1, "", ...)`` when schtasks is not on PATH or cannot be started;
+        * ``(124, "", "schtasks timed out ...")`` when it wedges — same
+          convention OpenClaw uses, so the fallback detection regex matches.
+
+    Raises:
+        RuntimeError: When called on a non-Windows platform.
+    """
+    _assert_windows()
+    schtasks = shutil.which("schtasks")
+    if schtasks is None:
+        return (1, "", "schtasks.exe not found on PATH")
+    try:
+        proc = subprocess.run(
+            [schtasks, *args],
+            # The output is captured, so a prompt would be invisible; make
+            # schtasks fail fast instead of waiting on the caller's stdin.
+            stdin=subprocess.DEVNULL,
+            capture_output=True,
+            text=True,
+            # schtasks messages are localized and may not be valid in the
+            # decoder Python picks (e.g. UTF-8 mode). Strict decoding would
+            # raise UnicodeDecodeError, which the handlers below don't catch.
+            errors="replace",
+            timeout=_SCHTASKS_TIMEOUT_S,
+            # CREATE_NO_WINDOW avoids a flashing console window when the CLI
+            # is itself hosted in a TUI. See tools/browser_tool.py for the
+            # same pattern and the windows-subprocess-sigint-storm.md ref.
+            creationflags=0x08000000,  # CREATE_NO_WINDOW
+        )
+        return (proc.returncode, proc.stdout or "", proc.stderr or "")
+    except subprocess.TimeoutExpired:
+        return (124, "", f"schtasks timed out after {_SCHTASKS_TIMEOUT_S}s")
+    except OSError as e:
+        return (1, "", f"schtasks invocation failed: {e}")
+
+
+def _should_fall_back(code: int, detail: str) -> bool:
+    """Decide whether a schtasks failure should use the Startup-folder fallback.
+
+    True for the synthetic timeout code 124, or when ``detail`` matches one
+    of the known fallback patterns (a ``None``/empty detail never matches).
+    """
+    if code == 124:
+        return True
+    return _FALLBACK_PATTERNS.search(detail or "") is not None
+
+
+# ---------------------------------------------------------------------------
+# Paths: where we stash our task script and where Startup lives
+# ---------------------------------------------------------------------------
+
+def get_task_name() -> str:
+    """Return the Scheduled Task name for the active profile.
+
+    The default profile maps to ``Hermes_Gateway``; a profile with suffix
+    ``X`` maps to ``Hermes_Gateway_X``.
+    """
+    _assert_windows()
+    # Imported lazily: importing at module load would create a cycle while
+    # hermes_cli is still initializing.
+    from hermes_cli.gateway import _profile_suffix
+
+    profile = _profile_suffix()
+    return f"{_TASK_NAME_DEFAULT}_{profile}" if profile else _TASK_NAME_DEFAULT
+
+
+def _sanitize_filename(value: str) -> str:
+    """Replace characters that Windows forbids in filenames with ``_``.
+
+    Covers ``< > : " / \\ | ? *`` and the control characters 0x00-0x1F.
+    """
+    forbidden = {ord(ch): "_" for ch in '<>:"/\\|?*'}
+    forbidden.update({code: "_" for code in range(0x20)})
+    return value.translate(forbidden)
+
+
+def get_task_script_path() -> Path:
+    """Return the path of the generated wrapper script the task invokes.
+
+    The script lives at ``<hermes home>/gateway-service/<task_name>.cmd``,
+    where the home directory comes from ``get_hermes_home()`` and the task
+    name is sanitized for use as a filename. The ``gateway-service``
+    directory is created as a side effect if it does not exist yet.
+    """
+    _assert_windows()
+    from hermes_cli.config import get_hermes_home
+
+    service_dir = Path(get_hermes_home()).joinpath("gateway-service")
+    service_dir.mkdir(parents=True, exist_ok=True)
+    file_name = _sanitize_filename(get_task_name()) + ".cmd"
+    return service_dir / file_name
+
+
+def _startup_dir() -> Path:
+    """Resolve the current user's Startup folder from the environment.
+
+    Prefers ``%APPDATA%``; otherwise derives ``AppData\\Roaming`` from
+    ``%USERPROFILE%`` (or ``HOME``). Values are stripped, so whitespace-only
+    variables count as unset.
+
+    Raises:
+        RuntimeError: If none of the variables yields a usable path.
+    """
+    startup_tail = ("Microsoft", "Windows", "Start Menu", "Programs", "Startup")
+
+    roaming = os.environ.get("APPDATA", "").strip()
+    if roaming:
+        return Path(roaming).joinpath(*startup_tail)
+
+    profile_root = os.environ.get("USERPROFILE", "").strip()
+    if not profile_root:
+        profile_root = os.environ.get("HOME", "").strip()
+    if not profile_root:
+        raise RuntimeError("neither APPDATA nor USERPROFILE is set — cannot resolve Startup folder")
+    return Path(profile_root).joinpath("AppData", "Roaming", *startup_tail)
+
+
+def get_startup_entry_path() -> Path:
+    """Return the Startup-folder ``.cmd`` path used by the fallback install."""
+    _assert_windows()
+    entry_name = _sanitize_filename(get_task_name()) + ".cmd"
+    return _startup_dir() / entry_name
+
+
+# ---------------------------------------------------------------------------
+# Script rendering
+# ---------------------------------------------------------------------------
+
+def _build_gateway_cmd_script(
+    python_path: str,
+    working_dir: str,
+    hermes_home: str,
+    profile_arg: str,
+) -> str:
+    """Render the ``gateway.cmd`` wrapper; every line ends with CRLF.
+
+    The generated script, in order:
+      - switches to ``working_dir`` (``cd /d``)
+      - sets HERMES_HOME, PYTHONIOENCODING, HERMES_GATEWAY_DETACHED and
+        VIRTUAL_ENV (two directories above the resolved ``python_path``)
+      - runs ``python -m hermes_cli.main [profile args] gateway run --replace``
+
+    PATH is deliberately left alone: the script inherits whatever PATH the
+    launching context provides rather than rewriting it.
+    """
+    script = [
+        "@echo off",
+        f"rem {_TASK_DESCRIPTION}",
+        f"cd /d {_quote_cmd_script_arg(working_dir)}",
+        f'set "HERMES_HOME={hermes_home}"',
+        'set "PYTHONIOENCODING=utf-8"',
+        'set "HERMES_GATEWAY_DETACHED=1"',
+    ]
+    # VIRTUAL_ENV lets the gateway's own python detection find the venv
+    # if someone imports hermes_constants-based logic during startup.
+    venv_root = Path(python_path).resolve().parent.parent
+    script.append(f'set "VIRTUAL_ENV={str(venv_root)}"')
+
+    command = [python_path, "-m", "hermes_cli.main"]
+    if profile_arg:
+        command += profile_arg.split()
+    command += ["gateway", "run", "--replace"]
+    script.append(" ".join(map(_quote_cmd_script_arg, command)))
+
+    return "".join(f"{line}\r\n" for line in script)
+
+
+def _build_startup_launcher(script_path: Path) -> str:
+    """Render the small Startup-folder ``.cmd`` that chains to ``script_path``.
+
+    Output is three CRLF-terminated lines: ``@echo off``, a ``rem``
+    description, and a ``start`` command that runs the wrapper minimized.
+    """
+    target = _quote_cmd_script_arg(str(script_path))
+    # ``start "" /min`` detaches with a minimized console window.
+    # ``/d /c`` on cmd.exe skips AUTORUN and runs the target script once.
+    launch_line = f'start "" /min cmd.exe /d /c {target}'
+    script = ("@echo off", f"rem {_TASK_DESCRIPTION}", launch_line)
+    return "".join(f"{line}\r\n" for line in script)
+
+
+def _write_task_script() -> Path:
+    """Render the gateway wrapper script, write it to disk, return its path.
+
+    The file is written as UTF-8 with ``newline=""`` so the CRLF line
+    endings produced by the renderer are stored verbatim.
+    """
+    _assert_windows()
+    # Local imports to avoid circular-init at module load time.
+    from hermes_cli.config import get_hermes_home
+    from hermes_cli.gateway import (
+        PROJECT_ROOT,
+        _profile_arg,
+        get_python_path,
+    )
+
+    interpreter = get_python_path()
+    project_dir = str(PROJECT_ROOT)
+    home = str(Path(get_hermes_home()).resolve())
+    rendered = _build_gateway_cmd_script(
+        interpreter, project_dir, home, _profile_arg(home)
+    )
+
+    destination = get_task_script_path()
+    destination.write_text(rendered, encoding="utf-8", newline="")
+    return destination
+
+
+# ---------------------------------------------------------------------------
+# Install / uninstall
+# ---------------------------------------------------------------------------
+
+def _resolve_task_user() -> str | None:
+    """Return ``DOMAIN\\USER`` if available, else the bare user name, else None.
+
+    The user name is the first non-empty value among USERNAME, USER and
+    LOGNAME. A name that already contains a backslash is returned as-is;
+    otherwise USERDOMAIN, when set, is prepended.
+    """
+    candidates = (os.environ.get(key) for key in ("USERNAME", "USER", "LOGNAME"))
+    username = next((name for name in candidates if name), None)
+    if not username:
+        return None
+    if "\\" in username:
+        return username
+    domain = os.environ.get("USERDOMAIN")
+    if domain:
+        return f"{domain}\\{username}"
+    return username
+
+
+def _install_scheduled_task(task_name: str, script_path: Path) -> tuple[bool, str]:
+    """Create or repoint the Scheduled Task. Returns ``(success, detail)``.
+
+    Strategy:
+      1. ``/Change`` the task's ``/TR`` — succeeds only if the task already
+         exists, and leaves its trigger and settings untouched.
+      2. Otherwise ``/Create`` it, first as the current user in interactive,
+         no-stored-password mode (``/RU <user> /NP /IT``) when a user can be
+         resolved, then without those flags.
+
+    On failure ``detail`` carries the exit code and output of the last
+    ``/Create`` attempt.
+    """
+    action = _quote_schtasks_arg(str(script_path))
+
+    change_code, _change_out, _change_err = _exec_schtasks(
+        ["/Change", "/TN", task_name, "/TR", action]
+    )
+    if change_code == 0:
+        return (True, f"Updated existing Scheduled Task {task_name!r}")
+
+    create_argv = [
+        "/Create", "/F",
+        "/SC", "ONLOGON",
+        "/RL", "LIMITED",
+        "/TN", task_name,
+        "/TR", action,
+    ]
+    run_as = _resolve_task_user()
+    attempts = [create_argv]
+    if run_as:
+        # Preferred variant goes first; the bare one is the retry.
+        attempts.insert(0, [*create_argv, "/RU", run_as, "/NP", "/IT"])
+
+    failure_code, failure_text = 1, ""
+    for argv in attempts:
+        code, out, err = _exec_schtasks(argv)
+        if code == 0:
+            return (True, f"Created Scheduled Task {task_name!r}")
+        failure_code = code
+        failure_text = err or out or ""
+    return (False, f"schtasks /Create failed (code {failure_code}): {failure_text.strip()}")
+
+
+def _install_startup_entry(script_path: Path) -> Path:
+    """Write the Startup-folder launcher for ``script_path``; return its path.
+
+    The Startup directory is created if missing. The launcher is written as
+    UTF-8 with its CRLF line endings preserved (``newline=""``).
+    """
+    launcher_path = get_startup_entry_path()
+    launcher_path.parent.mkdir(parents=True, exist_ok=True)
+    launcher_text = _build_startup_launcher(script_path)
+    launcher_path.write_text(launcher_text, encoding="utf-8", newline="")
+    return launcher_path
+
+
+def _derive_venv_pythonw(python_exe: str) -> str:
+    """Return the sibling ``pythonw.exe`` of ``python_exe`` when it exists.
+
+    The candidate is built by appending ``w`` to the file stem and keeping
+    the suffix (``python.exe`` -> ``pythonw.exe``). ``pythonw.exe`` is the
+    console-less interpreter, which is what detached daemons want. When no
+    such file exists the original path is returned unchanged — the caller
+    must still set CREATE_NO_WINDOW in that case.
+    """
+    original = Path(python_exe)
+    windowless = original.with_name(f"{original.stem}w{original.suffix}")
+    return str(windowless) if windowless.exists() else python_exe
+
+
+def _build_gateway_argv() -> tuple[list[str], str, dict[str, str]]:
+    """Assemble ``(argv, working_dir, env_overlay)`` for a direct gateway spawn.
+
+    This is the same logical command the generated gateway.cmd runs, but as
+    a native argv for ``subprocess.Popen`` with no cmd.exe layer. The
+    interpreter is the ``pythonw.exe`` sibling when one exists, and
+    VIRTUAL_ENV is derived from that interpreter's location.
+    """
+    _assert_windows()
+    from hermes_cli.config import get_hermes_home
+    from hermes_cli.gateway import (
+        PROJECT_ROOT,
+        _profile_arg,
+        get_python_path,
+    )
+
+    interpreter = _derive_venv_pythonw(get_python_path())
+    project_dir = str(PROJECT_ROOT)
+    home = str(Path(get_hermes_home()).resolve())
+    profile_arg = _profile_arg(home)
+
+    profile_tokens = profile_arg.split() if profile_arg else []
+    argv = [
+        interpreter, "-m", "hermes_cli.main",
+        *profile_tokens,
+        "gateway", "run", "--replace",
+    ]
+
+    venv_root = Path(interpreter).resolve().parent.parent
+    overlay = {
+        "HERMES_HOME": home,
+        "PYTHONIOENCODING": "utf-8",
+        "HERMES_GATEWAY_DETACHED": "1",
+        "VIRTUAL_ENV": str(venv_root),
+    }
+    return argv, project_dir, overlay
+
+
+def _spawn_detached(script_path: Path | None = None) -> int:
+    """Start the gateway as a detached background process and return its PID.
+
+    The interpreter is launched directly (no cmd.exe shim) with
+    DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP | CREATE_NO_WINDOW |
+    CREATE_BREAKAWAY_FROM_JOB, stdin on DEVNULL and stdout/stderr appended
+    to ``<hermes home>/logs/gateway-stdio.log``. If that raises ``OSError``
+    the spawn is retried once without the breakaway flag.
+
+    ``script_path`` is accepted for API symmetry with older callers but is
+    ignored.
+    """
+    _assert_windows()
+    argv, working_dir, env_overlay = _build_gateway_argv()
+
+    # Inherit PATH etc. from the current env, overlay our required vars.
+    child_env = dict(os.environ)
+    child_env.update(env_overlay)
+
+    detached_process = 0x00000008        # no console attached to child
+    new_process_group = 0x00000200       # child won't receive our Ctrl+C
+    no_window = 0x08000000               # belt-and-braces no-console flag
+    breakaway_from_job = 0x01000000      # escape the parent's job object so
+    #                                      parent-job teardown can't reap us
+    all_flags = detached_process | new_process_group | no_window | breakaway_from_job
+
+    # Stray stdout/stderr (print(), native stderr) goes to a sidecar log;
+    # the gateway's regular logging still writes gateway.log on its own.
+    from hermes_cli.config import get_hermes_home
+
+    logs_dir = Path(get_hermes_home()) / "logs"
+    logs_dir.mkdir(parents=True, exist_ok=True)
+    stdio_log = logs_dir / "gateway-stdio.log"
+
+    def _launch(creationflags: int) -> subprocess.Popen:
+        # The handle only needs to stay open until the child has inherited it.
+        with open(stdio_log, "ab", buffering=0) as sink:
+            return subprocess.Popen(
+                argv,
+                cwd=working_dir,
+                env=child_env,
+                creationflags=creationflags,
+                close_fds=True,
+                stdin=subprocess.DEVNULL,
+                stdout=sink,
+                stderr=sink,
+            )
+
+    try:
+        proc = _launch(all_flags)
+    except OSError:
+        # CREATE_BREAKAWAY_FROM_JOB can fail with "access denied" when the
+        # parent's job object doesn't permit breakaway (some Windows
+        # Terminal configs). Retry without it.
+        proc = _launch(all_flags & ~breakaway_from_job)
+    return proc.pid
+
+
+def install(force: bool = False) -> None:
+    """Install the gateway as a Scheduled Task, with a Startup-folder fallback.
+
+    Re-running is safe: the wrapper script is regenerated and the task is
+    updated or recreated each time. ``force`` exists only for API parity
+    with ``launchd_install`` / ``systemd_install`` and is not consulted.
+
+    Raises:
+        RuntimeError: If task creation fails for a reason that does not
+            qualify for the Startup-folder fallback.
+    """
+    _assert_windows()
+    task_name = get_task_name()
+    script_path = _write_task_script()
+
+    ok, detail = _install_scheduled_task(task_name, script_path)
+
+    if not ok:
+        if not _should_fall_back(1, detail):
+            # Unknown schtasks error — surface it and bail.
+            raise RuntimeError(f"Windows gateway install failed: {detail}")
+        print(f"↻ Scheduled Task install blocked ({detail.splitlines()[0]}) — using Startup folder fallback")
+        entry = _install_startup_entry(script_path)
+        pid = _spawn_detached(script_path)
+        print(f"✓ Installed Windows login item: {entry}")
+        print(f"  Task script: {script_path}")
+        _report_gateway_start(f"direct spawn (PID {pid})")
+        _print_next_steps()
+        return
+
+    print(f"✓ {detail}")
+    print(f"  Task script: {script_path}")
+    # Start it now so the user doesn't have to log off/on.
+    run_code, _run_out, run_err = _exec_schtasks(["/Run", "/TN", task_name])
+    if run_code != 0:
+        # The task exists but /Run failed; spawn directly as a backstop.
+        pid = _spawn_detached(script_path)
+        _report_gateway_start(
+            f"direct spawn (PID {pid}; schtasks /Run said: {run_err.strip()})"
+        )
+    else:
+        _report_gateway_start("Scheduled Task")
+    _print_next_steps()
+
+
+def _wait_for_gateway_ready(timeout_s: float = 6.0, interval_s: float = 0.4) -> list[int]:
+    """Poll for a live gateway process for up to ``timeout_s`` seconds.
+
+    Args:
+        timeout_s: Total time budget for polling, in seconds.
+        interval_s: Pause between polls, in seconds.
+
+    Returns:
+        The PIDs found by the first successful poll. An empty list means
+        nothing came up in time — the caller should surface that to the
+        user as a failed start.
+    """
+    from hermes_cli.gateway import find_gateway_pids
+
+    # Use the monotonic clock for the deadline: the wall clock (time.time)
+    # can jump on NTP sync or manual changes, stretching or cutting the wait.
+    deadline = time.monotonic() + timeout_s
+    while time.monotonic() < deadline:
+        pids = list(find_gateway_pids())
+        if pids:
+            return pids
+        time.sleep(interval_s)
+    return []
+
+
+def _report_gateway_start(via: str) -> None:
+    """Wait for the gateway process and print a success or warning message.
+
+    ``via`` names the launch mechanism and is echoed in the output. When no
+    process is detected, the two log files to inspect are printed as
+    ready-to-paste ``type`` commands.
+    """
+    pids = _wait_for_gateway_ready()
+    if pids:
+        pid_text = ", ".join(str(pid) for pid in pids)
+        print(f"✓ Gateway started via {via} (PID: {pid_text})")
+        return
+
+    print(f"⚠ Launched gateway via {via}, but no process detected after 6s.")
+    print("  Check the log for startup errors:")
+    from hermes_cli.config import get_hermes_home
+    for log_name in ("gateway.log", "gateway-stdio.log"):
+        print(f"    type {Path(get_hermes_home()).resolve()}\\logs\\{log_name}")
+
+
+def _print_next_steps() -> None:
+    """Print follow-up commands: how to check status and view the log."""
+    from hermes_cli.config import get_hermes_home
+
+    home = Path(get_hermes_home()).resolve()
+    hints = [
+        "",
+        "Next steps:",
+        "  hermes gateway status                      # Check status",
+        f"  type {home}\\logs\\gateway.log       # View logs",
+    ]
+    print("\n".join(hints))
+
+
+def uninstall() -> None:
+    """Remove both the Scheduled Task and the Startup-folder fallback, if present.
+
+    Every artefact is attempted independently: a failure to delete the
+    Scheduled Task or one of the files is reported as a warning and the
+    remaining artefacts are still processed. Files that are already absent
+    are skipped silently.
+    """
+    _assert_windows()
+    task_name = get_task_name()
+    script_path = get_task_script_path()
+    startup_entry = get_startup_entry_path()
+
+    if is_task_registered():
+        code, _out, err = _exec_schtasks(["/Delete", "/F", "/TN", task_name])
+        if code == 0:
+            print(f"✓ Removed Scheduled Task {task_name!r}")
+        else:
+            print(f"⚠ schtasks /Delete returned code {code}: {err.strip()}")
+
+    for path, label in [(startup_entry, "Windows login item"), (script_path, "Task script")]:
+        try:
+            path.unlink()
+        except FileNotFoundError:
+            # Already gone — nothing to report.
+            continue
+        except OSError as exc:
+            # E.g. the file is locked or access is denied. Keep going so the
+            # other artefact is still removed instead of aborting midway.
+            print(f"⚠ Could not remove {label} ({path}): {exc}")
+            continue
+        print(f"✓ Removed {label}: {path}")
+
+
+# ---------------------------------------------------------------------------
+# Status / start / stop / restart
+# ---------------------------------------------------------------------------
+
+def is_task_registered() -> bool:
+    """True when ``schtasks /Query`` for the gateway task name exits with code 0."""
+    query_args = ["/Query", "/TN", get_task_name()]
+    exit_code, _stdout, _stderr = _exec_schtasks(query_args)
+    return exit_code == 0
+
+
+def is_startup_entry_installed() -> bool:
+    """True when the Startup-folder entry path exists on disk."""
+    entry_path = get_startup_entry_path()
+    return entry_path.exists()
+
+
+def is_installed() -> bool:
+    """True when either the schtasks entry or the Startup fallback is present."""
+    # The Scheduled Task is checked first; the Startup entry is only
+    # consulted when no task is registered.
+    if is_task_registered():
+        return True
+    return is_startup_entry_installed()
+
+
+def query_task_status() -> dict[str, str]:
+    """Parse ``schtasks /Query /V /FO LIST`` and pull the interesting keys.
+
+    Returns a dict with any of the lower-cased keys ``status``,
+    ``last run time`` and ``last run result`` that appear in the output.
+    An empty dict is returned when the query exits with a nonzero code.
+    """
+    exit_code, stdout, _stderr = _exec_schtasks(
+        ["/Query", "/TN", get_task_name(), "/V", "/FO", "LIST"]
+    )
+    if exit_code != 0:
+        return {}
+
+    parsed: dict[str, str] = {}
+    for entry in stdout.splitlines():
+        # Split on the first colon only, so values such as timestamps keep theirs.
+        name, colon, rest = entry.strip().partition(":")
+        if not colon:
+            # Blank line or a line without a "key: value" shape.
+            continue
+        name = name.strip().lower()
+        rest = rest.strip()
+        if name == "last result":
+            # Some Windows locales emit "Last Result" instead of "Last Run Result";
+            # it only fills the slot when the canonical key has not been seen yet.
+            parsed.setdefault("last run result", rest)
+        elif name in {"status", "last run time", "last run result"}:
+            parsed[name] = rest
+    return parsed
+
+
+def _gateway_pids() -> list[int]:
+    """Return the gateway PIDs found by the scanner in ``hermes_cli.gateway``, as a list."""
+    from hermes_cli.gateway import find_gateway_pids
+
+    found = find_gateway_pids()
+    return list(found)
+
+
+def status(deep: bool = False) -> None:
+    """Print a status report for the Windows gateway service.
+
+    Reports how the service is installed (Scheduled Task, Startup-folder
+    entry, or not at all) and whether a gateway process is running. With
+    ``deep=True`` the task name and the script / startup-entry paths are
+    printed as well. When nothing is installed and nothing is running, an
+    install hint is appended.
+    """
+    _assert_windows()
+    task_name = get_task_name()
+    has_task = is_task_registered()
+    has_startup_entry = is_startup_entry_installed()
+    running = _gateway_pids()
+
+    if has_task:
+        print(f"✓ Scheduled Task registered: {task_name}")
+        details = query_task_status()
+        # An empty dict (failed query) simply prints no detail lines.
+        for field_name in ("status", "last run time", "last run result"):
+            if field_name in details:
+                print(f"  {field_name.title()}: {details[field_name]}")
+    elif has_startup_entry:
+        print(f"✓ Windows login item installed: {get_startup_entry_path()}")
+    else:
+        print("✗ Gateway service not installed")
+
+    if running:
+        pid_text = ", ".join(str(pid) for pid in running)
+        print(f"✓ Gateway process running (PID: {pid_text})")
+    else:
+        print("✗ No gateway process detected")
+
+    if deep:
+        print()
+        print(f"  Task name:     {task_name}")
+        print(f"  Task script:   {get_task_script_path()}")
+        print(f"  Startup entry: {get_startup_entry_path()}")
+
+    if not (has_task or has_startup_entry or running):
+        print()
+        print("To install:")
+        print("  hermes gateway install")
+
+
+def start() -> None:
+    """Start the gateway. Prefers /Run on the scheduled task if present.
+
+    When no task is registered, or ``schtasks /Run`` exits with a nonzero
+    code, the gateway is spawned directly as a detached process instead.
+    """
+    _assert_windows()
+    if is_task_registered():
+        exit_code, _stdout, stderr = _exec_schtasks(["/Run", "/TN", get_task_name()])
+        if exit_code != 0:
+            print(f"⚠ schtasks /Run failed (code {exit_code}): {stderr.strip()} — falling back to direct spawn")
+        else:
+            _report_gateway_start(f"Scheduled Task {get_task_name()!r}")
+            return
+
+    # Direct spawn — no script_path needed with the new argv-based spawner.
+    spawned_pid = _spawn_detached()
+    _report_gateway_start(f"direct spawn (PID {spawned_pid})")
+
+
+def stop() -> None:
+    """Stop the gateway. Tries /End on the scheduled task, then kills any stragglers.
+
+    Prints a summary of what was stopped. A nonzero ``schtasks /End`` exit
+    whose error text contains "not running" is treated as a normal
+    situation and not reported.
+    """
+    _assert_windows()
+    from hermes_cli.gateway import kill_gateway_processes
+
+    stopped_any = False
+    if is_task_registered():
+        code, _out, err = _exec_schtasks(["/End", "/TN", get_task_name()])
+        # Normalise once so a missing (None) error stream is handled the same
+        # way in the check and in the message below.
+        err_text = (err or "").strip()
+        # schtasks returns nonzero when the task isn't currently running — don't treat that as an error.
+        if code == 0:
+            stopped_any = True
+        elif "not running" not in err_text.lower():
+            print(f"⚠ schtasks /End returned code {code}: {err_text}")
+
+    killed = kill_gateway_processes(all_profiles=False)
+    if killed:
+        stopped_any = True
+        print(f"✓ Killed {killed} gateway process(es)")
+    if stopped_any:
+        print("✓ Gateway stopped")
+    else:
+        print("✗ No gateway was running")
+
+
+def restart() -> None:
+    """Stop the gateway, pause briefly, then start it again."""
+    _assert_windows()
+    stop()
+    # Give Windows a moment to release the listening port.
+    settle_delay_s = 1.0
+    time.sleep(settle_delay_s)
+    start()
--- a/hermes_cli/goals.py
+++ b/hermes_cli/goals.py
@ -0,0 +1,593 @@
+"""Persistent session goals — the Ralph loop for Hermes.
+
+A goal is a free-form user objective that stays active across turns. After
+each turn completes, a small judge call asks an auxiliary model "is this
+goal satisfied by the assistant's last response?". If not, Hermes feeds a
+continuation prompt back into the same session and keeps working until the
+goal is done, turn budget is exhausted, the user pauses/clears it, or the
+user sends a new message (which takes priority and pauses the goal loop).
+
+State is persisted in SessionDB's ``state_meta`` table keyed by
+``goal:<session_id>`` so ``/resume`` picks it up.
+
+Design notes / invariants:
+
+- The continuation prompt is just a normal user message appended to the
+  session via ``run_conversation``. No system-prompt mutation, no toolset
+  swap — prompt caching stays intact.
+- Judge failures are fail-OPEN: ``continue``. A broken judge must not wedge
+  progress; the turn budget is the backstop.
+- When a real user message arrives mid-loop it preempts the continuation
+  prompt and also pauses the goal loop for that turn (we still re-judge
+  after, so if the user's message happens to complete the goal the judge
+  will say ``done``).
+- This module has zero hard dependency on ``cli.HermesCLI`` or the gateway
+  runner — both wire the same ``GoalManager`` in.
+
+Nothing in this module touches the agent's system prompt or toolset.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+import time
+from dataclasses import dataclass, asdict
+from typing import Any, Dict, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Constants & defaults
+# ──────────────────────────────────────────────────────────────────────
+
+# Default turn budget for a goal (the default of ``GoalState.max_turns``).
+DEFAULT_MAX_TURNS = 20
+# Default ``timeout`` handed to the judge's chat-completion request
+# (presumably seconds — confirm against the auxiliary client).
+DEFAULT_JUDGE_TIMEOUT = 30.0
+# Cap, in characters, on how much of the agent's last response is sent to
+# the judge.
+_JUDGE_RESPONSE_SNIPPET_CHARS = 4000
+# After this many consecutive judge *parse* failures (empty output / non-JSON),
+# the loop auto-pauses and points the user at the goal_judge config. API /
+# transport errors do NOT count toward this — those are transient. This guards
+# against small models (e.g. deepseek-v4-flash) that cannot follow the strict
+# JSON reply contract; without it the loop runs until the turn budget is
+# exhausted with every reply shaped like `judge returned empty response` or
+# `judge reply was not JSON`.
+DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES = 3
+
+
+# User-role message fed back into the session to keep working on the goal.
+# ``{goal}`` is filled in with the goal text via ``str.format``.
+CONTINUATION_PROMPT_TEMPLATE = (
+    "[Continuing toward your standing goal]\n"
+    "Goal: {goal}\n\n"
+    "Continue working toward this goal. Take the next concrete step. "
+    "If you believe the goal is complete, state so explicitly and stop. "
+    "If you are blocked and need input from the user, say so clearly and stop."
+)
+
+
+# System prompt for the judge call. It pins the reply contract that
+# ``_parse_judge_response`` expects: one JSON object with ``done`` and
+# ``reason`` keys.
+JUDGE_SYSTEM_PROMPT = (
+    "You are a strict judge evaluating whether an autonomous agent has "
+    "achieved a user's stated goal. You receive the goal text and the "
+    "agent's most recent response. Your only job is to decide whether "
+    "the goal is fully satisfied based on that response.\n\n"
+    "A goal is DONE only when:\n"
+    "- The response explicitly confirms the goal was completed, OR\n"
+    "- The response clearly shows the final deliverable was produced, OR\n"
+    "- The response explains the goal is unachievable / blocked / needs "
+    "user input (treat this as DONE with reason describing the block).\n\n"
+    "Otherwise the goal is NOT done — CONTINUE.\n\n"
+    "Reply ONLY with a single JSON object on one line:\n"
+    '{\"done\": <true|false>, \"reason\": \"<one-sentence rationale>\"}'
+)
+
+
+# User-role message for the judge call. ``{goal}`` and ``{response}`` are
+# filled in (after truncation) by ``judge_goal``.
+JUDGE_USER_PROMPT_TEMPLATE = (
+    "Goal:\n{goal}\n\n"
+    "Agent's most recent response:\n{response}\n\n"
+    "Is the goal satisfied?"
+)
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Dataclass
+# ──────────────────────────────────────────────────────────────────────
+
+
+@dataclass
+class GoalState:
+    """Per-session goal record that round-trips through JSON."""
+
+    goal: str
+    status: str = "active"          # active | paused | done | cleared
+    turns_used: int = 0
+    max_turns: int = DEFAULT_MAX_TURNS
+    created_at: float = 0.0
+    last_turn_at: float = 0.0
+    last_verdict: Optional[str] = None        # "done" | "continue" | "skipped"
+    last_reason: Optional[str] = None
+    paused_reason: Optional[str] = None       # why we auto-paused (budget, etc.)
+    consecutive_parse_failures: int = 0       # judge-output parse failures in a row
+
+    def to_json(self) -> str:
+        """Serialize all fields to a JSON string, keeping non-ASCII text verbatim."""
+        payload = asdict(self)
+        return json.dumps(payload, ensure_ascii=False)
+
+    @classmethod
+    def from_json(cls, raw: str) -> "GoalState":
+        """Rebuild a state from ``to_json`` output.
+
+        Missing keys fall back to the field defaults; for the numeric
+        fields a stored null / falsy value also falls back to the default.
+        """
+        record = json.loads(raw)
+
+        def _number(cast, key, fallback):
+            # Absent, null and other falsy stored values all collapse to the fallback.
+            return cast(record.get(key, fallback) or fallback)
+
+        return cls(
+            goal=record.get("goal", ""),
+            status=record.get("status", "active"),
+            turns_used=_number(int, "turns_used", 0),
+            max_turns=_number(int, "max_turns", DEFAULT_MAX_TURNS),
+            created_at=_number(float, "created_at", 0.0),
+            last_turn_at=_number(float, "last_turn_at", 0.0),
+            last_verdict=record.get("last_verdict"),
+            last_reason=record.get("last_reason"),
+            paused_reason=record.get("paused_reason"),
+            consecutive_parse_failures=_number(int, "consecutive_parse_failures", 0),
+        )
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Persistence (SessionDB state_meta)
+# ──────────────────────────────────────────────────────────────────────
+
+
+def _meta_key(session_id: str) -> str:
+    """Build the ``state_meta`` key under which a session's goal is stored."""
+    return "goal:{}".format(session_id)
+
+
+# One SessionDB instance per HERMES_HOME path (keyed by the stringified path).
+_DB_CACHE: Dict[str, Any] = {}
+
+
+def _get_session_db() -> Optional[Any]:
+    """Return a SessionDB instance for the current HERMES_HOME, or None.
+
+    SessionDB has no built-in singleton, and opening a fresh connection on
+    every /goal call would thrash the file, so one instance is cached per
+    ``hermes_home`` path — profile switches still pick up the right DB.
+    Import and instantiation failures are logged at debug level and turned
+    into ``None`` so tests and non-standard launchers can still use the
+    GoalManager.
+    """
+    try:
+        from hermes_constants import get_hermes_home
+        from hermes_state import SessionDB
+
+        cache_key = str(get_hermes_home())
+    except Exception as exc:  # pragma: no cover
+        logger.debug("GoalManager: SessionDB bootstrap failed (%s)", exc)
+        return None
+
+    session_db = _DB_CACHE.get(cache_key)
+    if session_db is None:
+        try:
+            session_db = SessionDB()
+        except Exception as exc:  # pragma: no cover
+            logger.debug("GoalManager: SessionDB() raised (%s)", exc)
+            return None
+        _DB_CACHE[cache_key] = session_db
+    return session_db
+
+
+def load_goal(session_id: str) -> Optional[GoalState]:
+    """Load the goal for a session, or None if none exists.
+
+    None is also returned when the session id is empty, the DB is
+    unavailable, the lookup raises, or the stored value cannot be parsed.
+    """
+    db = _get_session_db() if session_id else None
+    if db is None:
+        return None
+
+    try:
+        stored = db.get_meta(_meta_key(session_id))
+    except Exception as exc:
+        logger.debug("GoalManager: get_meta failed: %s", exc)
+        return None
+
+    if not stored:
+        return None
+
+    try:
+        return GoalState.from_json(stored)
+    except Exception as exc:
+        logger.warning("GoalManager: could not parse stored goal for %s: %s", session_id, exc)
+        return None
+
+
+def save_goal(session_id: str, state: GoalState) -> None:
+    """Persist a goal to SessionDB. No-op if DB unavailable.
+
+    An empty session id is also a no-op. A failing write is logged at debug
+    level and otherwise ignored.
+    """
+    if not session_id:
+        return
+    db = _get_session_db()
+    if db is not None:
+        try:
+            db.set_meta(_meta_key(session_id), state.to_json())
+        except Exception as exc:
+            logger.debug("GoalManager: set_meta failed: %s", exc)
+
+
+def clear_goal(session_id: str) -> None:
+    """Mark a goal cleared in the DB (preserved for audit, status=cleared).
+
+    Does nothing when no goal can be loaded for the session.
+    """
+    current = load_goal(session_id)
+    if current is not None:
+        current.status = "cleared"
+        save_goal(session_id, current)
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Judge
+# ──────────────────────────────────────────────────────────────────────
+
+
+def _truncate(text: str, limit: int) -> str:
+    """Clip ``text`` to ``limit`` characters, appending a truncation marker.
+
+    Falsy input (``None`` / empty string) yields an empty string; text that
+    already fits is returned unchanged.
+    """
+    if not text:
+        return ""
+    return text if len(text) <= limit else f"{text[:limit]}… [truncated]"
+
+
+# No longer used by the parser below (it cut objects at the first ``}``);
+# kept so any existing import of this name keeps working.
+_JSON_OBJECT_RE = re.compile(r"\{.*?\}", re.DOTALL)
+
+
+def _extract_json_object(text: str) -> Optional[Dict[str, Any]]:
+    """Return the first JSON object embedded anywhere in ``text``, or None."""
+    decoder = json.JSONDecoder()
+    start = text.find("{")
+    while start != -1:
+        try:
+            # raw_decode parses one complete JSON value starting at ``start``
+            # and ignores whatever follows it, so nested objects and braces
+            # inside string values are handled correctly.
+            candidate, _end = decoder.raw_decode(text, start)
+        except ValueError:
+            candidate = None
+        if isinstance(candidate, dict):
+            return candidate
+        start = text.find("{", start + 1)
+    return None
+
+
+def _parse_judge_response(raw: str) -> Tuple[bool, str, bool]:
+    """Parse the judge's reply. Fail-open to ``(False, "<reason>", parse_failed)``.
+
+    Returns ``(done, reason, parse_failed)``. ``parse_failed`` is True when the
+    judge returned output that couldn't be interpreted as the expected JSON
+    verdict (empty body, prose, malformed JSON). Callers use that flag to
+    auto-pause after N consecutive parse failures so a weak judge model
+    doesn't silently burn the turn budget.
+
+    The verdict object may be wrapped in a markdown code fence or surrounded
+    by prose; the first JSON object found in the reply is used. ``done`` may
+    be a JSON boolean or one of the strings ``true`` / ``yes`` / ``1`` /
+    ``done`` (case-insensitive); any other value is coerced with ``bool``.
+    """
+    if not raw:
+        return False, "judge returned empty response", True
+
+    text = raw.strip()
+
+    # Strip markdown code fences the model may wrap JSON in.
+    if text.startswith("```"):
+        text = text.strip("`")
+        # Peel off leading json/JSON/etc tag
+        nl = text.find("\n")
+        if nl != -1:
+            text = text[nl + 1:]
+
+    # First try: parse the whole blob.
+    data: Any = None
+    try:
+        data = json.loads(text)
+    except Exception:
+        # Second try: pull the first JSON object out of the surrounding text.
+        data = _extract_json_object(text)
+
+    if not isinstance(data, dict):
+        # Last resort: scan the untouched reply. Fence peeling can drop the
+        # first line of an object that started right after the backticks.
+        data = _extract_json_object(raw)
+
+    if not isinstance(data, dict):
+        return False, f"judge reply was not JSON: {_truncate(raw, 200)!r}", True
+
+    done_val = data.get("done")
+    if isinstance(done_val, str):
+        done = done_val.strip().lower() in {"true", "yes", "1", "done"}
+    else:
+        done = bool(done_val)
+    reason = str(data.get("reason") or "").strip()
+    if not reason:
+        reason = "no reason provided"
+    return done, reason, False
+
+
+def judge_goal(
+    goal: str,
+    last_response: str,
+    *,
+    timeout: float = DEFAULT_JUDGE_TIMEOUT,
+) -> Tuple[str, str, bool]:
+    """Ask the auxiliary model whether the goal is satisfied.
+
+    Returns ``(verdict, reason, parse_failed)``. ``verdict`` is ``"done"``,
+    ``"continue"``, or ``"skipped"`` (returned for an empty goal).
+
+    ``parse_failed`` is True only when the judge call itself succeeded but
+    its output was unusable (empty or non-JSON). Client, API and transport
+    problems report False — they are transient and fail open. Callers use
+    the flag to auto-pause after N consecutive parse failures (see
+    ``DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES``).
+
+    The function is deliberately fail-open: any error yields
+    ``("continue", "...", False)`` so a broken judge doesn't wedge progress —
+    the turn budget and the consecutive-parse-failures auto-pause are the
+    backstops.
+    """
+    if not goal.strip():
+        return "skipped", "empty goal", False
+    if not last_response.strip():
+        # No substantive reply this turn — almost certainly not done yet.
+        return "continue", "empty response (nothing to evaluate)", False
+
+    try:
+        from agent.auxiliary_client import get_text_auxiliary_client
+    except Exception as exc:
+        logger.debug("goal judge: auxiliary client import failed: %s", exc)
+        return "continue", "auxiliary client unavailable", False
+
+    try:
+        client, model = get_text_auxiliary_client("goal_judge")
+    except Exception as exc:
+        logger.debug("goal judge: get_text_auxiliary_client failed: %s", exc)
+        return "continue", "auxiliary client unavailable", False
+
+    if client is None or not model:
+        return "continue", "no auxiliary client configured", False
+
+    # Both the goal and the response are clipped before being sent.
+    user_message = JUDGE_USER_PROMPT_TEMPLATE.format(
+        goal=_truncate(goal, 2000),
+        response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
+    )
+    conversation = [
+        {"role": "system", "content": JUDGE_SYSTEM_PROMPT},
+        {"role": "user", "content": user_message},
+    ]
+
+    try:
+        completion = client.chat.completions.create(
+            model=model,
+            messages=conversation,
+            temperature=0,
+            max_tokens=200,
+            timeout=timeout,
+        )
+    except Exception as exc:
+        logger.info("goal judge: API call failed (%s) — falling through to continue", exc)
+        return "continue", f"judge error: {type(exc).__name__}", False
+
+    # A malformed completion object is treated like an empty reply.
+    try:
+        reply_text = completion.choices[0].message.content or ""
+    except Exception:
+        reply_text = ""
+
+    done, reason, parse_failed = _parse_judge_response(reply_text)
+    if done:
+        verdict = "done"
+    else:
+        verdict = "continue"
+    logger.info("goal judge: verdict=%s reason=%s", verdict, _truncate(reason, 120))
+    return verdict, reason, parse_failed
+
+
+# ──────────────────────────────────────────────────────────────────────
+# GoalManager — the orchestration surface CLI + gateway talk to
+# ──────────────────────────────────────────────────────────────────────
+
+
+class GoalManager:
+    """Per-session goal state + continuation decisions.
+
+    The CLI and gateway each hold one ``GoalManager`` per live session.
+
+    Methods:
+
+    - ``set(goal)`` — start a new standing goal.
+    - ``clear()`` — remove the active goal.
+    - ``pause()`` / ``resume()`` — explicit user controls.
+    - ``status_line()`` — printable one-liner.
+    - ``evaluate_after_turn(last_response)`` — call the judge, update state,
+      and return a decision dict the caller uses to drive the next turn.
+    - ``next_continuation_prompt()`` — the canonical user-role message to
+      feed back into ``run_conversation``.
+    """
+
+    def __init__(self, session_id: str, *, default_max_turns: int = DEFAULT_MAX_TURNS):
+        """Bind to ``session_id`` and load any goal already persisted for it."""
+        self.session_id = session_id
+        # A falsy value (0 / None) falls back to the module default.
+        self.default_max_turns = int(default_max_turns or DEFAULT_MAX_TURNS)
+        self._state: Optional[GoalState] = load_goal(session_id)
+
+    # --- introspection ------------------------------------------------
+
+    @property
+    def state(self) -> Optional[GoalState]:
+        """The current goal state, or None when no goal is loaded."""
+        return self._state
+
+    def is_active(self) -> bool:
+        """True when a goal exists and its status is ``active``."""
+        return self._state is not None and self._state.status == "active"
+
+    def has_goal(self) -> bool:
+        """True when a goal exists and is either ``active`` or ``paused``."""
+        return self._state is not None and self._state.status in {"active", "paused"}
+
+    def status_line(self) -> str:
+        """Return a printable one-line summary of the goal and its turn usage."""
+        s = self._state
+        if s is None or s.status in {"cleared",}:
+            return "No active goal. Set one with /goal <text>."
+        turns = f"{s.turns_used}/{s.max_turns} turns"
+        if s.status == "active":
+            return f"⊙ Goal (active, {turns}): {s.goal}"
+        if s.status == "paused":
+            extra = f" — {s.paused_reason}" if s.paused_reason else ""
+            return f"⏸ Goal (paused, {turns}{extra}): {s.goal}"
+        if s.status == "done":
+            return f"✓ Goal done ({turns}): {s.goal}"
+        return f"Goal ({s.status}, {turns}): {s.goal}"
+
+    # --- mutation -----------------------------------------------------
+
+    def set(self, goal: str, *, max_turns: Optional[int] = None) -> GoalState:
+        """Start a new active goal, replacing any existing one, and persist it.
+
+        Raises:
+            ValueError: if ``goal`` is empty after stripping whitespace.
+        """
+        goal = (goal or "").strip()
+        if not goal:
+            raise ValueError("goal text is empty")
+        state = GoalState(
+            goal=goal,
+            status="active",
+            turns_used=0,
+            max_turns=int(max_turns) if max_turns else self.default_max_turns,
+            created_at=time.time(),
+            last_turn_at=0.0,
+        )
+        self._state = state
+        save_goal(self.session_id, state)
+        return state
+
+    def pause(self, reason: str = "user-paused") -> Optional[GoalState]:
+        """Mark the goal paused with ``reason`` and persist; None if no goal."""
+        if not self._state:
+            return None
+        self._state.status = "paused"
+        self._state.paused_reason = reason
+        save_goal(self.session_id, self._state)
+        return self._state
+
+    def resume(self, *, reset_budget: bool = True) -> Optional[GoalState]:
+        """Reactivate the goal and persist; None if no goal.
+
+        Clears the pause reason and the judge parse-failure streak. With
+        ``reset_budget`` (the default) the used-turn counter restarts at 0.
+        """
+        if not self._state:
+            return None
+        self._state.status = "active"
+        self._state.paused_reason = None
+        # Start a fresh streak. Otherwise a goal auto-paused for parse
+        # failures would still sit at the threshold and be paused again by
+        # the very next unparseable judge reply.
+        self._state.consecutive_parse_failures = 0
+        if reset_budget:
+            self._state.turns_used = 0
+        save_goal(self.session_id, self._state)
+        return self._state
+
+    def clear(self) -> None:
+        """Persist the goal with status ``cleared`` and drop it from this manager."""
+        if self._state is None:
+            return
+        self._state.status = "cleared"
+        save_goal(self.session_id, self._state)
+        self._state = None
+
+    def mark_done(self, reason: str) -> None:
+        """Mark the goal done, recording ``reason`` as the last verdict's reason."""
+        if not self._state:
+            return
+        self._state.status = "done"
+        self._state.last_verdict = "done"
+        self._state.last_reason = reason
+        save_goal(self.session_id, self._state)
+
+    # --- the main entry point called after every turn -----------------
+
+    def evaluate_after_turn(
+        self,
+        last_response: str,
+        *,
+        user_initiated: bool = True,
+    ) -> Dict[str, Any]:
+        """Run the judge and update state. Return a decision dict.
+
+        ``user_initiated`` distinguishes a real user prompt (True) from a
+        continuation prompt we fed ourselves (False). Both increment
+        ``turns_used`` because both consume model budget; the flag is not
+        otherwise consulted here.
+
+        Decision keys:
+          - ``status``: current goal status after update
+          - ``should_continue``: bool — caller should fire another turn
+          - ``continuation_prompt``: str or None
+          - ``verdict``: "done" | "continue" | "skipped" | "inactive"
+          - ``reason``: str
+          - ``message``: user-visible one-liner to print/send
+        """
+        state = self._state
+        if state is None or state.status != "active":
+            return {
+                "status": state.status if state else None,
+                "should_continue": False,
+                "continuation_prompt": None,
+                "verdict": "inactive",
+                "reason": "no active goal",
+                "message": "",
+            }
+
+        # Count the turn that just finished.
+        state.turns_used += 1
+        state.last_turn_at = time.time()
+
+        verdict, reason, parse_failed = judge_goal(state.goal, last_response)
+        state.last_verdict = verdict
+        state.last_reason = reason
+
+        # Track consecutive judge parse failures. Any turn that is not a
+        # parse failure resets the streak — that includes API / transport
+        # errors (reported with parse_failed=False), so a flaky network
+        # doesn't trip the auto-pause meant for bad judge models.
+        if parse_failed:
+            state.consecutive_parse_failures += 1
+        else:
+            state.consecutive_parse_failures = 0
+
+        if verdict == "done":
+            state.status = "done"
+            save_goal(self.session_id, state)
+            return {
+                "status": "done",
+                "should_continue": False,
+                "continuation_prompt": None,
+                "verdict": "done",
+                "reason": reason,
+                "message": f"✓ Goal achieved: {reason}",
+            }
+
+        # Auto-pause when the judge model can't produce the expected JSON
+        # verdict N turns in a row. Points the user at the goal_judge config
+        # so they can route this side task to a model that follows the
+        # contract (e.g. google/gemini-3-flash-preview). Without this guard,
+        # weak judge models burn the entire turn budget returning prose or
+        # empty strings.
+        if state.consecutive_parse_failures >= DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES:
+            state.status = "paused"
+            state.paused_reason = (
+                f"judge model returned unparseable output {state.consecutive_parse_failures} turns in a row"
+            )
+            save_goal(self.session_id, state)
+            return {
+                "status": "paused",
+                "should_continue": False,
+                "continuation_prompt": None,
+                "verdict": "continue",
+                "reason": reason,
+                "message": (
+                    f"⏸ Goal paused — the judge model ({state.consecutive_parse_failures} turns) "
+                    "isn't returning the required JSON verdict. Route the judge to a stricter "
+                    "model in ~/.hermes/config.yaml:\n"
+                    "  auxiliary:\n"
+                    "    goal_judge:\n"
+                    "      provider: openrouter\n"
+                    "      model: google/gemini-3-flash-preview\n"
+                    "Then /goal resume to continue."
+                ),
+            }
+
+        # Turn budget is the final backstop against an endless loop.
+        if state.turns_used >= state.max_turns:
+            state.status = "paused"
+            state.paused_reason = f"turn budget exhausted ({state.turns_used}/{state.max_turns})"
+            save_goal(self.session_id, state)
+            return {
+                "status": "paused",
+                "should_continue": False,
+                "continuation_prompt": None,
+                "verdict": "continue",
+                "reason": reason,
+                "message": (
+                    f"⏸ Goal paused — {state.turns_used}/{state.max_turns} turns used. "
+                    "Use /goal resume to keep going, or /goal clear to stop."
+                ),
+            }
+
+        save_goal(self.session_id, state)
+        return {
+            "status": "active",
+            "should_continue": True,
+            "continuation_prompt": self.next_continuation_prompt(),
+            "verdict": "continue",
+            "reason": reason,
+            "message": (
+                f"↻ Continuing toward goal ({state.turns_used}/{state.max_turns}): {reason}"
+            ),
+        }
+
+    def next_continuation_prompt(self) -> Optional[str]:
+        """Return the continuation message for the active goal, or None if inactive."""
+        if not self._state or self._state.status != "active":
+            return None
+        return CONTINUATION_PROMPT_TEMPLATE.format(goal=self._state.goal)
+
+
+# Names exported by ``from hermes_cli.goals import *``.
+__all__ = [
+    "GoalState",
+    "GoalManager",
+    "CONTINUATION_PROMPT_TEMPLATE",
+    "DEFAULT_MAX_TURNS",
+    "load_goal",
+    "save_goal",
+    "clear_goal",
+    "judge_goal",
+]
--- a/hermes_cli/hooks.py
+++ b/hermes_cli/hooks.py
@ -32,11 +32,11 @@ def hooks_command(args) -> None:
        print("Run 'hermes hooks --help' for details.")
        return

-    if sub in ("list", "ls"):
+    if sub in {"list", "ls"}:
        _cmd_list(args)
    elif sub == "test":
        _cmd_test(args)
-    elif sub in ("revoke", "remove", "rm"):
+    elif sub in {"revoke", "remove", "rm"}:
        _cmd_revoke(args)
    elif sub == "doctor":
        _cmd_doctor(args)
@ -205,7 +205,7 @@ def _cmd_test(args) -> None:

    if getattr(args, "payload_file", None):
        try:
-            custom = json.loads(Path(args.payload_file).read_text())
+            custom = json.loads(Path(args.payload_file).read_text(encoding="utf-8"))
            if isinstance(custom, dict):
                payload.update(custom)
            else:
@ -220,7 +220,7 @@ def _cmd_test(args) -> None:
    if getattr(args, "for_tool", None):
        specs = [
            s for s in specs
-            if s.event not in ("pre_tool_call", "post_tool_call")
+            if s.event not in {"pre_tool_call", "post_tool_call"}
            or s.matches_tool(args.for_tool)
        ]

--- a/hermes_cli/kanban.py
+++ b/hermes_cli/kanban.py
--- a/hermes_cli/kanban_db.py
+++ b/hermes_cli/kanban_db.py
--- a/hermes_cli/kanban_diagnostics.py
+++ b/hermes_cli/kanban_diagnostics.py
@ -0,0 +1,776 @@
+"""Kanban diagnostics — structured, actionable distress signals for tasks.
+
+A ``Diagnostic`` is a machine-readable description of something that's wrong
+with a kanban task: a hallucinated card id, a spawn crash-loop, a task
+stuck blocked for too long, etc. Each one carries:
+
+* A **kind** (canonical code; UI/tests match on this).
+* A **severity** (``warning`` / ``error`` / ``critical``).
+* A **title** (one-line human description) and **detail** (longer text).
+* A list of **suggested actions** — structured entries the dashboard
+  turns into buttons and the CLI turns into hints.
+
+Rules run over (task, recent events, recent runs) and emit diagnostics.
+They are stateless and read-only — no DB writes. Callers compute
+diagnostics on demand (on ``/board`` load, ``/tasks/:id`` fetch, or
+``hermes kanban diagnostics``).
+
+Design goals:
+
+* Fixable-on-the-operator's-side signals only (missing config, phantom
+  ids, crash loop). Not "the provider returned 502 once" — that's a
+  transient runtime blip, not a diagnostic.
+* Recoverable: every diagnostic comes with at least one suggested
+  recovery action the operator can actually take from the UI.
+* Auto-clearing: when the underlying failure mode resolves (a clean
+  ``completed`` event arrives, a spawn succeeds, the task gets
+  unblocked), the diagnostic stops firing. The audit event trail stays.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Callable, Iterable, Optional
+import json
+import time
+
+
+# Severity rungs, ordered least → most urgent. The UI colors them
+# amber (warning), orange (error), red (critical). Sorted outputs put
+# critical first so operators see the worst fires at the top.
+# A severity's index in this tuple is its urgency rank: both
+# compute_task_diagnostics and severity_of_highest compare by position,
+# so new rungs must be inserted in urgency order.
+SEVERITY_ORDER = ("warning", "error", "critical")
+
+
+@dataclass
+class DiagnosticAction:
+    """One recovery step an operator can take for a diagnostic.
+
+    How the UI and CLI render the action is driven by ``kind``:
+
+    * ``reclaim`` / ``reassign`` — POST to the matching /tasks/:id/*
+      endpoint; the dashboard wires it into the recovery popover.
+    * ``unblock`` — PATCH the status back to ``ready`` (stuck-blocked
+      diagnostics).
+    * ``cli_hint`` — a shell command to print/copy (e.g.
+      ``hermes -p <profile> auth``); no HTTP side effect.
+    * ``open_docs`` — deep-link to the URL stored in ``payload.url``.
+    * ``comment`` — prompt the operator to leave a comment (stuck-blocked
+      tasks waiting on human input).
+
+    ``suggested=True`` flags the recommended first step so the UI can
+    highlight it; several actions may be flagged when equally valid.
+    """
+
+    kind: str
+    label: str
+    payload: dict = field(default_factory=dict)
+    suggested: bool = False
+
+    def to_dict(self) -> dict:
+        """Return the action as a plain dict (``payload`` is not copied)."""
+        exported = ("kind", "label", "payload", "suggested")
+        return {attr: getattr(self, attr) for attr in exported}
+
+
+@dataclass
+class Diagnostic:
+    """A single distress signal currently active on a task."""
+
+    kind: str
+    severity: str  # "warning" | "error" | "critical"
+    title: str
+    detail: str
+    actions: list[DiagnosticAction] = field(default_factory=list)
+    first_seen_at: int = 0
+    last_seen_at: int = 0
+    count: int = 1
+    # Run id the diagnostic is scoped to, if any. None = task-wide.
+    run_id: Optional[int] = None
+    # Free-form structured payload for the UI (phantom ids, failure count).
+    data: dict = field(default_factory=dict)
+
+    def to_dict(self) -> dict:
+        """Return a plain-dict view with every action expanded via
+        its own ``to_dict``; key order follows the field order."""
+        exported = (
+            "kind", "severity", "title", "detail", "actions",
+            "first_seen_at", "last_seen_at", "count", "run_id", "data",
+        )
+        result = {attr: getattr(self, attr) for attr in exported}
+        # Re-assigning an existing key keeps its position in the dict.
+        result["actions"] = [action.to_dict() for action in self.actions]
+        return result
+
+
+# ---------------------------------------------------------------------------
+# Rule helpers
+# ---------------------------------------------------------------------------
+
+def _task_field(task, name, default=None):
+    """Fetch ``name`` from ``task`` whatever its representation.
+
+    Accepts mapping-like objects exposing ``keys()`` and ``[]`` (plain
+    dicts, sqlite3.Row), as well as attribute-style objects such as
+    dataclasses. Returns ``default`` when ``task`` is ``None`` or the
+    field cannot be found.
+    """
+    if task is None:
+        return default
+    # Mapping-style lookup first. Any failure here (no usable keys(),
+    # indexing error, ...) is deliberately ignored so the dict /
+    # attribute fallbacks below still get their turn.
+    try:
+        supports_keys = hasattr(task, "keys")
+        if supports_keys and name in task.keys():
+            return task[name]
+    except Exception:
+        pass
+    if not isinstance(task, dict):
+        return getattr(task, name, default)
+    return task.get(name, default)
+
+
+def _parse_payload(ev) -> dict:
+    """Return the event's payload as a dict, tolerating JSON strings.
+
+    ``event.payload`` may already be a dict or may be a JSON-encoded
+    string. Anything that does not end up as a JSON *object* — a missing
+    payload, malformed JSON, or valid JSON that decodes to a list,
+    string, number, bool or null — yields an empty dict, so callers can
+    always use ``.get`` on the result.
+    """
+    raw = _task_field(ev, "payload", None)
+    if isinstance(raw, dict):
+        return raw
+    if not isinstance(raw, str):
+        return {}
+    try:
+        decoded = json.loads(raw)
+    except (ValueError, RecursionError):
+        # Malformed (or absurdly nested) JSON: treat as "no payload".
+        return {}
+    # Valid JSON is not necessarily an object; only dicts honour the
+    # declared return type.
+    return decoded if isinstance(decoded, dict) else {}
+
+
+def _event_kind(ev) -> str:
+    """Return the event's ``kind``, or ``""`` when missing or falsy."""
+    kind = _task_field(ev, "kind", "")
+    return kind if kind else ""
+
+
+def _event_ts(ev) -> int:
+    """Return the event's ``created_at`` as an int; 0 when missing or falsy."""
+    created = _task_field(ev, "created_at", 0)
+    if not created:
+        return 0
+    return int(created)
+
+
+def _active_hallucination_events(
+    events: Iterable[Any],
+    kind: str,
+) -> list[Any]:
+    """Collect the events of ``kind`` not yet superseded by a clean event.
+
+    The events are walked once in the order given. A ``completed`` or
+    ``edited`` event discards everything gathered so far; an event whose
+    kind equals ``kind`` is kept. What remains are the matching events
+    with no clean event after them.
+
+    The input must be in arrival order (sorted by id ascending) for
+    "after" to mean anything.
+    """
+    superseding_kinds = ("completed", "edited")
+    pending: list[Any] = []
+    for event in events:
+        event_kind = _event_kind(event)
+        if event_kind in superseding_kinds:
+            # A clean event wipes out every earlier hit.
+            pending = []
+            continue
+        if event_kind == kind:
+            pending.append(event)
+    return pending
+
+
+def _latest_clean_event_ts(events: Iterable[Any]) -> int:
+    """Return the newest timestamp among ``completed`` / ``edited`` events.
+
+    Yields 0 when no such event exists. Intended for general "was this
+    task ever cleanly completed" lookups; the hallucination rules rely
+    on ``_active_hallucination_events`` instead because they need strict
+    ordering rather than timestamps.
+    """
+    clean_stamps = [
+        _event_ts(event)
+        for event in events
+        if _event_kind(event) in {"completed", "edited"}
+    ]
+    # Seed with 0 so an empty list (or only non-positive stamps) gives 0.
+    return max([0, *clean_stamps])
+
+
+# Standard always-available actions. Every diagnostic can offer these as
+# fallbacks regardless of kind — they're the two baseline recovery
+# primitives the kernel supports.
+def _generic_recovery_actions(task: Any, *, running: bool) -> list[DiagnosticAction]:
+    """Return the baseline recovery actions for a task.
+
+    A ``reassign`` action is always included, with
+    ``payload["reclaim_first"]`` set to ``running``. When ``running`` is
+    true a ``reclaim`` action is placed ahead of it. ``task`` itself is
+    not inspected.
+    """
+    reassign = DiagnosticAction(
+        kind="reassign",
+        label="Reassign to different profile",
+        payload={"reclaim_first": running},
+    )
+    if not running:
+        return [reassign]
+    reclaim = DiagnosticAction(
+        kind="reclaim",
+        label="Reclaim task",
+        payload={},
+    )
+    return [reclaim, reassign]
+
+
+# ---------------------------------------------------------------------------
+# Rule implementations
+# ---------------------------------------------------------------------------
+
+# Each rule takes (task, events, runs, now_ts, config) and returns
+# zero or more Diagnostic instances. ``events`` / ``runs`` are lists of
+# kanban_db.Event / kanban_db.Run (or plain dicts matching the same
+# shape — for test convenience).
+# compute_task_diagnostics calls every registered rule with exactly
+# these five positional arguments, so new rules must accept all of
+# them even when some go unused.
+
+RuleFn = Callable[[Any, list[Any], list[Any], int, dict], list[Diagnostic]]
+
+
+def _rule_hallucinated_cards(task, events, runs, now, cfg) -> list[Diagnostic]:
+    """Flag completions rejected by the blocked-hallucination gate.
+
+    The gate fires when a worker calls kanban_complete with
+    created_cards that do not exist or were not created by the
+    completing profile. The task keeps its prior state and the operator
+    has to decide what happens next.
+
+    Emits one ``hallucinated_cards`` diagnostic (severity ``error``)
+    covering every still-active ``completion_blocked_hallucination``
+    event, with the de-duplicated phantom ids in ``data``. Clears itself
+    once a ``completed`` or ``edited`` event follows the blocked one.
+    """
+    blocked = _active_hallucination_events(events, "completion_blocked_hallucination")
+    if not blocked:
+        return []
+
+    # Gather phantom ids across all hits, first occurrence wins.
+    unique_ids: list[str] = []
+    for event in blocked:
+        reported = _parse_payload(event).get("phantom_cards", []) or []
+        for card_id in reported:
+            if card_id in unique_ids:
+                continue
+            unique_ids.append(card_id)
+
+    is_running = _task_field(task, "status") == "running"
+    recovery: list[DiagnosticAction] = [
+        DiagnosticAction(
+            kind="comment",
+            label="Add a comment explaining what to do",
+            suggested=False,
+        ),
+        *_generic_recovery_actions(task, running=is_running),
+    ]
+    explanation = (
+        "The completing worker declared created_cards that either didn't "
+        "exist or weren't created by its profile. The completion was "
+        "blocked and the task stayed in its prior state. "
+        "Usually means the worker hallucinated ids instead of capturing "
+        "return values from kanban_create."
+    )
+    diagnostic = Diagnostic(
+        kind="hallucinated_cards",
+        severity="error",
+        title="Worker claimed cards that don't exist",
+        detail=explanation,
+        actions=recovery,
+        first_seen_at=_event_ts(blocked[0]),
+        last_seen_at=_event_ts(blocked[-1]),
+        count=len(blocked),
+        data={"phantom_ids": unique_ids},
+    )
+    return [diagnostic]
+
+
+def _rule_prose_phantom_refs(task, events, runs, now, cfg) -> list[Diagnostic]:
+    """Flag completion summaries that mention unresolvable ``t_<hex>`` ids.
+
+    This is the advisory prose scan: it never blocks anything and is
+    reported as a ``warning`` only. Emits one ``prose_phantom_refs``
+    diagnostic covering every still-active
+    ``suspected_hallucinated_references`` event, and clears itself once
+    a ``completed`` or ``edited`` event arrives after the suspected one.
+    """
+    suspects = _active_hallucination_events(events, "suspected_hallucinated_references")
+    if not suspects:
+        return []
+
+    # De-duplicate the referenced ids while keeping first-seen order.
+    unique_refs: list[str] = []
+    for event in suspects:
+        mentioned = _parse_payload(event).get("phantom_refs", []) or []
+        for ref in mentioned:
+            if ref in unique_refs:
+                continue
+            unique_refs.append(ref)
+
+    is_running = _task_field(task, "status") == "running"
+    explanation = (
+        "The completion summary mentions task ids that don't resolve "
+        "in this board's database. The completion itself succeeded, "
+        "but downstream consumers parsing the summary may be pointed "
+        "at cards that never existed."
+    )
+    diagnostic = Diagnostic(
+        kind="prose_phantom_refs",
+        severity="warning",
+        title="Completion summary references unknown task ids",
+        detail=explanation,
+        actions=_generic_recovery_actions(task, running=is_running),
+        first_seen_at=_event_ts(suspects[0]),
+        last_seen_at=_event_ts(suspects[-1]),
+        count=len(suspects),
+        data={"phantom_refs": unique_refs},
+    )
+    return [diagnostic]
+
+
+def _rule_repeated_failures(task, events, runs, now, cfg) -> list[Diagnostic]:
+    """Flag a task whose unified ``consecutive_failures`` counter is high.
+
+    Fires regardless of the failure mode (spawn error, timeout, crash):
+    operationally they look alike — the kernel keeps retrying and the
+    operator has to step in.
+
+    Threshold: cfg["failure_threshold"] (default 3), with the legacy
+    ``spawn_failure_threshold`` key read as a fallback. The default sits
+    below the circuit breaker's default (5), so the diagnostic shows up
+    BEFORE the breaker trips, while the dispatcher is still retrying.
+
+    Severity is ``critical`` once the counter reaches twice the
+    threshold, ``error`` otherwise.
+    """
+    legacy_threshold = cfg.get("spawn_failure_threshold", 3)
+    threshold = int(cfg.get("failure_threshold", legacy_threshold))
+
+    # Prefer the unified counter; fall back to the legacy column so rows
+    # materialised without the migration still work.
+    failures = _task_field(task, "consecutive_failures", None)
+    if failures is None:
+        failures = _task_field(task, "spawn_failures", 0)
+    if failures is None or failures < threshold:
+        return []
+
+    last_err = _task_field(task, "last_failure_error", None)
+    if last_err is None:
+        last_err = _task_field(task, "last_spawn_error", None)
+    assignee = _task_field(task, "assignee")
+
+    # Peek at run outcomes, newest first, to classify the latest failure
+    # so the title and suggested action can be specific.
+    failure_outcomes = {"spawn_failed", "timed_out", "crashed"}
+    runs_by_id = sorted(runs, key=lambda r: _task_field(r, "id", 0))
+    most_recent_outcome = next(
+        (
+            outcome
+            for outcome in (
+                _task_field(run, "outcome") for run in reversed(runs_by_id)
+            )
+            if outcome in failure_outcomes
+        ),
+        None,
+    )
+
+    actions: list[DiagnosticAction] = []
+    if most_recent_outcome == "spawn_failed":
+        # Spawn itself is failing — points at profile setup.
+        if assignee and assignee != "default":
+            doctor_cmd = f"hermes -p {assignee} doctor"
+            auth_cmd = f"hermes -p {assignee} auth"
+            actions.append(DiagnosticAction(
+                kind="cli_hint",
+                label=f"Verify profile: {doctor_cmd}",
+                payload={"command": doctor_cmd},
+                suggested=True,
+            ))
+            actions.append(DiagnosticAction(
+                kind="cli_hint",
+                label=f"Fix profile auth: {auth_cmd}",
+                payload={"command": auth_cmd},
+            ))
+    elif most_recent_outcome in {"timed_out", "crashed"}:
+        # The worker started but died; the logs are where to look.
+        task_id = _task_field(task, "id")
+        if task_id:
+            log_cmd = f"hermes kanban log {task_id}"
+            actions.append(DiagnosticAction(
+                kind="cli_hint",
+                label=f"Check logs: {log_cmd}",
+                payload={"command": log_cmd},
+                suggested=True,
+            ))
+    is_running = _task_field(task, "status") == "running"
+    actions.extend(_generic_recovery_actions(task, running=is_running))
+
+    if failures >= threshold * 2:
+        severity = "critical"
+    else:
+        severity = "error"
+
+    # Cap the error text at 500 chars, marking truncation with an ellipsis.
+    err_text = last_err.strip() if last_err else ""
+    err_snippet = ""
+    if err_text:
+        err_snippet = err_text[:500]
+        if len(err_text) > 500:
+            err_snippet += "…"
+
+    labels_by_outcome = {
+        "spawn_failed": "spawn",
+        "timed_out": "timeout",
+        "crashed": "crash",
+    }
+    outcome_label = labels_by_outcome.get(most_recent_outcome or "", "failure")
+
+    if not err_snippet:
+        title = f"Agent {outcome_label} x{failures} (no error recorded)"
+        detail = (
+            f"This task has failed {failures} times in a row "
+            f"(most recent: {outcome_label}) but no error text was "
+            f"captured. Check the suggested command or the worker log."
+        )
+    else:
+        first_line = err_snippet.splitlines()[0][:160]
+        title = f"Agent {outcome_label} x{failures}: {first_line}"
+        detail = (
+            f"This task has failed {failures} times in a row "
+            f"(most recent: {outcome_label}). Full last error:\n\n"
+            f"{err_snippet}\n\n"
+            f"The dispatcher will keep retrying until the consecutive-"
+            f"failures counter trips the circuit breaker (default 5), "
+            f"at which point the task auto-blocks. Fix the root cause "
+            f"and reclaim to retry."
+        )
+
+    diagnostic = Diagnostic(
+        kind="repeated_failures",
+        severity=severity,
+        title=title,
+        detail=detail,
+        actions=actions,
+        first_seen_at=now,
+        last_seen_at=now,
+        count=failures,
+        data={
+            "consecutive_failures": failures,
+            "most_recent_outcome": most_recent_outcome,
+            "last_error": last_err,
+        },
+    )
+    return [diagnostic]
+
+
+def _rule_repeated_crashes(task, events, runs, now, cfg) -> list[Diagnostic]:
+    """The worker spawns fine but keeps crashing mid-run. Check the most
+    recent runs' outcomes; N trailing ``crashed`` outcomes without a
+    ``completed`` / ``reclaimed`` in between means something about the
+    task + profile combo is broken (OOM, missing dependency, a tool it
+    needs is down).
+
+    Threshold: cfg["crash_threshold"] (default 2).
+
+    Narrower than ``repeated_failures`` — fires earlier (2 crashes vs 3
+    total failures) so the operator gets a crash-specific heads-up
+    before the unified rule kicks in. Suppresses itself when the
+    unified rule would fire, to avoid double-flagging. The suppression
+    check reads the failure counter the same way ``repeated_failures``
+    does (``consecutive_failures``, falling back to the legacy
+    ``spawn_failures`` column), so both rules agree on legacy rows too.
+
+    Severity is ``critical`` once the streak reaches twice the crash
+    threshold, ``error`` otherwise.
+    """
+    failure_threshold = int(cfg.get(
+        "failure_threshold",
+        cfg.get("spawn_failure_threshold", 3),
+    ))
+    # Mirror _rule_repeated_failures: unified counter first, legacy
+    # column second. Reading only the unified column would let both
+    # rules fire on rows that still carry the legacy counter.
+    unified_counter = _task_field(task, "consecutive_failures", None)
+    if unified_counter is None:
+        unified_counter = _task_field(task, "spawn_failures", 0)
+    # Unified rule will catch this — let it handle to avoid double fire.
+    if (unified_counter or 0) >= failure_threshold:
+        return []
+
+    threshold = int(cfg.get("crash_threshold", 2))
+    ordered = sorted(runs, key=lambda r: _task_field(r, "id", 0))
+    # Count trailing consecutive 'crashed' outcomes, newest run first.
+    consecutive = 0
+    last_err = None
+    for r in reversed(ordered):
+        outcome = _task_field(r, "outcome")
+        if outcome == "crashed":
+            consecutive += 1
+            # Keep the error from the newest crash that recorded one.
+            if last_err is None:
+                last_err = _task_field(r, "error")
+        elif outcome in {"completed", "reclaimed"}:
+            # A success (or manual reclaim) breaks the streak.
+            break
+        else:
+            # Other outcomes (timed_out, blocked, spawn_failed, gave_up)
+            # aren't crash signals — don't count them, but they also
+            # don't break the crash streak.
+            continue
+    if consecutive < threshold:
+        return []
+    task_id = _task_field(task, "id")
+    actions: list[DiagnosticAction] = []
+    if task_id:
+        actions.append(DiagnosticAction(
+            kind="cli_hint",
+            label=f"Check logs: hermes kanban log {task_id}",
+            payload={"command": f"hermes kanban log {task_id}"},
+            suggested=True,
+        ))
+    running = _task_field(task, "status") == "running"
+    actions.extend(_generic_recovery_actions(task, running=running))
+    severity = "critical" if consecutive >= threshold * 2 else "error"
+    # Put the actual error up-front so operators see WHAT broke without
+    # having to open the logs. Truncate defensively — these can be huge
+    # (full tracebacks).
+    err_text = last_err.strip() if last_err else ""
+    err_snippet = err_text[:500] + ("…" if len(err_text) > 500 else "")
+    if err_snippet:
+        title = f"Agent crashed {consecutive}x: {err_snippet.splitlines()[0][:160]}"
+        detail = (
+            f"The last {consecutive} runs ended with outcome=crashed. "
+            f"Full last error:\n\n{err_snippet}"
+        )
+    else:
+        title = f"Agent crashed {consecutive}x (no error recorded)"
+        detail = (
+            f"The last {consecutive} runs ended with outcome=crashed but "
+            f"no error text was captured. Check the worker log for more."
+        )
+    return [Diagnostic(
+        kind="repeated_crashes",
+        severity=severity,
+        title=title,
+        detail=detail,
+        actions=actions,
+        first_seen_at=now,
+        last_seen_at=now,
+        count=consecutive,
+        data={"consecutive_crashes": consecutive, "last_error": last_err},
+    )]
+
+
+def _rule_stuck_in_blocked(task, events, runs, now, cfg) -> list[Diagnostic]:
+    """Flag a ``blocked`` task that nobody has responded to for too long.
+
+    Threshold: cfg["blocked_stale_hours"] (default 24), measured from the
+    newest ``blocked`` event. A ``commented`` or ``unblocked`` event
+    timestamped after that block silences the signal. Reported as a
+    ``warning`` so humans know an unblock is pending.
+    """
+    stale_after_hours = float(cfg.get("blocked_stale_hours", 24))
+    if _task_field(task, "status") != "blocked":
+        return []
+
+    # Timestamp of the newest ``blocked`` event; 0 means none was found.
+    blocked_at = max([
+        0,
+        *(_event_ts(ev) for ev in events if _event_kind(ev) == "blocked"),
+    ])
+    if blocked_at == 0:
+        return []
+
+    age_hours = (now - blocked_at) / 3600.0
+    if age_hours < stale_after_hours:
+        return []
+
+    # Any response after the block means the task is not being ignored.
+    answered = any(
+        _event_kind(ev) in {"commented", "unblocked"} and _event_ts(ev) > blocked_at
+        for ev in events
+    )
+    if answered:
+        return []
+
+    whole_hours = int(age_hours)
+    nudge = DiagnosticAction(
+        kind="comment",
+        label="Add a comment / unblock the task",
+        suggested=True,
+    )
+    diagnostic = Diagnostic(
+        kind="stuck_in_blocked",
+        severity="warning",
+        title=f"Task has been blocked for {whole_hours}h",
+        detail=(
+            f"This task transitioned to blocked {whole_hours}h ago and "
+            f"has had no comments or unblock attempts since. Blocked tasks "
+            f"are waiting for human input — check the block reason and "
+            f"either unblock with feedback or answer with a comment."
+        ),
+        actions=[nudge],
+        first_seen_at=blocked_at,
+        last_seen_at=blocked_at,
+        count=1,
+        data={"blocked_at": blocked_at, "age_hours": round(age_hours, 1)},
+    )
+    return [diagnostic]
+
+
+def _rule_stranded_in_ready(task, events, runs, now, cfg) -> list[Diagnostic]:
+    """Flag an assigned ``ready`` task that no worker has picked up.
+
+    Threshold: cfg["stranded_threshold_seconds"] (default 1800 = 30 min),
+    measured from the newest event that moved the task into ``ready``
+    (``created``, ``promoted``, ``reclaimed`` or ``unblocked``), falling
+    back to the task row's ``created_at`` when no such event exists.
+
+    The check is purely age-based and deliberately ignores WHY nothing
+    claimed the task, so it covers all of these uniformly:
+
+    * a typo'd assignee that matches no profile or external worker;
+    * a deleted profile whose tasks were left behind;
+    * an external worker pool (Codex CLI, Claude Code lane, custom
+      daemon) that is down, hung, or was never started;
+    * a misconfigured dispatcher (wrong board, wrong HERMES_HOME).
+
+    Before this rule those tasks sat in ``skipped_nonspawnable``: the
+    dispatcher rightly skipped them (no respawn loop), but nothing told
+    the operator that actionable work was piling up.
+
+    Skipped on purpose: tasks holding a ``claim_lock`` (someone is
+    working on them) and tasks with an empty assignee (the dispatcher's
+    ``skipped_unassigned`` signal already covers those).
+
+    Severity grows with age: below 2x the threshold is ``warning``,
+    2x up to 6x is ``error``, 6x and beyond is ``critical``.
+    """
+    threshold_seconds = float(
+        cfg.get("stranded_threshold_seconds", 30 * 60)
+    )
+    if _task_field(task, "status") != "ready":
+        return []
+    # A live claim means a worker has the task, even if it has not
+    # reported progress yet; do not second-guess the claim TTL here.
+    if _task_field(task, "claim_lock"):
+        return []
+    assignee = _task_field(task, "assignee") or ""
+    if not assignee.strip():
+        # Unassigned: flagging here would duplicate ``skipped_unassigned``.
+        return []
+
+    # Newest event that put the task into ready. ``created`` = born
+    # ready, ``promoted`` = parent-done auto-promotion, ``reclaimed`` =
+    # TTL/crash recovery, ``unblocked`` = human-driven resume.
+    ready_kinds = {"created", "promoted", "reclaimed", "unblocked"}
+    ready_since = max([
+        0,
+        *(_event_ts(ev) for ev in events if _event_kind(ev) in ready_kinds),
+    ])
+
+    # No qualifying event (very old task, truncated history): use the
+    # row's ``created_at``. Over-flagging an ancient task beats missing
+    # a stranded one.
+    if ready_since == 0:
+        ready_since = int(_task_field(task, "created_at", default=0) or 0)
+    if ready_since == 0:
+        return []
+
+    age_seconds = now - ready_since
+    if age_seconds < threshold_seconds:
+        return []
+
+    # Render the age in the largest sensible unit.
+    if age_seconds < 3600:
+        age_str = f"{int(age_seconds / 60)}m"
+    else:
+        age_str = f"{age_seconds / 3600:.1f}h"
+
+    # Escalate step by step; the later check wins when both hold.
+    severity = "warning"
+    if age_seconds >= threshold_seconds * 2:
+        severity = "error"
+    if age_seconds >= threshold_seconds * 6:
+        severity = "critical"
+
+    reassign = DiagnosticAction(
+        kind="reassign",
+        label="Reassign to a different worker",
+        payload={"current_assignee": assignee},
+    )
+    dispatcher_hint = DiagnosticAction(
+        kind="cli_hint",
+        label="Check dispatcher status",
+        payload={"command": "hermes kanban diagnostics"},
+    )
+    explanation = (
+        f"This task has been ready for {age_str} but nothing has "
+        f"claimed it. Common causes: assignee {assignee!r} is "
+        f"misspelled, the profile was deleted, or the external "
+        f"worker pool for this lane is down. Confirm the assignee "
+        f"is correct and that a worker is actually polling for it."
+    )
+    diagnostic = Diagnostic(
+        kind="stranded_in_ready",
+        severity=severity,
+        title=f"Ready for {age_str} with no worker",
+        detail=explanation,
+        actions=[reassign, dispatcher_hint],
+        first_seen_at=ready_since,
+        last_seen_at=ready_since,
+        count=1,
+        data={
+            "ready_since": ready_since,
+            "age_seconds": int(age_seconds),
+            "assignee": assignee,
+            "threshold_seconds": int(threshold_seconds),
+        },
+    )
+    return [diagnostic]
+
+
+# Registry — order matters: rules higher on the list render first when
+# severity ties. Add new rules here.
+_RULES: list[RuleFn] = [
+    _rule_hallucinated_cards,
+    _rule_prose_phantom_refs,
+    _rule_repeated_failures,
+    _rule_repeated_crashes,
+    _rule_stuck_in_blocked,
+    _rule_stranded_in_ready,
+]
+
+
+# Known kinds (for the UI's filter / legend / i18n keys). Update when
+# rules are added.
+DIAGNOSTIC_KINDS = (
+    "hallucinated_cards",
+    "prose_phantom_refs",
+    "repeated_failures",
+    "repeated_crashes",
+    "stuck_in_blocked",
+    "stranded_in_ready",
+)
+
+
+# Baseline rule settings. compute_task_diagnostics overlays the
+# caller-supplied ``config`` on top of these, so every key here is
+# always present in the ``cfg`` dict the rules receive.
+DEFAULT_CONFIG = {
+    "failure_threshold": 3,
+    # Legacy alias accepted at read time by _rule_repeated_failures.
+    "spawn_failure_threshold": 3,
+    "crash_threshold": 2,
+    "blocked_stale_hours": 24,
+    # Stranded-task threshold. 30 min by default — below that, the
+    # signal is dominated by tasks that are about to be claimed on the
+    # next dispatcher tick (default 60s) and would just be noise.
+    "stranded_threshold_seconds": 30 * 60,
+}
+
+
+def compute_task_diagnostics(
+    task,
+    events: list,
+    runs: list,
+    *,
+    now: Optional[int] = None,
+    config: Optional[dict] = None,
+) -> list[Diagnostic]:
+    """Run every rule against a single task's state and return a
+    severity-sorted list of active diagnostics.
+
+    Args:
+        task: The task row (sqlite Row, dataclass, or dict).
+        events: The task's events, in arrival order.
+        runs: The task's runs.
+        now: Unix timestamp to evaluate against; defaults to the
+            current time.
+        config: Overrides layered on top of ``DEFAULT_CONFIG``. The
+            legacy ``spawn_failure_threshold`` key is honoured when the
+            caller supplies it without ``failure_threshold``. The
+            caller's dict is not modified.
+
+    Sorting: critical first, then error, then warning; ties broken by
+    most-recent ``last_seen_at``.
+
+    A rule that raises is skipped (and logged at debug level) rather
+    than failing the whole computation.
+    """
+    # Local import: this module has no module-level logger.
+    import logging
+
+    now_ts = int(now if now is not None else time.time())
+    overrides = dict(config or {})
+    # DEFAULT_CONFIG always carries ``failure_threshold``, so after the
+    # merge a caller-supplied legacy key would never be consulted by the
+    # rules. Promote it explicitly to keep the back-compat promise.
+    if (
+        "spawn_failure_threshold" in overrides
+        and "failure_threshold" not in overrides
+    ):
+        overrides["failure_threshold"] = overrides["spawn_failure_threshold"]
+    cfg = {**DEFAULT_CONFIG, **overrides}
+    out: list[Diagnostic] = []
+    for rule in _RULES:
+        try:
+            out.extend(rule(task, events, runs, now_ts, cfg))
+        except Exception:
+            # A broken rule must never crash the dashboard. Rule bugs
+            # get caught in tests; in production we'd rather drop the
+            # diagnostic than 500 a whole /board request. Leave a trace
+            # at debug level so the failure can still be found.
+            logging.getLogger(__name__).debug(
+                "kanban diagnostic rule %s failed",
+                getattr(rule, "__name__", rule),
+                exc_info=True,
+            )
+            continue
+    severity_idx = {s: i for i, s in enumerate(SEVERITY_ORDER)}
+    out.sort(
+        key=lambda d: (
+            # Negated rank puts the most urgent first; unknown
+            # severities (rank -1 → key +1) sort after every known one.
+            -severity_idx.get(d.severity, -1),
+            -(d.last_seen_at or 0),
+        )
+    )
+    return out
+
+
+def severity_of_highest(diagnostics: Iterable[Diagnostic]) -> Optional[str]:
+    """Return the most urgent severity among ``diagnostics``.
+
+    Urgency is the severity's position in ``SEVERITY_ORDER``. Severities
+    not listed there are ignored. Returns ``None`` when the input is
+    empty or contains no known severity. Handy for card badges that need
+    a single color.
+    """
+    top_name = None
+    top_rank = -1
+    for diag in diagnostics:
+        try:
+            rank = SEVERITY_ORDER.index(diag.severity)
+        except ValueError:
+            # Unknown severity: can never beat the current best.
+            rank = -1
+        if rank > top_rank:
+            top_rank, top_name = rank, diag.severity
+    return top_name
--- a/hermes_cli/kanban_specify.py
+++ b/hermes_cli/kanban_specify.py
@ -0,0 +1,265 @@
+"""Kanban triage specifier — flesh out a one-liner into a real spec.
+
+Used by ``hermes kanban specify [task_id | --all]``. Takes a task that
+lives in the Triage column (a rough idea, typically only a title), calls
+the auxiliary LLM to produce:
+
+  * A tightened title (optional — only replaces if the model proposes a
+    materially different one)
+  * A concrete body: goal, proposed approach, acceptance criteria
+
+and then flips the task ``triage -> todo`` via
+``kanban_db.specify_triage_task``. The dispatcher promotes it to
+``ready`` on its next tick (or immediately if there are no open parents).
+
+Design notes
+------------
+
+* This module intentionally mirrors ``hermes_cli/goals.py`` — same aux
+  client pattern, same "empty config => skip, don't crash" tolerance.
+  Keeps the surface area tiny and the failure modes predictable.
+
+* The prompt is a short system + user pair. We ask for JSON with
+  ``{title, body}``; if parsing fails, we fall back to treating the
+  whole response as the body and leave the title untouched. No
+  retry loop — one shot, keep cost bounded.
+
+* Structured output / JSON mode is not requested explicitly so the
+  specifier works on providers that don't implement it. The parse
+  is lenient (tolerates markdown code fences around the JSON).
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import re
+from dataclasses import dataclass
+from typing import Optional
+
+from hermes_cli import kanban_db as kb
+
+logger = logging.getLogger(__name__)
+
+
+# System message for the specifier LLM call. It is sent verbatim, so any
+# edit to this text changes model behaviour. The reply contract described
+# here (a JSON object with "title" and "body") is what ``specify_task``
+# parses; replies that break the contract are handled leniently there
+# rather than rejected outright.
+_SYSTEM_PROMPT = """You are the Kanban triage specifier for the Hermes Agent board.
+A user dropped a rough idea into the Triage column. Your job is to turn it
+into a concrete, actionable task spec that an autonomous worker can pick up
+and execute without further clarification.
+
+Output a single JSON object with exactly two keys:
+
+  {
+    "title": "<tightened task title, <= 80 chars, imperative voice>",
+    "body":  "<multi-line spec, see structure below>"
+  }
+
+The body MUST include these sections, each prefixed with a bold markdown
+heading, in this order:
+
+  **Goal** — one sentence, user-facing outcome.
+  **Approach** — 2-5 bullets on how a worker should tackle it.
+  **Acceptance criteria** — checklist of concrete, verifiable conditions.
+  **Out of scope** — short list of things NOT to touch (omit if nothing
+      obvious; never invent scope creep).
+
+Rules:
+  - Keep the tightened title close in meaning to the original idea — do
+    NOT invent a different project.
+  - If the original idea is already detailed, preserve its substance and
+    just reformat into the sections above.
+  - Never add invented requirements the user didn't hint at.
+  - No preamble, no closing remarks, no code fences around the JSON.
+  - Output only the JSON object and nothing else.
+"""
+
+
+# User message template, filled via ``str.format`` with ``task_id``,
+# ``title`` and ``body``. Because it goes through ``format``, any literal
+# brace added to this text would have to be doubled ("{{" / "}}").
+_USER_TEMPLATE = """Task id: {task_id}
+Current title: {title}
+Current body:
+{body}
+"""
+
+
+@dataclass
+class SpecifyOutcome:
+    """Result of specifying a single triage task.
+
+    ``specify_task`` returns one of these for every attempt; expected
+    failures are reported through ``ok=False`` plus ``reason`` rather than
+    by raising.
+    """
+
+    # Id of the task the attempt was made for (echoed from the caller).
+    task_id: str
+    # True only when the task was rewritten and moved out of triage.
+    ok: bool
+    # Short human-readable explanation, e.g. "specified" or
+    # "unknown task id".
+    reason: str = ""
+    # Populated only on success: the stripped title parsed from the LLM
+    # reply, or None when the reply carried no usable title.
+    new_title: Optional[str] = None
+
+
+def _truncate(text: str, limit: int) -> str:
+    """Clip ``text`` to at most ``limit`` characters.
+
+    Text that already fits is returned unchanged. Longer text is cut to
+    ``limit - 1`` characters and a single "…" is appended, so the result
+    is exactly ``limit`` characters long. A non-positive ``limit`` leaves
+    no room even for the ellipsis and yields an empty string (previously
+    the negative slice bound produced a result longer than ``limit``).
+    """
+    if len(text) <= limit:
+        return text
+    if limit <= 0:
+        return ""
+    return text[: limit - 1] + "…"
+
+
+# Matches an opening ``` / ```json fence at the very start of the text, or a
+# closing ``` fence at the very end. No MULTILINE flag, so ^ and $ anchor to
+# the whole string and fences in the middle of the text are left alone.
+_FENCE_RE = re.compile(r"^\s*```(?:json)?\s*|\s*```\s*$", re.IGNORECASE)
+
+
+def _extract_json_blob(raw: str) -> Optional[dict]:
+    """Leniently pull a JSON object out of an LLM reply.
+
+    Tolerates surrounding whitespace, a markdown code fence around the
+    JSON, and arbitrary text before or after the object.
+
+    Args:
+        raw: The raw reply text; may be empty.
+
+    Returns:
+        The decoded object when the text from the first ``{`` onward
+        starts with a valid JSON object, otherwise ``None`` (empty input,
+        no ``{``, invalid JSON, or a non-object value).
+    """
+    if not raw:
+        return None
+    stripped = _FENCE_RE.sub("", raw.strip())
+    start = stripped.find("{")
+    if start == -1:
+        return None
+    try:
+        # raw_decode stops at the end of the first complete JSON value, so
+        # trailing chatter (even chatter containing a stray "}") no longer
+        # invalidates an otherwise well-formed object, which slicing from
+        # the first "{" to the last "}" did.
+        value, _end = json.JSONDecoder().raw_decode(stripped, start)
+    except ValueError:  # json.JSONDecodeError is a ValueError subclass
+        return None
+    return value if isinstance(value, dict) else None
+
+
+def _profile_author() -> str:
+    """Pick the author name recorded for specifier-driven changes.
+
+    Mirror of ``hermes_cli.kanban._profile_author``, duplicated here to
+    avoid a circular import when kanban.py imports this module. The first
+    non-empty value of ``HERMES_PROFILE`` then ``USER`` wins; with neither
+    set, the literal ``"specifier"`` is used.
+    """
+    for variable in ("HERMES_PROFILE", "USER"):
+        candidate = os.environ.get(variable)
+        if candidate:
+            return candidate
+    return "specifier"
+
+
+def specify_task(
+    task_id: str,
+    *,
+    author: Optional[str] = None,
+    timeout: Optional[int] = None,
+) -> SpecifyOutcome:
+    """Specify a single triage task and promote it to ``todo``.
+
+    Returns an outcome describing what happened. Never raises for expected
+    failure modes (task not in triage, no aux client configured, API
+    error, malformed response) — those surface via ``ok=False`` so the
+    ``--all`` sweep can continue past individual failures.
+
+    Args:
+        task_id: Id of the task to specify.
+        author: Author recorded with the change; defaults to
+            ``_profile_author()``.
+        timeout: Timeout passed to the LLM call; a falsy value (``None``
+            or ``0``) falls back to 120.
+
+    Returns:
+        A ``SpecifyOutcome``. On success ``reason`` is ``"specified"`` and
+        ``new_title`` carries the parsed title, if any.
+    """
+    # Read the task first; this ``with`` block ends before the LLM call, so
+    # the connection is not held open across the request.
+    with kb.connect() as conn:
+        task = kb.get_task(conn, task_id)
+    if task is None:
+        return SpecifyOutcome(task_id, False, "unknown task id")
+    if task.status != "triage":
+        return SpecifyOutcome(
+            task_id, False, f"task is not in triage (status={task.status!r})"
+        )
+
+    # Imported lazily so that a missing or broken ``agent`` package turns
+    # into an ok=False outcome instead of an import-time failure.
+    try:
+        from agent.auxiliary_client import get_text_auxiliary_client
+    except Exception as exc:  # pragma: no cover — import smoke test
+        logger.debug("specify: auxiliary client import failed: %s", exc)
+        return SpecifyOutcome(task_id, False, "auxiliary client unavailable")
+
+    try:
+        client, model = get_text_auxiliary_client("triage_specifier")
+    except Exception as exc:
+        logger.debug("specify: get_text_auxiliary_client failed: %s", exc)
+        return SpecifyOutcome(task_id, False, "auxiliary client unavailable")
+
+    # A (None, ...) or (..., "") result is treated as "not configured",
+    # reported separately from the lookup raising.
+    if client is None or not model:
+        return SpecifyOutcome(
+            task_id, False, "no auxiliary client configured"
+        )
+
+    # Cap the title at 400 and the body at 4000 characters to bound the
+    # prompt size; an empty body is shown as the "(no body)" placeholder.
+    user_msg = _USER_TEMPLATE.format(
+        task_id=task.id,
+        title=_truncate(task.title or "", 400),
+        body=_truncate(task.body or "(no body)", 4000),
+    )
+
+    # Single request, no retry loop: any exception from the call becomes
+    # an ok=False outcome naming only the exception type.
+    try:
+        resp = client.chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": _SYSTEM_PROMPT},
+                {"role": "user", "content": user_msg},
+            ],
+            temperature=0.3,
+            max_tokens=1500,
+            timeout=timeout or 120,
+        )
+    except Exception as exc:
+        logger.info(
+            "specify: API call failed for %s (%s) — skipping",
+            task_id, exc,
+        )
+        return SpecifyOutcome(
+            task_id, False, f"LLM error: {type(exc).__name__}"
+        )
+
+    # Defensive extraction: a response without choices/message/content is
+    # handled like an empty reply.
+    try:
+        raw = resp.choices[0].message.content or ""
+    except Exception:
+        raw = ""
+
+    parsed = _extract_json_blob(raw)
+
+    new_title: Optional[str]
+    new_body: Optional[str]
+    if parsed is None:
+        # Fall back: treat the whole reply as the body, leave title as-is.
+        # Worst case the user edits afterward — still better than stranding
+        # the task in triage on a malformed LLM reply.
+        stripped_raw = raw.strip()
+        if not stripped_raw:
+            return SpecifyOutcome(
+                task_id, False, "LLM returned an empty response"
+            )
+        new_title = None
+        new_body = stripped_raw
+    else:
+        title_val = parsed.get("title")
+        body_val = parsed.get("body")
+        # Only non-blank strings count; anything else is treated as absent.
+        new_title = (
+            title_val.strip()
+            if isinstance(title_val, str) and title_val.strip()
+            else None
+        )
+        # Unlike the title, the body is passed on unstripped; strip() is
+        # used only to reject blank bodies.
+        new_body = (
+            body_val if isinstance(body_val, str) and body_val.strip() else None
+        )
+        if new_body is None and new_title is None:
+            return SpecifyOutcome(
+                task_id, False, "LLM response missing title and body"
+            )
+
+    # Write through a fresh connection; either of title/body may be None
+    # here (but not both in the parsed-JSON branch).
+    with kb.connect() as conn:
+        ok = kb.specify_triage_task(
+            conn,
+            task_id,
+            title=new_title,
+            body=new_body,
+            author=author or _profile_author(),
+        )
+    if not ok:
+        # Race: someone else promoted / archived the task between our
+        # read above and the write. Report, don't crash.
+        return SpecifyOutcome(
+            task_id, False, "task moved out of triage before promotion"
+        )
+    return SpecifyOutcome(task_id, True, "specified", new_title=new_title)
+
+
+def list_triage_ids(*, tenant: Optional[str] = None) -> list[str]:
+    """Collect the ids of all non-archived tasks sitting in triage.
+
+    Args:
+        tenant: Restrict the sweep to one tenant; ``None`` applies no
+            tenant filter.
+
+    Returns:
+        The task ids, in the order ``kanban_db.list_tasks`` yields them.
+    """
+    filters = {
+        "status": "triage",
+        "tenant": tenant,
+        "include_archived": False,
+    }
+    with kb.connect() as db:
+        triage_tasks = kb.list_tasks(db, **filters)
+    return [entry.id for entry in triage_tasks]
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
--- a/hermes_cli/mcp_config.py
+++ b/hermes_cli/mcp_config.py
@ -31,7 +31,12 @@ logger = logging.getLogger(__name__)
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")


-_MCP_PRESETS: Dict[str, Dict[str, Any]] = {}
+_MCP_PRESETS: Dict[str, Dict[str, Any]] = {
+    "codex": {
+        "command": "codex",
+        "args": ["mcp-server"],
+    },
+}


 # ─── UI Helpers ───────────────────────────────────────────────────────────────
@ -58,7 +63,7 @@ def _confirm(question: str, default: bool = True) -> bool:
        return default
    if not val:
        return default
-    return val in ("y", "yes")
+    return val in {"y", "yes"}


 def _prompt(question: str, *, password: bool = False, default: str = "") -> str:
@ -221,7 +226,10 @@ def cmd_mcp_add(args):
    """Add a new MCP server with discovery-first tool selection."""
    name = args.name
    url = getattr(args, "url", None)
-    command = getattr(args, "command", None)
+    # Read from `mcp_command` (set by --command via explicit dest) — see
+    # mcp_add_p.add_argument("--command", dest="mcp_command", ...) in
+    # hermes_cli/main.py for why the dest is renamed.
+    command = getattr(args, "mcp_command", None)
    cmd_args = getattr(args, "args", None) or []
    auth_type = getattr(args, "auth", None)
    preset_name = getattr(args, "preset", None)
@ -367,11 +375,11 @@ def cmd_mcp_add(args):
        _info("Cancelled.")
        return

-    if choice in ("n", "no"):
+    if choice in {"n", "no"}:
        _info("Cancelled — server not saved.")
        return

-    if choice in ("s", "select"):
+    if choice in {"s", "select"}:
        # Interactive tool selection
        from hermes_cli.curses_ui import curses_checklist

@ -501,7 +509,7 @@ def cmd_mcp_list(args=None):
        # Enabled status
        enabled = cfg.get("enabled", True)
        if isinstance(enabled, str):
-            enabled = enabled.lower() in ("true", "1", "yes")
+            enabled = enabled.lower() in {"true", "1", "yes"}
        status = color("✓ enabled", Colors.GREEN) if enabled else color("✗ disabled", Colors.DIM)

        print(f"  {name:<16} {transport:<30} {tools_str:<12} {status}")
--- a/hermes_cli/memory_setup.py
+++ b/hermes_cli/memory_setup.py
@ -69,7 +69,7 @@ def _install_dependencies(provider_name: str) -> None:

    try:
        import yaml
-        with open(yaml_path) as f:
+        with open(yaml_path, encoding="utf-8") as f:
            meta = yaml.safe_load(f) or {}
    except Exception:
        return
@ -361,7 +361,7 @@ def _write_env_vars(env_path: Path, env_writes: dict) -> None:

    existing_lines = []
    if env_path.exists():
-        existing_lines = env_path.read_text().splitlines()
+        existing_lines = env_path.read_text(encoding="utf-8").splitlines()

    updated_keys = set()
    new_lines = []
@ -377,7 +377,7 @@ def _write_env_vars(env_path: Path, env_writes: dict) -> None:
        if key not in updated_keys:
            new_lines.append(f"{key}={val}")

-    env_path.write_text("\n".join(new_lines) + "\n")
+    env_path.write_text("\n".join(new_lines) + "\n", encoding="utf-8")


 # ---------------------------------------------------------------------------
--- a/hermes_cli/model_catalog.py
+++ b/hermes_cli/model_catalog.py
@ -173,7 +173,7 @@ def _read_disk_cache() -> tuple[dict[str, Any] | None, float]:
    except (OSError, FileNotFoundError):
        return (None, 0.0)
    try:
-        with open(path) as fh:
+        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)
    except (OSError, json.JSONDecodeError):
        return (None, 0.0)
@ -187,7 +187,7 @@ def _write_disk_cache(data: dict[str, Any]) -> None:
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp = path.with_suffix(path.suffix + ".tmp")
-        with open(tmp, "w") as fh:
+        with open(tmp, "w", encoding="utf-8") as fh:
            json.dump(data, fh, indent=2)
            fh.write("\n")
        atomic_replace(tmp, path)
--- a/hermes_cli/model_normalize.py
+++ b/hermes_cli/model_normalize.py
@ -393,14 +393,21 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
    if provider in _AGGREGATOR_PROVIDERS:
        return _prepend_vendor(name)

-    # --- OpenCode Zen: Claude stays hyphenated; other models keep dots ---
-    if provider == "opencode-zen":
-        bare = _strip_matching_provider_prefix(name, provider)
-        if "/" in bare:
-            return bare
-        if bare.lower().startswith("claude-"):
-            return _dots_to_hyphens(bare)
-        return bare
+    # --- OpenCode Zen / OpenCode Go: flat-namespace resellers.
+    #     Their /v1/models API returns bare IDs only (no vendor prefix), and
+    #     the inference endpoint rejects vendor-prefixed names with HTTP 401
+    #     "Model not supported".  Strip ANY leading ``vendor/`` so config
+    #     entries like ``minimax/minimax-m2.7`` or ``deepseek/deepseek-v4-flash``
+    #     — commonly copied from aggregator slugs into fallback_model lists —
+    #     resolve to bare ``minimax-m2.7`` / ``deepseek-v4-flash`` the API
+    #     actually serves.  See PR reviewing opencode-go fallback 401s. ---
+    if provider in {"opencode-zen", "opencode-go"}:
+        if "/" in name:
+            _, bare_after_slash = name.split("/", 1)
+            name = bare_after_slash.strip() or name
+        if provider == "opencode-zen" and name.lower().startswith("claude-"):
+            return _dots_to_hyphens(name)
+        return name

    # --- Anthropic: strip matching provider prefix, dots -> hyphens ---
    if provider in _DOT_TO_HYPHEN_PROVIDERS:
--- a/hermes_cli/model_switch.py
+++ b/hermes_cli/model_switch.py
@ -190,11 +190,18 @@ def _load_direct_aliases() -> dict[str, DirectAlias]:
            model: "minimax-m2.7"
            provider: custom
            base_url: "https://ollama.com/v1"
+
+    Also reads ``model.aliases`` (set by ``hermes config set model.aliases.xxx``)
+    and converts simple string entries (``ds-flash: deepseek/deepseek-v4-flash``)
+    into DirectAlias objects.  The provider is parsed from the ``provider/``
+    prefix in the value; if no slash, the current provider is used.
    """
    merged = dict(_BUILTIN_DIRECT_ALIASES)
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
+
+        # --- model_aliases (dict-based format) ---
        user_aliases = cfg.get("model_aliases")
        if isinstance(user_aliases, dict):
            for name, entry in user_aliases.items():
@ -207,6 +214,30 @@ def _load_direct_aliases() -> dict[str, DirectAlias]:
                    merged[name.strip().lower()] = DirectAlias(
                        model=model, provider=provider, base_url=base_url,
                    )
+
+        # --- model.aliases (string-based format, from config set) ---
+        model_section = cfg.get("model", {})
+        if isinstance(model_section, dict):
+            simple_aliases = model_section.get("aliases")
+            if isinstance(simple_aliases, dict):
+                current_provider = model_section.get("provider", "")
+                for name, value in simple_aliases.items():
+                    if not isinstance(value, str) or not value.strip():
+                        continue
+                    key = name.strip().lower()
+                    if key in merged:
+                        continue  # don't override explicit model_aliases entries
+                    val = value.strip()
+                    if "/" in val:
+                        provider, model = val.split("/", 1)
+                    else:
+                        provider = current_provider
+                        model = val
+                    merged[key] = DirectAlias(
+                        model=model.strip(),
+                        provider=provider.strip() or current_provider,
+                        base_url="",
+                    )
    except Exception:
        pass
    return merged
@ -768,6 +799,12 @@ def switch_model(
                        )

        # --- Step d: Aggregator catalog search ---
+        # Track whether the live catalog of the CURRENT provider resolved the
+        # model — if so, step e must not second-guess and switch providers.
+        # Critical for flat-namespace resellers like opencode-go / opencode-zen
+        # whose live /v1/models returns bare IDs (e.g. "deepseek-v4-flash") that
+        # coincidentally match entries in native providers' static catalogs.
+        resolved_in_current_catalog = False
        if is_aggregator(target_provider) and not resolved_alias:
            catalog = list_provider_models(target_provider)
            if catalog:
@ -775,6 +812,7 @@ def switch_model(
                for mid in catalog:
                    if mid.lower() == new_model_lower:
                        new_model = mid
+                        resolved_in_current_catalog = True
                        break
                else:
                    for mid in catalog:
@ -782,11 +820,12 @@ def switch_model(
                            _, bare = mid.split("/", 1)
                            if bare.lower() == new_model_lower:
                                new_model = mid
+                                resolved_in_current_catalog = True
                                break

        # --- Step e: detect_provider_for_model() as last resort ---
        _base = current_base_url or ""
-        is_custom = current_provider in ("custom", "local") or (
+        is_custom = current_provider in {"custom", "local"} or (
            "localhost" in _base or "127.0.0.1" in _base
        )

@ -794,6 +833,7 @@ def switch_model(
            target_provider == current_provider
            and not is_custom
            and not resolved_alias
+            and not resolved_in_current_catalog
        ):
            detected = detect_provider_for_model(new_model, current_provider)
            if detected:
@ -849,10 +889,9 @@ def switch_model(
            # "ollama-launch" that resolve_runtime_provider doesn't know), keep existing
            # credentials. Otherwise use the resolved values (picks up credential rotation,
            # base_url adjustments for OpenCode, etc.).
-            if runtime.get("provider") != "custom":
-                api_key = runtime.get("api_key", "")
-                base_url = runtime.get("base_url", "")
-                api_mode = runtime.get("api_mode", "")
+            api_key = runtime.get("api_key", "")
+            base_url = runtime.get("base_url", "")
+            api_mode = runtime.get("api_mode", "")
        except Exception:
            pass

@ -891,12 +930,37 @@ def switch_model(
    if not validation.get("accepted"):
        override = False
        if user_providers:
-            for up in user_providers:
-                if isinstance(up, dict) and up.get("provider") == target_provider:
-                    cfg_models = up.get("models", [])
-                    if new_model in cfg_models or any(
-                        m.get("name") == new_model for m in cfg_models if isinstance(m, dict)
-                    ):
+            # user_providers is a dict: {provider_slug: config_dict}
+            for slug, cfg in user_providers.items():
+                if slug == target_provider:
+                    cfg_models = cfg.get("models", {})
+                    # Direct membership works for dict (keys) and list (strings)
+                    if new_model in cfg_models:
+                        override = True
+                        break
+                    # Also accept if models is a list of dicts with 'name' field
+                    if isinstance(cfg_models, list):
+                        if any(m.get("name") == new_model for m in cfg_models if isinstance(m, dict)):
+                            override = True
+                            break
+        # Also check custom_providers list — models declared there should be accepted
+        # even if the remote /v1/models endpoint doesn't list them.
+        if not override and custom_providers and isinstance(custom_providers, list):
+            for entry in custom_providers:
+                if not isinstance(entry, dict):
+                    continue
+                # Match by provider slug (custom:<name>) or by base_url
+                entry_name = entry.get("name", "")
+                entry_slug = f"custom:{entry_name}" if entry_name else ""
+                entry_url = entry.get("base_url", "")
+                if entry_slug == target_provider or entry_url == base_url:
+                    # Check if the requested model matches the entry's model
+                    entry_model = entry.get("model", "")
+                    entry_models = entry.get("models", {})
+                    if new_model == entry_model:
+                        override = True
+                        break
+                    if isinstance(entry_models, dict) and new_model in entry_models:
                        override = True
                        break
        if override:
@ -1015,6 +1079,7 @@ def list_authenticated_providers(
    from hermes_cli.models import (
        OPENROUTER_MODELS, _PROVIDER_MODELS,
        _MODELS_DEV_PREFERRED, _merge_with_models_dev, provider_model_ids,
+        get_curated_nous_model_ids,
    )

    results: List[dict] = []
@ -1052,14 +1117,56 @@ def list_authenticated_providers(
        if normed:
            _builtin_endpoints.add(normed)

+    def _has_fast_aws_sdk_signal() -> bool:
+        """Cheap, environment-only probe for explicit AWS auth config.
+
+        Deliberately bypasses botocore's full credential chain: provider
+        picker / model-switch discovery can run for non-Bedrock providers,
+        and botocore may otherwise probe EC2 IMDS (169.254.169.254) on
+        local machines before reporting that no credentials exist.
+
+        Returns True when any of these is set to a non-blank value: the
+        Bedrock bearer token, an access-key / secret-key pair, or one of
+        the profile / container / web-identity variables.
+        """
+
+        def _is_set(variable: str) -> bool:
+            return bool(os.environ.get(variable, "").strip())
+
+        if _is_set("AWS_BEARER_TOKEN_BEDROCK"):
+            return True
+        if _is_set("AWS_ACCESS_KEY_ID") and _is_set("AWS_SECRET_ACCESS_KEY"):
+            return True
+        indirect_sources = (
+            "AWS_PROFILE",
+            "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI",
+            "AWS_CONTAINER_CREDENTIALS_FULL_URI",
+            "AWS_WEB_IDENTITY_TOKEN_FILE",
+        )
+        for variable in indirect_sources:
+            if _is_set(variable):
+                return True
+        return False
+
+    def _has_aws_sdk_creds_for_listing(slug: str) -> bool:
+        """Credential check for AWS SDK providers in non-runtime discovery.
+
+        The environment-only probe is tried first. The adapter's
+        ``has_aws_credentials()`` check is consulted only when ``slug`` is
+        the currently selected provider (compared case-insensitively,
+        ignoring surrounding whitespace); any failure importing or calling
+        it counts as "no credentials".
+        """
+        if _has_fast_aws_sdk_signal():
+            return True
+        requested = str(slug or "").strip().lower()
+        active = str(current_provider or "").strip().lower()
+        if requested != active:
+            # Not the active provider: skip the adapter check entirely.
+            return False
+        try:
+            from agent.bedrock_adapter import has_aws_credentials
+            found = bool(has_aws_credentials())
+        except Exception:
+            return False
+        return found
+
    data = fetch_models_dev()

    # Build curated model lists keyed by hermes provider ID
    curated: dict[str, list[str]] = dict(_PROVIDER_MODELS)
    curated["openrouter"] = [mid for mid, _ in OPENROUTER_MODELS]
-    # "nous" shares OpenRouter's curated list if not separately defined
-    if "nous" not in curated:
-        curated["nous"] = curated["openrouter"]
+    # "nous" pulls from the remote model-catalog manifest published at
+    # https://hermes-agent.nousresearch.com/docs/api/model-catalog.json so
+    # newly added Portal models surface in the /model picker without
+    # requiring a Hermes release. Falls back to the in-repo
+    # _PROVIDER_MODELS["nous"] snapshot when the manifest is unreachable.
+    curated["nous"] = get_curated_nous_model_ids()
    # Ollama Cloud uses dynamic discovery (no static curated list)
    if "ollama-cloud" not in curated:
        from hermes_cli.models import fetch_ollama_cloud_models
@ -1179,7 +1286,9 @@ def list_authenticated_providers(

        # Check if credentials exist
        has_creds = False
-        if overlay.extra_env_vars:
+        if overlay.auth_type == "aws_sdk":
+            has_creds = _has_aws_sdk_creds_for_listing(hermes_slug)
+        elif overlay.extra_env_vars:
            has_creds = any(os.environ.get(ev) for ev in overlay.extra_env_vars)
        # Also check api_key_env_vars from PROVIDER_REGISTRY for api_key auth_type
        if not has_creds and overlay.auth_type == "api_key":
@ -1198,11 +1307,7 @@ def list_authenticated_providers(
                from hermes_cli.auth import _load_auth_store
                store = _load_auth_store()
                providers_store = store.get("providers", {})
-                pool_store = store.get("credential_pool", {})
-                if store and (
-                    pid in providers_store or hermes_slug in providers_store
-                    or pid in pool_store or hermes_slug in pool_store
-                ):
+                if store and (pid in providers_store or hermes_slug in providers_store):
                    has_creds = True
            except Exception as exc:
                logger.debug("Auth store check failed for %s: %s", pid, exc)
@ -1241,7 +1346,14 @@ def list_authenticated_providers(
        if not has_creds:
            continue

-        if hermes_slug in {"copilot", "copilot-acp"}:
+        if hermes_slug in {"openai-codex", "copilot", "copilot-acp"}:
+            # Use live OAuth-backed discovery so the gateway /model picker
+            # matches what the user's authenticated Codex/Copilot backend
+            # actually serves — including ChatGPT-Pro-only Codex slugs
+            # (e.g. gpt-5.3-codex-spark) that aren't in the static curated
+            # catalog. ``provider_model_ids()`` falls back to the curated
+            # list when the live endpoint is unreachable, so this is safe
+            # for unauthenticated and offline cases too.
            model_ids = provider_model_ids(hermes_slug)
        # For aws_sdk providers (bedrock), use live discovery so the list
        # reflects the active region (eu.*, ap.*) not the static us.* list.
@ -1298,11 +1410,7 @@ def list_authenticated_providers(
                from hermes_cli.auth import _load_auth_store
                _cp_store = _load_auth_store()
                _cp_providers_store = _cp_store.get("providers", {})
-                _cp_pool_store = _cp_store.get("credential_pool", {})
-                if _cp_store and (
-                    _cp.slug in _cp_providers_store
-                    or _cp.slug in _cp_pool_store
-                ):
+                if _cp_store and _cp.slug in _cp_providers_store:
                    _cp_has_creds = True
            except Exception:
                pass
@ -1319,11 +1427,7 @@ def list_authenticated_providers(
        # credentials come from the boto3 credential chain (env vars,
        # ~/.aws/credentials, instance roles, etc.)
        if not _cp_has_creds and _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
-            try:
-                from agent.bedrock_adapter import has_aws_credentials
-                _cp_has_creds = has_aws_credentials()
-            except Exception:
-                pass
+            _cp_has_creds = _has_aws_sdk_creds_for_listing(_cp.slug)

        if not _cp_has_creds:
            continue
@ -1412,14 +1516,17 @@ def list_authenticated_providers(
                        models_list = list(fb)

            # Prefer the endpoint's live /models list when credentials are
-            # available. This keeps OpenAI-compatible relays (for example CRS)
-            # in sync when the server catalog changes without requiring the
-            # user to mirror every model into config.yaml.
+            # available, unless the provider explicitly opts out via
+            # discover_models: false (e.g. dedicated endpoints that expose
+            # the entire aggregator catalog via /models).
            api_key = str(ep_cfg.get("api_key", "") or "").strip()
            if not api_key:
                key_env = str(ep_cfg.get("key_env", "") or "").strip()
                api_key = os.environ.get(key_env, "").strip() if key_env else ""
-            if api_url and api_key:
+            discover = ep_cfg.get("discover_models", True)
+            if isinstance(discover, str):
+                discover = discover.lower() not in {"false", "no", "0"}
+            if api_url and api_key and discover:
                try:
                    from hermes_cli.models import fetch_api_models
                    live_models = fetch_api_models(api_key, api_url)
@ -1540,7 +1647,8 @@ def list_authenticated_providers(
                        groups[group_key]["models"].append(m)

        _section4_emitted_slugs: set = set()
-        for grp in groups.values():
+        for grp_key, grp in groups.items():
+            api_url, api_key = grp_key
            slug = grp["slug"]
            # If the slug is already claimed by a built-in / overlay /
            # user-provider row (sections 1-3), skip this custom group
@ -1578,6 +1686,18 @@ def list_authenticated_providers(
            _grp_url_norm = _pair_key[1]
            if _grp_url_norm and _grp_url_norm in _builtin_endpoints:
                continue
+            # Live model discovery from custom provider endpoints (matches
+            # Section 3 behavior for user ``providers:`` entries).
+            if api_url and api_key:
+                try:
+                    from hermes_cli.models import fetch_api_models
+
+                    live_models = fetch_api_models(api_key, api_url)
+                    if live_models:
+                        grp["models"] = live_models
+                        grp["total_models"] = len(live_models)
+                except Exception:
+                    pass
            results.append({
                "slug": slug,
                "name": grp["name"],
@ -1595,3 +1715,63 @@ def list_authenticated_providers(
    results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))

    return results
+
+
+def list_picker_providers(
+    current_provider: str = "",
+    current_base_url: str = "",
+    user_providers: dict | None = None,
+    custom_providers: list | None = None,
+    max_models: int = 8,
+    current_model: str = "",
+) -> List[dict]:
+    """Interactive-picker variant of :func:`list_authenticated_providers`.
+
+    Post-processes the base list so the ``/model`` picker (Telegram/Discord
+    inline keyboards) only surfaces models that are actually callable in the
+    current install:
+
+    - OpenRouter's model list is replaced with the output of
+      :func:`hermes_cli.models.fetch_openrouter_models`, which filters the
+      curated ``OPENROUTER_MODELS`` snapshot against the live OpenRouter
+      catalog.  IDs the live catalog no longer carries drop out, so the
+      picker never offers a model the user can't call.
+    - Provider rows whose model list ends up empty are dropped, except
+      custom endpoints (``is_user_defined=True`` with an ``api_url``) where
+      the user may supply their own model set through config.
+
+    All other providers and metadata fields are passed through unchanged.
+    The typed ``/model <name>`` path is unaffected -- only the interactive
+    picker payload is narrowed.
+
+    Args:
+        current_provider, current_base_url, user_providers, custom_providers,
+        current_model: Forwarded verbatim to
+            :func:`list_authenticated_providers`.
+        max_models: Forwarded to the base call and also used to cap the
+            OpenRouter ``models`` list built here.
+
+    Returns:
+        The filtered list of provider rows.  The OpenRouter row, when
+        present, is a shallow copy; rows from the base list are never
+        mutated in place.
+    """
+    from hermes_cli.models import fetch_openrouter_models
+
+    providers = list_authenticated_providers(
+        current_provider=current_provider,
+        current_base_url=current_base_url,
+        user_providers=user_providers,
+        custom_providers=custom_providers,
+        max_models=max_models,
+        current_model=current_model,
+    )
+
+    filtered: List[dict] = []
+    for p in providers:
+        slug = str(p.get("slug", "")).lower()
+        if slug == "openrouter":
+            try:
+                live_ids = [mid for mid, _ in fetch_openrouter_models()]
+                total = len(live_ids)
+            except Exception:
+                # Best-effort: when the live lookup fails, fall back to the
+                # models the base row already carries.  Keep the row's own
+                # ``total_models`` instead of recounting the fallback list --
+                # NOTE(review): the base list presumably holds at most
+                # ``max_models`` entries while ``total_models`` counts the
+                # full catalogue; confirm against
+                # list_authenticated_providers.
+                live_ids = list(p.get("models", []))
+                total = p.get("total_models", len(live_ids))
+            p = dict(p)  # copy so the base row is left untouched
+            p["models"] = live_ids[:max_models]
+            p["total_models"] = total
+
+        has_models = bool(p.get("models"))
+        is_custom_endpoint = bool(p.get("is_user_defined")) and bool(p.get("api_url"))
+        if not has_models and not is_custom_endpoint:
+            continue
+        filtered.append(p)
+
+    return filtered
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@ -32,40 +32,38 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
 # Fallback OpenRouter snapshot used when the live catalog is unavailable.
 # (model_id, display description shown in menus)
 OPENROUTER_MODELS: list[tuple[str, str]] = [
-    ("moonshotai/kimi-k2.6",            "recommended"),
-    ("anthropic/claude-opus-4.7",       ""),
-    ("anthropic/claude-opus-4.6",       ""),
-    ("anthropic/claude-sonnet-4.6",     ""),
-    ("qwen/qwen3.6-plus",               ""),
-    ("anthropic/claude-sonnet-4.5",     ""),
-    ("anthropic/claude-haiku-4.5",      ""),
-    ("openrouter/elephant-alpha",       "free"),
-    ("openai/gpt-5.5",                  ""),
-    ("openai/gpt-5.4-mini",             ""),
-    ("xiaomi/mimo-v2.5-pro",             ""),
-    ("xiaomi/mimo-v2.5",                 ""),
-    ("tencent/hy3-preview:free",         "free"),
-    ("openai/gpt-5.3-codex",            ""),
-    ("google/gemini-3-pro-image-preview", ""),
-    ("google/gemini-3-flash-preview",   ""),
-    ("google/gemini-3.1-pro-preview",     ""),
+    ("anthropic/claude-opus-4.7",              ""),
+    ("anthropic/claude-opus-4.6",              ""),
+    ("anthropic/claude-sonnet-4.6",            ""),
+    ("moonshotai/kimi-k2.6",                   "recommended"),
+    ("openrouter/pareto-code",                 "auto-routes to cheapest coder meeting openrouter.min_coding_score"),
+    ("qwen/qwen3.6-plus",                      ""),
+    ("anthropic/claude-haiku-4.5",             ""),
+    ("openai/gpt-5.5",                         ""),
+    ("openai/gpt-5.5-pro",                     ""),
+    ("openai/gpt-5.4-mini",                    ""),
+    ("openai/gpt-5.4-nano",                    ""),
+    ("openai/gpt-5.3-codex",                   ""),
+    ("xiaomi/mimo-v2.5-pro",                   ""),
+    ("tencent/hy3-preview",                    ""),
+    ("google/gemini-3-pro-image-preview",      ""),
+    ("google/gemini-3-flash-preview",          ""),
+    ("google/gemini-3.1-pro-preview",          ""),
    ("google/gemini-3.1-flash-lite-preview",   ""),
-    ("qwen/qwen3.5-plus-02-15",         ""),
-    ("qwen/qwen3.5-35b-a3b",            ""),
-    ("stepfun/step-3.5-flash",          ""),
-    ("minimax/minimax-m2.7",            ""),
-    ("minimax/minimax-m2.5",            ""),
-    ("minimax/minimax-m2.5:free",       "free"),
-    ("z-ai/glm-5.1",                    ""),
-    ("z-ai/glm-5v-turbo",               ""),
-    ("z-ai/glm-5-turbo",                ""),
-    ("x-ai/grok-4.20",                  ""),
+    ("qwen/qwen3.6-35b-a3b",                   ""),
+    ("stepfun/step-3.5-flash",                 ""),
+    ("minimax/minimax-m2.7",                   ""),
+    ("z-ai/glm-5.1",                           ""),
+    ("x-ai/grok-4.20",                         ""),
+    ("x-ai/grok-4.3",                          ""),
    ("nvidia/nemotron-3-super-120b-a12b",      ""),
+    ("deepseek/deepseek-v4-pro",               ""),
+    # Free tier
+    ("openrouter/elephant-alpha",              "free"),
+    ("openrouter/owl-alpha",                   "free"),
+    ("tencent/hy3-preview:free",               "free"),
    ("nvidia/nemotron-3-super-120b-a12b:free", "free"),
-    ("arcee-ai/trinity-large-preview:free", "free"),
-    ("arcee-ai/trinity-large-thinking",  ""),
-    ("openai/gpt-5.5-pro",              ""),
-    ("openai/gpt-5.4-nano",             ""),
+    ("inclusionai/ring-2.6-1t:free",           "free"),
 ]

 _openrouter_catalog_cache: list[tuple[str, str]] | None = None
@ -112,16 +110,16 @@ def _codex_curated_models() -> list[str]:
 # $HERMES_HOME/models_dev_cache.json as of 2026-04-28. Whenever xAI renames
 # or retires a model, the disk cache picks it up on the next refresh and the
 # fallback here only matters until that refresh lands.
+#
+# Models retired by xAI on May 15, 2026 are excluded — see
+# https://docs.x.ai/developers/migration/may-15-retirement
+# (grok-4, grok-4-0709, grok-4-fast{,-reasoning,-non-reasoning},
+#  grok-4-1-fast{,-reasoning,-non-reasoning}, grok-code-fast-1 → grok-4.3).
 _XAI_STATIC_FALLBACK: list[str] = [
    "grok-4.20-0309-reasoning",
    "grok-4.20-0309-non-reasoning",
    "grok-4.20-multi-agent-0309",
-    "grok-4-1-fast",
-    "grok-4-1-fast-non-reasoning",
-    "grok-4-fast",
-    "grok-4-fast-non-reasoning",
-    "grok-4",
-    "grok-code-fast-1",
+    "grok-4.3",
 ]


@ -154,36 +152,30 @@ def _xai_curated_models() -> list[str]:

 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
-        "moonshotai/kimi-k2.6",
-        "xiaomi/mimo-v2.5-pro",
-        "xiaomi/mimo-v2.5",
-        "tencent/hy3-preview",
        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
-        "anthropic/claude-sonnet-4.5",
+        "moonshotai/kimi-k2.6",
+        "qwen/qwen3.6-plus",
        "anthropic/claude-haiku-4.5",
        "openai/gpt-5.5",
+        "openai/gpt-5.5-pro",
        "openai/gpt-5.4-mini",
+        "openai/gpt-5.4-nano",
        "openai/gpt-5.3-codex",
+        "xiaomi/mimo-v2.5-pro",
+        "tencent/hy3-preview",
        "google/gemini-3-pro-preview",
        "google/gemini-3-flash-preview",
        "google/gemini-3.1-pro-preview",
        "google/gemini-3.1-flash-lite-preview",
-        "qwen/qwen3.5-plus-02-15",
-        "qwen/qwen3.5-35b-a3b",
+        "qwen/qwen3.6-35b-a3b",
        "stepfun/step-3.5-flash",
        "minimax/minimax-m2.7",
-        "minimax/minimax-m2.5",
-        "minimax/minimax-m2.5:free",
        "z-ai/glm-5.1",
-        "z-ai/glm-5v-turbo",
-        "z-ai/glm-5-turbo",
-        "x-ai/grok-4.20-beta",
+        "x-ai/grok-4.3",
        "nvidia/nemotron-3-super-120b-a12b",
-        "arcee-ai/trinity-large-thinking",
-        "openai/gpt-5.5-pro",
-        "openai/gpt-5.4-nano",
+        "deepseek/deepseek-v4-pro",
    ],
    # Native OpenAI Chat Completions (api.openai.com). Used by /model counts and
    # provider_model_ids fallback when /v1/models is unavailable.
@ -218,7 +210,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "gemini-3-pro-preview",
        "gemini-3-flash-preview",
        "gemini-2.5-pro",
-        "grok-code-fast-1",
    ],
    "gemini": [
        "gemini-3.1-pro-preview",
@ -411,6 +402,18 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "glm-4.7",
        "MiniMax-M2.5",
    ],
+    # Alibaba Coding Plan — same platform as alibaba (DashScope coding-intl),
+    # separate provider ID with its own base_url_env_var.
+    "alibaba-coding-plan": [
+        "qwen3.6-plus",
+        "qwen3.5-plus",
+        "qwen3-coder-plus",
+        "qwen3-coder-next",
+        "kimi-k2.5",
+        "glm-5",
+        "glm-4.7",
+        "MiniMax-M2.5",
+    ],
    # Curated HF model list — only agentic models that map to OpenRouter defaults.
    "huggingface": [
        "moonshotai/Kimi-K2.5",
@ -773,7 +776,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("nous",           "Nous Portal",              "Nous Portal (Nous Research subscription)"),
    ProviderEntry("openrouter",     "OpenRouter",               "OpenRouter (100+ models, pay-per-use)"),
    ProviderEntry("lmstudio",       "LM Studio",                "LM Studio (local desktop app with built-in model server)"),
-    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
@ -803,8 +805,28 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("opencode-go",    "OpenCode Go",              "OpenCode Go (open models, $10/month subscription)"),
    ProviderEntry("bedrock",        "AWS Bedrock",              "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
    ProviderEntry("azure-foundry",  "Azure Foundry",            "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
+    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway"),
 ]

+# Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/
+# that is not already in the list above.  Adding plugins/model-providers/<name>/
+# is sufficient to expose a new provider in the model picker, /model, and all
+# downstream consumers — no edits to this file needed.
+# Slugs already present in the hand-maintained list above; consulted (and
+# extended) below so no provider is appended twice.
+_canonical_slugs = {p.slug for p in CANONICAL_PROVIDERS}
+try:
+    from providers import list_providers as _list_providers_for_canonical
+    for _pp in _list_providers_for_canonical():
+        if _pp.name in _canonical_slugs:
+            continue
+        if _pp.auth_type in {"oauth_device_code", "oauth_external", "external_process", "aws_sdk", "copilot"}:
+            continue  # non-api-key flows need bespoke picker UX; skip auto-inject
+        # Fall back to the provider name / a generic description when the
+        # profile leaves ``display_name`` or ``description`` empty.
+        _label = _pp.display_name or _pp.name
+        _desc = _pp.description or f"{_label} (direct API)"
+        CANONICAL_PROVIDERS.append(ProviderEntry(_pp.name, _label, _desc))
+        _canonical_slugs.add(_pp.name)
+except Exception:
+    # Best-effort: a failure while importing or iterating the provider
+    # registry leaves CANONICAL_PROVIDERS as it stood when the error occurred
+    # (entries appended before the failure are kept).
+    # NOTE(review): the failure is silent -- consider logging it at debug level.
+    pass
+
 # Derived dicts — used throughout the codebase
 _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
 _PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider
@ -1739,10 +1761,20 @@ def model_supports_fast_mode(model_id: Optional[str]) -> bool:


 def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
-    """Return True if the model is a Claude model eligible for Anthropic Fast Mode."""
+    """Return True if the model is a Claude model eligible for Anthropic Fast Mode.
+
+    Fast mode is currently supported on Claude Opus 4.6 only. Per Anthropic's
+    docs (https://platform.claude.com/docs/en/build-with-claude/fast-mode):
+    "Fast mode is currently supported on Opus 4.6 only. Sending speed: fast
+    with an unsupported model returns an error." Opus 4.7 explicitly rejects
+    the ``speed`` parameter with HTTP 400.
+    """
    raw = _strip_vendor_prefix(str(model_id or ""))
    base = raw.split(":")[0]
-    return base.startswith("claude-")
+    if not base.startswith("claude-"):
+        return False
+    # Only Opus 4.6 supports fast mode at present.
+    return "opus-4-6" in base or "opus-4.6" in base


 def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
@ -2012,6 +2044,34 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
                return ids
        except Exception:
            pass
+
+    # ── Profile-based generic live fetch (all simple api-key providers) ──
+    # Handles any provider registered in providers/ with auth_type="api_key".
+    # Replaces per-provider copy-paste blocks (stepfun, gmi, zai, etc.).
+    try:
+        from providers import get_provider_profile
+        from hermes_cli.auth import resolve_api_key_provider_credentials
+
+        _p = get_provider_profile(normalized)
+        if _p and _p.auth_type == "api_key" and _p.base_url:
+            try:
+                creds = resolve_api_key_provider_credentials(normalized)
+                api_key = str(creds.get("api_key") or "").strip()
+                base_url = str(creds.get("base_url") or "").strip()
+            except Exception:
+                api_key, base_url = "", _p.base_url
+            if not base_url:
+                base_url = _p.base_url
+            if api_key:
+                live = _p.fetch_models(api_key=api_key)
+                if live:
+                    return live
+            # Use profile's fallback_models if defined
+            if _p.fallback_models:
+                return list(_p.fallback_models)
+    except Exception:
+        pass
+
    curated_static = list(_PROVIDER_MODELS.get(normalized, []))
    if normalized in _MODELS_DEV_PREFERRED:
        return _merge_with_models_dev(normalized, curated_static)
@ -2275,7 +2335,7 @@ def _lmstudio_fetch_raw_models(
        with urllib.request.urlopen(request, timeout=timeout) as resp:
            payload = json.loads(resp.read().decode())
    except urllib.error.HTTPError as exc:
-        if exc.code in (401, 403):
+        if exc.code in {401, 403}:
            from hermes_cli.auth import AuthError
            raise AuthError(
                f"LM Studio rejected the request with HTTP {exc.code}.",
@ -2895,6 +2955,19 @@ def fetch_api_models(
 _OLLAMA_CLOUD_CACHE_TTL = 3600  # 1 hour


+def _strip_ollama_cloud_suffix(model_id: str) -> str:
+    """Return *model_id* without a trailing ``:cloud`` or ``-cloud`` marker.
+
+    models.dev sometimes reports Ollama Cloud IDs with such a marker (e.g.
+    'kimi-k2.6:cloud') while the live API uses the bare ID ('kimi-k2.6').
+    Normalising before the dedup merge keeps the same model from being
+    listed twice.  At most one marker is removed; IDs without a marker are
+    returned unchanged.
+    """
+    # Both markers are six characters long, so a single slice covers either.
+    if model_id.endswith((":cloud", "-cloud")):
+        return model_id[:-6]
+    return model_id
+
+
 def _ollama_cloud_cache_path() -> Path:
    """Return the path for the Ollama Cloud model cache."""
    from hermes_constants import get_hermes_home
@ -2990,9 +3063,10 @@ def fetch_ollama_cloud_models(
                seen.add(m)
                merged.append(m)
        for m in mdev_models:
-            if m and m not in seen:
-                seen.add(m)
-                merged.append(m)
+            normalized = _strip_ollama_cloud_suffix(m)
+            if normalized and normalized not in seen:
+                seen.add(normalized)
+                merged.append(normalized)
        if merged:
            _save_ollama_cloud_cache(merged)
            return merged
@ -3086,7 +3160,7 @@ def validate_requested_model(
            "message": f"Model `{requested}` was not found in LM Studio's model listing.",
        }

-    if normalized == "custom":
+    if normalized == "custom" or normalized.startswith("custom:"):
        # Try probing with correct auth for the api_mode.
        if api_mode == "anthropic_messages":
            probe = probe_api_models(api_key, base_url, api_mode=api_mode)
@ -3184,18 +3258,19 @@ def validate_requested_model(
            if suggestions:
                suggestion_text = "\n  Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
            return {
-                "accepted": False,
-                "persist": False,
+                "accepted": True,
+                "persist": True,
                "recognized": False,
                "message": (
-                    f"Model `{requested}` was not found in the OpenAI Codex model listing."
+                    f"Note: `{requested}` was not found in the OpenAI Codex model listing. "
+                    "It may still work if your ChatGPT/Codex account has access to a newer or hidden model ID."
                    f"{suggestion_text}"
                ),
            }

    # MiniMax providers don't expose a /models endpoint — validate against
    # the static catalog instead, similar to openai-codex.
-    if normalized in ("minimax", "minimax-cn"):
+    if normalized in {"minimax", "minimax-cn"}:
        try:
            catalog_models = provider_model_ids(normalized)
        except Exception:
--- a/hermes_cli/nous_subscription.py
+++ b/hermes_cli/nous_subscription.py
@ -255,6 +255,10 @@ def get_nous_subscription_features(
    terminal_cfg = config.get("terminal") if isinstance(config.get("terminal"), dict) else {}

    web_backend = str(web_cfg.get("backend") or "").strip().lower()
+    # Per-capability overrides: if set, they determine which backend is active for
+    # search/extract independently of web.backend.
+    web_search_backend = str(web_cfg.get("search_backend") or "").strip().lower()
+    web_extract_backend = str(web_cfg.get("extract_backend") or "").strip().lower()
    tts_provider = str(tts_cfg.get("provider") or "edge").strip().lower()
    browser_provider_explicit = "cloud_provider" in browser_cfg
    browser_provider = normalize_browser_cloud_provider(
@ -280,6 +284,7 @@ def get_nous_subscription_features(
    direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
    direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
    direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
+    direct_searxng = bool(get_env_value("SEARXNG_URL"))
    direct_fal = fal_key_is_configured()
    direct_openai_tts = bool(resolve_openai_audio_api_key())
    direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
@ -323,10 +328,18 @@ def get_nous_subscription_features(
            or (web_backend == "firecrawl" and direct_firecrawl)
            or (web_backend == "parallel" and direct_parallel)
            or (web_backend == "tavily" and direct_tavily)
+            or (web_backend == "searxng" and direct_searxng)
+            # Per-capability overrides: search_backend or extract_backend may be set
+            # without web.backend (using the new split config from #20061)
+            or (web_search_backend == "searxng" and direct_searxng)
+            or (web_search_backend == "exa" and direct_exa)
+            or (web_search_backend == "firecrawl" and direct_firecrawl)
+            or (web_search_backend == "parallel" and direct_parallel)
+            or (web_search_backend == "tavily" and direct_tavily)
        )
    )
    web_available = bool(
-        managed_web_available or direct_exa or direct_firecrawl or direct_parallel or direct_tavily
+        managed_web_available or direct_exa or direct_firecrawl or direct_parallel or direct_tavily or direct_searxng
    )

    image_managed = image_tool_enabled and managed_image_available and not direct_fal
@ -412,8 +425,8 @@ def get_nous_subscription_features(
            managed_by_nous=web_managed,
            direct_override=web_active and not web_managed,
            toolset_enabled=web_tool_enabled,
-            current_provider=web_backend or "",
-            explicit_configured=bool(web_backend),
+            current_provider=web_backend or web_search_backend or "",
+            explicit_configured=bool(web_backend or web_search_backend),
        ),
        "image_gen": NousFeatureState(
            key="image_gen",
--- a/hermes_cli/oneshot.py
+++ b/hermes_cli/oneshot.py
@ -174,7 +174,7 @@ def run_oneshot(
    # Redirect stderr AND stdout to devnull for the entire call tree.
    # We'll print the final response to the real stdout at the end.
    real_stdout = sys.stdout
-    devnull = open(os.devnull, "w")
+    devnull = open(os.devnull, "w", encoding="utf-8")

    try:
        with redirect_stdout(devnull), redirect_stderr(devnull):
@ -199,6 +199,22 @@ def run_oneshot(
    return 0


+def _create_session_db_for_oneshot():
+    """Build a SessionDB for ``hermes -z`` / oneshot mode, or return None.
+
+    Oneshot bypasses ``HermesCLI._init_agent()``, so it has to wire the
+    SQLite session store itself.  Without this, the ``session_search``/recall
+    tool is advertised but every call returns
+    "Session database not available.".
+
+    Returns:
+        A ``SessionDB`` instance, or ``None`` when importing or constructing
+        it raises; the failure is logged at debug level and never propagated.
+    """
+    try:
+        from hermes_state import SessionDB
+
+        session_db = SessionDB()
+    except Exception as err:
+        logging.debug("SQLite session store not available for oneshot mode: %s", err)
+        return None
+    return session_db
+
+
 def _run_agent(
    prompt: str,
    model: Optional[str] = None,
@ -284,6 +300,8 @@ def _run_agent(
    if toolsets_list is None and use_config_toolsets:
        toolsets_list = sorted(_get_platform_tools(cfg, "cli"))

+    session_db = _create_session_db_for_oneshot()
+
    agent = AIAgent(
        api_key=runtime.get("api_key"),
        base_url=runtime.get("base_url"),
@ -293,6 +311,7 @@ def _run_agent(
        enabled_toolsets=toolsets_list,
        quiet_mode=True,
        platform="cli",
+        session_db=session_db,
        credential_pool=runtime.get("credential_pool"),
        # Interactive callbacks are intentionally NOT wired beyond this
        # one.  In oneshot mode there's no user sitting at a terminal:
--- a/hermes_cli/pairing.py
+++ b/hermes_cli/pairing.py
@ -73,6 +73,24 @@ def _cmd_approve(store, platform: str, code: str):
        display = f"{name} ({uid})" if name else uid
        print(f"\n  Approved! User {display} on {platform} can now use the bot~")
        print("  They'll be recognized automatically on their next message.\n")
+    elif store._is_locked_out(platform):
+        # Disambiguate: approve_code returns None for both invalid codes
+        # and lockout. Tell the operator it's lockout so they don't chase
+        # a "wrong code" rabbit hole (#10195).
+        import time as _time
+        limits = store._load_json(store._rate_limit_path())
+        lockout_until = limits.get(f"_lockout:{platform}", 0)
+        remaining = max(0, int(lockout_until - _time.time()))
+        mins = remaining // 60
+        print(
+            f"\n  Platform '{platform}' is locked out after too many failed "
+            f"approval attempts."
+        )
+        print(f"  Lockout clears in ~{mins} minute(s).")
+        print(
+            "  To reset sooner, delete the '_lockout:{0}' entry from "
+            "~/.hermes/platforms/pairing/_rate_limits.json\n".format(platform)
+        )
    else:
        print(f"\n  Code '{code}' not found or expired for platform '{platform}'.")
        print("  Run 'hermes pairing list' to see pending codes.\n")
--- a/hermes_cli/plugins.py
+++ b/hermes_cli/plugins.py
@ -33,12 +33,15 @@ so plugin-defined tools appear alongside the built-in tools.

 from __future__ import annotations

+import asyncio
 import importlib
 import importlib.metadata
 import importlib.util
+import inspect
 import logging
 import os
 import sys
+import threading
 import types
 from dataclasses import dataclass, field
 from pathlib import Path
@ -68,6 +71,56 @@ except ImportError:  # pragma: no cover – yaml is optional at import time

 logger = logging.getLogger(__name__)

+
+# ---------------------------------------------------------------------------
+# Plugin developer debug logging
+# ---------------------------------------------------------------------------
+#
+# Set ``HERMES_PLUGINS_DEBUG=1`` to surface verbose plugin-discovery logs to
+# stderr in addition to ~/.hermes/logs/agent.log. Aimed at plugin authors
+# trying to figure out why their plugin isn't showing up: which directories
+# were scanned, which manifests parsed, which plugins were skipped (and why),
+# what each ``register(ctx)`` call registered, and full tracebacks on load
+# failure.
+#
+# The env var is read once at import time; tests that need to flip it
+# mid-process can call ``_install_plugin_debug_handler(force=True)``.
+
+# Values of HERMES_PLUGINS_DEBUG (after strip + lowercase) that enable debug.
+_PLUGINS_DEBUG_TRUTHY = frozenset({"1", "true", "yes", "on"})
+
+
+def _plugins_debug_env_enabled() -> bool:
+    """Return True when HERMES_PLUGINS_DEBUG currently holds a truthy value."""
+    raw = os.getenv("HERMES_PLUGINS_DEBUG", "")
+    return raw.strip().lower() in _PLUGINS_DEBUG_TRUTHY
+
+
+_PLUGINS_DEBUG = _plugins_debug_env_enabled()
+_DEBUG_HANDLER_INSTALLED = False
+
+
+def _install_plugin_debug_handler(force: bool = False) -> None:
+    """Tee this module's logger to stderr at DEBUG when HERMES_PLUGINS_DEBUG is on.
+
+    The handler is attached at most once per process.  Only this module's
+    logger is modified; the root logger and other loggers are not touched.
+
+    Args:
+        force: Re-read ``HERMES_PLUGINS_DEBUG`` from the environment before
+            deciding, instead of using the value captured at import time.
+            It does not attach a second handler, and it does not remove a
+            handler that is already attached.
+    """
+    global _DEBUG_HANDLER_INSTALLED, _PLUGINS_DEBUG
+    if force:
+        _PLUGINS_DEBUG = _plugins_debug_env_enabled()
+    if not _PLUGINS_DEBUG or _DEBUG_HANDLER_INSTALLED:
+        return
+    handler = logging.StreamHandler(sys.stderr)
+    handler.setLevel(logging.DEBUG)
+    handler.setFormatter(logging.Formatter("[plugins] %(levelname)s %(message)s"))
+    logger.addHandler(handler)
+    logger.setLevel(logging.DEBUG)
+    # Keep propagation on (the logging default) so records still reach the
+    # handlers of ancestor loggers -- presumably including the agent.log file
+    # handler; verify against the central logging config.
+    # NOTE(review): if an ancestor handler also writes to stderr, each
+    # message will be printed twice while debug mode is enabled.
+    logger.propagate = True
+    _DEBUG_HANDLER_INSTALLED = True
+    logger.debug(
+        "HERMES_PLUGINS_DEBUG=1 — verbose plugin discovery logging enabled"
+    )
+
+
+_install_plugin_debug_handler()
+
 # ---------------------------------------------------------------------------
 # Constants
 # ---------------------------------------------------------------------------
@ -77,6 +130,10 @@ VALID_HOOKS: Set[str] = {
    "post_tool_call",
    "transform_terminal_output",
    "transform_tool_result",
+    # Transform LLM output before it's returned to the user.
+    # Plugins return a string to replace the response text, or None/empty to leave unchanged.
+    # First non-None string wins. Useful for vocabulary/personality transformation.
+    "transform_llm_output",
    "pre_llm_call",
    "post_llm_call",
    "pre_api_request",
@ -170,7 +227,7 @@ def _get_enabled_plugins() -> Optional[set]:
 # Data classes
 # ---------------------------------------------------------------------------

-_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive", "platform"}
+_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive", "platform", "model-provider"}


@dataclass
@ -233,6 +290,27 @@ class PluginContext:
    def __init__(self, manifest: PluginManifest, manager: "PluginManager"):
        self.manifest = manifest
        self._manager = manager
+        # Lazy-built host-owned LLM facade — see ctx.llm property below.
+        self._llm: Any = None
+
+    # -- host-owned LLM access ----------------------------------------------
+
+    @property
+    def llm(self) -> Any:
+        """Return this plugin's :class:`agent.plugin_llm.PluginLlm` facade.
+
+        The facade is created on first access and cached on the context.
+        Its plugin id is the manifest ``key``, falling back to the manifest
+        ``name`` when the key is empty.
+
+        It lets trusted plugins run host-owned chat or structured
+        completions against the user's active model and auth without
+        bringing their own provider keys. Override capability (model, agent
+        id, auth profile) is fail-closed by default and gated through
+        ``plugins.entries.<plugin_id>.llm.*`` config keys.
+
+        See :mod:`agent.plugin_llm` for the full surface."""
+        facade = self._llm
+        if facade is None:
+            # Imported lazily, on first access only.
+            from agent.plugin_llm import PluginLlm
+            facade = PluginLlm(plugin_id=self.manifest.key or self.manifest.name)
+            self._llm = facade
+        return facade

    # -- tool registration --------------------------------------------------

@ -640,32 +718,49 @@ class PluginManager:
        #   - flat: ``plugins/disk-cleanup/plugin.yaml`` (standalone)
        #   - category: ``plugins/image_gen/openai/plugin.yaml`` (backend)
        #
-        # ``memory/`` and ``context_engine/`` are skipped at the top level —
-        # they have their own discovery systems. ``platforms/`` is a category
-        # holding platform adapters (scanned one level deeper below).
+        # ``memory/``, ``context_engine/``, and ``model-providers/`` are
+        # skipped at the top level — they have their own discovery systems
+        # (plugins/memory/__init__.py, providers/__init__.py). ``platforms/``
+        # is a category holding platform adapters (scanned one level deeper
+        # below).
        repo_plugins = get_bundled_plugins_dir()
-        manifests.extend(
-            self._scan_directory(
-                repo_plugins,
-                source="bundled",
-                skip_names={"memory", "context_engine", "platforms"},
-            )
+        logger.debug("Scanning bundled plugins: %s", repo_plugins)
+        bundled = self._scan_directory(
+            repo_plugins,
+            source="bundled",
+            skip_names={"memory", "context_engine", "platforms", "model-providers"},
        )
-        manifests.extend(
-            self._scan_directory(repo_plugins / "platforms", source="bundled")
+        logger.debug("  bundled (top-level): %d manifest(s)", len(bundled))
+        manifests.extend(bundled)
+        bundled_platforms = self._scan_directory(
+            repo_plugins / "platforms", source="bundled"
        )
+        logger.debug("  bundled/platforms: %d manifest(s)", len(bundled_platforms))
+        manifests.extend(bundled_platforms)

        # 2. User plugins (~/.hermes/plugins/)
        user_dir = get_hermes_home() / "plugins"
-        manifests.extend(self._scan_directory(user_dir, source="user"))
+        logger.debug("Scanning user plugins: %s", user_dir)
+        user_manifests = self._scan_directory(user_dir, source="user")
+        logger.debug("  user: %d manifest(s)", len(user_manifests))
+        manifests.extend(user_manifests)

        # 3. Project plugins (./.hermes/plugins/)
        if _env_enabled("HERMES_ENABLE_PROJECT_PLUGINS"):
            project_dir = Path.cwd() / ".hermes" / "plugins"
-            manifests.extend(self._scan_directory(project_dir, source="project"))
+            logger.debug("Scanning project plugins: %s", project_dir)
+            project_manifests = self._scan_directory(project_dir, source="project")
+            logger.debug("  project: %d manifest(s)", len(project_manifests))
+            manifests.extend(project_manifests)
+        else:
+            logger.debug(
+                "Project plugins disabled (set HERMES_ENABLE_PROJECT_PLUGINS=1 to enable)"
+            )

        # 4. Pip / entry-point plugins
-        manifests.extend(self._scan_entry_points())
+        ep_manifests = self._scan_entry_points()
+        logger.debug("  entrypoints: %d manifest(s)", len(ep_manifests))
+        manifests.extend(ep_manifests)

        # Load each manifest (skip user-disabled plugins).
        # Later sources override earlier ones on key collision — user
@ -706,6 +801,21 @@ class PluginManager:
                )
                continue

+            # Model provider plugins are loaded by providers/__init__.py
+            # (its own lazy discovery keyed off first get_provider_profile()
+            # call). We record the manifest here for introspection but do
+            # not import the module — a second import would create two
+            # ProviderProfile instances and break the "last writer wins"
+            # override semantics between bundled and user plugins.
+            if manifest.kind == "model-provider":
+                loaded = LoadedPlugin(manifest=manifest, enabled=True)
+                self._plugins[lookup_key] = loaded
+                logger.debug(
+                    "Skipping '%s' (model-provider, handled by providers/ discovery)",
+                    lookup_key,
+                )
+                continue
+
            # Built-in backends auto-load — they ship with hermes and must
            # just work. Selection among them (e.g. which image_gen backend
            # services calls) is driven by ``<category>.provider`` config,
@ -714,7 +824,7 @@ class PluginManager:
            # Bundled platform plugins (gateway adapters like IRC) auto-load
            # for the same reason: every platform Hermes ships must be
            # available out of the box without the user having to opt in.
-            if manifest.source == "bundled" and manifest.kind in ("backend", "platform"):
+            if manifest.source == "bundled" and manifest.kind in {"backend", "platform"}:
                self._load_plugin(manifest)
                continue

@ -846,7 +956,7 @@ class PluginManager:
            if yaml is None:
                logger.warning("PyYAML not installed – cannot load %s", manifest_file)
                return None
-            data = yaml.safe_load(manifest_file.read_text()) or {}
+            data = yaml.safe_load(manifest_file.read_text(encoding="utf-8")) or {}

            name = data.get("name", plugin_dir.name)
            key = f"{prefix}/{plugin_dir.name}" if prefix else name
@ -883,9 +993,26 @@ class PluginManager:
                                "treating as kind='exclusive'",
                                key,
                            )
+                        elif (
+                            "register_provider" in source_text
+                            and "ProviderProfile" in source_text
+                        ):
+                            # Model provider plugin (calls register_provider()
+                            # from ``providers`` with a ProviderProfile). Route
+                            # to providers/__init__.py discovery.
+                            kind = "model-provider"
+                            logger.debug(
+                                "Plugin %s: detected model provider, "
+                                "treating as kind='model-provider'",
+                                key,
+                            )
                    except Exception:
                        pass

+            logger.debug(
+                "Parsed manifest: key=%s name=%s kind=%s source=%s path=%s",
+                key, name, kind, source, plugin_dir,
+            )
            return PluginManifest(
                name=name,
                version=str(data.get("version", "")),
@ -900,7 +1027,9 @@ class PluginManager:
                key=key,
            )
        except Exception as exc:
-            logger.warning("Failed to parse %s: %s", manifest_file, exc)
+            logger.warning(
+                "Failed to parse %s: %s", manifest_file, exc, exc_info=_PLUGINS_DEBUG,
+            )
            return None

    # -----------------------------------------------------------------------
@ -940,9 +1069,13 @@ class PluginManager:
    def _load_plugin(self, manifest: PluginManifest) -> None:
        """Import a plugin module and call its ``register(ctx)`` function."""
        loaded = LoadedPlugin(manifest=manifest)
+        logger.debug(
+            "Loading plugin '%s' (source=%s, kind=%s, path=%s)",
+            manifest.key or manifest.name, manifest.source, manifest.kind, manifest.path,
+        )

        try:
-            if manifest.source in ("user", "project", "bundled"):
+            if manifest.source in {"user", "project", "bundled"}:
                module = self._load_directory_module(manifest)
            else:
                module = self._load_entrypoint_module(manifest)
@ -982,10 +1115,23 @@ class PluginManager:
                    if self._plugin_commands[c].get("plugin") == manifest.name
                ]
                loaded.enabled = True
+                logger.debug(
+                    "  registered: %d tool(s), %d hook(s), %d slash command(s), %d CLI command(s)",
+                    len(loaded.tools_registered),
+                    len(loaded.hooks_registered),
+                    len(loaded.commands_registered),
+                    sum(
+                        1 for c in self._cli_commands
+                        if self._cli_commands[c].get("plugin") == manifest.name
+                    ),
+                )

        except Exception as exc:
            loaded.error = str(exc)
-            logger.warning("Failed to load plugin '%s': %s", manifest.name, exc)
+            logger.warning(
+                "Failed to load plugin '%s': %s",
+                manifest.name, exc, exc_info=_PLUGINS_DEBUG,
+            )

        self._plugins[manifest.key or manifest.name] = loaded

@ -1226,6 +1372,55 @@ def get_plugin_command_handler(name: str) -> Optional[Callable]:
    return entry["handler"] if entry else None


+# Upper bound, in seconds, on how long the helper-thread path below waits
+# for an async plugin handler before giving up.
+_PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS = 30.0
+
+
+def resolve_plugin_command_result(result: Any) -> Any:
+    """Resolve a plugin command return value, awaiting async handlers when needed.
+
+    Sync CLI/TUI dispatch sites call plugin handlers from plain functions.
+    If a handler is async, await it directly when no loop is running; if
+    we're already inside an active loop, run it in a helper thread with its
+    own loop so the caller still gets a concrete result synchronously. The
+    threaded path is bounded by a 30s timeout so a hung async handler cannot
+    wedge the terminal indefinitely.
+
+    Args:
+        result: Whatever the plugin handler returned. Non-awaitable values
+            are passed through untouched; any awaitable (a coroutine or an
+            object implementing ``__await__``) is driven to completion.
+
+    Returns:
+        ``result`` itself when it is not awaitable, otherwise the value the
+        awaitable produced.
+
+    Raises:
+        TimeoutError: The helper thread did not finish within
+            ``_PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS`` seconds.
+        BaseException: Whatever the awaitable raised is re-raised in the
+            calling thread.
+    """
+    if not inspect.isawaitable(result):
+        return result
+
+    async def _await_result() -> Any:
+        # ``inspect.isawaitable`` also accepts objects that only define
+        # ``__await__``, but ``asyncio.run`` insists on a coroutine and
+        # rejects such objects with ValueError on interpreters that enforce
+        # that. Wrapping gives ``asyncio.run`` a real coroutine in every case.
+        return await result
+
+    try:
+        asyncio.get_running_loop()
+    except RuntimeError:
+        # No loop is running in this thread, so we can own one directly.
+        return asyncio.run(_await_result())
+
+    # A loop is already running here; ``asyncio.run`` would refuse to nest,
+    # so drive the awaitable on a private loop in a helper thread.
+    outcome: Dict[str, Any] = {}
+    failure: Dict[str, BaseException] = {}
+    done = threading.Event()
+
+    def _runner() -> None:
+        try:
+            outcome["value"] = asyncio.run(_await_result())
+        except BaseException as exc:  # pragma: no cover - re-raised below
+            failure["exc"] = exc
+        finally:
+            done.set()
+
+    # Daemon thread: a handler that never finishes must not block exit.
+    thread = threading.Thread(
+        target=_runner,
+        name="hermes-plugin-command-await",
+        daemon=True,
+    )
+    thread.start()
+    if not done.wait(timeout=_PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS):
+        raise TimeoutError(
+            "Plugin command async handler did not complete within "
+            f"{_PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS:.0f}s"
+        )
+    if "exc" in failure:
+        raise failure["exc"]
+    return outcome.get("value")
+
+
 def get_plugin_commands() -> Dict[str, dict]:
    """Return the full plugin commands dict (name → {handler, description, plugin}).

--- a/hermes_cli/plugins_cmd.py
+++ b/hermes_cli/plugins_cmd.py
@ -9,19 +9,60 @@ rendered with Rich Markdown.  Otherwise a default confirmation is shown.

 from __future__ import annotations

+import functools
 import logging
 import os
 import shutil
 import subprocess
 import sys
 from pathlib import Path
-from typing import Optional
+from typing import Any, Optional

 from hermes_constants import get_hermes_home
 from hermes_cli.config import cfg_get

 logger = logging.getLogger(__name__)

+
+@functools.lru_cache(maxsize=1)
+def _resolve_git_executable() -> Optional[str]:
+    """Locate a usable ``git`` binary even when ``PATH`` is stripped down.
+
+    :func:`shutil.which` is consulted first. If it finds nothing, a fixed
+    list of well-known install locations is probed in order: Git for
+    Windows directories on ``nt``, standard POSIX paths otherwise. The
+    first existing file wins. Returns ``None`` when no candidate exists.
+    The answer is memoised for the life of the process.
+    """
+    on_path = shutil.which("git")
+    if on_path:
+        return on_path
+
+    if os.name != "nt":
+        candidates = ["/usr/bin/git", "/usr/local/bin/git", "/bin/git"]
+    else:
+        program_files = os.environ.get("ProgramFiles", r"C:\Program Files")
+        program_files_x86 = os.environ.get(
+            "ProgramFiles(x86)", r"C:\Program Files (x86)"
+        )
+        local_app_data = os.environ.get("LOCALAPPDATA", "")
+
+        # Each Git root is probed as <root>/cmd/git.exe then <root>/bin/git.exe.
+        git_roots = [
+            os.path.join(program_files, "Git"),
+            os.path.join(program_files_x86, "Git"),
+        ]
+        if local_app_data:
+            git_roots.append(os.path.join(local_app_data, "Programs", "Git"))
+        candidates = [
+            os.path.join(root, subdir, "git.exe")
+            for root in git_roots
+            for subdir in ("cmd", "bin")
+        ]
+
+    return next(
+        (path for path in candidates if path and os.path.isfile(path)),
+        None,
+    )
+
+
+class PluginOperationError(Exception):
+    """Recoverable plugin install/update failure (CLI exits; HTTP maps to 4xx).
+
+    Raised by ``_install_plugin_core`` with a message suitable for showing
+    to the user. ``cmd_install`` prints that message and exits with status
+    1; ``dashboard_install_plugin`` returns it as
+    ``{"ok": False, "error": <message>}`` instead of raising.
+    """
+
+
 # Minimum manifest version this installer understands.
 # Plugins may declare ``manifest_version: 1`` in plugin.yaml;
 # future breaking changes to the manifest schema bump this.
@ -44,7 +85,7 @@ def _sanitize_plugin_name(name: str, plugins_dir: Path) -> Path:
    if not name:
        raise ValueError("Plugin name must not be empty.")

-    if name in (".", ".."):
+    if name in {".", ".."}:
        raise ValueError(
            f"Invalid plugin name '{name}': must not reference the plugins directory itself."
        )
@ -122,7 +163,7 @@ def _read_manifest(plugin_dir: Path) -> dict:
    try:
        import yaml

-        with open(manifest_file) as f:
+        with open(manifest_file, encoding="utf-8") as f:
            return yaml.safe_load(f) or {}
    except Exception as e:
        logger.warning("Failed to read plugin.yaml in %s: %s", plugin_dir, e)
@ -150,6 +191,24 @@ def _copy_example_files(plugin_dir: Path, console) -> None:
                )


+def _missing_requires_env_names(manifest: dict) -> list[str]:
+    """List the ``requires_env`` variables of *manifest* that have no value.
+
+    Each ``requires_env`` entry may be a bare variable name or a mapping
+    with a ``name`` key; anything else, and entries with an empty name, are
+    ignored. A variable counts as missing when ``get_env_value`` returns a
+    falsy value for it. Declaration order is preserved.
+    """
+    declared = manifest.get("requires_env") or []
+    if not declared:
+        return []
+
+    from hermes_cli.config import get_env_value
+
+    names: list[str] = []
+    for item in declared:
+        if isinstance(item, str):
+            candidate = item
+        elif isinstance(item, dict):
+            candidate = item.get("name")
+        else:
+            continue
+        if candidate:
+            names.append(candidate)
+
+    return [var for var in names if not get_env_value(var)]
+
+
 def _prompt_plugin_env_vars(manifest: dict, console) -> None:
    """Prompt for required environment variables declared in plugin.yaml.

@ -283,6 +342,99 @@ def _require_installed_plugin(name: str, plugins_dir: Path, console) -> Path:
 # ---------------------------------------------------------------------------


+def _install_plugin_core(identifier: str, *, force: bool) -> tuple[Path, dict, str]:
+    """Clone Git plugin into ``~/.hermes/plugins``.
+
+    Shared, non-interactive install path: it never prompts and never calls
+    ``sys.exit``; every recoverable failure is reported by raising.
+
+    Args:
+        identifier: Plugin source as given by the user; turned into a git
+            URL by ``_resolve_git_url``.
+        force: When true, an existing plugin directory with the same name
+            is deleted and replaced; when false, its presence is an error.
+
+    Returns ``(target_dir, installed_manifest, canonical_name)``.
+    Raises ``PluginOperationError`` on failure.
+    """
+    import tempfile
+
+    try:
+        git_url = _resolve_git_url(identifier)
+    except ValueError as e:
+        raise PluginOperationError(str(e)) from e
+
+    plugins_dir = _plugins_dir()
+
+    # Clone into a scratch directory first: the final directory name comes
+    # from the cloned manifest, so it is not known before the clone.
+    with tempfile.TemporaryDirectory() as tmp:
+        tmp_target = Path(tmp) / "plugin"
+
+        git_exe = _resolve_git_executable()
+        if not git_exe:
+            raise PluginOperationError("git is not installed or not in PATH.")
+
+        # NOTE(review): git_url is passed positionally with no "--"
+        # separator; confirm _resolve_git_url rejects values starting
+        # with "-" so they cannot be parsed as git options.
+        try:
+            result = subprocess.run(
+                [git_exe, "clone", "--depth", "1", git_url, str(tmp_target)],
+                capture_output=True,
+                text=True,
+                timeout=60,
+            )
+        except FileNotFoundError as e:
+            # The resolved binary vanished or is not executable as a file.
+            raise PluginOperationError(
+                "git is not installed or not in PATH.",
+            ) from e
+        except subprocess.TimeoutExpired as e:
+            raise PluginOperationError(
+                "Git clone timed out after 60 seconds.",
+            ) from e
+
+        if result.returncode != 0:
+            # Prefer stderr, fall back to stdout for the failure text.
+            err = (result.stderr or result.stdout or "").strip()
+            raise PluginOperationError(f"Git clone failed:\n{err}")
+
+        # The manifest's declared name wins; otherwise derive one from the URL.
+        manifest = _read_manifest(tmp_target)
+        plugin_name = manifest.get("name") or _repo_name_from_url(git_url)
+
+        try:
+            target = _sanitize_plugin_name(plugin_name, plugins_dir)
+        except ValueError as e:
+            raise PluginOperationError(str(e)) from e
+
+        # Reject manifests that are malformed or newer than this installer
+        # understands; a missing manifest_version is accepted as-is.
+        mv = manifest.get("manifest_version")
+        if mv is not None:
+            try:
+                mv_int = int(mv)
+            except (ValueError, TypeError):
+                raise PluginOperationError(
+                    f"Plugin '{plugin_name}' has invalid manifest_version "
+                    f"'{mv}' (expected an integer).",
+                ) from None
+            if mv_int > _SUPPORTED_MANIFEST_VERSION:
+                from hermes_cli.config import recommended_update_command
+
+                raise PluginOperationError(
+                    f"Plugin '{plugin_name}' requires manifest_version {mv}, "
+                    f"but this installer only supports up to {_SUPPORTED_MANIFEST_VERSION}. "
+                    f"Run {recommended_update_command()} to update Hermes.",
+                ) from None
+
+        if target.exists():
+            if not force:
+                raise PluginOperationError(
+                    f"Plugin '{plugin_name}' already exists. Use force reinstall "
+                    f"or run `hermes plugins update {plugin_name}`.",
+                )
+            # The old tree is removed before the new one is moved in, so a
+            # failure in the move below leaves no copy installed.
+            shutil.rmtree(target)
+
+        # Must happen inside the ``with`` block: the clone lives in the
+        # temporary directory, which is deleted on exit.
+        shutil.move(str(tmp_target), str(target))
+
+    # Soft validation only: an unrecognisable layout is logged, not rejected.
+    has_yaml = (target / "plugin.yaml").exists() or (target / "plugin.yml").exists()
+    if not has_yaml and not (target / "__init__.py").exists():
+        logger.warning(
+            "%s has no plugin.yaml / __init__.py; may not be a valid plugin",
+            plugin_name,
+        )
+
+    from rich.console import Console
+
+    # A fresh Console is created here because this function takes no console argument.
+    _copy_example_files(target, Console())
+    # Re-read from the installed location and fall back to the directory
+    # name when the manifest declares no name.
+    installed_manifest = _read_manifest(target)
+    installed_name = installed_manifest.get("name") or target.name
+    return target, installed_manifest, installed_name
+
+
 def cmd_install(
    identifier: str,
    force: bool = False,
@ -293,7 +445,6 @@ def cmd_install(
    After install, prompt "Enable now? [y/N]" unless *enable* is provided
    (True = auto-enable without prompting, False = install disabled).
    """
-    import tempfile
    from rich.console import Console

    console = Console()
@ -304,116 +455,43 @@ def cmd_install(
        console.print(f"[red]Error:[/red] {e}")
        sys.exit(1)

-    # Warn about insecure / local URL schemes
    if git_url.startswith(("http://", "file://")):
        console.print(
            "[yellow]Warning:[/yellow] Using insecure/local URL scheme. "
-            "Consider using https:// or git@ for production installs."
+            "Consider using https:// or git@ for production installs.",
        )

-    plugins_dir = _plugins_dir()
+    console.print(f"[dim]Cloning {git_url}...[/dim]")

-    # Clone into a temp directory first so we can read plugin.yaml for the name
-    with tempfile.TemporaryDirectory() as tmp:
-        tmp_target = Path(tmp) / "plugin"
-        console.print(f"[dim]Cloning {git_url}...[/dim]")
+    try:
+        target, installed_manifest, installed_name = _install_plugin_core(
+            identifier,
+            force=force,
+        )
+    except PluginOperationError as e:
+        console.print(f"[red]Error:[/red] {e}")
+        sys.exit(1)

-        try:
-            result = subprocess.run(
-                ["git", "clone", "--depth", "1", git_url, str(tmp_target)],
-                capture_output=True,
-                text=True,
-                timeout=60,
-            )
-        except FileNotFoundError:
-            console.print("[red]Error:[/red] git is not installed or not in PATH.")
-            sys.exit(1)
-        except subprocess.TimeoutExpired:
-            console.print("[red]Error:[/red] Git clone timed out after 60 seconds.")
-            sys.exit(1)
-
-        if result.returncode != 0:
-            console.print(
-                f"[red]Error:[/red] Git clone failed:\n{result.stderr.strip()}"
-            )
-            sys.exit(1)
-
-        # Read manifest
-        manifest = _read_manifest(tmp_target)
-        plugin_name = manifest.get("name") or _repo_name_from_url(git_url)
-
-        # Sanitize plugin name against path traversal
-        try:
-            target = _sanitize_plugin_name(plugin_name, plugins_dir)
-        except ValueError as e:
-            console.print(f"[red]Error:[/red] {e}")
-            sys.exit(1)
-
-        # Check manifest_version compatibility
-        mv = manifest.get("manifest_version")
-        if mv is not None:
-            try:
-                mv_int = int(mv)
-            except (ValueError, TypeError):
-                console.print(
-                    f"[red]Error:[/red] Plugin '{plugin_name}' has invalid "
-                    f"manifest_version '{mv}' (expected an integer)."
-                )
-                sys.exit(1)
-            if mv_int > _SUPPORTED_MANIFEST_VERSION:
-                from hermes_cli.config import recommended_update_command
-                console.print(
-                    f"[red]Error:[/red] Plugin '{plugin_name}' requires manifest_version "
-                    f"{mv}, but this installer only supports up to {_SUPPORTED_MANIFEST_VERSION}.\n"
-                    f"Run [bold]{recommended_update_command()}[/bold] to get a newer installer."
-                )
-                sys.exit(1)
-
-        if target.exists():
-            if not force:
-                console.print(
-                    f"[red]Error:[/red] Plugin '{plugin_name}' already exists at {target}.\n"
-                    f"Use [bold]--force[/bold] to remove and reinstall, or "
-                    f"[bold]hermes plugins update {plugin_name}[/bold] to pull latest."
-                )
-                sys.exit(1)
-            console.print(f"[dim]  Removing existing {plugin_name}...[/dim]")
-            shutil.rmtree(target)
-
-        # Move from temp to final location
-        shutil.move(str(tmp_target), str(target))
-
-    # Validate it looks like a plugin
-    if not (target / "plugin.yaml").exists() and not (target / "__init__.py").exists():
+    if not (target / "plugin.yaml").exists() and not (target / "plugin.yml").exists() and not (
+        target / "__init__.py"
+    ).exists():
        console.print(
-            f"[yellow]Warning:[/yellow] {plugin_name} doesn't contain plugin.yaml "
-            f"or __init__.py. It may not be a valid Hermes plugin."
+            f"[yellow]Warning:[/yellow] {installed_name} doesn't contain plugin.yaml "
+            f"or __init__.py. It may not be a valid Hermes plugin.",
        )

-    # Copy .example files to their real names (e.g. config.yaml.example → config.yaml)
-    _copy_example_files(target, console)
-
-    # Re-read manifest from installed location (for env var prompting)
-    installed_manifest = _read_manifest(target)
-
-    # Prompt for required environment variables before showing after-install docs
    _prompt_plugin_env_vars(installed_manifest, console)

    _display_after_install(target, identifier)

-    # Determine the canonical plugin name for enable-list bookkeeping.
-    installed_name = installed_manifest.get("name") or target.name
-
-    # Decide whether to enable: explicit flag > interactive prompt > default off
    should_enable = enable
    if should_enable is None:
-        # Interactive prompt unless stdin isn't a TTY (scripted install).
        if sys.stdin.isatty() and sys.stdout.isatty():
            try:
                answer = input(
-                    f"  Enable '{installed_name}' now? [y/N]: "
+                    f"  Enable '{installed_name}' now? [y/N]: ",
                ).strip().lower()
-                should_enable = answer in ("y", "yes")
+                should_enable = answer in {"y", "yes"}
            except (EOFError, KeyboardInterrupt):
                should_enable = False
        else:
@ -427,12 +505,12 @@ def cmd_install(
        _save_enabled_set(enabled)
        _save_disabled_set(disabled)
        console.print(
-            f"[green]✓[/green] Plugin [bold]{installed_name}[/bold] enabled."
+            f"[green]✓[/green] Plugin [bold]{installed_name}[/bold] enabled.",
        )
    else:
        console.print(
            f"[dim]Plugin installed but not enabled. "
-            f"Run `hermes plugins enable {installed_name}` to activate.[/dim]"
+            f"Run `hermes plugins enable {installed_name}` to activate.[/dim]",
        )

    console.print("[dim]Restart the gateway for the plugin to take effect:[/dim]")
@ -462,36 +540,22 @@ def cmd_update(name: str) -> None:

    console.print(f"[dim]Updating {name}...[/dim]")

-    try:
-        result = subprocess.run(
-            ["git", "pull", "--ff-only"],
-            capture_output=True,
-            text=True,
-            timeout=60,
-            cwd=str(target),
-        )
-    except FileNotFoundError:
-        console.print("[red]Error:[/red] git is not installed or not in PATH.")
-        sys.exit(1)
-    except subprocess.TimeoutExpired:
-        console.print("[red]Error:[/red] Git pull timed out after 60 seconds.")
-        sys.exit(1)
-
-    if result.returncode != 0:
-        console.print(f"[red]Error:[/red] Git pull failed:\n{result.stderr.strip()}")
+    ok, output = _git_pull_plugin_dir(target)
+    if not ok:
+        console.print(f"[red]Error:[/red] {output}")
        sys.exit(1)

    # Copy any new .example files
    _copy_example_files(target, console)

-    output = result.stdout.strip()
-    if "Already up to date" in output:
+    out = output.strip()
+    if "Already up to date" in out:
        console.print(
            f"[green]✓[/green] Plugin [bold]{name}[/bold] is already up to date."
        )
    else:
        console.print(f"[green]✓[/green] Plugin [bold]{name}[/bold] updated.")
-        console.print(f"[dim]{output}[/dim]")
+        console.print(f"[dim]{out}[/dim]")


 def cmd_remove(name: str) -> None:
@ -667,7 +731,7 @@ def _discover_all_plugins() -> list:
        for d in sorted(base.iterdir()):
            if not d.is_dir():
                continue
-            if source == "bundled" and d.name in ("memory", "context_engine"):
+            if source == "bundled" and d.name in {"memory", "context_engine"}:
                continue
            manifest_file = d / "plugin.yaml"
            if not manifest_file.exists():
@ -679,7 +743,7 @@ def _discover_all_plugins() -> list:
            description = ""
            if yaml:
                try:
-                    with open(manifest_file) as f:
+                    with open(manifest_file, encoding="utf-8") as f:
                        manifest = yaml.safe_load(f) or {}
                    name = manifest.get("name", d.name)
                    version = manifest.get("version", "")
@ -1065,10 +1129,10 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
            stdscr.refresh()
            key = stdscr.getch()

-            if key in (curses.KEY_UP, ord("k")):
+            if key in {curses.KEY_UP, ord("k")}:
                if total_items > 0:
                    cursor = (cursor - 1) % total_items
-            elif key in (curses.KEY_DOWN, ord("j")):
+            elif key in {curses.KEY_DOWN, ord("j")}:
                if total_items > 0:
                    cursor = (cursor + 1) % total_items
            elif key == ord(" "):
@ -1104,7 +1168,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
                            curses.init_pair(3, curses.COLOR_CYAN, -1)
                            curses.init_pair(4, 8, -1)
                        curses.curs_set(0)
-            elif key in (curses.KEY_ENTER, 10, 13):
+            elif key in {curses.KEY_ENTER, 10, 13}:
                if cursor < n_plugins:
                    # ENTER on a plugin checkbox — confirm and exit
                    result_holder["plugins_changed"] = True
@ -1136,7 +1200,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
                            curses.init_pair(3, curses.COLOR_CYAN, -1)
                            curses.init_pair(4, 8, -1)
                        curses.curs_set(0)
-            elif key in (27, ord("q")):
+            elif key in {27, ord("q")}:
                # Save plugin changes on exit
                result_holder["plugins_changed"] = True
                return
@ -1244,6 +1308,249 @@ def _run_composite_fallback(plugin_names, plugin_labels, plugin_selected,
    print()


+def dashboard_install_plugin(
+    identifier: str,
+    *,
+    force: bool,
+    enable: bool,
+) -> dict[str, Any]:
+    """Install a plugin for the web dashboard without prompting.
+
+    Always returns a JSON-serializable dict. On failure it is
+    ``{"ok": False, "error": <message>}``. On success it carries the
+    installed plugin name, any warnings, the declared env vars that are
+    still missing, the path of ``after-install.md`` (or ``None``), and
+    the requested ``enabled`` flag.
+    """
+    warnings: list[str] = []
+    try:
+        resolved_url = _resolve_git_url(identifier)
+    except ValueError:
+        # An unresolvable identifier is reported by the install call below.
+        pass
+    else:
+        if resolved_url.startswith(("http://", "file://")):
+            warnings.append(
+                "Insecure URL scheme; prefer https:// or git@ for production installs.",
+            )
+
+    try:
+        install_dir, manifest, plugin_name = _install_plugin_core(
+            identifier,
+            force=force,
+        )
+    except PluginOperationError as failure:
+        return {"ok": False, "error": str(failure)}
+
+    missing_env = _missing_requires_env_names(manifest)
+
+    if enable:
+        allow = _get_enabled_set()
+        deny = _get_disabled_set()
+        allow.add(plugin_name)
+        deny.discard(plugin_name)
+        _save_enabled_set(allow)
+        _save_disabled_set(deny)
+
+    after_install = install_dir / "after-install.md"
+    after_install_path = str(after_install) if after_install.exists() else None
+
+    return {
+        "ok": True,
+        "plugin_name": plugin_name,
+        "warnings": warnings,
+        "missing_env": missing_env,
+        "after_install_path": after_install_path,
+        "enabled": enable,
+    }
+
+
+def _get_plugin_toolset_key(name: str) -> Optional[str]:
+    """Look up the toolset key under which plugin *name* registers tools.
+
+    Two strategies are tried, both best-effort (any exception is swallowed):
+
+    1. Ask the plugin manager which tools the matching loaded plugin
+       registered, and return the toolset of the first one the registry
+       knows about.
+    2. Read ``provides_tools`` from the plugin's manifest on disk (bundled
+       directory first, then the user plugins directory) and do the same
+       registry lookup.
+
+    Returns ``None`` when the registry cannot be imported or neither
+    strategy yields a toolset.
+    """
+    try:
+        from tools.registry import registry
+    except Exception:
+        return None
+
+    def _first_toolset(tool_names) -> Optional[str]:
+        # First tool whose registry entry carries a non-empty toolset.
+        for tool_name in tool_names:
+            entry = registry.get_entry(tool_name)
+            if entry and entry.toolset:
+                return entry.toolset
+        return None
+
+    # Strategy 1: tools recorded by the plugin manager for the loaded plugin.
+    try:
+        from hermes_cli.plugins import discover_plugins, get_plugin_manager
+        discover_plugins()  # idempotent — ensures plugins are loaded
+        loaded_plugins = get_plugin_manager()._plugins
+        # Only the first plugin matching by manifest name or key is considered.
+        owner = next(
+            (
+                plugin
+                for key, plugin in loaded_plugins.items()
+                if plugin.manifest.name == name or key == name
+            ),
+            None,
+        )
+        if owner is not None:
+            live_toolset = _first_toolset(owner.tools_registered)
+            if live_toolset is not None:
+                return live_toolset
+    except Exception:
+        pass
+
+    # Strategy 2: ``provides_tools`` declared in the on-disk manifest.
+    try:
+        from hermes_cli.plugins import get_bundled_plugins_dir
+        for root in (get_bundled_plugins_dir(), _plugins_dir()):
+            if not root.is_dir():
+                continue
+            plugin_dir = root / name
+            if not plugin_dir.is_dir():
+                continue
+            declared_tools = _read_manifest(plugin_dir).get("provides_tools") or []
+            declared_toolset = _first_toolset(declared_tools)
+            if declared_toolset is not None:
+                return declared_toolset
+    except Exception:
+        pass
+
+    return None
+
+
+def _toggle_plugin_toolset(name: str, *, enable: bool) -> None:
+    """Sync a plugin's toolset into (or out of) every platform's toolset list.
+
+    Does nothing when the plugin has no toolset key. Otherwise each
+    list-valued entry of ``platform_toolsets`` in the config gains the key
+    (enable) or loses it (disable). When enabling and ``platform_toolsets``
+    is empty, a ``"cli"`` entry holding just this toolset is created. The
+    config is saved only if something actually changed.
+    """
+    toolset_key = _get_plugin_toolset_key(name)
+    if not toolset_key:
+        return
+
+    from hermes_cli.config import load_config, save_config
+
+    config = load_config()
+    per_platform = config.get("platform_toolsets")
+    if not isinstance(per_platform, dict):
+        # Missing or malformed section: start from an empty mapping.
+        per_platform = {}
+        config["platform_toolsets"] = per_platform
+
+    dirty = False
+    for toolsets in per_platform.values():
+        if not isinstance(toolsets, list):
+            continue
+        present = toolset_key in toolsets
+        if enable and not present:
+            toolsets.append(toolset_key)
+            dirty = True
+        elif not enable and present:
+            toolsets.remove(toolset_key)
+            dirty = True
+
+    # If enabling and no platforms have toolset lists yet, add to "cli" at minimum
+    if enable and not dirty and not per_platform:
+        per_platform["cli"] = [toolset_key]
+        dirty = True
+
+    if dirty:
+        save_config(config)
+
+
+def dashboard_set_agent_plugin_enabled(name: str, *, enabled: bool) -> dict[str, Any]:
+    """Move a plugin between the enabled and disabled lists for the dashboard.
+
+    Returns ``{"ok": False, "error": ...}`` when the plugin does not exist.
+    If the plugin is already in the requested state, nothing is written and
+    the result has ``"unchanged": True``. Otherwise both sets are saved and
+    the plugin's toolset is toggled to match, so tool-providing plugins
+    become visible to (or hidden from) the agent.
+    """
+    if not _plugin_exists(name):
+        return {"ok": False, "error": f"Plugin '{name}' is not installed or bundled."}
+
+    allow = _get_enabled_set()
+    deny = _get_disabled_set()
+
+    # "Already there" means present in the wanted set AND absent from the other.
+    if enabled:
+        already_in_state = name in allow and name not in deny
+    else:
+        already_in_state = name not in allow and name in deny
+    if already_in_state:
+        return {"ok": True, "name": name, "unchanged": True}
+
+    if enabled:
+        allow.add(name)
+        deny.discard(name)
+    else:
+        allow.discard(name)
+        deny.add(name)
+
+    _save_enabled_set(allow)
+    _save_disabled_set(deny)
+    _toggle_plugin_toolset(name, enable=bool(enabled))
+    return {"ok": True, "name": name, "unchanged": False}
+
+
+def _user_installed_plugin_dir(name: str) -> Optional[Path]:
+    """Return the user plugin directory for *name*, or ``None``.
+
+    ``None`` is returned both when ``_sanitize_plugin_name`` rejects the
+    name and when the sanitized path is not an existing directory.
+    """
+    root = _plugins_dir()
+    try:
+        candidate = _sanitize_plugin_name(name, root)
+    except ValueError:
+        return None
+    if candidate.is_dir():
+        return candidate
+    return None
+
+
+def dashboard_update_user_plugin(name: str) -> dict[str, Any]:
+    """Fast-forward a user-installed plugin with ``git pull`` for the dashboard.
+
+    Fails with ``{"ok": False, "error": ...}`` when the plugin directory is
+    missing, is not a git checkout, or the pull itself fails. On success,
+    new ``.example`` files are copied and the pull output is returned,
+    with ``unchanged`` set when git reported "Already up to date".
+    """
+    plugin_dir = _user_installed_plugin_dir(name)
+
+    if plugin_dir is None:
+        error = f"Plugin '{name}' was not found under {_plugins_dir()}."
+        return {"ok": False, "error": error}
+
+    if not (plugin_dir / ".git").exists():
+        error = f"Plugin '{name}' is not a git checkout; cannot pull updates."
+        return {"ok": False, "error": error}
+
+    pulled, message = _git_pull_plugin_dir(plugin_dir)
+    if not pulled:
+        return {"ok": False, "error": message}
+
+    from rich.console import Console
+
+    _copy_example_files(plugin_dir, Console())
+    return {
+        "ok": True,
+        "name": name,
+        "output": message,
+        "unchanged": "Already up to date" in message,
+    }
+
+
+def _git_pull_plugin_dir(target: Path) -> tuple[bool, str]:
+    """Run ``git pull --ff-only`` inside *target*.
+
+    Returns ``(True, stdout)`` on success, or ``(False, message)`` when git
+    cannot be found, the pull exceeds 60 seconds, or git exits non-zero.
+    For a non-zero exit the message is stderr, then stdout, then a generic
+    fallback, whichever is first non-empty after stripping.
+    """
+    git_missing = "git is not installed or not in PATH."
+
+    git_exe = _resolve_git_executable()
+    if not git_exe:
+        return False, git_missing
+
+    command = [git_exe, "pull", "--ff-only"]
+    try:
+        result = subprocess.run(
+            command,
+            capture_output=True,
+            text=True,
+            timeout=60,
+            cwd=str(target),
+        )
+    except FileNotFoundError:
+        return False, git_missing
+    except subprocess.TimeoutExpired:
+        return False, "Git pull timed out after 60 seconds."
+
+    if result.returncode == 0:
+        return True, result.stdout.strip()
+
+    detail = (result.stderr or "").strip()
+    if not detail:
+        detail = result.stdout.strip()
+    return False, (detail if detail else "git pull failed.")
+
+
+def dashboard_remove_user_plugin(name: str) -> dict[str, Any]:
+    """Remove a user-installed plugin directory on behalf of the dashboard.
+
+    Refuses when any discovered plugin called *name* comes from the
+    ``bundled`` source, and reports an error when no matching directory
+    exists in the user plugins directory. Otherwise the whole plugin tree
+    is deleted and ``{"ok": True, "name": name}`` is returned.
+    """
+    plugins_dir = _plugins_dir()
+
+    is_bundled = any(
+        found_name == name and source == "bundled"
+        for found_name, _ver, _desc, source, _path in _discover_all_plugins()
+    )
+    if is_bundled:
+        return {"ok": False, "error": "Bundled plugins cannot be removed from the dashboard."}
+
+    plugin_dir = _user_installed_plugin_dir(name)
+    if plugin_dir is None:
+        error = f"Plugin '{name}' was not found under {plugins_dir}."
+        return {"ok": False, "error": error}
+
+    shutil.rmtree(plugin_dir)
+    return {"ok": True, "name": name}
+
+
 def plugins_command(args) -> None:
    """Dispatch hermes plugins subcommands."""
    action = getattr(args, "plugins_action", None)
@ -1262,13 +1569,13 @@ def plugins_command(args) -> None:
        )
    elif action == "update":
        cmd_update(args.name)
-    elif action in ("remove", "rm", "uninstall"):
+    elif action in {"remove", "rm", "uninstall"}:
        cmd_remove(args.name)
    elif action == "enable":
        cmd_enable(args.name)
    elif action == "disable":
        cmd_disable(args.name)
-    elif action in ("list", "ls"):
+    elif action in {"list", "ls"}:
        cmd_list()
    elif action is None:
        cmd_toggle()
--- a/hermes_cli/profile_distribution.py
+++ b/hermes_cli/profile_distribution.py
@ -0,0 +1,702 @@
+"""Profile distributions — shareable, packaged Hermes profiles via git.
+
+A distribution is a Hermes profile published as a git repository (or
+installed from a local directory for development). Install with one command
+from a git URL, update in place, and keep your local memories / sessions /
+credentials untouched.
+
+Where this fits relative to the existing pieces:
+
+* ``hermes profile export/import`` — local backup / restore for a profile
+  on your own machine. NOT a distribution format. Stays as-is.
+* ``hermes skills install <url>`` — the URL install pattern we're mirroring,
+  but at the profile granularity.
+
+Subcommands (all live under ``hermes profile``, not a parallel tree):
+
+    hermes profile install <source> [--name N] [--alias] [--force] [--yes]
+    hermes profile update  <name>  [--force-config] [--yes]
+    hermes profile info    <name>
+
+``<source>`` is one of:
+
+* A git URL (``github.com/user/repo``, ``https://github.com/...``, ``git@...``,
+  ``ssh://``, ``git://``), optionally with ``#<ref>`` to pin a tag / branch /
+  commit SHA.
+* A local directory that already contains ``distribution.yaml`` — used
+  during profile development before the first push.
+
+Manifest format (``distribution.yaml`` at the profile root)::
+
+    name: telemetry
+    version: 0.1.0
+    description: "Compliance monitoring harness"
+    hermes_requires: ">=0.12.0"
+    author: "..."
+    license: "..."
+    env_requires:
+      - name: OPENAI_API_KEY
+        description: "OpenAI API key"
+        required: true
+      - name: GRAPHITI_MCP_URL
+        description: "Memory graph URL"
+        required: false
+        default: "http://127.0.0.1:8000/sse"
+    distribution_owned:      # optional; sensible defaults apply
+      - SOUL.md
+      - skills/
+      - cron/
+      - mcp.json
+
+Update semantics:
+
+* Distribution-owned paths (SOUL.md, mcp.json, skills/, cron/,
+  distribution.yaml) are replaced from the new source.
+* ``config.yaml`` is distribution-owned but preserved on update unless
+  ``--force-config`` is passed (user overrides typically live here).
+* User-owned paths (memories/, sessions/, state.db, auth.json, .env,
+  logs/, workspace/, home/, plans/, *_cache/, and anything under
+  ``local/``) are never touched.
+"""
+
+from __future__ import annotations
+
+import re
+import shutil
+import subprocess
+import tempfile
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+MANIFEST_FILENAME = "distribution.yaml"
+ENV_TEMPLATE_FILENAME = ".env.template"
+ENV_EXAMPLE_FILENAME = ".env.EXAMPLE"
+
+# Default distribution-owned paths (relative to profile root).  Authors may
+# override via ``distribution_owned:`` in the manifest.  config.yaml is
+# distribution-owned but treated specially on update (see _copy_dist_payload).
+DEFAULT_DIST_OWNED: Tuple[str, ...] = (
+    "SOUL.md",
+    "config.yaml",
+    "mcp.json",
+    "skills",
+    "cron",
+    MANIFEST_FILENAME,
+)
+
+# Paths that are NEVER part of a distribution. These are user-owned and are
+# protected on update. Must stay consistent with
+# ``profiles.py::_DEFAULT_EXPORT_EXCLUDE_ROOT`` plus the ``local/``
+# convention for user customizations.
+USER_OWNED_EXCLUDE: frozenset = frozenset({
+    # Credentials & runtime secrets
+    "auth.json", ".env",
+    # Databases & runtime state
+    "state.db", "state.db-shm", "state.db-wal",
+    "hermes_state.db", "response_store.db",
+    "response_store.db-shm", "response_store.db-wal",
+    "gateway.pid", "gateway_state.json", "processes.json",
+    "auth.lock", "active_profile", ".update_check",
+    "errors.log", ".hermes_history",
+    # User data
+    "memories", "sessions", "logs", "plans", "workspace", "home",
+    "image_cache", "audio_cache", "document_cache",
+    "browser_screenshots", "checkpoints", "sandboxes",
+    "backups", "cache",
+    # Infrastructure
+    "hermes-agent", ".worktrees", "profiles", "bin", "node_modules",
+    # User customization namespace
+    "local",
+})
+
+
+# ---------------------------------------------------------------------------
+# Errors
+# ---------------------------------------------------------------------------
+
+
+class DistributionError(Exception):
+    """Raised when a distribution cannot be parsed, installed, updated, or described."""
+
+
+# ---------------------------------------------------------------------------
+# Manifest
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class EnvRequirement:
+    """One entry of a manifest's ``env_requires`` list."""
+
+    name: str
+    description: str = ""
+    required: bool = True
+    default: Optional[str] = None
+
+    @classmethod
+    def from_dict(cls, data: Any) -> "EnvRequirement":
+        """Build a requirement from one parsed ``env_requires`` mapping.
+
+        Raises DistributionError when *data* is not a mapping or carries no
+        non-blank ``name``.
+        """
+        if not isinstance(data, dict):
+            raise DistributionError(
+                f"env_requires entry must be a mapping, got {type(data).__name__}"
+            )
+        var_name = str(data.get("name") or "").strip()
+        if var_name == "":
+            raise DistributionError("env_requires entry missing 'name'")
+        summary = str(data.get("description") or "")
+        is_required = bool(data.get("required", True))
+        return cls(
+            name=var_name,
+            description=summary,
+            required=is_required,
+            default=data.get("default"),
+        )
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialize to a plain dict, omitting ``required``/``default`` at their defaults."""
+        extras: Dict[str, Any] = {}
+        if not self.required:
+            extras["required"] = False
+        if self.default is not None:
+            extras["default"] = self.default
+        return {"name": self.name, "description": self.description, **extras}
+
+
+@dataclass
+class DistributionManifest:
+    """In-memory form of a ``distribution.yaml`` manifest."""
+
+    name: str
+    version: str = "0.1.0"
+    description: str = ""
+    hermes_requires: str = ""
+    author: str = ""
+    license: str = ""
+    env_requires: List[EnvRequirement] = field(default_factory=list)
+    distribution_owned: List[str] = field(default_factory=list)
+    # Tracked after install — where we pulled from, so ``update`` can re-pull.
+    source: str = ""
+    # ISO-8601 UTC timestamp written on install / update, so ``info`` and
+    # ``list`` can show when a distribution landed on disk.  Empty for
+    # manifests that ship in a repo (authors don't populate this).
+    installed_at: str = ""
+
+    @classmethod
+    def from_dict(cls, data: Any) -> "DistributionManifest":
+        """Build a manifest from parsed YAML.
+
+        Missing optional keys fall back to the field defaults.  Raises
+        DistributionError when *data* is not a mapping, when ``name`` is
+        missing or blank, or when ``env_requires`` / ``distribution_owned``
+        is present but not a list.
+        """
+        if not isinstance(data, dict):
+            raise DistributionError(
+                f"{MANIFEST_FILENAME} must be a mapping, got {type(data).__name__}"
+            )
+        name = str(data.get("name") or "").strip()
+        if not name:
+            raise DistributionError(f"{MANIFEST_FILENAME} missing 'name'")
+        env_raw = data.get("env_requires") or []
+        if not isinstance(env_raw, list):
+            raise DistributionError("env_requires must be a list")
+        env_requires = [EnvRequirement.from_dict(e) for e in env_raw]
+        dist_owned_raw = data.get("distribution_owned") or []
+        if dist_owned_raw and not isinstance(dist_owned_raw, list):
+            raise DistributionError("distribution_owned must be a list")
+        # Normalize first, filter second: an entry like "/" is non-blank
+        # before the slashes are stripped but empty afterwards, and an empty
+        # path must not be recorded as distribution-owned.
+        normalized = (str(p).strip().strip("/") for p in dist_owned_raw)
+        distribution_owned = [p for p in normalized if p]
+        return cls(
+            name=name,
+            version=str(data.get("version") or "0.1.0"),
+            description=str(data.get("description") or ""),
+            hermes_requires=str(data.get("hermes_requires") or ""),
+            author=str(data.get("author") or ""),
+            license=str(data.get("license") or ""),
+            env_requires=env_requires,
+            distribution_owned=distribution_owned,
+            source=str(data.get("source") or ""),
+            installed_at=str(data.get("installed_at") or ""),
+        )
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialize to a plain dict; ``name`` and ``version`` always, other keys only when set."""
+        out: Dict[str, Any] = {
+            "name": self.name,
+            "version": self.version,
+        }
+        if self.description:
+            out["description"] = self.description
+        if self.hermes_requires:
+            out["hermes_requires"] = self.hermes_requires
+        if self.author:
+            out["author"] = self.author
+        if self.license:
+            out["license"] = self.license
+        if self.env_requires:
+            out["env_requires"] = [e.to_dict() for e in self.env_requires]
+        if self.distribution_owned:
+            out["distribution_owned"] = self.distribution_owned
+        if self.source:
+            out["source"] = self.source
+        if self.installed_at:
+            out["installed_at"] = self.installed_at
+        return out
+
+    def owned_paths(self) -> List[str]:
+        """Resolve which paths count as distribution-owned.
+
+        Returns a copy of the manifest's own list when it declares one,
+        otherwise a copy of ``DEFAULT_DIST_OWNED``.
+        """
+        if self.distribution_owned:
+            return list(self.distribution_owned)
+        return list(DEFAULT_DIST_OWNED)
+
+
+def _load_yaml(text: str) -> Any:
+    """Parse *text* with PyYAML's safe loader.
+
+    Raises DistributionError if PyYAML cannot be imported.
+    """
+    try:
+        import yaml as yaml_module
+    except ImportError as import_err:  # pragma: no cover — pyyaml is a hard dep
+        raise DistributionError("PyYAML is required for distribution manifests") from import_err
+    else:
+        parsed = yaml_module.safe_load(text)
+    return parsed
+
+
+def _dump_yaml(data: Any) -> str:
+    """Serialize *data* to block-style YAML, keeping the given key order."""
+    from yaml import safe_dump
+
+    rendered = safe_dump(data, default_flow_style=False, sort_keys=False)
+    return rendered
+
+
+def read_manifest(profile_dir: Path) -> Optional[DistributionManifest]:
+    """Load ``distribution.yaml`` from *profile_dir*.
+
+    Returns None when the directory has no manifest file.  Read or YAML
+    parse failures are re-raised as DistributionError; an empty document is
+    handed to ``DistributionManifest.from_dict`` as ``{}``.
+    """
+    manifest_path = profile_dir / MANIFEST_FILENAME
+    if manifest_path.is_file():
+        try:
+            raw_text = manifest_path.read_text(encoding="utf-8")
+            parsed = _load_yaml(raw_text)
+        except Exception as exc:
+            raise DistributionError(f"Failed to parse {manifest_path}: {exc}") from exc
+        return DistributionManifest.from_dict(parsed or {})
+    return None
+
+
+def write_manifest(profile_dir: Path, manifest: DistributionManifest) -> Path:
+    """Write *manifest* as UTF-8 YAML into *profile_dir* and return the file path."""
+    destination = profile_dir / MANIFEST_FILENAME
+    serialized = _dump_yaml(manifest.to_dict())
+    destination.write_text(serialized, encoding="utf-8")
+    return destination
+
+
+# ---------------------------------------------------------------------------
+# Version check
+# ---------------------------------------------------------------------------
+
+
+_VERSION_OP_RE = re.compile(r"^\s*(>=|<=|==|!=|>|<)\s*(.+?)\s*$")
+
+
+def _parse_semver(v: str) -> Tuple[int, int, int]:
+    """Very small semver parser — major.minor.patch only.  Extra labels stripped.
+
+    A leading ``v`` and anything after the first ``-`` or ``+`` are dropped,
+    missing components default to 0, and components past the third are
+    ignored.  A component with an attached alphanumeric label
+    (``0.13.0rc1``) contributes its leading digits.
+
+    Raises DistributionError when any of the first three components has no
+    usable integer.
+    """
+    s = str(v).strip().lstrip("v")
+    # Strip any pre-release / build metadata (e.g. "0.12.0-rc1+abc").
+    # ``maxsplit`` is passed by keyword: the positional form is deprecated
+    # since Python 3.13.
+    s = re.split(r"[-+]", s, maxsplit=1)[0]
+    parts = s.split(".")
+    parts.extend(["0"] * (3 - len(parts)))
+
+    numbers: List[int] = []
+    for part in parts[:3]:
+        try:
+            numbers.append(int(part))
+            continue
+        except ValueError:
+            pass
+        # Accept only "<digits><label>" (e.g. "0rc1").  Anything else —
+        # such as a leftover ",<1" from a compound spec — stays an error
+        # instead of being silently truncated.
+        labelled = re.fullmatch(r"(\d+)[A-Za-z]\w*", part.strip())
+        if labelled is None:
+            raise DistributionError(f"Unparseable version: {v!r}")
+        numbers.append(int(labelled.group(1)))
+    return (numbers[0], numbers[1], numbers[2])
+
+
+def check_hermes_requires(spec: str, current_version: str) -> None:
+    """Raise DistributionError unless ``current_version`` satisfies ``spec``.
+
+    ``spec`` is a single comparator followed by a version (``>=0.12.0``,
+    ``==0.12.0``, ...); a bare version is treated as ``>=``.  An empty or
+    blank spec means "no requirement" and returns immediately.
+    """
+    if not spec or not spec.strip():
+        return
+
+    matched = _VERSION_OP_RE.match(spec)
+    if matched:
+        op, target = matched.groups()
+    else:
+        # Bare version → treat as ``>=``
+        op, target = ">=", spec.strip()
+
+    have = _parse_semver(current_version)
+    want = _parse_semver(target)
+
+    if op == ">=":
+        satisfied = have >= want
+    elif op == "<=":
+        satisfied = have <= want
+    elif op == "==":
+        satisfied = have == want
+    elif op == "!=":
+        satisfied = have != want
+    elif op == ">":
+        satisfied = have > want
+    else:  # "<" — the only operator left that the regex can capture
+        satisfied = have < want
+
+    if satisfied:
+        return
+    raise DistributionError(
+        f"This distribution requires Hermes {op}{target}, "
+        f"but you have {current_version}."
+    )
+
+
+# ---------------------------------------------------------------------------
+# Env var template helper
+# ---------------------------------------------------------------------------
+
+
+def _env_template_from_manifest(manifest: DistributionManifest) -> str:
+    """Render a ``.env.template`` body from the manifest's ``env_requires``.
+
+    Each variable gets an optional description comment, a
+    ``(required)``/``(optional)`` marker, and a ``NAME=default`` line that
+    is commented out for optional variables.  The result ends with exactly
+    one newline.
+    """
+    header = [
+        "# Environment variables required by this Hermes distribution.",
+        "# Copy to `.env` and fill in your own values before running.",
+        "",
+    ]
+    body: List[str] = []
+    for requirement in manifest.env_requires:
+        if requirement.description:
+            body.append(f"# {requirement.description}")
+        body.append("# (required)" if requirement.required else "# (optional)")
+        value = "" if requirement.default is None else requirement.default
+        assignment = f"{requirement.name}={value}"
+        body.append(assignment if requirement.required else f"# {assignment}")
+        body.append("")
+    return "\n".join(header + body).rstrip() + "\n"
+
+
+# ---------------------------------------------------------------------------
+# Source staging — git clone or local directory
+# ---------------------------------------------------------------------------
+
+
+def _looks_like_git_url(s: str) -> bool:
+    """Return True when *s* should be fetched with git rather than read as a local path.
+
+    Matches anything ending in ``.git``, the ``git@`` / ``ssh://`` /
+    ``git://`` forms, every http(s) URL (git is the only remote transport —
+    tar.gz URLs are no longer accepted), and the bare
+    ``github.com/user/repo`` shorthand.
+    """
+    candidate = s.strip()
+    remote_prefixes = ("git@", "ssh://", "git://", "http://", "https://")
+    if candidate.endswith(".git") or candidate.startswith(remote_prefixes):
+        return True
+    # Bare github.com/user/repo shorthand
+    shorthand = re.match(r"^github\.com/[\w.-]+/[\w.-]+/?$", candidate)
+    return shorthand is not None
+
+
+def _git_clone(url: str, dest: Path) -> None:
+    """Shallow-clone *url* into *dest* (``git clone --depth 1``).
+
+    The bare ``github.com/user/repo`` shorthand is expanded to an https URL
+    first.  Raises DistributionError when git is not installed or when the
+    clone exits non-zero (git's stderr is included in the message).
+    """
+    # Normalize github.com/user/repo shorthand
+    if re.match(r"^github\.com/[\w.-]+/[\w.-]+/?$", url):
+        url = f"https://{url.rstrip('/')}"
+    try:
+        subprocess.run(
+            # "--" ends option parsing: the source string is user-supplied,
+            # and a value beginning with "-" must never be read as a git
+            # option (e.g. "--upload-pack=...").
+            ["git", "clone", "--depth", "1", "--", url, str(dest)],
+            check=True,
+            capture_output=True,
+        )
+    except FileNotFoundError as exc:
+        raise DistributionError("git is required for git-URL installs") from exc
+    except subprocess.CalledProcessError as exc:
+        stderr = exc.stderr.decode("utf-8", errors="replace") if exc.stderr else ""
+        raise DistributionError(f"git clone failed: {stderr.strip()}") from exc
+
+
+def _stage_source(source: str, workdir: Path) -> Tuple[Path, str]:
+    """Turn *source* into a local directory that holds ``distribution.yaml``.
+
+    Returns ``(staged_dir, provenance)``; ``provenance`` is what gets stored
+    in the installed manifest's ``source:`` field so ``hermes profile
+    update`` can re-pull from the same place.
+
+    Two kinds of source are understood:
+      * A git URL (https / ssh / git@ / bare github.com shorthand) — cloned
+        to ``workdir / "clone"`` with its ``.git`` directory removed; the
+        provenance is the stripped source string.
+      * An existing local directory — used in place (nothing is copied);
+        the provenance is its resolved absolute path.
+
+    Raises DistributionError when the source is neither, or when the
+    resulting directory has no manifest at its root.
+    """
+    src_str = source.strip()
+
+    if not _looks_like_git_url(src_str):
+        # Local directory
+        path_guess = Path(src_str).expanduser()
+        if not path_guess.is_dir():
+            raise DistributionError(
+                f"Cannot resolve distribution source: {source!r}. "
+                "Expected a git URL (e.g. github.com/user/repo) or a local directory."
+            )
+        if not (path_guess / MANIFEST_FILENAME).is_file():
+            raise DistributionError(
+                f"No {MANIFEST_FILENAME} in {path_guess}. "
+                "A local-directory source must contain a distribution.yaml at its root."
+            )
+        resolved = path_guess.resolve()
+        return resolved, str(resolved)
+
+    # Git URL
+    clone_dir = workdir / "clone"
+    _git_clone(src_str, clone_dir)
+    # Drop the repository metadata so only the working tree is staged.
+    shutil.rmtree(clone_dir / ".git", ignore_errors=True)
+    manifest_present = (clone_dir / MANIFEST_FILENAME).is_file()
+    if not manifest_present:
+        raise DistributionError(
+            f"No {MANIFEST_FILENAME} at the root of {src_str!r}. "
+            "This repository is not a Hermes profile distribution."
+        )
+    return clone_dir, src_str
+
+
+# ---------------------------------------------------------------------------
+# Install
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class InstallPlan:
+    """Summary of what an install will do, surfaced for user confirmation."""
+    # Parsed manifest; plan_install() rewrites its name, source and
+    # installed_at before storing it here.
+    manifest: DistributionManifest
+    # Directory holding the staged distribution tree (a clone inside the
+    # caller's work directory, or the local source directory itself).
+    staged_dir: Path
+    # Where the distribution came from; mirrors ``manifest.source``.
+    provenance: str
+    # Profile directory the payload is copied into.
+    target_dir: Path
+    existing: bool  # True if target profile already exists (update path)
+    # Whether an existing config.yaml is kept; plan_install() sets this to
+    # ``existing``, and update_distribution() overrides it from ``force_config``.
+    preserves_config: bool = True
+    # True when the staged ``cron/`` tree holds at least one *.json / *.yaml file.
+    has_cron: bool = False
+    # True when the staged ``skills/`` tree holds at least one SKILL.md.
+    has_skills: bool = False
+
+
+def _has_cron_jobs(staged: Path) -> bool:
+    """Return True if ``staged/cron`` contains any ``*.json`` or ``*.yaml`` file at any depth."""
+    cron_root = staged / "cron"
+    if not cron_root.is_dir():
+        return False
+    # next(..., None) stops at the first hit instead of walking the whole tree.
+    return any(
+        next(cron_root.rglob(pattern), None) is not None
+        for pattern in ("*.json", "*.yaml")
+    )
+
+
+def _count_skills(staged: Path) -> int:
+    """Count the ``SKILL.md`` files found at any depth under ``staged/skills``."""
+    skills_root = staged / "skills"
+    if skills_root.is_dir():
+        return len(list(skills_root.rglob("SKILL.md")))
+    return 0
+
+
+def plan_install(
+    source: str,
+    workdir: Path,
+    override_name: Optional[str] = None,
+) -> InstallPlan:
+    """Stage *source* under *workdir* and describe what installing it would do.
+
+    Nothing is written to the target profile.  The staged manifest is
+    updated in memory with the canonical profile name (``override_name``
+    wins over the manifest's own name), the provenance, and a fresh UTC
+    ``installed_at`` stamp.
+
+    Raises DistributionError when the source has no manifest, when the
+    running Hermes version does not satisfy ``hermes_requires``, or when
+    the resolved name is ``default``.
+    """
+    from hermes_cli.profiles import (
+        get_profile_dir,
+        normalize_profile_name,
+        validate_profile_name,
+    )
+    from hermes_cli import __version__ as hermes_version
+
+    staged_dir, origin = _stage_source(source, workdir)
+    manifest = read_manifest(staged_dir)
+    if manifest is None:
+        raise DistributionError(
+            f"No {MANIFEST_FILENAME} found at the distribution root — "
+            "this source is not a Hermes distribution."
+        )
+
+    # Fail fast on an incompatible Hermes before resolving anything else.
+    check_hermes_requires(manifest.hermes_requires, hermes_version)
+
+    # Work out which profile this lands in.
+    profile_id = normalize_profile_name(override_name or manifest.name)
+    validate_profile_name(profile_id)
+    if profile_id == "default":
+        raise DistributionError(
+            "Cannot install a distribution as 'default' — that is the built-in "
+            "root profile (~/.hermes).  Pass --name <name> to install under a "
+            "new profile."
+        )
+
+    manifest.name = profile_id
+    manifest.source = origin
+    # One timestamp per plan: both the install and the update path write
+    # this manifest to disk through _copy_dist_payload.
+    manifest.installed_at = datetime.now(timezone.utc).isoformat(timespec="seconds")
+
+    profile_dir = get_profile_dir(profile_id)
+    already_present = profile_dir.is_dir()
+
+    return InstallPlan(
+        manifest=manifest,
+        staged_dir=staged_dir,
+        provenance=origin,
+        target_dir=profile_dir,
+        existing=already_present,
+        preserves_config=already_present,
+        has_cron=_has_cron_jobs(staged_dir),
+        has_skills=_count_skills(staged_dir) > 0,
+    )
+
+
+def _copy_dist_payload(
+    staged: Path,
+    target: Path,
+    manifest: DistributionManifest,
+    preserve_config: bool,
+) -> None:
+    """Copy distribution-owned files from *staged* into *target*.
+
+    User-owned paths are never touched.  ``config.yaml`` is replaced only when
+    ``preserve_config`` is False (fresh install or ``--force-config`` update).
+    ``.env.template`` is renamed to ``.env.EXAMPLE`` in the target to avoid
+    shadowing a real ``.env``.
+
+    Every top-level entry of *staged* whose name is not in
+    ``USER_OWNED_EXCLUDE`` is copied; directories replace any existing
+    directory of the same name in *target*.  The manifest is written last,
+    so the on-disk ``distribution.yaml`` is always *manifest*, not the
+    staged copy.
+
+    NOTE(review): ``manifest.owned_paths()`` is not consulted here — the
+    copy set is "everything staged minus user-owned names".  Confirm that
+    is the intended meaning of "distribution-owned".
+    """
+    target.mkdir(parents=True, exist_ok=True)
+
+    for entry in staged.iterdir():
+        name = entry.name
+
+        if name in USER_OWNED_EXCLUDE:
+            continue
+        if name == ENV_TEMPLATE_FILENAME:
+            # Lands under the example name, never as ``.env`` itself.
+            shutil.copy2(entry, target / ENV_EXAMPLE_FILENAME)
+            continue
+        if name == "config.yaml" and preserve_config and (target / "config.yaml").exists():
+            # Leave user's config.yaml alone on update
+            continue
+
+        dest = target / name
+        if entry.is_dir():
+            # Replace, don't merge: files removed upstream must disappear
+            # from the target directory as well.
+            if dest.exists():
+                shutil.rmtree(dest)
+            shutil.copytree(
+                entry,
+                dest,
+                # Applied at every depth of the copied tree, so user-owned
+                # names nested inside a distribution directory are skipped too.
+                ignore=lambda d, names: [n for n in names if n in USER_OWNED_EXCLUDE],
+            )
+        else:
+            shutil.copy2(entry, dest)
+
+    # Emit .env.EXAMPLE from manifest if the staged tree didn't ship one
+    # (an .env.EXAMPLE already present in the target also suppresses this).
+    if manifest.env_requires and not (target / ENV_EXAMPLE_FILENAME).exists():
+        (target / ENV_EXAMPLE_FILENAME).write_text(
+            _env_template_from_manifest(manifest), encoding="utf-8"
+        )
+
+    # Make sure the manifest on disk reflects resolved name + source
+    write_manifest(target, manifest)
+
+
+def _bootstrap_user_dirs(target: Path) -> None:
+    """Ensure the standard profile subdirectories exist under *target*."""
+    subdir_names = (
+        "memories", "sessions", "skills", "skins", "logs",
+        "plans", "workspace", "cron", "home",
+    )
+    for subdir in subdir_names:
+        # parents=True also creates *target* itself; exist_ok keeps reruns harmless.
+        target.joinpath(subdir).mkdir(parents=True, exist_ok=True)
+
+
+def install_distribution(
+    source: str,
+    name: Optional[str] = None,
+    force: bool = False,
+    create_alias: bool = False,
+) -> InstallPlan:
+    """Install a distribution from *source* into a new profile.
+
+    Returns the resolved :class:`InstallPlan`.  Use :func:`plan_install`
+    first if you want to preview + prompt the user before calling this.
+
+    ``name`` overrides the profile name taken from the manifest.  An
+    existing profile is only overwritten when ``force`` is True; otherwise
+    DistributionError is raised.  With ``create_alias`` a wrapper script is
+    created unless the alias name collides, in which case it is skipped
+    without an error.
+
+    For git sources the returned plan's ``staged_dir`` lies inside a
+    temporary directory that no longer exists once this function returns.
+    """
+    from hermes_cli.profiles import (
+        check_alias_collision,
+        create_wrapper_script,
+    )
+
+    with tempfile.TemporaryDirectory(prefix="hermes_dist_install_") as tmp:
+        plan = plan_install(source, Path(tmp), override_name=name)
+
+        if plan.existing and not force:
+            raise DistributionError(
+                f"Profile '{plan.manifest.name}' already exists at {plan.target_dir}. "
+                "Use `hermes profile update` to upgrade in place, "
+                "or pass --force to overwrite."
+            )
+
+        # Install always takes config.yaml from the distribution — also when
+        # --force overwrites an existing profile.  Record that on the plan
+        # (plan_install defaulted the flag to ``existing``) so the returned
+        # summary matches what was done, as update_distribution() does.
+        plan.preserves_config = False
+
+        _bootstrap_user_dirs(plan.target_dir)
+        _copy_dist_payload(
+            plan.staged_dir,
+            plan.target_dir,
+            plan.manifest,
+            preserve_config=plan.preserves_config,
+        )
+
+        if create_alias:
+            collision = check_alias_collision(plan.manifest.name)
+            if collision is None:
+                create_wrapper_script(plan.manifest.name)
+
+        return plan
+
+
+def update_distribution(
+    profile_name: str,
+    force_config: bool = False,
+) -> InstallPlan:
+    """Re-pull the distribution for an existing profile and apply updates.
+
+    The source is read from the installed profile's ``distribution.yaml``
+    ``source:`` field.  Distribution-owned files are overwritten; user-owned
+    data (memories, sessions, auth) is never touched.  ``config.yaml`` is
+    preserved unless ``force_config`` is True.
+
+    Returns the :class:`InstallPlan` that was applied.  Raises
+    DistributionError when the profile does not exist, has no manifest, or
+    has a manifest without a recorded source.
+    """
+    from hermes_cli.profiles import (
+        get_profile_dir,
+        normalize_profile_name,
+        validate_profile_name,
+    )
+
+    canon = normalize_profile_name(profile_name)
+    validate_profile_name(canon)
+    target = get_profile_dir(canon)
+    if not target.is_dir():
+        raise DistributionError(f"Profile '{canon}' does not exist.")
+
+    existing_manifest = read_manifest(target)
+    if existing_manifest is None:
+        raise DistributionError(
+            f"Profile '{canon}' is not a distribution (no {MANIFEST_FILENAME}). "
+            "Only profiles installed via `hermes profile install` can be updated."
+        )
+    if not existing_manifest.source:
+        # Both halves must be f-strings: the suggested command interpolates
+        # the profile name as well.
+        raise DistributionError(
+            f"Profile '{canon}' has no recorded source.  Re-install with "
+            f"`hermes profile install <source> --name {canon} --force`."
+        )
+
+    with tempfile.TemporaryDirectory(prefix="hermes_dist_update_") as tmp:
+        # Pin the plan to the installed profile's name, whatever name the
+        # upstream manifest now declares.
+        plan = plan_install(
+            existing_manifest.source,
+            Path(tmp),
+            override_name=canon,
+        )
+        plan.preserves_config = not force_config
+
+        _copy_dist_payload(
+            plan.staged_dir,
+            plan.target_dir,
+            plan.manifest,
+            preserve_config=plan.preserves_config,
+        )
+        return plan
+
+
+# ---------------------------------------------------------------------------
+# Info — render a manifest summary
+# ---------------------------------------------------------------------------
+
+
+def describe_distribution(profile_name: str) -> Dict[str, Any]:
+    """Return the distribution metadata of a profile as a plain dict.
+
+    The dict is the manifest's ``to_dict()`` form, or ``{}`` when the
+    profile exists but carries no manifest.  Raises DistributionError when
+    the profile directory does not exist.
+    """
+    from hermes_cli.profiles import (
+        get_profile_dir,
+        normalize_profile_name,
+        validate_profile_name,
+    )
+
+    profile_id = normalize_profile_name(profile_name)
+    validate_profile_name(profile_id)
+
+    profile_dir = get_profile_dir(profile_id)
+    if not profile_dir.is_dir():
+        raise DistributionError(f"Profile '{profile_id}' does not exist.")
+
+    manifest = read_manifest(profile_dir)
+    return {} if manifest is None else manifest.to_dict()
--- a/hermes_cli/profiles.py
+++ b/hermes_cli/profiles.py
@ -64,32 +64,99 @@ _CLONE_SUBDIR_FILES = [
    "memories/USER.md",
 ]

-# Runtime files stripped after --clone-all (shouldn't carry over)
-_CLONE_ALL_STRIP = [
+# Runtime files stripped after --clone-all (shouldn't carry over).
+# Kept as a post-copy step rather than in the ignore filter because they
+# are created dynamically during normal use and may be absent at copy time.
+_CLONE_ALL_STRIP: list[str] = [
    "gateway.pid",
    "gateway_state.json",
    "processes.json",
 ]

+# Infrastructure artifacts excluded from --clone-all when the source is the
+# default profile (``~/.hermes``).  Named profiles never contain these
+# directories at root, so the exclusion is gated to avoid silently dropping
+# user data from a named-profile source.
+#
+# Rationale per item:
+#   hermes-agent  — git repo checkout (~84 MB source + ~3 GB venv)
+#   .worktrees    — git worktrees
+#   profiles      — sibling named profiles (recursive copy never intended)
+#   bin           — installed binaries (tirith etc., ~10 MB) shared per-host
+#   node_modules  — npm packages (hundreds of MB)
+#
+# See ``_DEFAULT_EXPORT_EXCLUDE_ROOT`` below for the broader export-side
+# exclusion list (export drops state.db / logs / caches too because the
+# archive is a portable snapshot; clone-all keeps those because the cloned
+# profile is meant to keep working immediately).
+_CLONE_ALL_DEFAULT_EXCLUDE_ROOT: frozenset[str] = frozenset({
+    "hermes-agent",
+    ".worktrees",
+    "profiles",
+    "bin",
+    "node_modules",
+})
+
+# Marker file written by `hermes profile create --no-skills`.  When present in
+# a profile's root, callers of seed_profile_skills() (fresh-create, `hermes
+# update`'s all-profile sync, the web dashboard) skip bundled-skill seeding
+# for that profile.  The user can still install skills manually via
+# `hermes skills install` or drop SKILL.md files into the profile's skills/.
+# Delete the marker file to opt back in.
+NO_BUNDLED_SKILLS_MARKER = ".no-bundled-skills"
+
+
+def has_bundled_skills_opt_out(profile_dir: Path) -> bool:
+    """Report whether *profile_dir* contains the no-bundled-skills marker file.
+
+    An ``OSError`` while probing the filesystem is treated as "no marker".
+    """
+    try:
+        marker_present = profile_dir.joinpath(NO_BUNDLED_SKILLS_MARKER).exists()
+    except OSError:
+        marker_present = False
+    return marker_present
+

 def _clone_all_copytree_ignore(source_dir: Path):
-    """Ignore ``profiles/`` at the root of *source_dir* only.
+    """Exclude infrastructure artifacts when cloning a profile via --clone-all.

-    ``~/.hermes`` contains ``profiles/<name>/`` for sibling named profiles.
-    ``shutil.copytree`` would otherwise duplicate that entire tree inside the
-    new profile (recursive ``.../profiles/.../profiles/...``). Export already
-    excludes ``profiles`` via ``_DEFAULT_EXPORT_EXCLUDE_ROOT`` — match that
-    behavior for ``--clone-all``.
+    Two categories:
+      1. Root-level entries in ``_CLONE_ALL_DEFAULT_EXCLUDE_ROOT`` — known
+         Hermes infrastructure directories that only the default profile
+         (``~/.hermes``) ever contains.  Gated on ``source_dir`` actually
+         being the default profile so a named-profile source never has its
+         own data silently dropped.
+      2. Universal exclusions at any depth — Python bytecode caches that
+         are stale or regenerable (``__pycache__``, ``*.pyc``, ``*.pyo``)
+         and runtime sockets / temp files (``*.sock``, ``*.tmp``).
+
+    The export-side ignore (``_default_export_ignore``) uses the same
+    two-tier pattern with the broader ``_DEFAULT_EXPORT_EXCLUDE_ROOT`` set
+    because the export archive is a portable snapshot rather than a live
+    clone.
    """
    source_resolved = source_dir.resolve()
+    is_default_source = source_resolved == _get_default_hermes_home().resolve()

    def _ignore(directory: str, names: List[str]) -> List[str]:
-        try:
-            if Path(directory).resolve() == source_resolved:
-                return [n for n in names if n == "profiles"]
-        except (OSError, ValueError):
-            pass
-        return []
+        ignored: list[str] = []
+        for entry in names:
+            # Universal exclusions at any depth.
+            if (
+                entry == "__pycache__"
+                or entry.endswith((".pyc", ".pyo", ".sock", ".tmp"))
+            ):
+                ignored.append(entry)
+                continue
+            # Root-level exclusions only apply when cloning the default profile.
+            if is_default_source:
+                try:
+                    if Path(directory).resolve() == source_resolved:
+                        if entry in _CLONE_ALL_DEFAULT_EXCLUDE_ROOT:
+                            ignored.append(entry)
+                except (OSError, ValueError):
+                    # ``resolve()`` can fail on unusual FS layouts (broken
+                    # symlinks, missing parents).  Fail open — better to
+                    # over-copy than silently drop user data.
+                    pass
+        return ignored

    return _ignore

@ -179,8 +246,39 @@ def _get_wrapper_dir() -> Path:
 # Validation
 # ---------------------------------------------------------------------------

+def normalize_profile_name(name: str) -> str:
+    """Return the canonical profile id used on disk and in CLI ``-p`` argv.
+
+    Surrounding whitespace is removed and the result is lowercased, since
+    named profiles are stored lowercase under ``profiles/<id>/``; any
+    casing of ``default`` maps to the literal ``default``.  Non-string
+    input is converted with ``str()`` first.  Dashboards and tools may pass
+    title-cased display labels, so normalize before validation,
+    assignment, and subprocess spawn (see issue #18498).
+
+    Raises ValueError when nothing is left after stripping.
+    """
+    text = name if isinstance(name, str) else str(name)
+    trimmed = text.strip()
+    if trimmed == "":
+        raise ValueError("profile name cannot be empty")
+    return "default" if trimmed.casefold() == "default" else trimmed.lower()
+
+
 def validate_profile_name(name: str) -> None:
-    """Raise ``ValueError`` if *name* is not a valid profile identifier."""
+    """Raise ``ValueError`` if *name* is not a valid profile identifier.
+
+    Validates the input as-given — strict lowercase match. Callers that accept
+    mixed-case or title-cased input from users (dashboard UI, CLI args) should
+    call :func:`normalize_profile_name` first. This separation keeps validate
+    honest about what the on-disk directory name must look like, while
+    ingress-point normalization handles UX flexibility (see #18498).
+
+    Also rejects names in :data:`_RESERVED_NAMES` (``hermes``, ``test``,
+    ``tmp``, ``root``, ``sudo``) that would create confusing on-disk
+    collisions (a ``hermes`` profile inside ``~/.hermes/``) or get refused
+    at alias-creation time anyway. ``default`` is a special pass-through —
+    it's a valid alias for the built-in root profile.
+    """
    if name == "default":
        return  # special alias for ~/.hermes
    if not _PROFILE_ID_RE.match(name):
@ -188,20 +286,28 @@ def validate_profile_name(name: str) -> None:
            f"Invalid profile name {name!r}. Must match "
            f"[a-z0-9][a-z0-9_-]{{0,63}}"
        )
+    if name in _RESERVED_NAMES:
+        raise ValueError(
+            f"Profile name {name!r} is reserved — it collides with either "
+            f"the Hermes installation itself or a common system binary.  "
+            f"Pick a different name."
+        )


 def get_profile_dir(name: str) -> Path:
    """Resolve a profile name to its HERMES_HOME directory."""
-    if name == "default":
+    canon = normalize_profile_name(name)
+    if canon == "default":
        return _get_default_hermes_home()
-    return _get_profiles_root() / name
+    return _get_profiles_root() / canon


 def profile_exists(name: str) -> bool:
    """Check whether a profile directory exists."""
-    if name == "default":
+    canon = normalize_profile_name(name)
+    if canon == "default":
        return True
-    return get_profile_dir(name).is_dir()
+    return get_profile_dir(canon).is_dir()


 # ---------------------------------------------------------------------------
@ -213,28 +319,29 @@ def check_alias_collision(name: str) -> Optional[str]:

    Checks: reserved names, hermes subcommands, existing binaries in PATH.
    """
-    if name in _RESERVED_NAMES:
-        return f"'{name}' is a reserved name"
-    if name in _HERMES_SUBCOMMANDS:
-        return f"'{name}' conflicts with a hermes subcommand"
+    canon = normalize_profile_name(name)
+    if canon in _RESERVED_NAMES:
+        return f"'{canon}' is a reserved name"
+    if canon in _HERMES_SUBCOMMANDS:
+        return f"'{canon}' conflicts with a hermes subcommand"

    # Check existing commands in PATH
    wrapper_dir = _get_wrapper_dir()
    try:
        result = subprocess.run(
-            ["which", name], capture_output=True, text=True, timeout=5,
+            ["which", canon], capture_output=True, text=True, timeout=5,
        )
        if result.returncode == 0:
            existing_path = result.stdout.strip()
            # Allow overwriting our own wrappers
-            if existing_path == str(wrapper_dir / name):
+            if existing_path == str(wrapper_dir / canon):
                try:
-                    content = (wrapper_dir / name).read_text()
+                    content = (wrapper_dir / canon).read_text()
                    if "hermes -p" in content:
                        return None  # it's our wrapper, safe to overwrite
                except Exception:
                    pass
-            return f"'{name}' conflicts with an existing command ({existing_path})"
+            return f"'{canon}' conflicts with an existing command ({existing_path})"
    except (FileNotFoundError, subprocess.TimeoutExpired):
        pass

@ -252,6 +359,7 @@ def create_wrapper_script(name: str) -> Optional[Path]:

    Returns the path to the created wrapper, or None if creation failed.
    """
+    canon = normalize_profile_name(name)
    wrapper_dir = _get_wrapper_dir()
    try:
        wrapper_dir.mkdir(parents=True, exist_ok=True)
@ -259,9 +367,9 @@ def create_wrapper_script(name: str) -> Optional[Path]:
        print(f"⚠ Could not create {wrapper_dir}: {e}")
        return None

-    wrapper_path = wrapper_dir / name
+    wrapper_path = wrapper_dir / canon
    try:
-        wrapper_path.write_text(f'#!/bin/sh\nexec hermes -p {name} "$@"\n')
+        wrapper_path.write_text(f'#!/bin/sh\nexec hermes -p {canon} "$@"\n')
        wrapper_path.chmod(wrapper_path.stat().st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH)
        return wrapper_path
    except OSError as e:
@ -271,7 +379,7 @@ def create_wrapper_script(name: str) -> Optional[Path]:

 def remove_wrapper_script(name: str) -> bool:
    """Remove the wrapper script for a profile. Returns True if removed."""
-    wrapper_path = _get_wrapper_dir() / name
+    wrapper_path = _get_wrapper_dir() / normalize_profile_name(name)
    if wrapper_path.exists():
        try:
            # Verify it's our wrapper before removing
@ -300,6 +408,35 @@ class ProfileInfo:
    has_env: bool = False
    skill_count: int = 0
    alias_path: Optional[Path] = None
+    # Distribution metadata (None if the profile wasn't installed from a distribution).
+    distribution_name: Optional[str] = None
+    distribution_version: Optional[str] = None
+    distribution_source: Optional[str] = None
+
+
+def _read_distribution_meta(profile_dir: Path) -> tuple:
+    """Load ``(name, version, source)`` from ``distribution.yaml``.
+
+    The manifest is looked up directly inside *profile_dir*. Whenever it is
+    absent, unreadable, malformed, or not a mapping the result is
+    ``(None, None, None)``. Errors are swallowed on purpose: one broken
+    manifest must never break ``hermes profile list`` for an unrelated
+    profile.
+    """
+    absent = (None, None, None)
+    manifest = profile_dir / "distribution.yaml"
+    if not manifest.is_file():
+        return absent
+    try:
+        import yaml
+        with open(manifest, "r", encoding="utf-8") as handle:
+            parsed = yaml.safe_load(handle) or {}
+        if isinstance(parsed, dict):
+            # Keys missing from the manifest surface as None via dict.get.
+            return tuple(parsed.get(k) for k in ("name", "version", "source"))
+        return absent
+    except Exception:
+        return absent


 def _read_config_model(profile_dir: Path) -> tuple:
@ -309,7 +446,7 @@ def _read_config_model(profile_dir: Path) -> tuple:
        return None, None
    try:
        import yaml
-        with open(config_path, "r") as f:
+        with open(config_path, "r", encoding="utf-8") as f:
            cfg = yaml.safe_load(f) or {}
        model_cfg = cfg.get("model", {})
        if isinstance(model_cfg, str):
@ -355,6 +492,7 @@ def list_profiles() -> List[ProfileInfo]:
    default_home = _get_default_hermes_home()
    if default_home.is_dir():
        model, provider = _read_config_model(default_home)
+        dist_name, dist_version, dist_source = _read_distribution_meta(default_home)
        profiles.append(ProfileInfo(
            name="default",
            path=default_home,
@ -364,6 +502,9 @@ def list_profiles() -> List[ProfileInfo]:
            provider=provider,
            has_env=(default_home / ".env").exists(),
            skill_count=_count_skills(default_home),
+            distribution_name=dist_name,
+            distribution_version=dist_version,
+            distribution_source=dist_source,
        ))

    # Named profiles
@ -377,6 +518,7 @@ def list_profiles() -> List[ProfileInfo]:
                continue
            model, provider = _read_config_model(entry)
            alias_path = wrapper_dir / name
+            dist_name, dist_version, dist_source = _read_distribution_meta(entry)
            profiles.append(ProfileInfo(
                name=name,
                path=entry,
@ -387,6 +529,9 @@ def list_profiles() -> List[ProfileInfo]:
                has_env=(entry / ".env").exists(),
                skill_count=_count_skills(entry),
                alias_path=alias_path if alias_path.exists() else None,
+                distribution_name=dist_name,
+                distribution_version=dist_version,
+                distribution_source=dist_source,
            ))

    return profiles
@ -398,6 +543,7 @@ def create_profile(
    clone_all: bool = False,
    clone_config: bool = False,
    no_alias: bool = False,
+    no_skills: bool = False,
 ) -> Path:
    """Create a new profile directory.

@ -415,22 +561,33 @@ def create_profile(
        skills, and selected profile identity files from the source profile.
    no_alias:
        If True, skip wrapper script creation.
+    no_skills:
+        If True, create an empty profile with no bundled skills, and write
+        a marker file so ``hermes update`` skips re-seeding this profile's
+        skills. Mutually exclusive with ``clone_config``/``clone_all`` (those
+        explicitly copy skills from the source).

    Returns
    -------
    Path
        The newly created profile directory.
    """
-    validate_profile_name(name)
+    if no_skills and (clone_config or clone_all):
+        raise ValueError(
+            "--no-skills is mutually exclusive with --clone / --clone-all "
+            "(cloning explicitly copies skills from the source profile)."
+        )
+    canon = normalize_profile_name(name)
+    validate_profile_name(canon)

-    if name == "default":
+    if canon == "default":
        raise ValueError(
            "Cannot create a profile named 'default' — it is the built-in profile (~/.hermes)."
        )

-    profile_dir = get_profile_dir(name)
+    profile_dir = get_profile_dir(canon)
    if profile_dir.exists():
-        raise FileExistsError(f"Profile '{name}' already exists at {profile_dir}")
+        raise FileExistsError(f"Profile '{canon}' already exists at {profile_dir}")

    # Resolve clone source
    source_dir = None
@ -440,6 +597,7 @@ def create_profile(
            from hermes_constants import get_hermes_home
            source_dir = get_hermes_home()
        else:
+            clone_from = normalize_profile_name(clone_from)
            validate_profile_name(clone_from)
            source_dir = get_profile_dir(clone_from)
        if not source_dir.is_dir():
@ -496,6 +654,19 @@ def create_profile(
        except Exception:
            pass  # best-effort — don't fail profile creation over this

+    # Write the opt-out marker so seed_profile_skills() and `hermes update`'s
+    # all-profile sync loop both skip this profile for bundled-skill seeding.
+    if no_skills:
+        try:
+            (profile_dir / NO_BUNDLED_SKILLS_MARKER).write_text(
+                "This profile opted out of bundled-skill seeding "
+                "(`hermes profile create --no-skills`).\n"
+                "Delete this file to re-enable sync on the next `hermes update`.\n",
+                encoding="utf-8",
+            )
+        except OSError:
+            pass  # best-effort — the feature still works via the empty skills/ dir
+
    return profile_dir


@ -504,7 +675,19 @@ def seed_profile_skills(profile_dir: Path, quiet: bool = False) -> Optional[dict

    Uses subprocess because sync_skills() caches HERMES_HOME at module level.
    Returns the sync result dict, or None on failure.
+
+    Profiles that opted out of bundled skills (via ``hermes profile create
+    --no-skills`` — which writes ``.no-bundled-skills`` to the profile root)
+    are skipped and get an empty-result dict so callers can report
+    "opted out" instead of "failed".
    """
+    if has_bundled_skills_opt_out(profile_dir):
+        return {
+            "copied": [],
+            "updated": [],
+            "user_modified": [],
+            "skipped_opt_out": True,
+        }
    project_root = Path(__file__).parent.parent.resolve()
    try:
        result = subprocess.run(
@ -540,36 +723,42 @@ def delete_profile(name: str, yes: bool = False) -> Path:

    Returns the path that was removed.
    """
-    validate_profile_name(name)
+    canon = normalize_profile_name(name)
+    validate_profile_name(canon)

-    if name == "default":
+    if canon == "default":
        raise ValueError(
            "Cannot delete the default profile (~/.hermes).\n"
            "To remove everything, use: hermes uninstall"
        )

-    profile_dir = get_profile_dir(name)
+    profile_dir = get_profile_dir(canon)
    if not profile_dir.is_dir():
-        raise FileNotFoundError(f"Profile '{name}' does not exist.")
+        raise FileNotFoundError(f"Profile '{canon}' does not exist.")

    # Show what will be deleted
    model, provider = _read_config_model(profile_dir)
    gw_running = _check_gateway_running(profile_dir)
    skill_count = _count_skills(profile_dir)
+    dist_name, dist_version, dist_source = _read_distribution_meta(profile_dir)

-    print(f"\nProfile: {name}")
+    print(f"\nProfile: {canon}")
    print(f"Path:    {profile_dir}")
    if model:
        print(f"Model:   {model}" + (f" ({provider})" if provider else ""))
    if skill_count:
        print(f"Skills:  {skill_count}")
+    if dist_name:
+        print(f"Distribution: {dist_name}@{dist_version or '?'}")
+        if dist_source:
+            print(f"Installed from: {dist_source}")

    items = [
        "All config, API keys, memories, sessions, skills, cron jobs",
    ]

    # Check for service
-    wrapper_path = _get_wrapper_dir() / name
+    wrapper_path = _get_wrapper_dir() / canon
    has_wrapper = wrapper_path.exists()
    if has_wrapper:
        items.append(f"Command alias ({wrapper_path})")
@ -584,16 +773,16 @@ def delete_profile(name: str, yes: bool = False) -> Path:
    if not yes:
        print()
        try:
-            confirm = input(f"Type '{name}' to confirm: ").strip()
+            confirm = input(f"Type '{canon}' to confirm: ").strip()
        except (KeyboardInterrupt, EOFError):
            print("\nCancelled.")
            return profile_dir
-        if confirm != name:
+        if confirm != canon:
            print("Cancelled.")
            return profile_dir

    # 1. Disable service (prevents auto-restart)
-    _cleanup_gateway_service(name, profile_dir)
+    _cleanup_gateway_service(canon, profile_dir)

    # 2. Stop running gateway
    if gw_running:
@ -601,7 +790,7 @@ def delete_profile(name: str, yes: bool = False) -> Path:

    # 3. Remove wrapper script
    if has_wrapper:
-        if remove_wrapper_script(name):
+        if remove_wrapper_script(canon):
            print(f"✓ Removed {wrapper_path}")

    # 4. Remove profile directory
@ -614,13 +803,13 @@ def delete_profile(name: str, yes: bool = False) -> Path:
    # 5. Clear active_profile if it pointed to this profile
    try:
        active = get_active_profile()
-        if active == name:
+        if active == canon:
            set_active_profile("default")
            print("✓ Active profile reset to default")
    except Exception:
        pass

-    print(f"\nProfile '{name}' deleted.")
+    print(f"\nProfile '{canon}' deleted.")
    return profile_dir


@ -674,7 +863,6 @@ def _cleanup_gateway_service(name: str, profile_dir: Path) -> None:

 def _stop_gateway_process(profile_dir: Path) -> None:
    """Stop a running gateway process via its PID file."""
-    import signal as _signal
    import time as _time

    pid_file = profile_dir / "gateway.pid"
@ -685,19 +873,25 @@ def _stop_gateway_process(profile_dir: Path) -> None:
        raw = pid_file.read_text().strip()
        data = json.loads(raw) if raw.startswith("{") else {"pid": int(raw)}
        pid = int(data["pid"])
-        os.kill(pid, _signal.SIGTERM)
-        # Wait up to 10s for graceful shutdown
+        # Route through terminate_pid so Windows uses the appropriate
+        # primitive (taskkill / TerminateProcess) — signal.SIGKILL is not
+        # defined on Windows, so raw os.kill(pid, _signal.SIGKILL) raises
+        # AttributeError when that line runs, and raw os.kill with SIGTERM
+        # doesn't cascade to child processes the way taskkill /T does.
+        from gateway.status import terminate_pid as _terminate_pid
+        from gateway.status import _pid_exists
+        _terminate_pid(pid)  # graceful first
+        # Wait up to 10s for graceful shutdown. On Windows, os.kill(pid, 0)
+        # is NOT a no-op — use the handle-based existence check.
        for _ in range(20):
            _time.sleep(0.5)
-            try:
-                os.kill(pid, 0)
-            except ProcessLookupError:
+            if not _pid_exists(pid):
                print(f"✓ Gateway stopped (PID {pid})")
                return
        # Force kill
        try:
-            os.kill(pid, _signal.SIGKILL)
-        except ProcessLookupError:
+            _terminate_pid(pid, force=True)
+        except (ProcessLookupError, OSError):
            pass
        print(f"✓ Gateway force-stopped (PID {pid})")
    except (ProcessLookupError, PermissionError):
@ -730,22 +924,23 @@ def set_active_profile(name: str) -> None:

    Writes to ``~/.hermes/active_profile``. Use ``"default"`` to clear.
    """
-    validate_profile_name(name)
-    if name != "default" and not profile_exists(name):
+    canon = normalize_profile_name(name)
+    validate_profile_name(canon)
+    if canon != "default" and not profile_exists(canon):
        raise FileNotFoundError(
-            f"Profile '{name}' does not exist. "
-            f"Create it with: hermes profile create {name}"
+            f"Profile '{canon}' does not exist. "
+            f"Create it with: hermes profile create {canon}"
        )

    path = _get_active_profile_path()
    path.parent.mkdir(parents=True, exist_ok=True)
-    if name == "default":
+    if canon == "default":
        # Remove the file to indicate default
        path.unlink(missing_ok=True)
    else:
        # Atomic write
        tmp = path.with_suffix(".tmp")
-        tmp.write_text(name + "\n")
+        tmp.write_text(canon + "\n")
        tmp.replace(path)


@ -794,7 +989,7 @@ def _default_export_ignore(root_dir: Path):
            if entry == "__pycache__" or entry.endswith((".sock", ".tmp")):
                ignored.add(entry)
            # npm lockfiles can appear at root
-            elif entry in ("package.json", "package-lock.json"):
+            elif entry in {"package.json", "package-lock.json"}:
                ignored.add(entry)
        # Root-level exclusions
        if Path(directory) == root_dir:
@ -811,16 +1006,17 @@ def export_profile(name: str, output_path: str) -> Path:
    """
    import tempfile

-    validate_profile_name(name)
-    profile_dir = get_profile_dir(name)
+    canon = normalize_profile_name(name)
+    validate_profile_name(canon)
+    profile_dir = get_profile_dir(canon)
    if not profile_dir.is_dir():
-        raise FileNotFoundError(f"Profile '{name}' does not exist.")
+        raise FileNotFoundError(f"Profile '{canon}' does not exist.")

    output = Path(output_path)
    # shutil.make_archive wants the base name without extension
    base = str(output).removesuffix(".tar.gz").removesuffix(".tgz")

-    if name == "default":
+    if canon == "default":
        # The default profile IS ~/.hermes itself — its parent is ~/ and its
        # directory name is ".hermes", not "default".  We stage a clean copy
        # under a temp dir so the archive contains ``default/...``.
@ -836,14 +1032,14 @@ def export_profile(name: str, output_path: str) -> Path:

    # Named profiles — stage a filtered copy to exclude credentials
    with tempfile.TemporaryDirectory() as tmpdir:
-        staged = Path(tmpdir) / name
+        staged = Path(tmpdir) / canon
        _CREDENTIAL_FILES = {"auth.json", ".env"}
        shutil.copytree(
            profile_dir,
            staged,
            ignore=lambda d, contents: _CREDENTIAL_FILES & set(contents),
        )
-        result = shutil.make_archive(base, "gztar", tmpdir, name)
+        result = shutil.make_archive(base, "gztar", tmpdir, canon)
        return Path(result)


@ -861,7 +1057,7 @@ def _normalize_profile_archive_parts(member_name: str) -> List[str]:
    ):
        raise ValueError(f"Unsafe archive member path: {member_name}")

-    parts = [part for part in posix_path.parts if part not in ("", ".")]
+    parts = [part for part in posix_path.parts if part not in {"", "."}]
    if not parts or any(part == ".." for part in parts):
        raise ValueError(f"Unsafe archive member path: {member_name}")
    return parts
@ -952,16 +1148,17 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
    # Archives exported from the default profile have "default/" as top-level
    # dir.  Importing as "default" would target ~/.hermes itself — disallow
    # that and guide the user toward a named profile.
-    if inferred_name == "default":
+    canon = normalize_profile_name(inferred_name)
+    validate_profile_name(canon)
+    if canon == "default":
        raise ValueError(
            "Cannot import as 'default' — that is the built-in root profile (~/.hermes). "
            "Specify a different name: hermes profile import <archive> --name <name>"
        )

-    validate_profile_name(inferred_name)
-    profile_dir = get_profile_dir(inferred_name)
+    profile_dir = get_profile_dir(canon)
    if profile_dir.exists():
-        raise FileExistsError(f"Profile '{inferred_name}' already exists at {profile_dir}")
+        raise FileExistsError(f"Profile '{canon}' already exists at {profile_dir}")

    profiles_root = _get_profiles_root()
    profiles_root.mkdir(parents=True, exist_ok=True)
@ -977,8 +1174,8 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
            )

        final_source = extracted
-        if archive_root != inferred_name:
-            final_source = staging_root / inferred_name
+        if archive_root != canon:
+            final_source = staging_root / canon
            extracted.rename(final_source)

        shutil.move(str(final_source), str(profile_dir))
@ -1048,25 +1245,27 @@ def rename_profile(old_name: str, new_name: str) -> Path:

    Returns the new profile directory.
    """
-    validate_profile_name(old_name)
-    validate_profile_name(new_name)
+    old_canon = normalize_profile_name(old_name)
+    new_canon = normalize_profile_name(new_name)
+    validate_profile_name(old_canon)
+    validate_profile_name(new_canon)

-    if old_name == "default":
+    if old_canon == "default":
        raise ValueError("Cannot rename the default profile.")
-    if new_name == "default":
+    if new_canon == "default":
        raise ValueError("Cannot rename to 'default' — it is reserved.")

-    old_dir = get_profile_dir(old_name)
-    new_dir = get_profile_dir(new_name)
+    old_dir = get_profile_dir(old_canon)
+    new_dir = get_profile_dir(new_canon)

    if not old_dir.is_dir():
-        raise FileNotFoundError(f"Profile '{old_name}' does not exist.")
+        raise FileNotFoundError(f"Profile '{old_canon}' does not exist.")
    if new_dir.exists():
-        raise FileExistsError(f"Profile '{new_name}' already exists.")
+        raise FileExistsError(f"Profile '{new_canon}' already exists.")

    # 1. Stop gateway if running
    if _check_gateway_running(old_dir):
-        _cleanup_gateway_service(old_name, old_dir)
+        _cleanup_gateway_service(old_canon, old_dir)
        _stop_gateway_process(old_dir)

    # 2. Rename directory
@ -1074,22 +1273,22 @@ def rename_profile(old_name: str, new_name: str) -> Path:
    print(f"✓ Renamed {old_dir.name} → {new_dir.name}")

    # 3. Update profile-scoped Honcho host blocks, preserving aiPeer identity
-    _migrate_honcho_profile_host(old_name, new_name, new_dir)
+    _migrate_honcho_profile_host(old_canon, new_canon, new_dir)

    # 4. Update wrapper script
-    remove_wrapper_script(old_name)
-    collision = check_alias_collision(new_name)
+    remove_wrapper_script(old_canon)
+    collision = check_alias_collision(new_canon)
    if not collision:
-        create_wrapper_script(new_name)
-        print(f"✓ Alias updated: {new_name}")
+        create_wrapper_script(new_canon)
+        print(f"✓ Alias updated: {new_canon}")
    else:
-        print(f"⚠ Cannot create alias '{new_name}' — {collision}")
+        print(f"⚠ Cannot create alias '{new_canon}' — {collision}")

    # 5. Update active_profile if it pointed to old name
    try:
-        if get_active_profile() == old_name:
-            set_active_profile(new_name)
-            print(f"✓ Active profile updated: {new_name}")
+        if get_active_profile() == old_canon:
+            set_active_profile(new_canon)
+            print(f"✓ Active profile updated: {new_canon}")
    except Exception:
        pass

@ -1191,13 +1390,14 @@ def resolve_profile_env(profile_name: str) -> str:
    Called early in the CLI entry point, before any hermes modules
    are imported, to set the HERMES_HOME environment variable.
    """
-    validate_profile_name(profile_name)
-    profile_dir = get_profile_dir(profile_name)
+    canon = normalize_profile_name(profile_name)
+    validate_profile_name(canon)
+    profile_dir = get_profile_dir(canon)

-    if profile_name != "default" and not profile_dir.is_dir():
+    if canon != "default" and not profile_dir.is_dir():
        raise FileNotFoundError(
-            f"Profile '{profile_name}' does not exist. "
-            f"Create it with: hermes profile create {profile_name}"
+            f"Profile '{canon}' does not exist. "
+            f"Create it with: hermes profile create {canon}"
        )

    return str(profile_dir)
--- a/hermes_cli/pt_input_extras.py
+++ b/hermes_cli/pt_input_extras.py
@ -0,0 +1,83 @@
+"""Augmentations to prompt_toolkit's input-parsing tables.
+
+Imported once at CLI startup. Each helper installs a small mapping into
+prompt_toolkit's `ANSI_SEQUENCES` so byte sequences emitted by modern
+keyboard protocols (Kitty / xterm `modifyOtherKeys`) decode to existing
+key tuples Hermes already binds.
+
+Kept in a standalone module — separate from `cli.py` — so the registrations
+can be unit-tested without importing the whole CLI runtime.
+"""
+
+from __future__ import annotations
+
+
+def install_shift_enter_alias() -> int:
+    """Alias Shift+Enter escape sequences to the Alt+Enter key tuple.
+
+    Alt+Enter decodes to ``(Escape, ControlM)``; pointing the Shift+Enter
+    byte sequences at the same tuple makes the existing Alt+Enter newline
+    handler fire on terminals that emit a distinct Shift+Enter.
+
+    Sequences mapped:
+      - "\\x1b[13;2u"     — Kitty keyboard protocol / CSI-u, modifier=2 (Shift)
+      - "\\x1b[27;2;13~"  — xterm modifyOtherKeys=2, modifier=2 (Shift)
+      - "\\x1b[27;2;13u"  — alternate ordering some emitters use
+
+    Stock prompt_toolkit lacks the CSI-u sequence, and it maps the
+    modifyOtherKeys form `\\x1b[27;2;13~` to plain `Keys.ControlM`, which
+    makes Shift+Enter indistinguishable from Enter — the bug this helper
+    fixes. All three sequences are therefore overwritten whenever they do
+    not already point at the Alt+Enter tuple; other `\\x1b[27;...;13~`
+    sequences (other modifier values) are not touched here.
+
+    Default macOS Terminal and stock Windows Terminal send the same byte
+    for Enter and Shift+Enter, so these sequences never reach Hermes there
+    and nothing can be fixed at the application layer.
+
+    Returns the number of sequences whose mapping was changed.
+    """
+    try:
+        from prompt_toolkit.input.ansi_escape_sequences import ANSI_SEQUENCES
+        from prompt_toolkit.keys import Keys
+    except Exception:
+        return 0
+
+    target = (Keys.Escape, Keys.ControlM)
+    shift_enter_seqs = ("\x1b[13;2u", "\x1b[27;2;13~", "\x1b[27;2;13u")
+    stale = [s for s in shift_enter_seqs if ANSI_SEQUENCES.get(s) != target]
+    for s in stale:
+        ANSI_SEQUENCES[s] = target
+    return len(stale)
+
+
+def install_ctrl_enter_alias() -> int:
+    """Alias Ctrl+Enter escape sequences to the Alt+Enter key tuple.
+
+    Alt+Enter decodes to ``(Escape, ControlM)``; pointing the Ctrl+Enter
+    byte sequences at the same tuple makes the existing Alt+Enter newline
+    handler fire on terminals that emit a distinct Ctrl+Enter.
+
+    Sequences mapped:
+      - "\\x1b[13;5u"     — Kitty keyboard protocol / CSI-u, modifier=5 (Ctrl)
+      - "\\x1b[27;5;13~"  — xterm modifyOtherKeys=2, modifier=5 (Ctrl)
+      - "\\x1b[27;5;13u"  — alternate ordering some emitters use
+
+    Without this alias, Kitty/mintty/xterm-with-modifyOtherKeys users over
+    SSH never get a Ctrl+Enter newline (see #22379). NOTE(review): verify
+    whether stock prompt_toolkit already maps `\\x1b[27;5;13~` to Enter.
+
+    Returns the number of sequences whose mapping was changed.
+    """
+    try:
+        from prompt_toolkit.input.ansi_escape_sequences import ANSI_SEQUENCES
+        from prompt_toolkit.keys import Keys
+    except Exception:
+        return 0
+
+    target = (Keys.Escape, Keys.ControlM)
+    ctrl_enter_seqs = ("\x1b[13;5u", "\x1b[27;5;13~", "\x1b[27;5;13u")
+    stale = [s for s in ctrl_enter_seqs if ANSI_SEQUENCES.get(s) != target]
+    for s in stale:
+        ANSI_SEQUENCES[s] = target
+    return len(stale)
--- a/hermes_cli/pty_bridge.py
+++ b/hermes_cli/pty_bridge.py
@ -7,11 +7,14 @@ keystrokes can be fed back in.  The only caller today is the

 Design constraints:

-* **POSIX-only.**  Hermes Agent supports Windows exclusively via WSL, which
-  exposes a native POSIX PTY via ``openpty(3)``.  Native Windows Python
-  has no PTY; :class:`PtyUnavailableError` is raised with a user-readable
-  install/platform message so the dashboard can render a banner instead of
-  crashing.
+* **POSIX-only.**  This module depends on ``fcntl``, ``termios``, and
+  ``ptyprocess``, none of which exist on native Windows Python.  Native
+  Windows ConPTY is a different API (Windows 10 build 17763+) and would
+  need a separate Windows implementation (``pywinpty``) — that's tracked
+  as a future enhancement.  On native Windows, importing this module
+  raises :class:`ImportError` and the dashboard's ``/chat`` tab shows a
+  WSL-recommended banner instead of crashing.  Every other feature in the
+  dashboard (sessions, jobs, metrics, config editor) works natively.
 * **Zero Node dependency on the server side.**  We use :mod:`ptyprocess`,
  which is a pure-Python wrapper around the OS calls.  The browser talks
  to the same ``hermes --tui`` binary it would launch from the CLI, so
@ -108,9 +111,14 @@ class PtyBridge:
                    "(or pip install -e '.[pty]')."
                )
            raise PtyUnavailableError("Pseudo-terminals are unavailable.")
-        # Let caller-supplied env fully override inheritance; if they pass
-        # None we inherit the server's env (same semantics as subprocess).
-        spawn_env = os.environ.copy() if env is None else env
+        # PTY-hosted programs expect TERM to describe the terminal type.
+        # CI often runs without TERM in the parent process, which makes
+        # simple terminal probes like `tput cols` fail before winsize reads.
+        # Preserve explicit caller overrides, but backfill a sensible default
+        # when TERM is missing or blank.
+        spawn_env = (os.environ.copy() if env is None else env.copy())
+        if not spawn_env.get("TERM"):
+            spawn_env["TERM"] = "xterm-256color"
        proc = ptyprocess.PtyProcess.spawn(  # type: ignore[union-attr]
            list(argv),
            cwd=cwd,
@ -156,7 +164,7 @@ class PtyBridge:
            data = os.read(self._fd, 65536)
        except OSError as exc:
            # EIO on Linux = slave side closed.  EBADF = already closed.
-            if exc.errno in (errno.EIO, errno.EBADF):
+            if exc.errno in {errno.EIO, errno.EBADF}:
                return None
            raise
        if not data:
@ -173,7 +181,7 @@ class PtyBridge:
            try:
                n = os.write(self._fd, view)
            except OSError as exc:
-                if exc.errno in (errno.EIO, errno.EBADF, errno.EPIPE):
+                if exc.errno in {errno.EIO, errno.EBADF, errno.EPIPE}:
                    return
                raise
            if n <= 0:
@ -205,7 +213,7 @@ class PtyBridge:

        # SIGHUP is the conventional "your terminal went away" signal.
        # We escalate if the child ignores it.
-        for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL):
+        for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL):  # windows-footgun: ok — POSIX-only module (imports fcntl/termios/ptyprocess at top)
            if not self._proc.isalive():
                break
            try:
--- a/hermes_cli/relaunch.py
+++ b/hermes_cli/relaunch.py
@ -84,18 +84,34 @@ def resolve_hermes_bin() -> Optional[str]:
      1. ``sys.argv[0]`` if it resolves to a real executable.
      2. ``shutil.which("hermes")`` on PATH.
      3. ``None`` → caller should fall back to ``python -m hermes_cli.main``.
+
+    Windows note: ``os.access(path, os.X_OK)`` returns True for ``.py`` and
+    ``.pyc`` files on Windows (there is no execute permission bit there, so
+    the X_OK check passes for any file that exists).  But
+    ``subprocess.run([script.py, ...])`` can't actually execute a .py
+    directly — CreateProcessW needs a real .exe, not a script associated
+    with the Python launcher.  On Windows we therefore skip the argv[0]
+    fast-path when it points at a .py file and fall through to either
+    ``hermes.exe`` on PATH or the ``sys.executable -m hermes_cli.main``
+    fallback.
    """
    argv0 = sys.argv[0]
+    _is_windows = sys.platform == "win32"
+
+    def _is_python_script(p: str) -> bool:
+        return p.lower().endswith((".py", ".pyc"))

    # Absolute path to an executable (covers nix store, venv wrappers, etc.)
    if os.path.isabs(argv0) and os.path.isfile(argv0) and os.access(argv0, os.X_OK):
-        return argv0
+        if not (_is_windows and _is_python_script(argv0)):
+            return argv0

    # Relative path — resolve against CWD
    if not argv0.startswith("-") and os.path.isfile(argv0):
        abs_path = os.path.abspath(argv0)
        if os.access(abs_path, os.X_OK):
-            return abs_path
+            if not (_is_windows and _is_python_script(abs_path)):
+                return abs_path

    # PATH lookup
    path_bin = shutil.which("hermes")
@ -142,8 +158,48 @@ def relaunch(
    preserve_inherited: bool = True,
    original_argv: Optional[Sequence[str]] = None,
 ) -> None:
-    """Replace the current process with a fresh hermes invocation."""
+    """Replace the current process with a fresh hermes invocation.
+
+    On POSIX we use ``os.execvp`` which replaces the running process with
+    the new one in place — same PID, no double-fork.  That's what the
+    relaunch contract wants: "run hermes again as if the user had typed
+    the new argv".
+
+    Windows has no native exec semantics — ``os.execvp`` on Windows
+    *emulates* exec by spawning the child and exiting the parent, but
+    only works when the target is a real Win32 executable.  Our target
+    is usually ``hermes.exe`` (a Python console-script shim that wraps
+    ``python -m hermes_cli.main``) or a ``.cmd`` batch file, and both
+    raise ``OSError(8, "Exec format error")`` on Windows' execvp.
+
+    The Windows-correct pattern is: spawn the child with ``subprocess.run``
+    (``shell=False``, so the argv list is handed directly to ``CreateProcess``),
+    wait for it to exit, then propagate its exit code via ``sys.exit``.
+    That's functionally equivalent — the user sees "hermes exited, then
+    new hermes started" — just with two PIDs in play instead of one.
+    """
    new_argv = build_relaunch_argv(
        extra_args, preserve_inherited=preserve_inherited, original_argv=original_argv
    )
-    os.execvp(new_argv[0], new_argv)
+    if sys.platform == "win32":
+        # Windows: subprocess + exit, because execvp can't swap to .cmd/.exe shims.
+        import subprocess
+        try:
+            result = subprocess.run(new_argv)
+            sys.exit(result.returncode)
+        except KeyboardInterrupt:
+            sys.exit(130)
+        except OSError as exc:
+            # Surface a helpful error rather than the raw OSError — the
+            # caller used to see ``[Errno 8] Exec format error`` which is
+            # cryptic.  Common causes: ``hermes`` not on PATH yet (install
+            # hasn't propagated User PATH into this shell) or a stale shim.
+            print(
+                f"\nHermes relaunch failed: {exc}\n"
+                f"Command: {' '.join(new_argv)}\n"
+                f"Fix: open a new terminal so PATH picks up, then re-run hermes.",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+    else:
+        os.execvp(new_argv[0], new_argv)
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@ -260,7 +260,7 @@ def _resolve_runtime_from_pool_entry(
            if cfg_base_url:
                base_url = cfg_base_url
        configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
-        if provider in ("opencode-zen", "opencode-go"):
+        if provider in {"opencode-zen", "opencode-go"}:
            # Re-derive api_mode from the effective model rather than the
            # persisted api_mode: the opencode providers serve both
            # anthropic_messages and chat_completions models, so the previous
@ -282,7 +282,7 @@ def _resolve_runtime_from_pool_entry(
    # Anthropic SDK prepends its own /v1/messages to the base_url.  Strip the
    # trailing /v1 so the SDK constructs the correct path (e.g.
    # https://opencode.ai/zen/go/v1/messages instead of .../v1/v1/messages).
-    if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"):
+    if api_mode == "anthropic_messages" and provider in {"opencode-zen", "opencode-go"}:
        base_url = re.sub(r"/v1/?$", "", base_url)

    return {
@ -319,9 +319,10 @@ def _try_resolve_from_custom_pool(
    base_url: str,
    provider_label: str,
    api_mode_override: Optional[str] = None,
+    provider_name: Optional[str] = None,
 ) -> Optional[Dict[str, Any]]:
    """Check if a credential pool exists for a custom endpoint and return a runtime dict if so."""
-    pool_key = get_custom_provider_pool_key(base_url)
+    pool_key = get_custom_provider_pool_key(base_url, provider_name=provider_name)
    if not pool_key:
        return None
    try:
@ -358,11 +359,20 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
        return None
    if not requested_norm.startswith("custom:"):
        try:
-            auth_mod.resolve_provider(requested_norm)
+            canonical = auth_mod.resolve_provider(requested_norm)
        except AuthError:
            pass
        else:
-            return None
+            # A user-declared ``custom_providers`` entry whose name matches
+            # only an *alias* (``kimi`` → built-in ``kimi-coding``) is the
+            # user's intended target — alias rewriting would otherwise hijack
+            # the request.  We only defer to the built-in when the raw name is
+            # the canonical provider itself (``nous``, ``openrouter``, …) so
+            # accidentally shadowing a canonical provider still resolves to
+            # the built-in. See tests/hermes_cli/test_runtime_provider_resolution.py
+            # ``test_named_custom_provider_does_not_shadow_builtin_provider``.
+            if (canonical or "").strip().lower() == requested_norm:
+                return None

    config = load_config()
    
@ -482,6 +492,13 @@ def _resolve_named_custom_runtime(
    requested_norm = (requested_provider or "").strip().lower()
    if requested_norm == "custom" and explicit_base_url:
        base_url = explicit_base_url.strip().rstrip("/")
+        # Check credential pool first — mirrors the named-custom-provider path
+        # so bare `provider: custom` with a configured custom_providers entry
+        # also gets its api_key from the pool instead of env var fallbacks.
+        pool_result = _try_resolve_from_custom_pool(base_url, "custom", None)
+        if pool_result:
+            pool_result["source"] = "direct-alias"
+            return pool_result
        api_key_candidates = [
            (explicit_api_key or "").strip(),
            os.getenv("OPENAI_API_KEY", "").strip(),
@ -512,7 +529,7 @@ def _resolve_named_custom_runtime(
        return None

    # Check if a credential pool exists for this custom endpoint
-    pool_result = _try_resolve_from_custom_pool(base_url, "custom", custom_provider.get("api_mode"))
+    pool_result = _try_resolve_from_custom_pool(base_url, "custom", custom_provider.get("api_mode"), provider_name=custom_provider.get("name"))
    if pool_result:
        # Propagate the model name even when using pooled credentials —
        # the pool doesn't know about the custom_providers model field.
@ -631,8 +648,11 @@ def _resolve_openrouter_runtime(

    # For custom endpoints, check if a credential pool exists
    if effective_provider == "custom" and base_url:
+        # Pass requested_provider so pool lookup prefers name match over base_url,
+        # fixing credential mix-ups when multiple custom providers share a base_url.
        pool_result = _try_resolve_from_custom_pool(
            base_url, effective_provider, _parse_api_mode(model_cfg.get("api_mode")),
+            provider_name=requested_provider if requested_norm != "custom" else None,
        )
        if pool_result:
            return pool_result
@ -839,7 +859,7 @@ def _resolve_explicit_runtime(

        base_url = explicit_base_url
        if not base_url:
-            if provider in ("kimi-coding", "kimi-coding-cn"):
+            if provider in {"kimi-coding", "kimi-coding-cn"}:
                creds = resolve_api_key_provider_credentials(provider)
                base_url = creds.get("base_url", "").rstrip("/")
            else:
@ -1203,7 +1223,7 @@ def resolve_runtime_provider(
        # trust boto3's credential chain — it handles IMDS, ECS task roles,
        # Lambda execution roles, SSO, and other implicit sources that our
        # env-var check can't detect.
-        is_explicit = requested_provider in ("bedrock", "aws", "aws-bedrock", "amazon-bedrock", "amazon")
+        is_explicit = requested_provider in {"bedrock", "aws", "aws-bedrock", "amazon-bedrock", "amazon"}
        if not is_explicit and not has_aws_credentials():
            raise AuthError(
                "No AWS credentials found for Bedrock. Configure one of:\n"
@ -1283,7 +1303,7 @@ def resolve_runtime_provider(
            configured_provider = str(model_cfg.get("provider") or "").strip().lower()
            # Only honor persisted api_mode when it belongs to the same provider family.
            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
-            if provider in ("opencode-zen", "opencode-go"):
+            if provider in {"opencode-zen", "opencode-go"}:
                # opencode-zen/go must always re-derive api_mode from the
                # target model (not the stale persisted api_mode), because
                # the same provider serves both anthropic_messages
@ -1305,7 +1325,7 @@ def resolve_runtime_provider(
                if detected:
                    api_mode = detected
        # Strip trailing /v1 for OpenCode Anthropic models (see comment above).
-        if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"):
+        if api_mode == "anthropic_messages" and provider in {"opencode-zen", "opencode-go"}:
            base_url = re.sub(r"/v1/?$", "", base_url)
        return {
            "provider": provider,
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@ -15,6 +15,7 @@ import importlib.util
 import json
 import logging
 import os
+import re
 import shutil
 import sys
 import copy
@ -88,7 +89,6 @@ _DEFAULT_PROVIDER_MODELS = {
        "claude-sonnet-4.5",
        "claude-haiku-4.5",
        "gemini-2.5-pro",
-        "grok-code-fast-1",
    ],
    "gemini": [
        "gemini-3.1-pro-preview", "gemini-3-pro-preview",
@ -208,12 +208,23 @@ def prompt(question: str, default: str = None, password: bool = False) -> str:
        else:
            value = input(color(display, Colors.YELLOW))

-        return value.strip() or default or ""
+        cleaned = _sanitize_pasted_input(value)
+        return cleaned.strip() or default or ""
    except (KeyboardInterrupt, EOFError):
        print()
        sys.exit(1)


+_BRACKETED_PASTE_PATTERN = re.compile(r"\x1b\[\s*200~|\x1b\[\s*201~")
+
+
+def _sanitize_pasted_input(value: str) -> str:
+    """Strip terminal bracketed-paste control markers from pasted text."""
+    if not isinstance(value, str) or not value:
+        return value
+    return _BRACKETED_PASTE_PATTERN.sub("", value)
+
+
 def _curses_prompt_choice(question: str, choices: list, default: int = 0, description: str | None = None) -> int:
    """Single-select menu using curses. Delegates to curses_radiolist."""
    from hermes_cli.curses_ui import curses_radiolist
@ -281,9 +292,9 @@ def prompt_yes_no(question: str, default: bool = True) -> bool:

        if not value:
            return default
-        if value in ("y", "yes"):
+        if value in {"y", "yes"}:
            return True
-        if value in ("n", "no"):
+        if value in {"n", "no"}:
            return False
        print_error("Please enter 'y' or 'n'")

@ -382,7 +393,7 @@ def _print_setup_summary(config: dict, hermes_home):
            label = f"Web Search & Extract ({subscription_features.web.current_provider})"
        tool_status.append((label, True, None))
    else:
-        tool_status.append(("Web Search & Extract", False, "EXA_API_KEY, PARALLEL_API_KEY, FIRECRAWL_API_KEY/FIRECRAWL_API_URL, or TAVILY_API_KEY"))
+        tool_status.append(("Web Search & Extract", False, "EXA_API_KEY, PARALLEL_API_KEY, FIRECRAWL_API_KEY/FIRECRAWL_API_URL, TAVILY_API_KEY, or SEARXNG_URL"))

    # Browser tools (local Chromium, Camofox, Browserbase, Browser Use, or Firecrawl)
    browser_provider = subscription_features.browser.current_provider
@ -630,7 +641,7 @@ def _prompt_container_resources(config: dict):
    persist_str = prompt(
        "  Persist filesystem across sessions? (yes/no)", persist_label
    )
-    terminal["container_persistent"] = persist_str.lower() in ("yes", "true", "y", "1")
+    terminal["container_persistent"] = persist_str.lower() in {"yes", "true", "y", "1"}

    # CPU
    current_cpu = terminal.get("container_cpu", 1)
@ -681,7 +692,7 @@ def _prompt_vercel_sandbox_settings(config: dict):
    persist_label = "yes" if current_persist else "no"
    terminal["container_persistent"] = prompt(
        "  Persist filesystem with snapshots? (yes/no)", persist_label
-    ).lower() in ("yes", "true", "y", "1")
+    ).lower() in {"yes", "true", "y", "1"}

    current_cpu = terminal.get("container_cpu", 1)
    cpu_str = prompt("  CPU cores", str(current_cpu))
@ -697,7 +708,7 @@ def _prompt_vercel_sandbox_settings(config: dict):
    except ValueError:
        pass

-    if terminal.get("container_disk", 51200) not in (0, 51200):
+    if terminal.get("container_disk", 51200) not in {0, 51200}:
        print_warning("Vercel Sandbox does not support custom disk sizing; resetting container_disk to 51200.")
    terminal["container_disk"] = 51200

@ -964,7 +975,8 @@ def setup_model_provider(config: dict, *, quick: bool = False):
                    )
                else:
                    _selected_vision_model = prompt("  Vision model (blank = use main/custom default)").strip()
-                save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model)
+                if _selected_vision_model:
+                    save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model)
                print_success(
                    f"Vision configured with {_base_url}"
                    + (f" ({_selected_vision_model})" if _selected_vision_model else "")
@ -1190,6 +1202,13 @@ def _setup_tts_provider(config: dict):
                    "Falling back to Edge TTS."
                )
                selected = "edge"
+        if selected == "xai":
+            print()
+            voice_id = prompt("xAI voice_id (Enter for 'eve', or paste a custom voice ID)")
+            if voice_id and voice_id.strip():
+                config.setdefault("tts", {}).setdefault("xai", {})["voice_id"] = voice_id.strip()
+                print_success(f"xAI voice_id set to: {voice_id.strip()}")
+

    elif selected == "minimax":
        existing = get_env_value("MINIMAX_API_KEY")
@ -1321,15 +1340,13 @@ def setup_terminal_backend(config: dict):
        print_success("Terminal backend: Local")
        print_info("Commands run directly on this machine.")

-        # CWD for messaging
+        # Gateway/cron working directory
        print()
-        print_info("Working directory for messaging sessions:")
-        print_info("  When using Hermes via Telegram/Discord, this is where")
-        print_info(
-            "  the agent starts. CLI mode always starts in the current directory."
-        )
+        print_info("Gateway working directory:")
+        print_info("  Used by Telegram/Discord/cron sessions.")
+        print_info("  CLI/TUI always uses your launch directory instead.")
        current_cwd = cfg_get(config, "terminal", "cwd", default="")
-        cwd = prompt("  Messaging working directory", current_cwd or str(Path.home()))
+        cwd = prompt("  Gateway working directory", current_cwd or str(Path.home()))
        if cwd:
            config["terminal"]["cwd"] = cwd

@ -1338,14 +1355,13 @@ def setup_terminal_backend(config: dict):
        existing_sudo = get_env_value("SUDO_PASSWORD")
        if existing_sudo:
            print_info("Sudo password: configured")
-        else:
-            if prompt_yes_no(
-                "Enable sudo support? (stores password for apt install, etc.)", False
-            ):
-                sudo_pass = prompt("  Sudo password", password=True)
-                if sudo_pass:
-                    save_env_value("SUDO_PASSWORD", sudo_pass)
-                    print_success("Sudo password saved")
+        elif prompt_yes_no(
+            "Enable sudo support? (stores password for apt install, etc.)", False
+        ):
+            sudo_pass = prompt("  Sudo password", password=True)
+            if sudo_pass:
+                save_env_value("SUDO_PASSWORD", sudo_pass)
+                print_success("Sudo password saved")

    elif selected_backend == "docker":
        print_success("Terminal backend: Docker")
@ -1643,7 +1659,11 @@ def setup_terminal_backend(config: dict):
 def _apply_default_agent_settings(config: dict):
    """Apply recommended defaults for all agent settings without prompting."""
    config.setdefault("agent", {})["max_turns"] = 90
-    save_env_value("HERMES_MAX_ITERATIONS", "90")
+    # config.yaml is the authoritative source for max_turns; the gateway
+    # bridges it into HERMES_MAX_ITERATIONS at startup. We no longer write
+    # to .env to avoid the dual-source inconsistency that caused the
+    # 60-vs-500 bug (stale .env entry silently shadowing config.yaml).
+    remove_env_value("HERMES_MAX_ITERATIONS")

    config.setdefault("display", {})["tool_progress"] = "all"

@ -1673,9 +1693,10 @@ def setup_agent_settings(config: dict):
    print()

    # ── Max Iterations ──
-    current_max = get_env_value("HERMES_MAX_ITERATIONS") or str(
-        cfg_get(config, "agent", "max_turns", default=90)
-    )
+    # config.yaml is authoritative; read from there. If a legacy .env
+    # entry is still around (from pre-PR#18413 setups), prefer the
+    # config value so we don't surface a stale number to the user.
+    current_max = str(cfg_get(config, "agent", "max_turns", default=90))
    print_info("Maximum tool-calling iterations per conversation.")
    print_info("Higher = more complex tasks, but costs more tokens.")
    print_info(
@ -1686,9 +1707,13 @@ def setup_agent_settings(config: dict):
    try:
        max_iter = int(max_iter_str)
        if max_iter > 0:
-            save_env_value("HERMES_MAX_ITERATIONS", str(max_iter))
+            # Write to config.yaml (authoritative) only. Also clean up any
+            # stale .env entry from earlier setup runs — the gateway's
+            # bridge in gateway/run.py now unconditionally derives
+            # HERMES_MAX_ITERATIONS from agent.max_turns at startup.
            config.setdefault("agent", {})["max_turns"] = max_iter
            config.pop("max_turns", None)
+            remove_env_value("HERMES_MAX_ITERATIONS")
            print_success(f"Max iterations set to {max_iter}")
    except ValueError:
        print_warning("Invalid number, keeping current value")
@ -1704,7 +1729,7 @@ def setup_agent_settings(config: dict):

    current_mode = cfg_get(config, "display", "tool_progress", default="all")
    mode = prompt("Tool progress mode", current_mode)
-    if mode.lower() in ("off", "new", "all", "verbose"):
+    if mode.lower() in {"off", "new", "all", "verbose"}:
        if "display" not in config:
            config["display"] = {}
        config["display"]["tool_progress"] = mode.lower()
@ -2033,6 +2058,16 @@ def _setup_slack():
        print_warning("⚠️  No Slack allowlist set - unpaired users will be denied by default.")
        print_info("   Set SLACK_ALLOW_ALL_USERS=true or GATEWAY_ALLOW_ALL_USERS=true only if you intentionally want open workspace access.")

+    print()
+    print_info("📬 Home Channel: where Hermes delivers cron job results,")
+    print_info("   cross-platform messages, and notifications.")
+    print_info("   To get a channel ID: open the channel in Slack, then right-click")
+    print_info("   the channel name → Copy link — the ID starts with C (e.g. C01ABC2DE3F).")
+    print_info("   You can also set this later by typing /set-home in a Slack channel.")
+    home_channel = prompt("Home channel ID (leave empty to set later with /set-home)")
+    if home_channel:
+        save_env_value("SLACK_HOME_CHANNEL", home_channel.strip())
+

 def _write_slack_manifest_and_instruct():
    """Generate the Slack manifest, write it under HERMES_HOME, and print
@ -2409,6 +2444,7 @@ def setup_gateway(config: dict):

        _is_linux = _platform.system() == "Linux"
        _is_macos = _platform.system() == "Darwin"
+        _is_windows = _platform.system() == "Windows"

        from hermes_cli.gateway import (
            _is_service_installed,
@ -2425,12 +2461,15 @@ def setup_gateway(config: dict):
            launchd_start,
            launchd_restart,
            UserSystemdUnavailableError,
+            SystemScopeRequiresRootError,
+            _system_scope_wizard_would_need_root,
+            _print_system_scope_remediation,
        )

        service_installed = _is_service_installed()
        service_running = _is_service_running()
        supports_systemd = supports_systemd_services()
-        supports_service_manager = supports_systemd or _is_macos
+        supports_service_manager = supports_systemd or _is_macos or _is_windows

        print()
        if supports_systemd and has_conflicting_systemd_units():
@ -2442,33 +2481,58 @@ def setup_gateway(config: dict):
            print()

        if service_running:
-            if prompt_yes_no("  Restart the gateway to pick up changes?", True):
+            if supports_systemd and _system_scope_wizard_would_need_root():
+                _print_system_scope_remediation("restart")
+            elif prompt_yes_no("  Restart the gateway to pick up changes?", True):
                try:
                    if supports_systemd:
                        systemd_restart()
                    elif _is_macos:
                        launchd_restart()
+                    elif _is_windows:
+                        from hermes_cli import gateway_windows
+                        gateway_windows.restart()
                except UserSystemdUnavailableError as e:
                    print_error("  Restart failed — user systemd not reachable:")
                    for line in str(e).splitlines():
                        print(f"  {line}")
+                except SystemScopeRequiresRootError as e:
+                    # Defense in depth: the pre-check above should have
+                    # caught this, but a race (unit file appearing mid-run)
+                    # could still land here. Previously this exited the
+                    # whole wizard via sys.exit(1).
+                    print_error(f"  Restart failed: {e}")
+                    _print_system_scope_remediation("restart")
                except Exception as e:
                    print_error(f"  Restart failed: {e}")
        elif service_installed:
-            if prompt_yes_no("  Start the gateway service?", True):
+            if supports_systemd and _system_scope_wizard_would_need_root():
+                _print_system_scope_remediation("start")
+            elif prompt_yes_no("  Start the gateway service?", True):
                try:
                    if supports_systemd:
                        systemd_start()
                    elif _is_macos:
                        launchd_start()
+                    elif _is_windows:
+                        from hermes_cli import gateway_windows
+                        gateway_windows.start()
                except UserSystemdUnavailableError as e:
                    print_error("  Start failed — user systemd not reachable:")
                    for line in str(e).splitlines():
                        print(f"  {line}")
+                except SystemScopeRequiresRootError as e:
+                    print_error(f"  Start failed: {e}")
+                    _print_system_scope_remediation("start")
                except Exception as e:
                    print_error(f"  Start failed: {e}")
        elif supports_service_manager:
-            svc_name = "systemd" if supports_systemd else "launchd"
+            if supports_systemd:
+                svc_name = "systemd"
+            elif _is_macos:
+                svc_name = "launchd"
+            else:
+                svc_name = "Scheduled Task"
            if prompt_yes_no(
                f"  Install the gateway as a {svc_name} service? (runs in background, starts on boot)",
                True,
@ -2476,13 +2540,23 @@ def setup_gateway(config: dict):
                try:
                    installed_scope = None
                    did_install = False
+                    started_inline = False
                    if supports_systemd:
                        installed_scope, did_install = install_linux_gateway_from_setup(force=False)
-                    else:
+                    elif _is_macos:
                        launchd_install(force=False)
                        did_install = True
+                    else:
+                        # gateway_windows.install() registers the Scheduled
+                        # Task AND starts it immediately (via schtasks /Run
+                        # or a direct spawn fallback), so no separate start
+                        # prompt is needed here.
+                        from hermes_cli import gateway_windows
+                        gateway_windows.install(force=False)
+                        did_install = True
+                        started_inline = True
                    print()
-                    if did_install and prompt_yes_no("  Start the service now?", True):
+                    if did_install and not started_inline and prompt_yes_no("  Start the service now?", True):
                        try:
                            if supports_systemd:
                                systemd_start(system=installed_scope == "system")
@ -2492,6 +2566,9 @@ def setup_gateway(config: dict):
                            print_error("  Start failed — user systemd not reachable:")
                            for line in str(e).splitlines():
                                print(f"  {line}")
+                        except SystemScopeRequiresRootError as e:
+                            print_error(f"  Start failed: {e}")
+                            _print_system_scope_remediation("start")
                        except Exception as e:
                            print_error(f"  Start failed: {e}")
                except Exception as e:
@ -2979,6 +3056,21 @@ def run_setup_wizard(args):
    config = load_config()
    hermes_home = get_hermes_home()

+    # Back up existing config before setup modifies it (#3522)
+    config_path = get_config_path()
+    if config_path.exists():
+        from datetime import datetime as _dt
+        _backup_path = config_path.with_suffix(
+            f".yaml.bak.{_dt.now().strftime('%Y%m%d_%H%M%S')}"
+        )
+        try:
+            import shutil
+            shutil.copy2(config_path, _backup_path)
+        except Exception:
+            _backup_path = None
+    else:
+        _backup_path = None
+
    # Detect non-interactive environments (headless SSH, Docker, CI/CD)
    non_interactive = getattr(args, 'non_interactive', False)
    if not non_interactive and not is_interactive_stdin():
@ -3148,6 +3240,10 @@ def run_setup_wizard(args):

    # Save and show summary
    save_config(config)
+    if _backup_path and _backup_path.exists():
+        print_info(f"Previous config backed up to: {_backup_path}")
+        print_info("If setup changed a value you customized, restore it with:")
+        print_info(f"  cp {_backup_path} {config_path}")
    _print_setup_summary(config, hermes_home)

    _offer_launch_chat()
@ -3164,22 +3260,23 @@ def _offer_launch_chat():


 def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):
-    """Streamlined first-time setup: provider + model only.
+    """Streamlined first-time setup: provider, model, terminal & messaging.

-    Applies sensible defaults for TTS (Edge), terminal (local), agent
-    settings, and tools — the user can customize later via
-    ``hermes setup <section>``.
+    Applies sensible defaults for TTS (Edge), agent settings, and tools —
+    the user can customize later via ``hermes setup <section>``.
    """
    # Step 1: Model & Provider (essential — skips rotation/vision/TTS)
    setup_model_provider(config, quick=True)

-    # Step 2: Apply defaults for everything else
+    # Step 2: Terminal Backend — where commands run is a core decision
+    setup_terminal_backend(config)
+
+    # Step 3: Apply defaults for everything else
    _apply_default_agent_settings(config)
-    config.setdefault("terminal", {}).setdefault("backend", "local")

    save_config(config)

-    # Step 3: Offer messaging gateway setup
+    # Step 4: Offer messaging gateway setup
    print()
    gateway_choice = prompt_choice(
        "Connect a messaging platform? (Telegram, Discord, etc.)",
--- a/hermes_cli/skills_hub.py
+++ b/hermes_cli/skills_hub.py
@ -593,7 +593,7 @@ def do_install(identifier: str, category: str = "", force: bool = False,
            answer = input("Confirm [y/N]: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            answer = "n"
-        if answer not in ("y", "yes"):
+        if answer not in {"y", "yes"}:
            c.print("[dim]Installation cancelled.[/]\n")
            shutil.rmtree(q_path, ignore_errors=True)
            return
@ -948,7 +948,7 @@ def do_uninstall(name: str, console: Optional[Console] = None,
            answer = input("Confirm [y/N]: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            answer = "n"
-        if answer not in ("y", "yes"):
+        if answer not in {"y", "yes"}:
            c.print("[dim]Cancelled.[/]\n")
            return

@ -984,7 +984,7 @@ def do_reset(name: str, restore: bool = False,
            answer = input("Confirm [y/N]: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            answer = "n"
-        if answer not in ("y", "yes"):
+        if answer not in {"y", "yes"}:
            c.print("[dim]Cancelled.[/]\n")
            return

@ -1138,7 +1138,7 @@ def _github_publish(skill_path: Path, skill_name: str, target_repo: str,
            f"https://api.github.com/repos/{target_repo}/forks",
            headers=headers, timeout=30,
        )
-        if resp.status_code in (200, 202):
+        if resp.status_code in {200, 202}:
            fork = resp.json()
            fork_repo = fork["full_name"]
        elif resp.status_code == 403:
@ -1257,7 +1257,7 @@ def do_snapshot_export(output_path: str, console: Optional[Console] = None) -> N
        sys.stdout.write(payload)
    else:
        out = Path(output_path)
-        out.write_text(payload)
+        out.write_text(payload, encoding="utf-8")
        c.print(f"[bold green]Snapshot exported:[/] {out}")
        c.print(f"[dim]{len(installed)} skill(s), {len(tap_list)} tap(s)[/]\n")

@ -1274,7 +1274,7 @@ def do_snapshot_import(input_path: str, force: bool = False,
        return

    try:
-        snapshot = json.loads(inp.read_text())
+        snapshot = json.loads(inp.read_text(encoding="utf-8"))
    except json.JSONDecodeError:
        c.print(f"[bold red]Error:[/] Invalid JSON in {inp}\n")
        return
@ -1564,7 +1564,7 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
        repo = args[1] if len(args) > 1 else ""
        do_tap(tap_action, repo=repo, console=c)

-    elif action in ("help", "--help", "-h"):
+    elif action in {"help", "--help", "-h"}:
        _print_skills_help(c)

    else:
--- a/hermes_cli/skin_engine.py
+++ b/hermes_cli/skin_engine.py
@ -42,6 +42,7 @@ All fields are optional. Missing values inherit from the ``default`` skin.
      session_border: "#8B8682"          # Session ID dim color
      status_bar_bg: "#1a1a2e"          # TUI status/usage bar background
      voice_status_bg: "#1a1a2e"        # TUI voice status background
+      selection_bg: "#333355"           # TUI mouse-selection highlight background
      completion_menu_bg: "#1a1a2e"      # Completion menu background
      completion_menu_current_bg: "#333355"  # Active completion row background
      completion_menu_meta_bg: "#1a1a2e"     # Completion meta column background
--- a/hermes_cli/slack_cli.py
+++ b/hermes_cli/slack_cli.py
@ -18,6 +18,7 @@ for reinstall when scopes/commands change.
 from __future__ import annotations

 import json
+import os
 import sys
 from pathlib import Path

@ -47,6 +48,11 @@ def _build_full_manifest(bot_name: str, bot_description: str) -> dict:
            "background_color": "#1a1a2e",
        },
        "features": {
+            "app_home": {
+                "home_tab_enabled": False,
+                "messages_tab_enabled": True,
+                "messages_tab_read_only_enabled": False,
+            },
            "bot_user": {
                "display_name": bot_name[:80],
                "always_online": True,
@ -68,6 +74,7 @@ def _build_full_manifest(bot_name: str, bot_description: str) -> dict:
                    "files:read",
                    "files:write",
                    "groups:history",
+                    "groups:read",
                    "im:history",
                    "im:read",
                    "im:write",
@ -128,7 +135,7 @@ def slack_manifest_command(args) -> int:

                target = Path(get_hermes_home()) / "slack-manifest.json"
            except Exception:
-                target = Path.home() / ".hermes" / "slack-manifest.json"
+                target = Path(os.environ.get("HERMES_HOME") or str(Path.home() / ".hermes")) / "slack-manifest.json"
        else:
            target = Path(write_target).expanduser()
        target.parent.mkdir(parents=True, exist_ok=True)
--- a/hermes_cli/status.py
+++ b/hermes_cli/status.py
@ -122,10 +122,16 @@ def show_status(args):
    print()
    print(color("◆ API Keys", Colors.CYAN, Colors.BOLD))

-    keys = {
+    # Values may be a single env var name (str) or a tuple of alternates (first found wins).
+    keys: dict[str, str | tuple[str, ...]] = {
        "OpenRouter": "OPENROUTER_API_KEY",
        "OpenAI": "OPENAI_API_KEY",
-        "Z.AI/GLM": "GLM_API_KEY",
+        "Anthropic": ("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN"),
+        "Google / Gemini": ("GOOGLE_API_KEY", "GEMINI_API_KEY"),
+        "DeepSeek": "DEEPSEEK_API_KEY",
+        "xAI / Grok": "XAI_API_KEY",
+        "NVIDIA NIM": "NVIDIA_API_KEY",
+        "Z.AI / GLM": "GLM_API_KEY",
        "Kimi": "KIMI_API_KEY",
        "StepFun Step Plan": "STEPFUN_API_KEY",
        "MiniMax": "MINIMAX_API_KEY",
@ -141,8 +147,23 @@ def show_status(args):
        "GitHub": "GITHUB_TOKEN",
    }

-    for name, env_var in keys.items():
-        value = get_env_value(env_var) or ""
+    def _resolve_env(env_ref) -> str:
+        """Look up an API-key env reference and return its value, or ``""``.
+
+        ``env_ref`` is either a single env var name or a tuple of alternate
+        names.  Alternates are tried in order and the first non-empty value
+        wins; an empty string is returned when none of them is set.
+        """
+        # Treat a bare name as a one-element tuple so both shapes share a loop.
+        names = env_ref if isinstance(env_ref, tuple) else (env_ref,)
+        for var_name in names:
+            found = get_env_value(var_name) or ""
+            if found:
+                return found
+        return ""
+
+    for name, env_ref in keys.items():
+        # Anthropic already has a dedicated lookup below; keep that as the
+        # single source of truth (it also resolves OAuth tokens), skip here
+        # so we don't print two "Anthropic" rows.
+        if name == "Anthropic":
+            continue
+        value = _resolve_env(env_ref)
        has_key = bool(value)
        display = redact_key(value) if not show_all else value
        print(f"  {name:<12}  {check_mark(has_key)} {display}")
@ -346,7 +367,7 @@ def show_status(args):
        if persist is None:
            persist_enabled = bool(terminal_cfg.get("container_persistent", True))
        else:
-            persist_enabled = persist.lower() in ("1", "true", "yes", "on")
+            persist_enabled = persist.lower() in {"1", "true", "yes", "on"}
        auth_status = describe_vercel_auth()
        sdk_ok = importlib.util.find_spec("vercel") is not None
        sdk_label = "installed" if sdk_ok else "missing (install: pip install 'hermes-agent[vercel]')"
--- a/hermes_cli/stdio.py
+++ b/hermes_cli/stdio.py
@ -0,0 +1,252 @@
+"""Windows-safe stdio configuration.
+
+On Windows, Python's ``sys.stdout``/``sys.stderr`` default to the console's
+active code page (often ``cp1252``, sometimes ``cp437``, occasionally ``cp932``
+on Japanese locales, etc.).  Hermes's banners, tool output feed, and slash
+command listings all contain Unicode: box-drawing characters (``─┌┐└┘├┤``),
+mathematical and geometric symbols (``◆ ◇ ◎ ▣ ⚔ ⚖ →``), and user-supplied
+text in any language.  Printing those to a cp1252 console raises
+``UnicodeEncodeError: 'charmap' codec can't encode character…`` and kills the
+whole CLI before the REPL even opens.
+
+The fix is to force UTF-8 on the Python side and also flip the console's
+code page to UTF-8 (65001).  Both matter: Python-level only helps when
+Python's stdout is a real TTY; code-page flipping lets subprocesses and
+child Python ``print()`` calls agree on encoding.
+
+This module is a no-op on every non-Windows platform, and idempotent.
+Entry points (``cli.py`` ``main``, ``hermes_cli/main.py`` CLI dispatch,
+``gateway/run.py`` startup) call :func:`configure_windows_stdio` exactly
+once early in startup.
+
+Patterns cribbed from Claude Code (``src/utils/platform.ts``), OpenCode
+(``packages/opencode/src/pty/index.ts`` env injection), and OpenAI Codex
+(``codex-rs/core/src/unified_exec/process_manager.rs``).  None of those
+actually flip the console code page — they rely on their runtime (Node or
+Rust) writing UTF-16 to the Win32 console API and letting the terminal
+sort it out.  Python doesn't get that luxury.
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+
+# Public API of this module; every other name defined here is underscore-
+# prefixed and intended for internal use.
+__all__ = ["configure_windows_stdio", "is_windows"]
+
+
+# Module-level latch: set to True by configure_windows_stdio() on every path
+# it takes (including its no-op paths) so that later calls return immediately.
+_CONFIGURED = False
+
+
+def is_windows() -> bool:
+    """Report whether this interpreter is running on native Windows (not WSL).
+
+    The decision is made solely by comparing ``sys.platform`` to ``"win32"``.
+    """
+    platform_tag = sys.platform
+    return platform_tag == "win32"
+
+
+def _flip_console_code_page_to_utf8() -> None:
+    """Switch the attached console's input and output code pages to UTF-8.
+
+    Calls ``SetConsoleCP`` and then ``SetConsoleOutputCP`` on ``kernel32``
+    through ``ctypes``, passing CP_UTF8 (65001) to each.  The return values
+    are not inspected, and any exception is swallowed, so this is strictly
+    best-effort: with no attached console (redirected stdout, a service, a
+    PTY-less CI runner) or on a platform without ``ctypes.windll`` it simply
+    does nothing.
+    """
+    cp_utf8 = 65001
+    try:
+        import ctypes
+
+        k32 = ctypes.windll.kernel32  # type: ignore[attr-defined]
+        # Input code page first, then output — same order as a manual
+        # ``chcp``-style flip.  Each function is looked up right before it
+        # is called.
+        for api_name in ("SetConsoleCP", "SetConsoleOutputCP"):
+            getattr(k32, api_name)(cp_utf8)
+    except Exception:
+        # ctypes import failure, missing kernel32, or non-Windows — none of
+        # these are fatal.  The caller reconfigures Python's own streams
+        # separately.
+        pass
+
+
+def _reconfigure_stream(stream, *, encoding: str = "utf-8", errors: str = "replace") -> None:
+    """Switch a text stream's encoding and error handler in place.
+
+    Relies on the stream exposing a ``reconfigure`` method
+    (``TextIOWrapper.reconfigure``, Python 3.7+).  Objects without one —
+    for example an ``io.StringIO`` substituted during tests — are left
+    untouched, and any exception raised while reconfiguring is suppressed.
+
+    Args:
+        stream: The stream object to adjust (e.g. ``sys.stdout``).
+        encoding: Encoding handed to ``reconfigure``.
+        errors: Error-handling policy handed to ``reconfigure``.
+    """
+    try:
+        hook = getattr(stream, "reconfigure", None)
+        if hook is not None:
+            hook(encoding=encoding, errors=errors)
+    except Exception:
+        # Best-effort: a stream that refuses to be reconfigured is left as-is.
+        pass
+
+
+def configure_windows_stdio() -> bool:
+    """Force UTF-8 stdio on Windows.  No-op elsewhere.
+
+    Idempotent — safe to call multiple times from different entry points.
+    The module-level ``_CONFIGURED`` latch is set on every path, so only the
+    first call does any work.
+
+    Set ``HERMES_DISABLE_WINDOWS_UTF8`` to ``1``, ``true``, ``yes`` or ``on``
+    (case-insensitive, surrounding whitespace ignored) to opt out — useful
+    for diagnosing encoding-related bugs by forcing the old cp1252 path.
+
+    Also sets a sensible default ``EDITOR`` on Windows if neither ``EDITOR``
+    nor ``VISUAL`` is set — see :func:`_default_windows_editor` — and
+    prepends Hermes-managed tool directories to ``PATH`` — see
+    :func:`_augment_path_with_known_tools`.
+
+    Returns:
+        ``True`` if the Windows configuration was applied by this call,
+        ``False`` on non-Windows, on a repeat call, or when opted out.
+    """
+    global _CONFIGURED
+
+    if _CONFIGURED:
+        return False
+    if not is_windows():
+        # Mark configured so repeated calls on POSIX are true no-ops.
+        _CONFIGURED = True
+        return False
+
+    # Parse the opt-out flag case-insensitively so "TRUE", "Yes" or "on"
+    # are honoured instead of being silently ignored.
+    opt_out = os.environ.get("HERMES_DISABLE_WINDOWS_UTF8", "")
+    if opt_out.strip().lower() in {"1", "true", "yes", "on"}:
+        _CONFIGURED = True
+        return False
+
+    # Encourage every child Python process spawned by the agent to also use
+    # UTF-8 for its stdio.  PYTHONIOENCODING wins over the locale-based
+    # default in subprocesses.  Don't override an explicit user setting.
+    os.environ.setdefault("PYTHONIOENCODING", "utf-8")
+    # PYTHONUTF8 = 1 enables UTF-8 Mode globally for any Python subprocess
+    # (PEP 540).  Again, don't override an explicit setting.
+    os.environ.setdefault("PYTHONUTF8", "1")
+
+    # Set EDITOR to a working Windows default if neither EDITOR nor VISUAL
+    # is set.  prompt_toolkit's ``open_in_editor`` falls back to POSIX-only
+    # paths (``/usr/bin/nano``, ``/usr/bin/vi``) that don't exist on
+    # Windows — Ctrl+X Ctrl+E and ``/edit`` silently do nothing there
+    # otherwise.  This happens even with full Git for Windows installed,
+    # so it's not a MinGit-specific issue.
+    _default_editor = _default_windows_editor()
+    if _default_editor and not os.environ.get("EDITOR") and not os.environ.get("VISUAL"):
+        os.environ["EDITOR"] = _default_editor
+
+    # Augment PATH with the Hermes-managed Git install directories so
+    # subprocess calls (bash, rg, grep, etc.) resolve even in sessions
+    # that started before the User PATH broadcast reached them.  When
+    # install.ps1 adds these to User PATH via SetEnvironmentVariable,
+    # already-running shells don't see the change — which means hermes
+    # launched from the install session won't find rg / bash / grep
+    # even though they're "installed".  Prepending the known paths here
+    # closes that gap.  No-op when the paths don't exist (e.g. system-Git
+    # install without Hermes-managed PortableGit).
+    _augment_path_with_known_tools()
+
+    # Flip the console code page first so that any subprocess that
+    # inherits the console (e.g. a launched shell) also sees CP_UTF8.
+    _flip_console_code_page_to_utf8()
+
+    # Reconfigure Python's own stdio wrappers so ``print()`` calls from
+    # this process round-trip emoji / box-drawing / non-Latin text.
+    # ``errors="replace"`` means a genuinely unencodable byte sequence
+    # gets a ``?`` rather than crashing the interpreter — we prefer
+    # degraded output over a stack trace.
+    _reconfigure_stream(sys.stdout)
+    _reconfigure_stream(sys.stderr)
+    # stdin is re-configured for completeness; Hermes's interactive
+    # input path uses prompt_toolkit which manages its own encoding,
+    # but batch/pipe input benefits from UTF-8 decoding on stdin too.
+    _reconfigure_stream(sys.stdin)
+
+    _CONFIGURED = True
+    return True
+
+
+def _default_windows_editor() -> str:
+    """Choose the fallback value for ``$EDITOR`` on Windows.
+
+    There is currently a single candidate: ``notepad``.  It ships with
+    Windows, has no dependencies, and blocks until the window is closed
+    (``subprocess.call(["notepad", file])``), which makes it the
+    "always-works" default.  When ``notepad`` cannot be found on ``PATH``
+    an empty string is returned and no default is suggested.
+
+    Both the prompt_toolkit buffer's ``open_in_editor`` and
+    ``hermes config edit`` honour ``$EDITOR``, so users who want something
+    else can set it before launching Hermes (User env var in Windows
+    Settings, or a PowerShell profile):
+
+    - VSCode: ``$env:EDITOR = "code --wait"``  (``--wait`` is critical;
+      without it the editor returns immediately and any input is lost)
+    - Notepad++: ``$env:EDITOR = "'C:\\Program Files\\Notepad++\\notepad++.exe' -multiInst -nosession"``
+    - Neovim: ``$env:EDITOR = "nvim"``  (if installed)
+
+    Returns:
+        ``"notepad"`` when it resolves on ``PATH``, otherwise ``""``.
+    """
+    from shutil import which
+
+    # Hand back the bare command name, not the resolved path, so that
+    # prompt_toolkit's shlex split doesn't trip over a path containing
+    # spaces.  If notepad is missing (WinPE, Nano Server) the empty string
+    # lets prompt_toolkit's silent no-op do its thing.
+    return "notepad" if which("notepad") else ""
+
+
+
+def _augment_path_with_known_tools() -> None:
+    """Prepend well-known Hermes-managed tool directories to os.environ['PATH'].
+
+    Fixes the "User PATH was just updated but my process can't see it" gap on
+    Windows.  When install.ps1 runs, it adds entries like
+    ``%LOCALAPPDATA%\\hermes\\git\\bin`` to the User PATH via
+    ``SetEnvironmentVariable(..., "User")``.  That write propagates to newly
+    *spawned* processes only — already-running shells (including the one the
+    user invokes ``hermes`` from right after install) retain their old PATH.
+
+    Any subprocess Hermes spawns — bash, ``rg``, ``grep``, ``npm`` — inherits
+    that stale PATH and reports commands as missing even though they're on
+    disk.  Symptom: ``search_files`` reports "rg/find not available" when
+    the user clearly just installed ripgrep.
+
+    Patch-up strategy: add the known Hermes-managed tool directories to our
+    PATH at startup so subprocess calls resolve correctly.  No-op on POSIX,
+    when ``LOCALAPPDATA`` is unset, and when the directories don't exist.
+    Directories already on PATH are not added again; the comparison ignores
+    case, trailing separators and ``/`` vs ``\\`` differences.
+    """
+    if not is_windows():
+        return
+
+    local_appdata = os.environ.get("LOCALAPPDATA", "")
+    if not local_appdata:
+        return
+
+    # Known tool dirs installed by scripts/install.ps1.  Kept in sync with
+    # the PATH entries that installer adds to User scope — the two lists
+    # should match so this prefill fully mirrors what a fresh shell would
+    # see on next launch.
+    candidate_dirs = [
+        os.path.join(local_appdata, "hermes", "git", "cmd"),
+        os.path.join(local_appdata, "hermes", "git", "bin"),
+        os.path.join(local_appdata, "hermes", "git", "usr", "bin"),
+        # Hermes venv Scripts directory — host of the hermes.exe shim itself,
+        # also where any pip-installed console scripts land.  Usually already
+        # on PATH when the user invokes hermes, but harmless to include.
+        os.path.join(local_appdata, "hermes", "hermes-agent", "venv", "Scripts"),
+        # WinGet packages directory — where ``winget install`` drops CLI
+        # shims by default (ripgrep lands here as rg.exe).  Covers the case
+        # of a system-Git install + ripgrep-via-winget that isn't yet on
+        # the spawning shell's PATH.
+        os.path.join(local_appdata, "Microsoft", "WinGet", "Links"),
+    ]
+
+    def _path_key(entry: str) -> str:
+        # Canonical form for duplicate detection: "C:\Tools\" and "c:/tools"
+        # must compare equal, otherwise we'd prepend a directory that is
+        # effectively already present.
+        return os.path.normcase(os.path.normpath(entry))
+
+    existing = os.environ.get("PATH", "")
+    already_present = {_path_key(p) for p in existing.split(os.pathsep) if p}
+    prepend = [
+        d
+        for d in candidate_dirs
+        if os.path.isdir(d) and _path_key(d) not in already_present
+    ]
+
+    if prepend:
+        # Skip an empty existing PATH so the result has no trailing
+        # separator (which would read as an empty PATH entry).
+        parts = [*prepend, existing] if existing else prepend
+        os.environ["PATH"] = os.pathsep.join(parts)
--- a/hermes_cli/tips.py
+++ b/hermes_cli/tips.py
@ -54,7 +54,7 @@ TIPS = [
    "Combine multiple references: \"Review @file:main.py and @file:test.py for consistency.\"",

    # --- Keybindings ---
-    "Alt+Enter (or Ctrl+J) inserts a newline for multi-line input.",
+    "Alt+Enter inserts a newline for multi-line input. (Windows Terminal intercepts Alt+Enter — use Ctrl+Enter instead.)",
    "Ctrl+C interrupts the agent. Double-press within 2 seconds to force exit.",
    "Ctrl+Z suspends Hermes to the background — run fg in your shell to resume.",
    "Tab accepts auto-suggestion ghost text or autocompletes slash commands.",
@ -192,7 +192,7 @@ TIPS = [
    "Voice messages on Telegram, Discord, WhatsApp, and Slack are auto-transcribed.",

    # --- Gateway & Messaging ---
-    "Hermes runs on 18 platforms: Telegram, Discord, Slack, WhatsApp, Signal, Matrix, email, and more.",
+    "Hermes runs on 21 messaging platforms: Telegram, Discord, Slack, WhatsApp, Signal, Matrix, IRC, Microsoft Teams, email, and more.",
    "hermes gateway install sets it up as a system service that starts on boot.",
    "DingTalk uses Stream Mode — no webhooks or public URL needed.",
    "BlueBubbles brings iMessage to Hermes via a local macOS server.",
@ -334,6 +334,144 @@ TIPS = [
    "MCP ${ENV_VAR} placeholders in config are resolved at server spawn — including vars from ~/.hermes/.env.",
    "Skills from trusted repos (NousResearch) get a 'trusted' security level; community skills get extra scanning.",
    "The skills quarantine at ~/.hermes/skills/.hub/quarantine/ holds skills pending security review.",
+
+    # --- Advanced Slash Commands ---
+    '/steer <prompt> injects a note after the next tool call — nudge direction mid-task without interrupting.',
+    '/goal <text> sets a standing Ralph-loop objective — Hermes auto-continues turn after turn until a judge says done.',
+    '/snapshot create [label] saves a full state snapshot of Hermes config; /snapshot restore <id> reverts later.',
+    '/copy [N] copies the last assistant response to your clipboard, or the Nth-from-last with a number.',
+    '/redraw forces a full UI repaint, fixing terminal drift after tmux resize or mouse selection artifacts.',
+    '/agents (alias /tasks) shows active agents and running background tasks across the current session.',
+    '/footer toggles the gateway footer on final replies showing model, tool counts, and turn timing.',
+    '/busy queue|steer|interrupt controls what pressing Enter does while Hermes is working.',
+    '/topic in Telegram DMs enables user-managed multi-session topic mode — /topic <id> restores past sessions inline.',
+    '/approve session|always runs a pending dangerous command with your chosen trust scope; /deny rejects it.',
+    '/restart gracefully restarts the gateway after draining active runs, then pings the requester when back up.',
+    '/kanban boards switch <slug> changes the active multi-project Kanban board from inside chat.',
+    '/reload reloads ~/.hermes/.env into the running session — pick up new API keys without restarting.',
+
+    # --- Cron (no-agent & scripts) ---
+    'cronjob with no_agent=True runs a script on schedule and sends its stdout directly — zero tokens, zero LLM.',
+    'An empty cron script stdout means silent tick — nothing is delivered, perfect for threshold watchdogs.',
+    "HERMES_CRON_MAX_PARALLEL (default 4) caps how many cron jobs run per tick so bursts don't saturate your keys.",
+
+    # --- Gateway Hooks ---
+    'Gateway hooks live under ~/.hermes/hooks/<name>/ with HOOK.yaml + handler.py — handler must be named `handle`.',
+    'Hook events include gateway:startup, session:start, agent:step, and command:* wildcard subscriptions.',
+    'Drop a ~/.hermes/BOOT.md checklist and a gateway:startup hook runs it as a one-shot agent every boot.',
+
+    # --- Curator ---
+    'hermes curator run --dry-run previews what the curator would archive or consolidate without mutating anything.',
+    "hermes curator pin <skill> hard-fences a skill against both auto-archival and the agent's skill_manage tool.",
+    'hermes curator rollback restores skills from a pre-run snapshot — backups live under skills/.curator_backups/.',
+
+    # --- Credential Pools & Routing ---
+    'hermes auth reset <provider> clears all cooldowns and exhaustion flags on a credential pool.',
+    'credential_pool_strategies.<provider>: round_robin cycles keys evenly instead of the fill_first default.',
+    'use_gateway: true per-tool routes web, image, tts, or browser through your Nous subscription — no extra keys.',
+    'provider_routing.data_collection: deny excludes data-storing providers on OpenRouter.',
+    'provider_routing.require_parameters: true only routes to providers that support every param in your request.',
+
+    # --- TUI & Dashboard ---
+    'HERMES_TUI_RESUME=1 auto-re-attaches to the most recent TUI session on launch — handy after SSH drops.',
+    "HERMES_TUI_THEME=light|dark|<hex> forces the TUI theme on terminals that don't set COLORFGBG.",
+    'Ctrl+G or Ctrl+X Ctrl+E in the TUI opens the input buffer in $EDITOR for long multi-line prompts.',
+    'The TUI renders LaTeX inline — $E=mc^2$ becomes Unicode math instead of raw TeX.',
+    'hermes dashboard launches a local web UI at 127.0.0.1:9119 — zero data leaves localhost.',
+    'hermes dashboard --tui embeds the full Hermes TUI in your browser via xterm.js and a WebSocket PTY.',
+    'Drop a YAML in ~/.hermes/dashboard-themes/ with two palette colors to reskin the entire dashboard.',
+    'Dashboard plugins are drop-in: manifest.json + JS bundle in ~/.hermes/dashboard-plugins/ — no npm build required.',
+    'layoutVariant: cockpit in a dashboard theme adds a 260px left rail that plugins can populate via the sidebar slot.',
+
+    # --- Env Vars & Config Gates ---
+    "display.tool_progress_command: true exposes /verbose on messaging platforms; it's CLI-only by default.",
+    'HERMES_BACKGROUND_NOTIFICATIONS=result only pings when background tasks finish (vs all/error/off).',
+    'HERMES_WRITE_SAFE_ROOT restricts write_file and patch to a directory prefix; writes outside require approval.',
+    'HERMES_IGNORE_RULES skips auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills.',
+    'HERMES_ACCEPT_HOOKS auto-approves unseen shell hooks declared in config.yaml without a TTY prompt.',
+    'auxiliary.goal_judge.model routes the /goal judge to a cheap fast model to keep loop cost near zero.',
+    'Checkpoints skip directories with more than 50,000 files to avoid slow git operations on massive monorepos.',
+
+    # --- TTS ---
+    'tts.provider: piper runs 44-language local TTS on CPU — voices auto-download to ~/.hermes/cache/piper-voices/.',
+    'tts.providers.<name>.type: command wires any CLI TTS engine with {input_path} and {output_path} placeholders.',
+
+    # --- API Server & Proxy ---
+    'API_SERVER_ENABLED=true runs an OpenAI-compatible endpoint alongside the gateway for Open WebUI and LibreChat.',
+    'GATEWAY_PROXY_URL runs a split setup: platform I/O locally, agent work delegated to a remote API server.',
+
+    # --- Platform-specific ---
+    'MATRIX_DEVICE_ID pins a stable device ID for E2EE — without it, keys rotate every start and historic decrypt breaks.',
+    'TELEGRAM_WEBHOOK_SECRET is required whenever TELEGRAM_WEBHOOK_URL is set — generate with openssl rand -hex 32.',
+
+    # --- Batch ---
+    "batch_runner.py --resume content-matches completed prompts by text so dataset reorders don't re-run finished work.",
+
+    # --- Less-Known Slash Commands ---
+    '/new starts a fresh session in place (alias /reset) — fresh session ID, clean history, CLI stays open.',
+    '/clear wipes the terminal screen AND starts a new session — one shortcut for a visual reset.',
+    '/history prints the current conversation in-line without leaving the CLI — useful for a quick re-read.',
+    '/save writes the current conversation to disk without ending the session.',
+    '/status shows session info at a glance: ID, title, model, token usage, and elapsed time.',
+    '/image <path> attaches a local image file for your next prompt without pasting or drag-and-drop.',
+    '/platforms shows gateway and messaging-platform connection status right from inside chat.',
+    '/commands paginates the full slash-command + installed-skill list — useful on platforms without tab completion.',
+    '/toolsets lists every available toolset so you know what -t/--toolsets accepts.',
+    '/gquota shows Google Gemini Code Assist quota usage with progress bars when that provider is active.',
+    '/voice tts toggles TTS-only mode — agent replies out loud but you still type your prompts.',
+    '/reload-skills re-scans ~/.hermes/skills/ so drop-in skills appear without restarting the session.',
+    '/indicator kaomoji|emoji|unicode|ascii picks the TUI busy-indicator style shown during agent runs.',
+    '/debug uploads a support bundle (system info + logs) and returns shareable links — works in chat too.',
+
+    # --- CLI Subcommands & Flags ---
+    'hermes -z "<prompt>" is the purest one-shot: final answer on stdout, nothing else — ideal for piping in scripts.',
+    'hermes chat --pass-session-id injects the session ID into the system prompt so the agent can self-reference it.',
+    'hermes chat --image path/to/pic.png attaches a local image to a single -q query without a separate upload step.',
+    'hermes chat --ignore-user-config skips ~/.hermes/config.yaml — reproducible bug reports and CI runs.',
+    "hermes chat --source tool tags programmatic chats so they don't clutter hermes sessions list.",
+    'hermes dump --show-keys includes redacted API key fingerprints for deeper support debugging.',
+    'hermes sessions rename <ID> "new title" renames any past session; hermes sessions delete <ID> removes one.',
+    'hermes import restores a session export or profile archive produced by sessions export or profile export.',
+    'hermes fallback manages the fallback_model chain interactively — no hand-editing config.yaml.',
+    'hermes pairing rotates the DM pairing token — the first messager after rotation claims access to the bot.',
+    'hermes setup walks first-time users through provider, keys, and platform wiring in one interactive flow.',
+    'hermes status --deep runs the full health sweep across every component; plain hermes status is the quick view.',
+
+    # --- Agent Behavior Env Vars ---
+    'HERMES_AGENT_TIMEOUT=0 disables the gateway inactivity kill for a running agent — use for long research runs.',
+    'HERMES_ENABLE_PROJECT_PLUGINS=1 auto-loads repo-local plugins from ./.hermes/plugins/ — trust-gated by design.',
+    "HERMES_DISABLE_FILE_STATE_GUARD=1 turns off the 'file changed since you read it' guard on patch and write_file.",
+    'HERMES_ALLOW_PRIVATE_URLS=true lets web tools hit localhost and private networks — off by default in gateway mode.',
+    'HERMES_OPTIONAL_SKILLS=name1,name2 auto-installs extra optional-catalog skills on first run per profile.',
+    'HERMES_BUNDLED_SKILLS points at a custom bundled-skill tree — used by Homebrew and Nix packaging.',
+    'HERMES_DUMP_REQUEST_STDOUT=1 dumps every API request payload to stdout instead of log files.',
+    'HERMES_OAUTH_TRACE=1 logs redacted OAuth token exchange and refresh attempts for debugging provider auth.',
+    'HERMES_STREAM_RETRIES (default 3) controls mid-stream reconnect attempts on transient network errors.',
+
+    # --- Gateway Behavior Env Vars ---
+    'HERMES_GATEWAY_BUSY_ACK_ENABLED=false silences the ⚡/⏳/⏩ ack messages when a user messages a busy agent.',
+    'HERMES_AGENT_NOTIFY_INTERVAL (default 180s) sets how often the gateway pings with progress on long turns.',
+    'HERMES_RESTART_DRAIN_TIMEOUT (default 900s) caps how long /restart waits for in-flight runs before forcing.',
+    'HERMES_CHECKPOINT_TIMEOUT (default 30s) caps filesystem checkpoint creation — raise it on huge monorepos.',
+
+    # --- Auxiliary Tasks & Image Generation ---
+    'image_gen.model in config.yaml picks the FAL model: flux-2/klein, gpt-image-2, nano-banana-pro, and more.',
+    'image_gen.provider routes image generation through a plugin (OpenAI Images, Codex, FAL) instead of the default.',
+    'AUXILIARY_VISION_BASE_URL + AUXILIARY_VISION_API_KEY point vision analysis at any OpenAI-compatible endpoint.',
+    'auxiliary.session_search.max_concurrency bounds how many matched sessions are summarized in parallel (default 3).',
+    'auxiliary.session_search.extra_body forwards provider-specific OpenAI-compatible fields on summarization calls.',
+
+    # --- Security ---
+    'security.tirith_fail_open: false makes Hermes block commands when the tirith scanner itself errors out.',
+    'TIRITH_FAIL_OPEN env var overrides the tirith_fail_open config — a quick toggle without editing config.yaml.',
+
+    # --- Sessions & Source Tags ---
+    '--source tool chats are excluded from hermes sessions list by default — set --source explicitly to see them.',
+    'Session IDs are timestamp-prefixed (20250305_091523_abcd) so sorting works naturally in ls and jq.',
+
+    # --- Misc ---
+    'API_SERVER_MODEL_NAME customizes the model name on /v1/models — essential for multi-profile Open WebUI setups.',
+    'Dashboard plugins are served from /dashboard-plugins/<name>/ — drop files into ~/.hermes/dashboard-plugins/.',
 ]


--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@ -12,6 +12,8 @@ the `platform_toolsets` key.
 import json as _json
 import logging
 import os
+import shutil
+import subprocess
 import sys
 from pathlib import Path
 from typing import Dict, List, Optional, Set
@ -56,6 +58,7 @@ CONFIGURABLE_TOOLSETS = [
    ("file",            "📁 File Operations",           "read, write, patch, search"),
    ("code_execution",  "⚡ Code Execution",            "execute_code"),
    ("vision",          "👁️  Vision / Image Analysis",  "vision_analyze"),
+    ("video",           "🎬 Video Analysis",            "video_analyze (requires video-capable model)"),
    ("image_gen",       "🎨 Image Generation",          "image_generate"),
    ("moa",             "🧠 Mixture of Agents",         "mixture_of_agents"),
    ("tts",             "🔊 Text-to-Speech",            "text_to_speech"),
@ -73,12 +76,13 @@ CONFIGURABLE_TOOLSETS = [
    ("discord",         "💬 Discord (read/participate)", "fetch messages, search members, create thread"),
    ("discord_admin",   "🛡️  Discord Server Admin",    "list channels/roles, pin, assign roles"),
    ("yuanbao",          "🤖 Yuanbao",                  "group info, member queries, DM"),
+    ("computer_use",     "🖱️  Computer Use (macOS)",     "background desktop control via cua-driver"),
 ]

 # Toolsets that are OFF by default for new installs.
 # They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
 # but the setup checklist won't pre-select them for first-time users.
-_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin"}
+_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin", "video"}

 # Platform-scoped toolsets: only appear in the `hermes tools` checklist for
 # these platforms, and only resolve/save for these platforms.  A toolset
@ -298,6 +302,32 @@ TOOL_CATEGORIES = {
                    {"key": "FIRECRAWL_API_URL", "prompt": "Your Firecrawl instance URL (e.g., http://localhost:3002)"},
                ],
            },
+            {
+                "name": "SearXNG",
+                "badge": "free · self-hosted · search only",
+                "tag": "Privacy-respecting metasearch engine — search only (pair with any extract provider)",
+                "web_backend": "searxng",
+                "env_vars": [
+                    {"key": "SEARXNG_URL", "prompt": "Your SearXNG instance URL (e.g., http://localhost:8080)", "url": "https://searxng.github.io/searxng/"},
+                ],
+            },
+            {
+                "name": "Brave Search (Free Tier)",
+                "badge": "free tier · search only",
+                "tag": "2,000 queries/mo free — search only (pair with any extract provider)",
+                "web_backend": "brave-free",
+                "env_vars": [
+                    {"key": "BRAVE_SEARCH_API_KEY", "prompt": "Brave Search subscription token", "url": "https://brave.com/search/api/"},
+                ],
+            },
+            {
+                "name": "DuckDuckGo (ddgs)",
+                "badge": "free · no key · search only",
+                "tag": "Search via the ddgs Python package — no API key (pair with any extract provider)",
+                "web_backend": "ddgs",
+                "env_vars": [],
+                "post_setup": "ddgs",
+            },
        ],
    },
    "image_gen": {
@ -418,6 +448,27 @@ TOOL_CATEGORIES = {
            },
        ],
    },
+    "computer_use": {
+        "name": "Computer Use (macOS)",
+        "icon": "🖱️",
+        "platform_gate": "darwin",
+        "providers": [
+            {
+                "name": "cua-driver (background)",
+                "badge": "★ recommended · free · local",
+                "tag": (
+                    "macOS background computer-use via SkyLight SPIs — does "
+                    "NOT steal your cursor or focus. Works with any model."
+                ),
+                "env_vars": [
+                    # cua-driver reads HOME/TMPDIR from the process env, no
+                    # extra keys required. HERMES_CUA_DRIVER_VERSION is an
+                    # optional pin for reproducibility across macOS updates.
+                ],
+                "post_setup": "cua_driver",
+            },
+        ],
+    },
    "rl": {
        "name": "RL Training",
        "icon": "🧪",
@ -471,10 +522,79 @@ TOOLSET_ENV_REQUIREMENTS = {

 # ─── Post-Setup Hooks ─────────────────────────────────────────────────────────

+
+def _pip_install(
+    args: List[str],
+    *,
+    timeout: int = 300,
+    capture_output: bool = True,
+):
+    """Install Python packages from a post-setup hook.
+
+    Strategy (in order):
+    1. ``uv pip install`` if uv is on PATH — fast, doesn't need pip in the venv.
+    2. ``python -m pip install`` — works on stdlib venvs.
+    3. ``python -m ensurepip --upgrade`` then retry pip — covers ``uv venv``
+       which creates a venv WITHOUT pip.
+
+    Why this exists: the Windows installer creates the venv via ``uv venv``,
+    which doesn't seed pip. Post-setup hooks that shelled out to
+    ``[sys.executable, '-m', 'pip', 'install', ...]`` failed with
+    ``No module named pip`` on every fresh install. uv-first sidesteps that.
+
+    Args:
+        args: Arguments placed after ``pip install`` (package specs and
+            flags such as ``-U`` or ``--quiet``).
+        timeout: Seconds allowed for each install attempt. The uv attempt
+            and the pip attempt are each given this full budget.
+        capture_output: Forwarded to the install subprocesses. The pip
+            probe and the ensurepip bootstrap always capture their output.
+
+    Returns the ``subprocess.CompletedProcess`` from whichever tier succeeded
+    (or the last failure for the caller to inspect).
+
+    Raises:
+        subprocess.TimeoutExpired: If the final ``pip install`` run exceeds
+            ``timeout``. A timeout in the uv tier is swallowed and falls
+            through to the pip tier instead.
+    """
+    # Two directories above the interpreter path. Assumes the interpreter
+    # sits in <venv>/bin or <venv>/Scripts — TODO confirm for non-venv
+    # interpreters, where uv is expected to fail and pip takes over.
+    venv_root = Path(sys.executable).parent.parent
+    # Copy of the current environment with VIRTUAL_ENV overridden; it is
+    # passed to the uv subprocess only, never to the pip tiers.
+    uv_env = {**os.environ, "VIRTUAL_ENV": str(venv_root)}
+
+    uv_bin = shutil.which("uv")
+    if uv_bin:
+        try:
+            result = subprocess.run(
+                [uv_bin, "pip", "install", *args],
+                capture_output=capture_output, text=True, timeout=timeout,
+                env=uv_env,
+            )
+            # Only a successful uv run is returned; its failure output is
+            # discarded once the pip tier takes over.
+            if result.returncode == 0:
+                return result
+            # Fall through to pip — uv may have failed for an unrelated reason
+            # (resolution conflict, network), and pip might handle it.
+        except (subprocess.TimeoutExpired, FileNotFoundError):
+            pass
+
+    pip_cmd = [sys.executable, "-m", "pip"]
+    try:
+        # Probe for pip; bootstrap via ensurepip if missing (uv venv lacks it).
+        probe = subprocess.run(
+            pip_cmd + ["--version"],
+            capture_output=True, text=True, timeout=15,
+        )
+        # A non-zero probe is turned into FileNotFoundError so it shares the
+        # bootstrap handler below with a genuinely missing interpreter.
+        if probe.returncode != 0:
+            raise FileNotFoundError("pip not in venv")
+    except (subprocess.TimeoutExpired, FileNotFoundError):
+        try:
+            # check=True makes a non-zero ensurepip exit raise
+            # CalledProcessError, which is handled right below.
+            subprocess.run(
+                [sys.executable, "-m", "ensurepip", "--upgrade", "--default-pip"],
+                capture_output=True, text=True, timeout=120, check=True,
+            )
+        except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
+            # Synthesize a result so callers see a clean failure path.
+            return subprocess.CompletedProcess(
+                pip_cmd, returncode=1, stdout="",
+                stderr=f"pip not available and ensurepip failed: {e}",
+            )
+
+    # Final attempt is deliberately not wrapped: its CompletedProcess (success
+    # or failure) goes back to the caller, and a timeout propagates as
+    # subprocess.TimeoutExpired.
+    return subprocess.run(
+        pip_cmd + ["install", *args],
+        capture_output=capture_output, text=True, timeout=timeout,
+    )
+
+
 def _run_post_setup(post_setup_key: str):
    """Run post-setup hooks for tools that need extra installation steps."""
    import shutil
-    if post_setup_key in ("agent_browser", "browserbase"):
+    if post_setup_key in {"agent_browser", "browserbase"}:
        node_modules = PROJECT_ROOT / "node_modules" / "agent-browser"
        npm_bin = shutil.which("npm")
        npx_bin = shutil.which("npx")
@ -482,8 +602,12 @@ def _run_post_setup(post_setup_key: str):
        if not node_modules.exists() and npm_bin:
            _print_info("    Installing Node.js dependencies for browser tools...")
            import subprocess
+            # Use the resolved npm_bin absolute path so subprocess.Popen can
+            # execute npm.cmd on Windows (CreateProcessW otherwise rejects
+            # batch shims).  On POSIX npm_bin is the plain path — same
+            # behaviour as before.
            result = subprocess.run(
-                ["npm", "install", "--silent"],
+                [npm_bin, "install", "--silent"],
                capture_output=True, text=True, cwd=str(PROJECT_ROOT)
            )
            if result.returncode == 0:
@ -582,11 +706,13 @@ def _run_post_setup(post_setup_key: str):

    elif post_setup_key == "camofox":
        camofox_dir = PROJECT_ROOT / "node_modules" / "@askjo" / "camofox-browser"
-        if not camofox_dir.exists() and shutil.which("npm"):
+        _npm_bin = shutil.which("npm")
+        if not camofox_dir.exists() and _npm_bin:
            _print_info("    Installing Camofox browser server...")
            import subprocess
+            # Absolute npm path so .cmd shim executes on Windows.
            result = subprocess.run(
-                ["npm", "install", "--silent"],
+                [_npm_bin, "install", "--silent"],
                capture_output=True, text=True, cwd=str(PROJECT_ROOT)
            )
            if result.returncode == 0:
@ -602,6 +728,53 @@ def _run_post_setup(post_setup_key: str):
            _print_warning("    Node.js not found. Install Camofox via Docker:")
            _print_info("      docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")

+    elif post_setup_key == "cua_driver":
+        # cua-driver provides macOS background computer-use (SkyLight SPIs).
+        # Install via upstream curl script if the binary isn't on $PATH yet.
+        import platform as _plat
+        import subprocess
+        if _plat.system() != "Darwin":
+            _print_warning("    Computer Use (cua-driver) is macOS-only; skipping.")
+            return
+        if shutil.which("cua-driver"):
+            try:
+                version = subprocess.run(
+                    ["cua-driver", "--version"],
+                    capture_output=True, text=True, timeout=5,
+                ).stdout.strip()
+                _print_success(f"    cua-driver already installed: {version or 'unknown version'}")
+            except Exception:
+                _print_success("    cua-driver already installed.")
+            _print_info("    Grant macOS permissions if not done yet:")
+            _print_info("      System Settings > Privacy & Security > Accessibility")
+            _print_info("      System Settings > Privacy & Security > Screen Recording")
+            return
+        if not shutil.which("curl"):
+            _print_warning("    curl not found — install manually:")
+            _print_info("      https://github.com/trycua/cua/blob/main/libs/cua-driver/README.md")
+            return
+        _print_info("    Installing cua-driver (macOS background computer-use)...")
+        try:
+            install_cmd = (
+                "/bin/bash -c \"$(curl -fsSL "
+                "https://raw.githubusercontent.com/trycua/cua/main/"
+                "libs/cua-driver/scripts/install.sh)\""
+            )
+            result = subprocess.run(install_cmd, shell=True, timeout=300)
+            if result.returncode == 0 and shutil.which("cua-driver"):
+                _print_success("    cua-driver installed.")
+                _print_info("    IMPORTANT — grant macOS permissions now:")
+                _print_info("      System Settings > Privacy & Security > Accessibility")
+                _print_info("      System Settings > Privacy & Security > Screen Recording")
+                _print_info("    Both must allow the terminal / Hermes process.")
+            else:
+                _print_warning("    cua-driver install did not complete. Re-run manually:")
+                _print_info(f"      {install_cmd}")
+        except subprocess.TimeoutExpired:
+            _print_warning("    cua-driver install timed out. Re-run manually.")
+        except Exception as e:
+            _print_warning(f"    cua-driver install failed: {e}")
+
    elif post_setup_key == "kittentts":
        try:
            __import__("kittentts")
@ -609,56 +782,70 @@ def _run_post_setup(post_setup_key: str):
            return
        except ImportError:
            pass
-        import subprocess
        _print_info("    Installing kittentts (~25-80MB model, CPU-only)...")
        wheel_url = (
            "https://github.com/KittenML/KittenTTS/releases/download/"
            "0.8.1/kittentts-0.8.1-py3-none-any.whl"
        )
        try:
-            result = subprocess.run(
-                [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"],
-                capture_output=True, text=True, timeout=300,
-            )
+            result = _pip_install(["-U", wheel_url, "soundfile", "--quiet"], timeout=300)
            if result.returncode == 0:
                _print_success("    kittentts installed")
                _print_info("    Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
                _print_info("    Models: KittenML/kitten-tts-nano-0.8-int8 (25MB), micro (41MB), mini (80MB)")
            else:
                _print_warning("    kittentts install failed:")
-                _print_info(f"      {result.stderr.strip()[:300]}")
-                _print_info(f"    Run manually: python -m pip install -U '{wheel_url}' soundfile")
+                _print_info(f"      {(result.stderr or '').strip()[:300]}")
+                _print_info(f"    Run manually: uv pip install -U '{wheel_url}' soundfile")
        except subprocess.TimeoutExpired:
            _print_warning("    kittentts install timed out (>5min)")
-            _print_info(f"    Run manually: python -m pip install -U '{wheel_url}' soundfile")
+            _print_info(f"    Run manually: uv pip install -U '{wheel_url}' soundfile")

    elif post_setup_key == "piper":
        try:
            __import__("piper")
            _print_success("    piper-tts is already installed")
        except ImportError:
-            import subprocess
            _print_info("    Installing piper-tts (~14MB wheel, voices downloaded on first use)...")
            try:
-                result = subprocess.run(
-                    [sys.executable, "-m", "pip", "install", "-U", "piper-tts", "--quiet"],
-                    capture_output=True, text=True, timeout=300,
-                )
+                result = _pip_install(["-U", "piper-tts", "--quiet"], timeout=300)
                if result.returncode == 0:
                    _print_success("    piper-tts installed")
                else:
                    _print_warning("    piper-tts install failed:")
-                    _print_info(f"      {result.stderr.strip()[:300]}")
-                    _print_info("    Run manually: python -m pip install -U piper-tts")
+                    _print_info(f"      {(result.stderr or '').strip()[:300]}")
+                    _print_info("    Run manually: uv pip install -U piper-tts")
                    return
            except subprocess.TimeoutExpired:
                _print_warning("    piper-tts install timed out (>5min)")
-                _print_info("    Run manually: python -m pip install -U piper-tts")
+                _print_info("    Run manually: uv pip install -U piper-tts")
                return
        _print_info("    Default voice: en_US-lessac-medium (downloaded on first TTS call)")
        _print_info("    Full voice list: https://github.com/OHF-Voice/piper1-gpl/blob/main/docs/VOICES.md")
        _print_info("    Switch voices by setting tts.piper.voice in ~/.hermes/config.yaml")

+    elif post_setup_key == "ddgs":
+        try:
+            __import__("ddgs")
+            _print_success("    ddgs is already installed")
+        except ImportError:
+            _print_info("    Installing ddgs (DuckDuckGo search package)...")
+            try:
+                result = _pip_install(["-U", "ddgs", "--quiet"], timeout=300)
+                if result.returncode == 0:
+                    _print_success("    ddgs installed")
+                else:
+                    _print_warning("    ddgs install failed:")
+                    _print_info(f"      {(result.stderr or '').strip()[:300]}")
+                    _print_info("    Run manually: uv pip install -U ddgs")
+                    return
+            except subprocess.TimeoutExpired:
+                _print_warning("    ddgs install timed out (>5min)")
+                _print_info("    Run manually: uv pip install -U ddgs")
+                return
+        _print_info("    No API key required. DuckDuckGo enforces server-side rate limits.")
+        _print_info("    Pair with an extract provider if you also need web_extract.")
+
    elif post_setup_key == "spotify":
        # Run the full `hermes auth spotify` flow — if the user has no
        # client_id yet, this drops them into the interactive wizard
@ -695,18 +882,7 @@ def _run_post_setup(post_setup_key: str):
            tinker_dir = PROJECT_ROOT / "tinker-atropos"
            if tinker_dir.exists() and (tinker_dir / "pyproject.toml").exists():
                _print_info("    Installing tinker-atropos submodule...")
-                import subprocess
-                uv_bin = shutil.which("uv")
-                if uv_bin:
-                    result = subprocess.run(
-                        [uv_bin, "pip", "install", "--python", sys.executable, "-e", str(tinker_dir)],
-                        capture_output=True, text=True
-                    )
-                else:
-                    result = subprocess.run(
-                        [sys.executable, "-m", "pip", "install", "-e", str(tinker_dir)],
-                        capture_output=True, text=True
-                    )
+                result = _pip_install(["-e", str(tinker_dir)])
                if result.returncode == 0:
                    _print_success("    tinker-atropos installed")
                else:
@ -723,16 +899,12 @@ def _run_post_setup(post_setup_key: str):
            __import__("langfuse")
            _print_success("    langfuse SDK already installed")
        except ImportError:
-            import subprocess
            _print_info("    Installing langfuse SDK...")
-            result = subprocess.run(
-                [sys.executable, "-m", "pip", "install", "langfuse", "--quiet"],
-                capture_output=True, text=True, timeout=120,
-            )
+            result = _pip_install(["langfuse", "--quiet"], timeout=120)
            if result.returncode == 0:
                _print_success("    langfuse SDK installed")
            else:
-                _print_warning("    langfuse SDK install failed — run manually: pip install langfuse")
+                _print_warning("    langfuse SDK install failed — run manually: uv pip install langfuse")
        # Opt the bundled observability/langfuse plugin into plugins.enabled.
        # The plugin ships in the repo but doesn't load until the user enables
        # it (standalone plugins are opt-in).
@ -844,6 +1016,38 @@ def _get_platform_tools(
            ts for ts in toolset_names
            if ts in configurable_keys and _toolset_allowed_for_platform(ts, platform)
        }
+        # Mixed config: composite toolset alongside configurables (e.g.
+        # ``[hermes-cli, spotify]`` after enabling Spotify via ``hermes
+        # tools``). Without expansion the composite name is silently dropped,
+        # leaving sessions with only the configurable opt-ins and no native
+        # tools. Mirror the else-branch's subset inference, but apply
+        # _DEFAULT_OFF_TOOLSETS only to the implicit expansion — anything the
+        # user explicitly listed (e.g. ``spotify``) must survive.
+        composite_tools = set()
+        for ts_name in toolset_names:
+            if ts_name in configurable_keys or ts_name in plugin_ts_keys:
+                continue
+            if ts_name not in TOOLSETS:
+                continue
+            composite_tools.update(resolve_toolset(ts_name))
+
+        if composite_tools:
+            expanded = set()
+            for ts_key, _, _ in CONFIGURABLE_TOOLSETS:
+                if not _toolset_allowed_for_platform(ts_key, platform):
+                    continue
+                ts_tools = set(resolve_toolset(ts_key))
+                if ts_tools and ts_tools.issubset(composite_tools):
+                    expanded.add(ts_key)
+
+            default_off = set(_DEFAULT_OFF_TOOLSETS)
+            if platform in default_off and platform not in _TOOLSET_PLATFORM_RESTRICTIONS:
+                default_off.remove(platform)
+            if "homeassistant" in default_off and os.getenv("HASS_TOKEN"):
+                default_off.remove("homeassistant")
+            expanded -= default_off
+
+            enabled_toolsets |= expanded
    else:
        # No explicit config — fall back to resolving composite toolset names
        # (e.g. "hermes-cli") to individual tool names and reverse-mapping.
@ -1264,12 +1468,52 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]:
    return visible


+_POST_SETUP_INSTALLED: dict = {
+    # post_setup_key -> predicate(): True when the install side-effect
+    # is already satisfied. Used by `_toolset_needs_configuration_prompt`
+    # to force the provider-setup flow when a no-key provider still needs
+    # a binary/dependency install (otherwise an already-configured user
+    # who toggles the toolset on via `hermes tools` gets a silent no-op
+    # because the gate sees "no env vars to ask about" and skips the
+    # provider-setup flow that would have run the post_setup hook).
+    #
+    # Only entries here are gated; other post_setup hooks (kittentts,
+    # piper, agent_browser, etc.) keep their existing behaviour. Add an
+    # entry when (a) the post_setup is the ONLY install side-effect for
+    # a no-key provider, and (b) an installed-state check is cheap and
+    # doesn't trigger a heavy import.
+    #
+    # cua_driver counts as installed once `shutil.which` resolves a
+    # `cua-driver` executable on PATH (it returns None otherwise).
+    "cua_driver": lambda: bool(shutil.which("cua-driver")),
+}
+
+
+def _post_setup_already_installed(post_setup_key: str) -> bool:
+    """Report whether the install step behind *post_setup_key* is already done.
+
+    The key is looked up in ``_POST_SETUP_INSTALLED`` and its predicate is
+    evaluated. The answer is False only when a registered predicate returns
+    a falsy value; a missing registration or a raising predicate both count
+    as "already installed".
+    """
+    check = _POST_SETUP_INSTALLED.get(post_setup_key)
+    if check is not None:
+        try:
+            installed = bool(check())
+        except Exception:
+            # A broken probe must not force the setup flow on the user.
+            installed = True
+        return installed
+    # Hooks without a registered check are not gated, so they are treated
+    # as satisfied and keep their previous behaviour.
+    return True
+
+
 def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool:
    """Return True when enabling this toolset should open provider setup."""
    cat = TOOL_CATEGORIES.get(ts_key)
    if not cat:
        return not _toolset_has_keys(ts_key, config)

+    # If any visible provider has a registered post_setup install-state
+    # check that hasn't been satisfied (e.g. cua-driver binary not on
+    # PATH yet), force the configuration flow so `_configure_provider`
+    # invokes `_run_post_setup` and the install actually runs.
+    for provider in _visible_providers(cat, config):
+        post_setup = provider.get("post_setup")
+        if post_setup and not _post_setup_already_installed(post_setup):
+            return True
+
    if ts_key == "tts":
        tts_cfg = config.get("tts", {})
        return not isinstance(tts_cfg, dict) or "provider" not in tts_cfg
@ -1387,7 +1631,7 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
            image_cfg = config.get("image_gen", {})
            if isinstance(image_cfg, dict):
                configured_provider = image_cfg.get("provider")
-                if configured_provider not in (None, "", "fal"):
+                if configured_provider not in {None, "", "fal"}:
                    return False
                if image_cfg.get("use_gateway") is not None and not is_truthy_value(image_cfg.get("use_gateway"), default=False):
                    return False
@ -1420,7 +1664,7 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
        configured_provider = image_cfg.get("provider")
        return (
            provider["imagegen_backend"] == "fal"
-            and configured_provider in (None, "", "fal")
+            and configured_provider in {None, "", "fal"}
            and not is_truthy_value(image_cfg.get("use_gateway"), default=False)
        )
    return False
@ -1670,7 +1914,7 @@ def _configure_provider(provider: dict, config: dict):

    # For tools without a specific config key (e.g. image_gen), still
    # track use_gateway so the runtime knows the user's intent.
-    if managed_feature and managed_feature not in ("web", "tts", "browser"):
+    if managed_feature and managed_feature not in {"web", "tts", "browser"}:
        config.setdefault(managed_feature, {})["use_gateway"] = True
    elif not managed_feature:
        # User picked a non-gateway provider — find which category this
@ -1702,7 +1946,7 @@ def _configure_provider(provider: dict, config: dict):
            # image_gen.provider clear so the dispatch shim falls through
            # to the legacy FAL path.
            img_cfg = config.setdefault("image_gen", {})
-            if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"):
+            if isinstance(img_cfg, dict) and img_cfg.get("provider") not in {None, "", "fal"}:
                img_cfg["provider"] = "fal"
        return

@ -1747,7 +1991,7 @@ def _configure_provider(provider: dict, config: dict):
        if backend:
            _configure_imagegen_model(backend, config)
            img_cfg = config.setdefault("image_gen", {})
-            if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"):
+            if isinstance(img_cfg, dict) and img_cfg.get("provider") not in {None, "", "fal"}:
                img_cfg["provider"] = "fal"


@ -1822,7 +2066,7 @@ def _reconfigure_tool(config: dict):
        cat = TOOL_CATEGORIES.get(ts_key)
        reqs = TOOLSET_ENV_REQUIREMENTS.get(ts_key)
        if cat or reqs:
-            if _toolset_has_keys(ts_key, config):
+            if _toolset_has_keys(ts_key, config) or _toolset_enabled_for_reconfigure(ts_key, config):
                configurable.append((ts_key, ts_label))

    if not configurable:
@ -1848,6 +2092,28 @@ def _reconfigure_tool(config: dict):
    save_config(config)


+def _toolset_enabled_for_reconfigure(ts_key: str, config: dict) -> bool:
+    """Tell whether *ts_key* is switched on for at least one platform.
+
+    Reconfigure lists enabled-but-unconfigured categories too, so users can
+    complete provider/API-key setup without toggling the toolset off and on.
+    Platforms where the toolset is not allowed are ignored, and a platform
+    whose tool resolution raises is treated as "not enabled there".
+    """
+    def _enabled_on(platform_name) -> bool:
+        # Resolve the platform's active toolsets; any failure here simply
+        # means this platform cannot vouch for the toolset.
+        try:
+            active = _get_platform_tools(
+                config,
+                platform_name,
+                include_default_mcp_servers=False,
+            )
+        except Exception:
+            return False
+        return ts_key in active
+
+    # any() stops at the first platform that reports the toolset enabled.
+    return any(
+        _enabled_on(name)
+        for name in PLATFORMS
+        if _toolset_allowed_for_platform(ts_key, name)
+    )
+
+
 def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
    """Reconfigure a tool category - provider selection + API key update."""
    icon = cat.get("icon", "")
@ -1897,24 +2163,30 @@ def _reconfigure_provider(provider: dict, config: dict):
            return

    if provider.get("tts_provider"):
-        config.setdefault("tts", {})["provider"] = provider["tts_provider"]
+        tts_cfg = config.setdefault("tts", {})
+        tts_cfg["provider"] = provider["tts_provider"]
+        tts_cfg["use_gateway"] = bool(managed_feature)
        _print_success(f"  TTS provider set to: {provider['tts_provider']}")

    if "browser_provider" in provider:
        bp = provider["browser_provider"]
+        browser_cfg = config.setdefault("browser", {})
        if bp == "local":
-            config.setdefault("browser", {})["cloud_provider"] = "local"
+            browser_cfg["cloud_provider"] = "local"
            _print_success("  Browser set to local mode")
        elif bp:
-            config.setdefault("browser", {})["cloud_provider"] = bp
+            browser_cfg["cloud_provider"] = bp
            _print_success(f"  Browser cloud provider set to: {bp}")
+        browser_cfg["use_gateway"] = bool(managed_feature)

    # Set web search backend in config if applicable
    if provider.get("web_backend"):
-        config.setdefault("web", {})["backend"] = provider["web_backend"]
+        web_cfg = config.setdefault("web", {})
+        web_cfg["backend"] = provider["web_backend"]
+        web_cfg["use_gateway"] = bool(managed_feature)
        _print_success(f"  Web backend set to: {provider['web_backend']}")

-    if managed_feature and managed_feature not in ("web", "tts", "browser"):
+    if managed_feature and managed_feature not in {"web", "tts", "browser"}:
        section = config.setdefault(managed_feature, {})
        if not isinstance(section, dict):
            section = {}
@ -2263,7 +2535,7 @@ def _configure_mcp_tools_interactive(config: dict):
    # Count enabled servers
    enabled_names = [
        k for k, v in mcp_servers.items()
-        if v.get("enabled", True) not in (False, "false", "0", "no", "off")
+        if v.get("enabled", True) not in {False, "false", "0", "no", "off"}
    ]
    if not enabled_names:
        _print_info("All MCP servers are disabled.")
--- a/hermes_cli/uninstall.py
+++ b/hermes_cli/uninstall.py
@ -118,12 +118,13 @@ def remove_wrapper_script():


 def uninstall_gateway_service():
-    """Stop and uninstall the gateway service (systemd, launchd) and kill any
-    standalone gateway processes.
+    """Stop and uninstall the gateway service (systemd, launchd, Windows
+    Scheduled Task / Startup folder) and kill any standalone gateway processes.

    Delegates to the gateway module which handles:
    - Linux: user + system systemd services (with proper DBUS env setup)
    - macOS: launchd plists
+    - Windows: Scheduled Task + Startup-folder fallback, via ``gateway_windows``
    - All platforms: standalone ``hermes gateway run`` processes
    - Termux/Android: skips systemd (no systemd on Android), still kills standalone processes
    """
@ -167,7 +168,7 @@ def uninstall_gateway_service():

                scope = "system" if is_system else "user"
                try:
-                    if is_system and os.geteuid() != 0:
+                    if is_system and os.geteuid() != 0:  # windows-footgun: ok — Linux systemd uninstall path, guarded by `if system == "Linux"` above
                        log_warn(f"System gateway service exists at {unit_path} "
                                 f"but needs sudo to remove")
                        continue
@ -201,9 +202,163 @@ def uninstall_gateway_service():
        except Exception as e:
            log_warn(f"Could not remove launchd gateway service: {e}")

+    # 4. Windows: uninstall Scheduled Task + Startup-folder entry.  The
+    #    gateway_windows module already knows how to locate and remove both
+    #    code paths (schtasks /Delete + .cmd unlink) and how to stop any
+    #    running detached pythonw gateway process.  We call into it so the
+    #    uninstall logic stays in exactly one place.
+    elif system == "Windows":
+        try:
+            from hermes_cli import gateway_windows
+            if gateway_windows.is_installed() or gateway_windows.is_task_registered() \
+                    or gateway_windows.is_startup_entry_installed():
+                try:
+                    gateway_windows.stop()
+                except Exception as e:
+                    log_warn(f"Could not stop Windows gateway cleanly: {e}")
+                try:
+                    gateway_windows.uninstall()
+                    log_success("Removed Windows gateway (Scheduled Task + Startup entry)")
+                    stopped_something = True
+                except Exception as e:
+                    log_warn(f"Could not fully uninstall Windows gateway: {e}")
+        except Exception as e:
+            log_warn(f"Could not check Windows gateway service: {e}")
+
    return stopped_something


+# ============================================================================
+# Windows-specific uninstall helpers
+# ============================================================================
+#
+# The installer (``scripts/install.ps1``) does four Windows-only things that
+# ``remove_path_from_shell_configs`` / ``remove_wrapper_script`` don't cover:
+#
+#   1. Sets User-scope env vars ``HERMES_HOME`` and ``HERMES_GIT_BASH_PATH``
+#      via ``[Environment]::SetEnvironmentVariable(..., "User")``.  These
+#      don't live in ~/.bashrc — they're in the Windows registry at
+#      HKCU\Environment.
+#   2. Prepends to User-scope ``PATH`` (same registry location) entries
+#      like ``%LOCALAPPDATA%\hermes\git\cmd``, ``%LOCALAPPDATA%\hermes\git\bin``,
+#      ``%LOCALAPPDATA%\hermes\git\usr\bin``, ``%LOCALAPPDATA%\hermes\node``.
+#      Again not in any rc file — only accessible via the registry or the
+#      .NET [Environment] API.
+#   3. Downloads PortableGit to ``%LOCALAPPDATA%\hermes\git\`` and Node to
+#      ``%LOCALAPPDATA%\hermes\node\`` as user-scoped, isolated copies.
+#      These are ~200MB combined and serve no purpose after uninstall.
+#   4. On the ``hermes dashboard`` + gateway paths, drops files into
+#      ``%LOCALAPPDATA%\hermes\gateway-service\`` and sometimes
+#      ``%APPDATA%\Microsoft\Windows\Start Menu\Programs\Startup\`` — the
+#      latter is handled by ``gateway_windows.uninstall()`` already.
+#
+# Running a PowerShell one-liner per operation is overkill and fragile on
+# locked-down machines (Constrained Language Mode, restricted ExecutionPolicy).
+# Direct registry writes via ``winreg`` work without spawning any subprocess.
+# Caveat: no WM_SETTINGCHANGE broadcast is sent (it would require ctypes), so
+# already-running processes — including Explorer, which hands its environment
+# to newly launched terminals — may keep the old values until the next sign-in.
+
+
+def _hermes_path_markers(hermes_home: Path) -> list[str]:
+    """Return the path prefixes that identify Hermes-owned User-PATH entries.
+
+    Each marker is ``<hermes_home>\\<subdir>`` for a directory Hermes may have
+    put on PATH.  Callers match on prefix so sub-entries (``git\\cmd``,
+    ``git\\bin``, ``git\\usr\\bin``, ...) are swept as well.  Only locations
+    under ``hermes_home`` are covered: PATH entries left behind by an install
+    under a different HERMES_HOME are not detected.
+    """
+    root = str(hermes_home).rstrip("\\/")
+    return [f"{root}\\{sub}" for sub in ("hermes-agent", "git", "node", "venv")]
+
+
+def remove_path_from_windows_registry(hermes_home: Path) -> list[str]:
+    """Strip Hermes-owned entries from User-scope PATH in the registry.
+
+    Operates on HKCU\\Environment, the key the installer wrote via
+    ``[Environment]::SetEnvironmentVariable``.  An entry is removed when it
+    equals, or lies beneath, a ``_hermes_path_markers`` prefix; the comparison
+    is case-insensitive and ignores trailing separators.
+
+    Returns the removed entries (empty off Windows or when nothing matched).
+    """
+    try:
+        import winreg
+    except ImportError:
+        return []  # not on Windows, nothing to do
+
+    # Lower-case once up front.  Matching below requires a path separator
+    # after the marker so ``...\git`` does not also claim an unrelated
+    # sibling such as ``...\github-tools``.
+    markers = [m.lower() for m in _hermes_path_markers(hermes_home)]
+    removed: list[str] = []
+    kept: list[str] = []
+    try:
+        with winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Environment", 0,
+                            winreg.KEY_READ | winreg.KEY_WRITE) as key:
+            try:
+                path_value, path_type = winreg.QueryValueEx(key, "Path")
+            except FileNotFoundError:
+                return []
+            for entry in path_value.split(";"):
+                if not entry:
+                    continue  # drop empty fragments from ";;" or a trailing ";"
+                norm = entry.rstrip("\\/").lower()
+                owned = any(norm == m or norm.startswith(m + "\\") for m in markers)
+                if owned:
+                    removed.append(entry)
+                else:
+                    kept.append(entry)
+            if removed:
+                # Reuse path_type so REG_EXPAND_SZ vs REG_SZ (and with it any
+                # unexpanded %VARS% in the kept entries) survives the rewrite.
+                winreg.SetValueEx(key, "Path", 0, path_type, ";".join(kept))
+    except OSError as e:
+        log_warn(f"Could not edit User PATH in registry: {e}")
+    return removed
+
+
+def remove_hermes_env_vars_windows() -> list[str]:
+    """Remove the installer's User-scope environment variables.
+
+    Deletes ``HERMES_HOME`` and ``HERMES_GIT_BASH_PATH`` from
+    HKCU\\Environment when they are present and returns the names that were
+    actually deleted.  Returns an empty list when ``winreg`` is unavailable.
+    """
+    try:
+        import winreg
+    except ImportError:
+        return []
+
+    deleted: list[str] = []
+    access = winreg.KEY_READ | winreg.KEY_WRITE
+    try:
+        with winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Environment", 0, access) as env_key:
+            for name in ("HERMES_HOME", "HERMES_GIT_BASH_PATH"):
+                # Probe first so an absent variable is skipped silently
+                # instead of being reported as a failed delete.
+                try:
+                    winreg.QueryValueEx(env_key, name)
+                except FileNotFoundError:
+                    continue
+                try:
+                    winreg.DeleteValue(env_key, name)
+                except OSError as e:
+                    log_warn(f"Could not delete {name} from User env: {e}")
+                else:
+                    deleted.append(name)
+    except OSError as e:
+        log_warn(f"Could not open User Environment key: {e}")
+    return deleted
+
+
+def remove_portable_tooling_windows(hermes_home: Path) -> list[Path]:
+    """Delete the installer-managed tool directories under ``hermes_home``.
+
+    Targets the ``git`` (PortableGit), ``node`` and ``gateway-service``
+    subdirectories.  These are install tooling rather than user data, so the
+    helper is suitable for keep-data uninstalls as well as full ones.  A
+    directory that cannot be removed is logged and skipped.
+
+    Returns the paths that were actually deleted.
+    """
+    deleted: list[Path] = []
+    for sub in ("git", "node", "gateway-service"):
+        tool_dir = hermes_home / sub
+        if not tool_dir.exists():
+            continue
+        try:
+            shutil.rmtree(tool_dir, ignore_errors=False)
+        except Exception as e:
+            log_warn(f"Could not remove {tool_dir}: {e}")
+        else:
+            deleted.append(tool_dir)
+    return deleted
+
+
+def _is_windows() -> bool:
+    """Return True when ``sys.platform`` reports ``"win32"``."""
+    import sys
+
+    platform_name = sys.platform
+    return platform_name == "win32"
+
+
 def _is_default_hermes_home(hermes_home: Path) -> bool:
    """Return True when ``hermes_home`` points at the default (non-profile) root."""
    try:
@ -335,7 +490,7 @@ def run_uninstall(args):
        print("Cancelled.")
        return
    
-    if choice == "3" or choice.lower() in ("c", "cancel", "q", "quit", "n", "no"):
+    if choice == "3" or choice.lower() in {"c", "cancel", "q", "quit", "n", "no"}:
        print()
        print("Uninstall cancelled.")
        return
@ -362,7 +517,7 @@ def run_uninstall(args):
            print()
            print("Cancelled.")
            return
-        remove_profiles = resp in ("y", "yes")
+        remove_profiles = resp in {"y", "yes"}

    # Final confirmation
    print()
@ -400,14 +555,36 @@ def run_uninstall(args):
    if not uninstall_gateway_service():
        log_info("No gateway service or processes found")
    
-    # 2. Remove PATH entries from shell configs
+    # 2. Remove PATH entries from shell configs (POSIX) AND from the Windows
+    #    User-scope registry.  Both helpers no-op on the wrong platform so we
+    #    can safely call them unconditionally.
    log_info("Removing PATH entries from shell configs...")
    removed_configs = remove_path_from_shell_configs()
    if removed_configs:
        for config in removed_configs:
            log_success(f"Updated {config}")
    else:
-        log_info("No PATH entries found to remove")
+        log_info("No PATH entries found to remove in shell rc files")
+
+    if _is_windows():
+        log_info("Removing PATH entries from Windows User environment...")
+        # Expand %LOCALAPPDATA% etc. in hermes_home so the marker matching is
+        # against fully resolved paths — installer writes literal strings
+        # like C:\Users\<u>\AppData\Local\hermes\git\cmd, not %LOCALAPPDATA%.
+        removed_path_entries = remove_path_from_windows_registry(Path(os.path.expandvars(str(hermes_home))))
+        if removed_path_entries:
+            for entry in removed_path_entries:
+                log_success(f"Removed from User PATH: {entry}")
+        else:
+            log_info("No Hermes-owned PATH entries in User environment")
+
+        log_info("Removing HERMES_HOME / HERMES_GIT_BASH_PATH User env vars...")
+        removed_env = remove_hermes_env_vars_windows()
+        if removed_env:
+            for name in removed_env:
+                log_success(f"Removed User env var: {name}")
+        else:
+            log_info("No Hermes-set User env vars to remove")
    
    # 3. Remove wrapper script
    log_info("Removing hermes command...")
@ -436,6 +613,21 @@ def run_uninstall(args):
    except Exception as e:
        log_warn(f"Could not fully remove {project_root}: {e}")
        log_info("You may need to manually remove it")
+
+    # 4b. Remove Windows-only installer artifacts that are NOT user data:
+    #     PortableGit, bundled Node, gateway-service dir.  Installer put them
+    #     under HERMES_HOME but they're install tooling, not config — safe to
+    #     remove even in "keep data" mode.  If we're doing a full uninstall
+    #     the step-5 rmtree(hermes_home) would sweep them anyway; calling
+    #     this helper there is a no-op since they'll already be gone.
+    if _is_windows():
+        log_info("Removing Windows installer artifacts (PortableGit, Node, gateway-service)...")
+        removed_artifacts = remove_portable_tooling_windows(hermes_home)
+        if removed_artifacts:
+            for path in removed_artifacts:
+                log_success(f"Removed {path}")
+        else:
+            log_info("No Windows installer artifacts to remove")
    
    # 5. Optionally remove ~/.hermes/ data directory (and named profiles)
    if full_uninstall:
@ -471,11 +663,18 @@ def run_uninstall(args):
        print(f"  {hermes_home}/")
        print()
        print("To reinstall later with your existing settings:")
-        print(color("  curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash", Colors.DIM))
+        if _is_windows():
+            print(color("  irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.ps1 | iex", Colors.DIM))
+        else:
+            print(color("  curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash", Colors.DIM))
        print()
-    
-    print(color("Reload your shell to complete the process:", Colors.YELLOW))
-    print("  source ~/.bashrc  # or ~/.zshrc")
+
+    if _is_windows():
+        print(color("Open a new terminal (PowerShell / Windows Terminal) to pick up", Colors.YELLOW))
+        print(color("the updated User PATH and environment variables.", Colors.YELLOW))
+    else:
+        print(color("Reload your shell to complete the process:", Colors.YELLOW))
+        print("  source ~/.bashrc  # or ~/.zshrc")
    print()
    print("Thank you for using Hermes Agent! ⚕")
    print()
--- a/hermes_cli/voice.py
+++ b/hermes_cli/voice.py
@ -27,6 +27,192 @@ import sys
 import threading
 from typing import Any, Callable, Optional

+# Modifier spellings accepted in ``voice.record_key`` and the prompt_toolkit
+# prefix each one becomes.  Kept in step with the ``_MOD_ALIASES`` table of
+# the TUI parser (``ui-tui/src/lib/platform.ts``) — the shared contract that
+# removes the cross-runtime mismatch Copilot flagged in round-9 on #19835.
+#
+# ``super``/``win``/``windows`` are left out on purpose: prompt_toolkit has
+# no super/meta modifier for the Cmd key, so those spellings are TUI-only.
+# The normalizer below answers them with the documented default (``c-b``) —
+# a silent fallback was preferred to a hard startup crash (Copilot
+# round-11).  The CLI binding site (``_register_voice_handler`` in cli.py)
+# logs a warning when that fallback fires so users see why their TUI-only
+# shortcut isn't bound in the classic CLI.
+_VOICE_MOD_ALIASES = {
+    **dict.fromkeys(("ctrl", "control"), "c-"),
+    **dict.fromkeys(("alt", "option", "opt"), "a-"),
+}
+
+# Named keys prompt_toolkit accepts in ``c-<name>`` / ``a-<name>`` form.
+# Listed as canonical spelling → accepted spellings; every alias collapses
+# to the canonical name so the same config value binds identically in both
+# runtimes (Copilot round-10 on #19835).
+_VOICE_NAMED_KEYS = {
+    alias: canonical
+    for canonical, aliases in (
+        ("space", ("space", "spc")),
+        ("enter", ("enter", "return", "ret")),
+        ("tab", ("tab",)),
+        ("escape", ("escape", "esc")),
+        ("backspace", ("backspace", "bs")),
+        ("delete", ("delete", "del")),
+    )
+    for alias in aliases
+}
+
+# ``useInputHandlers()`` intercepts these before the voice check runs, so a
+# binding like ``ctrl+c`` (interrupt), ``ctrl+d`` (quit), or ``ctrl+l``
+# (clear screen) would be advertised in /voice status but never fire
+# push-to-talk.  Same blocklist as the TUI parser.
+_VOICE_RESERVED_CTRL_CHARS = frozenset("cdl")
+
+# On macOS the classic CLI's prompt_toolkit bindings for copy / exit / clear
+# also claim ``a-c`` / ``a-d`` / ``a-l`` via the action-modifier lookup, and
+# hermes-ink reports Alt as ``key.meta`` on many terminals.  Mirror the TUI
+# parser's darwin-only reservation so ``option+c`` etc. don't bind Alt+C in
+# the CLI while the TUI silently falls back to Ctrl+B (Copilot round-14 on
+# #19835).
+_VOICE_RESERVED_ALT_CHARS_MAC = frozenset("cdl")
+
+# prompt_toolkit spelling of the documented default shortcut (Ctrl+B).
+_DEFAULT_PT_KEY = "c-b"
+
+
+def voice_record_key_from_config(cfg: Any) -> Any:
+    """Fetch ``cfg["voice"]["record_key"]`` without assuming the config shape.
+
+    ``load_config()`` deep-merges raw YAML and preserves scalar overrides,
+    so a hand-edited ``voice: true`` / ``voice: cmd+b`` leaves
+    ``cfg["voice"]`` as a bool/str rather than a dict.  The naive
+    ``.get("voice", {}).get("record_key")`` chain then raises
+    AttributeError before voice can even start (Copilot round-11 on
+    #19835).
+
+    Returns the stored value as-is (no validation) when both ``cfg`` and
+    its ``voice`` section are dicts, otherwise ``None`` — callers can pass
+    either result straight to the normalizer/formatter and get the
+    documented default for malformed shapes.
+    """
+    voice_section = cfg.get("voice") if isinstance(cfg, dict) else None
+    if isinstance(voice_section, dict):
+        return voice_section.get("record_key")
+    return None
+
+
+def normalize_voice_record_key_for_prompt_toolkit(raw: Any) -> str:
+    """Translate ``voice.record_key`` into prompt_toolkit's ``c-x`` / ``a-x`` form.
+
+    The result is always ``c-<key>`` or ``a-<key>``.  Anything that cannot
+    be expressed that way yields the documented default ``c-b``, following
+    the TUI parser contract (``ui-tui/src/lib/platform.ts``) so one config
+    value binds the same shortcut in both runtimes:
+
+    * accepted: exactly one modifier from ``_VOICE_MOD_ALIASES`` plus a key
+      that is a single character (``ctrl+o`` → ``c-o``) or a name/alias
+      from ``_VOICE_NAMED_KEYS`` (``ctrl+space`` → ``c-space``,
+      ``ctrl+return`` → ``c-enter``)
+    * default ``c-b``: non-string / empty values, bare keys, multi-modifier
+      chords, unknown modifiers or key names, reserved ``ctrl+c|d|l``, and
+      on macOS reserved ``alt+c|d|l``
+    * ``super`` / ``win`` / ``windows`` → ``c-b`` as well: they are TUI-only
+      (prompt_toolkit has no super mod) and the CLI binding site is
+      expected to warn when this fallback fires so users see the
+      cross-runtime split (Copilot round-11 on #19835)
+    """
+    if not isinstance(raw, str):
+        return _DEFAULT_PT_KEY
+
+    # Lower-case, split on "+", and drop empty fragments so stray whitespace
+    # or doubled separators never produce phantom tokens.
+    pieces = (piece.strip() for piece in raw.strip().lower().split("+"))
+    tokens = [tok for tok in pieces if tok]
+
+    # Only ``<modifier>+<key>`` is supported:
+    #   - nothing usable (empty / whitespace-only) → default;
+    #   - a bare char or bare named key would be bound by prompt_toolkit
+    #     without a modifier, which the TUI parser refuses;
+    #   - multi-modifier chords like ``ctrl+alt+r`` bind a different
+    #     shortcut in prompt_toolkit (a-c-r form) and hermes-ink rejects
+    #     them, so collapse to the default instead of silently diverging.
+    if len(tokens) != 2:
+        return _DEFAULT_PT_KEY
+    modifier_token, key_token = tokens
+
+    # ``super`` / ``win`` / ``windows`` are TUI-only (prompt_toolkit has no
+    # super modifier, so ``@kb.add(super+b)`` crashes the CLI at startup).
+    # The CLI binding site is expected to log a warning for these spellings
+    # so users know the runtimes diverge (Copilot round-11 on #19835).
+    if modifier_token in {"super", "win", "windows"}:
+        return _DEFAULT_PT_KEY
+
+    pt_prefix = _VOICE_MOD_ALIASES.get(modifier_token)
+    if not pt_prefix:
+        return _DEFAULT_PT_KEY
+
+    if len(key_token) > 1:
+        # Multi-char key tokens must be a known named key; typos like
+        # ``ctrl+spcae`` fall back to the default rather than being passed
+        # through as ``c-spcae`` (which prompt_toolkit would reject).
+        canonical = _VOICE_NAMED_KEYS.get(key_token)
+        return f"{pt_prefix}{canonical}" if canonical else _DEFAULT_PT_KEY
+
+    # Single-char key: refuse the reserved-ctrl chords the TUI also blocks
+    # at parse time, plus the mac-only alt reservation.
+    ctrl_reserved = pt_prefix == "c-" and key_token in _VOICE_RESERVED_CTRL_CHARS
+    alt_reserved_on_mac = (
+        pt_prefix == "a-"
+        and sys.platform == "darwin"
+        and key_token in _VOICE_RESERVED_ALT_CHARS_MAC
+    )
+    if ctrl_reserved or alt_reserved_on_mac:
+        return _DEFAULT_PT_KEY
+    return f"{pt_prefix}{key_token}"
+
+
+def format_voice_record_key_for_status(raw: Any) -> str:
+    """Render ``voice.record_key`` for ``/voice status`` in CLI-friendly form.
+
+    Mirrors the TUI's ``formatVoiceRecordKey``: returns ``Ctrl+B`` /
+    ``Alt+Space`` / ``Ctrl+Enter``.  The value is normalized first, so a
+    malformed, reserved, or TUI-only (``super`` / ``win``) config is shown
+    as the documented default rather than as a shortcut that won't bind
+    (Copilot round-10 on #19835).
+    """
+    normalized = normalize_voice_record_key_for_prompt_toolkit(raw)
+
+    # The normalizer only ever returns ``c-<key>`` or ``a-<key>`` with a
+    # non-empty key, so these two prefixes cover every real input.
+    label = {"c-": "Ctrl+", "a-": "Alt+"}.get(normalized[:2])
+    key = normalized[2:]
+    if label is None or not key:
+        # Defensive only: stay on the documented default should the
+        # normalizer's output format ever change.
+        return "Ctrl+B"
+
+    # Upper-case the first character: ``b`` → ``B``, ``space`` → ``Space``.
+    return label + key[0].upper() + key[1:]
+
+
 from tools.voice_mode import (
    create_audio_recorder,
    is_whisper_hallucination,
@ -95,6 +281,8 @@ _recorder_lock = threading.Lock()
 # ── Continuous (VAD) state ───────────────────────────────────────────
 _continuous_lock = threading.Lock()
 _continuous_active = False
+_continuous_stopping = False
+_continuous_auto_restart: bool = True
 _continuous_recorder: Any = None

 # ── TTS-vs-STT feedback guard ────────────────────────────────────────
@ -184,32 +372,43 @@ def start_continuous(
    on_silent_limit: Optional[Callable[[], None]] = None,
    silence_threshold: int = 200,
    silence_duration: float = 3.0,
-) -> None:
+    auto_restart: bool = True,
+) -> bool:
    """Start a VAD-driven continuous recording loop.

    The loop calls ``on_transcript(text)`` each time speech is detected and
-    transcribed successfully, then auto-restarts. After
-    ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
-    picked up at all) the loop stops itself and calls ``on_silent_limit``
-    so the UI can reflect "voice off". Idempotent — calling while already
-    active is a no-op.
+    transcribed successfully. If ``auto_restart`` is True, it auto-restarts
+    for the next turn and resets the no-speech counter for that loop. If
+    ``auto_restart`` is False, the first silence-triggered transcription ends
+    the loop and reports ``"idle"``; no-speech counts are retained across
+    starts so a push-to-talk caller can still enforce the three-strikes guard.
+    After ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
+    picked up at all) the loop stops itself and calls ``on_silent_limit`` so the
+    UI can reflect "voice off". Returns False if a previous stop is still
+    transcribing/cleaning up; otherwise returns True. Idempotent — calling while
+    already active is a successful no-op.

    ``on_status`` is called with ``"listening"`` / ``"transcribing"`` /
    ``"idle"`` so the UI can show a live indicator.
    """
-    global _continuous_active, _continuous_recorder
+    global _continuous_active, _continuous_recorder, _continuous_auto_restart
    global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
    global _continuous_no_speech_count

    with _continuous_lock:
        if _continuous_active:
            _debug("start_continuous: already active — no-op")
-            return
+            return True
+        if _continuous_stopping:
+            _debug("start_continuous: stop/transcribe in progress — busy")
+            return False
        _continuous_active = True
+        _continuous_auto_restart = auto_restart
        _continuous_on_transcript = on_transcript
        _continuous_on_status = on_status
        _continuous_on_silent_limit = on_silent_limit
-        _continuous_no_speech_count = 0
+        if auto_restart:
+            _continuous_no_speech_count = 0

        if _continuous_recorder is None:
            _continuous_recorder = create_audio_recorder()
@ -242,15 +441,18 @@ def start_continuous(
        except Exception:
            pass

+    return True

-def stop_continuous() -> None:
+
+def stop_continuous(force_transcribe: bool = False) -> None:
    """Stop the active continuous loop and release the microphone.

-    Idempotent — calling while not active is a no-op. Any in-flight
-    transcription completes but its result is discarded (the callback
-    checks ``_continuous_active`` before firing).
+    Idempotent — calling while not active is a no-op. If ``force_transcribe`` is
+    True, the recorder stops synchronously, then transcription/cleanup runs on a
+    background thread before reporting ``"idle"``. Otherwise the buffer is
+    discarded.
    """
-    global _continuous_active, _continuous_on_transcript
+    global _continuous_active, _continuous_on_transcript, _continuous_stopping
    global _continuous_on_status, _continuous_on_silent_limit
    global _continuous_recorder, _continuous_no_speech_count

@ -260,18 +462,98 @@ def stop_continuous() -> None:
        _continuous_active = False
        rec = _continuous_recorder
        on_status = _continuous_on_status
+        on_transcript = _continuous_on_transcript
+        on_silent_limit = _continuous_on_silent_limit
+        auto_restart = _continuous_auto_restart
+        track_no_speech = force_transcribe and not auto_restart
+        _continuous_stopping = rec is not None
        _continuous_on_transcript = None
        _continuous_on_status = None
        _continuous_on_silent_limit = None
-        _continuous_no_speech_count = 0
+        if not track_no_speech:
+            _continuous_no_speech_count = 0

    if rec is not None:
-        try:
-            # cancel() (not stop()) discards buffered frames — the loop
-            # is over, we don't want to transcribe a half-captured turn.
-            rec.cancel()
-        except Exception as e:
-            logger.warning("failed to cancel recorder: %s", e)
+        if force_transcribe and on_transcript:
+            if on_status:
+                try:
+                    on_status("transcribing")
+                except Exception:
+                    pass
+            try:
+                wav_path = rec.stop()
+            except Exception as e:
+                logger.warning("failed to stop recorder: %s", e)
+                try:
+                    rec.cancel()
+                except Exception as cancel_error:
+                    logger.warning("failed to cancel recorder: %s", cancel_error)
+                wav_path = None
+
+            def _transcribe_and_cleanup():
+                global _continuous_no_speech_count, _continuous_stopping
+                transcript: Optional[str] = None
+                should_halt = False
+
+                try:
+                    if wav_path:
+                        try:
+                            result = transcribe_recording(wav_path)
+                            if result.get("success"):
+                                text = (result.get("transcript") or "").strip()
+                                if text and not is_whisper_hallucination(text):
+                                    transcript = text
+                        finally:
+                            if os.path.isfile(wav_path):
+                                os.unlink(wav_path)
+                except Exception as e:
+                    logger.warning("failed to stop/transcribe recorder: %s", e)
+                finally:
+                    if transcript:
+                        try:
+                            on_transcript(transcript)
+                        except Exception as e:
+                            logger.warning("on_transcript callback raised: %s", e)
+
+                    if track_no_speech:
+                        with _continuous_lock:
+                            if transcript:
+                                _continuous_no_speech_count = 0
+                            else:
+                                _continuous_no_speech_count += 1
+                                should_halt = (
+                                    _continuous_no_speech_count
+                                    >= _CONTINUOUS_NO_SPEECH_LIMIT
+                                )
+                                if should_halt:
+                                    _continuous_no_speech_count = 0
+                        if should_halt and on_silent_limit:
+                            try:
+                                on_silent_limit()
+                            except Exception:
+                                pass
+
+                    _play_beep(frequency=660, count=2)
+                    with _continuous_lock:
+                        _continuous_stopping = False
+                    if on_status:
+                        try:
+                            on_status("idle")
+                        except Exception:
+                            pass
+
+            threading.Thread(target=_transcribe_and_cleanup, daemon=True).start()
+            return
+        else:
+            try:
+                # cancel() (not stop()) discards buffered frames — the loop
+                # is over, we don't want to transcribe a half-captured turn.
+                rec.cancel()
+            except Exception as e:
+                logger.warning("failed to cancel recorder: %s", e)
+
+    with _continuous_lock:
+        _continuous_stopping = False

    # Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
    # silence-auto-stop path plays).
@ -417,23 +699,39 @@ def _continuous_on_silence() -> None:
                _debug("_continuous_on_silence: stopped while waiting for TTS")
                return

-    # Restart for the next turn.
-    _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
-    _play_beep(frequency=880, count=1)
-    try:
-        rec.start(on_silence_stop=_continuous_on_silence)
-    except Exception as e:
-        logger.error("failed to restart continuous recording: %s", e)
-        _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
+    if _continuous_auto_restart:
+        # Restart for the next turn.
+        _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
+        _play_beep(frequency=880, count=1)
+        try:
+            rec.start(on_silence_stop=_continuous_on_silence)
+        except Exception as e:
+            logger.error("failed to restart continuous recording: %s", e)
+            _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
+            with _continuous_lock:
+                _continuous_active = False
+            if on_status:
+                try:
+                    on_status("idle")
+                except Exception:
+                    pass
+            return
+
+        if on_status:
+            try:
+                on_status("listening")
+            except Exception:
+                pass
+    else:
+        # Do not auto-restart. Clean up state and notify idle.
+        _debug("_continuous_on_silence: auto_restart=False, stopping loop")
        with _continuous_lock:
            _continuous_active = False
-        return
-
-    if on_status:
-        try:
-            on_status("listening")
-        except Exception:
-            pass
+        if on_status:
+            try:
+                on_status("idle")
+            except Exception:
+                pass


 # ── TTS API ──────────────────────────────────────────────────────────
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@ -52,7 +52,7 @@ from gateway.status import get_running_pid, read_runtime_status
 try:
    from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
    from fastapi.middleware.cors import CORSMiddleware
-    from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
+    from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, Response
    from fastapi.staticfiles import StaticFiles
    from pydantic import BaseModel
 except ImportError:
@ -179,7 +179,7 @@ def _is_accepted_host(host_header: str, bound_host: str) -> bool:
    # 0.0.0.0 bind means operator explicitly opted into all-interfaces
    # (requires --insecure per web_server.start_server). No Host-layer
    # defence can protect that mode; rely on operator network controls.
-    if bound_host in ("0.0.0.0", "::"):
+    if bound_host in {"0.0.0.0", "::"}:
        return True

    # Loopback bind: accept the loopback names
@ -225,7 +225,7 @@ async def host_header_middleware(request: Request, call_next):
 async def auth_middleware(request: Request, call_next):
    """Require the session token on all /api/ routes except the public list."""
    path = request.url.path
-    if path.startswith("/api/") and path not in _PUBLIC_API_PATHS and not path.startswith("/api/plugins/"):
+    if path.startswith("/api/") and path not in _PUBLIC_API_PATHS:
        if not _has_valid_session_token(request):
            return JSONResponse(
                status_code=401,
@ -345,6 +345,7 @@ _CATEGORY_MERGE: Dict[str, str] = {
    "dashboard": "display",
    "code_execution": "agent",
    "prompt_caching": "agent",
+    "goals": "agent",
    # Only `telegram.reactions` currently lives under telegram — fold it in
    # with the other messaging-platform config (discord) so it isn't an
    # orphan tab of one field.
@ -384,7 +385,7 @@ def _build_schema_from_config(
        full_key = f"{prefix}.{key}" if prefix else key

        # Skip internal / version keys
-        if full_key in ("_config_version",):
+        if full_key in {"_config_version",}:
            continue

        # Category is the first path component for nested keys, or "general"
@ -469,10 +470,23 @@ except (ValueError, TypeError):
    )
    _GATEWAY_HEALTH_TIMEOUT = 3.0

+# DEPRECATED (scheduled for removal): GATEWAY_HEALTH_URL / GATEWAY_HEALTH_TIMEOUT.
+# Cross-container / cross-host gateway liveness detection will be folded into a
+# first-class dashboard config key so it's no longer Docker-adjacent lore buried
+# in env vars.  The env vars still work for now so existing Compose deployments
+# don't break.  Do not add new callers — wire new uses through the planned
+# config surface.
+

 def _probe_gateway_health() -> tuple[bool, dict | None]:
    """Probe the gateway via its HTTP health endpoint (cross-container).

+    .. deprecated::
+        Driven by the deprecated ``GATEWAY_HEALTH_URL`` /
+        ``GATEWAY_HEALTH_TIMEOUT`` env vars.  Scheduled for removal alongside
+        a move to a first-class dashboard config key.  See
+        :data:`_GATEWAY_HEALTH_URL` for context.
+
    Uses ``/health/detailed`` first (returns full state), falling back to
    the simpler ``/health`` endpoint.  Returns ``(is_alive, body_dict)``.

@ -519,7 +533,7 @@ async def get_status():
    remote_health_body: dict | None = None

    if not gateway_running and _GATEWAY_HEALTH_URL:
-        loop = asyncio.get_event_loop()
+        loop = asyncio.get_running_loop()
        alive, remote_health_body = await loop.run_in_executor(
            None, _probe_gateway_health
        )
@ -562,13 +576,13 @@ async def get_status():
        gateway_exit_reason = runtime.get("exit_reason")
        gateway_updated_at = runtime.get("updated_at")
        if not gateway_running:
-            gateway_state = gateway_state if gateway_state in ("stopped", "startup_failed") else "stopped"
+            gateway_state = gateway_state if gateway_state in {"stopped", "startup_failed"} else "stopped"
            gateway_platforms = {}
        elif gateway_running and remote_health_body is not None:
            # The health probe confirmed the gateway is alive, but the local
            # runtime status file may be stale (cross-container).  Override
            # stopped/None state so the dashboard shows the correct badge.
-            if gateway_state in (None, "stopped"):
+            if gateway_state in {None, "stopped"}:
                gateway_state = "running"

    # If there was no runtime info at all but the health probe confirmed alive,
@ -678,7 +692,7 @@ def _tail_lines(path: Path, n: int) -> List[str]:
    if not path.exists():
        return []
    try:
-        text = path.read_text(errors="replace")
+        text = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []
    lines = text.splitlines()
@ -1061,7 +1075,7 @@ async def set_model_assignment(body: ModelAssignment):
    model = (body.model or "").strip()
    task = (body.task or "").strip().lower()

-    if scope not in ("main", "auxiliary"):
+    if scope not in {"main", "auxiliary"}:
        raise HTTPException(status_code=400, detail="scope must be 'main' or 'auxiliary'")

    try:
@ -1176,14 +1190,13 @@ def _denormalize_config_from_web(config: Dict[str, Any]) -> Dict[str, Any]:
                else:
                    disk_model.pop("context_length", None)
                config["model"] = disk_model
-            else:
-                # Model was previously a bare string — upgrade to dict if
-                # user is setting a context_length override
-                if ctx_override > 0:
-                    config["model"] = {
-                        "default": model_val,
-                        "context_length": ctx_override,
-                    }
+            # Model was previously a bare string — upgrade to dict if
+            # user is setting a context_length override
+            elif ctx_override > 0:
+                config["model"] = {
+                    "default": model_val,
+                    "context_length": ctx_override,
+                }
        except Exception:
            pass  # can't read disk config — just use the string form
    return config
@ -1555,7 +1568,7 @@ async def disconnect_oauth_provider(provider_id: str, request: Request):
    # AND forget the Claude Code import. We don't touch ~/.claude/* directly
    # — that's owned by the Claude Code CLI; users can re-auth there if they
    # want to undo a disconnect.
-    if provider_id in ("anthropic", "claude-code"):
+    if provider_id in {"anthropic", "claude-code"}:
        try:
            from agent.anthropic_adapter import _HERMES_OAUTH_FILE
            if _HERMES_OAUTH_FILE.exists():
@ -1831,7 +1844,7 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
                    client_id=client_id,
                    scope=scope,
                )
-        device_data = await asyncio.get_event_loop().run_in_executor(None, _do_nous_device_request)
+        device_data = await asyncio.get_running_loop().run_in_executor(None, _do_nous_device_request)
        sid, sess = _new_oauth_session("nous", "device_code")
        sess["device_code"] = str(device_data["device_code"])
        sess["interval"] = int(device_data["interval"])
@ -1863,8 +1876,8 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
            name=f"oauth-codex-{sid[:6]}",
        ).start()
        # Block briefly until the worker has populated the user_code, OR error.
-        deadline = time.time() + 10
-        while time.time() < deadline:
+        deadline = time.monotonic() + 10
+        while time.monotonic() < deadline:
            with _oauth_sessions_lock:
                s = _oauth_sessions.get(sid)
            if s and (s.get("user_code") or s["status"] != "pending"):
@ -1998,10 +2011,10 @@ def _codex_full_login_worker(session_id: str) -> None:
            sess["expires_at"] = time.time() + sess["expires_in"]

        # Step 2: poll until authorized
-        deadline = time.time() + sess["expires_in"]
+        deadline = time.monotonic() + sess["expires_in"]
        code_resp = None
        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
-            while time.time() < deadline:
+            while time.monotonic() < deadline:
                time.sleep(poll_interval)
                poll = client.post(
                    f"{issuer}/api/accounts/deviceauth/token",
@ -2011,7 +2024,7 @@ def _codex_full_login_worker(session_id: str) -> None:
                if poll.status_code == 200:
                    code_resp = poll.json()
                    break
-                if poll.status_code in (403, 404):
+                if poll.status_code in {403, 404}:
                    continue  # user hasn't authorized yet
                raise RuntimeError(f"deviceauth/token poll returned {poll.status_code}")

@ -2120,7 +2133,7 @@ async def submit_oauth_code(provider_id: str, body: OAuthSubmitBody, request: Re
    """Submit the auth code for PKCE flows. Token-protected."""
    _require_token(request)
    if provider_id == "anthropic":
-        return await asyncio.get_event_loop().run_in_executor(
+        return await asyncio.get_running_loop().run_in_executor(
            None, _submit_anthropic_pkce, body.session_id, body.code,
        )
    raise HTTPException(status_code=400, detail=f"submit not supported for {provider_id}")
@ -2159,6 +2172,83 @@ async def cancel_oauth_session(session_id: str, request: Request):
 # ---------------------------------------------------------------------------


+
+def _session_latest_descendant(session_id: str):
+    """Follow a session's chain of children down to its most recent leaf.
+
+    ``/model`` may fork a session into child sessions, so a dashboard refresh
+    should continue the newest child rather than reopen the old parent.
+
+    Returns ``(leaf_id, path)`` where ``path`` lists the ids visited from the
+    resolved session down to the leaf, or ``(None, [])`` when the session
+    cannot be resolved or does not exist.
+    """
+    from hermes_state import SessionDB
+
+    def _field(record, name, position):
+        # Rows may be dicts, name-addressable rows, or plain sequences; try
+        # each access style in turn and give up with None.
+        if isinstance(record, dict):
+            return record.get(name)
+        try:
+            return record[name]
+        except Exception:
+            pass
+        try:
+            return record[position]
+        except Exception:
+            return None
+
+    def _started_at(record):
+        # Missing or unparseable timestamps sort as 0.
+        try:
+            return float(record.get("started_at") or 0)
+        except Exception:
+            return 0.0
+
+    store = SessionDB()
+    try:
+        root = store.resolve_session_id(session_id)
+        if not root or not store.get_session(root):
+            return None, []
+
+        # Pick the first truthy connection attribute the DB object exposes.
+        handle = None
+        for attr in ("conn", "_conn", "connection", "_connection"):
+            handle = getattr(store, attr, None)
+            if handle:
+                break
+
+        if handle is not None:
+            fetched = handle.execute(
+                "SELECT id, parent_session_id, started_at FROM sessions"
+            ).fetchall()
+            records = [
+                {
+                    "id": _field(raw, "id", 0),
+                    "parent_session_id": _field(raw, "parent_session_id", 1),
+                    "started_at": _field(raw, "started_at", 2),
+                }
+                for raw in fetched
+            ]
+        else:
+            records = store.list_sessions_rich(limit=10000, offset=0)
+
+        # Index sessions by their parent id.
+        by_parent = {}
+        for record in records:
+            parent_id = record.get("parent_session_id")
+            if record.get("id") and parent_id:
+                by_parent.setdefault(parent_id, []).append(record)
+
+        leaf = root
+        trail = [root]
+        visited = {root}  # guards against parent/child cycles
+
+        while True:
+            fresh = [
+                child
+                for child in (by_parent.get(leaf) or [])
+                if child.get("id") not in visited
+            ]
+            if not fresh:
+                break
+            # Newest child first; ties keep their original order.
+            leaf = sorted(fresh, key=_started_at, reverse=True)[0]["id"]
+            trail.append(leaf)
+            visited.add(leaf)
+
+        return leaf, trail
+    finally:
+        store.close()
+
@app.get("/api/sessions/{session_id}")
 async def get_session_detail(session_id: str):
    from hermes_state import SessionDB
@ -2173,6 +2263,19 @@ async def get_session_detail(session_id: str):
        db.close()


+
+@app.get("/api/sessions/{session_id}/latest-descendant")
+async def get_session_latest_descendant(session_id: str):
+    """Report the newest descendant of ``session_id`` and the path to it.
+
+    Raises a 404 ``HTTPException`` when no session could be resolved.
+    """
+    leaf_id, chain = _session_latest_descendant(session_id)
+    if not leaf_id:
+        raise HTTPException(status_code=404, detail="Session not found")
+    origin = chain[0] if chain else session_id
+    moved = bool(chain and leaf_id != chain[0])
+    return {
+        "requested_session_id": origin,
+        "session_id": leaf_id,
+        "path": chain,
+        "changed": moved,
+    }
+
@app.get("/api/sessions/{session_id}/messages")
 async def get_session_messages(session_id: str):
    from hermes_state import SessionDB
@ -2352,6 +2455,7 @@ async def delete_cron_job(job_id: str):
 class ProfileCreate(BaseModel):
    name: str
    clone_from_default: bool = False
+    no_skills: bool = False


 class ProfileRename(BaseModel):
@ -2457,11 +2561,13 @@ async def create_profile_endpoint(body: ProfileCreate):
            name=body.name,
            clone_from="default" if body.clone_from_default else None,
            clone_config=body.clone_from_default,
+            no_skills=body.no_skills,
        )
        # Match the CLI's profile-create flow: fresh named profiles get the
        # bundled skills installed. When cloning from default, create_profile()
        # has already copied the source profile's skills, including any
-        # user-installed skills.
+        # user-installed skills. When no_skills=True, create_profile() wrote
+        # the opt-out marker and seed_profile_skills() will no-op.
        if not body.clone_from_default:
            profiles_mod.seed_profile_skills(path, quiet=True)

@ -2872,7 +2978,20 @@ async def get_models_analytics(days: int = 30):
 import re
 import asyncio

-from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError
+# PTY bridge is POSIX-only (depends on fcntl/termios/ptyprocess).  On native
+# Windows the import raises; catch and leave PtyBridge=None so the rest of
+# the dashboard (sessions, jobs, metrics, config editor) still loads and the
+# /api/pty endpoint cleanly refuses with a WSL-suggested message.
+try:
+    from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError
+    _PTY_BRIDGE_AVAILABLE = True
+except ImportError:  # pragma: no cover - Windows-only path
+    # Keep the names importable so later references don't raise NameError;
+    # callers check _PTY_BRIDGE_AVAILABLE before touching PtyBridge.
+    PtyBridge = None  # type: ignore[assignment]
+    _PTY_BRIDGE_AVAILABLE = False
+
+    class PtyUnavailableError(RuntimeError):  # type: ignore[no-redef]
+        """Stub on platforms where pty_bridge can't be imported."""

 _RESIZE_RE = re.compile(rb"\x1b\[RESIZE:(\d+);(\d+)\]")
 _PTY_READ_CHUNK_TIMEOUT = 0.2
@ -2881,6 +3000,25 @@ _VALID_CHANNEL_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
 # loopback so tests don't need to rewrite request scope.
 _LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})

+
+def _is_public_bind() -> bool:
+    """Report whether the app is bound to a wildcard address.
+
+    Reads ``app.state.bound_host`` (treated as empty when unset) and checks
+    it against the IPv4 / IPv6 all-interfaces addresses.
+    """
+    bound = getattr(app.state, "bound_host", "")
+    return bound in {"0.0.0.0", "::"}
+
+
+def _ws_client_is_allowed(ws: "WebSocket") -> bool:
+    """Decide whether a WebSocket peer may connect based on its address.
+
+    Any peer is accepted when the server is bound to all interfaces.
+    Otherwise the peer must be a loopback host; a connection that carries
+    no client address at all is also accepted.
+    """
+    if _is_public_bind():
+        return True
+    peer = ws.client.host if ws.client else ""
+    return (not peer) or peer in _LOOPBACK_HOSTS
+
 # Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard)
 # and /api/events (dashboard → browser sidebar).  Keyed by an opaque channel id
 # the chat tab generates on mount; entries auto-evict when the last subscriber
@ -2913,8 +3051,18 @@ def _resolve_chat_argv(
    argv, cwd = _make_tui_argv(PROJECT_ROOT / "ui-tui", tui_dev=False)
    env = os.environ.copy()
    env.setdefault("NODE_ENV", "production")
+    # Browser-embedded chat should prefer stable wheel-based scrollback over
+    # native terminal mouse tracking. When mouse tracking is enabled, wheel
+    # events are consumed by the TUI and forwarded as terminal input, which
+    # makes browser-side transcript scrolling feel broken. Keep the terminal
+    # build unchanged for native CLI usage; only disable mouse tracking for
+    # the dashboard PTY path.
+    env.setdefault("HERMES_TUI_DISABLE_MOUSE", "1")

    if resume:
+        latest_resume, _latest_path = _session_latest_descendant(resume)
+        if latest_resume:
+            resume = latest_resume
        env["HERMES_TUI_RESUME"] = resume

    if sidecar_url:
@ -2971,13 +3119,24 @@ async def pty_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    client_host = ws.client.host if ws.client else ""
-    if client_host and client_host not in _LOOPBACK_HOSTS:
+    if not _ws_client_is_allowed(ws):
        await ws.close(code=4403)
        return

    await ws.accept()

+    # On native Windows, the POSIX PTY bridge can't be imported.  Tell the
+    # client and close cleanly rather than pretending the feature works.
+    if not _PTY_BRIDGE_AVAILABLE:
+        await ws.send_text(
+            "\r\n\x1b[31mChat unavailable: the embedded terminal requires a "
+            "POSIX PTY, which native Windows Python doesn't provide.\x1b[0m\r\n"
+            "\x1b[33mInstall Hermes inside WSL2 to use the dashboard's /chat "
+            "tab — the rest of the dashboard works here.\x1b[0m\r\n"
+        )
+        await ws.close(code=1011)
+        return
+
    # --- spawn PTY ------------------------------------------------------
    resume = ws.query_params.get("resume") or None
    channel = _channel_or_close_code(ws)
@ -3079,8 +3238,7 @@ async def gateway_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    client_host = ws.client.host if ws.client else ""
-    if client_host and client_host not in _LOOPBACK_HOSTS:
+    if not _ws_client_is_allowed(ws):
        await ws.close(code=4403)
        return

@ -3112,8 +3270,7 @@ async def pub_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    client_host = ws.client.host if ws.client else ""
-    if client_host and client_host not in _LOOPBACK_HOSTS:
+    if not _ws_client_is_allowed(ws):
        await ws.close(code=4403)
        return

@ -3142,8 +3299,7 @@ async def events_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    client_host = ws.client.host if ws.client else ""
-    if client_host and client_host not in _LOOPBACK_HOSTS:
+    if not _ws_client_is_allowed(ws):
        await ws.close(code=4403)
        return

@ -3176,12 +3332,42 @@ async def events_ws(ws: WebSocket) -> None:
                    _event_channels.pop(channel, None)


+def _normalise_prefix(raw: Optional[str]) -> str:
+    """Normalise an X-Forwarded-Prefix header value.
+
+    Returns a string like ``"/hermes"`` (leading slash, no trailing slash)
+    or ``""`` when no prefix is set or the header is malformed.
+
+    The result is interpolated verbatim into a JS string literal, HTML
+    ``href``/``src`` attributes and CSS ``url(...)`` values, so anything
+    that could terminate or escape one of those contexts is rejected:
+    quotes, angle brackets, ``&``, parentheses, backslashes, whitespace and
+    every non-printable character. Path tricks (``..``, ``//``) and values
+    longer than 64 characters are rejected as well.
+    """
+    if not raw:
+        return ""
+    p = raw.strip()
+    if not p:
+        return ""
+    if not p.startswith("/"):
+        p = "/" + p
+    p = p.rstrip("/")
+    if len(p) > 64 or "//" in p or ".." in p:
+        return ""
+    forbidden = frozenset("\"'<>&()\\ \n\r\t")
+    # str.isprintable() is False for control and separator characters
+    # (NUL, ESC, U+2028, ...) that the explicit list above does not name.
+    if any(ch in forbidden or not ch.isprintable() for ch in p):
+        return ""
+    return p
+
+
 def mount_spa(application: FastAPI):
    """Mount the built SPA. Falls back to index.html for client-side routing.

    The session token is injected into index.html via a ``<script>`` tag so
    the SPA can authenticate against protected API endpoints without a
    separate (unauthenticated) token-dispensing endpoint.
+
+    When served behind a path-prefix reverse proxy (e.g.
+    ``mission-control.tilos.com/hermes/*`` -> local Caddy -> :9119), the
+    proxy injects ``X-Forwarded-Prefix: /hermes`` on every request. We
+    rewrite the served ``index.html`` so absolute asset URLs (``/assets/...``)
+    and the SPA's runtime ``__HERMES_BASE_PATH__`` honour that prefix
+    without rebuilding the bundle.
    """
    if not WEB_DIST.exists():
        @application.get("/{full_path:path}")
@ -3194,24 +3380,62 @@ def mount_spa(application: FastAPI):

    _index_path = WEB_DIST / "index.html"

-    def _serve_index():
-        """Return index.html with the session token injected."""
+    def _serve_index(prefix: str = ""):
+        """Return index.html with the session token + base-path injected.
+
+        ``prefix`` is the normalised ``X-Forwarded-Prefix`` (e.g. ``/hermes``)
+        or empty string when served at root.
+        """
        html = _index_path.read_text()
        chat_js = "true" if _DASHBOARD_EMBEDDED_CHAT_ENABLED else "false"
        token_script = (
            f'<script>window.__HERMES_SESSION_TOKEN__="{_SESSION_TOKEN}";'
-            f"window.__HERMES_DASHBOARD_EMBEDDED_CHAT__={chat_js};</script>"
+            f"window.__HERMES_DASHBOARD_EMBEDDED_CHAT__={chat_js};"
+            f'window.__HERMES_BASE_PATH__="{prefix}";</script>'
        )
+        if prefix:
+            # Rewrite absolute asset URLs baked into the Vite build so the
+            # browser fetches them through the same proxy prefix.
+            html = html.replace('href="/assets/', f'href="{prefix}/assets/')
+            html = html.replace('src="/assets/', f'src="{prefix}/assets/')
+            html = html.replace('href="/favicon.ico"', f'href="{prefix}/favicon.ico"')
+            html = html.replace('href="/fonts/', f'href="{prefix}/fonts/')
+            html = html.replace('href="/ds-assets/', f'href="{prefix}/ds-assets/')
+            html = html.replace('src="/ds-assets/', f'src="{prefix}/ds-assets/')
        html = html.replace("</head>", f"{token_script}</head>", 1)
        return HTMLResponse(
            html,
            headers={"Cache-Control": "no-store, no-cache, must-revalidate"},
        )

+    # When served behind a path-prefix proxy, the built CSS contains
+    # absolute ``url(/fonts/...)`` and ``url(/ds-assets/...)`` references.
+    # Browsers resolve those against the document origin, which means
+    # under ``/hermes`` they'd hit ``mission-control.tilos.com/fonts/...``
+    # (the MC Pages app), not the Hermes backend. Intercept CSS asset
+    # requests BEFORE the StaticFiles mount and rewrite the absolute paths
+    # when a prefix is in play.
+    @application.get("/assets/{filename}.css")
+    async def serve_css(filename: str, request: Request):
+        """Serve a built stylesheet, prefixing absolute ``url(...)`` paths.
+
+        Responds 404 when the file is missing or resolves outside
+        ``WEB_DIST``. When a valid ``X-Forwarded-Prefix`` header is present,
+        ``url(/fonts/...)``-style references (bare, double-quoted and
+        single-quoted) are rewritten to go through that prefix.
+        """
+        css_path = WEB_DIST / "assets" / f"{filename}.css"
+        if not css_path.is_file() or not css_path.resolve().is_relative_to(
+            WEB_DIST.resolve()
+        ):
+            return JSONResponse({"error": "not found"}, status_code=404)
+        prefix = _normalise_prefix(request.headers.get("x-forwarded-prefix"))
+        # Decode explicitly as UTF-8: the platform default (cp1252 on
+        # Windows) would corrupt non-ASCII bytes before the text is sent
+        # back out as text/css.
+        css = css_path.read_text(encoding="utf-8")
+        if prefix:
+            for asset_dir in ("/fonts/", "/fonts-terminal/", "/ds-assets/", "/assets/"):
+                css = css.replace(f"url({asset_dir}", f"url({prefix}{asset_dir}")
+                css = css.replace(f"url(\"{asset_dir}", f"url(\"{prefix}{asset_dir}")
+                css = css.replace(f"url('{asset_dir}", f"url('{prefix}{asset_dir}")
+        return Response(content=css, media_type="text/css")
+
    application.mount("/assets", StaticFiles(directory=WEB_DIST / "assets"), name="assets")

    @application.get("/{full_path:path}")
-    async def serve_spa(full_path: str):
+    async def serve_spa(full_path: str, request: Request):
+        prefix = _normalise_prefix(request.headers.get("x-forwarded-prefix"))
        file_path = WEB_DIST / full_path
        # Prevent path traversal via url-encoded sequences (%2e%2e/)
        if (
@ -3221,7 +3445,7 @@ def mount_spa(application: FastAPI):
            and file_path.is_file()
        ):
            return FileResponse(file_path)
-        return _serve_index()
+        return _serve_index(prefix)


 # ---------------------------------------------------------------------------
@ -3231,8 +3455,9 @@ def mount_spa(application: FastAPI):
 # Built-in dashboard themes — label + description only.  The actual color
 # definitions live in the frontend (web/src/themes/presets.ts).
 _BUILTIN_DASHBOARD_THEMES = [
-    {"name": "default",   "label": "Hermes Teal",  "description": "Classic dark teal — the canonical Hermes look"},
-    {"name": "midnight",  "label": "Midnight",      "description": "Deep blue-violet with cool accents"},
+    {"name": "default",       "label": "Hermes Teal",         "description": "Classic dark teal — the canonical Hermes look"},
+    {"name": "default-large", "label": "Hermes Teal (Large)", "description": "Hermes Teal with bigger fonts and roomier spacing"},
+    {"name": "midnight",      "label": "Midnight",            "description": "Deep blue-violet with cool accents"},
    {"name": "ember",     "label": "Ember",          "description": "Warm crimson and bronze — forge vibes"},
    {"name": "mono",      "label": "Mono",           "description": "Clean grayscale — minimal and focused"},
    {"name": "cyberpunk", "label": "Cyberpunk",      "description": "Neon green on black — matrix terminal"},
@ -3360,7 +3585,7 @@ def _normalise_theme_definition(data: Dict[str, Any]) -> Optional[Dict[str, Any]
    if isinstance(radius, str) and radius.strip():
        layout["radius"] = radius
    density = layout_src.get("density")
-    if isinstance(density, str) and density in ("compact", "comfortable", "spacious"):
+    if isinstance(density, str) and density in {"compact", "comfortable", "spacious"}:
        layout["density"] = density

    # Color overrides — keep only valid keys with string values.
@ -3617,12 +3842,16 @@ def _get_dashboard_plugins(force_rescan: bool = False) -> list:

@app.get("/api/dashboard/plugins")
 async def get_dashboard_plugins():
-    """Return discovered dashboard plugins."""
+    """Return discovered dashboard plugins (excludes user-hidden ones)."""
    plugins = _get_dashboard_plugins()
-    # Strip internal fields before sending to frontend.
+    # Read user's hidden plugins list from config.
+    config = load_config()
+    hidden: list = cfg_get(config, "dashboard", "hidden_plugins", default=[]) or []
+    # Strip internal fields before sending to frontend and filter out hidden.
    return [
        {k: v for k, v in p.items() if not k.startswith("_")}
        for p in plugins
+        if p["name"] not in hidden
    ]


@ -3633,6 +3862,268 @@ async def rescan_dashboard_plugins():
    return {"ok": True, "count": len(plugins)}


+class _AgentPluginInstallBody(BaseModel):
+    """Request body for ``POST /api/dashboard/agent-plugins/install``."""
+
+    # Plugin identifier; whitespace-stripped and passed to
+    # dashboard_install_plugin() as its first argument.
+    identifier: str
+    # Forwarded as dashboard_install_plugin(force=...).
+    force: bool = False
+    # Forwarded as dashboard_install_plugin(enable=...).
+    enable: bool = True
+
+
+def _strip_dashboard_manifest(p: Dict[str, Any]) -> Dict[str, Any]:
+    """Return a shallow copy of manifest ``p`` without ``_``-prefixed keys."""
+    public: Dict[str, Any] = {}
+    for key, value in p.items():
+        if key.startswith("_"):
+            continue
+        public[key] = value
+    return public
+
+
+def _merged_plugins_hub() -> Dict[str, Any]:
+    """Assemble the payload served by ``/api/dashboard/plugins/hub``.
+
+    The result has three parts:
+
+    * ``plugins`` - one row per plugin from ``_discover_all_plugins()``,
+      joined with its dashboard manifest when one was discovered.
+    * ``orphan_dashboard_plugins`` - dashboard manifests whose name matches
+      no discovered agent plugin.
+    * ``providers`` - the current and available memory-provider and
+      context-engine choices.
+    """
+    from hermes_cli.plugins_cmd import (
+        _discover_all_plugins,
+        _get_current_context_engine,
+        _get_current_memory_provider,
+        _discover_context_engines,
+        _discover_memory_providers,
+        _get_disabled_set,
+        _get_enabled_set,
+        _read_manifest as _read_plugin_manifest_at,
+    )
+
+    dashboard_list = _get_dashboard_plugins()
+    manifests = {str(entry["name"]): entry for entry in dashboard_list}
+
+    disabled = _get_disabled_set()
+    enabled = _get_enabled_set()
+
+    # dashboard.hidden_plugins drives the per-row ``user_hidden`` flag.
+    config = load_config()
+    hidden_plugins: list = cfg_get(config, "dashboard", "hidden_plugins", default=[]) or []
+
+    user_plugins_root = (get_hermes_home() / "plugins").resolve()
+
+    def _runtime_status(plugin_name: str) -> str:
+        # A disable entry is checked before an enable entry.
+        if plugin_name in disabled:
+            return "disabled"
+        return "enabled" if plugin_name in enabled else "inactive"
+
+    def _lives_under_user_tree(plugin_dir: Path) -> bool:
+        try:
+            plugin_dir.resolve().relative_to(user_plugins_root)
+        except ValueError:
+            return False
+        return True
+
+    def _auth_command_for(plugin_name: str, plugin_dir: Path) -> str:
+        # Non-empty only when a tool the plugin provides has a check_fn
+        # that currently returns a falsy value.
+        tool_names = _read_plugin_manifest_at(plugin_dir).get("provides_tools") or []
+        if not tool_names:
+            return ""
+        try:
+            from tools.registry import registry
+            for tool_name in tool_names:
+                entry = registry.get_entry(tool_name)
+                if entry and entry.check_fn and not entry.check_fn():
+                    return f"hermes auth {plugin_name}"
+        except Exception:
+            pass
+        return ""
+
+    def _provider_options(discover) -> List[Dict[str, str]]:
+        # A failure anywhere in discovery yields an empty option list.
+        try:
+            return [{"name": n, "description": desc} for n, desc in discover()]
+        except Exception:
+            return []
+
+    rows: List[Dict[str, Any]] = []
+    for name, version, description, source, dir_str in _discover_all_plugins():
+        status = _runtime_status(name)
+
+        dir_path = Path(dir_str)
+        dm = manifests.get(name)
+        has_manifest = dm is not None or (dir_path / "dashboard" / "manifest.json").exists()
+
+        under_user_tree = _lives_under_user_tree(dir_path)
+        removable = source in {"user", "git"} and under_user_tree and dir_path.is_dir()
+
+        auth_command = _auth_command_for(name, dir_path)
+
+        rows.append({
+            "name": name,
+            "version": version or "",
+            "description": description or "",
+            "source": source,
+            "runtime_status": status,
+            "has_dashboard_manifest": has_manifest,
+            "dashboard_manifest": _strip_dashboard_manifest(dm) if dm else None,
+            "path": dir_str,
+            "can_remove": removable,
+            "can_update_git": removable and (dir_path / ".git").exists(),
+            "auth_required": bool(auth_command),
+            "auth_command": auth_command,
+            "user_hidden": name in hidden_plugins,
+        })
+
+    known_agent_names = {row["name"] for row in rows}
+    orphan_dashboard = [
+        _strip_dashboard_manifest(entry)
+        for entry in dashboard_list
+        if str(entry["name"]) not in known_agent_names
+    ]
+
+    memory_options = _provider_options(_discover_memory_providers)
+    context_options = _provider_options(_discover_context_engines)
+
+    providers = {
+        "memory_provider": _get_current_memory_provider() or "",
+        "memory_options": memory_options,
+        "context_engine": _get_current_context_engine(),
+        "context_options": context_options,
+    }
+    return {
+        "plugins": rows,
+        "orphan_dashboard_plugins": orphan_dashboard,
+        "providers": providers,
+    }
+
+
+@app.get("/api/dashboard/plugins/hub")
+async def get_plugins_hub(request: Request):
+    """Return the merged agent-plugin / dashboard-extension view.
+
+    Requires the session token. Any failure while building the payload is
+    logged and surfaced as a generic 500.
+    """
+    _require_token(request)
+    try:
+        payload = _merged_plugins_hub()
+    except Exception as exc:
+        _log.warning("plugins/hub failed: %s", exc)
+        raise HTTPException(status_code=500, detail="Failed to build plugins hub.") from exc
+    return payload
+
+
+@app.post("/api/dashboard/agent-plugins/install")
+async def post_agent_plugin_install(request: Request, body: _AgentPluginInstallBody):
+    """Install an agent plugin and refresh the dashboard plugin cache.
+
+    Requires the session token. Responds 400 with the installer's error
+    message when the result is not marked ``ok``.
+    """
+    _require_token(request)
+    from hermes_cli.plugins_cmd import dashboard_install_plugin
+
+    identifier = body.identifier.strip()
+    outcome = dashboard_install_plugin(
+        identifier,
+        force=body.force,
+        enable=body.enable,
+    )
+    if outcome.get("ok"):
+        _get_dashboard_plugins(force_rescan=True)
+        # Strip internal paths from the response
+        outcome.pop("after_install_path", None)
+        return outcome
+    raise HTTPException(
+        status_code=400,
+        detail=outcome.get("error") or "Install failed.",
+    )
+
+
+def _validate_plugin_name(name: str) -> str:
+    """Return ``name`` unchanged, or raise a 400 if it could escape a directory.
+
+    Empty names and names containing ``/``, a backslash or ``..`` are
+    rejected.
+    """
+    suspicious = ("/", "\\", "..")
+    if name and not any(token in name for token in suspicious):
+        return name
+    raise HTTPException(status_code=400, detail="Invalid plugin name.")
+
+
+@app.post("/api/dashboard/agent-plugins/{name}/enable")
+async def post_agent_plugin_enable(request: Request, name: str):
+    """Enable the named agent plugin (token protected; 400 on failure)."""
+    _require_token(request)
+    name = _validate_plugin_name(name)
+    from hermes_cli.plugins_cmd import dashboard_set_agent_plugin_enabled
+
+    outcome = dashboard_set_agent_plugin_enabled(name, enabled=True)
+    if outcome.get("ok"):
+        return outcome
+    raise HTTPException(status_code=400, detail=outcome.get("error") or "Enable failed.")
+
+
+@app.post("/api/dashboard/agent-plugins/{name}/disable")
+async def post_agent_plugin_disable(request: Request, name: str):
+    """Disable the named agent plugin (token protected; 400 on failure)."""
+    _require_token(request)
+    name = _validate_plugin_name(name)
+    from hermes_cli.plugins_cmd import dashboard_set_agent_plugin_enabled
+
+    outcome = dashboard_set_agent_plugin_enabled(name, enabled=False)
+    if outcome.get("ok"):
+        return outcome
+    raise HTTPException(status_code=400, detail=outcome.get("error") or "Disable failed.")
+
+
+@app.post("/api/dashboard/agent-plugins/{name}/update")
+async def post_agent_plugin_update(request: Request, name: str):
+    """Update the named user plugin, then rescan dashboard plugins.
+
+    Token protected; responds 400 when the update result is not ``ok``.
+    """
+    _require_token(request)
+    name = _validate_plugin_name(name)
+    from hermes_cli.plugins_cmd import dashboard_update_user_plugin
+
+    outcome = dashboard_update_user_plugin(name)
+    if outcome.get("ok"):
+        _get_dashboard_plugins(force_rescan=True)
+        return outcome
+    raise HTTPException(status_code=400, detail=outcome.get("error") or "Update failed.")
+
+
+@app.delete("/api/dashboard/agent-plugins/{name}")
+async def delete_agent_plugin(request: Request, name: str):
+    """Remove the named user plugin, then rescan dashboard plugins.
+
+    Token protected; responds 400 when the removal result is not ``ok``.
+    """
+    _require_token(request)
+    name = _validate_plugin_name(name)
+    from hermes_cli.plugins_cmd import dashboard_remove_user_plugin
+
+    outcome = dashboard_remove_user_plugin(name)
+    if outcome.get("ok"):
+        _get_dashboard_plugins(force_rescan=True)
+        return outcome
+    raise HTTPException(status_code=400, detail=outcome.get("error") or "Remove failed.")
+
+
+class _PluginProvidersPutBody(BaseModel):
+    """Request body for ``PUT /api/dashboard/plugin-providers``.
+
+    A field left as ``None`` is not saved by the endpoint.
+    """
+
+    # Passed to _save_memory_provider() when not None.
+    memory_provider: Optional[str] = None
+    # Passed to _save_context_engine() when not None.
+    context_engine: Optional[str] = None
+
+
+@app.put("/api/dashboard/plugin-providers")
+async def put_plugin_providers(request: Request, body: _PluginProvidersPutBody):
+    """Save the memory provider and/or context engine choice (writes config.yaml).
+
+    Token protected. Only fields that are not ``None`` are saved; the memory
+    provider is handled before the context engine.
+    """
+    _require_token(request)
+    from hermes_cli.plugins_cmd import (
+        _save_context_engine,
+        _save_memory_provider,
+    )
+
+    pending = (
+        (body.memory_provider, _save_memory_provider),
+        (body.context_engine, _save_context_engine),
+    )
+    for selection, persist in pending:
+        if selection is not None:
+            persist(selection)
+    return {"ok": True}
+
+
+class _PluginVisibilityBody(BaseModel):
+    """Request body for ``POST /api/dashboard/plugins/{name}/visibility``."""
+
+    # True adds the plugin to dashboard.hidden_plugins; False removes it.
+    hidden: bool
+
+
+@app.post("/api/dashboard/plugins/{name}/visibility")
+async def post_plugin_visibility(request: Request, name: str, body: _PluginVisibilityBody):
+    """Toggle a plugin's sidebar visibility (persists to config.yaml dashboard.hidden_plugins)."""
+    _require_token(request)
+    name = _validate_plugin_name(name)
+
+    config = load_config()
+    # Replace a missing or non-dict "dashboard" section with a fresh dict.
+    section = config.get("dashboard")
+    if not isinstance(section, dict):
+        section = config["dashboard"] = {}
+
+    current: list = section.get("hidden_plugins") or []
+    if not isinstance(current, list):
+        current = []
+
+    already_hidden = name in current
+    if body.hidden:
+        if not already_hidden:
+            current.append(name)
+    elif already_hidden:
+        current.remove(name)
+
+    section["hidden_plugins"] = current
+    save_config(config)
+    return {"ok": True, "name": name, "hidden": body.hidden}
+
+
@app.get("/dashboard-plugins/{plugin_name}/{file_path:path}")
 async def serve_plugin_asset(plugin_name: str, file_path: str):
    """Serve static assets from a dashboard plugin directory.
--- a/hermes_cli/webhook.py
+++ b/hermes_cli/webhook.py
@ -124,11 +124,11 @@ def webhook_command(args):
    if not _require_webhook_enabled():
        return

-    if sub in ("subscribe", "add"):
+    if sub in {"subscribe", "add"}:
        _cmd_subscribe(args)
-    elif sub in ("list", "ls"):
+    elif sub in {"list", "ls"}:
        _cmd_list(args)
-    elif sub in ("remove", "rm"):
+    elif sub in {"remove", "rm"}:
        _cmd_remove(args)
    elif sub == "test":
        _cmd_test(args)