Merge remote-tracking branch 'origin/main' into bb/tui-long-session-perf

# Conflicts: # ui-tui/src/app/interfaces.ts
2026-07-29 18:46:59 +00:00 · 2026-04-26 13:39:57 -05:00 · 2026-04-26 13:39:57 -05:00 · cc16d0ef77
commit cc16d0ef77
parent a8fcd1c742 087e74d4d7
82 changed files with 6072 additions and 712 deletions
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@ -14,6 +14,7 @@ from datetime import datetime
 from typing import Any, Dict, List, Optional, Set, Tuple

 from hermes_constants import OPENROUTER_BASE_URL
+from hermes_cli.config import get_env_value
 import hermes_cli.auth as auth_mod
 from hermes_cli.auth import (
    CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
@ -1273,7 +1274,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        def _is_source_suppressed(_p, _s):  # type: ignore[misc]
            return False
    if provider == "openrouter":
-        token = os.getenv("OPENROUTER_API_KEY", "").strip()
+        # Check both os.environ and ~/.hermes/.env file
+        token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
        if token:
            source = "env:OPENROUTER_API_KEY"
            if _is_source_suppressed(provider, source):
@ -1299,7 +1301,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool

    env_url = ""
    if pconfig.base_url_env_var:
-        env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
+        env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")

    env_vars = list(pconfig.api_key_env_vars)
    if provider == "anthropic":
@ -1310,7 +1312,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        ]

    for env_var in env_vars:
-        token = os.getenv(env_var, "").strip()
+        # Check both os.environ and ~/.hermes/.env file
+        token = (get_env_value(env_var) or "").strip()
        if not token:
            continue
        source = f"env:{env_var}"
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@ -145,10 +145,11 @@ DEFAULT_CONTEXT_LENGTHS = {
    "claude": 200000,
    # OpenAI — GPT-5 family (most have 400k; specific overrides first)
    # Source: https://developers.openai.com/api/docs/models
-    # GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
-    # can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
-    # Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
-    "gpt-5.5": 400000,
+    # GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
+    # ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
+    # provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
+    # This hardcoded value is only reached when every probe misses.
+    "gpt-5.5": 1050000,
    "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
@ -164,7 +165,17 @@ DEFAULT_CONTEXT_LENGTHS = {
    "gemma-4-31b": 256000,
    "gemma-3": 131072,
    "gemma": 8192,  # fallback for older gemma models
-    # DeepSeek
+    # DeepSeek — V4 family ships with a 1M context window. The legacy
+    # aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
+    # mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
+    # and inherit the same 1M window. The ``deepseek`` substring entry
+    # below remains as a 128K fallback for older / unknown DeepSeek model
+    # ids (e.g. via custom endpoints).
+    # https://api-docs.deepseek.com/zh-cn/quick_start/pricing
+    "deepseek-v4-pro": 1_000_000,
+    "deepseek-v4-flash": 1_000_000,
+    "deepseek-chat": 1_000_000,
+    "deepseek-reasoner": 1_000_000,
    "deepseek": 128000,
    # Meta
    "llama": 131072,
--- a/agent/nous_rate_guard.py
+++ b/agent/nous_rate_guard.py
@ -180,3 +180,145 @@ def format_remaining(seconds: float) -> str:
    h, remainder = divmod(s, 3600)
    m = remainder // 60
    return f"{h}h {m}m" if m else f"{h}h"
+
+
+# Buckets with reset windows shorter than this are treated as transient
+# (upstream jitter, secondary throttling) rather than a genuine quota
+# exhaustion worth a cross-session breaker trip.
+_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
+
+
+def is_genuine_nous_rate_limit(
+    *,
+    headers: Optional[Mapping[str, str]] = None,
+    last_known_state: Optional[Any] = None,
+) -> bool:
+    """Decide whether a 429 from Nous Portal is a real account rate limit.
+
+    Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi,
+    MiMo, Hermes, ...) behind one endpoint.  A 429 can mean either:
+
+      (a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is
+          exhausted — a genuine rate limit that will last until the
+          bucket resets.
+      (b) The upstream provider is out of capacity for a specific model
+          — transient, clears in seconds, and has nothing to do with
+          the caller's quota on Nous.
+
+    Tripping the cross-session breaker on (b) blocks ALL Nous requests
+    (and all models, since Nous is one provider key) for minutes even
+    though the caller's account is healthy and a different model would
+    have worked.  That's the bug users hit when DeepSeek V4 Pro 429s
+    trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro.
+
+    We tell the two apart by looking at:
+
+      1. The 429 response's own ``x-ratelimit-*`` headers.  Nous emits
+         the full suite on every response including 429s.  An exhausted
+         bucket (``remaining == 0`` with a reset window >= 60s) is
+         proof of (a).
+      2. The last-known-good rate-limit state captured by
+         ``_capture_rate_limits()`` on the previous successful
+         response.  If any bucket there already had ``remaining == 0``
+         with a reset window >= 60s, the current 429 is almost
+         certainly (a) continuing from that condition.
+
+    If neither signal fires, we treat the 429 as (b): fail the single
+    request, let the retry loop or model-switch proceed, and do NOT
+    write the cross-session breaker file.
+
+    Returns True when the evidence points at (a).
+    """
+    # Signal 1: current 429 response headers.
+    state = _parse_buckets_from_headers(headers)
+    if _has_exhausted_bucket(state):
+        return True
+
+    # Signal 2: last-known-good state from a recent successful response.
+    # Accepts either a RateLimitState (dataclass from rate_limit_tracker)
+    # or a dict of bucket snapshots.
+    if last_known_state is not None and _has_exhausted_bucket_in_object(last_known_state):
+        return True
+
+    return False
+
+
+def _parse_buckets_from_headers(
+    headers: Optional[Mapping[str, str]],
+) -> dict[str, tuple[Optional[int], Optional[float]]]:
+    """Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
+
+    Args:
+        headers: Response headers; names are matched case-insensitively.
+
+    Returns:
+        Mapping of bucket tag ("requests", "requests-1h", "tokens",
+        "tokens-1h") to ``(remaining, reset_seconds)``.  A value that is
+        absent or unparseable is None; a bucket with neither value is
+        omitted.  Empty dict when no rate-limit headers are present.
+    """
+    if not headers:
+        return {}
+
+    lowered = {k.lower(): v for k, v in headers.items()}
+    if not any(k.startswith("x-ratelimit-") for k in lowered):
+        return {}
+
+    def _maybe_int(raw: Optional[str]) -> Optional[int]:
+        if raw is None:
+            return None
+        try:
+            # Go through float() so values like "0.0" still parse.
+            return int(float(raw))
+        except (TypeError, ValueError, OverflowError):
+            # int(float("inf")) raises OverflowError and int(float("nan"))
+            # raises ValueError; a malformed header must never crash the
+            # 429 handling path, so both degrade to "unknown".
+            return None
+
+    def _maybe_float(raw: Optional[str]) -> Optional[float]:
+        if raw is None:
+            return None
+        try:
+            return float(raw)
+        except (TypeError, ValueError):
+            return None
+
+    result: dict[str, tuple[Optional[int], Optional[float]]] = {}
+    for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
+        remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
+        reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
+        # Keep the bucket if either half parsed; callers handle None halves.
+        if remaining is not None or reset is not None:
+            result[tag] = (remaining, reset)
+    return result
+
+
+def _has_exhausted_bucket(
+    buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
+) -> bool:
+    """Return True when any bucket has remaining == 0 AND a meaningful reset window."""
+    for remaining, reset in buckets.values():
+        if remaining is None or remaining > 0:
+            continue
+        if reset is None:
+            continue
+        if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
+            return True
+    return False
+
+
+def _has_exhausted_bucket_in_object(state: Any) -> bool:
+    """Check a RateLimitState-like object for an exhausted bucket.
+
+    Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets
+    exposed as attributes ``requests_min``, ``requests_hour``,
+    ``tokens_min``, ``tokens_hour``) and falls back gracefully for any
+    object missing those attributes.
+    """
+    for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
+        bucket = getattr(state, attr, None)
+        if bucket is None:
+            continue
+        limit = getattr(bucket, "limit", 0) or 0
+        remaining = getattr(bucket, "remaining", 0) or 0
+        # Prefer the adjusted "remaining_seconds_now" property when present;
+        # fall back to raw reset_seconds.
+        reset = getattr(bucket, "remaining_seconds_now", None)
+        if reset is None:
+            reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
+        if limit <= 0:
+            continue
+        if remaining > 0:
+            continue
+        if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
+            return True
+    return False
--- a/agent/onboarding.py
+++ b/agent/onboarding.py
@ -0,0 +1,144 @@
+"""
+Contextual first-touch onboarding hints.
+
+Instead of blocking first-run questionnaires, show a one-time hint the *first*
+time a user hits a behavior fork — message-while-running, first long-running
+tool, etc.  Each hint is shown once per install (tracked in ``config.yaml`` under
+``onboarding.seen.<flag>``) and then never again.
+
+Keep this module tiny and dependency-free so both the CLI and gateway can import
+it without pulling in heavy modules.
+"""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import Any, Mapping, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# -------------------------------------------------------------------------
+# Flag names (stable — used as config.yaml keys under onboarding.seen)
+# -------------------------------------------------------------------------
+
+BUSY_INPUT_FLAG = "busy_input_prompt"
+TOOL_PROGRESS_FLAG = "tool_progress_prompt"
+
+
+# -------------------------------------------------------------------------
+# Hint content
+# -------------------------------------------------------------------------
+
+def busy_input_hint_gateway(mode: str) -> str:
+    """Hint shown the first time a user messages while the agent is busy.
+
+    ``mode`` is the effective busy_input_mode that was just applied, so the
+    message matches reality ("I just interrupted…" vs "I just queued…").
+    """
+    if mode == "queue":
+        return (
+            "💡 First-time tip — I queued your message instead of interrupting. "
+            "Send `/busy interrupt` to make new messages stop the current task "
+            "immediately, or `/busy status` to check. This notice won't appear again."
+        )
+    return (
+        "💡 First-time tip — I just interrupted my current task to answer you. "
+        "Send `/busy queue` to queue follow-ups for after the current task instead, "
+        "or `/busy status` to check. This notice won't appear again."
+    )
+
+
+def busy_input_hint_cli(mode: str) -> str:
+    """CLI version of the busy-input hint (plain text, no markdown)."""
+    if mode == "queue":
+        return (
+            "(tip) Your message was queued for the next turn. "
+            "Use /busy interrupt to make Enter stop the current run instead. "
+            "This tip only shows once."
+        )
+    return (
+        "(tip) Your message interrupted the current run. "
+        "Use /busy queue to queue messages for the next turn instead. "
+        "This tip only shows once."
+    )
+
+
+def tool_progress_hint_gateway() -> str:
+    return (
+        "💡 First-time tip — that tool took a while and I'm streaming every step. "
+        "If the progress messages feel noisy, send `/verbose` to cycle modes "
+        "(all → new → off). This notice won't appear again."
+    )
+
+
+def tool_progress_hint_cli() -> str:
+    return (
+        "(tip) That tool ran for a while. Use /verbose to cycle tool-progress "
+        "display modes (all -> new -> off -> verbose). This tip only shows once."
+    )
+
+
+# -------------------------------------------------------------------------
+# State read / write
+# -------------------------------------------------------------------------
+
+def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
+    onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
+    if not isinstance(onboarding, Mapping):
+        return {}
+    seen = onboarding.get("seen")
+    return seen if isinstance(seen, Mapping) else {}
+
+
+def is_seen(config: Mapping[str, Any], flag: str) -> bool:
+    """Return True if the user has already been shown this first-touch hint."""
+    return bool(_get_seen_dict(config).get(flag))
+
+
+def mark_seen(config_path: Path, flag: str) -> bool:
+    """Persist ``onboarding.seen.<flag> = True`` to ``config_path``.
+
+    Uses the atomic YAML writer so a concurrent process can't observe a
+    partially-written file.  A missing config file is treated as empty.
+    Returns True on success, False on any error (onboarding is best-effort).
+    """
+    try:
+        import yaml
+        from utils import atomic_yaml_write
+    except Exception as e:  # pragma: no cover — dependency issue
+        logger.debug("onboarding: failed to import yaml/utils: %s", e)
+        return False
+
+    try:
+        cfg: dict = {}
+        if config_path.exists():
+            with open(config_path, encoding="utf-8") as f:
+                cfg = yaml.safe_load(f) or {}
+        if not isinstance(cfg.get("onboarding"), dict):
+            cfg["onboarding"] = {}
+        seen = cfg["onboarding"].get("seen")
+        if not isinstance(seen, dict):
+            seen = {}
+            cfg["onboarding"]["seen"] = seen
+        if seen.get(flag) is True:
+            return True  # already marked — nothing to do
+        seen[flag] = True
+        atomic_yaml_write(config_path, cfg)
+        return True
+    except Exception as e:
+        logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
+        return False
+
+
+__all__ = [
+    "BUSY_INPUT_FLAG",
+    "TOOL_PROGRESS_FLAG",
+    "busy_input_hint_gateway",
+    "busy_input_hint_cli",
+    "tool_progress_hint_gateway",
+    "tool_progress_hint_cli",
+    "is_seen",
+    "mark_seen",
+]
--- a/agent/skill_commands.py
+++ b/agent/skill_commands.py
@ -329,7 +329,7 @@ def build_skill_invocation_message(

    loaded_skill, skill_dir, skill_name = loaded
    activation_note = (
-        f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want '
+        f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
        "you to follow its instructions. The full skill content is loaded below.]"
    )
    return _build_skill_message(
@ -368,7 +368,7 @@ def build_preloaded_skills_prompt(

        loaded_skill, skill_dir, skill_name = loaded
        activation_note = (
-            f'[SYSTEM: The user launched this CLI session with the "{skill_name}" skill '
+            f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
            "preloaded. Treat its instructions as active guidance for the duration of this "
            "session unless the user overrides them.]"
        )
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@ -824,7 +824,9 @@ delegation:
 # Display
 # =============================================================================
 display:
-  # Use compact banner mode
+  # Use compact banner mode (hides the ASCII-art banner, shows a single line).
+  #   true:  Compact single-line banner
+  #   false: Full ASCII banner with tool/skill summary (default)
  compact: false

  # Tool progress display level (CLI and gateway)
@ -838,12 +840,15 @@ display:
  # Gateway-only natural mid-turn assistant updates.
  # When true, completed assistant status messages are sent as separate chat
  # messages. This is independent of tool_progress and gateway streaming.
+  #   true:  Send mid-turn assistant updates as separate messages (default)
+  #   false: Only send the final response
  interim_assistant_messages: true

-  # What Enter does when Hermes is already busy in the CLI.
+  # What Enter does when Hermes is already busy (CLI and gateway platforms).
  #   interrupt: Interrupt the current run and redirect Hermes (default)
  #   queue:     Queue your message for the next turn
-  # Ctrl+C always interrupts regardless of this setting.
+  # Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
+  # Toggle at runtime with /busy <interrupt|queue> (/busy status to check).
  busy_input_mode: interrupt

  # Background process notifications (gateway/messaging only).
@ -859,17 +864,22 @@ display:
  # Play terminal bell when agent finishes a response.
  # Useful for long-running tasks — your terminal will ding when the agent is done.
  # Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
+  #   true:  Ring the terminal bell on each response
+  #   false: Silent (default)
  bell_on_complete: false

  # Show model reasoning/thinking before each response.
  # When enabled, a dim box shows the model's thought process above the response.
  # Toggle at runtime with /reasoning show or /reasoning hide.
+  #   true:  Show the reasoning box
+  #   false: Hide reasoning (default)
  show_reasoning: false

  # Stream tokens to the terminal as they arrive instead of waiting for the
  # full response. The response box opens on first token and text appears
  # line-by-line. Tool calls are still captured silently.
-  # Stream tokens to the terminal in real-time. Disable to wait for full responses.
+  #   true:  Stream tokens as they arrive (default)
+  #   false: Wait for the full response before rendering
  streaming: true

  # ───────────────────────────────────────────────────────────────────────────
@ -879,10 +889,15 @@ display:
  # response box label, and branding text. Change at runtime with /skin <name>.
  #
  # Built-in skins:
-  #   default  — Classic Hermes gold/kawaii
-  #   ares     — Crimson/bronze war-god theme with spinner wings
-  #   mono     — Clean grayscale monochrome
-  #   slate    — Cool blue developer-focused
+  #   default        — Classic Hermes gold/kawaii
+  #   ares           — Crimson/bronze war-god theme with spinner wings
+  #   mono           — Clean grayscale monochrome
+  #   slate          — Cool blue developer-focused
+  #   daylight       — Bright light-mode theme
+  #   warm-lightmode — Warm paper-tone light-mode theme
+  #   poseidon       — Sea-green/teal Olympian theme
+  #   sisyphus       — Earthy stone-and-moss theme
+  #   charizard      — Fiery orange dragon theme
  #
  # Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
  # Schema (all fields optional, missing values inherit from default):
--- a/cli.py
+++ b/cli.py
@ -417,6 +417,11 @@ def load_cli_config() -> Dict[str, Any]:
            "base_url": "",    # Direct OpenAI-compatible endpoint for subagents
            "api_key": "",     # API key for delegation.base_url (falls back to OPENAI_API_KEY)
        },
+        "onboarding": {
+            # First-touch hint flags (see agent/onboarding.py).  Each hint is
+            # shown once per install then latched here.
+            "seen": {},
+        },
    }
    
    # Track whether the config file explicitly set terminal config.
@ -1373,7 +1378,7 @@ def _resolve_attachment_path(raw_path: str) -> Path | None:


 def _format_process_notification(evt: dict) -> "str | None":
-    """Format a process notification event into a [SYSTEM: ...] message.
+    """Format a process notification event into an [IMPORTANT: ...] message.

    Handles both completion events (notify_on_complete) and watch pattern
    match events from the unified completion_queue.
@ -1383,14 +1388,14 @@ def _format_process_notification(evt: dict) -> "str | None":
    _cmd = evt.get("command", "unknown")

    if evt_type == "watch_disabled":
-        return f"[SYSTEM: {evt.get('message', '')}]"
+        return f"[IMPORTANT: {evt.get('message', '')}]"

    if evt_type == "watch_match":
        _pat = evt.get("pattern", "?")
        _out = evt.get("output", "")
        _sup = evt.get("suppressed", 0)
        text = (
-            f"[SYSTEM: Background process {_sid} matched "
+            f"[IMPORTANT: Background process {_sid} matched "
            f"watch pattern \"{_pat}\".\n"
            f"Command: {_cmd}\n"
            f"Matched output:\n{_out}"
@ -1404,7 +1409,7 @@ def _format_process_notification(evt: dict) -> "str | None":
    _exit = evt.get("exit_code", "?")
    _out = evt.get("output", "")
    return (
-        f"[SYSTEM: Background process {_sid} completed "
+        f"[IMPORTANT: Background process {_sid} completed "
        f"(exit code {_exit}).\n"
        f"Command: {_cmd}\n"
        f"Output:\n{_out}]"
@ -4910,6 +4915,12 @@ class HermesCLI:
        if self.agent:
            self.agent.session_id = new_session_id
            self.agent.session_start = now
+            # Redirect the JSON session log to the new branch session file so
+            # messages written after branching land in the correct file.
+            if hasattr(self.agent, "session_log_file") and hasattr(self.agent, "logs_dir"):
+                self.agent.session_log_file = (
+                    self.agent.logs_dir / f"session_{new_session_id}.json"
+                )
            self.agent.reset_session_state()
            if hasattr(self.agent, "_last_flushed_db_idx"):
                self.agent._last_flushed_db_idx = len(self.conversation_history)
@ -5153,27 +5164,29 @@ class HermesCLI:
        _cprint(f"  ✓ Model switched: {result.new_model}")
        _cprint(f"    Provider: {provider_label}")

+        # Context: always resolve via the provider-aware chain so Codex OAuth,
+        # Copilot, and Nous-enforced caps win over the raw models.dev entry
+        # (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth).
        mi = result.model_info
+        try:
+            from hermes_cli.model_switch import resolve_display_context_length
+            ctx = resolve_display_context_length(
+                result.new_model,
+                result.target_provider,
+                base_url=result.base_url or self.base_url or "",
+                api_key=result.api_key or self.api_key or "",
+                model_info=mi,
+            )
+            if ctx:
+                _cprint(f"    Context: {ctx:,} tokens")
+        except Exception:
+            pass
        if mi:
-            if mi.context_window:
-                _cprint(f"    Context: {mi.context_window:,} tokens")
            if mi.max_output:
                _cprint(f"    Max output: {mi.max_output:,} tokens")
            if mi.has_cost_data():
                _cprint(f"    Cost: {mi.format_cost()}")
            _cprint(f"    Capabilities: {mi.format_capabilities()}")
-        else:
-            try:
-                from agent.model_metadata import get_model_context_length
-                ctx = get_model_context_length(
-                    result.new_model,
-                    base_url=result.base_url or self.base_url,
-                    api_key=result.api_key or self.api_key,
-                    provider=result.target_provider,
-                )
-                _cprint(f"    Context: {ctx:,} tokens")
-            except Exception:
-                pass

        cache_enabled = (
            (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
@ -6122,8 +6135,6 @@ class HermesCLI:
            self._handle_agents_command()
        elif canonical == "background":
            self._handle_background_command(cmd_original)
-        elif canonical == "btw":
-            self._handle_btw_command(cmd_original)
        elif canonical == "queue":
            # Extract prompt after "/queue " or "/q "
            parts = cmd_original.split(None, 1)
@ -6410,122 +6421,6 @@ class HermesCLI:
        self._background_tasks[task_id] = thread
        thread.start()

-    def _handle_btw_command(self, cmd: str):
-        """Handle /btw <question> — ephemeral side question using session context.
-
-        Snapshots the current conversation history, spawns a no-tools agent in
-        a background thread, and prints the answer without persisting anything
-        to the main session.
-        """
-        parts = cmd.strip().split(maxsplit=1)
-        if len(parts) < 2 or not parts[1].strip():
-            _cprint("  Usage: /btw <question>")
-            _cprint("  Example: /btw what module owns session title sanitization?")
-            _cprint("  Answers using session context. No tools, not persisted.")
-            return
-
-        question = parts[1].strip()
-        task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{uuid.uuid4().hex[:6]}"
-
-        if not self._ensure_runtime_credentials():
-            _cprint("  (>_<) Cannot start /btw: no valid credentials.")
-            return
-
-        turn_route = self._resolve_turn_agent_config(question)
-        history_snapshot = list(self.conversation_history)
-
-        preview = question[:60] + ("..." if len(question) > 60 else "")
-        _cprint(f'  💬 /btw: "{preview}"')
-
-        def run_btw():
-            try:
-                btw_agent = AIAgent(
-                    model=turn_route["model"],
-                    api_key=turn_route["runtime"].get("api_key"),
-                    base_url=turn_route["runtime"].get("base_url"),
-                    provider=turn_route["runtime"].get("provider"),
-                    api_mode=turn_route["runtime"].get("api_mode"),
-                    acp_command=turn_route["runtime"].get("command"),
-                    acp_args=turn_route["runtime"].get("args"),
-                    max_iterations=8,
-                    enabled_toolsets=[],
-                    quiet_mode=True,
-                    verbose_logging=False,
-                    session_id=task_id,
-                    platform="cli",
-                    reasoning_config=self.reasoning_config,
-                    service_tier=self.service_tier,
-                    request_overrides=turn_route.get("request_overrides"),
-                    providers_allowed=self._providers_only,
-                    providers_ignored=self._providers_ignore,
-                    providers_order=self._providers_order,
-                    provider_sort=self._provider_sort,
-                    provider_require_parameters=self._provider_require_params,
-                    provider_data_collection=self._provider_data_collection,
-                    fallback_model=self._fallback_model,
-                    session_db=None,
-                    skip_memory=True,
-                    skip_context_files=True,
-                    persist_session=False,
-                )
-
-                btw_prompt = (
-                    "[Ephemeral /btw side question. Answer using the conversation "
-                    "context. No tools available. Be direct and concise.]\n\n"
-                    + question
-                )
-                result = btw_agent.run_conversation(
-                    user_message=btw_prompt,
-                    conversation_history=history_snapshot,
-                    task_id=task_id,
-                )
-
-                response = (result.get("final_response") or "") if result else ""
-                if not response and result and result.get("error"):
-                    response = f"Error: {result['error']}"
-
-                # TUI refresh before printing
-                if self._app:
-                    self._app.invalidate()
-                    time.sleep(0.05)
-                print()
-
-                if response:
-                    try:
-                        from hermes_cli.skin_engine import get_active_skin
-                        _skin = get_active_skin()
-                        _resp_color = _skin.get_color("response_border", "#4F6D4A")
-                    except Exception:
-                        _resp_color = "#4F6D4A"
-
-                    ChatConsole().print(Panel(
-                        _render_final_assistant_content(response, mode=self.final_response_markdown),
-                        title=f"[{_resp_color} bold]⚕ /btw[/]",
-                        title_align="left",
-                        border_style=_resp_color,
-                        box=rich_box.HORIZONTALS,
-                        padding=(1, 4),
-                    ))
-                else:
-                    _cprint("  💬 /btw: (no response)")
-
-                if self.bell_on_complete:
-                    sys.stdout.write("\a")
-                    sys.stdout.flush()
-
-            except Exception as e:
-                if self._app:
-                    self._app.invalidate()
-                    time.sleep(0.05)
-                print()
-                _cprint(f"  ❌ /btw failed: {e}")
-            finally:
-                if self._app:
-                    self._invalidate(min_interval=0)
-
-        thread = threading.Thread(target=run_btw, daemon=True, name=f"btw-{task_id}")
-        thread.start()
-
    @staticmethod
    def _try_launch_chrome_debug(port: int, system: str) -> bool:
        """Try to launch Chrome/Chromium with remote debugging enabled.
@ -7328,7 +7223,7 @@ class HermesCLI:
            change_detail = ". ".join(change_parts) + ". " if change_parts else ""
            self.conversation_history.append({
                "role": "user",
-                "content": f"[SYSTEM: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
+                "content": f"[IMPORTANT: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
            })

            # Persist session immediately so the session log reflects the
@ -7410,6 +7305,31 @@ class HermesCLI:
                    _cprint(f"  {line}")
                except Exception:
                    pass
+                # First-touch onboarding: on the first tool in this process
+                # that takes longer than the threshold while we're in the
+                # noisiest progress mode, print a one-time hint about
+                # /verbose.  Latched on self so it fires at most once per
+                # process; persisted to config.yaml so it never fires again
+                # across processes either.
+                try:
+                    if (
+                        not getattr(self, "_long_tool_hint_fired", False)
+                        and self.tool_progress_mode == "all"
+                        and duration >= 30.0
+                    ):
+                        from agent.onboarding import (
+                            TOOL_PROGRESS_FLAG,
+                            is_seen,
+                            mark_seen,
+                            tool_progress_hint_cli,
+                        )
+                        if not is_seen(CLI_CONFIG, TOOL_PROGRESS_FLAG):
+                            self._long_tool_hint_fired = True
+                            _cprint(f"  {_DIM}{tool_progress_hint_cli()}{_RST}")
+                            mark_seen(_hermes_home / "config.yaml", TOOL_PROGRESS_FLAG)
+                            CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[TOOL_PROGRESS_FLAG] = True
+                except Exception:
+                    pass
            self._invalidate()
            return
        if event_type != "tool.started":
@ -9293,6 +9213,24 @@ class HermesCLI:
                                         f"agent_running={self._agent_running}\n")
                        except Exception:
                            pass
+                    # First-touch onboarding: on the very first busy-while-running
+                    # event for this install, print a one-line tip explaining the
+                    # /busy knob.  Flag persists to config.yaml and never fires
+                    # again.  Guarded for exceptions so onboarding can't break
+                    # the input loop.
+                    try:
+                        from agent.onboarding import (
+                            BUSY_INPUT_FLAG,
+                            busy_input_hint_cli,
+                            is_seen,
+                            mark_seen,
+                        )
+                        if not is_seen(CLI_CONFIG, BUSY_INPUT_FLAG):
+                            _cprint(f"  {_DIM}{busy_input_hint_cli(self.busy_input_mode)}{_RST}")
+                            mark_seen(_hermes_home / "config.yaml", BUSY_INPUT_FLAG)
+                            CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[BUSY_INPUT_FLAG] = True
+                    except Exception:
+                        pass
                else:
                    self._pending_input.put(payload)
                event.app.current_buffer.reset(append_to_history=True)
@ -9909,7 +9847,7 @@ class HermesCLI:
                status = cli_ref._command_status or "Processing command..."
                return f"{frame} {status}"
            if cli_ref._agent_running:
-                return "type a message + Enter to interrupt, Ctrl+C to cancel"
+                return "msg=interrupt · /queue · /bg · /steer · Ctrl+C cancel"
            if cli_ref._voice_mode:
                return "type or Ctrl+B to record"
            return ""
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@ -715,7 +715,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
    # Always prepend cron execution guidance so the agent knows how
    # delivery works and can suppress delivery when appropriate.
    cron_hint = (
-        "[SYSTEM: You are running as a scheduled cron job. "
+        "[IMPORTANT: You are running as a scheduled cron job. "
        "DELIVERY: Your final response will be automatically delivered "
        "to the user — do NOT use send_message or try to deliver "
        "the output yourself. Just produce your report/output as your "
@ -751,7 +751,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
            parts.append("")
        parts.extend(
            [
-                f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
+                f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
                "",
                content,
            ]
@ -759,7 +759,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:

    if skipped:
        notice = (
-            f"[SYSTEM: The following skill(s) were listed for this job but could not be found "
+            f"[IMPORTANT: The following skill(s) were listed for this job but could not be found "
            f"and were skipped: {', '.join(skipped)}. "
            f"Start your response with a brief notice so the user is aware, e.g.: "
            f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']"
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@ -41,6 +41,15 @@ if [ "$(id -u)" = "0" ]; then
            echo "Warning: chown failed (rootless container?) — continuing anyway"
    fi

+    # Ensure config.yaml is readable by the hermes runtime user even if it was
+    # edited on the host after initial ownership setup. Must run here (as root)
+    # rather than after the gosu drop, otherwise a non-root caller like
+    # `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
+    if [ -f "$HERMES_HOME/config.yaml" ]; then
+        chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
+        chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
+    fi
+
    echo "Dropping root privileges"
    exec gosu hermes "$0" "$@"
 fi
@ -67,13 +76,6 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
    cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
 fi

-# Ensure the main config file remains accessible to the hermes runtime user
-# even if it was edited on the host after initial ownership setup.
-if [ -f "$HERMES_HOME/config.yaml" ]; then
-    chown hermes:hermes "$HERMES_HOME/config.yaml"
-    chmod 640 "$HERMES_HOME/config.yaml"
-fi
-
 # SOUL.md
 if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
    cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@ -1025,7 +1025,20 @@ class BasePlatformAdapter(ABC):
        self._post_delivery_callbacks: Dict[str, Any] = {}
        self._expected_cancelled_tasks: set[asyncio.Task] = set()
        self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
-        # Chats where auto-TTS on voice input is disabled (set by /voice off)
+        # Auto-TTS on voice input: ``_auto_tts_default`` is the global default
+        # (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect).
+        # Per-chat overrides live in two sets populated from ``_voice_mode``:
+        #   - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on``
+        #     or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when
+        #     the global default is False.
+        #   - ``_auto_tts_disabled_chats``: chat explicitly opted out via
+        #     ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the
+        #     global default is True.
+        # The gate in _process_message() is:
+        #   fire if chat in _auto_tts_enabled_chats
+        #     OR (_auto_tts_default and chat not in _auto_tts_disabled_chats)
+        self._auto_tts_default: bool = False
+        self._auto_tts_enabled_chats: set = set()
        self._auto_tts_disabled_chats: set = set()
        # Chats where typing indicator is paused (e.g. during approval waits).
        # _keep_typing skips send_typing when the chat_id is in this set.
@ -1047,6 +1060,21 @@ class BasePlatformAdapter(ABC):
    def fatal_error_retryable(self) -> bool:
        return self._fatal_error_retryable

+    def _should_auto_tts_for_chat(self, chat_id: str) -> bool:
+        """Decide whether a voice message in ``chat_id`` gets an auto-TTS reply.
+
+        Precedence (Issue #16007), highest first:
+          1. Chat is in ``_auto_tts_enabled_chats`` (explicit ``/voice on`` or
+             ``/voice tts``) → True, regardless of ``voice.auto_tts``.
+          2. Chat is in ``_auto_tts_disabled_chats`` (explicit ``/voice off``)
+             → False.
+          3. Otherwise the global ``_auto_tts_default`` decides.
+        """
+        # Per-chat opt-in wins over everything else, including a global False.
+        if chat_id in self._auto_tts_enabled_chats:
+            return True
+        # No opt-in: an explicit opt-out vetoes, else use the global default.
+        opted_out = chat_id in self._auto_tts_disabled_chats
+        return False if opted_out else bool(self._auto_tts_default)
+
    def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
        self._fatal_error_handler = handler

@ -2214,12 +2242,14 @@ class BasePlatformAdapter(ABC):
                    logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
                
                # Auto-TTS: if voice message, generate audio FIRST (before sending text)
-                # Skipped when the chat has voice mode disabled (/voice off)
+                # Gated via ``_should_auto_tts_for_chat``: fires when the chat has
+                # an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is
+                # True globally and no ``/voice off`` has been issued.
                _tts_path = None
-                if (event.message_type == MessageType.VOICE
+                if (self._should_auto_tts_for_chat(event.source.chat_id)
+                        and event.message_type == MessageType.VOICE
                        and text_content
-                        and not media_files
-                        and event.source.chat_id not in self._auto_tts_disabled_chats):
+                        and not media_files):
                    try:
                        from tools.tts_tool import text_to_speech_tool, check_tts_requirements
                        if check_tts_requirements():
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@ -2315,11 +2315,6 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_background(interaction: discord.Interaction, prompt: str):
            await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")

-        @tree.command(name="btw", description="Ephemeral side question using session context")
-        @discord.app_commands.describe(question="Your side question (no tools, not persisted)")
-        async def slash_btw(interaction: discord.Interaction, question: str):
-            await self._run_simple_slash(interaction, f"/btw {question}")
-
        # ── Auto-register any gateway-available commands not yet on the tree ──
        # This ensures new commands added to COMMAND_REGISTRY in
        # hermes_cli/commands.py automatically appear as Discord slash
--- a/gateway/platforms/slack.py
+++ b/gateway/platforms/slack.py
@ -207,8 +207,31 @@ class SlackAdapter(BasePlatformAdapter):
            async def handle_assistant_thread_context_changed(event, say):
                await self._handle_assistant_thread_lifecycle_event(event)

-            # Register slash command handler
-            @self._app.command("/hermes")
+            # Register slash command handler(s)
+            #
+            # Every gateway command from COMMAND_REGISTRY is a native Slack
+            # slash, matching Discord and Telegram's model (e.g. /btw, /stop,
+            # /model work directly without /hermes prefix). A single regex
+            # matcher dispatches all of them to one handler so we don't need
+            # N identical @app.command() decorators.
+            #
+            # The slash commands must ALSO be declared in the Slack app
+            # manifest (see `hermes slack manifest`). In Socket Mode, Slack
+            # routes the command event through the socket regardless of the
+            # manifest's request URL, but it will not deliver an event for
+            # a slash command the manifest doesn't declare.
+            from hermes_cli.commands import slack_native_slashes
+            import re as _re
+
+            _slash_names = [name for name, _d, _h in slack_native_slashes()]
+            if _slash_names:
+                _slash_pattern = _re.compile(
+                    r"^/(?:" + "|".join(_re.escape(n) for n in _slash_names) + r")$"
+                )
+            else:  # pragma: no cover - registry always non-empty
+                _slash_pattern = _re.compile(r"^/hermes$")
+
+            @self._app.command(_slash_pattern)
            async def handle_hermes_command(ack, command):
                await ack()
                await self._handle_slash_command(command)
@ -1561,7 +1584,20 @@ class SlackAdapter(BasePlatformAdapter):
            return ""

    async def _handle_slash_command(self, command: dict) -> None:
-        """Handle /hermes slash command."""
+        """Handle Slack slash commands.
+
+        Every gateway command in COMMAND_REGISTRY is registered as a native
+        Slack slash (``/btw``, ``/stop``, ``/model``, etc.), matching the
+        Discord and Telegram model. The slash name itself is the command;
+        any text after it is the argument list.
+
+        The legacy ``/hermes <subcommand> [args]`` form is preserved for
+        backward compatibility with older workspace manifests and for users
+        who want a single entry point for free-form questions (``/hermes
+        what's the weather`` — non-slash text is treated as a regular
+        message).
+        """
+        slash_name = (command.get("command") or "").lstrip("/").strip()
        text = command.get("text", "").strip()
        user_id = command.get("user_id", "")
        channel_id = command.get("channel_id", "")
@ -1571,20 +1607,25 @@ class SlackAdapter(BasePlatformAdapter):
        if team_id and channel_id:
            self._channel_team[channel_id] = team_id

-        # Map subcommands to gateway commands — derived from central registry.
-        # Also keep "compact" as a Slack-specific alias for /compress.
-        from hermes_cli.commands import slack_subcommand_map
-        subcommand_map = slack_subcommand_map()
-        subcommand_map["compact"] = "/compress"
-        first_word = text.split()[0] if text else ""
-        if first_word in subcommand_map:
-            # Preserve arguments after the subcommand
-            rest = text[len(first_word):].strip()
-            text = f"{subcommand_map[first_word]} {rest}".strip() if rest else subcommand_map[first_word]
-        elif text:
-            pass  # Treat as a regular question
+        if slash_name in ("hermes", ""):
+            # Legacy /hermes <subcommand> [args] routing + free-form questions.
+            # Empty slash_name falls into this branch for backward compat
+            # with any caller that didn't populate command["command"].
+            from hermes_cli.commands import slack_subcommand_map
+            subcommand_map = slack_subcommand_map()
+            subcommand_map["compact"] = "/compress"
+            first_word = text.split()[0] if text else ""
+            if first_word in subcommand_map:
+                rest = text[len(first_word):].strip()
+                text = f"{subcommand_map[first_word]} {rest}".strip() if rest else subcommand_map[first_word]
+            elif text:
+                pass  # Treat as a regular question
+            else:
+                text = "/help"
        else:
-            text = "/help"
+            # Native slash — /<slash_name> [args].  Route directly through the
+            # gateway command dispatcher by prepending the slash.
+            text = f"/{slash_name} {text}".strip()

        source = self.build_source(
            chat_id=channel_id,
--- a/gateway/run.py
+++ b/gateway/run.py
@ -591,20 +591,20 @@ def _parse_session_key(session_key: str) -> "dict | None":


 def _format_gateway_process_notification(evt: dict) -> "str | None":
-    """Format a watch pattern event from completion_queue into a [SYSTEM:] message."""
+    """Format a watch pattern event from completion_queue into an [IMPORTANT:] message."""
    evt_type = evt.get("type", "completion")
    _sid = evt.get("session_id", "unknown")
    _cmd = evt.get("command", "unknown")

    if evt_type == "watch_disabled":
-        return f"[SYSTEM: {evt.get('message', '')}]"
+        return f"[IMPORTANT: {evt.get('message', '')}]"

    if evt_type == "watch_match":
        _pat = evt.get("pattern", "?")
        _out = evt.get("output", "")
        _sup = evt.get("suppressed", 0)
        text = (
-            f"[SYSTEM: Background process {_sid} matched "
+            f"[IMPORTANT: Background process {_sid} matched "
            f"watch pattern \"{_pat}\".\n"
            f"Command: {_cmd}\n"
            f"Matched output:\n{_out}"
@ -881,23 +881,74 @@ class GatewayRunner:
            return
        if disabled:
            disabled_chats.add(chat_id)
+            # ``/voice off`` also clears any explicit enable — it's a hard override.
+            enabled_chats = getattr(adapter, "_auto_tts_enabled_chats", None)
+            if isinstance(enabled_chats, set):
+                enabled_chats.discard(chat_id)
        else:
            disabled_chats.discard(chat_id)

-    def _sync_voice_mode_state_to_adapter(self, adapter) -> None:
-        """Restore persisted /voice off state into a live platform adapter."""
-        disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
-        if not isinstance(disabled_chats, set):
+    def _set_adapter_auto_tts_enabled(self, adapter, chat_id: str, enabled: bool) -> None:
+        """Update an adapter's per-chat auto-TTS opt-in set if present.
+
+        Used for ``/voice on``/``/voice tts`` where the user explicitly wants
+        auto-TTS even when ``voice.auto_tts`` is False globally.
+
+        Args:
+            adapter: Object whose ``_auto_tts_enabled_chats`` attribute (when it
+                is a ``set``) is mutated in place.
+            chat_id: Chat identifier to add to or remove from the opt-in set.
+            enabled: True adds ``chat_id`` to the opt-in set and also removes it
+                from ``_auto_tts_disabled_chats`` when that attribute is a set;
+                False only removes ``chat_id`` from the opt-in set (it does not
+                add the chat to the disabled set).
+        """
+        enabled_chats = getattr(adapter, "_auto_tts_enabled_chats", None)
+        # Attribute missing or not a set: this adapter has no per-chat opt-in
+        # state to update, so do nothing.
+        if not isinstance(enabled_chats, set):
             return
+        if enabled:
+            enabled_chats.add(chat_id)
+            # An explicit opt-in clears any stale /voice off for this chat.
+            disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
+            if isinstance(disabled_chats, set):
+                disabled_chats.discard(chat_id)
+        else:
+            # discard() is a no-op when the chat was never opted in.
+            enabled_chats.discard(chat_id)
+
+    def _sync_voice_mode_state_to_adapter(self, adapter) -> None:
+        """Restore persisted /voice state into a live platform adapter.
+
+        Populates three fields from config + ``self._voice_mode``:
+          - ``_auto_tts_default``: global default from ``voice.auto_tts``
+          - ``_auto_tts_enabled_chats``: chats with mode ``voice_only``/``all``
+          - ``_auto_tts_disabled_chats``: chats with mode ``off``
+        """
        platform = getattr(adapter, "platform", None)
        if not isinstance(platform, Platform):
            return
-        disabled_chats.clear()
+
+        disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
+        enabled_chats = getattr(adapter, "_auto_tts_enabled_chats", None)
+        if not isinstance(disabled_chats, set) and not isinstance(enabled_chats, set):
+            return
+
+        # Push the global voice.auto_tts default (config.yaml) onto the adapter.
+        # Lazy import to avoid adding a module-level dep from gateway → hermes_cli.
+        try:
+            from hermes_cli.config import load_config as _load_full_config
+            _full_cfg = _load_full_config()
+            _auto_tts_default = bool(
+                (_full_cfg.get("voice") or {}).get("auto_tts", False)
+            )
+        except Exception:
+            _auto_tts_default = False
+        if hasattr(adapter, "_auto_tts_default"):
+            adapter._auto_tts_default = _auto_tts_default
+
        prefix = f"{platform.value}:"
-        disabled_chats.update(
-            key[len(prefix):] for key, mode in self._voice_mode.items()
-            if mode == "off" and key.startswith(prefix)
-        )
+        if isinstance(disabled_chats, set):
+            disabled_chats.clear()
+            disabled_chats.update(
+                key[len(prefix):] for key, mode in self._voice_mode.items()
+                if mode == "off" and key.startswith(prefix)
+            )
+        if isinstance(enabled_chats, set):
+            enabled_chats.clear()
+            enabled_chats.update(
+                key[len(prefix):] for key, mode in self._voice_mode.items()
+                if mode in ("voice_only", "all") and key.startswith(prefix)
+            )

    async def _safe_adapter_disconnect(self, adapter, platform) -> None:
        """Call adapter.disconnect() defensively, swallowing any error.
@ -1579,6 +1630,27 @@ class GatewayRunner:
                f"I'll respond to your message shortly."
            )

+        # First-touch onboarding: the very first time a user sends a message
+        # while the agent is busy, append a one-time hint explaining the
+        # queue/interrupt knob.  Flag is persisted to config.yaml so it never
+        # fires again on this install.
+        try:
+            from agent.onboarding import (
+                BUSY_INPUT_FLAG,
+                busy_input_hint_gateway,
+                is_seen,
+                mark_seen,
+            )
+            _user_cfg = _load_gateway_config()
+            if not is_seen(_user_cfg, BUSY_INPUT_FLAG):
+                message = (
+                    f"{message}\n\n"
+                    f"{busy_input_hint_gateway('queue' if is_queue_mode else 'interrupt')}"
+                )
+                mark_seen(_hermes_home / "config.yaml", BUSY_INPUT_FLAG)
+        except Exception as _onb_err:
+            logger.debug("Failed to apply busy-input onboarding hint: %s", _onb_err)
+
        thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
        try:
            await adapter._send_with_retry(
@ -3426,6 +3498,8 @@ class GatewayRunner:

            # /background must bypass the running-agent guard — it starts a
            # parallel task and must never interrupt the active conversation.
+            # /btw is an alias of /background and resolves to the same canonical
+            # name, so this branch handles both commands.
            if _cmd_def_inner and _cmd_def_inner.name == "background":
                return await self._handle_background_command(event)

@ -3701,9 +3775,6 @@ class GatewayRunner:
        if canonical == "background":
            return await self._handle_background_command(event)

-        if canonical == "btw":
-            return await self._handle_btw_command(event)
-
        if canonical == "steer":
            # No active agent — /steer has no tool call to inject into.
            # Strip the prefix so downstream treats it as a normal user
@ -4161,7 +4232,7 @@ class GatewayRunner:
                    if _loaded:
                        _loaded_skill, _skill_dir, _display_name = _loaded
                        _note = (
-                            f'[SYSTEM: The "{_display_name}" skill is auto-loaded. '
+                            f'[IMPORTANT: The "{_display_name}" skill is auto-loaded. '
                            f"Follow its instructions for this session.]"
                        )
                        _part = _build_skill_message(_loaded_skill, _skill_dir, _note)
@ -5977,7 +6048,7 @@ class GatewayRunner:
            self._voice_mode[voice_key] = "voice_only"
            self._save_voice_modes()
            if adapter:
-                self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
+                self._set_adapter_auto_tts_enabled(adapter, chat_id, enabled=True)
            return (
                "Voice mode enabled.\n"
                "I'll reply with voice when you send voice messages.\n"
@ -5993,7 +6064,7 @@ class GatewayRunner:
            self._voice_mode[voice_key] = "all"
            self._save_voice_modes()
            if adapter:
-                self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
+                self._set_adapter_auto_tts_enabled(adapter, chat_id, enabled=True)
            return (
                "Auto-TTS enabled.\n"
                "All replies will include a voice message."
@ -6032,7 +6103,7 @@ class GatewayRunner:
                self._voice_mode[voice_key] = "voice_only"
                self._save_voice_modes()
                if adapter:
-                    self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
+                    self._set_adapter_auto_tts_enabled(adapter, chat_id, enabled=True)
                return "Voice mode enabled."
            else:
                self._voice_mode[voice_key] = "off"
@ -6083,7 +6154,7 @@ class GatewayRunner:
                adapter._voice_sources[guild_id] = event.source.to_dict()
            self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "all"
            self._save_voice_modes()
-            self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False)
+            self._set_adapter_auto_tts_enabled(adapter, event.source.chat_id, enabled=True)
            return (
                f"Joined voice channel **{voice_channel.name}**.\n"
                f"I'll speak my replies and listen to you. Use /voice leave to disconnect."
@ -6601,177 +6672,6 @@ class GatewayRunner:
            except Exception:
                pass

-    async def _handle_btw_command(self, event: MessageEvent) -> str:
-        """Handle /btw <question> — ephemeral side question in the same chat."""
-        question = event.get_command_args().strip()
-        if not question:
-            return (
-                "Usage: /btw <question>\n"
-                "Example: /btw what module owns session title sanitization?\n\n"
-                "Answers using session context. No tools, not persisted."
-            )
-
-        source = event.source
-        session_key = self._session_key_for_source(source)
-
-        # Guard: one /btw at a time per session
-        existing = getattr(self, "_active_btw_tasks", {}).get(session_key)
-        if existing and not existing.done():
-            return "A /btw is already running for this chat. Wait for it to finish."
-
-        if not hasattr(self, "_active_btw_tasks"):
-            self._active_btw_tasks: dict = {}
-
-        import uuid as _uuid
-        task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{_uuid.uuid4().hex[:6]}"
-        _task = asyncio.create_task(self._run_btw_task(question, source, session_key, task_id))
-        self._background_tasks.add(_task)
-        self._active_btw_tasks[session_key] = _task
-
-        def _cleanup(task):
-            self._background_tasks.discard(task)
-            if self._active_btw_tasks.get(session_key) is task:
-                self._active_btw_tasks.pop(session_key, None)
-
-        _task.add_done_callback(_cleanup)
-
-        preview = question[:60] + ("..." if len(question) > 60 else "")
-        return f'💬 /btw: "{preview}"\nReply will appear here shortly.'
-
-    async def _run_btw_task(
-        self, question: str, source, session_key: str, task_id: str,
-    ) -> None:
-        """Execute an ephemeral /btw side question and deliver the answer."""
-        from run_agent import AIAgent
-
-        adapter = self.adapters.get(source.platform)
-        if not adapter:
-            logger.warning("No adapter for platform %s in /btw task %s", source.platform, task_id)
-            return
-
-        _thread_meta = {"thread_id": source.thread_id} if source.thread_id else None
-
-        try:
-            user_config = _load_gateway_config()
-            model, runtime_kwargs = self._resolve_session_agent_runtime(
-                source=source,
-                session_key=session_key,
-                user_config=user_config,
-            )
-            if not runtime_kwargs.get("api_key"):
-                await adapter.send(
-                    source.chat_id,
-                    "❌ /btw failed: no provider credentials configured.",
-                    metadata=_thread_meta,
-                )
-                return
-
-            platform_key = _platform_config_key(source.platform)
-            reasoning_config = self._resolve_session_reasoning_config(
-                source=source,
-                session_key=session_key,
-            )
-            self._service_tier = self._load_service_tier()
-            turn_route = self._resolve_turn_agent_config(question, model, runtime_kwargs)
-            pr = self._provider_routing
-
-            # Snapshot history from running agent or stored transcript
-            running_agent = self._running_agents.get(session_key)
-            if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
-                history_snapshot = list(getattr(running_agent, "_session_messages", []) or [])
-            else:
-                session_entry = self.session_store.get_or_create_session(source)
-                history_snapshot = self.session_store.load_transcript(session_entry.session_id)
-
-            btw_prompt = (
-                "[Ephemeral /btw side question. Answer using the conversation "
-                "context. No tools available. Be direct and concise.]\n\n"
-                + question
-            )
-
-            def run_sync():
-                agent = AIAgent(
-                    model=turn_route["model"],
-                    **turn_route["runtime"],
-                    max_iterations=8,
-                    quiet_mode=True,
-                    verbose_logging=False,
-                    enabled_toolsets=[],
-                    reasoning_config=reasoning_config,
-                    service_tier=self._service_tier,
-                    request_overrides=turn_route.get("request_overrides"),
-                    providers_allowed=pr.get("only"),
-                    providers_ignored=pr.get("ignore"),
-                    providers_order=pr.get("order"),
-                    provider_sort=pr.get("sort"),
-                    provider_require_parameters=pr.get("require_parameters", False),
-                    provider_data_collection=pr.get("data_collection"),
-                    session_id=task_id,
-                    platform=platform_key,
-                    session_db=None,
-                    fallback_model=self._fallback_model,
-                    skip_memory=True,
-                    skip_context_files=True,
-                    persist_session=False,
-                )
-                try:
-                    return agent.run_conversation(
-                        user_message=btw_prompt,
-                        conversation_history=history_snapshot,
-                        task_id=task_id,
-                    )
-                finally:
-                    self._cleanup_agent_resources(agent)
-
-            result = await self._run_in_executor_with_context(run_sync)
-
-            response = (result.get("final_response") or "") if result else ""
-            if not response and result and result.get("error"):
-                response = f"Error: {result['error']}"
-            if not response:
-                response = "(No response generated)"
-
-            media_files, response = adapter.extract_media(response)
-            images, text_content = adapter.extract_images(response)
-            preview = question[:60] + ("..." if len(question) > 60 else "")
-            header = f'💬 /btw: "{preview}"\n\n'
-
-            if text_content:
-                await adapter.send(
-                    chat_id=source.chat_id,
-                    content=header + text_content,
-                    metadata=_thread_meta,
-                )
-            elif not images and not media_files:
-                await adapter.send(
-                    chat_id=source.chat_id,
-                    content=header + "(No response generated)",
-                    metadata=_thread_meta,
-                )
-
-            for image_url, alt_text in (images or []):
-                try:
-                    await adapter.send_image(chat_id=source.chat_id, image_url=image_url, caption=alt_text)
-                except Exception:
-                    pass
-
-            for media_path, _is_voice in (media_files or []):
-                try:
-                    await adapter.send_file(chat_id=source.chat_id, file_path=media_path)
-                except Exception:
-                    pass
-
-        except Exception as e:
-            logger.exception("/btw task %s failed", task_id)
-            try:
-                await adapter.send(
-                    chat_id=source.chat_id,
-                    content=f"❌ /btw failed: {e}",
-                    metadata=_thread_meta,
-                )
-            except Exception:
-                pass
-
    async def _handle_reasoning_command(self, event: MessageEvent) -> str:
        """Handle /reasoning command — manage reasoning effort and display toggle.

@ -7573,7 +7473,7 @@ class GatewayRunner:
            change_detail = ". ".join(change_parts) + ". " if change_parts else ""
            reload_msg = {
                "role": "user",
-                "content": f"[SYSTEM: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
+                "content": f"[IMPORTANT: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
            }
            try:
                session_entry = self.session_store.get_or_create_session(event.source)
@ -8512,7 +8412,7 @@ class GatewayRunner:
                    from tools.ansi_strip import strip_ansi
                    _out = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else ""
                    synth_text = (
-                        f"[SYSTEM: Background process {session_id} completed "
+                        f"[IMPORTANT: Background process {session_id} completed "
                        f"(exit code {session.exit_code}).\n"
                        f"Command: {session.command}\n"
                        f"Output:\n{_out}]"
@ -8822,6 +8722,25 @@ class GatewayRunner:
            with _lock:
                self._agent_cache.pop(session_key, None)

+    @staticmethod
+    def _init_cached_agent_for_turn(agent: Any, interrupt_depth: int) -> None:
+        """Prepare a cached agent's per-turn bookkeeping before the next turn.
+
+        ``_api_call_count`` is zeroed on every call.  The activity pair
+        (``_last_activity_ts`` / ``_last_activity_desc``) is refreshed only
+        when ``interrupt_depth`` is 0, i.e. for a fresh external turn:
+
+        * The two fields are updated together because the description is
+          meant to describe the activity *at* the timestamp; touching only
+          one would make get_activity_summary() misleading.
+        * For interrupt-recursive turns (depth > 0) both are left alone so
+          the inactivity watchdog keeps accumulating idle time for a stuck
+          turn and can still fire its 30-min timeout (#15654).
+        * At depth 0 the refresh is required: otherwise a session that sat
+          idle for 29 min would trip the watchdog before the new turn makes
+          its first API call (#9051).
+        """
+        is_fresh_external_turn = interrupt_depth == 0
+        if is_fresh_external_turn:
+            started_at = time.time()
+            agent._last_activity_ts = started_at
+            agent._last_activity_desc = "starting new turn (cached)"
+        agent._api_call_count = 0
    def _release_evicted_agent_soft(self, agent: Any) -> None:
        """Soft cleanup for cache-evicted agents — preserves session tool state.

@ -9360,16 +9279,62 @@ class GatewayRunner:
        last_tool = [None]  # Mutable container for tracking in closure
        last_progress_msg = [None]  # Track last message for dedup
        repeat_count = [0]  # How many times the same message repeated
-        
+        # First-touch onboarding latch: fires at most once per run, even if
+        # several tools exceed the threshold.
+        long_tool_hint_fired = [False]
+        _LONG_TOOL_THRESHOLD_S = 30.0
+
        def progress_callback(event_type: str, tool_name: str = None, preview: str = None, args: dict = None, **kwargs):
            """Callback invoked by agent on tool lifecycle events."""
            if not progress_queue or not _run_still_current():
                return

+            # First-touch onboarding: the first time a tool takes longer than
+            # _LONG_TOOL_THRESHOLD_S during a run that's streaming every tool
+            # (progress_mode == "all"), append a one-time hint suggesting
+            # /verbose.  We only fire when (a) the user hasn't seen the hint
+            # before and (b) /verbose is actually usable on this platform
+            # (gateway gate must be open).  The CLI has its own trigger.
+            if event_type == "tool.completed" and not long_tool_hint_fired[0]:
+                try:
+                    duration = kwargs.get("duration") or 0
+                    if duration >= _LONG_TOOL_THRESHOLD_S and progress_mode == "all":
+                        from agent.onboarding import (
+                            TOOL_PROGRESS_FLAG,
+                            is_seen,
+                            mark_seen,
+                            tool_progress_hint_gateway,
+                        )
+                        _cfg = _load_gateway_config()
+                        gate_on = bool(_cfg.get("display", {}).get("tool_progress_command", False))
+                        if gate_on and not is_seen(_cfg, TOOL_PROGRESS_FLAG):
+                            long_tool_hint_fired[0] = True
+                            progress_queue.put(tool_progress_hint_gateway())
+                            mark_seen(_hermes_home / "config.yaml", TOOL_PROGRESS_FLAG)
+                except Exception as _hint_err:
+                    logger.debug("tool-progress onboarding hint failed: %s", _hint_err)
+                return
+
            # Only act on tool.started events (ignore tool.completed, reasoning.available, etc.)
            if event_type not in ("tool.started",):
                return

+            # Suppress tool-progress bubbles once the user has sent `stop`.
+            # When the LLM response carries N parallel tool calls, the agent
+            # fires N "tool.started" events back-to-back before checking for
+            # interrupts — without this guard, a late `stop` still renders
+            # all N as 🔍 bubbles, making the interrupt feel ignored.
+            # (agent lives in run_sync's scope; agent_holder[0] is the shared
+            # handle across nested scopes — see line ~9607.)
+            try:
+                _agent_for_interrupt = agent_holder[0] if agent_holder else None
+                if _agent_for_interrupt is not None and getattr(
+                    _agent_for_interrupt, "is_interrupted", False
+                ):
+                    return
+            except Exception:
+                pass
+
            # "new" mode: only report when tool changes
            if progress_mode == "new" and tool_name == last_tool[0]:
                return
@ -9476,6 +9441,22 @@ class GatewayRunner:

                    raw = progress_queue.get_nowait()

+                    # Drain silently when interrupted: events queued in the
+                    # window between tool parse and interrupt processing
+                    # should not render as bubbles.  The "⚡ Interrupting
+                    # current task" message is sent separately and is the
+                    # last progress-flavored bubble the user should see.
+                    try:
+                        _agent_for_interrupt = agent_holder[0] if agent_holder else None
+                        if _agent_for_interrupt is not None and getattr(
+                            _agent_for_interrupt, "is_interrupted", False
+                        ):
+                            # Drop this event and continue draining.
+                            await asyncio.sleep(0)
+                            continue
+                    except Exception:
+                        pass
+
                    # Handle dedup messages: update last line with repeat counter
                    if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
                        _, base_msg, count = raw
@ -9804,12 +9785,7 @@ class GatewayRunner:
                                _cache.move_to_end(session_key)
                            except KeyError:
                                pass
-                        # Reset activity timestamp so the inactivity timeout
-                        # handler doesn't see stale idle time from the previous
-                        # turn and immediately kill this agent.  (#9051)
-                        agent._last_activity_ts = time.time()
-                        agent._last_activity_desc = "starting new turn (cached)"
-                        agent._api_call_count = 0
+                        self._init_cached_agent_for_turn(agent, _interrupt_depth)
                        logger.debug("Reusing cached agent for session %s", session_key)

            if agent is None:
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@ -467,11 +467,27 @@ def _resolve_api_key_provider_secret(
            pass
        return "", ""

+    from hermes_cli.config import get_env_value
    for env_var in pconfig.api_key_env_vars:
-        val = os.getenv(env_var, "").strip()
+        # Check both os.environ and ~/.hermes/.env file
+        val = (get_env_value(env_var) or "").strip()
        if has_usable_secret(val):
            return val, env_var

+    # Fallback: try credential pool (e.g. zai key stored via auth.json)
+    try:
+        from agent.credential_pool import load_pool
+        pool = load_pool(provider_id)
+        if pool and pool.has_credentials():
+            entry = pool.peek()
+            if entry:
+                key = getattr(entry, "access_token", "") or getattr(entry, "runtime_api_key", "")
+                key = str(key).strip()
+                if has_usable_secret(key):
+                    return key, f"credential_pool:{provider_id}"
+    except Exception:
+        pass
+
    return "", ""


@ -4244,10 +4260,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                )

            from hermes_cli.models import (
-                _PROVIDER_MODELS, get_pricing_for_provider,
+                get_curated_nous_model_ids, get_pricing_for_provider,
                check_nous_free_tier, partition_nous_models_by_tier,
            )
-            model_ids = _PROVIDER_MODELS.get("nous", [])
+            model_ids = get_curated_nous_model_ids()

            print()
            unavailable_models: list = []
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@ -84,9 +84,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("deny", "Deny a pending dangerous command", "Session",
               gateway_only=True),
    CommandDef("background", "Run a prompt in the background", "Session",
-               aliases=("bg",), args_hint="<prompt>"),
-    CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
-               args_hint="<question>"),
+               aliases=("bg", "btw"), args_hint="<prompt>"),
    CommandDef("agents", "Show active agents and running tasks", "Session",
               aliases=("tasks",)),
    CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
@ -808,6 +806,114 @@ def discord_skill_commands_by_category(
    return trimmed_categories, uncategorized, hidden


+# ---------------------------------------------------------------------------
+# Slack native slash commands
+# ---------------------------------------------------------------------------
+
+# Slack slash command name constraints: lowercase a-z, 0-9, hyphens,
+# underscores. Max 32 chars. Slack app manifest accepts up to 50 slash
+# commands per app.
+_SLACK_MAX_SLASH_COMMANDS = 50
+_SLACK_NAME_LIMIT = 32
+_SLACK_INVALID_CHARS = re.compile(r"[^a-z0-9_\-]")
+
+
+def _sanitize_slack_name(raw: str) -> str:
+    """Convert a command name to a valid Slack slash command name.
+
+    The name is lowercased, every character outside ``a-z``, ``0-9``,
+    ``-`` and ``_`` is removed, leading/trailing separators are stripped,
+    and the result is clamped to ``_SLACK_NAME_LIMIT`` characters.
+
+    Returns an empty string when nothing usable remains; callers are
+    expected to treat that as "skip this command".
+    """
+    name = _SLACK_INVALID_CHARS.sub("", raw.lower()).strip("-_")
+    # Clamping can cut the name right after a separator (e.g. "aaa…-bcd"
+    # becoming "aaa…-"), so strip once more after truncating to keep the
+    # "no trailing separator" guarantee for long names too.
+    return name[:_SLACK_NAME_LIMIT].rstrip("-_")
+
+
+def slack_native_slashes() -> list[tuple[str, str, str]]:
+    """Return (slash_name, description, usage_hint) triples for Slack.
+
+    Each gateway-available entry of ``COMMAND_REGISTRY`` becomes its own
+    Slack slash command (e.g. ``/btw``, ``/stop``, ``/model``) rather than
+    a ``/hermes <verb>`` subcommand.  Entries are collected in three
+    passes, in this priority order:
+
+    1. canonical command names,
+    2. aliases (so ``/background``, ``/bg`` and ``/btw`` all work),
+    3. plugin-registered slash commands.
+
+    Names are sanitized for Slack, de-duplicated, and the list is clamped
+    to ``_SLACK_MAX_SLASH_COMMANDS``.  ``/hermes`` is always the first
+    entry so the legacy ``/hermes <subcommand>`` form stays available for
+    anything dropped by the clamp and for free-form questions.
+    """
+    gates = _resolve_config_gates()
+
+    # /hermes is reserved up front as the catch-all top-level command.
+    slashes: list[tuple[str, str, str]] = [
+        ("hermes", "Talk to Hermes or run a subcommand", "[subcommand] [args]"),
+    ]
+    taken: set[str] = {"hermes"}
+
+    def _register(name: str, desc: str, hint: str) -> None:
+        sanitized = _sanitize_slack_name(name)
+        is_new = bool(sanitized) and sanitized not in taken
+        if is_new and len(slashes) < _SLACK_MAX_SLASH_COMMANDS:
+            # Descriptions and usage hints are deliberately kept short.
+            slashes.append((sanitized, desc[:140], hint[:100]))
+            taken.add(sanitized)
+
+    # Pass 1: canonical names go first so they win slots if the cap is hit.
+    for command in COMMAND_REGISTRY:
+        if _is_gateway_available(command, gates):
+            _register(command.name, command.description, command.args_hint or "")
+
+    # Pass 2: aliases.  An alias that sanitizes to an already-registered
+    # name is dropped by the dedup check inside _register.
+    for command in COMMAND_REGISTRY:
+        if not _is_gateway_available(command, gates):
+            continue
+        for alias in command.aliases:
+            alias_desc = f"Alias for /{command.name} — {command.description}"
+            _register(alias, alias_desc, command.args_hint or "")
+
+    # Pass 3: plugin commands.
+    for plugin_name, plugin_desc, plugin_hint in _iter_plugin_command_entries():
+        _register(plugin_name, plugin_desc, plugin_hint or "")
+
+    return slashes
+
+
+def slack_app_manifest(request_url: str = "https://hermes-agent.local/slack/commands") -> dict[str, Any]:
+    """Build the slash-command portion of a Slack app manifest.
+
+    One manifest entry is produced per triple returned by
+    ``slack_native_slashes()``.  ``request_url`` is copied into every
+    entry's ``url`` field; the manifest schema requires it per command,
+    but in Socket Mode Slack routes the command event over the WebSocket
+    instead, so a placeholder URL is fine.
+
+    Only ``{"features": {"slash_commands": [...]}}`` is returned.  Callers
+    compose it into (or merge it with) a full manifest, which keeps this
+    function decoupled from the rest of the manifest schema
+    (display_information, oauth_config, settings, ...).
+    """
+
+    def _manifest_entry(name: str, desc: str, usage: str) -> dict[str, Any]:
+        item: dict[str, Any] = {
+            "command": f"/{name}",
+            "description": desc or f"Run /{name}",
+            "should_escape": False,
+            "url": request_url,
+        }
+        # usage_hint is optional; leave it out entirely when empty.
+        if usage:
+            item["usage_hint"] = usage
+        return item
+
+    commands = [_manifest_entry(*triple) for triple in slack_native_slashes()]
+    return {"features": {"slash_commands": commands}}
+
+
 def slack_subcommand_map() -> dict[str, str]:
    """Return subcommand -> /command mapping for Slack /hermes handler.

--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -465,6 +465,7 @@ DEFAULT_CONFIG = {
        "command_timeout": 30,  # Timeout for browser commands in seconds (screenshot, navigate, etc.)
        "record_sessions": False,  # Auto-record browser sessions as WebM videos
        "allow_private_urls": False,  # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
+        "auto_local_for_private_urls": True,  # When a cloud provider is set, auto-spawn local Chromium for LAN/localhost URLs instead of sending them to the cloud
        "cdp_url": "",  # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
        # CDP supervisor — dialog + frame detection via a persistent WebSocket.
        # Active only when a CDP-capable backend is attached (Browserbase or
@ -959,6 +960,27 @@ DEFAULT_CONFIG = {
        "backup_count": 3,     # Number of rotated backup files to keep
    },

+    # Remotely-hosted model catalog manifest.  When enabled, the CLI fetches
+    # curated model lists for OpenRouter and Nous Portal from this URL,
+    # falling back to the in-repo snapshot on network failure.  Lets us
+    # update model picker lists without shipping a hermes-agent release.
+    # The default URL is served by the docs site GitHub Pages deploy.
+    "model_catalog": {
+        "enabled": True,
+        "url": "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json",
+        # Disk cache TTL in hours.  Beyond this, the CLI refetches on the
+        # next /model or `hermes model` invocation; network failures
+        # silently fall back to the stale cache.
+        "ttl_hours": 24,
+        # Optional per-provider override URLs for third parties that want
+        # to self-host their own curation list using the same schema.
+        # Example:
+        #   providers:
+        #     openrouter:
+        #       url: https://example.com/my-curation.json
+        "providers": {},
+    },
+
    # Network settings — workarounds for connectivity issues.
    "network": {
        # Force IPv4 connections.  On servers with broken or unreachable IPv6,
@ -995,6 +1017,13 @@ DEFAULT_CONFIG = {
        "min_interval_hours": 24,
    },

+    # Contextual first-touch onboarding hints (see agent/onboarding.py).
+    # Each hint is shown once per install and then latched here so it
+    # never fires again.  Users can wipe the section to re-see all hints.
+    "onboarding": {
+        "seen": {},
+    },
+
    # Config schema version - bump this when adding new required fields
    "_config_version": 22,
 }
--- a/hermes_cli/fallback_cmd.py
+++ b/hermes_cli/fallback_cmd.py
@ -0,0 +1,361 @@
+"""
+hermes fallback — manage the fallback provider chain.
+
+Fallback providers are tried in order when the primary model fails with
+rate-limit, overload, or connection errors. See:
+https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers
+
+Subcommands:
+  hermes fallback [list]   Show the current fallback chain (default when no subcommand)
+  hermes fallback add      Pick provider + model via the same picker as `hermes model`,
+                           then append the selection to the chain
+  hermes fallback remove   Pick an entry to delete from the chain
+  hermes fallback clear    Remove all fallback entries
+
+Storage: ``fallback_providers`` in ``~/.hermes/config.yaml`` (top-level, list of
+``{provider, model, base_url?, api_mode?}`` dicts).  The legacy single-dict
+``fallback_model`` format is still read, and is migrated to the new list format
+the first time the chain is written (add, remove, or clear).
+"""
+from __future__ import annotations
+
+import copy
+from typing import Any, Dict, List, Optional
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
+    """Return the normalized fallback chain as a list of dicts.
+
+    Lookup order:
+
+    1. ``fallback_providers`` (new list format) — used when it yields at
+       least one valid entry.
+    2. ``fallback_model`` (legacy) — either a single dict or a list.
+
+    An entry is valid when it is a dict with truthy ``provider`` and
+    ``model`` values; anything else is silently dropped.  Every returned
+    entry is a shallow copy, so callers may mutate the result without
+    touching ``config``.
+    """
+
+    def _valid_copies(items: List[Any]) -> List[Dict[str, Any]]:
+        return [
+            dict(item)
+            for item in items
+            if isinstance(item, dict) and item.get("provider") and item.get("model")
+        ]
+
+    current = config.get("fallback_providers") or []
+    if isinstance(current, list):
+        entries = _valid_copies(current)
+        if entries:
+            return entries
+
+    legacy = config.get("fallback_model")
+    if isinstance(legacy, list):
+        return _valid_copies(legacy)
+    if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"):
+        return [dict(legacy)]
+    return []
+
+
+def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
+    """Store ``chain`` under ``fallback_providers`` and drop the legacy key.
+
+    Mutates ``config`` in place; persisting it to disk is the caller's job.
+    """
+    config["fallback_providers"] = chain
+    # Removing ``fallback_model`` on every write leaves a single source of
+    # truth for the chain.
+    config.pop("fallback_model", None)
+
+
+def _format_entry(entry: Dict[str, Any]) -> str:
+    """Render a fallback entry as ``<model>  (via <provider>)[  [<base_url>]]``.
+
+    Missing ``model`` / ``provider`` keys are shown as ``?``; the bracketed
+    base URL is appended only when ``base_url`` is truthy.
+    """
+    rendered = f"{entry.get('model', '?')}  (via {entry.get('provider', '?')})"
+    base_url = entry.get("base_url")
+    if base_url:
+        rendered += f"  [{base_url}]"
+    return rendered
+
+
+def _extract_fallback_from_model_cfg(model_cfg: Any) -> Optional[Dict[str, Any]]:
+    """Build a fallback entry from a ``config["model"]`` snapshot.
+
+    Returns ``None`` when ``model_cfg`` is not a dict or when either the
+    provider or the model name is empty after stripping.  Otherwise returns
+    ``{"provider", "model"}`` plus ``base_url`` / ``api_mode`` when those
+    are non-empty.
+    """
+    if not isinstance(model_cfg, dict):
+        return None
+
+    provider = (model_cfg.get("provider") or "").strip()
+    # The picker writes the selected model to ``model.default``; ``model``
+    # is consulted only when ``default`` is empty.
+    model = (model_cfg.get("default") or model_cfg.get("model") or "").strip()
+    if not (provider and model):
+        return None
+
+    entry: Dict[str, Any] = {"provider": provider, "model": model}
+    for optional_key in ("base_url", "api_mode"):
+        value = (model_cfg.get(optional_key) or "").strip()
+        if value:
+            entry[optional_key] = value
+    return entry
+
+
+# Returned by ``_snapshot_auth_active_provider`` when the auth store could
+# not be read, so "snapshot failed" stays distinguishable from a stored
+# ``active_provider`` of ``None``.
+_AUTH_SNAPSHOT_UNAVAILABLE = object()
+
+
+def _snapshot_auth_active_provider() -> Any:
+    """Return the current ``active_provider`` from the auth store.
+
+    Returns ``_AUTH_SNAPSHOT_UNAVAILABLE`` when the store cannot be imported
+    or loaded.  The value is only meant to be handed back to
+    ``_restore_auth_active_provider``.
+    """
+    try:
+        from hermes_cli.auth import _load_auth_store
+        return _load_auth_store().get("active_provider")
+    except Exception:
+        return _AUTH_SNAPSHOT_UNAVAILABLE
+
+
+def _restore_auth_active_provider(value: Any) -> None:
+    """Write back a previously snapshotted ``active_provider`` value.
+
+    A failed snapshot (``_AUTH_SNAPSHOT_UNAVAILABLE``) is a no-op: the
+    original value is unknown, so overwriting the store with a guess could
+    clobber a valid setting.  Any other value, including ``None``, is
+    written under the auth-store lock.  Errors are swallowed on purpose.
+    """
+    if value is _AUTH_SNAPSHOT_UNAVAILABLE:
+        return
+    try:
+        from hermes_cli.auth import _auth_store_lock, _load_auth_store, _save_auth_store
+        with _auth_store_lock():
+            store = _load_auth_store()
+            store["active_provider"] = value
+            _save_auth_store(store)
+    except Exception:
+        # Best-effort — if auth.json can't be restored, the user's primary
+        # provider may have been deactivated by the picker.  They can re-run
+        # `hermes model` to fix it.  Don't fail the fallback add.
+        pass
+
+
+# ---------------------------------------------------------------------------
+# Subcommand handlers
+# ---------------------------------------------------------------------------
+
+def cmd_fallback_list(args) -> None:  # noqa: ARG001
+    """Print the primary model (when known) and the numbered fallback chain.
+
+    ``args`` is accepted for dispatcher compatibility and is not read.
+    When the chain is empty, a short hint pointing at ``hermes fallback add``
+    is printed instead.
+    """
+    from hermes_cli.config import load_config
+
+    cfg = load_config()
+    entries = _read_chain(cfg)
+
+    print()
+    if not entries:
+        for line in (
+            "  No fallback providers configured.",
+            "",
+            "  Add one with:  hermes fallback add",
+            "",
+        ):
+            print(line)
+        return
+
+    primary_desc = _describe_primary(cfg)
+    if primary_desc:
+        print(f"  Primary:   {primary_desc}")
+        print()
+
+    noun = "entry" if len(entries) == 1 else "entries"
+    print(f"  Fallback chain ({len(entries)} {noun}):")
+    for position, item in enumerate(entries, start=1):
+        print(f"    {position}. {_format_entry(item)}")
+    print()
+    print("  Tried in order when the primary fails (rate-limit, 5xx, connection errors).")
+    print("  Docs: https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers")
+    print()
+
+
+def _describe_primary(config: Dict[str, Any]) -> Optional[str]:
+    """Describe the primary model in one line, or return ``None``.
+
+    * dict ``model`` config -> ``"<model>  (via <provider>)"``, with ``?``
+      standing in for any empty field (``default`` is preferred over
+      ``model`` for the model name);
+    * non-blank string ``model`` config -> the stripped string;
+    * anything else -> ``None``.
+    """
+    model_cfg = config.get("model")
+
+    if isinstance(model_cfg, dict):
+
+        def _first_or_placeholder(*keys: str) -> str:
+            # First truthy value among ``keys``, stripped; "?" when none
+            # is set or the value is whitespace-only.
+            for key in keys:
+                value = model_cfg.get(key)
+                if value:
+                    return value.strip() or "?"
+            return "?"
+
+        provider = _first_or_placeholder("provider")
+        model = _first_or_placeholder("default", "model")
+        return f"{model}  (via {provider})"
+
+    if isinstance(model_cfg, str):
+        return model_cfg.strip() or None
+    return None
+
+
+def cmd_fallback_add(args) -> None:
+    """Launch the same picker as `hermes model`, then append the selection to the chain.
+
+    The picker's selection is read back from ``config["model"]``, so this
+    command snapshots the primary model config and the auth store's
+    ``active_provider`` first and puts both back afterwards.  The restore
+    runs however the picker exits (normal return, ``SystemExit``,
+    ``KeyboardInterrupt`` or any other exception), so an aborted picker
+    never leaves the primary swapped.
+
+    Nothing is added when the picker yields no usable provider/model, when
+    the selection equals the current primary, or when it is already in the
+    chain.
+
+    Args:
+        args: Parsed CLI namespace, forwarded unchanged to the picker.
+    """
+    from hermes_cli.main import _require_tty, select_provider_and_model
+    from hermes_cli.config import load_config, save_config
+
+    _require_tty("fallback add")
+
+    # Snapshot BEFORE the picker runs so the primary can be put back and so
+    # the selection can be compared against it.
+    model_before = copy.deepcopy(load_config().get("model"))
+    active_provider_before = _snapshot_auth_active_provider()
+
+    print()
+    print("  Adding a fallback provider.  The picker below is the same one used by")
+    print("  `hermes model` — select the provider + model you want as a fallback.")
+    print()
+
+    try:
+        select_provider_and_model(args=args)
+        # Read the post-picker state to see what the user selected.  This
+        # must happen before the restore below overwrites it.
+        model_after = load_config().get("model")
+    finally:
+        # Some provider flows exit on auth failure, and the user may hit
+        # Ctrl-C mid-flow — restore the primary on every exit path.
+        _restore_model_cfg(model_before)
+        _restore_auth_active_provider(active_provider_before)
+
+    new_entry = _extract_fallback_from_model_cfg(model_after)
+    if not new_entry:
+        # Picker didn't complete (user cancelled or flow bailed).  Nothing to do.
+        print()
+        print("  No fallback added.")
+        return
+
+    # Picker picked the same thing that's already the primary → there's
+    # nothing useful to add as a fallback to itself.
+    primary_entry = _extract_fallback_from_model_cfg(model_before)
+    if (
+        primary_entry
+        and primary_entry["provider"] == new_entry["provider"]
+        and primary_entry["model"] == new_entry["model"]
+    ):
+        print()
+        print(f"  Selected model matches the current primary ({_format_entry(new_entry)}).")
+        print("  A provider cannot be a fallback for itself — no change.")
+        return
+
+    # Re-load (rather than reuse an earlier snapshot) because the picker may
+    # have touched other top-level keys (custom_providers, providers
+    # credentials) that we want to keep.
+    final_cfg = load_config()
+    chain = _read_chain(final_cfg)
+
+    # Reject exact-duplicate fallback entries.
+    already_present = any(
+        existing.get("provider") == new_entry["provider"]
+        and existing.get("model") == new_entry["model"]
+        for existing in chain
+    )
+    if already_present:
+        print()
+        print(f"  {_format_entry(new_entry)} is already in the fallback chain — skipped.")
+        return
+
+    chain.append(new_entry)
+    _write_chain(final_cfg, chain)
+    save_config(final_cfg)
+
+    print()
+    print(f"  Added fallback: {_format_entry(new_entry)}")
+    print(f"  Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
+    print()
+    print("  Run `hermes fallback list` to view, or `hermes fallback remove` to delete.")
+
+
+def _restore_model_cfg(model_before: Any) -> None:
+    """Put ``config["model"]`` back to a previously captured snapshot.
+
+    Loads the current config, replaces ``model`` with a deep copy of
+    ``model_before`` (or removes the key when the snapshot is ``None``),
+    and saves the config again.
+    """
+    from hermes_cli.config import load_config, save_config
+
+    current = load_config()
+    if model_before is not None:
+        current["model"] = copy.deepcopy(model_before)
+    else:
+        current.pop("model", None)
+    save_config(current)
+
+
+def cmd_fallback_remove(args) -> None:  # noqa: ARG001
+    """Prompt for one chain entry, remove it, and save the config.
+
+    ``args`` is accepted for dispatcher compatibility and is not read.
+    The curses picker is tried first; if importing or running it raises,
+    the plain numbered prompt is used instead.  Choosing the trailing
+    "Cancel" item, or any out-of-range / ``None`` result, leaves the chain
+    untouched.
+    """
+    from hermes_cli.config import load_config, save_config
+
+    cfg = load_config()
+    entries = _read_chain(cfg)
+
+    if not entries:
+        print()
+        print("  No fallback providers configured — nothing to remove.")
+        print()
+        return
+
+    prompt = "Select a fallback to remove:"
+    # "Cancel" sits one past the last real entry, so picking it yields an
+    # index that fails the range check below.
+    labels = [*(_format_entry(item) for item in entries), "Cancel"]
+
+    try:
+        from hermes_cli.setup import _curses_prompt_choice
+        picked = _curses_prompt_choice(prompt, labels, 0)
+    except Exception:
+        picked = _numbered_pick(prompt, labels)
+
+    is_real_entry = picked is not None and 0 <= picked < len(entries)
+    if not is_real_entry:
+        print()
+        print("  Cancelled — no change.")
+        return
+
+    dropped = entries.pop(picked)
+    _write_chain(cfg, entries)
+    save_config(cfg)
+
+    print()
+    print(f"  Removed fallback: {_format_entry(dropped)}")
+    if not entries:
+        print("  Fallback chain is now empty.")
+    else:
+        print(f"  Chain is now {len(entries)} {'entry' if len(entries) == 1 else 'entries'} long.")
+    print()
+
+
+def cmd_fallback_clear(args) -> None:  # noqa: ARG001
+    """Empty the fallback chain after an explicit ``y`` / ``yes`` confirmation.
+
+    ``args`` is accepted for dispatcher compatibility and is not read.
+    The current chain is listed first.  Any other answer, Ctrl-C or EOF
+    cancels without changing the config.
+    """
+    from hermes_cli.config import load_config, save_config
+
+    cfg = load_config()
+    entries = _read_chain(cfg)
+
+    if not entries:
+        print()
+        print("  No fallback providers configured — nothing to clear.")
+        print()
+        return
+
+    noun = "entry" if len(entries) == 1 else "entries"
+    print()
+    print(f"  Current fallback chain ({len(entries)} {noun}):")
+    for position, item in enumerate(entries, start=1):
+        print(f"    {position}. {_format_entry(item)}")
+    print()
+
+    try:
+        answer = input("  Clear all entries? [y/N]: ").strip().lower()
+    except (KeyboardInterrupt, EOFError):
+        print()
+        print("  Cancelled.")
+        return
+
+    confirmed = answer in ("y", "yes")
+    if not confirmed:
+        print("  Cancelled — no change.")
+        return
+
+    _write_chain(cfg, [])
+    save_config(cfg)
+    print()
+    print("  Fallback chain cleared.")
+    print()
+
+
+def _numbered_pick(question: str, choices: List[str]) -> Optional[int]:
+    """Plain-text numbered picker used when the curses picker is unavailable.
+
+    Prints ``question`` and a 1-based list of ``choices``, then keeps
+    prompting until a valid number is entered.
+
+    Returns:
+        The 0-based index of the chosen item, or ``None`` when the user
+        submits an empty line, presses Ctrl-C, or stdin hits EOF.
+    """
+    print(question)
+    for number, label in enumerate(choices, start=1):
+        print(f"  {number}. {label}")
+    print()
+
+    total = len(choices)
+    while True:
+        try:
+            answer = input(f"Choice [1-{total}]: ").strip()
+            if answer == "":
+                return None
+            picked = int(answer) - 1
+            if not 0 <= picked < total:
+                print(f"Please enter 1-{total}")
+                continue
+            return picked
+        except ValueError:
+            print("Please enter a number")
+        except (KeyboardInterrupt, EOFError):
+            print()
+            return None
+
+
+# ---------------------------------------------------------------------------
+# Dispatch
+# ---------------------------------------------------------------------------
+
+def cmd_fallback(args) -> None:
+    """Route ``hermes fallback [subcommand]`` to its handler.
+
+    The subcommand is read from ``args.fallback_command``.  A missing or
+    empty value, ``list`` and ``ls`` show the chain; ``add``, ``remove`` /
+    ``rm`` and ``clear`` go to their handlers.
+
+    Raises:
+        SystemExit: with code 2 for an unrecognized subcommand.
+    """
+    requested = getattr(args, "fallback_command", None)
+
+    if requested in (None, "", "list", "ls"):
+        cmd_fallback_list(args)
+        return
+    if requested == "add":
+        cmd_fallback_add(args)
+        return
+    if requested in ("remove", "rm"):
+        cmd_fallback_remove(args)
+        return
+    if requested == "clear":
+        cmd_fallback_clear(args)
+        return
+
+    print(f"Unknown fallback subcommand: {requested}")
+    print("Use one of: list, add, remove, clear")
+    raise SystemExit(2)
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -2315,13 +2315,13 @@ def _model_flow_nous(config, current_model="", args=None):
    # The live /models endpoint returns hundreds of models; the curated list
    # shows only agentic models users recognize from OpenRouter.
    from hermes_cli.models import (
-        _PROVIDER_MODELS,
+        get_curated_nous_model_ids,
        get_pricing_for_provider,
        check_nous_free_tier,
        partition_nous_models_by_tier,
    )

-    model_ids = _PROVIDER_MODELS.get("nous", [])
+    model_ids = get_curated_nous_model_ids()
    if not model_ids:
        print("No curated models available for Nous Portal.")
        return
@ -4780,6 +4780,37 @@ def cmd_webhook(args):
    webhook_command(args)


+def cmd_slack(args):
+    """Entry point for ``hermes slack <subcommand>``.
+
+    The only subcommand today is ``manifest``, which prints or writes a
+    Slack app manifest registering every gateway command as a native slash.
+
+    Returns the subcommand handler's result, or ``1`` after printing a
+    message to stderr when the subcommand is missing or unknown.
+    """
+    chosen = getattr(args, "slack_command", None)
+
+    if chosen == "manifest":
+        # Imported lazily so the Slack helpers load only when requested.
+        from hermes_cli.slack_cli import slack_manifest_command
+
+        return slack_manifest_command(args)
+
+    if chosen in (None, ""):
+        # No subcommand given: show a usage hint instead of an error.
+        usage_lines = [
+            "usage: hermes slack <subcommand>",
+            "",
+            "subcommands:",
+            "  manifest   Generate a Slack app manifest with every gateway",
+            "             command registered as a native slash",
+            "",
+            "Run `hermes slack manifest -h` for details.",
+        ]
+        print("\n".join(usage_lines), file=sys.stderr)
+    else:
+        print(f"Unknown slack subcommand: {chosen}", file=sys.stderr)
+    return 1
+
+
 def cmd_hooks(args):
    """Shell-hook inspection and management."""
    from hermes_cli.hooks import hooks_command
@ -7223,6 +7254,9 @@ Examples:
    hermes auth remove <p> <t>    Remove pooled credential by index, id, or label
    hermes auth reset <provider>  Clear exhaustion status for a provider
    hermes model                  Select default model
+    hermes fallback [list]        Show fallback provider chain
+    hermes fallback add           Add a fallback provider (same picker as `hermes model`)
+    hermes fallback remove        Remove a fallback provider from the chain
    hermes config                 View configuration
    hermes config edit            Edit config in $EDITOR
    hermes config set model gpt-4 Set a config value
@ -7564,6 +7598,42 @@ For more help on a command:
    )
    model_parser.set_defaults(func=cmd_model)

+    # =========================================================================
+    # fallback command — manage the fallback provider chain
+    # =========================================================================
+    from hermes_cli.fallback_cmd import cmd_fallback
+
+    fallback_parser = subparsers.add_parser(
+        "fallback",
+        help="Manage fallback providers (tried when the primary model fails)",
+        description=(
+            "Manage the fallback provider chain.  Fallback providers are tried "
+            "in order when the primary model fails with rate-limit, overload, or "
+            "connection errors.  See: "
+            "https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers"
+        ),
+    )
+    fallback_subparsers = fallback_parser.add_subparsers(dest="fallback_command")
+    fallback_subparsers.add_parser(
+        "list",
+        aliases=["ls"],
+        help="Show the current fallback chain (default when no subcommand)",
+    )
+    fallback_subparsers.add_parser(
+        "add",
+        help="Pick a provider + model (same picker as `hermes model`) and append to the chain",
+    )
+    fallback_subparsers.add_parser(
+        "remove",
+        aliases=["rm"],
+        help="Pick an entry to delete from the chain",
+    )
+    fallback_subparsers.add_parser(
+        "clear",
+        help="Remove all fallback entries",
+    )
+    fallback_parser.set_defaults(func=cmd_fallback)
+
    # =========================================================================
    # gateway command
    # =========================================================================
@ -7759,6 +7829,54 @@ For more help on a command:
    )
    whatsapp_parser.set_defaults(func=cmd_whatsapp)

+    # =========================================================================
+    # slack command
+    # =========================================================================
+    slack_parser = subparsers.add_parser(
+        "slack",
+        help="Slack integration helpers (manifest generation, etc.)",
+        description="Slack integration helpers for Hermes.",
+    )
+    slack_sub = slack_parser.add_subparsers(dest="slack_command")
+    slack_manifest = slack_sub.add_parser(
+        "manifest",
+        help="Print or write a Slack app manifest with every gateway command "
+             "registered as a native slash (/btw, /stop, /model, ...)",
+        description=(
+            "Generate a Slack app manifest that registers every gateway "
+            "command in COMMAND_REGISTRY as a first-class Slack slash "
+            "command (matching Discord and Telegram parity). Paste the "
+            "output into Slack app config → Features → App Manifest → "
+            "Edit, then Save. Reinstall the app if Slack prompts for it."
+        ),
+    )
+    slack_manifest.add_argument(
+        "--write",
+        nargs="?",
+        const=True,
+        default=None,
+        metavar="PATH",
+        help="Write manifest to a file instead of stdout. With no PATH "
+             "writes to $HERMES_HOME/slack-manifest.json.",
+    )
+    slack_manifest.add_argument(
+        "--name",
+        default=None,
+        help='Bot display name (default: "Hermes")',
+    )
+    slack_manifest.add_argument(
+        "--description",
+        default=None,
+        help="Bot description shown in Slack's app directory.",
+    )
+    slack_manifest.add_argument(
+        "--slashes-only",
+        action="store_true",
+        help="Emit only the features.slash_commands array (for merging "
+             "into an existing manifest manually).",
+    )
+    slack_parser.set_defaults(func=cmd_slack)
+
    # =========================================================================
    # login command
    # =========================================================================
@ -8414,6 +8532,12 @@ Examples:
    skills_list.add_argument(
        "--source", default="all", choices=["all", "hub", "builtin", "local"]
    )
+    skills_list.add_argument(
+        "--enabled-only",
+        action="store_true",
+        help="Hide disabled skills. Use with -p <profile> to see exactly "
+             "which skills will load for that profile.",
+    )

    skills_check = skills_subparsers.add_parser(
        "check", help="Check installed hub skills for updates"
--- a/hermes_cli/model_catalog.py
+++ b/hermes_cli/model_catalog.py
@ -0,0 +1,329 @@
+"""Remote model catalog fetcher.
+
+The Hermes docs site hosts a JSON manifest of curated models for providers
+we want to update without shipping a release (currently OpenRouter and
+Nous Portal). This module fetches, validates, and caches that manifest,
+falling back to the in-repo hardcoded lists when the network is unavailable.
+
+Pipeline
+--------
+1. ``get_catalog()`` — returns a parsed manifest dict.
+   - Checks in-process cache (invalidated by TTL).
+   - Reads disk cache at ``~/.hermes/cache/model_catalog.json``.
+   - Fetches the master URL if disk cache is stale or missing.
+   - On any fetch failure, keeps using the stale cache (or empty dict).
+
+2. ``get_curated_openrouter_models()`` / ``get_curated_nous_models()`` —
+   thin accessors returning the shapes existing callers expect. Each
+   falls back to the in-repo hardcoded list on any lookup failure.
+
+Schema (version 1)
+------------------
+::
+
+    {
+      "version": 1,
+      "updated_at": "2026-04-25T22:00:00Z",
+      "metadata": {...},                # free-form
+      "providers": {
+        "openrouter": {
+          "metadata": {...},            # free-form
+          "models": [
+            {"id": "vendor/model", "description": "recommended",
+             "metadata": {...}}          # free-form, model-level
+          ]
+        },
+        "nous": {...}
+      }
+    }
+
+Unknown fields are ignored — extra metadata can be added at either level
+without bumping ``version``. ``version`` bumps are reserved for
+breaking changes (renaming ``providers``, changing ``models`` shape).
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import time
+import urllib.error
+import urllib.request
+from pathlib import Path
+from typing import Any
+
+from hermes_cli import __version__ as _HERMES_VERSION
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+DEFAULT_CATALOG_URL = (
+    "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
+)
+DEFAULT_TTL_HOURS = 24
+DEFAULT_FETCH_TIMEOUT = 8.0
+SUPPORTED_SCHEMA_VERSION = 1
+
+_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
+
+# In-process cache to avoid repeated disk + parse work across multiple
+# calls within the same session. Invalidated by TTL against the disk file's
+# mtime, so calling code never has to think about this.
+_catalog_cache: dict[str, Any] | None = None
+_catalog_cache_source_mtime: float = 0.0
+
+
+# ---------------------------------------------------------------------------
+# Config
+# ---------------------------------------------------------------------------
+
+
+def _load_catalog_config() -> dict[str, Any]:
+    """Load the ``model_catalog`` config block with defaults filled in.
+
+    Returns a dict with four keys:
+
+    * ``enabled`` (bool) — defaults to ``True``.
+    * ``url`` (str) — defaults to ``DEFAULT_CATALOG_URL``.
+    * ``ttl_hours`` (float) — defaults to ``DEFAULT_TTL_HOURS``; a missing,
+      falsy, or unparseable value also yields the default.
+    * ``providers`` (dict) — per-provider overrides, ``{}`` when absent or
+      not a mapping.
+
+    This function never raises: a broken or unloadable config is treated as
+    an empty one so catalog lookups keep working.
+    """
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config() or {}
+    except Exception:
+        cfg = {}
+
+    # Guard against a config loader that returns something other than a dict.
+    raw = cfg.get("model_catalog") if isinstance(cfg, dict) else None
+    if not isinstance(raw, dict):
+        raw = {}
+
+    try:
+        ttl_hours = float(raw.get("ttl_hours") or DEFAULT_TTL_HOURS)
+    except (TypeError, ValueError, OverflowError):
+        # A malformed value (e.g. ``ttl_hours: "daily"``) must not crash
+        # callers that promise never to raise; fall back to the default.
+        logger.info(
+            "model catalog: ignoring invalid ttl_hours %r", raw.get("ttl_hours")
+        )
+        ttl_hours = float(DEFAULT_TTL_HOURS)
+
+    providers = raw.get("providers")
+    if not isinstance(providers, dict):
+        providers = {}
+
+    return {
+        "enabled": bool(raw.get("enabled", True)),
+        "url": str(raw.get("url") or DEFAULT_CATALOG_URL),
+        "ttl_hours": ttl_hours,
+        "providers": providers,
+    }
+
+
+def _cache_path() -> Path:
+    """Locate the on-disk catalog cache file under the Hermes home directory.
+
+    ``get_hermes_home`` is imported at call time rather than at module
+    import time so tests can monkeypatch the home directory.
+    """
+    from hermes_constants import get_hermes_home
+
+    cache_dir = get_hermes_home() / "cache"
+    return cache_dir / "model_catalog.json"
+
+
+# ---------------------------------------------------------------------------
+# Fetch + validate + cache
+# ---------------------------------------------------------------------------
+
+
+def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None:
+    """Download the manifest at ``url`` and return it once validated.
+
+    Args:
+        url: Address of the JSON manifest.
+        timeout: Passed to ``urlopen`` as its ``timeout`` argument.
+
+    Returns:
+        The parsed manifest dict, or ``None`` when the request fails, the
+        body is not valid JSON, or the payload fails ``_validate_manifest``.
+        Failures are logged at INFO level and never raised.
+    """
+    request_headers = {
+        "Accept": "application/json",
+        "User-Agent": _HERMES_USER_AGENT,
+    }
+    try:
+        request = urllib.request.Request(url, headers=request_headers)
+        with urllib.request.urlopen(request, timeout=timeout) as response:
+            payload = response.read()
+        manifest = json.loads(payload.decode())
+    except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError) as exc:
+        logger.info("model catalog fetch failed (%s): %s", url, exc)
+        return None
+    except Exception as exc:  # pragma: no cover — defensive
+        logger.info("model catalog fetch errored (%s): %s", url, exc)
+        return None
+
+    if _validate_manifest(manifest):
+        return manifest
+
+    logger.info("model catalog at %s failed schema validation", url)
+    return None
+
+
+def _validate_manifest(data: Any) -> bool:
+    """Return True when ``data`` matches the minimum manifest shape.
+
+    A valid manifest is a dict with:
+
+    * ``version`` — an integer between 1 and ``SUPPORTED_SCHEMA_VERSION``
+      inclusive (booleans are rejected even though ``bool`` subclasses
+      ``int``);
+    * ``providers`` — a dict mapping provider names (str) to dicts, each
+      holding a ``models`` list whose entries are dicts with a non-blank
+      string ``id``.
+
+    Any other fields are ignored.
+    """
+    if not isinstance(data, dict):
+        return False
+
+    version = data.get("version")
+    if isinstance(version, bool) or not isinstance(version, int):
+        return False
+    if not 1 <= version <= SUPPORTED_SCHEMA_VERSION:
+        # Future schema versions we don't understand are refused rather
+        # than guessed at; versions below 1 were never defined.
+        return False
+
+    providers = data.get("providers")
+    if not isinstance(providers, dict):
+        return False
+    for pname, pblock in providers.items():
+        if not isinstance(pname, str) or not isinstance(pblock, dict):
+            return False
+        models = pblock.get("models")
+        if not isinstance(models, list):
+            return False
+        for m in models:
+            if not isinstance(m, dict):
+                return False
+            model_id = m.get("id")
+            if not isinstance(model_id, str) or not model_id.strip():
+                return False
+    return True
+
+
+def _read_disk_cache() -> tuple[dict[str, Any] | None, float]:
+    """Load the cached manifest from disk.
+
+    Returns:
+        ``(data, mtime)`` where ``mtime`` is the cache file's modification
+        time.  ``(None, 0.0)`` is returned when the file is missing,
+        unreadable, not decodable as UTF-8 JSON, or fails
+        ``_validate_manifest``.
+    """
+    path = _cache_path()
+    try:
+        mtime = path.stat().st_mtime
+    except OSError:  # includes FileNotFoundError
+        return (None, 0.0)
+    try:
+        with open(path, encoding="utf-8") as fh:
+            data = json.load(fh)
+    except (OSError, ValueError):
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
+        # so a corrupted cache file is treated as missing instead of
+        # propagating out of get_catalog().
+        return (None, 0.0)
+    if not _validate_manifest(data):
+        return (None, 0.0)
+    return (data, mtime)
+
+
+def _write_disk_cache(data: dict[str, Any]) -> None:
+    """Persist ``data`` to the disk cache, replacing any previous copy.
+
+    The manifest is first written to a sibling ``.tmp`` file and then moved
+    over the real path with ``os.replace``.  ``OSError`` is logged at INFO
+    level and swallowed; the cache is an optimisation, not a requirement.
+    """
+    destination = _cache_path()
+    try:
+        destination.parent.mkdir(parents=True, exist_ok=True)
+        staging = destination.parent / (destination.name + ".tmp")
+        with staging.open("w") as handle:
+            json.dump(data, handle, indent=2)
+            handle.write("\n")
+        os.replace(staging, destination)
+    except OSError as exc:
+        logger.info("model catalog cache write failed: %s", exc)
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]:
+    """Return the parsed model catalog manifest, or ``{}`` when unavailable.
+
+    Resolution order:
+
+    1. Catalog disabled in config → ``{}``.
+    2. Disk cache younger than the configured TTL (and ``force_refresh`` not
+       set) → the cached manifest, reusing the in-process copy when the
+       disk file's mtime has not changed since it was loaded.
+    3. Otherwise fetch the configured URL; on success write it to disk and
+       return it.
+    4. If the fetch fails, return the stale disk copy when one exists,
+       else ``{}``.
+
+    Callers should treat a missing provider/model as "use the in-repo
+    fallback" so the CLI keeps working offline.
+    """
+    global _catalog_cache, _catalog_cache_source_mtime
+
+    settings = _load_catalog_config()
+    if not settings["enabled"]:
+        return {}
+
+    max_age = max(0.0, settings["ttl_hours"] * 3600.0)
+
+    cached, cached_mtime = _read_disk_cache()
+    started = time.time()
+    is_fresh = cached is not None and (started - cached_mtime) < max_age
+
+    if is_fresh and not force_refresh:
+        # Same file we already hold in memory: hand back the in-process copy.
+        if _catalog_cache is not None and cached_mtime == _catalog_cache_source_mtime:
+            return _catalog_cache
+        # Disk is fresh enough — adopt it without touching the network.
+        _catalog_cache = cached
+        _catalog_cache_source_mtime = cached_mtime
+        return cached
+
+    # Stale, missing, or forced: go to the network.
+    downloaded = _fetch_manifest(settings["url"], DEFAULT_FETCH_TIMEOUT)
+
+    if downloaded is None:
+        # Fetch failed — a stale disk copy is better than nothing.
+        if cached is None:
+            return {}
+        _catalog_cache = cached
+        _catalog_cache_source_mtime = cached_mtime
+        return cached
+
+    _write_disk_cache(downloaded)
+    reread, reread_mtime = _read_disk_cache()
+    if reread is not None:
+        _catalog_cache = reread
+        _catalog_cache_source_mtime = reread_mtime
+        return reread
+
+    # The disk write (or re-read) did not stick; keep the fetched copy in
+    # memory only, stamped with the time this call started.
+    _catalog_cache = downloaded
+    _catalog_cache_source_mtime = started
+    return downloaded
+
+
+def _fetch_provider_override(provider: str) -> dict[str, Any] | None:
+    """Fetch the manifest at ``model_catalog.providers.<provider>.url``.
+
+    Returns ``None`` when the catalog is disabled, when no non-blank
+    override URL is configured for ``provider``, or when the fetch fails.
+
+    Override fetches bypass the disk cache (they are usually third-party,
+    self-hosted manifests), so the URL is requested on every call using
+    ``DEFAULT_FETCH_TIMEOUT``.
+    """
+    settings = _load_catalog_config()
+    if not settings["enabled"]:
+        return None
+
+    entry = settings["providers"].get(provider)
+    candidate_url = entry.get("url") if isinstance(entry, dict) else None
+
+    if isinstance(candidate_url, str) and candidate_url.strip():
+        return _fetch_manifest(candidate_url.strip(), DEFAULT_FETCH_TIMEOUT)
+    return None
+
+
+def _get_provider_block(provider: str) -> dict[str, Any] | None:
+    """Look up ``provider``'s block, preferring a per-provider override.
+
+    The override manifest (if configured and reachable) is consulted first;
+    when it has no dict entry for ``provider`` the main catalog is used.
+    Returns ``None`` when neither source has a dict block for ``provider``.
+    """
+    override_manifest = _fetch_provider_override(provider)
+    if override_manifest is not None:
+        overridden = override_manifest.get("providers", {}).get(provider)
+        if isinstance(overridden, dict):
+            return overridden
+
+    main_catalog = get_catalog()
+    if not main_catalog:
+        return None
+
+    found = main_catalog.get("providers", {}).get(provider)
+    if isinstance(found, dict):
+        return found
+    return None
+
+
+def get_curated_openrouter_models() -> list[tuple[str, str]] | None:
+    """Return OpenRouter's curated ``[(id, description), ...]`` pairs.
+
+    Entries whose ``id`` is blank are dropped; a missing description becomes
+    an empty string.  Returns ``None`` when the provider block is
+    unavailable or yields no usable entries, so callers can fall back to
+    their hardcoded list.
+    """
+    provider_block = _get_provider_block("openrouter")
+    if not provider_block:
+        return None
+
+    pairs: list[tuple[str, str]] = [
+        (model_id, str(entry.get("description") or ""))
+        for entry in provider_block.get("models", [])
+        if (model_id := str(entry.get("id") or "").strip())
+    ]
+    return pairs if pairs else None
+
+
+def get_curated_nous_models() -> list[str] | None:
+    """Return Nous Portal's curated model ids from the manifest.
+
+    Ids are stripped of surrounding whitespace and blank ones are dropped.
+    Returns ``None`` when the provider block is unavailable or contains no
+    usable ids.
+    """
+    provider_block = _get_provider_block("nous")
+    if not provider_block:
+        return None
+
+    stripped_ids = [
+        str(entry.get("id") or "").strip()
+        for entry in provider_block.get("models", [])
+    ]
+    usable: list[str] = [model_id for model_id in stripped_ids if model_id]
+    return usable if usable else None
+
+
+def reset_cache() -> None:
+    """Drop the in-process catalog cache and its recorded source mtime.
+
+    Used by tests and ``hermes model --refresh``.
+    """
+    global _catalog_cache, _catalog_cache_source_mtime
+    _catalog_cache, _catalog_cache_source_mtime = None, 0.0
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@ -876,7 +876,16 @@ def fetch_openrouter_models(
    if _openrouter_catalog_cache is not None and not force_refresh:
        return list(_openrouter_catalog_cache)

-    fallback = list(OPENROUTER_MODELS)
+    # Prefer the remotely-hosted catalog manifest; fall back to the in-repo
+    # snapshot when the manifest is unreachable. Both are curated lists that
+    # drive the picker; the OpenRouter live /v1/models filter (tool support,
+    # free pricing) is applied on top either way.
+    try:
+        from hermes_cli.model_catalog import get_curated_openrouter_models
+        remote = get_curated_openrouter_models()
+    except Exception:
+        remote = None
+    fallback = list(remote) if remote else list(OPENROUTER_MODELS)
    preferred_ids = [mid for mid, _ in fallback]

    try:
@ -929,6 +938,24 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
    return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]


+def get_curated_nous_model_ids() -> list[str]:
+    """Return the curated Nous Portal model ids as a fresh list.
+
+    The remotely-hosted catalog manifest is tried first; if it cannot be
+    loaded (any exception) or yields nothing, the in-repo snapshot in
+    ``_PROVIDER_MODELS["nous"]`` is used.  Never returns ``None``.
+    """
+    remote_ids = None
+    try:
+        from hermes_cli.model_catalog import get_curated_nous_models
+        remote_ids = get_curated_nous_models()
+    except Exception:
+        # Catalog lookup is best-effort; fall through to the snapshot.
+        remote_ids = None
+
+    chosen = remote_ids if remote_ids else _PROVIDER_MODELS.get("nous", [])
+    return list(chosen)
+
+
 def _ai_gateway_model_is_free(pricing: Any) -> bool:
    """Return True if an AI Gateway model has $0 input AND output pricing."""
    if not isinstance(pricing, dict):
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@ -1856,27 +1856,32 @@ def _setup_slack():
    if existing:
        print_info("Slack: already configured")
        if not prompt_yes_no("Reconfigure Slack?", False):
+            # Even without reconfiguring, offer to refresh the manifest so
+            # new commands (e.g. /btw, /stop, ...) get registered in Slack.
+            if prompt_yes_no(
+                "Regenerate the Slack app manifest with the latest command "
+                "list? (recommended after `hermes update`)",
+                True,
+            ):
+                _write_slack_manifest_and_instruct()
            return

    print_info("Steps to create a Slack app:")
-    print_info("   1. Go to https://api.slack.com/apps → Create New App (from scratch)")
+    print_info("   1. Go to https://api.slack.com/apps → Create New App")
+    print_info("      Pick 'From an app manifest' — we'll generate one for you below.")
    print_info("   2. Enable Socket Mode: Settings → Socket Mode → Enable")
    print_info("      • Create an App-Level Token with 'connections:write' scope")
-    print_info("   3. Add Bot Token Scopes: Features → OAuth & Permissions")
-    print_info("      Required scopes: chat:write, app_mentions:read,")
-    print_info("      channels:history, channels:read, im:history,")
-    print_info("      im:read, im:write, users:read, files:read, files:write")
-    print_info("      Optional for private channels: groups:history")
-    print_info("   4. Subscribe to Events: Features → Event Subscriptions → Enable")
-    print_info("      Required events: message.im, message.channels, app_mention")
-    print_info("      Optional for private channels: message.groups")
-    print_warning("   ⚠ Without message.channels the bot will ONLY work in DMs,")
-    print_warning("     not public channels.")
-    print_info("   5. Install to Workspace: Settings → Install App")
-    print_info("   6. Reinstall the app after any scope or event changes")
-    print_info("   7. After installing, invite the bot to channels: /invite @YourBot")
+    print_info("   3. Install to Workspace: Settings → Install App")
+    print_info("   4. After installing, invite the bot to channels: /invite @YourBot")
    print()
    print_info("   Full guide: https://hermes-agent.nousresearch.com/docs/user-guide/messaging/slack/")
+    print()
+
+    # Generate and write manifest up-front so the user can paste it into
+    # the "Create from manifest" flow instead of clicking through scopes /
+    # events / slash commands one at a time.
+    _write_slack_manifest_and_instruct()
+
    print()
    bot_token = prompt("Slack Bot Token (xoxb-...)", password=True)
    if not bot_token:
@ -1902,6 +1907,49 @@ def _setup_slack():
        print_info("   Set SLACK_ALLOW_ALL_USERS=true or GATEWAY_ALLOW_ALL_USERS=true only if you intentionally want open workspace access.")


+def _write_slack_manifest_and_instruct():
+    """Write ``slack-manifest.json`` under HERMES_HOME and explain next steps.
+
+    Shared by the initial Slack setup flow and the "reconfigure? → no"
+    branch so the manifest can be refreshed without re-entering tokens.
+
+    This is best-effort: any exception while building or writing the
+    manifest is reported as a warning, followed by a hint on generating it
+    manually, and the surrounding setup carries on.
+    """
+    try:
+        import json as _json
+
+        from hermes_cli.slack_cli import _build_full_manifest
+        from hermes_constants import get_hermes_home
+
+        built = _build_full_manifest(
+            bot_name="Hermes",
+            bot_description="Your Hermes agent on Slack",
+        )
+        destination = Path(get_hermes_home()) / "slack-manifest.json"
+        destination.parent.mkdir(parents=True, exist_ok=True)
+        serialized = _json.dumps(built, indent=2, ensure_ascii=False) + "\n"
+        destination.write_text(serialized, encoding="utf-8")
+
+        print_success(f"Slack app manifest written to: {destination}")
+        print_info(
+            "   Paste it into https://api.slack.com/apps → your app → Features "
+            "→ App Manifest → Edit, then Save.  Slack will prompt to "
+            "reinstall if scopes or slash commands changed."
+        )
+        print_info(
+            "   Re-run `hermes slack manifest --write` anytime to refresh after "
+            "Hermes adds new commands."
+        )
+    except Exception as exc:  # pragma: no cover - best-effort UX helper
+        print_warning(f"Couldn't write Slack manifest: {exc}")
+        print_info(
+            "   You can generate it manually later with: "
+            "hermes slack manifest --write"
+        )
+
+
 def _setup_matrix():
    """Configure Matrix credentials."""
    print_header("Matrix")
--- a/hermes_cli/skills_hub.py
+++ b/hermes_cli/skills_hub.py
@ -599,11 +599,24 @@ def inspect_skill(identifier: str) -> Optional[dict]:
    return out


-def do_list(source_filter: str = "all", console: Optional[Console] = None) -> None:
-    """List installed skills, distinguishing hub, builtin, and local skills."""
+def do_list(source_filter: str = "all",
+            enabled_only: bool = False,
+            console: Optional[Console] = None) -> None:
+    """List installed skills, distinguishing hub, builtin, and local skills.
+
+    Args:
+        source_filter: ``all`` | ``hub`` | ``builtin`` | ``local``.
+        enabled_only: If True, hide disabled skills from the output.
+
+    Enabled/disabled state is resolved against the currently active profile's
+    config — ``hermes -p <profile> skills list`` reads that profile's
+    ``skills.disabled`` list because ``-p`` swaps ``HERMES_HOME`` at process
+    start.  No explicit profile flag needed here.
+    """
    from tools.skills_hub import HubLockFile, ensure_hub_dirs
    from tools.skills_sync import _read_manifest
    from tools.skills_tool import _find_all_skills
+    from agent.skill_utils import get_disabled_skill_names

    c = console or _console
    ensure_hub_dirs()
@ -611,17 +624,26 @@ def do_list(source_filter: str = "all", console: Optional[Console] = None) -> No
    hub_installed = {e["name"]: e for e in lock.list_installed()}
    builtin_names = set(_read_manifest())

-    all_skills = _find_all_skills()
+    # Pull ALL skills (including disabled ones) so we can annotate status.
+    all_skills = _find_all_skills(skip_disabled=True)
+    disabled_names = get_disabled_skill_names()

-    table = Table(title="Installed Skills")
+    title = "Installed Skills"
+    if enabled_only:
+        title += " (enabled only)"
+
+    table = Table(title=title)
    table.add_column("Name", style="bold cyan")
    table.add_column("Category", style="dim")
    table.add_column("Source", style="dim")
    table.add_column("Trust", style="dim")
+    table.add_column("Status", style="dim")

    hub_count = 0
    builtin_count = 0
    local_count = 0
+    enabled_count = 0
+    disabled_count = 0

    for skill in sorted(all_skills, key=lambda s: (s.get("category") or "", s["name"])):
        name = skill["name"]
@ -632,29 +654,48 @@ def do_list(source_filter: str = "all", console: Optional[Console] = None) -> No
            source_type = "hub"
            source_display = hub_entry.get("source", "hub")
            trust = hub_entry.get("trust_level", "community")
-            hub_count += 1
        elif name in builtin_names:
            source_type = "builtin"
            source_display = "builtin"
            trust = "builtin"
-            builtin_count += 1
        else:
            source_type = "local"
            source_display = "local"
            trust = "local"
-            local_count += 1

        if source_filter != "all" and source_filter != source_type:
            continue

+        is_enabled = name not in disabled_names
+        if enabled_only and not is_enabled:
+            continue
+
+        if source_type == "hub":
+            hub_count += 1
+        elif source_type == "builtin":
+            builtin_count += 1
+        else:
+            local_count += 1
+
+        if is_enabled:
+            enabled_count += 1
+            status_cell = "[bold green]enabled[/]"
+        else:
+            disabled_count += 1
+            status_cell = "[dim red]disabled[/]"
+
        trust_style = {"builtin": "bright_cyan", "trusted": "green", "community": "yellow", "local": "dim"}.get(trust, "dim")
        trust_label = "official" if source_display == "official" else trust
-        table.add_row(name, category, source_display, f"[{trust_style}]{trust_label}[/]")
+        table.add_row(name, category, source_display, f"[{trust_style}]{trust_label}[/]", status_cell)

    c.print(table)
-    c.print(
-        f"[dim]{hub_count} hub-installed, {builtin_count} builtin, {local_count} local[/]\n"
-    )
+    summary = f"[dim]{hub_count} hub-installed, {builtin_count} builtin, {local_count} local"
+    if enabled_only:
+        summary += f" — {enabled_count} enabled shown"
+    else:
+        summary += f" — {enabled_count} enabled, {disabled_count} disabled"
+    summary += "[/]\n"
+    c.print(summary)


 def do_check(name: Optional[str] = None, console: Optional[Console] = None) -> None:
@ -1127,7 +1168,10 @@ def skills_command(args) -> None:
    elif action == "inspect":
        do_inspect(args.identifier)
    elif action == "list":
-        do_list(source_filter=args.source)
+        do_list(
+            source_filter=args.source,
+            enabled_only=getattr(args, "enabled_only", False),
+        )
    elif action == "check":
        do_check(name=getattr(args, "name", None))
    elif action == "update":
@ -1279,11 +1323,12 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:

    elif action == "list":
        source_filter = "all"
+        enabled_only = "--enabled-only" in args or "--enabled" in args
        if "--source" in args:
            idx = args.index("--source")
            if idx + 1 < len(args):
                source_filter = args[idx + 1]
-        do_list(source_filter=source_filter, console=c)
+        do_list(source_filter=source_filter, enabled_only=enabled_only, console=c)

    elif action == "check":
        name = args[0] if args else None
@ -1371,7 +1416,8 @@ def _print_skills_help(console: Console) -> None:
        "  [cyan]search[/] <query>              Search registries for skills\n"
        "  [cyan]install[/] <identifier>        Install a skill (with security scan)\n"
        "  [cyan]inspect[/] <identifier>        Preview a skill without installing\n"
-        "  [cyan]list[/] [--source hub|builtin|local] List installed skills\n"
+        "  [cyan]list[/] [--source hub|builtin|local] [--enabled-only]\n"
+        "       List installed skills; --enabled-only filters to the active profile's live set\n"
        "  [cyan]check[/] [name]                Check hub skills for upstream updates\n"
        "  [cyan]update[/] [name]               Update hub skills with upstream changes\n"
        "  [cyan]audit[/] [name]                Re-scan hub skills for security\n"
--- a/hermes_cli/slack_cli.py
+++ b/hermes_cli/slack_cli.py
@ -0,0 +1,152 @@
+"""``hermes slack ...`` CLI subcommands.
+
+Today only ``hermes slack manifest`` is implemented — it generates the
+Slack app manifest JSON for registering every gateway command as a native
+Slack slash (``/btw``, ``/stop``, ``/model``, …) so users get the same
+first-class slash UX Discord and Telegram already have.
+
+Typical workflow::
+
+    $ hermes slack manifest > slack-manifest.json
+    # or:
+    $ hermes slack manifest --write
+
+Then paste the printed JSON into the Slack app config (Features → App
+Manifest → Edit) and click Save. Slack diffs the manifest and prompts
+for reinstall when scopes/commands change.
+"""
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+
+
+def _build_full_manifest(bot_name: str, bot_description: str) -> dict:
+    """Assemble the complete Slack app manifest for a Hermes deployment.
+
+    The slash-command list is whatever ``slack_app_manifest()`` reports
+    under ``features.slash_commands``; every other section (display info,
+    bot user, OAuth scopes, event subscriptions, socket mode) is a fixed
+    default defined here, which users can still adjust in the Slack UI
+    after pasting.
+
+    Args:
+        bot_name: Display name; cut to 35 characters for the app name and
+            to 80 characters for the bot user's display name.
+        bot_description: App description; an empty value falls back to a
+            stock sentence, and the result is cut to 140 characters.
+
+    Returns:
+        The manifest as a plain nested dict.
+    """
+    from hermes_cli.commands import slack_app_manifest
+
+    slash_commands = slack_app_manifest()["features"]["slash_commands"]
+    description = bot_description or "Your Hermes agent on Slack"
+
+    display_information = {
+        "name": bot_name[:35],
+        "description": description[:140],
+        "background_color": "#1a1a2e",
+    }
+
+    features = {
+        "bot_user": {
+            "display_name": bot_name[:80],
+            "always_online": True,
+        },
+        "slash_commands": slash_commands,
+        "assistant_view": {
+            "assistant_description": "Chat with Hermes in threads and DMs.",
+        },
+    }
+
+    bot_scopes = [
+        "app_mentions:read",
+        "assistant:write",
+        "channels:history",
+        "channels:read",
+        "chat:write",
+        "commands",
+        "files:read",
+        "files:write",
+        "groups:history",
+        "im:history",
+        "im:read",
+        "im:write",
+        "users:read",
+    ]
+
+    bot_events = [
+        "app_mention",
+        "assistant_thread_context_changed",
+        "assistant_thread_started",
+        "message.channels",
+        "message.groups",
+        "message.im",
+    ]
+
+    settings = {
+        "event_subscriptions": {"bot_events": bot_events},
+        "interactivity": {"is_enabled": True},
+        "org_deploy_enabled": False,
+        "socket_mode_enabled": True,
+        "token_rotation_enabled": False,
+    }
+
+    # Key order is kept as-is so the serialized JSON layout does not change.
+    return {
+        "_metadata": {"major_version": 1, "minor_version": 1},
+        "display_information": display_information,
+        "features": features,
+        "oauth_config": {"scopes": {"bot": bot_scopes}},
+        "settings": settings,
+    }
+
+
+def slack_manifest_command(args) -> int:
+    """Print or write a Slack app manifest JSON.
+
+    Flags (all parsed in ``hermes_cli/main.py``):
+      --write [PATH]  Write to file instead of stdout (default path:
+                      ``$HERMES_HOME/slack-manifest.json``)
+      --name NAME     Override the bot display name (default: "Hermes")
+      --description DESC  Override the bot description
+      --slashes-only  Emit only the ``features.slash_commands`` array (for
+                      merging into an existing manifest manually)
+
+    Args:
+        args: Parsed CLI namespace.  Every attribute is read with
+            ``getattr`` and a default, so missing attributes are tolerated.
+            ``write`` may be ``None``/``False`` (print to stdout), ``True``
+            (write to the default location) or a path.
+
+    Returns:
+        Always ``0``.
+    """
+    name = getattr(args, "name", None) or "Hermes"
+    description = getattr(args, "description", None) or "Your Hermes agent on Slack"
+
+    if getattr(args, "slashes_only", False):
+        from hermes_cli.commands import slack_app_manifest
+
+        manifest = slack_app_manifest()["features"]["slash_commands"]
+    else:
+        manifest = _build_full_manifest(name, description)
+
+    payload = json.dumps(manifest, indent=2, ensure_ascii=False) + "\n"
+
+    write_target = getattr(args, "write", None)
+    # ``False`` means "flag not given" just like ``None`` (it appears when the
+    # option is declared store_true-style or the namespace is hand-built).
+    # Previously it fell through to ``Path(False)`` and raised TypeError.
+    if write_target is None or write_target is False:
+        sys.stdout.write(payload)
+        return 0
+
+    if write_target is True:
+        # --write with no value → default location
+        try:
+            from hermes_constants import get_hermes_home
+
+            target = Path(get_hermes_home()) / "slack-manifest.json"
+        except Exception:
+            # Deliberate best-effort: if the Hermes home cannot be resolved
+            # for any reason, fall back to the conventional ~/.hermes path.
+            target = Path.home() / ".hermes" / "slack-manifest.json"
+    else:
+        target = Path(write_target).expanduser()
+
+    target.parent.mkdir(parents=True, exist_ok=True)
+    target.write_text(payload, encoding="utf-8")
+
+    # Status and instructions go to stderr so stdout stays free of chatter.
+    print(f"Slack manifest written to: {target}", file=sys.stderr)
+    print(
+        "\nNext steps:\n"
+        "  1. Open https://api.slack.com/apps and pick your Hermes app\n"
+        "     (or create a new one: Create New App → From an app manifest).\n"
+        "  2. Features → App Manifest → paste the contents of\n"
+        f"     {target}\n"
+        "  3. Save; Slack will prompt to reinstall the app if scopes or\n"
+        "     slash commands changed.\n"
+        "  4. Make sure Socket Mode is enabled and you have a bot token\n"
+        "     (xoxb-...) and app token (xapp-...) configured via\n"
+        "     `hermes setup`.\n",
+        file=sys.stderr,
+    )
+    return 0
--- a/hermes_cli/tips.py
+++ b/hermes_cli/tips.py
@ -10,8 +10,7 @@ import random

 TIPS = [
    # --- Slash Commands ---
-    "/btw <question> asks a quick side question without tools or history — great for clarifications.",
-    "/background <prompt> runs a task in a separate session while your current one stays free.",
+    "/background <prompt> (alias /bg or /btw) runs a task in a separate session while your current one stays free.",
    "/branch forks the current session so you can explore a different direction without losing progress.",
    "/compress manually compresses conversation context when things get long.",
    "/rollback lists filesystem checkpoints — restore files the agent modified to any prior state.",
--- a/hermes_state.py
+++ b/hermes_state.py
@ -832,7 +832,18 @@ class SessionDB:
        params = []

        if not include_children:
-            where_clauses.append("s.parent_session_id IS NULL")
+            # Show root sessions and branch sessions (whose parent ended with
+            # end_reason='branched' before the child was created), while still
+            # hiding sub-agent runs and compression continuations (which also
+            # carry a parent_session_id but were spawned while the parent was
+            # still live — i.e., started_at < parent.ended_at).
+            where_clauses.append(
+                "(s.parent_session_id IS NULL"
+                " OR EXISTS (SELECT 1 FROM sessions p"
+                "            WHERE p.id = s.parent_session_id"
+                "            AND p.end_reason = 'branched'"
+                "            AND s.started_at >= p.ended_at))"
+            )

        if source:
            where_clauses.append("s.source = ?")
--- a/run_agent.py
+++ b/run_agent.py
@ -892,7 +892,6 @@ class AIAgent:
        checkpoints_enabled: bool = False,
        checkpoint_max_snapshots: int = 50,
        pass_session_id: bool = False,
-        persist_session: bool = True,
    ):
        """
        Initialize the AI Agent.
@ -964,7 +963,6 @@ class AIAgent:
        self.background_review_callback = None  # Optional sync callback for gateway delivery
        self.skip_context_files = skip_context_files
        self.pass_session_id = pass_session_id
-        self.persist_session = persist_session
        self._credential_pool = credential_pool
        self.log_prefix_chars = log_prefix_chars
        self.log_prefix = f"{log_prefix} " if log_prefix else ""
@ -3109,13 +3107,28 @@ class AIAgent:
    )

    _SKILL_REVIEW_PROMPT = (
-        "Review the conversation above and consider saving or updating a skill if appropriate.\n\n"
-        "Focus on: was a non-trivial approach used to complete a task that required trial "
-        "and error, or changing course due to experiential findings along the way, or did "
-        "the user expect or desire a different method or outcome?\n\n"
-        "If a relevant skill already exists, update it with what you learned. "
-        "Otherwise, create a new skill if the approach is reusable.\n"
-        "If nothing is worth saving, just say 'Nothing to save.' and stop."
+        "Review the conversation above and consider whether a skill should be saved or updated.\n\n"
+        "Work in this order — do not skip steps:\n\n"
+        "1. SURVEY the existing skill landscape first. Call skills_list to see what you "
+        "have. If anything looks potentially relevant, skill_view it before deciding. "
+        "You are looking for the CLASS of task that just happened, not the exact task. "
+        "Example: a successful Tauri build is in the class \"desktop app build "
+        "troubleshooting\", not \"fix my specific Tauri error today\".\n\n"
+        "2. THINK CLASS-FIRST. What general pattern of task did the user just complete? "
+        "What conditions will trigger this pattern again? Describe the class in one "
+        "sentence before looking at what to save.\n\n"
+        "3. PREFER GENERALIZING AN EXISTING SKILL over creating a new one. If a skill "
+        "already covers the class — even partially — update it (skill_manage patch) "
+        "with the new insight. Broaden its \"when to use\" trigger if needed.\n\n"
+        "4. ONLY CREATE A NEW SKILL when no existing skill reasonably covers the class. "
+        "When you create one, name and scope it at the class level "
+        "(\"react-i18n-setup\", not \"add-i18n-to-my-dashboard-app\"). The trigger "
+        "section must describe the class of situations, not this one session.\n\n"
+        "5. If you notice two existing skills that overlap, note it in your response "
+        "so a future review can consolidate them. Do not consolidate now unless the "
+        "overlap is obvious and low-risk.\n\n"
+        "Only act when something is genuinely worth saving. "
+        "If nothing stands out, just say 'Nothing to save.' and stop."
    )

    _COMBINED_REVIEW_PROMPT = (
@ -3125,9 +3138,16 @@ class AIAgent:
        "about how you should behave, their work style, or ways they want you to operate? "
        "If so, save using the memory tool.\n\n"
        "**Skills**: Was a non-trivial approach used to complete a task that required trial "
-        "and error, or changing course due to experiential findings along the way, or did "
-        "the user expect or desire a different method or outcome? If a relevant skill "
-        "already exists, update it. Otherwise, create a new one if the approach is reusable.\n\n"
+        "and error, changing course due to experiential findings, or a different method "
+        "or outcome than the user expected? If so, work in this order:\n"
+        "  a. SURVEY existing skills first (skills_list, then skill_view on candidates).\n"
+        "  b. Identify the CLASS of task, not the specific task "
+        "(\"desktop app build troubleshooting\", not \"fix my Tauri error\").\n"
+        "  c. PREFER UPDATING/GENERALIZING an existing skill that covers the class.\n"
+        "  d. ONLY CREATE A NEW SKILL if no existing one covers the class. Scope at "
+        "the class level, not this one session.\n"
+        "  e. If you notice overlapping skills during the survey, note it so a future "
+        "review can consolidate them.\n\n"
        "Only act if there's something genuinely worth saving. "
        "If nothing stands out, just say 'Nothing to save.' and stop."
    )
@ -3225,12 +3245,25 @@ class AIAgent:
                with open(os.devnull, "w") as _devnull, \
                     contextlib.redirect_stdout(_devnull), \
                     contextlib.redirect_stderr(_devnull):
+                    # Inherit the parent agent's live runtime (provider, model,
+                    # base_url, api_key, api_mode) so the fork uses the exact
+                    # same credentials the main turn is using.  Without this,
+                    # AIAgent.__init__ re-runs auto-resolution from env vars,
+                    # which fails for OAuth-only providers, session-scoped
+                    # creds, or credential-pool setups where the resolver can't
+                    # reconstruct auth from scratch -- producing the spurious
+                    # "No LLM provider configured" warning at end of turn.
+                    _parent_runtime = self._current_main_runtime()
                    review_agent = AIAgent(
                        model=self.model,
                        max_iterations=8,
                        quiet_mode=True,
                        platform=self.platform,
                        provider=self.provider,
+                        api_mode=_parent_runtime.get("api_mode") or None,
+                        base_url=_parent_runtime.get("base_url") or None,
+                        api_key=_parent_runtime.get("api_key") or None,
+                        credential_pool=getattr(self, "_credential_pool", None),
                        parent_session_id=self.session_id,
                    )
                    review_agent._memory_write_origin = "background_review"
@ -3331,10 +3364,7 @@ class AIAgent:
        """Save session state to both JSON log and SQLite on any exit path.

        Ensures conversations are never lost, even on errors or early returns.
-        Skipped when ``persist_session=False`` (ephemeral helper flows).
        """
-        if not self.persist_session:
-            return
        self._apply_persist_user_message_override(messages)
        self._session_messages = messages
        self._save_session_log(messages)
@ -7851,7 +7881,17 @@ class AIAgent:
            api_msg["reasoning_content"] = existing
            return

-        # 2. DeepSeek / Kimi thinking mode: tool-call turns that lack
+        # 2. Healthy session: promote 'reasoning' field to 'reasoning_content'
+        # for providers that use the internal 'reasoning' key.
+        # This must happen BEFORE the DeepSeek/Kimi tool-call check so that
+        # genuine reasoning content is not overwritten by the empty-string
+        # fallback (#15812 regression in PR #15478).
+        normalized_reasoning = source_msg.get("reasoning")
+        if isinstance(normalized_reasoning, str) and normalized_reasoning:
+            api_msg["reasoning_content"] = normalized_reasoning
+            return
+
+        # 3. DeepSeek / Kimi thinking mode: tool-call turns that lack
        # reasoning_content are "poisoned history" — a prior provider (MiniMax,
        # etc.) left them empty. DeepSeek returns HTTP 400 if reasoning_content
        # is absent on replay; inject "" to satisfy the provider's requirement
@ -7867,13 +7907,6 @@ class AIAgent:
            api_msg["reasoning_content"] = ""
            return

-        # 3. Healthy session: promote 'reasoning' field to 'reasoning_content'
-        # for providers that use the internal 'reasoning' key.
-        normalized_reasoning = source_msg.get("reasoning")
-        if isinstance(normalized_reasoning, str) and normalized_reasoning:
-            api_msg["reasoning_content"] = normalized_reasoning
-            return
-
        # 4. DeepSeek / Kimi thinking mode: all assistant messages need
        # reasoning_content. Inject "" to satisfy the provider's requirement
        # when no explicit reasoning content is present.
@ -11007,36 +11040,69 @@ class AIAgent:
                                continue

                    # ── Nous Portal: record rate limit & skip retries ─────
-                    # When Nous returns a 429, record the reset time to a
-                    # shared file so ALL sessions (cron, gateway, auxiliary)
-                    # know not to pile on.  Then skip further retries —
-                    # each one burns another RPH request and deepens the
-                    # rate limit hole.  The retry loop's top-of-iteration
-                    # guard will catch this on the next pass and try
-                    # fallback or bail with a clear message.
+                    # When Nous returns a 429 that is a genuine account-
+                    # level rate limit, record the reset time to a shared
+                    # file so ALL sessions (cron, gateway, auxiliary) know
+                    # not to pile on, then skip further retries -- each
+                    # one burns another RPH request and deepens the hole.
+                    # The retry loop's top-of-iteration guard will catch
+                    # this on the next pass and try fallback or bail.
+                    #
+                    # IMPORTANT: Nous Portal multiplexes multiple upstream
+                    # providers (DeepSeek, Kimi, MiMo, Hermes).  A 429 can
+                    # also mean an UPSTREAM provider is out of capacity
+                    # for one specific model -- transient, clears in
+                    # seconds, nothing to do with the caller's quota.
+                    # Tripping the cross-session breaker on that would
+                    # block every Nous model for minutes.  We use
+                    # ``is_genuine_nous_rate_limit`` to tell the two
+                    # apart via the 429's own x-ratelimit-* headers and
+                    # the last-known-good state captured on the previous
+                    # successful response.
                    if (
                        is_rate_limited
                        and self.provider == "nous"
                        and classified.reason == FailoverReason.rate_limit
                        and not recovered_with_pool
                    ):
+                        _genuine_nous_rate_limit = False
                        try:
-                            from agent.nous_rate_guard import record_nous_rate_limit
+                            from agent.nous_rate_guard import (
+                                is_genuine_nous_rate_limit,
+                                record_nous_rate_limit,
+                            )
                            _err_resp = getattr(api_error, "response", None)
                            _err_hdrs = (
                                getattr(_err_resp, "headers", None)
                                if _err_resp else None
                            )
-                            record_nous_rate_limit(
+                            _genuine_nous_rate_limit = is_genuine_nous_rate_limit(
                                headers=_err_hdrs,
-                                error_context=error_context,
+                                last_known_state=self._rate_limit_state,
                            )
+                            if _genuine_nous_rate_limit:
+                                record_nous_rate_limit(
+                                    headers=_err_hdrs,
+                                    error_context=error_context,
+                                )
+                            else:
+                                logging.info(
+                                    "Nous 429 looks like upstream capacity "
+                                    "(no exhausted bucket in headers or "
+                                    "last-known state) -- not tripping "
+                                    "cross-session breaker."
+                                )
                        except Exception:
                            pass
-                        # Skip straight to max_retries — the top-of-loop
-                        # guard will handle fallback or bail cleanly.
-                        retry_count = max_retries
-                        continue
+                        if _genuine_nous_rate_limit:
+                            # Skip straight to max_retries -- the
+                            # top-of-loop guard will handle fallback or
+                            # bail cleanly.
+                            retry_count = max_retries
+                            continue
+                        # Upstream capacity 429: fall through to normal
+                        # retry logic.  A different model (or the same
+                        # model a moment later) will typically succeed.

                    is_payload_too_large = (
                        classified.reason == FailoverReason.payload_too_large
--- a/scripts/build_model_catalog.py
+++ b/scripts/build_model_catalog.py
@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+"""Build the Hermes Model Catalog — a centralized JSON manifest of curated models.
+
+This script reads the in-repo hardcoded curated lists (``OPENROUTER_MODELS``,
+``_PROVIDER_MODELS["nous"]``) and writes them to a JSON manifest that the
+Hermes CLI fetches at runtime. Publishing the catalog through the docs site
+lets maintainers update model lists without shipping a Hermes release.
+
+The runtime fetcher falls back to the same in-repo hardcoded lists if the
+manifest is unreachable, so this script is a convenience for keeping the
+manifest in sync — not a source of truth.
+
+Usage::
+
+    python scripts/build_model_catalog.py
+
+Output: ``website/static/api/model-catalog.json``
+
+Live URL (after ``deploy-site.yml`` runs on merge to main):
+``https://hermes-agent.nousresearch.com/docs/api/model-catalog.json``
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+from datetime import datetime, timezone
+
+REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, REPO_ROOT)
+
+# Ensure HERMES_HOME is set for imports that touch it at module level.
+os.environ.setdefault("HERMES_HOME", os.path.join(os.path.expanduser("~"), ".hermes"))
+
+from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS  # noqa: E402
+
+OUTPUT_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "model-catalog.json")
+CATALOG_VERSION = 1
+
+
+def build_catalog() -> dict:
+    """Return the model-catalog manifest as a nested dict.
+
+    The ``openrouter`` block carries ``id`` + ``description`` for each pair
+    in ``OPENROUTER_MODELS``; the ``nous`` block carries only ``id`` for each
+    entry of ``_PROVIDER_MODELS["nous"]`` (empty when that key is absent).
+    ``updated_at`` is the current UTC time at second precision.
+    """
+    openrouter_block = {
+        "metadata": {
+            "display_name": "OpenRouter",
+            "note": (
+                "Descriptions drive picker badges. Live /api/v1/models "
+                "filters curated ids by tool-calling support and free pricing."
+            ),
+        },
+        "models": [
+            {"id": model_id, "description": blurb}
+            for model_id, blurb in OPENROUTER_MODELS
+        ],
+    }
+
+    nous_block = {
+        "metadata": {
+            "display_name": "Nous Portal",
+            "note": (
+                "Free-tier gating is determined live via Portal pricing "
+                "(partition_nous_models_by_tier), not this manifest."
+            ),
+        },
+        "models": [
+            {"id": model_id}
+            for model_id in _PROVIDER_MODELS.get("nous", [])
+        ],
+    }
+
+    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    return {
+        "version": CATALOG_VERSION,
+        "updated_at": stamp,
+        "metadata": {
+            "source": "hermes-agent repo",
+            "docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog",
+        },
+        "providers": {
+            "openrouter": openrouter_block,
+            "nous": nous_block,
+        },
+    }
+
+
+def main() -> int:
+    """Write the catalog to ``OUTPUT_PATH`` and print a per-provider summary.
+
+    Creates the output directory if needed, dumps the catalog as indented
+    JSON followed by a trailing newline, then reports the model count of
+    every provider block.
+
+    Returns:
+        Always ``0`` (used as the process exit status).
+    """
+    catalog = build_catalog()
+    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
+    # Explicit UTF-8 so the generated manifest does not depend on the
+    # locale's default encoding of whichever machine runs this script.
+    with open(OUTPUT_PATH, "w", encoding="utf-8") as fh:
+        json.dump(catalog, fh, indent=2)
+        fh.write("\n")
+
+    print(f"Wrote {OUTPUT_PATH}")
+    for provider, block in catalog["providers"].items():
+        print(f"  {provider}: {len(block['models'])} models")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/scripts/release.py
+++ b/scripts/release.py
@ -43,6 +43,7 @@ AUTHOR_MAP = {
    "teknium1@gmail.com": "teknium1",
    "teknium@nousresearch.com": "teknium1",
    "127238744+teknium1@users.noreply.github.com": "teknium1",
+    "focusflow.app.help@gmail.com": "yes999zc",
    "343873859@qq.com": "DrStrangerUJN",
    "uzmpsk.dilekakbas@gmail.com": "dlkakbs",
    "jefferson@heimdallstrategy.com": "Mind-Dragon",
@ -69,6 +70,8 @@ AUTHOR_MAP = {
    "keira.voss94@gmail.com": "keiravoss94",
    "16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
    "fqsy1416@gmail.com": "EKKOLearnAI",
+    "octo-patch@github.com": "octo-patch",
+    "math0r-be@github.com": "math0r-be",
    "simbamax99@gmail.com": "simbam99",
    "iris@growthpillars.co": "irispillars",
    "185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
@ -118,6 +121,7 @@ AUTHOR_MAP = {
    "nocoo@users.noreply.github.com": "nocoo",
    "30841158+n-WN@users.noreply.github.com": "n-WN",
    "tsuijinglei@gmail.com": "hiddenpuppy",
+    "buraysandro9@gmail.com": "ygd58",
    "jerome@clawwork.ai": "HiddenPuppy",
    "jerome.benoit@sap.com": "jerome-benoit",
    "wysie@users.noreply.github.com": "Wysie",
--- a/skills/autonomous-ai-agents/hermes-agent/SKILL.md
+++ b/skills/autonomous-ai-agents/hermes-agent/SKILL.md
@ -281,7 +281,6 @@ Type these during an interactive chat session.
 ### Utility
 ```
 /branch (/fork)      Branch the current session
-/btw                 Ephemeral side question (doesn't interrupt main task)
 /fast                Toggle priority/fast processing
 /browser             Open CDP browser connection
 /history             Show conversation history (CLI)
--- a/skills/feeds/DESCRIPTION.md
+++ b/skills/feeds/DESCRIPTION.md
@ -1,3 +0,0 @@
---
-description: Skills for monitoring, aggregating, and processing RSS feeds, blogs, and web content sources.
---
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@ -192,6 +192,43 @@ class TestDefaultContextLengths:
                    f"{model_id}: expected {expected_ctx}, got {actual}"
                )

+    def test_deepseek_v4_models_1m_context(self):
+        """DeepSeek V4 ids (bare and vendor-prefixed) must resolve to 1M."""
+        from agent.model_metadata import get_model_context_length
+        from unittest.mock import patch as mock_patch
+
+        one_million = 1_000_000
+
+        # Part 1: the static table carries every expected key at 1M.
+        table_keys = (
+            "deepseek-v4-pro",
+            "deepseek-v4-flash",
+            "deepseek-chat",
+            "deepseek-reasoner",
+        )
+        for key in table_keys:
+            value = one_million
+            assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing"
+            assert DEFAULT_CONTEXT_LENGTHS[key] == value, (
+                f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}"
+            )
+
+        # Part 2: with every remote/cached source stubbed out, resolution
+        # falls back to the table.  Longest-first substring matching must
+        # resolve both the bare V4 ids (native DeepSeek) and the
+        # vendor-prefixed forms (OpenRouter / Nous Portal) to 1M without
+        # probing down to the legacy 128K ``deepseek`` substring fallback.
+        model_ids = (
+            "deepseek-v4-pro",
+            "deepseek-v4-flash",
+            "deepseek/deepseek-v4-pro",
+            "deepseek/deepseek-v4-flash",
+            "deepseek-chat",
+            "deepseek-reasoner",
+        )
+        with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
+             mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
+             mock_patch("agent.model_metadata.get_cached_context_length", return_value=None):
+            for model_id in model_ids:
+                expected_ctx = one_million
+                actual = get_model_context_length(model_id)
+                assert actual == expected_ctx, (
+                    f"{model_id}: expected {expected_ctx}, got {actual}"
+                )
+
+
    def test_all_values_positive(self):
        for key, value in DEFAULT_CONTEXT_LENGTHS.items():
            assert value > 0, f"{key} has non-positive context length"
@ -303,7 +340,9 @@ class TestCodexOAuthContextLength:
        from agent.model_metadata import get_model_context_length

        # OpenRouter — should hit its own catalog path first; when mocked
-        # empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (400k).
+        # empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (1.05M,
+        # matching the real direct-API value — Codex OAuth's 272k cap is
+        # provider-specific and must not leak here).
        with patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
             patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
             patch("agent.model_metadata.get_cached_context_length", return_value=None), \
@ -314,7 +353,7 @@ class TestCodexOAuthContextLength:
                api_key="",
                provider="openrouter",
            )
-        assert ctx == 400_000, (
+        assert ctx == 1_050_000, (
            f"Non-Codex gpt-5.5 resolved to {ctx}; Codex 272k override "
            "leaked outside openai-codex provider"
        )
--- a/tests/agent/test_nous_rate_guard.py
+++ b/tests/agent/test_nous_rate_guard.py
@ -251,3 +251,141 @@ class TestAuxiliaryClientIntegration:
        monkeypatch.setattr(aux, "_read_nous_auth", lambda: None)
        result = aux._try_nous()
        assert result == (None, None)
+
+
+class TestIsGenuineNousRateLimit:
+    """Separate account-level 429s from upstream-capacity 429s.
+
+    Nous Portal multiplexes upstreams (DeepSeek, Kimi, MiMo, Hermes).
+    Only a real user-quota 429 should trip the cross-session breaker;
+    a 429 caused by an upstream running out of capacity should not.
+    """
+
+    @staticmethod
+    def _bucket(resource, limit, remaining, reset, window=""):
+        """Return the limit/remaining/reset header triple for one bucket.
+
+        ``window`` is the header-name suffix of the bucket ("" or "-1h").
+        Numeric arguments are converted to strings so the resulting dict
+        holds string header values.
+        """
+        name = f"{resource}{window}"
+        return {
+            f"x-ratelimit-limit-{name}": str(limit),
+            f"x-ratelimit-remaining-{name}": str(remaining),
+            f"x-ratelimit-reset-{name}": str(reset),
+        }
+
+    def test_exhausted_hourly_bucket_in_429_headers_is_genuine(self):
+        from agent.nous_rate_guard import is_genuine_nous_rate_limit
+
+        # Hourly request bucket is at zero; the un-suffixed one is healthy.
+        response_headers = {
+            **self._bucket("requests", 800, 0, 3100, window="-1h"),
+            **self._bucket("requests", 200, 198, 40),
+        }
+        verdict = is_genuine_nous_rate_limit(headers=response_headers)
+        assert verdict is True
+
+    def test_exhausted_tokens_bucket_is_genuine(self):
+        from agent.nous_rate_guard import is_genuine_nous_rate_limit
+
+        response_headers = {
+            # reset 45 is < 60s threshold -> not genuine on its own
+            **self._bucket("tokens", 800000, 0, 45),
+            # reset 1800 is >= 60s threshold -> genuine
+            **self._bucket("tokens", 8000000, 0, 1800, window="-1h"),
+        }
+        verdict = is_genuine_nous_rate_limit(headers=response_headers)
+        assert verdict is True
+
+    def test_healthy_headers_on_429_are_upstream_capacity(self):
+        # Classic upstream-capacity symptom: every bucket reported by the
+        # Nous edge has plenty of headroom, yet the response is a 429
+        # because the upstream (DeepSeek / Kimi / ...) is out of capacity.
+        from agent.nous_rate_guard import is_genuine_nous_rate_limit
+
+        response_headers = {
+            **self._bucket("requests", 200, 198, 40),
+            **self._bucket("requests", 800, 750, 3100, window="-1h"),
+            **self._bucket("tokens", 800000, 790000, 40),
+            **self._bucket("tokens", 8000000, 7800000, 3100, window="-1h"),
+        }
+        verdict = is_genuine_nous_rate_limit(headers=response_headers)
+        assert verdict is False
+
+    def test_bare_429_with_no_headers_is_upstream(self):
+        from agent.nous_rate_guard import is_genuine_nous_rate_limit
+
+        # No headers, empty headers, and headers without any rate-limit
+        # information must all be classified the same way.
+        for bare_headers in (None, {}, {"content-type": "application/json"}):
+            assert is_genuine_nous_rate_limit(headers=bare_headers) is False
+
+    def test_exhausted_bucket_with_short_reset_is_not_genuine(self):
+        # remaining == 0 with a reset under 60s is almost certainly a
+        # secondary per-minute throttle that clears immediately, so it is
+        # not worth tripping the cross-session breaker.
+        from agent.nous_rate_guard import is_genuine_nous_rate_limit
+
+        response_headers = self._bucket("requests", 200, 0, 30)
+        verdict = is_genuine_nous_rate_limit(headers=response_headers)
+        assert verdict is False
+
+    def test_last_known_state_with_exhausted_bucket_triggers_genuine(self):
+        # The 429 itself carries no rate-limit headers, but the previous
+        # successful response already showed the hourly bucket exhausted,
+        # so the 429 is almost certainly that same limit continuing.
+        from agent.nous_rate_guard import is_genuine_nous_rate_limit
+        from agent.rate_limit_tracker import parse_rate_limit_headers
+
+        prior_headers = {
+            **self._bucket("requests", 800, 0, 2000, window="-1h"),
+            **self._bucket("requests", 200, 100, 30),
+            **self._bucket("tokens", 800000, 700000, 30),
+            **self._bucket("tokens", 8000000, 7000000, 2000, window="-1h"),
+        }
+        remembered = parse_rate_limit_headers(prior_headers, provider="nous")
+        verdict = is_genuine_nous_rate_limit(
+            headers=None, last_known_state=remembered
+        )
+        assert verdict is True
+
+    def test_last_known_state_all_healthy_stays_upstream(self):
+        # Healthy prior state followed by a bare 429: upstream capacity.
+        from agent.nous_rate_guard import is_genuine_nous_rate_limit
+        from agent.rate_limit_tracker import parse_rate_limit_headers
+
+        prior_headers = {
+            **self._bucket("requests", 800, 750, 2000, window="-1h"),
+            **self._bucket("requests", 200, 180, 30),
+            **self._bucket("tokens", 800000, 790000, 30),
+            **self._bucket("tokens", 8000000, 7900000, 2000, window="-1h"),
+        }
+        remembered = parse_rate_limit_headers(prior_headers, provider="nous")
+        verdict = is_genuine_nous_rate_limit(
+            headers=None, last_known_state=remembered
+        )
+        assert verdict is False
+
+    def test_none_last_state_and_no_headers_is_upstream(self):
+        from agent.nous_rate_guard import is_genuine_nous_rate_limit
+
+        verdict = is_genuine_nous_rate_limit(
+            headers=None, last_known_state=None
+        )
+        assert verdict is False
--- a/tests/agent/test_onboarding.py
+++ b/tests/agent/test_onboarding.py
@ -0,0 +1,164 @@
+"""Tests for agent/onboarding.py — contextual first-touch hint helpers."""
+
+from __future__ import annotations
+
+import yaml
+import pytest
+
+from agent.onboarding import (
+    BUSY_INPUT_FLAG,
+    TOOL_PROGRESS_FLAG,
+    busy_input_hint_cli,
+    busy_input_hint_gateway,
+    is_seen,
+    mark_seen,
+    tool_progress_hint_cli,
+    tool_progress_hint_gateway,
+)
+
+
+class TestIsSeen:
+    """is_seen() is True for a flag stored as True, False in the other cases here."""
+
+    def test_empty_config_unseen(self):
+        cfg = {}
+        assert is_seen(cfg, BUSY_INPUT_FLAG) is False
+
+    def test_missing_onboarding_unseen(self):
+        # Config has other sections but no "onboarding" key at all.
+        cfg = {"display": {}}
+        assert is_seen(cfg, BUSY_INPUT_FLAG) is False
+
+    def test_onboarding_not_dict_unseen(self):
+        # A malformed (non-mapping) onboarding section reads as unseen.
+        cfg = {"onboarding": "nope"}
+        assert is_seen(cfg, BUSY_INPUT_FLAG) is False
+
+    def test_seen_dict_missing_flag(self):
+        cfg = {"onboarding": {"seen": {}}}
+        assert is_seen(cfg, BUSY_INPUT_FLAG) is False
+
+    def test_seen_flag_true(self):
+        seen = {BUSY_INPUT_FLAG: True}
+        assert is_seen({"onboarding": {"seen": seen}}, BUSY_INPUT_FLAG) is True
+
+    def test_seen_flag_falsy(self):
+        seen = {BUSY_INPUT_FLAG: False}
+        assert is_seen({"onboarding": {"seen": seen}}, BUSY_INPUT_FLAG) is False
+
+    def test_other_flags_isolated(self):
+        # Marking one flag must not make a different flag read as seen.
+        seen = {BUSY_INPUT_FLAG: True}
+        assert is_seen({"onboarding": {"seen": seen}}, TOOL_PROGRESS_FLAG) is False
+
+
+class TestMarkSeen:
+    """mark_seen() writes onboarding.seen.<flag> = True into a YAML config file."""
+
+    @staticmethod
+    def _reload(path):
+        """Parse and return the YAML currently stored at *path*."""
+        return yaml.safe_load(path.read_text())
+
+    @staticmethod
+    def _seed(path, data):
+        """Write *data* to *path* as YAML before the call under test."""
+        path.write_text(yaml.safe_dump(data))
+
+    def test_creates_missing_file_and_sets_flag(self, tmp_path):
+        cfg_path = tmp_path / "config.yaml"
+
+        outcome = mark_seen(cfg_path, BUSY_INPUT_FLAG)
+
+        assert outcome is True
+        assert self._reload(cfg_path)["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
+
+    def test_preserves_other_config(self, tmp_path):
+        cfg_path = tmp_path / "config.yaml"
+        self._seed(cfg_path, {
+            "model": {"default": "claude-sonnet-4.6"},
+            "display": {"skin": "default"},
+        })
+
+        assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
+
+        on_disk = self._reload(cfg_path)
+        assert on_disk["model"]["default"] == "claude-sonnet-4.6"
+        assert on_disk["display"]["skin"] == "default"
+        assert on_disk["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
+
+    def test_preserves_other_seen_flags(self, tmp_path):
+        cfg_path = tmp_path / "config.yaml"
+        self._seed(cfg_path, {
+            "onboarding": {"seen": {TOOL_PROGRESS_FLAG: True}},
+        })
+
+        assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
+
+        seen = self._reload(cfg_path)["onboarding"]["seen"]
+        assert seen[TOOL_PROGRESS_FLAG] is True
+        assert seen[BUSY_INPUT_FLAG] is True
+
+    def test_idempotent(self, tmp_path):
+        cfg_path = tmp_path / "config.yaml"
+
+        mark_seen(cfg_path, BUSY_INPUT_FLAG)
+        after_first = self._reload(cfg_path)
+
+        # The file may be rewritten by the second call; only the parsed
+        # YAML contents are required to be unchanged.
+        mark_seen(cfg_path, BUSY_INPUT_FLAG)
+        after_second = self._reload(cfg_path)
+
+        assert after_first == after_second
+
+    def test_handles_non_dict_onboarding(self, tmp_path):
+        cfg_path = tmp_path / "config.yaml"
+        self._seed(cfg_path, {"onboarding": "corrupted"})
+
+        assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
+        assert self._reload(cfg_path)["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
+
+    def test_handles_non_dict_seen(self, tmp_path):
+        cfg_path = tmp_path / "config.yaml"
+        self._seed(cfg_path, {"onboarding": {"seen": "corrupted"}})
+
+        assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
+        assert self._reload(cfg_path)["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
+
+
+class TestHintMessages:
+    """Hint builders must name the slash command relevant to the current mode."""
+
+    def test_busy_input_hint_gateway_interrupt(self):
+        hint = busy_input_hint_gateway("interrupt")
+        assert "/busy queue" in hint
+        assert "interrupted" in hint.lower()
+
+    def test_busy_input_hint_gateway_queue(self):
+        hint = busy_input_hint_gateway("queue")
+        assert "/busy interrupt" in hint
+        assert "queued" in hint.lower()
+
+    def test_busy_input_hint_cli_interrupt(self):
+        hint = busy_input_hint_cli("interrupt")
+        assert "/busy queue" in hint
+
+    def test_busy_input_hint_cli_queue(self):
+        hint = busy_input_hint_cli("queue")
+        assert "/busy interrupt" in hint
+
+    def test_tool_progress_hints_mention_verbose(self):
+        for build_hint in (tool_progress_hint_gateway, tool_progress_hint_cli):
+            assert "/verbose" in build_hint()
+
+    def test_hints_are_not_empty(self):
+        # Same six hints, in the same order: gateway then CLI busy hints
+        # for each mode, followed by the two tool-progress hints.
+        hints = [
+            build_hint(mode)
+            for build_hint in (busy_input_hint_gateway, busy_input_hint_cli)
+            for mode in ("queue", "interrupt")
+        ]
+        hints.append(tool_progress_hint_gateway())
+        hints.append(tool_progress_hint_cli())
+
+        for hint in hints:
+            assert hint.strip()
+
+
+class TestRoundTrip:
+    """A flag written by mark_seen must be reported by is_seen after reload."""
+
+    @staticmethod
+    def _reload(path):
+        """Parse and return the YAML currently stored at *path*."""
+        return yaml.safe_load(path.read_text())
+
+    def test_mark_then_is_seen(self, tmp_path):
+        cfg_path = tmp_path / "config.yaml"
+
+        assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
+
+        reloaded = self._reload(cfg_path)
+        assert is_seen(reloaded, BUSY_INPUT_FLAG) is True
+        # Only the flag that was marked may read as seen.
+        assert is_seen(reloaded, TOOL_PROGRESS_FLAG) is False
+
+    def test_mark_both_flags_independently(self, tmp_path):
+        cfg_path = tmp_path / "config.yaml"
+
+        for flag in (BUSY_INPUT_FLAG, TOOL_PROGRESS_FLAG):
+            mark_seen(cfg_path, flag)
+
+        reloaded = self._reload(cfg_path)
+        assert is_seen(reloaded, BUSY_INPUT_FLAG) is True
+        assert is_seen(reloaded, TOOL_PROGRESS_FLAG) is True
--- a/tests/cli/test_branch_command.py
+++ b/tests/cli/test_branch_command.py
@ -160,6 +160,30 @@ class TestBranchCommandCLI:
        assert agent.reset_session_state.called
        assert agent._last_flushed_db_idx == 4  # len(conversation_history)

+    def test_branch_updates_agent_session_log_file(self, cli_instance, session_db, tmp_path):
+        """Branching must redirect the agent's session_log_file to the new session's path."""
+        from cli import HermesCLI
+
+        logs_dir = tmp_path / "sessions"
+        logs_dir.mkdir()
+
+        # Stand-in agent whose log file still points at the pre-branch session.
+        agent = MagicMock()
+        agent._last_flushed_db_idx = 0
+        agent.logs_dir = logs_dir
+        agent.session_log_file = logs_dir / f"session_{cli_instance.session_id}.json"
+        cli_instance.agent = agent
+
+        # Remember the original path so the redirect can be proven below.
+        old_log_file = agent.session_log_file
+        HermesCLI._handle_branch_command(cli_instance, "/branch")
+
+        # cli_instance.session_id is re-read after the command; the agent's
+        # log file is expected to be derived from that value.
+        new_session_id = cli_instance.session_id
+        expected_log = logs_dir / f"session_{new_session_id}.json"
+        assert agent.session_log_file == expected_log, (
+            "session_log_file must point to the branch session, not the original"
+        )
+        assert agent.session_log_file != old_log_file
+
    def test_branch_sets_resumed_flag(self, cli_instance, session_db):
        """Branch should set _resumed=True to prevent auto-title generation."""
        from cli import HermesCLI
--- a/tests/gateway/test_agent_cache.py
+++ b/tests/gateway/test_agent_cache.py
@ -1043,3 +1043,132 @@ class TestAgentCacheIdleResume:
            new_agent.close()
        except Exception:
            pass
+
+
+_FAKE_NOW = 10_000.0  # Fixed epoch for deterministic time assertions
+
+
+class TestCachedAgentInactivityReset:
+    """The inactivity clock may only be reset when _interrupt_depth == 0.
+
+    A depth-0 (fresh) turn needs the reset so that a session which sat
+    idle for 29 min does not trip the watchdog before the new turn makes
+    its first API call (#9051).  On interrupt-recursive turns
+    (_interrupt_depth > 0) the clock must keep accumulating, otherwise a
+    turn stuck in an interrupt loop never reaches the 30-min timeout
+    (#15654).
+    """
+
+    def _fake_agent(self, stale_seconds: float = 1800.0):
+        """Return a mock agent last active *stale_seconds* before _FAKE_NOW."""
+        agent = MagicMock()
+        agent._api_call_count = 10
+        agent._last_activity_desc = "previous turn activity"
+        agent._last_activity_ts = _FAKE_NOW - stale_seconds
+        return agent
+
+    @staticmethod
+    def _start_turn(agent, depth, *, freeze_clock=False):
+        """Call _init_cached_agent_for_turn for *agent* at *depth*.
+
+        With ``freeze_clock`` the ``time`` name in ``gateway.run`` is
+        patched so ``time.time()`` yields _FAKE_NOW for the call.
+        """
+        from gateway.run import GatewayRunner
+
+        if not freeze_clock:
+            GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=depth)
+            return
+        with patch("gateway.run.time") as frozen:
+            frozen.time.return_value = _FAKE_NOW
+            GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=depth)
+
+    def test_fresh_turn_resets_idle_clock(self):
+        """interrupt_depth=0: clock resets so a post-idle turn gets a
+        fresh 30-min inactivity window (guard for #9051)."""
+        agent = self._fake_agent(stale_seconds=1800.0)
+        stale_ts = agent._last_activity_ts
+
+        self._start_turn(agent, 0, freeze_clock=True)
+
+        assert agent._last_activity_ts == _FAKE_NOW, (
+            "_last_activity_ts was not reset on a fresh turn (interrupt_depth=0)"
+        )
+        assert agent._last_activity_ts > stale_ts, (
+            "Stale idle time should be cleared so the new turn gets a fresh window"
+        )
+
+    def test_fresh_turn_resets_desc(self):
+        """interrupt_depth=0: description is updated to reflect the new turn."""
+        agent = self._fake_agent()
+
+        self._start_turn(agent, 0, freeze_clock=True)
+
+        assert agent._last_activity_desc == "starting new turn (cached)"
+
+    def test_interrupt_turn_preserves_idle_clock(self):
+        """interrupt_depth=1: clock preserved so accumulated stuck-turn
+        idle time is not discarded by an interrupt-recursive re-entry (#15654)."""
+        agent = self._fake_agent(stale_seconds=1200.0)
+        stale_ts = agent._last_activity_ts
+
+        self._start_turn(agent, 1)
+
+        assert agent._last_activity_ts == stale_ts, (
+            "_last_activity_ts must not be reset on interrupt-recursive turns "
+            "(interrupt_depth>0) — the watchdog needs the accumulated idle time"
+        )
+
+    def test_interrupt_turn_preserves_desc(self):
+        """interrupt_depth=1: desc preserved — it is semantically paired with ts."""
+        agent = self._fake_agent(stale_seconds=1200.0)
+
+        self._start_turn(agent, 1)
+
+        assert agent._last_activity_desc == "previous turn activity", (
+            "_last_activity_desc must not change on interrupt-recursive turns; "
+            "it describes the activity *at* _last_activity_ts"
+        )
+
+    def test_deep_interrupt_recursion_preserves_idle_clock(self):
+        """interrupt_depth=MAX-1: clock still preserved at any non-zero depth."""
+        agent = self._fake_agent(stale_seconds=600.0)
+        stale_ts = agent._last_activity_ts
+
+        self._start_turn(agent, 4)
+
+        assert agent._last_activity_ts == stale_ts
+
+    def test_api_call_count_reset_regardless_of_depth(self):
+        """_api_call_count is always reset to 0 for the new turn, at any depth."""
+        agent_fresh = self._fake_agent()
+        agent_interrupted = self._fake_agent()
+
+        self._start_turn(agent_fresh, 0, freeze_clock=True)
+        self._start_turn(agent_interrupted, 1)
+
+        for agent in (agent_fresh, agent_interrupted):
+            assert agent._api_call_count == 0
+
+    def test_watchdog_accumulation_across_recursive_turns(self):
+        """Scenario: stuck turn + user interrupt → recursive turn.
+
+        The idle time seen by the watchdog must reflect the full stuck
+        duration, not restart from zero on the recursive re-entry.
+        """
+        STUCK_FOR = 1750.0
+        agent = self._fake_agent(stale_seconds=STUCK_FOR)
+
+        # The user sees "Still working..." and sends another message; the
+        # resulting interrupt makes _run_agent recurse at depth=1.
+        self._start_turn(agent, 1)
+
+        # The watchdog computes now - _last_activity_ts, which must still
+        # cover the whole stuck period.
+        idle_secs = _FAKE_NOW - agent._last_activity_ts
+        assert idle_secs >= STUCK_FOR - 1.0, (
+            f"Watchdog would see {idle_secs:.0f}s idle, expected ~{STUCK_FOR}s. "
+            "Inactivity timeout could not fire for a stuck interrupted turn."
+        )
--- a/tests/gateway/test_busy_session_ack.py
+++ b/tests/gateway/test_busy_session_ack.py
@ -349,3 +349,121 @@ class TestBusySessionAck:

        result = await runner._handle_active_session_busy_message(event, sk)
        assert result is False  # not handled, let default path try
+
+
+class TestBusySessionOnboardingHint:
+    """First-touch hint appended to the busy-ack the first time it fires."""
+
+    @pytest.mark.asyncio
+    async def test_first_busy_ack_appends_interrupt_hint(self, tmp_path, monkeypatch):
+        """First busy-while-running message gets an extra hint about /busy."""
+        import gateway.run as _gr
+
+        # mark_seen imports utils.atomic_yaml_write; make sure it resolves
+        # against a writable dir by pointing _hermes_home at tmp_path.
+        monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
+        # Empty gateway config: no onboarding flag has been recorded yet.
+        monkeypatch.setattr(_gr, "_load_gateway_config", lambda: {})
+
+        runner, _sentinel = _make_runner()
+        runner._busy_input_mode = "interrupt"
+        adapter = _make_adapter()
+
+        event = _make_event(text="ping")
+        sk = build_session_key(event.source)
+
+        # Register a running agent for this session key so the incoming
+        # event is handled as a busy-session message.
+        agent = MagicMock()
+        agent.get_activity_summary.return_value = {
+            "api_call_count": 3, "max_iterations": 60,
+            "current_tool": None, "last_activity_ts": time.time(),
+            "last_activity_desc": "api", "seconds_since_activity": 0.1,
+        }
+        runner._running_agents[sk] = agent
+        runner._running_agents_ts[sk] = time.time() - 5
+        runner.adapters[event.source.platform] = adapter
+
+        await runner._handle_active_session_busy_message(event, sk)
+
+        # The ack text is read from the "content" keyword of the last
+        # _send_with_retry call on the adapter.
+        call_kwargs = adapter._send_with_retry.call_args
+        content = call_kwargs.kwargs.get("content", "")
+
+        # Normal ack body
+        assert "Interrupting" in content
+        # First-touch hint appended
+        assert "First-time tip" in content
+        assert "/busy queue" in content
+
+        # The flag is now persisted to tmp_path/config.yaml
+        import yaml
+        cfg = yaml.safe_load((tmp_path / "config.yaml").read_text())
+        assert cfg["onboarding"]["seen"]["busy_input_prompt"] is True
+
+    @pytest.mark.asyncio
+    async def test_second_busy_ack_omits_hint(self, tmp_path, monkeypatch):
+        """Once the flag is marked, the hint never appears again."""
+        import gateway.run as _gr
+        import yaml
+
+        monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
+        # Pre-populate the config so is_seen() returns True from the start.
+        (tmp_path / "config.yaml").write_text(yaml.safe_dump({
+            "onboarding": {"seen": {"busy_input_prompt": True}},
+        }))
+        # Serve the gateway config straight from the file written above.
+        monkeypatch.setattr(
+            _gr, "_load_gateway_config",
+            lambda: yaml.safe_load((tmp_path / "config.yaml").read_text()),
+        )
+
+        runner, _sentinel = _make_runner()
+        runner._busy_input_mode = "interrupt"
+        adapter = _make_adapter()
+
+        event = _make_event(text="ping again")
+        sk = build_session_key(event.source)
+
+        # Same running-agent setup as the first-ack test.
+        agent = MagicMock()
+        agent.get_activity_summary.return_value = {
+            "api_call_count": 3, "max_iterations": 60,
+            "current_tool": None, "last_activity_ts": time.time(),
+            "last_activity_desc": "api", "seconds_since_activity": 0.1,
+        }
+        runner._running_agents[sk] = agent
+        runner._running_agents_ts[sk] = time.time() - 5
+        runner.adapters[event.source.platform] = adapter
+
+        await runner._handle_active_session_busy_message(event, sk)
+
+        call_kwargs = adapter._send_with_retry.call_args
+        content = call_kwargs.kwargs.get("content", "")
+
+        # Regular ack is still sent, but without the one-time hint.
+        assert "Interrupting" in content
+        assert "First-time tip" not in content
+        assert "/busy queue" not in content
+
+    @pytest.mark.asyncio
+    async def test_queue_mode_hint_points_to_interrupt(self, tmp_path, monkeypatch):
+        """In queue mode the hint should suggest /busy interrupt, not /busy queue."""
+        import gateway.run as _gr
+
+        monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
+        # Empty gateway config: no onboarding flag has been recorded yet.
+        monkeypatch.setattr(_gr, "_load_gateway_config", lambda: {})
+
+        runner, _sentinel = _make_runner()
+        runner._busy_input_mode = "queue"
+        adapter = _make_adapter()
+
+        event = _make_event(text="queue me")
+        sk = build_session_key(event.source)
+        runner.adapters[event.source.platform] = adapter
+
+        agent = MagicMock()
+        runner._running_agents[sk] = agent
+
+        # merge_pending_message_event is replaced with a mock for the call.
+        with patch("gateway.run.merge_pending_message_event"):
+            await runner._handle_active_session_busy_message(event, sk)
+
+        content = adapter._send_with_retry.call_args.kwargs.get("content", "")
+        assert "Queued for the next turn" in content
+        assert "First-time tip" in content
+        assert "/busy interrupt" in content
+        # Must NOT tell the user to /busy queue when they're already on queue.
+        assert "/busy queue" not in content
--- a/tests/gateway/test_run_progress_interrupt.py
+++ b/tests/gateway/test_run_progress_interrupt.py
@ -0,0 +1,215 @@
+"""Tests for interrupt-aware tool-progress suppression in gateway.
+
+When a user sends `stop` while the agent is executing a batch of parallel
+tool calls, the gateway's progress_callback should stop queuing 🔍 bubbles
+and the drain loop should drop any already-queued events.  Without this
+guard, the stop acknowledgement appears first but is followed by a trail
+of tool-progress bubbles for calls that were already parsed from the LLM
+response — making the interrupt feel ignored.
+"""
+
+import asyncio
+import importlib
+import sys
+import time
+import types
+from types import SimpleNamespace
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import BasePlatformAdapter, SendResult
+from gateway.session import SessionSource
+
+
+class ProgressCaptureAdapter(BasePlatformAdapter):
+    """Adapter stub that records sends, edits and typing calls in lists."""
+
+    def __init__(self, platform=Platform.TELEGRAM):
+        config = PlatformConfig(enabled=True, token="***")
+        super().__init__(config, platform)
+        # One capture list per kind of outbound call.
+        self.sent, self.edits, self.typing = [], [], []
+
+    async def connect(self) -> bool:
+        """Report a successful connection without doing anything."""
+        return True
+
+    async def disconnect(self) -> None:
+        """No-op."""
+
+    async def send(self, chat_id, content, reply_to=None, metadata=None) -> SendResult:
+        """Record the message and report success with a fixed message id."""
+        self.sent.append(dict(chat_id=chat_id, content=content))
+        return SendResult(success=True, message_id="progress-1")
+
+    async def edit_message(self, chat_id, message_id, content) -> SendResult:
+        """Record the edit and echo *message_id* back as a success."""
+        self.edits.append(dict(message_id=message_id, content=content))
+        return SendResult(success=True, message_id=message_id)
+
+    async def send_typing(self, chat_id, metadata=None) -> None:
+        """Record which chat a typing indicator was requested for."""
+        self.typing.append(chat_id)
+
+    async def stop_typing(self, chat_id) -> None:
+        """No-op."""
+
+    async def get_chat_info(self, chat_id: str):
+        """Return a minimal chat-info mapping holding only the id."""
+        return dict(id=chat_id)
+
+
+class PreInterruptAgent:
+    """Agent stub that emits one tool-progress event while not interrupted.
+
+    The event is expected to render normally.  This is the baseline for
+    the interrupted case: it shows that the harness does render events
+    when no interrupt is active.
+    """
+
+    def __init__(self, **kwargs):
+        self._interrupt_requested = False
+        self.tools = []
+        self.tool_progress_callback = kwargs.get("tool_progress_callback")
+
+    @property
+    def is_interrupted(self) -> bool:
+        """Whether an interrupt has been requested (never, for this stub)."""
+        return self._interrupt_requested
+
+    def run_conversation(self, message, conversation_history=None, task_id=None):
+        """Emit a single tool.started event, wait briefly, return a result dict."""
+        emit = self.tool_progress_callback
+        emit("tool.started", "web_search", "first search", {})
+        time.sleep(0.35)  # give the drain loop time to process the event
+        outcome = {"final_response": "done", "messages": [], "api_calls": 1}
+        return outcome
+
+
+class InterruptedAgent:
+    """Agent stub that emits tool.started events while already interrupted.
+
+    Reproduces the reported failure mode: the LLM returned N parallel
+    web_search calls, the interrupt flag flipped, and the remaining
+    events were still rendered as bubbles.  With the fix in place none
+    of them should be shown.
+    """
+
+    # Queries of the simulated parallel batch, in emission order.
+    _QUERIES = (
+        "cognee hermes",
+        "McBee deer hunting",
+        "kuzu graph db",
+        "moonshot kimi api",
+        "platform.moonshot.cn",
+    )
+
+    def __init__(self, **kwargs):
+        # Interrupted from the very start: the stop is treated as having
+        # landed before the batch begins firing tool.started events.
+        self._interrupt_requested = True
+        self.tools = []
+        self.tool_progress_callback = kwargs.get("tool_progress_callback")
+
+    @property
+    def is_interrupted(self) -> bool:
+        """Whether an interrupt has been requested (always, for this stub)."""
+        return self._interrupt_requested
+
+    def run_conversation(self, message, conversation_history=None, task_id=None):
+        """Emit the whole post-interrupt batch, wait briefly, return a result dict."""
+        # In production these come from one LLM response carrying five
+        # tool_calls; every one of them is post-interrupt here.
+        for query in self._QUERIES:
+            self.tool_progress_callback("tool.started", "web_search", query, {})
+        time.sleep(0.35)  # give the drain loop a chance to touch the queue
+        outcome = {"final_response": "interrupted", "messages": [], "api_calls": 1}
+        return outcome
+
+
+def _make_runner(adapter):
+    """Build a GatewayRunner without running __init__, wired to *adapter*.
+
+    The instance is allocated with ``object.__new__`` and then given the
+    attributes listed below by hand.
+    """
+    runner_cls = importlib.import_module("gateway.run").GatewayRunner
+    runner = object.__new__(runner_cls)
+
+    attributes = {
+        "adapters": {adapter.platform: adapter},
+        "_voice_mode": {},
+        "_prefill_messages": [],
+        "_ephemeral_system_prompt": "",
+        "_reasoning_config": None,
+        "_provider_routing": {},
+        "_fallback_model": None,
+        "_session_db": None,
+        "_running_agents": {},
+        "_session_run_generation": {},
+        "hooks": SimpleNamespace(loaded_hooks=False),
+        "config": SimpleNamespace(
+            thread_sessions_per_user=False,
+            group_sessions_per_user=False,
+            stt_enabled=False,
+        ),
+    }
+    for attr_name, attr_value in attributes.items():
+        setattr(runner, attr_name, attr_value)
+    return runner
+
+
+async def _run_once(monkeypatch, tmp_path, agent_cls, session_id):
+    """Run ``GatewayRunner._run_agent`` once with *agent_cls* as the agent.
+
+    Stub ``dotenv`` and ``run_agent`` modules are installed in
+    ``sys.modules`` (the latter exposing *agent_cls* as ``AIAgent``), the
+    tool-progress mode env var is set to "all", and a runner wired to a
+    fresh ProgressCaptureAdapter handles a single "hi" message.
+
+    Returns:
+        ``(adapter, result)``: the capture adapter and whatever
+        ``_run_agent`` returned.
+    """
+    monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "all")
+
+    # Stand-in dotenv module whose load_dotenv does nothing.
+    fake_dotenv = types.ModuleType("dotenv")
+    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
+    monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
+
+    # Stand-in run_agent module so that AIAgent resolves to the test stub.
+    fake_run_agent = types.ModuleType("run_agent")
+    fake_run_agent.AIAgent = agent_cls
+    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
+
+    adapter = ProgressCaptureAdapter()
+    runner = _make_runner(adapter)
+    gateway_run = importlib.import_module("gateway.run")
+    # Point the gateway's home directory at the per-test temp dir.
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    # Replace runtime kwarg resolution with a fixed fake API key.
+    monkeypatch.setattr(
+        gateway_run,
+        "_resolve_runtime_agent_kwargs",
+        lambda: {"api_key": "fake"},
+    )
+    source = SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id="-1001",
+        chat_type="group",
+        thread_id="17585",
+    )
+    result = await runner._run_agent(
+        message="hi",
+        context_prompt="",
+        history=[],
+        source=source,
+        session_id=session_id,
+        session_key="agent:main:telegram:group:-1001:17585",
+    )
+    return adapter, result
+
+
+@pytest.mark.asyncio
+async def test_baseline_non_interrupted_agent_renders_progress(monkeypatch, tmp_path):
+    """Harness sanity check: with is_interrupted False, progress is rendered."""
+    adapter, result = await _run_once(monkeypatch, tmp_path, PreInterruptAgent, "sess-baseline")
+    assert result["final_response"] == "done"
+
+    # Everything the adapter showed: sent messages first, then edits.
+    sent_text = " ".join(entry["content"] for entry in adapter.sent)
+    edited_text = " ".join(entry["content"] for entry in adapter.edits)
+    rendered = f"{sent_text} {edited_text}"
+
+    assert "first search" in rendered, (
+        "baseline agent should render its tool-progress event — "
+        "if this fails the test harness is broken, not the fix"
+    )
+
+
+@pytest.mark.asyncio
+async def test_progress_suppressed_when_agent_is_interrupted(monkeypatch, tmp_path):
+    """tool.started events fired after an interrupt must not render as bubbles.
+
+    Bug B from the screenshot: the user sends `stop`, the agent acks with
+    ⚡ Interrupting, yet 5 more 🔍 web_search bubbles still render because
+    their tool.started events had already been parsed from the LLM
+    response.  With the fix, both progress_callback and the drain loop
+    check is_interrupted and skip these events.
+    """
+    adapter, result = await _run_once(
+        monkeypatch, tmp_path, InterruptedAgent, "sess-interrupted"
+    )
+    assert result["final_response"] == "interrupted"
+
+    # Everything the adapter showed: sent messages first, then edits.
+    sent_text = " ".join(entry["content"] for entry in adapter.sent)
+    edited_text = " ".join(entry["content"] for entry in adapter.edits)
+    rendered = f"{sent_text} {edited_text}"
+
+    # Not a single post-interrupt query may show up in the rendered text.
+    post_interrupt_queries = (
+        "cognee hermes",
+        "McBee deer hunting",
+        "kuzu graph db",
+        "moonshot kimi api",
+        "platform.moonshot.cn",
+    )
+    for leaked_query in post_interrupt_queries:
+        assert leaked_query not in rendered, (
+            f"event '{leaked_query}' leaked into the UI after interrupt — "
+            f"progress_callback / drain loop is not checking is_interrupted"
+        )
--- a/tests/gateway/test_running_agent_session_toggles.py
+++ b/tests/gateway/test_running_agent_session_toggles.py
@ -165,3 +165,26 @@ async def test_reasoning_rejected_mid_run():
    assert result is not None
    assert "can't run mid-turn" in result
    assert "/reasoning" in result
+
+
+@pytest.mark.asyncio
+async def test_btw_dispatches_mid_run():
+    """/btw mid-run must dispatch to /background's handler, not hit the catch-all.
+
+    /btw is an alias of /background (see hermes_cli/commands.py). Typing
+    /btw mid-turn must spawn a parallel background task — that's the whole
+    point of the command. Before the mid-turn bypass was added for
+    /background, /btw fell through to the "Agent is running — wait or
+    /stop first" catch-all, making it useless in exactly the scenario it
+    was designed for. The alias and the bypass together make it work.
+    """
+    runner = _make_runner()
+    runner._handle_background_command = AsyncMock(
+        return_value='🚀 Background task started: "what module owns titles?"'
+    )
+
+    result = await runner._handle_message(_make_event("/btw what module owns titles?"))
+
+    runner._handle_background_command.assert_awaited_once()
+    assert result is not None
+    assert "can't run mid-turn" not in result
--- a/tests/gateway/test_slack.py
+++ b/tests/gateway/test_slack.py
@ -147,7 +147,20 @@ class TestAppMentionHandler:
        assert "app_mention" in registered_events
        assert "assistant_thread_started" in registered_events
        assert "assistant_thread_context_changed" in registered_events
-        assert "/hermes" in registered_commands
+        # Slack slash commands are registered via a single regex matcher
+        # covering every COMMAND_REGISTRY entry (e.g. /hermes, /btw, /stop,
+        # /model, ...) so users get native-slash parity with Discord and
+        # Telegram. Verify the regex matches the key expected slashes.
+        assert len(registered_commands) == 1, (
+            f"expected 1 combined slash matcher, got {registered_commands!r}"
+        )
+        slash_matcher = registered_commands[0]
+        import re as _re
+        assert isinstance(slash_matcher, _re.Pattern)
+        for expected in ("/hermes", "/btw", "/stop", "/model", "/help"):
+            assert slash_matcher.match(expected), (
+                f"Slack slash regex does not match {expected}"
+            )


 class TestSlackConnectCleanup:
@ -1544,6 +1557,83 @@ class TestSlashCommands:
        msg = adapter.handle_message.call_args[0][0]
        assert msg.text == "/reasoning"

+    # ------------------------------------------------------------------
+    # Native slash commands — /btw, /stop, /model, ... dispatched directly
+    # instead of as /hermes subcommands. This is the Discord/Telegram parity
+    # fix: the slash name itself becomes the command.
+    # ------------------------------------------------------------------
+
+    @pytest.mark.asyncio
+    async def test_native_btw_slash(self, adapter):
+        """/btw with args must dispatch to /background, not /hermes btw."""
+        command = {
+            "command": "/btw",
+            "text": "fix the failing test",
+            "user_id": "U1",
+            "channel_id": "C1",
+        }
+        await adapter._handle_slash_command(command)
+        msg = adapter.handle_message.call_args[0][0]
+        # The gateway command dispatcher resolves /btw -> background via
+        # resolve_command() — our handler's job is just to deliver
+        # "/btw <args>" to the gateway runner, which is what this asserts.
+        assert msg.text == "/btw fix the failing test"
+
+    @pytest.mark.asyncio
+    async def test_native_stop_slash_no_args(self, adapter):
+        command = {
+            "command": "/stop",
+            "text": "",
+            "user_id": "U1",
+            "channel_id": "C1",
+        }
+        await adapter._handle_slash_command(command)
+        msg = adapter.handle_message.call_args[0][0]
+        assert msg.text == "/stop"
+
+    @pytest.mark.asyncio
+    async def test_native_model_slash_with_args(self, adapter):
+        command = {
+            "command": "/model",
+            "text": "anthropic/claude-sonnet-4",
+            "user_id": "U1",
+            "channel_id": "C1",
+        }
+        await adapter._handle_slash_command(command)
+        msg = adapter.handle_message.call_args[0][0]
+        assert msg.text == "/model anthropic/claude-sonnet-4"
+
+    @pytest.mark.asyncio
+    async def test_legacy_hermes_prefix_still_works(self, adapter):
+        """Backward compat: /hermes btw foo must still route to /btw foo.
+
+        Old workspace manifests only declared /hermes as the single slash.
+        After users refresh their manifest they get /btw natively, but the
+        legacy form must keep working during the transition.
+        """
+        command = {
+            "command": "/hermes",
+            "text": "btw run the tests",
+            "user_id": "U1",
+            "channel_id": "C1",
+        }
+        await adapter._handle_slash_command(command)
+        msg = adapter.handle_message.call_args[0][0]
+        assert msg.text == "/btw run the tests"
+
+    @pytest.mark.asyncio
+    async def test_legacy_hermes_freeform_question(self, adapter):
+        """/hermes <free-form text> must stay as the raw text (non-command)."""
+        command = {
+            "command": "/hermes",
+            "text": "what's the weather today?",
+            "user_id": "U1",
+            "channel_id": "C1",
+        }
+        await adapter._handle_slash_command(command)
+        msg = adapter.handle_message.call_args[0][0]
+        assert msg.text == "what's the weather today?"
+

 # ---------------------------------------------------------------------------
 # TestMessageSplitting
--- a/tests/gateway/test_voice_command.py
+++ b/tests/gateway/test_voice_command.py
@ -177,6 +177,53 @@ class TestHandleVoiceCommand:

        assert adapter._auto_tts_disabled_chats == {"123"}

+    def test_sync_populates_enabled_chats_from_voice_modes(self, runner):
+        """Issue #16007: sync also restores per-chat /voice on|tts opt-ins.
+
+        The adapter's ``_auto_tts_enabled_chats`` must mirror chats whose
+        persisted voice_mode is ``voice_only`` or ``all`` — without this,
+        ``/voice on`` was relying on a "not in disabled set" default that
+        silently enabled auto-TTS for every chat.
+        """
+        from gateway.config import Platform
+        runner._voice_mode = {
+            "telegram:off_chat": "off",
+            "telegram:on_chat": "voice_only",
+            "telegram:tts_chat": "all",
+            "slack:999": "voice_only",  # wrong platform, must be ignored
+        }
+        adapter = SimpleNamespace(
+            _auto_tts_default=False,
+            _auto_tts_disabled_chats=set(),
+            _auto_tts_enabled_chats=set(),
+            platform=Platform.TELEGRAM,
+        )
+
+        runner._sync_voice_mode_state_to_adapter(adapter)
+
+        assert adapter._auto_tts_disabled_chats == {"off_chat"}
+        assert adapter._auto_tts_enabled_chats == {"on_chat", "tts_chat"}
+
+    def test_sync_pushes_config_default_onto_adapter(self, runner, monkeypatch):
+        """Issue #16007: ``voice.auto_tts`` must propagate to ``_auto_tts_default``."""
+        from gateway.config import Platform
+
+        fake_cfg = {"voice": {"auto_tts": True}}
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: fake_cfg,
+        )
+        adapter = SimpleNamespace(
+            _auto_tts_default=False,
+            _auto_tts_disabled_chats=set(),
+            _auto_tts_enabled_chats=set(),
+            platform=Platform.TELEGRAM,
+        )
+
+        runner._sync_voice_mode_state_to_adapter(adapter)
+
+        assert adapter._auto_tts_default is True
+
    def test_restart_restores_voice_off_state(self, runner, tmp_path):
        from gateway.config import Platform
        runner._VOICE_MODE_PATH.write_text(json.dumps({"telegram:123": "off"}))
@ -2706,3 +2753,56 @@ class TestUDPKeepalive:
            mock_conn.send_packet.assert_called_with(b'\xf8\xff\xfe')
        finally:
            DiscordAdapter._KEEPALIVE_INTERVAL = original_interval
+
+
+# =====================================================================
+# BasePlatformAdapter._should_auto_tts_for_chat — gate for auto-TTS
+# on voice input. Regression test for Issue #16007.
+# =====================================================================
+
+class TestShouldAutoTtsForChat:
+    """Three-layer gate: per-chat enable > per-chat disable > config default."""
+
+    def _make_adapter(self, *, default: bool, enabled=(), disabled=()):
+        """Build a bare adapter with only the attrs the gate reads."""
+        adapter = SimpleNamespace(
+            _auto_tts_default=default,
+            _auto_tts_enabled_chats=set(enabled),
+            _auto_tts_disabled_chats=set(disabled),
+        )
+        # Hand back the plain function rather than binding it — the gate only
+        # reads the three attrs above via ``self.``, so an unbound call works.
+        from gateway.platforms.base import BasePlatformAdapter
+        return BasePlatformAdapter._should_auto_tts_for_chat, adapter
+
+    def test_default_false_no_override_suppresses(self):
+        """Issue #16007: voice.auto_tts=False and no per-chat state → no TTS."""
+        fn, adapter = self._make_adapter(default=False)
+        assert fn(adapter, "chat1") is False
+
+    def test_default_true_no_override_fires(self):
+        fn, adapter = self._make_adapter(default=True)
+        assert fn(adapter, "chat1") is True
+
+    def test_explicit_enable_overrides_false_default(self):
+        """``/voice on`` with config auto_tts=False still fires."""
+        fn, adapter = self._make_adapter(default=False, enabled={"chat1"})
+        assert fn(adapter, "chat1") is True
+
+    def test_explicit_disable_overrides_true_default(self):
+        """``/voice off`` with config auto_tts=True still suppresses."""
+        fn, adapter = self._make_adapter(default=True, disabled={"chat1"})
+        assert fn(adapter, "chat1") is False
+
+    def test_enabled_wins_over_disabled(self):
+        """An explicit enable beats an explicit disable (enable takes priority)."""
+        fn, adapter = self._make_adapter(
+            default=False, enabled={"chat1"}, disabled={"chat1"}
+        )
+        assert fn(adapter, "chat1") is True
+
+    def test_per_chat_isolation(self):
+        """Enable for chat1 doesn't leak to chat2."""
+        fn, adapter = self._make_adapter(default=False, enabled={"chat1"})
+        assert fn(adapter, "chat1") is True
+        assert fn(adapter, "chat2") is False
--- a/tests/hermes_cli/test_apply_model_switch_result_context.py
+++ b/tests/hermes_cli/test_apply_model_switch_result_context.py
@ -0,0 +1,152 @@
+"""Regression test for the `/model` picker confirmation display.
+
+Bug (April 2026): after choosing a model from the interactive `/model` picker,
+``HermesCLI._apply_model_switch_result()`` printed ``ModelInfo.context_window``
+straight from models.dev, which always reports the vendor-wide value (e.g.
+gpt-5.5 = 1,050,000 on ``openai``). That ignored provider-specific caps — in
+particular, ChatGPT Codex OAuth enforces 272K on the same slug. The sibling
+``_handle_model_switch()`` (typed ``/model <name>``) was already fixed to use
+``resolve_display_context_length()``; the picker path was missed, causing
+"sometimes 1M, sometimes 272K" for the same model across sibling UI paths.
+
+Fix: both display paths now go through ``resolve_display_context_length()``.
+"""
+from __future__ import annotations
+
+from unittest.mock import patch
+
+from hermes_cli.model_switch import ModelSwitchResult
+
+
+class _FakeModelInfo:
+    context_window = 1_050_000
+    max_output = 0
+
+    def has_cost_data(self):
+        return False
+
+    def format_capabilities(self):
+        return ""
+
+
+class _StubCLI:
+    """Minimum attrs ``_apply_model_switch_result`` reads on ``self``."""
+    agent = None
+    model = ""
+    provider = ""
+    requested_provider = ""
+    api_key = ""
+    _explicit_api_key = ""
+    base_url = ""
+    _explicit_base_url = ""
+    api_mode = ""
+    _pending_model_switch_note = ""
+
+
+def _run_display(monkeypatch, result):
+    import cli as cli_mod
+
+    captured: list[str] = []
+    monkeypatch.setattr(cli_mod, "_cprint", lambda s, *a, **k: captured.append(str(s)))
+    # Avoid writing to ~/.hermes/config.yaml during the test.
+    monkeypatch.setattr(cli_mod, "save_config_value", lambda *a, **k: None)
+    cli_mod.HermesCLI._apply_model_switch_result(_StubCLI(), result, False)
+    return captured
+
+
+def test_picker_path_uses_provider_aware_context_on_codex(monkeypatch):
+    """``_apply_model_switch_result`` must prefer the provider-aware resolver
+    (272K on Codex) over the raw models.dev value (1.05M for gpt-5.5).
+    """
+    result = ModelSwitchResult(
+        success=True,
+        new_model="gpt-5.5",
+        target_provider="openai-codex",
+        provider_changed=True,
+        api_key="",
+        base_url="https://chatgpt.com/backend-api/codex",
+        api_mode="codex_responses",
+        warning_message="",
+        provider_label="ChatGPT Codex",
+        resolved_via_alias=False,
+        capabilities=None,
+        model_info=_FakeModelInfo(),  # models.dev says 1.05M
+        is_global=False,
+    )
+    with patch(
+        "agent.model_metadata.get_model_context_length",
+        return_value=272_000,
+    ):
+        lines = _run_display(monkeypatch, result)
+
+    ctx_line = next((l for l in lines if "Context:" in l), "")
+    assert "272,000" in ctx_line, (
+        f"picker-path display must show Codex's 272K cap, got: {ctx_line!r}"
+    )
+    assert "1,050,000" not in ctx_line, (
+        f"picker-path display leaked models.dev's 1.05M for Codex: {ctx_line!r}"
+    )
+
+
+def test_picker_path_shows_vendor_value_when_no_provider_cap(monkeypatch):
+    """On providers with no enforced cap (e.g. OpenRouter), the picker path
+    should surface the real 1.05M context for gpt-5.5 — resolver and models.dev
+    agree here.
+    """
+    result = ModelSwitchResult(
+        success=True,
+        new_model="openai/gpt-5.5",
+        target_provider="openrouter",
+        provider_changed=True,
+        api_key="",
+        base_url="https://openrouter.ai/api/v1",
+        api_mode="chat_completions",
+        warning_message="",
+        provider_label="OpenRouter",
+        resolved_via_alias=False,
+        capabilities=None,
+        model_info=_FakeModelInfo(),
+        is_global=False,
+    )
+    with patch(
+        "agent.model_metadata.get_model_context_length",
+        return_value=1_050_000,
+    ):
+        lines = _run_display(monkeypatch, result)
+
+    ctx_line = next((l for l in lines if "Context:" in l), "")
+    assert "1,050,000" in ctx_line, (
+        f"OpenRouter gpt-5.5 should show 1.05M context, got: {ctx_line!r}"
+    )
+
+
+def test_picker_path_falls_back_to_model_info_when_resolver_empty(monkeypatch):
+    """If ``get_model_context_length`` returns nothing (rare — truly unknown
+    endpoint), the display still surfaces ``ModelInfo.context_window`` so the
+    user sees *something* rather than a silent blank.
+    """
+    result = ModelSwitchResult(
+        success=True,
+        new_model="some-model",
+        target_provider="some-provider",
+        provider_changed=True,
+        api_key="",
+        base_url="",
+        api_mode="chat_completions",
+        warning_message="",
+        provider_label="Some Provider",
+        resolved_via_alias=False,
+        capabilities=None,
+        model_info=_FakeModelInfo(),  # context_window = 1_050_000
+        is_global=False,
+    )
+    with patch(
+        "agent.model_metadata.get_model_context_length",
+        return_value=None,
+    ):
+        lines = _run_display(monkeypatch, result)
+
+    ctx_line = next((l for l in lines if "Context:" in l), "")
+    assert "1,050,000" in ctx_line, (
+        f"resolver-empty path should fall back to ModelInfo, got: {ctx_line!r}"
+    )
--- a/tests/hermes_cli/test_commands.py
+++ b/tests/hermes_cli/test_commands.py
@ -20,6 +20,8 @@ from hermes_cli.commands import (
    discord_skill_commands,
    gateway_help_lines,
    resolve_command,
+    slack_app_manifest,
+    slack_native_slashes,
    slack_subcommand_map,
    telegram_bot_commands,
    telegram_menu_commands,
@ -256,6 +258,115 @@ class TestSlackSubcommandMap:
                assert cmd.name not in mapping


+class TestSlackNativeSlashes:
+    """Slack native slash command generation — used to register every
+    COMMAND_REGISTRY entry as a first-class Slack slash, matching Discord
+    and Telegram."""
+
+    def test_returns_triples(self):
+        slashes = slack_native_slashes()
+        assert len(slashes) >= 10
+        for entry in slashes:
+            assert isinstance(entry, tuple) and len(entry) == 3
+            name, desc, hint = entry
+            assert isinstance(name, str) and name
+            assert isinstance(desc, str)
+            assert isinstance(hint, str)
+
+    def test_hermes_catchall_is_first(self):
+        """``/hermes`` must be reserved as the first slot so the legacy
+        ``/hermes <subcommand>`` form keeps working after we add new
+        commands and hit the 50-slash cap."""
+        slashes = slack_native_slashes()
+        assert slashes[0][0] == "hermes"
+
+    def test_names_respect_slack_limits(self):
+        for name, _desc, _hint in slack_native_slashes():
+            # Slack: lowercase a-z, 0-9, hyphens, underscores; max 32 chars
+            assert len(name) <= 32, f"slash {name!r} exceeds 32 chars"
+            assert name == name.lower()
+            for ch in name:
+                assert ch.isalnum() or ch in "-_", f"invalid char {ch!r} in {name!r}"
+
+    def test_under_fifty_command_cap(self):
+        """Slack allows at most 50 slash commands per app."""
+        assert len(slack_native_slashes()) <= 50
+
+    def test_unique_names(self):
+        names = [n for n, _d, _h in slack_native_slashes()]
+        assert len(names) == len(set(names)), "duplicate Slack slash names"
+
+    def test_includes_canonical_commands(self):
+        names = {n for n, _d, _h in slack_native_slashes()}
+        # Sample of gateway-available canonical commands
+        for expected in ("new", "stop", "background", "model", "help", "status"):
+            assert expected in names, f"missing canonical /{expected}"
+
+    def test_includes_aliases_as_first_class_slashes(self):
+        """Aliases (/btw, /bg, /reset, /q) must be registered as standalone
+        slashes — this is the whole point of native-slashes parity."""
+        names = {n for n, _d, _h in slack_native_slashes()}
+        assert "btw" in names
+        assert "bg" in names
+        assert "reset" in names
+        assert "q" in names
+
+    def test_telegram_parity(self):
+        """Every Telegram bot command must be registerable on Slack too.
+
+        This catches the old behavior where Slack users couldn't invoke
+        commands like /btw natively. If a future command surfaces on
+        Telegram but not Slack (because of Slack's 50-slash cap), this
+        test fails loudly so we can curate the list rather than silently
+        dropping parity.
+        """
+        slack_names = {n for n, _d, _h in slack_native_slashes()}
+        tg_names = {n for n, _d in telegram_bot_commands()}
+        # Some Telegram names have underscores where Slack uses hyphens
+        # (e.g. foo_bar vs foo-bar). Normalize both sides for comparison.
+        def _norm(s: str) -> str:
+            return s.replace("-", "_").replace("__", "_").strip("_")
+
+        slack_norm = {_norm(n) for n in slack_names}
+        tg_norm = {_norm(n) for n in tg_names}
+        missing = tg_norm - slack_norm
+        assert not missing, (
+            f"commands on Telegram but missing from Slack native slashes: {sorted(missing)}"
+        )
+
+
+class TestSlackAppManifest:
+    """Generated Slack app manifest (used by `hermes slack manifest`)."""
+
+    def test_returns_dict(self):
+        m = slack_app_manifest()
+        assert isinstance(m, dict)
+        assert "features" in m
+        assert "slash_commands" in m["features"]
+
+    def test_each_slash_has_required_fields(self):
+        m = slack_app_manifest()
+        for entry in m["features"]["slash_commands"]:
+            assert entry["command"].startswith("/")
+            assert "description" in entry
+            assert "url" in entry
+            # should_escape must be present (Slack defaults to True which
+            # HTML-escapes args — we want the raw text)
+            assert "should_escape" in entry
+
+    def test_btw_is_in_manifest(self):
+        """Regression: /btw must be a native Slack slash, not just a
+        /hermes subcommand."""
+        m = slack_app_manifest()
+        commands = [c["command"] for c in m["features"]["slash_commands"]]
+        assert "/btw" in commands
+
+    def test_custom_request_url(self):
+        m = slack_app_manifest(request_url="https://example.com/slack")
+        for entry in m["features"]["slash_commands"]:
+            assert entry["url"] == "https://example.com/slack"
+
+
 # ---------------------------------------------------------------------------
 # Config-gated gateway commands
 # ---------------------------------------------------------------------------
--- a/tests/hermes_cli/test_fallback_cmd.py
+++ b/tests/hermes_cli/test_fallback_cmd.py
@ -0,0 +1,486 @@
+"""Tests for `hermes fallback` — chain reading, add/remove/clear, legacy migration."""
+from __future__ import annotations
+
+import io
+import types
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+import yaml
+
+
+# ---------------------------------------------------------------------------
+# Shared fixture — isolate HERMES_HOME so save_config writes to tmp_path
+# ---------------------------------------------------------------------------
+
+@pytest.fixture()
+def isolated_home(tmp_path, monkeypatch):
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    home = tmp_path / ".hermes"
+    home.mkdir(exist_ok=True)
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    return tmp_path
+
+
+def _write_config(home: Path, data: dict) -> None:
+    config_path = home / ".hermes" / "config.yaml"
+    config_path.write_text(yaml.safe_dump(data), encoding="utf-8")
+
+
+def _read_config(home: Path) -> dict:
+    config_path = home / ".hermes" / "config.yaml"
+    return yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
+
+
+# ---------------------------------------------------------------------------
+# _read_chain / _write_chain
+# ---------------------------------------------------------------------------
+
+class TestReadChain:
+    def test_returns_empty_list_when_unset(self):
+        from hermes_cli.fallback_cmd import _read_chain
+        assert _read_chain({}) == []
+
+    def test_reads_new_list_format(self):
+        from hermes_cli.fallback_cmd import _read_chain
+        cfg = {
+            "fallback_providers": [
+                {"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"},
+                {"provider": "nous", "model": "Hermes-4-Llama-3.1-405B"},
+            ]
+        }
+        assert _read_chain(cfg) == [
+            {"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"},
+            {"provider": "nous", "model": "Hermes-4-Llama-3.1-405B"},
+        ]
+
+    def test_migrates_legacy_single_dict(self):
+        from hermes_cli.fallback_cmd import _read_chain
+        cfg = {"fallback_model": {"provider": "openrouter", "model": "gpt-5.4"}}
+        assert _read_chain(cfg) == [{"provider": "openrouter", "model": "gpt-5.4"}]
+
+    def test_skips_incomplete_entries(self):
+        from hermes_cli.fallback_cmd import _read_chain
+        cfg = {
+            "fallback_providers": [
+                {"provider": "openrouter"},            # missing model
+                {"model": "gpt-5.4"},                  # missing provider
+                {"provider": "nous", "model": "foo"},  # valid
+                "not-a-dict",                          # noise
+            ]
+        }
+        assert _read_chain(cfg) == [{"provider": "nous", "model": "foo"}]
+
+    def test_returns_copies_not_aliases(self):
+        from hermes_cli.fallback_cmd import _read_chain
+        cfg = {"fallback_providers": [{"provider": "nous", "model": "foo"}]}
+        result = _read_chain(cfg)
+        result[0]["provider"] = "mutated"
+        assert cfg["fallback_providers"][0]["provider"] == "nous"
+
+
+# ---------------------------------------------------------------------------
+# _extract_fallback_from_model_cfg
+# ---------------------------------------------------------------------------
+
+class TestExtractFallback:
+    def test_extracts_from_default_field(self):
+        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
+        model_cfg = {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"}
+        assert _extract_fallback_from_model_cfg(model_cfg) == {
+            "provider": "openrouter",
+            "model": "anthropic/claude-sonnet-4.6",
+        }
+
+    def test_extracts_optional_base_url_and_api_mode(self):
+        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
+        model_cfg = {
+            "provider": "custom",
+            "default": "local-model",
+            "base_url": "http://localhost:11434/v1",
+            "api_mode": "chat_completions",
+        }
+        assert _extract_fallback_from_model_cfg(model_cfg) == {
+            "provider": "custom",
+            "model": "local-model",
+            "base_url": "http://localhost:11434/v1",
+            "api_mode": "chat_completions",
+        }
+
+    def test_returns_none_without_provider(self):
+        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
+        assert _extract_fallback_from_model_cfg({"default": "foo"}) is None
+
+    def test_returns_none_without_model(self):
+        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
+        assert _extract_fallback_from_model_cfg({"provider": "openrouter"}) is None
+
+    def test_returns_none_for_non_dict(self):
+        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
+        assert _extract_fallback_from_model_cfg("plain-string") is None
+        assert _extract_fallback_from_model_cfg(None) is None
+
+
+# ---------------------------------------------------------------------------
+# cmd_fallback_list
+# ---------------------------------------------------------------------------
+
+class TestListCommand:
+    def test_list_empty(self, isolated_home, capsys):
+        _write_config(isolated_home, {})
+        from hermes_cli.fallback_cmd import cmd_fallback_list
+        cmd_fallback_list(types.SimpleNamespace())
+        out = capsys.readouterr().out
+        assert "No fallback providers configured" in out
+        assert "hermes fallback add" in out
+
+    def test_list_with_entries(self, isolated_home, capsys):
+        _write_config(isolated_home, {
+            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
+            "fallback_providers": [
+                {"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"},
+                {"provider": "nous", "model": "Hermes-4"},
+            ],
+        })
+        from hermes_cli.fallback_cmd import cmd_fallback_list
+        cmd_fallback_list(types.SimpleNamespace())
+        out = capsys.readouterr().out
+        assert "Fallback chain (2 entries)" in out
+        assert "anthropic/claude-sonnet-4.6" in out
+        assert "Hermes-4" in out
+        # Primary should be shown too
+        assert "claude-sonnet-4-6" in out
+
+    def test_list_migrates_legacy_for_display(self, isolated_home, capsys):
+        _write_config(isolated_home, {
+            "fallback_model": {"provider": "openrouter", "model": "gpt-5.4"},
+        })
+        from hermes_cli.fallback_cmd import cmd_fallback_list
+        cmd_fallback_list(types.SimpleNamespace())
+        out = capsys.readouterr().out
+        assert "1 entry" in out
+        assert "gpt-5.4" in out
+
+
+# ---------------------------------------------------------------------------
+# cmd_fallback_add — mock select_provider_and_model
+# ---------------------------------------------------------------------------
+
+class TestAddCommand:
+    def test_add_appends_new_entry(self, isolated_home, capsys):
+        _write_config(isolated_home, {
+            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
+        })
+
+        def fake_picker(args=None):
+            # Simulate what the real picker does: writes the selection to config["model"]
+            from hermes_cli.config import load_config, save_config
+            cfg = load_config()
+            cfg["model"] = {
+                "provider": "openrouter",
+                "default": "anthropic/claude-sonnet-4.6",
+                "base_url": "https://openrouter.ai/api/v1",
+                "api_mode": "chat_completions",
+            }
+            save_config(cfg)
+
+        with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
+                patch("hermes_cli.main._require_tty"):
+            from hermes_cli.fallback_cmd import cmd_fallback_add
+            cmd_fallback_add(types.SimpleNamespace())
+
+        cfg = _read_config(isolated_home)
+        # Primary is preserved
+        assert cfg["model"]["provider"] == "anthropic"
+        assert cfg["model"]["default"] == "claude-sonnet-4-6"
+        # Fallback was appended
+        assert cfg["fallback_providers"] == [
+            {
+                "provider": "openrouter",
+                "model": "anthropic/claude-sonnet-4.6",
+                "base_url": "https://openrouter.ai/api/v1",
+                "api_mode": "chat_completions",
+            }
+        ]
+        out = capsys.readouterr().out
+        assert "Added fallback" in out
+
+    def test_add_rejects_duplicate(self, isolated_home, capsys):
+        _write_config(isolated_home, {
+            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
+            "fallback_providers": [
+                {"provider": "openrouter", "model": "gpt-5.4"},
+            ],
+        })
+
+        def fake_picker(args=None):
+            from hermes_cli.config import load_config, save_config
+            cfg = load_config()
+            cfg["model"] = {"provider": "openrouter", "default": "gpt-5.4"}
+            save_config(cfg)
+
+        with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
+                patch("hermes_cli.main._require_tty"):
+            from hermes_cli.fallback_cmd import cmd_fallback_add
+            cmd_fallback_add(types.SimpleNamespace())
+
+        cfg = _read_config(isolated_home)
+        # Should still have exactly one entry
+        assert len(cfg["fallback_providers"]) == 1
+        out = capsys.readouterr().out
+        assert "already in the fallback chain" in out
+
+    def test_add_rejects_same_as_primary(self, isolated_home, capsys):
+        """Picking the provider/model that is already the primary adds no fallback."""
+        _write_config(isolated_home, {
+            "model": {"provider": "openrouter", "default": "gpt-5.4"},
+        })
+
+        def fake_picker(args=None):
+            # User picks the same thing that's already the primary
+            from hermes_cli.config import load_config, save_config
+            cfg = load_config()
+            cfg["model"] = {"provider": "openrouter", "default": "gpt-5.4"}
+            save_config(cfg)
+
+        # Swap in the fake picker and stub out `_require_tty` for the call.
+        with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
+                patch("hermes_cli.main._require_tty"):
+            from hermes_cli.fallback_cmd import cmd_fallback_add
+            cmd_fallback_add(types.SimpleNamespace())
+
+        cfg = _read_config(isolated_home)
+        # The key may be absent or present-but-empty; both mean "nothing added".
+        assert "fallback_providers" not in cfg or cfg["fallback_providers"] == []
+        out = capsys.readouterr().out
+        assert "matches the current primary" in out
+
+    def test_add_preserves_primary_when_picker_changes_it(self, isolated_home):
+        """The picker mutates config["model"]; fallback_add must restore the primary."""
+        # Primary with all four fields set so each can be checked after the add.
+        _write_config(isolated_home, {
+            "model": {
+                "provider": "anthropic",
+                "default": "claude-sonnet-4-6",
+                "base_url": "https://api.anthropic.com",
+                "api_mode": "anthropic_messages",
+            },
+        })
+
+        def fake_picker(args=None):
+            # Stand-in for the picker: overwrites config["model"] with a
+            # different provider, model, base_url and api_mode.
+            from hermes_cli.config import load_config, save_config
+            cfg = load_config()
+            cfg["model"] = {
+                "provider": "openrouter",
+                "default": "anthropic/claude-sonnet-4.6",
+                "base_url": "https://openrouter.ai/api/v1",
+                "api_mode": "chat_completions",
+            }
+            save_config(cfg)
+
+        # Swap in the fake picker and stub out `_require_tty` for the call.
+        with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
+                patch("hermes_cli.main._require_tty"):
+            from hermes_cli.fallback_cmd import cmd_fallback_add
+            cmd_fallback_add(types.SimpleNamespace())
+
+        cfg = _read_config(isolated_home)
+        # Primary exactly as it was
+        assert cfg["model"]["provider"] == "anthropic"
+        assert cfg["model"]["default"] == "claude-sonnet-4-6"
+        assert cfg["model"]["base_url"] == "https://api.anthropic.com"
+        assert cfg["model"]["api_mode"] == "anthropic_messages"
+        # Fallback added
+        assert len(cfg["fallback_providers"]) == 1
+        assert cfg["fallback_providers"][0]["provider"] == "openrouter"
+
+    def test_add_noop_when_picker_cancelled(self, isolated_home, capsys):
+        """A picker that changes nothing must not add a fallback entry."""
+        _write_config(isolated_home, {
+            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
+        })
+
+        def fake_picker(args=None):
+            # User cancelled — no change to config
+            pass
+
+        # Swap in the fake picker and stub out `_require_tty` for the call.
+        with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
+                patch("hermes_cli.main._require_tty"):
+            from hermes_cli.fallback_cmd import cmd_fallback_add
+            cmd_fallback_add(types.SimpleNamespace())
+
+        cfg = _read_config(isolated_home)
+        # The key may be absent or present-but-empty; both mean "nothing added".
+        assert "fallback_providers" not in cfg or cfg["fallback_providers"] == []
+        out = capsys.readouterr().out
+        # Either "No fallback added" (picker fully cancelled) or "matches the current primary"
+        # (picker left config untouched) — both indicate a non-add outcome.
+        assert ("No fallback added" in out) or ("matches the current primary" in out)
+
+    def test_add_noop_when_picker_clears_model(self, isolated_home, capsys):
+        """Simulate picker explicitly clearing model.default (unusual but possible)."""
+        _write_config(isolated_home, {
+            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
+        })
+
+        def fake_picker(args=None):
+            # Stand-in for the picker: saves a model section whose provider
+            # and default are both empty strings.
+            from hermes_cli.config import load_config, save_config
+            cfg = load_config()
+            cfg["model"] = {"provider": "", "default": ""}
+            save_config(cfg)
+
+        # Swap in the fake picker and stub out `_require_tty` for the call.
+        with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
+                patch("hermes_cli.main._require_tty"):
+            from hermes_cli.fallback_cmd import cmd_fallback_add
+            cmd_fallback_add(types.SimpleNamespace())
+
+        # Only the printed message is checked here; the saved config is not inspected.
+        out = capsys.readouterr().out
+        assert "No fallback added" in out
+
+
+# ---------------------------------------------------------------------------
+# cmd_fallback_remove
+# ---------------------------------------------------------------------------
+
+class TestRemoveCommand:
+    """Tests for ``cmd_fallback_remove``: empty chain, removing an entry, cancelling."""
+
+    def test_remove_empty_chain(self, isolated_home, capsys):
+        """With no chain configured the command reports there is nothing to remove."""
+        _write_config(isolated_home, {})
+        from hermes_cli.fallback_cmd import cmd_fallback_remove
+        cmd_fallback_remove(types.SimpleNamespace())
+        out = capsys.readouterr().out
+        assert "nothing to remove" in out
+
+    def test_remove_selected_entry(self, isolated_home, capsys):
+        """The entry at the picked index is removed; the others keep their order."""
+        _write_config(isolated_home, {
+            "fallback_providers": [
+                {"provider": "openrouter", "model": "gpt-5.4"},
+                {"provider": "nous", "model": "Hermes-4"},
+                {"provider": "anthropic", "model": "claude-sonnet-4-6"},
+            ],
+        })
+
+        # Picker returns index 1 (the middle entry, "nous / Hermes-4")
+        with patch("hermes_cli.setup._curses_prompt_choice", return_value=1):
+            from hermes_cli.fallback_cmd import cmd_fallback_remove
+            cmd_fallback_remove(types.SimpleNamespace())
+
+        cfg = _read_config(isolated_home)
+        assert cfg["fallback_providers"] == [
+            {"provider": "openrouter", "model": "gpt-5.4"},
+            {"provider": "anthropic", "model": "claude-sonnet-4-6"},
+        ]
+        out = capsys.readouterr().out
+        assert "Removed fallback" in out
+        assert "Hermes-4" in out
+
+    def test_remove_cancel_keeps_chain(self, isolated_home):
+        """Choosing the cancel item leaves the chain exactly as it was."""
+        original_chain = [
+            {"provider": "openrouter", "model": "gpt-5.4"},
+        ]
+        _write_config(isolated_home, {
+            "fallback_providers": [dict(entry) for entry in original_chain],
+        })
+
+        # Cancel = last item (index == len(chain) == 1 in our menu)
+        with patch("hermes_cli.setup._curses_prompt_choice", return_value=1):
+            from hermes_cli.fallback_cmd import cmd_fallback_remove
+            cmd_fallback_remove(types.SimpleNamespace())
+
+        cfg = _read_config(isolated_home)
+        # Compare contents, not just length: a length check would still pass
+        # if the entry had been swapped for or rewritten into something else.
+        assert cfg["fallback_providers"] == original_chain
+
+
+# ---------------------------------------------------------------------------
+# cmd_fallback_clear
+# ---------------------------------------------------------------------------
+
+class TestClearCommand:
+    """Tests for ``cmd_fallback_clear``: empty chain, confirmed clear, declined clear."""
+
+    def test_clear_empty_chain(self, isolated_home, capsys):
+        """With no chain configured the command reports there is nothing to clear."""
+        _write_config(isolated_home, {})
+        from hermes_cli.fallback_cmd import cmd_fallback_clear
+        cmd_fallback_clear(types.SimpleNamespace())
+        out = capsys.readouterr().out
+        assert "nothing to clear" in out
+
+    def test_clear_with_confirmation(self, isolated_home, capsys, monkeypatch):
+        """Answering "y" at the prompt empties the chain and prints a confirmation."""
+        _write_config(isolated_home, {
+            "fallback_providers": [
+                {"provider": "openrouter", "model": "gpt-5.4"},
+                {"provider": "nous", "model": "Hermes-4"},
+            ],
+        })
+        # Any call to input() returns "y", regardless of the prompt text.
+        monkeypatch.setattr("builtins.input", lambda *a, **kw: "y")
+        from hermes_cli.fallback_cmd import cmd_fallback_clear
+        cmd_fallback_clear(types.SimpleNamespace())
+
+        cfg = _read_config(isolated_home)
+        # The key is expected to remain, holding an empty list.
+        assert cfg.get("fallback_providers") == []
+        out = capsys.readouterr().out
+        assert "Fallback chain cleared" in out
+
+    def test_clear_cancelled(self, isolated_home, monkeypatch):
+        """Answering "n" at the prompt keeps the existing chain."""
+        _write_config(isolated_home, {
+            "fallback_providers": [{"provider": "openrouter", "model": "gpt-5.4"}],
+        })
+        # Any call to input() returns "n", regardless of the prompt text.
+        monkeypatch.setattr("builtins.input", lambda *a, **kw: "n")
+        from hermes_cli.fallback_cmd import cmd_fallback_clear
+        cmd_fallback_clear(types.SimpleNamespace())
+
+        cfg = _read_config(isolated_home)
+        assert len(cfg["fallback_providers"]) == 1
+
+
+# ---------------------------------------------------------------------------
+# cmd_fallback dispatcher
+# ---------------------------------------------------------------------------
+
+class TestDispatcher:
+    """Tests for ``cmd_fallback`` routing on ``args.fallback_command``."""
+
+    def test_no_subcommand_lists(self, isolated_home, capsys):
+        """``fallback_command=None`` produces the empty-list message."""
+        _write_config(isolated_home, {})
+        from hermes_cli.fallback_cmd import cmd_fallback
+        cmd_fallback(types.SimpleNamespace(fallback_command=None))
+        out = capsys.readouterr().out
+        assert "No fallback providers configured" in out
+
+    def test_list_alias(self, isolated_home, capsys):
+        """``ls`` produces the same empty-list message as the default."""
+        _write_config(isolated_home, {})
+        from hermes_cli.fallback_cmd import cmd_fallback
+        cmd_fallback(types.SimpleNamespace(fallback_command="ls"))
+        out = capsys.readouterr().out
+        assert "No fallback providers configured" in out
+
+    def test_remove_alias(self, isolated_home, capsys):
+        """``rm`` produces the remove command's empty-chain message."""
+        _write_config(isolated_home, {})
+        from hermes_cli.fallback_cmd import cmd_fallback
+        cmd_fallback(types.SimpleNamespace(fallback_command="rm"))
+        out = capsys.readouterr().out
+        assert "nothing to remove" in out
+
+    def test_unknown_subcommand_exits(self, isolated_home):
+        """An unrecognised subcommand raises ``SystemExit``."""
+        _write_config(isolated_home, {})
+        from hermes_cli.fallback_cmd import cmd_fallback
+        with pytest.raises(SystemExit):
+            cmd_fallback(types.SimpleNamespace(fallback_command="nope"))
+
+
+# ---------------------------------------------------------------------------
+# argparse wiring — verify the subparser is registered
+# ---------------------------------------------------------------------------
+
+class TestArgparseWiring:
+    """Verify `hermes fallback` is wired into main.py's argparse tree.
+
+    main() builds the parser inline, so we invoke main([...]) via subprocess
+    with --help to introspect registered subcommands without side effects.
+    """
+
+    def test_fallback_help_lists_subcommands(self):
+        """``fallback --help`` exits 0 and names each of the four subcommands."""
+        import re
+        import subprocess
+        import sys
+        result = subprocess.run(
+            [sys.executable, "-m", "hermes_cli.main", "fallback", "--help"],
+            capture_output=True,
+            text=True,
+            timeout=30,
+        )
+        # --help exits 0
+        assert result.returncode == 0, f"stderr: {result.stderr}"
+        out = result.stdout + result.stderr
+        # All four subcommands should appear in help.  Match whole words:
+        # a bare substring check would pass on unrelated text such as
+        # "Added" or "lists" even if the subcommand were not registered.
+        for name in ("list", "add", "remove", "clear"):
+            assert re.search(rf"\b{name}\b", out), (
+                f"subcommand {name!r} missing from help output:\n{out}"
+            )
--- a/tests/hermes_cli/test_model_catalog.py
+++ b/tests/hermes_cli/test_model_catalog.py
@ -0,0 +1,284 @@
+"""Tests for hermes_cli.model_catalog — remote manifest fetch + cache + fallback."""
+
+from __future__ import annotations
+
+import json
+import time
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+
+@pytest.fixture
+def isolated_home(tmp_path, monkeypatch):
+    """Isolate HERMES_HOME + reset any module-level catalog cache per test.
+
+    Yields the ``.hermes`` directory created under ``tmp_path``.
+    """
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    # Point both Path.home() and the HERMES_HOME env var into tmp_path.
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    monkeypatch.setenv("HERMES_HOME", str(home))
+
+    # Force a fresh catalog module state for each test.
+    import importlib
+    from hermes_cli import model_catalog
+    importlib.reload(model_catalog)
+    yield home
+    # Teardown: clear the catalog cache after the test has run.
+    model_catalog.reset_cache()
+
+
+def _valid_manifest() -> dict:
+    """Return a fresh manifest dict with ``openrouter`` and ``nous`` providers.
+
+    A new dict is built on every call, so tests may mutate the result freely.
+    """
+    return {
+        "version": 1,
+        "updated_at": "2026-04-25T22:00:00Z",
+        "metadata": {"source": "test"},
+        "providers": {
+            "openrouter": {
+                "metadata": {"display_name": "OpenRouter"},
+                # Entries carry both an id and a description.
+                "models": [
+                    {"id": "anthropic/claude-opus-4.7", "description": "recommended"},
+                    {"id": "openai/gpt-5.4", "description": ""},
+                    {"id": "openrouter/elephant-alpha", "description": "free"},
+                ],
+            },
+            "nous": {
+                "metadata": {"display_name": "Nous Portal"},
+                # Entries carry only an id.
+                "models": [
+                    {"id": "anthropic/claude-opus-4.7"},
+                    {"id": "moonshotai/kimi-k2.6"},
+                ],
+            },
+        },
+    }
+
+
+class TestValidation:
+    """Tests for ``_validate_manifest`` on well-formed and malformed manifests."""
+
+    def test_accepts_well_formed_manifest(self, isolated_home):
+        """The baseline manifest validates."""
+        from hermes_cli.model_catalog import _validate_manifest
+        assert _validate_manifest(_valid_manifest()) is True
+
+    def test_rejects_non_dict(self, isolated_home):
+        """A string, a list and ``None`` are all rejected."""
+        from hermes_cli.model_catalog import _validate_manifest
+        assert _validate_manifest("string") is False
+        assert _validate_manifest([]) is False
+        assert _validate_manifest(None) is False
+
+    def test_rejects_missing_version(self, isolated_home):
+        """A manifest without a ``version`` key is rejected."""
+        from hermes_cli.model_catalog import _validate_manifest
+        m = _valid_manifest()
+        del m["version"]
+        assert _validate_manifest(m) is False
+
+    def test_rejects_future_version(self, isolated_home):
+        """A manifest with ``version`` 999 is rejected."""
+        from hermes_cli.model_catalog import _validate_manifest
+        m = _valid_manifest()
+        m["version"] = 999
+        assert _validate_manifest(m) is False
+
+    def test_rejects_missing_providers(self, isolated_home):
+        """A manifest without a ``providers`` key is rejected."""
+        from hermes_cli.model_catalog import _validate_manifest
+        m = _valid_manifest()
+        del m["providers"]
+        assert _validate_manifest(m) is False
+
+    def test_rejects_malformed_model_entry(self, isolated_home):
+        """A model entry whose ``id`` is the empty string is rejected."""
+        from hermes_cli.model_catalog import _validate_manifest
+        m = _valid_manifest()
+        m["providers"]["openrouter"]["models"][0] = {"id": ""}  # empty id
+        assert _validate_manifest(m) is False
+
+    def test_rejects_non_string_model_id(self, isolated_home):
+        """A model entry whose ``id`` is an int is rejected."""
+        from hermes_cli.model_catalog import _validate_manifest
+        m = _valid_manifest()
+        m["providers"]["openrouter"]["models"][0] = {"id": 42}
+        assert _validate_manifest(m) is False
+
+
+class TestFetchSuccess:
+    """Tests for ``get_catalog`` when ``_fetch_manifest`` returns a valid manifest."""
+
+    def test_fetch_and_cache_writes_disk(self, isolated_home):
+        """A forced refresh returns the manifest and writes it to the cache file."""
+        from hermes_cli import model_catalog
+        manifest = _valid_manifest()
+        with patch.object(
+            model_catalog, "_fetch_manifest", return_value=manifest
+        ) as fetch:
+            result = model_catalog.get_catalog(force_refresh=True)
+
+        assert result == manifest
+        assert fetch.called
+
+        # The file at _cache_path() must hold the same manifest as JSON.
+        cache_file = model_catalog._cache_path()
+        assert cache_file.exists()
+        with open(cache_file) as fh:
+            assert json.load(fh) == manifest
+
+    def test_second_call_uses_in_process_cache(self, isolated_home):
+        """A plain call after a forced refresh does not fetch again."""
+        from hermes_cli import model_catalog
+        manifest = _valid_manifest()
+        with patch.object(
+            model_catalog, "_fetch_manifest", return_value=manifest
+        ) as fetch:
+            model_catalog.get_catalog(force_refresh=True)
+            model_catalog.get_catalog()  # should not hit network again
+        assert fetch.call_count == 1
+
+    def test_force_refresh_always_refetches(self, isolated_home):
+        """Two forced refreshes call the fetcher twice."""
+        from hermes_cli import model_catalog
+        manifest = _valid_manifest()
+        with patch.object(
+            model_catalog, "_fetch_manifest", return_value=manifest
+        ) as fetch:
+            model_catalog.get_catalog(force_refresh=True)
+            model_catalog.get_catalog(force_refresh=True)
+        assert fetch.call_count == 2
+
+
+class TestFetchFailure:
+    """Tests for ``get_catalog`` when ``_fetch_manifest`` returns ``None``."""
+
+    def test_network_failure_returns_empty_when_no_cache(self, isolated_home):
+        """With no cache and a failed fetch the result is an empty dict."""
+        from hermes_cli import model_catalog
+        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
+            result = model_catalog.get_catalog(force_refresh=True)
+        assert result == {}
+
+    def test_network_failure_falls_back_to_disk_cache(self, isolated_home):
+        """After a successful fetch, a later failed refresh still returns the manifest."""
+        from hermes_cli import model_catalog
+        # Prime disk cache with a fresh copy.
+        manifest = _valid_manifest()
+        with patch.object(model_catalog, "_fetch_manifest", return_value=manifest):
+            model_catalog.get_catalog(force_refresh=True)
+
+        # Now wipe in-process cache and simulate network failure on refetch.
+        model_catalog.reset_cache()
+        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
+            result = model_catalog.get_catalog(force_refresh=True)
+
+        assert result == manifest
+
+    def test_fetch_failure_falls_back_to_stale_cache(self, isolated_home):
+        """A 30-day-old cache file is still returned when the fetch fails."""
+        from hermes_cli import model_catalog
+        manifest = _valid_manifest()
+        # Write stale cache directly (mtime in the past).
+        cache = model_catalog._cache_path()
+        cache.parent.mkdir(parents=True, exist_ok=True)
+        with open(cache, "w") as fh:
+            json.dump(manifest, fh)
+        old = time.time() - 30 * 24 * 3600  # 30 days ago
+        import os as _os
+        # Set both access and modification time to the old timestamp.
+        _os.utime(cache, (old, old))
+
+        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
+            result = model_catalog.get_catalog()
+
+        # Stale cache is better than nothing.
+        assert result == manifest
+
+
+class TestCuratedAccessors:
+    """Tests for the per-provider curated-model accessors in ``model_catalog``."""
+
+    def test_openrouter_returns_tuples(self, isolated_home):
+        """OpenRouter models come back as ``(id, description)`` tuples in manifest order."""
+        from hermes_cli import model_catalog
+        with patch.object(
+            model_catalog, "_fetch_manifest", return_value=_valid_manifest()
+        ):
+            result = model_catalog.get_curated_openrouter_models()
+        assert result == [
+            ("anthropic/claude-opus-4.7", "recommended"),
+            ("openai/gpt-5.4", ""),
+            ("openrouter/elephant-alpha", "free"),
+        ]
+
+    def test_nous_returns_ids(self, isolated_home):
+        """Nous models come back as a plain list of ids in manifest order."""
+        from hermes_cli import model_catalog
+        with patch.object(
+            model_catalog, "_fetch_manifest", return_value=_valid_manifest()
+        ):
+            result = model_catalog.get_curated_nous_models()
+        assert result == ["anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6"]
+
+    def test_openrouter_returns_none_when_catalog_empty(self, isolated_home):
+        """The OpenRouter accessor returns ``None`` when the fetch yields nothing."""
+        from hermes_cli import model_catalog
+        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
+            assert model_catalog.get_curated_openrouter_models() is None
+
+    def test_nous_returns_none_when_catalog_empty(self, isolated_home):
+        """The Nous accessor returns ``None`` when the fetch yields nothing."""
+        from hermes_cli import model_catalog
+        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
+            assert model_catalog.get_curated_nous_models() is None
+
+
+class TestDisabled:
+    """Tests for ``get_catalog`` when the catalog config has ``enabled: False``."""
+
+    def test_disabled_config_short_circuits(self, isolated_home):
+        """A disabled catalog returns ``{}`` and never calls the fetcher."""
+        from hermes_cli import model_catalog
+        with patch.object(
+            model_catalog,
+            "_load_catalog_config",
+            return_value={
+                "enabled": False,
+                "url": "http://ignored",
+                "ttl_hours": 24.0,
+                "providers": {},
+            },
+        ):
+            # Mock the fetcher so any call to it can be detected below.
+            with patch.object(model_catalog, "_fetch_manifest") as fetch:
+                result = model_catalog.get_catalog()
+        assert result == {}
+        fetch.assert_not_called()
+
+
+class TestProviderOverride:
+    """Tests for a per-provider ``url`` entry in the catalog config."""
+
+    def test_override_url_takes_precedence(self, isolated_home):
+        """OpenRouter models come from the override URL's payload, not the master one."""
+        from hermes_cli import model_catalog
+
+        override_payload = {
+            "version": 1,
+            "providers": {
+                "openrouter": {
+                    "models": [
+                        {"id": "override/model", "description": "custom"},
+                    ]
+                }
+            },
+        }
+
+        def fake_fetch(url, timeout):
+            # Serve the override payload for the override URL and the
+            # baseline manifest for anything else.
+            if "override" in url:
+                return override_payload
+            return _valid_manifest()
+
+        with patch.object(
+            model_catalog,
+            "_load_catalog_config",
+            return_value={
+                "enabled": True,
+                "url": "http://master",
+                "ttl_hours": 24.0,
+                "providers": {"openrouter": {"url": "http://override"}},
+            },
+        ):
+            with patch.object(model_catalog, "_fetch_manifest", side_effect=fake_fetch):
+                result = model_catalog.get_curated_openrouter_models()
+
+        assert result == [("override/model", "custom")]
+
+
+class TestIntegrationWithModelsModule:
+    """Exercise the fallback paths via the real callers in hermes_cli.models."""
+
+    def test_curated_nous_ids_falls_back_to_hardcoded_on_empty_catalog(
+        self, isolated_home
+    ):
+        """With no manifest available the ids equal ``_PROVIDER_MODELS["nous"]``."""
+        from hermes_cli import model_catalog
+        from hermes_cli.models import get_curated_nous_model_ids, _PROVIDER_MODELS
+
+        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
+            result = get_curated_nous_model_ids()
+
+        assert result == list(_PROVIDER_MODELS["nous"])
+
+    def test_curated_nous_ids_prefers_manifest(self, isolated_home):
+        """With a manifest available the ids are the manifest's Nous model ids."""
+        from hermes_cli import model_catalog
+        from hermes_cli.models import get_curated_nous_model_ids
+
+        with patch.object(
+            model_catalog, "_fetch_manifest", return_value=_valid_manifest()
+        ):
+            result = get_curated_nous_model_ids()
+
+        assert result == ["anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6"]
--- a/tests/hermes_cli/test_skills_hub.py
+++ b/tests/hermes_cli/test_skills_hub.py
@ -56,7 +56,7 @@ def three_source_env(monkeypatch, hub_env):
    import tools.skills_tool as skills_tool

    monkeypatch.setattr(hub, "HubLockFile", lambda: _DummyLockFile([_HUB_ENTRY]))
-    monkeypatch.setattr(skills_tool, "_find_all_skills", lambda: list(_ALL_THREE_SKILLS))
+    monkeypatch.setattr(skills_tool, "_find_all_skills", lambda **_kwargs: list(_ALL_THREE_SKILLS))
    monkeypatch.setattr(skills_sync, "_read_manifest", lambda: dict(_BUILTIN_MANIFEST))

    return hub_env
@ -107,7 +107,7 @@ def test_do_list_initializes_hub_dir(monkeypatch, hub_env):
    import tools.skills_sync as skills_sync
    import tools.skills_tool as skills_tool

-    monkeypatch.setattr(skills_tool, "_find_all_skills", lambda: [])
+    monkeypatch.setattr(skills_tool, "_find_all_skills", lambda **_kwargs: [])
    monkeypatch.setattr(skills_sync, "_read_manifest", lambda: {})

    hub_dir = hub_env
@ -154,6 +154,74 @@ def test_do_list_filter_builtin(three_source_env):
    assert "local-skill" not in output


+def test_do_list_renders_status_column(three_source_env, monkeypatch):
+    """Every list row should carry an enabled/disabled status.
+
+    (Added by the PR that answered Mr Mochizuki's "I just want to see
+    what's live" question.)
+    """
+    from agent import skill_utils
+
+    # No skills are disabled, so all three should count as enabled.
+    monkeypatch.setattr(skill_utils, "get_disabled_skill_names", lambda platform=None: set())
+    output = _capture()
+
+    assert "Status" in output
+    assert "enabled" in output.lower()
+    # Summary counts enabled skills.
+    assert "3 enabled, 0 disabled" in output
+
+
+def test_do_list_marks_disabled_skills(three_source_env, monkeypatch):
+    """A disabled skill is still listed, and the summary counts it as disabled."""
+    from agent import skill_utils
+
+    # Simulate `skills.disabled: [hub-skill]` in config.
+    monkeypatch.setattr(
+        skill_utils, "get_disabled_skill_names",
+        lambda platform=None: {"hub-skill"},
+    )
+    output = _capture()
+
+    # Row still appears (no --enabled-only), but marked disabled
+    assert "hub-skill" in output
+    assert "disabled" in output.lower()
+    assert "2 enabled, 1 disabled" in output
+
+
+def test_do_list_enabled_only_hides_disabled(three_source_env, monkeypatch):
+    """With ``enabled_only=True`` the disabled skill's row is omitted."""
+    from agent import skill_utils
+
+    # Report "hub-skill" as the only disabled skill.
+    monkeypatch.setattr(
+        skill_utils, "get_disabled_skill_names",
+        lambda platform=None: {"hub-skill"},
+    )
+    # Render into an in-memory buffer with colour and terminal handling off.
+    sink = StringIO()
+    console = Console(file=sink, force_terminal=False, color_system=None)
+    do_list(enabled_only=True, console=console)
+    output = sink.getvalue()
+
+    assert "hub-skill" not in output
+    assert "builtin-skill" in output
+    assert "local-skill" in output
+    assert "enabled only" in output.lower()
+    assert "2 enabled shown" in output
+
+
+def test_do_list_platform_env_is_ignored(three_source_env, monkeypatch):
+    """`hermes skills list` reads the active profile's config via
+    HERMES_HOME (swapped by -p), so it must NOT pass a platform arg to
+    ``get_disabled_skill_names`` — otherwise per-platform overrides
+    would silently leak in from HERMES_PLATFORM env."""
+    from agent import skill_utils
+
+    # Records the `platform` argument the code under test passes in.
+    seen = {}
+
+    def _fake(platform=None):
+        seen["platform"] = platform
+        return set()
+
+    monkeypatch.setattr(skill_utils, "get_disabled_skill_names", _fake)
+    _capture()
+
+    # Also fails with KeyError if the fake was never called at all.
+    assert seen["platform"] is None
+
+
 def test_do_check_reports_available_updates(monkeypatch):
    output = _capture_check(monkeypatch, [
        {"name": "hub-skill", "source": "skills.sh", "status": "update_available"},
--- a/tests/run_agent/test_review_prompt_class_first.py
+++ b/tests/run_agent/test_review_prompt_class_first.py
@ -0,0 +1,78 @@
+"""Behavior tests for the class-first skill review prompts.
+
+The skill review / combined review prompts steer the background review agent
+toward generalizing existing skills rather than accumulating near-duplicates.
+These tests assert the behavioral *instructions* are present — they do NOT
+snapshot the full prompt text (change-detector).
+"""
+
+from run_agent import AIAgent
+
+
+def test_skill_review_prompt_instructs_survey_first():
+    """Prompt must tell the reviewer to list existing skills before deciding."""
+    prompt = AIAgent._SKILL_REVIEW_PROMPT
+    # Case-sensitive substring checks on the prompt text.
+    assert "skills_list" in prompt, "must instruct the reviewer to call skills_list"
+    assert "skill_view" in prompt, "must instruct the reviewer to skill_view candidates"
+    assert "SURVEY" in prompt, "must name the survey step explicitly"
+
+
+def test_skill_review_prompt_is_class_first():
+    """Prompt must steer toward the CLASS of task, not the specific task."""
+    prompt = AIAgent._SKILL_REVIEW_PROMPT
+    # Both the upper-case keyword and the lower-case phrase are required.
+    assert "CLASS" in prompt, "must tell the reviewer to think about the task class"
+    assert "class level" in prompt, "must anchor naming at the class level"
+
+
+def test_skill_review_prompt_prefers_updating_existing():
+    """Prompt must prefer generalizing an existing skill over creating a new one."""
+    prompt = AIAgent._SKILL_REVIEW_PROMPT
+    # Either wording of the preference is accepted.
+    assert "PREFER GENERALIZING" in prompt or "PREFER UPDATING" in prompt, (
+        "must state the update-over-create preference"
+    )
+    assert "ONLY CREATE A NEW SKILL" in prompt, (
+        "must gate new-skill creation behind a last-resort clause"
+    )
+
+
+def test_skill_review_prompt_flags_overlap_for_followup():
+    """Prompt must ask the reviewer to note overlapping skills for future review."""
+    prompt = AIAgent._SKILL_REVIEW_PROMPT
+    # Case-insensitive: any capitalisation of "overlap" satisfies the check.
+    assert "overlap" in prompt.lower(), "must mention the overlap-flagging protocol"
+
+
+def test_skill_review_prompt_preserves_opt_out_clause():
+    """The 'Nothing to save.' escape clause must remain."""
+    prompt = AIAgent._SKILL_REVIEW_PROMPT
+    # Exact match, including the trailing period.
+    assert "Nothing to save." in prompt
+
+
+def test_combined_review_prompt_keeps_memory_section():
+    """Combined prompt must still cover memory review."""
+    prompt = AIAgent._COMBINED_REVIEW_PROMPT
+    # The heading is matched with its Markdown bold markers.
+    assert "**Memory**" in prompt
+    assert "memory tool" in prompt
+
+
+def test_combined_review_prompt_skills_section_is_class_first():
+    """The **Skills** half of the combined prompt must follow the same protocol."""
+    prompt = AIAgent._COMBINED_REVIEW_PROMPT
+    # Each marker is searched in the whole prompt; the checks do not
+    # verify that it sits inside the Skills section specifically.
+    assert "**Skills**" in prompt
+    assert "SURVEY" in prompt
+    assert "CLASS" in prompt
+    assert "skills_list" in prompt
+    assert "ONLY CREATE A NEW SKILL" in prompt
+
+
+def test_combined_review_prompt_preserves_opt_out_clause():
+    """The 'Nothing to save.' escape clause must remain in the combined prompt."""
+    prompt = AIAgent._COMBINED_REVIEW_PROMPT
+    assert "Nothing to save." in prompt
+
+
+def test_memory_review_prompt_unchanged_in_structure():
+    """Memory-only review prompt stays focused on user facts — not touched by this change."""
+    prompt = AIAgent._MEMORY_REVIEW_PROMPT
+    # Guardrails: the memory-only prompt must NOT mention skills/surveys.
+    assert "skills_list" not in prompt
+    assert "SURVEY" not in prompt
+    # It must still refer to the memory tool.
+    assert "memory tool" in prompt
--- a/tests/test_hermes_state.py
+++ b/tests/test_hermes_state.py
@ -1485,6 +1485,48 @@ class TestListSessionsRich:
        assert "\n" not in sessions[0]["preview"]
        assert "Line one Line two" in sessions[0]["preview"]

+    def test_branch_session_visible_in_list(self, db):
+        """Branch sessions (parent ended with 'branched') must appear in list_sessions_rich."""
+        # Parent ends with reason "branched" before the child is created.
+        db.create_session("parent", "cli")
+        db.end_session("parent", "branched")
+        db.create_session("branch", "cli", parent_session_id="parent")
+        db.append_message("branch", "user", "Exploring the alternative approach")
+
+        sessions = db.list_sessions_rich()
+        ids = [s["id"] for s in sessions]
+        assert "branch" in ids, "Branch session should be visible in default list"
+
+    def test_subagent_session_still_hidden(self, db):
+        """Sub-agent children (parent NOT ended with 'branched') remain hidden."""
+        # The parent is never ended, so the child is not a branch.
+        db.create_session("root", "cli")
+        db.create_session("delegate", "cli", parent_session_id="root")
+
+        sessions = db.list_sessions_rich()
+        ids = [s["id"] for s in sessions]
+        assert "delegate" not in ids, "Delegate sub-agent should not appear in default list"
+        assert "root" in ids
+
+    def test_compression_child_still_hidden(self, db):
+        """Compression continuation sessions remain hidden (parent ended with 'compression')."""
+        import time as _time
+        t0 = _time.time()
+        db.create_session("root", "cli")
+        # Write timestamps and end_reason directly through the connection so
+        # the root runs from t0 to t0 + 1800 and ends with 'compression'.
+        db._conn.execute("UPDATE sessions SET started_at=? WHERE id=?", (t0, "root"))
+        db._conn.execute(
+            "UPDATE sessions SET ended_at=?, end_reason='compression' WHERE id=?",
+            (t0 + 1800, "root"),
+        )
+        db._conn.commit()
+        db.create_session("continuation", "cli", parent_session_id="root")
+        # The continuation starts one second after the root ended.
+        db._conn.execute(
+            "UPDATE sessions SET started_at=? WHERE id=?", (t0 + 1801, "continuation")
+        )
+        db._conn.commit()
+
+        # NOTE(review): project_compression_tips=False presumably turns off the
+        # lineage projection so the raw rows are listed — confirm against
+        # list_sessions_rich.
+        sessions = db.list_sessions_rich(project_compression_tips=False)
+        ids = [s["id"] for s in sessions]
+        assert "continuation" not in ids, "Compression continuation should stay hidden"
+

 class TestCompressionChainProjection:
    """Tests for lineage-aware list_sessions_rich — compressed conversations
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@ -1835,3 +1835,112 @@ def test_model_options_propagates_list_exception(monkeypatch):
    assert "error" in resp
    assert resp["error"]["code"] == 5033
    assert "catalog blew up" in resp["error"]["message"]
+
+
+# ---------------------------------------------------------------------------
+# prompt.submit — auto-title
+# ---------------------------------------------------------------------------
+
+class _ImmediateThread:
+    """Synchronous stand-in for ``threading.Thread``.
+
+    ``start()`` runs the target in the calling thread so assertions can follow
+    immediately.  The tests install this class on ``server.threading.Thread``,
+    so it tolerates the constructor keywords a real ``Thread`` accepts:
+    ``args``/``kwargs`` are forwarded to the target, anything else (``name``,
+    ``group``, ...) is accepted and ignored.
+    """
+
+    def __init__(self, target=None, daemon=None, *, args=(), kwargs=None, **_ignored):
+        self._target = target
+        self._args = tuple(args)
+        self._kwargs = dict(kwargs or {})
+
+    def start(self):
+        """Invoke the target inline; a missing target is a no-op."""
+        if self._target is not None:
+            self._target(*self._args, **self._kwargs)
+
+    def join(self, timeout=None):
+        """Nothing to wait for — the target already ran inside ``start()``."""
+
+
+def test_prompt_submit_auto_titles_session_on_complete(monkeypatch):
+    """A completed prompt hands the exchange to maybe_auto_title exactly once."""
+
+    class _Agent:
+        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
+            transcript = [
+                {"role": "user", "content": "Tell me about Rome"},
+                {"role": "assistant", "content": "Rome was founded in 753 BC."},
+            ]
+            return {"final_response": "Rome was founded in 753 BC.", "messages": transcript}
+
+    server._sessions["sid"] = _session(agent=_Agent())
+    # Run the worker inline and silence the emit/render/DB hooks.
+    monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
+    monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None)
+    monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None)
+    monkeypatch.setattr(server, "render_message", lambda raw, cols: None)
+    monkeypatch.setattr(server, "_get_db", lambda: None)
+
+    request = {
+        "id": "1",
+        "method": "prompt.submit",
+        "params": {"session_id": "sid", "text": "Tell me about Rome"},
+    }
+    with patch("agent.title_generator.maybe_auto_title") as auto_title:
+        server.handle_request(request)
+
+    auto_title.assert_called_once()
+    positional = auto_title.call_args.args
+    assert positional[1] == "session-key"
+    assert positional[2] == "Tell me about Rome"
+    assert positional[3] == "Rome was founded in 753 BC."
+
+
+def test_prompt_submit_skips_auto_title_when_interrupted(monkeypatch):
+    """An interrupted agent run must never reach maybe_auto_title."""
+
+    class _Agent:
+        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
+            outcome = {"final_response": "partial answer", "interrupted": True}
+            outcome["messages"] = []
+            return outcome
+
+    server._sessions["sid"] = _session(agent=_Agent())
+    # Run the worker inline and silence the emit/render/DB hooks.
+    monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
+    monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None)
+    monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None)
+    monkeypatch.setattr(server, "render_message", lambda raw, cols: None)
+    monkeypatch.setattr(server, "_get_db", lambda: None)
+
+    request = {
+        "id": "1",
+        "method": "prompt.submit",
+        "params": {"session_id": "sid", "text": "Tell me about Rome"},
+    }
+    with patch("agent.title_generator.maybe_auto_title") as auto_title:
+        server.handle_request(request)
+
+    auto_title.assert_not_called()
+
+
+def test_prompt_submit_skips_auto_title_when_response_empty(monkeypatch):
+    """An empty final response must never reach maybe_auto_title."""
+
+    class _Agent:
+        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
+            return {"final_response": "", "messages": []}
+
+    server._sessions["sid"] = _session(agent=_Agent())
+    # Run the worker inline and silence the emit/render/DB hooks.
+    monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
+    monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None)
+    monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None)
+    monkeypatch.setattr(server, "render_message", lambda raw, cols: None)
+    monkeypatch.setattr(server, "_get_db", lambda: None)
+
+    request = {
+        "id": "1",
+        "method": "prompt.submit",
+        "params": {"session_id": "sid", "text": "Tell me about Rome"},
+    }
+    with patch("agent.title_generator.maybe_auto_title") as auto_title:
+        server.handle_request(request)
+
+    auto_title.assert_not_called()
--- a/tests/tools/test_browser_hybrid_routing.py
+++ b/tests/tools/test_browser_hybrid_routing.py
@ -0,0 +1,248 @@
+"""Tests for hybrid browser-backend routing (LAN/localhost auto-local).
+
+When a cloud browser provider (Browserbase / Browser-Use / Firecrawl) is
+configured globally, ``browser.auto_local_for_private_urls`` (default True)
+causes ``browser_navigate`` to transparently spawn a local Chromium sidecar
+for URLs whose host resolves to a private/loopback/LAN address, while
+public URLs continue to hit the cloud session in the same conversation.
+
+These tests cover the routing decision layer — session_key selection,
+sidecar detection, last-active-session tracking, and the config toggle.
+The downstream session creation is covered by test_browser_cloud_fallback.py.
+"""
+from unittest.mock import Mock
+
+import pytest
+
+import tools.browser_tool as browser_tool
+
+
+@pytest.fixture(autouse=True)
+def _reset_routing_state(monkeypatch):
+    """Give every test pristine module-level routing state in ``browser_tool``."""
+    fresh_state = {
+        "_active_sessions": {},
+        "_last_active_session_key": {},
+        "_cached_cloud_provider": None,
+        "_cloud_provider_resolved": False,
+        "_auto_local_for_private_urls_resolved": False,
+        "_cached_auto_local_for_private_urls": True,
+        "_start_browser_cleanup_thread": lambda: None,
+        "_update_session_activity": lambda t: None,
+        # Default: no CDP override, no Camofox
+        "_get_cdp_override": lambda: None,
+        "_is_camofox_mode": lambda: False,
+    }
+    for attr_name, replacement in fresh_state.items():
+        monkeypatch.setattr(browser_tool, attr_name, replacement)
+
+
+class TestNavigationSessionKey:
+    """Tests for _navigation_session_key URL-based routing decisions."""
+
+    @staticmethod
+    def _enable_cloud(monkeypatch):
+        """Make ``_get_cloud_provider`` report a configured (mock) provider."""
+        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock())
+
+    def test_public_url_uses_bare_task_id(self, monkeypatch):
+        """Public URL with cloud provider configured → bare task_id (cloud)."""
+        import socket
+
+        self._enable_cloud(monkeypatch)
+        # A named public host would otherwise trigger a live DNS lookup inside
+        # _url_is_private; answer with a fixed public address so the test is
+        # hermetic and independent of the machine's resolver.
+        monkeypatch.setattr(
+            socket,
+            "getaddrinfo",
+            lambda *args, **kwargs: [
+                (socket.AF_INET, socket.SOCK_STREAM, 6, "", ("140.82.112.3", 0))
+            ],
+        )
+        key = browser_tool._navigation_session_key("default", "https://github.com/x/y")
+        assert key == "default"
+
+    def test_localhost_routes_to_local_sidecar(self, monkeypatch):
+        """``localhost`` URL → ``::local`` suffix when cloud configured + flag on."""
+        self._enable_cloud(monkeypatch)
+        key = browser_tool._navigation_session_key("default", "http://localhost:3000/")
+        assert key == "default::local"
+
+    def test_loopback_ipv4_routes_to_local_sidecar(self, monkeypatch):
+        """IPv4 loopback literal → local sidecar."""
+        self._enable_cloud(monkeypatch)
+        key = browser_tool._navigation_session_key("default", "http://127.0.0.1:8080/")
+        assert key == "default::local"
+
+    def test_rfc1918_lan_routes_to_local_sidecar(self, monkeypatch):
+        """RFC 1918 LAN literal → local sidecar."""
+        self._enable_cloud(monkeypatch)
+        key = browser_tool._navigation_session_key("default", "http://192.168.1.50:8000/")
+        assert key == "default::local"
+
+    def test_ipv6_loopback_routes_to_local_sidecar(self, monkeypatch):
+        """Bracketed IPv6 loopback literal → local sidecar."""
+        self._enable_cloud(monkeypatch)
+        key = browser_tool._navigation_session_key("default", "http://[::1]:3000/")
+        assert key == "default::local"
+
+    def test_public_ip_literal_uses_bare_task_id(self, monkeypatch):
+        """Public IP literal stays on the bare task_id (no DNS involved)."""
+        self._enable_cloud(monkeypatch)
+        key = browser_tool._navigation_session_key("default", "https://8.8.8.8/")
+        assert key == "default"
+
+    def test_mdns_local_hostname_routes_to_sidecar(self, monkeypatch):
+        """``*.local`` mDNS / ``*.lan`` / ``*.internal`` hostnames route to sidecar."""
+        self._enable_cloud(monkeypatch)
+        for host in ("raspberrypi.local", "printer.lan", "db.internal"):
+            key = browser_tool._navigation_session_key("default", f"http://{host}/")
+            assert key == "default::local", f"host {host!r} did not route to sidecar"
+
+    def test_no_cloud_provider_stays_on_bare_task_id(self, monkeypatch):
+        """When cloud provider is not configured, no hybrid routing happens."""
+        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: None)
+        key = browser_tool._navigation_session_key("default", "http://localhost:3000/")
+        assert key == "default"
+
+    def test_camofox_mode_stays_on_bare_task_id(self, monkeypatch):
+        """Camofox is already local — no hybrid routing needed."""
+        self._enable_cloud(monkeypatch)
+        monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: True)
+        key = browser_tool._navigation_session_key("default", "http://localhost:3000/")
+        assert key == "default"
+
+    def test_cdp_override_stays_on_bare_task_id(self, monkeypatch):
+        """A user-supplied CDP endpoint owns the whole session — no hybrid."""
+        self._enable_cloud(monkeypatch)
+        monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: "ws://localhost:9222")
+        key = browser_tool._navigation_session_key("default", "http://localhost:3000/")
+        assert key == "default"
+
+    def test_feature_flag_off_disables_hybrid_routing(self, monkeypatch):
+        """``auto_local_for_private_urls: false`` keeps private URLs on cloud."""
+        self._enable_cloud(monkeypatch)
+        monkeypatch.setattr(browser_tool, "_auto_local_for_private_urls", lambda: False)
+        key = browser_tool._navigation_session_key("default", "http://localhost:3000/")
+        assert key == "default"
+
+    def test_none_task_id_defaults(self, monkeypatch):
+        """``None`` task_id resolves to 'default'."""
+        self._enable_cloud(monkeypatch)
+        key = browser_tool._navigation_session_key(None, "http://localhost:3000/")
+        assert key == "default::local"
+
+
+class TestSessionKeyHelpers:
+    """Sidecar-key detection and last-active-session lookup helpers."""
+
+    def test_is_local_sidecar_key(self):
+        """Only keys ending in ``::local`` count as sidecar keys."""
+        for sidecar_key in ("default::local", "my_task::local"):
+            assert browser_tool._is_local_sidecar_key(sidecar_key)
+        for bare_key in ("default", "my_task"):
+            assert not browser_tool._is_local_sidecar_key(bare_key)
+
+    def test_last_session_key_falls_back_to_task_id(self, monkeypatch):
+        """With nothing recorded, the bare task_id (or 'default' for None) comes back."""
+        monkeypatch.setattr(browser_tool, "_last_active_session_key", {})
+        expectations = (("default", "default"), ("task-42", "task-42"), (None, "default"))
+        for task_id, expected in expectations:
+            assert browser_tool._last_session_key(task_id) == expected
+
+    def test_last_session_key_returns_recorded_key(self, monkeypatch):
+        """A recorded last-active key wins; unknown task_ids still fall back."""
+        recorded = {"default": "default::local", "task-42": "task-42"}
+        monkeypatch.setattr(browser_tool, "_last_active_session_key", recorded)
+        assert browser_tool._last_session_key("default") == "default::local"
+        assert browser_tool._last_session_key("task-42") == "task-42"
+        # Unknown task_id still falls back
+        assert browser_tool._last_session_key("other") == "other"
+
+
+class TestHybridRoutingSessionCreation:
+    """``_get_session_info`` must go local whenever the key carries ``::local``."""
+
+    def test_local_sidecar_key_skips_cloud_provider(self, monkeypatch):
+        """A ``::local``-suffixed key creates a local session even when cloud is set."""
+        cloud = Mock()
+        cloud.create_session.return_value = dict(
+            session_name="should_not_be_used",
+            bb_session_id="bb_xxx",
+            cdp_url="wss://fake.browserbase.com/ws",
+        )
+        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: cloud)
+        monkeypatch.setattr(browser_tool, "_ensure_cdp_supervisor", lambda t: None)
+
+        info = browser_tool._get_session_info("default::local")
+
+        cloud.create_session.assert_not_called()
+        assert info["bb_session_id"] is None
+        assert info["cdp_url"] is None
+        assert info["features"]["local"] is True
+
+    def test_bare_task_id_with_cloud_provider_uses_cloud(self, monkeypatch):
+        """A bare task_id with cloud provider configured hits the cloud path."""
+        cloud = Mock()
+        cloud.create_session.return_value = dict(
+            session_name="cloud-sess",
+            bb_session_id="bb_123",
+            cdp_url="wss://real.browserbase.com/ws",
+        )
+        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: cloud)
+        monkeypatch.setattr(browser_tool, "_ensure_cdp_supervisor", lambda t: None)
+        monkeypatch.setattr(browser_tool, "_resolve_cdp_override", lambda u: u)
+
+        info = browser_tool._get_session_info("default")
+
+        cloud.create_session.assert_called_once()
+        assert info["bb_session_id"] == "bb_123"
+
+
+class TestCleanupHybridSessions:
+    """``cleanup_browser`` with a bare task_id reaps the cloud session and its sidecar."""
+
+    def test_cleanup_reaps_both_primary_and_sidecar(self, monkeypatch):
+        """Given a bare task_id with both sessions alive, both get cleaned."""
+        reaped = []
+        monkeypatch.setattr(
+            browser_tool, "_cleanup_single_browser_session", lambda key: reaped.append(key)
+        )
+        live_sessions = {
+            "default": {"session_name": "cloud_sess"},
+            "default::local": {"session_name": "local_sess"},
+        }
+        monkeypatch.setattr(browser_tool, "_active_sessions", live_sessions)
+        monkeypatch.setattr(
+            browser_tool, "_last_active_session_key", {"default": "default::local"}
+        )
+
+        browser_tool.cleanup_browser("default")
+
+        assert set(reaped) == {"default", "default::local"}
+        # last-active pointer dropped
+        assert "default" not in browser_tool._last_active_session_key
+
+    def test_cleanup_reaps_only_primary_when_no_sidecar(self, monkeypatch):
+        """When no sidecar exists, only the primary is reaped."""
+        reaped = []
+        monkeypatch.setattr(
+            browser_tool, "_cleanup_single_browser_session", lambda key: reaped.append(key)
+        )
+        live_sessions = {"default": {"session_name": "cloud_sess"}}
+        monkeypatch.setattr(browser_tool, "_active_sessions", live_sessions)
+
+        browser_tool.cleanup_browser("default")
+
+        assert reaped == ["default"]
+
+    def test_cleanup_sidecar_directly_keeps_primary(self, monkeypatch):
+        """Calling cleanup with a ``::local`` key reaps only the sidecar."""
+        reaped = []
+        monkeypatch.setattr(
+            browser_tool, "_cleanup_single_browser_session", lambda key: reaped.append(key)
+        )
+        live_sessions = {
+            "default": {"session_name": "cloud_sess"},
+            "default::local": {"session_name": "local_sess"},
+        }
+        monkeypatch.setattr(browser_tool, "_active_sessions", live_sessions)
+        monkeypatch.setattr(
+            browser_tool, "_last_active_session_key", {"default": "default::local"}
+        )
+
+        browser_tool.cleanup_browser("default::local")
+
+        assert reaped == ["default::local"]
+        # Last-active pointer NOT dropped (primary task is still alive)
+        assert browser_tool._last_active_session_key.get("default") == "default::local"
--- a/tests/tools/test_credential_pool_env_fallback.py
+++ b/tests/tools/test_credential_pool_env_fallback.py
@ -0,0 +1,210 @@
+"""Tests for credential_pool .env fallback and auth credential_pool lookup.
+
+Covers the fix from #15914 / PR #15920:
+- _seed_from_env reads API keys from ~/.hermes/.env when not in os.environ
+- _resolve_api_key_provider_secret falls back to credential_pool when env vars are empty
+- env vars take priority over .env file (handled by get_env_value itself)
+- env vars take priority over credential pool (fallback only kicks in when env is empty)
+"""
+
+import os
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+def _make_pconfig(provider_id="deepseek", env_vars=None):
+    """Build a bare-bones ``api_key`` ProviderConfig for the tests.
+
+    ``env_vars`` defaults to the single name ``<PROVIDER_ID>_API_KEY``.  The
+    default provider is 'deepseek' because, per the original author, it is a
+    real api_key provider in PROVIDER_REGISTRY (needed for _seed_from_env's
+    generic path).
+    """
+    from hermes_cli.auth import ProviderConfig
+
+    key_vars = env_vars if env_vars else [f"{provider_id.upper()}_API_KEY"]
+    return ProviderConfig(
+        id=provider_id,
+        name=provider_id.title(),
+        auth_type="api_key",
+        api_key_env_vars=tuple(key_vars),
+    )
+
+
+@pytest.fixture
+def isolated_hermes_home(tmp_path, monkeypatch):
+    """Yield a throw-away ``.hermes`` directory wired up as HERMES_HOME.
+
+    ``Path.home()`` is redirected to ``tmp_path`` and the known API-key /
+    base-URL environment variables are removed, so lookups have to fall
+    through to the ``.env`` file inside the returned directory.
+    """
+    hermes_dir = tmp_path / ".hermes"
+    hermes_dir.mkdir()
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
+
+    # Clear all known API key env vars so get_env_value falls through to .env
+    scrubbed_vars = (
+        "OPENAI_API_KEY",
+        "ANTHROPIC_API_KEY",
+        "OPENROUTER_API_KEY",
+        "ZAI_API_KEY",
+        "DEEPSEEK_API_KEY",
+        "ANTHROPIC_TOKEN",
+        "CLAUDE_CODE_OAUTH_TOKEN",
+        "OPENAI_BASE_URL",
+    )
+    for var_name in scrubbed_vars:
+        monkeypatch.delenv(var_name, raising=False)
+
+    return hermes_dir
+
+
+def _write_env_file(home: Path, **kwargs) -> None:
+    """Write ``KEY=value`` pairs, one per line, to ``<home>/.env``.
+
+    The file is always written as UTF-8 so its contents do not depend on the
+    locale of the machine running the tests.  With no pairs the file holds a
+    single newline.
+    """
+    lines = [f"{k}={v}" for k, v in kwargs.items()]
+    (home / ".env").write_text("\n".join(lines) + "\n", encoding="utf-8")
+
+
+class TestCredentialPoolSeedsFromDotEnv:
+    """``_seed_from_env`` has to see keys in ~/.hermes/.env, not only os.environ.
+
+    This is the load-bearing behaviour for the fix: when a user adds a key to
+    .env mid-session or via a non-CLI entry point that doesn't run
+    load_hermes_dotenv, the credential pool must still discover it.
+    """
+
+    def test_deepseek_key_from_dotenv_only(self, isolated_hermes_home):
+        """Key in .env but not os.environ → _seed_from_env adds a pool entry."""
+        _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-only-12345")
+        assert "DEEPSEEK_API_KEY" not in os.environ
+
+        from agent.credential_pool import _seed_from_env
+        entries = []
+        changed, active_sources = _seed_from_env("deepseek", entries)
+
+        assert changed is True
+        assert "env:DEEPSEEK_API_KEY" in active_sources
+        seeded = [
+            e
+            for e in entries
+            if e.access_token == "sk-dotenv-only-12345" and e.source == "env:DEEPSEEK_API_KEY"
+        ]
+        assert seeded, f"Expected seeded entry with dotenv key, got: {[(e.source, e.access_token) for e in entries]}"
+
+    def test_openrouter_key_from_dotenv_only(self, isolated_hermes_home):
+        """OpenRouter path has its own branch — verify it also reads .env."""
+        _write_env_file(isolated_hermes_home, OPENROUTER_API_KEY="sk-or-dotenv-abc")
+        assert "OPENROUTER_API_KEY" not in os.environ
+
+        from agent.credential_pool import _seed_from_env
+        entries = []
+        changed, active_sources = _seed_from_env("openrouter", entries)
+
+        assert changed is True
+        assert "env:OPENROUTER_API_KEY" in active_sources
+        seeded_tokens = [e.access_token for e in entries]
+        assert "sk-or-dotenv-abc" in seeded_tokens
+
+    def test_empty_dotenv_no_entries(self, isolated_hermes_home):
+        """No .env file, no env vars → no entries seeded (and no crash)."""
+        from agent.credential_pool import _seed_from_env
+        entries = []
+        changed, active_sources = _seed_from_env("deepseek", entries)
+        assert changed is False
+        assert active_sources == set()
+        assert entries == []
+
+    def test_os_environ_still_wins_over_dotenv(self, isolated_hermes_home, monkeypatch):
+        """get_env_value checks os.environ first — verify seeding picks that up."""
+        _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-stale")
+        monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-env-fresh-xyz")
+
+        from agent.credential_pool import _seed_from_env
+        entries = []
+        changed, _ = _seed_from_env("deepseek", entries)
+
+        assert changed is True
+        env_tokens = [e.access_token for e in entries if e.source == "env:DEEPSEEK_API_KEY"]
+        assert len(env_tokens) == 1
+        assert env_tokens[0] == "sk-env-fresh-xyz"
+
+
+class TestAuthResolvesFromDotEnv:
+    """``_resolve_api_key_provider_secret`` has to see keys in ~/.hermes/.env too."""
+
+    def test_key_from_dotenv_only(self, isolated_hermes_home):
+        """Key in .env but not os.environ → _resolve returns it with the env var source."""
+        _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-resolve-789")
+        assert "DEEPSEEK_API_KEY" not in os.environ
+
+        from hermes_cli.auth import _resolve_api_key_provider_secret
+        resolved = _resolve_api_key_provider_secret(
+            provider_id="deepseek",
+            pconfig=_make_pconfig(),
+        )
+        key, source = resolved
+        assert key == "sk-dotenv-resolve-789"
+        assert source == "DEEPSEEK_API_KEY"
+
+
+class TestAuthCredentialPoolFallback:
+    """The credential pool is the last resort, used only when env and .env are both empty."""
+
+    def test_credential_pool_fallback_structure(self, isolated_hermes_home):
+        """Empty env + empty .env → auth falls back to credential pool."""
+        pooled_entry = MagicMock(access_token="test-pool-key-12345", runtime_api_key="")
+        pool = MagicMock()
+        pool.has_credentials.return_value = True
+        pool.peek.return_value = pooled_entry
+
+        from hermes_cli.auth import _resolve_api_key_provider_secret
+        with patch("agent.credential_pool.load_pool", return_value=pool):
+            key, source = _resolve_api_key_provider_secret(
+                provider_id="deepseek",
+                pconfig=_make_pconfig(),
+            )
+        assert "test-pool-key-12345" in key
+        assert "credential_pool" in source
+
+    def test_credential_pool_empty_returns_empty(self, isolated_hermes_home):
+        """Empty env + empty .env + empty pool → empty string."""
+        pool = MagicMock()
+        pool.has_credentials.return_value = False
+
+        from hermes_cli.auth import _resolve_api_key_provider_secret
+        with patch("agent.credential_pool.load_pool", return_value=pool):
+            key, _source = _resolve_api_key_provider_secret(
+                provider_id="deepseek",
+                pconfig=_make_pconfig(),
+            )
+        assert key == ""
+
+    def test_env_var_takes_priority_over_pool(self, isolated_hermes_home, monkeypatch):
+        """os.environ key wins — credential pool is NEVER consulted."""
+        monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-env-key-first-abc123")
+
+        pool = MagicMock()
+        pool.has_credentials.return_value = True
+
+        from hermes_cli.auth import _resolve_api_key_provider_secret
+        with patch("agent.credential_pool.load_pool", return_value=pool) as load_pool_mock:
+            key, source = _resolve_api_key_provider_secret(
+                provider_id="deepseek",
+                pconfig=_make_pconfig(),
+            )
+        assert (key, source) == ("sk-env-key-first-abc123", "DEEPSEEK_API_KEY")
+        # Pool should not even have been loaded — env var satisfied the request first
+        load_pool_mock.assert_not_called()
+
+    def test_dotenv_takes_priority_over_pool(self, isolated_hermes_home):
+        """Key in .env beats credential pool — pool only fires when both env sources are empty."""
+        _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-priority-xyz")
+        assert "DEEPSEEK_API_KEY" not in os.environ
+
+        pool = MagicMock()
+        pool.has_credentials.return_value = True
+
+        from hermes_cli.auth import _resolve_api_key_provider_secret
+        with patch("agent.credential_pool.load_pool", return_value=pool) as load_pool_mock:
+            key, source = _resolve_api_key_provider_secret(
+                provider_id="deepseek",
+                pconfig=_make_pconfig(),
+            )
+        assert (key, source) == ("sk-dotenv-priority-xyz", "DEEPSEEK_API_KEY")
+        load_pool_mock.assert_not_called()
--- a/tests/tools/test_mcp_oauth.py
+++ b/tests/tools/test_mcp_oauth.py
@ -491,11 +491,36 @@ def test_configure_callback_port_uses_explicit_port():
    assert cfg["_resolved_port"] == 54321


-def test_parse_base_url_strips_path():
-    """_parse_base_url drops path components for OAuth discovery."""
-    from tools.mcp_oauth import _parse_base_url
+def test_build_oauth_auth_preserves_server_url_path():
+    """server_url with path is forwarded to OAuthClientProvider unmodified.
+
+    Regression for #16015: previously ``_parse_base_url`` stripped the path,
+    collapsing ``https://mcp.notion.com/mcp`` to ``https://mcp.notion.com`` and
+    breaking RFC 9728 protected-resource validation against servers whose PRM
+    advertises a path-scoped resource (Notion). The MCP SDK strips the path
+    itself for authorization-server discovery via
+    ``OAuthContext.get_authorization_base_url``; Hermes must not pre-strip.
+    """
+    from tools import mcp_oauth
+
+    # Receives every keyword argument the provider constructor is called with.
+    captured: dict = {}
+
+    class _FakeProvider:
+        # Recorder standing in for OAuthClientProvider; it only stores kwargs.
+        def __init__(self, **kwargs):
+            captured.update(kwargs)
+
+    # Swap the provider for the recorder and stub the helpers patched below:
+    # OAuth is flagged available, the session is reported interactive,
+    # client pre-registration is mocked out, and token storage claims to
+    # already hold cached tokens.
+    with patch.object(mcp_oauth, "_OAUTH_AVAILABLE", True), \
+         patch.object(mcp_oauth, "OAuthClientProvider", _FakeProvider), \
+         patch.object(mcp_oauth, "_is_interactive", return_value=True), \
+         patch.object(mcp_oauth, "_maybe_preregister_client"), \
+         patch.object(mcp_oauth, "HermesTokenStorage") as mock_storage_cls:
+        mock_storage_cls.return_value = MagicMock(has_cached_tokens=lambda: True)
+        build_oauth_auth(
+            server_name="notion",
+            server_url="https://mcp.notion.com/mcp",
+            oauth_config={},
+        )
+
+    # The path component must reach the provider untouched.
+    assert captured["server_url"] == "https://mcp.notion.com/mcp"

-    assert _parse_base_url("https://example.com/mcp/v1") == "https://example.com"
-    assert _parse_base_url("https://example.com") == "https://example.com"
-    assert _parse_base_url("https://host.example.com:8080/api") == "https://host.example.com:8080"

--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@ -483,6 +483,147 @@ def _is_local_backend() -> bool:
    return _is_camofox_mode() or _get_cloud_provider() is None


+_auto_local_for_private_urls_resolved = False
+_cached_auto_local_for_private_urls: bool = True
+
+
+def _auto_local_for_private_urls() -> bool:
+    """Tell whether private/LAN URLs should be auto-routed to a local Chromium.
+
+    The ``browser.auto_local_for_private_urls`` config value is looked up on
+    the first call only and then served from a module-level cache.  The
+    cached default is ``True``; it is overwritten only when the ``browser``
+    section is a dict that actually contains the key.  If reading the config
+    raises, the failure is logged at debug level and the cached value is
+    returned unchanged.
+    """
+    global _auto_local_for_private_urls_resolved, _cached_auto_local_for_private_urls
+    if not _auto_local_for_private_urls_resolved:
+        # Mark as resolved up front so a failing config read is not retried.
+        _auto_local_for_private_urls_resolved = True
+        try:
+            from hermes_cli.config import read_raw_config
+            raw_config = read_raw_config()
+            browser_section = raw_config.get("browser", {})
+            key_present = (
+                isinstance(browser_section, dict)
+                and "auto_local_for_private_urls" in browser_section
+            )
+            if key_present:
+                _cached_auto_local_for_private_urls = bool(
+                    browser_section.get("auto_local_for_private_urls")
+                )
+        except Exception as e:
+            logger.debug("Could not read auto_local_for_private_urls from config: %s", e)
+    return _cached_auto_local_for_private_urls
+
+
+def _url_is_private(url: str) -> bool:
+    """Return True when the URL's host is, or resolves to, a private/LAN/loopback address.
+
+    The check is self-contained (``urllib.parse`` + ``ipaddress`` + ``socket``):
+
+    * a literal IP is classified directly, with no DNS lookup;
+    * ``localhost``, ``*.localhost``, ``*.local``, ``*.lan`` and ``*.internal``
+      are treated as private without a DNS lookup;
+    * any other hostname is resolved, and the URL counts as private when at
+      least one resolved address is private, loopback, link-local or inside
+      the 100.64.0.0/10 shared (CGNAT) range.
+
+    DNS resolution failures are treated as NOT private (fall through to
+    whatever backend is configured, which will surface the DNS error
+    naturally).  Any unexpected error is logged at debug level and also
+    reported as not private.
+    """
+    import ipaddress
+    import socket
+    from urllib.parse import urlparse
+
+    # Shared address space (CGNAT); ``is_private`` is False for this range,
+    # so it is tested explicitly.  Built once per call, not once per address.
+    cgnat_net = ipaddress.ip_network("100.64.0.0/10")
+
+    def _is_private_ip(text: str) -> bool:
+        """Classify one IP literal; raises ValueError when ``text`` is not an IP."""
+        # Drop an IPv6 zone id ("fe80::1%eth0") before parsing.
+        literal = text.split("%", 1)[0] if ":" in text else text
+        ip = ipaddress.ip_address(literal)
+        # Judge an IPv4-mapped IPv6 address by the IPv4 address it embeds.
+        mapped = getattr(ip, "ipv4_mapped", None)
+        if mapped is not None:
+            ip = mapped
+        return ip.is_private or ip.is_loopback or ip.is_link_local or ip in cgnat_net
+
+    try:
+        hostname = (urlparse(url).hostname or "").strip().lower().rstrip(".")
+        if not hostname:
+            return False
+        # Literal IP → check directly
+        try:
+            return _is_private_ip(hostname)
+        except ValueError:
+            pass
+        # Obvious local names: answer without a DNS hop.
+        if hostname == "localhost" or hostname.endswith(
+            (".localhost", ".local", ".lan", ".internal")
+        ):
+            return True
+        try:
+            addr_info = socket.getaddrinfo(hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM)
+        except socket.gaierror:
+            return False  # DNS fail → not private, let the normal path fail
+        for *_, sockaddr in addr_info:
+            try:
+                if _is_private_ip(sockaddr[0]):
+                    return True
+            except ValueError:
+                continue
+        return False
+    except Exception as exc:
+        logger.debug("URL-privacy check failed for %s: %s", url, exc)
+        return False
+
+
+def _navigation_session_key(task_id: Optional[str], url: str) -> str:
+    """Pick the session key that should handle ``url`` for ``task_id``.
+
+    ``task_id`` may be ``None``, in which case ``"default"`` is used.
+
+    Returns the bare task_id unless ALL of these are true:
+      1. A CDP override is not active (that path owns the whole session).
+      2. Camofox mode is not active (Camofox is already local-only).
+      3. A cloud provider is configured (``_get_cloud_provider()`` is not None).
+      4. Auto-local routing is enabled (``browser.auto_local_for_private_urls``,
+         default True).
+      5. The URL resolves to a private/LAN/loopback address.
+
+    When all are true, returns ``f"{task_id}::local"`` so the hybrid-routing
+    path spawns a local Chromium sidecar while the cloud session (if any)
+    continues to serve public URLs.
+    """
+    base_key = "default" if task_id is None else task_id
+    # Evaluated left to right with short-circuiting, in the order listed in
+    # the docstring, so the URL check only runs when nothing else has already
+    # ruled hybrid routing out.
+    hybrid_ruled_out = (
+        _get_cdp_override()
+        or _is_camofox_mode()
+        or _get_cloud_provider() is None
+        or not _auto_local_for_private_urls()
+        or not _url_is_private(url)
+    )
+    if hybrid_ruled_out:
+        return base_key
+    return f"{base_key}{_LOCAL_SUFFIX}"
+
+
+def _is_local_sidecar_key(session_key: str) -> bool:
+    """Tell whether ``session_key`` names a hybrid-routing local sidecar session.
+
+    Sidecar keys are recognised purely by ending in ``_LOCAL_SUFFIX``.
+    """
+    has_sidecar_suffix = session_key.endswith(_LOCAL_SUFFIX)
+    return has_sidecar_suffix
+
+
+def _last_session_key(task_id: Optional[str]) -> str:
+    """Return the session key to use for a non-nav browser tool call.
+
+    ``task_id`` may be ``None``, in which case ``"default"`` is used.
+
+    If a previous ``browser_navigate`` on this task_id set a last-active key,
+    use it so snapshot/click/fill/etc. hit the same session.  Otherwise fall
+    back to the bare task_id (matches original behavior for tasks that never
+    triggered hybrid routing).
+    """
+    base_key = "default" if task_id is None else task_id
+    return _last_active_session_key.get(base_key, base_key)
+
+
 def _allow_private_urls() -> bool:
    """Return whether the browser is allowed to navigate to private/internal addresses.

@ -521,10 +662,25 @@ def _socket_safe_tmpdir() -> str:
    return tempfile.gettempdir()


-# Track active sessions per task
+# Track active sessions per "session key".
+#
+# A "session key" is either the bare task_id (cloud/default path) OR a composite
+# like f"{task_id}::local" when the hybrid-routing feature spawns a local sidecar
+# browser for a LAN/localhost URL while a cloud provider is configured globally.
+# Both forms flow through the same _active_sessions / _run_browser_command /
+# cleanup_browser code paths — the key is opaque to those internals.
+#
 # Stores: session_name (always), bb_session_id + cdp_url (cloud mode only)
-_active_sessions: Dict[str, Dict[str, str]] = {}  # task_id -> {session_name, ...}
-_recording_sessions: set = set()  # task_ids with active recordings
+_active_sessions: Dict[str, Dict[str, str]] = {}  # session_key -> {session_name, ...}
+_recording_sessions: set = set()  # session_keys with active recordings
+
+# Tracks the most recent session_key used per task_id. Set by browser_navigate()
+# after it chooses a backend for a URL; read by every non-nav browser tool
+# (snapshot/click/fill/eval/...) so they target the session that served the last
+# navigation.  Without this, a task that navigated to localhost on the local
+# sidecar would fall back to the cloud session on its next snapshot call.
+_last_active_session_key: Dict[str, str] = {}  # task_id -> session_key
+_LOCAL_SUFFIX = "::local"

 # Flag to track if cleanup has been done
 _cleanup_done = False
@ -1014,37 +1170,48 @@ def _create_cdp_session(task_id: str, cdp_url: str) -> Dict[str, str]:

 def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
    """
-    Get or create session info for the given task.
-    
+    Get or create session info for the given session key.
+
    In cloud mode, creates a Browserbase session with proxies enabled.
    In local mode, generates a session name for agent-browser --session.
    Also starts the inactivity cleanup thread and updates activity tracking.
    Thread-safe: multiple subagents can call this concurrently.
-    
+
    Args:
-        task_id: Unique identifier for the task
-        
+        task_id: Session key.  Normally the task_id as-is, but may carry the
+            ``::local`` suffix for the hybrid-routing local sidecar — in that
+            case the cloud provider is skipped even when one is configured,
+            and a local Chromium session is created instead.
+
    Returns:
        Dict with session_name (always), bb_session_id + cdp_url (cloud only)
    """
    if task_id is None:
        task_id = "default"
-    
+
    # Start the cleanup thread if not running (handles inactivity timeouts)
    _start_browser_cleanup_thread()
-    
+
    # Update activity timestamp for this session
    _update_session_activity(task_id)
-    
+
    with _cleanup_lock:
        # Check if we already have a session for this task
        if task_id in _active_sessions:
            return _active_sessions[task_id]
-    
+
+    # Hybrid routing: session keys ending with ``::local`` force a local
+    # Chromium regardless of the globally-configured cloud provider.  Public
+    # URLs in the same conversation continue to use the cloud session under
+    # the bare task_id key.
+    force_local = _is_local_sidecar_key(task_id)
+
    # Create session outside the lock (network call in cloud mode)
    cdp_override = _get_cdp_override()
-    if cdp_override:
+    if cdp_override and not force_local:
        session_info = _create_cdp_session(task_id, cdp_override)
+    elif force_local:
+        session_info = _create_local_session(task_id)
    else:
        provider = _get_cloud_provider()
        if provider is None:
@ -1081,7 +1248,7 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
                    session_info["fallback_from_cloud"] = True
                    session_info["fallback_reason"] = str(e)
                    session_info["fallback_provider"] = provider_name
-    
+
    with _cleanup_lock:
        # Double-check: another thread may have created a session while we
        # were doing the network call. Use the existing one to avoid leaking
@ -1093,7 +1260,9 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
    # Lazy-start the CDP supervisor now that the session exists (if the
    # backend surfaces a CDP URL via override or session_info["cdp_url"]).
    # Idempotent; swallows errors. See _ensure_cdp_supervisor for details.
-    _ensure_cdp_supervisor(task_id)
+    # Skip for local sidecars — they have no CDP URL.
+    if not force_local:
+        _ensure_cdp_supervisor(task_id)

    return session_info

@ -1521,9 +1690,21 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
    # SSRF protection — block private/internal addresses before navigating.
    # Skipped for local backends (Camofox, headless Chromium without a cloud
    # provider) because the agent already has full local network access via
-    # the terminal tool.  Can also be opted out for cloud mode via
-    # ``browser.allow_private_urls`` in config.
-    if not _is_local_backend() and not _allow_private_urls() and not _is_safe_url(url):
+    # the terminal tool.  Also skipped when hybrid routing will auto-spawn a
+    # local Chromium sidecar for this URL (cloud provider configured +
+    # private URL + ``browser.auto_local_for_private_urls`` enabled) — the
+    # cloud provider never sees the URL in that case.  Can also be opted
+    # out globally via ``browser.allow_private_urls`` in config.
+    effective_task_id = task_id or "default"
+    nav_session_key = _navigation_session_key(effective_task_id, url)
+    auto_local_this_nav = _is_local_sidecar_key(nav_session_key)
+
+    if (
+        not _is_local_backend()
+        and not auto_local_this_nav
+        and not _allow_private_urls()
+        and not _is_safe_url(url)
+    ):
        return json.dumps({
            "success": False,
            "error": "Blocked: URL targets a private or internal address",
@ -1543,19 +1724,31 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
        from tools.browser_camofox import camofox_navigate
        return camofox_navigate(url, task_id)

-    effective_task_id = task_id or "default"
-    
+    if auto_local_this_nav:
+        logger.info(
+            "browser_navigate: auto-routing %s to local Chromium sidecar "
+            "(cloud provider %s stays on cloud for public URLs; "
+            "set browser.auto_local_for_private_urls: false to disable)",
+            url,
+            type(_get_cloud_provider()).__name__ if _get_cloud_provider() else "none",
+        )
+
    # Get session info to check if this is a new session
    # (will create one with features logged if not exists)
-    session_info = _get_session_info(effective_task_id)
+    session_info = _get_session_info(nav_session_key)
    is_first_nav = session_info.get("_first_nav", True)
-    
+
    # Auto-start recording if configured and this is first navigation
    if is_first_nav:
        session_info["_first_nav"] = False
-        _maybe_start_recording(effective_task_id)
+        _maybe_start_recording(nav_session_key)

-    result = _run_browser_command(effective_task_id, "open", [url], timeout=max(_get_command_timeout(), 60))
+    result = _run_browser_command(nav_session_key, "open", [url], timeout=max(_get_command_timeout(), 60))
+
+    # Remember which session served this nav so snapshot/click/fill/...
+    # on the same task_id hit it (critical when hybrid routing has both a
+    # cloud session and a local sidecar alive concurrently).
+    _last_active_session_key[effective_task_id] = nav_session_key
    
    if result.get("success"):
        data = result.get("data", {})
@ -1565,10 +1758,17 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
        # Post-redirect SSRF check — if the browser followed a redirect to a
        # private/internal address, block the result so the model can't read
        # internal content via subsequent browser_snapshot calls.
-        # Skipped for local backends (same rationale as the pre-nav check).
-        if not _is_local_backend() and not _allow_private_urls() and final_url and final_url != url and not _is_safe_url(final_url):
+        # Skipped for local backends (same rationale as the pre-nav check),
+        # and for the hybrid local sidecar (we're already on a local browser
+        # hitting a private URL by design).
+        if (
+            not _is_local_backend()
+            and not auto_local_this_nav
+            and not _allow_private_urls()
+            and final_url and final_url != url and not _is_safe_url(final_url)
+        ):
            # Navigate away to a blank page to prevent snapshot leaks
-            _run_browser_command(effective_task_id, "open", ["about:blank"], timeout=10)
+            _run_browser_command(nav_session_key, "open", ["about:blank"], timeout=10)
            return json.dumps({
                "success": False,
                "error": "Blocked: redirect landed on a private/internal address",
@ -1612,7 +1812,7 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
        # Auto-take a compact snapshot so the model can act immediately
        # without a separate browser_snapshot call.
        try:
-            snap_result = _run_browser_command(effective_task_id, "snapshot", ["-c"])
+            snap_result = _run_browser_command(nav_session_key, "snapshot", ["-c"])
            if snap_result.get("success"):
                snap_data = snap_result.get("data", {})
                snapshot_text = snap_data.get("snapshot", "")
@ -1652,7 +1852,7 @@ def browser_snapshot(
        from tools.browser_camofox import camofox_snapshot
        return camofox_snapshot(full, task_id, user_task)

-    effective_task_id = task_id or "default"
+    effective_task_id = _last_session_key(task_id or "default")
    
    # Build command args based on full flag
    args = []
@ -1714,7 +1914,7 @@ def browser_click(ref: str, task_id: Optional[str] = None) -> str:
        from tools.browser_camofox import camofox_click
        return camofox_click(ref, task_id)

-    effective_task_id = task_id or "default"
+    effective_task_id = _last_session_key(task_id or "default")
    
    # Ensure ref starts with @
    if not ref.startswith("@"):
@ -1750,7 +1950,7 @@ def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
        from tools.browser_camofox import camofox_type
        return camofox_type(ref, text, task_id)

-    effective_task_id = task_id or "default"
+    effective_task_id = _last_session_key(task_id or "default")
    
    # Ensure ref starts with @
    if not ref.startswith("@"):
@ -1804,7 +2004,7 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str:
            result = camofox_scroll(direction, task_id)
        return result

-    effective_task_id = task_id or "default"
+    effective_task_id = _last_session_key(task_id or "default")

    result = _run_browser_command(effective_task_id, "scroll", [direction, str(_SCROLL_PIXELS)])
    if not result.get("success"):
@ -1833,7 +2033,7 @@ def browser_back(task_id: Optional[str] = None) -> str:
        from tools.browser_camofox import camofox_back
        return camofox_back(task_id)

-    effective_task_id = task_id or "default"
+    effective_task_id = _last_session_key(task_id or "default")
    result = _run_browser_command(effective_task_id, "back", [])
    
    if result.get("success"):
@ -1864,7 +2064,7 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str:
        from tools.browser_camofox import camofox_press
        return camofox_press(key, task_id)

-    effective_task_id = task_id or "default"
+    effective_task_id = _last_session_key(task_id or "default")
    result = _run_browser_command(effective_task_id, "press", [key])
    
    if result.get("success"):
@ -1906,7 +2106,7 @@ def browser_console(clear: bool = False, expression: Optional[str] = None, task_
        from tools.browser_camofox import camofox_console
        return camofox_console(clear, task_id)

-    effective_task_id = task_id or "default"
+    effective_task_id = _last_session_key(task_id or "default")
    
    console_args = ["--clear"] if clear else []
    error_args = ["--clear"] if clear else []
@ -1945,7 +2145,7 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
    if _is_camofox_mode():
        return _camofox_eval(expression, task_id)

-    effective_task_id = task_id or "default"
+    effective_task_id = _last_session_key(task_id or "default")
    result = _run_browser_command(effective_task_id, "eval", [expression])

    if not result.get("success"):
@ -2077,7 +2277,7 @@ def browser_get_images(task_id: Optional[str] = None) -> str:
        from tools.browser_camofox import camofox_get_images
        return camofox_get_images(task_id)

-    effective_task_id = task_id or "default"
+    effective_task_id = _last_session_key(task_id or "default")
    
    # Use eval to run JavaScript that extracts images
    js_code = """JSON.stringify(
@ -2147,7 +2347,7 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]

    import base64
    import uuid as uuid_mod
-    effective_task_id = task_id or "default"
+    effective_task_id = _last_session_key(task_id or "default")
    
    # Save screenshot to persistent location so it can be shared with users
    from hermes_constants import get_hermes_dir
@ -2350,17 +2550,47 @@ def _cleanup_old_recordings(max_age_hours=72):

 def cleanup_browser(task_id: Optional[str] = None) -> None:
    """
-    Clean up browser session for a task.
-    
+    Clean up browser session(s) for a task.
+
    Called automatically when a task completes or when inactivity timeout is reached.
    Closes both the agent-browser/Browserbase session and Camofox sessions.
-    
+
+    When ``task_id`` is a bare task identifier (no ``::local`` suffix), reaps
+    BOTH the cloud/primary session AND any hybrid-routing local sidecar that
+    may have been spawned for LAN/localhost URLs in the same task.  When
+    ``task_id`` already carries a ``::local`` suffix (called from the inactivity
+    cleanup loop against a specific session key), reaps only that one.
+
    Args:
-        task_id: Task identifier to clean up
+        task_id: Task identifier (or explicit session key)
    """
    if task_id is None:
        task_id = "default"

+    # Expand to the full set of session keys to reap. For a bare task_id,
+    # that is the cloud/primary key plus the local sidecar, if one exists.
+    if _is_local_sidecar_key(task_id):
+        session_keys = [task_id]
+        bare_task_id = task_id[: -len(_LOCAL_SUFFIX)]
+    else:
+        session_keys = [task_id]
+        sidecar_key = f"{task_id}{_LOCAL_SUFFIX}"
+        with _cleanup_lock:
+            if sidecar_key in _active_sessions:
+                session_keys.append(sidecar_key)
+        bare_task_id = task_id
+
+    for session_key in session_keys:
+        _cleanup_single_browser_session(session_key)
+
+    # Drop the last-active pointer only when the bare task is being cleaned
+    # (i.e. not when we're only reaping a sidecar mid-task).
+    if not _is_local_sidecar_key(task_id):
+        _last_active_session_key.pop(bare_task_id, None)
+
+
+def _cleanup_single_browser_session(task_id: str) -> None:
+    """Internal: reap a single browser session by its exact session key."""
    # Stop the CDP supervisor for this task FIRST so we close our WebSocket
    # before the backend tears down the underlying CDP endpoint.
    _stop_cdp_supervisor(task_id)
@ -2379,32 +2609,33 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:

    logger.debug("cleanup_browser called for task_id: %s", task_id)
    logger.debug("Active sessions: %s", list(_active_sessions.keys()))
-    
+
    # Check if session exists (under lock), but don't remove yet -
    # _run_browser_command needs it to build the close command.
    with _cleanup_lock:
        session_info = _active_sessions.get(task_id)
-    
+
    if session_info:
        bb_session_id = session_info.get("bb_session_id", "unknown")
        logger.debug("Found session for task %s: bb_session_id=%s", task_id, bb_session_id)
-        
+
        # Stop auto-recording before closing (saves the file)
        _maybe_stop_recording(task_id)
-        
+
        # Try to close via agent-browser first (needs session in _active_sessions)
        try:
            _run_browser_command(task_id, "close", [], timeout=10)
            logger.debug("agent-browser close command completed for task %s", task_id)
        except Exception as e:
            logger.warning("agent-browser close failed for task %s: %s", task_id, e)
-        
+
        # Now remove from tracking under lock
        with _cleanup_lock:
            _active_sessions.pop(task_id, None)
            _session_last_activity.pop(task_id, None)
-        
-        # Cloud mode: close the cloud browser session via provider API
+
+        # Cloud mode: close the cloud browser session via provider API.
+        # Local sidecars have bb_session_id=None so this no-ops for them.
        if bb_session_id:
            provider = _get_cloud_provider()
            if provider is not None:
--- a/tools/mcp_oauth.py
+++ b/tools/mcp_oauth.py
@ -519,12 +519,6 @@ def _maybe_preregister_client(
    logger.debug("Pre-registered client_id=%s for '%s'", client_id, storage._server_name)


-def _parse_base_url(server_url: str) -> str:
-    """Strip path component from server URL, returning the base origin."""
-    parsed = urlparse(server_url)
-    return f"{parsed.scheme}://{parsed.netloc}"
-
-
 def build_oauth_auth(
    server_name: str,
    server_url: str,
@ -570,7 +564,7 @@ def build_oauth_auth(
    _maybe_preregister_client(storage, cfg, client_metadata)

    return OAuthClientProvider(
-        server_url=_parse_base_url(server_url),
+        server_url=server_url,
        client_metadata=client_metadata,
        storage=storage,
        redirect_handler=_redirect_handler,
--- a/tools/mcp_oauth_manager.py
+++ b/tools/mcp_oauth_manager.py
@ -362,7 +362,6 @@ class MCPOAuthManager:
            _configure_callback_port,
            _is_interactive,
            _maybe_preregister_client,
-            _parse_base_url,
            _redirect_handler,
            _wait_for_callback,
        )
@ -387,7 +386,7 @@ class MCPOAuthManager:

        return _HERMES_PROVIDER_CLS(
            server_name=server_name,
-            server_url=_parse_base_url(entry.server_url),
+            server_url=entry.server_url,
            client_metadata=client_metadata,
            storage=storage,
            redirect_handler=_redirect_handler,
--- a/tools/process_registry.py
+++ b/tools/process_registry.py
@ -776,7 +776,7 @@ class ProcessRegistry:

        # Only enqueue completion notification on the FIRST move.  Without
        # this guard, kill_process() and the reader thread can both call
-        # _move_to_finished(), producing duplicate [SYSTEM: ...] messages.
+        # _move_to_finished(), producing duplicate [IMPORTANT: ...] messages.
        if was_running and session.notify_on_complete:
            from tools.ansi_strip import strip_ansi
            output_tail = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else ""
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -2321,6 +2321,26 @@ def _(rid, params: dict) -> dict:
                payload["rendered"] = rendered
            _emit("message.complete", sid, payload)

+            if (
+                status == "complete"
+                and isinstance(raw, str)
+                and raw.strip()
+                and isinstance(text, str)
+                and text.strip()
+            ):
+                try:
+                    from agent.title_generator import maybe_auto_title
+
+                    maybe_auto_title(
+                        _get_db(),
+                        session.get("session_key") or sid,
+                        text,
+                        raw,
+                        session.get("history", []),
+                    )
+                except Exception:
+                    pass
+
            # CLI parity: when voice-mode TTS is on, speak the agent reply
            # (cli.py:_voice_speak_response).  Only the final text — tool
            # calls / reasoning already stream separately and would be
@ -2550,48 +2570,6 @@ def _(rid, params: dict) -> dict:
    return _ok(rid, {"task_id": task_id})


-@method("prompt.btw")
-def _(rid, params: dict) -> dict:
-    session, err = _sess(params, rid)
-    if err:
-        return err
-    text, sid = params.get("text", ""), params.get("session_id", "")
-    if not text:
-        return _err(rid, 4012, "text required")
-    snapshot = list(session.get("history", []))
-
-    def run():
-        session_tokens = _set_session_context(session["session_key"])
-        try:
-            from run_agent import AIAgent
-
-            result = AIAgent(
-                model=_resolve_model(),
-                quiet_mode=True,
-                platform="tui",
-                max_iterations=8,
-                enabled_toolsets=[],
-            ).run_conversation(text, conversation_history=snapshot)
-            _emit(
-                "btw.complete",
-                sid,
-                {
-                    "text": (
-                        result.get("final_response", str(result))
-                        if isinstance(result, dict)
-                        else str(result)
-                    )
-                },
-            )
-        except Exception as e:
-            _emit("btw.complete", sid, {"text": f"error: {e}"})
-        finally:
-            _clear_session_context(session_tokens)
-
-    threading.Thread(target=run, daemon=True).start()
-    return _ok(rid, {"status": "running"})
-
-
 # ── Methods: respond ─────────────────────────────────────────────────


--- a/ui-tui/README.md
+++ b/ui-tui/README.md
@ -252,7 +252,6 @@ Primary event types the client handles today:
 | `sudo.request`           | `{ request_id }`                                |
 | `secret.request`         | `{ prompt, env_var, request_id }`               |
 | `background.complete`    | `{ task_id, text }`                             |
-| `btw.complete`           | `{ text }`                                      |
 | `error`                  | `{ message }`                                   |
 | `gateway.stderr`         | synthesized from child stderr                   |
 | `gateway.protocol_error` | synthesized from malformed stdout               |
--- a/ui-tui/packages/hermes-ink/src/ink/hooks/use-selection.ts
+++ b/ui-tui/packages/hermes-ink/src/ink/hooks/use-selection.ts
@ -9,9 +9,9 @@ import { type FocusMove, type SelectionState, shiftAnchor } from '../selection.j
 * Returns no-op functions when fullscreen mode is disabled.
 */
 export function useSelection(): {
-  copySelection: () => string
+  copySelection: () => Promise<string>
  /** Copy without clearing the highlight (for copy-on-select). */
-  copySelectionNoClear: () => string
+  copySelectionNoClear: () => Promise<string>
  clearSelection: () => void
  hasSelection: () => boolean
  /** Read the raw mutable selection state (for drag-to-scroll). */
@ -48,8 +48,8 @@ export function useSelection(): {
  return useMemo(() => {
    if (!ink) {
      return {
-        copySelection: () => '',
-        copySelectionNoClear: () => '',
+        copySelection: async () => '',
+        copySelectionNoClear: async () => '',
        clearSelection: () => {},
        hasSelection: () => false,
        getState: () => null,
--- a/ui-tui/packages/hermes-ink/src/ink/ink.tsx
+++ b/ui-tui/packages/hermes-ink/src/ink/ink.tsx
@ -1302,11 +1302,13 @@ export default class Ink {
  }

  /**
-   * Copy the current selection to the clipboard without clearing the
-   * highlight. Matches iTerm2's copy-on-select behavior where the selected
-   * region stays visible after the automatic copy.
+   * Copy the current text selection to the system clipboard without clearing the
+   * selection. Returns the copied text when a clipboard path succeeded (native
+   * tool fired, tmux buffer loaded, or OSC 52 emitted), or '' when no path was
+   * taken (e.g. headless Linux without tmux). Matches iTerm2's copy-on-select
+   * behavior where the selected region stays visible after the automatic copy.
   */
-  copySelectionNoClear(): string {
+  async copySelectionNoClear(): Promise<string> {
    if (!hasSelection(this.selection)) {
      return ''
    }
@ -1314,28 +1316,41 @@ export default class Ink {
    const text = getSelectedText(this.selection, this.frontFrame.screen)

    if (text) {
-      // Raw OSC 52, or DCS-passthrough-wrapped OSC 52 inside tmux (tmux
-      // drops it silently unless allow-passthrough is on — no regression).
-      void setClipboard(text).then(raw => {
-        if (raw) {
-          this.options.stdout.write(raw)
+      try {
+        const { sequence, success } = await setClipboard(text)
+
+        if (sequence) {
+          this.options.stdout.write(sequence)
        }
-      })
+
+        if (success) {
+          return text
+        }
+
+        if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) {
+          console.error('[clipboard] no path reached the clipboard (headless + no tmux?) — set HERMES_TUI_FORCE_OSC52=1 to force the escape sequence')
+        }
+      } catch (err) {
+        if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) {
+          console.error('[clipboard] error:', err)
+        }
+      }
    }

-    return text
+    return ''
  }

  /**
   * Copy the current text selection to the system clipboard via OSC 52
-   * and clear the selection. Returns the copied text (empty if no selection).
+   * and clear the selection. Returns the copied text (empty if no selection
+   * or clipboard operation failed).
   */
-  copySelection(): string {
+  async copySelection(): Promise<string> {
    if (!hasSelection(this.selection)) {
      return ''
    }

-    const text = this.copySelectionNoClear()
+    const text = await this.copySelectionNoClear()
    clearSelection(this.selection)
    this.notifySelectionChange()

--- a/ui-tui/packages/hermes-ink/src/ink/termio/osc.test.ts
+++ b/ui-tui/packages/hermes-ink/src/ink/termio/osc.test.ts
@ -26,4 +26,26 @@ describe('shouldEmitClipboardSequence', () => {
      shouldEmitClipboardSequence({ HERMES_TUI_COPY_OSC52: '0', TERM: 'xterm-256color' } as NodeJS.ProcessEnv)
    ).toBe(false)
  })
+
+  it('HERMES_TUI_FORCE_OSC52 takes precedence over TMUX suppression', () => {
+    // Without the override, local-in-tmux suppresses the OSC 52 sequence
+    // so the terminal multiplexer path wins. FORCE_OSC52=1 flips that
+    // back on for users whose tmux config supports passthrough.
+    expect(shouldEmitClipboardSequence({ TMUX: '/tmp/t,1,0' } as NodeJS.ProcessEnv)).toBe(false)
+    expect(
+      shouldEmitClipboardSequence({
+        HERMES_TUI_FORCE_OSC52: '1',
+        TMUX: '/tmp/t,1,0'
+      } as NodeJS.ProcessEnv)
+    ).toBe(true)
+  })
+
+  it('HERMES_TUI_FORCE_OSC52=0 suppresses OSC 52 even for remote or plain terminals', () => {
+    expect(
+      shouldEmitClipboardSequence({
+        HERMES_TUI_FORCE_OSC52: '0',
+        SSH_CONNECTION: '1'
+      } as NodeJS.ProcessEnv)
+    ).toBe(false)
+  })
 })
--- a/ui-tui/packages/hermes-ink/src/ink/termio/osc.ts
+++ b/ui-tui/packages/hermes-ink/src/ink/termio/osc.ts
@ -84,7 +84,11 @@ export function getClipboardPath(): ClipboardPath {
 }

 export function shouldEmitClipboardSequence(env: NodeJS.ProcessEnv = process.env): boolean {
-  const override = (env.HERMES_TUI_CLIPBOARD_OSC52 ?? env.HERMES_TUI_COPY_OSC52 ?? '').trim()
+  const override = (
+    env.HERMES_TUI_FORCE_OSC52 ??
+    env.HERMES_TUI_CLIPBOARD_OSC52 ??
+    env.HERMES_TUI_COPY_OSC52 ?? ''
+  ).trim()

  if (ENV_ON_RE.test(override)) {
    return true
@ -162,10 +166,23 @@ export async function tmuxLoadBuffer(text: string): Promise<boolean> {
 * utilities (pbcopy/wl-copy/xclip/xsel/clip.exe) always work locally. Over
 * SSH these would write to the remote clipboard — OSC 52 is the right path there.
 *
- * Returns the sequence for the caller to write to stdout (raw OSC 52
- * outside tmux, DCS-wrapped inside).
+ * Returns { sequence, success }:
+ *   - `sequence` is the bytes to write to stdout (raw OSC 52 outside tmux,
+ *     DCS-wrapped inside; empty string when we shouldn't emit).
+ *   - `success` is true when we believe SOME path reached the clipboard:
+ *     native tool fired (local), tmux buffer loaded, or an OSC 52 sequence
+ *     was emitted to the terminal. False only when no path was taken at
+ *     all (headless Linux with no tmux + osc52 suppressed, effectively).
+ *     This is best-effort — pbcopy/xclip are fire-and-forget, and OSC 52
+ *     depends on the outer terminal honoring the sequence — but it lets
+ *     callers distinguish "nothing attempted" from "attempted".
 */
-export async function setClipboard(text: string): Promise<string> {
+export type ClipboardResult = {
+  sequence: string
+  success: boolean
+}
+
+export async function setClipboard(text: string): Promise<ClipboardResult> {
  const b64 = Buffer.from(text, 'utf8').toString('base64')
  const raw = osc(OSC.CLIPBOARD, 'c', b64)
  const emitSequence = shouldEmitClipboardSequence(process.env)
@ -177,20 +194,28 @@ export async function setClipboard(text: string): Promise<string> {
  // (https://anthropic.slack.com/archives/C07VBSHV7EV/p1773943921788829).
  // Gated on SSH_CONNECTION (not SSH_TTY) since tmux panes inherit SSH_TTY
  // forever but SSH_CONNECTION is in tmux's default update-environment and
-  // clears on local attach. Fire-and-forget.
-  if (!process.env['SSH_CONNECTION']) {
-    copyNative(text)
-  }
+  // clears on local attach. Fire-and-forget, but `nativeAttempted`
+  // tells us whether ANY native path will be tried on this platform.
+  const nativeAttempted =
+    !process.env['SSH_CONNECTION'] && copyNative(text)

  const tmuxBufferLoaded = await tmuxLoadBuffer(text)

  // Inner OSC uses BEL directly (not osc()) — ST's ESC would need doubling
  // too, and BEL works everywhere for OSC 52.
-  if (tmuxBufferLoaded) {
-    return emitSequence ? tmuxPassthrough(`${ESC}]52;c;${b64}${BEL}`) : ''
-  }
+  const sequence = tmuxBufferLoaded
+    ? (emitSequence ? tmuxPassthrough(`${ESC}]52;c;${b64}${BEL}`) : '')
+    : (emitSequence ? raw : '')

-  return emitSequence ? raw : ''
+  // Success if any path was taken. Native and tmux are fire-and-forget,
+  // so we can't truly confirm the clipboard was written — but if native
+  // was attempted OR tmux buffer loaded OR we emitted OSC 52, the user's
+  // paste is likely to work. The only false case is "we did literally
+  // nothing" (e.g. local-in-tmux with osc52 suppressed and tmux buffer
+  // load failed), in which case reporting failure to the user is honest.
+  const success = nativeAttempted || tmuxBufferLoaded || sequence.length > 0
+
+  return { sequence, success }
 }

 // Linux clipboard tool: undefined = not yet probed, null = none available.
@ -198,65 +223,95 @@ export async function setClipboard(text: string): Promise<string> {
 // Cached after first attempt so repeated mouse-ups skip the probe chain.
 let linuxCopy: 'wl-copy' | 'xclip' | 'xsel' | null | undefined

+/** Internal: probe wl-copy, then xclip, then xsel (500 ms timeout each); resolves to the first tool that exits 0, else null. Caching the result is left to the caller. */
+async function probeLinuxCopy(): Promise<'wl-copy' | 'xclip' | 'xsel' | null> {
+  const opts = { useCwd: false, timeout: 500 }
+
+  const r = await execFileNoThrow('wl-copy', [], opts)
+
+  if (r.code === 0) {
+    return 'wl-copy'
+  }
+
+  const r2 = await execFileNoThrow('xclip', ['-selection', 'clipboard'], opts)
+
+  if (r2.code === 0) {
+    return 'xclip'
+  }
+
+  const r3 = await execFileNoThrow('xsel', ['--clipboard', '--input'], opts)
+
+  return r3.code === 0 ? 'xsel' : null
+}
+
 /**
 * Shell out to a native clipboard utility as a safety net for OSC 52.
 * Only called when not in an SSH session (over SSH, these would write to
 * the remote machine's clipboard — OSC 52 is the right path there).
 * Fire-and-forget: failures are silent since OSC 52 may have succeeded.
+ *
+ * Returns true when a native copy path was (or will be) attempted — i.e.
+ * we'll spawn pbcopy on macOS, clip on Windows, or a known-working Linux
+ * tool. Returns false only when we know no native tool is viable (Linux
+ * without DISPLAY/WAYLAND_DISPLAY, or previously-probed-to-null). The
+ * return value is used to decide whether to tell the user the copy
+ * succeeded — spawning is best-effort but good enough to claim success.
+ *
+ * Linux behaviour: if DISPLAY and WAYLAND_DISPLAY are both unset, native
+ * clipboard tools cannot work (they need a display server). In that case
+ * we skip probing entirely and treat linuxCopy as permanently null.
 */
-function copyNative(text: string): void {
+function copyNative(text: string): boolean {
  const opts = { input: text, useCwd: false, timeout: 2000 }

  switch (process.platform) {
    case 'darwin':
      void execFileNoThrow('pbcopy', [], opts)

-      return
+      return true
    case 'linux': {
-      if (linuxCopy === null) {
-        return
-      }
-
-      if (linuxCopy === 'wl-copy') {
-        void execFileNoThrow('wl-copy', [], opts)
-
-        return
-      }
-
-      if (linuxCopy === 'xclip') {
-        void execFileNoThrow('xclip', ['-selection', 'clipboard'], opts)
-
-        return
-      }
-
-      if (linuxCopy === 'xsel') {
-        void execFileNoThrow('xsel', ['--clipboard', '--input'], opts)
-
-        return
-      }
-
-      // First call: probe wl-copy (Wayland) then xclip/xsel (X11), cache winner.
-      void execFileNoThrow('wl-copy', [], opts).then(r => {
-        if (r.code === 0) {
-          linuxCopy = 'wl-copy'
-
-          return
+      // If we already probed (success or hard-fail), short-circuit.
+      if (linuxCopy !== undefined) {
+        if (linuxCopy === null) {
+          // No working native tool — skip silently.
+          return false
        }

-        void execFileNoThrow('xclip', ['-selection', 'clipboard'], opts).then(r2 => {
-          if (r2.code === 0) {
-            linuxCopy = 'xclip'
+        // Known-working tool; fire-and-forget with that tool's own flags (xsel does not accept xclip's `-selection`).
+        void execFileNoThrow(linuxCopy, linuxCopy === 'wl-copy' ? [] : linuxCopy === 'xclip' ? ['-selection', 'clipboard'] : ['--clipboard', '--input'], opts)

-            return
-          }
+        return true
+      }

-          void execFileNoThrow('xsel', ['--clipboard', '--input'], opts).then(r3 => {
-            linuxCopy = r3.code === 0 ? 'xsel' : null
-          })
-        })
-      })
+      // No display server → native tools will fail immediately. Cache null.
+      if (!process.env.DISPLAY && !process.env.WAYLAND_DISPLAY) {
+        if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) {
+          console.error('[clipboard] [native] Linux: no DISPLAY or WAYLAND_DISPLAY — native clipboard unavailable')
+        }

-      return
+        linuxCopy = null
+
+        return false
+      }
+      // First call: probe in the background and cache the result for future copies.
+      // We don't await — this is fire-and-forget. Treat as an attempt:
+      // the probe will discover a tool and spawn it. If probing finds
+      // nothing, the NEXT copy will short-circuit above.
+      void (async () => {
+        const winner = await probeLinuxCopy()
+        linuxCopy = winner
+
+        if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) {
+          console.error(`[clipboard] [native] Linux: clipboard probe complete → ${winner ?? 'no tool available'}`)
+        }
+
+        // Perform the copy with the discovered tool, using the same per-tool flags the probe used.
+        if (winner) {
+          void execFileNoThrow(winner, winner === 'wl-copy' ? [] : winner === 'xclip' ? ['-selection', 'clipboard'] : ['--clipboard', '--input'], opts)
+        }
+      })()
+
+      return true
    }

    case 'win32':
@ -264,8 +319,10 @ function copyNative(text: string): void {
      // imperfect (system locale encoding) but good enough for a fallback.
      void execFileNoThrow('clip', [], opts)

-      return
+      return true
  }
+
+  return false
 }

 /** @internal test-only */
--- a/ui-tui/src/tests/createSlashHandler.test.ts
+++ b/ui-tui/src/tests/createSlashHandler.test.ts
@ -392,7 +392,7 @@ const buildComposer = () => ({
  hasSelection: false,
  paste: vi.fn(),
  queueRef: { current: [] as string[] },
-  selection: { copySelection: vi.fn(() => '') },
+  selection: { copySelection: vi.fn(async () => '') },
  setInput: vi.fn()
 })

--- a/ui-tui/src/app/createGatewayEventHandler.ts
+++ b/ui-tui/src/app/createGatewayEventHandler.ts
@ -429,12 +429,6 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:

        return

-      case 'btw.complete':
-        dropBgTask('btw:x')
-        sys(`[btw] ${ev.payload.text}`)
-
-        return
-
      case 'subagent.spawn_requested':
        // Child built but not yet running (waiting on ThreadPoolExecutor slot).
        // Preserve completed state if a later event races in before this one.
--- a/ui-tui/src/app/interfaces.ts
+++ b/ui-tui/src/app/interfaces.ts
@ -33,7 +33,7 @@ export type StatusBarMode = 'bottom' | 'off' | 'top'
 export interface SelectionApi {
  captureScrolledRows: (firstRow: number, lastRow: number, side: 'above' | 'below') => void
  clearSelection: () => void
-  copySelection: () => string
+  copySelection: () => Promise<string>
  getState: () => unknown
  shiftAnchor: (dRow: number, minRow: number, maxRow: number) => void
  shiftSelection: (dRow: number, minRow: number, maxRow: number) => void
--- a/ui-tui/src/app/slash/commands/core.ts
+++ b/ui-tui/src/app/slash/commands/core.ts
@ -251,11 +251,17 @@ export const coreCommands: SlashCommand[] = [
  {
    help: 'copy selection or assistant message',
    name: 'copy',
-    run: (arg, ctx) => {
+    run: async (arg, ctx) => {
      const { sys } = ctx.transcript

-      if (!arg && ctx.composer.hasSelection && ctx.composer.selection.copySelection()) {
-        return sys('copied selection')
+      if (!arg && ctx.composer.hasSelection) {
+        const text = await ctx.composer.selection.copySelection()
+
+        if (text) {
+          return sys(`copied ${text.length} characters`)
+        } else {
+          return sys('clipboard copy failed — try HERMES_TUI_FORCE_OSC52=1 to force the escape sequence; HERMES_TUI_DEBUG_CLIPBOARD=1 for details')
+        }
      }

      if (arg && Number.isNaN(parseInt(arg, 10))) {
--- a/ui-tui/src/app/slash/commands/session.ts
+++ b/ui-tui/src/app/slash/commands/session.ts
@ -1,7 +1,6 @@
 import { attachedImageNotice, introMsg, toTranscriptMessages } from '../../../domain/messages.js'
 import type {
  BackgroundStartResponse,
-  BtwStartResponse,
  ConfigGetValueResponse,
  ConfigSetResponse,
  ImageAttachResponse,
@ -26,7 +25,7 @@ const persistedModelArg = (arg: string) => {

 export const sessionCommands: SlashCommand[] = [
  {
-    aliases: ['bg'],
+    aliases: ['bg', 'btw'],
    help: 'launch a background prompt',
    name: 'background',
    run: (arg, ctx) => {
@ -47,23 +46,6 @@ export const sessionCommands: SlashCommand[] = [
    }
  },

-  {
-    help: 'by-the-way follow-up',
-    name: 'btw',
-    run: (arg, ctx) => {
-      if (!arg) {
-        return ctx.transcript.sys('/btw <question>')
-      }
-
-      ctx.gateway.rpc<BtwStartResponse>('prompt.btw', { session_id: ctx.sid, text: arg }).then(
-        ctx.guarded(() => {
-          patchUiState(state => ({ ...state, bgTasks: new Set(state.bgTasks).add('btw:x') }))
-          ctx.transcript.sys('btw running…')
-        })
-      )
-    }
-  },
-
  {
    help: 'change or show model',
    aliases: ['provider'],
--- a/ui-tui/src/gatewayTypes.ts
+++ b/ui-tui/src/gatewayTypes.ts
@ -178,10 +178,6 @@ export interface BackgroundStartResponse {
  task_id?: string
 }

-export interface BtwStartResponse {
-  ok?: boolean
-}
-
 export interface ClarifyRespondResponse {
  ok?: boolean
 }
@ -403,7 +399,6 @@ export type GatewayEvent =
  | { payload: { request_id: string }; session_id?: string; type: 'sudo.request' }
  | { payload: { env_var: string; prompt: string; request_id: string }; session_id?: string; type: 'secret.request' }
  | { payload: { task_id: string; text: string }; session_id?: string; type: 'background.complete' }
-  | { payload: { text: string }; session_id?: string; type: 'btw.complete' }
  | { payload: SubagentEventPayload; session_id?: string; type: 'subagent.spawn_requested' }
  | { payload: SubagentEventPayload; session_id?: string; type: 'subagent.start' }
  | { payload: SubagentEventPayload; session_id?: string; type: 'subagent.thinking' }
--- a/ui-tui/src/types/hermes-ink.d.ts
+++ b/ui-tui/src/types/hermes-ink.d.ts
@ -84,8 +84,8 @@ declare module '@hermes/ink' {
  export function withInkSuspended(run: RunExternalProcess): Promise<void>
  export function useInput(handler: InputHandler, options?: { readonly isActive?: boolean }): void
  export function useSelection(): {
-    readonly copySelection: () => string
-    readonly copySelectionNoClear: () => string
+    readonly copySelection: () => Promise<string>
+    readonly copySelectionNoClear: () => Promise<string>
    readonly clearSelection: () => void
    readonly hasSelection: () => boolean
    readonly getState: () => unknown
--- a/web/src/lib/gatewayClient.ts
+++ b/web/src/lib/gatewayClient.ts
@ -32,7 +32,6 @@ export type GatewayEventName =
  | "sudo.request"
  | "secret.request"
  | "background.complete"
-  | "btw.complete"
  | "error"
  | "skin.changed"
  | (string & {});
--- a/web/src/pages/ChatPage.tsx
+++ b/web/src/pages/ChatPage.tsx
@ -269,17 +269,17 @@ export default function ChatPage() {
      const payload = data.slice(semi + 1);
      if (payload === "?" || payload === "") return false; // read/clear — ignore
      try {
-        // atob returns a binary string (one byte per char); we need UTF-8
-        // decode so multi-byte codepoints (≥, →, emoji, CJK) round-trip
-        // correctly.  Without this step, the three UTF-8 bytes of `≥`
-        // would land in the clipboard as the three separate Latin-1
-        // characters `â‰¥`.
        const binary = atob(payload);
        const bytes = Uint8Array.from(binary, (c) => c.charCodeAt(0));
        const text = new TextDecoder("utf-8").decode(bytes);
-        navigator.clipboard.writeText(text).catch(() => {});
-      } catch {
-        // Malformed base64 — silently drop.
+        navigator.clipboard.writeText(text).catch((err) => {
+          // Most common reason: the Clipboard API requires a user gesture.
+          // This can fail when the OSC 52 response arrives outside the
+          // original keydown event's activation. Log to aid debugging.
+          console.warn("[dashboard clipboard] OSC 52 write failed:", err.message);
+        });
+      } catch (e) {
+        console.warn("[dashboard clipboard] malformed OSC 52 payload");
      }
      return true;
    });
@ -290,16 +290,31 @@ export default function ChatPage() {
    term.attachCustomKeyEventHandler((ev) => {
      if (ev.type !== "keydown") return true;

+      // Copy: Cmd+C on macOS, Ctrl+Shift+C on other platforms. Bare Ctrl+C
+      // is reserved for SIGINT to the TUI child — matches xterm / gnome-terminal /
+      // konsole / Windows Terminal. Ctrl+Shift+C only copies if a selection exists;
+      // without a selection it passes through to the TUI so agents can still
+      // react to the keypress.
+      // Paste: Cmd+V on macOS, Ctrl+Shift+V on others.
      const copyModifier = isMac ? ev.metaKey : ev.ctrlKey && ev.shiftKey;
      const pasteModifier = isMac ? ev.metaKey : ev.ctrlKey && ev.shiftKey;

      if (copyModifier && ev.key.toLowerCase() === "c") {
        const sel = term.getSelection();
        if (sel) {
-          navigator.clipboard.writeText(sel).catch(() => {});
+          // Direct writeText inside the keydown handler preserves the user
+          // gesture — async round-trips through OSC 52 can lose activation
+          // and fail with "Document is not focused".
+          navigator.clipboard.writeText(sel).catch((err) => {
+            console.warn("[dashboard clipboard] direct copy failed:", err.message);
+          });
+          // Clear xterm.js's highlight after copy (matches gnome-terminal).
+          term.clearSelection();
          ev.preventDefault();
          return false;
        }
+        // No selection → fall through so the TUI receives Ctrl+Shift+C
+        // (or the bare ev if the user used a different modifier).
      }

      if (pasteModifier && ev.key.toLowerCase() === "v") {
@ -308,7 +323,9 @@ export default function ChatPage() {
          .then((text) => {
            if (text) term.paste(text);
          })
-          .catch(() => {});
+          .catch((err) => {
+            console.warn("[dashboard clipboard] paste failed:", err.message);
+          });
        ev.preventDefault();
        return false;
      }
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@ -41,6 +41,7 @@ hermes [global-options] <command> [subcommand/options]
 | `hermes gateway` | Run or manage the messaging gateway service. |
 | `hermes setup` | Interactive setup wizard for all or part of the configuration. |
 | `hermes whatsapp` | Configure and pair the WhatsApp bridge. |
+| `hermes slack` | Slack helpers (currently: generate the app manifest with every command as a native slash command). |
 | `hermes auth` | Manage credentials — add, list, remove, reset, set strategy. Handles OAuth flows for Codex/Nous/Anthropic. |
 | `hermes login` / `logout` | **Deprecated** — use `hermes auth` instead. |
 | `hermes status` | Show agent, auth, and platform status. |
@ -221,6 +222,33 @@ hermes whatsapp

 Runs the WhatsApp pairing/setup flow, including mode selection and QR-code pairing.

+## `hermes slack`
+
+```bash
+hermes slack manifest              # print manifest to stdout
+hermes slack manifest --write      # write to ~/.hermes/slack-manifest.json
+hermes slack manifest --slashes-only  # just the features.slash_commands array
+```
+
+Generates a Slack app manifest that registers every gateway command in
+`COMMAND_REGISTRY` (`/btw`, `/stop`, `/model`, …) as a first-class
+Slack slash command — for parity with Discord and Telegram. Paste the
+output into your Slack app config at
+[https://api.slack.com/apps](https://api.slack.com/apps) → your app →
+**Features → App Manifest → Edit**, then **Save**. Slack prompts for
+reinstall if scopes or slash commands changed.
+
+| Flag | Default | Purpose |
+|------|---------|---------|
+| `--write [PATH]` | stdout | Write to a file instead of stdout. Bare `--write` writes `$HERMES_HOME/slack-manifest.json`. |
+| `--name NAME` | `Hermes` | Bot display name in Slack. |
+| `--description DESC` | default blurb | Bot description shown in the Slack app directory. |
+| `--slashes-only` | off | Emit only `features.slash_commands` for merging into a manually-maintained manifest. |
+
+Run `hermes slack manifest --write` again after `hermes update` to pick
+up any new commands.
+
+
 ## `hermes login` / `hermes logout` *(Deprecated)*

 :::caution
--- a/website/docs/reference/model-catalog.md
+++ b/website/docs/reference/model-catalog.md
@ -0,0 +1,103 @@
+---
+sidebar_position: 11
+title: Model Catalog
+description: Remotely-hosted manifest driving curated model picker lists for OpenRouter and Nous Portal.
+---
+
+# Model Catalog
+
+Hermes fetches curated model lists for **OpenRouter** and **Nous Portal** from a JSON manifest hosted alongside the docs site. This lets maintainers update picker lists without shipping a new `hermes-agent` release.
+
+When the manifest is unreachable (offline, network blocked, hosting failure), Hermes silently falls back to the in-repo snapshot that ships with the CLI. The manifest never breaks the picker — worst case you see whatever list was bundled with your installed version.
+
+## Live manifest URL
+
+```
+https://hermes-agent.nousresearch.com/docs/api/model-catalog.json
+```
+
+Published on every merge to `main` via the existing `deploy-site.yml` GitHub Pages pipeline. The source of truth lives in the repo at `website/static/api/model-catalog.json`.
+
+## Schema
+
+```json
+{
+  "version": 1,
+  "updated_at": "2026-04-25T22:00:00Z",
+  "metadata": {},
+  "providers": {
+    "openrouter": {
+      "metadata": {},
+      "models": [
+        {"id": "moonshotai/kimi-k2.6", "description": "recommended", "metadata": {}},
+        {"id": "openai/gpt-5.4",       "description": ""}
+      ]
+    },
+    "nous": {
+      "metadata": {},
+      "models": [
+        {"id": "anthropic/claude-opus-4.7"},
+        {"id": "moonshotai/kimi-k2.6"}
+      ]
+    }
+  }
+}
+```
+
+Field notes:
+
+- **`version`** — integer schema version. Future schemas bump this; Hermes refuses manifests with versions it doesn't understand and falls back to the hardcoded snapshot.
+- **`metadata`** — free-form dict at the manifest, provider, and model level. Any keys. Hermes ignores unknown fields, so you can annotate entries (`"tier": "paid"`, `"tags": [...]`, etc.) without coordinating a schema change.
+- **`description`** — OpenRouter-only. Drives picker badge text (`"recommended"`, `"free"`, or empty). Nous Portal doesn't use this — free-tier gating is determined live from the Portal's pricing endpoint.
+- **Pricing and context length** are NOT in the manifest. Those come from live provider APIs (`/v1/models` endpoints, models.dev) at fetch time.
+
+## Fetch behavior
+
+| When | What happens |
+|---|---|
+| `/model` or `hermes model` | Fetches if disk cache is stale, else uses cache |
+| Disk cache fresh (< TTL) | No network hit |
+| Network failure with cache | Silent fallback to cache, one log line |
+| Network failure, no cache | Silent fallback to in-repo snapshot |
+| Manifest fails schema validation | Treated as unreachable |
+
+Cache location: `~/.hermes/cache/model_catalog.json`.
+
+## Config
+
+```yaml
+model_catalog:
+  enabled: true
+  url: https://hermes-agent.nousresearch.com/docs/api/model-catalog.json
+  ttl_hours: 24
+  providers: {}
+```
+
+Set `enabled: false` to disable remote fetch entirely and always use the in-repo snapshot.
+
+### Per-provider override URLs
+
+Third parties can self-host their own curation list using the same schema. Point a provider at a custom URL:
+
+```yaml
+model_catalog:
+  providers:
+    openrouter:
+      url: https://example.com/my-openrouter-curation.json
+```
+
+The overriding manifest only needs to populate the provider block(s) it cares about. Other providers continue to resolve against the master URL.
+
+## Updating the manifest
+
+Maintainers:
+
+```bash
+# Re-generate from the in-repo hardcoded lists (keeps manifest in sync after
+# editing OPENROUTER_MODELS or _PROVIDER_MODELS["nous"] in hermes_cli/models.py).
+python scripts/build_model_catalog.py
+```
+
+Then PR the resulting change to `website/static/api/model-catalog.json` to `main`. The docs site auto-deploys on merge and the new manifest is live within a few minutes.
+
+You can also hand-edit the JSON directly for fine-grained metadata changes that don't belong in the in-repo snapshot — the generator script is a convenience, not the single source of truth.
--- a/website/docs/reference/slash-commands.md
+++ b/website/docs/reference/slash-commands.md
@ -36,8 +36,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 | `/resume [name]` | Resume a previously-named session |
 | `/status` | Show session info |
 | `/agents` (alias: `/tasks`) | Show active agents and running tasks across the current session. |
-| `/background <prompt>` (alias: `/bg`) | Run a prompt in a separate background session. The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). |
-| `/btw <question>` | Ephemeral side question using session context (no tools, not persisted). Useful for quick clarifications without affecting the conversation history. |
+| `/background <prompt>` (alias: `/bg`, `/btw`) | Run a prompt in a separate background session. The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). |
 | `/branch [name]` (alias: `/fork`) | Branch the current session (explore a different path) |

 ### Configuration
--- a/website/docs/user-guide/cli.md
+++ b/website/docs/user-guide/cli.md
@ -242,6 +242,10 @@ You can also change it inside the CLI:
 /busy status
 ```

+:::tip First-touch hint
+The very first time you press Enter while Hermes is working, Hermes prints a one-line reminder explaining the `/busy` knob (`"(tip) Your message interrupted the current run…"`). It only fires once per install — a flag in `config.yaml` under `onboarding.seen.busy_input_prompt` latches it. Delete that key to see the tip again.
+:::
+
 ### Suspending to Background

 On Unix systems, press **`Ctrl+Z`** to suspend Hermes to the background — just like any terminal process. The shell prints a confirmation:
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@ -146,7 +146,9 @@ terminal:

 **Requirements:** Docker Desktop or Docker Engine installed and running. Hermes probes `$PATH` plus common macOS install locations (`/usr/local/bin/docker`, `/opt/homebrew/bin/docker`, Docker Desktop app bundle).

-**Container lifecycle:** Each session starts a long-lived container (`docker run -d ... sleep 2h`). Commands run via `docker exec` with a login shell. On cleanup, the container is stopped and removed.
+**Container lifecycle:** Hermes reuses a single long-lived container (`docker run -d ... sleep 2h`) for every terminal and file-tool call made by the top-level agent, across sessions, `/new`, and `/reset`, for the lifetime of the Hermes process. Commands run via `docker exec` with a login shell, so working-directory changes, installed packages, and files in `/workspace` all persist from one tool call to the next. The container is stopped and removed on Hermes shutdown (or when the idle-sweep reclaims it).
+
+Subagents (`delegate_task`) and RL rollouts get their own isolated containers keyed by `task_id` — only the top-level agent shares the `default` container.

 **Security hardening:**
 - `--cap-drop ALL` with only `DAC_OVERRIDE`, `CHOWN`, `FOWNER` added back
--- a/website/docs/user-guide/features/browser.md
+++ b/website/docs/user-guide/features/browser.md
@ -86,6 +86,40 @@ FIRECRAWL_API_URL=http://localhost:3002
 FIRECRAWL_BROWSER_TTL=600
 ```

+### Hybrid routing: cloud for public URLs, local for LAN/localhost
+
+When a cloud provider is configured, Hermes auto-spawns a **local Chromium sidecar**
+for URLs that resolve to a private/loopback/LAN address (`localhost`, `127.0.0.1`,
+`192.168.x.x`, `10.x.x.x`, `172.16-31.x.x`, `*.local`, `*.lan`, `*.internal`,
+IPv6 loopback `::1`, link-local `169.254.x.x`). Public URLs continue to use the
+cloud provider in the same conversation.
+
+This solves the common "I'm developing locally but using Browserbase" workflow —
+the agent can screenshot your dashboard at `http://localhost:3000` AND scrape
+`https://github.com` without you switching providers or disabling the SSRF guard.
+The cloud provider never sees the private URL.
+
+The feature is **on by default**. To disable it (all URLs go to the configured
+cloud provider, as before):
+
+```yaml
+# ~/.hermes/config.yaml
+browser:
+  cloud_provider: browserbase
+  auto_local_for_private_urls: false
+```
+
+With auto-routing disabled, private URLs are rejected with
+`"Blocked: URL targets a private or internal address"` unless you also set
+`browser.allow_private_urls: true` (which lets the cloud provider attempt them —
+this usually won't work, since Browserbase and similar providers can't reach your LAN).
+
+Requirements: the local sidecar uses the same `agent-browser` CLI as pure local
+mode, so you need it installed (`hermes setup tools → Browser Automation`
+auto-installs it). Post-navigation redirects from a public URL onto a private
+address are still blocked (you can't use a redirect-to-internal trick to reach
+your LAN through the public path).
+
 ### Camofox local mode

 [Camofox](https://github.com/jo-inc/camofox-browser) is a self-hosted Node.js server wrapping Camoufox (a Firefox fork with C++ fingerprint spoofing). It provides local anti-detection browsing without cloud dependencies.
--- a/website/docs/user-guide/messaging/index.md
+++ b/website/docs/user-guide/messaging/index.md
@ -219,6 +219,17 @@ Send any message while the agent is working to interrupt it. Key behaviors:
 - **Multiple messages are combined** — messages sent during interruption are joined into one prompt
 - **`/stop` command** — interrupts without queuing a follow-up message

+### Queue vs interrupt (busy-input mode)
+
+By default, messaging a busy agent interrupts it. To switch the whole install so follow-ups queue behind the current task instead, set:
+
+```yaml
+display:
+  busy_input_mode: queue   # default: interrupt
+```
+
+The first time you message a busy agent on any platform, Hermes appends a one-line reminder to the busy-ack explaining the knob (`"💡 First-time tip — …"`). The reminder fires once per install — a flag under `onboarding.seen.busy_input_prompt` latches it. Delete that key to see the tip again.
+
 ## Tool Progress Notifications

 Control how much tool activity is displayed in `~/.hermes/config.yaml`:
--- a/website/docs/user-guide/messaging/slack.md
+++ b/website/docs/user-guide/messaging/slack.md
@ -29,13 +29,36 @@ the steps below.

 ## Step 1: Create a Slack App

+The fastest path is to paste a manifest Hermes generates for you. It
+declares every built-in slash command (`/btw`, `/stop`, `/model`, …),
+every required OAuth scope, every event subscription, and enables Socket
+Mode — all at once.
+
+### Option A: From a Hermes-generated manifest (recommended)
+
+1. Generate the manifest:
+   ```bash
+   hermes slack manifest --write
+   ```
+   This writes `~/.hermes/slack-manifest.json` and prints paste-in
+   instructions.
+2. Go to [https://api.slack.com/apps](https://api.slack.com/apps) →
+   **Create New App** → **From an app manifest**
+3. Pick your workspace, paste the JSON contents, review, click **Next**
+   → **Create**
+4. Skip ahead to **Step 6: Install App to Workspace**. The manifest
+   handled scopes, events, and slash commands for you.
+
+### Option B: From scratch (manual)
+
 1. Go to [https://api.slack.com/apps](https://api.slack.com/apps)
 2. Click **Create New App**
 3. Choose **From scratch**
 4. Enter an app name (e.g., "Hermes Agent") and select your workspace
 5. Click **Create App**

-You'll land on the app's **Basic Information** page.
+You'll land on the app's **Basic Information** page. Continue with
+Steps 2–6 below.

 ---

@ -203,6 +226,57 @@ The bot will **not** automatically join channels. You must invite it to each cha

 ---

+## Slash Commands
+
+Every Hermes command (`/btw`, `/stop`, `/new`, `/model`, `/help`, ...)
+is a native Slack slash command — exactly the way it works on Telegram
+and Discord. Type `/` in Slack and the autocomplete picker lists every
+Hermes command with its description.
+
+Under the hood: Hermes ships with a generated Slack app manifest (see
+Step 1, Option A) that declares every command in
+[`COMMAND_REGISTRY`](https://github.com/NousResearch/hermes-agent/blob/main/hermes_cli/commands.py)
+as a slash command. In Socket Mode, Slack routes the command event
+through the WebSocket regardless of the manifest's `url` field.
+
+### Refreshing slash commands after updates
+
+When Hermes adds new commands (e.g. after `hermes update`), regenerate
+the manifest and update your Slack app:
+
+```bash
+hermes slack manifest --write
+```
+
+Then in Slack:
+1. Open [https://api.slack.com/apps](https://api.slack.com/apps) →
+   your Hermes app
+2. **Features → App Manifest → Edit**
+3. Paste the new contents of `~/.hermes/slack-manifest.json`
+4. **Save**. Slack will prompt to reinstall the app if scopes or slash
+   commands changed.
+
+### Legacy `/hermes <subcommand>` still works
+
+For backward compatibility with older manifests, you can still type
+`/hermes btw run the tests` — Hermes routes it the same way as `/btw
+run the tests`. Free-form questions also work: `/hermes what's the
+weather?` is treated as a regular message.
+
+### Advanced: emit only the slash-commands array
+
+If you maintain your Slack manifest by hand and just want the slash
+command list:
+
+```bash
+hermes slack manifest --slashes-only > /tmp/slashes.json
+```
+
+Paste that array into the `features.slash_commands` key of your
+existing manifest.
+
+---
+
 ## How the Bot Responds

 Understanding how Hermes behaves in different contexts:
--- a/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent.md
+++ b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent.md
@ -298,7 +298,6 @@ Type these during an interactive chat session.
 ### Utility
 ```
 /branch (/fork)      Branch the current session
-/btw                 Ephemeral side question (doesn't interrupt main task)
 /fast                Toggle priority/fast processing
 /browser             Open CDP browser connection
 /history             Show conversation history (CLI)
--- a/website/scripts/extract-skills.py
+++ b/website/scripts/extract-skills.py
@ -26,7 +26,6 @@ CATEGORY_LABELS = {
    "dogfood": "Dogfood",
    "domain": "Domain",
    "email": "Email",
-    "feeds": "Feeds",
    "gaming": "Gaming",
    "gifs": "GIFs",
    "github": "GitHub",
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@ -613,6 +613,7 @@ const sidebars: SidebarsConfig = {
        'reference/tools-reference',
        'reference/toolsets-reference',
        'reference/mcp-config-reference',
+        'reference/model-catalog',
        'reference/skills-catalog',
        'reference/optional-skills-catalog',
        'reference/faq',
--- a/website/static/api/model-catalog.json
+++ b/website/static/api/model-catalog.json
@ -0,0 +1,259 @@
+{
+  "version": 1,
+  "updated_at": "2026-04-26T12:34:42Z",
+  "metadata": {
+    "source": "hermes-agent repo",
+    "docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog"
+  },
+  "providers": {
+    "openrouter": {
+      "metadata": {
+        "display_name": "OpenRouter",
+        "note": "Descriptions drive picker badges. Live /api/v1/models filters curated ids by tool-calling support and free pricing."
+      },
+      "models": [
+        {
+          "id": "moonshotai/kimi-k2.6",
+          "description": "recommended"
+        },
+        {
+          "id": "deepseek/deepseek-v4-pro",
+          "description": ""
+        },
+        {
+          "id": "deepseek/deepseek-v4-flash",
+          "description": ""
+        },
+        {
+          "id": "anthropic/claude-opus-4.7",
+          "description": ""
+        },
+        {
+          "id": "anthropic/claude-opus-4.6",
+          "description": ""
+        },
+        {
+          "id": "anthropic/claude-sonnet-4.6",
+          "description": ""
+        },
+        {
+          "id": "qwen/qwen3.6-plus",
+          "description": ""
+        },
+        {
+          "id": "anthropic/claude-sonnet-4.5",
+          "description": ""
+        },
+        {
+          "id": "anthropic/claude-haiku-4.5",
+          "description": ""
+        },
+        {
+          "id": "openrouter/elephant-alpha",
+          "description": "free"
+        },
+        {
+          "id": "openai/gpt-5.5",
+          "description": ""
+        },
+        {
+          "id": "openai/gpt-5.4-mini",
+          "description": ""
+        },
+        {
+          "id": "xiaomi/mimo-v2.5-pro",
+          "description": ""
+        },
+        {
+          "id": "xiaomi/mimo-v2.5",
+          "description": ""
+        },
+        {
+          "id": "openai/gpt-5.3-codex",
+          "description": ""
+        },
+        {
+          "id": "google/gemini-3-pro-image-preview",
+          "description": ""
+        },
+        {
+          "id": "google/gemini-3-flash-preview",
+          "description": ""
+        },
+        {
+          "id": "google/gemini-3.1-pro-preview",
+          "description": ""
+        },
+        {
+          "id": "google/gemini-3.1-flash-lite-preview",
+          "description": ""
+        },
+        {
+          "id": "qwen/qwen3.5-plus-02-15",
+          "description": ""
+        },
+        {
+          "id": "qwen/qwen3.5-35b-a3b",
+          "description": ""
+        },
+        {
+          "id": "stepfun/step-3.5-flash",
+          "description": ""
+        },
+        {
+          "id": "minimax/minimax-m2.7",
+          "description": ""
+        },
+        {
+          "id": "minimax/minimax-m2.5",
+          "description": ""
+        },
+        {
+          "id": "minimax/minimax-m2.5:free",
+          "description": "free"
+        },
+        {
+          "id": "z-ai/glm-5.1",
+          "description": ""
+        },
+        {
+          "id": "z-ai/glm-5v-turbo",
+          "description": ""
+        },
+        {
+          "id": "z-ai/glm-5-turbo",
+          "description": ""
+        },
+        {
+          "id": "x-ai/grok-4.20",
+          "description": ""
+        },
+        {
+          "id": "nvidia/nemotron-3-super-120b-a12b",
+          "description": ""
+        },
+        {
+          "id": "nvidia/nemotron-3-super-120b-a12b:free",
+          "description": "free"
+        },
+        {
+          "id": "arcee-ai/trinity-large-preview:free",
+          "description": "free"
+        },
+        {
+          "id": "arcee-ai/trinity-large-thinking",
+          "description": ""
+        },
+        {
+          "id": "openai/gpt-5.5-pro",
+          "description": ""
+        },
+        {
+          "id": "openai/gpt-5.4-nano",
+          "description": ""
+        }
+      ]
+    },
+    "nous": {
+      "metadata": {
+        "display_name": "Nous Portal",
+        "note": "Free-tier gating is determined live via Portal pricing (partition_nous_models_by_tier), not this manifest."
+      },
+      "models": [
+        {
+          "id": "moonshotai/kimi-k2.6"
+        },
+        {
+          "id": "deepseek/deepseek-v4-pro"
+        },
+        {
+          "id": "deepseek/deepseek-v4-flash"
+        },
+        {
+          "id": "xiaomi/mimo-v2.5-pro"
+        },
+        {
+          "id": "xiaomi/mimo-v2.5"
+        },
+        {
+          "id": "anthropic/claude-opus-4.7"
+        },
+        {
+          "id": "anthropic/claude-opus-4.6"
+        },
+        {
+          "id": "anthropic/claude-sonnet-4.6"
+        },
+        {
+          "id": "anthropic/claude-sonnet-4.5"
+        },
+        {
+          "id": "anthropic/claude-haiku-4.5"
+        },
+        {
+          "id": "openai/gpt-5.5"
+        },
+        {
+          "id": "openai/gpt-5.4-mini"
+        },
+        {
+          "id": "openai/gpt-5.3-codex"
+        },
+        {
+          "id": "google/gemini-3-pro-preview"
+        },
+        {
+          "id": "google/gemini-3-flash-preview"
+        },
+        {
+          "id": "google/gemini-3.1-pro-preview"
+        },
+        {
+          "id": "google/gemini-3.1-flash-lite-preview"
+        },
+        {
+          "id": "qwen/qwen3.5-plus-02-15"
+        },
+        {
+          "id": "qwen/qwen3.5-35b-a3b"
+        },
+        {
+          "id": "stepfun/step-3.5-flash"
+        },
+        {
+          "id": "minimax/minimax-m2.7"
+        },
+        {
+          "id": "minimax/minimax-m2.5"
+        },
+        {
+          "id": "minimax/minimax-m2.5:free"
+        },
+        {
+          "id": "z-ai/glm-5.1"
+        },
+        {
+          "id": "z-ai/glm-5v-turbo"
+        },
+        {
+          "id": "z-ai/glm-5-turbo"
+        },
+        {
+          "id": "x-ai/grok-4.20-beta"
+        },
+        {
+          "id": "nvidia/nemotron-3-super-120b-a12b"
+        },
+        {
+          "id": "arcee-ai/trinity-large-thinking"
+        },
+        {
+          "id": "openai/gpt-5.5-pro"
+        },
+        {
+          "id": "openai/gpt-5.4-nano"
+        }
+      ]
+    }
+  }
+}