Merge origin/main, resolve conflicts (self._base_url_lower)

2026-07-20 15:33:54 +00:00 · 2026-03-18 04:09:00 -07:00 · 2026-03-18 04:09:00 -07:00 · e7844e9c8d
commit e7844e9c8d
parent 04101bc59e 1c761ae042
54 changed files with 2281 additions and 179 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -55,8 +55,8 @@ logger = logging.getLogger(__name__)
 _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "zai": "glm-4.5-flash",
    "kimi-coding": "kimi-k2-turbo-preview",
-    "minimax": "MiniMax-M2.5-highspeed",
-    "minimax-cn": "MiniMax-M2.5-highspeed",
+    "minimax": "MiniMax-M2.7-highspeed",
+    "minimax-cn": "MiniMax-M2.7-highspeed",
    "anthropic": "claude-haiku-4-5-20251001",
    "ai-gateway": "google/gemini-3-flash",
    "opencode-zen": "gemini-3-flash",
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@ -45,16 +45,18 @@ class ContextCompressor:
        quiet_mode: bool = False,
        summary_model_override: str = None,
        base_url: str = "",
+        api_key: str = "",
    ):
        self.model = model
        self.base_url = base_url
+        self.api_key = api_key
        self.threshold_percent = threshold_percent
        self.protect_first_n = protect_first_n
        self.protect_last_n = protect_last_n
        self.summary_target_tokens = summary_target_tokens
        self.quiet_mode = quiet_mode

-        self.context_length = get_model_context_length(model, base_url=base_url)
+        self.context_length = get_model_context_length(model, base_url=base_url, api_key=api_key)
        self.threshold_tokens = int(self.context_length * threshold_percent)
        self.compression_count = 0
        self._context_probed = False  # True after a step-down from context error
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@ -10,6 +10,7 @@ import re
 import time
 from pathlib import Path
 from typing import Any, Dict, List, Optional
+from urllib.parse import urlparse

 import requests
 import yaml
@ -21,6 +22,9 @@ logger = logging.getLogger(__name__)
 _model_metadata_cache: Dict[str, Dict[str, Any]] = {}
 _model_metadata_cache_time: float = 0
 _MODEL_CACHE_TTL = 3600
+_endpoint_model_metadata_cache: Dict[str, Dict[str, Dict[str, Any]]] = {}
+_endpoint_model_metadata_cache_time: Dict[str, float] = {}
+_ENDPOINT_MODEL_CACHE_TTL = 300

 # Descending tiers for context length probing when the model is unknown.
 # We start high and step down on context-length errors until one works.
@ -77,6 +81,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    "kimi-k2-thinking-turbo": 262144,
    "kimi-k2-turbo-preview": 262144,
    "kimi-k2-0905-preview": 131072,
+    "MiniMax-M2.7": 204800,
+    "MiniMax-M2.7-highspeed": 204800,
    "MiniMax-M2.5": 204800,
    "MiniMax-M2.5-highspeed": 204800,
    "MiniMax-M2.1": 204800,
@ -121,6 +127,128 @@ DEFAULT_CONTEXT_LENGTHS = {
    "qwen-vl-max": 32768,
 }

+# Field names that may carry a model's context window size in a /models
+# payload. They are matched case-insensitively at any nesting depth by
+# _extract_first_int; the tuple order does NOT set priority (the first
+# matching key met while walking the payload wins).
+_CONTEXT_LENGTH_KEYS = (
+    "context_length",
+    "context_window",
+    "max_context_length",
+    "max_position_embeddings",
+    "max_model_len",
+    "max_input_tokens",
+    "max_sequence_length",
+    "max_seq_len",
+)
+
+# Field names that may carry a model's maximum completion/output token count.
+# Looked up the same way as _CONTEXT_LENGTH_KEYS.
+_MAX_COMPLETION_KEYS = (
+    "max_completion_tokens",
+    "max_output_tokens",
+    "max_tokens",
+)
+
+
+def _normalize_base_url(base_url: str) -> str:
+    """Return *base_url* without surrounding whitespace or trailing slashes.
+
+    A falsy value (``None`` or an empty string) yields an empty string.
+    """
+    if not base_url:
+        return ""
+    return base_url.strip().rstrip("/")
+
+
+def _is_openrouter_base_url(base_url: str) -> bool:
+    """Return True when the normalized URL contains ``openrouter.ai`` (case-insensitive)."""
+    lowered = _normalize_base_url(base_url).lower()
+    return "openrouter.ai" in lowered
+
+
+def _is_custom_endpoint(base_url: str) -> bool:
+    """Return True for a non-empty base URL that is not an OpenRouter URL."""
+    cleaned = _normalize_base_url(base_url)
+    if not cleaned:
+        return False
+    return not _is_openrouter_base_url(cleaned)
+
+
+def _is_known_provider_base_url(base_url: str) -> bool:
+    """Return True when the URL's host looks like a known hosted provider.
+
+    The host is taken from the parsed URL (``https://`` is assumed when no
+    scheme is given). Matching is by substring, so every host that merely
+    contains one of the fragments below counts as known; ``api.minimax`` is
+    deliberately a partial host name.
+    """
+    cleaned = _normalize_base_url(base_url)
+    if not cleaned:
+        return False
+    with_scheme = cleaned if "://" in cleaned else f"https://{cleaned}"
+    parts = urlparse(with_scheme)
+    # Fall back to the path when the parser found no network location.
+    host = (parts.netloc or parts.path).lower()
+    for fragment in (
+        "api.openai.com",
+        "chatgpt.com",
+        "api.anthropic.com",
+        "api.z.ai",
+        "api.moonshot.ai",
+        "api.kimi.com",
+        "api.minimax",
+    ):
+        if fragment in host:
+            return True
+    return False
+
+
+def _iter_nested_dicts(value: Any):
+    """Yield every dict reachable from *value*, depth-first and parent-first.
+
+    Dict values and list items are descended into; any other type ends the
+    walk for that branch.
+    """
+    if isinstance(value, dict):
+        yield value
+        children = value.values()
+    elif isinstance(value, list):
+        children = value
+    else:
+        return
+    for child in children:
+        yield from _iter_nested_dicts(child)
+
+
+def _coerce_reasonable_int(value: Any, minimum: int = 1024, maximum: int = 10_000_000) -> Optional[int]:
+    """Convert *value* to an int, or return None when it is not plausible.
+
+    Args:
+        value: Raw metadata value. Strings may carry surrounding whitespace
+            and thousands separators (``"131,072"``). Booleans are rejected
+            even though they are ints in Python.
+        minimum: Smallest accepted result (inclusive).
+        maximum: Largest accepted result (inclusive).
+
+    Returns:
+        The converted integer when it lies in ``[minimum, maximum]``,
+        otherwise None. Never raises for unconvertible input.
+    """
+    if isinstance(value, bool):
+        return None
+    if isinstance(value, str):
+        value = value.strip().replace(",", "")
+    try:
+        result = int(value)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError covers int(float("inf")); the json module accepts
+        # "Infinity", so one bad field must not abort the whole payload.
+        return None
+    return result if minimum <= result <= maximum else None
+
+
+def _extract_first_int(payload: Dict[str, Any], keys: tuple[str, ...]) -> Optional[int]:
+    """Return the first plausible integer stored under any of *keys*.
+
+    Key names are compared case-insensitively. Dicts are visited in the order
+    produced by ``_iter_nested_dicts``; values that fail
+    ``_coerce_reasonable_int`` are skipped. Returns None when nothing fits.
+    """
+    wanted = frozenset(name.lower() for name in keys)
+    for mapping in _iter_nested_dicts(payload):
+        candidates = (
+            _coerce_reasonable_int(raw)
+            for field, raw in mapping.items()
+            if str(field).lower() in wanted
+        )
+        hit = next((number for number in candidates if number is not None), None)
+        if hit is not None:
+            return hit
+    return None
+
+
+def _extract_context_length(payload: Dict[str, Any]) -> Optional[int]:
+    """Return the first plausible value found under a ``_CONTEXT_LENGTH_KEYS`` name, or None."""
+    return _extract_first_int(payload, _CONTEXT_LENGTH_KEYS)
+
+
+def _extract_max_completion_tokens(payload: Dict[str, Any]) -> Optional[int]:
+    """Return the first plausible value found under a ``_MAX_COMPLETION_KEYS`` name, or None."""
+    return _extract_first_int(payload, _MAX_COMPLETION_KEYS)
+
+
+def _extract_pricing(payload: Dict[str, Any]) -> Dict[str, Any]:
+    """Return pricing fields from the first nested dict that carries any.
+
+    Each canonical field (``prompt``, ``completion``, ``request``,
+    ``cache_read``, ``cache_write``) is filled from the first of its aliases
+    whose value is neither None nor an empty string. Keys are compared
+    case-insensitively and values are returned unconverted. An empty dict
+    means no nested dict yielded a usable value.
+    """
+    alias_map = {
+        "prompt": ("prompt", "input", "input_cost_per_token", "prompt_token_cost"),
+        "completion": ("completion", "output", "output_cost_per_token", "completion_token_cost"),
+        "request": ("request", "request_cost"),
+        "cache_read": ("cache_read", "cached_prompt", "input_cache_read", "cache_read_cost_per_token"),
+        "cache_write": ("cache_write", "cache_creation", "input_cache_write", "cache_write_cost_per_token"),
+    }
+    for mapping in _iter_nested_dicts(payload):
+        lowered = {str(field).lower(): raw for field, raw in mapping.items()}
+        found: Dict[str, Any] = {}
+        for target, aliases in alias_map.items():
+            usable = (
+                lowered[alias]
+                for alias in aliases
+                if alias in lowered and lowered[alias] not in (None, "")
+            )
+            # Only the first usable alias counts for each canonical field.
+            for amount in usable:
+                found[target] = amount
+                break
+        if found:
+            return found
+    return {}
+
+
+def _add_model_aliases(cache: Dict[str, Dict[str, Any]], model_id: str, entry: Dict[str, Any]) -> None:
+    """Store *entry* under *model_id* and, if free, under its bare name.
+
+    The bare name is everything after the first ``/`` (the provider prefix).
+    The full id always overwrites; the bare alias never replaces an entry
+    that is already present.
+    """
+    cache[model_id] = entry
+    _, slash, bare_model = model_id.partition("/")
+    if slash:
+        cache.setdefault(bare_model, entry)
+

 def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any]]:
    """Fetch model metadata from OpenRouter (cached for 1 hour)."""
@ -137,15 +265,16 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
        cache = {}
        for model in data.get("data", []):
            model_id = model.get("id", "")
-            cache[model_id] = {
+            entry = {
                "context_length": model.get("context_length", 128000),
                "max_completion_tokens": model.get("top_provider", {}).get("max_completion_tokens", 4096),
                "name": model.get("name", model_id),
                "pricing": model.get("pricing", {}),
            }
+            _add_model_aliases(cache, model_id, entry)
            canonical = model.get("canonical_slug", "")
            if canonical and canonical != model_id:
-                cache[canonical] = cache[model_id]
+                _add_model_aliases(cache, canonical, entry)

        _model_metadata_cache = cache
        _model_metadata_cache_time = time.time()
@ -157,6 +286,75 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
        return _model_metadata_cache or {}


+def fetch_endpoint_model_metadata(
+    base_url: str,
+    api_key: str = "",
+    force_refresh: bool = False,
+) -> Dict[str, Dict[str, Any]]:
+    """Fetch model metadata from an OpenAI-compatible ``/models`` endpoint.
+
+    This is used for explicit custom endpoints where hardcoded global model-name
+    defaults are unreliable. Results are cached in memory per base URL.
+
+    Args:
+        base_url: Endpoint root. Empty and OpenRouter URLs return ``{}``
+            without any request being made.
+        api_key: Sent as a ``Bearer`` Authorization header when non-empty.
+        force_refresh: Skip the in-memory cache lookup and fetch again.
+
+    Returns:
+        Mapping of model id (plus the bare alias after the first ``/``) to an
+        entry holding ``name`` and, when they could be extracted,
+        ``context_length``, ``max_completion_tokens`` and ``pricing``.
+        ``{}`` when no candidate URL produced a usable response.
+    """
+    normalized = _normalize_base_url(base_url)
+    if not normalized or _is_openrouter_base_url(normalized):
+        return {}
+
+    # Serve from cache while the entry is younger than _ENDPOINT_MODEL_CACHE_TTL.
+    # NOTE(review): the cache key is the base URL only, so an empty result
+    # cached after a failed call without a key is also returned to a later
+    # call that supplies one, until the TTL lapses -- confirm this is intended.
+    if not force_refresh:
+        cached = _endpoint_model_metadata_cache.get(normalized)
+        cached_at = _endpoint_model_metadata_cache_time.get(normalized, 0)
+        if cached is not None and (time.time() - cached_at) < _ENDPOINT_MODEL_CACHE_TTL:
+            return cached
+
+    # Try the URL as given, then the same URL with a trailing "/v1" toggled.
+    candidates = [normalized]
+    if normalized.endswith("/v1"):
+        alternate = normalized[:-3].rstrip("/")
+    else:
+        alternate = normalized + "/v1"
+    if alternate and alternate not in candidates:
+        candidates.append(alternate)
+
+    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
+    last_error: Optional[Exception] = None
+
+    for candidate in candidates:
+        url = candidate.rstrip("/") + "/models"
+        try:
+            response = requests.get(url, headers=headers, timeout=10)
+            response.raise_for_status()
+            payload = response.json()
+            cache: Dict[str, Dict[str, Any]] = {}
+            for model in payload.get("data", []):
+                # Skip list items that are not objects or carry no id.
+                if not isinstance(model, dict):
+                    continue
+                model_id = model.get("id")
+                if not model_id:
+                    continue
+                # Optional fields are added only when a value was extracted.
+                entry: Dict[str, Any] = {"name": model.get("name", model_id)}
+                context_length = _extract_context_length(model)
+                if context_length is not None:
+                    entry["context_length"] = context_length
+                max_completion_tokens = _extract_max_completion_tokens(model)
+                if max_completion_tokens is not None:
+                    entry["max_completion_tokens"] = max_completion_tokens
+                pricing = _extract_pricing(model)
+                if pricing:
+                    entry["pricing"] = pricing
+                _add_model_aliases(cache, model_id, entry)
+
+            # The first candidate that parses wins; cache under the original URL.
+            _endpoint_model_metadata_cache[normalized] = cache
+            _endpoint_model_metadata_cache_time[normalized] = time.time()
+            return cache
+        except Exception as exc:
+            # Any failure (HTTP, JSON, payload shape) moves on to the next candidate.
+            last_error = exc
+
+    if last_error:
+        logger.debug("Failed to fetch model metadata from %s/models: %s", normalized, last_error)
+    # Cache the empty result too, so a failing endpoint is not retried on every call.
+    _endpoint_model_metadata_cache[normalized] = {}
+    _endpoint_model_metadata_cache_time[normalized] = time.time()
+    return {}
+
+
 def _get_context_cache_path() -> Path:
    """Return path to the persistent context length cache file."""
    hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
@ -241,14 +439,15 @@ def parse_context_limit_from_error(error_msg: str) -> Optional[int]:
    return None


-def get_model_context_length(model: str, base_url: str = "") -> int:
+def get_model_context_length(model: str, base_url: str = "", api_key: str = "") -> int:
    """Get the context length for a model.

    Resolution order:
    1. Persistent cache (previously discovered via probing)
-    2. OpenRouter API metadata
-    3. Hardcoded DEFAULT_CONTEXT_LENGTHS (fuzzy match)
-    4. First probe tier (2M) — will be narrowed on first context error
+    2. Active endpoint metadata (/models for explicit custom endpoints)
+    3. OpenRouter API metadata
+    4. Hardcoded DEFAULT_CONTEXT_LENGTHS (fuzzy match for hosted routes only)
+    5. First probe tier (2M) — will be narrowed on first context error
    """
    # 1. Check persistent cache (model+provider)
    if base_url:
@ -256,19 +455,31 @@ def get_model_context_length(model: str, base_url: str = "") -> int:
        if cached is not None:
            return cached

-    # 2. OpenRouter API metadata
+    # 2. Active endpoint metadata for explicit custom routes
+    if _is_custom_endpoint(base_url):
+        endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
+        if model in endpoint_metadata:
+            context_length = endpoint_metadata[model].get("context_length")
+            if isinstance(context_length, int):
+                return context_length
+        if not _is_known_provider_base_url(base_url):
+            # Explicit third-party endpoints should not borrow fuzzy global
+            # defaults from unrelated providers with similarly named models.
+            return CONTEXT_PROBE_TIERS[0]
+
+    # 3. OpenRouter API metadata
    metadata = fetch_model_metadata()
    if model in metadata:
        return metadata[model].get("context_length", 128000)

-    # 3. Hardcoded defaults (fuzzy match — longest key first for specificity)
+    # 4. Hardcoded defaults (fuzzy match — longest key first for specificity)
    for default_model, length in sorted(
        DEFAULT_CONTEXT_LENGTHS.items(), key=lambda x: len(x[0]), reverse=True
    ):
        if default_model in model or model in default_model:
            return length

-    # 4. Unknown model — start at highest probe tier
+    # 5. Unknown model — start at highest probe tier
    return CONTEXT_PROBE_TIERS[0]


--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@ -330,28 +330,34 @@ def build_skills_system_prompt(
    # Each entry: (skill_name, description)
    # Supports sub-categories: skills/mlops/training/axolotl/SKILL.md
    # -> category "mlops/training", skill "axolotl"
+    # Load disabled skill names once for the entire scan
+    try:
+        from tools.skills_tool import _get_disabled_skill_names
+        disabled = _get_disabled_skill_names()
+    except Exception:
+        disabled = set()
+
    skills_by_category: dict[str, list[tuple[str, str]]] = {}
    for skill_file in skills_dir.rglob("SKILL.md"):
-        is_compatible, _, desc = _parse_skill_file(skill_file)
+        is_compatible, frontmatter, desc = _parse_skill_file(skill_file)
        if not is_compatible:
            continue
-        # Skip skills whose conditional activation rules exclude them
-        conditions = _read_skill_conditions(skill_file)
-        if not _skill_should_show(conditions, available_tools, available_toolsets):
-            continue
        rel_path = skill_file.relative_to(skills_dir)
        parts = rel_path.parts
        if len(parts) >= 2:
-            # Category is everything between skills_dir and the skill folder
-            # e.g. parts = ("mlops", "training", "axolotl", "SKILL.md")
-            #   → category = "mlops/training", skill_name = "axolotl"
-            # e.g. parts = ("github", "github-auth", "SKILL.md")
-            #   → category = "github", skill_name = "github-auth"
            skill_name = parts[-2]
            category = "/".join(parts[:-2]) if len(parts) > 2 else parts[0]
        else:
            category = "general"
            skill_name = skill_file.parent.name
+        # Respect user's disabled skills config
+        fm_name = frontmatter.get("name", skill_name)
+        if fm_name in disabled or skill_name in disabled:
+            continue
+        # Skip skills whose conditional activation rules exclude them
+        conditions = _read_skill_conditions(skill_file)
+        if not _skill_should_show(conditions, available_tools, available_toolsets):
+            continue
        skills_by_category.setdefault(category, []).append((skill_name, desc))

    if not skills_by_category:
--- a/agent/skill_commands.py
+++ b/agent/skill_commands.py
@ -157,9 +157,10 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
    global _skill_commands
    _skill_commands = {}
    try:
-        from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform
+        from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
        if not SKILLS_DIR.exists():
            return _skill_commands
+        disabled = _get_disabled_skill_names()
        for skill_md in SKILLS_DIR.rglob("SKILL.md"):
            if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
                continue
@ -170,6 +171,9 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
                if not skill_matches_platform(frontmatter):
                    continue
                name = frontmatter.get('name', skill_md.parent.name)
+                # Respect user's disabled skills config
+                if name in disabled:
+                    continue
                description = frontmatter.get('description', '')
                if not description:
                    for line in body.strip().split('\n'):
--- a/agent/usage_pricing.py
+++ b/agent/usage_pricing.py
@ -5,7 +5,7 @@ from datetime import datetime, timezone
 from decimal import Decimal
 from typing import Any, Dict, Literal, Optional

-from agent.model_metadata import fetch_model_metadata
+from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata

 DEFAULT_PRICING = {"input": 0.0, "output": 0.0}

@ -335,8 +335,21 @@ def _lookup_official_docs_pricing(route: BillingRoute) -> Optional[PricingEntry]


 def _openrouter_pricing_entry(route: BillingRoute) -> Optional[PricingEntry]:
-    metadata = fetch_model_metadata()
-    model_id = route.model
+    return _pricing_entry_from_metadata(
+        fetch_model_metadata(),
+        route.model,
+        source_url="https://openrouter.ai/docs/api/api-reference/models/get-models",
+        pricing_version="openrouter-models-api",
+    )
+
+
+def _pricing_entry_from_metadata(
+    metadata: Dict[str, Dict[str, Any]],
+    model_id: str,
+    *,
+    source_url: str,
+    pricing_version: str,
+) -> Optional[PricingEntry]:
    if model_id not in metadata:
        return None
    pricing = metadata[model_id].get("pricing") or {}
@ -355,6 +368,7 @@ def _openrouter_pricing_entry(route: BillingRoute) -> Optional[PricingEntry]:
    )
    if prompt is None and completion is None and request is None:
        return None
+
    def _per_token_to_per_million(value: Optional[Decimal]) -> Optional[Decimal]:
        if value is None:
            return None
@ -367,8 +381,8 @@ def _openrouter_pricing_entry(route: BillingRoute) -> Optional[PricingEntry]:
        cache_write_cost_per_million=_per_token_to_per_million(cache_write),
        request_cost=request,
        source="provider_models_api",
-        source_url="https://openrouter.ai/docs/api/api-reference/models/get-models",
-        pricing_version="openrouter-models-api",
+        source_url=source_url,
+        pricing_version=pricing_version,
        fetched_at=_UTC_NOW(),
    )

@ -377,6 +391,7 @@ def get_pricing_entry(
    model_name: str,
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
 ) -> Optional[PricingEntry]:
    route = resolve_billing_route(model_name, provider=provider, base_url=base_url)
    if route.billing_mode == "subscription_included":
@ -390,6 +405,15 @@ def get_pricing_entry(
        )
    if route.provider == "openrouter":
        return _openrouter_pricing_entry(route)
+    if route.base_url:
+        entry = _pricing_entry_from_metadata(
+            fetch_endpoint_model_metadata(route.base_url, api_key=api_key or ""),
+            route.model,
+            source_url=f"{route.base_url.rstrip('/')}/models",
+            pricing_version="openai-compatible-models-api",
+        )
+        if entry:
+            return entry
    return _lookup_official_docs_pricing(route)


@ -460,6 +484,7 @@ def estimate_usage_cost(
    *,
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
 ) -> CostResult:
    route = resolve_billing_route(model_name, provider=provider, base_url=base_url)
    if route.billing_mode == "subscription_included":
@ -471,7 +496,7 @@ def estimate_usage_cost(
            pricing_version="included-route",
        )

-    entry = get_pricing_entry(model_name, provider=provider, base_url=base_url)
+    entry = get_pricing_entry(model_name, provider=provider, base_url=base_url, api_key=api_key)
    if not entry:
        return CostResult(amount_usd=None, status="unknown", source="none", label="n/a")

@ -536,6 +561,7 @@ def has_known_pricing(
    model_name: str,
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
 ) -> bool:
    """Check whether we have pricing data for this model+route.

@ -545,7 +571,7 @@ def has_known_pricing(
    route = resolve_billing_route(model_name, provider=provider, base_url=base_url)
    if route.billing_mode == "subscription_included":
        return True
-    entry = get_pricing_entry(model_name, provider=provider, base_url=base_url)
+    entry = get_pricing_entry(model_name, provider=provider, base_url=base_url, api_key=api_key)
    return entry is not None


@ -553,13 +579,14 @@ def get_pricing(
    model_name: str,
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
 ) -> Dict[str, float]:
    """Backward-compatible thin wrapper for legacy callers.

    Returns only non-cache input/output fields when a pricing entry exists.
    Unknown routes return zeroes.
    """
-    entry = get_pricing_entry(model_name, provider=provider, base_url=base_url)
+    entry = get_pricing_entry(model_name, provider=provider, base_url=base_url, api_key=api_key)
    if not entry:
        return {"input": 0.0, "output": 0.0}
    return {
@ -575,6 +602,7 @@ def estimate_cost_usd(
    *,
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
 ) -> float:
    """Backward-compatible helper for legacy callers.

@ -586,6 +614,7 @@ def estimate_cost_usd(
        CanonicalUsage(input_tokens=input_tokens, output_tokens=output_tokens),
        provider=provider,
        base_url=base_url,
+        api_key=api_key,
    )
    return float(result.amount_usd or _ZERO)

--- a/cli.py
+++ b/cli.py
@ -1217,6 +1217,9 @@ class HermesCLI:
        self._voice_tts_done = threading.Event()
        self._voice_tts_done.set()

+        # Status bar visibility (toggled via /statusbar)
+        self._status_bar_visible = True
+
        # Background task tracking: {task_id: threading.Thread}
        self._background_tasks: Dict[str, threading.Thread] = {}
        self._background_task_counter = 0
@ -1324,6 +1327,8 @@ class HermesCLI:
            return f"⚕ {self.model if getattr(self, 'model', None) else 'Hermes'}"

    def _get_status_bar_fragments(self):
+        if not self._status_bar_visible:
+            return []
        try:
            snapshot = self._get_status_bar_snapshot()
            width = shutil.get_terminal_size((80, 24)).columns
@ -3575,6 +3580,10 @@ class HermesCLI:
                self._handle_skills_command(cmd_original)
        elif canonical == "platforms":
            self._show_gateway_status()
+        elif canonical == "statusbar":
+            self._status_bar_visible = not self._status_bar_visible
+            state = "visible" if self._status_bar_visible else "hidden"
+            self.console.print(f"  Status bar {state}")
        elif canonical == "verbose":
            self._toggle_verbose()
        elif canonical == "reasoning":
@ -6613,9 +6622,12 @@ class HermesCLI:
            filter=Condition(lambda: cli_ref._voice_mode),
        )

-        status_bar = Window(
-            content=FormattedTextControl(lambda: cli_ref._get_status_bar_fragments()),
-            height=1,
+        status_bar = ConditionalContainer(
+            Window(
+                content=FormattedTextControl(lambda: cli_ref._get_status_bar_fragments()),
+                height=1,
+            ),
+            filter=Condition(lambda: cli_ref._status_bar_visible),
        )

        # Layout: interactive prompt widgets + ruled input at bottom.
--- a/cron/jobs.py
+++ b/cron/jobs.py
@ -34,6 +34,7 @@ HERMES_DIR = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
 CRON_DIR = HERMES_DIR / "cron"
 JOBS_FILE = CRON_DIR / "jobs.json"
 OUTPUT_DIR = CRON_DIR / "output"
+ONESHOT_GRACE_SECONDS = 120


 def _normalize_skill_list(skill: Optional[str] = None, skills: Optional[Any] = None) -> List[str]:
@ -220,6 +221,33 @@ def _ensure_aware(dt: datetime) -> datetime:
    return dt.astimezone(target_tz)


+def _recoverable_oneshot_run_at(
+    schedule: Dict[str, Any],
+    now: datetime,
+    *,
+    last_run_at: Optional[str] = None,
+) -> Optional[str]:
+    """Return the ``run_at`` string of a one-shot schedule that may still fire.
+
+    A one-shot stays eligible until ``ONESHOT_GRACE_SECONDS`` after its
+    requested time, so a job created slightly late is still picked up on the
+    next tick. None is returned for other schedule kinds, for a job that has
+    already run (``last_run_at`` set), for a missing ``run_at``, and once the
+    grace window has passed.
+    """
+    if schedule.get("kind") != "once" or last_run_at:
+        return None
+
+    run_at = schedule.get("run_at")
+    if not run_at:
+        return None
+
+    scheduled = _ensure_aware(datetime.fromisoformat(run_at))
+    earliest_allowed = now - timedelta(seconds=ONESHOT_GRACE_SECONDS)
+    return run_at if scheduled >= earliest_allowed else None
+
+
 def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None) -> Optional[str]:
    """
    Compute the next run time for a schedule.
@ -229,9 +257,7 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None
    now = _hermes_now()

    if schedule["kind"] == "once":
-        run_at = _ensure_aware(datetime.fromisoformat(schedule["run_at"]))
-        # If in the future, return it; if in the past, no more runs
-        return schedule["run_at"] if run_at > now else None
+        return _recoverable_oneshot_run_at(schedule, now, last_run_at=last_run_at)

    elif schedule["kind"] == "interval":
        minutes = schedule["minutes"]
@ -555,7 +581,26 @@ def get_due_jobs() -> List[Dict[str, Any]]:

        next_run = job.get("next_run_at")
        if not next_run:
-            continue
+            recovered_next = _recoverable_oneshot_run_at(
+                job.get("schedule", {}),
+                now,
+                last_run_at=job.get("last_run_at"),
+            )
+            if not recovered_next:
+                continue
+
+            job["next_run_at"] = recovered_next
+            next_run = recovered_next
+            logger.info(
+                "Job '%s' had no next_run_at; recovering one-shot run at %s",
+                job.get("name", job["id"]),
+                recovered_next,
+            )
+            for rj in raw_jobs:
+                if rj["id"] == job["id"]:
+                    rj["next_run_at"] = recovered_next
+                    needs_save = True
+                    break

        next_run_dt = _ensure_aware(datetime.fromisoformat(next_run))
        if next_run_dt <= now:
--- a/gateway/config.py
+++ b/gateway/config.py
@ -32,6 +32,15 @@ def _coerce_bool(value: Any, default: bool = True) -> bool:
    return bool(value)


+def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> str:
+    """Map *value* to ``"pair"`` or ``"ignore"``, else return *default*.
+
+    Strings are trimmed and lower-cased before comparison; non-strings and
+    unrecognised strings fall back to *default* unchanged.
+    """
+    if not isinstance(value, str):
+        return default
+    candidate = value.strip().lower()
+    return candidate if candidate in ("pair", "ignore") else default
+
+
 class Platform(Enum):
    """Supported messaging platforms."""
    LOCAL = "local"
@ -215,6 +224,9 @@ class GatewayConfig:
    # Session isolation in shared chats
    group_sessions_per_user: bool = True  # Isolate group/channel sessions per participant when user IDs are available

+    # Unauthorized DM policy
+    unauthorized_dm_behavior: str = "pair"  # "pair" or "ignore"
+
    # Streaming configuration
    streaming: StreamingConfig = field(default_factory=StreamingConfig)

@ -289,6 +301,7 @@ class GatewayConfig:
            "always_log_local": self.always_log_local,
            "stt_enabled": self.stt_enabled,
            "group_sessions_per_user": self.group_sessions_per_user,
+            "unauthorized_dm_behavior": self.unauthorized_dm_behavior,
            "streaming": self.streaming.to_dict(),
        }
    
@ -331,6 +344,10 @@ class GatewayConfig:
            stt_enabled = data.get("stt", {}).get("enabled") if isinstance(data.get("stt"), dict) else None

        group_sessions_per_user = data.get("group_sessions_per_user")
+        unauthorized_dm_behavior = _normalize_unauthorized_dm_behavior(
+            data.get("unauthorized_dm_behavior"),
+            "pair",
+        )

        return cls(
            platforms=platforms,
@ -343,9 +360,21 @@ class GatewayConfig:
            always_log_local=data.get("always_log_local", True),
            stt_enabled=_coerce_bool(stt_enabled, True),
            group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
+            unauthorized_dm_behavior=unauthorized_dm_behavior,
            streaming=StreamingConfig.from_dict(data.get("streaming", {})),
        )

+    def get_unauthorized_dm_behavior(self, platform: Optional[Platform] = None) -> str:
+        """Return the unauthorized-DM behavior that applies to *platform*.
+
+        A platform's own ``unauthorized_dm_behavior`` entry in its ``extra``
+        settings takes precedence (normalized, falling back to the global
+        value when invalid); otherwise the global setting is returned.
+        """
+        fallback = self.unauthorized_dm_behavior
+        if not platform:
+            return fallback
+        entry = self.platforms.get(platform)
+        if entry and "unauthorized_dm_behavior" in entry.extra:
+            return _normalize_unauthorized_dm_behavior(
+                entry.extra.get("unauthorized_dm_behavior"),
+                fallback,
+            )
+        return fallback
+

 def load_gateway_config() -> GatewayConfig:
    """
@ -416,6 +445,38 @@ def load_gateway_config() -> GatewayConfig:
            if "always_log_local" in yaml_cfg:
                gw_data["always_log_local"] = yaml_cfg["always_log_local"]

+            if "unauthorized_dm_behavior" in yaml_cfg:
+                gw_data["unauthorized_dm_behavior"] = _normalize_unauthorized_dm_behavior(
+                    yaml_cfg.get("unauthorized_dm_behavior"),
+                    "pair",
+                )
+
+            # Bridge per-platform unauthorized_dm_behavior from config.yaml
+            platforms_data = gw_data.setdefault("platforms", {})
+            if not isinstance(platforms_data, dict):
+                platforms_data = {}
+                gw_data["platforms"] = platforms_data
+            for plat in Platform:
+                if plat == Platform.LOCAL:
+                    continue
+                platform_cfg = yaml_cfg.get(plat.value)
+                if not isinstance(platform_cfg, dict):
+                    continue
+                if "unauthorized_dm_behavior" not in platform_cfg:
+                    continue
+                plat_data = platforms_data.setdefault(plat.value, {})
+                if not isinstance(plat_data, dict):
+                    plat_data = {}
+                    platforms_data[plat.value] = plat_data
+                extra = plat_data.setdefault("extra", {})
+                if not isinstance(extra, dict):
+                    extra = {}
+                    plat_data["extra"] = extra
+                extra["unauthorized_dm_behavior"] = _normalize_unauthorized_dm_behavior(
+                    platform_cfg.get("unauthorized_dm_behavior"),
+                    gw_data.get("unauthorized_dm_behavior", "pair"),
+                )
+
            # Discord settings → env vars (env vars take precedence)
            discord_cfg = yaml_cfg.get("discord", {})
            if isinstance(discord_cfg, dict):
--- a/gateway/platforms/matrix.py
+++ b/gateway/platforms/matrix.py
@ -635,7 +635,7 @@ class MatrixAdapter(BasePlatformAdapter):
            source=source,
            raw_message=getattr(event, "source", {}),
            message_id=event.event_id,
-            reply_to=reply_to,
+            reply_to_message_id=reply_to,
        )

        await self.handle_message(msg_event)
--- a/gateway/run.py
+++ b/gateway/run.py
@ -434,6 +434,16 @@ class GatewayRunner:
        for session_key in list(managers.keys()):
            self._shutdown_gateway_honcho(session_key)
    
+    # -- Setup skill availability ----------------------------------------
+
+    def _has_setup_skill(self) -> bool:
+        """Report whether ``_find_skill`` can locate the hermes-agent-setup skill."""
+        try:
+            from tools.skill_manager_tool import _find_skill as locate_skill
+            return locate_skill("hermes-agent-setup") is not None
+        except Exception:  # best-effort probe: import or lookup failure counts as absent
+            return False
+
    # -- Voice mode persistence ------------------------------------------

    _VOICE_MODE_PATH = _hermes_home / "gateway_voice_mode.json"
@ -1251,6 +1261,13 @@ class GatewayRunner:
        if "@" in user_id:
            check_ids.add(user_id.split("@")[0])
        return bool(check_ids & allowed_ids)
+
+    def _get_unauthorized_dm_behavior(self, platform: Optional[Platform]) -> str:
+        """Ask ``self.config`` how to treat unauthorized DMs on *platform*; default "pair"."""
+        cfg = getattr(self, "config", None)
+        if not cfg or not hasattr(cfg, "get_unauthorized_dm_behavior"):
+            return "pair"  # no config, or a config object without the accessor
+        return cfg.get_unauthorized_dm_behavior(platform)
    
    async def _handle_message(self, event: MessageEvent) -> Optional[str]:
        """
@ -1271,7 +1288,7 @@ class GatewayRunner:
        if not self._is_user_authorized(source):
            logger.warning("Unauthorized user: %s (%s) on %s", source.user_id, source.user_name, source.platform.value)
            # In DMs: offer pairing code. In groups: silently ignore.
-            if source.chat_type == "dm":
+            if source.chat_type == "dm" and self._get_unauthorized_dm_behavior(source.platform) == "pair":
                platform_name = source.platform.value if source.platform else "unknown"
                code = self.pairing_store.generate_code(
                    platform_name, source.user_id, source.user_name or ""
@ -1874,6 +1891,37 @@ class GatewayRunner:
                message_text = await self._enrich_message_with_transcription(
                    message_text, audio_paths
                )
+                # If STT failed, send a direct message to the user so they
+                # know voice isn't configured — don't rely on the agent to
+                # relay the error clearly.
+                _stt_fail_markers = (
+                    "No STT provider",
+                    "STT is disabled",
+                    "can't listen",
+                    "VOICE_TOOLS_OPENAI_KEY",
+                )
+                if any(m in message_text for m in _stt_fail_markers):
+                    _stt_adapter = self.adapters.get(source.platform)
+                    _stt_meta = {"thread_id": source.thread_id} if source.thread_id else None
+                    if _stt_adapter:
+                        try:
+                            _stt_msg = (
+                                "🎤 I received your voice message but can't transcribe it — "
+                                "no speech-to-text provider is configured.\n\n"
+                                "To enable voice: install faster-whisper "
+                                "(`pip install faster-whisper` in the Hermes venv) "
+                                "and set `stt.enabled: true` in config.yaml, "
+                                "then /restart the gateway."
+                            )
+                            # Point to setup skill if it's installed
+                            if self._has_setup_skill():
+                                _stt_msg += "\n\nFor full setup instructions, type: `/skill hermes-agent-setup`"
+                            await _stt_adapter.send(
+                                source.chat_id, _stt_msg,
+                                metadata=_stt_meta,
+                            )
+                        except Exception:
+                            pass

        # -----------------------------------------------------------------
        # Enrich document messages with context notes for the agent
@ -3943,7 +3991,13 @@ class GatewayRunner:
            The enriched message string with transcriptions prepended.
        """
        if not getattr(self.config, "stt_enabled", True):
-            disabled_note = "[The user sent voice message(s), but transcription is disabled in config.]"
+            disabled_note = "[The user sent voice message(s), but transcription is disabled in config."
+            if self._has_setup_skill():
+                disabled_note += (
+                    " You have a skill called hermes-agent-setup that can help "
+                    "users configure Hermes features including voice, tools, and more."
+                )
+            disabled_note += "]"
            if user_text:
                return f"{disabled_note}\n\n{user_text}"
            return disabled_note
@ -3970,11 +4024,20 @@ class GatewayRunner:
                        "No STT provider" in error
                        or error.startswith("Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set")
                    ):
-                        enriched_parts.append(
+                        _no_stt_note = (
                            "[The user sent a voice message but I can't listen "
-                            "to it right now~ No STT provider is configured "
-                            "(';w;') Let them know!]"
+                            "to it right now — no STT provider is configured. "
+                            "A direct message has already been sent to the user "
+                            "with setup instructions."
                        )
+                        if self._has_setup_skill():
+                            _no_stt_note += (
+                                " You have a skill called hermes-agent-setup "
+                                "that can help users configure Hermes features "
+                                "including voice, tools, and more."
+                            )
+                        _no_stt_note += "]"
+                        enriched_parts.append(_no_stt_note)
                    else:
                        enriched_parts.append(
                            "[The user sent a voice message but I had trouble "
--- a/gateway/status.py
+++ b/gateway/status.py
@ -87,6 +87,7 @@ def _looks_like_gateway_process(pid: int) -> bool:

    patterns = (
        "hermes_cli.main gateway",
+        "hermes_cli/main.py gateway",
        "hermes gateway",
        "gateway/run.py",
    )
@ -105,6 +106,7 @@ def _record_looks_like_gateway(record: dict[str, Any]) -> bool:
    cmdline = " ".join(str(part) for part in argv)
    patterns = (
        "hermes_cli.main gateway",
+        "hermes_cli/main.py gateway",
        "hermes gateway",
        "gateway/run.py",
    )
--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@ -102,27 +102,22 @@ COMPACT_BANNER = """
 # =========================================================================

 def get_available_skills() -> Dict[str, List[str]]:
-    """Scan ~/.hermes/skills/ and return skills grouped by category."""
-    import os
+    """Return skills grouped by category, filtered by platform and disabled state.

-    hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
-    skills_dir = hermes_home / "skills"
-    skills_by_category = {}
-
-    if not skills_dir.exists():
-        return skills_by_category
-
-    for skill_file in skills_dir.rglob("SKILL.md"):
-        rel_path = skill_file.relative_to(skills_dir)
-        parts = rel_path.parts
-        if len(parts) >= 2:
-            category = parts[0]
-            skill_name = parts[-2]
-        else:
-            category = "general"
-            skill_name = skill_file.parent.name
-        skills_by_category.setdefault(category, []).append(skill_name)
+    Delegates to ``_find_all_skills()`` from ``tools/skills_tool`` which already
+    handles platform gating (``platforms:`` frontmatter) and respects the
+    user's ``skills.disabled`` config list.
+    """
+    try:
+        from tools.skills_tool import _find_all_skills
+        all_skills = _find_all_skills()  # already filtered
+    except Exception:
+        return {}

+    skills_by_category: Dict[str, List[str]] = {}
+    for skill in all_skills:
+        category = skill.get("category") or "general"
+        skills_by_category.setdefault(category, []).append(skill["name"])
    return skills_by_category


@ -233,6 +228,17 @@ def _format_context_length(tokens: int) -> str:
    return str(tokens)


+def _display_toolset_name(toolset_name: str) -> str:
+    """Return the banner label for a toolset id ("unknown" when falsy, "_tools" suffix dropped)."""
+    if not toolset_name:
+        return "unknown"
+    # Legacy identifiers carry a "_tools" suffix that the banner does not show.
+    legacy_suffix = "_tools"
+    if toolset_name.endswith(legacy_suffix):
+        return toolset_name[: -len(legacy_suffix)]
+    return toolset_name
+
+
 def build_welcome_banner(console: Console, model: str, cwd: str,
                         tools: List[dict] = None,
                         enabled_toolsets: List[str] = None,
@ -297,12 +303,12 @@ def build_welcome_banner(console: Console, model: str, cwd: str,

    for tool in tools:
        tool_name = tool["function"]["name"]
-        toolset = get_toolset_for_tool(tool_name) or "other"
+        toolset = _display_toolset_name(get_toolset_for_tool(tool_name) or "other")
        toolsets_dict.setdefault(toolset, []).append(tool_name)

    for item in unavailable_toolsets:
        toolset_id = item.get("id", item.get("name", "unknown"))
-        display_name = f"{toolset_id}_tools" if not toolset_id.endswith("_tools") else toolset_id
+        display_name = _display_toolset_name(toolset_id)
        if display_name not in toolsets_dict:
            toolsets_dict[display_name] = []
        for tool_name in item.get("tools", []):
@ -342,10 +348,10 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
                    colored_names.append(f"[{text}]{name}[/]")
            tools_str = ", ".join(colored_names)

-        right_lines.append(f"[dim #B8860B]{toolset}:[/] {tools_str}")
+        right_lines.append(f"[dim {dim}]{toolset}:[/] {tools_str}")

    if remaining_toolsets > 0:
-        right_lines.append(f"[dim #B8860B](and {remaining_toolsets} more toolsets...)[/]")
+        right_lines.append(f"[dim {dim}](and {remaining_toolsets} more toolsets...)[/]")

    # MCP Servers section (only if configured)
    try:
@ -356,12 +362,12 @@ def build_welcome_banner(console: Console, model: str, cwd: str,

    if mcp_status:
        right_lines.append("")
-        right_lines.append("[bold #FFBF00]MCP Servers[/]")
+        right_lines.append(f"[bold {accent}]MCP Servers[/]")
        for srv in mcp_status:
            if srv["connected"]:
                right_lines.append(
-                    f"[dim #B8860B]{srv['name']}[/] [#FFF8DC]({srv['transport']})[/] "
-                    f"[dim #B8860B]—[/] [#FFF8DC]{srv['tools']} tool(s)[/]"
+                    f"[dim {dim}]{srv['name']}[/] [{text}]({srv['transport']})[/] "
+                    f"[dim {dim}]—[/] [{text}]{srv['tools']} tool(s)[/]"
                )
            else:
                right_lines.append(
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@ -81,6 +81,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
               cli_only=True, args_hint="[text]", subcommands=("clear",)),
    CommandDef("personality", "Set a predefined personality", "Configuration",
               args_hint="[name]"),
+    CommandDef("statusbar", "Toggle the context/model status bar", "Configuration",
+               cli_only=True, aliases=("sb",)),
    CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose",
               "Configuration", cli_only=True),
    CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@ -31,6 +31,7 @@ def find_gateway_pids() -> list:
    pids = []
    patterns = [
        "hermes_cli.main gateway",
+        "hermes_cli/main.py gateway",
        "hermes gateway",
        "gateway/run.py",
    ]
@ -849,6 +850,46 @@ def launchd_stop():
    subprocess.run(["launchctl", "stop", "ai.hermes.gateway"], check=True)
    print("✓ Service stopped")

+def _wait_for_gateway_exit(timeout: float = 10.0, force_after: float = 5.0):
+    """Block until the gateway recorded in gateway.pid has exited.
+
+    The PID is looked up through ``get_running_pid()`` (the gateway.pid file)
+    rather than launchd labels, so gateways running under separate
+    HERMES_HOME directories are handled independently.
+
+    Args:
+        timeout: Total seconds to wait before giving up.
+        force_after: Seconds of graceful waiting before sending SIGKILL.
+    """
+    import time
+    from gateway.status import get_running_pid
+
+    give_up_at = time.monotonic() + timeout
+    kill_at = time.monotonic() + force_after
+    kill_pending = True
+
+    while time.monotonic() < give_up_at:
+        pid = get_running_pid()
+        if pid is None:
+            return  # Nothing is reported as running any more.
+
+        if kill_pending and time.monotonic() >= kill_at:
+            # Graceful window is over: SIGKILL exactly this PID, and only once.
+            try:
+                os.kill(pid, signal.SIGKILL)
+                print(f"⚠ Gateway PID {pid} did not exit gracefully; sent SIGKILL")
+            except (ProcessLookupError, PermissionError):
+                return  # Already gone or we can't touch it.
+            kill_pending = False
+
+        time.sleep(0.3)
+
+    # Deadline passed; warn if a PID is still reported as running.
+    leftover = get_running_pid()
+    if leftover is not None:
+        print(f"⚠ Gateway PID {leftover} still running after {timeout}s — restart may fail")
+
+
 def launchd_restart():
    try:
        launchd_stop()
@ -856,6 +897,7 @@ def launchd_restart():
        if e.returncode != 3:
            raise
        print("↻ launchd job was unloaded; skipping stop")
+    _wait_for_gateway_exit()
    launchd_start()

 def launchd_status(deep: bool = False):
@ -1753,10 +1795,9 @@ def gateway_command(args):
            killed = kill_gateway_processes()
            if killed:
                print(f"✓ Stopped {killed} gateway process(es)")
-            
-            import time
-            time.sleep(2)
-            
+
+            _wait_for_gateway_exit(timeout=10.0, force_after=5.0)
+
            # Start fresh
            print("Starting gateway...")
            run_gateway(verbose=False)
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@ -28,17 +28,26 @@ GITHUB_MODELS_CATALOG_URL = COPILOT_MODELS_URL
 OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-opus-4.6",       "recommended"),
    ("anthropic/claude-sonnet-4.5",     ""),
-    ("openai/gpt-5.4-pro",              ""),
+    ("anthropic/claude-haiku-4.5",      ""),
    ("openai/gpt-5.4",                  ""),
+    ("openai/gpt-5.4-mini",             ""),
+    ("openrouter/hunter-alpha",          "free"),
+    ("openrouter/healer-alpha",          "free"),
    ("openai/gpt-5.3-codex",            ""),
    ("google/gemini-3-pro-preview",     ""),
    ("google/gemini-3-flash-preview",   ""),
    ("qwen/qwen3.5-plus-02-15",         ""),
    ("qwen/qwen3.5-35b-a3b",            ""),
    ("stepfun/step-3.5-flash",          ""),
-    ("z-ai/glm-5",                      ""),
-    ("moonshotai/kimi-k2.5",            ""),
    ("minimax/minimax-m2.5",            ""),
+    ("z-ai/glm-5",                      ""),
+    ("z-ai/glm-5-turbo",                ""),
+    ("moonshotai/kimi-k2.5",            ""),
+    ("x-ai/grok-4.20-beta",             ""),
+    ("nvidia/nemotron-3-super-120b-a12b:free", "free"),
+    ("arcee-ai/trinity-large-preview:free", "free"),
+    ("openai/gpt-5.4-pro",              ""),
+    ("openai/gpt-5.4-nano",             ""),
 ]

 _PROVIDER_MODELS: dict[str, list[str]] = {
@ -90,11 +99,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "kimi-k2-0905-preview",
    ],
    "minimax": [
+        "MiniMax-M2.7",
+        "MiniMax-M2.7-highspeed",
        "MiniMax-M2.5",
        "MiniMax-M2.5-highspeed",
        "MiniMax-M2.1",
    ],
    "minimax-cn": [
+        "MiniMax-M2.7",
+        "MiniMax-M2.7-highspeed",
        "MiniMax-M2.5",
        "MiniMax-M2.5-highspeed",
        "MiniMax-M2.1",
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@ -76,8 +76,8 @@ _DEFAULT_PROVIDER_MODELS = {
    ],
    "zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
    "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
-    "minimax": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
-    "minimax-cn": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
+    "minimax": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
+    "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
    "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
    "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
 }
@ -1917,7 +1917,7 @@ def _install_neutts_deps() -> bool:
        return True
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
        print_error(f"Failed to install neutts: {e}")
-        print_info("Try manually: pip install neutts[all]")
+        print_info("Try manually: python -m pip install -U neutts[all]")
        return False


--- a/hermes_state.py
+++ b/hermes_state.py
@ -757,16 +757,14 @@ class SessionDB:
        if not query:
            return []

-        if source_filter is None:
-            source_filter = ["cli", "telegram", "discord", "whatsapp", "slack"]
-
        # Build WHERE clauses dynamically
        where_clauses = ["messages_fts MATCH ?"]
        params: list = [query]

-        source_placeholders = ",".join("?" for _ in source_filter)
-        where_clauses.append(f"s.source IN ({source_placeholders})")
-        params.extend(source_filter)
+        if source_filter is not None:
+            source_placeholders = ",".join("?" for _ in source_filter)
+            where_clauses.append(f"s.source IN ({source_placeholders})")
+            params.extend(source_filter)

        if role_filter:
            role_placeholders = ",".join("?" for _ in role_filter)
--- a/model_tools.py
+++ b/model_tools.py
@ -276,6 +276,7 @@ def get_tool_definitions(
 # The registry still holds their schemas; dispatch just returns a stub error
 # so if something slips through, the LLM sees a sensible message.
 _AGENT_LOOP_TOOLS = {"todo", "memory", "session_search", "delegate_task"}
+_READ_SEARCH_TOOLS = {"read_file", "search_files"}


 def handle_function_call(
@ -305,7 +306,6 @@ def handle_function_call(
    """
    # Notify the read-loop tracker when a non-read/search tool runs,
    # so the *consecutive* counter resets (reads after other work are fine).
-    _READ_SEARCH_TOOLS = {"read_file", "search_files"}
    if function_name not in _READ_SEARCH_TOOLS:
        try:
            from tools.file_tools import notify_other_tool_call
--- a/run_agent.py
+++ b/run_agent.py
@ -203,6 +203,27 @@ class IterationBudget:
 # When any of these appear in a batch, we fall back to sequential execution.
 _NEVER_PARALLEL_TOOLS = frozenset({"clarify"})

+# Read-only tools with no shared mutable session state.
+_PARALLEL_SAFE_TOOLS = frozenset({
+    "ha_get_state",
+    "ha_list_entities",
+    "ha_list_services",
+    "honcho_context",
+    "honcho_profile",
+    "honcho_search",
+    "read_file",
+    "search_files",
+    "session_search",
+    "skill_view",
+    "skills_list",
+    "vision_analyze",
+    "web_extract",
+    "web_search",
+})
+
+# File tools can run concurrently when they target independent paths.
+_PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
+
 # Maximum number of concurrent worker threads for parallel tool execution.
 _MAX_TOOL_WORKERS = 8

@ -234,6 +255,74 @@ def _is_destructive_command(cmd: str) -> bool:
    return False


+def _should_parallelize_tool_batch(tool_calls) -> bool:
+    """Return True when a tool-call batch is safe to run concurrently.
+
+    Requires 2+ calls, none in ``_NEVER_PARALLEL_TOOLS``, JSON-object arguments,
+    non-overlapping targets for ``_PATH_SCOPED_TOOLS``, and every other tool in
+    ``_PARALLEL_SAFE_TOOLS``.
+    """
+    if len(tool_calls) <= 1:
+        return False
+    if any(tc.function.name in _NEVER_PARALLEL_TOOLS for tc in tool_calls):
+        return False
+
+    reserved_paths: list[Path] = []
+    for tool_call in tool_calls:
+        tool_name = tool_call.function.name
+        raw_args = tool_call.function.arguments
+        try:
+            function_args = json.loads(raw_args)
+        except Exception:
+            # str() first: raw_args may be None or a dict here, which cannot be sliced.
+            logging.debug(
+                "Could not parse args for %s — defaulting to sequential; raw=%s",
+                tool_name, str(raw_args)[:200],
+            )
+            return False
+        if not isinstance(function_args, dict):
+            logging.debug(
+                "Non-dict args for %s (%s) — defaulting to sequential",
+                tool_name, type(function_args).__name__,
+            )
+            return False
+        if tool_name in _PATH_SCOPED_TOOLS:
+            scoped_path = _extract_parallel_scope_path(tool_name, function_args)
+            if scoped_path is None:
+                return False
+            if any(_paths_overlap(scoped_path, existing) for existing in reserved_paths):
+                return False
+            reserved_paths.append(scoped_path)
+        elif tool_name not in _PARALLEL_SAFE_TOOLS:
+            return False
+
+    return True
+
+
+def _extract_parallel_scope_path(tool_name: str, function_args: dict) -> Path | None:
+    """Return the absolute, normalized "path" argument of a path-scoped tool, else None."""
+    import os  # local import keeps this helper self-contained
+
+    raw_path = function_args.get("path")
+    if tool_name not in _PATH_SCOPED_TOOLS or not isinstance(raw_path, str) or not raw_path.strip():
+        return None
+
+    # abspath() anchors relative paths at the cwd and collapses "."/".." so that
+    # "a.py" and "/cwd/a.py" compare equal. Avoid resolve(); the file may not exist yet.
+    return Path(os.path.abspath(Path(raw_path).expanduser()))
+
+
+def _paths_overlap(left: Path, right: Path) -> bool:
+    """Return True when one path's components are a prefix of (or equal to) the other's."""
+    a, b = left.parts, right.parts
+    if not (a and b):
+        # A path without components (e.g. Path(".")) is never reported as overlapping.
+        # NOTE(review): that permits parallel runs against "." — confirm this is intended.
+        return False
+    # zip() stops at the shorter tuple, so only the shared-length prefix is compared.
+    return all(x == y for x, y in zip(a, b))
+
+
 def _inject_honcho_turn_context(content, turn_context: str):
    """Append Honcho recall to the current-turn user message without mutating history.

@ -263,11 +352,20 @@ def _inject_honcho_turn_context(content, turn_context: str):
 class AIAgent:
    """
    AI Agent with tool calling capabilities.
-    
+
    This class manages the conversation flow, tool execution, and response handling
    for AI models that support function calling.
    """
-    
+
+    @property
+    def base_url(self) -> str:  # raw value as last assigned through the setter
+        return self._base_url
+
+    @base_url.setter
+    def base_url(self, value: str) -> None:  # also refreshes the lowercase copy
+        self._base_url = value
+        self._base_url_lower = "" if not value else value.lower()
+
    def __init__(
        self,
        base_url: str = None,
@ -389,10 +487,10 @@ class AIAgent:
            self.api_mode = api_mode
        elif self.provider == "openai-codex":
            self.api_mode = "codex_responses"
-        elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower():
+        elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower:
            self.api_mode = "codex_responses"
            self.provider = "openai-codex"
-        elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self.base_url.lower()):
+        elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower):
            self.api_mode = "anthropic_messages"
            self.provider = "anthropic"
        else:
@ -401,7 +499,7 @@ class AIAgent:
        # Pre-warm OpenRouter model metadata cache in a background thread.
        # fetch_model_metadata() is cached for 1 hour; this avoids a blocking
        # HTTP request on the first API response when pricing is estimated.
-        if self.provider == "openrouter" or "openrouter" in self.base_url.lower():
+        if self.provider == "openrouter" or "openrouter" in self._base_url_lower:
            threading.Thread(
                target=lambda: fetch_model_metadata(),
                daemon=True,
@ -445,7 +543,7 @@ class AIAgent:
        # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
        # Reduces input costs by ~75% on multi-turn conversations by caching the
        # conversation prefix. Uses system_and_3 strategy (4 breakpoints).
-        is_openrouter = "openrouter" in self.base_url.lower()
+        is_openrouter = "openrouter" in self._base_url_lower
        is_claude = "claude" in self.model.lower()
        is_native_anthropic = self.api_mode == "anthropic_messages"
        self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic
@ -561,6 +659,7 @@ class AIAgent:
        if self.api_mode == "anthropic_messages":
            from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
            effective_key = api_key or resolve_anthropic_token() or ""
+            self.api_key = effective_key
            self._anthropic_api_key = effective_key
            self._anthropic_base_url = base_url
            from agent.anthropic_adapter import _is_oauth_token as _is_oat
@ -622,6 +721,7 @@ class AIAgent:
                    }
            
            self._client_kwargs = client_kwargs  # stored for rebuilding after interrupt
+            self.api_key = client_kwargs.get("api_key", "")
            try:
                self.client = self._create_openai_client(client_kwargs, reason="agent_init", shared=True)
                if not self.quiet_mode:
@ -745,16 +845,24 @@ class AIAgent:
        from tools.todo_tool import TodoStore
        self._todo_store = TodoStore()
        
+        # Load config once for memory, skills, and compression sections
+        try:
+            from hermes_cli.config import load_config as _load_agent_config
+            _agent_cfg = _load_agent_config()
+        except Exception:
+            _agent_cfg = {}
+
        # Persistent memory (MEMORY.md + USER.md) -- loaded from disk
        self._memory_store = None
        self._memory_enabled = False
        self._user_profile_enabled = False
        self._memory_nudge_interval = 10
        self._memory_flush_min_turns = 6
+        self._turns_since_memory = 0
+        self._iters_since_skill = 0
        if not skip_memory:
            try:
-                from hermes_cli.config import load_config as _load_mem_config
-                mem_config = _load_mem_config().get("memory", {})
+                mem_config = _agent_cfg.get("memory", {})
                self._memory_enabled = mem_config.get("memory_enabled", False)
                self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
                self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
@ -842,21 +950,16 @@ class AIAgent:
        # Skills config: nudge interval for skill creation reminders
        self._skill_nudge_interval = 10
        try:
-            from hermes_cli.config import load_config as _load_skills_config
-            skills_config = _load_skills_config().get("skills", {})
+            skills_config = _agent_cfg.get("skills", {})
            self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 15))
        except Exception:
            pass
-        
+
        # Initialize context compressor for automatic context management
        # Compresses conversation when approaching model's context limit
        # Configuration via config.yaml (compression section)
-        try:
-            from hermes_cli.config import load_config as _load_compression_config
-            _compression_cfg = _load_compression_config().get("compression", {})
-            if not isinstance(_compression_cfg, dict):
-                _compression_cfg = {}
-        except ImportError:
+        _compression_cfg = _agent_cfg.get("compression", {})
+        if not isinstance(_compression_cfg, dict):
            _compression_cfg = {}
        compression_threshold = float(_compression_cfg.get("threshold", 0.50))
        compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes")
@ -871,6 +974,7 @@ class AIAgent:
            summary_model_override=compression_summary_model,
            quiet_mode=self.quiet_mode,
            base_url=self.base_url,
+            api_key=getattr(self, "api_key", ""),
        )
        self.compression_enabled = compression_enabled
        self._user_turn_count = 0
@ -926,8 +1030,8 @@ class AIAgent:
        OpenAI models use 'max_tokens'.
        """
        _is_direct_openai = (
-            "api.openai.com" in self.base_url.lower()
-            and "openrouter" not in self.base_url.lower()
+            "api.openai.com" in self._base_url_lower
+            and "openrouter" not in self._base_url_lower
        )
        if _is_direct_openai:
            return {"max_completion_tokens": value}
@ -2977,6 +3081,9 @@ class AIAgent:
            return False

        self._anthropic_api_key = new_token
+        # Update OAuth flag — token type may have changed (API key ↔ OAuth)
+        from agent.anthropic_adapter import _is_oauth_token
+        self._is_anthropic_oauth = _is_oauth_token(new_token)
        return True

    def _anthropic_messages_create(self, api_kwargs: dict):
@ -3368,11 +3475,12 @@ class AIAgent:

            if fb_api_mode == "anthropic_messages":
                # Build native Anthropic client instead of using OpenAI client
-                from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
+                from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token, _is_oauth_token
                effective_key = fb_client.api_key or resolve_anthropic_token() or ""
                self._anthropic_api_key = effective_key
                self._anthropic_base_url = getattr(fb_client, "base_url", None)
                self._anthropic_client = build_anthropic_client(effective_key, self._anthropic_base_url)
+                self._is_anthropic_oauth = _is_oauth_token(effective_key)
                self.client = None
                self._client_kwargs = {}
            else:
@ -3678,10 +3786,10 @@ class AIAgent:

        extra_body = {}

-        _is_openrouter = "openrouter" in self.base_url.lower()
+        _is_openrouter = "openrouter" in self._base_url_lower
        _is_github_models = (
-            "models.github.ai" in self.base_url.lower()
-            or "api.githubcopilot.com" in self.base_url.lower()
+            "models.github.ai" in self._base_url_lower
+            or "api.githubcopilot.com" in self._base_url_lower
        )

        # Provider preferences (only, ignore, order, sort) are OpenRouter-
@ -3690,7 +3798,7 @@ class AIAgent:
        # for _is_nous when their backend is updated.
        if provider_preferences and _is_openrouter:
            extra_body["provider"] = provider_preferences
-        _is_nous = "nousresearch" in self.base_url.lower()
+        _is_nous = "nousresearch" in self._base_url_lower

        if self._supports_reasoning_extra_body():
            if _is_github_models:
@ -3728,21 +3836,20 @@ class AIAgent:
        Some providers/routes reject `reasoning` with 400s, so gate it to
        known reasoning-capable model families and direct Nous Portal.
        """
-        base_url = (self.base_url or "").lower()
-        if "nousresearch" in base_url:
+        if "nousresearch" in self._base_url_lower:
            return True
-        if "ai-gateway.vercel.sh" in base_url:
+        if "ai-gateway.vercel.sh" in self._base_url_lower:
            return True
-        if "models.github.ai" in base_url or "api.githubcopilot.com" in base_url:
+        if "models.github.ai" in self._base_url_lower or "api.githubcopilot.com" in self._base_url_lower:
            try:
                from hermes_cli.models import github_model_reasoning_efforts

                return bool(github_model_reasoning_efforts(self.model))
            except Exception:
                return False
-        if "openrouter" not in base_url:
+        if "openrouter" not in self._base_url_lower:
            return False
-        if "api.mistral.ai" in base_url:
+        if "api.mistral.ai" in self._base_url_lower:
            return False

        model = (self.model or "").lower()
@ -3960,7 +4067,7 @@ class AIAgent:

        try:
            # Build API messages for the flush call
-            _is_strict_api = "api.mistral.ai" in self.base_url.lower()
+            _is_strict_api = "api.mistral.ai" in self._base_url_lower
            api_messages = []
            for msg in messages:
                api_msg = msg.copy()
@ -4149,20 +4256,17 @@ class AIAgent:
    def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
        """Execute tool calls from the assistant message and append results to messages.

-        Dispatches to concurrent execution when multiple independent tool calls
-        are present, falling back to sequential execution for single calls or
-        when interactive tools (e.g. clarify) are in the batch.
+        Dispatches to concurrent execution only for batches that look
+        independent: read-only tools may always share the parallel path, while
+        file reads/writes may do so only when their target paths do not overlap.
        """
        tool_calls = assistant_message.tool_calls

-        # Single tool call or interactive tool present → sequential
-        if (len(tool_calls) <= 1
-                or any(tc.function.name in _NEVER_PARALLEL_TOOLS for tc in tool_calls)):
+        if not _should_parallelize_tool_batch(tool_calls):
            return self._execute_tool_calls_sequential(
                assistant_message, messages, effective_task_id, api_call_count
            )

-        # Multiple non-interactive tools → concurrent
        return self._execute_tool_calls_concurrent(
            assistant_message, messages, effective_task_id, api_call_count
        )
@ -4736,7 +4840,7 @@ class AIAgent:
        try:
            # Build API messages, stripping internal-only fields
            # (finish_reason, reasoning) that strict APIs like Mistral reject with 422
-            _is_strict_api = "api.mistral.ai" in self.base_url.lower()
+            _is_strict_api = "api.mistral.ai" in self._base_url_lower
            api_messages = []
            for msg in messages:
                api_msg = msg.copy()
@ -4757,7 +4861,7 @@ class AIAgent:
                    api_messages.insert(sys_offset + idx, pfm.copy())

            summary_extra_body = {}
-            _is_nous = "nousresearch" in self.base_url.lower()
+            _is_nous = "nousresearch" in self._base_url_lower
            if self._supports_reasoning_extra_body():
                if self.reasoning_config is not None:
                    summary_extra_body["reasoning"] = self.reasoning_config
@ -4920,8 +5024,9 @@ class AIAgent:
        self._incomplete_scratchpad_retries = 0
        self._codex_incomplete_retries = 0
        self._last_content_with_tools = None
-        self._turns_since_memory = 0
-        self._iters_since_skill = 0
+        # NOTE: _turns_since_memory and _iters_since_skill are NOT reset here.
+        # They are initialized in __init__ and must persist across run_conversation
+        # calls so that nudge logic accumulates correctly in CLI mode.
        self.iteration_budget = IterationBudget(self.max_iterations)
        
        # Initialize conversation (copy to avoid mutating the caller's list)
@ -5174,7 +5279,7 @@ class AIAgent:
                # strict providers like Mistral that reject unknown fields with 422.
                # Uses new dicts so the internal messages list retains the fields
                # for Codex Responses compatibility.
-                if "api.mistral.ai" in self.base_url.lower():
+                if "api.mistral.ai" in self._base_url_lower:
                    self._sanitize_tool_calls_for_strict_api(api_msg)
                # Keep 'reasoning_details' - OpenRouter uses this for multi-turn reasoning context
                # The signature field helps maintain reasoning continuity
@ -5546,6 +5651,7 @@ class AIAgent:
                            canonical_usage,
                            provider=self.provider,
                            base_url=self.base_url,
+                            api_key=getattr(self, "api_key", ""),
                        )
                        if cost_result.amount_usd is not None:
                            self.session_estimated_cost_usd += float(cost_result.amount_usd)
@ -5939,10 +6045,6 @@ class AIAgent:
                        self._client_log_context(),
                        api_error,
                    )
-                    if retry_count >= max_retries:
-                        self._vprint(f"{self.log_prefix}⚠️  API call failed after {retry_count} attempts: {str(api_error)[:100]}")
-                        self._vprint(f"{self.log_prefix}⏳ Final retry in {wait_time}s...")
-                    
                    # Sleep in small increments so we can respond to interrupts quickly
                    # instead of blocking the entire wait_time in one sleep() call
                    sleep_end = time.time() + wait_time
--- a/skills/dogfood/hermes-agent-setup/SKILL.md
+++ b/skills/dogfood/hermes-agent-setup/SKILL.md
@ -0,0 +1,300 @@
+---
+name: hermes-agent-setup
+description: Help users configure Hermes Agent — CLI usage, setup wizard, model/provider selection, tools, skills, voice/STT/TTS, gateway, and troubleshooting. Use when someone asks to enable features, configure settings, or needs help with Hermes itself.
+version: 1.1.0
+author: Hermes Agent
+tags: [setup, configuration, tools, stt, tts, voice, hermes, cli, skills]
+---
+
+# Hermes Agent Setup & Configuration
+
+Use this skill when a user asks about configuring Hermes, enabling features, setting up voice, managing tools/skills, or troubleshooting.
+
+## Key Paths
+
+- Config: `~/.hermes/config.yaml`
+- API keys: `~/.hermes/.env`
+- Skills: `~/.hermes/skills/`
+- Hermes install: `~/.hermes/hermes-agent/`
+- Venv: `~/.hermes/hermes-agent/.venv/` (or `venv/`)
+
+## CLI Overview
+
+Hermes is used via the `hermes` command (or `python -m hermes_cli.main` from the repo).
+
+### Core commands:
+
+```
+hermes                          Interactive chat (default)
+hermes chat -q "question"       Single query, then exit
+hermes chat -m MODEL            Chat with a specific model
+hermes -c                       Resume most recent session
+hermes -c "project name"        Resume session by name
+hermes --resume SESSION_ID      Resume by exact ID
+hermes -w                       Isolated git worktree mode
+hermes -s skill1,skill2         Preload skills for the session
+hermes --yolo                   Skip dangerous command approval
+```
+
+### Configuration & setup:
+
+```
+hermes setup                    Interactive setup wizard (provider, API keys, model)
+hermes model                    Interactive model/provider selection
+hermes config                   View current configuration
+hermes config edit              Open config.yaml in $EDITOR
+hermes config set KEY VALUE     Set a config value directly
+hermes login                    Authenticate with a provider
+hermes logout                   Clear stored auth
+hermes doctor                   Check configuration and dependencies
+```
+
+### Tools & skills:
+
+```
+hermes tools                    Interactive tool enable/disable per platform
+hermes skills list              List installed skills
+hermes skills search QUERY      Search the skills hub
+hermes skills install NAME      Install a skill from the hub
+hermes skills config            Enable/disable skills per platform
+```
+
+### Gateway (messaging platforms):
+
+```
+hermes gateway run              Start the messaging gateway
+hermes gateway install          Install gateway as background service
+hermes gateway status           Check gateway status
+```
+
+### Session management:
+
+```
+hermes sessions list            List past sessions
+hermes sessions browse          Interactive session picker
+hermes sessions rename ID TITLE Rename a session
+hermes sessions export ID       Export session as markdown
+hermes sessions prune           Clean up old sessions
+```
+
+### Other:
+
+```
+hermes status                   Show status of all components
+hermes cron list                List cron jobs
+hermes insights                 Usage analytics
+hermes update                   Update to latest version
+hermes pairing                  Manage DM authorization codes
+```
+
+## Setup Wizard (`hermes setup`)
+
+The interactive setup wizard walks through:
+1. **Provider selection** — OpenRouter, Anthropic, OpenAI, Google, DeepSeek, and many more
+2. **API key entry** — stores the key securely in the env file (`~/.hermes/.env`)
+3. **Model selection** — picks from available models for the chosen provider
+4. **Basic settings** — reasoning effort, tool preferences
+
+Run it from terminal:
+```bash
+cd ~/.hermes/hermes-agent
+source .venv/bin/activate
+python -m hermes_cli.main setup
+```
+
+To change just the model/provider later: `hermes model`
+
+## Skills Configuration (`hermes skills`)
+
+Skills are reusable instruction sets that extend what Hermes can do.
+
+### Managing skills:
+
+```bash
+hermes skills list              # Show installed skills
+hermes skills search "docker"   # Search the hub
+hermes skills install NAME      # Install from hub
+hermes skills config            # Enable/disable per platform
+```
+
+### Per-platform skill control:
+
+`hermes skills config` opens an interactive UI where you can enable or disable specific skills for each platform (cli, telegram, discord, etc.). Disabled skills won't appear in the agent's available skills list for that platform.
+
+### Loading skills in a session:
+
+- CLI: `hermes -s skill-name` or `hermes -s skill1,skill2`
+- Chat: `/skill skill-name`
+- Gateway: type `/skill skill-name` in any chat
+
+## Voice Messages (STT)
+
+Voice messages from Telegram/Discord/WhatsApp/Slack/Signal are auto-transcribed when an STT provider is available.
+
+### Provider priority (auto-detected):
+1. **Local faster-whisper** — free, no API key, runs on CPU/GPU
+2. **Groq Whisper** — free tier, needs GROQ_API_KEY
+3. **OpenAI Whisper** — paid, needs VOICE_TOOLS_OPENAI_KEY
+
+### Setup local STT (recommended):
+
+```bash
+cd ~/.hermes/hermes-agent
+source .venv/bin/activate  # or: source venv/bin/activate
+pip install faster-whisper
+```
+
+Add to config.yaml under the `stt:` section:
+```yaml
+stt:
+  enabled: true
+  provider: local
+  local:
+    model: base  # Options: tiny, base, small, medium, large-v3
+```
+
+Model downloads automatically on first use (~150 MB for base).
+
+### Setup Groq STT (free cloud):
+
+1. Get a free API key from https://console.groq.com
+2. Add GROQ_API_KEY to the env file
+3. Set `provider: groq` in the `stt:` section of config.yaml
+
+### Verify STT:
+
+After config changes, restart the gateway (send /restart in chat, or restart `hermes gateway run`). Then send a voice message.
+
+## Voice Replies (TTS)
+
+Hermes can reply with voice when users send voice messages.
+
+### TTS providers (set API key in env file):
+
+| Provider | Env var | Free? |
+|----------|---------|-------|
+| ElevenLabs | ELEVENLABS_API_KEY | Free tier |
+| OpenAI | VOICE_TOOLS_OPENAI_KEY | Paid |
+| Kokoro (local) | None needed | Free |
+| Fish Audio | FISH_AUDIO_API_KEY | Free tier |
+
+### Voice commands (in any chat):
+- `/voice on` — voice reply to voice messages only
+- `/voice tts` — voice reply to all messages
+- `/voice off` — text only (default)
+
+## Enabling/Disabling Tools (`hermes tools`)
+
+### Interactive tool config:
+
+```bash
+cd ~/.hermes/hermes-agent
+source .venv/bin/activate
+python -m hermes_cli.main tools
+```
+
+This opens a curses UI to enable/disable toolsets per platform (cli, telegram, discord, slack, etc.).
+
+### After changing tools:
+
+Use `/reset` in the chat to start a fresh session with the new toolset. Tool changes do NOT take effect mid-conversation (this preserves prompt caching and avoids cost spikes).
+
+### Common toolsets:
+
+| Toolset | What it provides |
+|---------|-----------------|
+| terminal | Shell command execution |
+| file | File read/write/search/patch |
+| web | Web search and extraction |
+| browser | Browser automation (needs Browserbase) |
+| image_gen | AI image generation |
+| mcp | MCP server connections |
+| voice | Text-to-speech output |
+| cronjob | Scheduled tasks |
+
+## Installing Dependencies
+
+Some tools need extra packages:
+
+```bash
+cd ~/.hermes/hermes-agent && source .venv/bin/activate
+
+pip install faster-whisper    # Local STT (voice transcription)
+pip install browserbase       # Browser automation
+pip install mcp               # MCP server connections
+```
+
+## Config File Reference
+
+The main config file is `~/.hermes/config.yaml`. Key sections:
+
+```yaml
+# Model and provider
+model:
+  default: anthropic/claude-opus-4.6
+  provider: openrouter
+
+# Agent behavior
+agent:
+  max_turns: 90
+  reasoning_effort: high    # xhigh, high, medium, low, minimal, none
+
+# Voice
+stt:
+  enabled: true
+  provider: local           # local, groq, openai
+tts:
+  provider: elevenlabs      # elevenlabs, openai, kokoro, fish
+
+# Display
+display:
+  skin: default             # default, ares, mono, slate
+  tool_progress: full       # full, compact, off
+  background_process_notifications: all  # all, result, error, off
+```
+
+Edit with `hermes config edit` or `hermes config set KEY VALUE`.
+
+## Gateway Commands (Messaging Platforms)
+
+| Command | What it does |
+|---------|-------------|
+| /reset or /new | Fresh session (picks up new tool config) |
+| /help | Show all commands |
+| /model [name] | Show or change model |
+| /compact | Compress conversation to save context |
+| /voice [mode] | Configure voice replies |
+| /reasoning [effort] | Set reasoning level |
+| /sethome | Set home channel for cron/notifications |
+| /restart | Restart the gateway (picks up config changes) |
+| /status | Show session info |
+| /retry | Retry last message |
+| /undo | Remove last exchange |
+| /personality [name] | Set agent personality |
+| /skill [name] | Load a skill |
+
+## Troubleshooting
+
+### Voice messages not working
+1. Check that `stt.enabled` is `true` in config.yaml
+2. Check a provider is available (faster-whisper installed, or API key set)
+3. Restart gateway after config changes (/restart)
+
+### Tool not available
+1. Run `hermes tools` to check if the toolset is enabled for your platform
+2. Some tools need env vars — check the env file
+3. Use /reset after enabling tools
+
+### Model/provider issues
+1. Run `hermes doctor` to check configuration
+2. Run `hermes login` to re-authenticate
+3. Check the env file has the right API key
+
+### Changes not taking effect
+- Gateway: /reset for tool changes, /restart for config changes
+- CLI: start a new session
+
+### Skills not showing up
+1. Check `hermes skills list` shows the skill
+2. Check `hermes skills config` has it enabled for your platform
+3. Load explicitly with `/skill name` or `hermes -s name`
--- a/skills/mlops/huggingface-hub/SKILL.md
+++ b/skills/mlops/huggingface-hub/SKILL.md
@ -0,0 +1,80 @@
+---
+name: huggingface-hub
+description: Hugging Face Hub CLI (hf) — download/upload models and datasets, manage repos, run SQL on datasets, deploy inference endpoints, manage Spaces, and more. Use when working with HuggingFace models, datasets, or infrastructure.
+version: 1.0.0
+author: Hugging Face
+license: MIT
+tags: [huggingface, hf, models, datasets, hub, mlops]
+---
+
+# Hugging Face CLI (`hf`) Reference Guide
+
+The `hf` command is the modern command-line interface for interacting with the Hugging Face Hub, providing tools to manage repositories, models, datasets, and Spaces.
+
+> **IMPORTANT:** The `hf` command replaces the now-deprecated `huggingface-cli` command.
+
+## Quick Start
+*   **Installation:** `curl -LsSf https://hf.co/cli/install.sh | bash -s`
+*   **Help:** Use `hf --help` to view all available functions and real-world examples.
+*   **Authentication:** Recommended via `HF_TOKEN` environment variable or the `--token` flag.
+
+---
+
+## Core Commands
+
+### General Operations
+*   `hf download REPO_ID`: Download files from the Hub.
+*   `hf upload REPO_ID`: Upload files/folders (recommended for single-commit).
+*   `hf upload-large-folder REPO_ID LOCAL_PATH`: Recommended for resumable uploads of large directories.
+*   `hf sync`: Sync files between a local directory and a bucket.
+*   `hf env` / `hf version`: View environment and version details.
+
+### Authentication (`hf auth`)
+*   `login` / `logout`: Manage sessions using tokens from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens).
+*   `list` / `switch`: Manage and toggle between multiple stored access tokens.
+*   `whoami`: Identify the currently logged-in account.
+
+### Repository Management (`hf repos`)
+*   `create` / `delete`: Create or permanently remove repositories.
+*   `duplicate`: Clone a model, dataset, or Space to a new ID.
+*   `move`: Transfer a repository between namespaces.
+*   `branch` / `tag`: Manage Git-like references.
+*   `delete-files`: Remove specific files using patterns.
+
+---
+
+## Specialized Hub Interactions
+
+### Datasets & Models
+*   **Datasets:** `hf datasets list`, `info`, and `parquet` (list parquet URLs).
+*   **SQL Queries:** `hf datasets sql SQL` — Execute raw SQL via DuckDB against dataset parquet URLs.
+*   **Models:** `hf models list` and `info`.
+*   **Papers:** `hf papers list` — View daily papers.
+
+### Discussions & Pull Requests (`hf discussions`)
+*   Manage the lifecycle of Hub contributions: `list`, `create`, `info`, `comment`, `close`, `reopen`, and `rename`.
+*   `diff`: View changes in a PR.
+*   `merge`: Finalize pull requests.
+
+### Infrastructure & Compute
+*   **Endpoints:** Deploy and manage Inference Endpoints (`deploy`, `pause`, `resume`, `scale-to-zero`, `catalog`).
+*   **Jobs:** Run compute tasks on HF infrastructure. Includes `hf jobs uv` for running Python scripts with inline dependencies and `stats` for resource monitoring.
+*   **Spaces:** Manage interactive apps. Includes `dev-mode` and `hot-reload` for Python files without full restarts.
+
+### Storage & Automation
+*   **Buckets:** Full S3-like bucket management (`create`, `cp`, `mv`, `rm`, `sync`).
+*   **Cache:** Manage local storage with `list`, `prune` (remove detached revisions), and `verify` (checksum checks).
+*   **Webhooks:** Automate workflows by managing Hub webhooks (`create`, `watch`, `enable`/`disable`).
+*   **Collections:** Organize Hub items into collections (`add-item`, `update`, `list`).
+
+---
+
+## Advanced Usage & Tips
+
+### Global Flags
+*   `--format json`: Produces machine-readable output for automation.
+*   `-q` / `--quiet`: Limits output to IDs only.
+
+### Extensions & Skills
+*   **Extensions:** Extend CLI functionality via GitHub repositories using `hf extensions install REPO_ID`.
+*   **Skills:** Manage AI assistant skills with `hf skills add`.
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@ -188,6 +188,36 @@ class TestGetModelContextLength:
            result = get_model_context_length("custom/model")
            assert result == CONTEXT_PROBE_TIERS[0]

+    @patch("agent.model_metadata.fetch_model_metadata")
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    def test_custom_endpoint_metadata_beats_fuzzy_default(self, mock_endpoint_fetch, mock_fetch):
+        mock_fetch.return_value = {}
+        mock_endpoint_fetch.return_value = {
+            "zai-org/GLM-5-TEE": {"context_length": 65536}
+        }
+
+        result = get_model_context_length(
+            "zai-org/GLM-5-TEE",
+            base_url="https://llm.chutes.ai/v1",
+            api_key="test-key",
+        )
+
+        assert result == 65536
+
+    @patch("agent.model_metadata.fetch_model_metadata")
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    def test_custom_endpoint_without_metadata_skips_name_based_default(self, mock_endpoint_fetch, mock_fetch):
+        mock_fetch.return_value = {}
+        mock_endpoint_fetch.return_value = {}
+
+        result = get_model_context_length(
+            "zai-org/GLM-5-TEE",
+            base_url="https://llm.chutes.ai/v1",
+            api_key="test-key",
+        )
+
+        assert result == CONTEXT_PROBE_TIERS[0]
+

 # =========================================================================
 # fetch_model_metadata — caching, TTL, slugs, failures
@ -258,6 +288,25 @@ class TestFetchModelMetadata:
        assert "anthropic/claude-3.5-sonnet" in result
        assert result["anthropic/claude-3.5-sonnet"]["context_length"] == 200000

+    @patch("agent.model_metadata.requests.get")
+    def test_provider_prefixed_models_get_bare_aliases(self, mock_get):
+        self._reset_cache()
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "data": [{
+                "id": "provider/test-model",
+                "context_length": 123456,
+                "name": "Provider: Test Model",
+            }]
+        }
+        mock_response.raise_for_status = MagicMock()
+        mock_get.return_value = mock_response
+
+        result = fetch_model_metadata(force_refresh=True)
+
+        assert result["provider/test-model"]["context_length"] == 123456
+        assert result["test-model"]["context_length"] == 123456
+
    @patch("agent.model_metadata.requests.get")
    def test_ttl_expiry_triggers_refetch(self, mock_get):
        """Cache expires after _MODEL_CACHE_TTL seconds."""
--- a/tests/agent/test_prompt_builder.py
+++ b/tests/agent/test_prompt_builder.py
@ -309,6 +309,35 @@ class TestBuildSkillsSystemPrompt:
        assert "imessage" in result
        assert "Send iMessages" in result

+    def test_excludes_disabled_skills(self, monkeypatch, tmp_path):
+        """Skills in the user's disabled list should not appear in the system prompt."""
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        skills_dir = tmp_path / "skills" / "tools"
+        skills_dir.mkdir(parents=True)
+
+        enabled_skill = skills_dir / "web-search"
+        enabled_skill.mkdir()
+        (enabled_skill / "SKILL.md").write_text(
+            "---\nname: web-search\ndescription: Search the web\n---\n"
+        )
+
+        disabled_skill = skills_dir / "old-tool"
+        disabled_skill.mkdir()
+        (disabled_skill / "SKILL.md").write_text(
+            "---\nname: old-tool\ndescription: Deprecated tool\n---\n"
+        )
+
+        from unittest.mock import patch
+
+        with patch(
+            "tools.skills_tool._get_disabled_skill_names",
+            return_value={"old-tool"},
+        ):
+            result = build_skills_system_prompt()
+
+        assert "web-search" in result
+        assert "old-tool" not in result
+
    def test_includes_setup_needed_skills(self, monkeypatch, tmp_path):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.delenv("MISSING_API_KEY_XYZ", raising=False)
--- a/tests/agent/test_skill_commands.py
+++ b/tests/agent/test_skill_commands.py
@ -85,6 +85,21 @@ class TestScanSkillCommands:
            result = scan_skill_commands()
        assert "/generic-tool" in result

+    def test_excludes_disabled_skills(self, tmp_path):
+        """Disabled skills should not register slash commands."""
+        with (
+            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
+            patch(
+                "tools.skills_tool._get_disabled_skill_names",
+                return_value={"disabled-skill"},
+            ),
+        ):
+            _make_skill(tmp_path, "enabled-skill")
+            _make_skill(tmp_path, "disabled-skill")
+            result = scan_skill_commands()
+        assert "/enabled-skill" in result
+        assert "/disabled-skill" not in result
+

 class TestBuildPreloadedSkillsPrompt:
    def test_builds_prompt_for_multiple_named_skills(self, tmp_path):
--- a/tests/agent/test_usage_pricing.py
+++ b/tests/agent/test_usage_pricing.py
@ -99,3 +99,27 @@ def test_estimate_usage_cost_refuses_cache_pricing_without_official_cache_rate(m
    )

    assert result.status == "unknown"
+
+
+def test_custom_endpoint_models_api_pricing_is_supported(monkeypatch):
+    monkeypatch.setattr(
+        "agent.usage_pricing.fetch_endpoint_model_metadata",
+        lambda base_url, api_key=None: {
+            "zai-org/GLM-5-TEE": {
+                "pricing": {
+                    "prompt": "0.0000005",
+                    "completion": "0.000002",
+                }
+            }
+        },
+    )
+
+    entry = get_pricing_entry(
+        "zai-org/GLM-5-TEE",
+        provider="custom",
+        base_url="https://llm.chutes.ai/v1",
+        api_key="test-key",
+    )
+
+    assert float(entry.input_cost_per_million) == 0.5
+    assert float(entry.output_cost_per_million) == 2.0
--- a/tests/cron/test_jobs.py
+++ b/tests/cron/test_jobs.py
@ -2,7 +2,7 @@

 import json
 import pytest
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from pathlib import Path
 from unittest.mock import patch

@ -122,11 +122,29 @@ class TestComputeNextRun:
        schedule = {"kind": "once", "run_at": future}
        assert compute_next_run(schedule) == future

+    def test_once_recent_past_within_grace_returns_time(self, monkeypatch):
+        now = datetime(2026, 3, 18, 4, 22, 3, tzinfo=timezone.utc)
+        run_at = "2026-03-18T04:22:00+00:00"
+        monkeypatch.setattr("cron.jobs._hermes_now", lambda: now)
+
+        schedule = {"kind": "once", "run_at": run_at}
+
+        assert compute_next_run(schedule) == run_at
+
    def test_once_past_returns_none(self):
        past = (datetime.now() - timedelta(hours=1)).isoformat()
        schedule = {"kind": "once", "run_at": past}
        assert compute_next_run(schedule) is None

+    def test_once_with_last_run_returns_none_even_within_grace(self, monkeypatch):
+        now = datetime(2026, 3, 18, 4, 22, 3, tzinfo=timezone.utc)
+        run_at = "2026-03-18T04:22:00+00:00"
+        monkeypatch.setattr("cron.jobs._hermes_now", lambda: now)
+
+        schedule = {"kind": "once", "run_at": run_at}
+
+        assert compute_next_run(schedule, last_run_at=now.isoformat()) is None
+
    def test_interval_first_run(self):
        schedule = {"kind": "interval", "minutes": 60}
        result = compute_next_run(schedule)
@ -347,6 +365,67 @@ class TestGetDueJobs:
        due = get_due_jobs()
        assert len(due) == 0

+    def test_broken_recent_one_shot_without_next_run_is_recovered(self, tmp_cron_dir, monkeypatch):
+        now = datetime(2026, 3, 18, 4, 22, 30, tzinfo=timezone.utc)
+        monkeypatch.setattr("cron.jobs._hermes_now", lambda: now)
+
+        run_at = "2026-03-18T04:22:00+00:00"
+        save_jobs(
+            [{
+                "id": "oneshot-recover",
+                "name": "Recover me",
+                "prompt": "Word of the day",
+                "schedule": {"kind": "once", "run_at": run_at, "display": "once at 2026-03-18 04:22"},
+                "schedule_display": "once at 2026-03-18 04:22",
+                "repeat": {"times": 1, "completed": 0},
+                "enabled": True,
+                "state": "scheduled",
+                "paused_at": None,
+                "paused_reason": None,
+                "created_at": "2026-03-18T04:21:00+00:00",
+                "next_run_at": None,
+                "last_run_at": None,
+                "last_status": None,
+                "last_error": None,
+                "deliver": "local",
+                "origin": None,
+            }]
+        )
+
+        due = get_due_jobs()
+
+        assert [job["id"] for job in due] == ["oneshot-recover"]
+        assert get_job("oneshot-recover")["next_run_at"] == run_at
+
+    def test_broken_stale_one_shot_without_next_run_is_not_recovered(self, tmp_cron_dir, monkeypatch):
+        now = datetime(2026, 3, 18, 4, 30, 0, tzinfo=timezone.utc)
+        monkeypatch.setattr("cron.jobs._hermes_now", lambda: now)
+
+        save_jobs(
+            [{
+                "id": "oneshot-stale",
+                "name": "Too old",
+                "prompt": "Word of the day",
+                "schedule": {"kind": "once", "run_at": "2026-03-18T04:22:00+00:00", "display": "once at 2026-03-18 04:22"},
+                "schedule_display": "once at 2026-03-18 04:22",
+                "repeat": {"times": 1, "completed": 0},
+                "enabled": True,
+                "state": "scheduled",
+                "paused_at": None,
+                "paused_reason": None,
+                "created_at": "2026-03-18T04:21:00+00:00",
+                "next_run_at": None,
+                "last_run_at": None,
+                "last_status": None,
+                "last_error": None,
+                "deliver": "local",
+                "origin": None,
+            }]
+        )
+
+        assert get_due_jobs() == []
+        assert get_job("oneshot-stale")["next_run_at"] is None
+

 class TestSaveJobOutput:
    def test_creates_output_file(self, tmp_cron_dir):
--- a/tests/gateway/test_config.py
+++ b/tests/gateway/test_config.py
@ -115,6 +115,22 @@ class TestGatewayConfigRoundtrip:
        assert restored.quick_commands == {"limits": {"type": "exec", "command": "echo ok"}}
        assert restored.group_sessions_per_user is False

+    def test_roundtrip_preserves_unauthorized_dm_behavior(self):
+        config = GatewayConfig(
+            unauthorized_dm_behavior="ignore",
+            platforms={
+                Platform.WHATSAPP: PlatformConfig(
+                    enabled=True,
+                    extra={"unauthorized_dm_behavior": "pair"},
+                ),
+            },
+        )
+
+        restored = GatewayConfig.from_dict(config.to_dict())
+
+        assert restored.unauthorized_dm_behavior == "ignore"
+        assert restored.platforms[Platform.WHATSAPP].extra["unauthorized_dm_behavior"] == "pair"
+

 class TestLoadGatewayConfig:
    def test_bridges_quick_commands_from_config_yaml(self, tmp_path, monkeypatch):
@ -158,3 +174,21 @@ class TestLoadGatewayConfig:
        config = load_gateway_config()

        assert config.quick_commands == {}
+
+    def test_bridges_unauthorized_dm_behavior_from_config_yaml(self, tmp_path, monkeypatch):
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(
+            "unauthorized_dm_behavior: ignore\n"
+            "whatsapp:\n"
+            "  unauthorized_dm_behavior: pair\n",
+            encoding="utf-8",
+        )
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        config = load_gateway_config()
+
+        assert config.unauthorized_dm_behavior == "ignore"
+        assert config.platforms[Platform.WHATSAPP].extra["unauthorized_dm_behavior"] == "pair"
--- a/tests/gateway/test_status.py
+++ b/tests/gateway/test_status.py
@ -42,6 +42,26 @@ class TestGatewayPidState:

        assert status.get_running_pid() == os.getpid()

+    def test_get_running_pid_accepts_script_style_gateway_cmdline(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        pid_path = tmp_path / "gateway.pid"
+        pid_path.write_text(json.dumps({
+            "pid": os.getpid(),
+            "kind": "hermes-gateway",
+            "argv": ["/venv/bin/python", "/repo/hermes_cli/main.py", "gateway", "run", "--replace"],
+            "start_time": 123,
+        }))
+
+        monkeypatch.setattr(status.os, "kill", lambda pid, sig: None)
+        monkeypatch.setattr(status, "_get_process_start_time", lambda pid: 123)
+        monkeypatch.setattr(
+            status,
+            "_read_process_cmdline",
+            lambda pid: "/venv/bin/python /repo/hermes_cli/main.py gateway run --replace",
+        )
+
+        assert status.get_running_pid() == os.getpid()
+

 class TestGatewayRuntimeStatus:
    def test_write_runtime_status_overwrites_stale_pid_on_restart(self, tmp_path, monkeypatch):
--- a/tests/gateway/test_unauthorized_dm_behavior.py
+++ b/tests/gateway/test_unauthorized_dm_behavior.py
@ -0,0 +1,137 @@
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionSource
+
+
+def _clear_auth_env(monkeypatch) -> None:
+    for key in (
+        "TELEGRAM_ALLOWED_USERS",
+        "DISCORD_ALLOWED_USERS",
+        "WHATSAPP_ALLOWED_USERS",
+        "SLACK_ALLOWED_USERS",
+        "SIGNAL_ALLOWED_USERS",
+        "EMAIL_ALLOWED_USERS",
+        "SMS_ALLOWED_USERS",
+        "MATTERMOST_ALLOWED_USERS",
+        "MATRIX_ALLOWED_USERS",
+        "DINGTALK_ALLOWED_USERS",
+        "GATEWAY_ALLOWED_USERS",
+        "TELEGRAM_ALLOW_ALL_USERS",
+        "DISCORD_ALLOW_ALL_USERS",
+        "WHATSAPP_ALLOW_ALL_USERS",
+        "SLACK_ALLOW_ALL_USERS",
+        "SIGNAL_ALLOW_ALL_USERS",
+        "EMAIL_ALLOW_ALL_USERS",
+        "SMS_ALLOW_ALL_USERS",
+        "MATTERMOST_ALLOW_ALL_USERS",
+        "MATRIX_ALLOW_ALL_USERS",
+        "DINGTALK_ALLOW_ALL_USERS",
+        "GATEWAY_ALLOW_ALL_USERS",
+    ):
+        monkeypatch.delenv(key, raising=False)
+
+
+def _make_event(platform: Platform, user_id: str, chat_id: str) -> MessageEvent:
+    return MessageEvent(
+        text="hello",
+        message_id="m1",
+        source=SessionSource(
+            platform=platform,
+            user_id=user_id,
+            chat_id=chat_id,
+            user_name="tester",
+            chat_type="dm",
+        ),
+    )
+
+
+def _make_runner(platform: Platform, config: GatewayConfig):
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)
+    runner.config = config
+    adapter = SimpleNamespace(send=AsyncMock())
+    runner.adapters = {platform: adapter}
+    runner.pairing_store = MagicMock()
+    runner.pairing_store.is_approved.return_value = False
+    return runner, adapter
+
+
+@pytest.mark.asyncio
+async def test_unauthorized_dm_pairs_by_default(monkeypatch):
+    _clear_auth_env(monkeypatch)
+    config = GatewayConfig(
+        platforms={Platform.WHATSAPP: PlatformConfig(enabled=True)},
+    )
+    runner, adapter = _make_runner(Platform.WHATSAPP, config)
+    runner.pairing_store.generate_code.return_value = "ABC12DEF"
+
+    result = await runner._handle_message(
+        _make_event(
+            Platform.WHATSAPP,
+            "15551234567@s.whatsapp.net",
+            "15551234567@s.whatsapp.net",
+        )
+    )
+
+    assert result is None
+    runner.pairing_store.generate_code.assert_called_once_with(
+        "whatsapp",
+        "15551234567@s.whatsapp.net",
+        "tester",
+    )
+    adapter.send.assert_awaited_once()
+    assert "ABC12DEF" in adapter.send.await_args.args[1]
+
+
+@pytest.mark.asyncio
+async def test_unauthorized_whatsapp_dm_can_be_ignored(monkeypatch):
+    _clear_auth_env(monkeypatch)
+    config = GatewayConfig(
+        platforms={
+            Platform.WHATSAPP: PlatformConfig(
+                enabled=True,
+                extra={"unauthorized_dm_behavior": "ignore"},
+            ),
+        },
+    )
+    runner, adapter = _make_runner(Platform.WHATSAPP, config)
+
+    result = await runner._handle_message(
+        _make_event(
+            Platform.WHATSAPP,
+            "15551234567@s.whatsapp.net",
+            "15551234567@s.whatsapp.net",
+        )
+    )
+
+    assert result is None
+    runner.pairing_store.generate_code.assert_not_called()
+    adapter.send.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_global_ignore_suppresses_pairing_reply(monkeypatch):
+    _clear_auth_env(monkeypatch)
+    config = GatewayConfig(
+        unauthorized_dm_behavior="ignore",
+        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")},
+    )
+    runner, adapter = _make_runner(Platform.TELEGRAM, config)
+
+    result = await runner._handle_message(
+        _make_event(
+            Platform.TELEGRAM,
+            "12345",
+            "12345",
+        )
+    )
+
+    assert result is None
+    runner.pairing_store.generate_code.assert_not_called()
+    adapter.send.assert_not_awaited()
--- a/tests/hermes_cli/test_banner.py
+++ b/tests/hermes_cli/test_banner.py
@ -0,0 +1,70 @@
+"""Tests for banner toolset name normalization and skin color usage."""
+
+from unittest.mock import patch
+
+from rich.console import Console
+
+import hermes_cli.banner as banner
+import model_tools
+import tools.mcp_tool
+
+
+def test_display_toolset_name_strips_legacy_suffix():
+    assert banner._display_toolset_name("homeassistant_tools") == "homeassistant"
+    assert banner._display_toolset_name("honcho_tools") == "honcho"
+    assert banner._display_toolset_name("web_tools") == "web"
+
+
+def test_display_toolset_name_preserves_clean_names():
+    assert banner._display_toolset_name("browser") == "browser"
+    assert banner._display_toolset_name("file") == "file"
+    assert banner._display_toolset_name("terminal") == "terminal"
+
+
+def test_display_toolset_name_handles_empty():
+    assert banner._display_toolset_name("") == "unknown"
+    assert banner._display_toolset_name(None) == "unknown"
+
+
+def test_build_welcome_banner_uses_normalized_toolset_names():
+    """Unavailable toolsets should not have '_tools' appended in banner output."""
+    with (
+        patch.object(
+            model_tools,
+            "check_tool_availability",
+            return_value=(
+                ["web"],
+                [
+                    {"name": "homeassistant", "tools": ["ha_call_service"]},
+                    {"name": "honcho", "tools": ["honcho_conclude"]},
+                ],
+            ),
+        ),
+        patch.object(banner, "get_available_skills", return_value={}),
+        patch.object(banner, "get_update_result", return_value=None),
+        patch.object(tools.mcp_tool, "get_mcp_status", return_value=[]),
+    ):
+        console = Console(
+            record=True, force_terminal=False, color_system=None, width=160
+        )
+        banner.build_welcome_banner(
+            console=console,
+            model="anthropic/test-model",
+            cwd="/tmp/project",
+            tools=[
+                {"function": {"name": "web_search"}},
+                {"function": {"name": "read_file"}},
+            ],
+            get_toolset_for_tool=lambda name: {
+                "web_search": "web_tools",
+                "read_file": "file",
+            }.get(name),
+        )
+
+    output = console.export_text()
+    assert "homeassistant:" in output
+    assert "honcho:" in output
+    assert "web:" in output
+    assert "homeassistant_tools:" not in output
+    assert "honcho_tools:" not in output
+    assert "web_tools:" not in output
--- a/tests/hermes_cli/test_banner_skills.py
+++ b/tests/hermes_cli/test_banner_skills.py
@ -0,0 +1,68 @@
+"""Tests for banner get_available_skills() — disabled and platform filtering."""
+
+from unittest.mock import patch
+
+import pytest
+
+
+_MOCK_SKILLS = [
+    {"name": "skill-a", "description": "A skill", "category": "tools"},
+    {"name": "skill-b", "description": "B skill", "category": "tools"},
+    {"name": "skill-c", "description": "C skill", "category": "creative"},
+]
+
+
+def test_get_available_skills_delegates_to_find_all_skills():
+    """get_available_skills should call _find_all_skills (which handles filtering)."""
+    with patch("tools.skills_tool._find_all_skills", return_value=list(_MOCK_SKILLS)):
+        from hermes_cli.banner import get_available_skills
+        result = get_available_skills()
+
+    assert "tools" in result
+    assert "creative" in result
+    assert sorted(result["tools"]) == ["skill-a", "skill-b"]
+    assert result["creative"] == ["skill-c"]
+
+
+def test_get_available_skills_excludes_disabled():
+    """Disabled skills should not appear in the banner count."""
+    # _find_all_skills already filters out disabled skills, so when it is
+    # patched to return a filtered list, get_available_skills should reflect that.
+    filtered = [s for s in _MOCK_SKILLS if s["name"] != "skill-b"]
+    with patch("tools.skills_tool._find_all_skills", return_value=filtered):
+        from hermes_cli.banner import get_available_skills
+        result = get_available_skills()
+
+    all_names = [n for names in result.values() for n in names]
+    assert "skill-b" not in all_names
+    assert "skill-a" in all_names
+    assert len(all_names) == 2
+
+
+def test_get_available_skills_empty_when_no_skills():
+    """No skills installed returns empty dict."""
+    with patch("tools.skills_tool._find_all_skills", return_value=[]):
+        from hermes_cli.banner import get_available_skills
+        result = get_available_skills()
+
+    assert result == {}
+
+
+def test_get_available_skills_handles_import_failure():
+    """If _find_all_skills import fails, return empty dict gracefully."""
+    with patch("tools.skills_tool._find_all_skills", side_effect=ImportError("boom")):
+        from hermes_cli.banner import get_available_skills
+        result = get_available_skills()
+
+    assert result == {}
+
+
+def test_get_available_skills_null_category_becomes_general():
+    """Skills with None category should be grouped under 'general'."""
+    skills = [{"name": "orphan-skill", "description": "No cat", "category": None}]
+    with patch("tools.skills_tool._find_all_skills", return_value=skills):
+        from hermes_cli.banner import get_available_skills
+        result = get_available_skills()
+
+    assert "general" in result
+    assert result["general"] == ["orphan-skill"]
--- a/tests/hermes_cli/test_gateway.py
+++ b/tests/hermes_cli/test_gateway.py
@ -1,6 +1,8 @@
 """Tests for hermes_cli.gateway."""

+import signal
 from types import SimpleNamespace
+from unittest.mock import patch, call

 import hermes_cli.gateway as gateway

@ -169,3 +171,84 @@ def test_install_linux_gateway_from_setup_system_choice_as_root_installs(monkeyp

    assert (scope, did_install) == ("system", True)
    assert calls == [(True, True, "alice")]
+
+
+# ---------------------------------------------------------------------------
+# _wait_for_gateway_exit
+# ---------------------------------------------------------------------------
+
+
+class TestWaitForGatewayExit:
+    """PID-based wait with force-kill on timeout."""
+
+    def test_returns_immediately_when_no_pid(self, monkeypatch):
+        """If get_running_pid returns None, exit instantly."""
+        monkeypatch.setattr("gateway.status.get_running_pid", lambda: None)
+        # Should return without sleeping at all.
+        gateway._wait_for_gateway_exit(timeout=1.0, force_after=0.5)
+
+    def test_returns_when_process_exits_gracefully(self, monkeypatch):
+        """Process exits after a couple of polls — no SIGKILL needed."""
+        poll_count = 0
+
+        def mock_get_running_pid():
+            nonlocal poll_count
+            poll_count += 1
+            return 12345 if poll_count <= 2 else None
+
+        monkeypatch.setattr("gateway.status.get_running_pid", mock_get_running_pid)
+        monkeypatch.setattr("time.sleep", lambda _: None)
+
+        gateway._wait_for_gateway_exit(timeout=10.0, force_after=999.0)
+        # Should have polled until None was returned.
+        assert poll_count == 3
+
+    def test_force_kills_after_grace_period(self, monkeypatch):
+        """When the process doesn't exit, SIGKILL the saved PID."""
+        import time as _time
+
+        # Simulate monotonic time advancing past force_after
+        call_num = 0
+        def fake_monotonic():
+            nonlocal call_num
+            call_num += 1
+            # Every call advances the fake clock by 2s (it never returns 0),
+            # so elapsed time quickly exceeds the force_after grace period.
+            return call_num * 2.0  # 2, 4, 6, 8, ...
+
+        kills = []
+        def mock_kill(pid, sig):
+            kills.append((pid, sig))
+
+        # get_running_pid returns the PID until kill is sent, then None
+        def mock_get_running_pid():
+            return None if kills else 42
+
+        monkeypatch.setattr("time.monotonic", fake_monotonic)
+        monkeypatch.setattr("time.sleep", lambda _: None)
+        monkeypatch.setattr("gateway.status.get_running_pid", mock_get_running_pid)
+        monkeypatch.setattr("os.kill", mock_kill)
+
+        gateway._wait_for_gateway_exit(timeout=10.0, force_after=5.0)
+        assert (42, signal.SIGKILL) in kills
+
+    def test_handles_process_already_gone_on_kill(self, monkeypatch):
+        """ProcessLookupError during SIGKILL is not fatal."""
+        import time as _time
+
+        call_num = 0
+        def fake_monotonic():
+            nonlocal call_num
+            call_num += 1
+            return call_num * 3.0  # Jump past force_after quickly
+
+        def mock_kill(pid, sig):
+            raise ProcessLookupError
+
+        monkeypatch.setattr("time.monotonic", fake_monotonic)
+        monkeypatch.setattr("time.sleep", lambda _: None)
+        monkeypatch.setattr("gateway.status.get_running_pid", lambda: 99)
+        monkeypatch.setattr("os.kill", mock_kill)
+
+        # Should not raise — ProcessLookupError means it's already gone.
+        gateway._wait_for_gateway_exit(timeout=10.0, force_after=2.0)
--- a/tests/test_fallback_model.py
+++ b/tests/test_fallback_model.py
@ -131,7 +131,7 @@ class TestTryActivateFallback:

    def test_activates_minimax_fallback(self):
        agent = _make_agent(
-            fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
+            fallback_model={"provider": "minimax", "model": "MiniMax-M2.7"},
        )
        mock_client = _mock_resolve(
            api_key="sk-mm-key",
@ -139,10 +139,10 @@ class TestTryActivateFallback:
        )
        with patch(
            "agent.auxiliary_client.resolve_provider_client",
-            return_value=(mock_client, "MiniMax-M2.5"),
+            return_value=(mock_client, "MiniMax-M2.7"),
        ):
            assert agent._try_activate_fallback() is True
-            assert agent.model == "MiniMax-M2.5"
+            assert agent.model == "MiniMax-M2.7"
            assert agent.provider == "minimax"
            assert agent.client is mock_client

@ -165,7 +165,7 @@ class TestTryActivateFallback:
    def test_returns_false_when_no_api_key(self):
        """Fallback should fail gracefully when the API key env var is unset."""
        agent = _make_agent(
-            fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
+            fallback_model={"provider": "minimax", "model": "MiniMax-M2.7"},
        )
        with patch(
            "agent.auxiliary_client.resolve_provider_client",
--- a/tests/test_hermes_state.py
+++ b/tests/test_hermes_state.py
@ -210,6 +210,25 @@ class TestFTS5Search:
        sources = [r["source"] for r in results]
        assert all(s == "telegram" for s in sources)

+    def test_search_default_sources_include_acp(self, db):
+        db.create_session(session_id="s1", source="acp")
+        db.append_message("s1", role="user", content="ACP question about Python")
+
+        results = db.search_messages("Python")
+        sources = [r["source"] for r in results]
+        assert "acp" in sources
+
+    def test_search_default_includes_all_platforms(self, db):
+        """Default search (no source_filter) should find sessions from any platform."""
+        for src in ("cli", "telegram", "signal", "homeassistant", "acp", "matrix"):
+            sid = f"s-{src}"
+            db.create_session(session_id=sid, source=src)
+            db.append_message(sid, role="user", content=f"universal search test from {src}")
+
+        results = db.search_messages("universal search test")
+        found_sources = {r["source"] for r in results}
+        assert found_sources == {"cli", "telegram", "signal", "homeassistant", "acp", "matrix"}
+
    def test_search_with_role_filter(self, db):
        db.create_session(session_id="s1", source="cli")
        db.append_message("s1", role="user", content="What is FastAPI?")
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@ -828,7 +828,7 @@ class TestConcurrentToolExecution:
                mock_con.assert_not_called()

    def test_multiple_tools_uses_concurrent_path(self, agent):
-        """Multiple non-interactive tools should use concurrent path."""
+        """Multiple read-only tools should use concurrent path."""
        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        tc2 = _mock_tool_call(name="read_file", arguments='{"path":"x.py"}', call_id="c2")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
@ -839,6 +839,94 @@ class TestConcurrentToolExecution:
                mock_con.assert_called_once()
                mock_seq.assert_not_called()

+    def test_terminal_batch_forces_sequential(self, agent):
+        """Stateful tools should not share the concurrent execution path."""
+        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
+        tc2 = _mock_tool_call(name="terminal", arguments='{"command":"pwd"}', call_id="c2")
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
+        messages = []
+        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
+            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
+                agent._execute_tool_calls(mock_msg, messages, "task-1")
+                mock_seq.assert_called_once()
+                mock_con.assert_not_called()
+
+    def test_write_batch_forces_sequential(self, agent):
+        """File mutations should stay ordered within a turn."""
+        tc1 = _mock_tool_call(name="read_file", arguments='{"path":"x.py"}', call_id="c1")
+        tc2 = _mock_tool_call(name="write_file", arguments='{"path":"x.py","content":"print(1)"}', call_id="c2")
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
+        messages = []
+        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
+            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
+                agent._execute_tool_calls(mock_msg, messages, "task-1")
+                mock_seq.assert_called_once()
+                mock_con.assert_not_called()
+
+    def test_disjoint_write_batch_uses_concurrent_path(self, agent):
+        """Independent file writes should still run concurrently."""
+        tc1 = _mock_tool_call(
+            name="write_file",
+            arguments='{"path":"src/a.py","content":"print(1)"}',
+            call_id="c1",
+        )
+        tc2 = _mock_tool_call(
+            name="write_file",
+            arguments='{"path":"src/b.py","content":"print(2)"}',
+            call_id="c2",
+        )
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
+        messages = []
+        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
+            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
+                agent._execute_tool_calls(mock_msg, messages, "task-1")
+                mock_con.assert_called_once()
+                mock_seq.assert_not_called()
+
+    def test_overlapping_write_batch_forces_sequential(self, agent):
+        """Writes to the same file must stay ordered."""
+        tc1 = _mock_tool_call(
+            name="write_file",
+            arguments='{"path":"src/a.py","content":"print(1)"}',
+            call_id="c1",
+        )
+        tc2 = _mock_tool_call(
+            name="patch",
+            arguments='{"path":"src/a.py","old_string":"1","new_string":"2"}',
+            call_id="c2",
+        )
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
+        messages = []
+        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
+            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
+                agent._execute_tool_calls(mock_msg, messages, "task-1")
+                mock_seq.assert_called_once()
+                mock_con.assert_not_called()
+
+    def test_malformed_json_args_forces_sequential(self, agent):
+        """Unparseable tool arguments should fall back to sequential."""
+        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
+        tc2 = _mock_tool_call(name="web_search", arguments="NOT JSON {{{", call_id="c2")
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
+        messages = []
+        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
+            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
+                agent._execute_tool_calls(mock_msg, messages, "task-1")
+                mock_seq.assert_called_once()
+                mock_con.assert_not_called()
+
+    def test_non_dict_args_forces_sequential(self, agent):
+        """Tool arguments that parse to a non-dict type should fall back to sequential."""
+        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
+        tc2 = _mock_tool_call(name="web_search", arguments='"just a string"', call_id="c2")
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
+        messages = []
+        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
+            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
+                agent._execute_tool_calls(mock_msg, messages, "task-1")
+                mock_seq.assert_called_once()
+                mock_con.assert_not_called()
+
    def test_concurrent_executes_all_tools(self, agent):
        """Concurrent path should execute all tools and append results in order."""
        tc1 = _mock_tool_call(name="web_search", arguments='{"q":"alpha"}', call_id="c1")
@ -965,6 +1053,39 @@ class TestConcurrentToolExecution:
        assert "ok" in result


+class TestPathsOverlap:
+    """Unit tests for the _paths_overlap helper."""
+
+    def test_same_path_overlaps(self):
+        from run_agent import _paths_overlap
+        assert _paths_overlap(Path("src/a.py"), Path("src/a.py"))
+
+    def test_siblings_do_not_overlap(self):
+        from run_agent import _paths_overlap
+        assert not _paths_overlap(Path("src/a.py"), Path("src/b.py"))
+
+    def test_parent_child_overlap(self):
+        from run_agent import _paths_overlap
+        assert _paths_overlap(Path("src"), Path("src/sub/a.py"))
+
+    def test_different_roots_do_not_overlap(self):
+        from run_agent import _paths_overlap
+        assert not _paths_overlap(Path("src/a.py"), Path("other/a.py"))
+
+    def test_nested_vs_flat_do_not_overlap(self):
+        from run_agent import _paths_overlap
+        assert not _paths_overlap(Path("src/sub/a.py"), Path("src/a.py"))
+
+    def test_empty_paths_do_not_overlap(self):
+        from run_agent import _paths_overlap
+        assert not _paths_overlap(Path(""), Path(""))
+
+    def test_one_empty_path_does_not_overlap(self):
+        from run_agent import _paths_overlap
+        assert not _paths_overlap(Path(""), Path("src/a.py"))
+        assert not _paths_overlap(Path("src/a.py"), Path(""))
+
+
 class TestHandleMaxIterations:
    def test_returns_summary(self, agent):
        resp = _mock_response(content="Here is a summary of what I did.")
@ -2774,3 +2895,135 @@ class TestNormalizeCodexDictArguments:
        msg, _ = agent._normalize_codex_response(response)
        tc = msg.tool_calls[0]
        assert tc.function.arguments == args_str
+
+
+# ---------------------------------------------------------------------------
+# OAuth flag and nudge counter fixes (salvaged from PR #1797)
+# ---------------------------------------------------------------------------
+
+
+class TestOAuthFlagAfterCredentialRefresh:
+    """_is_anthropic_oauth must update when token type changes during refresh."""
+
+    def test_oauth_flag_updates_api_key_to_oauth(self, agent):
+        """Refreshing from API key to OAuth token must set flag to True."""
+        agent.api_mode = "anthropic_messages"
+        agent._anthropic_api_key = "sk-ant-api-old"
+        agent._anthropic_client = MagicMock()
+        agent._is_anthropic_oauth = False
+
+        with (
+            patch("agent.anthropic_adapter.resolve_anthropic_token",
+                  return_value="sk-ant-setup-oauth-token"),
+            patch("agent.anthropic_adapter.build_anthropic_client",
+                  return_value=MagicMock()),
+        ):
+            result = agent._try_refresh_anthropic_client_credentials()
+
+        assert result is True
+        assert agent._is_anthropic_oauth is True
+
+    def test_oauth_flag_updates_oauth_to_api_key(self, agent):
+        """Refreshing from OAuth to API key must set flag to False."""
+        agent.api_mode = "anthropic_messages"
+        agent._anthropic_api_key = "sk-ant-setup-old"
+        agent._anthropic_client = MagicMock()
+        agent._is_anthropic_oauth = True
+
+        with (
+            patch("agent.anthropic_adapter.resolve_anthropic_token",
+                  return_value="sk-ant-api03-new-key"),
+            patch("agent.anthropic_adapter.build_anthropic_client",
+                  return_value=MagicMock()),
+        ):
+            result = agent._try_refresh_anthropic_client_credentials()
+
+        assert result is True
+        assert agent._is_anthropic_oauth is False
+
+
+class TestFallbackSetsOAuthFlag:
+    """_try_activate_fallback must set _is_anthropic_oauth for Anthropic fallbacks."""
+
+    def test_fallback_to_anthropic_oauth_sets_flag(self, agent):
+        agent._fallback_activated = False
+        agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-6"}
+
+        mock_client = MagicMock()
+        mock_client.base_url = "https://api.anthropic.com/v1"
+        mock_client.api_key = "sk-ant-setup-oauth-token"
+
+        with (
+            patch("agent.auxiliary_client.resolve_provider_client",
+                  return_value=(mock_client, None)),
+            patch("agent.anthropic_adapter.build_anthropic_client",
+                  return_value=MagicMock()),
+            patch("agent.anthropic_adapter.resolve_anthropic_token",
+                  return_value=None),
+        ):
+            result = agent._try_activate_fallback()
+
+        assert result is True
+        assert agent._is_anthropic_oauth is True
+
+    def test_fallback_to_anthropic_api_key_clears_flag(self, agent):
+        agent._fallback_activated = False
+        agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-6"}
+
+        mock_client = MagicMock()
+        mock_client.base_url = "https://api.anthropic.com/v1"
+        mock_client.api_key = "sk-ant-api03-regular-key"
+
+        with (
+            patch("agent.auxiliary_client.resolve_provider_client",
+                  return_value=(mock_client, None)),
+            patch("agent.anthropic_adapter.build_anthropic_client",
+                  return_value=MagicMock()),
+            patch("agent.anthropic_adapter.resolve_anthropic_token",
+                  return_value=None),
+        ):
+            result = agent._try_activate_fallback()
+
+        assert result is True
+        assert agent._is_anthropic_oauth is False
+
+
+class TestMemoryNudgeCounterPersistence:
+    """_turns_since_memory must persist across run_conversation calls."""
+
+    def test_counters_initialized_in_init(self):
+        """Counters must exist on the agent after __init__."""
+        with patch("run_agent.get_tool_definitions", return_value=[]):
+            a = AIAgent(
+                model="test", api_key="test-key", provider="openrouter",
+                skip_context_files=True, skip_memory=True,
+            )
+        assert hasattr(a, "_turns_since_memory")
+        assert hasattr(a, "_iters_since_skill")
+        assert a._turns_since_memory == 0
+        assert a._iters_since_skill == 0
+
+    def test_counters_not_reset_in_preamble(self):
+        """The run_conversation preamble must not zero the nudge counters."""
+        import inspect
+        src = inspect.getsource(AIAgent.run_conversation)
+        # The preamble resets many fields (retry counts, budget, etc.)
+        # before the main loop. Find that reset block and verify our
+        # counters aren't in it. The reset block ends at iteration_budget.
+        preamble_end = src.index("self.iteration_budget = IterationBudget")
+        preamble = src[:preamble_end]
+        assert "self._turns_since_memory = 0" not in preamble
+        assert "self._iters_since_skill = 0" not in preamble
+
+
+class TestDeadRetryCode:
+    """Unreachable retry_count >= max_retries after raise must not exist."""
+
+    def test_no_unreachable_max_retries_after_backoff(self):
+        import inspect
+        source = inspect.getsource(AIAgent.run_conversation)
+        occurrences = source.count("if retry_count >= max_retries:")
+        assert occurrences == 2, (
+            f"Expected 2 occurrences of 'if retry_count >= max_retries:' "
+            f"but found {occurrences}"
+        )
--- a/tests/tools/test_mcp_tool.py
+++ b/tests/tools/test_mcp_tool.py
@ -505,6 +505,42 @@ class TestToolsetInjection:
        assert "mcp_fs_list_files" not in fake_toolsets["non-hermes"]["tools"]
        # Original tools preserved
        assert "terminal" in fake_toolsets["hermes-cli"]["tools"]
+        # Server name becomes a standalone toolset
+        assert "fs" in fake_toolsets
+        assert "mcp_fs_list_files" in fake_toolsets["fs"]["tools"]
+        assert fake_toolsets["fs"]["description"].startswith("MCP server '")
+
+    def test_server_toolset_skips_builtin_collision(self):
+        """MCP server named after a built-in toolset shouldn't overwrite it."""
+        from tools.mcp_tool import MCPServerTask
+
+        mock_tools = [_make_mcp_tool("run", "Run command")]
+        mock_session = MagicMock()
+        fresh_servers = {}
+
+        async def fake_connect(name, config):
+            server = MCPServerTask(name)
+            server.session = mock_session
+            server._tools = mock_tools
+            return server
+
+        fake_toolsets = {
+            "hermes-cli": {"tools": ["terminal"], "description": "CLI", "includes": []},
+            # Built-in toolset named "terminal" — must not be overwritten
+            "terminal": {"tools": ["terminal"], "description": "Terminal tools", "includes": []},
+        }
+        fake_config = {"terminal": {"command": "npx", "args": []}}
+
+        with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
+             patch("tools.mcp_tool._servers", fresh_servers), \
+             patch("tools.mcp_tool._load_mcp_config", return_value=fake_config), \
+             patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \
+             patch("toolsets.TOOLSETS", fake_toolsets):
+            from tools.mcp_tool import discover_mcp_tools
+            discover_mcp_tools()
+
+        # Built-in toolset preserved — description unchanged
+        assert fake_toolsets["terminal"]["description"] == "Terminal tools"

    def test_server_connection_failure_skipped(self):
        """If one server fails to connect, others still proceed."""
--- a/tests/tools/test_skills_tool.py
+++ b/tests/tools/test_skills_tool.py
@ -374,6 +374,35 @@ class TestSkillView:
        result = json.loads(raw)
        assert result["success"] is False

+    def test_view_disabled_skill_blocked(self, tmp_path):
+        """skill_view must refuse a skill that is reported as disabled."""
+        skills_dir_patch = patch("tools.skills_tool.SKILLS_DIR", tmp_path)
+        disabled_patch = patch(
+            "tools.skills_tool._is_skill_disabled",
+            return_value=True,
+        )
+        with skills_dir_patch, disabled_patch:
+            _make_skill(tmp_path, "hidden-skill")
+            payload = json.loads(skill_view("hidden-skill"))
+
+        # The call fails and the error text mentions that the skill is disabled.
+        assert payload["success"] is False
+        assert "disabled" in payload["error"].lower()
+
+    def test_view_enabled_skill_allowed(self, tmp_path):
+        """skill_view succeeds for a skill that is not reported as disabled."""
+        skills_dir_patch = patch("tools.skills_tool.SKILLS_DIR", tmp_path)
+        enabled_patch = patch(
+            "tools.skills_tool._is_skill_disabled",
+            return_value=False,
+        )
+        with skills_dir_patch, enabled_patch:
+            _make_skill(tmp_path, "active-skill")
+            payload = json.loads(skill_view("active-skill"))
+
+        assert payload["success"] is True
+

 class TestSkillViewSecureSetupOnLoad:
    def test_requests_missing_required_env_and_continues(self, tmp_path, monkeypatch):
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@ -173,10 +173,6 @@ def _build_child_agent(
    from run_agent import AIAgent
    import model_tools

-    # Save the parent's resolved tool names before the child agent can
-    # overwrite the process-global via get_tool_definitions().
-    _saved_tool_names = list(model_tools._last_resolved_tool_names)
-
    # When no explicit toolsets given, inherit from parent's enabled toolsets
    # so disabled tools (e.g. web) don't leak to subagents.
    if toolsets:
@ -268,6 +264,13 @@ def _run_single_child(
    # Get the progress callback from the child agent
    child_progress_cb = getattr(child, 'tool_progress_callback', None)

+    # Save the parent's resolved tool names before the child agent can
+    # overwrite the process-global via get_tool_definitions().
+    # This must be in _run_single_child (not _build_child_agent) so the
+    # save/restore happens in the same scope as the try/finally.
+    import model_tools
+    _saved_tool_names = list(model_tools._last_resolved_tool_names)
+
    try:
        result = child.run_conversation(user_message=goal)

--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@ -1238,6 +1238,57 @@ def _convert_mcp_schema(server_name: str, mcp_tool) -> dict:
    }


+def _sync_mcp_toolsets(server_names: Optional[List[str]] = None) -> None:
+    """Expose each MCP server as a standalone toolset and inject into hermes-* sets.
+
+    Creates a real toolset entry in TOOLSETS for each server name (e.g.
+    TOOLSETS["github"] = {"tools": ["mcp_github_list_files", ...]}). This
+    makes raw server names resolvable in platform_toolsets overrides.
+
+    Also injects all MCP tools into hermes-* umbrella toolsets for the
+    default behavior.
+
+    Tools are matched to servers by their ``mcp_<server>_`` name prefix. When
+    the prefixes of two servers overlap (e.g. "fs" and "fs-extra"), a tool is
+    attributed only to the server(s) with the longest matching prefix, so the
+    shorter-named server's toolset does not absorb the other server's tools.
+
+    Skips server names that collide with built-in toolsets.
+
+    Args:
+        server_names: Servers to sync. When None, the keys of the loaded MCP
+            config are used.
+    """
+    from toolsets import TOOLSETS
+
+    if server_names is None:
+        server_names = list(_load_mcp_config().keys())
+
+    # Tool-name prefix per server, built with the same '-' / '.' -> '_'
+    # substitution on the server name that this function has always used.
+    prefixes = {
+        name: f"mcp_{name.replace('-', '_').replace('.', '_')}_"
+        for name in server_names
+    }
+
+    # Attribute every registered tool to the server(s) whose prefix is the
+    # longest match; a plain startswith() per server would hand "mcp_fs_extra_*"
+    # tools to a server named "fs" as well as to "fs-extra".
+    tools_by_server = {name: [] for name in prefixes}
+    for tool_name in _existing_tool_names():
+        matching = [
+            name for name, prefix in prefixes.items()
+            if tool_name.startswith(prefix)
+        ]
+        if not matching:
+            continue
+        longest = max(len(prefixes[name]) for name in matching)
+        for name in matching:
+            if len(prefixes[name]) == longest:
+                tools_by_server[name].append(tool_name)
+
+    all_mcp_tools: List[str] = []
+
+    for server_name in prefixes:
+        server_tools = sorted(tools_by_server[server_name])
+        # Collected before the collision check on purpose: tools of a server
+        # whose alias is skipped are still injected into the hermes-* sets.
+        all_mcp_tools.extend(server_tools)
+
+        # Don't overwrite a built-in toolset that happens to share the name.
+        # Entries created by this function are recognised by their description
+        # prefix and may be refreshed on later calls.
+        existing_ts = TOOLSETS.get(server_name)
+        if existing_ts and not str(existing_ts.get("description", "")).startswith("MCP server '"):
+            logger.warning(
+                "Skipping MCP toolset alias '%s' — a built-in toolset already uses that name",
+                server_name,
+            )
+            continue
+
+        TOOLSETS[server_name] = {
+            "description": f"MCP server '{server_name}' tools",
+            "tools": server_tools,
+            "includes": [],
+        }
+
+    # Also inject into hermes-* umbrella toolsets for default behavior.
+    for ts_name, ts in TOOLSETS.items():
+        if not ts_name.startswith("hermes-"):
+            continue
+        for tool_name in all_mcp_tools:
+            if tool_name not in ts["tools"]:
+                ts["tools"].append(tool_name)
+
+
 def _build_utility_schemas(server_name: str) -> List[dict]:
    """Build schemas for the MCP utility tools (resources & prompts).

@ -1523,6 +1574,7 @@ def discover_mcp_tools() -> List[str]:
        }

    if not new_servers:
+        _sync_mcp_toolsets(list(servers.keys()))
        return _existing_tool_names()

    # Start the background event loop for MCP connections
@ -1562,14 +1614,7 @@ def discover_mcp_tools() -> List[str]:
    # The outer timeout is generous: 120s total for parallel discovery.
    _run_on_mcp_loop(_discover_all(), timeout=120)

-    if all_tools:
-        # Dynamically inject into all hermes-* platform toolsets
-        from toolsets import TOOLSETS
-        for ts_name, ts in TOOLSETS.items():
-            if ts_name.startswith("hermes-"):
-                for tool_name in all_tools:
-                    if tool_name not in ts["tools"]:
-                        ts["tools"].append(tool_name)
+    _sync_mcp_toolsets(list(servers.keys()))

    # Print summary
    total_servers = len(new_servers)
--- a/tools/neutts_synth.py
+++ b/tools/neutts_synth.py
@ -8,7 +8,7 @@ Usage:
    python -m tools.neutts_synth --text "Hello" --out output.wav \
        --ref-audio samples/jo.wav --ref-text samples/jo.txt

-Requires: pip install neutts[all]
+Requires: python -m pip install -U neutts[all]
 System:   apt install espeak-ng  (or brew install espeak-ng)
 """

@ -75,7 +75,7 @@ def main():
    try:
        from neutts import NeuTTS
    except ImportError:
-        print("Error: neutts not installed. Run: pip install neutts[all]", file=sys.stderr)
+        print("Error: neutts not installed. Run: python -m pip install -U neutts[all]", file=sys.stderr)
        sys.exit(1)

    tts = NeuTTS(
--- a/tools/rl_training_tool.py
+++ b/tools/rl_training_tool.py
@ -1009,7 +1009,7 @@ async def rl_list_runs() -> str:
 TEST_MODELS = [
    {"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"},
    {"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"},
-    {"id": "minimax/minimax-m2.5", "name": "MiniMax M2.5", "scale": "large"},
+    {"id": "minimax/minimax-m2.7", "name": "MiniMax M2.7", "scale": "large"},
 ]

 # Default test parameters - quick but representative
@ -1370,7 +1370,7 @@ RL_CHECK_STATUS_SCHEMA = {"name": "rl_check_status", "description": "Get status
 RL_STOP_TRAINING_SCHEMA = {"name": "rl_stop_training", "description": "Stop a running training job. Use if metrics look bad, training is stagnant, or you want to try different settings.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to stop"}}, "required": ["run_id"]}}
 RL_GET_RESULTS_SCHEMA = {"name": "rl_get_results", "description": "Get final results and metrics for a completed training run. Returns final metrics and path to trained weights.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to get results for"}}, "required": ["run_id"]}}
 RL_LIST_RUNS_SCHEMA = {"name": "rl_list_runs", "description": "List all training runs (active and completed) with their status.", "parameters": {"type": "object", "properties": {}, "required": []}}
-RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.5"}}, "required": []}}
+RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.7"}}, "required": []}}

 _rl_env = ["TINKER_API_KEY", "WANDB_API_KEY"]

--- a/tools/skills_tool.py
+++ b/tools/skills_tool.py
@ -920,6 +920,20 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
                ensure_ascii=False,
            )

+        # Check if the skill is disabled by the user
+        resolved_name = parsed_frontmatter.get("name", skill_md.parent.name)
+        if _is_skill_disabled(resolved_name):
+            return json.dumps(
+                {
+                    "success": False,
+                    "error": (
+                        f"Skill '{resolved_name}' is disabled. "
+                        "Enable it with `hermes skills` or inspect the files directly on disk."
+                    ),
+                },
+                ensure_ascii=False,
+            )
+
        # If a specific file path is requested, read that instead
        if file_path and skill_dir:
            # Security: Prevent path traversal attacks
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@ -423,8 +423,8 @@ def text_to_speech_tool(
            if not _check_neutts_available():
                return json.dumps({
                    "success": False,
-                    "error": "NeuTTS provider selected but neutts_cli is not installed. "
-                             "Install the NeuTTS skill and run the bootstrap helper first."
+                    "error": "NeuTTS provider selected but neutts is not installed. "
+                             "Run hermes setup and choose NeuTTS, or install espeak-ng and run python -m pip install -U neutts[all]."
                }, ensure_ascii=False)
            logger.info("Generating speech with NeuTTS (local)...")
            _generate_neutts(text, file_str, tts_config)
--- a/website/docs/guides/use-mcp-with-hermes.md
+++ b/website/docs/guides/use-mcp-with-hermes.md
@ -37,8 +37,13 @@ That last part matters. Good MCP usage is not just “connect everything.” It

 ## Step 1: install MCP support

+If you installed Hermes with the standard install script, MCP support is already included (the installer runs `uv pip install -e ".[all]"`).
+
+If you installed without extras and need to add MCP separately:
+
 ```bash
-pip install hermes-agent[mcp]
+cd ~/.hermes/hermes-agent
+uv pip install -e ".[mcp]"
 ```

 For npm-based servers, make sure Node.js and `npx` are available.
--- a/website/docs/guides/use-voice-mode-with-hermes.md
+++ b/website/docs/guides/use-voice-mode-with-hermes.md
@ -72,6 +72,12 @@ pip install hermes-agent[messaging]
 pip install hermes-agent[tts-premium]
 ```

+### Local NeuTTS (optional)
+
+```bash
+python -m pip install -U "neutts[all]"
+```
+
 ### Everything

 ```bash
@ -84,18 +90,21 @@ pip install hermes-agent[all]

 ```bash
 brew install portaudio ffmpeg opus
+brew install espeak-ng
 ```

 ### Ubuntu / Debian

 ```bash
 sudo apt install portaudio19-dev ffmpeg libopus0
+sudo apt install espeak-ng
 ```

 Why these matter:
 - `portaudio` → microphone input / playback for CLI voice mode
 - `ffmpeg` → audio conversion for TTS and messaging delivery
 - `opus` → Discord voice codec support
+- `espeak-ng` → phonemizer backend for NeuTTS

 ## Step 4: choose STT and TTS providers

@ -133,9 +142,20 @@ ELEVENLABS_API_KEY=***
 #### Text-to-speech

 - `edge` → free and good enough for most users
+- `neutts` → free local/on-device TTS
 - `elevenlabs` → best quality
 - `openai` → good middle ground

+### If you use `hermes setup`
+
+If you choose NeuTTS in the setup wizard, Hermes checks whether `neutts` is already installed. If it is missing, the wizard tells you NeuTTS needs the Python package `neutts` and the system package `espeak-ng`, offers to install them for you, installs `espeak-ng` with your platform package manager, and then runs:
+
+```bash
+python -m pip install -U "neutts[all]"
+```
+
+If you skip that install or it fails, the wizard falls back to Edge TTS.
+
 ## Step 5: recommended config

 ```yaml
@ -159,6 +179,18 @@ tts:

 This is a good conservative default for most people.

+If you want local TTS instead, switch the `tts` block to:
+
+```yaml
+tts:
+  provider: "neutts"
+  neutts:
+    ref_audio: ''
+    ref_text: ''
+    model: neuphonic/neutts-air-q4-gguf
+    device: cpu
+```
+
 ## Use case 1: CLI voice mode

 ## Turn it on
--- a/website/docs/reference/faq.md
+++ b/website/docs/reference/faq.md
@ -372,8 +372,8 @@ hermes chat --continue

 **Solution:**
 ```bash
-# Ensure MCP dependencies are installed
-pip install hermes-agent[mcp]
+# Ensure MCP dependencies are installed (already included in standard install)
+cd ~/.hermes/hermes-agent && uv pip install -e ".[mcp]"

 # For npm-based servers, ensure Node.js is available
 node --version
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@ -186,11 +186,11 @@ hermes chat --provider kimi-coding --model moonshot-v1-auto
 # Requires: KIMI_API_KEY in ~/.hermes/.env

 # MiniMax (global endpoint)
-hermes chat --provider minimax --model MiniMax-Text-01
+hermes chat --provider minimax --model MiniMax-M2.7
 # Requires: MINIMAX_API_KEY in ~/.hermes/.env

 # MiniMax (China endpoint)
-hermes chat --provider minimax-cn --model MiniMax-Text-01
+hermes chat --provider minimax-cn --model MiniMax-M2.7
 # Requires: MINIMAX_CN_API_KEY in ~/.hermes/.env

 # Alibaba Cloud / DashScope (Qwen models)
@ -984,7 +984,7 @@ You can also change the reasoning effort at runtime with the `/reasoning` comman

 ```yaml
 tts:
-  provider: "edge"              # "edge" | "elevenlabs" | "openai"
+  provider: "edge"              # "edge" | "elevenlabs" | "openai" | "neutts"
  edge:
    voice: "en-US-AriaNeural"   # 322 voices, 74 languages
  elevenlabs:
@ -993,6 +993,11 @@ tts:
  openai:
    model: "gpt-4o-mini-tts"
    voice: "alloy"              # alloy, echo, fable, onyx, nova, shimmer
+  neutts:
+    ref_audio: ''
+    ref_text: ''
+    model: neuphonic/neutts-air-q4-gguf
+    device: cpu
 ```

 This controls both the `text_to_speech` tool and spoken replies in voice mode (`/voice tts` in the CLI or messaging gateway).
@ -1140,6 +1145,21 @@ group_sessions_per_user: true  # true = per-user isolation in groups/channels, f

 For the behavior details and examples, see [Sessions](/docs/user-guide/sessions) and the [Discord guide](/docs/user-guide/messaging/discord).

+## Unauthorized DM Behavior
+
+Control what Hermes does when an unknown user sends a direct message:
+
+```yaml
+unauthorized_dm_behavior: pair
+
+whatsapp:
+  unauthorized_dm_behavior: ignore
+```
+
+- `pair` is the default. Hermes denies access, but replies with a one-time pairing code in DMs.
+- `ignore` silently drops unauthorized DMs.
+- Platform sections override the global default, so you can keep pairing enabled broadly while making one platform quieter.
+
 ## Quick Commands

 Define custom commands that run shell commands without invoking the LLM — zero token usage, instant execution. Especially useful from messaging platforms (Telegram, Discord, etc.) for quick server checks or utility scripts.
--- a/website/docs/user-guide/features/mcp.md
+++ b/website/docs/user-guide/features/mcp.md
@ -20,10 +20,11 @@ If you have ever wanted Hermes to use a tool that already exists somewhere else,

 ## Quick start

-1. Install MCP support:
+1. Install MCP support (already included if you used the standard install script):

 ```bash
-pip install hermes-agent[mcp]
+cd ~/.hermes/hermes-agent
+uv pip install -e ".[mcp]"
 ```

 2. Add an MCP server to `~/.hermes/config.yaml`:
@ -374,7 +375,9 @@ Inspect the project root and explain the directory layout.
 Check:

 ```bash
-pip install hermes-agent[mcp]
+# Verify MCP deps are installed (already included in standard install)
+cd ~/.hermes/hermes-agent && uv pip install -e ".[mcp]"
+
 node --version
 npx --version
 ```
--- a/website/docs/user-guide/features/rl-training.md
+++ b/website/docs/user-guide/features/rl-training.md
@ -147,7 +147,7 @@ Default configuration:
 - Tests 3 models at different scales for robustness:
  - `qwen/qwen3-8b` (small)
  - `z-ai/glm-4.7-flash` (medium)
-  - `minimax/minimax-m2.5` (large)
+  - `minimax/minimax-m2.7` (large)
 - Total: ~144 rollouts

 This validates:
--- a/website/docs/user-guide/features/tts.md
+++ b/website/docs/user-guide/features/tts.md
@ -10,13 +10,14 @@ Hermes Agent supports both text-to-speech output and voice message transcription

 ## Text-to-Speech

-Convert text to speech with three providers:
+Convert text to speech with four providers:

 | Provider | Quality | Cost | API Key |
 |----------|---------|------|---------|
 | **Edge TTS** (default) | Good | Free | None needed |
 | **ElevenLabs** | Excellent | Paid | `ELEVENLABS_API_KEY` |
 | **OpenAI TTS** | Good | Paid | `VOICE_TOOLS_OPENAI_KEY` |
+| **NeuTTS** | Good | Free | None needed |

 ### Platform Delivery

@ -32,7 +33,7 @@ Convert text to speech with three providers:
 ```yaml
 # In ~/.hermes/config.yaml
 tts:
-  provider: "edge"              # "edge" | "elevenlabs" | "openai"
+  provider: "edge"              # "edge" | "elevenlabs" | "openai" | "neutts"
  edge:
    voice: "en-US-AriaNeural"   # 322 voices, 74 languages
  elevenlabs:
@ -41,6 +42,11 @@ tts:
  openai:
    model: "gpt-4o-mini-tts"
    voice: "alloy"              # alloy, echo, fable, onyx, nova, shimmer
+  neutts:
+    ref_audio: ''
+    ref_text: ''
+    model: neuphonic/neutts-air-q4-gguf
+    device: cpu
 ```

 ### Telegram Voice Bubbles & ffmpeg
@ -49,6 +55,7 @@ Telegram voice bubbles require Opus/OGG audio format:

 - **OpenAI and ElevenLabs** produce Opus natively — no extra setup
 - **Edge TTS** (default) outputs MP3 and needs **ffmpeg** to convert:
+- **NeuTTS** outputs WAV and also needs **ffmpeg** to convert for Telegram voice bubbles

 ```bash
 # Ubuntu/Debian
@ -61,7 +68,7 @@ brew install ffmpeg
 sudo dnf install ffmpeg
 ```

-Without ffmpeg, Edge TTS audio is sent as a regular audio file (playable, but shows as a rectangular player instead of a voice bubble).
+Without ffmpeg, Edge TTS and NeuTTS audio are sent as regular audio files (playable, but shown as a rectangular player instead of a voice bubble).

 :::tip
 If you want voice bubbles without installing ffmpeg, switch to the OpenAI or ElevenLabs provider.
--- a/website/docs/user-guide/features/voice-mode.md
+++ b/website/docs/user-guide/features/voice-mode.md
@ -44,6 +44,9 @@ pip install hermes-agent[messaging]
 # Premium TTS (ElevenLabs)
 pip install hermes-agent[tts-premium]

+# Local TTS (NeuTTS, optional)
+python -m pip install -U "neutts[all]"
+
 # Everything at once
 pip install hermes-agent[all]
 ```
@ -54,6 +57,8 @@ pip install hermes-agent[all]
 | `messaging` | `discord.py[voice]`, `python-telegram-bot`, `aiohttp` | Discord & Telegram bots |
 | `tts-premium` | `elevenlabs` | ElevenLabs TTS provider |

+Optional local TTS provider: install `neutts` separately with `python -m pip install -U "neutts[all]"`. On first use it downloads the model automatically.
+
 :::info
 `discord.py[voice]` installs **PyNaCl** (for voice encryption) and **opus bindings** automatically. This is required for Discord voice channel support.
 :::
@ -63,9 +68,11 @@ pip install hermes-agent[all]
 ```bash
 # macOS
 brew install portaudio ffmpeg opus
+brew install espeak-ng   # for NeuTTS

 # Ubuntu/Debian
 sudo apt install portaudio19-dev ffmpeg libopus0
+sudo apt install espeak-ng   # for NeuTTS
 ```

 | Dependency | Purpose | Required For |
@ -73,6 +80,7 @@ sudo apt install portaudio19-dev ffmpeg libopus0
 | **PortAudio** | Microphone input and audio playback | CLI voice mode |
 | **ffmpeg** | Audio format conversion (MP3 → Opus, PCM → WAV) | All platforms |
 | **Opus** | Discord voice codec | Discord voice channels |
+| **espeak-ng** | Phonemizer backend | Local NeuTTS provider |

 ### API Keys

@ -84,8 +92,9 @@ Add to `~/.hermes/.env`:
 GROQ_API_KEY=your-key                 # Groq Whisper — fast, free tier (cloud)
 VOICE_TOOLS_OPENAI_KEY=your-key       # OpenAI Whisper — paid (cloud)

-# Text-to-Speech (optional — Edge TTS works without any key)
-ELEVENLABS_API_KEY=your-key           # ElevenLabs — premium quality
+# Text-to-Speech (optional — Edge TTS and NeuTTS work without any key)
+ELEVENLABS_API_KEY=your-key           # ElevenLabs — premium quality
+# VOICE_TOOLS_OPENAI_KEY above also enables OpenAI TTS
 ```

 :::tip
@ -303,8 +312,9 @@ DISCORD_ALLOWED_USERS=your-user-id
 # STT — local provider needs no key (pip install faster-whisper)
 # GROQ_API_KEY=your-key            # Alternative: cloud-based, fast, free tier

-# TTS — optional, Edge TTS (free) is the default
-# ELEVENLABS_API_KEY=your-key      # Premium quality
+# TTS — optional. Edge TTS and NeuTTS need no key.
+# ELEVENLABS_API_KEY=your-key      # Premium quality
+# VOICE_TOOLS_OPENAI_KEY=your-key  # OpenAI TTS / Whisper
 ```

 ### Start the Gateway
@ -385,7 +395,7 @@ stt:

 # Text-to-Speech
 tts:
-  provider: "edge"                 # "edge" (free) | "elevenlabs" | "openai"
+  provider: "edge"                 # "edge" (free) | "elevenlabs" | "openai" | "neutts"
  edge:
    voice: "en-US-AriaNeural"      # 322 voices, 74 languages
  elevenlabs:
@ -394,6 +404,11 @@ tts:
  openai:
    model: "gpt-4o-mini-tts"
    voice: "alloy"                 # alloy, echo, fable, onyx, nova, shimmer
+  neutts:
+    ref_audio: ''
+    ref_text: ''
+    model: neuphonic/neutts-air-q4-gguf
+    device: cpu
 ```

 ### Environment Variables
@ -410,9 +425,9 @@ STT_OPENAI_MODEL=whisper-1               # Override default OpenAI STT model
 GROQ_BASE_URL=https://api.groq.com/openai/v1     # Custom Groq endpoint
 STT_OPENAI_BASE_URL=https://api.openai.com/v1    # Custom OpenAI STT endpoint

-# Text-to-Speech providers (Edge TTS needs no key)
-ELEVENLABS_API_KEY=...             # ElevenLabs (premium quality)
-# OpenAI TTS uses VOICE_TOOLS_OPENAI_KEY
+# Text-to-Speech providers (Edge TTS and NeuTTS need no key)
+ELEVENLABS_API_KEY=...             # ElevenLabs (premium quality)
+# VOICE_TOOLS_OPENAI_KEY above also enables OpenAI TTS

 # Discord voice channel
 DISCORD_BOT_TOKEN=...
@ -440,6 +455,9 @@ Provider priority (automatic fallback): **local** > **groq** > **openai**
 | **Edge TTS** | Good | Free | ~1s | No |
 | **ElevenLabs** | Excellent | Paid | ~2s | Yes |
 | **OpenAI TTS** | Good | Paid | ~1.5s | Yes |
+| **NeuTTS** | Good | Free | Depends on CPU/GPU | No |
+
+NeuTTS uses the `tts.neutts` config block above.

 ---

--- a/website/docs/user-guide/messaging/whatsapp.md
+++ b/website/docs/user-guide/messaging/whatsapp.md
@ -97,6 +97,18 @@ WHATSAPP_MODE=bot                          # "bot" or "self-chat"
 WHATSAPP_ALLOWED_USERS=15551234567         # Comma-separated phone numbers (with country code, no +)
 ```

+Optional behavior settings in `~/.hermes/config.yaml`:
+
+```yaml
+unauthorized_dm_behavior: pair
+
+whatsapp:
+  unauthorized_dm_behavior: ignore
+```
+
+- `unauthorized_dm_behavior: pair` is the global default. Unknown DM senders get a pairing code.
+- `whatsapp.unauthorized_dm_behavior: ignore` makes WhatsApp stay silent for unauthorized DMs, which is usually the better choice for a private number.
+
 Then start the gateway:

 ```bash
@ -162,6 +174,7 @@ whatsapp:
 | **Bridge crashes or reconnect loops** | Restart the gateway, update Hermes, and re-pair if the session was invalidated by a WhatsApp protocol change. |
 | **Bot stops working after WhatsApp update** | Update Hermes to get the latest bridge version, then re-pair. |
 | **Messages not being received** | Verify `WHATSAPP_ALLOWED_USERS` includes the sender's number (with country code, no `+` or spaces). |
+| **Bot replies to strangers with a pairing code** | Set `whatsapp.unauthorized_dm_behavior: ignore` in `~/.hermes/config.yaml` if you want unauthorized DMs to be silently ignored instead. |

 ---

@ -173,6 +186,13 @@ of authorized users. Without this setting, the gateway will **deny all incoming
 safety measure.
 :::

+By default, unauthorized DMs still receive a pairing code reply. If you want a private WhatsApp number to stay completely silent to strangers, set:
+
+```yaml
+whatsapp:
+  unauthorized_dm_behavior: ignore
+```
+
 - The `~/.hermes/whatsapp/session` directory contains full session credentials — protect it like a password
 - Set file permissions: `chmod 700 ~/.hermes/whatsapp/session`
 - Use a **dedicated phone number** for the bot to isolate risk from your personal account
--- a/website/docs/user-guide/security.md
+++ b/website/docs/user-guide/security.md
@ -151,6 +151,19 @@ For more flexible authorization, Hermes includes a code-based pairing system. In
 3. The bot owner runs `hermes pairing approve <platform> <code>` on the CLI
 4. The user is permanently approved for that platform

+Control how unauthorized direct messages are handled in `~/.hermes/config.yaml`:
+
+```yaml
+unauthorized_dm_behavior: pair
+
+whatsapp:
+  unauthorized_dm_behavior: ignore
+```
+
+- `pair` is the default. Unauthorized DMs get a pairing code reply.
+- `ignore` silently drops unauthorized DMs.
+- Platform sections override the global default, so you can keep pairing on Telegram while keeping WhatsApp silent.
+
 **Security features** (based on OWASP + NIST SP 800-63-4 guidance):

 | Feature | Details |