Merge remote-tracking branch 'origin/main' into fix/bundle-size

2026-05-18 04:41:56 +00:00 · 2026-05-11 16:01:00 -04:00 · 2026-05-11 16:01:00 -04:00 · 3197b4de6d
commit 3197b4de6d
parent 9d645d98c4 ce0f529cde
1437 changed files with 219762 additions and 11968 deletions
--- a/tests/hermes_cli/conftest.py
+++ b/tests/hermes_cli/conftest.py
@ -0,0 +1,19 @@
+"""Fixtures shared across hermes_cli kanban tests."""
+
+from __future__ import annotations
+
+import pytest
+
+
+@pytest.fixture
+def all_assignees_spawnable(monkeypatch):
+    """Pretend every assignee maps to a real Hermes profile.
+
+    Most dispatcher tests use synthetic assignees ("alice", "bob") that
+    don't correspond to actual profile directories on disk. Without this
+    patch, the dispatcher's profile-exists guard (PR #20105) routes
+    those tasks into ``skipped_nonspawnable`` instead of spawning, which
+    would break tests that assert spawn behavior.
+    """
+    from hermes_cli import profiles
+    monkeypatch.setattr(profiles, "profile_exists", lambda name: True)
--- a/tests/hermes_cli/test_apply_profile_override.py
+++ b/tests/hermes_cli/test_apply_profile_override.py
@ -0,0 +1,141 @@
+"""Regression tests for _apply_profile_override HERMES_HOME guard (issue #22502).
+
+When HERMES_HOME is set to the hermes root (e.g. systemd hardcodes
+HERMES_HOME=/root/.hermes), _apply_profile_override must still read
+active_profile and update HERMES_HOME to the profile directory.
+
+When HERMES_HOME is already a profile directory (.../profiles/<name>),
+_apply_profile_override must trust it and return without re-reading
+active_profile (child-process inheritance contract).
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+from pathlib import Path
+
+import pytest
+
+
+def _run_apply_profile_override(
+    tmp_path, monkeypatch, *, hermes_home: str | None, active_profile: str | None,
+    argv: list[str] | None = None,
+):
+    """Run _apply_profile_override in isolation.
+
+    Returns the value of os.environ["HERMES_HOME"] after the call,
+    or None if unset.
+    """
+    hermes_root = tmp_path / ".hermes"
+    hermes_root.mkdir(parents=True, exist_ok=True)
+
+    if active_profile is not None:
+        (hermes_root / "active_profile").write_text(active_profile)
+
+    if active_profile and active_profile != "default":
+        (hermes_root / "profiles" / active_profile).mkdir(parents=True, exist_ok=True)
+
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    if hermes_home is not None:
+        monkeypatch.setenv("HERMES_HOME", hermes_home)
+    else:
+        monkeypatch.delenv("HERMES_HOME", raising=False)
+
+    monkeypatch.setattr(sys, "argv", argv or ["hermes", "gateway", "start"])
+
+    from hermes_cli.main import _apply_profile_override
+    _apply_profile_override()
+
+    return os.environ.get("HERMES_HOME")
+
+
+class TestApplyProfileOverrideHermesHomeGuard:
+    """Regression guard for issue #22502.
+
+    Verifies that HERMES_HOME pointing to the hermes root does NOT suppress
+    the active_profile check, while HERMES_HOME already pointing to a
+    profile directory IS trusted as-is.
+    """
+
+    def test_hermes_home_at_root_with_active_profile_is_redirected(
+        self, tmp_path, monkeypatch
+    ):
+        """HERMES_HOME=/root/.hermes + active_profile=coder must redirect
+        HERMES_HOME to .../profiles/coder.
+
+        Bug scenario from #22502: systemd sets HERMES_HOME to the hermes root
+        and the user switches to a profile via `hermes profile use`.
+        Before the fix, the guard returned early and active_profile was ignored.
+        """
+        hermes_root = tmp_path / ".hermes"
+        hermes_root.mkdir(parents=True, exist_ok=True)
+
+        result = _run_apply_profile_override(
+            tmp_path,
+            monkeypatch,
+            hermes_home=str(hermes_root),
+            active_profile="coder",
+        )
+
+        assert result is not None, "HERMES_HOME must be set after profile redirect"
+        assert "profiles" in result, (
+            f"Expected HERMES_HOME to point into profiles/ dir, got: {result!r}"
+        )
+        assert result.endswith("coder"), (
+            f"Expected HERMES_HOME to end with 'coder', got: {result!r}"
+        )
+
+    def test_hermes_home_already_profile_dir_is_trusted(self, tmp_path, monkeypatch):
+        """HERMES_HOME=.../profiles/coder must not be overridden even when
+        active_profile says something different.
+
+        Preserves the child-process inheritance contract: a subprocess spawned
+        with HERMES_HOME already set to a specific profile must stay in that
+        profile.
+        """
+        hermes_root = tmp_path / ".hermes"
+        profile_dir = hermes_root / "profiles" / "coder"
+        profile_dir.mkdir(parents=True, exist_ok=True)
+
+        (hermes_root / "active_profile").write_text("other")
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+        monkeypatch.setenv("HERMES_HOME", str(profile_dir))
+        monkeypatch.setattr(sys, "argv", ["hermes", "gateway", "start"])
+
+        from hermes_cli.main import _apply_profile_override
+        _apply_profile_override()
+
+        assert os.environ.get("HERMES_HOME") == str(profile_dir), (
+            "HERMES_HOME must remain unchanged when already pointing to a profile dir"
+        )
+
+    def test_hermes_home_unset_reads_active_profile(self, tmp_path, monkeypatch):
+        """Classic case: HERMES_HOME unset + active_profile=coder must set
+        HERMES_HOME to the profile directory (existing behaviour must not regress).
+        """
+        result = _run_apply_profile_override(
+            tmp_path,
+            monkeypatch,
+            hermes_home=None,
+            active_profile="coder",
+        )
+
+        assert result is not None
+        assert "coder" in result
+
+    def test_hermes_home_unset_default_profile_no_redirect(self, tmp_path, monkeypatch):
+        """active_profile=default must not redirect HERMES_HOME."""
+        hermes_root = tmp_path / ".hermes"
+        hermes_root.mkdir(parents=True, exist_ok=True)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+        monkeypatch.delenv("HERMES_HOME", raising=False)
+        monkeypatch.setattr(sys, "argv", ["hermes", "gateway", "start"])
+        (hermes_root / "active_profile").write_text("default")
+
+        from hermes_cli.main import _apply_profile_override
+        _apply_profile_override()
+
+        assert os.environ.get("HERMES_HOME") is None
--- a/tests/hermes_cli/test_auth_commands.py
+++ b/tests/hermes_cli/test_auth_commands.py
@ -5,8 +5,10 @@ from __future__ import annotations
 import base64
 import json
 from datetime import datetime, timezone
+from unittest.mock import patch

 import pytest
+import yaml


 def _write_auth_store(tmp_path, payload: dict) -> None:
@ -589,6 +591,39 @@ def test_logout_clears_stale_active_codex_without_provider_credentials(tmp_path,
    assert "provider: auto" in config_text


+def test_reset_config_provider_uses_atomic_yaml_write(tmp_path, monkeypatch):
+    """Logout config reset should delegate the YAML write atomically."""
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    config_path = hermes_home / "config.yaml"
+    original = {
+        "model": {
+            "default": "gpt-5.3-codex",
+            "provider": "openai-codex",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+        }
+    }
+    config_path.write_text(yaml.safe_dump(original, sort_keys=False), encoding="utf-8")
+    original_text = config_path.read_text(encoding="utf-8")
+
+    from hermes_cli.auth import _reset_config_provider
+
+    def _boom(path, data, **kwargs):
+        assert path == config_path
+        assert data["model"]["provider"] == "auto"
+        assert data["model"]["base_url"] == "https://openrouter.ai/api/v1"
+        assert kwargs["sort_keys"] is False
+        raise OSError("simulated atomic write failure")
+
+    with patch("hermes_cli.auth.atomic_yaml_write", side_effect=_boom) as mock_write:
+        with pytest.raises(OSError, match="simulated atomic write failure"):
+            _reset_config_provider()
+
+    assert mock_write.call_count == 1
+    assert config_path.read_text(encoding="utf-8") == original_text
+
+
 def test_auth_list_does_not_call_mutating_select(monkeypatch, capsys):
    from hermes_cli.auth_commands import auth_list_command

--- a/tests/hermes_cli/test_auth_nous_provider.py
+++ b/tests/hermes_cli/test_auth_nous_provider.py
@ -1,7 +1,6 @@
 """Regression tests for Nous OAuth refresh + agent-key mint interactions."""

 import json
-import os
 from datetime import datetime, timezone
 from pathlib import Path

@ -76,6 +75,20 @@ class TestResolveVerifyFallback:
        )
        assert result is False

+    def test_string_false_in_auth_state_does_not_disable_tls_verify(self):
+        import ssl
+        from hermes_cli.auth import _resolve_verify
+
+        result = _resolve_verify(auth_state={"tls": {"insecure": "false"}})
+        assert result is not False
+        assert result is True or isinstance(result, ssl.SSLContext)
+
+    def test_string_true_in_auth_state_disables_tls_verify(self):
+        from hermes_cli.auth import _resolve_verify
+
+        result = _resolve_verify(auth_state={"tls": {"insecure": "true"}})
+        assert result is False
+
    def test_no_ca_bundle_returns_true(self, monkeypatch):
        from hermes_cli.auth import _resolve_verify

@ -848,6 +861,46 @@ def test_refresh_token_reuse_detection_surfaces_actionable_message():
    assert exc_info.value.relogin_required is True


+def test_refresh_token_exchange_sends_refresh_token_header():
+    """Nous refresh tokens must be sent in a header so sandbox proxies can
+    substitute placeholder credentials without parsing form bodies.
+    """
+    from hermes_cli.auth import _refresh_access_token
+
+    class _FakeResponse:
+        status_code = 200
+
+        def json(self):
+            return {"access_token": "access-2", "refresh_token": "refresh-2"}
+
+    class _FakeClient:
+        def __init__(self):
+            self.kwargs = None
+
+        def post(self, *args, **kwargs):
+            del args
+            self.kwargs = kwargs
+            return _FakeResponse()
+
+    client = _FakeClient()
+
+    payload = _refresh_access_token(
+        client=client,
+        portal_base_url="https://portal.nousresearch.com",
+        client_id="hermes-cli",
+        refresh_token="refresh-1",
+    )
+
+    assert payload["access_token"] == "access-2"
+    assert payload["refresh_token"] == "refresh-2"
+    assert client.kwargs is not None
+    assert client.kwargs["headers"]["x-nous-refresh-token"] == "refresh-1"
+    assert client.kwargs["data"] == {
+        "grant_type": "refresh_token",
+        "client_id": "hermes-cli",
+    }
+
+
 def test_refresh_non_reuse_error_keeps_original_description():
    """Non-reuse invalid_grant errors must keep their original description untouched.

@ -882,3 +935,370 @@ def test_refresh_non_reuse_error_keeps_original_description():
    assert "Refresh session has been revoked" in str(exc_info.value)
    # Must not have been rewritten with the reuse message.
    assert "external process" not in str(exc_info.value).lower()
+
+
+# =============================================================================
+# Shared Nous token store — cross-profile persistence (Codex-style auto-import)
+# =============================================================================
+
+
+@pytest.fixture
+def shared_store_env(tmp_path, monkeypatch):
+    """Redirect HERMES_SHARED_AUTH_DIR to a tmp_path.
+
+    Required for every test that exercises the shared Nous store — the
+    in-auth.py seat belt refuses to touch the real user's shared store
+    under pytest, so tests that forget this fixture fail loudly instead
+    of corrupting real state.
+    """
+    shared_dir = tmp_path / "shared"
+    monkeypatch.setenv("HERMES_SHARED_AUTH_DIR", str(shared_dir))
+    return shared_dir
+
+
+def test_shared_store_seat_belt_refuses_real_home_under_pytest(monkeypatch):
+    """Without HERMES_SHARED_AUTH_DIR override, the seat belt must trip.
+
+    Mirrors the existing ``_auth_file_path`` seat belt: forgetting to
+    redirect this store in a test must fail loudly instead of silently
+    writing to the user's real ``~/.hermes/shared/`` across CI runs.
+    """
+    from hermes_cli.auth import _nous_shared_store_path
+
+    monkeypatch.delenv("HERMES_SHARED_AUTH_DIR", raising=False)
+
+    with pytest.raises(RuntimeError, match="shared Nous auth store"):
+        _nous_shared_store_path()
+
+
+def test_shared_store_honors_env_override(tmp_path, monkeypatch):
+    """HERMES_SHARED_AUTH_DIR must redirect the path."""
+    from hermes_cli.auth import _nous_shared_store_path, NOUS_SHARED_STORE_FILENAME
+
+    custom_dir = tmp_path / "custom_shared"
+    monkeypatch.setenv("HERMES_SHARED_AUTH_DIR", str(custom_dir))
+
+    path = _nous_shared_store_path()
+    assert path == custom_dir / NOUS_SHARED_STORE_FILENAME
+
+
+def test_shared_store_read_missing_returns_none(shared_store_env):
+    """Missing file → ``_read_shared_nous_state()`` returns None."""
+    from hermes_cli.auth import _read_shared_nous_state
+
+    assert _read_shared_nous_state() is None
+
+
+def test_shared_store_read_malformed_returns_none(shared_store_env):
+    """Unreadable / non-JSON file → None, not an exception."""
+    from hermes_cli.auth import _nous_shared_store_path, _read_shared_nous_state
+
+    path = _nous_shared_store_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text("{ not json")
+
+    assert _read_shared_nous_state() is None
+
+
+def test_shared_store_read_missing_required_fields_returns_none(shared_store_env):
+    """Payload without refresh_token → None (nothing worth importing)."""
+    from hermes_cli.auth import _nous_shared_store_path, _read_shared_nous_state
+
+    path = _nous_shared_store_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps({"_schema": 1, "access_token": "abc"}))
+
+    assert _read_shared_nous_state() is None
+
+
+def test_shared_store_write_and_read_roundtrip(shared_store_env):
+    """Write → read must preserve refresh_token + OAuth URLs."""
+    from hermes_cli.auth import (
+        _nous_shared_store_path,
+        _read_shared_nous_state,
+        _write_shared_nous_state,
+    )
+
+    _write_shared_nous_state(_full_state_fixture())
+
+    path = _nous_shared_store_path()
+    assert path.is_file()
+
+    # Permissions should be 0600 where the platform supports it.
+    mode = path.stat().st_mode & 0o777
+    assert mode == 0o600 or mode == 0o644  # 0o644 on platforms without chmod
+
+    loaded = _read_shared_nous_state()
+    assert loaded is not None
+    assert loaded["refresh_token"] == "refresh-tok"
+    assert loaded["access_token"] == "access-tok"
+    assert loaded["portal_base_url"] == "https://portal.example.com"
+    assert loaded["inference_base_url"] == "https://inference.example.com/v1"
+    # Volatile agent_key MUST NOT be persisted to the shared store
+    # (24h TTL, profile-specific — only long-lived OAuth tokens are
+    # cross-profile useful).
+    assert "agent_key" not in loaded
+
+
+def test_shared_store_write_skips_when_refresh_token_missing(shared_store_env):
+    """Write is a no-op when refresh_token is absent (nothing to share)."""
+    from hermes_cli.auth import _nous_shared_store_path, _write_shared_nous_state
+
+    state = dict(_full_state_fixture())
+    state["refresh_token"] = ""
+
+    _write_shared_nous_state(state)
+
+    assert not _nous_shared_store_path().is_file()
+
+
+def test_persist_nous_credentials_mirrors_to_shared_store(
+    tmp_path, monkeypatch, shared_store_env,
+):
+    """persist_nous_credentials must populate BOTH per-profile auth.json
+    AND the shared store, so a future profile's `hermes auth add nous
+    --type oauth` can one-tap import instead of redoing device-code.
+    """
+    from hermes_cli.auth import (
+        _nous_shared_store_path,
+        _read_shared_nous_state,
+        persist_nous_credentials,
+    )
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(
+        json.dumps({"version": 1, "providers": {}})
+    )
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    persist_nous_credentials(_full_state_fixture())
+
+    # Per-profile auth.json populated
+    payload = json.loads((hermes_home / "auth.json").read_text())
+    assert "nous" in payload.get("providers", {})
+
+    # Shared store populated with the same refresh_token
+    shared = _read_shared_nous_state()
+    assert shared is not None
+    assert shared["refresh_token"] == "refresh-tok"
+
+    # Shared file path lives under the tmp override, NOT the real home
+    assert str(_nous_shared_store_path()).startswith(str(shared_store_env))
+
+
+def test_try_import_shared_returns_none_when_store_missing(shared_store_env):
+    """No shared store → no rehydrate (fall through to device-code)."""
+    from hermes_cli.auth import _try_import_shared_nous_state
+
+    assert _try_import_shared_nous_state() is None
+
+
+def test_try_import_shared_returns_none_on_refresh_failure(
+    shared_store_env, monkeypatch,
+):
+    """If the portal rejects the stored refresh_token (revoked, expired,
+    portal down), _try_import_shared_nous_state must return None so the
+    login flow falls back to a fresh device-code run.
+    """
+    from hermes_cli import auth as auth_mod
+
+    # Seed the shared store
+    auth_mod._write_shared_nous_state(_full_state_fixture())
+
+    # Make refresh fail
+    def _boom(*_args, **_kwargs):
+        raise AuthError(
+            "Refresh session has been revoked",
+            provider="nous",
+            code="invalid_grant",
+            relogin_required=True,
+        )
+
+    monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _boom)
+
+    assert auth_mod._try_import_shared_nous_state() is None
+
+
+def test_try_import_shared_rehydrates_on_success(shared_store_env, monkeypatch):
+    """Happy path: stored refresh_token is accepted, forced refresh+mint
+    returns a fresh access_token + agent_key, and the returned dict has
+    every field persist_nous_credentials() needs.
+    """
+    from hermes_cli import auth as auth_mod
+
+    auth_mod._write_shared_nous_state(_full_state_fixture())
+
+    def _fake_refresh(state, **kwargs):
+        # Simulate portal returning fresh tokens + a new agent_key
+        assert kwargs.get("force_refresh") is True
+        assert kwargs.get("force_mint") is True
+        return {
+            **state,
+            "access_token": "fresh-access-tok",
+            "refresh_token": "fresh-refresh-tok",  # rotated
+            "agent_key": "new-agent-key",
+            "agent_key_expires_at": "2026-04-19T22:00:00+00:00",
+        }
+
+    monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _fake_refresh)
+
+    result = auth_mod._try_import_shared_nous_state()
+
+    assert result is not None
+    assert result["access_token"] == "fresh-access-tok"
+    assert result["refresh_token"] == "fresh-refresh-tok"
+    assert result["agent_key"] == "new-agent-key"
+    # Preserved from shared state
+    assert result["portal_base_url"] == "https://portal.example.com"
+    assert result["client_id"] == "hermes-cli"
+
+
+def test_shared_store_survives_across_profile_switch(
+    tmp_path, monkeypatch, shared_store_env,
+):
+    """End-to-end: profile A logs in → shared store populated → profile B
+    (different HERMES_HOME) sees the same shared state and can rehydrate
+    without re-running device-code.
+    """
+    from hermes_cli import auth as auth_mod
+
+    # Profile A: login, which mirrors to shared store
+    profile_a = tmp_path / "profile_a"
+    profile_a.mkdir(parents=True, exist_ok=True)
+    (profile_a / "auth.json").write_text(
+        json.dumps({"version": 1, "providers": {}})
+    )
+    monkeypatch.setenv("HERMES_HOME", str(profile_a))
+    auth_mod.persist_nous_credentials(_full_state_fixture())
+
+    # Profile A's auth.json has nous
+    a_payload = json.loads((profile_a / "auth.json").read_text())
+    assert "nous" in a_payload.get("providers", {})
+
+    # Profile B: fresh HERMES_HOME, no auth yet, but the shared store
+    # persists — _read_shared_nous_state() must still return the tokens.
+    profile_b = tmp_path / "profile_b"
+    profile_b.mkdir(parents=True, exist_ok=True)
+    (profile_b / "auth.json").write_text(
+        json.dumps({"version": 1, "providers": {}})
+    )
+    monkeypatch.setenv("HERMES_HOME", str(profile_b))
+
+    # B's own auth.json has no nous
+    b_payload = json.loads((profile_b / "auth.json").read_text())
+    assert "nous" not in b_payload.get("providers", {})
+
+    # But the shared store is visible
+    shared = auth_mod._read_shared_nous_state()
+    assert shared is not None
+    assert shared["refresh_token"] == "refresh-tok"
+
+    # And a successful rehydrate + persist lands nous into profile B
+    def _fake_refresh(state, **kwargs):
+        return {
+            **state,
+            "access_token": "b-access-tok",
+            "refresh_token": "b-refresh-tok",
+            "agent_key": "b-agent-key",
+            "agent_key_expires_at": "2026-04-19T22:00:00+00:00",
+        }
+
+    monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _fake_refresh)
+    result = auth_mod._try_import_shared_nous_state()
+    assert result is not None
+
+    auth_mod.persist_nous_credentials(result)
+
+    b_payload = json.loads((profile_b / "auth.json").read_text())
+    assert "nous" in b_payload.get("providers", {})
+    assert b_payload["providers"]["nous"]["refresh_token"] == "b-refresh-tok"
+
+    # Shared store was updated with the rotated refresh_token too
+    shared_after = auth_mod._read_shared_nous_state()
+    assert shared_after is not None
+    assert shared_after["refresh_token"] == "b-refresh-tok"
+
+
+def test_runtime_refresh_uses_newer_shared_token_before_local_stale_token(
+    tmp_path, monkeypatch, shared_store_env,
+):
+    """A sibling profile may rotate the single-use Nous refresh token.
+
+    When this profile later wakes with an expired local token, runtime
+    resolution must adopt the shared token before refreshing. Otherwise it
+    can submit the stale local refresh token and trigger portal reuse
+    revocation for the whole shared session.
+    """
+    from hermes_cli import auth as auth_mod
+
+    profile_b = tmp_path / "profile_b"
+    _setup_nous_auth(
+        profile_b,
+        access_token="local-expired-access",
+        refresh_token="local-stale-refresh",
+    )
+    monkeypatch.setenv("HERMES_HOME", str(profile_b))
+
+    shared_state = _full_state_fixture()
+    shared_state["access_token"] = "shared-fresh-access"
+    shared_state["refresh_token"] = "shared-fresh-refresh"
+    shared_state["expires_at"] = "2099-01-01T00:00:00+00:00"
+    auth_mod._write_shared_nous_state(shared_state)
+
+    def _refresh_should_not_happen(**_kwargs):
+        raise AssertionError("stale profile-local refresh token was used")
+
+    minted_with: list[str] = []
+
+    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
+        minted_with.append(access_token)
+        return _mint_payload(api_key="agent-key-from-shared-token")
+
+    monkeypatch.setattr(auth_mod, "_refresh_access_token", _refresh_should_not_happen)
+    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)
+
+    creds = auth_mod.resolve_nous_runtime_credentials(
+        min_key_ttl_seconds=300,
+        force_mint=True,
+    )
+
+    assert creds["api_key"] == "agent-key-from-shared-token"
+    assert minted_with == ["shared-fresh-access"]
+
+    profile_state = auth_mod.get_provider_auth_state("nous")
+    assert profile_state is not None
+    assert profile_state["refresh_token"] == "shared-fresh-refresh"
+    assert profile_state["access_token"] == "shared-fresh-access"
+
+
+def test_managed_gateway_access_token_uses_newer_shared_token(
+    tmp_path, monkeypatch, shared_store_env,
+):
+    """Managed-tool token reads share the same stale-refresh-token hazard."""
+    from hermes_cli import auth as auth_mod
+
+    profile_b = tmp_path / "profile_b"
+    _setup_nous_auth(
+        profile_b,
+        access_token="local-expired-access",
+        refresh_token="local-stale-refresh",
+    )
+    monkeypatch.setenv("HERMES_HOME", str(profile_b))
+
+    shared_state = _full_state_fixture()
+    shared_state["access_token"] = "shared-fresh-access"
+    shared_state["refresh_token"] = "shared-fresh-refresh"
+    shared_state["expires_at"] = "2099-01-01T00:00:00+00:00"
+    auth_mod._write_shared_nous_state(shared_state)
+
+    def _refresh_should_not_happen(**_kwargs):
+        raise AssertionError("stale profile-local refresh token was used")
+
+    monkeypatch.setattr(auth_mod, "_refresh_access_token", _refresh_should_not_happen)
+
+    assert auth_mod.resolve_nous_access_token() == "shared-fresh-access"
+
+    profile_state = auth_mod.get_provider_auth_state("nous")
+    assert profile_state is not None
+    assert profile_state["refresh_token"] == "shared-fresh-refresh"
--- a/tests/hermes_cli/test_auth_profile_fallback.py
+++ b/tests/hermes_cli/test_auth_profile_fallback.py
@ -0,0 +1,360 @@
+"""Tests for cross-profile auth fallback.
+
+When ``HERMES_HOME`` points to a named profile, ``read_credential_pool()``
+and ``get_provider_auth_state()`` fall back to the global-root
+``auth.json`` per-provider when the profile has no entries for that
+provider.  Writes still target the profile only.
+
+See the #18594 follow-up report: profile workers couldn't see providers
+authenticated only at the global root.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+
+
+def _make_auth_store(pool: dict | None = None, providers: dict | None = None) -> dict:
+    store: dict = {"version": 1}
+    if pool is not None:
+        store["credential_pool"] = pool
+    if providers is not None:
+        store["providers"] = providers
+    return store
+
+
+@pytest.fixture()
+def profile_env(tmp_path, monkeypatch):
+    """Set up a global root + an active profile under Path.home()/.hermes/profiles/coder.
+
+    * Path.home() -> tmp_path
+    * Global root -> tmp_path/.hermes            (has its own auth.json fixture)
+    * Profile     -> tmp_path/.hermes/profiles/coder   (active, HERMES_HOME points here)
+
+    This mirrors the real "named profile mounted under the default root"
+    layout that profile users actually have on disk.
+    """
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    global_root = tmp_path / ".hermes"
+    global_root.mkdir()
+    profile_dir = global_root / "profiles" / "coder"
+    profile_dir.mkdir(parents=True)
+    monkeypatch.setenv("HERMES_HOME", str(profile_dir))
+    return {"global": global_root, "profile": profile_dir}
+
+
+def _write(path: Path, payload: dict) -> None:
+    path.write_text(json.dumps(payload, indent=2))
+
+
+# ---------------------------------------------------------------------------
+# read_credential_pool — provider-slice reads
+# ---------------------------------------------------------------------------
+
+
+def test_profile_with_zero_entries_falls_back_to_global(profile_env):
+    """Empty profile pool inherits the global-root entries for that provider."""
+    from hermes_cli.auth import read_credential_pool
+
+    _write(profile_env["global"] / "auth.json", _make_auth_store(pool={
+        "openrouter": [{
+            "id": "glob-1",
+            "label": "global-key",
+            "auth_type": "api_key",
+            "priority": 0,
+            "source": "manual",
+            "access_token": "sk-or-global",
+        }],
+    }))
+    # Profile auth.json: exists but has no openrouter entries.
+    _write(profile_env["profile"] / "auth.json", _make_auth_store(pool={}))
+
+    entries = read_credential_pool("openrouter")
+    assert len(entries) == 1
+    assert entries[0]["id"] == "glob-1"
+    assert entries[0]["access_token"] == "sk-or-global"
+
+
+def test_profile_with_entries_fully_shadows_global(profile_env):
+    """Once the profile has any entries for a provider, global is ignored."""
+    from hermes_cli.auth import read_credential_pool
+
+    _write(profile_env["global"] / "auth.json", _make_auth_store(pool={
+        "openrouter": [{
+            "id": "glob-1",
+            "label": "global-key",
+            "auth_type": "api_key",
+            "priority": 0,
+            "source": "manual",
+            "access_token": "sk-or-global",
+        }],
+    }))
+    _write(profile_env["profile"] / "auth.json", _make_auth_store(pool={
+        "openrouter": [{
+            "id": "prof-1",
+            "label": "profile-key",
+            "auth_type": "api_key",
+            "priority": 0,
+            "source": "manual",
+            "access_token": "sk-or-profile",
+        }],
+    }))
+
+    entries = read_credential_pool("openrouter")
+    assert len(entries) == 1
+    assert entries[0]["id"] == "prof-1"
+    assert entries[0]["access_token"] == "sk-or-profile"
+
+
+def test_per_provider_shadowing_is_independent(profile_env):
+    """Profile can override one provider while inheriting another from global."""
+    from hermes_cli.auth import read_credential_pool
+
+    _write(profile_env["global"] / "auth.json", _make_auth_store(pool={
+        "openrouter": [{
+            "id": "glob-or",
+            "label": "global-or",
+            "auth_type": "api_key",
+            "priority": 0,
+            "source": "manual",
+            "access_token": "sk-or-global",
+        }],
+        "anthropic": [{
+            "id": "glob-ant",
+            "label": "global-ant",
+            "auth_type": "api_key",
+            "priority": 0,
+            "source": "manual",
+            "access_token": "sk-ant-global",
+        }],
+    }))
+    _write(profile_env["profile"] / "auth.json", _make_auth_store(pool={
+        # Profile has openrouter only — anthropic should still fall back.
+        "openrouter": [{
+            "id": "prof-or",
+            "label": "profile-or",
+            "auth_type": "api_key",
+            "priority": 0,
+            "source": "manual",
+            "access_token": "sk-or-profile",
+        }],
+    }))
+
+    or_entries = read_credential_pool("openrouter")
+    ant_entries = read_credential_pool("anthropic")
+    assert [e["id"] for e in or_entries] == ["prof-or"]
+    assert [e["id"] for e in ant_entries] == ["glob-ant"]
+
+
+def test_missing_global_auth_file_is_safe(profile_env):
+    """Profile processes that never had a global auth.json still work."""
+    from hermes_cli.auth import read_credential_pool
+
+    # No global auth.json written at all.
+    _write(profile_env["profile"] / "auth.json", _make_auth_store(pool={
+        "openrouter": [{
+            "id": "prof-1",
+            "label": "profile",
+            "auth_type": "api_key",
+            "priority": 0,
+            "source": "manual",
+            "access_token": "sk-profile",
+        }],
+    }))
+
+    assert read_credential_pool("openrouter")[0]["id"] == "prof-1"
+    assert read_credential_pool("anthropic") == []
+
+
+def test_malformed_global_auth_file_does_not_break_profile_read(profile_env):
+    (profile_env["global"] / "auth.json").write_text("{not valid json")
+    _write(profile_env["profile"] / "auth.json", _make_auth_store(pool={
+        "openrouter": [{
+            "id": "prof-1",
+            "label": "profile",
+            "auth_type": "api_key",
+            "priority": 0,
+            "source": "manual",
+            "access_token": "sk-profile",
+        }],
+    }))
+
+    from hermes_cli.auth import read_credential_pool
+
+    # Profile reads still work; malformed global is silently ignored.
+    assert read_credential_pool("openrouter")[0]["id"] == "prof-1"
+    # And no fallback for anthropic since global is unreadable.
+    assert read_credential_pool("anthropic") == []
+
+
+# ---------------------------------------------------------------------------
+# read_credential_pool — whole-pool reads (provider_id=None)
+# ---------------------------------------------------------------------------
+
+
+def test_whole_pool_merges_global_providers_when_missing_locally(profile_env):
+    from hermes_cli.auth import read_credential_pool
+
+    _write(profile_env["global"] / "auth.json", _make_auth_store(pool={
+        "openrouter": [{
+            "id": "glob-or",
+            "label": "global-or",
+            "auth_type": "api_key",
+            "priority": 0,
+            "source": "manual",
+            "access_token": "sk-or-global",
+        }],
+        "anthropic": [{
+            "id": "glob-ant",
+            "label": "global-ant",
+            "auth_type": "api_key",
+            "priority": 0,
+            "source": "manual",
+            "access_token": "sk-ant-global",
+        }],
+    }))
+    _write(profile_env["profile"] / "auth.json", _make_auth_store(pool={
+        "openrouter": [{
+            "id": "prof-or",
+            "label": "profile-or",
+            "auth_type": "api_key",
+            "priority": 0,
+            "source": "manual",
+            "access_token": "sk-or-profile",
+        }],
+    }))
+
+    pool = read_credential_pool(None)
+    # Profile wins for openrouter, global fills in anthropic.
+    assert [e["id"] for e in pool["openrouter"]] == ["prof-or"]
+    assert [e["id"] for e in pool["anthropic"]] == ["glob-ant"]
+
+
+# ---------------------------------------------------------------------------
+# get_provider_auth_state — singleton fallback
+# ---------------------------------------------------------------------------
+
+
+def test_provider_auth_state_falls_back_to_global_when_profile_has_none(profile_env):
+    from hermes_cli.auth import get_provider_auth_state
+
+    _write(profile_env["global"] / "auth.json", _make_auth_store(providers={
+        "nous": {"access_token": "nous-global", "refresh_token": "rt-global"},
+    }))
+    _write(profile_env["profile"] / "auth.json", _make_auth_store(providers={}))
+
+    state = get_provider_auth_state("nous")
+    assert state is not None
+    assert state["access_token"] == "nous-global"
+
+
+def test_provider_auth_state_profile_wins_when_present(profile_env):
+    from hermes_cli.auth import get_provider_auth_state
+
+    _write(profile_env["global"] / "auth.json", _make_auth_store(providers={
+        "nous": {"access_token": "nous-global"},
+    }))
+    _write(profile_env["profile"] / "auth.json", _make_auth_store(providers={
+        "nous": {"access_token": "nous-profile"},
+    }))
+
+    state = get_provider_auth_state("nous")
+    assert state is not None
+    assert state["access_token"] == "nous-profile"
+
+
+def test_provider_auth_state_returns_none_when_neither_has_it(profile_env):
+    from hermes_cli.auth import get_provider_auth_state
+
+    _write(profile_env["global"] / "auth.json", _make_auth_store(providers={}))
+    _write(profile_env["profile"] / "auth.json", _make_auth_store(providers={}))
+
+    assert get_provider_auth_state("nous") is None
+
+
+# ---------------------------------------------------------------------------
+# Classic mode — no fallback path should ever trigger
+# ---------------------------------------------------------------------------
+
+
+def test_classic_mode_does_not_double_read_same_file(tmp_path, monkeypatch):
+    """In classic mode (HERMES_HOME == global root), no fallback path runs.
+
+    This guards against the merge accidentally duplicating entries when the
+    profile and global resolve to the same directory.
+    """
+    # Put Path.home() under a subdir so the seat belt in _auth_file_path()
+    # sees tmp_path/home/.hermes as the "real home" — which is NOT equal
+    # to the HERMES_HOME we set (tmp_path/classic), so the guard passes.
+    fake_home = tmp_path / "home"
+    fake_home.mkdir()
+    monkeypatch.setattr(Path, "home", lambda: fake_home)
+    hermes_home = tmp_path / "classic"
+    hermes_home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    _write(hermes_home / "auth.json", _make_auth_store(pool={
+        "openrouter": [{
+            "id": "only",
+            "label": "classic",
+            "auth_type": "api_key",
+            "priority": 0,
+            "source": "manual",
+            "access_token": "sk-classic",
+        }],
+    }))
+
+    from hermes_cli.auth import read_credential_pool, _global_auth_file_path
+
+    # Classic mode: HERMES_HOME is set to a custom path that is NOT under
+    # ~/.hermes/profiles/ — get_default_hermes_root() returns HERMES_HOME
+    # itself, so the profile root and global root are the same directory,
+    # and the helper correctly returns None (no fallback).
+    assert _global_auth_file_path() is None
+    # And the read should return exactly one entry (not two).
+    entries = read_credential_pool("openrouter")
+    assert len(entries) == 1
+    assert entries[0]["id"] == "only"
+
+
+# ---------------------------------------------------------------------------
+# Writes stay scoped to the profile
+# ---------------------------------------------------------------------------
+
+
+def test_write_credential_pool_targets_profile_not_global(profile_env):
+    from hermes_cli.auth import read_credential_pool, write_credential_pool
+
+    _write(profile_env["global"] / "auth.json", _make_auth_store(pool={
+        "openrouter": [{
+            "id": "glob-1",
+            "label": "global",
+            "auth_type": "api_key",
+            "priority": 0,
+            "source": "manual",
+            "access_token": "sk-global",
+        }],
+    }))
+
+    write_credential_pool("openrouter", [{
+        "id": "prof-new",
+        "label": "profile-new",
+        "auth_type": "api_key",
+        "priority": 0,
+        "source": "manual",
+        "access_token": "sk-profile-new",
+    }])
+
+    # Global auth.json unchanged.
+    global_data = json.loads((profile_env["global"] / "auth.json").read_text())
+    assert global_data["credential_pool"]["openrouter"][0]["id"] == "glob-1"
+
+    # Profile auth.json holds the new entry.
+    profile_data = json.loads((profile_env["profile"] / "auth.json").read_text())
+    assert profile_data["credential_pool"]["openrouter"][0]["id"] == "prof-new"
+
+    # Subsequent read returns profile (shadows global).
+    assert [e["id"] for e in read_credential_pool("openrouter")] == ["prof-new"]
--- a/tests/hermes_cli/test_auth_toctou_file_modes.py
+++ b/tests/hermes_cli/test_auth_toctou_file_modes.py
@ -0,0 +1,202 @@
+"""Regression tests for TOCTOU-safe credential file writers in ``hermes_cli.auth``.
+
+Background
+==========
+The three writers below used to create a temp file via ``Path.write_text`` /
+``Path.open('w')`` and only ``chmod``'d it to ``0o600`` afterward. Between
+create and chmod the file existed at the process umask (typically ``0o644``),
+briefly exposing OAuth tokens to other local users on multi-user hosts. The
+fix switches them to ``os.open(O_EXCL, mode=0o600)`` + ``os.fdopen`` +
+``fsync`` so the file is atomic at ``0o600`` on creation. Mirrors the fixes
+shipped for ``agent/google_oauth.py`` (#19673) and ``tools/mcp_oauth.py``
+(#21148).
+
+These tests stay green only while the token file and its parent directory
+end up at ``0o600`` / ``0o700`` after every write. POSIX-only — the mode-bit
+enforcement does not exist on Windows.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import stat
+import sys
+from unittest.mock import patch
+
+import pytest
+
+
+pytestmark = pytest.mark.skipif(
+    sys.platform.startswith("win"),
+    reason="POSIX mode bits not enforced on Windows",
+)
+
+
+# ---------------------------------------------------------------------------
+# _save_auth_store  (~/.hermes/auth.json — every native OAuth provider)
+# ---------------------------------------------------------------------------
+
+
+def test_save_auth_store_writes_0o600_with_0o700_parent(tmp_path, monkeypatch):
+    """``_save_auth_store`` must land ``auth.json`` at 0o600 and parent at 0o700."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    old_umask = os.umask(0o022)  # make the race observable if it regresses
+    try:
+        from hermes_cli import auth as auth_mod
+
+        auth_store = {
+            "version": auth_mod.AUTH_STORE_VERSION,
+            "providers": {"openai-codex": {"tokens": {"access_token": "secret-x"}}},
+            "active_provider": "openai-codex",
+        }
+        auth_path = auth_mod._save_auth_store(auth_store)
+    finally:
+        os.umask(old_umask)
+
+    mode = stat.S_IMODE(auth_path.stat().st_mode)
+    parent_mode = stat.S_IMODE(auth_path.parent.stat().st_mode)
+
+    assert mode == 0o600, (
+        f"auth.json mode 0o{mode:o} != 0o600 — TOCTOU race regressed"
+    )
+    assert parent_mode == 0o700, (
+        f"auth.json parent dir mode 0o{parent_mode:o} != 0o700 — siblings can traverse"
+    )
+
+    # Content survived the rewrite
+    data = json.loads(auth_path.read_text())
+    assert data["providers"]["openai-codex"]["tokens"]["access_token"] == "secret-x"
+
+
+# ---------------------------------------------------------------------------
+# _save_qwen_cli_tokens  (Qwen CLI OAuth tokens)
+# ---------------------------------------------------------------------------
+
+
+def test_save_qwen_cli_tokens_writes_0o600_with_0o700_parent(tmp_path, monkeypatch):
+    """``_save_qwen_cli_tokens`` must land the token file at 0o600 and parent at 0o700."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    # The Qwen CLI auth path lives under $HOME/.qwen by default — isolate it.
+    monkeypatch.setenv("HOME", str(tmp_path))
+    old_umask = os.umask(0o022)
+    try:
+        from hermes_cli import auth as auth_mod
+
+        tokens = {
+            "access_token": "qwen-secret",
+            "refresh_token": "qwen-refresh",
+            "token_type": "Bearer",
+            "expiry_date": 123,
+        }
+        auth_path = auth_mod._save_qwen_cli_tokens(tokens)
+    finally:
+        os.umask(old_umask)
+
+    mode = stat.S_IMODE(auth_path.stat().st_mode)
+    parent_mode = stat.S_IMODE(auth_path.parent.stat().st_mode)
+
+    assert mode == 0o600, (
+        f"Qwen token file mode 0o{mode:o} != 0o600 — TOCTOU race regressed"
+    )
+    assert parent_mode == 0o700, (
+        f"Qwen token parent dir mode 0o{parent_mode:o} != 0o700"
+    )
+
+    data = json.loads(auth_path.read_text())
+    assert data["access_token"] == "qwen-secret"
+
+
+# ---------------------------------------------------------------------------
+# Nous shared-credential store write (inside _write_shared_nous_state)
+# ---------------------------------------------------------------------------
+
+
+def test_shared_nous_store_writes_0o600_with_0o700_parent(tmp_path, monkeypatch):
+    """The Nous shared-credential store must land at 0o600 / parent 0o700."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    # _nous_shared_store_path() refuses to touch the real shared store during
+    # pytest runs; redirect it into tmp_path explicitly. Use a distinct
+    # subdirectory name (``shared_override``) so the guard's "real user
+    # home" reference — which currently tracks HERMES_HOME via
+    # get_default_hermes_root() — can't collide with our override and
+    # falsely claim we're writing to the real user's shared store.
+    monkeypatch.setenv("HERMES_SHARED_AUTH_DIR", str(tmp_path / "shared_override"))
+    old_umask = os.umask(0o022)
+    try:
+        from hermes_cli import auth as auth_mod
+
+        state = {
+            "access_token": "nous-access-xxx",
+            "refresh_token": "nous-refresh-xxx",
+            "token_type": "Bearer",
+            "scope": "openid profile",
+            "client_id": "test-client",
+            "obtained_at": "2026-01-01T00:00:00Z",
+            "expires_at": "2026-01-01T01:00:00Z",
+        }
+        auth_mod._write_shared_nous_state(state)
+        path = auth_mod._nous_shared_store_path()
+    finally:
+        os.umask(old_umask)
+
+    assert path.exists(), "shared Nous store was not written"
+    mode = stat.S_IMODE(path.stat().st_mode)
+    parent_mode = stat.S_IMODE(path.parent.stat().st_mode)
+
+    assert mode == 0o600, (
+        f"Nous shared store mode 0o{mode:o} != 0o600 — TOCTOU race regressed"
+    )
+    assert parent_mode == 0o700, (
+        f"Nous shared store parent dir mode 0o{parent_mode:o} != 0o700"
+    )
+
+    data = json.loads(path.read_text())
+    assert data["refresh_token"] == "nous-refresh-xxx"
+
+
+# ---------------------------------------------------------------------------
+# Atomicity: verify ``os.open`` is called with an explicit 0o600 mode.
+# ---------------------------------------------------------------------------
+
+
+def test_save_auth_store_uses_os_open_with_0o600_mode(tmp_path, monkeypatch):
+    """Regression: the writer must call ``os.open`` with an explicit restricted
+    mode so the file is created at 0o600 atomically — closing the TOCTOU
+    window the previous ``Path.open('w')`` left open (fd inherited process
+    umask and was briefly 0o644 before post-write chmod)."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    observed_opens: list[tuple[str, int, int]] = []
+    real_os_open = os.open
+
+    def spying_os_open(path, flags, mode=0o777, *args, **kwargs):
+        observed_opens.append((str(path), flags, mode))
+        return real_os_open(path, flags, mode, *args, **kwargs)
+
+    with patch.object(os, "open", spying_os_open):
+        from hermes_cli import auth as auth_mod
+
+        auth_mod._save_auth_store(
+            {"version": auth_mod.AUTH_STORE_VERSION, "providers": {}}
+        )
+
+    auth_tmp_opens = [
+        (p, fl, m) for (p, fl, m) in observed_opens if "auth.json.tmp" in p
+    ]
+    assert auth_tmp_opens, (
+        f"os.open was never called for the auth.json temp file; "
+        f"observed={observed_opens!r}"
+    )
+    for path, flags, mode in auth_tmp_opens:
+        assert flags & os.O_CREAT, f"auth.json temp open missing O_CREAT: path={path}"
+        assert flags & os.O_EXCL, (
+            f"auth.json temp open missing O_EXCL — TOCTOU-safe pattern regressed: "
+            f"path={path}, flags={flags}"
+        )
+        # Must be exactly S_IRUSR | S_IWUSR (0o600) — no group/other bits.
+        expected = stat.S_IRUSR | stat.S_IWUSR
+        assert mode == expected, (
+            f"auth.json temp open mode 0o{mode:o} != 0o{expected:o} — "
+            f"umask would apply and potentially expose tokens"
+        )
--- a/tests/hermes_cli/test_backup.py
+++ b/tests/hermes_cli/test_backup.py
@ -471,6 +471,32 @@ class TestImport:
        with pytest.raises(SystemExit):
            run_import(args)

+    @pytest.mark.skipif(os.name != "posix", reason="POSIX file permissions only")
+    def test_restores_secret_files_with_0600_perms(self, tmp_path, monkeypatch):
+        """Secret files must end up at 0600 after restore (zipfile drops mode bits)."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        zip_path = tmp_path / "backup.zip"
+        self._make_backup_zip(zip_path, {
+            "config.yaml": "model: openrouter\n",
+            ".env": "OPENROUTER_API_KEY=sk-secret\n",
+            "auth.json": '{"providers": {"nous": "token"}}',
+            "state.db": b"SQLite format 3\x00",
+            "profiles/coder/.env": "ANTHROPIC_API_KEY=sk-ant-secret\n",
+        })
+
+        args = Namespace(zipfile=str(zip_path), force=True)
+
+        from hermes_cli.backup import run_import
+        run_import(args)
+
+        for rel in (".env", "auth.json", "state.db", "profiles/coder/.env"):
+            mode = (hermes_home / rel).stat().st_mode & 0o777
+            assert mode == 0o600, f"{rel} restored with mode {oct(mode)}, expected 0o600"
+

 # ---------------------------------------------------------------------------
 # Round-trip test
@ -1348,6 +1374,53 @@ class TestPreUpdateBackup:
        from hermes_cli.backup import create_pre_update_backup
        assert create_pre_update_backup(hermes_home=tmp_path / "does-not-exist") is None

+    def test_keep_zero_does_not_delete_freshly_created_backup(self, hermes_home):
+        """Regression: ``backup_keep: 0`` previously triggered ``backups[0:]``
+        in the pruner — wiping the just-created zip and leaving the user
+        with no recovery point.  The floor (keep>=1) preserves the new file
+        regardless of misconfiguration; users who don't want backups should
+        set ``pre_update_backup: false`` instead.
+        """
+        from hermes_cli.backup import create_pre_update_backup
+        out = create_pre_update_backup(hermes_home=hermes_home, keep=0)
+        assert out is not None
+        assert out.exists(), (
+            "keep=0 silently deleted the freshly-created backup; floor "
+            "should preserve the just-written file."
+        )
+
+    def test_keep_negative_does_not_delete_freshly_created_backup(self, hermes_home):
+        """Mirror coverage: any value <1 should be floored, not literally
+        applied as a slice index."""
+        from hermes_cli.backup import create_pre_update_backup
+        out = create_pre_update_backup(hermes_home=hermes_home, keep=-3)
+        assert out is not None
+        assert out.exists()
+
+    def test_keep_zero_still_prunes_older_backups(self, hermes_home):
+        """The floor preserves the new backup but should NOT regress the
+        rotation behaviour for older zips: a third call with keep=0 must
+        still remove pre-existing backups beyond the (floored) limit of 1.
+        """
+        import time as _t
+        from hermes_cli.backup import create_pre_update_backup
+
+        first = create_pre_update_backup(hermes_home=hermes_home, keep=5)
+        _t.sleep(1.05)
+        second = create_pre_update_backup(hermes_home=hermes_home, keep=5)
+        _t.sleep(1.05)
+        third = create_pre_update_backup(hermes_home=hermes_home, keep=0)
+
+        remaining = {
+            p.name for p in (hermes_home / "backups").iterdir()
+            if p.name.startswith("pre-update-")
+        }
+        assert third.name in remaining, "Floor must preserve the new backup"
+        assert first.name not in remaining and second.name not in remaining, (
+            f"keep=0 floor of 1 should still prune older backups; "
+            f"remaining={remaining}"
+        )
+

 class TestRunPreUpdateBackup:
    """Tests for the ``_run_pre_update_backup`` wrapper in main.py —
--- a/tests/hermes_cli/test_bedrock_model_picker.py
+++ b/tests/hermes_cli/test_bedrock_model_picker.py
@ -203,6 +203,30 @@ class TestListAuthenticatedProvidersBedrock:
        bedrock = next((p for p in providers if p["slug"] == "bedrock"), None)
        assert bedrock is None, "bedrock should NOT appear when AWS credentials are absent"

+    def test_non_bedrock_picker_does_not_probe_full_aws_chain(self, monkeypatch):
+        """Non-Bedrock provider discovery must not touch boto3's full credential chain."""
+        from hermes_cli.model_switch import list_authenticated_providers
+
+        monkeypatch.delenv("AWS_PROFILE", raising=False)
+        monkeypatch.delenv("AWS_ACCESS_KEY_ID", raising=False)
+        monkeypatch.delenv("AWS_SECRET_ACCESS_KEY", raising=False)
+        monkeypatch.delenv("AWS_BEARER_TOKEN_BEDROCK", raising=False)
+        monkeypatch.delenv("AWS_WEB_IDENTITY_TOKEN_FILE", raising=False)
+        monkeypatch.delenv("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", raising=False)
+        monkeypatch.delenv("AWS_CONTAINER_CREDENTIALS_FULL_URI", raising=False)
+
+        calls = {"has_aws_credentials": 0}
+
+        def _has_aws_credentials():
+            calls["has_aws_credentials"] += 1
+            return False
+
+        with patch("agent.bedrock_adapter.has_aws_credentials", side_effect=_has_aws_credentials):
+            providers = list_authenticated_providers(current_provider="openrouter", max_models=0)
+
+        assert calls["has_aws_credentials"] == 0
+        assert all(p["slug"] != "bedrock" for p in providers)
+
    def test_bedrock_falls_back_to_curated_when_discovery_fails(self, monkeypatch):
        """When discover_bedrock_models() raises, fall back to curated list without crashing."""
        from hermes_cli.model_switch import list_authenticated_providers
--- a/tests/hermes_cli/test_cmd_update.py
+++ b/tests/hermes_cli/test_cmd_update.py
@ -111,12 +111,14 @@ class TestCmdUpdateBranchFallback:
    def test_update_refreshes_repo_and_tui_node_dependencies(
        self, mock_run, mock_which, mock_args
    ):
+        from hermes_cli import main as hm
+
        mock_which.side_effect = {"uv": "/usr/bin/uv", "npm": "/usr/bin/npm"}.get
        mock_run.side_effect = _make_run_side_effect(
            branch="main", verify_ok=True, commit_count="1"
        )
-
-        cmd_update(mock_args)
+        with patch.object(hm, "_is_termux_env", return_value=False):
+            cmd_update(mock_args)

        npm_calls = [
            (call.args[0], call.kwargs.get("cwd"))
@ -136,21 +138,28 @@ class TestCmdUpdateBranchFallback:
            "--no-audit",
            "--progress=false",
        ]
-        assert npm_calls == [
+        assert npm_calls[:2] == [
            (full_flags, PROJECT_ROOT),
            (full_flags, PROJECT_ROOT / "ui-tui"),
-            (["/usr/bin/npm", "ci", "--silent"], PROJECT_ROOT / "web"),
-            (["/usr/bin/npm", "run", "build"], PROJECT_ROOT / "web"),
        ]
+        if len(npm_calls) > 2:
+            assert npm_calls[2:] == [
+                (["/usr/bin/npm", "ci", "--silent"], PROJECT_ROOT / "web"),
+                (["/usr/bin/npm", "run", "build"], PROJECT_ROOT / "web"),
+            ]

-    def test_update_non_interactive_skips_migration_prompt(self, mock_args, capsys):
-        """When stdin/stdout aren't TTYs, config migration prompt is skipped."""
+    def test_update_non_interactive_runs_safe_config_migrations(self, mock_args, capsys):
+        """Dashboard/web updates apply non-interactive migrations before restart."""
        with patch("shutil.which", return_value=None), patch(
            "subprocess.run"
        ) as mock_run, patch("builtins.input") as mock_input, patch(
            "hermes_cli.config.get_missing_env_vars", return_value=["MISSING_KEY"]
-        ), patch("hermes_cli.config.get_missing_config_fields", return_value=[]), patch(
-            "hermes_cli.config.check_config_version", return_value=(1, 2)
+        ), patch(
+            "hermes_cli.config.get_missing_config_fields",
+            return_value=[{"key": "new.option", "default": True}],
+        ), patch("hermes_cli.config.check_config_version", return_value=(1, 2)), patch(
+            "hermes_cli.config.migrate_config",
+            return_value={"env_added": [], "config_added": ["new.option"]},
        ), patch("hermes_cli.main.sys") as mock_sys:
            mock_sys.stdin.isatty.return_value = False
            mock_sys.stdout.isatty.return_value = False
@ -161,5 +170,119 @@ class TestCmdUpdateBranchFallback:
            cmd_update(mock_args)

            mock_input.assert_not_called()
+            from hermes_cli.config import migrate_config
+
+            migrate_config.assert_called_once_with(interactive=False, quiet=False)
            captured = capsys.readouterr()
-            assert "Non-interactive session" in captured.out
+            assert "applying safe config migrations" in captured.out
+            assert "API keys require manual entry" in captured.out
+
+
+class TestCmdUpdateProfileSkillSync:
+    """cmd_update syncs bundled skills to all profiles, including the active one.
+
+    Regression guard for #16176: previously the active profile was excluded
+    from the seed_profile_skills loop, leaving it on stale skill content.
+    """
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_active_profile_included_in_skill_sync(
+        self, mock_run, _mock_which, mock_args, capsys
+    ):
+        from pathlib import Path
+
+        mock_run.side_effect = _make_run_side_effect(
+            branch="main", verify_ok=True, commit_count="1"
+        )
+
+        default_p = SimpleNamespace(name="default", path=Path("/fake/.hermes"))
+        active_p = SimpleNamespace(name="bit", path=Path("/fake/.hermes/profiles/bit"))
+        other_p = SimpleNamespace(name="work", path=Path("/fake/.hermes/profiles/work"))
+        all_profiles = [default_p, active_p, other_p]
+
+        synced_paths = []
+
+        def fake_seed(path, quiet=False):
+            synced_paths.append(path)
+            return {"copied": [], "updated": [], "user_modified": []}
+
+        empty_sync = {"copied": [], "updated": [], "user_modified": [], "cleaned": []}
+
+        with (
+            patch("hermes_cli.profiles.list_profiles", return_value=all_profiles),
+            patch("hermes_cli.profiles.seed_profile_skills", side_effect=fake_seed),
+            patch("tools.skills_sync.sync_skills", return_value=empty_sync),
+        ):
+            cmd_update(mock_args)
+
+        assert active_p.path in synced_paths, (
+            f"Active profile 'bit' must be included in skill sync; got: {synced_paths}"
+        )
+        assert set(synced_paths) == {p.path for p in all_profiles}, (
+            f"All profiles must be synced; got: {synced_paths}"
+        )
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_single_profile_default_is_synced(
+        self, mock_run, _mock_which, mock_args, capsys
+    ):
+        from pathlib import Path
+
+        mock_run.side_effect = _make_run_side_effect(
+            branch="main", verify_ok=True, commit_count="1"
+        )
+
+        default_p = SimpleNamespace(name="default", path=Path("/fake/.hermes"))
+        synced_paths = []
+
+        def fake_seed(path, quiet=False):
+            synced_paths.append(path)
+            return {"copied": [], "updated": [], "user_modified": []}
+
+        empty_sync = {"copied": [], "updated": [], "user_modified": [], "cleaned": []}
+
+        with (
+            patch("hermes_cli.profiles.list_profiles", return_value=[default_p]),
+            patch("hermes_cli.profiles.seed_profile_skills", side_effect=fake_seed),
+            patch("tools.skills_sync.sync_skills", return_value=empty_sync),
+        ):
+            cmd_update(mock_args)
+
+        assert default_p.path in synced_paths
+
+
+def test_is_termux_env_true_for_termux_prefix():
+    from hermes_cli import main as hm
+
+    assert hm._is_termux_env({"PREFIX": "/data/data/com.termux/files/usr"}) is True
+
+
+def test_is_termux_env_false_for_non_termux_prefix():
+    from hermes_cli import main as hm
+
+    assert hm._is_termux_env({"PREFIX": "/usr/local"}) is False
+
+
+def test_load_installable_optional_extras_supports_termux_group(tmp_path, monkeypatch):
+    from hermes_cli import main as hm
+
+    pyproject = tmp_path / "pyproject.toml"
+    pyproject.write_text(
+        """
+[project]
+name = "x"
+version = "0.0.0"
+
+[project.optional-dependencies]
+all = ["x[mcp]"]
+termux-all = ["x[termux]", "x[mcp]"]
+mcp = ["mcp>=1"]
+termux = ["rich>=14"]
+""".strip()
+    )
+    monkeypatch.setattr(hm, "PROJECT_ROOT", tmp_path)
+
+    assert hm._load_installable_optional_extras(group="all") == ["mcp"]
+    assert hm._load_installable_optional_extras(group="termux-all") == ["termux", "mcp"]
--- a/tests/hermes_cli/test_codex_cli_model_picker.py
+++ b/tests/hermes_cli/test_codex_cli_model_picker.py
@ -75,6 +75,37 @@ def test_normal_path_still_works(hermes_auth_only_env):
    assert "openai-codex" in slugs


+def test_codex_picker_uses_live_codex_catalog(hermes_auth_only_env, tmp_path, monkeypatch):
+    """The gateway /model picker should surface Codex CLI-only listed models."""
+    from hermes_cli.model_switch import list_authenticated_providers
+
+    codex_home = tmp_path / "codex-home"
+    codex_home.mkdir()
+    (codex_home / "models_cache.json").write_text(json.dumps({
+        "models": [
+            {"slug": "gpt-5.5", "priority": 0, "supported_in_api": True},
+            {"slug": "gpt-5.3-codex-spark", "priority": 7, "supported_in_api": False},
+        ]
+    }))
+    monkeypatch.setenv("CODEX_HOME", str(codex_home))
+    # Force the cache fallback path — without this the test issues a real
+    # 10s HTTP probe to chatgpt.com/backend-api/codex/models which is both
+    # slow and non-deterministic in CI/sandboxed environments.
+    monkeypatch.setattr(
+        "hermes_cli.codex_models._fetch_models_from_api",
+        lambda access_token: [],
+    )
+
+    providers = list_authenticated_providers(
+        current_provider="openai-codex",
+        max_models=10,
+    )
+
+    codex = next(p for p in providers if p["slug"] == "openai-codex")
+    assert "gpt-5.3-codex-spark" in codex["models"]
+    assert codex["total_models"] == len(codex["models"])
+
+
@pytest.fixture()
 def claude_code_only_env(tmp_path, monkeypatch):
    """Set up an environment where Anthropic credentials only exist in
--- a/tests/hermes_cli/test_codex_models.py
+++ b/tests/hermes_cli/test_codex_models.py
@ -1,10 +1,6 @@
 import json
-import os
-import sys
 from unittest.mock import patch

-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-
 from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, get_codex_model_ids


@ -17,6 +13,7 @@ def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch
            {
                "models": [
                    {"slug": "gpt-5.3-codex", "priority": 20, "supported_in_api": True},
+                    {"slug": "gpt-5.3-codex-spark", "priority": 6, "supported_in_api": False},
                    {"slug": "gpt-5.1-codex", "priority": 5, "supported_in_api": True},
                    {"slug": "gpt-5.4", "priority": 1, "supported_in_api": True},
                    {"slug": "gpt-5-hidden-codex", "priority": 2, "visibility": "hidden"},
@ -31,6 +28,9 @@ def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch
    assert models[0] == "gpt-5.2-codex"
    assert "gpt-5.1-codex" in models
    assert "gpt-5.3-codex" in models
+    # Codex CLI marks Spark unsupported in the public API, but the Codex
+    # backend still accepts it via the OAuth-backed CLI/Hermes route.
+    assert "gpt-5.3-codex-spark" in models
    # Non-codex-suffixed models are included when the cache says they're available
    assert "gpt-5.4" in models
    assert "gpt-5.4-mini" in models
@ -54,7 +54,7 @@ def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatc

    assert models[: len(DEFAULT_CODEX_MODELS)] == DEFAULT_CODEX_MODELS
    assert "gpt-5.4" in models
-    assert "gpt-5.3-codex-spark" not in models
+    assert "gpt-5.3-codex-spark" in models


 def test_get_codex_model_ids_adds_forward_compat_models_from_templates(monkeypatch):
@ -65,7 +65,49 @@ def test_get_codex_model_ids_adds_forward_compat_models_from_templates(monkeypat

    models = get_codex_model_ids(access_token="codex-access-token")

-    assert models == ["gpt-5.2-codex", "gpt-5.4-mini", "gpt-5.4", "gpt-5.3-codex"]
+    assert models == [
+        "gpt-5.2-codex",
+        "gpt-5.4-mini",
+        "gpt-5.4",
+        "gpt-5.3-codex",
+        "gpt-5.3-codex-spark",
+    ]
+
+
+def test_fetch_from_api_keeps_supported_in_api_false_models(monkeypatch):
+    """Regression: gpt-5.3-codex-spark is returned by the live Codex backend
+    with ``supported_in_api: false`` because it isn't in the public OpenAI
+    API. The Codex CLI / OAuth route still serves it for ChatGPT Pro
+    accounts, so we must not drop it on that flag. visibility=hidden is
+    the separate signal that *should* still filter entries out.
+    """
+    import sys
+    from hermes_cli import codex_models
+
+    class _FakeResp:
+        status_code = 200
+
+        def json(self):
+            return {
+                "models": [
+                    {"slug": "gpt-5.5", "priority": 0, "supported_in_api": True},
+                    {"slug": "gpt-5.3-codex-spark", "priority": 7, "supported_in_api": False},
+                    {"slug": "gpt-5-internal", "priority": 99, "visibility": "hidden"},
+                ]
+            }
+
+    class _FakeHttpx:
+        @staticmethod
+        def get(url, headers=None, timeout=None):
+            return _FakeResp()
+
+    monkeypatch.setitem(sys.modules, "httpx", _FakeHttpx)
+
+    models = codex_models._fetch_models_from_api(access_token="tok")
+
+    assert "gpt-5.5" in models
+    assert "gpt-5.3-codex-spark" in models
+    assert "gpt-5-internal" not in models


 def test_model_command_uses_runtime_access_token_for_codex_list(monkeypatch):
--- a/tests/hermes_cli/test_commands.py
+++ b/tests/hermes_cli/test_commands.py
@ -13,6 +13,7 @@ from hermes_cli.commands import (
    SlashCommandAutoSuggest,
    SlashCommandCompleter,
    _CMD_NAME_LIMIT,
+    _SLACK_RESERVED_COMMANDS,
    _TG_NAME_LIMIT,
    _clamp_command_names,
    _clamp_telegram_names,
@ -108,6 +109,12 @@ class TestResolveCommand:
        assert resolve_command("reload_mcp").name == "reload-mcp"
        assert resolve_command("tasks").name == "agents"

+    def test_topic_is_gateway_command(self):
+        topic = resolve_command("topic")
+        assert topic is not None
+        assert topic.name == "topic"
+        assert "topic" in GATEWAY_KNOWN_COMMANDS
+
    def test_leading_slash_stripped(self):
        assert resolve_command("/help").name == "help"
        assert resolve_command("/bg").name == "background"
@ -235,6 +242,13 @@ class TestTelegramBotCommands:
                tg_name = cmd.name.replace("-", "_")
                assert tg_name not in names

+    def test_excludes_commands_with_required_args(self):
+        names = {name for name, _ in telegram_bot_commands()}
+        assert "background" not in names
+        assert "queue" not in names
+        assert "steer" not in names
+        assert "background" in GATEWAY_KNOWN_COMMANDS
+

 class TestSlackSubcommandMap:
    def test_returns_dict(self):
@ -299,9 +313,19 @@ class TestSlackNativeSlashes:
    def test_includes_canonical_commands(self):
        names = {n for n, _d, _h in slack_native_slashes()}
        # Sample of gateway-available canonical commands
-        for expected in ("new", "stop", "background", "model", "help", "status"):
+        for expected in ("new", "stop", "background", "model", "help"):
            assert expected in names, f"missing canonical /{expected}"

+    def test_excludes_slack_reserved_commands(self):
+        """Slack built-in commands (e.g. /status, /me, /join) cannot be
+        registered by apps and must be excluded from the manifest.
+        Users can still reach them via /hermes <command>."""
+        names = {n for n, _d, _h in slack_native_slashes()}
+        for reserved in _SLACK_RESERVED_COMMANDS:
+            assert reserved not in names, (
+                f"/{reserved} is a Slack built-in and must not appear in the manifest"
+            )
+
    def test_includes_aliases_as_first_class_slashes(self):
        """Aliases (/btw, /bg, /reset, /q) must be registered as standalone
        slashes — this is the whole point of native-slashes parity."""
@ -319,6 +343,9 @@ class TestSlackNativeSlashes:
        Telegram but not Slack (because of Slack's 50-slash cap), this
        test fails loudly so we can curate the list rather than silently
        dropping parity.
+
+        Slack-reserved built-in commands (e.g. /status) are excluded
+        from parity checks since they cannot be registered on Slack.
        """
        slack_names = {n for n, _d, _h in slack_native_slashes()}
        tg_names = {n for n, _d in telegram_bot_commands()}
@ -329,7 +356,8 @@ class TestSlackNativeSlashes:

        slack_norm = {_norm(n) for n in slack_names}
        tg_norm = {_norm(n) for n in tg_names}
-        missing = tg_norm - slack_norm
+        reserved_norm = {_norm(n) for n in _SLACK_RESERVED_COMMANDS}
+        missing = (tg_norm - slack_norm) - reserved_norm
        assert not missing, (
            f"commands on Telegram but missing from Slack native slashes: {sorted(missing)}"
        )
@ -405,6 +433,21 @@ class TestGatewayConfigGate:
        joined = "\n".join(lines)
        assert "`/verbose" in joined

+    def test_config_gate_quoted_false_stays_disabled_everywhere(self, tmp_path, monkeypatch):
+        """Quoted false must not enable config-gated gateway commands."""
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text('display:\n  tool_progress_command: "false"\n')
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+        lines = gateway_help_lines()
+        joined = "\n".join(lines)
+        names = {name for name, _ in telegram_bot_commands()}
+        mapping = slack_subcommand_map()
+
+        assert "`/verbose" not in joined
+        assert "verbose" not in names
+        assert "verbose" not in mapping
+
    def test_config_gate_excluded_from_telegram_when_off(self, tmp_path, monkeypatch):
        config_file = tmp_path / "config.yaml"
        config_file.write_text("display:\n  tool_progress_command: false\n")
@ -792,6 +835,103 @@ class TestClampTelegramNames:
        assert result[0] == ("foo", "d1")


+class TestClampCommandNamesTriples:
+    """Tests for _clamp_command_names with 3-tuples (name, desc, cmd_key).
+
+    Skill entries pass through _clamp_command_names as 3-tuples so the
+    original cmd_key survives name truncation.  Before the fix in PR #18951,
+    the code stripped cmd_key into a side-dict keyed by the *original*
+    (name, desc) pair — after truncation the lookup key no longer matched,
+    silently losing the cmd_key.
+    """
+
+    def test_short_triple_preserved(self):
+        entries = [("skill", "A skill", "/skill")]
+        result = _clamp_command_names(entries, set())
+        assert result == [("skill", "A skill", "/skill")]
+
+    def test_long_name_preserves_cmd_key(self):
+        long = "a" * 50
+        cmd_key = f"/{long}"
+        result = _clamp_command_names([(long, "desc", cmd_key)], set())
+        assert len(result) == 1
+        name, desc, key = result[0]
+        assert len(name) == _CMD_NAME_LIMIT
+        assert key == cmd_key, "cmd_key must survive name clamping"
+
+    def test_collision_preserves_cmd_key(self):
+        prefix = "x" * _CMD_NAME_LIMIT
+        long = "x" * 50
+        result = _clamp_command_names(
+            [(long, "desc", "/long-skill")], reserved={prefix},
+        )
+        assert len(result) == 1
+        name, _desc, key = result[0]
+        assert name == "x" * (_CMD_NAME_LIMIT - 1) + "0"
+        assert key == "/long-skill"
+
+    def test_multiple_long_names_preserve_respective_keys(self):
+        base = "y" * 40
+        entries = [
+            (base + "_alpha", "d1", "/alpha-skill"),
+            (base + "_beta", "d2", "/beta-skill"),
+        ]
+        result = _clamp_command_names(entries, set())
+        assert len(result) == 2
+        assert result[0][2] == "/alpha-skill"
+        assert result[1][2] == "/beta-skill"
+
+    def test_backward_compat_with_pairs(self):
+        """Legacy 2-tuple callers (Telegram) must still work."""
+        entries = [("help", "Show help"), ("status", "Show status")]
+        result = _clamp_command_names(entries, set())
+        assert result == entries
+
+
+class TestDiscordSkillCmdKeyDispatch:
+    """Integration: discord_skill_commands preserves cmd_key for long names.
+
+    This tests the full pipeline: skill_commands → _collect_gateway_skill_entries
+    → _clamp_command_names → returned triples, verifying that skills with names
+    exceeding Discord's 32-char limit still have their original cmd_key for
+    dispatch.
+    """
+
+    def test_long_skill_name_retains_cmd_key(self, tmp_path, monkeypatch):
+        from unittest.mock import patch
+
+        long_name = "this-is-a-very-long-skill-name-that-exceeds-limit"
+        cmd_key = f"/{long_name}"
+        fake_skills_dir = tmp_path / "skills"
+        fake_skills_dir.mkdir(exist_ok=True)
+        # Use resolved path — macOS /var → /private/var symlink
+        # causes SKILLS_DIR.resolve() to differ from tmp_path.
+        resolved_dir = str(fake_skills_dir.resolve())
+
+        fake_cmds = {
+            cmd_key: {
+                "name": long_name,
+                "description": "A skill with a long name",
+                "skill_md_path": f"{resolved_dir}/{long_name}/SKILL.md",
+                "skill_dir": f"{resolved_dir}/{long_name}",
+            },
+        }
+
+        with patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds), \
+             patch("tools.skills_tool.SKILLS_DIR", fake_skills_dir), \
+             patch("agent.skill_utils.get_external_skills_dirs", return_value=[]):
+            entries, hidden = discord_skill_commands(
+                max_slots=100, reserved_names=set(),
+            )
+
+        assert len(entries) == 1
+        name, desc, key = entries[0]
+        assert len(name) <= _CMD_NAME_LIMIT, "Name should be clamped to 32 chars"
+        assert key == cmd_key, (
+            f"cmd_key must be the original /{long_name}, got {key!r}"
+        )
+
+
 class TestTelegramMenuCommands:
    """Integration: telegram_menu_commands enforces the 32-char limit."""

@ -869,6 +1009,73 @@ class TestTelegramMenuCommands:
        assert "my_enabled_skill" in menu_names
        assert "my_disabled_skill" not in menu_names

+    def test_external_dir_skills_included_in_telegram_menu(self, tmp_path, monkeypatch):
+        """External skills (``skills.external_dirs``) must appear in the Telegram menu.
+
+        Regression test for #8110 — external skills were visible to the
+        agent and CLI but silently excluded from gateway slash menus
+        because ``_collect_gateway_skill_entries`` only accepted skills
+        whose path started with ``SKILLS_DIR``.
+
+        Also verifies the trailing-slash boundary: a directory that
+        simply shares a prefix with a configured ``external_dirs`` entry
+        (``/tmp/my-skills-extra`` vs ``/tmp/my-skills``) must NOT be
+        admitted.
+        """
+        from unittest.mock import patch
+
+        local_dir = tmp_path / "skills"
+        local_dir.mkdir()
+        external_dir = tmp_path / "my-skills"
+        external_dir.mkdir()
+        lookalike_dir = tmp_path / "my-skills-extra"
+        lookalike_dir.mkdir()
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        (tmp_path / "config.yaml").write_text(
+            f"skills:\n  external_dirs:\n    - {external_dir}\n"
+        )
+
+        fake_cmds = {
+            "/local-one": {
+                "name": "local-one",
+                "description": "Local",
+                "skill_md_path": f"{local_dir}/local-one/SKILL.md",
+                "skill_dir": f"{local_dir}/local-one",
+            },
+            "/morning-briefing": {
+                "name": "morning-briefing",
+                "description": "External skill",
+                "skill_md_path": f"{external_dir}/morning-briefing/SKILL.md",
+                "skill_dir": f"{external_dir}/morning-briefing",
+            },
+            "/lookalike-skill": {
+                "name": "lookalike-skill",
+                "description": "Lives in a sibling dir that shares a prefix",
+                "skill_md_path": f"{lookalike_dir}/lookalike-skill/SKILL.md",
+                "skill_dir": f"{lookalike_dir}/lookalike-skill",
+            },
+        }
+
+        with (
+            patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
+            patch("tools.skills_tool.SKILLS_DIR", local_dir),
+            patch(
+                "agent.skill_utils.get_external_skills_dirs",
+                return_value=[external_dir],
+            ),
+        ):
+            menu, _ = telegram_menu_commands(max_commands=100)
+
+        menu_names = {n for n, _ in menu}
+        assert "local_one" in menu_names, "local skill must appear"
+        assert "morning_briefing" in menu_names, (
+            "external skill from skills.external_dirs must appear (fixes #8110)"
+        )
+        assert "lookalike_skill" not in menu_names, (
+            "prefix-match sibling directories must not be admitted"
+        )
+
    def test_special_chars_in_skill_names_sanitized(self, tmp_path, monkeypatch):
        """Skills with +, /, or other special chars produce valid Telegram names."""
        from unittest.mock import patch
@ -1323,6 +1530,119 @@ class TestDiscordSkillCommandsByCategory:
        assert "vllm" in names
        assert len(uncategorized) == 0

+    def test_no_legacy_25x25_cap(self, tmp_path, monkeypatch):
+        """The old nested-layout caps (25 groups × 25 skills/group) are gone.
+
+        The live caller flattens categories into a single autocomplete list,
+        which Discord fetches dynamically — the per-command 8KB payload
+        concern from the old nested layout (#11321, #10259) no longer applies.
+        Guards against accidentally re-introducing the caps, which would
+        silently drop skills in the 26th+ alphabetical category (the exact
+        failure mode users were hitting with 29 category dirs on real
+        installs).
+        """
+        from unittest.mock import patch
+
+        fake_skills_dir = str(tmp_path / "skills")
+
+        # Build 30 categories (> old _MAX_GROUPS=25) each with 30 skills
+        # (> old _MAX_PER_GROUP=25).
+        fake_cmds = {}
+        for c in range(30):
+            cat = f"cat{c:02d}"  # cat00, cat01, ..., cat29 — 30 categories
+            for s in range(30):
+                name = f"skill-{c:02d}-{s:02d}"
+                skill_subdir = tmp_path / "skills" / cat / name
+                skill_subdir.mkdir(parents=True, exist_ok=True)
+                (skill_subdir / "SKILL.md").write_text("---\nname: x\n---\n")
+                fake_cmds[f"/{name}"] = {
+                    "name": name,
+                    "description": f"Category {cat} skill {s}",
+                    "skill_md_path": f"{fake_skills_dir}/{cat}/{name}/SKILL.md",
+                }
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        with (
+            patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
+            patch("tools.skills_tool.SKILLS_DIR", tmp_path / "skills"),
+        ):
+            categories, uncategorized, hidden = discord_skill_commands_by_category(
+                reserved_names=set(),
+            )
+
+        # Every category should be present — no 25-group cap
+        assert len(categories) == 30, (
+            f"expected all 30 categories, got {len(categories)} "
+            f"(cap from old nested layout must be removed)"
+        )
+        # Every skill in every category must be present — no 25-per-group cap
+        for cat_name, entries in categories.items():
+            assert len(entries) == 30, (
+                f"category {cat_name}: expected 30 skills, got {len(entries)} "
+                f"(cap from old nested layout must be removed)"
+            )
+        # Nothing should be reported hidden for the cap reason (the only
+        # legitimate hidden reason now is name clamp collisions, which
+        # don't happen here since all names are unique).
+        assert hidden == 0
+
+    def test_external_dirs_skills_included(self, tmp_path, monkeypatch):
+        """Skills in ``skills.external_dirs`` must appear in /skill autocomplete.
+
+        #18741 fixed this for the flat ``discord_skill_commands`` collector
+        but left ``discord_skill_commands_by_category`` (the live caller for
+        Discord's ``/skill`` command) still filtering by
+        ``SKILLS_DIR`` prefix only. Regression guard that both collectors
+        now accept external-dir skills.
+        """
+        from unittest.mock import patch
+
+        local_skills_dir = tmp_path / "local-skills"
+        external_dir = tmp_path / "external-skills"
+
+        (local_skills_dir / "creative" / "local-skill").mkdir(parents=True)
+        (local_skills_dir / "creative" / "local-skill" / "SKILL.md").write_text("")
+
+        (external_dir / "mlops" / "external-skill").mkdir(parents=True)
+        (external_dir / "mlops" / "external-skill" / "SKILL.md").write_text("")
+
+        fake_cmds = {
+            "/local-skill": {
+                "name": "local-skill",
+                "description": "Local",
+                "skill_md_path": str(local_skills_dir / "creative" / "local-skill" / "SKILL.md"),
+            },
+            "/external-skill": {
+                "name": "external-skill",
+                "description": "External",
+                "skill_md_path": str(external_dir / "mlops" / "external-skill" / "SKILL.md"),
+            },
+        }
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        with (
+            patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
+            patch("tools.skills_tool.SKILLS_DIR", local_skills_dir),
+            patch(
+                "agent.skill_utils.get_external_skills_dirs",
+                return_value=[external_dir],
+            ),
+        ):
+            categories, uncategorized, hidden = discord_skill_commands_by_category(
+                reserved_names=set(),
+            )
+
+        # Local skill → grouped under "creative"
+        assert "creative" in categories
+        assert any(n == "local-skill" for n, _d, _k in categories["creative"])
+        # External skill → grouped under its own top-level dir "mlops"
+        assert "mlops" in categories, (
+            "external-dir skills must be included — the old SKILLS_DIR-only "
+            "prefix check was broken for by_category (completes #18741)"
+        )
+        assert any(n == "external-skill" for n, _d, _k in categories["mlops"])
+        assert uncategorized == []
+        assert hidden == 0
+

 # ---------------------------------------------------------------------------
 # Plugin slash command integration
@ -1354,6 +1674,19 @@ class TestPluginCommandEnumeration:
        names = {name for name, _desc in telegram_bot_commands()}
        assert "metricas" in names

+    def test_plugin_command_with_required_args_excluded_from_telegram_menu(self, monkeypatch):
+        """Telegram BotCommand selections cannot supply required arguments."""
+        self._patch_plugin_commands(monkeypatch, {
+            "background-job": {
+                "handler": lambda _a: "ok",
+                "description": "Run a background job",
+                "args_hint": "<prompt>",
+                "plugin": "jobs-plugin",
+            }
+        })
+        names = {name for name, _desc in telegram_bot_commands()}
+        assert "background_job" not in names
+
    def test_plugin_command_appears_in_slack_subcommand_map(self, monkeypatch):
        """/hermes metricas must route through the Slack subcommand map."""
        self._patch_plugin_commands(monkeypatch, {
--- a/tests/hermes_cli/test_config.py
+++ b/tests/hermes_cli/test_config.py
@ -81,6 +81,81 @@ class TestLoadConfigDefaults:
            assert "max_turns" not in config


+class TestLoadConfigParseFailure:
+    """A YAML parse failure must NOT silently fall back to defaults.
+
+    Before issue #23570 this was a single ``print(...)`` that scrolled past
+    on the first invocation — users saw aux-fallback misbehavior with no clue
+    their config.yaml was being ignored. The helper must:
+      * log at WARNING (so ``hermes logs`` surfaces it)
+      * also write to stderr (so it's visible at startup even before
+        ``setup_logging()`` has wired up file handlers)
+      * dedup on (path, mtime_ns, size) so concurrent loads don't spam
+      * re-warn after the user edits the file (different mtime)
+    """
+
+    def test_logs_and_warns_on_parse_failure(self, tmp_path, caplog, capsys):
+        # Reset the dedup cache so this test isn't affected by other tests
+        # that may have warned about a different broken config.
+        from hermes_cli import config as cfg_mod
+        cfg_mod._CONFIG_PARSE_WARNED.clear()
+
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            (tmp_path / "config.yaml").write_text("\tbroken tab indent:\n")
+
+            import logging
+            with caplog.at_level(logging.WARNING, logger="hermes_cli.config"):
+                config = load_config()
+
+            # Falls back to defaults — confirms the silent-fallback we're warning about
+            assert config["model"] == DEFAULT_CONFIG["model"]
+
+            # WARNING-level log was emitted with file path + reason
+            assert any(
+                str(tmp_path / "config.yaml") in rec.message
+                and "Falling back to default config" in rec.message
+                for rec in caplog.records
+            ), f"expected WARNING log, got: {[r.message for r in caplog.records]}"
+
+            # stderr also got a user-visible message (with the ⚠️ marker so it
+            # stands out at hermes startup before logging is configured)
+            captured = capsys.readouterr()
+            assert "hermes config:" in captured.err
+            assert str(tmp_path / "config.yaml") in captured.err
+
+    def test_dedup_on_repeated_load_same_file(self, tmp_path, capsys):
+        from hermes_cli import config as cfg_mod
+        cfg_mod._CONFIG_PARSE_WARNED.clear()
+
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            (tmp_path / "config.yaml").write_text("\tbroken:\n")
+
+            load_config()
+            first = capsys.readouterr().err
+            assert "hermes config:" in first
+
+            load_config()
+            second = capsys.readouterr().err
+            assert second == "", "second load should NOT re-warn (same file, same mtime)"
+
+    def test_rewarns_after_file_edit(self, tmp_path, capsys):
+        import time
+        from hermes_cli import config as cfg_mod
+        cfg_mod._CONFIG_PARSE_WARNED.clear()
+
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            (tmp_path / "config.yaml").write_text("\tbroken:\n")
+            load_config()
+            capsys.readouterr()  # discard first warning
+
+            # Edit the file (still broken, but different content) — mtime changes
+            time.sleep(0.05)
+            (tmp_path / "config.yaml").write_text("\tstill broken differently:\n")
+            load_config()
+            after_edit = capsys.readouterr().err
+            assert "hermes config:" in after_edit, "edited file should re-warn"
+
+
 class TestSaveAndLoadRoundtrip:
    def test_roundtrip(self, tmp_path):
        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
--- a/tests/hermes_cli/test_curator_archive_prune.py
+++ b/tests/hermes_cli/test_curator_archive_prune.py
@ -0,0 +1,269 @@
+"""Tests for `hermes curator archive` and `hermes curator prune`.
+
+Covers:
+- archive refuses pinned skills with an `unpin` hint
+- archive returns 0/1 based on archive_skill() success
+- prune filters pinned and already-archived, applies --days threshold
+- prune falls back to created_at when last_activity_at is null
+- prune --dry-run makes no state changes
+- prune --yes skips confirmation
+- prune --days validation
+"""
+
+from __future__ import annotations
+
+import io
+from contextlib import redirect_stdout, redirect_stderr
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import pytest
+
+
+def _ns(**kwargs):
+    return SimpleNamespace(**kwargs)
+
+
+# ─── archive ────────────────────────────────────────────────────────────────
+
+
+def test_archive_refuses_pinned(monkeypatch, capsys):
+    import hermes_cli.curator as curator_cli
+    import tools.skill_usage as skill_usage
+
+    monkeypatch.setattr(skill_usage, "get_record", lambda name: {"pinned": True})
+    called = []
+    monkeypatch.setattr(
+        skill_usage, "archive_skill",
+        lambda name: called.append(name) or (True, "should not get here"),
+    )
+
+    rc = curator_cli._cmd_archive(_ns(skill="pinned-skill"))
+    assert rc == 1
+    assert called == []
+    out = capsys.readouterr().out
+    assert "pinned" in out.lower()
+    assert "hermes curator unpin" in out
+
+
+def test_archive_calls_archive_skill(monkeypatch, capsys):
+    import hermes_cli.curator as curator_cli
+    import tools.skill_usage as skill_usage
+
+    monkeypatch.setattr(skill_usage, "get_record", lambda name: {"pinned": False})
+    monkeypatch.setattr(
+        skill_usage, "archive_skill",
+        lambda name: (True, f"archived to .archive/{name}"),
+    )
+    rc = curator_cli._cmd_archive(_ns(skill="my-skill"))
+    assert rc == 0
+    assert "archived to .archive/my-skill" in capsys.readouterr().out
+
+
+def test_archive_reports_failure(monkeypatch, capsys):
+    import hermes_cli.curator as curator_cli
+    import tools.skill_usage as skill_usage
+
+    monkeypatch.setattr(skill_usage, "get_record", lambda name: {"pinned": False})
+    monkeypatch.setattr(
+        skill_usage, "archive_skill",
+        lambda name: (False, f"skill '{name}' is bundled or hub-installed; never archive"),
+    )
+    rc = curator_cli._cmd_archive(_ns(skill="hub-slug"))
+    assert rc == 1
+    assert "bundled or hub-installed" in capsys.readouterr().out
+
+
+# ─── prune ──────────────────────────────────────────────────────────────────
+
+
+def _mk_record(name, *, idle_days=0, pinned=False, state="active", created_idle_days=None):
+    import datetime as _dt
+    now = _dt.datetime.now(_dt.timezone.utc)
+    last_activity = (now - _dt.timedelta(days=idle_days)).isoformat() if idle_days else None
+    created_delta = created_idle_days if created_idle_days is not None else idle_days
+    created = (now - _dt.timedelta(days=created_delta)).isoformat()
+    return {
+        "name": name,
+        "state": state,
+        "pinned": pinned,
+        "last_activity_at": last_activity,
+        "created_at": created,
+        "activity_count": 0 if idle_days == 0 and last_activity is None else 1,
+    }
+
+
+def test_prune_days_validation(monkeypatch, capsys):
+    import hermes_cli.curator as curator_cli
+    rc = curator_cli._cmd_prune(_ns(days=0, yes=True, dry_run=False))
+    assert rc == 2
+    err = capsys.readouterr().err
+    assert "--days must be >= 1" in err
+
+
+def test_prune_nothing_to_do(monkeypatch, capsys):
+    import hermes_cli.curator as curator_cli
+    import tools.skill_usage as skill_usage
+
+    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: [])
+    rc = curator_cli._cmd_prune(_ns(days=30, yes=True, dry_run=False))
+    assert rc == 0
+    assert "nothing to prune" in capsys.readouterr().out
+
+
+def test_prune_filters_pinned_and_archived(monkeypatch, capsys):
+    import hermes_cli.curator as curator_cli
+    import tools.skill_usage as skill_usage
+
+    rows = [
+        _mk_record("old-pinned", idle_days=200, pinned=True),
+        _mk_record("old-archived", idle_days=200, state="archived"),
+        _mk_record("recent", idle_days=10),
+        _mk_record("old-active", idle_days=200),
+    ]
+    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: rows)
+    archived = []
+    monkeypatch.setattr(
+        skill_usage, "archive_skill",
+        lambda name: archived.append(name) or (True, f"archived {name}"),
+    )
+
+    rc = curator_cli._cmd_prune(_ns(days=30, yes=True, dry_run=False))
+    assert rc == 0
+    assert archived == ["old-active"]
+    out = capsys.readouterr().out
+    assert "old-active" in out
+    assert "old-pinned" not in out
+    assert "old-archived" not in out
+    assert "recent" not in out
+    assert "archived 1/1" in out
+
+
+def test_prune_falls_back_to_created_at_when_never_used(monkeypatch, capsys):
+    """Never-used skills must be prunable via created_at — otherwise immortal."""
+    import hermes_cli.curator as curator_cli
+    import tools.skill_usage as skill_usage
+
+    rows = [_mk_record("never-used", idle_days=0, created_idle_days=200)]
+    # Force last_activity_at to None explicitly
+    rows[0]["last_activity_at"] = None
+
+    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: rows)
+    archived = []
+    monkeypatch.setattr(
+        skill_usage, "archive_skill",
+        lambda name: archived.append(name) or (True, "ok"),
+    )
+    rc = curator_cli._cmd_prune(_ns(days=90, yes=True, dry_run=False))
+    assert rc == 0
+    assert archived == ["never-used"]
+
+
+def test_prune_dry_run_makes_no_changes(monkeypatch, capsys):
+    import hermes_cli.curator as curator_cli
+    import tools.skill_usage as skill_usage
+
+    rows = [_mk_record("old-skill", idle_days=200)]
+    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: rows)
+    archived = []
+    monkeypatch.setattr(
+        skill_usage, "archive_skill",
+        lambda name: archived.append(name) or (True, "ok"),
+    )
+    rc = curator_cli._cmd_prune(_ns(days=30, yes=True, dry_run=True))
+    assert rc == 0
+    assert archived == []
+    out = capsys.readouterr().out
+    assert "old-skill" in out
+    assert "dry run" in out
+
+
+def test_prune_prompts_without_yes(monkeypatch, capsys):
+    import hermes_cli.curator as curator_cli
+    import tools.skill_usage as skill_usage
+
+    rows = [_mk_record("old-skill", idle_days=200)]
+    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: rows)
+    archived = []
+    monkeypatch.setattr(
+        skill_usage, "archive_skill",
+        lambda name: archived.append(name) or (True, "ok"),
+    )
+    monkeypatch.setattr("builtins.input", lambda _prompt: "n")
+    rc = curator_cli._cmd_prune(_ns(days=30, yes=False, dry_run=False))
+    assert rc == 1
+    assert archived == []
+    assert "aborted" in capsys.readouterr().out
+
+
+def test_prune_confirms_with_y(monkeypatch, capsys):
+    import hermes_cli.curator as curator_cli
+    import tools.skill_usage as skill_usage
+
+    rows = [_mk_record("old-skill", idle_days=200)]
+    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: rows)
+    archived = []
+    monkeypatch.setattr(
+        skill_usage, "archive_skill",
+        lambda name: archived.append(name) or (True, "ok"),
+    )
+    monkeypatch.setattr("builtins.input", lambda _prompt: "y")
+    rc = curator_cli._cmd_prune(_ns(days=30, yes=False, dry_run=False))
+    assert rc == 0
+    assert archived == ["old-skill"]
+
+
+def test_prune_reports_partial_failure(monkeypatch, capsys):
+    import hermes_cli.curator as curator_cli
+    import tools.skill_usage as skill_usage
+
+    rows = [
+        _mk_record("ok-skill", idle_days=200),
+        _mk_record("bad-skill", idle_days=200),
+    ]
+    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: rows)
+
+    def fake_archive(name):
+        if name == "bad-skill":
+            return False, "disk full"
+        return True, "ok"
+
+    monkeypatch.setattr(skill_usage, "archive_skill", fake_archive)
+    rc = curator_cli._cmd_prune(_ns(days=30, yes=True, dry_run=False))
+    assert rc == 1
+    out = capsys.readouterr().out
+    assert "archived 1/2" in out
+    assert "bad-skill: disk full" in out
+
+
+# ─── argparse wiring ────────────────────────────────────────────────────────
+
+
+def test_archive_and_prune_registered():
+    import argparse
+    import hermes_cli.curator as curator_cli
+
+    parser = argparse.ArgumentParser(prog="hermes curator")
+    curator_cli.register_cli(parser)
+
+    args = parser.parse_args(["archive", "my-skill"])
+    assert args.skill == "my-skill"
+    assert args.func.__name__ == "_cmd_archive"
+
+    args = parser.parse_args(["prune", "--days", "45", "--yes", "--dry-run"])
+    assert args.days == 45
+    assert args.yes is True
+    assert args.dry_run is True
+    assert args.func.__name__ == "_cmd_prune"
+
+
+def test_prune_defaults():
+    import argparse
+    import hermes_cli.curator as curator_cli
+
+    parser = argparse.ArgumentParser(prog="hermes curator")
+    curator_cli.register_cli(parser)
+    args = parser.parse_args(["prune"])
+    assert args.days == 90
+    assert args.yes is False
+    assert args.dry_run is False
--- a/tests/hermes_cli/test_curator_recent_run_notice.py
+++ b/tests/hermes_cli/test_curator_recent_run_notice.py
@ -0,0 +1,162 @@
+"""Tests for `_print_curator_recent_run_notice`.
+
+The notice prints the most recent curator run summary on `hermes update`,
+exactly once per run. Show-once is enforced by stamping
+`last_run_summary_shown_at` in curator state after printing.
+
+Why this matters: the curator runs in the background (gateway tick + CLI
+session start) so users normally never see the rename map. `hermes update`
+is the high-attention surface where consolidations should land.
+"""
+
+from __future__ import annotations
+
+import importlib
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture
+def curator_env(tmp_path, monkeypatch, capsys):
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    (home / "skills").mkdir()
+    (home / "logs").mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+    import hermes_constants
+    importlib.reload(hermes_constants)
+    from agent import curator
+    importlib.reload(curator)
+    from hermes_cli import main as hermes_main
+    importlib.reload(hermes_main)
+
+    yield {
+        "curator": curator,
+        "main": hermes_main,
+        "capsys": capsys,
+    }
+
+
+def _set_state(curator_mod, **fields):
+    state = curator_mod.load_state()
+    state.update(fields)
+    curator_mod.save_state(state)
+
+
+def test_silent_when_no_curator_run_yet(curator_env):
+    """First-run notice handles this case; recent-run notice stays silent."""
+    curator_env["main"]._print_curator_recent_run_notice()
+    out = curator_env["capsys"].readouterr().out
+    assert "Skill curator — last run" not in out
+
+
+def test_silent_when_summary_is_single_line(curator_env):
+    """No archives = no rename map = nothing to surface. But still stamps shown."""
+    now = datetime.now(timezone.utc).isoformat()
+    _set_state(
+        curator_env["curator"],
+        last_run_at=now,
+        last_run_summary="auto: no changes; llm: no change",
+    )
+    curator_env["main"]._print_curator_recent_run_notice()
+    out = curator_env["capsys"].readouterr().out
+    assert "Skill curator — last run" not in out
+    # Should still mark shown so we don't reconsider on every update.
+    state = curator_env["curator"].load_state()
+    assert state["last_run_summary_shown_at"] == now
+
+
+def test_prints_multiline_summary_with_rename_map(curator_env):
+    """Multi-line summary (rename map appended) prints with timestamp + footer."""
+    now = datetime.now(timezone.utc).isoformat()
+    summary = (
+        "auto: 1 marked stale; llm: consolidated 2 into 1\n"
+        "archived 2 skill(s):\n"
+        "  • pdf-extraction → document-tools\n"
+        "  • docx-extraction → document-tools\n"
+        "full report: hermes curator status"
+    )
+    _set_state(
+        curator_env["curator"],
+        last_run_at=now,
+        last_run_summary=summary,
+    )
+    curator_env["main"]._print_curator_recent_run_notice()
+    out = curator_env["capsys"].readouterr().out
+    assert "Skill curator — last run" in out
+    assert "pdf-extraction → document-tools" in out
+    assert "docx-extraction → document-tools" in out
+    assert "shows once per curator run" in out
+
+
+def test_show_once_semantics(curator_env):
+    """Calling twice prints once; second call is silent until a new run lands."""
+    now = datetime.now(timezone.utc).isoformat()
+    summary = (
+        "auto: no changes; llm: consolidated 1 into 1\n"
+        "archived 1 skill(s):\n"
+        "  • old → new\n"
+        "full report: hermes curator status"
+    )
+    _set_state(
+        curator_env["curator"],
+        last_run_at=now,
+        last_run_summary=summary,
+    )
+
+    curator_env["main"]._print_curator_recent_run_notice()
+    first = curator_env["capsys"].readouterr().out
+    assert "old → new" in first
+
+    curator_env["main"]._print_curator_recent_run_notice()
+    second = curator_env["capsys"].readouterr().out
+    assert second == "", "second call must be silent (already shown)"
+
+
+def test_new_run_resets_show_once(curator_env):
+    """A newer curator run with rename data prints again, even though one was already shown."""
+    older = (datetime.now(timezone.utc) - timedelta(hours=8)).isoformat()
+    _set_state(
+        curator_env["curator"],
+        last_run_at=older,
+        last_run_summary=(
+            "auto: no changes; llm: consolidated 1 into 1\n"
+            "archived 1 skill(s):\n"
+            "  • thing-a → umbrella\n"
+            "full report: hermes curator status"
+        ),
+    )
+    curator_env["main"]._print_curator_recent_run_notice()
+    curator_env["capsys"].readouterr()  # drain
+
+    # New run lands.
+    newer = datetime.now(timezone.utc).isoformat()
+    _set_state(
+        curator_env["curator"],
+        last_run_at=newer,
+        last_run_summary=(
+            "auto: no changes; llm: consolidated 1 into 1\n"
+            "archived 1 skill(s):\n"
+            "  • thing-b → umbrella\n"
+            "full report: hermes curator status"
+        ),
+    )
+    curator_env["main"]._print_curator_recent_run_notice()
+    out = curator_env["capsys"].readouterr().out
+    assert "thing-b → umbrella" in out
+    assert "thing-a" not in out  # only the newer run shows
+
+
+def test_format_time_ago_buckets(curator_env):
+    """Smoke test the time formatter — drives the `last run Xh ago` line."""
+    fmt = curator_env["main"]._format_time_ago
+    now = datetime.now(timezone.utc)
+    assert fmt((now - timedelta(seconds=10)).isoformat()) == "just now"
+    assert fmt((now - timedelta(minutes=5)).isoformat()) == "5m ago"
+    assert fmt((now - timedelta(hours=3)).isoformat()) == "3h ago"
+    assert fmt((now - timedelta(days=2)).isoformat()) == "2d ago"
+    assert fmt("not-a-real-iso-string") == "recently"
--- a/tests/hermes_cli/test_curator_run.py
+++ b/tests/hermes_cli/test_curator_run.py
@ -0,0 +1,87 @@
+"""Tests for `hermes curator run` CLI behavior."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+
+def _args(**kwargs):
+    values = {
+        "dry_run": False,
+        "synchronous": False,
+        "background": False,
+    }
+    values.update(kwargs)
+    return SimpleNamespace(**values)
+
+
+def test_run_defaults_to_synchronous(monkeypatch, capsys):
+    import agent.curator as curator_state
+    import hermes_cli.curator as curator_cli
+
+    calls = []
+    monkeypatch.setattr(curator_state, "is_enabled", lambda: True)
+    monkeypatch.setattr(
+        curator_state,
+        "run_curator_review",
+        lambda **kwargs: calls.append(kwargs) or {"auto_transitions": {}},
+    )
+
+    assert curator_cli._cmd_run(_args()) == 0
+
+    assert calls[0]["synchronous"] is True
+    assert calls[0]["dry_run"] is False
+    assert "background" not in capsys.readouterr().out
+
+
+def test_run_background_opts_into_async(monkeypatch, capsys):
+    import agent.curator as curator_state
+    import hermes_cli.curator as curator_cli
+
+    calls = []
+    monkeypatch.setattr(curator_state, "is_enabled", lambda: True)
+    monkeypatch.setattr(
+        curator_state,
+        "run_curator_review",
+        lambda **kwargs: calls.append(kwargs) or {"auto_transitions": {}},
+    )
+
+    assert curator_cli._cmd_run(_args(background=True)) == 0
+
+    assert calls[0]["synchronous"] is False
+    assert "llm pass running in background" in capsys.readouterr().out
+
+
+def test_run_sync_wins_over_background(monkeypatch):
+    import agent.curator as curator_state
+    import hermes_cli.curator as curator_cli
+
+    calls = []
+    monkeypatch.setattr(curator_state, "is_enabled", lambda: True)
+    monkeypatch.setattr(
+        curator_state,
+        "run_curator_review",
+        lambda **kwargs: calls.append(kwargs) or {"auto_transitions": {}},
+    )
+
+    assert curator_cli._cmd_run(_args(synchronous=True, background=True)) == 0
+
+    assert calls[0]["synchronous"] is True
+
+
+def test_dry_run_default_reports_synchronous_wording(monkeypatch, capsys):
+    import agent.curator as curator_state
+    import hermes_cli.curator as curator_cli
+
+    monkeypatch.setattr(curator_state, "is_enabled", lambda: True)
+    monkeypatch.setattr(
+        curator_state,
+        "run_curator_review",
+        lambda **kwargs: {"auto_transitions": {}},
+    )
+
+    assert curator_cli._cmd_run(_args(dry_run=True)) == 0
+
+    out = capsys.readouterr().out
+    assert "When the report lands" not in out
+    assert "Read the report with `hermes curator status`" in out
--- a/tests/hermes_cli/test_curator_status.py
+++ b/tests/hermes_cli/test_curator_status.py
@ -114,6 +114,12 @@ def test_status_shows_most_and_least_used_sections(curator_status_env):
    env["make_skill"]("top-dog")
    env["make_skill"]("middling")
    env["make_skill"]("never-used")
+    # Mark all three as agent-created so they enter the curator's catalog.
+    # Under the provenance-marker semantics, skills must be explicitly opted
+    # into curator management (normally via the background-review fork when
+    # it creates a skill through skill_manage).
+    for n in ("top-dog", "middling", "never-used"):
+        env["skill_usage"].mark_agent_created(n)

    # Bump use_count differentially. All three counters (use/view/patch) feed
    # into activity_count, so bumping use alone is enough to make activity
@ -150,7 +156,9 @@ def test_status_hides_most_active_when_all_zero(curator_status_env):
    env = curator_status_env
    env["make_skill"]("a")
    env["make_skill"]("b")
-    # No bumps.
+    # Mark both as agent-created so the catalog lists them. No bumps.
+    env["skill_usage"].mark_agent_created("a")
+    env["skill_usage"].mark_agent_created("b")

    out = _capture_status(env["curator_cli"])

@ -167,3 +175,28 @@ def test_status_no_skills_produces_clean_empty_output(curator_status_env):
    # None of the ranking sections render
    assert "most active" not in out
    assert "least active" not in out
+
+
+def test_status_marks_missing_last_report_path(monkeypatch, capsys, tmp_path):
+    import agent.curator as curator_state
+    import hermes_cli.curator as curator_cli
+    import tools.skill_usage as skill_usage
+
+    missing_report = tmp_path / "stale-report"
+    monkeypatch.setattr(curator_state, "load_state", lambda: {
+        "paused": False,
+        "last_run_at": None,
+        "last_run_summary": "auto: no changes",
+        "run_count": 1,
+        "last_report_path": str(missing_report),
+    })
+    monkeypatch.setattr(curator_state, "is_enabled", lambda: True)
+    monkeypatch.setattr(curator_state, "get_interval_hours", lambda: 168)
+    monkeypatch.setattr(curator_state, "get_stale_after_days", lambda: 30)
+    monkeypatch.setattr(curator_state, "get_archive_after_days", lambda: 90)
+    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: [])
+
+    assert curator_cli._cmd_status(SimpleNamespace()) == 0
+
+    out = capsys.readouterr().out
+    assert f"last report:    {missing_report} (missing)" in out
--- a/tests/hermes_cli/test_custom_provider_model_switch.py
+++ b/tests/hermes_cli/test_custom_provider_model_switch.py
@ -56,7 +56,6 @@ class TestCustomProviderModelSwitch:
            "sk-test",
            "https://vllm.example.com/v1",
            timeout=8.0,
-            api_mode=None,
        )

    def test_can_switch_to_different_model(self, config_home):
@ -141,12 +140,18 @@ class TestCustomProviderModelSwitch:
            "api_mode": "anthropic_messages",
        }

-        with patch("hermes_cli.models.fetch_api_models", return_value=["claude-3"]), \
+        with patch("hermes_cli.models.fetch_api_models", return_value=["claude-3"]) as mock_fetch, \
             patch.dict("sys.modules", {"simple_term_menu": None}), \
             patch("builtins.input", return_value="1"), \
             patch("builtins.print"):
            _model_flow_named_custom({}, provider_info)

+        mock_fetch.assert_called_once_with(
+            "***",
+            "https://proxy.example.com/anthropic",
+            timeout=8.0,
+            api_mode="anthropic_messages",
+        )
        config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
        model = config.get("model")
        assert isinstance(model, dict)
@ -215,7 +220,6 @@ class TestCustomProviderModelSwitch:
            "sk-live-example-provider",
            "https://api.example-provider.test/v1",
            timeout=8.0,
-            api_mode=None,
        )
        config = yaml.safe_load(config_path.read_text()) or {}
        assert config["model"]["api_key"] == "${EXAMPLE_PROVIDER_API_KEY}"
--- a/tests/hermes_cli/test_debug.py
+++ b/tests/hermes_cli/test_debug.py
@ -273,6 +273,108 @@ class TestCaptureLogSnapshot:
        assert "rotated agent data" in snap.full_text


+# ---------------------------------------------------------------------------
+# Capture log redaction (force=True applies regardless of HERMES_REDACT_SECRETS)
+# ---------------------------------------------------------------------------
+
+# A vendor-prefixed token used across redaction tests. Long enough to clear
+# the redactor's `floor` parameter so it actually masks rather than fully blanks.
+_REDACT_FIXTURE_TOKEN = "sk-proj-A1B2C3D4E5F6G7H8I9J0aA"
+
+
+class TestCaptureLogSnapshotRedaction:
+    """Pin upload-time redaction at the _capture_log_snapshot boundary."""
+
+    @pytest.fixture
+    def hermes_home_with_secret(self, tmp_path, monkeypatch):
+        """Isolated HERMES_HOME whose agent.log contains a vendor-prefixed token."""
+        home = tmp_path / ".hermes"
+        home.mkdir()
+        monkeypatch.setenv("HERMES_HOME", str(home))
+        # Baseline fixture: no explicit env-var opinion. With the post-#17691
+        # default of ON, the default-path tests below exercise the
+        # secure-default behaviour. The `force=True` regression test
+        # setenvs to "false" inline to prove force=True works even when
+        # the runtime flag is disabled.
+        monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)
+
+        logs_dir = home / "logs"
+        logs_dir.mkdir()
+        (logs_dir / "agent.log").write_text(
+            f"2026-04-12 17:00:00 INFO config: api_key={_REDACT_FIXTURE_TOKEN} loaded\n"
+        )
+        (logs_dir / "errors.log").write_text("")
+        (logs_dir / "gateway.log").write_text("")
+        return home
+
+    def test_default_redacts_tail_and_full_text(self, hermes_home_with_secret):
+        from hermes_cli.debug import _capture_log_snapshot
+
+        snap = _capture_log_snapshot("agent", tail_lines=10)
+
+        # Both views the upload uses must be sanitized.
+        assert _REDACT_FIXTURE_TOKEN not in snap.tail_text
+        assert snap.full_text is not None
+        assert _REDACT_FIXTURE_TOKEN not in snap.full_text
+
+    def test_redact_false_passes_through(self, hermes_home_with_secret):
+        from hermes_cli.debug import _capture_log_snapshot
+
+        snap = _capture_log_snapshot("agent", tail_lines=10, redact=False)
+
+        # Original token survives when the caller opts out.
+        assert _REDACT_FIXTURE_TOKEN in snap.tail_text
+        assert _REDACT_FIXTURE_TOKEN in (snap.full_text or "")
+
+    def test_force_true_works_when_redaction_disabled(
+        self, hermes_home_with_secret, monkeypatch
+    ):
+        """Regression test: redact_sensitive_text short-circuits without force=True.
+
+        If a future refactor drops `force=True` from `_redact_log_text`, this
+        test fails immediately. Without `force=True`, the redactor returns the
+        input unchanged when HERMES_REDACT_SECRETS=false, and the share-time
+        redaction feature ships silently broken for users who opted out of
+        runtime redaction (e.g. developers working on the redactor itself).
+        """
+        import os
+
+        # Force the runtime flag off so we're exercising the force=True path,
+        # not the default-on path.
+        monkeypatch.setenv("HERMES_REDACT_SECRETS", "false")
+
+        from hermes_cli.debug import _capture_log_snapshot
+
+        assert os.environ.get("HERMES_REDACT_SECRETS", "") == "false"
+
+        snap = _capture_log_snapshot("agent", tail_lines=10)
+
+        assert _REDACT_FIXTURE_TOKEN not in snap.tail_text
+        assert snap.full_text is not None
+        assert _REDACT_FIXTURE_TOKEN not in snap.full_text
+
+    def test_capture_default_log_snapshots_threads_redact(
+        self, hermes_home_with_secret
+    ):
+        from hermes_cli.debug import _capture_default_log_snapshots
+
+        snaps = _capture_default_log_snapshots(50)
+
+        # Default threads redact=True to all three captured logs.
+        assert _REDACT_FIXTURE_TOKEN not in snaps["agent"].tail_text
+        assert _REDACT_FIXTURE_TOKEN not in (snaps["agent"].full_text or "")
+
+    def test_capture_default_log_snapshots_no_redact_passes_through(
+        self, hermes_home_with_secret
+    ):
+        from hermes_cli.debug import _capture_default_log_snapshots
+
+        snaps = _capture_default_log_snapshots(50, redact=False)
+
+        assert _REDACT_FIXTURE_TOKEN in snaps["agent"].tail_text
+        assert _REDACT_FIXTURE_TOKEN in (snaps["agent"].full_text or "")
+
+
 # ---------------------------------------------------------------------------
 # Debug report collection
 # ---------------------------------------------------------------------------
@ -556,6 +658,124 @@ class TestRunDebugShare:
        assert "all failed" in out.err


+# ---------------------------------------------------------------------------
+# Share-time redaction wiring + visible banner
+# ---------------------------------------------------------------------------
+
+class TestRunDebugShareRedaction:
+    """End-to-end: --no-redact flag, banner injection, default behavior."""
+
+    @pytest.fixture
+    def hermes_home_with_secret(self, tmp_path, monkeypatch):
+        """Isolated HERMES_HOME whose agent.log contains a vendor-prefixed token."""
+        home = tmp_path / ".hermes"
+        home.mkdir()
+        monkeypatch.setenv("HERMES_HOME", str(home))
+        monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)
+
+        logs_dir = home / "logs"
+        logs_dir.mkdir()
+        (logs_dir / "agent.log").write_text(
+            f"2026-04-12 17:00:00 INFO config: api_key={_REDACT_FIXTURE_TOKEN} loaded\n"
+        )
+        (logs_dir / "errors.log").write_text("")
+        (logs_dir / "gateway.log").write_text(
+            f"2026-04-12 17:00:01 INFO gateway.run: token {_REDACT_FIXTURE_TOKEN}\n"
+        )
+        return home
+
+    def test_default_share_redacts_uploaded_content(
+        self, hermes_home_with_secret, capsys
+    ):
+        """The uploaded report and full-log pastes do not contain the raw token."""
+        from hermes_cli.debug import run_debug_share
+
+        args = MagicMock()
+        args.lines = 50
+        args.expire = 7
+        args.local = False
+        args.no_redact = False
+
+        captured: list[str] = []
+
+        def fake_upload(content, expiry_days=7):
+            captured.append(content)
+            return f"https://paste.rs/{len(captured)}"
+
+        with patch("hermes_cli.dump.run_dump"), \
+             patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)), \
+             patch("hermes_cli.debug.upload_to_pastebin", side_effect=fake_upload):
+            run_debug_share(args)
+
+        # At least the report plus one full log paste reached the upload path.
+        assert len(captured) >= 2
+        for content in captured:
+            assert _REDACT_FIXTURE_TOKEN not in content, (
+                "raw token leaked into upload-bound content"
+            )
+
+    def test_default_share_includes_redaction_banner(
+        self, hermes_home_with_secret, capsys
+    ):
+        """Each upload-bound paste carries the visible redaction banner."""
+        from hermes_cli.debug import run_debug_share
+
+        args = MagicMock()
+        args.lines = 50
+        args.expire = 7
+        args.local = False
+        args.no_redact = False
+
+        captured: list[str] = []
+
+        def fake_upload(content, expiry_days=7):
+            captured.append(content)
+            return f"https://paste.rs/{len(captured)}"
+
+        with patch("hermes_cli.dump.run_dump"), \
+             patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)), \
+             patch("hermes_cli.debug.upload_to_pastebin", side_effect=fake_upload):
+            run_debug_share(args)
+
+        for content in captured:
+            assert "redacted at upload time" in content, (
+                "redaction banner missing from upload-bound content"
+            )
+
+    def test_no_redact_flag_disables_redaction_and_banner(
+        self, hermes_home_with_secret, capsys
+    ):
+        """--no-redact preserves original log content and omits the banner."""
+        from hermes_cli.debug import run_debug_share
+
+        args = MagicMock()
+        args.lines = 50
+        args.expire = 7
+        args.local = False
+        args.no_redact = True
+
+        captured: list[str] = []
+
+        def fake_upload(content, expiry_days=7):
+            captured.append(content)
+            return f"https://paste.rs/{len(captured)}"
+
+        with patch("hermes_cli.dump.run_dump"), \
+             patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)), \
+             patch("hermes_cli.debug.upload_to_pastebin", side_effect=fake_upload):
+            run_debug_share(args)
+
+        # The agent.log paste should now contain the raw token.
+        assert any(_REDACT_FIXTURE_TOKEN in c for c in captured), (
+            "expected raw token in --no-redact upload"
+        )
+        # No banner anywhere when redaction is disabled.
+        for content in captured:
+            assert "redacted at upload time" not in content, (
+                "banner present with --no-redact"
+            )
+
+
 # ---------------------------------------------------------------------------
 # run_debug router
 # ---------------------------------------------------------------------------
--- a/tests/hermes_cli/test_destructive_slash_confirm_gate.py
+++ b/tests/hermes_cli/test_destructive_slash_confirm_gate.py
@ -0,0 +1,86 @@
+"""Tests for the approvals.destructive_slash_confirm config gate.
+
+Destructive session slash commands (/clear, /new, /reset, /undo) discard
+conversation state.  This config key (default True) gates a three-option
+confirmation prompt — "Always Approve" flips the key to False so future
+destructive commands run silently.
+
+See gateway/run.py::_maybe_confirm_destructive_slash and
+cli.py::_confirm_destructive_slash for the runtime gate.
+"""
+
+from __future__ import annotations
+
+from hermes_cli.config import DEFAULT_CONFIG
+
+
+class TestDestructiveSlashConfirmDefault:
+    def test_default_config_has_the_key(self):
+        approvals = DEFAULT_CONFIG.get("approvals")
+        assert isinstance(approvals, dict)
+        assert "destructive_slash_confirm" in approvals
+
+    def test_default_is_true(self):
+        # New installs confirm by default — destructive commands must not
+        # silently wipe history without an explicit user "yes".
+        assert DEFAULT_CONFIG["approvals"]["destructive_slash_confirm"] is True
+
+    def test_shape_matches_other_approval_keys(self):
+        approvals = DEFAULT_CONFIG["approvals"]
+        assert isinstance(approvals.get("destructive_slash_confirm"), bool)
+        # Sibling key shape sanity — same flat dict level as mcp_reload_confirm.
+        assert isinstance(approvals.get("mcp_reload_confirm"), bool)
+
+
+class TestUserConfigMerge:
+    """If a user has a pre-existing config without this key, load_config
+    should fill it in from DEFAULT_CONFIG (deep merge preserves keys the
+    user didn't override)."""
+
+    def test_existing_user_config_without_key_gets_default(self, tmp_path, monkeypatch):
+        import yaml
+
+        home = tmp_path / ".hermes"
+        home.mkdir()
+        cfg_path = home / "config.yaml"
+        legacy = {
+            "approvals": {"mode": "manual", "timeout": 60, "cron_mode": "deny"},
+        }
+        cfg_path.write_text(yaml.safe_dump(legacy))
+
+        monkeypatch.setenv("HERMES_HOME", str(home))
+        import importlib
+        import hermes_cli.config as cfg_mod
+        importlib.reload(cfg_mod)
+
+        cfg = cfg_mod.load_config()
+        assert cfg["approvals"]["destructive_slash_confirm"] is True
+
+    def test_existing_user_config_with_false_key_survives_merge(
+        self, tmp_path, monkeypatch,
+    ):
+        """A user who clicked "Always Approve" (key=false) must keep that
+        setting — the default-true value must not win on later loads.
+        """
+        import yaml
+
+        home = tmp_path / ".hermes"
+        home.mkdir()
+        cfg_path = home / "config.yaml"
+        user_cfg = {
+            "approvals": {
+                "mode": "manual",
+                "timeout": 60,
+                "cron_mode": "deny",
+                "destructive_slash_confirm": False,
+            },
+        }
+        cfg_path.write_text(yaml.safe_dump(user_cfg))
+
+        monkeypatch.setenv("HERMES_HOME", str(home))
+        import importlib
+        import hermes_cli.config as cfg_mod
+        importlib.reload(cfg_mod)
+
+        cfg = cfg_mod.load_config()
+        assert cfg["approvals"]["destructive_slash_confirm"] is False
--- a/tests/hermes_cli/test_discord_skill_clamp_warning.py
+++ b/tests/hermes_cli/test_discord_skill_clamp_warning.py
@ -0,0 +1,246 @@
+"""Tests for Discord /skill 32-char clamp collision warnings.
+
+Discord's per-command name limit is 32 chars, so
+``discord_skill_commands_by_category`` clamps skill slugs to that width
+before deduping. When two skills share the same 32-char prefix, only
+the first (alphabetical) wins; the second is dropped. Previously the
+drop was silent — the ``hidden`` count incremented but nothing named
+which skills collided, so authors had no way to discover the drop
+short of noticing that their skill was missing from the autocomplete.
+
+This module pins the upgraded behavior: a WARNING log with both full
+cmd_keys + the clamped name, so whoever named the skills sees the
+collision and can rename one.
+"""
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from unittest.mock import patch
+
+
+def test_clamp_collision_emits_warning_naming_both_skills(
+    tmp_path: Path, caplog
+) -> None:
+    """Two skills with identical first 32 chars — warning names both."""
+    from hermes_cli.commands import discord_skill_commands_by_category
+
+    # Craft cmd_keys that share the first 32 chars.
+    # 40-char prefix 'skill-collision-prefix-identical-first-32'
+    #   -> clamped to 'skill-collision-prefix-identical'
+    prefix = "skill-collision-prefix-identical"  # exactly 32 chars
+    name_a = prefix + "-alpha"  # /skill-collision-prefix-identical-alpha
+    name_b = prefix + "-bravo"  # /skill-collision-prefix-identical-bravo
+    assert name_a[:32] == name_b[:32] == prefix
+
+    skills_dir = tmp_path / "skills"
+    for nm in (name_a, name_b):
+        d = skills_dir / "creative" / nm
+        d.mkdir(parents=True)
+        (d / "SKILL.md").write_text("---\nname: x\n---\n")
+
+    fake_cmds = {
+        f"/{name_a}": {
+            "name": name_a,
+            "description": "Alpha",
+            "skill_md_path": str(skills_dir / "creative" / name_a / "SKILL.md"),
+        },
+        f"/{name_b}": {
+            "name": name_b,
+            "description": "Bravo",
+            "skill_md_path": str(skills_dir / "creative" / name_b / "SKILL.md"),
+        },
+    }
+
+    with caplog.at_level(logging.WARNING, logger="hermes_cli.commands"), (
+        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds)
+    ), patch("tools.skills_tool.SKILLS_DIR", skills_dir):
+        categories, uncategorized, hidden = discord_skill_commands_by_category(
+            reserved_names=set(),
+        )
+
+    # One skill made it through, one was dropped (hidden counted).
+    assert hidden == 1
+    kept_names = [n for n, _d, _k in categories.get("creative", [])]
+    assert len(kept_names) == 1
+    # Alphabetical iteration means the -alpha variant wins the slot.
+    assert kept_names[0] == prefix  # clamped
+
+    # Exactly one warning, naming BOTH full cmd_keys and the clamped name.
+    warnings = [
+        r for r in caplog.records
+        if r.levelno == logging.WARNING and "clamp" in r.getMessage()
+    ]
+    assert len(warnings) == 1, (
+        f"expected exactly one clamp-collision warning, got {len(warnings)}: "
+        f"{[r.getMessage() for r in warnings]}"
+    )
+    msg = warnings[0].getMessage()
+    assert f"/{name_a}" in msg, f"winner not named in warning: {msg!r}"
+    assert f"/{name_b}" in msg, f"loser not named in warning: {msg!r}"
+    assert prefix in msg, f"clamped name not in warning: {msg!r}"
+
+
+def test_clamp_collision_with_reserved_name_emits_distinct_warning(
+    tmp_path: Path, caplog
+) -> None:
+    """A skill clashing with a reserved gateway command gets its own phrasing.
+
+    The reserved-vs-skill case is operationally different — the fix is
+    still "rename the skill," but there's no second skill to also
+    rename. The warning should say so explicitly.
+    """
+    from hermes_cli.commands import discord_skill_commands_by_category
+
+    # Reserved name 'help' is 4 chars — make a skill whose slug
+    # clamps to 'help' (so, exactly 'help').
+    reserved = "help"
+    skills_dir = tmp_path / "skills"
+    d = skills_dir / "creative" / reserved
+    d.mkdir(parents=True)
+    (d / "SKILL.md").write_text("---\nname: x\n---\n")
+
+    fake_cmds = {
+        f"/{reserved}": {
+            "name": reserved,
+            "description": "desc",
+            "skill_md_path": str(d / "SKILL.md"),
+        },
+    }
+
+    with caplog.at_level(logging.WARNING, logger="hermes_cli.commands"), (
+        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds)
+    ), patch("tools.skills_tool.SKILLS_DIR", skills_dir):
+        categories, uncategorized, hidden = discord_skill_commands_by_category(
+            reserved_names={"help"},
+        )
+
+    # Skill dropped in favor of the reserved command.
+    assert hidden == 1
+    assert categories == {}
+    assert uncategorized == []
+
+    warnings = [
+        r for r in caplog.records
+        if r.levelno == logging.WARNING and "reserved" in r.getMessage()
+    ]
+    assert len(warnings) == 1, (
+        f"expected one reserved-name collision warning, got "
+        f"{[r.getMessage() for r in warnings]}"
+    )
+    msg = warnings[0].getMessage()
+    assert f"/{reserved}" in msg
+    assert "reserved" in msg.lower()
+
+
+def test_no_collision_no_warning(tmp_path: Path, caplog) -> None:
+    """Sanity: two distinct-prefix skills produce zero warnings."""
+    from hermes_cli.commands import discord_skill_commands_by_category
+
+    skills_dir = tmp_path / "skills"
+    for nm in ("alpha", "bravo"):
+        d = skills_dir / "creative" / nm
+        d.mkdir(parents=True)
+        (d / "SKILL.md").write_text("---\nname: x\n---\n")
+
+    fake_cmds = {
+        "/alpha": {
+            "name": "alpha", "description": "",
+            "skill_md_path": str(skills_dir / "creative" / "alpha" / "SKILL.md"),
+        },
+        "/bravo": {
+            "name": "bravo", "description": "",
+            "skill_md_path": str(skills_dir / "creative" / "bravo" / "SKILL.md"),
+        },
+    }
+
+    with caplog.at_level(logging.WARNING, logger="hermes_cli.commands"), (
+        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds)
+    ), patch("tools.skills_tool.SKILLS_DIR", skills_dir):
+        categories, uncategorized, hidden = discord_skill_commands_by_category(
+            reserved_names=set(),
+        )
+
+    assert hidden == 0
+    assert {n for n, _d, _k in categories["creative"]} == {"alpha", "bravo"}
+    clamp_warnings = [
+        r for r in caplog.records
+        if r.levelno == logging.WARNING
+        and ("clamp" in r.getMessage() or "reserved" in r.getMessage())
+    ]
+    assert clamp_warnings == []
+
+
+def test_long_skill_name_preserves_cmd_key_through_by_category(
+    tmp_path: Path,
+) -> None:
+    """Skills with names > 32 chars must keep their original cmd_key.
+
+    ``discord_skill_commands_by_category`` clamps the display name to 32
+    chars but the third tuple element (cmd_key) must stay as the original
+    ``/full-skill-name`` so that ``_skill_handler`` dispatches via
+    ``_run_simple_slash`` with the full command, not the truncated one.
+
+    This is the actual runtime path used by the Discord adapter via
+    ``_refresh_skill_catalog_state``.
+    """
+    from hermes_cli.commands import discord_skill_commands_by_category
+
+    skills_dir = tmp_path / "skills"
+    skills_dir.mkdir()
+    resolved = str(skills_dir.resolve())
+
+    long_name = "generate-ascii-art-from-text-description-detailed"
+    cmd_key = f"/{long_name}"
+    fake_cmds = {
+        cmd_key: {
+            "name": long_name,
+            "description": "Generate ASCII art from a text description",
+            "skill_md_path": f"{resolved}/creative/{long_name}/SKILL.md",
+            "skill_dir": f"{resolved}/creative/{long_name}",
+        },
+        "/short-skill": {
+            "name": "short-skill",
+            "description": "A short skill",
+            "skill_md_path": f"{resolved}/creative/short-skill/SKILL.md",
+            "skill_dir": f"{resolved}/creative/short-skill",
+        },
+    }
+
+    with patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds), \
+         patch("tools.skills_tool.SKILLS_DIR", skills_dir):
+        categories, uncategorized, hidden = discord_skill_commands_by_category(
+            reserved_names=set(),
+        )
+
+    # Flatten (same as _refresh_skill_catalog_state does)
+    entries = list(uncategorized)
+    for cat_skills in categories.values():
+        entries.extend(cat_skills)
+
+    # Build lookup (same as _refresh_skill_catalog_state does)
+    skill_lookup = {n: (d, k) for n, d, k in entries}
+
+    # Find the long skill
+    long_entry = [e for e in entries if e[2] == cmd_key]
+    assert len(long_entry) == 1, f"Long skill should appear once, got: {long_entry}"
+
+    display_name, desc, key = long_entry[0]
+    assert len(display_name) <= 32, (
+        f"Display name should be clamped to 32 chars, got {len(display_name)}"
+    )
+    assert key == cmd_key, (
+        f"cmd_key must be the original /{long_name}, got {key!r}"
+    )
+
+    # Verify lookup works: clamped display name -> original cmd_key
+    assert display_name in skill_lookup
+    _desc, looked_up_key = skill_lookup[display_name]
+    assert looked_up_key == cmd_key, (
+        f"Lookup must map clamped name to original cmd_key, got {looked_up_key!r}"
+    )
+
+    # Short skill should also be present and correct
+    short_entry = [e for e in entries if e[2] == "/short-skill"]
+    assert len(short_entry) == 1
+    assert short_entry[0][0] == "short-skill"
--- a/tests/hermes_cli/test_doctor.py
+++ b/tests/hermes_cli/test_doctor.py
@ -51,6 +51,57 @@ class TestProviderEnvDetection:
        assert not _has_provider_env_config(content)


+class TestDoctorEnvFileEncoding:
+    """Regression for #18637 (bug 3): `hermes doctor` crashed on Windows
+    Chinese locale (GBK) because `.env` was read with Path.read_text() which
+    defaults to the system locale encoding, not UTF-8."""
+
+    def test_doctor_reads_env_as_utf8_even_when_locale_is_not_utf8(
+        self, monkeypatch, tmp_path
+    ):
+        import pathlib
+
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        # Write a UTF-8 .env containing an em dash (U+2014 = e2 80 94). The
+        # 0x94 byte is exactly the one the issue reporter hit: it's invalid
+        # as a GBK trailing byte in this position, so locale-default reads
+        # raise UnicodeDecodeError on Chinese Windows.
+        env_path = hermes_home / ".env"
+        env_path.write_text(
+            "OPENAI_API_KEY=sk-test  # em-dash here — should not crash\n",
+            encoding="utf-8",
+        )
+
+        monkeypatch.setattr(doctor_mod, "HERMES_HOME", hermes_home)
+
+        orig_read_text = pathlib.Path.read_text
+
+        def gbk_like_read_text(self, encoding=None, errors=None, **kwargs):
+            # Simulate a GBK locale: refuse to decode this specific UTF-8
+            # .env unless the caller pins encoding="utf-8".
+            if self == env_path and encoding != "utf-8":
+                raise UnicodeDecodeError(
+                    "gbk", b"\x94", 0, 1, "illegal multibyte sequence"
+                )
+            return orig_read_text(self, encoding=encoding, errors=errors, **kwargs)
+
+        monkeypatch.setattr(pathlib.Path, "read_text", gbk_like_read_text)
+
+        # Short-circuit the expensive tool-availability probe — we only
+        # need doctor to reach the .env read without crashing.
+        fake_model_tools = types.SimpleNamespace(
+            check_tool_availability=lambda *a, **kw: (_ for _ in ()).throw(SystemExit(0)),
+            TOOLSET_REQUIREMENTS={},
+        )
+        monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
+
+        # Run doctor. If the .env read still uses locale encoding, this
+        # raises UnicodeDecodeError and the test fails.
+        with pytest.raises(SystemExit):
+            doctor_mod.run_doctor(Namespace(fix=False))
+
+
 class TestDoctorToolAvailabilityOverrides:
    def test_marks_honcho_available_when_configured(self, monkeypatch):
        monkeypatch.setattr(doctor, "_honcho_is_configured_for_doctor", lambda: True)
@ -75,6 +126,47 @@ class TestDoctorToolAvailabilityOverrides:
        assert available == []
        assert unavailable == [honcho_entry]

+    def test_marks_kanban_available_only_when_missing_worker_env_gate(self, monkeypatch):
+        monkeypatch.setattr(doctor, "_honcho_is_configured_for_doctor", lambda: False)
+        monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False)
+
+        available, unavailable = doctor._apply_doctor_tool_availability_overrides(
+            [],
+            [{"name": "kanban", "env_vars": [], "tools": ["kanban_show"]}],
+        )
+
+        assert available == ["kanban"]
+        assert unavailable == []
+
+    def test_leaves_kanban_unavailable_when_worker_env_is_set(self, monkeypatch):
+        monkeypatch.setenv("HERMES_KANBAN_TASK", "probe")
+        kanban_entry = {"name": "kanban", "env_vars": [], "tools": ["kanban_show"]}
+
+        available, unavailable = doctor._apply_doctor_tool_availability_overrides(
+            [],
+            [kanban_entry],
+        )
+
+        assert available == []
+        assert unavailable == [kanban_entry]
+
+    def test_leaves_non_worker_kanban_failure_unavailable(self, monkeypatch):
+        monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False)
+        kanban_entry = {"name": "kanban", "env_vars": [], "tools": ["kanban_show", "not_a_kanban_tool"]}
+
+        available, unavailable = doctor._apply_doctor_tool_availability_overrides(
+            [],
+            [kanban_entry],
+        )
+
+        assert available == []
+        assert unavailable == [kanban_entry]
+
+    def test_kanban_doctor_detail_explains_worker_gate(self, monkeypatch):
+        monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False)
+
+        assert doctor._doctor_tool_availability_detail("kanban") == "(runtime-gated; loaded only for dispatcher-spawned workers)"
+

 class TestHonchoDoctorConfigDetection:
    def test_reports_configured_when_enabled_with_api_key(self, monkeypatch):
@ -286,6 +378,11 @@ def test_run_doctor_termux_treats_docker_and_browser_warnings_as_expected(monkey
    assert "1) pkg install nodejs" in out
    assert "2) npm install -g agent-browser" in out
    assert "3) agent-browser install" in out
+    assert "Termux compatibility fallbacks:" in out
+    assert "use .[termux-all] for broad compatibility" in out
+    assert "Matrix E2EE extra is excluded on Termux" in out
+    assert "Local faster-whisper extra is excluded on Termux" in out
+    assert "STT fallback: use Groq Whisper (set GROQ_API_KEY) or OpenAI Whisper (set VOICE_TOOLS_OPENAI_KEY)." in out
    assert "docker not found (optional)" not in out


@ -430,6 +527,46 @@ def test_run_doctor_accepts_hermes_provider_ids_that_catalog_aliases(
        )


+
+
+def test_run_doctor_accepts_kimi_coding_cn_provider(monkeypatch, tmp_path):
+    home = tmp_path / ".hermes"
+    home.mkdir(parents=True, exist_ok=True)
+    (home / ".env").write_text("KIMI_CN_API_KEY=***\n", encoding="utf-8")
+    (home / "config.yaml").write_text(
+        "model:\n"
+        "  provider: kimi-coding-cn\n"
+        "  default: kimi-k2.6\n",
+        encoding="utf-8",
+    )
+
+    monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
+    monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", tmp_path / "project")
+    monkeypatch.setattr(doctor_mod, "_DHH", str(home))
+    (tmp_path / "project").mkdir(exist_ok=True)
+
+    fake_model_tools = types.SimpleNamespace(
+        check_tool_availability=lambda *a, **kw: ([], []),
+        TOOLSET_REQUIREMENTS={},
+    )
+    monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
+
+    try:
+        from hermes_cli import auth as _auth_mod
+        monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
+        monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
+        monkeypatch.setattr(_auth_mod, "get_auth_status", lambda provider: {"logged_in": True})
+    except Exception:
+        pass
+
+    buf = io.StringIO()
+    with contextlib.redirect_stdout(buf):
+        doctor_mod.run_doctor(Namespace(fix=False))
+
+    out = buf.getvalue()
+    assert "model.provider 'kimi-coding-cn' is not a recognised provider" not in out
+
+
 def test_run_doctor_termux_does_not_mark_browser_available_without_agent_browser(monkeypatch, tmp_path):
    home = tmp_path / ".hermes"
    home.mkdir(parents=True, exist_ok=True)
@ -520,6 +657,60 @@ def test_run_doctor_kimi_cn_env_is_detected_and_probe_is_null_safe(monkeypatch,
    assert any(url == "https://api.moonshot.cn/v1/models" for url, _, _ in calls)


+def test_run_doctor_dashscope_retries_china_endpoint_after_intl_unauthorized(monkeypatch, tmp_path):
+    home = tmp_path / ".hermes"
+    home.mkdir(parents=True, exist_ok=True)
+    (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
+    (home / ".env").write_text("DASHSCOPE_API_KEY=sk-test\n", encoding="utf-8")
+    project = tmp_path / "project"
+    project.mkdir(exist_ok=True)
+
+    monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
+    monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
+    monkeypatch.setattr(doctor_mod, "_DHH", str(home))
+    monkeypatch.setenv("DASHSCOPE_API_KEY", "sk-test")
+    monkeypatch.delenv("DASHSCOPE_BASE_URL", raising=False)
+
+    fake_model_tools = types.SimpleNamespace(
+        check_tool_availability=lambda *a, **kw: ([], []),
+        TOOLSET_REQUIREMENTS={},
+    )
+    monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
+
+    try:
+        from hermes_cli import auth as _auth_mod
+        monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
+        monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
+    except ImportError:
+        pass
+
+    calls = []
+
+    def fake_get(url, headers=None, timeout=None):
+        calls.append((url, headers, timeout))
+        status = 200 if "dashscope.aliyuncs.com" in url else 401
+        return types.SimpleNamespace(status_code=status)
+
+    import httpx
+    monkeypatch.setattr(httpx, "get", fake_get)
+
+    buf = io.StringIO()
+    with contextlib.redirect_stdout(buf):
+        doctor_mod.run_doctor(Namespace(fix=False))
+    out = buf.getvalue()
+
+    assert "Alibaba/DashScope" in out
+    assert "invalid API key" not in out
+    assert any(
+        url == "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models"
+        for url, _, _ in calls
+    )
+    assert any(
+        url == "https://dashscope.aliyuncs.com/compatible-mode/v1/models"
+        for url, _, _ in calls
+    )
+
+
@pytest.mark.parametrize("base_url", [None, "https://opencode.ai/zen/go/v1"])
 def test_run_doctor_opencode_go_skips_invalid_models_probe(monkeypatch, tmp_path, base_url):
    home = tmp_path / ".hermes"
@ -572,3 +763,79 @@ def test_run_doctor_opencode_go_skips_invalid_models_probe(monkeypatch, tmp_path
    )
    assert not any(url == "https://opencode.ai/zen/go/v1/models" for url, _, _ in calls)
    assert not any("opencode" in url.lower() and "models" in url.lower() for url, _, _ in calls)
+
+
+class TestGitHubTokenCheck:
+    """Tests for GitHub token / gh auth detection in doctor."""
+
+    def test_no_token_and_not_gh_authenticated_shows_warn(self, monkeypatch, tmp_path):
+        home = tmp_path / ".hermes"
+        home.mkdir(parents=True, exist_ok=True)
+        monkeypatch.setenv("HERMES_HOME", str(home))
+        monkeypatch.setenv("PATH", "/nonexistent")  # gh not found
+
+        from hermes_cli.doctor import run_doctor, _DHH
+        import io, contextlib
+
+        buf = io.StringIO()
+        with contextlib.redirect_stdout(buf):
+            run_doctor(Namespace(fix=False))
+        out = buf.getvalue()
+
+        assert "No GITHUB_TOKEN" in out
+        assert "60 req/hr" in out
+
+    def test_token_env_present_shows_ok(self, monkeypatch, tmp_path):
+        home = tmp_path / ".hermes"
+        home.mkdir(parents=True, exist_ok=True)
+        monkeypatch.setenv("HERMES_HOME", str(home))
+        monkeypatch.setenv("GITHUB_TOKEN", "ghp_test123")
+        monkeypatch.setenv("PATH", "/nonexistent")  # gh not found
+
+        from hermes_cli.doctor import run_doctor
+        import io, contextlib
+
+        buf = io.StringIO()
+        with contextlib.redirect_stdout(buf):
+            run_doctor(Namespace(fix=False))
+        out = buf.getvalue()
+
+        assert "GitHub token configured" in out
+
+    def test_gh_authenticated_without_env_token_shows_ok(self, monkeypatch, tmp_path):
+        home = tmp_path / ".hermes"
+        home.mkdir(parents=True, exist_ok=True)
+        monkeypatch.setenv("HERMES_HOME", str(home))
+        # No GITHUB_TOKEN or GH_TOKEN
+        monkeypatch.delenv("GITHUB_TOKEN", raising=False)
+        monkeypatch.delenv("GH_TOKEN", raising=False)
+
+        # Mock gh to return success
+        import shutil
+        real_which = shutil.which
+        def mock_which(cmd):
+            return "/usr/local/bin/gh" if cmd == "gh" else real_which(cmd)
+        monkeypatch.setattr(shutil, "which", mock_which)
+
+        call_log = []
+        def mock_run(cmd, **kwargs):
+            call_log.append(cmd)
+            if cmd[:2] == ["gh", "auth"]:
+                result = types.SimpleNamespace(returncode=0, stdout="", stderr="")
+            else:
+                result = types.SimpleNamespace(returncode=1, stdout="", stderr="")
+            return result
+
+        import subprocess
+        monkeypatch.setattr(subprocess, "run", mock_run)
+
+        from hermes_cli.doctor import run_doctor
+        import io, contextlib
+
+        buf = io.StringIO()
+        with contextlib.redirect_stdout(buf):
+            run_doctor(Namespace(fix=False))
+        out = buf.getvalue()
+
+        assert "gh auth" in str(call_log) or any(c[0] == "gh" for c in call_log), f"gh not called: {call_log}"
+        assert "GitHub authenticated via gh CLI" in out or "token configured" in out
--- a/tests/hermes_cli/test_doctor_dedicated_provider_skip.py
+++ b/tests/hermes_cli/test_doctor_dedicated_provider_skip.py
@ -0,0 +1,50 @@
+"""Regression: hermes doctor must not run a generic Bearer-auth health
+check for providers that already have a dedicated check (Anthropic,
+OpenRouter, Bedrock).
+
+Anthropic's native API requires `x-api-key` + `anthropic-version` headers;
+the generic loop sends `Authorization: Bearer ...` which Anthropic answers
+with HTTP 404. The dedicated check at hermes_cli/doctor.py already covers
+Anthropic with the right headers, so the pluggable profile must be
+skipped by `_build_apikey_providers_list()`.
+
+See: NousResearch/hermes-agent#22346
+"""
+
+from __future__ import annotations
+
+
+def test_build_apikey_providers_list_skips_dedicated_check_providers():
+    from hermes_cli import doctor
+
+    # Force a rebuild — the module caches the list on first call.
+    doctor._APIKEY_PROVIDERS_CACHE = None
+    entries = doctor._build_apikey_providers_list()
+
+    # Tuple shape: (display_name, env_vars, default_url, base_env, supports_health_check)
+    names = {entry[0].lower() for entry in entries}
+    assert not any("anthropic" in name for name in names), (
+        f"Anthropic provider profile leaked into generic Bearer-auth health "
+        f"check loop. Dedicated check above already covers it with "
+        f"x-api-key headers. Got entries: {sorted(names)}"
+    )
+    assert not any("openrouter" in name for name in names), (
+        f"OpenRouter has a dedicated check; generic loop must skip it. "
+        f"Got: {sorted(names)}"
+    )
+    assert not any("bedrock" in name for name in names), (
+        f"Bedrock uses AWS SDK creds, not Bearer auth; generic loop must skip. "
+        f"Got: {sorted(names)}"
+    )
+
+
+def test_build_apikey_providers_list_includes_non_dedicated_providers():
+    """Sanity guard: the skip-set must not strip every provider."""
+    from hermes_cli import doctor
+
+    doctor._APIKEY_PROVIDERS_CACHE = None
+    entries = doctor._build_apikey_providers_list()
+
+    names = {entry[0] for entry in entries}
+    assert "DeepSeek" in names
+    assert "Z.AI / GLM" in names
--- a/tests/hermes_cli/test_env_loader.py
+++ b/tests/hermes_cli/test_env_loader.py
@ -37,7 +37,7 @@ def test_project_env_is_sanitized_before_loading(tmp_path, monkeypatch):
    home = tmp_path / "hermes"
    project_env = tmp_path / ".env"
    project_env.write_text(
-        "TELEGRAM_BOT_TOKEN=8356550917:AAGGEkzg06Hrc3Hjb3Sa1jkGVDOdU_lYy2Q"
+        "TELEGRAM_BOT_TOKEN=0123456789:test"
        "ANTHROPIC_API_KEY=sk-ant-test123\n",
        encoding="utf-8",
    )
@ -48,7 +48,7 @@ def test_project_env_is_sanitized_before_loading(tmp_path, monkeypatch):
    loaded = load_hermes_dotenv(hermes_home=home, project_env=project_env)

    assert loaded == [project_env]
-    assert os.getenv("TELEGRAM_BOT_TOKEN") == "8356550917:AAGGEkzg06Hrc3Hjb3Sa1jkGVDOdU_lYy2Q"
+    assert os.getenv("TELEGRAM_BOT_TOKEN") == "0123456789:test"
    assert os.getenv("ANTHROPIC_API_KEY") == "sk-ant-test123"


--- a/tests/hermes_cli/test_env_sanitize_on_load.py
+++ b/tests/hermes_cli/test_env_sanitize_on_load.py
@ -14,7 +14,7 @@ def test_load_env_sanitizes_concatenated_lines():
    """
    from hermes_cli.config import load_env

-    token = "8356550917:AAGGEkzg06Hrc3Hjb3Sa1jkGVDOdU_lYy2Q"
+    token = "0123456789:test"
    # Simulate concatenated line: TOKEN=xxx followed immediately by another key
    corrupted = f"TELEGRAM_BOT_TOKEN={token}ANTHROPIC_API_KEY=sk-ant-test123\n"

@ -67,7 +67,7 @@ def test_env_loader_sanitizes_before_dotenv():
    """Verify env_loader._sanitize_env_file_if_needed fixes corrupted files."""
    from hermes_cli.env_loader import _sanitize_env_file_if_needed

-    token = "8356550917:AAGGEkzg06Hrc3Hjb3Sa1jkGVDOdU_lYy2Q"
+    token = "0123456789:test"
    corrupted = f"TELEGRAM_BOT_TOKEN={token}ANTHROPIC_API_KEY=sk-ant-test\n"

    with tempfile.NamedTemporaryFile(
--- a/tests/hermes_cli/test_gateway.py
+++ b/tests/hermes_cli/test_gateway.py
@ -13,6 +13,21 @@ def _install_fake_gateway_run(monkeypatch, start_gateway):
    module = ModuleType("gateway.run")
    module.start_gateway = start_gateway
    monkeypatch.setitem(sys.modules, "gateway.run", module)
+    # ``run_gateway()`` calls ``refresh_systemd_unit_if_needed()`` on every
+    # invocation so that restart settings stay current after exit-code-75
+    # respawns. That helper writes to ``Path.home() / ".config/systemd/user
+    # /hermes-gateway.service"`` and runs ``systemctl --user daemon-reload``
+    # — both target the *real* user environment because the conftest only
+    # sandboxes ``HERMES_HOME``, not ``HOME``. Tests that drive
+    # ``run_gateway()`` end-to-end with a fake ``start_gateway`` MUST stub
+    # the refresh call too, or every run rewrites the developer's installed
+    # unit (baking in the test's pytest-tmp ``HERMES_HOME`` value, which
+    # systemd then uses on the next boot — silently breaking the gateway
+    # for the developer).
+    monkeypatch.setattr(gateway, "supports_systemd_services", lambda: False)
+    monkeypatch.setattr(
+        gateway, "refresh_systemd_unit_if_needed", lambda system=False: False
+    )


 def test_run_gateway_exits_cleanly_on_keyboard_interrupt(monkeypatch, capsys):
@ -53,6 +68,103 @@ def test_run_gateway_exits_nonzero_when_start_gateway_reports_failure(monkeypatc
    assert calls == [(True, None)]


+def test_run_gateway_refuses_root_in_official_docker(monkeypatch, tmp_path, capsys):
+    project_root = tmp_path / "opt" / "hermes"
+    (project_root / "docker").mkdir(parents=True)
+    (project_root / "docker" / "entrypoint.sh").write_text("#!/bin/sh\n")
+
+    monkeypatch.setattr(gateway, "PROJECT_ROOT", project_root)
+    monkeypatch.setattr(gateway.os, "geteuid", lambda: 0)
+    monkeypatch.delenv("HERMES_ALLOW_ROOT_GATEWAY", raising=False)
+    monkeypatch.setattr(gateway, "_is_official_docker_checkout", lambda: True)
+
+    with pytest.raises(SystemExit) as exc_info:
+        gateway.run_gateway()
+
+    assert exc_info.value.code == 1
+    out = capsys.readouterr().out
+    assert "Refusing to run the Hermes gateway as root" in out
+    assert "/opt/hermes/docker/entrypoint.sh" in out
+
+
+def test_run_gateway_root_guard_has_escape_hatch(monkeypatch):
+    calls = []
+
+    def fake_start_gateway(*, replace, verbosity):
+        calls.append((replace, verbosity))
+        return object()
+
+    _install_fake_gateway_run(monkeypatch, fake_start_gateway)
+    monkeypatch.setattr(gateway.asyncio, "run", lambda coro: True)
+    monkeypatch.setattr(gateway.os, "geteuid", lambda: 0)
+    monkeypatch.setattr(gateway, "_is_official_docker_checkout", lambda: True)
+    monkeypatch.setenv("HERMES_ALLOW_ROOT_GATEWAY", "1")
+
+    gateway.run_gateway(verbose=2, replace=True)
+
+    assert calls == [(True, 2)]
+
+
+def test_run_gateway_windows_foreground_keeps_ctrl_c_enabled(monkeypatch):
+    calls = []
+
+    def fake_start_gateway(*, replace, verbosity):
+        calls.append((replace, verbosity))
+        return object()
+
+    class _TTY:
+        def isatty(self):
+            return True
+
+    signal_calls = []
+
+    def fake_signal(sig, handler):
+        signal_calls.append((sig, handler))
+
+    _install_fake_gateway_run(monkeypatch, fake_start_gateway)
+    monkeypatch.setattr(gateway, "is_windows", lambda: True)
+    monkeypatch.setattr(gateway, "supports_systemd_services", lambda: False)
+    monkeypatch.setattr(gateway.sys, "stdin", _TTY())
+    monkeypatch.delenv("HERMES_GATEWAY_DETACHED", raising=False)
+    monkeypatch.setattr(gateway.signal, "signal", fake_signal)
+    monkeypatch.setattr(gateway.asyncio, "run", lambda coro: True)
+
+    gateway.run_gateway()
+
+    assert calls == [(False, 0)]
+    assert (gateway.signal.SIGINT, gateway.signal.SIG_IGN) not in signal_calls
+
+
+def test_run_gateway_windows_detached_absorbs_console_controls(monkeypatch):
+    calls = []
+
+    def fake_start_gateway(*, replace, verbosity):
+        calls.append((replace, verbosity))
+        return object()
+
+    class _TTY:
+        def isatty(self):
+            return True
+
+    signal_calls = []
+
+    def fake_signal(sig, handler):
+        signal_calls.append((sig, handler))
+
+    _install_fake_gateway_run(monkeypatch, fake_start_gateway)
+    monkeypatch.setattr(gateway, "is_windows", lambda: True)
+    monkeypatch.setattr(gateway, "supports_systemd_services", lambda: False)
+    monkeypatch.setattr(gateway.sys, "stdin", _TTY())
+    monkeypatch.setenv("HERMES_GATEWAY_DETACHED", "1")
+    monkeypatch.setattr(gateway.signal, "signal", fake_signal)
+    monkeypatch.setattr(gateway.asyncio, "run", lambda coro: True)
+
+    gateway.run_gateway()
+
+    assert calls == [(False, 0)]
+    assert (gateway.signal.SIGINT, gateway.signal.SIG_IGN) in signal_calls
+
+
 class TestSystemdLingerStatus:
    def test_reports_enabled(self, monkeypatch):
        monkeypatch.setattr(gateway, "is_linux", lambda: True)
@ -307,9 +419,22 @@ def test_find_gateway_pids_falls_back_to_pid_file_when_process_scan_fails(monkey
    monkeypatch.setattr(gateway, "is_windows", lambda: False)
    monkeypatch.setattr("gateway.status.get_running_pid", lambda: 321)

+    # /proc walk is the first path tried (#22693). Force os.listdir on /proc
+    # to raise so the function falls back to ps, where fake_run takes over.
+    _real_listdir = gateway.os.listdir
+    def _no_proc_listdir(path):
+        if path == "/proc":
+            raise OSError("test stub: /proc unavailable")
+        return _real_listdir(path)
+    monkeypatch.setattr(gateway.os, "listdir", _no_proc_listdir)
+
    def fake_run(cmd, **kwargs):
        if cmd[:4] == ["ps", "-A", "eww", "-o"]:
            return SimpleNamespace(returncode=1, stdout="", stderr="ps failed")
+        if cmd[:3] == ["ps", "-o", "ppid="]:
+            # _get_ancestor_pids() walks up the tree; return "no parent" so
+            # the loop terminates cleanly.
+            return SimpleNamespace(returncode=1, stdout="", stderr="")
        raise AssertionError(f"Unexpected command: {cmd}")

    monkeypatch.setattr(gateway.subprocess, "run", fake_run)
@ -409,14 +534,21 @@ class TestWaitForGatewayExit:

 class TestStopProfileGateway:
    def test_stop_profile_gateway_keeps_pid_file_when_process_still_running(self, monkeypatch):
-        calls = {"kill": 0, "remove": 0}
+        calls = {"kill": 0, "alive_probes": 0, "remove": 0}

        monkeypatch.setattr("gateway.status.get_running_pid", lambda: 12345)
+        # Post-#21561: the stop loop sends one SIGTERM via ``os.kill`` then
+        # polls liveness via ``gateway.status._pid_exists`` (safe on
+        # Windows — bpo-14484). Instrument both seams separately.
        monkeypatch.setattr(
            gateway.os,
            "kill",
            lambda pid, sig: calls.__setitem__("kill", calls["kill"] + 1),
        )
+        monkeypatch.setattr(
+            "gateway.status._pid_exists",
+            lambda pid: calls.__setitem__("alive_probes", calls["alive_probes"] + 1) or True,
+        )
        monkeypatch.setattr("time.sleep", lambda _: None)
        monkeypatch.setattr(
            "gateway.status.remove_pid_file",
@ -424,5 +556,6 @@ class TestStopProfileGateway:
        )

        assert gateway.stop_profile_gateway() is True
-        assert calls["kill"] == 21
+        assert calls["kill"] == 1          # one SIGTERM
+        assert calls["alive_probes"] == 20 # 20 liveness polls over the 2s window
        assert calls["remove"] == 0
--- a/tests/hermes_cli/test_gateway_proc_fallback.py
+++ b/tests/hermes_cli/test_gateway_proc_fallback.py
@ -0,0 +1,138 @@
+"""Tests for /proc-based gateway PID detection in Docker environments.
+
+Verifies that _scan_gateway_pids() uses /proc/*/cmdline when available
+(Docker without procps) and falls back to ps only when /proc is absent.
+
+See: NousResearch/hermes-agent#7622
+"""
+
+import os
+from unittest.mock import MagicMock, patch
+
+import hermes_cli.gateway as gateway_mod
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+_GATEWAY_CMD = "python -m hermes_cli.main gateway run"
+_OTHER_CMD = "python -m some_other_thing"
+
+
+def _fake_proc_dir(entries: dict):
+    """Return side_effects that simulate /proc: isdir → True, listdir → pids,
+    open(cmdline) → null-delimited command bytes."""
+    def _isdir(path):
+        return str(path) == "/proc"
+
+    def _listdir(path):
+        if str(path) == "/proc":
+            return [str(pid) for pid in entries] + ["self", "version"]
+        raise FileNotFoundError(path)
+
+    def _open(path, mode="r", **kwargs):
+        path_str = str(path)
+        if "/cmdline" in path_str:
+            pid = int(path_str.split("/proc/")[1].split("/")[0])
+            raw = entries.get(pid, "").encode("utf-8").replace(b" ", b"\x00")
+            m = MagicMock()
+            m.read.return_value = raw
+            m.__enter__ = lambda s: s
+            m.__exit__ = MagicMock(return_value=False)
+            return m
+        raise FileNotFoundError(path)
+
+    return _isdir, _listdir, _open
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestProcFallback:
+    """_scan_gateway_pids reads /proc when available, skips ps."""
+
+    def test_detects_gateway_pid_via_proc(self):
+        my_pid = os.getpid()
+        entries = {
+            my_pid: "python -m hermes_cli.main",   # own process — excluded
+            12345: _GATEWAY_CMD,
+            99999: _OTHER_CMD,
+        }
+        _isdir, _listdir, _open = _fake_proc_dir(entries)
+
+        with (
+            patch("hermes_cli.gateway.is_windows", return_value=False),
+            patch("os.path.isdir", side_effect=_isdir),
+            patch("os.listdir", side_effect=_listdir),
+            patch("builtins.open", side_effect=_open),
+            patch("hermes_cli.gateway._get_ancestor_pids", return_value=set()),
+            patch("subprocess.run") as mock_ps,
+        ):
+            pids = gateway_mod._scan_gateway_pids(set(), all_profiles=True)
+
+        assert 12345 in pids
+        assert 99999 not in pids
+        mock_ps.assert_not_called()  # ps must NOT be called when /proc worked
+
+    def test_excludes_own_pid_from_proc_scan(self):
+        my_pid = os.getpid()
+        entries = {my_pid: _GATEWAY_CMD}
+        _isdir, _listdir, _open = _fake_proc_dir(entries)
+
+        with (
+            patch("hermes_cli.gateway.is_windows", return_value=False),
+            patch("os.path.isdir", side_effect=_isdir),
+            patch("os.listdir", side_effect=_listdir),
+            patch("builtins.open", side_effect=_open),
+            patch("hermes_cli.gateway._get_ancestor_pids", return_value=set()),
+            patch("subprocess.run"),
+        ):
+            pids = gateway_mod._scan_gateway_pids(set(), all_profiles=True)
+
+        assert my_pid not in pids
+
+    def test_falls_back_to_ps_when_proc_absent(self):
+        ps_output = f"12345 {_GATEWAY_CMD}\n99999 {_OTHER_CMD}\n"
+        mock_result = MagicMock()
+        mock_result.returncode = 0
+        mock_result.stdout = ps_output
+
+        with (
+            patch("hermes_cli.gateway.is_windows", return_value=False),
+            patch("os.path.isdir", return_value=False),
+            patch("hermes_cli.gateway._get_ancestor_pids", return_value=set()),
+            patch("subprocess.run", return_value=mock_result) as mock_ps,
+        ):
+            pids = gateway_mod._scan_gateway_pids(set(), all_profiles=True)
+
+        mock_ps.assert_called_once()
+        assert 12345 in pids
+
+    def test_proc_permission_error_skips_pid(self):
+        def _isdir(path):
+            return str(path) == "/proc"
+
+        def _listdir(path):
+            if str(path) == "/proc":
+                return ["12345", "self"]
+            raise FileNotFoundError
+
+        def _open(path, mode="r", **kwargs):
+            raise PermissionError("no access")
+
+        with (
+            patch("hermes_cli.gateway.is_windows", return_value=False),
+            patch("os.path.isdir", side_effect=_isdir),
+            patch("os.listdir", side_effect=_listdir),
+            patch("builtins.open", side_effect=_open),
+            patch("hermes_cli.gateway._get_ancestor_pids", return_value=set()),
+            patch("subprocess.run") as mock_ps,
+        ):
+            pids = gateway_mod._scan_gateway_pids(set(), all_profiles=True)
+
+        # PermissionError swallowed — empty result, no crash
+        assert 12345 not in pids
+        mock_ps.assert_not_called()  # /proc dir existed, so ps not called
--- a/tests/hermes_cli/test_gateway_service.py
+++ b/tests/hermes_cli/test_gateway_service.py
@ -1,13 +1,16 @@
 """Tests for gateway service management helpers."""

 import os
-import pwd
+import subprocess
 from pathlib import Path
 from types import SimpleNamespace

 import pytest

+pwd = pytest.importorskip("pwd")
+
 import hermes_cli.gateway as gateway_cli
+from gateway import status
 from gateway.restart import (
    DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT,
    GATEWAY_SERVICE_RESTART_EXIT_CODE,
@ -89,6 +92,13 @@ class TestSystemdServiceRefresh:
        monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n")

        calls = []
+        monkeypatch.setattr("gateway.status.get_running_pid", lambda: None)
+        monkeypatch.setattr(gateway_cli, "_recover_pending_systemd_restart", lambda system=False, previous_pid=None: False)
+        monkeypatch.setattr(
+            gateway_cli,
+            "_wait_for_systemd_service_restart",
+            lambda system=False, previous_pid=None: calls.append(("wait", system, previous_pid)) or True,
+        )

        def fake_run(cmd, check=True, **kwargs):
            calls.append(cmd)
@ -99,16 +109,218 @@ class TestSystemdServiceRefresh:
        gateway_cli.systemd_restart()

        assert unit_path.read_text(encoding="utf-8") == "new unit\n"
-        assert calls[:4] == [
+        assert calls[:5] == [
            ["systemctl", "--user", "daemon-reload"],
-            ["systemctl", "--user", "show", gateway_cli.get_service_name(), "--no-pager", "--property", "ActiveState,SubState,Result,ExecMainStatus"],
+            ["systemctl", "--user", "show", gateway_cli.get_service_name(), "--no-pager", "--property", "ActiveState,SubState,Result,ExecMainStatus,MainPID"],
            ["systemctl", "--user", "reset-failed", gateway_cli.get_service_name()],
-            ["systemctl", "--user", "reload-or-restart", gateway_cli.get_service_name()],
+            ["systemctl", "--user", "restart", gateway_cli.get_service_name()],
+            ("wait", False, None),
        ]

+    def test_systemd_stop_marks_running_gateway_as_planned_stop(self, monkeypatch):
+        calls = []
+        markers = []
+
+        monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
+        monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
+        monkeypatch.setattr(status, "get_running_pid", lambda cleanup_stale=True: 321)
+        monkeypatch.setattr(
+            status,
+            "write_planned_stop_marker",
+            lambda pid: markers.append(pid) or True,
+        )
+
+        def fake_run_systemctl(args, **kwargs):
+            calls.append(args)
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli, "_run_systemctl", fake_run_systemctl)
+
+        gateway_cli.systemd_stop()
+
+        assert markers == [321]
+        assert calls == [["stop", gateway_cli.get_service_name()]]
+
+    def test_systemd_stop_timeout_prints_status_guidance(self, monkeypatch, capsys):
+        markers = []
+
+        monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
+        monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
+        monkeypatch.setattr(status, "get_running_pid", lambda cleanup_stale=True: 321)
+        monkeypatch.setattr(
+            status,
+            "write_planned_stop_marker",
+            lambda pid: markers.append(pid) or True,
+        )
+
+        def fake_run_systemctl(args, **kwargs):
+            raise subprocess.TimeoutExpired(args, kwargs.get("timeout"))
+
+        monkeypatch.setattr(gateway_cli, "_run_systemctl", fake_run_systemctl)
+
+        gateway_cli.systemd_stop()
+
+        assert markers == [321]
+        output = capsys.readouterr().out
+        assert "still stopping after 90s" in output
+        assert "hermes gateway status" in output
+
+    def test_systemd_restart_timeout_prints_status_guidance(self, monkeypatch, capsys):
+        """`hermes gateway restart` must not surface a raw TimeoutExpired traceback.
+
+        The dashboard spawns `hermes gateway restart` in the background; when a
+        wedged adapter websocket pushes drain past the 90s CLI timeout, the
+        dashboard would previously show a Python traceback (issue #19937
+        follow-up: the same failure mode applies to restart, not just stop).
+        """
+        monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
+        monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
+        monkeypatch.setattr(gateway_cli, "_preflight_user_systemd", lambda: None)
+        monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None)
+        monkeypatch.setattr(status, "get_running_pid", lambda cleanup_stale=True: None)
+        monkeypatch.setattr(gateway_cli, "_systemd_main_pid", lambda system=False: None)
+        monkeypatch.setattr(
+            gateway_cli,
+            "_recover_pending_systemd_restart",
+            lambda system=False, previous_pid=None: False,
+        )
+        monkeypatch.setattr(
+            gateway_cli,
+            "_systemd_service_is_start_limited",
+            lambda system=False: False,
+        )
+
+        def fake_run_systemctl(args, **kwargs):
+            # reset-failed is a pre-step (check=False, 30s) — let it pass.
+            if args and args[0] == "reset-failed":
+                return SimpleNamespace(returncode=0, stdout="", stderr="")
+            raise subprocess.TimeoutExpired(args, kwargs.get("timeout"))
+
+        monkeypatch.setattr(gateway_cli, "_run_systemctl", fake_run_systemctl)
+
+        gateway_cli.systemd_restart()
+
+        output = capsys.readouterr().out
+        assert "still restarting after 90s" in output
+        assert "hermes gateway status" in output
+
+    def test_run_gateway_refreshes_outdated_unit_on_boot(self, tmp_path, monkeypatch):
+        """run_gateway() should refresh the systemd unit on boot so that
+        restart settings take effect even when the process was respawned
+        via exit-code-75 (bypassing `hermes gateway restart`)."""
+        unit_path = tmp_path / "hermes-gateway.service"
+        unit_path.write_text("old unit\n", encoding="utf-8")
+
+        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path)
+        monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n")
+        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
+
+        calls = []
+
+        def fake_run(cmd, check=True, **kwargs):
+            calls.append(cmd)
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        # Prevent run_gateway from actually starting the gateway
+        async def fake_start_gateway(**kwargs):
+            return True
+
+        monkeypatch.setattr("gateway.run.start_gateway", fake_start_gateway)
+
+        gateway_cli.run_gateway()
+
+        assert unit_path.read_text(encoding="utf-8") == "new unit\n"
+        assert ["systemctl", "--user", "daemon-reload"] in calls
+
+    def test_refresh_refuses_to_bake_pytest_tmpdir_into_real_user_unit(
+        self, tmp_path, monkeypatch
+    ):
+        """Defense in depth: ``refresh_systemd_unit_if_needed()`` runs every
+        time ``run_gateway()`` starts. The user-scope unit path resolves
+        under ``Path.home()`` (NOT sandboxed by conftest), and
+        ``generate_systemd_unit()`` bakes ``HERMES_HOME`` into the unit's
+        ``Environment=`` line. Without this guard, any test that drives
+        ``run_gateway()`` end-to-end on a real Linux dev box silently
+        rewrites the developer's installed gateway unit with a
+        ``/tmp/pytest-of-.../hermes_test`` HERMES_HOME — silently breaking
+        their gateway on the next boot. The guard sniffs the generated
+        unit body for tmpdir markers and refuses the write. Tests that
+        legitimately exercise the refresh flow patch
+        ``generate_systemd_unit`` to return synthetic content that doesn't
+        carry those markers.
+        """
+        unit_path = tmp_path / "hermes-gateway.service"
+        unit_path.write_text("old unit\n", encoding="utf-8")
+
+        monkeypatch.setattr(
+            gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path
+        )
+        # Realistic generated unit referencing a pytest tmpdir HERMES_HOME
+        polluted_unit = (
+            "[Service]\n"
+            'Environment="HERMES_HOME=/tmp/pytest-of-alice/pytest-42/'
+            'popen-gw0/test_x/hermes_test"\n'
+        )
+        monkeypatch.setattr(
+            gateway_cli,
+            "generate_systemd_unit",
+            lambda system=False, run_as_user=None: polluted_unit,
+        )
+
+        # If the guard fails, daemon-reload would be called — record it.
+        ran = []
+
+        def fake_run(cmd, check=True, **kwargs):
+            ran.append(cmd)
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        result = gateway_cli.refresh_systemd_unit_if_needed(system=False)
+
+        assert result is False, "refresh should refuse to write a polluted unit"
+        assert (
+            unit_path.read_text(encoding="utf-8") == "old unit\n"
+        ), "installed unit must be left untouched"
+        assert not any(
+            "daemon-reload" in str(c) for c in ran
+        ), "daemon-reload must not run when write was refused"
+
+
+class TestRequireServiceInstalled:
+    def test_exits_with_install_hint_when_unit_missing(self, tmp_path, monkeypatch, capsys):
+        unit_path = tmp_path / "hermes-gateway.service"
+        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path)
+
+        with pytest.raises(SystemExit) as exc_info:
+            gateway_cli._require_service_installed("start")
+
+        assert exc_info.value.code == 1
+        out = capsys.readouterr().out
+        assert "not installed" in out
+        assert "hermes gateway install" in out
+
+    def test_passes_when_unit_exists(self, tmp_path, monkeypatch):
+        unit_path = tmp_path / "hermes-gateway.service"
+        unit_path.write_text("[Unit]\n", encoding="utf-8")
+        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path)
+
+        gateway_cli._require_service_installed("start")
+

 class TestGeneratedSystemdUnits:
-    def test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self):
+    def _expected_timeout_stop_sec(self) -> str:
+        timeout = int(max(60, DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT) + 30)
+        return f"TimeoutStopSec={timeout}"
+
+    def test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self, monkeypatch):
+        monkeypatch.setattr(
+            gateway_cli,
+            "_get_restart_drain_timeout",
+            lambda: DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT,
+        )
        unit = gateway_cli.generate_systemd_unit(system=False)

        assert "ExecStart=" in unit
@ -118,7 +330,7 @@ class TestGeneratedSystemdUnits:
        # TimeoutStopSec must exceed the default drain_timeout (60s) so
        # systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
        # (tool subprocess kill, adapter disconnect) runs — issue #8202.
-        assert "TimeoutStopSec=90" in unit
+        assert self._expected_timeout_stop_sec() in unit

    def test_user_unit_includes_resolved_node_directory_in_path(self, monkeypatch):
        monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: "/home/test/.nvm/versions/node/v24.14.0/bin/node" if cmd == "node" else None)
@ -127,7 +339,49 @@ class TestGeneratedSystemdUnits:

        assert "/home/test/.nvm/versions/node/v24.14.0/bin" in unit

-    def test_system_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self):
+    def test_user_unit_includes_wsl_windows_interop_paths(self, monkeypatch):
+        monkeypatch.setattr(gateway_cli, "is_wsl", lambda: True)
+        monkeypatch.setenv(
+            "PATH",
+            "/usr/local/bin:/mnt/c/WINDOWS/system32:/mnt/c/WINDOWS/System32/WindowsPowerShell/v1.0/",
+        )
+        monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: None)
+
+        unit = gateway_cli.generate_systemd_unit(system=False)
+
+        assert "/mnt/c/WINDOWS/system32" in unit
+        assert "/mnt/c/WINDOWS/System32/WindowsPowerShell/v1.0/" in unit
+
+    def test_user_unit_omits_windows_interop_paths_outside_wsl(self, monkeypatch):
+        monkeypatch.setattr(gateway_cli, "is_wsl", lambda: False)
+        monkeypatch.setenv("PATH", "/usr/local/bin:/mnt/c/WINDOWS/system32")
+        monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: None)
+
+        unit = gateway_cli.generate_systemd_unit(system=False)
+
+        assert "/mnt/c/WINDOWS/system32" not in unit
+
+    def test_system_unit_includes_wsl_windows_interop_paths(self, monkeypatch):
+        monkeypatch.setattr(gateway_cli, "is_wsl", lambda: True)
+        monkeypatch.setattr(
+            gateway_cli,
+            "_system_service_identity",
+            lambda run_as_user=None: ("alice", "alice", "/home/alice"),
+        )
+        monkeypatch.setattr(gateway_cli, "_hermes_home_for_target_user", lambda home: "/home/alice/.hermes")
+        monkeypatch.setenv("PATH", "/usr/local/bin:/mnt/c/WINDOWS/system32")
+        monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: None)
+
+        unit = gateway_cli.generate_systemd_unit(system=True, run_as_user="alice")
+
+        assert "/mnt/c/WINDOWS/system32" in unit
+
+    def test_system_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self, monkeypatch):
+        monkeypatch.setattr(
+            gateway_cli,
+            "_get_restart_drain_timeout",
+            lambda: DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT,
+        )
        unit = gateway_cli.generate_systemd_unit(system=True)

        assert "ExecStart=" in unit
@ -137,7 +391,7 @@ class TestGeneratedSystemdUnits:
        # TimeoutStopSec must exceed the default drain_timeout (60s) so
        # systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
        # (tool subprocess kill, adapter disconnect) runs — issue #8202.
-        assert "TimeoutStopSec=90" in unit
+        assert self._expected_timeout_stop_sec() in unit
        assert "WantedBy=multi-user.target" in unit


@ -483,64 +737,145 @@ class TestGatewayServiceDetection:
        assert gateway_cli._is_service_running() is False

 class TestGatewaySystemServiceRouting:
-    def test_systemd_restart_self_requests_graceful_restart_and_waits(self, monkeypatch, capsys):
+    def test_systemd_restart_gracefully_restarts_running_service_and_waits(self, monkeypatch, capsys):
        calls = []

        monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
+        monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
        monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: calls.append(("refresh", system)))
+        monkeypatch.setattr(gateway_cli, "_get_restart_drain_timeout", lambda: 12.0)
        monkeypatch.setattr(
            "gateway.status.get_running_pid",
            lambda: 654,
        )
        monkeypatch.setattr(
            gateway_cli,
-            "_request_gateway_self_restart",
-            lambda pid: calls.append(("self", pid)) or True,
+            "_graceful_restart_via_sigusr1",
+            lambda pid, timeout: calls.append(("graceful", pid, timeout)) or True,
        )

-        # Simulate: old process dies immediately, new process becomes active
-        kill_call_count = [0]
-        def fake_kill(pid, sig):
-            kill_call_count[0] += 1
-            if kill_call_count[0] >= 2:  # first call checks, second = dead
-                raise ProcessLookupError()
-        monkeypatch.setattr(os, "kill", fake_kill)
-
-        # Simulate systemctl reset-failed/start followed by an active unit
-        new_pid = [None]
+        # Simulate systemctl reset-failed/restart followed by an active unit.
+        # A plain start does not break systemd's auto-restart timer once the
+        # old gateway has exited with the planned restart code.
        def fake_subprocess_run(cmd, **kwargs):
            if "reset-failed" in cmd:
                calls.append(("reset-failed", cmd))
                return SimpleNamespace(stdout="", returncode=0)
-            if "start" in cmd:
-                calls.append(("start", cmd))
+            if "restart" in cmd:
+                calls.append(("restart", cmd))
                return SimpleNamespace(stdout="", returncode=0)
-            if "show" in cmd:
-                new_pid[0] = 999
-                return SimpleNamespace(
-                    stdout="ActiveState=active\nSubState=running\nResult=success\nExecMainStatus=0\n",
-                    returncode=0,
-                )
            raise AssertionError(f"Unexpected systemctl call: {cmd}")

        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_subprocess_run)
-        # get_running_pid returns new PID after restart
-        pid_calls = [0]
-        def fake_get_pid():
-            pid_calls[0] += 1
-            return 999 if pid_calls[0] > 1 else 654
-        monkeypatch.setattr("gateway.status.get_running_pid", fake_get_pid)
+        monkeypatch.setattr(
+            gateway_cli,
+            "_wait_for_systemd_service_restart",
+            lambda system=False, previous_pid=None: calls.append(("wait", system, previous_pid)) or True,
+        )

        gateway_cli.systemd_restart()

-        assert ("self", 654) in calls
+        assert ("graceful", 654, 17.0) in calls
        assert any(call[0] == "reset-failed" for call in calls)
-        assert any(call[0] == "start" for call in calls)
+        assert any(call[0] == "restart" for call in calls)
+        assert ("wait", False, 654) in calls
        out = capsys.readouterr().out.lower()
-        assert "restarted" in out
+        assert "restarting gracefully" in out
+
+    def test_systemd_restart_uses_systemd_main_pid_when_pid_file_is_missing(self, monkeypatch, capsys):
+        calls = []
+
+        monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
+        monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
+        monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None)
+        monkeypatch.setattr(gateway_cli, "_get_restart_drain_timeout", lambda: 10.0)
+        monkeypatch.setattr("gateway.status.get_running_pid", lambda: None)
+        monkeypatch.setattr(
+            gateway_cli,
+            "_read_systemd_unit_properties",
+            lambda system=False: {
+                "ActiveState": "active",
+                "SubState": "running",
+                "Result": "success",
+                "ExecMainStatus": "0",
+                "MainPID": "777",
+            },
+        )
+        monkeypatch.setattr(
+            gateway_cli,
+            "_graceful_restart_via_sigusr1",
+            lambda pid, timeout: calls.append(("graceful", pid, timeout)) or True,
+        )
+        monkeypatch.setattr(gateway_cli, "_run_systemctl", lambda args, **kwargs: calls.append(args) or SimpleNamespace(stdout="", returncode=0))
+        monkeypatch.setattr(
+            gateway_cli,
+            "_wait_for_systemd_service_restart",
+            lambda system=False, previous_pid=None: calls.append(("wait", system, previous_pid)) or True,
+        )
+
+        gateway_cli.systemd_restart()
+
+        assert ("graceful", 777, 15.0) in calls
+        assert ("wait", False, 777) in calls
+        assert "restarting gracefully (pid 777)" in capsys.readouterr().out.lower()
+
+    def test_wait_for_systemd_restart_waits_for_runtime_running(self, monkeypatch, capsys):
+        monkeypatch.setattr(
+            gateway_cli,
+            "_read_systemd_unit_properties",
+            lambda system=False: {
+                "ActiveState": "active",
+                "SubState": "running",
+                "Result": "success",
+                "ExecMainStatus": "0",
+                "MainPID": "999",
+            },
+        )
+        monkeypatch.setattr("gateway.status.get_running_pid", lambda: None)
+        monkeypatch.setattr(
+            gateway_cli,
+            "_gateway_runtime_status_for_pid",
+            lambda pid: {"pid": pid, "gateway_state": "running"},
+        )
+
+        assert gateway_cli._wait_for_systemd_service_restart(previous_pid=777, timeout=0.1) is True
+        assert "restarted (pid 999)" in capsys.readouterr().out.lower()
+
+    def test_systemd_restart_reports_start_limit_hit(self, monkeypatch, capsys):
+        calls = []
+
+        monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
+        monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
+        monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None)
+        monkeypatch.setattr("gateway.status.get_running_pid", lambda: None)
+        monkeypatch.setattr(gateway_cli, "_recover_pending_systemd_restart", lambda system=False, previous_pid=None: False)
+
+        def fake_run_systemctl(args, **kwargs):
+            calls.append(args)
+            if args[0] == "show":
+                return SimpleNamespace(stdout="ActiveState=inactive\nSubState=dead\nResult=success\nExecMainStatus=0\nMainPID=0\n", stderr="", returncode=0)
+            if args[0] == "reset-failed":
+                return SimpleNamespace(stdout="", stderr="", returncode=0)
+            if args[0] == "restart":
+                raise subprocess.CalledProcessError(
+                    1,
+                    ["systemctl", "--user", *args],
+                    stderr="Job failed. See result 'start-limit-hit'.",
+                )
+            raise AssertionError(f"Unexpected args: {args}")
+
+        monkeypatch.setattr(gateway_cli, "_run_systemctl", fake_run_systemctl)
+
+        gateway_cli.systemd_restart()
+
+        assert ["restart", gateway_cli.get_service_name()] in calls
+        out = capsys.readouterr().out.lower()
+        assert "rate-limited by systemd" in out
+        assert "reset-failed" in out

    def test_systemd_restart_recovers_failed_planned_restart(self, monkeypatch, capsys):
        monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
+        monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
        monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None)
        monkeypatch.setattr(
            "gateway.status.read_runtime_status",
@ -581,6 +916,11 @@ class TestGatewaySystemServiceRouting:
            "gateway.status.get_running_pid",
            lambda: 999 if started["value"] else None,
        )
+        monkeypatch.setattr(
+            gateway_cli,
+            "_gateway_runtime_status_for_pid",
+            lambda pid: {"pid": pid, "gateway_state": "running"},
+        )

        gateway_cli.systemd_restart()

@ -999,20 +1339,17 @@ class TestSystemServiceIdentityRootHandling:

    def test_auto_detected_root_is_rejected(self, monkeypatch):
        """When root is auto-detected (not explicitly requested), raise."""
-        import pwd
        import grp

        monkeypatch.delenv("SUDO_USER", raising=False)
        monkeypatch.setenv("USER", "root")
        monkeypatch.setenv("LOGNAME", "root")

-        import pytest
        with pytest.raises(ValueError, match="pass --run-as-user root to override"):
            gateway_cli._system_service_identity(run_as_user=None)

    def test_explicit_root_is_allowed(self, monkeypatch):
        """When root is explicitly passed via --run-as-user root, allow it."""
-        import pwd
        import grp

        root_info = pwd.getpwnam("root")
@ -1024,7 +1361,6 @@ class TestSystemServiceIdentityRootHandling:

    def test_non_root_user_passes_through(self, monkeypatch):
        """Normal non-root user works as before."""
-        import pwd
        import grp

        monkeypatch.delenv("SUDO_USER", raising=False)
@ -2047,3 +2383,171 @@ class TestSystemdInstallOffersLegacyRemoval:

        assert prompt_called["count"] == 0
        assert remove_called["invoked"] is False
+
+
+class TestSystemScopeRequiresRootError:
+    """Tests for the SystemScopeRequiresRootError replacement of sys.exit(1).
+
+    Before this change, ``_require_root_for_system_service`` called
+    ``sys.exit(1)`` when non-root code tried a system-scope systemd
+    operation. The wizard's ``except Exception`` guards don't catch
+    ``SystemExit`` (it's a ``BaseException`` subclass), so the user was
+    dumped at a bare shell prompt mid-setup. The fix raises a typed
+    exception instead, which the wizard intercepts and handles with
+    actionable remediation.
+    """
+
+    def test_require_root_raises_when_non_root(self, monkeypatch):
+        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
+
+        with pytest.raises(gateway_cli.SystemScopeRequiresRootError) as excinfo:
+            gateway_cli._require_root_for_system_service("start")
+
+        assert excinfo.value.args[0] == "System gateway start requires root. Re-run with sudo."
+        assert excinfo.value.args[1] == "start"
+        # str(e) renders only the message, not the tuple repr, so that
+        # wizard format strings like f"Failed: {e}" print cleanly.
+        assert str(excinfo.value) == "System gateway start requires root. Re-run with sudo."
+        assert f"Failed: {excinfo.value}" == "Failed: System gateway start requires root. Re-run with sudo."
+
+    def test_require_root_noop_when_root(self, monkeypatch):
+        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 0)
+
+        # Should not raise, should not exit
+        gateway_cli._require_root_for_system_service("start")
+
+    def test_error_is_runtime_error_subclass(self):
+        """Wizards use ``except Exception`` guards — the error must be a
+        ``RuntimeError`` (catchable by ``Exception``), NOT a ``SystemExit``
+        (``BaseException``), so the wizard can recover from it.
+        """
+        err = gateway_cli.SystemScopeRequiresRootError("msg", "start")
+        assert isinstance(err, RuntimeError)
+        assert isinstance(err, Exception)
+        assert not isinstance(err, SystemExit)
+
+
+class TestSystemScopeWizardPreCheck:
+    """Tests for _system_scope_wizard_would_need_root — the guard the
+    wizard uses to detect the dead-end BEFORE prompting the user to start
+    a service that will fail without sudo.
+    """
+
+    @staticmethod
+    def _setup_units(tmp_path, monkeypatch, system_present: bool, user_present: bool):
+        sys_dir = tmp_path / "sys"
+        usr_dir = tmp_path / "usr"
+        sys_dir.mkdir()
+        usr_dir.mkdir()
+        if system_present:
+            (sys_dir / "hermes-gateway.service").write_text("[Unit]\n")
+        if user_present:
+            (usr_dir / "hermes-gateway.service").write_text("[Unit]\n")
+        monkeypatch.setattr(
+            gateway_cli,
+            "get_systemd_unit_path",
+            lambda system=False: (sys_dir if system else usr_dir) / "hermes-gateway.service",
+        )
+
+    def test_non_root_with_only_system_unit_returns_true(self, tmp_path, monkeypatch):
+        self._setup_units(tmp_path, monkeypatch, system_present=True, user_present=False)
+        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
+
+        assert gateway_cli._system_scope_wizard_would_need_root() is True
+
+    def test_root_never_needs_root(self, tmp_path, monkeypatch):
+        self._setup_units(tmp_path, monkeypatch, system_present=True, user_present=False)
+        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 0)
+
+        assert gateway_cli._system_scope_wizard_would_need_root() is False
+
+    def test_non_root_with_user_unit_present_returns_false(self, tmp_path, monkeypatch):
+        # User-scope unit present — user can start it themselves, no sudo needed.
+        self._setup_units(tmp_path, monkeypatch, system_present=True, user_present=True)
+        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
+
+        assert gateway_cli._system_scope_wizard_would_need_root() is False
+
+    def test_non_root_with_no_units_returns_false(self, tmp_path, monkeypatch):
+        self._setup_units(tmp_path, monkeypatch, system_present=False, user_present=False)
+        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
+
+        assert gateway_cli._system_scope_wizard_would_need_root() is False
+
+    def test_non_root_with_explicit_system_arg_returns_true(self, tmp_path, monkeypatch):
+        # Caller passed system=True explicitly (e.g. ``hermes gateway start --system``).
+        self._setup_units(tmp_path, monkeypatch, system_present=False, user_present=False)
+        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
+
+        assert gateway_cli._system_scope_wizard_would_need_root(system=True) is True
+
+
+class TestSystemScopeRemediationOutput:
+    """Tests for _print_system_scope_remediation — the actionable guidance
+    shown when the wizard detects a system-scope-only setup as non-root.
+    """
+
+    def test_start_remediation_mentions_sudo_systemctl_and_uninstall(self, capsys, monkeypatch):
+        monkeypatch.setattr(gateway_cli, "get_service_name", lambda: "hermes-gateway")
+
+        gateway_cli._print_system_scope_remediation("start")
+        out = capsys.readouterr().out
+
+        assert "system-wide service" in out
+        assert "start requires root" in out
+        assert "sudo systemctl start hermes-gateway" in out
+        assert "sudo hermes gateway uninstall --system" in out
+        assert "hermes gateway install" in out
+
+    def test_restart_remediation_uses_systemctl_restart(self, capsys, monkeypatch):
+        monkeypatch.setattr(gateway_cli, "get_service_name", lambda: "hermes-gateway")
+
+        gateway_cli._print_system_scope_remediation("restart")
+        out = capsys.readouterr().out
+
+        assert "restart requires root" in out
+        assert "sudo systemctl restart hermes-gateway" in out
+
+    def test_stop_remediation_uses_systemctl_stop(self, capsys, monkeypatch):
+        monkeypatch.setattr(gateway_cli, "get_service_name", lambda: "hermes-gateway")
+
+        gateway_cli._print_system_scope_remediation("stop")
+        out = capsys.readouterr().out
+
+        assert "stop requires root" in out
+        assert "sudo systemctl stop hermes-gateway" in out
+
+
+class TestGatewayCommandCatchesSystemScopeError:
+    """The direct CLI path (``hermes gateway start --system`` etc.) must
+    still exit 1 with a clean message when non-root. The top-level
+    ``gateway_command`` catches ``SystemScopeRequiresRootError`` and
+    converts it back to ``sys.exit(1)``, preserving existing CLI behavior.
+    """
+
+    def test_non_root_system_start_exits_one_with_clean_message(self, tmp_path, monkeypatch, capsys):
+        sys_dir = tmp_path / "sys"
+        usr_dir = tmp_path / "usr"
+        sys_dir.mkdir()
+        usr_dir.mkdir()
+        (sys_dir / "hermes-gateway.service").write_text("[Unit]\n")
+        monkeypatch.setattr(
+            gateway_cli,
+            "get_systemd_unit_path",
+            lambda system=False: (sys_dir if system else usr_dir) / "hermes-gateway.service",
+        )
+        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
+        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
+        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
+        monkeypatch.setattr(gateway_cli, "kill_gateway_processes", lambda **kw: 0)
+
+        args = SimpleNamespace(gateway_command="start", system=True, all=False)
+
+        with pytest.raises(SystemExit) as excinfo:
+            gateway_cli.gateway_command(args)
+
+        assert excinfo.value.code == 1
+        out = capsys.readouterr().out
+        # Renders the message, NOT the ``('msg', 'action')`` tuple repr
+        assert "System gateway start requires root. Re-run with sudo." in out
+        assert "('" not in out  # no tuple repr leaking through
--- a/tests/hermes_cli/test_gmi_provider.py
+++ b/tests/hermes_cli/test_gmi_provider.py
@ -269,9 +269,9 @@ class TestGmiModelMetadata:

 class TestGmiAuxiliary:
    def test_aux_default_model(self):
-        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
+        from agent.auxiliary_client import _get_aux_model_for_provider

-        assert _API_KEY_PROVIDER_AUX_MODELS["gmi"] == "google/gemini-3.1-flash-lite-preview"
+        assert _get_aux_model_for_provider("gmi") == "google/gemini-3.1-flash-lite-preview"

    def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch):
        monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
@ -284,6 +284,22 @@ class TestGmiAuxiliary:
        assert model == "google/gemini-3.1-flash-lite-preview"
        assert mock_openai.call_args.kwargs["api_key"] == "gmi-test-key"
        assert mock_openai.call_args.kwargs["base_url"] == "https://api.gmi-serving.com/v1"
+        # GMI profile declares default_headers with a HermesAgent User-Agent
+        # for traffic attribution. The generic profile-fallback branch in
+        # resolve_provider_client should carry it through to the OpenAI client.
+        headers = mock_openai.call_args.kwargs.get("default_headers", {})
+        assert headers.get("User-Agent", "").startswith("HermesAgent/")
+
+    def test_gmi_profile_declares_hermes_user_agent(self):
+        """The GMI plugin sets a HermesAgent/<ver> User-Agent on its profile."""
+        from providers import get_provider_profile
+
+        profile = get_provider_profile("gmi")
+        assert profile is not None
+        ua = profile.default_headers.get("User-Agent", "")
+        assert ua.startswith("HermesAgent/"), (
+            f"expected GMI profile User-Agent to start with 'HermesAgent/', got {ua!r}"
+        )

    def test_resolve_provider_client_accepts_gmi_alias(self, monkeypatch):
        monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
--- a/tests/hermes_cli/test_goals.py
+++ b/tests/hermes_cli/test_goals.py
@ -0,0 +1,516 @@
+"""Tests for hermes_cli/goals.py — persistent cross-turn goals."""
+
+from __future__ import annotations
+
+import json
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Fixtures
+# ──────────────────────────────────────────────────────────────────────
+
+
+@pytest.fixture
+def hermes_home(tmp_path, monkeypatch):
+    """Isolated HERMES_HOME so SessionDB.state_meta writes don't clobber the real one."""
+    from pathlib import Path
+
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    monkeypatch.setenv("HERMES_HOME", str(home))
+
+    # Bust the goal-module's DB cache for each test so it re-resolves HERMES_HOME.
+    from hermes_cli import goals
+
+    goals._DB_CACHE.clear()
+    yield home
+    goals._DB_CACHE.clear()
+
+
+# ──────────────────────────────────────────────────────────────────────
+# _parse_judge_response
+# ──────────────────────────────────────────────────────────────────────
+
+
+class TestParseJudgeResponse:
+    def test_clean_json_done(self):
+        from hermes_cli.goals import _parse_judge_response
+
+        done, reason, _ = _parse_judge_response('{"done": true, "reason": "all good"}')
+        assert done is True
+        assert reason == "all good"
+
+    def test_clean_json_continue(self):
+        from hermes_cli.goals import _parse_judge_response
+
+        done, reason, _ = _parse_judge_response('{"done": false, "reason": "more work needed"}')
+        assert done is False
+        assert reason == "more work needed"
+
+    def test_json_in_markdown_fence(self):
+        from hermes_cli.goals import _parse_judge_response
+
+        raw = '```json\n{"done": true, "reason": "done"}\n```'
+        done, reason, _ = _parse_judge_response(raw)
+        assert done is True
+        assert "done" in reason
+
+    def test_json_embedded_in_prose(self):
+        """Some models prefix reasoning before emitting JSON — we extract it."""
+        from hermes_cli.goals import _parse_judge_response
+
+        raw = 'Looking at this... the agent says X. Verdict: {"done": false, "reason": "partial"}'
+        done, reason, _ = _parse_judge_response(raw)
+        assert done is False
+        assert reason == "partial"
+
+    def test_string_done_values(self):
+        from hermes_cli.goals import _parse_judge_response
+
+        for s in ("true", "yes", "done", "1"):
+            done, _, _ = _parse_judge_response(f'{{"done": "{s}", "reason": "r"}}')
+            assert done is True
+        for s in ("false", "no", "not yet"):
+            done, _, _ = _parse_judge_response(f'{{"done": "{s}", "reason": "r"}}')
+            assert done is False
+
+    def test_malformed_json_fails_open(self):
+        """Non-JSON → not done, with error-ish reason (so judge_goal can map to continue)."""
+        from hermes_cli.goals import _parse_judge_response
+
+        done, reason, _ = _parse_judge_response("this is not json at all")
+        assert done is False
+        assert reason  # non-empty
+
+    def test_empty_response(self):
+        from hermes_cli.goals import _parse_judge_response
+
+        done, reason, _ = _parse_judge_response("")
+        assert done is False
+        assert reason
+
+
+# ──────────────────────────────────────────────────────────────────────
+# judge_goal — fail-open semantics
+# ──────────────────────────────────────────────────────────────────────
+
+
+class TestJudgeGoal:
+    def test_empty_goal_skipped(self):
+        from hermes_cli.goals import judge_goal
+
+        verdict, _, _ = judge_goal("", "some response")
+        assert verdict == "skipped"
+
+    def test_empty_response_continues(self):
+        from hermes_cli.goals import judge_goal
+
+        verdict, _, _ = judge_goal("ship the thing", "")
+        assert verdict == "continue"
+
+    def test_no_aux_client_continues(self):
+        """Fail-open: if no aux client, we must return continue, not skipped/done."""
+        from hermes_cli import goals
+
+        with patch(
+            "agent.auxiliary_client.get_text_auxiliary_client",
+            return_value=(None, None),
+        ):
+            verdict, _, _ = goals.judge_goal("my goal", "my response")
+        assert verdict == "continue"
+
+    def test_api_error_continues(self):
+        """Judge exception → fail-open continue (don't wedge progress on judge bugs)."""
+        from hermes_cli import goals
+
+        fake_client = MagicMock()
+        fake_client.chat.completions.create.side_effect = RuntimeError("boom")
+        with patch(
+            "agent.auxiliary_client.get_text_auxiliary_client",
+            return_value=(fake_client, "judge-model"),
+        ):
+            verdict, reason, _ = goals.judge_goal("goal", "response")
+        assert verdict == "continue"
+        assert "judge error" in reason.lower()
+
+    def test_judge_says_done(self):
+        from hermes_cli import goals
+
+        fake_client = MagicMock()
+        fake_client.chat.completions.create.return_value = MagicMock(
+            choices=[
+                MagicMock(
+                    message=MagicMock(content='{"done": true, "reason": "achieved"}')
+                )
+            ]
+        )
+        with patch(
+            "agent.auxiliary_client.get_text_auxiliary_client",
+            return_value=(fake_client, "judge-model"),
+        ):
+            verdict, reason, _ = goals.judge_goal("goal", "agent response")
+        assert verdict == "done"
+        assert reason == "achieved"
+
+    def test_judge_says_continue(self):
+        from hermes_cli import goals
+
+        fake_client = MagicMock()
+        fake_client.chat.completions.create.return_value = MagicMock(
+            choices=[
+                MagicMock(
+                    message=MagicMock(content='{"done": false, "reason": "not yet"}')
+                )
+            ]
+        )
+        with patch(
+            "agent.auxiliary_client.get_text_auxiliary_client",
+            return_value=(fake_client, "judge-model"),
+        ):
+            verdict, reason, _ = goals.judge_goal("goal", "agent response")
+        assert verdict == "continue"
+        assert reason == "not yet"
+
+
+# ──────────────────────────────────────────────────────────────────────
+# GoalManager lifecycle + persistence
+# ──────────────────────────────────────────────────────────────────────
+
+
+class TestGoalManager:
+    def test_no_goal_initial(self, hermes_home):
+        from hermes_cli.goals import GoalManager
+
+        mgr = GoalManager(session_id="test-sid-1")
+        assert mgr.state is None
+        assert not mgr.is_active()
+        assert not mgr.has_goal()
+        assert "No active goal" in mgr.status_line()
+
+    def test_set_then_status(self, hermes_home):
+        from hermes_cli.goals import GoalManager
+
+        mgr = GoalManager(session_id="test-sid-2", default_max_turns=5)
+        state = mgr.set("port the thing")
+        assert state.goal == "port the thing"
+        assert state.status == "active"
+        assert state.max_turns == 5
+        assert state.turns_used == 0
+        assert mgr.is_active()
+        assert "active" in mgr.status_line().lower()
+        assert "port the thing" in mgr.status_line()
+
+    def test_set_rejects_empty(self, hermes_home):
+        from hermes_cli.goals import GoalManager
+
+        mgr = GoalManager(session_id="test-sid-3")
+        with pytest.raises(ValueError):
+            mgr.set("")
+        with pytest.raises(ValueError):
+            mgr.set("   ")
+
+    def test_pause_and_resume(self, hermes_home):
+        from hermes_cli.goals import GoalManager
+
+        mgr = GoalManager(session_id="test-sid-4")
+        mgr.set("goal text")
+        mgr.pause(reason="user-paused")
+        assert mgr.state.status == "paused"
+        assert not mgr.is_active()
+        assert mgr.has_goal()
+
+        mgr.resume()
+        assert mgr.state.status == "active"
+        assert mgr.is_active()
+
+    def test_clear(self, hermes_home):
+        from hermes_cli.goals import GoalManager
+
+        mgr = GoalManager(session_id="test-sid-5")
+        mgr.set("goal")
+        mgr.clear()
+        assert mgr.state is None
+        assert not mgr.is_active()
+
+    def test_persistence_across_managers(self, hermes_home):
+        """Key invariant: a second manager on the same session sees the goal.
+
+        This is what makes /resume work — each session rebinds its
+        GoalManager and picks up the saved state.
+        """
+        from hermes_cli.goals import GoalManager
+
+        mgr1 = GoalManager(session_id="persist-sid")
+        mgr1.set("do the thing")
+
+        mgr2 = GoalManager(session_id="persist-sid")
+        assert mgr2.state is not None
+        assert mgr2.state.goal == "do the thing"
+        assert mgr2.is_active()
+
+    def test_evaluate_after_turn_done(self, hermes_home):
+        """Judge says done → status=done, no continuation."""
+        from hermes_cli import goals
+        from hermes_cli.goals import GoalManager
+
+        mgr = GoalManager(session_id="eval-sid-1")
+        mgr.set("ship it")
+
+        with patch.object(goals, "judge_goal", return_value=("done", "shipped", False)):
+            decision = mgr.evaluate_after_turn("I shipped the feature.")
+
+        assert decision["verdict"] == "done"
+        assert decision["should_continue"] is False
+        assert decision["continuation_prompt"] is None
+        assert mgr.state.status == "done"
+        assert mgr.state.turns_used == 1
+
+    def test_evaluate_after_turn_continue_under_budget(self, hermes_home):
+        from hermes_cli import goals
+        from hermes_cli.goals import GoalManager
+
+        mgr = GoalManager(session_id="eval-sid-2", default_max_turns=5)
+        mgr.set("a long goal")
+
+        with patch.object(goals, "judge_goal", return_value=("continue", "more work", False)):
+            decision = mgr.evaluate_after_turn("made some progress")
+
+        assert decision["verdict"] == "continue"
+        assert decision["should_continue"] is True
+        assert decision["continuation_prompt"] is not None
+        assert "a long goal" in decision["continuation_prompt"]
+        assert mgr.state.status == "active"
+        assert mgr.state.turns_used == 1
+
+    def test_evaluate_after_turn_budget_exhausted(self, hermes_home):
+        """When turn budget hits ceiling, auto-pause instead of continuing."""
+        from hermes_cli import goals
+        from hermes_cli.goals import GoalManager
+
+        mgr = GoalManager(session_id="eval-sid-3", default_max_turns=2)
+        mgr.set("hard goal")
+
+        with patch.object(goals, "judge_goal", return_value=("continue", "not yet", False)):
+            d1 = mgr.evaluate_after_turn("step 1")
+            assert d1["should_continue"] is True
+            assert mgr.state.turns_used == 1
+            assert mgr.state.status == "active"
+
+            d2 = mgr.evaluate_after_turn("step 2")
+            # turns_used is now 2 which equals max_turns → paused
+            assert d2["should_continue"] is False
+            assert mgr.state.status == "paused"
+            assert mgr.state.turns_used == 2
+            assert "budget" in (mgr.state.paused_reason or "").lower()
+
+    def test_evaluate_after_turn_inactive(self, hermes_home):
+        """evaluate_after_turn is a no-op when goal isn't active."""
+        from hermes_cli.goals import GoalManager
+
+        mgr = GoalManager(session_id="eval-sid-4")
+        d = mgr.evaluate_after_turn("anything")
+        assert d["verdict"] == "inactive"
+        assert d["should_continue"] is False
+
+        mgr.set("a goal")
+        mgr.pause()
+        d2 = mgr.evaluate_after_turn("anything")
+        assert d2["verdict"] == "inactive"
+        assert d2["should_continue"] is False
+
+    def test_continuation_prompt_shape(self, hermes_home):
+        """The continuation prompt must include the goal text verbatim —
+        and must be safe to inject as a user-role message (prompt-cache
+        invariants: no system-prompt mutation)."""
+        from hermes_cli.goals import GoalManager
+
+        mgr = GoalManager(session_id="cont-sid")
+        mgr.set("port goal command to hermes")
+        prompt = mgr.next_continuation_prompt()
+        assert prompt is not None
+        assert "port goal command to hermes" in prompt
+        assert prompt.strip()  # non-empty
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Smoke: CommandDef is wired
+# ──────────────────────────────────────────────────────────────────────
+
+
+def test_goal_command_in_registry():
+    from hermes_cli.commands import resolve_command
+
+    cmd = resolve_command("goal")
+    assert cmd is not None
+    assert cmd.name == "goal"
+
+
+def test_goal_command_dispatches_in_cli_registry_helpers():
+    """goal shows up in autocomplete / help categories alongside other Session cmds."""
+    from hermes_cli.commands import COMMANDS, COMMANDS_BY_CATEGORY
+
+    assert "/goal" in COMMANDS
+    session_cmds = COMMANDS_BY_CATEGORY.get("Session", {})
+    assert "/goal" in session_cmds
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Auto-pause on consecutive judge parse failures
+# ──────────────────────────────────────────────────────────────────────
+
+
+class TestJudgeParseFailureAutoPause:
+    """Regression: weak judge models (e.g. deepseek-v4-flash) that return
+    empty strings or non-JSON prose must auto-pause the loop after N turns
+    instead of burning the whole turn budget."""
+
+    def test_parse_response_flags_empty_as_parse_failure(self):
+        from hermes_cli.goals import _parse_judge_response
+
+        done, reason, parse_failed = _parse_judge_response("")
+        assert done is False
+        assert parse_failed is True
+        assert "empty" in reason.lower()
+
+    def test_parse_response_flags_non_json_as_parse_failure(self):
+        from hermes_cli.goals import _parse_judge_response
+
+        done, reason, parse_failed = _parse_judge_response(
+            "Let me analyze whether the goal is fully satisfied based on the agent's response..."
+        )
+        assert done is False
+        assert parse_failed is True
+        assert "not json" in reason.lower()
+
+    def test_parse_response_clean_json_is_not_parse_failure(self):
+        from hermes_cli.goals import _parse_judge_response
+
+        done, _, parse_failed = _parse_judge_response(
+            '{"done": false, "reason": "more work"}'
+        )
+        assert done is False
+        assert parse_failed is False
+
+    def test_api_error_does_not_count_as_parse_failure(self):
+        """Transient network/API errors must not trip the auto-pause guard."""
+        from hermes_cli import goals
+
+        fake_client = MagicMock()
+        fake_client.chat.completions.create.side_effect = RuntimeError("connection reset")
+        with patch(
+            "agent.auxiliary_client.get_text_auxiliary_client",
+            return_value=(fake_client, "judge-model"),
+        ):
+            verdict, _, parse_failed = goals.judge_goal("goal", "response")
+        assert verdict == "continue"
+        assert parse_failed is False
+
+    def test_empty_judge_reply_flagged_as_parse_failure(self):
+        """End-to-end: judge returns empty content → parse_failed=True."""
+        from hermes_cli import goals
+
+        fake_client = MagicMock()
+        fake_client.chat.completions.create.return_value = MagicMock(
+            choices=[MagicMock(message=MagicMock(content=""))]
+        )
+        with patch(
+            "agent.auxiliary_client.get_text_auxiliary_client",
+            return_value=(fake_client, "judge-model"),
+        ):
+            verdict, _, parse_failed = goals.judge_goal("goal", "response")
+        assert verdict == "continue"
+        assert parse_failed is True
+
+    def test_auto_pause_after_three_consecutive_parse_failures(self, hermes_home):
+        """N=3 consecutive parse failures → auto-pause with config pointer."""
+        from hermes_cli import goals
+        from hermes_cli.goals import GoalManager, DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES
+
+        assert DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES == 3
+        mgr = GoalManager(session_id="parse-fail-sid-1", default_max_turns=20)
+        mgr.set("do a thing")
+
+        with patch.object(
+            goals, "judge_goal", return_value=("continue", "judge returned empty response", True)
+        ):
+            d1 = mgr.evaluate_after_turn("step 1")
+            assert d1["should_continue"] is True
+            assert mgr.state.consecutive_parse_failures == 1
+
+            d2 = mgr.evaluate_after_turn("step 2")
+            assert d2["should_continue"] is True
+            assert mgr.state.consecutive_parse_failures == 2
+
+            d3 = mgr.evaluate_after_turn("step 3")
+            assert d3["should_continue"] is False
+            assert d3["status"] == "paused"
+            assert mgr.state.consecutive_parse_failures == 3
+            # Message points at the config surface so the user can fix it.
+            assert "auxiliary" in d3["message"]
+            assert "goal_judge" in d3["message"]
+            assert "config.yaml" in d3["message"]
+
+    def test_parse_failure_counter_resets_on_good_reply(self, hermes_home):
+        """A single good judge reply resets the counter — transient flakes don't pause."""
+        from hermes_cli import goals
+        from hermes_cli.goals import GoalManager
+
+        mgr = GoalManager(session_id="parse-fail-sid-2", default_max_turns=20)
+        mgr.set("another goal")
+
+        # Two parse failures…
+        with patch.object(
+            goals, "judge_goal", return_value=("continue", "not json", True)
+        ):
+            mgr.evaluate_after_turn("step 1")
+            mgr.evaluate_after_turn("step 2")
+            assert mgr.state.consecutive_parse_failures == 2
+
+        # …then one clean reply resets the counter.
+        with patch.object(
+            goals, "judge_goal", return_value=("continue", "making progress", False)
+        ):
+            d = mgr.evaluate_after_turn("step 3")
+            assert d["should_continue"] is True
+            assert mgr.state.consecutive_parse_failures == 0
+
+    def test_parse_failure_counter_not_incremented_by_api_errors(self, hermes_home):
+        """API/transport errors must NOT count toward the auto-pause threshold."""
+        from hermes_cli import goals
+        from hermes_cli.goals import GoalManager
+
+        mgr = GoalManager(session_id="parse-fail-sid-3", default_max_turns=20)
+        mgr.set("goal")
+
+        with patch.object(
+            goals, "judge_goal", return_value=("continue", "judge error: RuntimeError", False)
+        ):
+            for _ in range(5):
+                d = mgr.evaluate_after_turn("still going")
+                assert d["should_continue"] is True
+            assert mgr.state.consecutive_parse_failures == 0
+            assert mgr.state.status == "active"
+
+    def test_consecutive_parse_failures_persists_across_goalmanager_reloads(
+        self, hermes_home
+    ):
+        """The counter must be durable so cross-session resumes see it."""
+        from hermes_cli import goals
+        from hermes_cli.goals import GoalManager, load_goal
+
+        mgr = GoalManager(session_id="parse-fail-sid-4", default_max_turns=20)
+        mgr.set("persistent goal")
+
+        with patch.object(
+            goals, "judge_goal", return_value=("continue", "empty", True)
+        ):
+            mgr.evaluate_after_turn("r")
+            mgr.evaluate_after_turn("r")
+
+        reloaded = load_goal("parse-fail-sid-4")
+        assert reloaded is not None
+        assert reloaded.consecutive_parse_failures == 2
--- a/tests/hermes_cli/test_kanban_boards.py
+++ b/tests/hermes_cli/test_kanban_boards.py
@ -0,0 +1,492 @@
+"""Tests for the multi-board kanban layer (``hermes kanban boards …``).
+
+Covers the pieces added when boards became a first-class concept:
+
+* Slug validation and normalisation.
+* Path resolution for ``default`` (legacy ``<root>/kanban.db``) vs
+  named boards (``<root>/kanban/boards/<slug>/kanban.db``).
+* Current-board persistence via ``<root>/kanban/current`` and
+  ``HERMES_KANBAN_BOARD`` env var.
+* ``connect(board=)`` isolation — writes on one board don't leak.
+* ``create_board`` / ``list_boards`` / ``remove_board`` round trip.
+* CLI surface: ``hermes kanban boards list/create/switch/rm``.
+* ``_default_spawn`` injects ``HERMES_KANBAN_BOARD`` into worker env.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+# Ensure the worktree (not the stale global clone) is first on sys.path.
+_WORKTREE = Path(__file__).resolve().parents[2]
+if str(_WORKTREE) not in sys.path:
+    sys.path.insert(0, str(_WORKTREE))
+
+from hermes_cli import kanban_db as kb
+
+
+# ---------------------------------------------------------------------------
+# Fixture
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def fresh_home(tmp_path, monkeypatch):
+    """Isolated HERMES_HOME with no prior kanban state.
+
+    The autouse hermetic conftest already nukes credentials + TZ; this
+    fixture layers a per-test HERMES_HOME plus a path-init cache reset
+    so each test sees a truly empty board set.
+    """
+    home = tmp_path / "hermes_home"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    for var in (
+        "HERMES_KANBAN_DB",
+        "HERMES_KANBAN_WORKSPACES_ROOT",
+        "HERMES_KANBAN_HOME",
+        "HERMES_KANBAN_BOARD",
+    ):
+        monkeypatch.delenv(var, raising=False)
+    # Also reset hermes_constants cache so get_default_hermes_root() re-reads.
+    try:
+        import hermes_constants
+        hermes_constants._cached_default_hermes_root = None  # type: ignore[attr-defined]
+    except Exception:
+        pass
+    # Kanban module-level init cache must not leak between tests.
+    kb._INITIALIZED_PATHS.clear()
+    return home
+
+
+# ---------------------------------------------------------------------------
+# Slug validation
+# ---------------------------------------------------------------------------
+
+class TestSlugValidation:
+    @pytest.mark.parametrize("good", [
+        "default", "atm10-server", "hermes-agent", "proj_1", "a",
+        "very-long-but-still-ok-slug-with-hyphens-and-numbers-1234",
+    ])
+    def test_accepts_valid(self, good):
+        assert kb._normalize_board_slug(good) == good
+
+    @pytest.mark.parametrize("bad", [
+        "-leading-hyphen", "_leading_underscore",
+        "with/slash", "with space",
+        "has.dot", "has?question",
+        "..", "../etc", "foo\x00bar",
+    ])
+    def test_rejects_invalid(self, bad):
+        with pytest.raises(ValueError):
+            kb._normalize_board_slug(bad)
+
+    def test_empty_returns_none(self):
+        assert kb._normalize_board_slug(None) is None
+        assert kb._normalize_board_slug("") is None
+        assert kb._normalize_board_slug("   ") is None
+
+    def test_auto_lowercases(self):
+        # Uppercase is auto-downcased (friendlier than rejecting). ``Default``
+        # → ``default``, ``ATM10`` → ``atm10``. The on-disk slug is always
+        # lowercase regardless of what the user typed.
+        assert kb._normalize_board_slug("Default") == "default"
+        assert kb._normalize_board_slug("ATM10-Server") == "atm10-server"
+
+
+# ---------------------------------------------------------------------------
+# Path resolution
+# ---------------------------------------------------------------------------
+
+class TestPathResolution:
+    def test_default_board_legacy_path(self, fresh_home):
+        """The default board's DB lives at ``<root>/kanban.db`` for back-compat."""
+        assert kb.kanban_db_path() == fresh_home / "kanban.db"
+        assert kb.kanban_db_path(board="default") == fresh_home / "kanban.db"
+
+    def test_named_board_under_boards_dir(self, fresh_home):
+        p = kb.kanban_db_path(board="atm10-server")
+        assert p == fresh_home / "kanban" / "boards" / "atm10-server" / "kanban.db"
+
+    def test_workspaces_per_board(self, fresh_home):
+        assert kb.workspaces_root() == fresh_home / "kanban" / "workspaces"
+        # Uppercase input gets auto-downcased to the on-disk slug.
+        assert kb.workspaces_root(board="projA") == (
+            fresh_home / "kanban" / "boards" / "proja" / "workspaces"
+        )
+
+    def test_logs_per_board(self, fresh_home):
+        assert kb.worker_logs_dir() == fresh_home / "kanban" / "logs"
+        assert kb.worker_logs_dir(board="other") == (
+            fresh_home / "kanban" / "boards" / "other" / "logs"
+        )
+
+    def test_env_var_db_override_still_wins(self, fresh_home, tmp_path, monkeypatch):
+        """``HERMES_KANBAN_DB`` pins the file regardless of board= arg."""
+        forced = tmp_path / "custom.db"
+        monkeypatch.setenv("HERMES_KANBAN_DB", str(forced))
+        assert kb.kanban_db_path() == forced
+        assert kb.kanban_db_path(board="ignored") == forced
+
+    def test_env_var_workspaces_override(self, fresh_home, tmp_path, monkeypatch):
+        forced = tmp_path / "ws"
+        monkeypatch.setenv("HERMES_KANBAN_WORKSPACES_ROOT", str(forced))
+        assert kb.workspaces_root(board="any") == forced
+
+
+# ---------------------------------------------------------------------------
+# Current-board resolution
+# ---------------------------------------------------------------------------
+
+class TestCurrentBoard:
+    def test_default_when_unset(self, fresh_home):
+        assert kb.get_current_board() == "default"
+
+    def test_env_var_takes_precedence(self, fresh_home, monkeypatch):
+        # Create the board so the env-var value is honoured (get_current_board
+        # trusts env-var validity, but the resolution chain doesn't require
+        # the board to exist; we just test that env trumps).
+        kb.create_board("envboard")
+        monkeypatch.setenv("HERMES_KANBAN_BOARD", "envboard")
+        assert kb.get_current_board() == "envboard"
+
+    def test_file_pointer_honoured(self, fresh_home):
+        kb.create_board("filepick")
+        kb.set_current_board("filepick")
+        assert kb.get_current_board() == "filepick"
+
+    def test_stale_file_pointer_falls_back_to_default(self, fresh_home):
+        current = fresh_home / "kanban" / "current"
+        current.parent.mkdir(parents=True, exist_ok=True)
+        current.write_text("missing-board\n", encoding="utf-8")
+
+        assert kb.get_current_board() == "default"
+        assert not kb.board_exists("missing-board")
+        assert [b["slug"] for b in kb.list_boards()] == ["default"]
+
+    def test_env_beats_file(self, fresh_home, monkeypatch):
+        kb.create_board("a")
+        kb.create_board("b")
+        kb.set_current_board("a")
+        monkeypatch.setenv("HERMES_KANBAN_BOARD", "b")
+        assert kb.get_current_board() == "b"
+
+    def test_invalid_env_falls_through(self, fresh_home, monkeypatch):
+        monkeypatch.setenv("HERMES_KANBAN_BOARD", "!!bad!!")
+        # Should not crash — falls through to default.
+        assert kb.get_current_board() == "default"
+
+    def test_clear_current_board(self, fresh_home):
+        kb.create_board("x")
+        kb.set_current_board("x")
+        kb.clear_current_board()
+        assert kb.get_current_board() == "default"
+
+    def test_kanban_db_path_reads_current(self, fresh_home):
+        """kanban_db_path() with no args respects the on-disk pointer."""
+        kb.create_board("my-proj")
+        kb.set_current_board("my-proj")
+        expected = fresh_home / "kanban" / "boards" / "my-proj" / "kanban.db"
+        assert kb.kanban_db_path() == expected
+
+
+# ---------------------------------------------------------------------------
+# Board CRUD
+# ---------------------------------------------------------------------------
+
+class TestBoardCRUD:
+    def test_create_and_list(self, fresh_home):
+        assert [b["slug"] for b in kb.list_boards()] == ["default"]
+        kb.create_board("foo", name="Foo Board", description="test")
+        slugs = [b["slug"] for b in kb.list_boards()]
+        assert slugs == ["default", "foo"]
+
+    def test_create_is_idempotent(self, fresh_home):
+        kb.create_board("bar")
+        kb.create_board("bar")  # no error
+        slugs = [b["slug"] for b in kb.list_boards()]
+        assert slugs == ["default", "bar"]
+
+    def test_create_writes_metadata(self, fresh_home):
+        meta = kb.create_board(
+            "baz",
+            name="Baz",
+            description="desc",
+            icon="📦",
+            color="#abcdef",
+        )
+        assert meta["slug"] == "baz"
+        assert meta["name"] == "Baz"
+        assert meta["icon"] == "📦"
+        # Round-trip via read_board_metadata.
+        again = kb.read_board_metadata("baz")
+        assert again["name"] == "Baz"
+        assert again["description"] == "desc"
+        assert again["icon"] == "📦"
+
+    def test_remove_archive(self, fresh_home):
+        kb.create_board("toremove")
+        res = kb.remove_board("toremove")
+        assert res["action"] == "archived"
+        assert Path(res["new_path"]).exists()
+        assert "toremove" not in [b["slug"] for b in kb.list_boards()]
+
+    def test_remove_hard_delete(self, fresh_home):
+        kb.create_board("nuke")
+        d = kb.board_dir("nuke")
+        assert d.exists()
+        res = kb.remove_board("nuke", archive=False)
+        assert res["action"] == "deleted"
+        assert not d.exists()
+
+    def test_remove_default_forbidden(self, fresh_home):
+        with pytest.raises(ValueError, match="default"):
+            kb.remove_board("default")
+
+    def test_remove_nonexistent_raises(self, fresh_home):
+        with pytest.raises(ValueError, match="does not exist"):
+            kb.remove_board("nosuch")
+
+    def test_remove_clears_current_pointer(self, fresh_home):
+        kb.create_board("pinned")
+        kb.set_current_board("pinned")
+        kb.remove_board("pinned")
+        assert kb.get_current_board() == "default"
+
+    def test_rename_updates_metadata(self, fresh_home):
+        kb.create_board("slug-immutable")
+        kb.write_board_metadata("slug-immutable", name="New Display Name")
+        assert kb.read_board_metadata("slug-immutable")["name"] == "New Display Name"
+        # Slug must not change.
+        assert kb.board_exists("slug-immutable")
+
+
+# ---------------------------------------------------------------------------
+# Connection isolation
+# ---------------------------------------------------------------------------
+
+class TestConnectionIsolation:
+    def test_tasks_do_not_leak_across_boards(self, fresh_home):
+        kb.create_board("alpha")
+        kb.create_board("beta")
+
+        with kb.connect(board="alpha") as conn:
+            kb.create_task(conn, title="alpha-task-1", assignee="dev")
+            kb.create_task(conn, title="alpha-task-2", assignee="dev")
+
+        with kb.connect(board="beta") as conn:
+            kb.create_task(conn, title="beta-only", assignee="dev")
+
+        with kb.connect(board="alpha") as conn:
+            a = kb.list_tasks(conn)
+        with kb.connect(board="beta") as conn:
+            b = kb.list_tasks(conn)
+        with kb.connect(board="default") as conn:
+            d = kb.list_tasks(conn)
+
+        assert {t.title for t in a} == {"alpha-task-1", "alpha-task-2"}
+        assert {t.title for t in b} == {"beta-only"}
+        assert d == []
+
+    def test_connect_without_args_uses_current(self, fresh_home):
+        kb.create_board("curr")
+        kb.set_current_board("curr")
+        with kb.connect() as conn:
+            kb.create_task(conn, title="implicit", assignee="x")
+        with kb.connect(board="curr") as conn:
+            tasks = kb.list_tasks(conn)
+        assert [t.title for t in tasks] == ["implicit"]
+
+    def test_connect_env_var_overrides_current(self, fresh_home, monkeypatch):
+        kb.create_board("persist")
+        kb.create_board("envwin")
+        kb.set_current_board("persist")
+        monkeypatch.setenv("HERMES_KANBAN_BOARD", "envwin")
+        with kb.connect() as conn:
+            kb.create_task(conn, title="via-env", assignee="x")
+        with kb.connect(board="envwin") as conn:
+            assert [t.title for t in kb.list_tasks(conn)] == ["via-env"]
+        with kb.connect(board="persist") as conn:
+            assert kb.list_tasks(conn) == []
+
+
+# ---------------------------------------------------------------------------
+# Worker spawn env injection
+# ---------------------------------------------------------------------------
+
+class TestWorkerSpawnEnv:
+    """Ensure the dispatcher pins ``HERMES_KANBAN_BOARD`` / DB / workspaces on spawn.
+
+    We monkey-patch ``subprocess.Popen`` to capture the child env without
+    actually spawning anything.
+    """
+
+    def test_default_spawn_sets_env_vars(self, fresh_home, monkeypatch):
+        captured = {}
+
+        class FakeProc:
+            pid = 12345
+
+        def fake_popen(cmd, *args, **kwargs):
+            captured["cmd"] = cmd
+            captured["env"] = kwargs.get("env", {})
+            return FakeProc()
+
+        monkeypatch.setattr(subprocess, "Popen", fake_popen)
+        kb.create_board("spawntest")
+
+        task = kb.Task(
+            id="t_abc",
+            title="worker test",
+            body=None,
+            assignee="teknium",
+            status="ready",
+            priority=0,
+            created_by="user",
+            created_at=0,
+            started_at=None,
+            completed_at=None,
+            workspace_kind="scratch",
+            workspace_path=None,
+            claim_lock=None,
+            claim_expires=None,
+            tenant=None,
+        )
+
+        kb._default_spawn(task, str(fresh_home / "ws"), board="spawntest")
+
+        env = captured["env"]
+        assert env["HERMES_KANBAN_BOARD"] == "spawntest"
+        assert env["HERMES_KANBAN_TASK"] == "t_abc"
+        # DB path should match the per-board DB, not the legacy default.
+        expected_db = fresh_home / "kanban" / "boards" / "spawntest" / "kanban.db"
+        assert env["HERMES_KANBAN_DB"] == str(expected_db)
+        expected_ws = fresh_home / "kanban" / "boards" / "spawntest" / "workspaces"
+        assert env["HERMES_KANBAN_WORKSPACES_ROOT"] == str(expected_ws)
+
+    def test_default_board_spawn_keeps_legacy_paths(self, fresh_home, monkeypatch):
+        captured = {}
+
+        class FakeProc:
+            pid = 1
+
+        def fake_popen(cmd, *args, **kwargs):
+            captured["env"] = kwargs.get("env", {})
+            return FakeProc()
+
+        monkeypatch.setattr(subprocess, "Popen", fake_popen)
+        task = kb.Task(
+            id="t_def",
+            title="",
+            body=None,
+            assignee="teknium",
+            status="ready",
+            priority=0,
+            created_by=None,
+            created_at=0,
+            started_at=None,
+            completed_at=None,
+            workspace_kind="scratch",
+            workspace_path=None,
+            claim_lock=None,
+            claim_expires=None,
+            tenant=None,
+        )
+        kb._default_spawn(task, str(fresh_home / "ws"), board=None)
+        env = captured["env"]
+        assert env["HERMES_KANBAN_BOARD"] == "default"
+        assert env["HERMES_KANBAN_DB"] == str(fresh_home / "kanban.db")
+
+
+# ---------------------------------------------------------------------------
+# CLI surface
+# ---------------------------------------------------------------------------
+
+def _cli(args: list[str], env_extra: dict | None = None) -> subprocess.CompletedProcess:
+    """Run ``hermes kanban …`` with PYTHONPATH pinned to the worktree."""
+    env = dict(os.environ)
+    env["PYTHONPATH"] = str(_WORKTREE)
+    if env_extra:
+        env.update(env_extra)
+    return subprocess.run(
+        [sys.executable, "-m", "hermes_cli.main", "kanban"] + args,
+        env=env,
+        capture_output=True,
+        text=True,
+        cwd=str(_WORKTREE),
+        timeout=30,
+    )
+
+
+class TestCLI:
+    def test_boards_list_default_only(self, tmp_path):
+        env = {"HERMES_HOME": str(tmp_path)}
+        res = _cli(["boards", "list", "--json"], env_extra=env)
+        assert res.returncode == 0, res.stderr
+        data = json.loads(res.stdout)
+        slugs = [b["slug"] for b in data]
+        assert slugs == ["default"]
+        assert data[0]["is_current"] is True
+
+    def test_boards_create_and_switch(self, tmp_path):
+        env = {"HERMES_HOME": str(tmp_path)}
+        r1 = _cli(
+            ["boards", "create", "myproj", "--name", "My Project", "--switch"],
+            env_extra=env,
+        )
+        assert r1.returncode == 0, r1.stderr
+        assert "created" in r1.stdout
+        assert "Switched" in r1.stdout
+
+        r2 = _cli(["boards", "list", "--json"], env_extra=env)
+        data = json.loads(r2.stdout)
+        cur = [b for b in data if b["is_current"]][0]
+        assert cur["slug"] == "myproj"
+
+    def test_per_board_task_isolation_via_cli(self, tmp_path):
+        env = {"HERMES_HOME": str(tmp_path)}
+        assert _cli(["boards", "create", "projA"], env_extra=env).returncode == 0
+        assert _cli(["boards", "create", "projB"], env_extra=env).returncode == 0
+
+        # Create one task on each via --board.
+        r = _cli(["--board", "projA", "create", "Task A", "--assignee", "dev"], env_extra=env)
+        assert r.returncode == 0, r.stderr
+        r = _cli(["--board", "projB", "create", "Task B", "--assignee", "dev"], env_extra=env)
+        assert r.returncode == 0, r.stderr
+
+        # list on each board only shows its own.
+        listA = _cli(["--board", "projA", "list", "--json"], env_extra=env)
+        listB = _cli(["--board", "projB", "list", "--json"], env_extra=env)
+        listD = _cli(["list", "--json"], env_extra=env)
+
+        titlesA = [t["title"] for t in json.loads(listA.stdout)]
+        titlesB = [t["title"] for t in json.loads(listB.stdout)]
+        titlesD = [t["title"] for t in json.loads(listD.stdout)]
+
+        assert titlesA == ["Task A"]
+        assert titlesB == ["Task B"]
+        assert titlesD == []
+
+    def test_board_flag_rejects_unknown(self, tmp_path):
+        env = {"HERMES_HOME": str(tmp_path)}
+        r = _cli(["--board", "ghost", "list"], env_extra=env)
+        # main.py's dispatcher doesn't propagate return codes today, so we
+        # assert the user-visible signal: a stderr error message. Whether
+        # the exit code stays 0 is a separate (pre-existing) issue.
+        assert "does not exist" in r.stderr
+
+    def test_boards_rm_archives(self, tmp_path):
+        env = {"HERMES_HOME": str(tmp_path)}
+        _cli(["boards", "create", "rmme"], env_extra=env)
+        r = _cli(["boards", "rm", "rmme"], env_extra=env)
+        assert r.returncode == 0, r.stderr
+        assert "archived" in r.stdout
+        # Default board list no longer shows it.
+        res = _cli(["boards", "list", "--json"], env_extra=env)
+        slugs = [b["slug"] for b in json.loads(res.stdout)]
+        assert "rmme" not in slugs
--- a/tests/hermes_cli/test_kanban_cli.py
+++ b/tests/hermes_cli/test_kanban_cli.py
@ -0,0 +1,404 @@
+"""Tests for the kanban CLI surface (hermes_cli.kanban)."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+from pathlib import Path
+
+import pytest
+
+from hermes_cli import kanban as kc
+from hermes_cli import kanban_db as kb
+
+
+@pytest.fixture
+def kanban_home(tmp_path, monkeypatch):
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    kb.init_db()
+    return home
+
+
+# ---------------------------------------------------------------------------
+# Workspace flag parsing
+# ---------------------------------------------------------------------------
+
+@pytest.mark.parametrize(
+    "value,expected",
+    [
+        ("scratch",              ("scratch", None)),
+        ("worktree",              ("worktree", None)),
+        ("dir:/tmp/work",         ("dir", "/tmp/work")),
+    ],
+)
+def test_parse_workspace_flag_valid(value, expected):
+    assert kc._parse_workspace_flag(value) == expected
+
+
+def test_parse_workspace_flag_expands_user():
+    kind, path = kc._parse_workspace_flag("dir:~/vault")
+    assert kind == "dir"
+    assert path.endswith("/vault")
+    assert not path.startswith("~")
+
+
+@pytest.mark.parametrize("bad", ["cloud", "dir:", "", "worktree:/x"])
+def test_parse_workspace_flag_rejects(bad):
+    if not bad:
+        # Empty -> defaults; not an error.
+        assert kc._parse_workspace_flag(bad) == ("scratch", None)
+        return
+    with pytest.raises(argparse.ArgumentTypeError):
+        kc._parse_workspace_flag(bad)
+
+
+# ---------------------------------------------------------------------------
+# run_slash smoke tests (end-to-end via the same entry both CLI and gateway use)
+# ---------------------------------------------------------------------------
+
+def test_run_slash_no_args_shows_usage(kanban_home):
+    out = kc.run_slash("")
+    assert "kanban" in out.lower()
+    assert "create" in out.lower() or "subcommand" in out.lower() or "action" in out.lower()
+
+
+def test_run_slash_create_and_list(kanban_home):
+    out = kc.run_slash("create 'ship feature' --assignee alice")
+    assert "Created" in out
+    out = kc.run_slash("list")
+    assert "ship feature" in out
+    assert "alice" in out
+
+
+def test_run_slash_create_with_parent_and_cascade(kanban_home):
+    # Parent then child via --parent
+    out1 = kc.run_slash("create 'parent' --assignee alice")
+    # Extract the "t_xxxx" id from "Created t_xxxx (ready, ...)"
+    import re
+    m = re.search(r"(t_[a-f0-9]+)", out1)
+    assert m
+    p = m.group(1)
+    out2 = kc.run_slash(f"create 'child' --assignee bob --parent {p}")
+    assert "todo" in out2  # child starts as todo
+
+    # Complete parent; list should promote child to ready
+    kc.run_slash(f"complete {p}")
+    # Explicit filter: child should now be ready (was todo before complete).
+    ready_list = kc.run_slash("list --status ready")
+    assert "child" in ready_list
+
+
+def test_run_slash_show_includes_comments(kanban_home):
+    out = kc.run_slash("create 'x'")
+    import re
+    tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
+    kc.run_slash(f"comment {tid} 'source is paywalled'")
+    show = kc.run_slash(f"show {tid}")
+    assert "source is paywalled" in show
+
+
+def test_run_slash_block_unblock_cycle(kanban_home):
+    out = kc.run_slash("create 'x' --assignee alice")
+    import re
+    tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
+    # Claim first so block() finds it running
+    kc.run_slash(f"claim {tid}")
+    assert "Blocked" in kc.run_slash(f"block {tid} 'need decision'")
+    assert "Unblocked" in kc.run_slash(f"unblock {tid}")
+
+
+def test_run_slash_json_output(kanban_home):
+    out = kc.run_slash("create 'jsontask' --assignee alice --json")
+    payload = json.loads(out)
+    assert payload["title"] == "jsontask"
+    assert payload["assignee"] == "alice"
+    assert payload["status"] == "ready"
+
+
+def test_run_slash_dispatch_dry_run_counts(kanban_home):
+    kc.run_slash("create 'a' --assignee alice")
+    kc.run_slash("create 'b' --assignee bob")
+    out = kc.run_slash("dispatch --dry-run")
+    assert "Spawned:" in out
+
+
+def test_run_slash_context_output_format(kanban_home):
+    out = kc.run_slash("create 'tech spec' --assignee alice --body 'write an RFC'")
+    import re
+    tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
+    kc.run_slash(f"comment {tid} 'remember to include performance section'")
+    ctx = kc.run_slash(f"context {tid}")
+    assert "tech spec" in ctx
+    assert "write an RFC" in ctx
+    assert "performance section" in ctx
+
+
+def test_run_slash_tenant_filter(kanban_home):
+    kc.run_slash("create 'biz-a task' --tenant biz-a --assignee alice")
+    kc.run_slash("create 'biz-b task' --tenant biz-b --assignee alice")
+    a = kc.run_slash("list --tenant biz-a")
+    b = kc.run_slash("list --tenant biz-b")
+    assert "biz-a task" in a and "biz-b task" not in a
+    assert "biz-b task" in b and "biz-a task" not in b
+
+
+def test_run_slash_usage_error_returns_message(kanban_home):
+    # Missing required argument for create
+    out = kc.run_slash("create")
+    assert "usage" in out.lower() or "error" in out.lower()
+
+
+def test_run_slash_assign_reassigns(kanban_home):
+    out = kc.run_slash("create 'x' --assignee alice")
+    import re
+    tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
+    assert "Assigned" in kc.run_slash(f"assign {tid} bob")
+    show = kc.run_slash(f"show {tid}")
+    assert "bob" in show
+
+
+def test_run_slash_link_unlink(kanban_home):
+    a = kc.run_slash("create 'a'")
+    b = kc.run_slash("create 'b'")
+    import re
+    ta = re.search(r"(t_[a-f0-9]+)", a).group(1)
+    tb = re.search(r"(t_[a-f0-9]+)", b).group(1)
+    assert "Linked" in kc.run_slash(f"link {ta} {tb}")
+    # After link, b is todo
+    show = kc.run_slash(f"show {tb}")
+    assert "todo" in show
+    assert "Unlinked" in kc.run_slash(f"unlink {ta} {tb}")
+
+
+# ---------------------------------------------------------------------------
+# Integration with the COMMAND_REGISTRY
+# ---------------------------------------------------------------------------
+
+def test_kanban_is_resolvable():
+    from hermes_cli.commands import resolve_command
+
+    cmd = resolve_command("kanban")
+    assert cmd is not None
+    assert cmd.name == "kanban"
+
+
+def test_kanban_bypasses_active_session_guard():
+    from hermes_cli.commands import should_bypass_active_session
+
+    assert should_bypass_active_session("kanban")
+
+
+def test_kanban_in_autocomplete_table():
+    from hermes_cli.commands import COMMANDS, SUBCOMMANDS
+
+    assert "/kanban" in COMMANDS
+    subs = SUBCOMMANDS.get("/kanban") or []
+    assert "create" in subs
+    assert "dispatch" in subs
+
+
+def test_kanban_not_gateway_only():
+    # kanban is available in BOTH CLI and gateway surfaces.
+    from hermes_cli.commands import COMMAND_REGISTRY
+
+    cmd = next(c for c in COMMAND_REGISTRY if c.name == "kanban")
+    assert not cmd.cli_only
+    assert not cmd.gateway_only
+
+
+# ---------------------------------------------------------------------------
+# reclaim + reassign CLI smoke tests
+# ---------------------------------------------------------------------------
+
+def test_run_slash_reclaim_running_task(kanban_home):
+    import re
+    import time
+    import secrets
+    from hermes_cli import kanban_db as kb
+
+    out1 = kc.run_slash("create 'stuck worker task' --assignee broken-model")
+    m = re.search(r"(t_[a-f0-9]+)", out1)
+    assert m
+    tid = m.group(1)
+
+    # Simulate a running claim outside TTL.
+    conn = kb.connect()
+    try:
+        lock = secrets.token_hex(4)
+        conn.execute(
+            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
+            "worker_pid=? WHERE id=?",
+            (lock, int(time.time()) + 3600, 4242, tid),
+        )
+        conn.execute(
+            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
+            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
+            (tid, lock, int(time.time()) + 3600, 4242, int(time.time())),
+        )
+        rid = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
+        conn.execute("UPDATE tasks SET current_run_id=? WHERE id=?", (rid, tid))
+        conn.commit()
+    finally:
+        conn.close()
+
+    out = kc.run_slash(f"reclaim {tid} --reason 'test'")
+    assert "Reclaimed" in out, out
+    # Status back to ready.
+    out2 = kc.run_slash(f"show {tid}")
+    assert "ready" in out2.lower()
+
+
+def test_run_slash_reassign_with_reclaim_flag(kanban_home):
+    import re
+    import time
+    import secrets
+    from hermes_cli import kanban_db as kb
+
+    out1 = kc.run_slash("create 'switch model' --assignee orig")
+    m = re.search(r"(t_[a-f0-9]+)", out1)
+    tid = m.group(1)
+
+    # Simulate a running claim.
+    conn = kb.connect()
+    try:
+        lock = secrets.token_hex(4)
+        conn.execute(
+            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
+            "worker_pid=? WHERE id=?",
+            (lock, int(time.time()) + 3600, 4242, tid),
+        )
+        conn.execute(
+            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
+            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
+            (tid, lock, int(time.time()) + 3600, 4242, int(time.time())),
+        )
+        rid = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
+        conn.execute("UPDATE tasks SET current_run_id=? WHERE id=?", (rid, tid))
+        conn.commit()
+    finally:
+        conn.close()
+
+    out = kc.run_slash(f"reassign {tid} newbie --reclaim --reason 'switch'")
+    assert "Reassigned" in out, out
+    out2 = kc.run_slash(f"show {tid}")
+    assert "newbie" in out2
+
+
+# ---------------------------------------------------------------------------
+# /kanban specify — slash surface (same entry point CLI + gateway use)
+# ---------------------------------------------------------------------------
+
+def test_run_slash_specify_end_to_end(kanban_home, monkeypatch):
+    """The /kanban specify slash command routes through run_slash, which
+    both the interactive CLI and every gateway platform use. This test
+    covers both surfaces."""
+    from unittest.mock import MagicMock
+
+    # Create a triage task via the same slash surface.
+    create_out = kc.run_slash("create 'rough idea' --triage")
+    import re
+    m = re.search(r"(t_[a-f0-9]+)", create_out)
+    assert m, f"no task id in: {create_out!r}"
+    tid = m.group(1)
+
+    # Mock the auxiliary client so we don't hit a real provider.
+    resp = MagicMock()
+    resp.choices = [MagicMock()]
+    resp.choices[0].message.content = (
+        '{"title": "Spec: rough idea", "body": "**Goal**\\nShip it."}'
+    )
+    fake_client = MagicMock()
+    fake_client.chat.completions.create = MagicMock(return_value=resp)
+    monkeypatch.setattr(
+        "agent.auxiliary_client.get_text_auxiliary_client",
+        lambda *a, **kw: (fake_client, "test-model"),
+    )
+
+    # Specify via slash.
+    out = kc.run_slash(f"specify {tid}")
+    assert "Specified" in out
+    assert tid in out
+
+    # Task is promoted and retitled.
+    with kb.connect() as conn:
+        task = kb.get_task(conn, tid)
+    assert task.status in {"todo", "ready"}
+    assert task.title == "Spec: rough idea"
+
+
+def test_run_slash_specify_help_is_reachable(kanban_home):
+    """`-h`/`--help` on a subcommand returns the actual help text — see
+    issue #21794. argparse writes help to stdout and exits 0; run_slash
+    must capture both streams and treat exit 0 as success, not error."""
+    out = kc.run_slash("specify --help")
+    assert "specify" in out.lower()
+    # Help dump should NOT come back wrapped as a usage error.
+    assert not out.startswith("⚠")
+
+
+# ---------------------------------------------------------------------------
+# /kanban help / no-args / unknown-action UX (issue #21794)
+# ---------------------------------------------------------------------------
+
+def test_run_slash_bare_returns_curated_help(kanban_home):
+    """Bare `/kanban` returns the curated short-help block — not a 5KB
+    argparse usage dump."""
+    out = kc.run_slash("")
+    assert "/kanban" in out
+    assert "list" in out
+    assert "show" in out
+    # Sanity: should be a chat-friendly size, not the raw usage tree.
+    assert len(out) < 2000
+    # Shouldn't surface argparse's usage-error sentinel.
+    assert "usage error" not in out.lower()
+
+
+@pytest.mark.parametrize("alias", ["help", "--help", "-h", "?"])
+def test_run_slash_help_aliases_match_bare(kanban_home, alias):
+    """Every documented help alias produces the same curated output."""
+    bare = kc.run_slash("")
+    out = kc.run_slash(alias)
+    assert out == bare
+
+
+def test_run_slash_subcommand_help_returns_help_text(kanban_home):
+    """`/kanban show -h` returns the actual subcommand help, not a
+    fake `(usage error: 0)` sentinel."""
+    out = kc.run_slash("show -h")
+    assert "task_id" in out
+    assert "/kanban show" in out
+    assert not out.startswith("⚠")
+
+
+def test_run_slash_unknown_action_friendly_error(kanban_home):
+    """Unknown subcommand surfaces a single-line usage error prefixed
+    with our marker — no `(usage error: 2)` wrapping, no doubled
+    `kanban kanban` prog string."""
+    out = kc.run_slash("frobnicate")
+    assert "/kanban" in out
+    assert "frobnicate" in out
+    assert "/kanban-wrap" not in out
+    assert "/kanban kanban" not in out
+    assert "(usage error: " not in out
+
+
+def test_run_slash_missing_required_arg_friendly_error(kanban_home):
+    """Missing positional argument shows the subcommand-scoped usage
+    line, not the top-level kanban tree."""
+    out = kc.run_slash("show")
+    assert "/kanban show" in out
+    assert "task_id" in out
+
+
+def test_run_slash_board_override_restores_prior_env(kanban_home, monkeypatch):
+    kb.create_board("alpha")
+    kb.create_board("beta")
+    monkeypatch.setenv("HERMES_KANBAN_BOARD", "beta")
+
+    kc.run_slash("--board alpha list")
+
+    assert os.environ.get("HERMES_KANBAN_BOARD") == "beta"
--- a/tests/hermes_cli/test_kanban_core_functionality.py
+++ b/tests/hermes_cli/test_kanban_core_functionality.py
--- a/tests/hermes_cli/test_kanban_db.py
+++ b/tests/hermes_cli/test_kanban_db.py
--- a/tests/hermes_cli/test_kanban_diagnostics.py
+++ b/tests/hermes_cli/test_kanban_diagnostics.py
@ -0,0 +1,557 @@
+"""Tests for hermes_cli.kanban_diagnostics — rule-engine that produces
+structured distress signals (diagnostics) for kanban tasks.
+
+These tests exercise each rule in isolation using minimal in-memory
+task/event/run fixtures (no DB) plus a few integration-style cases
+that round-trip through the real kanban_db to make sure the rule
+engine works on sqlite3.Row objects as well as dataclasses.
+"""
+
+from __future__ import annotations
+
+import time
+from pathlib import Path
+
+import pytest
+
+from hermes_cli import kanban_db as kb
+from hermes_cli import kanban_diagnostics as kd
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def kanban_home(tmp_path, monkeypatch):
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    kb.init_db()
+    return home
+
+
+def _task(**overrides):
+    base = {
+        "id": "t_demo00",
+        "title": "demo task",
+        "assignee": "demo",
+        "status": "ready",
+        "consecutive_failures": 0,
+        "last_failure_error": None,
+    }
+    base.update(overrides)
+    return base
+
+
+def _event(kind, ts=None, **payload):
+    return {
+        "kind": kind,
+        "created_at": int(ts if ts is not None else time.time()),
+        "payload": payload or None,
+    }
+
+
+def _run(outcome="completed", run_id=1, error=None):
+    return {
+        "id": run_id,
+        "outcome": outcome,
+        "error": error,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Each rule — positive + negative + clearing
+# ---------------------------------------------------------------------------
+
+
+def test_hallucinated_cards_fires_on_blocked_event():
+    task = _task(status="ready")
+    events = [
+        _event("created", ts=100),
+        _event("completion_blocked_hallucination", ts=200,
+               phantom_cards=["t_bad1", "t_bad2"],
+               verified_cards=["t_good1"]),
+    ]
+    # ``now=300`` keeps the synthetic event timestamps in scope without
+    # tripping the stranded_in_ready rule (events are 100/200 epoch
+    # which time.time() would treat as ~50yr old).
+    diags = kd.compute_task_diagnostics(task, events, [], now=300)
+    halluc = [d for d in diags if d.kind == "hallucinated_cards"]
+    assert len(halluc) == 1
+    d = halluc[0]
+    assert d.severity == "error"
+    assert d.data["phantom_ids"] == ["t_bad1", "t_bad2"]
+    # Generic recovery actions always available; comment action too.
+    kinds = [a.kind for a in d.actions]
+    assert "comment" in kinds
+    assert "reassign" in kinds
+
+
+def test_hallucinated_cards_clears_on_subsequent_completion():
+    task = _task(status="done")
+    events = [
+        _event("completion_blocked_hallucination", ts=100, phantom_cards=["t_x"]),
+        _event("completed", ts=200, summary="retry worked"),
+    ]
+    diags = kd.compute_task_diagnostics(task, events, [])
+    assert diags == []
+
+
+def test_prose_phantom_refs_fires_after_clean_completion():
+    # Prose scan emits its event AFTER the completed event in the DB
+    # path, but a subsequent clean completion clears it. Phantom id
+    # must be valid hex — the scanner regex is ``t_[a-f0-9]{8,}``.
+    task = _task(status="done")
+    events = [
+        _event("completed", ts=100, summary="referenced t_bad", result_len=0),
+        _event("suspected_hallucinated_references", ts=101,
+               phantom_refs=["t_deadbeef99"], source="completion_summary"),
+    ]
+    diags = kd.compute_task_diagnostics(task, events, [])
+    assert len(diags) == 1
+    assert diags[0].kind == "prose_phantom_refs"
+    assert diags[0].severity == "warning"
+    assert diags[0].data["phantom_refs"] == ["t_deadbeef99"]
+
+
+def test_prose_phantom_refs_clears_on_later_clean_edit():
+    task = _task(status="done")
+    events = [
+        _event("completed", ts=100, summary="bad"),
+        _event("suspected_hallucinated_references", ts=101,
+               phantom_refs=["t_ffff0000cc"]),
+        _event("edited", ts=200, fields=["result", "summary"]),
+    ]
+    diags = kd.compute_task_diagnostics(task, events, [])
+    assert diags == []
+
+
+def test_repeated_failures_fires_at_threshold_on_spawn():
+    """A task with multiple spawn_failed runs gets a spawn-flavoured
+    diagnostic (title mentions 'spawn', suggested action is ``doctor``).
+    """
+    task = _task(status="ready", consecutive_failures=3,
+                 last_failure_error="Profile 'debugger' does not exist")
+    runs = [
+        _run(outcome="spawn_failed", run_id=1),
+        _run(outcome="spawn_failed", run_id=2),
+        _run(outcome="spawn_failed", run_id=3),
+    ]
+    diags = kd.compute_task_diagnostics(task, [], runs)
+    assert len(diags) == 1
+    d = diags[0]
+    assert d.kind == "repeated_failures"
+    assert d.severity == "error"
+    # CLI hints are what operators actually need here.
+    suggested = [a.label for a in d.actions if a.suggested]
+    assert any("doctor" in s for s in suggested)
+
+
+def test_repeated_failures_fires_on_timeout_loop():
+    """The rule surfaces for timeout loops too — that's the point of
+    unifying the counter. Suggested action is 'check logs', not
+    'fix profile'."""
+    task = _task(status="ready", consecutive_failures=3,
+                 last_failure_error="elapsed 600s > limit 300s")
+    runs = [
+        _run(outcome="timed_out", run_id=1),
+        _run(outcome="timed_out", run_id=2),
+        _run(outcome="timed_out", run_id=3),
+    ]
+    diags = kd.compute_task_diagnostics(task, [], runs)
+    assert len(diags) == 1
+    d = diags[0]
+    assert d.kind == "repeated_failures"
+    assert d.data["most_recent_outcome"] == "timed_out"
+    suggested = [a.label for a in d.actions if a.suggested]
+    assert any("log" in s.lower() for s in suggested)
+
+
+def test_repeated_failures_escalates_to_critical():
+    task = _task(consecutive_failures=6, last_failure_error="boom")
+    diags = kd.compute_task_diagnostics(task, [], [])
+    assert diags[0].severity == "critical"
+
+
+def test_repeated_failures_below_threshold_silent():
+    task = _task(consecutive_failures=2)
+    assert kd.compute_task_diagnostics(task, [], []) == []
+
+
+def test_repeated_crashes_counts_trailing_streak_only():
+    task = _task(status="ready", assignee="crashy")
+    runs = [
+        _run(outcome="completed", run_id=1),
+        _run(outcome="crashed", run_id=2, error="OOM"),
+        _run(outcome="crashed", run_id=3, error="OOM again"),
+    ]
+    diags = kd.compute_task_diagnostics(task, [], runs)
+    assert len(diags) == 1
+    d = diags[0]
+    assert d.kind == "repeated_crashes"
+    # 2 consecutive crashes at the end → default threshold 2 → error severity.
+    assert d.severity == "error"
+    assert d.data["consecutive_crashes"] == 2
+
+
+def test_repeated_crashes_breaks_on_recent_success():
+    task = _task(status="ready", assignee="fixed")
+    runs = [
+        _run(outcome="crashed", run_id=1),
+        _run(outcome="crashed", run_id=2),
+        _run(outcome="completed", run_id=3),
+    ]
+    assert kd.compute_task_diagnostics(task, [], runs) == []
+
+
+def test_repeated_crashes_escalates_on_many_crashes():
+    task = _task(status="ready", assignee="x")
+    runs = [_run(outcome="crashed", run_id=i) for i in range(1, 6)]  # 5 in a row
+    diags = kd.compute_task_diagnostics(task, [], runs)
+    assert diags[0].severity == "critical"
+
+
+def test_stuck_in_blocked_fires_past_threshold():
+    now = int(time.time())
+    task = _task(status="blocked")
+    events = [
+        _event("blocked", ts=now - 3600 * 48, reason="needs approval"),
+    ]
+    diags = kd.compute_task_diagnostics(
+        task, events, [], now=now,
+    )
+    assert len(diags) == 1
+    d = diags[0]
+    assert d.kind == "stuck_in_blocked"
+    assert d.severity == "warning"
+    assert d.data["age_hours"] >= 48
+
+
+def test_stuck_in_blocked_silent_with_recent_comment():
+    now = int(time.time())
+    task = _task(status="blocked")
+    events = [
+        _event("blocked", ts=now - 3600 * 48),
+        _event("commented", ts=now - 3600 * 2, author="human"),
+    ]
+    assert kd.compute_task_diagnostics(task, events, [], now=now) == []
+
+
+def test_stuck_in_blocked_silent_when_not_blocked():
+    task = _task(status="ready")
+    events = [_event("blocked", ts=1000)]
+    assert kd.compute_task_diagnostics(task, events, [], now=9999999) == []
+
+
+def test_repeated_crashes_surfaces_actual_error_in_title():
+    """The title should lead with the actual error text so operators
+    see WHAT broke (e.g. rate-limit, auth, OOM) without opening logs.
+    """
+    task = _task(status="ready", assignee="x")
+    runs = [
+        _run(outcome="crashed", run_id=1, error="openai: 429 Too Many Requests"),
+        _run(outcome="crashed", run_id=2, error="openai: 429 Too Many Requests"),
+    ]
+    diags = kd.compute_task_diagnostics(task, [], runs)
+    assert len(diags) == 1
+    d = diags[0]
+    assert "429" in d.title
+    assert "Too Many Requests" in d.title
+    # Full error in detail.
+    assert "429 Too Many Requests" in d.detail
+
+
+def test_repeated_crashes_no_error_fallback_title():
+    task = _task(status="ready", assignee="x")
+    runs = [
+        _run(outcome="crashed", run_id=1, error=None),
+        _run(outcome="crashed", run_id=2, error=None),
+    ]
+    diags = kd.compute_task_diagnostics(task, [], runs)
+    assert "no error recorded" in diags[0].title
+
+
+def test_repeated_failures_surfaces_actual_error_in_title():
+    task = _task(consecutive_failures=5,
+                 last_failure_error="insufficient_quota: billing limit reached")
+    diags = kd.compute_task_diagnostics(task, [], [])
+    assert len(diags) == 1
+    d = diags[0]
+    assert "insufficient_quota" in d.title or "billing limit" in d.title
+    assert "insufficient_quota" in d.detail
+
+
+def test_repeated_crashes_truncates_huge_tracebacks():
+    """Full Python tracebacks can be tens of KB. The title stays one
+    line (≤160 chars); the detail caps at 500 chars + ellipsis so the
+    card doesn't explode visually."""
+    huge = "Traceback (most recent call last):\n" + ("  File\n" * 500)
+    task = _task(status="ready")
+    runs = [
+        _run(outcome="crashed", run_id=1, error=huge),
+        _run(outcome="crashed", run_id=2, error=huge),
+    ]
+    diags = kd.compute_task_diagnostics(task, [], runs)
+    d = diags[0]
+    # Title only the first line, capped.
+    assert "\n" not in d.title
+    assert len(d.title) < 250
+    # Detail contains the snippet with ellipsis.
+    assert d.detail.endswith("…") or len(d.detail) < 700
+
+
+# ---------------------------------------------------------------------------
+# Severity sorting
+# ---------------------------------------------------------------------------
+
+
+def test_diagnostics_sorted_critical_first():
+    """A task with both a critical (many spawn failures) and a warning
+    (prose phantoms) diagnostic should list the critical one first."""
+    task = _task(status="done", consecutive_failures=10,
+                 last_failure_error="nope")
+    events = [
+        _event("completed", ts=100, summary="referenced t_missing"),
+        _event("suspected_hallucinated_references", ts=101,
+               phantom_refs=["t_missing11"]),
+    ]
+    diags = kd.compute_task_diagnostics(task, events, [])
+    kinds = [d.kind for d in diags]
+    assert kinds[0] == "repeated_failures"  # critical
+    assert "prose_phantom_refs" in kinds
+
+
+# ---------------------------------------------------------------------------
+# Integration — runs through real kanban_db so sqlite.Row fields work
+# ---------------------------------------------------------------------------
+
+
+def test_engine_works_on_sqlite_row_objects(kanban_home):
+    """Regression: the rule functions must handle sqlite3.Row (which
+    supports mapping access but not attribute access and isn't a dict)
+    as well as dataclass Task / plain dict. The API layer passes Row
+    objects directly.
+    """
+    conn = kb.connect()
+    try:
+        parent = kb.create_task(conn, title="p", assignee="w")
+        real = kb.create_task(conn, title="r", assignee="x", created_by="w")
+        with pytest.raises(kb.HallucinatedCardsError):
+            kb.complete_task(
+                conn, parent,
+                summary="with phantom", created_cards=[real, "t_deadbeef1"],
+            )
+        # Pull Row objects the way the API helper does.
+        row = conn.execute(
+            "SELECT * FROM tasks WHERE id = ?", (parent,),
+        ).fetchone()
+        events = list(conn.execute(
+            "SELECT * FROM task_events WHERE task_id = ? ORDER BY id",
+            (parent,),
+        ).fetchall())
+        runs = list(conn.execute(
+            "SELECT * FROM task_runs WHERE task_id = ? ORDER BY id",
+            (parent,),
+        ).fetchall())
+        diags = kd.compute_task_diagnostics(row, events, runs)
+        assert len(diags) == 1
+        assert diags[0].kind == "hallucinated_cards"
+        assert "t_deadbeef1" in diags[0].data["phantom_ids"]
+    finally:
+        conn.close()
+
+
+# ---------------------------------------------------------------------------
+# Error-tolerance: a broken rule shouldn't 500 the whole compute call
+# ---------------------------------------------------------------------------
+
+
+def test_broken_rule_is_isolated(monkeypatch):
+    def _bad_rule(task, events, runs, now, cfg):
+        raise RuntimeError("synthetic rule bug")
+
+    # Insert a broken rule at the front of the registry; subsequent
+    # rules should still run and produce their diagnostics.
+    monkeypatch.setattr(kd, "_RULES", [_bad_rule] + kd._RULES)
+
+    task = _task(consecutive_failures=5, last_failure_error="e")
+    diags = kd.compute_task_diagnostics(task, [], [])
+    # The broken rule silently drops, the real one still fires.
+    kinds = [d.kind for d in diags]
+    assert "repeated_failures" in kinds
+
+
+# ---------------------------------------------------------------------------
+# stranded_in_ready
+#
+# Surfaces ready tasks that nobody has claimed within the threshold.
+# Identity-agnostic by design: catches typo'd assignees, deleted profiles,
+# down external worker pools, and misconfigured dispatchers in one rule.
+# ---------------------------------------------------------------------------
+
+
+def test_stranded_in_ready_fires_when_age_exceeds_threshold():
+    """Default threshold = 30 min. A ready task promoted 45 min ago
+    with no claim should fire as a warning."""
+    now = 100_000
+    task = _task(status="ready", assignee="demo", claim_lock=None)
+    # 45 min = 2700s, threshold = 1800s.
+    events = [_event("created", ts=now - 45 * 60)]
+    diags = kd.compute_task_diagnostics(task, events, [], now=now)
+    stranded = [d for d in diags if d.kind == "stranded_in_ready"]
+    assert len(stranded) == 1
+    assert stranded[0].severity == "warning"
+    assert stranded[0].data["age_seconds"] == 45 * 60
+    assert stranded[0].data["assignee"] == "demo"
+
+
+def test_stranded_in_ready_silent_below_threshold():
+    """A ready task only 10 min old should NOT fire."""
+    now = 100_000
+    task = _task(status="ready", assignee="demo", claim_lock=None)
+    events = [_event("created", ts=now - 10 * 60)]
+    diags = kd.compute_task_diagnostics(task, events, [], now=now)
+    assert [d for d in diags if d.kind == "stranded_in_ready"] == []
+
+
+def test_stranded_in_ready_skips_non_ready_status():
+    """Tasks not in ready status are out of scope (running tasks have
+    their own crash / failure rules)."""
+    now = 100_000
+    for status in ("running", "blocked", "done", "todo", "triage"):
+        task = _task(status=status, assignee="demo")
+        events = [_event("created", ts=now - 6 * 3600)]
+        diags = kd.compute_task_diagnostics(task, events, [], now=now)
+        assert [d for d in diags if d.kind == "stranded_in_ready"] == [], status
+
+
+def test_stranded_in_ready_skips_unassigned_tasks():
+    """Empty assignee = `skipped_unassigned` on the dispatcher already.
+    Don't double-flag here."""
+    now = 100_000
+    task = _task(status="ready", assignee="", claim_lock=None)
+    events = [_event("created", ts=now - 6 * 3600)]
+    diags = kd.compute_task_diagnostics(task, events, [], now=now)
+    assert [d for d in diags if d.kind == "stranded_in_ready"] == []
+
+
+def test_stranded_in_ready_skips_claimed_tasks():
+    """A live claim_lock means a worker is on it — even an old one. Don't
+    second-guess: the run-level liveness signal owns that decision."""
+    now = 100_000
+    task = _task(
+        status="ready", assignee="demo", claim_lock="run_xyz",
+    )
+    events = [_event("created", ts=now - 6 * 3600)]
+    diags = kd.compute_task_diagnostics(task, events, [], now=now)
+    assert [d for d in diags if d.kind == "stranded_in_ready"] == []
+
+
+def test_stranded_in_ready_uses_latest_ready_transition():
+    """When multiple ready-transition events exist, the rule should
+    age-from the most recent — a task reclaimed 20 min ago is NOT
+    stranded for 6h even if it was first created 6h ago."""
+    now = 100_000
+    task = _task(status="ready", assignee="demo")
+    events = [
+        _event("created", ts=now - 6 * 3600),       # 6 h ago
+        _event("reclaimed", ts=now - 20 * 60),      # 20 min ago — wins
+    ]
+    diags = kd.compute_task_diagnostics(task, events, [], now=now)
+    assert [d for d in diags if d.kind == "stranded_in_ready"] == []
+
+
+def test_stranded_in_ready_severity_escalates_with_age():
+    """warning → error → critical at 2x and 6x threshold."""
+    now = 100_000
+    task = _task(status="ready", assignee="demo")
+    # Default threshold = 1800s.
+    cases = [
+        (45 * 60, "warning"),    # 1.5x → warning
+        (90 * 60, "error"),      # 3x → error
+        (4 * 3600, "critical"),  # 8x → critical
+    ]
+    for age, expected in cases:
+        events = [_event("created", ts=now - age)]
+        diags = kd.compute_task_diagnostics(task, events, [], now=now)
+        stranded = [d for d in diags if d.kind == "stranded_in_ready"]
+        assert len(stranded) == 1, f"age={age}"
+        assert stranded[0].severity == expected, (
+            f"age={age} expected {expected}, got {stranded[0].severity}"
+        )
+
+
+def test_stranded_in_ready_respects_config_override():
+    """Config override changes the threshold."""
+    now = 100_000
+    task = _task(status="ready", assignee="demo")
+    events = [_event("created", ts=now - 10 * 60)]  # 10 min
+    # Default 30 min — wouldn't fire.
+    diags = kd.compute_task_diagnostics(task, events, [], now=now)
+    assert [d for d in diags if d.kind == "stranded_in_ready"] == []
+    # Lower the threshold to 5 min — now it fires.
+    diags = kd.compute_task_diagnostics(
+        task, events, [], now=now,
+        config={"stranded_threshold_seconds": 5 * 60},
+    )
+    stranded = [d for d in diags if d.kind == "stranded_in_ready"]
+    assert len(stranded) == 1
+
+
+def test_stranded_in_ready_falls_back_to_created_at():
+    """When events have no ready-transition kind, the rule falls back
+    to the task's ``created_at`` so an ancient stranded task isn't
+    invisible just because its events got pruned."""
+    now = 100_000
+    task = _task(
+        status="ready", assignee="demo", created_at=now - 4 * 3600,
+    )
+    # No qualifying events.
+    events = [_event("commented", ts=now - 100)]
+    diags = kd.compute_task_diagnostics(task, events, [], now=now)
+    stranded = [d for d in diags if d.kind == "stranded_in_ready"]
+    assert len(stranded) == 1
+    assert stranded[0].data["age_seconds"] == 4 * 3600
+
+
+def test_stranded_in_ready_works_on_real_db_row(kanban_home):
+    """Round-trip through real kanban_db.connect() — confirms the rule
+    works on sqlite3.Row objects, not just dicts."""
+    import time as _t
+    conn = kb.connect()
+    try:
+        # Create a task and force its created_at into the past.
+        tid = kb.create_task(conn, title="stranded one", assignee="ghost")
+        old_ts = int(_t.time()) - 90 * 60  # 90 min old
+        conn.execute(
+            "UPDATE tasks SET status = 'ready', created_at = ? WHERE id = ?",
+            (old_ts, tid),
+        )
+        conn.commit()
+
+        task_row = conn.execute(
+            "SELECT * FROM tasks WHERE id = ?", (tid,)
+        ).fetchone()
+        events = list(conn.execute(
+            "SELECT * FROM task_events WHERE task_id = ? ORDER BY created_at",
+            (tid,),
+        ).fetchall())
+        # Override created event timestamps too so age calc lines up.
+        conn.execute(
+            "UPDATE task_events SET created_at = ? WHERE task_id = ?",
+            (old_ts, tid),
+        )
+        conn.commit()
+        events = list(conn.execute(
+            "SELECT * FROM task_events WHERE task_id = ?", (tid,),
+        ).fetchall())
+
+        diags = kd.compute_task_diagnostics(task_row, events, [])
+        stranded = [d for d in diags if d.kind == "stranded_in_ready"]
+        assert len(stranded) == 1
+        assert stranded[0].data["assignee"] == "ghost"
+    finally:
+        conn.close()
--- a/tests/hermes_cli/test_kanban_notify.py
+++ b/tests/hermes_cli/test_kanban_notify.py
@ -0,0 +1,481 @@
+import asyncio
+import pytest
+
+from pathlib import Path
+from types import SimpleNamespace
+from hermes_cli import kanban_db as kb
+from unittest.mock import AsyncMock, MagicMock, patch
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def kanban_home(tmp_path, monkeypatch):
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    kb.init_db()
+    return home
+
+
+@pytest.mark.asyncio
+async def test_notifier_unsubs_after_completed_event(kanban_home):
+    """
+    Subscription should be remove after completed event
+    """
+    import hermes_cli.kanban_db as kb
+    from gateway.run import GatewayRunner
+    from gateway.config import Platform
+
+    conn = kb.connect()
+    try:
+        tid = kb.create_task(conn, title="test task", assignee="worker1")
+        kb.add_notify_sub(conn, task_id=tid, platform="telegram", chat_id="chat1")
+        kb.complete_task(conn, tid, result="completed by agent")
+    finally:
+        conn.close()
+
+    runner = object.__new__(GatewayRunner)
+    runner._running = True
+    runner._kanban_sub_fail_counts = {}
+
+    fake_adapter = MagicMock()
+
+    async def _send_and_stop(chat_id, msg, metadata=None):
+        runner._running = False
+
+    fake_adapter.send = AsyncMock(side_effect=_send_and_stop)
+    runner.adapters = {Platform.TELEGRAM: fake_adapter}
+
+    _orig_sleep = asyncio.sleep
+
+    async def _fast_sleep(_):
+        await _orig_sleep(0)
+
+    with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep):
+        await asyncio.wait_for(
+            runner._kanban_notifier_watcher(interval=1),
+            timeout=10.0,
+        )
+
+    fake_adapter.send.assert_called_once()
+    call_msg = fake_adapter.send.call_args[0][1]
+    assert "completed" in call_msg
+
+    conn = kb.connect()
+    try:
+        subs = kb.list_notify_subs(conn, tid)
+    finally:
+        conn.close()
+    assert subs == [], "Subscription should be unsub after completed event"
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize('kind', ["gave_up", "crashed", "timed_out"])
+async def test_notifier_unsubs_after_abnormal_events(kind, kanban_home):
+    """
+    Event kinds gave_up / crashed / timed_out send a notification but DO
+    NOT delete the subscription. The dispatcher may respawn the task and
+    fire the same event kind again (e.g. a worker that crashes, gets
+    reclaimed, and crashes a second time); the user must hear about the
+    second event too. Subscriptions are removed only when the task hits
+    a truly final status (done / archived) — see the comment on
+    TERMINAL_KINDS in gateway/run.py and PR #21398.
+    """
+    import hermes_cli.kanban_db as kb
+    from gateway.run import GatewayRunner
+    from gateway.config import Platform
+
+    conn = kb.connect()
+
+    try:
+        tid = kb.create_task(conn, title=f"test {kind} task", assignee="worker1")
+        kb.add_notify_sub(conn, task_id=tid, platform="telegram", chat_id="chat1")
+        kb._append_event(conn, tid, kind=kind)
+    finally:
+        conn.close()
+
+    runner = object.__new__(GatewayRunner)
+    runner._running = True
+    runner._kanban_sub_fail_counts = {}
+
+    fake_adapter = MagicMock()
+
+    async def _send_and_stop(chat_id, msg, metadata=None):
+        runner._running = False
+
+    fake_adapter.send = AsyncMock(side_effect=_send_and_stop)
+    runner.adapters = {Platform.TELEGRAM: fake_adapter}
+
+    _orig_sleep = asyncio.sleep
+
+    async def _fast_sleep(_):
+        await _orig_sleep(0)
+
+    with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep):
+        await asyncio.wait_for(
+            runner._kanban_notifier_watcher(interval=1),
+            timeout=10.0,
+        )
+
+    # The user is notified about the abnormal event...
+    fake_adapter.send.assert_called_once()
+    assert kind.replace('_', ' ') in fake_adapter.send.call_args[0][1]
+
+    # ...but the subscription survives so a respawn-then-same-event cycle
+    # reaches the user too. The cursor (last_event_id) advanced inside
+    # the same write txn as the claim, so the same event won't re-fire.
+    conn = kb.connect()
+    try:
+        subs = kb.list_notify_subs(conn, tid)
+    finally:
+        conn.close()
+    assert len(subs) == 1, (
+        f"Subscription should survive {kind!r} so the next cycle of the "
+        f"same event reaches the user; got {subs!r}"
+    )
+    assert int(subs[0]["last_event_id"]) >= 1, (
+        "Cursor should have advanced past the delivered event "
+        "(claim_unseen_events_for_sub advances atomically inside the "
+        "same write txn as the read)."
+    )
+
+
+@pytest.mark.asyncio
+async def test_notifier_second_blocked_delivers(kanban_home):
+    """
+    After the first blocked, should receive second blocked notification.
+    """
+    import hermes_cli.kanban_db as kb
+    from gateway.run import GatewayRunner
+    from gateway.config import Platform
+
+    runner = object.__new__(GatewayRunner)
+    runner._running = True
+    runner._kanban_sub_fail_counts = {}
+
+    delivered_msgs: list[str] = []
+
+    async def _capture_send(chat_id, msg, metadata=None):
+        delivered_msgs.append(msg)
+
+    fake_adapter = MagicMock()
+    fake_adapter.send = AsyncMock(side_effect=_capture_send)
+    runner.adapters = {Platform.TELEGRAM: fake_adapter}
+
+    _orig_sleep = asyncio.sleep
+    tick_count = 0
+
+    async def _fast_sleep(_):
+        nonlocal tick_count
+        await _orig_sleep(0)
+        tick_count += 1
+        if tick_count >= 6:
+            runner._running = False
+
+    conn = kb.connect()
+    try:
+        tid = kb.create_task(conn, title="test task", assignee="worker1")
+        kb.add_notify_sub(conn, task_id=tid, platform="telegram", chat_id="chat1")
+
+        # Cycle 1: blocked
+        kb.block_task(conn, tid, reason="first block")
+    finally:
+        conn.close()
+
+    with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep):
+        await asyncio.wait_for(
+            runner._kanban_notifier_watcher(interval=1),
+            timeout=10.0,
+        )
+
+    # Cycle 2: unblock → block run again
+    runner._running = True
+    tick_count = 0
+
+    conn = kb.connect()
+    try:
+        kb.unblock_task(conn, tid)
+        kb.block_task(conn, tid, reason="second block")
+    finally:
+        conn.close()
+
+    with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep):
+        await asyncio.wait_for(
+            runner._kanban_notifier_watcher(interval=1),
+            timeout=10.0,
+        )
+
+    blocked_deliveries = [m for m in delivered_msgs if "blocked" in m]
+    assert "second block" not in blocked_deliveries[0]
+    assert "second block" in blocked_deliveries[1]
+    assert len(blocked_deliveries) == 2, (
+        f"Should receive 2 blocked notification, but only get {len(blocked_deliveries)} count\n"
+        f"Message {delivered_msgs}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Regression: gateway watchers must not double-init the kanban DB.
+#
+# Both the notifier watcher (`_kanban_notifier_watcher`) and the dispatcher
+# tick (`_tick_once_for_board`) used to call `_kb.connect(board=slug)`
+# immediately followed by `_kb.init_db(board=slug)`. Since `connect()`
+# already runs the schema + idempotent migration on first open per process,
+# the explicit `init_db()` was redundant — and worse, `init_db()`
+# deliberately busts the per-process cache and re-runs the migration on a
+# *second* connection, which races the first.  On legacy DBs this surfaced
+# as `duplicate column name: <col>` (now tolerated by
+# `_add_column_if_missing`) and intermittent `database is locked` errors
+# (issue #21378).
+#
+# The fix removes the `init_db()` calls in both watchers; this regression
+# test pins that behaviour so we don't reintroduce them.
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_notifier_does_not_call_init_db(kanban_home):
+    """Notifier watcher path must not invoke `_kb.init_db` (issue #21378)."""
+    import hermes_cli.kanban_db as kb
+    from gateway.run import GatewayRunner
+    from gateway.config import Platform
+
+    runner = object.__new__(GatewayRunner)
+    runner._running = True
+    runner._kanban_sub_fail_counts = {}
+
+    fake_adapter = MagicMock()
+    fake_adapter.send = AsyncMock()
+    runner.adapters = {Platform.TELEGRAM: fake_adapter}
+
+    _orig_sleep = asyncio.sleep
+    tick_count = 0
+
+    async def _fast_sleep(_):
+        nonlocal tick_count
+        await _orig_sleep(0)
+        tick_count += 1
+        if tick_count >= 3:
+            runner._running = False
+
+    init_db_calls: list[object] = []
+    real_init_db = kb.init_db
+
+    def _spy_init_db(*args, **kwargs):
+        init_db_calls.append((args, kwargs))
+        return real_init_db(*args, **kwargs)
+
+    with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep), \
+         patch("hermes_cli.kanban_db.init_db", side_effect=_spy_init_db):
+        await asyncio.wait_for(
+            runner._kanban_notifier_watcher(interval=1),
+            timeout=10.0,
+        )
+
+    assert init_db_calls == [], (
+        "_kanban_notifier_watcher must not call init_db on every tick — "
+        "connect() handles first-run schema init. "
+        "Reintroducing init_db revives issue #21378. "
+        f"Got {len(init_db_calls)} call(s): {init_db_calls}"
+    )
+
+
+def test_dispatcher_tick_does_not_call_init_db(kanban_home, monkeypatch):
+    """`_tick_once_for_board` must not invoke `_kb.init_db` (issue #21378).
+
+    `connect()` already runs the schema + idempotent migration on first open
+    per process. The explicit `init_db()` call was redundant and triggered a
+    second migration on a second connection that raced the first.
+    """
+    import hermes_cli.kanban_db as kb
+    from gateway.run import GatewayRunner
+    from unittest.mock import patch
+
+    runner = object.__new__(GatewayRunner)
+
+    init_db_calls: list[object] = []
+    real_init_db = kb.init_db
+
+    def _spy_init_db(*args, **kwargs):
+        init_db_calls.append((args, kwargs))
+        return real_init_db(*args, **kwargs)
+
+    # The dispatcher watcher's tick lives as a local closure inside
+    # `_kanban_dispatcher_watcher`. Read the source and assert the
+    # specific patterns that would reintroduce the bug are absent.
+    import inspect
+    src = inspect.getsource(GatewayRunner._kanban_dispatcher_watcher)
+    assert "_kb.init_db(board=slug)" not in src, (
+        "_kanban_dispatcher_watcher must not call _kb.init_db(board=slug) — "
+        "see issue #21378. Use connect() alone; it runs migrations on first "
+        "open per process."
+    )
+
+    notifier_src = inspect.getsource(GatewayRunner._kanban_notifier_watcher)
+    assert "_kb.init_db(board=slug)" not in notifier_src, (
+        "_kanban_notifier_watcher must not call _kb.init_db(board=slug) — "
+        "see issue #21378."
+    )
+
+
+@pytest.mark.asyncio
+async def test_notifier_skips_subscription_owned_by_other_profile(kanban_home):
+    """Each gateway keeps its watcher on, but only the subscribing profile claims."""
+    import hermes_cli.kanban_db as kb
+    from gateway.run import GatewayRunner
+    from gateway.config import Platform
+
+    conn = kb.connect()
+    try:
+        tid = kb.create_task(conn, title="owned task", assignee="backend-engineer")
+        kb.add_notify_sub(
+            conn,
+            task_id=tid,
+            platform="telegram",
+            chat_id="chat1",
+            notifier_profile="default",
+        )
+        kb.complete_task(conn, tid, result="done")
+    finally:
+        conn.close()
+
+    runner = object.__new__(GatewayRunner)
+    runner._running = True
+    runner._kanban_sub_fail_counts = {}
+    runner._kanban_notifier_profile = "business-partner"
+
+    fake_adapter = MagicMock()
+    fake_adapter.send = AsyncMock()
+    runner.adapters = {Platform.TELEGRAM: fake_adapter}
+
+    _orig_sleep = asyncio.sleep
+    tick_count = 0
+
+    async def _fast_sleep(_):
+        nonlocal tick_count
+        await _orig_sleep(0)
+        tick_count += 1
+        if tick_count >= 3:
+            runner._running = False
+
+    with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep):
+        await asyncio.wait_for(
+            runner._kanban_notifier_watcher(interval=1),
+            timeout=10.0,
+        )
+
+    fake_adapter.send.assert_not_called()
+    conn = kb.connect()
+    try:
+        subs = kb.list_notify_subs(conn, tid)
+    finally:
+        conn.close()
+    assert len(subs) == 1
+    assert int(subs[0]["last_event_id"]) == 0, "wrong profile must not claim the event"
+
+
+@pytest.mark.asyncio
+async def test_notifier_delivers_subscription_owned_by_current_profile(kanban_home):
+    """The gateway for the profile that created/subscribed the task reports it."""
+    import hermes_cli.kanban_db as kb
+    from gateway.run import GatewayRunner
+    from gateway.config import Platform
+
+    conn = kb.connect()
+    try:
+        tid = kb.create_task(conn, title="owned task", assignee="backend-engineer")
+        kb.add_notify_sub(
+            conn,
+            task_id=tid,
+            platform="telegram",
+            chat_id="chat1",
+            notifier_profile="default",
+        )
+        kb.complete_task(conn, tid, result="done")
+    finally:
+        conn.close()
+
+    runner = object.__new__(GatewayRunner)
+    runner._running = True
+    runner._kanban_sub_fail_counts = {}
+    runner._kanban_notifier_profile = "default"
+
+    fake_adapter = MagicMock()
+
+    async def _send_and_stop(chat_id, msg, metadata=None):
+        runner._running = False
+
+    fake_adapter.send = AsyncMock(side_effect=_send_and_stop)
+    runner.adapters = {Platform.TELEGRAM: fake_adapter}
+
+    _orig_sleep = asyncio.sleep
+
+    async def _fast_sleep(_):
+        await _orig_sleep(0)
+
+    with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep):
+        await asyncio.wait_for(
+            runner._kanban_notifier_watcher(interval=1),
+            timeout=10.0,
+        )
+
+    fake_adapter.send.assert_called_once()
+    conn = kb.connect()
+    try:
+        subs = kb.list_notify_subs(conn, tid)
+    finally:
+        conn.close()
+    assert subs == []
+
+
+@pytest.mark.asyncio
+async def test_gateway_create_autosubscribes_on_explicit_board(kanban_home):
+    """`/kanban --board <slug> create ...` must subscribe on that board.
+
+    The gateway handler currently auto-subscribes after `/kanban create`,
+    but the create detection must still work when the shared `--board`
+    flag appears before the subcommand, and the subscription must land in
+    that board's DB rather than the ambient/default board.
+    """
+    from gateway.run import GatewayRunner
+    from gateway.config import Platform
+
+    kb.create_board("projx")
+
+    runner = object.__new__(GatewayRunner)
+    source = SimpleNamespace(
+        platform=Platform.TELEGRAM,
+        chat_id="chat1",
+        thread_id="th1",
+        user_id="u1",
+    )
+    event = SimpleNamespace(
+        text='/kanban --board projx create "hello" --assignee alice',
+        source=source,
+    )
+
+    out = await GatewayRunner._handle_kanban_command(runner, event)
+
+    assert "subscribed" in out.lower()
+
+    conn = kb.connect(board="projx")
+    try:
+        subs = kb.list_notify_subs(conn)
+        tasks = kb.list_tasks(conn)
+    finally:
+        conn.close()
+
+    assert [t.title for t in tasks] == ["hello"]
+    assert len(subs) == 1
+    assert subs[0]["chat_id"] == "chat1"
+    assert subs[0]["thread_id"] == "th1"
+
+    conn = kb.connect(board="default")
+    try:
+        assert kb.list_notify_subs(conn) == []
+    finally:
+        conn.close()
--- a/tests/hermes_cli/test_kanban_specify.py
+++ b/tests/hermes_cli/test_kanban_specify.py
@ -0,0 +1,337 @@
+"""Tests for the specifier module + `hermes kanban specify` CLI surface.
+
+The auxiliary LLM client is mocked — these tests don't hit any network or
+real provider. They exercise the prompt plumbing, response parsing, DB
+writes, and CLI flag surface.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json as jsonlib
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from hermes_cli import kanban as kanban_cli
+from hermes_cli import kanban_db as kb
+from hermes_cli import kanban_specify as spec
+
+
+@pytest.fixture
+def kanban_home(tmp_path, monkeypatch):
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    kb.init_db()
+    return home
+
+
+def _fake_aux_response(content: str):
+    """Build a minimal object shaped like an OpenAI chat.completions result.
+
+    The specifier only reads ``resp.choices[0].message.content``, so we
+    avoid importing the openai SDK and build the tree with MagicMock.
+    """
+    resp = MagicMock()
+    resp.choices = [MagicMock()]
+    resp.choices[0].message.content = content
+    return resp
+
+
+def _mock_client_returning(content: str):
+    client = MagicMock()
+    client.chat.completions.create = MagicMock(return_value=_fake_aux_response(content))
+    return client
+
+
+def _patch_aux_client(content: str, *, model: str = "test-model"):
+    """Patch get_text_auxiliary_client at its source + at the module that
+    imported it lazily inside specify_task. Both patches are needed
+    because kanban_specify imports the function inside the function body.
+    """
+    client = _mock_client_returning(content)
+    return patch(
+        "agent.auxiliary_client.get_text_auxiliary_client",
+        return_value=(client, model),
+    ), client
+
+
+# ---------------------------------------------------------------------------
+# JSON extraction helpers
+# ---------------------------------------------------------------------------
+
+def test_extract_json_blob_handles_plain_json():
+    raw = '{"title": "T", "body": "B"}'
+    assert spec._extract_json_blob(raw) == {"title": "T", "body": "B"}
+
+
+def test_extract_json_blob_handles_fenced_json():
+    raw = '```json\n{"title": "T", "body": "B"}\n```'
+    assert spec._extract_json_blob(raw) == {"title": "T", "body": "B"}
+
+
+def test_extract_json_blob_handles_prose_preamble():
+    raw = 'Sure! Here you go:\n{"title": "T", "body": "B"}\nThanks.'
+    assert spec._extract_json_blob(raw) == {"title": "T", "body": "B"}
+
+
+def test_extract_json_blob_returns_none_for_unparseable():
+    assert spec._extract_json_blob("no json here") is None
+    assert spec._extract_json_blob("") is None
+    assert spec._extract_json_blob("{not: valid}") is None
+
+
+# ---------------------------------------------------------------------------
+# specify_task (module-level entry point)
+# ---------------------------------------------------------------------------
+
+def test_specify_task_happy_path(kanban_home):
+    with kb.connect() as conn:
+        tid = kb.create_task(conn, title="rough", triage=True)
+
+    content = jsonlib.dumps({
+        "title": "Refined rough",
+        "body": "**Goal**\nA concrete goal.",
+    })
+    p, _ = _patch_aux_client(content)
+    with p:
+        outcome = spec.specify_task(tid, author="ace")
+
+    assert outcome.ok is True
+    assert outcome.task_id == tid
+    assert outcome.new_title == "Refined rough"
+
+    with kb.connect() as conn:
+        task = kb.get_task(conn, tid)
+    # Parent-free → recompute_ready promotes to ready.
+    assert task.status == "ready"
+    assert task.title == "Refined rough"
+    assert "**Goal**" in (task.body or "")
+
+
+def test_specify_task_falls_back_to_body_only_on_bad_json(kanban_home):
+    with kb.connect() as conn:
+        tid = kb.create_task(conn, title="keep title", triage=True)
+
+    # Model returned plain markdown, no JSON object.
+    content = "Goal: Do a thing.\nApproach: Steps here."
+    p, _ = _patch_aux_client(content)
+    with p:
+        outcome = spec.specify_task(tid)
+
+    assert outcome.ok is True
+    with kb.connect() as conn:
+        t = kb.get_task(conn, tid)
+    # Title preserved (no JSON with a title key).
+    assert t.title == "keep title"
+    # Body replaced with the raw response.
+    assert "Goal:" in (t.body or "")
+
+
+def test_specify_task_rejects_non_triage_task(kanban_home):
+    with kb.connect() as conn:
+        tid = kb.create_task(conn, title="ready task")
+
+    p, client = _patch_aux_client("unused")
+    with p:
+        outcome = spec.specify_task(tid)
+
+    assert outcome.ok is False
+    assert "not in triage" in outcome.reason
+    # LLM must not be invoked for a non-triage task — fail cheap.
+    assert client.chat.completions.create.call_count == 0
+
+
+def test_specify_task_unknown_id(kanban_home):
+    p, client = _patch_aux_client("unused")
+    with p:
+        outcome = spec.specify_task("t_nope")
+    assert outcome.ok is False
+    assert "unknown task" in outcome.reason
+    assert client.chat.completions.create.call_count == 0
+
+
+def test_specify_task_no_aux_client_configured(kanban_home):
+    with kb.connect() as conn:
+        tid = kb.create_task(conn, title="rough", triage=True)
+
+    with patch(
+        "agent.auxiliary_client.get_text_auxiliary_client",
+        return_value=(None, ""),
+    ):
+        outcome = spec.specify_task(tid)
+
+    assert outcome.ok is False
+    assert "auxiliary client" in outcome.reason
+    # Task must stay in triage — we never touched it.
+    with kb.connect() as conn:
+        assert kb.get_task(conn, tid).status == "triage"
+
+
+def test_specify_task_llm_api_error_keeps_task_in_triage(kanban_home):
+    with kb.connect() as conn:
+        tid = kb.create_task(conn, title="rough", triage=True)
+
+    client = MagicMock()
+    client.chat.completions.create = MagicMock(side_effect=RuntimeError("429 rate limited"))
+    with patch(
+        "agent.auxiliary_client.get_text_auxiliary_client",
+        return_value=(client, "test-model"),
+    ):
+        outcome = spec.specify_task(tid)
+
+    assert outcome.ok is False
+    assert "LLM error" in outcome.reason
+    with kb.connect() as conn:
+        assert kb.get_task(conn, tid).status == "triage"
+
+
+def test_specify_task_empty_llm_response(kanban_home):
+    with kb.connect() as conn:
+        tid = kb.create_task(conn, title="rough", triage=True)
+
+    p, _ = _patch_aux_client("")
+    with p:
+        outcome = spec.specify_task(tid)
+
+    assert outcome.ok is False
+    with kb.connect() as conn:
+        assert kb.get_task(conn, tid).status == "triage"
+
+
+def test_list_triage_ids(kanban_home):
+    with kb.connect() as conn:
+        a = kb.create_task(conn, title="a", triage=True)
+        b = kb.create_task(conn, title="b", triage=True, tenant="proj-1")
+        kb.create_task(conn, title="c")  # not triage — excluded
+
+    ids_all = spec.list_triage_ids()
+    assert set(ids_all) == {a, b}
+    ids_tenant = spec.list_triage_ids(tenant="proj-1")
+    assert ids_tenant == [b]
+
+
+# ---------------------------------------------------------------------------
+# CLI wiring — argparse + _cmd_specify
+# ---------------------------------------------------------------------------
+
+def _run_cli(*argv: str) -> int:
+    """Invoke the `hermes kanban …` argparse surface directly."""
+    root = argparse.ArgumentParser()
+    subp = root.add_subparsers(dest="cmd")
+    kanban_cli.build_parser(subp)
+    ns = root.parse_args(["kanban", *argv])
+    return kanban_cli.kanban_command(ns)
+
+
+def test_cli_specify_requires_id_or_all(kanban_home, capsys):
+    rc = _run_cli("specify")
+    assert rc == 2
+    err = capsys.readouterr().err
+    assert "requires a task id or --all" in err
+
+
+def test_cli_specify_rejects_both_id_and_all(kanban_home, capsys):
+    with kb.connect() as conn:
+        tid = kb.create_task(conn, title="rough", triage=True)
+    rc = _run_cli("specify", tid, "--all")
+    assert rc == 2
+    err = capsys.readouterr().err
+    assert "either a task id OR --all" in err
+
+
+def test_cli_specify_single_id_success(kanban_home, capsys):
+    with kb.connect() as conn:
+        tid = kb.create_task(conn, title="rough", triage=True)
+
+    content = jsonlib.dumps({"title": "clean", "body": "body"})
+    p, _ = _patch_aux_client(content)
+    with p:
+        rc = _run_cli("specify", tid)
+    assert rc == 0
+    out = capsys.readouterr().out
+    assert tid in out
+    assert "→ todo" in out or "-> todo" in out or "→" in out
+
+
+def test_cli_specify_all_success_and_json(kanban_home, capsys):
+    with kb.connect() as conn:
+        a = kb.create_task(conn, title="a", triage=True)
+        b = kb.create_task(conn, title="b", triage=True)
+
+    content = jsonlib.dumps({"title": "spec", "body": "body"})
+    p, _ = _patch_aux_client(content)
+    with p:
+        rc = _run_cli("specify", "--all", "--json")
+    assert rc == 0
+    lines = [l for l in capsys.readouterr().out.strip().splitlines() if l]
+    # One JSON object per task + nothing else.
+    assert len(lines) == 2
+    parsed = [jsonlib.loads(l) for l in lines]
+    ids = {row["task_id"] for row in parsed}
+    assert ids == {a, b}
+    assert all(row["ok"] for row in parsed)
+
+
+def test_cli_specify_all_empty_triage_column(kanban_home, capsys):
+    rc = _run_cli("specify", "--all")
+    assert rc == 0
+    assert "No triage tasks" in capsys.readouterr().out
+
+
+def test_cli_specify_all_returns_1_when_every_task_fails(kanban_home, capsys):
+    with kb.connect() as conn:
+        kb.create_task(conn, title="a", triage=True)
+        kb.create_task(conn, title="b", triage=True)
+
+    with patch(
+        "agent.auxiliary_client.get_text_auxiliary_client",
+        return_value=(None, ""),  # no aux client → every task fails
+    ):
+        rc = _run_cli("specify", "--all")
+
+    assert rc == 1
+
+
+def test_cli_specify_tenant_filter(kanban_home, capsys):
+    with kb.connect() as conn:
+        outside = kb.create_task(conn, title="outside", triage=True)
+        inside = kb.create_task(
+            conn, title="inside", triage=True, tenant="proj-a",
+        )
+
+    content = jsonlib.dumps({"title": "spec", "body": "body"})
+    p, _ = _patch_aux_client(content)
+    with p:
+        rc = _run_cli("specify", "--all", "--tenant", "proj-a", "--json")
+    assert rc == 0
+    lines = [
+        jsonlib.loads(l)
+        for l in capsys.readouterr().out.strip().splitlines()
+        if l
+    ]
+    ids = {row["task_id"] for row in lines}
+    assert ids == {inside}
+
+    # The outside task stays in triage.
+    with kb.connect() as conn:
+        assert kb.get_task(conn, outside).status == "triage"
+        # The inside task was promoted.
+        assert kb.get_task(conn, inside).status in {"todo", "ready"}
+
+
+def test_cli_specify_author_passed_through(kanban_home, capsys):
+    with kb.connect() as conn:
+        tid = kb.create_task(conn, title="rough", triage=True)
+
+    content = jsonlib.dumps({"title": "fresh title", "body": "fresh body"})
+    p, _ = _patch_aux_client(content)
+    with p:
+        rc = _run_cli("specify", tid, "--author", "custom-agent")
+    assert rc == 0
+    with kb.connect() as conn:
+        comments = kb.list_comments(conn, tid)
+    assert comments and comments[0].author == "custom-agent"
--- a/tests/hermes_cli/test_kanban_specify_db.py
+++ b/tests/hermes_cli/test_kanban_specify_db.py
@ -0,0 +1,184 @@
+"""Tests for kb.specify_triage_task — the DB-layer atomic promotion
+from the triage column to todo. LLM-free by design."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from hermes_cli import kanban_db as kb
+
+
+@pytest.fixture
+def kanban_home(tmp_path, monkeypatch):
+    """Isolated HERMES_HOME with an empty kanban DB."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    kb.init_db()
+    return home
+
+
+def _create_triage(conn, title="rough idea", body=None, assignee=None):
+    return kb.create_task(
+        conn,
+        title=title,
+        body=body,
+        assignee=assignee,
+        triage=True,
+    )
+
+
+def test_specify_promotes_triage_to_todo(kanban_home):
+    with kb.connect() as conn:
+        tid = _create_triage(conn, title="rough idea")
+        assert kb.get_task(conn, tid).status == "triage"
+    with kb.connect() as conn:
+        ok = kb.specify_triage_task(
+            conn,
+            tid,
+            title="Refined: rough idea",
+            body="**Goal**\nDo the thing.",
+            author="specifier-bot",
+        )
+    assert ok is True
+    with kb.connect() as conn:
+        task = kb.get_task(conn, tid)
+    # No parents → recompute_ready should have flipped it past todo to ready.
+    assert task.status == "ready"
+    assert task.title == "Refined: rough idea"
+    assert "**Goal**" in (task.body or "")
+
+
+def test_specify_with_open_parent_lands_in_todo_not_ready(kanban_home):
+    # Parent-gated specified tasks must not jump the dispatcher — they go
+    # to todo and wait for parent completion like any other gated task.
+    with kb.connect() as conn:
+        parent = kb.create_task(conn, title="parent work")
+        child = _create_triage(conn, title="child idea")
+        kb.link_tasks(conn, parent, child)
+        # After linking with an open parent, triage status should still be
+        # 'triage' (linking doesn't touch triage tasks).
+        assert kb.get_task(conn, child).status == "triage"
+    with kb.connect() as conn:
+        ok = kb.specify_triage_task(
+            conn,
+            child,
+            body="full spec",
+            author="specifier",
+        )
+    assert ok is True
+    with kb.connect() as conn:
+        t = kb.get_task(conn, child)
+    # Parent still open → specified child sits in 'todo', not 'ready'.
+    assert t.status == "todo"
+
+
+def test_specify_refuses_non_triage_task(kanban_home):
+    with kb.connect() as conn:
+        tid = kb.create_task(conn, title="normal task")
+        assert kb.get_task(conn, tid).status == "ready"
+    with kb.connect() as conn:
+        ok = kb.specify_triage_task(conn, tid, body="won't apply")
+    assert ok is False
+    with kb.connect() as conn:
+        # Status unchanged.
+        assert kb.get_task(conn, tid).status == "ready"
+
+
+def test_specify_returns_false_for_unknown_id(kanban_home):
+    with kb.connect() as conn:
+        ok = kb.specify_triage_task(conn, "t_does_not_exist", body="x")
+    assert ok is False
+
+
+def test_specify_rejects_blank_title(kanban_home):
+    with kb.connect() as conn:
+        tid = _create_triage(conn, title="rough")
+    with kb.connect() as conn, pytest.raises(ValueError):
+        kb.specify_triage_task(conn, tid, title="   ", body="ok")
+
+
+def test_specify_emits_event(kanban_home):
+    with kb.connect() as conn:
+        tid = _create_triage(conn, title="rough")
+    with kb.connect() as conn:
+        kb.specify_triage_task(
+            conn, tid, title="new", body="b", author="ace"
+        )
+    with kb.connect() as conn:
+        events = kb.list_events(conn, tid)
+    kinds = [e.kind for e in events]
+    assert "specified" in kinds
+    # The specified event records which fields actually changed as a
+    # JSON payload under task_events.payload.
+    spec_ev = next(e for e in events if e.kind == "specified")
+    assert spec_ev.payload is not None
+    fields = spec_ev.payload.get("changed_fields") or []
+    assert "title" in fields
+    assert "body" in fields
+
+
+def test_specify_records_audit_comment_only_when_author_given(kanban_home):
+    # With author → comment added.
+    with kb.connect() as conn:
+        tid1 = _create_triage(conn, title="a")
+        kb.specify_triage_task(
+            conn, tid1, title="A-spec", body="b", author="ace"
+        )
+        comments1 = kb.list_comments(conn, tid1)
+    assert len(comments1) == 1
+    assert "Specified" in comments1[0].body
+    assert comments1[0].author == "ace"
+
+    # Without author → no comment (silent).
+    with kb.connect() as conn:
+        tid2 = _create_triage(conn, title="b")
+        kb.specify_triage_task(conn, tid2, title="B-spec", body="b")
+        comments2 = kb.list_comments(conn, tid2)
+    assert comments2 == []
+
+
+def test_specify_skips_comment_when_nothing_changed(kanban_home):
+    # Create triage task with title and body already set; pass identical
+    # values to specify. Should promote to todo but skip audit comment.
+    with kb.connect() as conn:
+        tid = _create_triage(conn, title="same", body="same body")
+    with kb.connect() as conn:
+        ok = kb.specify_triage_task(
+            conn,
+            tid,
+            title="same",
+            body="same body",
+            author="ace",
+        )
+    assert ok is True
+    with kb.connect() as conn:
+        # Promoted.
+        assert kb.get_task(conn, tid).status in {"todo", "ready"}
+        # No audit comment because neither field changed.
+        assert kb.list_comments(conn, tid) == []
+
+
+def test_specify_with_only_body_preserves_title(kanban_home):
+    with kb.connect() as conn:
+        tid = _create_triage(conn, title="keep this title")
+    with kb.connect() as conn:
+        kb.specify_triage_task(conn, tid, body="new body only")
+    with kb.connect() as conn:
+        t = kb.get_task(conn, tid)
+    assert t.title == "keep this title"
+    assert t.body == "new body only"
+
+
+def test_specify_second_call_noop_false(kanban_home):
+    # Promoting twice must not crash and the second call returns False
+    # because the task is no longer in triage.
+    with kb.connect() as conn:
+        tid = _create_triage(conn, title="once")
+    with kb.connect() as conn:
+        assert kb.specify_triage_task(conn, tid, body="spec") is True
+    with kb.connect() as conn:
+        assert kb.specify_triage_task(conn, tid, body="spec again") is False
--- a/tests/hermes_cli/test_list_picker_providers.py
+++ b/tests/hermes_cli/test_list_picker_providers.py
@ -0,0 +1,261 @@
+"""Tests for ``list_picker_providers`` — the /model picker filter.
+
+``list_picker_providers`` wraps ``list_authenticated_providers`` and
+post-processes the result for interactive pickers (Telegram, Discord):
+
+- OpenRouter's ``models`` are replaced with the live-filtered output of
+  ``fetch_openrouter_models``, so IDs the live catalog no longer carries
+  drop out.
+- Provider rows with an empty ``models`` list are dropped, except custom
+  endpoints (``is_user_defined=True`` with an ``api_url``) where the user
+  may supply their own model set through config.
+
+These tests exercise the filter in isolation by mocking
+``list_authenticated_providers`` and ``fetch_openrouter_models`` so no
+network or auth state is required.
+"""
+
+import pytest
+from hermes_cli import model_switch
+
+
+def _make_provider(slug, name=None, models=None, *, is_current=False,
+                   is_user_defined=False, source="built-in", api_url=None):
+    """Build a dict shaped like ``list_authenticated_providers`` output."""
+    entry = {
+        "slug": slug,
+        "name": name or slug.title(),
+        "is_current": is_current,
+        "is_user_defined": is_user_defined,
+        "models": list(models or []),
+        "total_models": len(models or []),
+        "source": source,
+    }
+    if api_url is not None:
+        entry["api_url"] = api_url
+    return entry
+
+
+def test_openrouter_models_replaced_with_live_catalog(monkeypatch):
+    """OpenRouter row's ``models`` should come from fetch_openrouter_models."""
+    base = [
+        _make_provider("openrouter", models=["openai/gpt-stale", "old/model"]),
+    ]
+    live = [("openai/gpt-5.4", "recommended"), ("moonshotai/kimi-k2.6", "")]
+
+    monkeypatch.setattr(model_switch, "list_authenticated_providers",
+                        lambda **kw: list(base))
+    monkeypatch.setattr("hermes_cli.models.fetch_openrouter_models",
+                        lambda *a, **kw: list(live))
+
+    result = model_switch.list_picker_providers(max_models=50)
+
+    assert len(result) == 1
+    openrouter = result[0]
+    assert openrouter["slug"] == "openrouter"
+    assert openrouter["models"] == ["openai/gpt-5.4", "moonshotai/kimi-k2.6"]
+    assert openrouter["total_models"] == 2
+
+
+def test_openrouter_falls_back_to_base_models_on_fetch_failure(monkeypatch):
+    """If the live catalog fetch raises, keep whatever base provided."""
+    fallback_models = ["openai/gpt-5.4", "moonshotai/kimi-k2.6"]
+    base = [_make_provider("openrouter", models=fallback_models)]
+
+    def _raise(*_a, **_kw):
+        raise RuntimeError("network down")
+
+    monkeypatch.setattr(model_switch, "list_authenticated_providers",
+                        lambda **kw: list(base))
+    monkeypatch.setattr("hermes_cli.models.fetch_openrouter_models", _raise)
+
+    result = model_switch.list_picker_providers(max_models=50)
+
+    assert len(result) == 1
+    assert result[0]["models"] == fallback_models
+
+
+def test_openrouter_empty_live_catalog_drops_row(monkeypatch):
+    """If the live catalog returns nothing for OpenRouter, drop the row."""
+    base = [_make_provider("openrouter", models=["something/stale"])]
+
+    monkeypatch.setattr(model_switch, "list_authenticated_providers",
+                        lambda **kw: list(base))
+    monkeypatch.setattr("hermes_cli.models.fetch_openrouter_models",
+                        lambda *a, **kw: [])
+
+    result = model_switch.list_picker_providers(max_models=50)
+
+    assert result == []
+
+
+def test_non_openrouter_rows_passed_through_unchanged(monkeypatch):
+    """Non-OpenRouter providers keep their curated ``models`` as-is."""
+    base = [
+        _make_provider("anthropic", models=["claude-sonnet-4-6", "claude-opus-4-7"]),
+        _make_provider("gemini", models=["gemini-3-flash-preview"]),
+    ]
+
+    monkeypatch.setattr(model_switch, "list_authenticated_providers",
+                        lambda **kw: list(base))
+    # fetch_openrouter_models must not be consulted when there's no openrouter row
+    monkeypatch.setattr("hermes_cli.models.fetch_openrouter_models",
+                        lambda *a, **kw: pytest.fail("should not be called"))
+
+    result = model_switch.list_picker_providers(max_models=50)
+
+    assert [p["slug"] for p in result] == ["anthropic", "gemini"]
+    assert result[0]["models"] == ["claude-sonnet-4-6", "claude-opus-4-7"]
+    assert result[1]["models"] == ["gemini-3-flash-preview"]
+
+
+def test_empty_models_row_dropped(monkeypatch):
+    """Built-in provider with an empty ``models`` list is dropped."""
+    base = [
+        _make_provider("anthropic", models=[]),  # drop
+        _make_provider("openrouter", models=["anything"]),  # replaced by live
+    ]
+
+    monkeypatch.setattr(model_switch, "list_authenticated_providers",
+                        lambda **kw: list(base))
+    monkeypatch.setattr("hermes_cli.models.fetch_openrouter_models",
+                        lambda *a, **kw: [("openai/gpt-5.4", "recommended")])
+
+    result = model_switch.list_picker_providers(max_models=50)
+
+    assert [p["slug"] for p in result] == ["openrouter"]
+
+
+def test_custom_endpoint_with_api_url_kept_when_models_empty(monkeypatch):
+    """User-defined endpoints with an ``api_url`` survive even if models empty.
+
+    Rationale: custom endpoints may accept any model id the user types --
+    the picker still shows the row so the user can enter one manually.
+    """
+    base = [
+        _make_provider("local-ollama", is_user_defined=True,
+                       api_url="http://localhost:11434/v1", models=[],
+                       source="user-config"),
+    ]
+
+    monkeypatch.setattr(model_switch, "list_authenticated_providers",
+                        lambda **kw: list(base))
+    monkeypatch.setattr("hermes_cli.models.fetch_openrouter_models",
+                        lambda *a, **kw: [])
+
+    result = model_switch.list_picker_providers(max_models=50)
+
+    assert len(result) == 1
+    assert result[0]["slug"] == "local-ollama"
+    assert result[0]["models"] == []
+
+
+def test_user_defined_without_api_url_and_empty_models_dropped(monkeypatch):
+    """An is_user_defined row WITHOUT api_url and no models is still dropped.
+
+    The exemption is specifically for custom endpoints that can accept
+    arbitrary model ids; without an api_url there's nothing to point at.
+    """
+    base = [
+        _make_provider("orphan", is_user_defined=True, api_url=None, models=[]),
+    ]
+
+    monkeypatch.setattr(model_switch, "list_authenticated_providers",
+                        lambda **kw: list(base))
+    monkeypatch.setattr("hermes_cli.models.fetch_openrouter_models",
+                        lambda *a, **kw: [])
+
+    result = model_switch.list_picker_providers(max_models=50)
+
+    assert result == []
+
+
+def test_max_models_caps_openrouter_live_output(monkeypatch):
+    """``max_models`` caps how many OpenRouter IDs land in the row."""
+    live = [(f"vendor/model-{i}", "") for i in range(20)]
+    base = [_make_provider("openrouter", models=["placeholder"])]
+
+    monkeypatch.setattr(model_switch, "list_authenticated_providers",
+                        lambda **kw: list(base))
+    monkeypatch.setattr("hermes_cli.models.fetch_openrouter_models",
+                        lambda *a, **kw: list(live))
+
+    result = model_switch.list_picker_providers(max_models=5)
+
+    assert len(result) == 1
+    assert len(result[0]["models"]) == 5
+    assert result[0]["models"] == [mid for mid, _ in live[:5]]
+    # total_models reflects the full live catalog, not the capped slice.
+    assert result[0]["total_models"] == 20
+
+
+def test_passthrough_kwargs_to_base(monkeypatch):
+    """All kwargs must be forwarded to ``list_authenticated_providers`` unchanged.
+
+    The gateway /model picker passes ``current_base_url`` and ``current_model``
+    so custom endpoint grouping can mark the current row. Dropping those kwargs
+    regressed Telegram/Discord into the text-list fallback.
+    """
+    captured = {}
+
+    def _capture(**kwargs):
+        captured.update(kwargs)
+        return []
+
+    monkeypatch.setattr(model_switch, "list_authenticated_providers", _capture)
+    monkeypatch.setattr("hermes_cli.models.fetch_openrouter_models",
+                        lambda *a, **kw: [])
+
+    model_switch.list_picker_providers(
+        current_provider="openrouter",
+        current_base_url="http://x",
+        current_model="openai/gpt-5.4",
+        user_providers={"foo": {"api": "http://x"}},
+        custom_providers=[{"name": "bar", "base_url": "http://y"}],
+        max_models=12,
+    )
+
+    assert captured["current_provider"] == "openrouter"
+    assert captured["current_base_url"] == "http://x"
+    assert captured["current_model"] == "openai/gpt-5.4"
+    assert captured["user_providers"] == {"foo": {"api": "http://x"}}
+    assert captured["custom_providers"] == [{"name": "bar", "base_url": "http://y"}]
+    assert captured["max_models"] == 12
+
+
+def test_current_custom_endpoint_passthrough_marks_current_row(monkeypatch):
+    """Interactive picker should preserve current custom endpoint semantics."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr("agent.models_dev.PROVIDER_TO_MODELS_DEV", {})
+    monkeypatch.setattr("hermes_cli.providers.HERMES_OVERLAYS", {})
+    monkeypatch.setattr("hermes_cli.models.fetch_openrouter_models",
+                        lambda *a, **kw: [])
+
+    result = model_switch.list_picker_providers(
+        current_provider="custom:ollama",
+        current_base_url="http://localhost:11434/v1",
+        current_model="glm-5.1",
+        user_providers={},
+        custom_providers=[
+            {
+                "name": "Ollama — GLM 5.1",
+                "base_url": "http://localhost:11434/v1",
+                "api_key": "ollama",
+                "model": "glm-5.1",
+            },
+            {
+                "name": "Ollama — Qwen3",
+                "base_url": "http://localhost:11434/v1",
+                "api_key": "ollama",
+                "model": "qwen3",
+            },
+        ],
+        max_models=50,
+    )
+
+    custom_rows = [p for p in result if p.get("is_user_defined")]
+    assert len(custom_rows) == 1
+    row = custom_rows[0]
+    assert row["slug"] == "custom:ollama"
+    assert row["is_current"] is True
+    assert row["models"] == ["glm-5.1", "qwen3"]
--- a/tests/hermes_cli/test_mcp_add_command_dest.py
+++ b/tests/hermes_cli/test_mcp_add_command_dest.py
@ -0,0 +1,87 @@
+"""Regression test: ``hermes mcp add --command`` must not clobber the
+top-level ``args.command`` subparser dest.
+
+The top-level argparse parser uses ``dest="command"`` for its subparsers
+(``hermes_cli/_parser.py``).  The dispatcher in ``hermes_cli/main.py``
+reads ``args.command`` to decide which command to run; if it is ``None``
+it falls through to interactive chat.
+
+The ``mcp add`` subparser exposes a ``--command`` flag (the stdio command
+for an MCP server, e.g. ``npx``).  Without an explicit ``dest=``, argparse
+derives the dest from the flag name and writes ``args.command = None``
+when the flag is omitted, overwriting the top-level ``"mcp"`` value.  As a
+result, ``hermes mcp add foo --url ...`` silently launches chat instead
+of registering an MCP server.
+
+The fix: declare the flag with ``dest="mcp_command"``.  The CLI flag name
+is unchanged; only the in-memory attribute moves.
+
+We replicate the relevant parser shape here rather than importing the
+real builder, mirroring ``test_argparse_flag_propagation.py`` and
+``test_subparser_routing_fallback.py``.
+"""
+
+import argparse
+
+
+def _build_parser():
+    """Minimal replica of the slice of the hermes parser that exhibits
+    the bug: top-level subparsers (dest="command") and ``mcp add`` with
+    its ``--command`` flag.
+    """
+    parser = argparse.ArgumentParser(prog="hermes")
+    subparsers = parser.add_subparsers(dest="command")
+
+    subparsers.add_parser("chat")
+
+    mcp_p = subparsers.add_parser("mcp")
+    mcp_sub = mcp_p.add_subparsers(dest="mcp_action")
+
+    mcp_add = mcp_sub.add_parser("add")
+    mcp_add.add_argument("name")
+    mcp_add.add_argument("--url")
+    mcp_add.add_argument("--command", dest="mcp_command")
+
+    return parser
+
+
+class TestMcpAddCommandDest:
+    def test_url_invocation_preserves_top_level_command(self):
+        """`hermes mcp add foo --url ...` must keep args.command == "mcp".
+
+        Before the dest fix this was clobbered to None, sending the
+        dispatcher into the chat fallback.
+        """
+        parser = _build_parser()
+        args = parser.parse_args(
+            ["mcp", "add", "foo", "--url", "https://example.com/mcp"]
+        )
+
+        assert args.command == "mcp"
+        assert args.mcp_action == "add"
+        assert args.name == "foo"
+        assert args.url == "https://example.com/mcp"
+        assert args.mcp_command is None
+
+    def test_command_flag_writes_to_mcp_command_dest(self):
+        """`--command npx` must populate args.mcp_command, not args.command."""
+        parser = _build_parser()
+        args = parser.parse_args(
+            ["mcp", "add", "github", "--command", "npx"]
+        )
+
+        assert args.command == "mcp"
+        assert args.mcp_command == "npx"
+
+    def test_bare_mcp_add_does_not_clobber_command(self):
+        """Even without --url or --command, args.command stays "mcp".
+
+        Catches the regression at the parser layer regardless of which
+        transport flag the user passes.
+        """
+        parser = _build_parser()
+        args = parser.parse_args(["mcp", "add", "foo"])
+
+        assert args.command == "mcp"
+        assert args.mcp_command is None
+        assert args.url is None
--- a/tests/hermes_cli/test_mcp_config.py
+++ b/tests/hermes_cli/test_mcp_config.py
@ -43,7 +43,7 @@ def _make_args(**kwargs):
    defaults = {
        "name": "test-server",
        "url": None,
-        "command": None,
+        "mcp_command": None,
        "args": None,
        "auth": None,
        "preset": None,
@ -233,7 +233,7 @@ class TestMcpAdd:

        cmd_mcp_add(_make_args(
            name="github",
-            command="npx",
+            mcp_command="npx",
            args=["@mcp/github"],
        ))
        out = capsys.readouterr().out
@ -291,7 +291,7 @@ class TestMcpAdd:

        cmd_mcp_add(_make_args(
            name="github",
-            command="npx",
+            mcp_command="npx",
            args=["@mcp/github"],
            env=["MY_API_KEY=secret123", "DEBUG=true"],
        ))
@ -313,7 +313,7 @@ class TestMcpAdd:

        cmd_mcp_add(_make_args(
            name="github",
-            command="npx",
+            mcp_command="npx",
            args=["@mcp/github"],
            env=["BAD-NAME=value"],
        ))
@ -390,7 +390,7 @@ class TestMcpAdd:
        cmd_mcp_add(_make_args(
            name="custom",
            preset="testmcp",
-            command="uvx",
+            mcp_command="uvx",
            args=["custom-server"],
        ))
        out = capsys.readouterr().out
--- a/tests/hermes_cli/test_model_catalog.py
+++ b/tests/hermes_cli/test_model_catalog.py
@ -3,6 +3,7 @@
 from __future__ import annotations

 import json
+import os
 import time
 from pathlib import Path
 from unittest.mock import patch
@ -282,3 +283,48 @@ class TestIntegrationWithModelsModule:
            result = get_curated_nous_model_ids()

        assert result == ["anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6"]
+
+    def test_picker_nous_row_uses_manifest(self, tmp_path, monkeypatch):
+        """The /model picker must surface the manifest's nous list, not the
+        in-repo _PROVIDER_MODELS["nous"] snapshot. Regression: before this
+        fix, list_authenticated_providers() built the curated dict from
+        _PROVIDER_MODELS only — so newly-added Portal models never reached
+        the slash-command picker until the next Hermes release.
+        """
+        # We deliberately do NOT use the ``isolated_home`` fixture here:
+        # that fixture monkeypatches ``Path.home`` to ``tmp_path``, which
+        # trips the auth-store seat-belt in ``_auth_file_path()`` because
+        # ``HERMES_HOME / auth.json`` then resolves to the same path the
+        # seat-belt thinks is the "real" user store. Use the autouse
+        # ``_hermetic_environment`` HERMES_HOME directly instead.
+        import importlib
+        from hermes_cli import model_catalog
+        importlib.reload(model_catalog)
+        try:
+            from hermes_cli.model_switch import list_picker_providers
+
+            active_home = Path(os.environ["HERMES_HOME"])
+            (active_home / "auth.json").write_text(
+                json.dumps(
+                    {
+                        "providers": {"nous": {"access_token": "fake"}},
+                        "credential_pool": {},
+                    }
+                )
+            )
+
+            with patch.object(
+                model_catalog, "_fetch_manifest", return_value=_valid_manifest()
+            ):
+                picker = list_picker_providers(
+                    current_provider="nous", max_models=99
+                )
+        finally:
+            model_catalog.reset_cache()
+
+        nous_row = next((r for r in picker if r["slug"] == "nous"), None)
+        assert nous_row is not None, "nous row must appear when authed"
+        assert nous_row["models"] == [
+            "anthropic/claude-opus-4.7",
+            "moonshotai/kimi-k2.6",
+        ]
--- a/tests/hermes_cli/test_model_provider_persistence.py
+++ b/tests/hermes_cli/test_model_provider_persistence.py
@ -71,6 +71,32 @@ class TestSaveModelChoiceAlwaysDict:


 class TestProviderPersistsAfterModelSave:
+    def test_update_config_for_provider_uses_atomic_yaml_write(self, config_home):
+        """Provider switches should delegate config writes to atomic_yaml_write."""
+        from hermes_cli.auth import _update_config_for_provider
+
+        config_path = config_home / "config.yaml"
+        original_text = config_path.read_text(encoding="utf-8")
+
+        def _boom(path, data, **kwargs):
+            assert path == config_path
+            assert data["model"]["provider"] == "nous"
+            assert data["model"]["base_url"] == "https://inference.example.com/v1"
+            assert data["model"]["default"] == "some-old-model"
+            assert kwargs["sort_keys"] is False
+            raise OSError("simulated atomic write failure")
+
+        with patch("hermes_cli.auth.atomic_yaml_write", side_effect=_boom) as mock_write:
+            with pytest.raises(OSError, match="simulated atomic write failure"):
+                _update_config_for_provider(
+                    "nous",
+                    "https://inference.example.com/v1/",
+                    default_model="llama-3.3",
+                )
+
+        assert mock_write.call_count == 1
+        assert config_path.read_text(encoding="utf-8") == original_text
+
    def test_api_key_provider_saved_when_model_was_string(self, config_home, monkeypatch):
        """_model_flow_api_key_provider must persist the provider even when
        config.model started as a plain string."""
@ -260,32 +286,6 @@ class TestProviderPersistsAfterModelSave:
        assert model.get("default") == "minimax-m2.5"
        assert model.get("api_mode") == "anthropic_messages"

-    def test_lmstudio_provider_saved_when_selected(self, config_home, monkeypatch):
-        from hermes_cli.config import load_config
-        from hermes_cli.main import _model_flow_api_key_provider
-
-        monkeypatch.setenv("LM_API_KEY", "lm-token")
-        monkeypatch.setattr(
-            "hermes_cli.auth._prompt_model_selection",
-            lambda models, current_model="": "publisher/model-a",
-        )
-        monkeypatch.setattr("hermes_cli.auth.deactivate_provider", lambda: None)
-        monkeypatch.setattr(
-            "hermes_cli.models.fetch_lmstudio_models",
-            lambda api_key=None, base_url=None, timeout=5.0: ["publisher/model-a"],
-        )
-
-        with patch("builtins.input", side_effect=[""]):
-            _model_flow_api_key_provider(load_config(), "lmstudio", "old-model")
-
-        import yaml
-
-        config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
-        model = config.get("model")
-        assert isinstance(model, dict)
-        assert model.get("provider") == "lmstudio"
-        assert model.get("base_url") == "http://127.0.0.1:1234/v1"
-        assert model.get("default") == "publisher/model-a"


 class TestBaseUrlValidation:
@ -360,32 +360,3 @@ class TestBaseUrlValidation:
        saved = get_env_value("GLM_BASE_URL") or ""
        assert saved == "", "Empty input should not save a base URL"

-    def test_stepfun_provider_saved_with_selected_region(self, config_home, monkeypatch):
-        from hermes_cli.main import _model_flow_stepfun
-        from hermes_cli.config import load_config, get_env_value
-
-        monkeypatch.setenv("STEPFUN_API_KEY", "stepfun-test-key")
-
-        with patch(
-            "hermes_cli.main._prompt_provider_choice",
-            return_value=1,
-        ), patch(
-            "hermes_cli.models.fetch_api_models",
-            return_value=["step-3.5-flash", "step-3-agent-lite"],
-        ), patch(
-            "hermes_cli.auth._prompt_model_selection",
-            return_value="step-3-agent-lite",
-        ), patch(
-            "hermes_cli.auth.deactivate_provider",
-        ):
-            _model_flow_stepfun(load_config(), "old-model")
-
-        import yaml
-
-        config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
-        model = config.get("model")
-        assert isinstance(model, dict)
-        assert model.get("provider") == "stepfun"
-        assert model.get("default") == "step-3-agent-lite"
-        assert model.get("base_url") == "https://api.stepfun.com/step_plan/v1"
-        assert get_env_value("STEPFUN_BASE_URL") == "https://api.stepfun.com/step_plan/v1"
--- a/tests/hermes_cli/test_model_switch_custom_providers.py
+++ b/tests/hermes_cli/test_model_switch_custom_providers.py
@ -506,3 +506,64 @@ def test_lmstudio_picker_skips_probe_when_not_configured(monkeypatch):
    )

    assert "base_url" not in captured
+
+
+def test_custom_providers_uses_live_models_for_multi_model_endpoint(monkeypatch):
+    """Custom providers with api_key + base_url should prefer live /models.
+
+    Custom providers (section 4 of list_authenticated_providers) point at
+    gateways like Bifrost that expose hundreds of models.  Reading only the
+    static ``models:`` dict from config.yaml leaves the /model picker with
+    a stale subset.  Live discovery fills the picker with all available
+    models from the endpoint.
+    """
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr("hermes_cli.providers.HERMES_OVERLAYS", {})
+
+    calls = []
+
+    def fake_fetch_api_models(api_key, base_url):
+        calls.append((api_key, base_url))
+        return ["gateway-model-a", "gateway-model-b", "gateway-model-c"]
+
+    monkeypatch.setattr("hermes_cli.models.fetch_api_models", fake_fetch_api_models)
+
+    custom_providers = [
+        {
+            "name": "my-gateway",
+            "api_key": "sk-gateway-key",
+            "base_url": "https://gateway.example.com/v1",
+            "model": "gateway-model-a",
+            "models": {
+                "gateway-model-a": {"context_length": 128000},
+                "gateway-model-b": {"context_length": 128000},
+            },
+        }
+    ]
+
+    providers = list_authenticated_providers(
+        current_provider="openrouter",
+        current_base_url="https://openrouter.ai/api/v1",
+        custom_providers=custom_providers,
+        max_models=50,
+    )
+
+    gateway_prov = next(
+        (
+            p
+            for p in providers
+            if p.get("api_url") == "https://gateway.example.com/v1"
+        ),
+        None,
+    )
+
+    assert gateway_prov is not None, "Custom provider group not found in results"
+    assert calls == [("sk-gateway-key", "https://gateway.example.com/v1")], (
+        "fetch_api_models must be called with the custom provider's credentials"
+    )
+    assert gateway_prov["models"] == [
+        "gateway-model-a",
+        "gateway-model-b",
+        "gateway-model-c",
+    ], "Live models must replace the static subset"
+    assert gateway_prov["total_models"] == 3
--- a/tests/hermes_cli/test_model_validation.py
+++ b/tests/hermes_cli/test_model_validation.py
@ -770,15 +770,6 @@ class TestValidateCodexAutoCorrection:
        assert result.get("corrected_model") is None
        assert result["message"] is None

-    def test_very_different_name_falls_to_suggestions(self):
-        """Names too different for auto-correction are rejected with a suggestion list."""
-        codex_models = ["gpt-5.4-mini", "gpt-5.4", "gpt-5.3-codex"]
-        with patch("hermes_cli.models.provider_model_ids", return_value=codex_models):
-            result = validate_requested_model("totally-wrong", "openai-codex")
-        assert result["accepted"] is False
-        assert result["recognized"] is False
-        assert result.get("corrected_model") is None
-        assert "not found" in result["message"]


 # -- probe_api_models — Cloudflare UA mitigation --------------------------------
--- a/tests/hermes_cli/test_ollama_cloud_provider.py
+++ b/tests/hermes_cli/test_ollama_cloud_provider.py
@ -401,6 +401,103 @@ class TestOllamaCloudProvidersNew:
        assert pdef.transport == "openai_chat"


+# ── Cloud Suffix Stripping ──
+
+class TestOllamaCloudSuffixStripping:
+    """models.dev appends :cloud / -cloud suffixes that the live API omits.
+
+    fetch_ollama_cloud_models() must normalise these before the dedup merge so
+    users never see broken IDs like 'kimi-k2.6:cloud' in the model picker.
+    """
+
+    def test_strips_colon_cloud_suffix(self, tmp_path, monkeypatch):
+        """:cloud suffix from models.dev is stripped before merge."""
+        from hermes_cli.models import fetch_ollama_cloud_models
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)
+
+        mock_mdev = {
+            "ollama-cloud": {
+                "models": {"kimi-k2.6:cloud": {"tool_call": True}}
+            }
+        }
+        with patch("agent.models_dev.fetch_models_dev", return_value=mock_mdev):
+            result = fetch_ollama_cloud_models(force_refresh=True)
+
+        assert "kimi-k2.6" in result
+        assert "kimi-k2.6:cloud" not in result
+
+    def test_strips_dash_cloud_suffix(self, tmp_path, monkeypatch):
+        """-cloud suffix from models.dev is stripped before merge."""
+        from hermes_cli.models import fetch_ollama_cloud_models
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)
+
+        mock_mdev = {
+            "ollama-cloud": {
+                "models": {"qwen3-coder:480b-cloud": {"tool_call": True}}
+            }
+        }
+        with patch("agent.models_dev.fetch_models_dev", return_value=mock_mdev):
+            result = fetch_ollama_cloud_models(force_refresh=True)
+
+        assert "qwen3-coder:480b" in result
+        assert "qwen3-coder:480b-cloud" not in result
+
+    def test_no_duplicate_when_live_clean_and_mdev_suffixed(self, tmp_path, monkeypatch):
+        """Live API returns clean ID; mdev has :cloud variant — result has exactly one entry."""
+        from hermes_cli.models import fetch_ollama_cloud_models
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
+
+        mock_mdev = {
+            "ollama-cloud": {
+                "models": {
+                    "kimi-k2.6:cloud": {"tool_call": True},
+                    "glm-5.1:cloud": {"tool_call": True},
+                }
+            }
+        }
+        with patch("hermes_cli.models.fetch_api_models", return_value=["kimi-k2.6", "glm-5.1"]), \
+             patch("agent.models_dev.fetch_models_dev", return_value=mock_mdev):
+            result = fetch_ollama_cloud_models(force_refresh=True)
+
+        assert result.count("kimi-k2.6") == 1
+        assert result.count("glm-5.1") == 1
+        assert "kimi-k2.6:cloud" not in result
+        assert "glm-5.1:cloud" not in result
+
+    def test_unsuffixed_model_id_unchanged(self, tmp_path, monkeypatch):
+        """Model IDs without :cloud / -cloud suffix are passed through unchanged."""
+        from hermes_cli.models import fetch_ollama_cloud_models
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)
+
+        mock_mdev = {
+            "ollama-cloud": {
+                "models": {"nemotron-3-nano:30b": {"tool_call": True}}
+            }
+        }
+        with patch("agent.models_dev.fetch_models_dev", return_value=mock_mdev):
+            result = fetch_ollama_cloud_models(force_refresh=True)
+
+        assert "nemotron-3-nano:30b" in result
+
+    def test_strip_suffix_helper(self):
+        """Unit test for the _strip_ollama_cloud_suffix helper."""
+        from hermes_cli.models import _strip_ollama_cloud_suffix
+
+        assert _strip_ollama_cloud_suffix("kimi-k2.6:cloud") == "kimi-k2.6"
+        assert _strip_ollama_cloud_suffix("glm-5.1:cloud") == "glm-5.1"
+        assert _strip_ollama_cloud_suffix("qwen3-coder:480b-cloud") == "qwen3-coder:480b"
+        assert _strip_ollama_cloud_suffix("nemotron-3-nano:30b") == "nemotron-3-nano:30b"
+        assert _strip_ollama_cloud_suffix("") == ""
+
+
 # ── Auxiliary Model ──

 class TestOllamaCloudAuxiliary:
--- a/tests/hermes_cli/test_openai_codex_model_validation_fallback.py
+++ b/tests/hermes_cli/test_openai_codex_model_validation_fallback.py
@ -0,0 +1,64 @@
+"""Regression tests for OpenAI Codex model validation when the listing lags behind
+actually usable backend model IDs.
+
+The bug originally reported in #16172: `/model` and `switch_model()` rejected
+`gpt-5.3-codex-spark` because the curated listing omitted it, even though direct
+runtime calls succeeded. PR #19729 fixed this by soft-accepting unknown-but-
+plausible Codex slugs with a warning, and this test pins the soft-accept
+behavior so it doesn't regress.
+
+Note: gpt-5.3-codex-spark itself is now in the curated catalog (PR #22991),
+so the real-world Spark request takes the `recognized=True` fast path. This
+test still uses Spark as the example slug but explicitly mocks
+``provider_model_ids`` to omit it, exercising the soft-accept path generically
+for any future entitlement-gated Codex slug that ships before Hermes catalogs
+it.
+"""
+
+from unittest.mock import patch
+
+from hermes_cli.model_switch import switch_model
+from hermes_cli.models import validate_requested_model
+
+
+def test_openai_codex_unknown_but_plausible_model_is_accepted_with_warning():
+    """If the Codex listing is incomplete, `/model` should soft-accept the model
+    with a warning instead of hard-rejecting it.
+    """
+    with patch(
+        "hermes_cli.models.provider_model_ids",
+        return_value=["gpt-5.5", "gpt-5.4", "gpt-5.3-codex"],
+    ):
+        result = validate_requested_model("gpt-5.3-codex-spark", "openai-codex")
+
+    assert result["accepted"] is True
+    assert result["persist"] is True
+    assert result["recognized"] is False
+    assert "gpt-5.3-codex-spark" in result["message"]
+    assert "OpenAI Codex model listing" in result["message"]
+    assert "Similar models" in result["message"]
+    assert "gpt-5.3-codex" in result["message"]
+
+
+def test_switch_model_allows_openai_codex_model_missing_from_listing():
+    """switch_model() should succeed for Codex models that the runtime accepts
+    even when the listing has not caught up yet.
+    """
+    with patch(
+        "hermes_cli.models.provider_model_ids",
+        return_value=["gpt-5.5", "gpt-5.4", "gpt-5.3-codex"],
+    ):
+        result = switch_model(
+            "gpt-5.3-codex-spark",
+            current_provider="openai-codex",
+            current_model="gpt-5.4",
+            current_base_url="",
+            current_api_key="",
+            user_providers=None,
+        )
+
+    assert result.success is True
+    assert result.new_model == "gpt-5.3-codex-spark"
+    assert result.target_provider == "openai-codex"
+    assert result.warning_message
+    assert "OpenAI Codex model listing" in result.warning_message
--- a/tests/hermes_cli/test_opencode_go_flat_namespace.py
+++ b/tests/hermes_cli/test_opencode_go_flat_namespace.py
@ -0,0 +1,159 @@
+"""Tests for opencode-go / opencode-zen flat-namespace model handling.
+
+OpenCode Go is NOT a vendor/model aggregator like OpenRouter — its
+``/v1/models`` endpoint returns bare IDs (``minimax-m2.7``, ``deepseek-v4-flash``)
+and the inference API rejects vendor-prefixed names with HTTP 401
+"Model not supported".
+
+Two bugs this exercises:
+
+1. ``switch_model('deepseek-v4-flash', current_provider='opencode-go')`` used
+   to silently switch the user off opencode-go to native ``deepseek`` because
+   ``detect_provider_for_model`` matched the bare name against the static
+   deepseek catalog.  Fix: once step d matches the model in the current
+   aggregator's live catalog, skip ``detect_provider_for_model``.
+
+2. ``normalize_model_for_provider('minimax/minimax-m2.7', 'opencode-go')``
+   used to pass the ``minimax/`` prefix through unchanged.  When user configs
+   contained prefixed fallback entries (commonly copied from aggregator slugs),
+   the fallback activation path sent ``minimax/minimax-m2.7`` to opencode-go
+   which returned HTTP 401.  Fix: opencode-go/opencode-zen strip ANY leading
+   ``vendor/`` prefix because their APIs are flat-namespace.
+"""
+
+from unittest.mock import patch
+
+from hermes_cli.model_normalize import normalize_model_for_provider
+from hermes_cli.model_switch import switch_model
+
+
+# Live catalog opencode-go currently returns from /v1/models (snapshot).
+_OPENCODE_GO_LIVE = [
+    "minimax-m2.7", "minimax-m2.5",
+    "kimi-k2.6", "kimi-k2.5",
+    "glm-5.1", "glm-5",
+    "deepseek-v4-pro", "deepseek-v4-flash",
+    "qwen3.6-plus", "qwen3.5-plus",
+    "mimo-v2-pro", "mimo-v2-omni", "mimo-v2.5-pro", "mimo-v2.5",
+]
+
+
+# ---------------------------------------------------------------------------
+# normalize_model_for_provider: strip vendor prefix for flat-namespace providers
+# ---------------------------------------------------------------------------
+
+
+def test_opencode_go_strips_deepseek_prefix():
+    assert normalize_model_for_provider(
+        "deepseek/deepseek-v4-flash", "opencode-go"
+    ) == "deepseek-v4-flash"
+
+
+def test_opencode_go_strips_minimax_prefix():
+    assert normalize_model_for_provider(
+        "minimax/minimax-m2.7", "opencode-go"
+    ) == "minimax-m2.7"
+
+
+def test_opencode_go_strips_moonshotai_prefix():
+    # Moonshot's aggregator vendor is `moonshotai/...` — a common copy-paste
+    # from OpenRouter slugs.  opencode-go serves it bare as `kimi-k2.6`.
+    assert normalize_model_for_provider(
+        "moonshotai/kimi-k2.6", "opencode-go"
+    ) == "kimi-k2.6"
+
+
+def test_opencode_go_bare_name_unchanged():
+    assert normalize_model_for_provider(
+        "kimi-k2.6", "opencode-go"
+    ) == "kimi-k2.6"
+
+
+def test_opencode_go_preserves_dot_versioning():
+    # opencode-go uses dot-versioned IDs (`mimo-v2.5-pro`, not hyphen).
+    assert normalize_model_for_provider(
+        "xiaomi/mimo-v2.5-pro", "opencode-go"
+    ) == "mimo-v2.5-pro"
+
+
+def test_opencode_zen_still_hyphenates_claude():
+    # Regression: opencode-zen's Claude hyphen conversion must still work.
+    assert normalize_model_for_provider(
+        "anthropic/claude-sonnet-4.6", "opencode-zen"
+    ) == "claude-sonnet-4-6"
+
+
+def test_opencode_zen_bare_claude_hyphenated():
+    assert normalize_model_for_provider(
+        "claude-sonnet-4.6", "opencode-zen"
+    ) == "claude-sonnet-4-6"
+
+
+def test_opencode_zen_strips_arbitrary_vendor_prefix():
+    assert normalize_model_for_provider(
+        "minimax/minimax-m2.5-free", "opencode-zen"
+    ) == "minimax-m2.5-free"
+
+
+def test_openrouter_still_prepends_vendor():
+    # Regression: real aggregators must still get vendor/model format.
+    assert normalize_model_for_provider(
+        "claude-sonnet-4.6", "openrouter"
+    ) == "anthropic/claude-sonnet-4.6"
+
+
+# ---------------------------------------------------------------------------
+# switch_model: live-catalog match on opencode-go must not trigger
+# cross-provider auto-switch via detect_provider_for_model
+# ---------------------------------------------------------------------------
+
+
+def _run_switch(raw_input: str, **extra):
+    """Call switch_model with opencode-go as current provider, mocking the
+    live catalog so the test doesn't hit the network."""
+    defaults = dict(
+        current_provider="opencode-go",
+        current_model="kimi-k2.6",
+        current_base_url="https://opencode.ai/zen/go/v1",
+        current_api_key="sk-test-opencode-go",
+        is_global=False,
+    )
+    defaults.update(extra)
+
+    def fake_list_provider_models(provider: str):
+        if provider == "opencode-go":
+            return list(_OPENCODE_GO_LIVE)
+        # For other providers, return empty so tests don't depend on them.
+        return []
+
+    with patch(
+        "hermes_cli.model_switch.list_provider_models",
+        side_effect=fake_list_provider_models,
+    ):
+        return switch_model(raw_input=raw_input, **defaults)
+
+
+def test_deepseek_v4_flash_stays_on_opencode_go():
+    """Regression: ``/model deepseek-v4-flash`` while on opencode-go must
+    NOT switch to native deepseek just because deepseek's static catalog
+    also contains that name."""
+    result = _run_switch("deepseek-v4-flash")
+    assert result.target_provider == "opencode-go", (
+        f"Expected to stay on opencode-go, got {result.target_provider}. "
+        f"detect_provider_for_model hijacked the bare name."
+    )
+    assert result.new_model == "deepseek-v4-flash"
+
+
+def test_deepseek_v4_pro_stays_on_opencode_go():
+    """Same bug class as the flash variant."""
+    result = _run_switch("deepseek-v4-pro")
+    assert result.target_provider == "opencode-go"
+    assert result.new_model == "deepseek-v4-pro"
+
+
+def test_kimi_k2_6_stays_on_opencode_go():
+    """Regression guard: this path was always working, keep it working."""
+    result = _run_switch("kimi-k2.6", current_model="deepseek-v4-pro")
+    assert result.target_provider == "opencode-go"
+    assert result.new_model == "kimi-k2.6"
--- a/tests/hermes_cli/test_pin_kanban_board_env.py
+++ b/tests/hermes_cli/test_pin_kanban_board_env.py
@ -0,0 +1,75 @@
+"""Tests for `_pin_kanban_board_env` helper invoked by `cmd_chat`.
+
+Regression coverage for #20074: a chat session must export the active kanban
+board into `HERMES_KANBAN_BOARD` at boot so subprocess shell-outs (e.g.
+`hermes kanban …`) inherit the same board the in-process kanban tools resolve.
+Without this, a concurrent `hermes kanban boards switch` from another session
+can flip the global current-board file mid-turn and silently divert the
+shell calls to a different DB.
+"""
+import importlib
+import os
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _isolate_kanban_board_env():
+    """Snapshot `HERMES_KANBAN_BOARD` and restore it after the test.
+
+    `_pin_kanban_board_env()` writes to ``os.environ`` directly, bypassing
+    any ``monkeypatch.setenv`` tracking. Without this fixture the mutation
+    leaks into subsequent tests and breaks anything that resolves a kanban
+    path from the env (e.g. ``TestSharedBoardPaths`` in test_kanban_db.py).
+    """
+    prev = os.environ.get("HERMES_KANBAN_BOARD")
+    os.environ.pop("HERMES_KANBAN_BOARD", None)
+    try:
+        yield
+    finally:
+        if prev is None:
+            os.environ.pop("HERMES_KANBAN_BOARD", None)
+        else:
+            os.environ["HERMES_KANBAN_BOARD"] = prev
+
+
+def test_pin_writes_resolved_board_when_env_unset(monkeypatch):
+    main_mod = importlib.import_module("hermes_cli.main")
+
+    import hermes_cli.kanban_db as kdb
+    monkeypatch.setattr(kdb, "get_current_board", lambda: "space")
+
+    main_mod._pin_kanban_board_env()
+
+    assert main_mod.os.environ.get("HERMES_KANBAN_BOARD") == "space"
+
+
+def test_pin_does_not_overwrite_existing_env(monkeypatch):
+    monkeypatch.setenv("HERMES_KANBAN_BOARD", "preset")
+    main_mod = importlib.import_module("hermes_cli.main")
+
+    import hermes_cli.kanban_db as kdb
+
+    def _explode():
+        raise AssertionError("get_current_board must not be called when env is set")
+
+    monkeypatch.setattr(kdb, "get_current_board", _explode)
+
+    main_mod._pin_kanban_board_env()
+
+    assert main_mod.os.environ.get("HERMES_KANBAN_BOARD") == "preset"
+
+
+def test_pin_swallows_resolution_failures(monkeypatch):
+    main_mod = importlib.import_module("hermes_cli.main")
+
+    import hermes_cli.kanban_db as kdb
+
+    def _boom():
+        raise RuntimeError("disk gone")
+
+    monkeypatch.setattr(kdb, "get_current_board", _boom)
+
+    main_mod._pin_kanban_board_env()
+
+    assert "HERMES_KANBAN_BOARD" not in main_mod.os.environ
--- a/tests/hermes_cli/test_plugins.py
+++ b/tests/hermes_cli/test_plugins.py
@ -21,6 +21,7 @@ from hermes_cli.plugins import (
    get_plugin_command_handler,
    get_plugin_commands,
    get_pre_tool_call_block_message,
+    resolve_plugin_command_result,
    discover_plugins,
    invoke_hook,
 )
@ -329,6 +330,7 @@ class TestPluginHooks:
        assert "post_api_request" in VALID_HOOKS
        assert "transform_terminal_output" in VALID_HOOKS
        assert "transform_tool_result" in VALID_HOOKS
+        assert "transform_llm_output" in VALID_HOOKS

    def test_valid_hooks_include_pre_gateway_dispatch(self):
        assert "pre_gateway_dispatch" in VALID_HOOKS
@ -1061,6 +1063,45 @@ class TestPluginCommands:
        assert mgr._plugin_commands["cmd-b"]["plugin"] == "plugin-b"


+class TestPluginCommandResultResolution:
+    def test_returns_sync_values_unchanged(self):
+        assert resolve_plugin_command_result("ok") == "ok"
+
+    def test_awaits_async_result_without_running_loop(self):
+        async def _handler():
+            return "async-ok"
+
+        assert resolve_plugin_command_result(_handler()) == "async-ok"
+
+    def test_awaits_async_result_with_running_loop(self, monkeypatch):
+        class _Loop:
+            pass
+
+        async def _handler():
+            return "threaded-ok"
+
+        monkeypatch.setattr("hermes_cli.plugins.asyncio.get_running_loop", lambda: _Loop())
+        assert resolve_plugin_command_result(_handler()) == "threaded-ok"
+
+    def test_running_loop_timeout_does_not_hang_forever(self, monkeypatch):
+        """Threaded path must abort a hung async handler instead of blocking the caller."""
+        import asyncio as _asyncio
+
+        class _Loop:
+            pass
+
+        async def _slow_handler():
+            await _asyncio.sleep(10)
+            return "should-not-reach"
+
+        monkeypatch.setattr("hermes_cli.plugins.asyncio.get_running_loop", lambda: _Loop())
+        monkeypatch.setattr("hermes_cli.plugins._PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS", 0.1)
+
+        import pytest
+        with pytest.raises(TimeoutError):
+            resolve_plugin_command_result(_slow_handler())
+
+
 # ── TestPluginDispatchTool ────────────────────────────────────────────────


@ -1191,3 +1232,77 @@ class TestPluginDispatchTool:
            result = ctx.dispatch_tool("fake", {})

        assert '"error"' in result
+
+
+class TestPluginDebugLogging:
+    """HERMES_PLUGINS_DEBUG opt-in stderr handler for plugin developers."""
+
+    def test_debug_handler_not_installed_when_env_var_absent(self, monkeypatch):
+        """Without the env var, no stderr handler is attached."""
+        monkeypatch.delenv("HERMES_PLUGINS_DEBUG", raising=False)
+        from hermes_cli import plugins as plugins_mod
+
+        # Snapshot, then force a re-evaluation.
+        original_installed = plugins_mod._DEBUG_HANDLER_INSTALLED
+        original_debug = plugins_mod._PLUGINS_DEBUG
+        original_handlers = list(plugins_mod.logger.handlers)
+        try:
+            plugins_mod._DEBUG_HANDLER_INSTALLED = False
+            plugins_mod._install_plugin_debug_handler(force=True)
+            assert plugins_mod._PLUGINS_DEBUG is False
+            assert plugins_mod._DEBUG_HANDLER_INSTALLED is False
+            # No new stderr handler was attached.
+            assert plugins_mod.logger.handlers == original_handlers
+        finally:
+            plugins_mod._DEBUG_HANDLER_INSTALLED = original_installed
+            plugins_mod._PLUGINS_DEBUG = original_debug
+            plugins_mod.logger.handlers = original_handlers
+
+    def test_debug_handler_installed_when_env_var_set(self, monkeypatch):
+        """With HERMES_PLUGINS_DEBUG=1, a DEBUG-level stderr handler is attached."""
+        monkeypatch.setenv("HERMES_PLUGINS_DEBUG", "1")
+        from hermes_cli import plugins as plugins_mod
+
+        original_installed = plugins_mod._DEBUG_HANDLER_INSTALLED
+        original_debug = plugins_mod._PLUGINS_DEBUG
+        original_level = plugins_mod.logger.level
+        original_handlers = list(plugins_mod.logger.handlers)
+        try:
+            plugins_mod._DEBUG_HANDLER_INSTALLED = False
+            plugins_mod._install_plugin_debug_handler(force=True)
+            assert plugins_mod._PLUGINS_DEBUG is True
+            assert plugins_mod._DEBUG_HANDLER_INSTALLED is True
+            assert plugins_mod.logger.level == logging.DEBUG
+            new_handlers = [
+                h for h in plugins_mod.logger.handlers if h not in original_handlers
+            ]
+            assert len(new_handlers) == 1
+            assert isinstance(new_handlers[0], logging.StreamHandler)
+            assert new_handlers[0].level == logging.DEBUG
+        finally:
+            plugins_mod._DEBUG_HANDLER_INSTALLED = original_installed
+            plugins_mod._PLUGINS_DEBUG = original_debug
+            plugins_mod.logger.setLevel(original_level)
+            plugins_mod.logger.handlers = original_handlers
+
+    def test_debug_handler_idempotent(self, monkeypatch):
+        """Calling install twice (without force) does not double-attach."""
+        monkeypatch.setenv("HERMES_PLUGINS_DEBUG", "1")
+        from hermes_cli import plugins as plugins_mod
+
+        original_installed = plugins_mod._DEBUG_HANDLER_INSTALLED
+        original_debug = plugins_mod._PLUGINS_DEBUG
+        original_level = plugins_mod.logger.level
+        original_handlers = list(plugins_mod.logger.handlers)
+        try:
+            plugins_mod._DEBUG_HANDLER_INSTALLED = False
+            plugins_mod._install_plugin_debug_handler(force=True)
+            count_after_first = len(plugins_mod.logger.handlers)
+            plugins_mod._install_plugin_debug_handler()  # no force
+            count_after_second = len(plugins_mod.logger.handlers)
+            assert count_after_first == count_after_second
+        finally:
+            plugins_mod._DEBUG_HANDLER_INSTALLED = original_installed
+            plugins_mod._PLUGINS_DEBUG = original_debug
+            plugins_mod.logger.setLevel(original_level)
+            plugins_mod.logger.handlers = original_handlers
--- a/tests/hermes_cli/test_plugins_cmd.py
+++ b/tests/hermes_cli/test_plugins_cmd.py
@ -12,9 +12,11 @@ import pytest
 import yaml

 from hermes_cli.plugins_cmd import (
+    PluginOperationError,
    _copy_example_files,
    _read_manifest,
    _repo_name_from_url,
+    _resolve_git_executable,
    _resolve_git_url,
    _sanitize_plugin_name,
    plugins_command,
@ -99,6 +101,69 @@ class TestResolveGitUrl:
            _resolve_git_url("a/b/c")


+# ── _resolve_git_executable ─────────────────────────────────────────────────
+
+
+class TestResolveGitExecutable:
+    """Fallback resolution when bare ``git`` is not discoverable via ``PATH``."""
+
+    def teardown_method(self):
+        _resolve_git_executable.cache_clear()
+
+    def test_prefers_shutil_which(self):
+        import hermes_cli.plugins_cmd as pc
+
+        _resolve_git_executable.cache_clear()
+        with patch.object(pc.shutil, "which", return_value="/usr/local/bin/git"):
+            assert pc._resolve_git_executable() == "/usr/local/bin/git"
+
+    def test_fallback_posix_first_matching_path(self):
+        import hermes_cli.plugins_cmd as pc
+
+        _resolve_git_executable.cache_clear()
+
+        def _isfile(p: str) -> bool:
+            return p == "/usr/local/bin/git"
+
+        with patch.object(pc.shutil, "which", return_value=None):
+            with patch.object(pc.os, "name", "posix"):
+                with patch.object(pc.os.path, "isfile", side_effect=_isfile):
+                    assert pc._resolve_git_executable() == "/usr/local/bin/git"
+
+    def test_returns_none_when_unavailable(self):
+        import hermes_cli.plugins_cmd as pc
+
+        _resolve_git_executable.cache_clear()
+        with patch.object(pc.shutil, "which", return_value=None):
+            with patch.object(pc.os, "name", "posix"):
+                with patch.object(pc.os.path, "isfile", return_value=False):
+                    assert pc._resolve_git_executable() is None
+
+    def test_git_pull_uses_resolved_executable(self, tmp_path):
+        import hermes_cli.plugins_cmd as pc
+
+        _resolve_git_executable.cache_clear()
+        with patch.object(
+            pc,
+            "_resolve_git_executable",
+            return_value="/resolved/git",
+        ):
+            with patch.object(pc.subprocess, "run") as run:
+                run.return_value = MagicMock(returncode=0, stdout="Already up to date\n", stderr="")
+                ok, msg = pc._git_pull_plugin_dir(tmp_path)
+        assert ok is True
+        run.assert_called_once()
+        assert run.call_args[0][0][0] == "/resolved/git"
+
+    def test_install_core_raises_when_git_unresolved(self):
+        import hermes_cli.plugins_cmd as pc
+
+        _resolve_git_executable.cache_clear()
+        with patch.object(pc, "_resolve_git_executable", return_value=None):
+            with pytest.raises(PluginOperationError, match="git is not installed"):
+                pc._install_plugin_core("owner/repo", force=True)
+
+
 # ── _repo_name_from_url ──────────────────────────────────────────────────


@ -508,7 +573,7 @@ class TestPromptPluginEnvVars:


 class TestCursesRadiolist:
-    """Test the curses_radiolist function (non-TTY fallback path)."""
+    """Test the curses_radiolist function."""

    def test_non_tty_returns_default(self):
        from hermes_cli.curses_ui import curses_radiolist
@ -524,6 +589,14 @@ class TestCursesRadiolist:
            result = curses_radiolist("Pick", ["x", "y"], selected=0, cancel_returns=1)
            assert result == 1

+    def test_keyboard_interrupt_returns_cancel_value(self):
+        from hermes_cli.curses_ui import curses_radiolist
+
+        with patch("sys.stdin") as mock_stdin, patch("curses.wrapper", side_effect=KeyboardInterrupt):
+            mock_stdin.isatty.return_value = True
+            result = curses_radiolist("Pick", ["x", "y"], selected=0, cancel_returns=-1)
+            assert result == -1
+

 # ── Provider discovery helpers ───────────────────────────────────────────

--- a/tests/hermes_cli/test_post_setup_gating.py
+++ b/tests/hermes_cli/test_post_setup_gating.py
@ -0,0 +1,71 @@
+"""Tests for the post_setup install-state gate in `_toolset_needs_configuration_prompt`.
+
+Regression coverage for the cua-driver silent-no-op bug (issue #22737).
+
+When a no-key provider's only install side-effect is a `post_setup` hook
+(cua-driver, etc.), the gate function used to fall through to the
+`_toolset_has_keys` catch-all, which returned True for any provider with
+empty `env_vars` — causing `hermes tools` to write the toolset to config
+and exit `✓ Saved` without ever invoking the post_setup install. These
+tests pin the new predicate-aware behaviour so the regression doesn't
+sneak back in.
+"""
+
+from __future__ import annotations
+
+
+class TestPostSetupGate:
+    def test_cua_driver_missing_forces_setup(self, monkeypatch, tmp_path):
+        """When cua-driver isn't on PATH, the gate must return True so the
+        provider-setup flow runs and triggers `_run_post_setup`."""
+        from hermes_cli import tools_config
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        monkeypatch.setattr(tools_config.shutil, "which", lambda name: None)
+
+        assert tools_config._toolset_needs_configuration_prompt(
+            "computer_use", {}
+        ) is True
+
+    def test_cua_driver_installed_skips_setup(self, monkeypatch, tmp_path):
+        """When cua-driver is already on PATH, the gate must return False
+        so a re-save through `hermes tools` doesn't re-prompt the user."""
+        from hermes_cli import tools_config
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        monkeypatch.setattr(
+            tools_config.shutil,
+            "which",
+            lambda name: "/usr/local/bin/cua-driver" if name == "cua-driver" else None,
+        )
+
+        assert tools_config._toolset_needs_configuration_prompt(
+            "computer_use", {}
+        ) is False
+
+    def test_post_setup_predicate_exception_does_not_block(self, monkeypatch):
+        """A predicate that raises must be treated as 'satisfied' so a
+        broken check can't strand the user in an infinite setup loop."""
+        from hermes_cli import tools_config
+
+        def _boom():
+            raise RuntimeError("predicate broken")
+
+        monkeypatch.setitem(tools_config._POST_SETUP_INSTALLED, "cua_driver", _boom)
+        assert tools_config._post_setup_already_installed("cua_driver") is True
+
+    def test_unregistered_post_setup_treated_as_satisfied(self):
+        """post_setup keys without a registered predicate must default to
+        'satisfied' so we don't change behaviour for hooks we haven't
+        explicitly opted in (kittentts, piper, agent_browser, etc.)."""
+        from hermes_cli import tools_config
+
+        assert tools_config._post_setup_already_installed("does_not_exist") is True
+
+    def test_cua_driver_predicate_registered(self):
+        """Keep an explicit pin on the cua_driver entry so accidental
+        deletion of the registry row would fail this test rather than
+        silently restore the original silent-no-op bug."""
+        from hermes_cli import tools_config
+
+        assert "cua_driver" in tools_config._POST_SETUP_INSTALLED
--- a/tests/hermes_cli/test_profile_distribution.py
+++ b/tests/hermes_cli/test_profile_distribution.py
@ -0,0 +1,584 @@
+"""Tests for hermes_cli.profile_distribution — git-based profile installs.
+
+Covers manifest parsing, version requirement checks, install / update / describe
+on local-directory sources, and guards on what can and can't be installed.
+
+Transport-layer tests (git clone, URL handling) are exercised through live
+E2E runs, not unit tests — git itself is tested upstream, and subprocess-
+mocking git would just test the mock.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+import pytest
+
+from hermes_cli.profile_distribution import (
+    DEFAULT_DIST_OWNED,
+    DistributionError,
+    DistributionManifest,
+    EnvRequirement,
+    MANIFEST_FILENAME,
+    USER_OWNED_EXCLUDE,
+    _env_template_from_manifest,
+    _looks_like_git_url,
+    _parse_semver,
+    check_hermes_requires,
+    describe_distribution,
+    install_distribution,
+    plan_install,
+    read_manifest,
+    update_distribution,
+    write_manifest,
+)
+
+
+# ---------------------------------------------------------------------------
+# Isolated profile env (matches tests/hermes_cli/test_profiles.py)
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture()
+def profile_env(tmp_path, monkeypatch):
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    default_home = tmp_path / ".hermes"
+    default_home.mkdir(exist_ok=True)
+    monkeypatch.setenv("HERMES_HOME", str(default_home))
+    return tmp_path
+
+
+def _make_staging_dir(root: Path, name: str = "src", *, manifest: DistributionManifest = None) -> Path:
+    """Build a local distribution staging directory (what a git clone would
+    contain after .git is removed).
+
+    Lays down a minimal but representative tree: SOUL.md, config.yaml,
+    mcp.json, one skill, one cron file, plus the distribution.yaml manifest.
+    """
+    staged = root / f"staging_{name}"
+    staged.mkdir(parents=True, exist_ok=True)
+    (staged / "SOUL.md").write_text("I am Source.\n")
+    (staged / "config.yaml").write_text("model:\n  model: gpt-4\n")
+    (staged / "mcp.json").write_text('{"servers": {}}\n')
+    (staged / "skills").mkdir(exist_ok=True)
+    (staged / "skills" / "demo").mkdir(exist_ok=True)
+    (staged / "skills" / "demo" / "SKILL.md").write_text(
+        "---\nname: demo\ndescription: test\n---\n# Demo skill\n"
+    )
+    (staged / "cron").mkdir(exist_ok=True)
+    (staged / "cron" / "daily.json").write_text('{"schedule": "0 9 * * *"}')
+
+    mf = manifest or DistributionManifest(name=name, version="0.1.0")
+    write_manifest(staged, mf)
+    return staged
+
+
+# ===========================================================================
+# Manifest parsing
+# ===========================================================================
+
+
+class TestManifestParsing:
+
+    def test_minimal_manifest(self, tmp_path):
+        (tmp_path / MANIFEST_FILENAME).write_text("name: minimal\n")
+        m = read_manifest(tmp_path)
+        assert m.name == "minimal"
+        assert m.version == "0.1.0"
+        assert m.env_requires == []
+        assert m.distribution_owned == []
+
+    def test_full_manifest(self, tmp_path):
+        (tmp_path / MANIFEST_FILENAME).write_text(
+            "name: telem\n"
+            "version: 1.2.3\n"
+            "description: Telem monitor\n"
+            "hermes_requires: '>=0.12.0'\n"
+            "author: Kyle\n"
+            "license: MIT\n"
+            "env_requires:\n"
+            "  - name: OPENAI_API_KEY\n"
+            "    description: OpenAI key\n"
+            "  - name: GRAPH_URL\n"
+            "    required: false\n"
+            "    default: http://127.0.0.1:8000\n"
+            "distribution_owned:\n"
+            "  - SOUL.md\n"
+            "  - skills/\n"
+        )
+        m = read_manifest(tmp_path)
+        assert m.name == "telem"
+        assert m.version == "1.2.3"
+        assert m.author == "Kyle"
+        assert m.license == "MIT"
+        assert len(m.env_requires) == 2
+        assert m.env_requires[0].name == "OPENAI_API_KEY"
+        assert m.env_requires[0].required is True
+        assert m.env_requires[1].required is False
+        assert m.env_requires[1].default == "http://127.0.0.1:8000"
+        assert m.distribution_owned == ["SOUL.md", "skills"]
+
+    def test_missing_name_rejected(self, tmp_path):
+        (tmp_path / MANIFEST_FILENAME).write_text("version: 1.0\n")
+        with pytest.raises(DistributionError, match="missing 'name'"):
+            read_manifest(tmp_path)
+
+    def test_env_requires_not_list_rejected(self, tmp_path):
+        (tmp_path / MANIFEST_FILENAME).write_text(
+            "name: bad\nenv_requires:\n  name: FOO\n"
+        )
+        with pytest.raises(DistributionError, match="env_requires must be a list"):
+            read_manifest(tmp_path)
+
+    def test_read_manifest_returns_none_when_absent(self, tmp_path):
+        assert read_manifest(tmp_path) is None
+
+    def test_owned_paths_default(self):
+        m = DistributionManifest(name="x")
+        assert m.owned_paths() == list(DEFAULT_DIST_OWNED)
+
+    def test_owned_paths_explicit(self):
+        m = DistributionManifest(name="x", distribution_owned=["SOUL.md", "skills"])
+        assert m.owned_paths() == ["SOUL.md", "skills"]
+
+    def test_roundtrip_write_read(self, tmp_path):
+        original = DistributionManifest(
+            name="rt",
+            version="1.0.0",
+            description="roundtrip",
+            env_requires=[EnvRequirement(name="FOO", description="foo")],
+        )
+        write_manifest(tmp_path, original)
+        parsed = read_manifest(tmp_path)
+        assert parsed.name == "rt"
+        assert parsed.env_requires[0].name == "FOO"
+
+
+# ===========================================================================
+# Version requirement checks
+# ===========================================================================
+
+
+class TestVersionRequires:
+
+    @pytest.mark.parametrize("spec,cur,ok", [
+        ("", "0.1.0", True),
+        (">=0.12.0", "0.12.0", True),
+        (">=0.12.0", "0.13.0", True),
+        (">=0.12.0", "0.11.9", False),
+        ("==0.12.0", "0.12.0", True),
+        ("==0.12.0", "0.13.0", False),
+        ("!=0.12.0", "0.13.0", True),
+        (">0.12.0", "0.12.1", True),
+        (">0.12.0", "0.12.0", False),
+        ("<0.13.0", "0.12.9", True),
+        ("<=0.12.0", "0.12.0", True),
+        ("0.12.0", "0.13.0", True),     # Bare = >=
+        ("0.12.0", "0.11.0", False),    # Bare = >=
+    ])
+    def test_check_matrix(self, spec, cur, ok):
+        if ok:
+            check_hermes_requires(spec, cur)
+        else:
+            with pytest.raises(DistributionError, match="requires Hermes"):
+                check_hermes_requires(spec, cur)
+
+    def test_parse_semver_handles_prerelease(self):
+        assert _parse_semver("0.12.0-rc1") == (0, 12, 0)
+        assert _parse_semver("v0.12.0+abc") == (0, 12, 0)
+
+    def test_parse_semver_pads(self):
+        assert _parse_semver("1") == (1, 0, 0)
+        assert _parse_semver("1.2") == (1, 2, 0)
+
+    def test_parse_semver_rejects_garbage(self):
+        with pytest.raises(DistributionError, match="Unparseable"):
+            _parse_semver("not-a-version")
+
+
+# ===========================================================================
+# Env template
+# ===========================================================================
+
+
+class TestEnvTemplate:
+
+    def test_required_is_uncommented(self):
+        m = DistributionManifest(
+            name="x",
+            env_requires=[EnvRequirement(name="FOO", description="foo key")],
+        )
+        out = _env_template_from_manifest(m)
+        assert "# foo key" in out
+        assert "# (required)" in out
+        assert "FOO=" in out
+        # No leading `# ` before FOO=
+        assert "\nFOO=" in out or out.startswith("FOO=") or "\nFOO=\n" in out or "FOO=\n" in out
+
+    def test_optional_is_commented(self):
+        m = DistributionManifest(
+            name="x",
+            env_requires=[EnvRequirement(name="BAR", required=False, default="http://x")],
+        )
+        out = _env_template_from_manifest(m)
+        assert "# (optional)" in out
+        assert "# BAR=http://x" in out
+
+    def test_empty_env_requires_is_header_only(self):
+        m = DistributionManifest(name="x")
+        out = _env_template_from_manifest(m)
+        assert "Hermes distribution" in out
+        assert "FOO" not in out
+
+
+# ===========================================================================
+# Source URL detection
+# ===========================================================================
+
+
+class TestLooksLikeGitUrl:
+
+    @pytest.mark.parametrize("src", [
+        "github.com/user/repo",
+        "https://github.com/user/repo",
+        "https://github.com/user/repo.git",
+        "http://example.com/repo",
+        "git@github.com:user/repo.git",
+        "ssh://git@example.com/repo.git",
+        "git://example.com/repo.git",
+    ])
+    def test_accepts_git_sources(self, src):
+        assert _looks_like_git_url(src)
+
+    @pytest.mark.parametrize("src", [
+        "/tmp/local/path",
+        "./relative/dir",
+        "~/profile",
+        "some-random-string",
+    ])
+    def test_rejects_non_git(self, src):
+        assert not _looks_like_git_url(src)
+
+
+# ===========================================================================
+# Install — fresh and force (from a local-directory source)
+# ===========================================================================
+
+
+class TestInstall:
+
+    def test_install_from_directory(self, profile_env):
+        staged = _make_staging_dir(profile_env, "src")
+        plan = install_distribution(str(staged), name="installed")
+        assert plan.target_dir.is_dir()
+        assert (plan.target_dir / "SOUL.md").read_text() == "I am Source.\n"
+        assert (plan.target_dir / "skills" / "demo" / "SKILL.md").exists()
+        assert (plan.target_dir / "mcp.json").exists()
+        # Manifest on disk records canonical name + provenance
+        m = read_manifest(plan.target_dir)
+        assert m.name == "installed"
+        assert m.source == str(staged)
+
+    def test_install_uses_manifest_name_when_no_override(self, profile_env):
+        mf = DistributionManifest(name="telem", version="1.0.0")
+        staged = _make_staging_dir(profile_env, "telem", manifest=mf)
+        plan = install_distribution(str(staged))
+        assert plan.manifest.name == "telem"
+        assert plan.target_dir.name == "telem"
+
+    def test_install_rejects_existing_without_force(self, profile_env):
+        staged = _make_staging_dir(profile_env, "src")
+        install_distribution(str(staged), name="existing")
+        with pytest.raises(DistributionError, match="already exists"):
+            install_distribution(str(staged), name="existing")
+
+    def test_install_with_force_overwrites(self, profile_env):
+        staged = _make_staging_dir(profile_env, "src")
+        install_distribution(str(staged), name="target")
+        # Install again with --force succeeds
+        plan = install_distribution(str(staged), name="target", force=True)
+        assert plan.target_dir.is_dir()
+
+    def test_install_rejects_default_name(self, profile_env):
+        staged = _make_staging_dir(profile_env, "src")
+        with pytest.raises(DistributionError, match="Cannot install"):
+            install_distribution(str(staged), name="default")
+
+    def test_install_rejects_non_distribution_directory(self, profile_env, tmp_path):
+        bogus = tmp_path / "bogus_dir"
+        bogus.mkdir()
+        (bogus / "some_file").write_text("hi")
+        with pytest.raises(DistributionError, match="No distribution.yaml"):
+            plan_install(str(bogus), tmp_path / "work", override_name="x")
+
+    def test_install_rejects_unknown_source(self, profile_env, tmp_path):
+        with pytest.raises(DistributionError, match="Cannot resolve"):
+            plan_install("definitely-not-a-thing", tmp_path / "work", override_name="x")
+
+    def test_install_emits_env_example_when_manifest_has_env(self, profile_env):
+        mf = DistributionManifest(
+            name="needs_env",
+            version="0.1.0",
+            env_requires=[EnvRequirement(name="OPENAI_API_KEY", description="key")],
+        )
+        staged = _make_staging_dir(profile_env, "needs_env", manifest=mf)
+        plan = install_distribution(str(staged), name="needs_env")
+        example = plan.target_dir / ".env.EXAMPLE"
+        assert example.is_file()
+        assert "OPENAI_API_KEY" in example.read_text()
+
+    def test_install_enforces_hermes_requires(self, profile_env, monkeypatch):
+        # Pin current Hermes version to something well below the requirement
+        import hermes_cli
+        monkeypatch.setattr(hermes_cli, "__version__", "0.1.0", raising=False)
+
+        mf = DistributionManifest(
+            name="future",
+            version="1.0.0",
+            hermes_requires=">=99.0.0",
+        )
+        staged = _make_staging_dir(profile_env, "future", manifest=mf)
+        with pytest.raises(DistributionError, match="requires Hermes"):
+            install_distribution(str(staged), name="future")
+
+
+# ===========================================================================
+# Update — preserves user data, preserves config by default
+# ===========================================================================
+
+
+class TestUpdate:
+
+    def test_update_preserves_user_data(self, profile_env):
+        # 1. Build staging dir, install
+        staged = _make_staging_dir(profile_env, "src")
+        plan = install_distribution(str(staged), name="telem")
+
+        # 2. Add user-owned data to the installed profile
+        (plan.target_dir / "memories").mkdir(exist_ok=True)
+        (plan.target_dir / "memories" / "MEMORY.md").write_text("# USER MEMORY\n")
+        (plan.target_dir / ".env").write_text("OPENAI_API_KEY=sk-user\n")
+        (plan.target_dir / "auth.json").write_text('{"user": "auth"}')
+        (plan.target_dir / "sessions").mkdir(exist_ok=True)
+        (plan.target_dir / "sessions" / "chat.json").write_text('{"s": 1}')
+
+        # 3. Bump source in the staging dir
+        (staged / "SOUL.md").write_text("I am Source v2.\n")
+
+        # 4. Update
+        update_distribution("telem", force_config=False)
+
+        # 5. Dist-owned changed
+        assert (plan.target_dir / "SOUL.md").read_text() == "I am Source v2.\n"
+        # 6. User-owned preserved
+        assert (plan.target_dir / "memories" / "MEMORY.md").read_text() == "# USER MEMORY\n"
+        assert (plan.target_dir / ".env").read_text() == "OPENAI_API_KEY=sk-user\n"
+        assert (plan.target_dir / "auth.json").read_text() == '{"user": "auth"}'
+        assert (plan.target_dir / "sessions" / "chat.json").read_text() == '{"s": 1}'
+
+    def test_update_preserves_config_by_default(self, profile_env):
+        staged = _make_staging_dir(profile_env, "src")
+        plan = install_distribution(str(staged), name="t2")
+
+        # User edits config
+        (plan.target_dir / "config.yaml").write_text(
+            "model:\n  model: gpt-5\n# user override\n"
+        )
+
+        # Bump source config
+        (staged / "config.yaml").write_text("model:\n  model: claude\n")
+
+        update_distribution("t2", force_config=False)
+        assert "gpt-5" in (plan.target_dir / "config.yaml").read_text()
+        assert "user override" in (plan.target_dir / "config.yaml").read_text()
+
+    def test_update_force_config_overwrites(self, profile_env):
+        staged = _make_staging_dir(profile_env, "src")
+        plan = install_distribution(str(staged), name="t3")
+
+        (plan.target_dir / "config.yaml").write_text("model:\n  model: gpt-5\n")
+
+        (staged / "config.yaml").write_text("model:\n  model: claude\n")
+
+        update_distribution("t3", force_config=True)
+        assert "claude" in (plan.target_dir / "config.yaml").read_text()
+        assert "gpt-5" not in (plan.target_dir / "config.yaml").read_text()
+
+    def test_update_missing_manifest_errors(self, profile_env):
+        # Make a profile without a manifest; update must refuse
+        from hermes_cli.profiles import create_profile
+        create_profile(name="plain", no_alias=True)
+        with pytest.raises(DistributionError, match="not a distribution"):
+            update_distribution("plain")
+
+
+# ===========================================================================
+# describe_distribution — info subcommand
+# ===========================================================================
+
+
+class TestDescribe:
+
+    def test_describe_existing_distribution(self, profile_env):
+        mf = DistributionManifest(
+            name="telem",
+            version="1.0.0",
+            description="compliance monitor",
+            env_requires=[EnvRequirement(name="API", description="api key")],
+        )
+        staged = _make_staging_dir(profile_env, "telem", manifest=mf)
+        install_distribution(str(staged), name="telem")
+        data = describe_distribution("telem")
+        assert data["name"] == "telem"
+        assert data["version"] == "1.0.0"
+        assert data["env_requires"][0]["name"] == "API"
+
+    def test_describe_non_distribution_returns_empty(self, profile_env):
+        from hermes_cli.profiles import create_profile
+        create_profile(name="plain", no_alias=True)
+        assert describe_distribution("plain") == {}
+
+    def test_describe_missing_profile_raises(self, profile_env):
+        with pytest.raises(DistributionError, match="does not exist"):
+            describe_distribution("nonexistent")
+
+
+# ===========================================================================
+# Security — USER_OWNED_EXCLUDE covers the right paths
+# ===========================================================================
+
+
+class TestSecurity:
+
+    def test_user_owned_exclude_covers_credentials(self):
+        assert "auth.json" in USER_OWNED_EXCLUDE
+        assert ".env" in USER_OWNED_EXCLUDE
+        assert "memories" in USER_OWNED_EXCLUDE
+        assert "sessions" in USER_OWNED_EXCLUDE
+        assert "local" in USER_OWNED_EXCLUDE
+
+    def test_install_does_not_import_credentials_from_staging(self, profile_env):
+        """If an author accidentally ships auth.json or .env in their
+        staging dir, the installer must NOT copy them to the target profile."""
+        staged = _make_staging_dir(profile_env, "src")
+        # Author leaks credentials into the staging tree (shouldn't happen, but...)
+        (staged / "auth.json").write_text('{"leaked": true}')
+        (staged / ".env").write_text("LEAKED=1")
+
+        plan = install_distribution(str(staged), name="clean")
+        assert not (plan.target_dir / "auth.json").exists(), "auth.json leaked"
+        # Fresh profile may have its own .env via the bootstrap; what we care
+        # about is that the leaked content didn't land in the target.
+        if (plan.target_dir / ".env").exists():
+            assert "LEAKED" not in (plan.target_dir / ".env").read_text()
+
+
+# ===========================================================================
+# Install-time metadata (installed_at stamp)
+# ===========================================================================
+
+
+class TestInstalledAtStamp:
+
+    def test_install_stamps_installed_at(self, profile_env):
+        staged = _make_staging_dir(profile_env, "src")
+        plan = install_distribution(str(staged), name="stamped")
+        mf = read_manifest(plan.target_dir)
+        assert mf.installed_at, "installed_at should be set after install"
+        # ISO-8601 UTC sanity: starts with 4-digit year, contains 'T', ends with '+00:00'.
+        assert mf.installed_at[:4].isdigit()
+        assert "T" in mf.installed_at
+        assert mf.installed_at.endswith("+00:00")
+
+    def test_update_refreshes_installed_at(self, profile_env, monkeypatch):
+        staged = _make_staging_dir(profile_env, "src")
+        install_distribution(str(staged), name="demo")
+        from hermes_cli.profiles import get_profile_dir
+        first = read_manifest(get_profile_dir("demo")).installed_at
+
+        # Freeze `datetime.now()` to a fixed future time so we can observe that
+        # update writes a NEW stamp (installs within the same second otherwise
+        # collide at iso-8601 seconds resolution).
+        import datetime as _dt
+        class _FakeDT(_dt.datetime):
+            @classmethod
+            def now(cls, tz=None):
+                return _dt.datetime(2099, 1, 1, 0, 0, 0, tzinfo=tz or _dt.timezone.utc)
+        monkeypatch.setattr(
+            "hermes_cli.profile_distribution.datetime", _FakeDT, raising=True
+        )
+
+        from hermes_cli.profile_distribution import update_distribution
+        update_distribution("demo")
+        refreshed = read_manifest(get_profile_dir("demo")).installed_at
+        assert refreshed != first, "installed_at should change on update"
+        assert refreshed.startswith("2099-01-01"), refreshed
+
+
+# ===========================================================================
+# ProfileInfo exposes distribution metadata
+# ===========================================================================
+
+
+class TestProfileInfoDistribution:
+
+    def test_installed_distribution_shows_in_list(self, profile_env):
+        staged = _make_staging_dir(
+            profile_env, "src",
+            manifest=DistributionManifest(name="telem", version="1.2.3"),
+        )
+        install_distribution(str(staged), name="telem")
+
+        from hermes_cli.profiles import list_profiles
+        rows = {p.name: p for p in list_profiles()}
+        assert "telem" in rows
+        row = rows["telem"]
+        assert row.distribution_name == "telem"
+        assert row.distribution_version == "1.2.3"
+        assert row.distribution_source  # path populated, exact value depends on fixture
+
+    def test_plain_profile_has_no_distribution_fields(self, profile_env):
+        from hermes_cli.profiles import create_profile, list_profiles
+        create_profile(name="plain", no_alias=True)
+        rows = {p.name: p for p in list_profiles()}
+        assert rows["plain"].distribution_name is None
+        assert rows["plain"].distribution_version is None
+
+    def test_malformed_manifest_does_not_break_list(self, profile_env):
+        from hermes_cli.profiles import create_profile, list_profiles, get_profile_dir
+        create_profile(name="brokenmeta", no_alias=True)
+        # Write a distribution.yaml that isn't a valid mapping
+        (get_profile_dir("brokenmeta") / "distribution.yaml").write_text(
+            "not: [a, valid, mapping\n"  # broken YAML
+        )
+        # list_profiles must NOT raise; distribution_* stay None for this row.
+        rows = {p.name: p for p in list_profiles()}
+        assert rows["brokenmeta"].distribution_name is None
+
+
+# ===========================================================================
+# Error surfaces: validation failures should propagate as DistributionError
+# or ValueError (both caught and rendered cleanly by the CLI handler)
+# ===========================================================================
+
+
+class TestErrorSurfaces:
+
+    def test_bad_profile_name_raises_valueerror_not_traceback(self, profile_env, tmp_path):
+        """A manifest whose 'name' can't be used as a profile identifier
+        should raise ValueError from validate_profile_name — the CLI handler
+        catches both DistributionError and ValueError so users see a clean
+        'Error: ...' line instead of a Python traceback.
+        """
+        mf = DistributionManifest(name="Invalid Name With Spaces", version="0.1.0")
+        staged = _make_staging_dir(profile_env, "bad", manifest=mf)
+        with pytest.raises((ValueError, DistributionError)):
+            plan_install(str(staged), tmp_path / "work")
+
+    def test_path_traversal_name_rejected(self, profile_env, tmp_path):
+        mf = DistributionManifest(name="../../etc/passwd", version="0.1.0")
+        staged = _make_staging_dir(profile_env, "bad", manifest=mf)
+        with pytest.raises((ValueError, DistributionError)):
+            plan_install(str(staged), tmp_path / "work")
+
--- a/tests/hermes_cli/test_profiles.py
+++ b/tests/hermes_cli/test_profiles.py
@ -15,6 +15,7 @@ from unittest.mock import patch, MagicMock
 import pytest

 from hermes_cli.profiles import (
+    normalize_profile_name,
    validate_profile_name,
    get_profile_dir,
    create_profile,
@ -32,6 +33,9 @@ from hermes_cli.profiles import (
    generate_zsh_completion,
    _get_profiles_root,
    _get_default_hermes_home,
+    seed_profile_skills,
+    has_bundled_skills_opt_out,
+    NO_BUNDLED_SKILLS_MARKER,
 )


@ -58,6 +62,24 @@ def profile_env(tmp_path, monkeypatch):
 # TestValidateProfileName
 # ===================================================================

+class TestNormalizeProfileName:
+    """Tests for normalize_profile_name()."""
+
+    def test_title_case_normalized(self):
+        assert normalize_profile_name("Jules") == "jules"
+        assert normalize_profile_name("  Librarian ") == "librarian"
+
+    def test_default_case_insensitive(self):
+        assert normalize_profile_name("Default") == "default"
+        assert normalize_profile_name("DEFAULT") == "default"
+
+    def test_empty_raises(self):
+        with pytest.raises(ValueError, match="cannot be empty"):
+            normalize_profile_name("")
+        with pytest.raises(ValueError, match="cannot be empty"):
+            normalize_profile_name("   ")
+
+
 class TestValidateProfileName:
    """Tests for validate_profile_name()."""

@ -66,6 +88,11 @@ class TestValidateProfileName:
        # Should not raise
        validate_profile_name(name)

+    def test_uppercase_rejected(self):
+        # validate_profile_name is strict — callers normalize first, then validate.
+        with pytest.raises(ValueError):
+            validate_profile_name("Jules")
+
    @pytest.mark.parametrize("name", ["UPPER", "has space", ".hidden", "-leading"])
    def test_invalid_names_rejected(self, name):
        with pytest.raises(ValueError):
@ -89,6 +116,14 @@ class TestValidateProfileName:
        with pytest.raises(ValueError):
            validate_profile_name("")

+    @pytest.mark.parametrize("name", ["hermes", "test", "tmp", "root", "sudo"])
+    def test_reserved_names_rejected(self, name):
+        """Reserved names collide with the Hermes install itself or with
+        common system binaries — reject them at validate time so
+        create/install/rename all share one gate."""
+        with pytest.raises(ValueError, match="reserved"):
+            validate_profile_name(name)
+

 # ===================================================================
 # TestGetProfileDir
@ -107,6 +142,10 @@ class TestGetProfileDir:
        result = get_profile_dir("coder")
        assert result == tmp_path / ".hermes" / "profiles" / "coder"

+    def test_named_profile_matching_is_case_insensitive(self, profile_env):
+        tmp_path = profile_env
+        assert get_profile_dir("Coder") == tmp_path / ".hermes" / "profiles" / "coder"
+

 # ===================================================================
 # TestCreateProfile
@ -205,6 +244,64 @@ class TestCreateProfile:
        assert (profile_dir / "memories" / "note.md").read_text() == "remember this"
        assert not (profile_dir / "profiles").exists()

+    def test_clone_all_excludes_default_infrastructure(self, profile_env):
+        """--clone-all from default profile excludes hermes-agent, .worktrees,
+        bin, node_modules at root, plus __pycache__/*.pyc/*.pyo/*.sock/*.tmp
+        at any depth.  Profile data (config, env, skills, sessions, logs,
+        state.db) must be preserved — clone-all means "complete snapshot
+        minus infrastructure."
+        """
+        tmp_path = profile_env
+        default_home = tmp_path / ".hermes"
+        # Simulate infrastructure dirs that only the default profile has
+        (default_home / "hermes-agent" / ".git").mkdir(parents=True)
+        (default_home / "hermes-agent" / "venv" / "bin").mkdir(parents=True)
+        (default_home / "hermes-agent" / "README.md").write_text("repo")
+        (default_home / ".worktrees" / "some-tree").mkdir(parents=True)
+        (default_home / "profiles" / "other").mkdir(parents=True)
+        (default_home / "profiles" / "other" / "config.yaml").write_text("x")
+        (default_home / "bin").mkdir(exist_ok=True)
+        (default_home / "bin" / "tool").write_text("binary")
+        (default_home / "node_modules" / ".package-lock.json").mkdir(parents=True)
+        # Bytecode + temp files at nested depth (universal exclusion)
+        (default_home / "skills" / "my-skill" / "__pycache__").mkdir(parents=True)
+        (default_home / "skills" / "my-skill" / "__pycache__" / "module.cpython-311.pyc").write_text("stale")
+        (default_home / "skills" / "my-skill" / "module.pyc").write_text("stale")
+        (default_home / "skills" / "my-skill" / "module.pyo").write_text("stale")
+        (default_home / "data.sock").write_text("socket")
+        (default_home / "data.tmp").write_text("tmp")
+        # Profile data that SHOULD be copied
+        (default_home / "skills" / "my-skill").mkdir(parents=True, exist_ok=True)
+        (default_home / "skills" / "my-skill" / "SKILL.md").write_text("skill")
+        (default_home / "config.yaml").write_text("model: gpt-4")
+        (default_home / ".env").write_text("KEY=val")
+        (default_home / "state.db").write_text("sessions-data")
+        (default_home / "sessions").mkdir(exist_ok=True)
+        (default_home / "logs").mkdir(exist_ok=True)
+        (default_home / "logs" / "gateway.log").write_text("log")
+
+        profile_dir = create_profile("cloned", clone_all=True, no_alias=True)
+
+        # Infrastructure must be excluded
+        assert not (profile_dir / "hermes-agent").exists()
+        assert not (profile_dir / ".worktrees").exists()
+        assert not (profile_dir / "profiles").exists()
+        assert not (profile_dir / "bin").exists()
+        assert not (profile_dir / "node_modules").exists()
+        # Universal exclusions at any depth
+        assert not (profile_dir / "data.sock").exists()
+        assert not (profile_dir / "data.tmp").exists()
+        assert not (profile_dir / "skills" / "my-skill" / "__pycache__").exists()
+        assert not (profile_dir / "skills" / "my-skill" / "module.pyc").exists()
+        assert not (profile_dir / "skills" / "my-skill" / "module.pyo").exists()
+        # All profile data must be present
+        assert (profile_dir / "skills" / "my-skill" / "SKILL.md").read_text() == "skill"
+        assert (profile_dir / "config.yaml").read_text() == "model: gpt-4"
+        assert (profile_dir / ".env").read_text() == "KEY=val"
+        assert (profile_dir / "state.db").read_text() == "sessions-data"
+        assert (profile_dir / "sessions").exists()
+        assert (profile_dir / "logs" / "gateway.log").read_text() == "log"
+
    def test_clone_config_missing_files_skipped(self, profile_env):
        """Clone config gracefully skips files that don't exist in source."""
        profile_dir = create_profile("coder", clone_config=True, no_alias=True)
@ -215,6 +312,116 @@ class TestCreateProfile:
        assert (profile_dir / "SOUL.md").exists()


+# ===================================================================
+# TestNoSkillsOptOut
+# ===================================================================
+
+class TestNoSkillsOptOut:
+    """Tests for `hermes profile create --no-skills` and the opt-out marker."""
+
+    def test_no_skills_writes_marker_and_skips_seeding(self, profile_env):
+        profile_dir = create_profile("orchestrator", no_alias=True, no_skills=True)
+
+        # Marker file is present
+        marker = profile_dir / NO_BUNDLED_SKILLS_MARKER
+        assert marker.is_file(), "expected .no-bundled-skills marker in profile root"
+        assert "--no-skills" in marker.read_text()
+
+        # has_bundled_skills_opt_out() agrees
+        assert has_bundled_skills_opt_out(profile_dir) is True
+
+        # skills/ dir exists (profile bootstrapping still creates the dir) but
+        # contains nothing yet because create_profile itself doesn't seed.
+        assert (profile_dir / "skills").is_dir()
+        assert list((profile_dir / "skills").iterdir()) == []
+
+    def test_no_skills_conflicts_with_clone(self, profile_env):
+        with pytest.raises(ValueError, match="mutually exclusive"):
+            create_profile(
+                "orchestrator",
+                no_alias=True,
+                no_skills=True,
+                clone_config=True,
+            )
+
+    def test_no_skills_conflicts_with_clone_all(self, profile_env):
+        with pytest.raises(ValueError, match="mutually exclusive"):
+            create_profile(
+                "orchestrator",
+                no_alias=True,
+                no_skills=True,
+                clone_all=True,
+            )
+
+    def test_seed_profile_skills_respects_marker(self, profile_env):
+        """seed_profile_skills() must no-op on opted-out profiles even when
+        called directly (e.g. by `hermes update`'s all-profile sync loop)."""
+        profile_dir = create_profile("orchestrator", no_alias=True, no_skills=True)
+
+        # Call seed_profile_skills() directly — it should NOT invoke subprocess,
+        # NOT modify the skills/ dir, and return a dict with skipped_opt_out=True.
+        result = seed_profile_skills(profile_dir, quiet=True)
+
+        assert result is not None
+        assert result.get("skipped_opt_out") is True
+        assert result.get("copied") == []
+        # skills/ stays empty — no subprocess ran
+        assert list((profile_dir / "skills").iterdir()) == []
+
+    def test_default_profile_gets_skills_seeded(self, profile_env, monkeypatch):
+        """Sanity: without --no-skills, seed_profile_skills() runs the real
+        subprocess path. Mock the subprocess so the test is hermetic, and
+        just confirm the marker is NOT checked in the non-opt-out case."""
+        import subprocess as _sp
+
+        profile_dir = create_profile("coder", no_alias=True)
+        # No marker — not opted out
+        assert not (profile_dir / NO_BUNDLED_SKILLS_MARKER).exists()
+        assert has_bundled_skills_opt_out(profile_dir) is False
+
+        # Mock subprocess.run to avoid actually running skill sync in tests
+        calls = []
+
+        def fake_run(*args, **kwargs):
+            calls.append(args)
+            return _sp.CompletedProcess(
+                args=args, returncode=0, stdout='{"copied": ["x"]}', stderr=""
+            )
+
+        monkeypatch.setattr("subprocess.run", fake_run)
+        result = seed_profile_skills(profile_dir, quiet=True)
+
+        # Subprocess was invoked (the opt-out branch did NOT short-circuit)
+        assert len(calls) == 1
+        assert result == {"copied": ["x"]}
+
+    def test_delete_marker_re_enables_seeding(self, profile_env, monkeypatch):
+        """Deleting .no-bundled-skills opts the profile back in."""
+        import subprocess as _sp
+
+        profile_dir = create_profile("orchestrator", no_alias=True, no_skills=True)
+        assert has_bundled_skills_opt_out(profile_dir) is True
+
+        # First call: opted out, returns skipped dict without touching subprocess
+        called = []
+        monkeypatch.setattr(
+            "subprocess.run",
+            lambda *a, **kw: (called.append(a), _sp.CompletedProcess(
+                args=a, returncode=0, stdout='{"copied": []}', stderr=""
+            ))[1],
+        )
+        r1 = seed_profile_skills(profile_dir, quiet=True)
+        assert r1.get("skipped_opt_out") is True
+        assert called == []
+
+        # Delete marker → next call runs the real path
+        (profile_dir / NO_BUNDLED_SKILLS_MARKER).unlink()
+        assert has_bundled_skills_opt_out(profile_dir) is False
+        r2 = seed_profile_skills(profile_dir, quiet=True)
+        assert r2 == {"copied": []}
+        assert len(called) == 1
+
+
 # ===================================================================
 # TestDeleteProfile
 # ===================================================================
--- a/tests/hermes_cli/test_prompt_api_key.py
+++ b/tests/hermes_cli/test_prompt_api_key.py
@ -0,0 +1,157 @@
+"""Tests for ``_prompt_api_key`` — the shared Keep/Replace/Clear menu used by
+``hermes setup`` / ``hermes model`` when an API key already exists in ``.env``.
+
+Regression coverage for #16394: the wizard used to silently skip the key prompt
+when any value was present (even malformed junk), leaving users stuck.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+
+@pytest.fixture
+def profile_env(tmp_path, monkeypatch):
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    (home / ".env").write_text("")
+    return home
+
+
+def _pconfig(name="deepseek"):
+    from hermes_cli.auth import PROVIDER_REGISTRY
+    return PROVIDER_REGISTRY[name]
+
+
+def _run_prompt(existing_key, choice, new_key="", provider_id="", pconfig_name="deepseek"):
+    """Invoke _prompt_api_key with mocked input()/getpass() responses."""
+    from hermes_cli import main as m
+
+    pconfig = _pconfig(pconfig_name)
+    with patch("builtins.input", return_value=choice), \
+         patch("getpass.getpass", return_value=new_key):
+        return m._prompt_api_key(pconfig, existing_key, provider_id=provider_id)
+
+
+# First-time entry ────────────────────────────────────────────────────────────
+
+def test_first_time_save_new_key(profile_env):
+    from hermes_cli.config import get_env_value
+
+    key, abort = _run_prompt(existing_key="", choice="", new_key="sk-abcdef")
+    assert key == "sk-abcdef"
+    assert abort is False
+    assert get_env_value("DEEPSEEK_API_KEY") == "sk-abcdef"
+
+
+def test_first_time_cancelled(profile_env):
+    key, abort = _run_prompt(existing_key="", choice="", new_key="")
+    assert key == ""
+    assert abort is True
+
+
+# Already configured — K / R / C ───────────────────────────────────────────────
+
+def test_keep_default_empty_input(profile_env):
+    from hermes_cli.config import save_env_value
+    save_env_value("DEEPSEEK_API_KEY", "sk-existing")
+
+    key, abort = _run_prompt(existing_key="sk-existing", choice="")
+    assert key == "sk-existing"
+    assert abort is False
+
+
+def test_keep_letter_k(profile_env):
+    key, abort = _run_prompt(existing_key="sk-existing", choice="k")
+    assert key == "sk-existing"
+    assert abort is False
+
+
+def test_keep_on_unrecognised_input(profile_env):
+    """Garbage input falls through to keep — never destroys the user's key."""
+    key, abort = _run_prompt(existing_key="sk-existing", choice="xyz")
+    assert key == "sk-existing"
+    assert abort is False
+
+
+def test_replace_saves_new_key(profile_env):
+    from hermes_cli.config import get_env_value, save_env_value
+    save_env_value("DEEPSEEK_API_KEY", "sk-malformed-junk")
+
+    key, abort = _run_prompt(
+        existing_key="sk-malformed-junk", choice="r", new_key="sk-fresh"
+    )
+    assert key == "sk-fresh"
+    assert abort is False
+    assert get_env_value("DEEPSEEK_API_KEY") == "sk-fresh"
+
+
+def test_replace_cancelled_preserves_key(profile_env):
+    """Empty entry to the Replace prompt means cancel — keeps the old key intact."""
+    from hermes_cli.config import get_env_value, save_env_value
+    save_env_value("DEEPSEEK_API_KEY", "sk-existing")
+
+    key, abort = _run_prompt(
+        existing_key="sk-existing", choice="r", new_key=""
+    )
+    assert key == "sk-existing"
+    assert abort is False
+    assert get_env_value("DEEPSEEK_API_KEY") == "sk-existing"
+
+
+def test_clear_wipes_env_and_aborts(profile_env):
+    from hermes_cli.config import get_env_value, save_env_value
+    save_env_value("DEEPSEEK_API_KEY", "sk-existing")
+    save_env_value("OTHER_VAR", "keep-me")
+
+    key, abort = _run_prompt(existing_key="sk-existing", choice="c")
+    assert key == ""
+    assert abort is True
+    # Cleared, but sibling entries untouched.
+    assert not get_env_value("DEEPSEEK_API_KEY")
+    assert get_env_value("OTHER_VAR") == "keep-me"
+
+
+def test_ctrl_c_at_choice_prompt_keeps(profile_env):
+    from hermes_cli import main as m
+
+    pconfig = _pconfig("deepseek")
+    with patch("builtins.input", side_effect=KeyboardInterrupt):
+        key, abort = m._prompt_api_key(pconfig, "sk-existing")
+    assert key == "sk-existing"
+    assert abort is False
+
+
+# LM Studio no-auth placeholder ────────────────────────────────────────────────
+
+def test_lmstudio_first_time_empty_uses_placeholder(profile_env):
+    from hermes_cli.auth import LMSTUDIO_NOAUTH_PLACEHOLDER
+    from hermes_cli.config import get_env_value
+
+    key, abort = _run_prompt(
+        existing_key="", choice="", new_key="",
+        provider_id="lmstudio", pconfig_name="lmstudio",
+    )
+    assert key == LMSTUDIO_NOAUTH_PLACEHOLDER
+    assert abort is False
+    assert get_env_value("LM_API_KEY") == LMSTUDIO_NOAUTH_PLACEHOLDER
+
+
+def test_lmstudio_replace_empty_does_not_overwrite_with_placeholder(profile_env):
+    """On REPLACE with empty input, preserve the user's existing key — do NOT
+    silently substitute the placeholder.  The placeholder path only fires for
+    first-time configuration where the user has made no explicit choice yet."""
+    from hermes_cli.config import get_env_value, save_env_value
+    save_env_value("LM_API_KEY", "my-real-lmstudio-key")
+
+    key, abort = _run_prompt(
+        existing_key="my-real-lmstudio-key", choice="r", new_key="",
+        provider_id="lmstudio", pconfig_name="lmstudio",
+    )
+    assert key == "my-real-lmstudio-key"
+    assert abort is False
+    assert get_env_value("LM_API_KEY") == "my-real-lmstudio-key"
--- a/tests/hermes_cli/test_redact_config_bridge.py
+++ b/tests/hermes_cli/test_redact_config_bridge.py
@ -72,11 +72,13 @@ def test_redact_secrets_false_in_config_yaml_is_honored(tmp_path):
    assert "ENV_VAR=false" in result.stdout


-def test_redact_secrets_default_false_when_unset(tmp_path):
-    """Without the config key, redaction stays OFF by default.
+def test_redact_secrets_default_true_when_unset(tmp_path):
+    """Without the config key or env var, redaction is ON by default (#17691).

-    Secret redaction is opt-in — users who want it must set
-    `security.redact_secrets: true` explicitly (or HERMES_REDACT_SECRETS=true).
+    Secret redaction is a secure default — users who need raw credential
+    values in tool output (e.g. working on the redactor itself) must set
+    `security.redact_secrets: false` explicitly (or
+    `HERMES_REDACT_SECRETS=false`).
    """
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
@ -107,7 +109,7 @@ def test_redact_secrets_default_false_when_unset(tmp_path):
        timeout=30,
    )
    assert result.returncode == 0, f"probe failed: {result.stderr}"
-    assert "REDACT_ENABLED=False" in result.stdout
+    assert "REDACT_ENABLED=True" in result.stdout


 def test_redact_secrets_true_in_config_yaml_is_honored(tmp_path):
--- a/tests/hermes_cli/test_relaunch.py
+++ b/tests/hermes_cli/test_relaunch.py
@ -152,4 +152,135 @@ class TestRelaunch:
        with pytest.raises(SystemExit):
            relaunch_mod.relaunch(["--resume", "abc"])

-        assert calls == [("/usr/bin/hermes", ["/usr/bin/hermes", "--resume", "abc"])]
+        assert calls == [("/usr/bin/hermes", ["/usr/bin/hermes", "--resume", "abc"])]
+
+    def test_windows_uses_subprocess_not_execvp(self, monkeypatch):
+        """On Windows, os.execvp raises OSError "Exec format error" when the
+        target is a .cmd shim or console-script wrapper (both common for
+        hermes).  relaunch() must detect win32 and use subprocess.run +
+        sys.exit instead."""
+        monkeypatch.setattr(relaunch_mod.sys, "platform", "win32")
+        monkeypatch.setattr(relaunch_mod, "resolve_hermes_bin", lambda: r"C:\Users\test\hermes.exe")
+
+        import subprocess as _subprocess
+
+        captured_argv = []
+
+        def fake_subprocess_run(argv, **kwargs):
+            captured_argv.append(list(argv))
+            class _Result:
+                returncode = 0
+            return _Result()
+
+        monkeypatch.setattr(_subprocess, "run", fake_subprocess_run)
+
+        # execvp MUST NOT be called on Windows — route must go through subprocess
+        execvp_calls = []
+
+        def fake_execvp(*args, **kwargs):
+            execvp_calls.append(args)
+            raise AssertionError("os.execvp must not be called on Windows")
+
+        monkeypatch.setattr(relaunch_mod.os, "execvp", fake_execvp)
+
+        with pytest.raises(SystemExit) as exc_info:
+            relaunch_mod.relaunch(["chat"])
+
+        assert exc_info.value.code == 0
+        assert execvp_calls == []
+        assert captured_argv == [[r"C:\Users\test\hermes.exe", "chat"]]
+
+    def test_windows_propagates_child_exit_code(self, monkeypatch):
+        """A non-zero exit from the child should flow through to sys.exit."""
+        monkeypatch.setattr(relaunch_mod.sys, "platform", "win32")
+        monkeypatch.setattr(relaunch_mod, "resolve_hermes_bin", lambda: r"C:\hermes.exe")
+
+        import subprocess as _subprocess
+
+        def fake_run(argv, **kwargs):
+            class _Result:
+                returncode = 42
+            return _Result()
+
+        monkeypatch.setattr(_subprocess, "run", fake_run)
+        monkeypatch.setattr(relaunch_mod.os, "execvp", lambda *a, **kw: None)
+
+        with pytest.raises(SystemExit) as exc_info:
+            relaunch_mod.relaunch(["chat"])
+        assert exc_info.value.code == 42
+
+    def test_windows_surfaces_oserror_with_help(self, monkeypatch, capsys):
+        """When subprocess itself raises OSError (file-not-found / bad format),
+        we must NOT let it bubble up as a cryptic traceback — print a
+        user-readable hint and sys.exit(1)."""
+        monkeypatch.setattr(relaunch_mod.sys, "platform", "win32")
+        monkeypatch.setattr(relaunch_mod, "resolve_hermes_bin", lambda: r"C:\missing.exe")
+
+        import subprocess as _subprocess
+
+        def fake_run(argv, **kwargs):
+            raise OSError(2, "No such file or directory")
+
+        monkeypatch.setattr(_subprocess, "run", fake_run)
+        monkeypatch.setattr(relaunch_mod.os, "execvp", lambda *a, **kw: None)
+
+        with pytest.raises(SystemExit) as exc_info:
+            relaunch_mod.relaunch(["chat"])
+        assert exc_info.value.code == 1
+        err = capsys.readouterr().err
+        assert "relaunch failed" in err
+        assert "open a new terminal" in err.lower() or "path" in err.lower()
+
+
+class TestResolveHermesBinWindowsPyGuard:
+    """On Windows, resolve_hermes_bin MUST NOT return a .py path.
+    os.access(x, os.X_OK) returns True for .py files on Windows because
+    PATHEXT includes .py when the Python launcher is installed — but
+    subprocess.run can't actually exec a .py directly, so the relaunch
+    would fail with the cryptic "%1 is not a valid Win32 application" error.
+    """
+
+    def test_windows_rejects_py_argv0_falls_through_to_path(self, monkeypatch, tmp_path):
+        """On Windows, if sys.argv[0] is a .py file, we must skip the
+        argv[0] fast-path and fall through to PATH / python -m."""
+        # Build a fake .py script that "passes" the isfile + X_OK checks.
+        script = tmp_path / "main.py"
+        script.write_text("# stub")
+
+        monkeypatch.setattr(relaunch_mod.sys, "platform", "win32")
+        monkeypatch.setattr(relaunch_mod.sys, "argv", [str(script), "chat"])
+        # Force PATH lookup to return a hermes.exe so the test doesn't
+        # exercise the None-fallback path (that's a separate test).
+        monkeypatch.setattr(
+            relaunch_mod.shutil, "which",
+            lambda name: r"C:\venv\Scripts\hermes.exe" if name == "hermes" else None,
+        )
+
+        bin_path = relaunch_mod.resolve_hermes_bin()
+        # Must NOT be the .py — must be the hermes.exe PATH entry.
+        assert bin_path == r"C:\venv\Scripts\hermes.exe"
+
+    def test_posix_still_accepts_py_argv0(self, monkeypatch, tmp_path):
+        """POSIX behaviour unchanged: argv[0] pointing at an executable
+        script (including .py with a shebang + chmod +x) is fine to return
+        because POSIX exec can route through the shebang line."""
+        if sys.platform == "win32":
+            pytest.skip("POSIX semantics")
+        script = tmp_path / "hermes"
+        script.write_text("#!/usr/bin/env python3\n")
+        script.chmod(0o755)
+        monkeypatch.setattr(relaunch_mod.sys, "argv", [str(script), "chat"])
+        assert relaunch_mod.resolve_hermes_bin() == str(script)
+
+    def test_windows_py_argv0_with_no_hermes_on_path_returns_none(self, monkeypatch, tmp_path):
+        """Bulletproof fallback: if argv0 is .py on Windows AND hermes.exe
+        isn't on PATH, return None so the caller falls back to
+        python -m hermes_cli.main."""
+        script = tmp_path / "main.py"
+        script.write_text("# stub")
+
+        monkeypatch.setattr(relaunch_mod.sys, "platform", "win32")
+        monkeypatch.setattr(relaunch_mod.sys, "argv", [str(script), "chat"])
+        monkeypatch.setattr(relaunch_mod.shutil, "which", lambda name: None)
+
+        assert relaunch_mod.resolve_hermes_bin() is None
--- a/tests/hermes_cli/test_runtime_provider_resolution.py
+++ b/tests/hermes_cli/test_runtime_provider_resolution.py
@ -897,6 +897,58 @@ def test_named_custom_provider_does_not_shadow_builtin_provider(monkeypatch):
    assert resolved["requested_provider"] == "nous"


+def test_named_custom_provider_wins_over_builtin_alias(monkeypatch):
+    """A custom_providers entry named after a built-in *alias* (not a canonical
+    provider name) must win over the built-in.  Regression guard for #15743:
+    when users define ``custom_providers: [{name: kimi, ...}]`` and reference
+    ``provider: kimi``, the built-in alias rewriting (``kimi`` → ``kimi-coding``)
+    would otherwise hijack the request and send it to the wrong endpoint.
+    """
+    monkeypatch.setattr(
+        rp,
+        "load_config",
+        lambda: {
+            "custom_providers": [
+                {
+                    "name": "kimi",
+                    "base_url": "https://my-custom-kimi.example.com/v1",
+                    "api_key": "my-kimi-key",
+                }
+            ]
+        },
+    )
+
+    entry = rp._get_named_custom_provider("kimi")
+
+    assert entry is not None
+    assert entry["base_url"] == "https://my-custom-kimi.example.com/v1"
+    assert entry["api_key"] == "my-kimi-key"
+
+
+def test_named_custom_provider_skipped_for_canonical_built_in(monkeypatch):
+    """Companion to the test above: ``nous`` is a canonical provider name
+    (``resolve_provider('nous') == 'nous'``), so a custom entry with that name
+    should NOT be returned — the built-in wins as before.
+    """
+    monkeypatch.setattr(
+        rp,
+        "load_config",
+        lambda: {
+            "custom_providers": [
+                {
+                    "name": "nous",
+                    "base_url": "http://localhost:1234/v1",
+                    "api_key": "shadow-key",
+                }
+            ]
+        },
+    )
+
+    entry = rp._get_named_custom_provider("nous")
+
+    assert entry is None
+
+
 def test_explicit_openrouter_skips_openai_base_url(monkeypatch):
    """When the user explicitly requests openrouter, OPENAI_BASE_URL
    (which may point to a custom endpoint) must not override the
--- a/tests/hermes_cli/test_session_handoff.py
+++ b/tests/hermes_cli/test_session_handoff.py
@ -0,0 +1,202 @@
+"""Tests for session handoff (CLI to gateway platform).
+
+The handoff state machine lives on the ``sessions`` table:
+
+    None  → "pending" → "running" → ("completed" | "failed")
+
+CLI side calls ``request_handoff`` and poll-waits on ``get_handoff_state``.
+Gateway side iterates ``list_pending_handoffs``, calls ``claim_handoff`` to
+flip pending → running, and finishes with ``complete_handoff`` or
+``fail_handoff``.
+"""
+
+from __future__ import annotations
+
+import time
+
+import pytest
+
+from hermes_state import SessionDB
+
+
+class TestHandoffStateDB:
+    """Test the handoff schema + helper methods on SessionDB."""
+
+    @pytest.fixture
+    def db(self, tmp_path, monkeypatch):
+        home = tmp_path / ".hermes"
+        home.mkdir()
+        monkeypatch.setenv("HERMES_HOME", str(home))
+        return SessionDB(db_path=home / "state.db")
+
+    def _make_session(self, db, session_id, source="cli", title=None):
+        """Insert a session row directly for testing."""
+        def _do(conn):
+            conn.execute(
+                "INSERT OR IGNORE INTO sessions (id, source, title, started_at) "
+                "VALUES (?, ?, ?, ?)",
+                (session_id, source, title, time.time()),
+            )
+        db._execute_write(_do)
+
+    def test_columns_exist(self, db):
+        db._conn.execute(
+            "SELECT handoff_state, handoff_platform, handoff_error "
+            "FROM sessions LIMIT 0"
+        )
+
+    def test_request_handoff_marks_pending(self, db):
+        sid = "sess-1"
+        self._make_session(db, sid)
+
+        assert db.request_handoff(sid, "telegram") is True
+
+        state = db.get_handoff_state(sid)
+        assert state == {
+            "state": "pending",
+            "platform": "telegram",
+            "error": None,
+        }
+
+    def test_request_handoff_rejects_in_flight(self, db):
+        sid = "sess-2"
+        self._make_session(db, sid)
+
+        assert db.request_handoff(sid, "telegram") is True
+        # Still pending → reject re-request
+        assert db.request_handoff(sid, "discord") is False
+
+        # And after gateway claims it (running) → still rejected
+        assert db.claim_handoff(sid) is True
+        assert db.request_handoff(sid, "discord") is False
+
+    def test_request_handoff_after_terminal_state_resets_error(self, db):
+        sid = "sess-3"
+        self._make_session(db, sid)
+        db.request_handoff(sid, "telegram")
+        db.claim_handoff(sid)
+        db.fail_handoff(sid, "earlier failure")
+
+        # User retries — should be allowed and clear the prior error.
+        assert db.request_handoff(sid, "discord") is True
+        state = db.get_handoff_state(sid)
+        assert state["state"] == "pending"
+        assert state["platform"] == "discord"
+        assert state["error"] is None
+
+    def test_list_pending_handoffs_excludes_running_and_terminal(self, db):
+        a, b, c, d = "sess-a", "sess-b", "sess-c", "sess-d"
+        for sid in (a, b, c, d):
+            self._make_session(db, sid)
+
+        db.request_handoff(a, "telegram")
+        db.request_handoff(b, "discord")
+        db.request_handoff(c, "telegram")
+        db.claim_handoff(c)  # c is now running, not pending
+        db.request_handoff(d, "slack")
+        db.claim_handoff(d)
+        db.complete_handoff(d)  # d is terminal
+
+        pending = db.list_pending_handoffs()
+        ids = [r["id"] for r in pending]
+        assert set(ids) == {a, b}
+
+    def test_claim_handoff_is_atomic(self, db):
+        sid = "sess-claim"
+        self._make_session(db, sid)
+        db.request_handoff(sid, "telegram")
+
+        # First claim wins
+        assert db.claim_handoff(sid) is True
+        # Second claim is a no-op (state is now "running", not "pending")
+        assert db.claim_handoff(sid) is False
+        assert db.get_handoff_state(sid)["state"] == "running"
+
+    def test_complete_handoff_clears_error(self, db):
+        sid = "sess-complete"
+        self._make_session(db, sid)
+        db.request_handoff(sid, "telegram")
+        db.claim_handoff(sid)
+        db.fail_handoff(sid, "transient")
+        # User retries; mock the watcher path
+        db.request_handoff(sid, "telegram")
+        db.claim_handoff(sid)
+        db.complete_handoff(sid)
+
+        state = db.get_handoff_state(sid)
+        assert state["state"] == "completed"
+        assert state["error"] is None
+
+    def test_fail_handoff_records_reason(self, db):
+        sid = "sess-fail"
+        self._make_session(db, sid)
+        db.request_handoff(sid, "telegram")
+        db.claim_handoff(sid)
+        db.fail_handoff(sid, "no home channel for telegram")
+
+        state = db.get_handoff_state(sid)
+        assert state["state"] == "failed"
+        assert state["error"] == "no home channel for telegram"
+
+    def test_fail_handoff_truncates_long_reasons(self, db):
+        sid = "sess-fail-long"
+        self._make_session(db, sid)
+        db.request_handoff(sid, "telegram")
+        db.claim_handoff(sid)
+
+        # 1000-character error string
+        big_err = "x" * 1000
+        db.fail_handoff(sid, big_err)
+
+        state = db.get_handoff_state(sid)
+        assert len(state["error"]) <= 500
+
+    def test_get_handoff_state_for_unknown_session(self, db):
+        assert db.get_handoff_state("does-not-exist") is None
+
+    def test_full_pending_to_completed_flow(self, db):
+        """End-to-end sequence the CLI + gateway watcher follow."""
+        sid = "sess-flow"
+        self._make_session(db, sid, title="my session")
+        db.append_message(sid, "user", "Hello")
+        db.append_message(sid, "assistant", "Hi there!")
+
+        # CLI: request handoff
+        assert db.request_handoff(sid, "telegram") is True
+        assert db.get_handoff_state(sid)["state"] == "pending"
+
+        # Gateway watcher: discover + claim
+        pending = db.list_pending_handoffs()
+        assert len(pending) == 1
+        assert pending[0]["id"] == sid
+        assert db.claim_handoff(sid) is True
+        assert db.get_handoff_state(sid)["state"] == "running"
+
+        # Gateway uses get_messages to load the transcript (real flow uses
+        # session_store.switch_session which reads the same table).
+        messages = db.get_messages(sid)
+        assert [m["role"] for m in messages] == ["user", "assistant"]
+
+        # Gateway: mark completed
+        db.complete_handoff(sid)
+        assert db.get_handoff_state(sid)["state"] == "completed"
+        assert db.list_pending_handoffs() == []
+
+
+class TestHandoffCommandRegistration:
+    """Slash-command surface checks."""
+
+    def test_command_registered(self):
+        from hermes_cli.commands import resolve_command
+        cmd = resolve_command("handoff")
+        assert cmd is not None
+        assert cmd.name == "handoff"
+        assert cmd.category == "Session"
+
+    def test_command_is_cli_only(self):
+        """`/handoff` is initiated from the CLI; gateway shouldn't expose it."""
+        from hermes_cli.commands import resolve_command, GATEWAY_KNOWN_COMMANDS
+        cmd = resolve_command("handoff")
+        assert cmd is not None
+        assert cmd.cli_only is True
+        assert "handoff" not in GATEWAY_KNOWN_COMMANDS
--- a/tests/hermes_cli/test_setup.py
+++ b/tests/hermes_cli/test_setup.py
@ -613,3 +613,35 @@ def test_offer_launch_chat_falls_back_to_module(monkeypatch):
        setup_mod._offer_launch_chat()

    assert exec_calls == [(sys.executable, [sys.executable, "-m", "hermes_cli.main", "chat"])]
+
+
+def test_setup_slack_saves_home_channel(monkeypatch):
+    """_setup_slack() saves SLACK_HOME_CHANNEL when the user provides one."""
+    saved = {}
+    prompts = iter(["xoxb-test-token", "xapp-test-token", "", "C01ABC2DE3F"])
+
+    monkeypatch.setattr(setup_mod, "get_env_value", lambda key: "")
+    monkeypatch.setattr(setup_mod, "save_env_value", lambda k, v: saved.update({k: v}))
+    monkeypatch.setattr(setup_mod, "prompt", lambda *_a, **_kw: next(prompts))
+    monkeypatch.setattr(setup_mod, "prompt_yes_no", lambda *_a, **_kw: False)
+    monkeypatch.setattr(setup_mod, "_write_slack_manifest_and_instruct", lambda: None)
+
+    setup_mod._setup_slack()
+
+    assert saved.get("SLACK_HOME_CHANNEL") == "C01ABC2DE3F"
+
+
+def test_setup_slack_home_channel_empty_not_saved(monkeypatch):
+    """_setup_slack() does not save SLACK_HOME_CHANNEL when left blank."""
+    saved = {}
+    prompts = iter(["xoxb-test-token", "xapp-test-token", "", ""])
+
+    monkeypatch.setattr(setup_mod, "get_env_value", lambda key: "")
+    monkeypatch.setattr(setup_mod, "save_env_value", lambda k, v: saved.update({k: v}))
+    monkeypatch.setattr(setup_mod, "prompt", lambda *_a, **_kw: next(prompts))
+    monkeypatch.setattr(setup_mod, "prompt_yes_no", lambda *_a, **_kw: False)
+    monkeypatch.setattr(setup_mod, "_write_slack_manifest_and_instruct", lambda: None)
+
+    setup_mod._setup_slack()
+
+    assert "SLACK_HOME_CHANNEL" not in saved
--- a/tests/hermes_cli/test_setup_agent_settings.py
+++ b/tests/hermes_cli/test_setup_agent_settings.py
@ -4,11 +4,16 @@ from hermes_cli.setup import setup_agent_settings


 def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monkeypatch, capsys):
-    """The helper text should match the value shown in the prompt."""
+    """The helper text should match the value shown in the prompt.
+
+    After PR#18413 max_turns is read exclusively from config.yaml — the
+    .env `HERMES_MAX_ITERATIONS` fallback was removed because it was
+    shadowing the user's current config (see the 60-vs-500 incident).
+    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    config = {
-        "agent": {"max_turns": 90},
+        "agent": {"max_turns": 60},
        "display": {"tool_progress": "all"},
        "compression": {"threshold": 0.50},
        "session_reset": {"mode": "both", "idle_minutes": 1440, "at_hour": 4},
@ -16,10 +21,10 @@ def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monk

    prompt_answers = iter(["60", "all", "0.5"])

-    monkeypatch.setattr("hermes_cli.setup.get_env_value", lambda key: "60" if key == "HERMES_MAX_ITERATIONS" else "")
    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: next(prompt_answers))
    monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4)
    monkeypatch.setattr("hermes_cli.setup.save_env_value", lambda *args, **kwargs: None)
+    monkeypatch.setattr("hermes_cli.setup.remove_env_value", lambda *args, **kwargs: None)
    monkeypatch.setattr("hermes_cli.setup.save_config", lambda *args, **kwargs: None)

    setup_agent_settings(config)
@ -27,3 +32,47 @@ def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monk
    out = capsys.readouterr().out
    assert "Press Enter to keep 60." in out
    assert "Default is 90" not in out
+
+
+def test_setup_agent_settings_prefers_config_over_stale_env(tmp_path, monkeypatch, capsys):
+    """Config.yaml wins even when a stale .env value disagrees.
+
+    Regression guard for the bug where `.env HERMES_MAX_ITERATIONS=60`
+    from an old `hermes setup` run shadowed `agent.max_turns: 500` in
+    config.yaml. The wizard must now display the config value.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    config = {
+        "agent": {"max_turns": 500},  # user bumped this in config.yaml
+        "display": {"tool_progress": "all"},
+        "compression": {"threshold": 0.50},
+        "session_reset": {"mode": "both", "idle_minutes": 1440, "at_hour": 4},
+    }
+
+    prompt_answers = iter(["500", "all", "0.5"])
+
+    # Simulate stale .env value — the wizard must ignore this.
+    monkeypatch.setattr(
+        "hermes_cli.setup.get_env_value",
+        lambda key: "60" if key == "HERMES_MAX_ITERATIONS" else "",
+    )
+    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: next(prompt_answers))
+    monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4)
+    monkeypatch.setattr("hermes_cli.setup.save_env_value", lambda *args, **kwargs: None)
+
+    removed_keys: list[str] = []
+    monkeypatch.setattr(
+        "hermes_cli.setup.remove_env_value",
+        lambda key: (removed_keys.append(key), True)[1],
+    )
+    monkeypatch.setattr("hermes_cli.setup.save_config", lambda *args, **kwargs: None)
+
+    setup_agent_settings(config)
+
+    out = capsys.readouterr().out
+    # Config value wins
+    assert "Press Enter to keep 500." in out
+    assert "Press Enter to keep 60." not in out
+    # And the stale .env entry gets cleaned up
+    assert "HERMES_MAX_ITERATIONS" in removed_keys
--- a/tests/hermes_cli/test_setup_prompt_menus.py
+++ b/tests/hermes_cli/test_setup_prompt_menus.py
@ -1,6 +1,28 @@
 from hermes_cli import setup as setup_mod


+def test_prompt_strips_bracketed_paste_markers(monkeypatch):
+    monkeypatch.setattr(
+        "builtins.input",
+        lambda _prompt="": "\x1b[200~sk-ant-api-key\x1b[201~",
+    )
+
+    value = setup_mod.prompt("API key")
+
+    assert value == "sk-ant-api-key"
+
+
+def test_password_prompt_strips_bracketed_paste_markers(monkeypatch):
+    monkeypatch.setattr(
+        "getpass.getpass",
+        lambda _prompt="": "\x1b[200~secret-token\x1b[201~",
+    )
+
+    value = setup_mod.prompt("API key", password=True)
+
+    assert value == "secret-token"
+
+
 def test_prompt_choice_uses_curses_helper(monkeypatch):
    monkeypatch.setattr(setup_mod, "_curses_prompt_choice", lambda question, choices, default=0, description=None: 1)

--- a/tests/hermes_cli/test_slack_cli.py
+++ b/tests/hermes_cli/test_slack_cli.py
@ -0,0 +1,30 @@
+"""Tests for Slack CLI helpers."""
+
+from hermes_cli.slack_cli import _build_full_manifest
+
+
+class TestSlackFullManifest:
+    """Generated full Slack app manifest used by `hermes slack manifest`."""
+
+    def test_app_home_messages_are_writable(self):
+        manifest = _build_full_manifest("Hermes", "Your Hermes agent on Slack")
+
+        assert manifest["features"]["app_home"] == {
+            "home_tab_enabled": False,
+            "messages_tab_enabled": True,
+            "messages_tab_read_only_enabled": False,
+        }
+
+    def test_private_channel_directory_scope_is_included(self):
+        manifest = _build_full_manifest("Hermes", "Your Hermes agent on Slack")
+
+        bot_scopes = manifest["oauth_config"]["scopes"]["bot"]
+        assert "groups:read" in bot_scopes
+
+    def test_assistant_features_remain_enabled(self):
+        manifest = _build_full_manifest("Hermes", "Your Hermes agent on Slack")
+
+        assert "assistant_view" in manifest["features"]
+        assert "assistant:write" in manifest["oauth_config"]["scopes"]["bot"]
+        bot_events = manifest["settings"]["event_subscriptions"]["bot_events"]
+        assert "assistant_thread_started" in bot_events
--- a/tests/hermes_cli/test_spotify_auth.py
+++ b/tests/hermes_cli/test_spotify_auth.py
@ -88,6 +88,51 @@ def test_auth_spotify_status_command_reports_logged_in(capsys, monkeypatch: pyte
    assert "client_id: spotify-client" in output


+def test_spotify_logout_does_not_reset_model_provider(
+    tmp_path,
+    monkeypatch: pytest.MonkeyPatch,
+    capsys,
+) -> None:
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    config_path = tmp_path / "config.yaml"
+    config_path.write_text(
+        "model:\n"
+        "  default: gemini-3-flash\n"
+        "  provider: custom:local\n"
+        "  base_url: http://localhost:11434/v1\n"
+        "  api_key: ${LOCAL_API_KEY}\n",
+        encoding="utf-8",
+    )
+
+    with auth_mod._auth_store_lock():
+        store = auth_mod._load_auth_store()
+        auth_mod._store_provider_state(
+            store,
+            "spotify",
+            {
+                "client_id": "spotify-client",
+                "access_token": "access-token",
+                "refresh_token": "refresh-token",
+                "expires_at": "2099-01-01T00:00:00+00:00",
+            },
+            set_active=False,
+        )
+        auth_mod._save_auth_store(store)
+
+    auth_mod.logout_command(SimpleNamespace(provider="spotify"))
+
+    output = capsys.readouterr().out
+    assert "Logged out of Spotify." in output
+    assert "Model provider configuration was unchanged." in output
+    assert auth_mod.get_provider_auth_state("spotify") is None
+    assert config_path.read_text(encoding="utf-8") == (
+        "model:\n"
+        "  default: gemini-3-flash\n"
+        "  provider: custom:local\n"
+        "  base_url: http://localhost:11434/v1\n"
+        "  api_key: ${LOCAL_API_KEY}\n"
+    )
+

 def test_spotify_interactive_setup_persists_client_id(
    tmp_path,
--- a/tests/hermes_cli/test_startup_plugin_gating.py
+++ b/tests/hermes_cli/test_startup_plugin_gating.py
@ -0,0 +1,180 @@
+"""Guards for CLI startup performance regression.
+
+``hermes_cli.main`` skips eager plugin discovery at argparse-setup time
+when the invocation is clearly targeting a known built-in subcommand.
+This saves 500-650ms on ``hermes --help``, ``hermes version``,
+``hermes logs``, etc., by not importing ``google.cloud.pubsub_v1``,
+``aiohttp``, ``grpc``, and friends.
+
+Two invariants:
+
+1. ``_BUILTIN_SUBCOMMANDS`` must contain every subcommand that is actually
+   registered by ``main()``.  If an entry is missing, plugin discovery
+   runs unnecessarily for that command (correctness-safe, just slow).
+   If an entry is PRESENT but the subcommand doesn't exist, a plugin
+   could shadow the name — also bad.
+
+2. ``_plugin_cli_discovery_needed()`` returns the right answer for the
+   flag/positional parsing cases it's meant to handle.
+"""
+
+from __future__ import annotations
+
+import io
+import re
+import sys
+from contextlib import redirect_stdout
+from unittest.mock import patch
+
+import pytest
+
+from hermes_cli.main import (
+    _BUILTIN_SUBCOMMANDS,
+    _first_positional_argv,
+    _plugin_cli_discovery_needed,
+)
+
+
+# ── helper: grab the live set of top-level subcommands from argparse ───────
+
+
+def _live_subcommand_names() -> set[str]:
+    """Run ``hermes --help`` in-process and parse the subcommand block.
+
+    We patch ``_plugin_cli_discovery_needed`` to always return False so
+    plugin-registered commands aren't included — we're validating the
+    built-in-only set.
+    """
+    from hermes_cli import main as _main
+
+    argv_backup = sys.argv[:]
+    sys.argv = ["hermes", "--help"]
+    buf = io.StringIO()
+    try:
+        with patch.object(_main, "_plugin_cli_discovery_needed", return_value=False):
+            with redirect_stdout(buf):
+                with pytest.raises(SystemExit):
+                    _main.main()
+    finally:
+        sys.argv = argv_backup
+
+    text = buf.getvalue()
+    # argparse prints "{chat,model,...}" somewhere in the help output
+    m = re.search(r"\{([a-zA-Z0-9_,\-]+)\}", text)
+    assert m, f"Could not find subcommand group in --help output:\n{text[:500]}"
+    return set(m.group(1).split(","))
+
+
+# ── _first_positional_argv ─────────────────────────────────────────────────
+
+
+@pytest.mark.parametrize(
+    "argv,expected",
+    [
+        (["hermes"], None),
+        (["hermes", "--help"], None),
+        (["hermes", "-h"], None),
+        (["hermes", "--version"], None),
+        (["hermes", "-w"], None),
+        # -p / --profile is stripped from sys.argv by
+        # _apply_profile_override() at import time, so it never reaches
+        # _first_positional_argv. We test with just -w / --tui here.
+        (["hermes", "-w", "--tui"], None),
+        (["hermes", "version"], "version"),
+        (["hermes", "--tui", "chat"], "chat"),
+        (["hermes", "-w", "logs"], "logs"),
+        (["hermes", "chat", "hello world"], "chat"),
+        (["hermes", "gateway", "run"], "gateway"),
+        # Top-level value-taking flags: the value should be skipped.
+        (["hermes", "-m", "gpt5", "chat"], "chat"),
+        (["hermes", "--model", "gpt5", "chat", "hi"], "chat"),
+        (["hermes", "-m", "gpt5", "--provider", "openai", "chat"], "chat"),
+        (["hermes", "-z", "hello world"], None),
+        (["hermes", "-z", "hello", "chat"], "chat"),
+        (["hermes", "--model=gpt5", "chat"], "chat"),     # inline form
+        (["hermes", "--", "chat"], "chat"),               # -- terminator
+        (["hermes", "-w", "--"], None),
+        # Unknown positional after skipped flags → plugin-cmd candidate.
+        (["hermes", "some-plugin-cmd"], "some-plugin-cmd"),
+        (["hermes", "-m", "gpt5", "some-plugin-cmd"], "some-plugin-cmd"),
+    ],
+)
+def test_first_positional_argv(argv, expected):
+    with patch.object(sys, "argv", argv):
+        assert _first_positional_argv() == expected
+
+
+# ── _plugin_cli_discovery_needed ───────────────────────────────────────────
+
+
+@pytest.mark.parametrize(
+    "argv",
+    [
+        ["hermes"],                          # bare → chat
+        ["hermes", "--help"],                # top-level help
+        ["hermes", "-h"],
+        ["hermes", "version"],               # known built-in
+        ["hermes", "logs"],
+        ["hermes", "gateway", "run"],
+        ["hermes", "--tui"],
+        ["hermes", "-w", "--tui"],
+        ["hermes", "chat", "hi"],
+        ["hermes", "help"],                  # accepted built-in-ish
+        ["hermes", "-m", "gpt5", "chat"],    # flag-value-skipping
+    ],
+)
+def test_discovery_skipped_for_builtins(argv):
+    with patch.object(sys, "argv", argv):
+        assert _plugin_cli_discovery_needed() is False
+
+
+@pytest.mark.parametrize(
+    "argv",
+    [
+        ["hermes", "meet", "join"],          # potential google_meet plugin
+        ["hermes", "honcho", "status"],      # potential memory plugin
+        ["hermes", "unknown-subcmd"],
+    ],
+)
+def test_discovery_runs_for_unknown_positional(argv):
+    with patch.object(sys, "argv", argv):
+        assert _plugin_cli_discovery_needed() is True
+
+
+# ── _BUILTIN_SUBCOMMANDS ↔ argparse registration parity ────────────────────
+
+
+def test_builtin_set_covers_every_registered_subcommand():
+    """Every subcommand registered in main() must appear in the set.
+
+    Missing entries cause a slow-path regression (correctness stays
+    fine — discovery just runs unnecessarily).
+    """
+    live = _live_subcommand_names()
+    # "help" is synthetic — an argparse-implicit convenience we include
+    # in the set so ``hermes help <cmd>`` skips discovery; it won't show
+    # up as a subparser in the --help output.
+    declared = _BUILTIN_SUBCOMMANDS - {"help"}
+    missing_from_declaration = live - declared
+    assert not missing_from_declaration, (
+        f"_BUILTIN_SUBCOMMANDS is missing these live subcommands: "
+        f"{sorted(missing_from_declaration)}. Add them to "
+        f"hermes_cli/main.py::_BUILTIN_SUBCOMMANDS so plugin discovery "
+        f"can be skipped when the user targets them."
+    )
+
+
+def test_builtin_set_has_no_phantom_entries():
+    """No entry in the set should refer to a subcommand that no longer exists.
+
+    A phantom entry means plugin discovery gets incorrectly skipped for
+    a name that — if a plugin actually registered it — would fail to
+    parse. Keeps the set honest.
+    """
+    live = _live_subcommand_names()
+    allowed_synthetic = {"help"}
+    phantom = _BUILTIN_SUBCOMMANDS - live - allowed_synthetic
+    assert not phantom, (
+        f"_BUILTIN_SUBCOMMANDS has entries that are not registered as "
+        f"top-level subparsers: {sorted(phantom)}"
+    )
--- a/tests/hermes_cli/test_suppress_eio_on_interrupt.py
+++ b/tests/hermes_cli/test_suppress_eio_on_interrupt.py
@ -113,3 +113,123 @@ class TestOuterExceptEIO:
        assert not (getattr(exc, "errno", None) == errno.EIO)
        assert "is not registered" not in str(exc)
        assert "Bad file descriptor" not in str(exc)
+
+
+# ---------------------------------------------------------------------------
+# Signal handler – guarded logger.debug (#13710 regression)
+# ---------------------------------------------------------------------------
+#
+# CPython's logging module is not reentrant-safe.  ``Logger.isEnabledFor``
+# caches level results in ``Logger._cache``; under shutdown races the cache
+# can be cleared (``Logger._clear_cache``) or mid-mutation when the signal
+# fires, raising ``KeyError: <level_int>`` (e.g. ``KeyError: 10`` for DEBUG)
+# from inside the handler.  If that KeyError escapes, it bypasses the
+# ``raise KeyboardInterrupt()`` on the next line, which in turn bypasses
+# prompt_toolkit's normal interrupt unwind and surfaces as the EIO cascade
+# from #13710.
+#
+# The fix: wrap the ``logger.debug`` call in the signal handler in a bare
+# ``try/except Exception: pass`` so logging can never raise through it.
+#
+# These tests verify the contract: the handler must raise KeyboardInterrupt
+# (and nothing else) regardless of whether logger.debug succeeds or blows up.
+
+
+def _make_signal_handler(logger, agent_state):
+    """Build a standalone copy of ``_signal_handler``.
+
+    The real handler is defined as a closure inside ``CLI._run_interactive``;
+    we reconstruct an equivalent here so the unit tests don't need a full
+    CLI instance.  Mirrors cli.py:_signal_handler as of #13710 regression
+    fix — guarded logger.debug + agent interrupt + KeyboardInterrupt.
+    """
+    def _signal_handler(signum, frame):
+        # Guarded: logging must never raise through a signal handler.
+        try:
+            logger.debug("Received signal %s, triggering graceful shutdown", signum)
+        except Exception:
+            pass  # never let logging raise from a signal handler (#13710 regression)
+        try:
+            if agent_state.get("agent") and agent_state.get("running"):
+                agent_state["agent"].interrupt(f"received signal {signum}")
+        except Exception:
+            pass  # never block signal handling
+        raise KeyboardInterrupt()
+    return _signal_handler
+
+
+class TestSignalHandlerLoggingRace:
+    """#13710 regression — logger.debug in signal handler must not escape.
+
+    If the DEBUG-level ``logging._cache`` lookup races with a concurrent
+    ``_clear_cache`` (e.g. from another thread reconfiguring logging during
+    shutdown), ``logger.debug`` can raise ``KeyError: 10``.  The signal
+    handler must swallow that and still raise KeyboardInterrupt.
+    """
+
+    def test_keyboard_interrupt_raised_on_normal_path(self):
+        """Sanity: handler raises KeyboardInterrupt when logging works."""
+        logger = MagicMock()
+        handler = _make_signal_handler(logger, {})
+        with pytest.raises(KeyboardInterrupt):
+            handler(15, None)  # SIGTERM
+        logger.debug.assert_called_once()
+
+    def test_keyboard_interrupt_raised_when_logger_raises_keyerror(self):
+        """logger.debug raising KeyError(10) must not escape — KeyboardInterrupt wins.
+
+        This is the exact failure signature from the #13710 regression: the
+        CPython 3.11 ``Logger._cache[level]`` race surfaces as KeyError on
+        the integer level value, and previously propagated out of the
+        signal handler before the ``raise KeyboardInterrupt()`` could fire.
+        """
+        logger = MagicMock()
+        logger.debug.side_effect = KeyError(10)  # DEBUG level int
+        handler = _make_signal_handler(logger, {})
+        # Must still raise KeyboardInterrupt, NOT KeyError.
+        with pytest.raises(KeyboardInterrupt):
+            handler(15, None)
+
+    def test_keyboard_interrupt_raised_when_logger_raises_generic(self):
+        """Any Exception from logger.debug must be swallowed by the guard."""
+        logger = MagicMock()
+        logger.debug.side_effect = RuntimeError("logging is shutting down")
+        handler = _make_signal_handler(logger, {})
+        with pytest.raises(KeyboardInterrupt):
+            handler(15, None)
+
+    def test_agent_interrupt_still_fires_when_logger_raises(self):
+        """Even if logger.debug blows up, the agent interrupt must still run.
+
+        The whole point of the grace window is cleaning up the agent's
+        subprocess group.  A logging race must not skip that step.
+        """
+        logger = MagicMock()
+        logger.debug.side_effect = KeyError(10)
+        agent = MagicMock()
+        handler = _make_signal_handler(logger, {"agent": agent, "running": True})
+        with pytest.raises(KeyboardInterrupt):
+            handler(15, None)
+        agent.interrupt.assert_called_once_with("received signal 15")
+
+    def test_agent_interrupt_failure_also_does_not_escape(self):
+        """Defense-in-depth: agent.interrupt() raising must not escape either."""
+        logger = MagicMock()
+        agent = MagicMock()
+        agent.interrupt.side_effect = RuntimeError("agent already torn down")
+        handler = _make_signal_handler(logger, {"agent": agent, "running": True})
+        with pytest.raises(KeyboardInterrupt):
+            handler(15, None)
+
+    def test_base_exception_from_logger_is_not_swallowed(self):
+        """BaseException (e.g. SystemExit) must still propagate — only Exception is caught.
+
+        The guard uses ``except Exception`` deliberately; BaseException
+        subclasses like SystemExit or a nested KeyboardInterrupt should
+        still be honored so we don't mask real shutdown signals.
+        """
+        logger = MagicMock()
+        logger.debug.side_effect = SystemExit(1)
+        handler = _make_signal_handler(logger, {})
+        with pytest.raises(SystemExit):
+            handler(15, None)
--- a/tests/hermes_cli/test_teams_pipeline_plugin_cli.py
+++ b/tests/hermes_cli/test_teams_pipeline_plugin_cli.py
@ -0,0 +1,214 @@
+"""Tests for the teams_pipeline plugin CLI."""
+
+from __future__ import annotations
+
+import json
+from argparse import ArgumentParser, Namespace
+from types import SimpleNamespace
+
+import pytest
+
+from plugins.teams_pipeline.cli import register_cli, teams_pipeline_command
+from plugins.teams_pipeline.store import TeamsPipelineStore
+
+
+@pytest.fixture(autouse=True)
+def _isolate(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+
+def _make_args(**kwargs):
+    defaults = {
+        "teams_pipeline_action": None,
+        "store_path": "",
+        "status": "",
+        "limit": 20,
+        "job_id": "",
+        "meeting_id": "",
+        "join_web_url": "",
+        "tenant_id": "",
+        "call_record_id": "",
+        "resource": "",
+        "notification_url": "",
+        "change_type": "updated",
+        "expiration": "",
+        "client_state": "",
+        "lifecycle_notification_url": "",
+        "latest_supported_tls_version": "v1_2",
+        "subscription_id": "",
+        "force_refresh": False,
+        "renew_within_hours": 24,
+        "extend_hours": 24,
+        "dry_run": False,
+    }
+    defaults.update(kwargs)
+    return Namespace(**defaults)
+
+
+def test_register_cli_builds_tree():
+    parser = ArgumentParser()
+    register_cli(parser)
+    args = parser.parse_args(["list"])
+    assert args.teams_pipeline_action == "list"
+
+
+def test_list_prints_recent_jobs(capsys, tmp_path):
+    store = TeamsPipelineStore(tmp_path / "teams_pipeline_store.json")
+    store.upsert_job(
+        "job-1",
+        {
+            "event_id": "evt-1",
+            "source_event_type": "updated",
+            "dedupe_key": "evt-1",
+            "status": "completed",
+            "meeting_ref": {"meeting_id": "meeting-1"},
+        },
+    )
+
+    teams_pipeline_command(
+        _make_args(
+            teams_pipeline_action="list",
+            store_path=str(tmp_path / "teams_pipeline_store.json"),
+        )
+    )
+    out = capsys.readouterr().out
+    assert "job-1" in out
+    assert "meeting-1" in out
+
+
+def test_show_prints_job_json(capsys, tmp_path):
+    store = TeamsPipelineStore(tmp_path / "teams_pipeline_store.json")
+    store.upsert_job(
+        "job-1",
+        {
+            "event_id": "evt-1",
+            "source_event_type": "updated",
+            "dedupe_key": "evt-1",
+            "status": "completed",
+            "meeting_ref": {"meeting_id": "meeting-1"},
+        },
+    )
+
+    teams_pipeline_command(
+        _make_args(
+            teams_pipeline_action="show",
+            job_id="job-1",
+            store_path=str(tmp_path / "teams_pipeline_store.json"),
+        )
+    )
+    out = capsys.readouterr().out
+    payload = json.loads(out)
+    assert payload["job_id"] == "job-1"
+    assert payload["meeting_ref"]["meeting_id"] == "meeting-1"
+
+
+def test_fetch_requires_meeting_identifier(capsys):
+    teams_pipeline_command(_make_args(teams_pipeline_action="fetch"))
+    out = capsys.readouterr().out
+    assert "meeting_id or join_web_url is required" in out
+
+
+def test_subscriptions_lists_graph_subscriptions(monkeypatch, capsys):
+    class FakeClient:
+        async def collect_paginated(self, path):
+            assert path == "/subscriptions"
+            return [
+                {
+                    "id": "sub-1",
+                    "resource": "communications/onlineMeetings/getAllTranscripts",
+                    "changeType": "updated",
+                    "expirationDateTime": "2026-05-05T00:00:00Z",
+                }
+            ]
+
+    monkeypatch.setattr("plugins.teams_pipeline.cli.build_graph_client", lambda: FakeClient())
+    teams_pipeline_command(_make_args(teams_pipeline_action="subscriptions"))
+    out = capsys.readouterr().out
+    assert "sub-1" in out
+    assert "getAllTranscripts" in out
+
+
+def test_subscribe_defaults_to_created_for_transcript_resources(monkeypatch, capsys):
+    captured = {}
+
+    class FakeClient:
+        async def post_json(self, path, json_body=None, headers=None):
+            captured["path"] = path
+            captured["json_body"] = json_body
+            return {
+                "id": "sub-transcript",
+                "resource": json_body["resource"],
+                "changeType": json_body["changeType"],
+                "notificationUrl": json_body["notificationUrl"],
+                "expirationDateTime": json_body["expirationDateTime"],
+            }
+
+    monkeypatch.setattr("plugins.teams_pipeline.cli.build_graph_client", lambda: FakeClient())
+    teams_pipeline_command(
+        _make_args(
+            teams_pipeline_action="subscribe",
+            resource="communications/onlineMeetings/getAllTranscripts",
+            notification_url="https://example.com/webhooks/msgraph",
+            change_type="",
+        )
+    )
+    payload = json.loads(capsys.readouterr().out)
+    assert captured["path"] == "/subscriptions"
+    assert captured["json_body"]["changeType"] == "created"
+    assert payload["changeType"] == "created"
+
+
+def test_token_health_force_refresh(monkeypatch, capsys):
+    class FakeProvider:
+        def inspect_token_health(self):
+            return {"configured": True, "cache_state": "warm"}
+
+        async def get_access_token(self, force_refresh=False):
+            assert force_refresh is True
+            return "token-123"
+
+    monkeypatch.setattr(
+        "plugins.teams_pipeline.cli.MicrosoftGraphTokenProvider",
+        SimpleNamespace(from_env=lambda: FakeProvider()),
+    )
+    teams_pipeline_command(_make_args(teams_pipeline_action="token-health", force_refresh=True))
+    payload = json.loads(capsys.readouterr().out)
+    assert payload["configured"] is True
+    assert payload["last_refresh_succeeded"] is True
+    assert payload["access_token_length"] == len("token-123")
+
+
+def test_validate_accepts_msgraph_credentials_for_graph_delivery(monkeypatch, capsys, tmp_path):
+    from gateway.config import Platform, PlatformConfig
+
+    monkeypatch.setenv("MSGRAPH_TENANT_ID", "tenant")
+    monkeypatch.setenv("MSGRAPH_CLIENT_ID", "client")
+    monkeypatch.setenv("MSGRAPH_CLIENT_SECRET", "secret")
+
+    gateway_config = SimpleNamespace(
+        platforms={
+            Platform.MSGRAPH_WEBHOOK: PlatformConfig(enabled=True, extra={}),
+            Platform("teams"): PlatformConfig(
+                enabled=True,
+                extra={
+                    "delivery_mode": "graph",
+                    "team_id": "team-1",
+                    "channel_id": "channel-1",
+                },
+            ),
+        }
+    )
+    monkeypatch.setattr(
+        "plugins.teams_pipeline.cli.load_gateway_config",
+        lambda: gateway_config,
+    )
+
+    teams_pipeline_command(
+        _make_args(
+            teams_pipeline_action="validate",
+            store_path=str(tmp_path / "teams_pipeline_store.json"),
+        )
+    )
+    payload = json.loads(capsys.readouterr().out)
+    assert payload["ok"] is True
+    assert payload["issues"] == []
--- a/tests/hermes_cli/test_tencent_tokenhub_provider.py
+++ b/tests/hermes_cli/test_tencent_tokenhub_provider.py
@ -192,13 +192,19 @@ class TestTencentTokenhubCanonicalProvider:


 class TestTencentInOpenRouterAndNous:
-    """tencent/hy3-preview:free should appear in OpenRouter and Nous curated lists."""
+    """tencent/hy3-preview:free and tencent/hy3-preview should appear in OpenRouter and Nous curated lists."""

    def test_in_openrouter_fallback(self):
        from hermes_cli.models import OPENROUTER_MODELS
        ids = [mid for mid, _ in OPENROUTER_MODELS]
        assert "tencent/hy3-preview:free" in ids

+    def test_paid_in_openrouter_fallback(self):
+        """tencent/hy3-preview (paid, no :free suffix) should also be in OpenRouter list."""
+        from hermes_cli.models import OPENROUTER_MODELS
+        ids = [mid for mid, _ in OPENROUTER_MODELS]
+        assert "tencent/hy3-preview" in ids
+
    def test_in_nous_provider_models(self):
        from hermes_cli.models import _PROVIDER_MODELS
        assert "tencent/hy3-preview" in _PROVIDER_MODELS["nous"]
@ -298,12 +304,20 @@ class TestTencentTokenhubURLMapping:


 class TestTencentTokenhubContextLength:
-    """hy3-preview context length is registered."""
+    """hy3-preview has a context-length entry registered.

-    def test_hy3_preview_context_length(self):
+    Asserting the relationship (registered + ≥ 4096) instead of a
+    specific value, per AGENTS.md "Don't write change-detector tests".
+    The previous version of this class pinned an exact integer that
+    broke whenever Tencent / OpenRouter bumped the published context
+    window (#22268).
+    """
+
+    def test_hy3_preview_has_registered_context_length(self):
        from agent.model_metadata import get_model_context_length
        ctx = get_model_context_length("hy3-preview")
-        assert ctx == 256000
+        assert isinstance(ctx, int)
+        assert ctx >= 4096, f"hy3-preview context length looks unset/wrong: {ctx}"


 # =============================================================================
@ -420,7 +434,7 @@ class TestTencentTokenhubCLIDispatch:


 class TestTencentTokenhubModelCatalogJSON:
-    """Verify tencent/hy3-preview:free is present in the website model-catalog.json."""
+    """Verify tencent/hy3-preview:free and tencent/hy3-preview are present in the website model-catalog.json."""

    def test_in_model_catalog_json(self):
        catalog_path = os.path.join(
@ -445,6 +459,7 @@ class TestTencentTokenhubModelCatalogJSON:
                for model in provider_entry.get("models", []):
                    all_ids.add(model.get("id", ""))
        assert "tencent/hy3-preview:free" in all_ids
+        assert "tencent/hy3-preview" in all_ids


 # =============================================================================
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@ -2,12 +2,16 @@

 from unittest.mock import patch

+import pytest
+
 from hermes_cli.tools_config import (
    _DEFAULT_OFF_TOOLSETS,
    _apply_toolset_change,
    _configure_provider,
+    _reconfigure_provider,
    _get_platform_tools,
    _platform_toolset_summary,
+    _reconfigure_tool,
    _save_platform_tools,
    _toolset_has_keys,
    CONFIGURABLE_TOOLSETS,
@ -115,12 +119,79 @@ def test_get_platform_tools_homeassistant_toolset_off_for_cron_when_hass_token_m
    assert "homeassistant" not in cron_enabled


+def test_get_platform_tools_expands_composite_when_mixed_with_configurable():
+    """``[hermes-cli, spotify]`` (composite + configurable) must keep the full
+    ``hermes-cli`` toolset alongside the explicit Spotify opt-in. The
+    has_explicit_config branch used to drop ``hermes-cli`` on the floor,
+    leaving sessions with only ``{spotify, kanban}``."""
+    config = {"platform_toolsets": {"cli": ["hermes-cli", "spotify"]}}
+
+    enabled = _get_platform_tools(config, "cli", include_default_mcp_servers=False)
+
+    # Native tools must reappear.
+    for ts in ("terminal", "file", "web", "browser", "memory", "delegation",
+               "code_execution", "todo", "session_search", "skills"):
+        assert ts in enabled, f"{ts} should be enabled when hermes-cli is listed"
+    # User explicitly opted into Spotify — must survive _DEFAULT_OFF_TOOLSETS subtraction.
+    assert "spotify" in enabled
+
+
+def test_get_platform_tools_composite_only_unchanged():
+    """Composite-only config (no configurable in list) must still take the
+    else-branch path and produce the full toolset — guards against the new
+    code accidentally hijacking the composite-only case."""
+    composite_only = _get_platform_tools(
+        {"platform_toolsets": {"cli": ["hermes-cli"]}},
+        "cli",
+        include_default_mcp_servers=False,
+    )
+    default = _get_platform_tools({}, "cli", include_default_mcp_servers=False)
+
+    assert composite_only == default
+
+
+def test_get_platform_tools_configurable_only_no_expansion():
+    """Configurable-only list (no composite) must not pull in unrelated
+    toolsets — guards against the expansion firing when ``composite_tools``
+    is empty."""
+    config = {"platform_toolsets": {"cli": ["terminal", "file"]}}
+
+    enabled = _get_platform_tools(config, "cli", include_default_mcp_servers=False)
+
+    assert "terminal" in enabled
+    assert "file" in enabled
+    # Web shouldn't sneak in via the new expansion path.
+    assert "web" not in enabled
+
+
+def test_get_platform_tools_mixed_does_not_resurrect_default_off():
+    """Expansion must subtract _DEFAULT_OFF_TOOLSETS from the implicit
+    pull-in. Without this, ``hermes-cli`` expansion would re-enable
+    ``moa`` / ``rl`` / ``homeassistant`` for users who never opted in."""
+    config = {"platform_toolsets": {"cli": ["hermes-cli", "terminal"]}}
+
+    enabled = _get_platform_tools(config, "cli", include_default_mcp_servers=False)
+
+    assert "terminal" in enabled
+    assert "moa" not in enabled
+    assert "rl" not in enabled
+
+
 def test_get_platform_tools_preserves_explicit_empty_selection():
    config = {"platform_toolsets": {"cli": []}}

    enabled = _get_platform_tools(config, "cli")

-    assert enabled == set()
+    # An explicit empty list disables every CONFIGURABLE toolset (web,
+    # terminal, memory, …). Non-configurable platform toolsets that ride
+    # along on the platform's default composite (e.g. `kanban`, whose tools
+    # live in _HERMES_CORE_TOOLS but aren't user-toggleable) are still
+    # auto-recovered by _get_platform_tools so saving via `hermes tools`
+    # doesn't silently drop them. The contract this test guards is the
+    # configurable side: nothing the user could have checked in the TUI
+    # checklist should reappear here.
+    configurable = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
+    assert enabled.isdisjoint(configurable)


 def test_apply_toolset_change_from_default_does_not_enable_default_off_toolsets():
@ -459,6 +530,33 @@ def test_local_browser_provider_is_saved_explicitly(monkeypatch):
    assert config["browser"]["cloud_provider"] == "local"


+def test_reconfigure_lists_enabled_web_without_existing_provider_config(monkeypatch):
+    config = {"platform_toolsets": {"cli": ["web"]}}
+    seen = {}
+    configured = []
+
+    monkeypatch.setattr(
+        "hermes_cli.tools_config._toolset_has_keys",
+        lambda ts_key, config=None: False,
+    )
+
+    def fake_prompt_choice(question, choices, default=0):
+        seen["choices"] = choices
+        return 0
+
+    monkeypatch.setattr("hermes_cli.tools_config._prompt_choice", fake_prompt_choice)
+    monkeypatch.setattr(
+        "hermes_cli.tools_config._configure_tool_category_for_reconfig",
+        lambda ts_key, cat, config: configured.append(ts_key),
+    )
+    monkeypatch.setattr("hermes_cli.tools_config.save_config", lambda config: None)
+
+    _reconfigure_tool(config)
+
+    assert any("Web Search" in choice for choice in seen["choices"])
+    assert configured == ["web"]
+
+
 def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):
    monkeypatch.setattr("hermes_cli.tools_config.managed_nous_tools_enabled", lambda: True)
    monkeypatch.setattr("hermes_cli.nous_subscription.managed_nous_tools_enabled", lambda: True)
@ -861,3 +959,27 @@ def test_get_effective_configurable_toolsets_dedupes_bundled_plugins():
    assert len(spotify_rows) == 1, spotify_rows
    # Built-in label wins over the plugin label.
    assert spotify_rows[0][1] == "🎵 Spotify"
+
+
+@pytest.mark.parametrize("provider,config_key,expected", [
+    # managed provider → use_gateway True
+    ({"name": "T", "tts_provider": "elevenlabs", "managed_nous_feature": "tts", "env_vars": []}, "tts", True),
+    ({"name": "B", "browser_provider": "browserbase", "managed_nous_feature": "browser", "env_vars": []}, "browser", True),
+    ({"name": "W", "web_backend": "tavily", "managed_nous_feature": "web", "env_vars": []}, "web", True),
+    # self-hosted provider → use_gateway False
+    ({"name": "T", "tts_provider": "elevenlabs", "env_vars": []}, "tts", False),
+    ({"name": "B", "browser_provider": "browserbase", "env_vars": []}, "browser", False),
+    ({"name": "W", "web_backend": "tavily", "env_vars": []}, "web", False),
+])
+def test_reconfigure_provider_syncs_use_gateway(provider, config_key, expected):
+    config = {}
+    _reconfigure_provider(provider, config)
+    assert config[config_key]["use_gateway"] is expected
+
+
+def test_reconfigure_browser_provider_overwrites_stale_use_gateway():
+    # Switching from managed (use_gateway=True) to self-hosted must clear the stale flag.
+    config = {"browser": {"cloud_provider": "managed-browser", "use_gateway": True}}
+    provider = {"name": "Browserbase", "browser_provider": "browserbase", "env_vars": []}
+    _reconfigure_provider(provider, config)
+    assert config["browser"]["use_gateway"] is False
--- a/tests/hermes_cli/test_tui_npm_install.py
+++ b/tests/hermes_cli/test_tui_npm_install.py
@ -69,6 +69,39 @@ def test_no_install_when_only_optional_peer_package_missing_from_hidden_lock(tmp
    assert main_mod._tui_need_npm_install(tmp_path) is False


+def test_no_install_when_only_peer_annotation_differs(tmp_path: Path, main_mod) -> None:
+    """npm 9 drops the ``peer`` flag from the hidden lock on dev-deps that are
+    *also* declared as peers.  That's a cosmetic difference — the package is
+    installed at the requested version — so it must not trigger a reinstall.
+    Regression for the TUI-in-Docker failure where 16 such mismatches caused
+    `Installing TUI dependencies…` → EACCES on every launch.
+    """
+    _touch_ink(tmp_path)
+    (tmp_path / "package-lock.json").write_text(
+        '{"packages":{'
+        '"node_modules/foo":{"version":"1.0.0","dev":true,"peer":true,"resolved":"https://x/foo.tgz"}'
+        '}}'
+    )
+    (tmp_path / "node_modules" / ".package-lock.json").write_text(
+        '{"packages":{'
+        '"node_modules/foo":{"version":"1.0.0","dev":true,"resolved":"https://x/foo.tgz"}'
+        '}}'
+    )
+    assert main_mod._tui_need_npm_install(tmp_path) is False
+
+
+def test_install_when_version_differs_even_with_peer_drop(tmp_path: Path, main_mod) -> None:
+    """The peer-drop tolerance must not mask a real version skew."""
+    _touch_ink(tmp_path)
+    (tmp_path / "package-lock.json").write_text(
+        '{"packages":{"node_modules/foo":{"version":"2.0.0","dev":true,"peer":true}}}'
+    )
+    (tmp_path / "node_modules" / ".package-lock.json").write_text(
+        '{"packages":{"node_modules/foo":{"version":"1.0.0","dev":true}}}'
+    )
+    assert main_mod._tui_need_npm_install(tmp_path) is True
+
+
 def test_no_install_when_lock_older_than_marker(tmp_path: Path, main_mod) -> None:
    _touch_ink(tmp_path)
    (tmp_path / "package-lock.json").write_text("{}")
--- a/tests/hermes_cli/test_tui_resume_flow.py
+++ b/tests/hermes_cli/test_tui_resume_flow.py
@ -36,7 +36,14 @@ def test_cmd_chat_tui_continue_uses_latest_tui_session(monkeypatch, main_mod):
        calls.append(source)
        return "20260408_235959_a1b2c3" if source == "tui" else None

-    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None, toolsets=None):
+    def fake_launch(
+        resume_session_id=None,
+        tui_dev=False,
+        model=None,
+        provider=None,
+        toolsets=None,
+        **kwargs,
+    ):
        captured["resume"] = resume_session_id
        raise SystemExit(0)

@ -63,7 +70,14 @@ def test_cmd_chat_tui_continue_falls_back_to_latest_cli_session(monkeypatch, mai
            return "20260408_235959_d4e5f6"
        return None

-    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None, toolsets=None):
+    def fake_launch(
+        resume_session_id=None,
+        tui_dev=False,
+        model=None,
+        provider=None,
+        toolsets=None,
+        **kwargs,
+    ):
        captured["resume"] = resume_session_id
        raise SystemExit(0)

@ -81,7 +95,14 @@ def test_cmd_chat_tui_continue_falls_back_to_latest_cli_session(monkeypatch, mai
 def test_cmd_chat_tui_resume_resolves_title_before_launch(monkeypatch, main_mod):
    captured = {}

-    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None, toolsets=None):
+    def fake_launch(
+        resume_session_id=None,
+        tui_dev=False,
+        model=None,
+        provider=None,
+        toolsets=None,
+        **kwargs,
+    ):
        captured["resume"] = resume_session_id
        raise SystemExit(0)

@ -99,7 +120,14 @@ def test_cmd_chat_tui_resume_resolves_title_before_launch(monkeypatch, main_mod)
 def test_cmd_chat_tui_passes_model_and_provider(monkeypatch, main_mod):
    captured = {}

-    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None, toolsets=None):
+    def fake_launch(
+        resume_session_id=None,
+        tui_dev=False,
+        model=None,
+        provider=None,
+        toolsets=None,
+        **kwargs,
+    ):
        captured.update(
            {
                "model": model,
@ -130,7 +158,14 @@ def test_cmd_chat_tui_passes_model_and_provider(monkeypatch, main_mod):
 def test_cmd_chat_tui_passes_toolsets(monkeypatch, main_mod):
    captured = {}

-    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None, toolsets=None):
+    def fake_launch(
+        resume_session_id=None,
+        tui_dev=False,
+        model=None,
+        provider=None,
+        toolsets=None,
+        **kwargs,
+    ):
        captured["toolsets"] = toolsets
        raise SystemExit(0)

@ -142,22 +177,74 @@ def test_cmd_chat_tui_passes_toolsets(monkeypatch, main_mod):
    assert captured["toolsets"] == "web,terminal"


+def test_cmd_chat_tui_forwards_chat_flags(monkeypatch, main_mod):
+    captured = {}
+
+    def fake_launch(resume_session_id=None, **kwargs):
+        captured["resume_session_id"] = resume_session_id
+        captured.update(kwargs)
+        raise SystemExit(0)
+
+    monkeypatch.setattr(main_mod, "_launch_tui", fake_launch)
+
+    with pytest.raises(SystemExit):
+        main_mod.cmd_chat(
+            _args(
+                skills=["foo,bar"],
+                verbose=True,
+                quiet=True,
+                query="hello",
+                image="/tmp/cat.png",
+                worktree=True,
+                checkpoints=True,
+                pass_session_id=True,
+                max_turns=7,
+                accept_hooks=True,
+            )
+        )
+
+    assert captured["skills"] == ["foo,bar"]
+    assert captured["verbose"] is True
+    assert captured["quiet"] is True
+    assert captured["query"] == "hello"
+    assert captured["image"] == "/tmp/cat.png"
+    assert captured["worktree"] is True
+    assert captured["checkpoints"] is True
+    assert captured["pass_session_id"] is True
+    assert captured["max_turns"] == 7
+    assert captured["accept_hooks"] is True
+
+
 def test_main_top_level_tui_accepts_toolsets(monkeypatch, main_mod):
    captured = {}

    import hermes_cli.config as config_mod

    monkeypatch.setattr(sys, "argv", ["hermes", "--tui", "--toolsets", "web,terminal"])
-    monkeypatch.setitem(sys.modules, "hermes_cli.plugins", types.SimpleNamespace(discover_plugins=lambda: None))
-    monkeypatch.setitem(sys.modules, "tools.mcp_tool", types.SimpleNamespace(discover_mcp_tools=lambda: None))
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.plugins",
+        types.SimpleNamespace(discover_plugins=lambda: None),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "tools.mcp_tool",
+        types.SimpleNamespace(discover_mcp_tools=lambda: None),
+    )
    monkeypatch.setattr(config_mod, "load_config", lambda: {})
    monkeypatch.setattr(config_mod, "get_container_exec_info", lambda: None)
    monkeypatch.setitem(
        sys.modules,
        "agent.shell_hooks",
-        types.SimpleNamespace(register_from_config=lambda _cfg, accept_hooks=False: None),
+        types.SimpleNamespace(
+            register_from_config=lambda _cfg, accept_hooks=False: None
+        ),
+    )
+    monkeypatch.setattr(
+        main_mod,
+        "cmd_chat",
+        lambda args: captured.update({"toolsets": args.toolsets, "tui": args.tui}),
    )
-    monkeypatch.setattr(main_mod, "cmd_chat", lambda args: captured.update({"toolsets": args.toolsets, "tui": args.tui}))

    main_mod.main()

@ -169,27 +256,49 @@ def test_main_top_level_oneshot_accepts_toolsets(monkeypatch, main_mod):

    import hermes_cli.config as config_mod

-    monkeypatch.setattr(sys, "argv", ["hermes", "-z", "hello", "--toolsets", "web,terminal"])
-    monkeypatch.setitem(sys.modules, "hermes_cli.plugins", types.SimpleNamespace(discover_plugins=lambda: None))
-    monkeypatch.setitem(sys.modules, "tools.mcp_tool", types.SimpleNamespace(discover_mcp_tools=lambda: None))
+    monkeypatch.setattr(
+        sys, "argv", ["hermes", "-z", "hello", "--toolsets", "web,terminal"]
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.plugins",
+        types.SimpleNamespace(discover_plugins=lambda: None),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "tools.mcp_tool",
+        types.SimpleNamespace(discover_mcp_tools=lambda: None),
+    )
    monkeypatch.setattr(config_mod, "load_config", lambda: {})
    monkeypatch.setattr(config_mod, "get_container_exec_info", lambda: None)
    monkeypatch.setitem(
        sys.modules,
        "agent.shell_hooks",
-        types.SimpleNamespace(register_from_config=lambda _cfg, accept_hooks=False: None),
+        types.SimpleNamespace(
+            register_from_config=lambda _cfg, accept_hooks=False: None
+        ),
    )
    monkeypatch.setitem(
        sys.modules,
        "hermes_cli.oneshot",
-        types.SimpleNamespace(run_oneshot=lambda prompt, **kwargs: captured.update({"prompt": prompt, **kwargs}) or 0),
+        types.SimpleNamespace(
+            run_oneshot=lambda prompt, **kwargs: captured.update(
+                {"prompt": prompt, **kwargs}
+            )
+            or 0
+        ),
    )

    with pytest.raises(SystemExit) as exc:
        main_mod.main()

    assert exc.value.code == 0
-    assert captured == {"prompt": "hello", "model": None, "provider": None, "toolsets": "web,terminal"}
+    assert captured == {
+        "prompt": "hello",
+        "model": None,
+        "provider": None,
+        "toolsets": "web,terminal",
+    }


 def _stub_plugin_discovery(monkeypatch):
@ -256,7 +365,9 @@ def test_oneshot_accepts_plugin_toolset_after_discovery(monkeypatch):
    monkeypatch.setitem(
        sys.modules,
        "hermes_cli.plugins",
-        types.SimpleNamespace(discover_plugins=lambda: discovered.update({"ready": True})),
+        types.SimpleNamespace(
+            discover_plugins=lambda: discovered.update({"ready": True})
+        ),
    )

    valid, error = _validate_explicit_toolsets("plugin_demo")
@ -308,6 +419,72 @@ def test_oneshot_distinguishes_disabled_mcp_from_unknown(monkeypatch, capsys):
    assert "mcp-off" in err


+def test_oneshot_wires_session_db_for_recall(monkeypatch):
+    """hermes -z bypasses HermesCLI, but recall still needs SessionDB."""
+    from hermes_cli.oneshot import _run_agent
+
+    captured = {}
+    sentinel_db = object()
+
+    class FakeAgent:
+        def __init__(self, **kwargs):
+            captured.update(kwargs)
+            self.suppress_status_output = False
+            self.stream_delta_callback = object()
+            self.tool_gen_callback = object()
+
+        def chat(self, prompt):
+            captured["prompt"] = prompt
+            return "ok"
+
+    class FakeSessionDB:
+        def __new__(cls):
+            return sentinel_db
+
+    def mod(name, **attrs):
+        module = types.ModuleType(name)
+        for key, value in attrs.items():
+            setattr(module, key, value)
+        return module
+
+    monkeypatch.setitem(sys.modules, "run_agent", mod("run_agent", AIAgent=FakeAgent))
+    monkeypatch.setitem(sys.modules, "hermes_state", mod("hermes_state", SessionDB=FakeSessionDB))
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.config",
+        mod("hermes_cli.config", load_config=lambda: {"model": {"default": "m"}}),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.models",
+        mod("hermes_cli.models", detect_provider_for_model=lambda *_args, **_kwargs: None),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.runtime_provider",
+        mod(
+            "hermes_cli.runtime_provider",
+            resolve_runtime_provider=lambda **_kwargs: {
+                "api_key": "k",
+                "base_url": "u",
+                "provider": "p",
+                "api_mode": "chat_completions",
+                "credential_pool": None,
+            },
+        ),
+    )
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.tools_config",
+        mod("hermes_cli.tools_config", _get_platform_tools=lambda *_args, **_kwargs: {"session_search"}),
+    )
+
+    assert _run_agent("recall this") == "ok"
+    assert captured["session_db"] is sentinel_db
+    assert captured["enabled_toolsets"] == ["session_search"]
+    assert captured["prompt"] == "recall this"
+
+
 def test_launch_tui_exports_model_provider_and_toolsets(monkeypatch, main_mod):
    captured = {}
    active_path_during_call = None
@ -328,7 +505,9 @@ def test_launch_tui_exports_model_provider_and_toolsets(monkeypatch, main_mod):
    monkeypatch.setattr(main_mod.subprocess, "call", fake_call)

    with pytest.raises(SystemExit):
-        main_mod._launch_tui(model="nous/hermes-test", provider="nous", toolsets="web, terminal")
+        main_mod._launch_tui(
+            model="nous/hermes-test", provider="nous", toolsets="web, terminal"
+        )

    env = captured["env"]
    assert env["HERMES_MODEL"] == "nous/hermes-test"
--- a/tests/hermes_cli/test_update_autostash.py
+++ b/tests/hermes_cli/test_update_autostash.py
@ -311,7 +311,8 @@ def test_cmd_update_retries_optional_extras_individually_when_all_fails(monkeypa
    """When .[all] fails, update should keep base deps and retry extras individually."""
    _setup_update_mocks(monkeypatch, tmp_path)
    monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/uv" if name == "uv" else None)
-    monkeypatch.setattr(hermes_main, "_load_installable_optional_extras", lambda: ["matrix", "mcp"])
+    monkeypatch.setattr(hermes_main, "_is_termux_env", lambda env=None: False)
+    monkeypatch.setattr(hermes_main, "_load_installable_optional_extras", lambda group="all": ["matrix", "mcp"])

    recorded = []

@ -323,15 +324,15 @@ def test_cmd_update_retries_optional_extras_individually_when_all_fails(monkeypa
            return SimpleNamespace(stdout="main\n", stderr="", returncode=0)
        if cmd == ["git", "rev-list", "HEAD..origin/main", "--count"]:
            return SimpleNamespace(stdout="1\n", stderr="", returncode=0)
-        if cmd == ["git", "pull", "origin", "main"]:
+        if cmd == ["git", "pull", "--ff-only", "origin", "main"]:
            return SimpleNamespace(stdout="Updating\n", stderr="", returncode=0)
-        if cmd == ["/usr/bin/uv", "pip", "install", "-e", ".[all]", "--quiet"]:
+        if cmd == ["/usr/bin/uv", "pip", "install", "-e", ".[all]"]:
            raise CalledProcessError(returncode=1, cmd=cmd)
-        if cmd == ["/usr/bin/uv", "pip", "install", "-e", ".", "--quiet"]:
+        if cmd == ["/usr/bin/uv", "pip", "install", "-e", "."]:
            return SimpleNamespace(returncode=0)
-        if cmd == ["/usr/bin/uv", "pip", "install", "-e", ".[matrix]", "--quiet"]:
+        if cmd == ["/usr/bin/uv", "pip", "install", "-e", ".[matrix]"]:
            raise CalledProcessError(returncode=1, cmd=cmd)
-        if cmd == ["/usr/bin/uv", "pip", "install", "-e", ".[mcp]", "--quiet"]:
+        if cmd == ["/usr/bin/uv", "pip", "install", "-e", ".[mcp]"]:
            return SimpleNamespace(returncode=0)
        # Catch-all must include stdout/stderr so consumers that parse
        # output (e.g. the dashboard-restart `ps -A` scan added in the
@ -344,10 +345,10 @@ def test_cmd_update_retries_optional_extras_individually_when_all_fails(monkeypa

    install_cmds = [c for c in recorded if "pip" in c and "install" in c]
    assert install_cmds == [
-        ["/usr/bin/uv", "pip", "install", "-e", ".[all]", "--quiet"],
-        ["/usr/bin/uv", "pip", "install", "-e", ".", "--quiet"],
-        ["/usr/bin/uv", "pip", "install", "-e", ".[matrix]", "--quiet"],
-        ["/usr/bin/uv", "pip", "install", "-e", ".[mcp]", "--quiet"],
+        ["/usr/bin/uv", "pip", "install", "-e", ".[all]"],
+        ["/usr/bin/uv", "pip", "install", "-e", "."],
+        ["/usr/bin/uv", "pip", "install", "-e", ".[matrix]"],
+        ["/usr/bin/uv", "pip", "install", "-e", ".[mcp]"],
    ]

    out = capsys.readouterr().out
@ -360,6 +361,7 @@ def test_cmd_update_succeeds_with_extras(monkeypatch, tmp_path):
    """When .[all] succeeds, no fallback should be attempted."""
    _setup_update_mocks(monkeypatch, tmp_path)
    monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/uv" if name == "uv" else None)
+    monkeypatch.setattr(hermes_main, "_is_termux_env", lambda env=None: False)

    recorded = []

@ -371,7 +373,7 @@ def test_cmd_update_succeeds_with_extras(monkeypatch, tmp_path):
            return SimpleNamespace(stdout="main\n", stderr="", returncode=0)
        if cmd == ["git", "rev-list", "HEAD..origin/main", "--count"]:
            return SimpleNamespace(stdout="1\n", stderr="", returncode=0)
-        if cmd == ["git", "pull", "origin", "main"]:
+        if cmd == ["git", "pull", "--ff-only", "origin", "main"]:
            return SimpleNamespace(stdout="Updating\n", stderr="", returncode=0)
        return SimpleNamespace(returncode=0, stdout="", stderr="")

@ -384,6 +386,54 @@ def test_cmd_update_succeeds_with_extras(monkeypatch, tmp_path):
    assert ".[all]" in install_cmds[0]


+def test_install_with_optional_fallback_honors_custom_group(monkeypatch):
+    """Termux update path should target .[termux-all] when requested."""
+    calls = []
+    monkeypatch.setattr(
+        hermes_main,
+        "_load_installable_optional_extras",
+        lambda group="all": ["termux", "mcp"] if group == "termux-all" else [],
+    )
+
+    def fake_run_with_heartbeat(cmd, **kwargs):
+        calls.append(cmd)
+        if cmd[-1] == ".[termux-all]":
+            raise CalledProcessError(returncode=1, cmd=cmd)
+        return None
+
+    monkeypatch.setattr(hermes_main, "_run_install_with_heartbeat", fake_run_with_heartbeat)
+
+    hermes_main._install_python_dependencies_with_optional_fallback(
+        ["/usr/bin/uv", "pip"],
+        group="termux-all",
+    )
+
+    assert calls == [
+        ["/usr/bin/uv", "pip", "install", "-e", ".[termux-all]"],
+        ["/usr/bin/uv", "pip", "install", "-e", "."],
+        ["/usr/bin/uv", "pip", "install", "-e", ".[termux]"],
+        ["/usr/bin/uv", "pip", "install", "-e", ".[mcp]"],
+    ]
+
+
+def test_install_heartbeat_prints_when_dependency_install_is_silent(monkeypatch, capsys):
+    """Long quiet installs should emit periodic heartbeat lines."""
+
+    def fake_run(cmd, **kwargs):
+        hermes_main._time.sleep(1.2)
+        return SimpleNamespace(returncode=0)
+
+    monkeypatch.setattr(hermes_main.subprocess, "run", fake_run)
+
+    hermes_main._run_install_with_heartbeat(
+        ["uv", "pip", "install", "-e", "."],
+        heartbeat_interval_seconds=1,
+    )
+
+    out = capsys.readouterr().out
+    assert "still installing dependencies" in out
+
+
 # ---------------------------------------------------------------------------
 # ff-only fallback to reset --hard on diverged history
 # ---------------------------------------------------------------------------
--- a/tests/hermes_cli/test_update_gateway_restart.py
+++ b/tests/hermes_cli/test_update_gateway_restart.py
@ -392,6 +392,91 @@ class TestCmdUpdateLaunchdRestart:
        captured = capsys.readouterr().out
        assert "Restart manually: hermes gateway run" in captured

+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_restarts_profile_manual_gateways(
+        self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch,
+    ):
+        """Profile-mapped manual gateways are relaunched automatically after update."""
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: True)
+        monkeypatch.setattr(
+            gateway_cli,
+            "get_launchd_plist_path",
+            lambda: tmp_path / "ai.hermes.gateway.plist",
+        )
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            launchctl_loaded=False,
+        )
+        process = gateway_cli.ProfileGatewayProcess(
+            profile="coder",
+            path=tmp_path / ".hermes" / "profiles" / "coder",
+            pid=12345,
+        )
+
+        # ``find_gateway_pids`` is invoked twice: once to enumerate manual
+         # PIDs to restart, then again ~3s later by the post-restart survivor
+         # sweep (#17648). Return the live PID first, then an empty list to
+         # simulate the process actually exiting after the graceful restart
+         # — otherwise the sweep would SIGKILL pid 12345 even though graceful
+         # drain succeeded, and ``kill.assert_not_called()`` would fire.
+        with patch.object(gateway_cli, "find_gateway_pids", side_effect=[[12345], []]), \
+             patch.object(gateway_cli, "find_profile_gateway_processes", return_value=[process]), \
+             patch.object(gateway_cli, "launch_detached_profile_gateway_restart", return_value=True) as restart, \
+             patch.object(gateway_cli, "_graceful_restart_via_sigusr1", return_value=True) as graceful, \
+             patch("os.kill") as kill:
+            cmd_update(mock_args)
+
+        captured = capsys.readouterr().out
+        restart.assert_called_once_with("coder", 12345)
+        graceful.assert_called_once()
+        # Graceful drain succeeded — no SIGTERM fallback needed.
+        kill.assert_not_called()
+        assert "Restarting manual gateway profile(s): coder" in captured
+        assert "Restart manually: hermes gateway run" not in captured
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_profile_manual_gateway_falls_back_to_sigterm(
+        self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch,
+    ):
+        """When graceful SIGUSR1 drain fails, manual profile restart falls back to SIGTERM."""
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: True)
+        monkeypatch.setattr(
+            gateway_cli,
+            "get_launchd_plist_path",
+            lambda: tmp_path / "ai.hermes.gateway.plist",
+        )
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            launchctl_loaded=False,
+        )
+        process = gateway_cli.ProfileGatewayProcess(
+            profile="coder",
+            path=tmp_path / ".hermes" / "profiles" / "coder",
+            pid=12345,
+        )
+
+        # See note in ``test_update_restarts_profile_manual_gateways``: the
+        # post-restart survivor sweep (#17648) re-queries ``find_gateway_pids``
+        # ~3s after the restart attempt. Return ``[]`` on the second call so
+        # the SIGTERM fallback isn't escalated to SIGKILL by the sweep.
+        with patch.object(gateway_cli, "find_gateway_pids", side_effect=[[12345], []]), \
+             patch.object(gateway_cli, "find_profile_gateway_processes", return_value=[process]), \
+             patch.object(gateway_cli, "launch_detached_profile_gateway_restart", return_value=True) as restart, \
+             patch.object(gateway_cli, "_graceful_restart_via_sigusr1", return_value=False) as graceful, \
+             patch("os.kill") as kill:
+            cmd_update(mock_args)
+
+        captured = capsys.readouterr().out
+        restart.assert_called_once_with("coder", 12345)
+        graceful.assert_called_once()
+        # Graceful drain returned False → SIGTERM fallback.
+        kill.assert_called_once()
+        assert "Restarting manual gateway profile(s): coder" in captured
+
    @patch("shutil.which", return_value=None)
    @patch("subprocess.run")
    def test_update_with_systemd_still_restarts_via_systemd(
@ -568,6 +653,77 @@ class TestCmdUpdateLaunchdRestart:
            "Drain path failed; expected fallback `systemctl restart`."
        )

+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_bypasses_restartsec_after_graceful_drain(
+        self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
+    ):
+        """After a graceful SIGUSR1 drain, cmd_update must issue
+        ``reset-failed`` + ``start`` to bypass the unit's ``RestartSec``
+        cooldown (default 60s on our unit file) rather than passively
+        waiting for systemd's auto-restart. Collapses the post-drain delay
+        from ~60s to ~5s on a voluntary restart.
+        """
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
+        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
+
+        def side_effect(cmd, **kwargs):
+            joined = " ".join(str(c) for c in cmd)
+            if "rev-parse" in joined and "--abbrev-ref" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="main\n", stderr="")
+            if "rev-parse" in joined and "--verify" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+            if "rev-list" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="3\n", stderr="")
+            if "systemctl" in joined and "list-units" in joined:
+                if "--user" in joined:
+                    return subprocess.CompletedProcess(
+                        cmd, 0,
+                        stdout="hermes-gateway.service loaded active running\n",
+                        stderr="",
+                    )
+                return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+            if "systemctl" in joined and "is-active" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
+            if "systemctl" in joined and "show" in joined and "MainPID" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="4242\n", stderr="")
+            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+        mock_run.side_effect = side_effect
+
+        # Simulate a successful graceful drain so cmd_update reaches the
+        # post-drain restart bypass.
+        monkeypatch.setattr(
+            "hermes_cli.gateway._graceful_restart_via_sigusr1",
+            lambda pid, drain_timeout: True,
+        )
+
+        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
+            cmd_update(mock_args)
+
+        calls = [
+            " ".join(str(a) for a in c.args[0])
+            for c in mock_run.call_args_list
+            if "systemctl" in " ".join(str(a) for a in c.args[0])
+        ]
+
+        # Must have called ``reset-failed hermes-gateway`` AND ``start
+        # hermes-gateway`` explicitly so systemd bypasses RestartSec.
+        reset_calls = [c for c in calls if "reset-failed" in c and "hermes-gateway" in c]
+        start_calls = [
+            c for c in calls
+            if "start" in c and "hermes-gateway" in c and "restart" not in c
+        ]
+        assert reset_calls, (
+            f"Expected explicit `reset-failed hermes-gateway` after graceful drain; "
+            f"systemctl calls were: {calls}"
+        )
+        assert start_calls, (
+            f"Expected explicit `start hermes-gateway` after graceful drain to "
+            f"bypass RestartSec; systemctl calls were: {calls}"
+        )
+
    @patch("shutil.which", return_value=None)
    @patch("subprocess.run")
    def test_update_no_gateway_running_skips_restart(
@ -797,15 +953,25 @@ class TestServicePidExclusion:
            launchctl_loaded=True,
        )

+        # Survivor sweep (#17648) re-queries ``find_gateway_pids`` after
+         # SIGTERM. ``os.kill`` is mocked, so the PID never "dies" — track
+         # the killed-via-SIGTERM PIDs ourselves and exclude them on later
+         # calls to simulate the OS reaping the process. Without this the
+         # sweep escalates with SIGKILL and ``manual_kills == 2`` instead of 1.
+        _killed_pids: set[int] = set()
+
        def fake_find(exclude_pids=None, all_profiles=False):
-            _exclude = exclude_pids or set()
+            _exclude = (exclude_pids or set()) | _killed_pids
            return [p for p in [SERVICE_PID, MANUAL_PID] if p not in _exclude]

+        def fake_kill(pid, _sig):
+            _killed_pids.add(pid)
+
        with patch.object(
            gateway_cli, "_get_service_pids", return_value={SERVICE_PID}
        ), patch.object(
            gateway_cli, "find_gateway_pids", side_effect=fake_find,
-        ), patch("os.kill") as mock_kill:
+        ), patch("os.kill", side_effect=fake_kill) as mock_kill:
            cmd_update(mock_args)

        captured = capsys.readouterr().out
@ -1261,3 +1427,232 @@ class TestCmdUpdateLegacyGatewayWarning:
        assert "Legacy Hermes gateway" in captured
        assert "(system scope)" in captured
        assert "sudo" in captured
+
+
+# ---------------------------------------------------------------------------
+# cmd_update — reset-failed precedes systemctl restart on fallback path
+# ---------------------------------------------------------------------------
+
+
+def _systemctl_calls(mock_run, subcommand):
+    """Return every subprocess.run call that was `systemctl [--user] <subcommand>`."""
+    out = []
+    for call in mock_run.call_args_list:
+        argv = call.args[0]
+        joined = " ".join(str(c) for c in argv)
+        if "systemctl" in joined and subcommand in joined:
+            out.append(argv)
+    return out
+
+
+class TestCmdUpdateResetFailedBeforeRestart:
+    """`hermes update` must call `systemctl reset-failed` before every
+    fallback `systemctl restart` so a systemd-parked `failed` state from
+    earlier auto-restart crashes (CHDIR, OOM, filesystem race) doesn't
+    permanently strand the unit.
+
+    Mirrors the recovery pattern `hermes gateway restart` (systemd_restart)
+    adopted in PR #20949.  Without this, users hit "gateway never comes
+    back after update" until they manually run `systemctl reset-failed`.
+    """
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_reset_failed_runs_before_fallback_restart(
+        self, mock_run, _mock_which, mock_args, monkeypatch,
+    ):
+        """When SIGUSR1 drain times out, the fallback systemctl restart
+        MUST be preceded by a `reset-failed` call against the same unit."""
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
+        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            systemd_active=True,
+        )
+
+        # Force the graceful SIGUSR1 path to report failure so cmd_update
+        # falls back to systemctl restart.
+        orig = mock_run.side_effect
+        def wrapped(cmd, **kwargs):
+            joined = " ".join(str(c) for c in cmd)
+            if "systemctl" in joined and "show" in joined and "MainPID" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="4242\n", stderr="")
+            return orig(cmd, **kwargs)
+        mock_run.side_effect = wrapped
+        monkeypatch.setattr(
+            "hermes_cli.gateway._graceful_restart_via_sigusr1",
+            lambda pid, drain_timeout: False,
+        )
+
+        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
+            cmd_update(mock_args)
+
+        reset_calls = _systemctl_calls(mock_run, "reset-failed")
+        restart_calls = _systemctl_calls(mock_run, "restart")
+
+        assert any(
+            "hermes-gateway" in " ".join(str(c) for c in call)
+            for call in reset_calls
+        ), (
+            "Expected `systemctl reset-failed hermes-gateway` before the "
+            "fallback `systemctl restart`, got reset_calls=%r" % (reset_calls,)
+        )
+        assert restart_calls, "Fallback systemctl restart should still run"
+
+        # Order check: the first reset-failed must come before the first restart.
+        first_reset_idx = None
+        first_restart_idx = None
+        for idx, call in enumerate(mock_run.call_args_list):
+            joined = " ".join(str(c) for c in call.args[0])
+            if "systemctl" in joined and "reset-failed" in joined and first_reset_idx is None:
+                first_reset_idx = idx
+            if "systemctl" in joined and "restart" in joined and "hermes-gateway" in joined:
+                if first_restart_idx is None:
+                    first_restart_idx = idx
+        assert first_reset_idx is not None and first_restart_idx is not None
+        assert first_reset_idx < first_restart_idx, (
+            f"reset-failed (call #{first_reset_idx}) must precede "
+            f"restart (call #{first_restart_idx}) so the unit isn't "
+            "blocked by systemd's failed-state backoff."
+        )
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_reset_failed_also_runs_before_retry_restart(
+        self, mock_run, _mock_which, mock_args, monkeypatch,
+    ):
+        """If the first fallback restart spawns a process that dies
+        immediately (is-active stays inactive), the retry restart must
+        ALSO be preceded by a reset-failed — otherwise the retry races
+        the unit's own failed-state transition."""
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
+        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
+
+        # is-active toggles:
+        #   first call (discovery / check active)  -> "active"
+        #   later calls (post-restart verify)      -> "inactive"
+        # Using a state counter so both the initial check and the verify
+        # loops behave realistically.
+        is_active_calls = {"n": 0}
+
+        def side_effect(cmd, **kwargs):
+            joined = " ".join(str(c) for c in cmd)
+            if "rev-parse" in joined and "--abbrev-ref" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="main\n", stderr="")
+            if "rev-parse" in joined and "--verify" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+            if "rev-list" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="3\n", stderr="")
+            if "systemctl" in joined and "list-units" in joined:
+                if "--user" in joined:
+                    return subprocess.CompletedProcess(
+                        cmd, 0,
+                        stdout="hermes-gateway.service loaded active running\n",
+                        stderr="",
+                    )
+                return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+            if "systemctl" in joined and "is-active" in joined:
+                is_active_calls["n"] += 1
+                # First check: the unit is active (so we enter the restart path).
+                # Subsequent polling: inactive, which drives the retry branch.
+                if is_active_calls["n"] == 1:
+                    return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
+                return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="")
+            if "systemctl" in joined and "show" in joined and "MainPID" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="4242\n", stderr="")
+            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+        mock_run.side_effect = side_effect
+
+        # Force graceful SIGUSR1 to fail → fallback restart path.
+        monkeypatch.setattr(
+            "hermes_cli.gateway._graceful_restart_via_sigusr1",
+            lambda pid, drain_timeout: False,
+        )
+
+        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
+            cmd_update(mock_args)
+
+        reset_calls = _systemctl_calls(mock_run, "reset-failed")
+        restart_calls = _systemctl_calls(mock_run, "restart")
+
+        # Two restart attempts (initial + retry), two reset-failed calls.
+        gateway_restarts = [
+            c for c in restart_calls
+            if "hermes-gateway" in " ".join(str(a) for a in c)
+        ]
+        gateway_resets = [
+            c for c in reset_calls
+            if "hermes-gateway" in " ".join(str(a) for a in c)
+        ]
+        assert len(gateway_restarts) >= 2, (
+            f"Expected both initial + retry restart calls, got {len(gateway_restarts)}"
+        )
+        assert len(gateway_resets) >= 2, (
+            f"Expected reset-failed before BOTH restart attempts, "
+            f"got {len(gateway_resets)} reset-failed call(s)"
+        )
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_final_failure_message_tells_user_to_reset_failed(
+        self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
+    ):
+        """When both fallback restart attempts fail, the final error
+        message must include `systemctl reset-failed` as part of the
+        manual recovery hint — not just `systemctl restart` on its own,
+        which is the step that just failed twice."""
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
+        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
+
+        is_active_calls = {"n": 0}
+
+        def side_effect(cmd, **kwargs):
+            joined = " ".join(str(c) for c in cmd)
+            if "rev-parse" in joined and "--abbrev-ref" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="main\n", stderr="")
+            if "rev-parse" in joined and "--verify" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+            if "rev-list" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="3\n", stderr="")
+            if "systemctl" in joined and "list-units" in joined:
+                if "--user" in joined:
+                    return subprocess.CompletedProcess(
+                        cmd, 0,
+                        stdout="hermes-gateway.service loaded active running\n",
+                        stderr="",
+                    )
+                return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+            if "systemctl" in joined and "is-active" in joined:
+                is_active_calls["n"] += 1
+                if is_active_calls["n"] == 1:
+                    return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
+                return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="")
+            if "systemctl" in joined and "show" in joined and "MainPID" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="4242\n", stderr="")
+            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+        mock_run.side_effect = side_effect
+        monkeypatch.setattr(
+            "hermes_cli.gateway._graceful_restart_via_sigusr1",
+            lambda pid, drain_timeout: False,
+        )
+
+        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
+            cmd_update(mock_args)
+
+        captured = capsys.readouterr().out
+        assert "failed to stay running" in captured, (
+            "Expected the terminal failure message to fire when both "
+            f"restart attempts don't survive.  Got:\n{captured}"
+        )
+        assert "reset-failed" in captured, (
+            "Final recovery hint must include `reset-failed` so users "
+            "know how to escape systemd's parked failed state.  Got:\n"
+            f"{captured}"
+        )
+        assert "hermes-gateway" in captured
--- a/tests/hermes_cli/test_update_yes_flag.py
+++ b/tests/hermes_cli/test_update_yes_flag.py
@ -0,0 +1,137 @@
+"""Tests for `hermes update --yes / -y` — assume yes for interactive prompts.
+
+Covers:
+  1. argparse parses the flag
+  2. Config-migration prompt is auto-answered (no input() call) and migrate_config
+     runs with interactive=False so API-key prompts are skipped
+  3. Autostash restore prompt is auto-answered (prompt_for_restore == False, no
+     input() call) and the stash is applied automatically
+"""
+
+import subprocess
+from types import SimpleNamespace
+from unittest.mock import patch
+
+from hermes_cli.main import cmd_update
+
+
+def _make_run_side_effect(
+    branch="main", verify_ok=True, commit_count="1", dirty=False
+):
+    """Minimal subprocess.run side_effect for the update flow."""
+
+    def side_effect(cmd, **kwargs):
+        joined = " ".join(str(c) for c in cmd)
+
+        if "rev-parse" in joined and "--abbrev-ref" in joined:
+            return subprocess.CompletedProcess(cmd, 0, stdout=f"{branch}\n", stderr="")
+        if "rev-parse" in joined and "--verify" in joined:
+            return subprocess.CompletedProcess(
+                cmd, 0 if verify_ok else 128, stdout="", stderr=""
+            )
+        if "rev-list" in joined:
+            return subprocess.CompletedProcess(
+                cmd, 0, stdout=f"{commit_count}\n", stderr=""
+            )
+        # `git status --porcelain` for dirty-tree detection during autostash.
+        if "status" in joined and "--porcelain" in joined:
+            out = " M hermes_cli/main.py\n" if dirty else ""
+            return subprocess.CompletedProcess(cmd, 0, stdout=out, stderr="")
+        # `git stash list` — return a stash ref when dirty (so _stash_local_changes
+        # gets something to return). _stash_local_changes_if_needed is what we
+        # actually patch in tests that exercise restore, so this is a catch-all.
+        if "stash" in joined and "list" in joined:
+            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+    return side_effect
+
+
+class TestUpdateYesConfigMigration:
+    """--yes auto-answers the config-migration prompt and skips API-key prompts."""
+
+    @patch("hermes_cli.config.migrate_config")
+    @patch("hermes_cli.config.check_config_version", return_value=(1, 2))
+    @patch("hermes_cli.config.get_missing_config_fields", return_value=[])
+    @patch("hermes_cli.config.get_missing_env_vars", return_value=["NEW_KEY"])
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_yes_auto_migrates_without_input(
+        self,
+        mock_run,
+        _mock_which,
+        _mock_missing_env,
+        _mock_missing_cfg,
+        _mock_version,
+        mock_migrate,
+        capsys,
+    ):
+        mock_run.side_effect = _make_run_side_effect(
+            branch="main", verify_ok=True, commit_count="1"
+        )
+        mock_migrate.return_value = {"env_added": [], "config_added": []}
+
+        args = SimpleNamespace(yes=True)
+
+        with patch("builtins.input") as mock_input:
+            cmd_update(args)
+            # Never prompted the user.
+            mock_input.assert_not_called()
+
+        # migrate_config was invoked with interactive=False — API-key prompts
+        # are suppressed, matching gateway-mode semantics.
+        assert mock_migrate.call_count == 1
+        _, kwargs = mock_migrate.call_args
+        assert kwargs.get("interactive") is False
+
+        out = capsys.readouterr().out
+        assert "--yes: auto-applying config migration" in out
+        # The "Would you like to configure them now?" prompt text never appears.
+        assert "Would you like to configure them now?" not in out
+
+    @patch("hermes_cli.config.migrate_config")
+    @patch("hermes_cli.config.check_config_version", return_value=(1, 2))
+    @patch("hermes_cli.config.get_missing_config_fields", return_value=[])
+    @patch("hermes_cli.config.get_missing_env_vars", return_value=["NEW_KEY"])
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_no_yes_flag_still_prompts_in_tty(
+        self,
+        mock_run,
+        _mock_which,
+        _mock_missing_env,
+        _mock_missing_cfg,
+        _mock_version,
+        mock_migrate,
+        capsys,
+    ):
+        """Regression guard: without --yes, the TTY prompt path still fires."""
+        mock_run.side_effect = _make_run_side_effect(
+            branch="main", verify_ok=True, commit_count="1"
+        )
+        mock_migrate.return_value = {"env_added": [], "config_added": []}
+
+        args = SimpleNamespace(yes=False)
+
+        # Patch ``sys.stdin.isatty`` and ``sys.stdout.isatty`` directly on the
+        # real ``sys`` module instead of replacing ``hermes_cli.main.sys`` with
+        # a MagicMock. The MagicMock approach was flaky under ``pytest-xdist``
+        # — a sibling test that imported ``hermes_cli.main`` first could leave
+        # a different ``sys`` reference resolved inside the function and the
+        # mock would never be consulted, with CI then taking the
+        # "Non-interactive session" branch instead of prompting.
+        import sys as _sys
+
+        with patch("builtins.input", return_value="n") as mock_input, patch.object(
+            _sys.stdin, "isatty", return_value=True
+        ), patch.object(_sys.stdout, "isatty", return_value=True):
+            cmd_update(args)
+            # The user was actually prompted.
+            assert mock_input.called
+            prompts = [c.args[0] if c.args else "" for c in mock_input.call_args_list]
+            assert any("configure them now" in p for p in prompts)
+
+
+class TestUpdateYesStashRestore:
+    """--yes auto-restores the pre-update autostash without prompting."""
+
--- a/tests/hermes_cli/test_user_providers_model_switch.py
+++ b/tests/hermes_cli/test_user_providers_model_switch.py
@ -839,3 +839,148 @@ def test_get_named_custom_provider_transport_resolves_via_display_name(monkeypat
    result = rp._get_named_custom_provider("Codex Provider")
    assert result is not None
    assert result["api_mode"] == "codex_responses"
+
+
+# =============================================================================
+# Regression: user_providers override for private models not listed by /v1/models
+# =============================================================================
+
+_REJECTED_VALIDATION = {
+    "accepted": False,
+    "persist": False,
+    "recognized": False,
+    "message": "not found",
+}
+
+
+def _run_user_provider_override_case(
+    *,
+    slug,
+    name,
+    base_url,
+    models,
+    raw_input,
+):
+    """Run ``switch_model`` with a private user provider and a rejected API check.
+
+    The bug in PR #17964 was that ``user_providers`` was treated like a list,
+    so private models listed in ``models:`` never triggered the override path.
+    These tests keep the validation failure in place and prove the config list
+    still wins for both dict- and list-shaped ``models`` entries.
+    """
+    from unittest.mock import patch
+
+    user_providers = {
+        slug: {
+            "name": name,
+            "api": base_url,
+            "discover_models": False,
+            "models": models,
+        }
+    }
+
+    with patch("hermes_cli.model_switch.resolve_alias", return_value=None), \
+         patch("hermes_cli.model_switch.list_provider_models", return_value=[]), \
+         patch("hermes_cli.model_switch.normalize_model_for_provider", side_effect=lambda model, provider: model), \
+         patch("hermes_cli.models.validate_requested_model", return_value=_REJECTED_VALIDATION), \
+         patch("hermes_cli.models.detect_provider_for_model", return_value=None), \
+         patch("hermes_cli.model_switch.get_model_info", return_value=None), \
+         patch("hermes_cli.model_switch.get_model_capabilities", return_value=None), \
+         patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value={"api_key": "***", "base_url": base_url, "api_mode": "anthropic_messages"}):
+        return switch_model(
+            raw_input=raw_input,
+            current_provider=slug,
+            current_model="old-model",
+            current_base_url=base_url,
+            user_providers=user_providers,
+            custom_providers=[],
+        )
+
+
+@pytest.mark.parametrize(
+    ("slug", "name", "base_url", "models", "raw_input", "expected_model"),
+    [
+        (
+            "kimi-coding",
+            "Kimi Coding Plan",
+            "https://api.kimi.com/coding",
+            {"kimi-k2.6": {}},
+            "kimi-k2.6",
+            "kimi-k2.6",
+        ),
+        (
+            "kimi-dedicated",
+            "Kimi Dedicated",
+            "https://api.kimi.com/v1",
+            [{"name": "moonshotai/Kimi-K2.6-ACED"}],
+            "moonshotai/Kimi-K2.6-ACED",
+            "moonshotai/Kimi-K2.6-ACED",
+        ),
+    ],
+    ids=["kimi-coding-plan-dict", "kimi-k2-6-aced-list"],
+)
+def test_user_provider_override_accepts_listed_private_models(
+    slug,
+    name,
+    base_url,
+    models,
+    raw_input,
+    expected_model,
+):
+    """Private models listed in providers: config should override /v1/models misses.
+
+    Covers both config shapes the fix now accepts:
+    - dict models for the Kimi Coding Plan K2p6 case
+    - list-of-dicts models for the Kimi-K2.6-ACED dedicated case
+    """
+    result = _run_user_provider_override_case(
+        slug=slug,
+        name=name,
+        base_url=base_url,
+        models=models,
+        raw_input=raw_input,
+    )
+
+    assert result.success is True
+    assert result.new_model == expected_model
+    assert result.error_message == ""
+
+
+@pytest.mark.parametrize(
+    ("slug", "name", "base_url", "models", "raw_input"),
+    [
+        (
+            "kimi-coding",
+            "Kimi Coding Plan",
+            "https://api.kimi.com/coding",
+            {"kimi-k2.6": {}},
+            "kimi-k2.6-mangled",
+        ),
+        (
+            "kimi-dedicated",
+            "Kimi Dedicated",
+            "https://api.kimi.com/v1",
+            [{"name": "moonshotai/Kimi-K2.6-ACED"}],
+            "moonshotai/Kimi-K2.6-ACED!!!",
+        ),
+    ],
+    ids=["kimi-coding-plan-dict-mangled", "kimi-k2-6-aced-list-mangled"],
+)
+def test_user_provider_override_rejects_mangled_private_models(
+    slug,
+    name,
+    base_url,
+    models,
+    raw_input,
+):
+    """Malformed model names should fail cleanly, not crash or auto-accept."""
+    result = _run_user_provider_override_case(
+        slug=slug,
+        name=name,
+        base_url=base_url,
+        models=models,
+        raw_input=raw_input,
+    )
+
+    assert result.success is False
+    assert result.error_message == "not found"
--- a/tests/hermes_cli/test_voice_wrapper.py
+++ b/tests/hermes_cli/test_voice_wrapper.py
@ -31,6 +31,243 @@ class TestPublicAPI:
        assert callable(speak_text)


+class TestNormalizeVoiceRecordKeyForPromptToolkit:
+    """Round-9 Copilot review regression on #19835.
+
+    Classic CLI only normalized ``ctrl+`` / ``alt+``, so TUI-valid
+    aliases like ``control+``, ``option+``, ``opt+`` silently bound a
+    different (or no) shortcut in the CLI. Normalizer now maps the
+    same set of aliases the TUI parser accepts, so one config value
+    binds identically in both runtimes.
+    """
+
+    def test_ctrl_and_alt_map_to_prompt_toolkit_form(self):
+        from hermes_cli.voice import normalize_voice_record_key_for_prompt_toolkit
+
+        assert normalize_voice_record_key_for_prompt_toolkit("ctrl+b") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("alt+r") == "a-r"
+
+    def test_control_option_opt_aliases_match_tui_parser(self):
+        from hermes_cli.voice import normalize_voice_record_key_for_prompt_toolkit
+
+        assert normalize_voice_record_key_for_prompt_toolkit("control+o") == "c-o"
+        assert normalize_voice_record_key_for_prompt_toolkit("option+space") == "a-space"
+        assert normalize_voice_record_key_for_prompt_toolkit("opt+enter") == "a-enter"
+
+    def test_case_insensitive(self):
+        from hermes_cli.voice import normalize_voice_record_key_for_prompt_toolkit
+
+        assert normalize_voice_record_key_for_prompt_toolkit("Ctrl+B") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("CONTROL+O") == "c-o"
+
+    def test_non_string_falls_back_to_default(self):
+        from hermes_cli.voice import normalize_voice_record_key_for_prompt_toolkit
+
+        assert normalize_voice_record_key_for_prompt_toolkit(None) == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit(1) == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit(True) == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit({}) == "c-b"
+
+    def test_empty_string_falls_back(self):
+        from hermes_cli.voice import normalize_voice_record_key_for_prompt_toolkit
+
+        assert normalize_voice_record_key_for_prompt_toolkit("") == "c-b"
+
+    def test_super_win_fall_back_to_default_in_cli(self):
+        """prompt_toolkit has no super modifier, so ``super+b`` / ``win+o``
+        would crash the classic CLI at startup if passed through. Fall
+        back to the documented default; the CLI binding site is
+        expected to warn so users know the shortcut is TUI-only
+        (Copilot round-11 on #19835)."""
+        from hermes_cli.voice import normalize_voice_record_key_for_prompt_toolkit
+
+        assert normalize_voice_record_key_for_prompt_toolkit("super+b") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("win+o") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("windows+o") == "c-b"
+
+    # Round-10 Copilot review regressions on #19835.
+    def test_strips_whitespace_within_and_around(self):
+        """``ctrl + b`` / ``  option + space  `` are accepted by the TUI
+        parser; the CLI normalizer must mirror that or the same config
+        binds different shortcuts across runtimes."""
+        from hermes_cli.voice import normalize_voice_record_key_for_prompt_toolkit
+
+        assert normalize_voice_record_key_for_prompt_toolkit("ctrl + b") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("  option + space  ") == "a-space"
+
+    def test_named_key_aliases_collapse_to_prompt_toolkit_canonical(self):
+        """TUI accepts ``return`` / ``esc`` / ``bs`` / ``del`` etc.;
+        CLI must collapse to prompt_toolkit's canonical spelling
+        (``enter`` / ``escape`` / ``backspace`` / ``delete``)."""
+        from hermes_cli.voice import normalize_voice_record_key_for_prompt_toolkit
+
+        assert normalize_voice_record_key_for_prompt_toolkit("ctrl+return") == "c-enter"
+        assert normalize_voice_record_key_for_prompt_toolkit("ctrl+esc") == "c-escape"
+        assert normalize_voice_record_key_for_prompt_toolkit("ctrl+bs") == "c-backspace"
+        assert normalize_voice_record_key_for_prompt_toolkit("alt+del") == "a-delete"
+
+    def test_typoed_named_keys_fall_back_to_default(self):
+        """``ctrl+spcae`` would otherwise pass through as ``c-spcae`` and
+        prompt_toolkit would reject it at startup — fall back instead."""
+        from hermes_cli.voice import normalize_voice_record_key_for_prompt_toolkit
+
+        assert normalize_voice_record_key_for_prompt_toolkit("ctrl+spcae") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("ctrl+f5") == "c-b"
+
+    def test_bare_char_and_multi_modifier_fall_back(self):
+        """TUI parser rejects bare-char (``o``) and multi-modifier
+        (``ctrl+alt+r``) configs; the CLI normalizer must match."""
+        from hermes_cli.voice import normalize_voice_record_key_for_prompt_toolkit
+
+        assert normalize_voice_record_key_for_prompt_toolkit("o") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("b") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("ctrl+alt+r") == "c-b"
+
+    def test_reserved_ctrl_chars_fall_back(self):
+        """``ctrl+c`` / ``ctrl+d`` / ``ctrl+l`` are always claimed by
+        the CLI's prompt_toolkit input layer or terminal driver; match
+        the TUI parser's rejection to keep /voice status honest."""
+        from hermes_cli.voice import normalize_voice_record_key_for_prompt_toolkit
+
+        assert normalize_voice_record_key_for_prompt_toolkit("ctrl+c") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("ctrl+d") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("ctrl+l") == "c-b"
+
+    def test_unknown_modifier_falls_back(self):
+        """``meta+b`` is ambiguous on the wire (Alt on xterm, Cmd on
+        legacy macOS), same class as the TUI parser's rejection."""
+        from hermes_cli.voice import normalize_voice_record_key_for_prompt_toolkit
+
+        assert normalize_voice_record_key_for_prompt_toolkit("meta+b") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("shift+b") == "c-b"
+
+    # Round-14 Copilot review regression on #19835. On macOS the TUI
+    # parser rejects alt+c/d/l because hermes-ink reports Alt as
+    # ``key.meta`` and isActionMod(darwin) accepts it. The CLI
+    # normalizer must mirror that platform-gated rejection so shared
+    # configs like ``option+c`` don't bind Alt+C in the CLI while the
+    # TUI falls back to Ctrl+B.
+    def test_alt_cdl_rejected_on_macos(self, monkeypatch):
+        monkeypatch.setattr("sys.platform", "darwin")
+
+        from hermes_cli.voice import normalize_voice_record_key_for_prompt_toolkit
+
+        assert normalize_voice_record_key_for_prompt_toolkit("alt+c") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("alt+d") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("alt+l") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("option+c") == "c-b"
+        assert normalize_voice_record_key_for_prompt_toolkit("opt+d") == "c-b"
+        # Other alt letters still bind on darwin.
+        assert normalize_voice_record_key_for_prompt_toolkit("alt+r") == "a-r"
+        assert normalize_voice_record_key_for_prompt_toolkit("alt+space") == "a-space"
+
+    def test_alt_cdl_allowed_on_non_macos(self, monkeypatch):
+        monkeypatch.setattr("sys.platform", "linux")
+
+        from hermes_cli.voice import normalize_voice_record_key_for_prompt_toolkit
+
+        assert normalize_voice_record_key_for_prompt_toolkit("alt+c") == "a-c"
+        assert normalize_voice_record_key_for_prompt_toolkit("alt+d") == "a-d"
+        assert normalize_voice_record_key_for_prompt_toolkit("alt+l") == "a-l"
+
+
+class TestVoiceRecordKeyFromConfig:
+    """Round-11 Copilot review regression on #19835.
+
+    ``load_config()`` preserves YAML scalar overrides, so a hand-edited
+    ``voice: true`` or ``voice: cmd+b`` made the naive
+    ``cfg.get('voice', {}).get('record_key')`` chain raise
+    AttributeError before voice could run. The shape-safe extractor
+    returns None for every malformed shape so the call-site fallback
+    (``normalize_…`` / ``format_…``) surfaces the documented default.
+    """
+
+    def test_dict_voice_with_string_record_key(self):
+        from hermes_cli.voice import voice_record_key_from_config
+
+        assert voice_record_key_from_config({"voice": {"record_key": "ctrl+o"}}) == "ctrl+o"
+
+    def test_non_dict_config_root(self):
+        from hermes_cli.voice import voice_record_key_from_config
+
+        for bad_root in (None, True, 1, "ctrl+b", [], ["ctrl+b"]):
+            assert voice_record_key_from_config(bad_root) is None, bad_root
+
+    def test_non_dict_voice_entry(self):
+        from hermes_cli.voice import voice_record_key_from_config
+
+        for bad_voice in (None, True, "cmd+b", 42, ["ctrl+b"]):
+            assert voice_record_key_from_config({"voice": bad_voice}) is None, bad_voice
+
+    def test_missing_record_key_returns_none(self):
+        from hermes_cli.voice import voice_record_key_from_config
+
+        assert voice_record_key_from_config({"voice": {"beep_enabled": True}}) is None
+        assert voice_record_key_from_config({}) is None
+
+    def test_normalizer_accepts_extractor_output_directly(self):
+        """voice_record_key_from_config + normalize_… must compose —
+        None / non-string scalars all fall back to c-b."""
+        from hermes_cli.voice import (
+            normalize_voice_record_key_for_prompt_toolkit,
+            voice_record_key_from_config,
+        )
+
+        for raw in (None, True, 1, "cmd+b", ["ctrl+b"]):
+            extracted = voice_record_key_from_config({"voice": raw})
+            assert normalize_voice_record_key_for_prompt_toolkit(extracted) == "c-b"
+
+
+class TestFormatVoiceRecordKeyForStatus:
+    """Round-10 Copilot review regression on #19835.
+
+    ``/voice status`` used to print the raw scalar (``True`` / ``1``)
+    for non-string configs even though the actual binding falls back
+    to Ctrl+B. The formatter routes through the same normalizer so
+    status always matches what the CLI actually binds.
+    """
+
+    def test_ctrl_and_alt_letter_keys_render_canonically(self):
+        from hermes_cli.voice import format_voice_record_key_for_status
+
+        assert format_voice_record_key_for_status("ctrl+b") == "Ctrl+B"
+        assert format_voice_record_key_for_status("ctrl+o") == "Ctrl+O"
+        assert format_voice_record_key_for_status("alt+r") == "Alt+R"
+
+    def test_named_keys_render_in_title_case(self):
+        from hermes_cli.voice import format_voice_record_key_for_status
+
+        assert format_voice_record_key_for_status("ctrl+space") == "Ctrl+Space"
+        assert format_voice_record_key_for_status("alt+enter") == "Alt+Enter"
+        assert format_voice_record_key_for_status("ctrl+esc") == "Ctrl+Escape"
+
+    def test_aliases_render_via_normalized_form(self):
+        from hermes_cli.voice import format_voice_record_key_for_status
+
+        assert format_voice_record_key_for_status("control+o") == "Ctrl+O"
+        assert format_voice_record_key_for_status("option+space") == "Alt+Space"
+        assert format_voice_record_key_for_status("opt+enter") == "Alt+Enter"
+
+    def test_non_string_scalar_falls_back_to_ctrl_b_label(self):
+        from hermes_cli.voice import format_voice_record_key_for_status
+
+        # Copilot round-10 regression: previously /voice status printed
+        # the raw scalar ("True" / "1") even though the actual binding
+        # fell back to Ctrl+B.
+        assert format_voice_record_key_for_status(True) == "Ctrl+B"
+        assert format_voice_record_key_for_status(1) == "Ctrl+B"
+        assert format_voice_record_key_for_status(None) == "Ctrl+B"
+        assert format_voice_record_key_for_status({}) == "Ctrl+B"
+
+    def test_malformed_configs_fall_back_to_ctrl_b(self):
+        from hermes_cli.voice import format_voice_record_key_for_status
+
+        assert format_voice_record_key_for_status("ctrl+spcae") == "Ctrl+B"
+        assert format_voice_record_key_for_status("ctrl+alt+r") == "Ctrl+B"
+        assert format_voice_record_key_for_status("") == "Ctrl+B"
+        assert format_voice_record_key_for_status("  ") == "Ctrl+B"
+
+
 class TestStopWithoutStart:
    def test_returns_none_when_no_recording_active(self, monkeypatch):
        """Idempotent no-op: stop before start must not raise or touch state."""
@ -72,6 +309,7 @@ class TestContinuousAPI:

        # Isolate from any state left behind by other tests in the session.
        monkeypatch.setattr(voice, "_continuous_active", False)
+        monkeypatch.setattr(voice, "_continuous_stopping", False, raising=False)
        monkeypatch.setattr(voice, "_continuous_recorder", None)

        assert voice.is_continuous_active() is False
@ -106,11 +344,20 @@ class TestContinuousAPI:

        monkeypatch.setattr(voice, "_continuous_recorder", FakeRecorder())

-        voice.start_continuous(on_transcript=lambda _t: None)
+        started = voice.start_continuous(on_transcript=lambda _t: None)

        # The guard inside start_continuous short-circuits before rec.start()
+        assert started is True
        assert called["n"] == 0

+    def test_start_returns_false_while_stopping(self, monkeypatch):
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(voice, "_continuous_active", False)
+        monkeypatch.setattr(voice, "_continuous_stopping", True, raising=False)
+
+        assert voice.start_continuous(on_transcript=lambda _t: None) is False
+

 class TestContinuousLoopSimulation:
    """End-to-end simulation of the VAD loop with a fake recorder.
@ -131,6 +378,8 @@ class TestContinuousLoopSimulation:
        monkeypatch.setattr(voice, "_continuous_on_transcript", None)
        monkeypatch.setattr(voice, "_continuous_on_status", None)
        monkeypatch.setattr(voice, "_continuous_on_silent_limit", None)
+        monkeypatch.setattr(voice, "_continuous_auto_restart", True, raising=False)
+        monkeypatch.setattr(voice, "_play_beep", lambda *_, **__: None)

        class FakeRecorder:
            _silence_threshold = 200
@ -144,13 +393,20 @@ class TestContinuousLoopSimulation:
                self.cancelled = 0
                # Preset WAV path returned by stop()
                self.next_stop_wav = "/tmp/fake.wav"
+                self.fail_stop = False
+                self.fail_next_start = False

            def start(self, on_silence_stop=None):
+                if self.fail_next_start:
+                    self.fail_next_start = False
+                    raise RuntimeError("boom")
                self.start_calls += 1
                self.last_callback = on_silence_stop
                self.is_recording = True

            def stop(self):
+                if self.fail_stop:
+                    raise RuntimeError("stop failed")
                self.stopped += 1
                self.is_recording = False
                return self.next_stop_wav
@ -196,6 +452,204 @@ class TestContinuousLoopSimulation:

        voice.stop_continuous()

+    def test_auto_restart_false_stops_after_first_transcript(self, fake_recorder, monkeypatch):
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": "single shot"},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        statuses = []
+
+        voice.start_continuous(
+            on_transcript=lambda t: transcripts.append(t),
+            on_status=lambda s: statuses.append(s),
+            auto_restart=False,
+        )
+        fake_recorder.last_callback()
+
+        assert transcripts == ["single shot"]
+        assert fake_recorder.start_calls == 1
+        assert statuses == ["listening", "transcribing", "idle"]
+        assert voice.is_continuous_active() is False
+
+    def test_auto_restart_false_retains_silent_strikes_across_starts(
+        self, fake_recorder, monkeypatch
+    ):
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": ""},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        silent_limit_fired = []
+
+        for _ in range(3):
+            voice.start_continuous(
+                on_transcript=lambda _t: None,
+                on_silent_limit=lambda: silent_limit_fired.append(True),
+                auto_restart=False,
+            )
+            fake_recorder.last_callback()
+
+        assert silent_limit_fired == [True]
+        assert voice.is_continuous_active() is False
+        assert fake_recorder.start_calls == 3
+
+    def test_force_transcribe_stop_delivers_current_buffer(self, fake_recorder, monkeypatch):
+        import hermes_cli.voice as voice
+
+        class ImmediateThread:
+            def __init__(self, target, daemon=False):
+                self.target = target
+
+            def start(self):
+                self.target()
+
+        monkeypatch.setattr(voice.threading, "Thread", ImmediateThread)
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": "manual stop"},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        statuses = []
+
+        voice.start_continuous(
+            on_transcript=lambda t: transcripts.append(t),
+            on_status=lambda s: statuses.append(s),
+        )
+        voice.stop_continuous(force_transcribe=True)
+
+        assert fake_recorder.stopped == 1
+        assert transcripts == ["manual stop"]
+        assert statuses == ["listening", "transcribing", "idle"]
+        assert voice.is_continuous_active() is False
+
+    def test_force_transcribe_empty_single_shots_hit_silent_limit(
+        self, fake_recorder, monkeypatch
+    ):
+        import hermes_cli.voice as voice
+
+        class ImmediateThread:
+            def __init__(self, target, daemon=False):
+                self.target = target
+
+            def start(self):
+                self.target()
+
+        monkeypatch.setattr(voice.threading, "Thread", ImmediateThread)
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": ""},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        silent_limit_fired = []
+
+        for _ in range(3):
+            voice.start_continuous(
+                on_transcript=lambda _t: None,
+                on_silent_limit=lambda: silent_limit_fired.append(True),
+                auto_restart=False,
+            )
+            voice.stop_continuous(force_transcribe=True)
+
+        assert silent_limit_fired == [True]
+        assert fake_recorder.stopped == 3
+        assert voice._continuous_no_speech_count == 0
+
+    def test_force_transcribe_valid_single_shot_resets_silent_strikes(
+        self, fake_recorder, monkeypatch
+    ):
+        import hermes_cli.voice as voice
+
+        class ImmediateThread:
+            def __init__(self, target, daemon=False):
+                self.target = target
+
+            def start(self):
+                self.target()
+
+        monkeypatch.setattr(voice.threading, "Thread", ImmediateThread)
+        monkeypatch.setattr(voice, "_continuous_no_speech_count", 2)
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": "manual stop"},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        silent_limit_fired = []
+
+        voice.start_continuous(
+            on_transcript=lambda t: transcripts.append(t),
+            on_silent_limit=lambda: silent_limit_fired.append(True),
+            auto_restart=False,
+        )
+        voice.stop_continuous(force_transcribe=True)
+
+        assert transcripts == ["manual stop"]
+        assert silent_limit_fired == []
+        assert voice._continuous_no_speech_count == 0
+
+    def test_force_transcribe_stop_failure_cancels_and_clears_stopping(
+        self, fake_recorder, monkeypatch
+    ):
+        import hermes_cli.voice as voice
+
+        class ImmediateThread:
+            def __init__(self, target, daemon=False):
+                self.target = target
+
+            def start(self):
+                self.target()
+
+        monkeypatch.setattr(voice.threading, "Thread", ImmediateThread)
+        fake_recorder.fail_stop = True
+
+        statuses = []
+        voice.start_continuous(
+            on_transcript=lambda _t: None,
+            on_status=lambda s: statuses.append(s),
+        )
+        voice.stop_continuous(force_transcribe=True)
+
+        assert fake_recorder.cancelled == 1
+        assert statuses == ["listening", "transcribing", "idle"]
+        assert voice.is_continuous_active() is False
+        assert voice._continuous_stopping is False
+
+    def test_restart_failure_reports_idle(self, fake_recorder, monkeypatch):
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": "hello world"},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        statuses = []
+        voice.start_continuous(on_transcript=lambda _t: None, on_status=statuses.append)
+
+        fake_recorder.fail_next_start = True
+        fake_recorder.last_callback()
+
+        assert statuses == ["listening", "transcribing", "idle"]
+        assert voice.is_continuous_active() is False
+
    def test_silent_limit_halts_loop_after_three_strikes(self, fake_recorder, monkeypatch):
        import hermes_cli.voice as voice

--- a/tests/hermes_cli/test_web_server.py
+++ b/tests/hermes_cli/test_web_server.py
@ -1826,6 +1826,117 @@ class TestNormaliseThemeExtensions:
        assert r["componentStyles"]["card"] == {"opacity": "0.8", "zIndex": "5"}


+class TestPluginAPIAuth:
+    """Tests that plugin API routes require the session token (issue #19533)."""
+
+    @pytest.fixture(autouse=True)
+    def _setup_test_client(self, monkeypatch, _isolate_hermes_home):
+        """Create a TestClient without the session token header."""
+        try:
+            from starlette.testclient import TestClient
+        except ImportError:
+            pytest.skip("fastapi/starlette not installed")
+
+        import hermes_state
+        from hermes_constants import get_hermes_home
+        from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
+
+        monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db")
+
+        self.client = TestClient(app)
+        self.auth_client = TestClient(app)
+        self.auth_client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN
+
+    def test_plugin_route_requires_auth(self):
+        """Plugin API routes should return 401 without a valid session token."""
+        # Use a known plugin route (kanban board)
+        resp = self.client.get("/api/plugins/kanban/board")
+        assert resp.status_code == 401
+
+    def test_plugin_route_allows_auth(self):
+        """Plugin API routes should work with a valid session token.
+
+        Use ``/api/plugins/example/hello`` from the example-dashboard plugin —
+        a stable, side-effect-free GET that's always loaded in tests. With a
+        valid token the handler should run (200); without one the middleware
+        should 401 before the handler is reached.
+        """
+        # Without auth: middleware blocks before reaching the handler.
+        resp = self.client.get("/api/plugins/example/hello")
+        assert resp.status_code == 401
+
+        # With auth: handler runs.
+        resp = self.auth_client.get("/api/plugins/example/hello")
+        assert resp.status_code == 200
+
+    def test_plugin_post_requires_auth(self):
+        """Plugin POST routes should return 401 without a valid session token."""
+        resp = self.client.post("/api/plugins/kanban/tasks", json={"title": "test"})
+        assert resp.status_code == 401
+
+    def test_plugin_patch_requires_auth(self):
+        """Plugin PATCH routes should return 401 without a valid session token.
+
+        PATCH is the mutation method most commonly used by the dashboard for
+        kanban task edits — explicitly cover it so a future middleware
+        regression that whitelists non-GET methods can't sneak through.
+        """
+        resp = self.client.patch(
+            "/api/plugins/kanban/tasks/t_fake",
+            json={"title": "renamed"},
+        )
+        assert resp.status_code == 401
+
+    def test_plugin_delete_requires_auth(self):
+        """Plugin DELETE routes should return 401 without a valid session token."""
+        resp = self.client.delete("/api/plugins/kanban/tasks/t_fake")
+        assert resp.status_code == 401
+
+    def test_non_kanban_plugin_route_requires_auth(self):
+        """Auth must be plugin-agnostic, not kanban-specific.
+
+        The middleware fix is at the gate level (no per-plugin allowlist),
+        so any plugin's API surface — kanban, hermes-achievements, future
+        plugins — must require the session token. Hit a non-kanban plugin
+        path to lock that in.
+        """
+        # Real plugin path (hermes-achievements is loaded by default).
+        resp = self.client.get("/api/plugins/hermes-achievements/overview")
+        assert resp.status_code == 401
+        # Same for an arbitrary plugin namespace that doesn't even exist —
+        # the middleware should 401 before routing decides 404, so an
+        # attacker can't fingerprint plugin names by status codes.
+        resp = self.client.get("/api/plugins/_definitely_not_a_plugin_/anything")
+        assert resp.status_code == 401
+
+    def test_plugin_websocket_unaffected_by_http_middleware(self):
+        """The kanban /events WebSocket has its own ``?token=`` check;
+        the HTTP middleware change must not start gating WS upgrades.
+
+        Starlette doesn't run HTTP middleware on WebSocket upgrades anyway,
+        but pin the behavior so a future refactor that moves auth into a
+        shared layer can't silently break the WS auth contract.
+        """
+        from starlette.websockets import WebSocketDisconnect
+        from hermes_cli.web_server import _SESSION_TOKEN
+
+        # Without a token the WS endpoint must close the upgrade itself
+        # (its own _check_ws_token), NOT 401 from the HTTP middleware.
+        try:
+            with self.client.websocket_connect(
+                "/api/plugins/kanban/events"
+            ):
+                pass  # if we got here without disconnect, the WS accepted us
+        except WebSocketDisconnect:
+            pass  # expected — WS endpoint rejected via its own check
+        except Exception:
+            # The kanban plugin may not be mounted in this test environment,
+            # in which case the route doesn't exist at all (3xx/4xx during
+            # upgrade). That's fine for this regression — it only matters
+            # that the HTTP middleware didn't start intercepting WS upgrades.
+            pass
+
+
 class TestDashboardPluginManifestExtensions:
    """Tests for the extended plugin manifest fields (tab.override,
    tab.hidden, slots) read by _discover_dashboard_plugins()."""
--- a/tests/hermes_cli/test_web_ui_build.py
+++ b/tests/hermes_cli/test_web_ui_build.py
@ -13,7 +13,7 @@ from unittest.mock import patch

 import pytest

-from hermes_cli.main import _web_ui_build_needed, _build_web_ui
+from hermes_cli.main import _web_ui_build_needed, _build_web_ui, _run_npm_install_deterministic


 def _touch(path: Path, offset: float = 0.0) -> None:
@ -119,3 +119,92 @@ class TestBuildWebUISkipsWhenFresh:

        assert result is True
        assert mock_run.call_count == 2  # npm install + npm run build
+
+    def test_npm_install_uses_utf8_replace_output_decoding(self, tmp_path):
+        web_dir, _ = _make_web_dir(tmp_path)
+        (web_dir / "package-lock.json").write_text("{}", encoding="utf-8")
+
+        mock_cp = __import__("subprocess").CompletedProcess([], 0, stdout="", stderr="")
+        with patch("hermes_cli.main.subprocess.run", return_value=mock_cp) as mock_run:
+            result = _run_npm_install_deterministic("/usr/bin/npm", web_dir)
+
+        assert result.returncode == 0
+        _, kwargs = mock_run.call_args
+        assert kwargs["text"] is True
+        assert kwargs["encoding"] == "utf-8"
+        assert kwargs["errors"] == "replace"
+
+    def test_web_build_uses_utf8_replace_output_decoding(self, tmp_path):
+        web_dir, _ = _make_web_dir(tmp_path)
+
+        mock_cp = __import__("subprocess").CompletedProcess([], 0, stdout="", stderr="")
+        with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
+             patch("hermes_cli.main.subprocess.run", side_effect=[mock_cp, mock_cp]) as mock_run:
+            result = _build_web_ui(web_dir)
+
+        assert result is True
+        _, build_kwargs = mock_run.call_args_list[1]
+        assert build_kwargs["text"] is True
+        assert build_kwargs["encoding"] == "utf-8"
+        assert build_kwargs["errors"] == "replace"
+
+
+class TestBuildWebUIRetryAndStaleFallback:
+    """Coverage for the retry + stale-dist fallback added in #23824 / issue #23817."""
+
+    def test_retries_build_once_on_failure(self, tmp_path):
+        web_dir, _ = _make_web_dir(tmp_path)
+        Subprocess = __import__("subprocess")
+        # install: success; build attempt 1: fail; build attempt 2: success
+        install_ok = Subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        build_fail = Subprocess.CompletedProcess([], 1, stdout="", stderr="EPERM")
+        build_ok = Subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
+             patch("hermes_cli.main._time.sleep") as mock_sleep, \
+             patch("hermes_cli.main.subprocess.run",
+                   side_effect=[install_ok, build_fail, build_ok]) as mock_run:
+            result = _build_web_ui(web_dir)
+
+        assert result is True
+        assert mock_run.call_count == 3  # install + build + retry
+        mock_sleep.assert_called_once_with(3)
+
+    def test_falls_back_to_stale_dist_when_retry_also_fails(self, tmp_path, capsys):
+        web_dir, dist_dir = _make_web_dir(tmp_path)
+        # Stale dist exists but is older than source
+        _touch(dist_dir / "index.html", offset=-100)
+        _touch(web_dir / "src" / "App.tsx")  # newer source -> build_needed=True
+
+        Subprocess = __import__("subprocess")
+        install_ok = Subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        build_fail = Subprocess.CompletedProcess([], 1, stdout="", stderr="vite ENOMEM")
+        with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
+             patch("hermes_cli.main._time.sleep"), \
+             patch("hermes_cli.main.subprocess.run",
+                   side_effect=[install_ok, build_fail, build_fail]):
+            result = _build_web_ui(web_dir, fatal=True)
+
+        # MUST return True (serve stale) — issue #23817 — even with fatal=True,
+        # because cmd_dashboard passes fatal=True and is the primary caller.
+        assert result is True
+        out = capsys.readouterr().out
+        assert "serving stale dist as fallback" in out
+        assert "vite ENOMEM" in out  # stderr surfaced to user
+
+    def test_hard_fails_when_no_dist_to_fall_back_to(self, tmp_path, capsys):
+        web_dir, _ = _make_web_dir(tmp_path)
+
+        Subprocess = __import__("subprocess")
+        install_ok = Subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        build_fail = Subprocess.CompletedProcess([], 1, stdout="", stderr="vite ENOMEM")
+        with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
+             patch("hermes_cli.main._time.sleep"), \
+             patch("hermes_cli.main.subprocess.run",
+                   side_effect=[install_ok, build_fail, build_fail]):
+            result = _build_web_ui(web_dir, fatal=True)
+
+        assert result is False
+        out = capsys.readouterr().out
+        assert "Web UI build failed" in out
+        assert "vite ENOMEM" in out
+        assert "Run manually" in out